Tmp

Narsil · Narsil · commit a3864e6d5b3d · 2025-04-25T11:51:15.000+02:00
diff --git a/flake.lock b/flake.lock
diff --git a/flake.nix b/flake.nix
@@ -0,0 +1,28 @@
+# Development flake: exposes a default dev shell for every default system.
+{
+  description = "Inference Benchmarker - A terminal-based benchmarker for LLMs";
+
+  # Flake inputs: the package set and the per-system helper library.
+  inputs.nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
+  inputs.flake-utils.url = "github:numtide/flake-utils";
+
+  outputs =
+    { nixpkgs, flake-utils, ... }:
+    flake-utils.lib.eachDefaultSystem (
+      system:
+      let
+        # Package set instantiated for the system currently being evaluated.
+        pkgs = import nixpkgs { inherit system; };
+      in
+      {
+        # Default development shell with the tools listed below on hand.
+        devShells.default = pkgs.mkShell {
+          buildInputs = [
+            pkgs.rustup
+            pkgs.pkg-config
+            pkgs.openssl
+          ];
+        };
+      }
+    );
+}
diff --git a/src/benchmark.rs b/src/benchmark.rs
@@ -337,6 +337,81 @@ impl Benchmark {
     }
 
+    /// Runs the "performance" benchmark and records its results in the report.
+    ///
+    /// Emits a `BenchmarkStart` event, then runs a `ConstantVUs` scheduler for
+    /// `self.config.duration` at each virtual-user count, adding every result to
+    /// `self.report`. Finally closes the progress handler and emits
+    /// `BenchmarkEnd` with the accumulated successful/failed request counts.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if an event cannot be sent on the event bus, if a
+    /// scheduler run fails, or if the progress handler is no longer receiving.
     pub async fn run_perf(&mut self) -> anyhow::Result<()> {
+        info!("Running performance benchmark");
+
+        let id = "performance".to_string();
+
+        // notify start event
+        self.event_bus.send(Event::BenchmarkStart(BenchmarkEvent {
+            id: id.clone(),
+            scheduler_type: ExecutorType::ConstantVUs,
+            request_throughput: None,
+            progress: 0.0,
+            results: None,
+            successful_requests: 0,
+            failed_requests: 0,
+        }))?;
+
+        // create progress handler
+        let tx = self.handle_progress(id.clone()).await;
+
+        let mut successful_requests = 0u64;
+        let mut failed_requests = 0u64;
+
+        // NOTE(review): `(1usize..2)` yields only 1, so exactly one step runs with a
+        // single VU — confirm whether a wider sweep was intended.
+        for i in (1usize..2).map(|i| i.pow(2)) {
+            // start scheduler
+            let mut scheduler = scheduler::Scheduler::new(
+                id.clone(),
+                self.backend.clone(),
+                ExecutorType::ConstantVUs,
+                executors::ExecutorConfig {
+                    max_vus: i as u64,
+                    duration: self.config.duration,
+                    rate: None,
+                },
+                self.requests.clone(),
+                tx.clone(),
+                self.stop_sender.clone(),
+            );
+            scheduler.run().await?;
+            let results = scheduler.get_results().lock().await.clone();
+            info!("Result {results:?}");
+            // Tally the counters first so `results` can be moved into the report
+            // instead of being cloned a second time.
+            successful_requests += results.successful_requests() as u64;
+            failed_requests += results.failed_requests() as u64;
+            self.report.add_benchmark_result(results);
+        }
+
+        // send None to close the progress handler; report a failed send as an
+        // error instead of panicking
+        tx.send(None)
+            .await
+            .map_err(|_| anyhow::anyhow!("progress handler stopped before benchmark end"))?;
+
+        // notify end event
+        self.event_bus.send(Event::BenchmarkEnd(BenchmarkEvent {
+            id,
+            scheduler_type: ExecutorType::ConstantVUs,
+            request_throughput: Some(0.0),
+            progress: 100.0,
+            results: None,
+            successful_requests,
+            failed_requests,
+        }))?;
         Ok(())
     }
 
diff --git a/src/main.rs b/src/main.rs
@@ -1,7 +1,8 @@
+use anyhow::Result;
 use clap::error::ErrorKind::InvalidValue;
 use clap::{ArgGroup, Error, Parser};
 use inference_benchmarker::{run, BenchmarkKind, RunConfiguration, TokenizeOptions};
-use log::{debug, error};
+use log::debug;
 use reqwest::Url;
 use std::collections::HashMap;
 use std::time::Duration;
@@ -23,7 +24,7 @@ struct Args {
     #[clap(default_value = "128", short, long, env, group = "group_manual")]
     max_vus: u64,
     /// The duration of each benchmark step
-    #[clap(default_value = "120s", short, long, env, group = "group_manual")]
+    #[clap(default_value = "10s", short, long, env, group = "group_manual")]
     #[arg(value_parser = parse_duration)]
     duration: Duration,
     /// A list of rates of requests to send per second (only valid for the ConstantArrivalRate benchmark).
@@ -38,7 +39,7 @@ struct Args {
     profile: Option<String>,
     /// The kind of benchmark to run (throughput, sweep, optimum)
     #[clap(
-        default_value = "sweep",
+        default_value = "perf",
         short,
         long,
         env,
@@ -176,7 +177,7 @@ fn parse_tokenizer_options(s: &str) -> Result<TokenizeOptions, Error> {
 }
 
 #[tokio::main]
-async fn main() {
+async fn main() -> Result<()> {
     let args = Args::parse();
     let git_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown");
     println!(
@@ -234,14 +235,5 @@ async fn main() {
         model_name,
         run_id,
     };
-    let main_thread = tokio::spawn(async move {
-        match run(run_config, stop_sender_clone).await {
-            Ok(_) => {}
-            Err(e) => {
-                error!("Fatal: {:?}", e);
-                println!("Fatal: {:?}", e)
-            }
-        };
-    });
-    let _ = main_thread.await;
+    run(run_config, stop_sender_clone).await
 }
diff --git a/src/requests.rs b/src/requests.rs
@@ -140,7 +140,7 @@ impl TextGenerationBackend for OpenAITextGenerationBackend {
         let body = OpenAITextGenerationRequest {
             model: self.model_name.clone(),
             messages,
-            max_tokens: request.num_decode_tokens,
+            max_tokens: Some(20),
             stream: true,
             stop: None,
             temperature: 0.0,
@@ -154,7 +154,6 @@ impl TextGenerationBackend for OpenAITextGenerationBackend {
             )
             .json(&serde_json::json!(body))
             .timeout(self.timeout);
-        info!("Sending request");
         // start timer
         aggregated_response.start();
         let mut es = EventSource::new(req).unwrap();
@@ -218,24 +217,14 @@ impl TextGenerationBackend for OpenAITextGenerationBackend {
                     };
                 }
                 Err(e) => {
-                    error!("Got SSE error : {e}");
                     match e {
-                        Error::Utf8(_) => {
-                            aggregated_response.fail();
-                        }
-                        Error::Parser(_) => {
-                            aggregated_response.fail();
-                        }
-                        Error::Transport(_) => {
-                            aggregated_response.fail();
-                        }
-                        Error::InvalidContentType(_, _) => {
-                            aggregated_response.fail();
-                        }
-                        Error::InvalidStatusCode(_, _) => {
-                            aggregated_response.fail();
-                        }
-                        Error::InvalidLastEventId(_) => {
+                        Error::Utf8(_)
+                        | Error::Parser(_)
+                        | Error::Transport(_)
+                        | Error::InvalidContentType(_, _)
+                        | Error::InvalidStatusCode(_, _)
+                        | Error::InvalidLastEventId(_) => {
+                            error!("Got SSE error : {e}");
                             aggregated_response.fail();
                         }
                         Error::StreamEnded => {
diff --git a/src/results.rs b/src/results.rs
@@ -241,7 +241,7 @@ impl BenchmarkResults {
     /// Calculate the quantile of a given data set using interpolation method
     /// Results are similar to `numpy.percentile`
     fn quantile_duration(&self, mut data: Vec<Duration>, quantile: f64) -> anyhow::Result<f64> {
-        if self.is_ready() {
+        if self.is_ready() && data.len() > 1 {
             data.sort();
             let i = (quantile * (data.len() - 1) as f64).floor();
             let delta = (data.len() - 1) as f64 * quantile - i;