Merge branch 'main' of https://github.com/arc53/llm-price-compass

arc53 · Aug 17, 2024 · 7828c33 · 7828c33
2 parents 97579f4 + 675ba59
commit 7828c33
Show file tree

Hide file tree

Showing 3 changed files with 14 additions and 11 deletions.
diff --git a/LLM-Provider-comparison.csv b/LLM-Provider-comparison.csv
@@ -1,10 +1,11 @@
-Inference providers,LLama 3.1 8b instruct ingest price,LLama 3.1 8b instruct output price,LLama 3.1 70b instruct ingest price,LLama 3.1 70b instruct output price
-Groq,$0.05,$0.08,$0.59,$0.79
-Deepinfra,$0.06,$0.06,$0.52,$0.75
-OctoAI,$0.15,$0.15,$0.90,$0.90
-"Lepton AI
-",$0.07,$0.07,$0.80,$0.80
-Fireworks AI,$0.20,$0.20,$0.90,$0.90
-Perplexity,$0.20,$0.20,$1.00,$1.00
-Together.ai,$0.18,$0.18,$0.88,$0.88
-Databricks,,,$1.00,$1.00
+Inference providers,LLama 3.1 8b instruct ingest price,LLama 3.1 8b instruct output price,LLama 3.1 70b instruct ingest price,LLama 3.1 70b instruct output price
+Groq,$0.05,$0.08,$0.59,$0.79
+Deepinfra,$0.06,$0.06,$0.52,$0.75
+OctoAI,$0.15,$0.15,$0.90,$0.90
+Lepton AI,$0.07,$0.07,$0.80,$0.80
+Fireworks AI,$0.20,$0.20,$0.90,$0.90
+Perplexity,$0.20,$0.20,$1.00,$1.00
+Together.ai,$0.18,$0.18,$0.88,$0.88
+Databricks,,,$1.00,$1.00
+NVIDIA H100,0.13819152503408116,0.14993358240180893,,
+NVIDIA L4,0.41777757572197777,0.4546602500521734,,
diff --git a/gpu-benchmarks.csv b/gpu-benchmarks.csv
@@ -1,3 +1,3 @@
 "backend","benchmark_duration","best_of","completed","cost_per_hour","date","duration","gpu","input_throughput","mean_e2e_latency","mean_itl","mean_tpot","mean_ttft","median_e2e_latency","median_itl","median_tpot","median_ttft","model_id","num_prompts","output_throughput","p99_itl","p99_tpot","p99_ttft","provider","request_rate","request_throughput","successful_requests","tokenizer_id","total_generated_tokens","total_generated_tokens_ret","total_input_tokens","total_output_tokens","traffic_request_rate","use_beam_search"
-"sglang",35.45,,,3.02,"20240814-155812",,"NVIDIA H100",6070.48,14328.17,71.79,145.31,773.1,13759.53,49.35,81.55,193.72,,,5595.07,291.88,915.34,4118.65,"scaleway",,28.21,1000,,198343,197967,215196,,200,
+"sglang",35.45,,,3.02,"20240814-155812",,"NVIDIA H100",6070.48,14328.17,71.79,145.31,773.1,13759.53,49.35,81.55,193.72,"meta-llama/Meta-Llama-3.1-8B-Instruct",,5595.07,291.88,915.34,4118.65,"scaleway",,28.21,1000,"meta-llama/Meta-Llama-3.1-8B-Instruct",198343,197967,215196,,200,
 "vllm",,1,1000,0.84,"20240813-143958",385.30312793600024,"NVIDIA L4",558.5109084184351,,,,,,,,,"meta-llama/Meta-Llama-3.1-8B-Instruct",1000,513.2037236740132,,,,"scaleway","inf",2.5953591536015312,,"meta-llama/Meta-Llama-3.1-8B-Instruct",,,215196,197739,,false
diff --git a/gpu-benchmarks.json b/gpu-benchmarks.json
@@ -5,6 +5,8 @@
     "gpu": "NVIDIA H100",
     "cost_per_hour": 3.02,
     "backend": "sglang",
+    "model_id": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "tokenizer_id": "meta-llama/Meta-Llama-3.1-8B-Instruct",
     "traffic_request_rate": 200,
     "successful_requests": 1000,
     "benchmark_duration": 35.45,