From c5157a9b5206bf5ea4943089639e039f9c018402 Mon Sep 17 00:00:00 2001 From: Mustafa Eyceoz Date: Mon, 12 Feb 2024 09:56:56 -0500 Subject: [PATCH] Update default client-side batches --- language/llama2-70b/SUT.py | 2 +- language/llama2-70b/api-endpoint-artifacts/benchmark.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/language/llama2-70b/SUT.py b/language/llama2-70b/SUT.py index c146731e6d..9734587529 100644 --- a/language/llama2-70b/SUT.py +++ b/language/llama2-70b/SUT.py @@ -112,7 +112,7 @@ def __init__(self, if not batch_size: if device == "cpu": - batch_size = 512 + batch_size = 2000 else: batch_size = 32 # Reduce to 8 if using 4 GPUs, 16 for 8. self.batch_size = batch_size diff --git a/language/llama2-70b/api-endpoint-artifacts/benchmark.yaml b/language/llama2-70b/api-endpoint-artifacts/benchmark.yaml index a9e10c15b1..402342a4e6 100644 --- a/language/llama2-70b/api-endpoint-artifacts/benchmark.yaml +++ b/language/llama2-70b/api-endpoint-artifacts/benchmark.yaml @@ -6,7 +6,7 @@ spec: restartPolicy: Never containers: - name: mlperf-env - image: quay.io/meyceoz/mlperf-inference:v6 + image: quay.io/meyceoz/mlperf-inference:v7 resources: requests: memory: 20000Mi