From c5157a9b5206bf5ea4943089639e039f9c018402 Mon Sep 17 00:00:00 2001
From: Mustafa Eyceoz <meyceoz@redhat.com>
Date: Mon, 12 Feb 2024 09:56:56 -0500
Subject: [PATCH] Raise default client-side CPU batch size to 2000; use image v7

---
 language/llama2-70b/SUT.py                                | 2 +-
 language/llama2-70b/api-endpoint-artifacts/benchmark.yaml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/language/llama2-70b/SUT.py b/language/llama2-70b/SUT.py
index c146731e6d..9734587529 100644
--- a/language/llama2-70b/SUT.py
+++ b/language/llama2-70b/SUT.py
@@ -112,7 +112,7 @@ def __init__(self,
 
         if not batch_size:
             if device == "cpu":
-                batch_size = 512
+                batch_size = 2000
             else:
                 batch_size = 32  # Reduce to 8 if using 4 GPUs, 16 for 8.
         self.batch_size = batch_size
diff --git a/language/llama2-70b/api-endpoint-artifacts/benchmark.yaml b/language/llama2-70b/api-endpoint-artifacts/benchmark.yaml
index a9e10c15b1..402342a4e6 100644
--- a/language/llama2-70b/api-endpoint-artifacts/benchmark.yaml
+++ b/language/llama2-70b/api-endpoint-artifacts/benchmark.yaml
@@ -6,7 +6,7 @@ spec:
   restartPolicy: Never
   containers:
   - name: mlperf-env
-    image: quay.io/meyceoz/mlperf-inference:v6
+    image: quay.io/meyceoz/mlperf-inference:v7
     resources:
       requests:
         memory: 20000Mi