diff --git a/language/llama2-70b/SUT.py b/language/llama2-70b/SUT.py
index 6fbc756561..8065023493 100644
--- a/language/llama2-70b/SUT.py
+++ b/language/llama2-70b/SUT.py
@@ -112,7 +112,7 @@ def __init__(self,
 
         if not batch_size:
             if device == "cpu":
-                batch_size = 512
+                batch_size = 2000
             else:
                 batch_size = 32  # Reduce to 8 if using 4 GPUs, 16 for 8.
         self.batch_size = batch_size
diff --git a/language/llama2-70b/api-endpoint-artifacts/benchmark.yaml b/language/llama2-70b/api-endpoint-artifacts/benchmark.yaml
index a9e10c15b1..402342a4e6 100644
--- a/language/llama2-70b/api-endpoint-artifacts/benchmark.yaml
+++ b/language/llama2-70b/api-endpoint-artifacts/benchmark.yaml
@@ -6,7 +6,7 @@ spec:
   restartPolicy: Never
   containers:
   - name: mlperf-env
-    image: quay.io/meyceoz/mlperf-inference:v6
+    image: quay.io/meyceoz/mlperf-inference:v7
     resources:
       requests:
         memory: 20000Mi