Skip to content

Commit f70d9c3

Browse files
authored
chatqna benchmark for v1.1 release (opea-project#1120)
Signed-off-by: chensuyue <[email protected]>
Signed-off-by: Cathy Zhang <[email protected]>
1 parent 8808b51 commit f70d9c3

File tree

4 files changed

+8
-51
lines changed

4 files changed

+8
-51
lines changed

ChatQnA/benchmark/performance/kubernetes/intel/gaudi/benchmark.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,9 @@ function run_benchmark() {
7575
export DEPLOYMENT_TYPE=${deployment_type}
7676
export SERVICE_IP=${service_ip:-"None"}
7777
export SERVICE_PORT=${service_port:-"None"}
78+
export LOAD_SHAPE=${load_shape:-"constant"}
79+
export CONCURRENT_LEVEL=${concurrent_level:-5}
80+
export ARRIVAL_RATE=${arrival_rate:-1.0}
7881
if [[ -z $USER_QUERIES ]]; then
7982
user_query=$((query_per_node*node_number))
8083
export USER_QUERIES="[${user_query}, ${user_query}, ${user_query}, ${user_query}]"

ChatQnA/benchmark/performance/kubernetes/intel/gaudi/benchmark.yaml

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,12 @@ test_suite_config: # Overall configuration settings for the test suite
1717
llm_model: "Intel/neural-chat-7b-v3-3" # The LLM model used for the test
1818
test_output_dir: "${TEST_OUTPUT_DIR}" # The directory to store the test output
1919
load_shape: # Tenant concurrency pattern
20-
name: constant # poisson or constant(locust default load shape)
20+
name: ${LOAD_SHAPE} # poisson or constant(locust default load shape)
2121
params: # Loadshape-specific parameters
2222
constant: # Constant load shape specific parameters, activate only if load_shape.name is constant
23-
concurrent_level: 5 # If user_queries is specified, concurrent_level is target number of requests per user. If not, it is the number of simulated users
24-
# arrival_rate: 1.0 # Request arrival rate. If set, concurrent_level will be overridden, constant load will be generated based on arrival-rate
23+
concurrent_level: ${CONCURRENT_LEVEL} # If user_queries is specified, concurrent_level is target number of requests per user. If not, it is the number of simulated users
2524
poisson: # Poisson load shape specific parameters, activate only if load_shape.name is poisson
26-
arrival_rate: 1.0 # Request arrival rate
27-
namespace: "my-chatqna"
25+
arrival_rate: ${ARRIVAL_RATE} # Request arrival rate
2826

2927
test_cases:
3028
chatqna:

ChatQnA/benchmark/performance/kubernetes/intel/gaudi/deploy.py

Lines changed: 0 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -103,39 +103,6 @@ def delete_helm_repo(repo_name):
103103
print(f"Failed to delete Helm repo {repo_name}. It may not exist.")
104104

105105

106-
def configmap_exists(name, namespace):
107-
"""Check if a ConfigMap exists in the specified namespace."""
108-
check_command = ["kubectl", "get", "configmap", name, "-n", namespace]
109-
result = subprocess.run(check_command, check=False, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
110-
return result.returncode == 0
111-
112-
113-
def create_configmap(name, namespace, data):
114-
"""Create a ConfigMap if it does not already exist."""
115-
if configmap_exists(name, namespace):
116-
print(f"ConfigMap '{name}' already exists in namespace '{namespace}', skipping creation.")
117-
else:
118-
create_command = (
119-
["kubectl", "create", "configmap", name]
120-
+ [f"--from-literal={k}={v}" for k, v in data.items()]
121-
+ ["-n", namespace]
122-
)
123-
print(f"Creating ConfigMap '{name}' in namespace '{namespace}'...")
124-
subprocess.run(create_command, check=True)
125-
print(f"ConfigMap '{name}' created successfully.")
126-
127-
128-
def delete_configmap(name, namespace):
129-
"""Delete a ConfigMap if it exists."""
130-
if configmap_exists(name, namespace):
131-
delete_command = ["kubectl", "delete", "configmap", name, "-n", namespace]
132-
print(f"Deleting ConfigMap '{name}'...")
133-
subprocess.run(delete_command, check=True)
134-
print(f"ConfigMap '{name}' deleted successfully.")
135-
else:
136-
print(f"ConfigMap '{name}' does not exist in namespace '{namespace}', skipping deletion.")
137-
138-
139106
def install_helm_release(release_name, chart_name, namespace, values_file, device_type):
140107
"""Deploy a Helm release with a specified name and chart.
141108
@@ -145,7 +112,6 @@ def install_helm_release(release_name, chart_name, namespace, values_file, devic
145112
- namespace: The Kubernetes namespace for deployment.
146113
- values_file: The user values file for deployment.
147114
- device_type: The device type (e.g., "gaudi") for specific configurations (optional).
148-
- extra_env_configmap_name: Name of the ConfigMap for extra environment variables (default "extra-env").
149115
"""
150116

151117
# Check if the namespace exists; if not, create it
@@ -160,9 +126,6 @@ def install_helm_release(release_name, chart_name, namespace, values_file, devic
160126
subprocess.run(command, check=True)
161127
print(f"Namespace '{namespace}' created successfully.")
162128

163-
# This is workaround for teirerank-gaudi, will be removed later
164-
create_configmap("extra-env", namespace, {"MAX_WARMUP_SEQUENCE_LENGTH": "512"})
165-
166129
# Handle gaudi-specific values file if device_type is "gaudi"
167130
hw_values_file = None
168131
untar_dir = None
@@ -217,9 +180,6 @@ def uninstall_helm_release(release_name, namespace=None):
217180
namespace = "default"
218181

219182
try:
220-
# This is workaround for teirerank-gaudi, will be removed later
221-
delete_configmap("extra-env", namespace)
222-
223183
# Uninstall the Helm release
224184
command = ["helm", "uninstall", release_name, "--namespace", namespace]
225185
print(f"Uninstalling Helm release {release_name} in namespace {namespace}...")

ChatQnA/benchmark/performance/kubernetes/intel/gaudi/generate_helm_values.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,20 +26,16 @@ def generate_helm_values(with_rerank, num_nodes, hf_token, model_dir, node_selec
2626
"data-prep": {"nodeSelector": {key: value for key, value in node_selector.items()}},
2727
"redis-vector-db": {"nodeSelector": {key: value for key, value in node_selector.items()}},
2828
"retriever-usvc": {"nodeSelector": {key: value for key, value in node_selector.items()}},
29-
"llm-uservice": {"nodeSelector": {key: value for key, value in node_selector.items()}},
30-
"embedding-usvc": {"nodeSelector": {key: value for key, value in node_selector.items()}},
3129
"chatqna-ui": {"nodeSelector": {key: value for key, value in node_selector.items()}},
3230
"global": {
3331
"HUGGINGFACEHUB_API_TOKEN": hf_token, # Use passed token
3432
"modelUseHostPath": model_dir, # Use passed model directory
35-
"extraEnvConfig": "extra-env", # Added MAX_WARMUP_SEQUENCE_LENGTH: 512 to extra-env in deploy.py
3633
},
3734
"nodeSelector": {key: value for key, value in node_selector.items()},
3835
}
3936

4037
if with_rerank:
4138
values["teirerank"] = {"nodeSelector": {key: value for key, value in node_selector.items()}}
42-
values["reranking-usvc"] = {"nodeSelector": {key: value for key, value in node_selector.items()}}
4339
else:
4440
values["image"] = {"repository": "opea/chatqna-without-rerank"}
4541

@@ -133,9 +129,9 @@ def generate_helm_values(with_rerank, num_nodes, hf_token, model_dir, node_selec
133129

134130
# Determine the filename based on 'with_rerank' and 'num_nodes'
135131
if with_rerank:
136-
filename = f"{mode}_{num_nodes}_gaudi_with_rerank.yaml"
132+
filename = f"{mode}-{num_nodes}-gaudi-with-rerank-values.yaml"
137133
else:
138-
filename = f"{mode}_{num_nodes}_gaudi_without_rerank.yaml"
134+
filename = f"{mode}-{num_nodes}-gaudi-without-rerank-values.yaml"
139135

140136
# Write the YAML data to the file
141137
with open(filename, "w") as file:

0 commit comments

Comments
 (0)