Skip to content

Commit f70d9c3

Browse files
authored
chatqna benchmark for v1.1 release (opea-project#1120)
Signed-off-by: chensuyue <[email protected]>
Signed-off-by: Cathy Zhang <[email protected]>
1 parent 8808b51 commit f70d9c3

File tree

4 files changed

+8
-51
lines changed

4 files changed

+8
-51
lines changed

ChatQnA/benchmark/performance/kubernetes/intel/gaudi/benchmark.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,9 @@ function run_benchmark() {
7575
export DEPLOYMENT_TYPE=${deployment_type}
7676
export SERVICE_IP=${service_ip:-"None"}
7777
export SERVICE_PORT=${service_port:-"None"}
78+
export LOAD_SHAPE=${load_shape:-"constant"}
79+
export CONCURRENT_LEVEL=${concurrent_level:-5}
80+
export ARRIVAL_RATE=${arrival_rate:-1.0}
7881
if [[ -z $USER_QUERIES ]]; then
7982
user_query=$((query_per_node*node_number))
8083
export USER_QUERIES="[${user_query}, ${user_query}, ${user_query}, ${user_query}]"

ChatQnA/benchmark/performance/kubernetes/intel/gaudi/benchmark.yaml

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,12 @@ test_suite_config: # Overall configuration settings for the test suite
1717
llm_model: "Intel/neural-chat-7b-v3-3" # The LLM model used for the test
1818
test_output_dir: "${TEST_OUTPUT_DIR}" # The directory to store the test output
1919
load_shape: # Tenant concurrency pattern
20-
name: constant # poisson or constant(locust default load shape)
20+
name: ${LOAD_SHAPE} # poisson or constant(locust default load shape)
2121
params: # Loadshape-specific parameters
2222
constant: # Constant load shape specific parameters, activate only if load_shape.name is constant
23-
concurrent_level: 5 # If user_queries is specified, concurrent_level is target number of requests per user. If not, it is the number of simulated users
24-
# arrival_rate: 1.0 # Request arrival rate. If set, concurrent_level will be overridden, constant load will be generated based on arrival-rate
23+
concurrent_level: ${CONCURRENT_LEVEL} # If user_queries is specified, concurrent_level is target number of requests per user. If not, it is the number of simulated users
2524
poisson: # Poisson load shape specific parameters, activate only if load_shape.name is poisson
26-
arrival_rate: 1.0 # Request arrival rate
27-
namespace: "my-chatqna"
25+
arrival_rate: ${ARRIVAL_RATE} # Request arrival rate
2826

2927
test_cases:
3028
chatqna:

ChatQnA/benchmark/performance/kubernetes/intel/gaudi/deploy.py

Lines changed: 0 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -103,39 +103,6 @@ def delete_helm_repo(repo_name):
103103
print(f"Failed to delete Helm repo {repo_name}. It may not exist.")
104104

105105

106-
def configmap_exists(name, namespace):
107-
"""Check if a ConfigMap exists in the specified namespace."""
108-
check_command = ["kubectl", "get", "configmap", name, "-n", namespace]
109-
result = subprocess.run(check_command, check=False, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
110-
return result.returncode == 0
111-
112-
113-
def create_configmap(name, namespace, data):
114-
"""Create a ConfigMap if it does not already exist."""
115-
if configmap_exists(name, namespace):
116-
print(f"ConfigMap '{name}' already exists in namespace '{namespace}', skipping creation.")
117-
else:
118-
create_command = (
119-
["kubectl", "create", "configmap", name]
120-
+ [f"--from-literal={k}={v}" for k, v in data.items()]
121-
+ ["-n", namespace]
122-
)
123-
print(f"Creating ConfigMap '{name}' in namespace '{namespace}'...")
124-
subprocess.run(create_command, check=True)
125-
print(f"ConfigMap '{name}' created successfully.")
126-
127-
128-
def delete_configmap(name, namespace):
129-
"""Delete a ConfigMap if it exists."""
130-
if configmap_exists(name, namespace):
131-
delete_command = ["kubectl", "delete", "configmap", name, "-n", namespace]
132-
print(f"Deleting ConfigMap '{name}'...")
133-
subprocess.run(delete_command, check=True)
134-
print(f"ConfigMap '{name}' deleted successfully.")
135-
else:
136-
print(f"ConfigMap '{name}' does not exist in namespace '{namespace}', skipping deletion.")
137-
138-
139106
def install_helm_release(release_name, chart_name, namespace, values_file, device_type):
140107
"""Deploy a Helm release with a specified name and chart.
141108
@@ -145,7 +112,6 @@ def install_helm_release(release_name, chart_name, namespace, values_file, devic
145112
- namespace: The Kubernetes namespace for deployment.
146113
- values_file: The user values file for deployment.
147114
- device_type: The device type (e.g., "gaudi") for specific configurations (optional).
148-
- extra_env_configmap_name: Name of the ConfigMap for extra environment variables (default "extra-env").
149115
"""
150116

151117
# Check if the namespace exists; if not, create it
@@ -160,9 +126,6 @@ def install_helm_release(release_name, chart_name, namespace, values_file, devic
160126
subprocess.run(command, check=True)
161127
print(f"Namespace '{namespace}' created successfully.")
162128

163-
# This is workaround for teirerank-gaudi, will be removed later
164-
create_configmap("extra-env", namespace, {"MAX_WARMUP_SEQUENCE_LENGTH": "512"})
165-
166129
# Handle gaudi-specific values file if device_type is "gaudi"
167130
hw_values_file = None
168131
untar_dir = None
@@ -217,9 +180,6 @@ def uninstall_helm_release(release_name, namespace=None):
217180
namespace = "default"
218181

219182
try:
220-
# This is workaround for teirerank-gaudi, will be removed later
221-
delete_configmap("extra-env", namespace)
222-
223183
# Uninstall the Helm release
224184
command = ["helm", "uninstall", release_name, "--namespace", namespace]
225185
print(f"Uninstalling Helm release {release_name} in namespace {namespace}...")

ChatQnA/benchmark/performance/kubernetes/intel/gaudi/generate_helm_values.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,20 +26,16 @@ def generate_helm_values(with_rerank, num_nodes, hf_token, model_dir, node_selec
2626
"data-prep": {"nodeSelector": {key: value for key, value in node_selector.items()}},
2727
"redis-vector-db": {"nodeSelector": {key: value for key, value in node_selector.items()}},
2828
"retriever-usvc": {"nodeSelector": {key: value for key, value in node_selector.items()}},
29-
"llm-uservice": {"nodeSelector": {key: value for key, value in node_selector.items()}},
30-
"embedding-usvc": {"nodeSelector": {key: value for key, value in node_selector.items()}},
3129
"chatqna-ui": {"nodeSelector": {key: value for key, value in node_selector.items()}},
3230
"global": {
3331
"HUGGINGFACEHUB_API_TOKEN": hf_token, # Use passed token
3432
"modelUseHostPath": model_dir, # Use passed model directory
35-
"extraEnvConfig": "extra-env", # Added MAX_WARMUP_SEQUENCE_LENGTH: 512 to extra-env in deploy.py
3633
},
3734
"nodeSelector": {key: value for key, value in node_selector.items()},
3835
}
3936

4037
if with_rerank:
4138
values["teirerank"] = {"nodeSelector": {key: value for key, value in node_selector.items()}}
42-
values["reranking-usvc"] = {"nodeSelector": {key: value for key, value in node_selector.items()}}
4339
else:
4440
values["image"] = {"repository": "opea/chatqna-without-rerank"}
4541

@@ -133,9 +129,9 @@ def generate_helm_values(with_rerank, num_nodes, hf_token, model_dir, node_selec
133129

134130
# Determine the filename based on 'with_rerank' and 'num_nodes'
135131
if with_rerank:
136-
filename = f"{mode}_{num_nodes}_gaudi_with_rerank.yaml"
132+
filename = f"{mode}-{num_nodes}-gaudi-with-rerank-values.yaml"
137133
else:
138-
filename = f"{mode}_{num_nodes}_gaudi_without_rerank.yaml"
134+
filename = f"{mode}-{num_nodes}-gaudi-without-rerank-values.yaml"
139135

140136
# Write the YAML data to the file
141137
with open(filename, "w") as file:

0 commit comments

Comments
 (0)