1 change: 1 addition & 0 deletions helm-charts/audioqna/cpu-multilang-values.yaml
@@ -5,6 +5,7 @@ tgi:
   enabled: false
 vllm:
   enabled: true
+  VLLM_CPU_OMP_THREADS_BIND: all

 speecht5:
   enabled: false
1 change: 1 addition & 0 deletions helm-charts/audioqna/cpu-values.yaml
@@ -5,6 +5,7 @@ tgi:
   enabled: false
 vllm:
   enabled: true
+  VLLM_CPU_OMP_THREADS_BIND: all

 speecht5:
   enabled: true
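The same binding can also be enabled at install time instead of editing the values files; a minimal sketch, following the install pattern from the codetrans README below (release and chart names are illustrative):

```bash
# Sketch: set the new vLLM subchart value via --set rather than in cpu-values.yaml.
helm install audioqna audioqna -f audioqna/cpu-values.yaml \
  --set vllm.VLLM_CPU_OMP_THREADS_BIND=all
```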
16 changes: 8 additions & 8 deletions helm-charts/codetrans/README.md
@@ -14,7 +14,7 @@ scripts/update_dependency.sh
 helm dependency update codetrans
 export HFTOKEN="insert-your-huggingface-token-here"
 export MODELDIR="/mnt/opea-models"
-export MODELNAME="mistralai/Mistral-7B-Instruct-v0.3"
+export MODELNAME="Qwen/Qwen2.5-Coder-7B-Instruct"
 # To use CPU with vLLM
 helm install codetrans codetrans --set global.HF_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set llm-uservice.LLM_MODEL_ID=${MODELNAME} --set vllm.LLM_MODEL_ID=${MODELNAME} -f codetrans/cpu-values.yaml
 # To use CPU with TGI
@@ -31,7 +31,7 @@ helm install codetrans codetrans --set global.HF_TOKEN=${HFTOKEN} --set global.m

### IMPORTANT NOTE

-1. To use model `mistralai/Mistral-7B-Instruct-v0.3`, you should first goto the [huggingface model card](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3) to apply for the model access first. You need to make sure your huggingface token has at least read access to that model.
+1. To use model `Qwen/Qwen2.5-Coder-7B-Instruct`, you should first go to the [huggingface model card](https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct) to apply for model access. You need to make sure your huggingface token has at least read access to that model.

2. Make sure your `MODELDIR` exists on the node where your workload is scheduled so the downloaded model can be cached for later use. Otherwise, set `global.modelUseHostPath` to `null` if you don't want to cache the model.
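For example, combining the two notes above, an install that skips the host-side cache entirely might look like this (a sketch; the token is a placeholder):

```bash
# Sketch: no host-side model cache; the model is re-downloaded on pod start.
export HFTOKEN="insert-your-huggingface-token-here"
helm install codetrans codetrans --set global.HF_TOKEN=${HFTOKEN} \
  --set global.modelUseHostPath=null -f codetrans/cpu-values.yaml
```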

@@ -66,9 +66,9 @@ Open a browser to access `http://<k8s-node-ip-address>:${port}` to play with the

## Values

 | Key               | Type   | Default                            | Description                                                                             |
 | ----------------- | ------ | ---------------------------------- | --------------------------------------------------------------------------------------- |
 | image.repository  | string | `"opea/codetrans"`                 |                                                                                         |
 | service.port      | string | `"7777"`                           |                                                                                         |
-| tgi.LLM_MODEL_ID  | string | `"mistralai/Mistral-7B-Instruct-v0.3"` | Models id from https://huggingface.co/, or predownloaded model directory           |
+| tgi.LLM_MODEL_ID  | string | `"Qwen/Qwen2.5-Coder-7B-Instruct"` | Model ID from https://huggingface.co/, or a predownloaded model directory               |
 | global.monitoring | bool   | `false`                            | Enable usage metrics for the service components. See ../monitoring.md before enabling! |
6 changes: 3 additions & 3 deletions helm-charts/codetrans/values.yaml
@@ -60,15 +60,15 @@ affinity: {}
 # To override values in subchart tgi
 tgi:
   enabled: false
-  LLM_MODEL_ID: mistralai/Mistral-7B-Instruct-v0.3
+  LLM_MODEL_ID: Qwen/Qwen2.5-Coder-7B-Instruct

 vllm:
   enabled: true
-  LLM_MODEL_ID: mistralai/Mistral-7B-Instruct-v0.3
+  LLM_MODEL_ID: Qwen/Qwen2.5-Coder-7B-Instruct

 llm-uservice:
   TEXTGEN_BACKEND: vLLM
-  LLM_MODEL_ID: mistralai/Mistral-7B-Instruct-v0.3
+  LLM_MODEL_ID: Qwen/Qwen2.5-Coder-7B-Instruct

 nginx:
   service:
3 changes: 3 additions & 0 deletions helm-charts/common/vllm/templates/configmap.yaml
@@ -27,6 +27,9 @@ data:
   {{- if .Values.VLLM_CPU_KVCACHE_SPACE }}
   VLLM_CPU_KVCACHE_SPACE: {{ .Values.VLLM_CPU_KVCACHE_SPACE | quote}}
   {{- end }}
+  {{- if .Values.VLLM_CPU_OMP_THREADS_BIND }}
+  VLLM_CPU_OMP_THREADS_BIND: {{ .Values.VLLM_CPU_OMP_THREADS_BIND | quote}}
+  {{- end }}
   {{- if .Values.VLLM_SKIP_WARMUP }}
   VLLM_SKIP_WARMUP: {{ .Values.VLLM_SKIP_WARMUP | quote }}
   {{- end }}
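With a value set (e.g. `VLLM_CPU_OMP_THREADS_BIND: all`, as in the audioqna values changes above), this template would render roughly as follows; a sketch, with the ConfigMap name being a hypothetical release-derived value:

```yaml
# Approximate rendered output; only the relevant data key is shown.
apiVersion: v1
kind: ConfigMap
metadata:
  name: audioqna-vllm-config   # hypothetical name
data:
  VLLM_CPU_OMP_THREADS_BIND: "all"   # quoted by the "| quote" pipeline above
```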
3 changes: 2 additions & 1 deletion helm-charts/common/vllm/values.yaml
@@ -55,7 +55,7 @@ podSecurityContext: {}
 # Workaround for https://github.com/opea-project/GenAIComps/issues/1549
 # Need to run as root until upstream fixed and released.
 securityContext:
-  readOnlyRootFilesystem: true
+  readOnlyRootFilesystem: false
@ftian1 (Collaborator) commented on Aug 19, 2025:

is this change mandatory?

@chensuyue (Collaborator, Author) commented on Aug 19, 2025:

To fix this issue:

[pod/chatqna-1755584254-vllm-7f44887799-jjtsj/model-downloader] chmod: /data/models--meta-llama--Meta-Llama-3-8B-Instruct: Operation not permitted.

Without this update the test is not able to execute chmod for the data path.

Member commented:

Why is this an issue now? It was not before.

Collaborator commented:

> To fix this issue: [pod/chatqna-1755584254-vllm-7f44887799-jjtsj/model-downloader] chmod: /data/models--meta-llama--Meta-Llama-3-8B-Instruct: Operation not permitted. Without this update the test is not able to execute chmod for the data path.

That's clearly a wrong thing to do. This is the security context for the vLLM container itself, and that should not be modifying anything model related. All model related updates are done by the downloader init container:
https://github.com/opea-project/GenAIInfra/blob/main/helm-charts/common/vllm/templates/deployment.yaml#L33

And that initContainer has a hard-coded security context (not one coming from the values file).

Additionally, models are on a separate volume from the root file system, and the init container has the necessary capabilities to chmod etc. the model files there, in case extra (vLLM) writes are necessary with some of the models.

Collaborator commented:

Potential reasons why things might fail now (see the sketch after this list):

  • vLLM container is configured with a different user/group => user/group should be updated
  • vLLM is configured to read model data from a different path => path should be fixed
  • vLLM needs to download additional files => the initContainer downloader should be asked to download those too, or if this is due to a too-old downloader, the HF downloader image should be updated
  • vLLM now writes extra files to some other path => redirect those to a suitable path, or mount something appropriate there
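For the last case, a common Kubernetes remedy is to keep `readOnlyRootFilesystem: true` and mount a writable emptyDir over the path being written to; a minimal sketch, with the `/tmp` mount path and volume name being illustrative assumptions rather than values taken from the chart:

```yaml
# Sketch: keep the container's root filesystem read-only and provide a
# dedicated scratch volume instead of relaxing the securityContext.
# (Container-level fields and the pod-level "volumes" list are shown together
# here for brevity; they live in different places in the deployment template.)
securityContext:
  readOnlyRootFilesystem: true
  allowPrivilegeEscalation: false
volumeMounts:
  - name: tmp          # hypothetical volume name
    mountPath: /tmp    # assumption: the extra writes land under /tmp
volumes:
  - name: tmp
    emptyDir: {}
```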

@chensuyue (Collaborator, Author) commented on Aug 19, 2025:

Several tests block on this line:

chmod -R g+w /data/models--{{ replace "/" "--" .Values.LLM_MODEL_ID }};

If the code change here is not correct, we need to find a proper way to make this work.

@chensuyue (Collaborator, Author) commented:

You will see that only 2 of the chatqna vllm related tests failed with this issue; I don't know why.

And if you search the helm charts files, there are 30+ `readOnlyRootFilesystem: false` settings; please also check whether those are reasonable.

[screenshot of the search results]

Collaborator commented:

> And if you search the helm charts files, there are 30+ `readOnlyRootFilesystem: false` settings; please also check whether those are reasonable.

Ouch. That's a clear regression from when they were last fixed by Lianhao, see: #815 (comment)

@eero-t (Collaborator) commented on Aug 19, 2025:

> What permission should the model path have? I gave all the models chmod -R 777, but still got this issue. What should the user/group be? Should it be the user deploying the test, or root?

Looking at the error log: https://github.com/opea-project/GenAIExamples/actions/runs/17060819842/job/48367160723#step:6:381

The error is from the initContainer. It can download data, but cannot change access rights for the downloaded data:

[pod/chatqna-1755584254-vllm-7f44887799-jjtsj/model-downloader] chmod: /data/models--meta-llama--Meta-Llama-3-8B-Instruct: Operation not permitted

with the `chmod -R g+w /data/models--$LLM_MODEL_ID` command in: https://github.com/opea-project/GenAIInfra/blob/main/helm-charts/common/vllm/templates/deployment.yaml#L60

although the (hard-coded) initContainer securityContext should have all the necessary capabilities to do that: https://github.com/opea-project/GenAIInfra/blob/main/helm-charts/common/vllm/templates/deployment.yaml#L38

as it has been working earlier...

The initContainer's /data path is at the root of the model-volume volume: https://github.com/opea-project/GenAIInfra/blob/main/helm-charts/common/vllm/templates/deployment.yaml#L108

which according to the error log is at:

   model-volume:
    Type:          HostPath (bare host directory volume)
    Path:          /data2/hf_model
    HostPathType:  Directory

=> @chensuyue please provide output of `ls -la /data2/hf_model` for all the Gaudi hosts where CI could currently run these pods.

(Do those host directory access rights differ from what was used on CI Gaudi hosts earlier?)
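For instance, something along these lines on each Gaudi host would show who owns the cache and with what mode bits (a sketch; the `/data2/hf_model` path is taken from the pod spec above):

```bash
# Sketch: inspect ownership and permissions of the host-side model cache.
ls -la /data2/hf_model
# Numeric owner:group and mode for each cached model directory:
stat -c '%u:%g %a %n' /data2/hf_model/models--*
```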

@chensuyue (Collaborator, Author) commented:

> => @chensuyue please provide output of `ls -la /data2/hf_model` for all the Gaudi hosts where CI could currently run these pods.
>
> (Do those host directory access rights differ from what was used on CI Gaudi hosts earlier?)

I have given the current data folder the most lenient permissions. I didn't apply any special settings to those data paths earlier besides chmod 777; maybe the cloud team did.

[screenshots of the directory listings]

   allowPrivilegeEscalation: false
   runAsNonRoot: false
   runAsUser: 0
@@ -107,6 +107,7 @@ LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
 OMPI_MCA_btl_vader_single_copy_mechanism: ""
 PT_HPU_ENABLE_LAZY_COLLECTIVES: ""
 VLLM_CPU_KVCACHE_SPACE: ""
+VLLM_CPU_OMP_THREADS_BIND: ""

 global:
   http_proxy: ""
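As a usage note, vLLM's CPU backend accepts either `all` or explicit core lists for this variable; a hedged example override file (the file name and core range are illustrative, and the exact accepted syntax should be checked against the vLLM version in use):

```yaml
# my-cpu-values.yaml (hypothetical override file)
vllm:
  enabled: true
  # Pin vLLM's OpenMP worker threads to cores 0-31 instead of binding to all cores.
  VLLM_CPU_OMP_THREADS_BIND: "0-31"
```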