diff --git a/helm-charts/audioqna/cpu-multilang-values.yaml b/helm-charts/audioqna/cpu-multilang-values.yaml
index 84ff9161b..ebb269c6a 100644
--- a/helm-charts/audioqna/cpu-multilang-values.yaml
+++ b/helm-charts/audioqna/cpu-multilang-values.yaml
@@ -5,6 +5,7 @@ tgi:
   enabled: false
 vllm:
   enabled: true
+  VLLM_CPU_OMP_THREADS_BIND: all
 speecht5:
   enabled: false
diff --git a/helm-charts/audioqna/cpu-values.yaml b/helm-charts/audioqna/cpu-values.yaml
index 5a3c42335..1e2865dd5 100644
--- a/helm-charts/audioqna/cpu-values.yaml
+++ b/helm-charts/audioqna/cpu-values.yaml
@@ -5,6 +5,7 @@ tgi:
   enabled: false
 vllm:
   enabled: true
+  VLLM_CPU_OMP_THREADS_BIND: all
 speecht5:
   enabled: true
diff --git a/helm-charts/codetrans/README.md b/helm-charts/codetrans/README.md
index 38d1d4cbc..53bdb53e2 100644
--- a/helm-charts/codetrans/README.md
+++ b/helm-charts/codetrans/README.md
@@ -14,7 +14,7 @@ scripts/update_dependency.sh
 helm dependency update codetrans
 export HFTOKEN="insert-your-huggingface-token-here"
 export MODELDIR="/mnt/opea-models"
-export MODELNAME="mistralai/Mistral-7B-Instruct-v0.3"
+export MODELNAME="Qwen/Qwen2.5-Coder-7B-Instruct"
 # To use CPU with vLLM
 helm install codetrans codetrans --set global.HF_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set llm-uservcie.LLM_MODEL_ID=${MODELNAME} --set vllm.LLM_MODEL_ID=${MODELNAME} -f codetrans/cpu-values.yaml
 # To use CPU with TGI
@@ -31,7 +31,7 @@ helm install codetrans codetrans --set global.HF_TOKEN=${HFTOKEN} --set global.m

 ### IMPORTANT NOTE

-1. To use model `mistralai/Mistral-7B-Instruct-v0.3`, you should first goto the [huggingface model card](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3) to apply for the model access first. You need to make sure your huggingface token has at least read access to that model.
+1. To use model `Qwen/Qwen2.5-Coder-7B-Instruct`, first go to the [huggingface model card](https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct) and check its access requirements. You need to make sure your huggingface token has at least read access to that model.

 2. Make sure your `MODELDIR` exists on the node where your workload is schedueled so you can cache the downloaded model for next time use. Otherwise, set `global.modelUseHostPath` to 'null' if you don't want to cache the model.
@@ -66,9 +66,9 @@ Open a browser to access `http://:${port}` to play with the

 ## Values

-| Key               | Type   | Default                                | Description                                                                              |
-| ----------------- | ------ | -------------------------------------- | ---------------------------------------------------------------------------------------- |
-| image.repository  | string | `"opea/codetrans"`                     |                                                                                          |
-| service.port      | string | `"7777"`                               |                                                                                          |
-| tgi.LLM_MODEL_ID  | string | `"mistralai/Mistral-7B-Instruct-v0.3"` | Models id from https://huggingface.co/, or predownloaded model directory                 |
-| global.monitoring | bool   | `false`                                | Enable usage metrics for the service components. See ../monitoring.md before enabling!   |
+| Key               | Type   | Default                            | Description                                                                              |
+| ----------------- | ------ | ---------------------------------- | ---------------------------------------------------------------------------------------- |
+| image.repository  | string | `"opea/codetrans"`                 |                                                                                          |
+| service.port      | string | `"7777"`                           |                                                                                          |
+| tgi.LLM_MODEL_ID  | string | `"Qwen/Qwen2.5-Coder-7B-Instruct"` | Models id from https://huggingface.co/, or predownloaded model directory                 |
+| global.monitoring | bool   | `false`                            | Enable usage metrics for the service components. See ../monitoring.md before enabling!   |
diff --git a/helm-charts/codetrans/values.yaml b/helm-charts/codetrans/values.yaml
index 24f0a96bb..37885baa4 100644
--- a/helm-charts/codetrans/values.yaml
+++ b/helm-charts/codetrans/values.yaml
@@ -60,15 +60,15 @@ affinity: {}
 # To override values in subchart tgi
 tgi:
   enabled: false
-  LLM_MODEL_ID: mistralai/Mistral-7B-Instruct-v0.3
+  LLM_MODEL_ID: Qwen/Qwen2.5-Coder-7B-Instruct

 vllm:
   enabled: true
-  LLM_MODEL_ID: mistralai/Mistral-7B-Instruct-v0.3
+  LLM_MODEL_ID: Qwen/Qwen2.5-Coder-7B-Instruct

 llm-uservice:
   TEXTGEN_BACKEND: vLLM
-  LLM_MODEL_ID: mistralai/Mistral-7B-Instruct-v0.3
+  LLM_MODEL_ID: Qwen/Qwen2.5-Coder-7B-Instruct

 nginx:
   service:
diff --git a/helm-charts/common/vllm/templates/configmap.yaml b/helm-charts/common/vllm/templates/configmap.yaml
index 9cfdb4f89..c7eea5232 100644
--- a/helm-charts/common/vllm/templates/configmap.yaml
+++ b/helm-charts/common/vllm/templates/configmap.yaml
@@ -27,6 +27,9 @@ data:
   {{- if .Values.VLLM_CPU_KVCACHE_SPACE }}
   VLLM_CPU_KVCACHE_SPACE: {{ .Values.VLLM_CPU_KVCACHE_SPACE | quote}}
   {{- end }}
+  {{- if .Values.VLLM_CPU_OMP_THREADS_BIND }}
+  VLLM_CPU_OMP_THREADS_BIND: {{ .Values.VLLM_CPU_OMP_THREADS_BIND | quote}}
+  {{- end }}
   {{- if .Values.VLLM_SKIP_WARMUP }}
   VLLM_SKIP_WARMUP: {{ .Values.VLLM_SKIP_WARMUP | quote }}
   {{- end }}
diff --git a/helm-charts/common/vllm/values.yaml b/helm-charts/common/vllm/values.yaml
index 5dc0d12a5..b353c9068 100644
--- a/helm-charts/common/vllm/values.yaml
+++ b/helm-charts/common/vllm/values.yaml
@@ -55,7 +55,7 @@ podSecurityContext: {}
 # Workaround for https://github.com/opea-project/GenAIComps/issues/1549
 # Need to run as root until upstream fixed and released.
 securityContext:
-  readOnlyRootFilesystem: true
+  readOnlyRootFilesystem: false
   allowPrivilegeEscalation: false
   runAsNonRoot: false
   runAsUser: 0
@@ -107,6 +107,7 @@ LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
 OMPI_MCA_btl_vader_single_copy_mechanism: ""
 PT_HPU_ENABLE_LAZY_COLLECTIVES: ""
 VLLM_CPU_KVCACHE_SPACE: ""
+VLLM_CPU_OMP_THREADS_BIND: ""

 global:
   http_proxy: ""
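Below is a minimal, illustrative values override showing how the `VLLM_CPU_OMP_THREADS_BIND` knob introduced by this patch could be set for the vLLM subchart. The file name and the `VLLM_CPU_KVCACHE_SPACE` value are assumptions for the example, not values taken from the charts:

```yaml
# my-cpu-values.yaml -- hypothetical user override for a chart that wraps the vllm subchart
vllm:
  enabled: true
  # Bind vLLM's OpenMP worker threads to all available CPU cores; the value is
  # passed through to VLLM_CPU_OMP_THREADS_BIND in the rendered ConfigMap.
  VLLM_CPU_OMP_THREADS_BIND: all
  # Assumed example value: reserve 40 GiB of CPU memory for the KV cache.
  VLLM_CPU_KVCACHE_SPACE: "40"
```

With the updated `configmap.yaml` template, rendering this override adds `VLLM_CPU_OMP_THREADS_BIND: "all"` to the `data:` section of the vLLM ConfigMap, alongside any other optional vLLM environment variables that are set.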