diff --git a/helm-charts/agentqna/README.md b/helm-charts/agentqna/README.md index 030c0bfda..aafca7d6e 100644 --- a/helm-charts/agentqna/README.md +++ b/helm-charts/agentqna/README.md @@ -51,9 +51,9 @@ Deploy everything on Gaudi enabled Kubernetes cluster: If you want to try with latest version, use `helm pull oci://ghcr.io/opea-project/charts/agentqna --version 0-latest --untar` ``` -export HUGGINGFACEHUB_API_TOKEN="YourOwnToken" +export HF_TOKEN="YourOwnToken" helm pull oci://ghcr.io/opea-project/charts/agentqna --untar -helm install agentqna agentqna -f agentqna/gaudi-values.yaml --set global.HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +helm install agentqna agentqna -f agentqna/gaudi-values.yaml --set global.HF_TOKEN=${HF_TOKEN} # To use AMD ROCm device cd GenAIInfra/helm-charts/ @@ -62,10 +62,10 @@ helm dependency update agentqna export HFTOKEN="your_huggingface_token" export MODELDIR="/mnt/opea-models" # with vLLM -helm upgrade --install agentqna agentqna -f agentqna/rocm-values.yaml --set global.HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +helm upgrade --install agentqna agentqna -f agentqna/rocm-values.yaml --set global.HF_TOKEN=${HFTOKEN} # with TGI -helm upgrade --install agentqna agentqna -f agentqna/rocm-tgi-values.yaml --set global.HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +helm upgrade --install agentqna agentqna -f agentqna/rocm-tgi-values.yaml --set global.HF_TOKEN=${HFTOKEN} ``` ## Verify diff --git a/helm-charts/agentqna/values.yaml b/helm-charts/agentqna/values.yaml index 17cb2a7db..3cd7a909c 100644 --- a/helm-charts/agentqna/values.yaml +++ b/helm-charts/agentqna/values.yaml @@ -134,7 +134,7 @@ global: http_proxy: "" https_proxy: "" no_proxy: "" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + HF_TOKEN: "insert-your-huggingface-token-here" # service account name to be shared with all parent/child charts. # If set, it will overwrite serviceAccount.name. # If set, and serviceAccount.create is false, it will assume this service account is already created by others.
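The key rename can be sanity-checked offline before installing, by rendering the chart and grepping for the token variables; a minimal sketch, assuming the chart was pulled into `./agentqna` as shown above:

```bash
# Render the chart locally (no cluster needed) and confirm the token
# value reaches the generated manifests under the new key.
export HF_TOKEN="YourOwnToken"
helm template agentqna agentqna -f agentqna/gaudi-values.yaml \
  --set global.HF_TOKEN=${HF_TOKEN} | grep -E 'HF_TOKEN|HUGGINGFACEHUB_API_TOKEN'
```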
diff --git a/helm-charts/audioqna/README.md b/helm-charts/audioqna/README.md index 411081ff2..60e7ffbad 100644 --- a/helm-charts/audioqna/README.md +++ b/helm-charts/audioqna/README.md @@ -21,15 +21,15 @@ scripts/update_dependency.sh helm dependency update audioqna export HFTOKEN="insert-your-huggingface-token-here" # To use CPU with vLLM -helm install audioqna audioqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f audioqna/cpu-values.yaml +helm install audioqna audioqna --set global.HF_TOKEN=${HFTOKEN} -f audioqna/cpu-values.yaml # To use CPU with TGI -# helm install audioqna audioqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f audioqna/cpu-tgi-values.yaml +# helm install audioqna audioqna --set global.HF_TOKEN=${HFTOKEN} -f audioqna/cpu-tgi-values.yaml # To use CPU with vLLM with multilang tts -# helm install audioqna audioqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-multilang-values.yaml +# helm install audioqna audioqna --set global.HF_TOKEN=${HFTOKEN} -f cpu-multilang-values.yaml # To use Gaudi device with vLLM -# helm install audioqna audioqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f audioqna/gaudi-values.yaml +# helm install audioqna audioqna --set global.HF_TOKEN=${HFTOKEN} -f audioqna/gaudi-values.yaml # To use Gaudi device with TGI -# helm install audioqna audioqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f audioqna/gaudi-tgi-values.yaml +# helm install audioqna audioqna --set global.HF_TOKEN=${HFTOKEN} -f audioqna/gaudi-tgi-values.yaml ``` ### IMPORTANT NOTE diff --git a/helm-charts/audioqna/values.yaml b/helm-charts/audioqna/values.yaml index 67af4e929..359a43936 100644 --- a/helm-charts/audioqna/values.yaml +++ b/helm-charts/audioqna/values.yaml @@ -85,7 +85,7 @@ global: http_proxy: "" https_proxy: "" no_proxy: "" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + HF_TOKEN: "insert-your-huggingface-token-here" # service account name to be shared with all parent/child charts. # If set, it will overwrite serviceAccount.name. # If set, and serviceAccount.create is false, it will assume this service account is already created by others. 
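Releases originally installed with the legacy variable can be moved to the new key in place; an illustrative upgrade, assuming the release name `audioqna` used above:

```bash
# helm upgrade merges these values over the chart defaults, so only the
# token key changes; the rest of the release is left untouched.
helm upgrade audioqna audioqna --set global.HF_TOKEN=${HFTOKEN} -f audioqna/cpu-values.yaml
```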
diff --git a/helm-charts/chatqna/README.md b/helm-charts/chatqna/README.md index 88b89344a..1daa9809b 100644 --- a/helm-charts/chatqna/README.md +++ b/helm-charts/chatqna/README.md @@ -25,33 +25,33 @@ export HFTOKEN="insert-your-huggingface-token-here" export MODELDIR="/mnt/opea-models" export MODELNAME="meta-llama/Meta-Llama-3-8B-Instruct" # To use CPU with vLLM -helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set vllm.LLM_MODEL_ID=${MODELNAME} +helm install chatqna chatqna --set global.HF_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set vllm.LLM_MODEL_ID=${MODELNAME} # To use Gaudi device with vLLM -#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set vllm.LLM_MODEL_ID=${MODELNAME} -f chatqna/gaudi-values.yaml +#helm install chatqna chatqna --set global.HF_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set vllm.LLM_MODEL_ID=${MODELNAME} -f chatqna/gaudi-values.yaml # To use CPU with TGI -#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -f chatqna/cpu-tgi-values.yaml +#helm install chatqna chatqna --set global.HF_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -f chatqna/cpu-tgi-values.yaml # To use Gaudi device with TGI -#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -f chatqna/gaudi-tgi-values.yaml +#helm install chatqna chatqna --set global.HF_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -f chatqna/gaudi-tgi-values.yaml # To use Nvidia GPU with TGI -#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -f chatqna/nv-values.yaml +#helm install chatqna chatqna --set global.HF_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -f chatqna/nv-values.yaml # To include guardrail component in chatqna on Gaudi with TGI -#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} -f chatqna/guardrails-gaudi-values.yaml +#helm install chatqna chatqna --set global.HF_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} -f chatqna/guardrails-gaudi-values.yaml # To run chatqna with Intel TDX feature -#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set vllm.LLM_MODEL_ID=${MODELNAME} --set redis-vector-db.tdxEnabled=true --set redis-vector-db.resources.limits.memory=4Gi --set retriever-usvc.tdxEnabled=true --set retriever-usvc.resources.limits.memory=7Gi --set tei.tdxEnabled=true --set tei.resources.limits.memory=4Gi --set teirerank.tdxEnabled=true --set teirerank.resources.limits.memory=6Gi --set nginx.tdxEnabled=true --set chatqna-ui.tdxEnabled=true --set chatqna-ui.resources.limits.memory=2Gi --set data-prep.tdxEnabled=true --set data-prep.resources.limits.memory=11Gi --set vllm.tdxEnabled=true --set vllm.resources.limits.memory=80Gi +#helm install chatqna chatqna --set global.HF_TOKEN=${HFTOKEN} --set vllm.LLM_MODEL_ID=${MODELNAME} --set redis-vector-db.tdxEnabled=true --set redis-vector-db.resources.limits.memory=4Gi --set retriever-usvc.tdxEnabled=true --set 
retriever-usvc.resources.limits.memory=7Gi --set tei.tdxEnabled=true --set tei.resources.limits.memory=4Gi --set teirerank.tdxEnabled=true --set teirerank.resources.limits.memory=6Gi --set nginx.tdxEnabled=true --set chatqna-ui.tdxEnabled=true --set chatqna-ui.resources.limits.memory=2Gi --set data-prep.tdxEnabled=true --set data-prep.resources.limits.memory=11Gi --set vllm.tdxEnabled=true --set vllm.resources.limits.memory=80Gi # To use CPU with vLLM with Qdrant DB -#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set vllm.LLM_MODEL_ID=${MODELNAME} -f chatqna/cpu-qdrant-values.yaml +#helm install chatqna chatqna --set global.HF_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set vllm.LLM_MODEL_ID=${MODELNAME} -f chatqna/cpu-qdrant-values.yaml # To use AMD ROCm device with vLLM -#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set vllm.LLM_MODEL_ID=${MODELNAME} -f chatqna/rocm-values.yaml +#helm install chatqna chatqna --set global.HF_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set vllm.LLM_MODEL_ID=${MODELNAME} -f chatqna/rocm-values.yaml # To use AMD ROCm device with TGI -#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -f chatqna/rocm-tgi-values.yaml +#helm install chatqna chatqna --set global.HF_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -f chatqna/rocm-tgi-values.yaml # To deploy FaqGen -#helm install faqgen chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} -f chatqna/faqgen-cpu-values.yaml +#helm install faqgen chatqna --set global.HF_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} -f chatqna/faqgen-cpu-values.yaml # To deploy FaqGen based application on AMD ROCm device with vLLM -#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set vllm.LLM_MODEL_ID=${MODELNAME} -f chatqna/faqgen-rocm-values.yaml +#helm install chatqna chatqna --set global.HF_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set vllm.LLM_MODEL_ID=${MODELNAME} -f chatqna/faqgen-rocm-values.yaml # To deploy FaqGen based application on AMD ROCm device with TGI -#helm install chatqna chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set vllm.LLM_MODEL_ID=${MODELNAME} -f chatqna/faqgen-rocm-tgi-values.yaml +#helm install chatqna chatqna --set global.HF_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -f chatqna/faqgen-rocm-tgi-values.yaml ``` diff --git a/helm-charts/chatqna/values.yaml b/helm-charts/chatqna/values.yaml index 334e965f5..35eb22e8b 100644 --- a/helm-charts/chatqna/values.yaml +++ b/helm-charts/chatqna/values.yaml @@ -167,7 +167,7 @@ global: http_proxy: "" https_proxy: "" no_proxy: "" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + HF_TOKEN: "insert-your-huggingface-token-here" # service account name to be shared with all parent/child charts. # If set, it will overwrite serviceAccount.name. # If set, and serviceAccount.create is false, it will assume this service account is already created by others.
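The long `--set` chains above can also be captured in a values override file; a hypothetical sketch using the same keys as the CPU vLLM example:

```bash
# my-values.yaml is an illustrative name; the keys mirror the --set flags above.
cat > my-values.yaml <<'EOF'
global:
  HF_TOKEN: "insert-your-huggingface-token-here"
  modelUseHostPath: "/mnt/opea-models"
vllm:
  LLM_MODEL_ID: "meta-llama/Meta-Llama-3-8B-Instruct"
EOF
helm install chatqna chatqna -f my-values.yaml
```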
diff --git a/helm-charts/codegen/README.md b/helm-charts/codegen/README.md index c01f6aaec..3debcd09e 100644 --- a/helm-charts/codegen/README.md +++ b/helm-charts/codegen/README.md @@ -23,17 +23,17 @@ export HFTOKEN="insert-your-huggingface-token-here" export MODELDIR="/mnt/opea-models" export MODELNAME="Qwen/Qwen2.5-Coder-7B-Instruct" # To use CPU with vLLM -helm install codegen codegen --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set llm-uservcie.LLM_MODEL_ID=${MODELNAME} --set vllm.LLM_MODEL_ID=${MODELNAME} -f codegen/cpu-values.yaml +helm install codegen codegen --set global.HF_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set llm-uservice.LLM_MODEL_ID=${MODELNAME} --set vllm.LLM_MODEL_ID=${MODELNAME} -f codegen/cpu-values.yaml # To use CPU with TGI -# helm install codegen codegen --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set llm-uservcie.LLM_MODEL_ID=${MODELNAME} --set tgi.LLM_MODEL_ID=${MODELNAME} -f codegen/cpu-tgi-values.yaml +# helm install codegen codegen --set global.HF_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set llm-uservice.LLM_MODEL_ID=${MODELNAME} --set tgi.LLM_MODEL_ID=${MODELNAME} -f codegen/cpu-tgi-values.yaml # To use Gaudi device with vLLM -# helm install codegen codegen --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set llm-uservcie.LLM_MODEL_ID=${MODELNAME} --set vllm.LLM_MODEL_ID=${MODELNAME} -f codegen/gaudi-values.yaml +# helm install codegen codegen --set global.HF_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set llm-uservice.LLM_MODEL_ID=${MODELNAME} --set vllm.LLM_MODEL_ID=${MODELNAME} -f codegen/gaudi-values.yaml # To use Gaudi device with TGI -# helm install codegen codegen --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set llm-uservcie.LLM_MODEL_ID=${MODELNAME} --set tgi.LLM_MODEL_ID=${MODELNAME} -f codegen/gaudi-tgi-values.yaml +# helm install codegen codegen --set global.HF_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set llm-uservice.LLM_MODEL_ID=${MODELNAME} --set tgi.LLM_MODEL_ID=${MODELNAME} -f codegen/gaudi-tgi-values.yaml # To use AMD ROCm device with vLLM -# helm install codegen codegen --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set llm-uservcie.LLM_MODEL_ID=${MODELNAME} --set vllm.LLM_MODEL_ID=${MODELNAME} -f codegen/rocm-values.yaml +# helm install codegen codegen --set global.HF_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set llm-uservice.LLM_MODEL_ID=${MODELNAME} --set vllm.LLM_MODEL_ID=${MODELNAME} -f codegen/rocm-values.yaml # To use AMD ROCm device with TGI -# helm install codegen codegen --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set llm-uservcie.LLM_MODEL_ID=${MODELNAME} --set tgi.LLM_MODEL_ID=${MODELNAME} -f codegen/rocm-tgi-values.yaml +# helm install codegen codegen --set global.HF_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set llm-uservice.LLM_MODEL_ID=${MODELNAME} --set tgi.LLM_MODEL_ID=${MODELNAME} -f codegen/rocm-tgi-values.yaml ``` diff --git a/helm-charts/codegen/values.yaml b/helm-charts/codegen/values.yaml index 0e9205e80..a751d1e3b 100644 --- a/helm-charts/codegen/values.yaml +++ b/helm-charts/codegen/values.yaml @@ -109,7 +109,7 @@ global: http_proxy: "" https_proxy: "" no_proxy: "" - HUGGINGFACEHUB_API_TOKEN:
"insert-your-huggingface-token-here" + HF_TOKEN: "insert-your-huggingface-token-here" # service account name to be shared with all parent/child charts. # If set, it will overwrite serviceAccount.name. # If set, and serviceAccount.create is false, it will assume this service account is already created by others. diff --git a/helm-charts/codetrans/README.md b/helm-charts/codetrans/README.md index fd347bd74..38d1d4cbc 100644 --- a/helm-charts/codetrans/README.md +++ b/helm-charts/codetrans/README.md @@ -16,17 +16,17 @@ export HFTOKEN="insert-your-huggingface-token-here" export MODELDIR="/mnt/opea-models" export MODELNAME="mistralai/Mistral-7B-Instruct-v0.3" # To use CPU with vLLM -helm install codetrans codetrans --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set llm-uservcie.LLM_MODEL_ID=${MODELNAME} --set vllm.LLM_MODEL_ID=${MODELNAME} -f codetrans/cpu-values.yaml +helm install codetrans codetrans --set global.HF_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set llm-uservcie.LLM_MODEL_ID=${MODELNAME} --set vllm.LLM_MODEL_ID=${MODELNAME} -f codetrans/cpu-values.yaml # To use CPU with TGI -# helm install codetrans codetrans --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set llm-uservcie.LLM_MODEL_ID=${MODELNAME} --set tgi.LLM_MODEL_ID=${MODELNAME} -f codetrans/cpu-tgi-values.yaml +# helm install codetrans codetrans --set global.HF_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set llm-uservcie.LLM_MODEL_ID=${MODELNAME} --set tgi.LLM_MODEL_ID=${MODELNAME} -f codetrans/cpu-tgi-values.yaml # To use Gaudi device with vLLM -# helm install codetrans codetrans --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set llm-uservcie.LLM_MODEL_ID=${MODELNAME} --set vllm.LLM_MODEL_ID=${MODELNAME} -f codetrans/gaudi-values.yaml +# helm install codetrans codetrans --set global.HF_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set llm-uservcie.LLM_MODEL_ID=${MODELNAME} --set vllm.LLM_MODEL_ID=${MODELNAME} -f codetrans/gaudi-values.yaml # To use Gaudi device with TGI -# helm install codetrans codetrans --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set llm-uservcie.LLM_MODEL_ID=${MODELNAME} --set tgi.LLM_MODEL_ID=${MODELNAME} -f codetrans/gaudi-tgi-values.yaml +# helm install codetrans codetrans --set global.HF_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set llm-uservcie.LLM_MODEL_ID=${MODELNAME} --set tgi.LLM_MODEL_ID=${MODELNAME} -f codetrans/gaudi-tgi-values.yaml # To use AMD ROCm device with vLLM -# helm install codetrans codetrans --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set llm-uservcie.LLM_MODEL_ID=${MODELNAME} --set vllm.LLM_MODEL_ID=${MODELNAME} -f codetrans/rocm-values.yaml +# helm install codetrans codetrans --set global.HF_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set llm-uservcie.LLM_MODEL_ID=${MODELNAME} --set vllm.LLM_MODEL_ID=${MODELNAME} -f codetrans/rocm-values.yaml # To use AMD ROCm device with TGI -# helm install codetrans codetrans --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set llm-uservcie.LLM_MODEL_ID=${MODELNAME} --set tgi.LLM_MODEL_ID=${MODELNAME} -f codetrans/rocm-tgi-values.yaml +# helm install codetrans codetrans --set global.HF_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set 
llm-uservcie.LLM_MODEL_ID=${MODELNAME} --set tgi.LLM_MODEL_ID=${MODELNAME} -f codetrans/rocm-tgi-values.yaml ``` ### IMPORTANT NOTE diff --git a/helm-charts/codetrans/values.yaml b/helm-charts/codetrans/values.yaml index 3faa80e1d..24f0a96bb 100644 --- a/helm-charts/codetrans/values.yaml +++ b/helm-charts/codetrans/values.yaml @@ -85,7 +85,7 @@ global: http_proxy: "" https_proxy: "" no_proxy: "" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + HF_TOKEN: "insert-your-huggingface-token-here" # service account name to be shared with all parent/child charts. # If set, it will overwrite serviceAccount.name. # If set, and serviceAccount.create is false, it will assume this service account is already created by others. diff --git a/helm-charts/common/agent/README.md b/helm-charts/common/agent/README.md index 94013083c..e71fa4f70 100644 --- a/helm-charts/common/agent/README.md +++ b/helm-charts/common/agent/README.md @@ -37,12 +37,12 @@ curl http://localhost:9090/v1/chat/completions \ For global options, see Global Options. -| Key | Type | Default | Description | -| ------------------------------- | ------ | -------------- | --------------------------------------------------------------------------------------- | -| global.HUGGINGFACEHUB_API_TOKEN | string | `""` | Your own Hugging Face API token | -| image.repository | string | `"opea/agent"` | | -| service.port | string | `"9090"` | | -| llm_endpoint_url | string | `""` | LLM endpoint | -| toolHostPath | string | `""` | hostPath to be mounted to agent's /home/user/tools, used for passing files for tools | -| toolPVC | string | `""` | Same as toolHostPath, but use PVC. You can only specify one of toolHostPath and toolPVC | -| global.monitoring | bop; | false | Service usage metrics | +| Key | Type | Default | Description | +| ----------------- | ------ | -------------- | --------------------------------------------------------------------------------------- | +| global.HF_TOKEN | string | `""` | Your own Hugging Face API token | +| image.repository | string | `"opea/agent"` | | +| service.port | string | `"9090"` | | +| llm_endpoint_url | string | `""` | LLM endpoint | +| toolHostPath | string | `""` | hostPath to be mounted to agent's /home/user/tools, used for passing files for tools | +| toolPVC | string | `""` | Same as toolHostPath, but use PVC. 
You can only specify one of toolHostPath and toolPVC | +| global.monitoring | bool | false | Service usage metrics | diff --git a/helm-charts/common/agent/templates/configmap.yaml b/helm-charts/common/agent/templates/configmap.yaml index 453cd8ff3..c857b62d5 100644 --- a/helm-charts/common/agent/templates/configmap.yaml +++ b/helm-charts/common/agent/templates/configmap.yaml @@ -103,7 +103,8 @@ data: {{- if .Values.FINANCIAL_DATASETS_API_KEY }} FINANCIAL_DATASETS_API_KEY: {{ .Values.FINANCIAL_DATASETS_API_KEY | quote }} {{- end }} - HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote }} + HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | default .Values.global.HF_TOKEN | quote }} + HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | default .Values.global.HF_TOKEN | quote }} HF_HOME: "/tmp/.cache/huggingface" {{- if .Values.global.HF_ENDPOINT }} HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote }} diff --git a/helm-charts/common/agent/values.yaml b/helm-charts/common/agent/values.yaml index 129de0bd7..813fe7ca1 100644 --- a/helm-charts/common/agent/values.yaml +++ b/helm-charts/common/agent/values.yaml @@ -112,7 +112,7 @@ global: http_proxy: "" https_proxy: "" no_proxy: "" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + HF_TOKEN: "insert-your-huggingface-token-here" # service account name to be shared with all parent/child charts. # If set, it will overwrite serviceAccount.name. # If set, and serviceAccount.create is false, it will assume this service account is already created by others. diff --git a/helm-charts/common/data-prep/README.md b/helm-charts/common/data-prep/README.md index aba7f46f8..d4224f74f 100644 --- a/helm-charts/common/data-prep/README.md +++ b/helm-charts/common/data-prep/README.md @@ -29,17 +29,17 @@ export TEI_EMBEDDING_ENDPOINT="http://tei" # Install data-prep with Redis DB backend export DATAPREP_BACKEND="REDIS" export DB_HOST="redis-vector-db" -helm install data-prep . --set TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} --set global.HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} --set DATAPREP_BACKEND=${DATAPREP_BACKEND} --set REDIS_HOST=${DB_HOST} +helm install data-prep . --set TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} --set global.HF_TOKEN=${HF_TOKEN} --set DATAPREP_BACKEND=${DATAPREP_BACKEND} --set REDIS_HOST=${DB_HOST} # Install data-prep with Milvus DB backend # export DATAPREP_BACKEND="MILVUS" # export DB_HOST="milvus" -# helm install data-prep . --set TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} --set global.HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} --set DATAPREP_BACKEND=${DATAPREP_BACKEND} --set MILVUS_HOST=${DB_HOST},MILVUS_PORT=19530,COLLECTION_NAME=rag__milvus +# helm install data-prep . --set TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} --set global.HF_TOKEN=${HF_TOKEN} --set DATAPREP_BACKEND=${DATAPREP_BACKEND} --set MILVUS_HOST=${DB_HOST},MILVUS_PORT=19530,COLLECTION_NAME=rag__milvus # Install data-prep with Qdrant DB backend # export DATAPREP_BACKEND="QDRANT" # export DB_HOST="qdrant" -# helm install data-prep . --set TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} --set global.HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} --set DATAPREP_BACKEND=${DATAPREP_BACKEND} --set QDRANT_HOST=${DB_HOST},QDRANT_PORT=6333,COLLECTION_NAME=rag_qdrant +# helm install data-prep .
--set TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} --set global.HF_TOKEN=${HF_TOKEN} --set DATAPREP_BACKEND=${DATAPREP_BACKEND} --set QDRANT_HOST=${DB_HOST},QDRANT_PORT=6333,COLLECTION_NAME=rag_qdrant ``` ### Install the microservice in air gapped (offline) mode @@ -95,17 +95,17 @@ curl http://localhost:6007/v1/dataprep/ingest \ ## Values -| Key | Type | Default | Description | -| ------------------------------- | ------ | --------- | ------------------------------------------------------------------------------------------------------- | -| service.port | string | `"6007"` | | -| global.HUGGINGFACEHUB_API_TOKEN | string | `""` | Your own Hugging Face API token | -| global.offline | bool | `false` | Whether to run the microservice in air gapped environment | -| DATAPREP_BACKEND | string | `"REDIS"` | vector DB backend to use, one of "REDIS", "MILVUS", "QDRANT" | -| REDIS_HOST | string | `""` | Redis service URL host, only valid for Redis, please see `values.yaml` for other Redis configuration | -| MILVUS_HOST | string | `""` | Milvus service URL host, only valid for Milvus, please see `values.yaml` for other Milvus configuration | -| QDRANT_HOST | string | `""` | Qdrant service URL host, only valid for Qdrant, please see `values.yaml` for other Qdrant configuration | -| TEI_EMBEDDING_ENDPOINT | string | `""` | | -| global.monitoring | bool | `false` | See ../../monitoring.md before enabling! | +| Key | Type | Default | Description | +| ---------------------- | ------ | --------- | ------------------------------------------------------------------------------------------------------- | +| service.port | string | `"6007"` | | +| global.HF_TOKEN | string | `""` | Your own Hugging Face API token | +| global.offline | bool | `false` | Whether to run the microservice in air gapped environment | +| DATAPREP_BACKEND | string | `"REDIS"` | vector DB backend to use, one of "REDIS", "MILVUS", "QDRANT" | +| REDIS_HOST | string | `""` | Redis service URL host, only valid for Redis, please see `values.yaml` for other Redis configuration | +| MILVUS_HOST | string | `""` | Milvus service URL host, only valid for Milvus, please see `values.yaml` for other Milvus configuration | +| QDRANT_HOST | string | `""` | Qdrant service URL host, only valid for Qdrant, please see `values.yaml` for other Qdrant configuration | +| TEI_EMBEDDING_ENDPOINT | string | `""` | | +| global.monitoring | bool | `false` | See ../../monitoring.md before enabling! 
| ## Milvus support diff --git a/helm-charts/common/data-prep/templates/configmap.yaml b/helm-charts/common/data-prep/templates/configmap.yaml index b1a69e029..800882672 100644 --- a/helm-charts/common/data-prep/templates/configmap.yaml +++ b/helm-charts/common/data-prep/templates/configmap.yaml @@ -76,8 +76,8 @@ data: {{- else }} {{- cat "Invalid DATAPREP_BACKEND:" .Values.DATAPREP_BACKEND | fail }} {{- end }} - HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}} - HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}} + HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | default .Values.global.HF_TOKEN | quote }} + HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | default .Values.global.HF_TOKEN | quote }} HF_HOME: "/tmp/.cache/huggingface" {{- if .Values.global.HF_ENDPOINT }} HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}} diff --git a/helm-charts/common/data-prep/values.yaml b/helm-charts/common/data-prep/values.yaml index 04e2ed92a..64472d97c 100644 --- a/helm-charts/common/data-prep/values.yaml +++ b/helm-charts/common/data-prep/values.yaml @@ -135,7 +135,7 @@ global: http_proxy: "" https_proxy: "" no_proxy: "" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + HF_TOKEN: "insert-your-huggingface-token-here" # service account name to be shared with all parent/child charts. # If set, it will overwrite serviceAccount.name. # If set, and serviceAccount.create is false, it will assume this service account is already created by others. diff --git a/helm-charts/common/gpt-sovits/README.md b/helm-charts/common/gpt-sovits/README.md index d9651d184..9a4f0705c 100644 --- a/helm-charts/common/gpt-sovits/README.md +++ b/helm-charts/common/gpt-sovits/README.md @@ -68,11 +68,11 @@ curl localhost:9880/ -XPOST -d '{ ## Values -| Key | Type | Default | Description | -| ------------------------------- | ------ | ------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| image.repository | string | `"opea/gpt-sovits"` | | -| service.port | string | `"9880"` | | -| global.HUGGINGFACEHUB_API_TOKEN | string | `insert-your-huggingface-token-here` | Hugging Face API token | -| global.offline | bool | `false` | Whether to run the microservice in air gapped environment | -| global.modelUseHostPath | string | `""` | Cached models directory on Kubernetes node, service will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to the container as /data directory. Setting this to null/empty will force the pod to download the model every time during startup. May not be set if `global.modelUsePVC` is also set. | -| global.modelUsePVC | string | `""` | Name of Persistent Volume Claim to use for model cache. The Persistent Volume will be mounted to the container as /data directory. Setting this to null/empty will force the pod to download the model every time during startup. May not be set if `global.modelUseHostPath` is also set. 
| +| Key | Type | Default | Description | +| ----------------------- | ------ | ------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| image.repository | string | `"opea/gpt-sovits"` | | +| service.port | string | `"9880"` | | +| global.HF_TOKEN | string | `insert-your-huggingface-token-here` | Hugging Face API token | +| global.offline | bool | `false` | Whether to run the microservice in air gapped environment | +| global.modelUseHostPath | string | `""` | Cached models directory on Kubernetes node, service will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to the container as /data directory. Setting this to null/empty will force the pod to download the model every time during startup. May not be set if `global.modelUsePVC` is also set. | +| global.modelUsePVC | string | `""` | Name of Persistent Volume Claim to use for model cache. The Persistent Volume will be mounted to the container as /data directory. Setting this to null/empty will force the pod to download the model every time during startup. May not be set if `global.modelUseHostPath` is also set. | diff --git a/helm-charts/common/gpt-sovits/templates/configmap.yaml b/helm-charts/common/gpt-sovits/templates/configmap.yaml index 52e5e6be4..ca419e4a4 100644 --- a/helm-charts/common/gpt-sovits/templates/configmap.yaml +++ b/helm-charts/common/gpt-sovits/templates/configmap.yaml @@ -17,4 +17,4 @@ data: {{- if .Values.global.HF_ENDPOINT }} HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}} {{- end }} - HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}} + HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | default .Values.global.HF_TOKEN | quote }} diff --git a/helm-charts/common/gpt-sovits/values.yaml b/helm-charts/common/gpt-sovits/values.yaml index 316697c16..6dfe5fde9 100644 --- a/helm-charts/common/gpt-sovits/values.yaml +++ b/helm-charts/common/gpt-sovits/values.yaml @@ -98,4 +98,4 @@ global: modelUseHostPath: "" modelUsePVC: "" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + HF_TOKEN: "insert-your-huggingface-token-here" diff --git a/helm-charts/common/guardrails-usvc/README.md b/helm-charts/common/guardrails-usvc/README.md index 4edf79201..3a93ab936 100644 --- a/helm-charts/common/guardrails-usvc/README.md +++ b/helm-charts/common/guardrails-usvc/README.md @@ -23,7 +23,7 @@ export HFTOKEN="insert-your-huggingface-token-here" export SAFETY_GUARD_ENDPOINT="http://tgi" export SAFETY_GUARD_MODEL_ID="meta-llama/Meta-Llama-Guard-2-8B" export GUARDRAILS_BACKEND="LLAMA" -helm install guardrails-usvc . --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set SAFETY_GUARD_ENDPOINT=${SAFETY_GUARD_ENDPOINT} --set SAFETY_GUARD_MODEL_ID=${SAFETY_GUARD_MODEL_ID} --set GUARDRAILS_BACKEND=${GUARDRAILS_BACKEND} --wait +helm install guardrails-usvc . 
--set global.HF_TOKEN=${HFTOKEN} --set SAFETY_GUARD_ENDPOINT=${SAFETY_GUARD_ENDPOINT} --set SAFETY_GUARD_MODEL_ID=${SAFETY_GUARD_MODEL_ID} --set GUARDRAILS_BACKEND=${GUARDRAILS_BACKEND} --wait ``` ### Use Allen Institute AI's WildGuard models: @@ -41,7 +41,7 @@ export HFTOKEN="insert-your-huggingface-token-here" export SAFETY_GUARD_ENDPOINT="http://tgi" export SAFETY_GUARD_MODEL_ID="allenai/wildguard" export GUARDRAILS_BACKEND="WILD" -helm install guardrails-usvc . --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set SAFETY_GUARD_ENDPOINT=${SAFETY_GUARD_ENDPOINT} --set SAFETY_GUARD_MODEL_ID=${SAFETY_GUARD_MODEL_ID} --set GUARDRAILS_BACKEND=${GUARDRAILS_BACKEND} --wait +helm install guardrails-usvc . --set global.HF_TOKEN=${HFTOKEN} --set SAFETY_GUARD_ENDPOINT=${SAFETY_GUARD_ENDPOINT} --set SAFETY_GUARD_MODEL_ID=${SAFETY_GUARD_MODEL_ID} --set GUARDRAILS_BACKEND=${GUARDRAILS_BACKEND} --wait ``` ## Verify @@ -61,11 +61,11 @@ curl http://localhost:9090/v1/guardrails \ ## Values -| Key | Type | Default | Description | -| ------------------------------- | ------ | ------------------------------------ | --------------------------------------------------------------- | -| global.HUGGINGFACEHUB_API_TOKEN | string | `""` | Your own Hugging Face API token | -| image.repository | string | `"opea/guardrails-usvc"` | | -| service.port | string | `"9090"` | | -| SAFETY_GUARD_ENDPOINT | string | `""` | LLM endpoint | -| SAFETY_GUARD_MODEL_ID | string | `"meta-llama/Meta-Llama-Guard-2-8B"` | Model ID for the underlying LLM service is using | -| GUARDRAIL_BACKEND | string | `"LLAMA"` | different gaurdrail model family to use, one of `LLAMA`, `WILD` | +| Key | Type | Default | Description | +| --------------------- | ------ | -------------------------------------- | --------------------------------------------------------------- | +| global.HF_TOKEN | string | `"insert-your-huggingface-token-here"` | Your own Hugging Face API token | +| image.repository | string | `"opea/guardrails-usvc"` | | +| service.port | string | `"9090"` | | +| SAFETY_GUARD_ENDPOINT | string | `""` | LLM endpoint | +| SAFETY_GUARD_MODEL_ID | string | `"meta-llama/Meta-Llama-Guard-2-8B"` | Model ID used by the underlying LLM service | +| GUARDRAIL_BACKEND | string | `"LLAMA"` | different guardrail model family to use, one of `LLAMA`, `WILD` | diff --git a/helm-charts/common/guardrails-usvc/templates/configmap.yaml b/helm-charts/common/guardrails-usvc/templates/configmap.yaml index f5379d202..16a5762d2 100644 --- a/helm-charts/common/guardrails-usvc/templates/configmap.yaml +++ b/helm-charts/common/guardrails-usvc/templates/configmap.yaml @@ -21,7 +21,8 @@ data: {{- else }} {{- cat "Invalid GUARDRAIL_BACKEND:" .Values.GUARDRAIL_BACKEND | fail }} {{- end }} - HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}} + HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | default .Values.global.HF_TOKEN | quote }} + HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | default .Values.global.HF_TOKEN | quote }} HF_HOME: "/tmp/.cache/huggingface" LOGFLAG: {{ .Values.LOGFLAG | quote }} {{- if .Values.global.HF_ENDPOINT }} diff --git a/helm-charts/common/guardrails-usvc/values.yaml b/helm-charts/common/guardrails-usvc/values.yaml index 035b25988..86ecbfd4e 100644 --- a/helm-charts/common/guardrails-usvc/values.yaml +++ b/helm-charts/common/guardrails-usvc/values.yaml @@ -102,7 +102,7 @@ global: http_proxy: "" https_proxy: "" no_proxy: "" - HUGGINGFACEHUB_API_TOKEN:
"insert-your-huggingface-token-here" + HF_TOKEN: "insert-your-huggingface-token-here" # service account name to be shared with all parent/child charts. # If set, it will overwrite serviceAccount.name. # If set, and serviceAccount.create is false, it will assume this service account is already created by others. diff --git a/helm-charts/common/llm-uservice/README.md b/helm-charts/common/llm-uservice/README.md index aed911c32..0e18f4c00 100644 --- a/helm-charts/common/llm-uservice/README.md +++ b/helm-charts/common/llm-uservice/README.md @@ -30,10 +30,10 @@ export LLM_ENDPOINT="http://tgi" export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" # install llm-textgen with TGI backend -helm install llm-uservice . --set TEXTGEN_BACKEND="TGI" --set LLM_ENDPOINT=${LLM_ENDPOINT} --set LLM_MODEL_ID=${LLM_MODEL_ID} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --wait +helm install llm-uservice . --set TEXTGEN_BACKEND="TGI" --set LLM_ENDPOINT=${LLM_ENDPOINT} --set LLM_MODEL_ID=${LLM_MODEL_ID} --set global.HF_TOKEN=${HFTOKEN} --wait # install llm-textgen with vLLM backend -# helm install llm-uservice . --set TEXTGEN_BACKEND="vLLM" --set LLM_ENDPOINT=${LLM_ENDPOINT} --set LLM_MODEL_ID=${LLM_MODEL_ID} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --wait +# helm install llm-uservice . --set TEXTGEN_BACKEND="vLLM" --set LLM_ENDPOINT=${LLM_ENDPOINT} --set LLM_MODEL_ID=${LLM_MODEL_ID} --set global.HF_TOKEN=${HFTOKEN} --wait # install llm-textgen with BEDROCK backend export LLM_MODEL_ID="insert-bedrock-model-id-here" @@ -48,16 +48,16 @@ export SERVICE_ACCOUNT_NAME="insert-service-account-name" helm install llm-uservice . --set TEXTGEN_BACKEND="BEDROCK" --set LLM_MODEL_ID=${LLM_MODEL_ID} --set bedrock.AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} --set serviceAccount.create=true --set serviceAccount.name=${SERVICE_ACCOUNT_NAME} --wait # install llm-docsum with TGI backend -# helm install llm-uservice . --set image.repository="opea/llm-docsum" --set DOCSUM_BACKEND="TGI" --set LLM_ENDPOINT=${LLM_ENDPOINT} --set LLM_MODEL_ID=${LLM_MODEL_ID} --set MAX_INPUT_TOKENS=2048 --set MAX_TOTAL_TOKENS=4096 --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --wait +# helm install llm-uservice . --set image.repository="opea/llm-docsum" --set DOCSUM_BACKEND="TGI" --set LLM_ENDPOINT=${LLM_ENDPOINT} --set LLM_MODEL_ID=${LLM_MODEL_ID} --set MAX_INPUT_TOKENS=2048 --set MAX_TOTAL_TOKENS=4096 --set global.HF_TOKEN=${HFTOKEN} --wait # install llm-docsum with vLLM backend -# helm install llm-uservice . --set image.repository="opea/llm-docsum" --set DOCSUM_BACKEND="vLLM" --set LLM_ENDPOINT=${LLM_ENDPOINT} --set LLM_MODEL_ID=${LLM_MODEL_ID} --set MAX_INPUT_TOKENS=2048 --set MAX_TOTAL_TOKENS=4096 --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --wait +# helm install llm-uservice . --set image.repository="opea/llm-docsum" --set DOCSUM_BACKEND="vLLM" --set LLM_ENDPOINT=${LLM_ENDPOINT} --set LLM_MODEL_ID=${LLM_MODEL_ID} --set MAX_INPUT_TOKENS=2048 --set MAX_TOTAL_TOKENS=4096 --set global.HF_TOKEN=${HFTOKEN} --wait # install llm-faqgen with TGI backend -# helm install llm-uservice . --set image.repository="opea/llm-faqgen" --set FAQGEN_BACKEND="TGI" --set LLM_ENDPOINT=${LLM_ENDPOINT} --set LLM_MODEL_ID=${LLM_MODEL_ID} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --wait +# helm install llm-uservice . --set image.repository="opea/llm-faqgen" --set FAQGEN_BACKEND="TGI" --set LLM_ENDPOINT=${LLM_ENDPOINT} --set LLM_MODEL_ID=${LLM_MODEL_ID} --set global.HF_TOKEN=${HFTOKEN} --wait # install llm-faqgen with vLLM backend -# helm install llm-uservice . 
--set image.repository="opea/llm-faqgen" --set FAQGEN_BACKEND="vLLM" --set LLM_ENDPOINT=${LLM_ENDPOINT} --set LLM_MODEL_ID=${LLM_MODEL_ID} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --wait +# helm install llm-uservice . --set image.repository="opea/llm-faqgen" --set FAQGEN_BACKEND="vLLM" --set LLM_ENDPOINT=${LLM_ENDPOINT} --set LLM_MODEL_ID=${LLM_MODEL_ID} --set global.HF_TOKEN=${HFTOKEN} --wait ``` ### Install the microservice in air gapped (offline) mode @@ -126,17 +126,17 @@ curl http://localhost:9000/v1/faqgen \ ## Values -| Key | Type | Default | Description | -| ------------------------------- | ------ | ----------------------------- | --------------------------------------------------------------------------------------------------------------------------------- | -| global.HUGGINGFACEHUB_API_TOKEN | string | `""` | Your own Hugging Face API token | -| image.repository | string | `"opea/llm-textgen"` | one of "opea/llm-textgen", "opea/llm-docsum", "opea/llm-faqgen" | -| LLM_ENDPOINT | string | `""` | backend inference service endpoint | -| LLM_MODEL_ID | string | `"Intel/neural-chat-7b-v3-3"` | model used by the inference backend | -| TEXTGEN_BACKEND | string | `"TGI"` | backend inference engine, only valid for llm-textgen image, one of "TGI", "vLLM", "BEDROCK" | -| DOCSUM_BACKEND | string | `"TGI"` | backend inference engine, only valid for llm-docsum image, one of "TGI", "vLLM" | -| FAQGEN_BACKEND | string | `"TGI"` | backend inference engine, only valid for llm-faqgen image, one of "TGi", "vLLM" | -| global.offline | bool | `false` | Whether to run the microservice in air gapped environment | -| global.monitoring | bool | `false` | Service usage metrics | -| bedrock.BEDROCK_REGION | string | `"us-east-1"` | The AWS Region to use when accessing the Bedrock service | -| bedrock.AWS_ACCESS_KEY_ID | string | `""` | The AWS Access Key to use when authenticating with the Bedrock service. If set, bedrock.AWS_SECRET_ACCESS_KEY must also be set | -| bedrock.AWS_SECRET_ACCESS_KEY | string | `""` | The AWS Secret Access Key to use when authenticating with the Bedrock service. If set, bedrock.AWS_ACCESS_KEY_ID must also be set | +| Key | Type | Default | Description | +| ----------------------------- | ------ | ----------------------------- | --------------------------------------------------------------------------------------------------------------------------------- | +| global.HF_TOKEN | string | `""` | Your own Hugging Face API token | +| image.repository | string | `"opea/llm-textgen"` | one of "opea/llm-textgen", "opea/llm-docsum", "opea/llm-faqgen" | +| LLM_ENDPOINT | string | `""` | backend inference service endpoint | +| LLM_MODEL_ID | string | `"Intel/neural-chat-7b-v3-3"` | model used by the inference backend | +| TEXTGEN_BACKEND | string | `"TGI"` | backend inference engine, only valid for llm-textgen image, one of "TGI", "vLLM", "BEDROCK" | +| DOCSUM_BACKEND | string | `"TGI"` | backend inference engine, only valid for llm-docsum image, one of "TGI", "vLLM" | +| FAQGEN_BACKEND | string | `"TGI"` | backend inference engine, only valid for llm-faqgen image, one of "TGi", "vLLM" | +| global.offline | bool | `false` | Whether to run the microservice in air gapped environment | +| global.monitoring | bool | `false` | Service usage metrics | +| bedrock.BEDROCK_REGION | string | `"us-east-1"` | The AWS Region to use when accessing the Bedrock service | +| bedrock.AWS_ACCESS_KEY_ID | string | `""` | The AWS Access Key to use when authenticating with the Bedrock service. 
If set, bedrock.AWS_SECRET_ACCESS_KEY must also be set | +| bedrock.AWS_SECRET_ACCESS_KEY | string | `""` | The AWS Secret Access Key to use when authenticating with the Bedrock service. If set, bedrock.AWS_ACCESS_KEY_ID must also be set | diff --git a/helm-charts/common/llm-uservice/templates/configmap.yaml b/helm-charts/common/llm-uservice/templates/configmap.yaml index e0b90cba8..fc5f0ea8f 100644 --- a/helm-charts/common/llm-uservice/templates/configmap.yaml +++ b/helm-charts/common/llm-uservice/templates/configmap.yaml @@ -72,7 +72,7 @@ data: {{- end }} HF_HOME: "/tmp/.cache/huggingface" {{- if not .Values.global.offline }} - HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote }} + HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | default .Values.global.HF_TOKEN | quote }} {{- end }} {{- if .Values.global.HF_ENDPOINT }} HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote }} diff --git a/helm-charts/common/llm-uservice/values.yaml b/helm-charts/common/llm-uservice/values.yaml index fccd0ae8f..cbed13b33 100644 --- a/helm-charts/common/llm-uservice/values.yaml +++ b/helm-charts/common/llm-uservice/values.yaml @@ -117,7 +117,7 @@ global: http_proxy: "" https_proxy: "" no_proxy: "" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + HF_TOKEN: "insert-your-huggingface-token-here" # service account name to be shared with all parent/child charts. # If set, it will overwrite serviceAccount.name. # If set, and serviceAccount.create is false, it will assume this service account is already created by others. diff --git a/helm-charts/common/lvm-serve/README.md b/helm-charts/common/lvm-serve/README.md index fee0fd6d8..c1faab9f1 100644 --- a/helm-charts/common/lvm-serve/README.md +++ b/helm-charts/common/lvm-serve/README.md @@ -12,11 +12,11 @@ export MODELDIR=/mnt/opea-models export HFTOKEN="insert-your-huggingface-token-here" export LVM_MODEL_ID="llava-hf/llava-1.5-7b-hf" # To deploy lvm-llava microserice on CPU -helm install lvm-serve lvm-serve --set global.modelUseHostPath=${MODELDIR} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set LVM_MODEL_ID=${LVM_MODEL_ID} +helm install lvm-serve lvm-serve --set global.modelUseHostPath=${MODELDIR} --set global.HF_TOKEN=${HFTOKEN} --set LVM_MODEL_ID=${LVM_MODEL_ID} # To deploy lvm-llava microserice on Gaudi -# helm install lvm-serve lvm-serve --set global.modelUseHostPath=${MODELDIR} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set LVM_MODEL_ID=${LVM_MODEL_ID} --values lvm-serve/gaudi-values.yaml +# helm install lvm-serve lvm-serve --set global.modelUseHostPath=${MODELDIR} --set global.HF_TOKEN=${HFTOKEN} --set LVM_MODEL_ID=${LVM_MODEL_ID} --values lvm-serve/gaudi-values.yaml # To deploy lvm-video-llama microserice on CPU -helm install lvm-serve lvm-serve --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values lvm-serve/variant_video-llama-values.yaml +helm install lvm-serve lvm-serve --set global.HF_TOKEN=${HFTOKEN} --values lvm-serve/variant_video-llama-values.yaml ``` By default, the lvm-serve-llava service will downloading the model "llava-hf/llava-1.5-7b-hf" which is about 14GB. 
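Note that the llm-uservice ConfigMap change above only emits `HF_TOKEN` when `global.offline` is false; this can be verified with a local render (a sketch, run from `helm-charts/common/llm-uservice`):

```bash
# In air gapped mode the token must not appear in the rendered ConfigMap;
# the || branch just makes the expected miss explicit.
helm template llm-uservice . --set global.offline=true --set global.HF_TOKEN=demo \
  | grep HF_TOKEN || echo "HF_TOKEN omitted (offline mode)"
```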
@@ -51,10 +51,10 @@ curl $url -XPOST -d "$body" -H 'Content-Type: application/json' ## Values -| Key | Type | Default | Description | -| ------------------------------- | ------ | ------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| global.HUGGINGFACEHUB_API_TOKEN | string | `insert-your-huggingface-token-here` | Hugging Face API token | -| global.modelUseHostPath | string | `""` | Cached models directory, service will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. Set this to null/empty will force it to download model. | -| LVM_MODEL_ID | string | `"llava-hf/llava-1.5-7b-hf"` | | -| autoscaling.enabled | bool | `false` | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA instructions](../../HPA.md) before enabling! | -| global.monitoring | bool | `false` | Enable usage metrics for the service. Required for HPA. See [monitoring instructions](../../monitoring.md) before enabling! | +| Key | Type | Default | Description | +| ----------------------- | ------ | ------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| global.HF_TOKEN | string | `insert-your-huggingface-token-here` | Hugging Face API token | +| global.modelUseHostPath | string | `""` | Cached models directory, service will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. Set this to null/empty will force it to download model. | +| LVM_MODEL_ID | string | `"llava-hf/llava-1.5-7b-hf"` | | +| autoscaling.enabled | bool | `false` | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA instructions](../../HPA.md) before enabling! | +| global.monitoring | bool | `false` | Enable usage metrics for the service. Required for HPA. See [monitoring instructions](../../monitoring.md) before enabling! 
| diff --git a/helm-charts/common/lvm-serve/templates/configmap.yaml b/helm-charts/common/lvm-serve/templates/configmap.yaml index 320aaf934..c27600ef1 100644 --- a/helm-charts/common/lvm-serve/templates/configmap.yaml +++ b/helm-charts/common/lvm-serve/templates/configmap.yaml @@ -21,8 +21,8 @@ data: {{- else if hasSuffix "lvm-llama-vision-tp" .Values.image.repository }} MODEL_ID: {{ .Values.LVM_MODEL_ID | quote }} {{- end }} - HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}} - HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}} + HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | default .Values.global.HF_TOKEN | quote }} + HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | default .Values.global.HF_TOKEN | quote }} {{- if .Values.global.HF_ENDPOINT }} HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}} {{- end }} diff --git a/helm-charts/common/lvm-serve/values.yaml b/helm-charts/common/lvm-serve/values.yaml index 32cd730cc..e4396794f 100644 --- a/helm-charts/common/lvm-serve/values.yaml +++ b/helm-charts/common/lvm-serve/values.yaml @@ -171,7 +171,7 @@ global: http_proxy: "" https_proxy: "" no_proxy: "" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + HF_TOKEN: "insert-your-huggingface-token-here" # service account name to be shared with all parent/child charts. # If set, it will overwrite serviceAccount.name. # If set, and serviceAccount.create is false, it will assume this service account is already created by others. diff --git a/helm-charts/common/lvm-uservice/README.md b/helm-charts/common/lvm-uservice/README.md index e2cc06714..9d258bd5f 100644 --- a/helm-charts/common/lvm-uservice/README.md +++ b/helm-charts/common/lvm-uservice/README.md @@ -27,17 +27,17 @@ export HFTOKEN="insert-your-huggingface-token-here" export LLM_MODEL_ID="model-id-used-for-vllm" export LVM_BACKEND="vLLM" export LVM_ENDPOINT="http://myvllm" -helm install lvm-uservice . --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set LLM_MODEL_ID=${LLM_MODEL_ID} --set LVM_BACKEND=${LVM_BACKEND} --set LVM_ENDPOINT=${LVM_ENDPOINT} --wait +helm install lvm-uservice . --set global.HF_TOKEN=${HFTOKEN} --set LLM_MODEL_ID=${LLM_MODEL_ID} --set LVM_BACKEND=${LVM_BACKEND} --set LVM_ENDPOINT=${LVM_ENDPOINT} --wait # Use TGI as the backend # export LVM_BACKEND="TGI" # export LVM_ENDPOINT="http://tgi" -# helm install lvm-uservice . --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set LVM_BACKEND=${LVM_BACKEND} --set LVM_ENDPOINT=${LVM_ENDPOINT} --wait +# helm install lvm-uservice . --set global.HF_TOKEN=${HFTOKEN} --set LVM_BACKEND=${LVM_BACKEND} --set LVM_ENDPOINT=${LVM_ENDPOINT} --wait # Use other lvm-serve engine variant as the backend, see file `values.yaml` more details # export LVM_ENDPOINT="http://lvm-serve" # export LVM_BACKEND="LLaVA" -# helm install lvm-uservice . --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set LVM_BACKEND=${LVM_BACKEND} --set LVM_ENDPOINT=${LVM_ENDPOINT} --wait +# helm install lvm-uservice . 
--set global.HF_TOKEN=${HFTOKEN} --set LVM_BACKEND=${LVM_BACKEND} --set LVM_ENDPOINT=${LVM_ENDPOINT} --wait ``` ## Verify @@ -57,9 +57,9 @@ curl http://localhost:9000/v1/lvm \ ## Values -| Key | Type | Default | Description | -| ------------------------------- | ------ | -------- | --------------------------------------------------------------------------------------------------------- | -| global.HUGGINGFACEHUB_API_TOKEN | string | `""` | Your own Hugging Face API token | -| LVM_BACKEND | string | `"vLLM"` | lvm backend engine, possible value "vLLM", "TGI", "LLaVA", "VideoLlama", "LlamaVision", "PredictionGuard" | -| LVM_ENDPOINT | string | `""` | LVM endpoint | -| global.monitoring | bool | `false` | Service usage metrics | +| Key | Type | Default | Description | +| ----------------- | ------ | -------- | --------------------------------------------------------------------------------------------------------- | +| global.HF_TOKEN | string | `""` | Your own Hugging Face API token | +| LVM_BACKEND | string | `"vLLM"` | lvm backend engine, possible value "vLLM", "TGI", "LLaVA", "VideoLlama", "LlamaVision", "PredictionGuard" | +| LVM_ENDPOINT | string | `""` | LVM endpoint | +| global.monitoring | bool | `false` | Service usage metrics | diff --git a/helm-charts/common/lvm-uservice/templates/configmap.yaml b/helm-charts/common/lvm-uservice/templates/configmap.yaml index 0bcf3c93c..0deb3a2e5 100644 --- a/helm-charts/common/lvm-uservice/templates/configmap.yaml +++ b/helm-charts/common/lvm-uservice/templates/configmap.yaml @@ -43,7 +43,7 @@ data: {{- if .Values.LLM_MODEL_ID }} LLM_MODEL_ID: {{ .Values.LLM_MODEL_ID | quote }} {{- end }} - HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}} + HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | default .Values.global.HF_TOKEN | quote }} HF_HOME: "/tmp/.cache/huggingface" {{- if .Values.global.HF_ENDPOINT }} HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}} diff --git a/helm-charts/common/lvm-uservice/values.yaml b/helm-charts/common/lvm-uservice/values.yaml index 63da6b603..92507d21d 100644 --- a/helm-charts/common/lvm-uservice/values.yaml +++ b/helm-charts/common/lvm-uservice/values.yaml @@ -95,7 +95,7 @@ global: http_proxy: "" https_proxy: "" no_proxy: "" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + HF_TOKEN: "insert-your-huggingface-token-here" # service account name to be shared with all parent/child charts. # If set, it will overwrite serviceAccount.name. # If set, and serviceAccount.create is false, it will assume this service account is already created by others. 
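Each template change above uses the same fallback, `HUGGINGFACEHUB_API_TOKEN | default HF_TOKEN`, so existing deployments that still set the legacy key keep working. An illustrative precedence check with placeholder values:

```bash
# When both keys are set, the legacy key wins because it is the piped
# value in the `default` chain; this should print "legacy-token".
helm template lvm-uservice . \
  --set global.HUGGINGFACEHUB_API_TOKEN=legacy-token \
  --set global.HF_TOKEN=new-token | grep 'HF_TOKEN:'
```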
diff --git a/helm-charts/common/mm-embedding/README.md b/helm-charts/common/mm-embedding/README.md index 6c899732e..40ee1f8e6 100644 --- a/helm-charts/common/mm-embedding/README.md +++ b/helm-charts/common/mm-embedding/README.md @@ -11,11 +11,11 @@ cd GenAIInfra/helm-charts/common export MODELDIR=/mnt/opea-models export HFTOKEN="insert-your-huggingface-token-here" # To deploy embedding-multimodal-bridgetower microserice on CPU -helm install mm-embedding mm-embedding --set global.modelUseHostPath=${MODELDIR} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} +helm install mm-embedding mm-embedding --set global.modelUseHostPath=${MODELDIR} --set global.HF_TOKEN=${HFTOKEN} # To deploy embedding-multimodal-bridgetower microserice on Gaudi -# helm install mm-embedding mm-embedding --set global.modelUseHostPath=${MODELDIR} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values mm-embedding/gaudi-values.yaml +# helm install mm-embedding mm-embedding --set global.modelUseHostPath=${MODELDIR} --set global.HF_TOKEN=${HFTOKEN} --values mm-embedding/gaudi-values.yaml # To deploy embedding-multimodal-clip microserice on CPU -# helm install mm-embedding mm-embedding --set global.modelUseHostPath=${MODELDIR} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values mm-embedding/variant_clip-values.yaml +# helm install mm-embedding mm-embedding --set global.modelUseHostPath=${MODELDIR} --set global.HF_TOKEN=${HFTOKEN} --values mm-embedding/variant_clip-values.yaml ``` By default, the embedding-multimodal-bridgetower service will download the "BridgeTower/bridgetower-large-itm-mlm-itc" model which is about 3.5GB, and the embedding-multimodal-clip service will download the "openai/clip-vit-base-patch32" model which is about 1.7GB. @@ -50,9 +50,9 @@ curl http://localhost:6990/v1/embeddings \ ## Values -| Key | Type | Default | Description | -| ------------------------------- | ------ | ------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| global.HUGGINGFACEHUB_API_TOKEN | string | `insert-your-huggingface-token-here` | Hugging Face API token | -| global.modelUseHostPath | string | `""` | Cached models directory, service will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. Set this to null/empty will force it to download model. | -| autoscaling.enabled | bool | `false` | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA instructions](../../HPA.md) before enabling! | -| global.monitoring | bool | `false` | Enable usage metrics for the service. Required for HPA. See [monitoring instructions](../../monitoring.md) before enabling! | +| Key | Type | Default | Description | +| ----------------------- | ------ | ------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| global.HF_TOKEN | string | `insert-your-huggingface-token-here` | Hugging Face API token | +| global.modelUseHostPath | string | `""` | Cached models directory, service will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. 
Setting this to null/empty forces the model to be downloaded. | +| autoscaling.enabled | bool | `false` | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA instructions](../../HPA.md) before enabling! | +| global.monitoring | bool | `false` | Enable usage metrics for the service. Required for HPA. See [monitoring instructions](../../monitoring.md) before enabling! | diff --git a/helm-charts/common/mm-embedding/templates/configmap.yaml b/helm-charts/common/mm-embedding/templates/configmap.yaml index 31f6fc571..a901cb419 100644 --- a/helm-charts/common/mm-embedding/templates/configmap.yaml +++ b/helm-charts/common/mm-embedding/templates/configmap.yaml @@ -9,7 +9,7 @@ metadata: {{- include "mm-embedding.labels" . | nindent 4 }} data: PORT: {{ .Values.service.port | quote }} - HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}} + HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | default .Values.global.HF_TOKEN | quote }} {{- if .Values.global.HF_ENDPOINT }} HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}} {{- end }} diff --git a/helm-charts/common/mm-embedding/values.yaml b/helm-charts/common/mm-embedding/values.yaml index 696d7a516..600083795 100644 --- a/helm-charts/common/mm-embedding/values.yaml +++ b/helm-charts/common/mm-embedding/values.yaml @@ -125,7 +125,7 @@ global: http_proxy: "" https_proxy: "" no_proxy: "" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + HF_TOKEN: "insert-your-huggingface-token-here" # service account name to be shared with all parent/child charts. # If set, it will overwrite serviceAccount.name. # If set, and serviceAccount.create is false, it will assume this service account is already created by others. diff --git a/helm-charts/common/retriever-usvc/README.md b/helm-charts/common/retriever-usvc/README.md index 055e8483f..80e15ddcd 100644 --- a/helm-charts/common/retriever-usvc/README.md +++ b/helm-charts/common/retriever-usvc/README.md @@ -29,17 +29,17 @@ export TEI_EMBEDDING_ENDPOINT="http://tei" # Install retriever-usvc with Redis DB backend export RETRIEVER_BACKEND="REDIS" export DB_HOST="redis-vector-db" -helm install retriever-usvc . --set TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} --set global.HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} --set RETRIEVER_BACKEND=${RETRIEVER_BACKEND} --set REDIS_HOST=${DB_HOST} +helm install retriever-usvc . --set TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} --set global.HF_TOKEN=${HF_TOKEN} --set RETRIEVER_BACKEND=${RETRIEVER_BACKEND} --set REDIS_HOST=${DB_HOST} # Install retriever-usvc with Milvus DB backend # export RETRIEVER_BACKEND="MILVUS" # export DB_HOST="milvus" -# helm install retriever-usvc . --set TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} --set global.HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} --set RETRIEVER_BACKEND=${RETRIEVER_BACKEND} --set MILVUS_HOST=${DB_HOST} +# helm install retriever-usvc . --set TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} --set global.HF_TOKEN=${HF_TOKEN} --set RETRIEVER_BACKEND=${RETRIEVER_BACKEND} --set MILVUS_HOST=${DB_HOST} # Install retriever-usvc with Qdrant DB backend # export RETRIEVER_BACKEND="QDRANT" # export DB_HOST="qdrant" -# helm install retriever-usvc . --set TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} --set global.HUGGINGFACEHUB_API_TOKEN=${HF_TOKEN} --set RETRIEVER_BACKEND=${RETRIEVER_BACKEND} --set QDRANT_HOST=${DB_HOST} +# helm install retriever-usvc . 
--set TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} --set global.HF_TOKEN=${HF_TOKEN} --set RETRIEVER_BACKEND=${RETRIEVER_BACKEND} --set QDRANT_HOST=${DB_HOST} ``` ## Verify @@ -60,16 +60,16 @@ curl http://localhost:7000/v1/retrieval \ ## Values -| Key | Type | Default | Description | -| ------------------------------- | ------ | --------- | ------------------------------------------------------------------------------------------------------- | -| global.HUGGINGFACEHUB_API_TOKEN | string | `""` | Your own Hugging Face API token | -| service.port | string | `"7000"` | | -| RETRIEVER_BACKEND | string | `"REDIS"` | vector DB backend to use, one of "REDIS", "MILVUS", "QDRANT" | -| REDIS_HOST | string | `""` | Redis service URL host, only valid for Redis, please see `values.yaml` for other Redis configuration | -| MILVUS_HOST | string | `""` | Milvus service URL host, only valid for Milvus, please see `values.yaml` for other Milvus configuration | -| QDRANT_HOST | string | `""` | Qdrant service URL host, only valid for Qdrant, please see `values.yaml` for other Qdrant configuration | -| TEI_EMBEDDING_ENDPOINT | string | `""` | | -| global.monitoring | bool | `false` | | +| Key | Type | Default | Description | +| ---------------------- | ------ | --------- | ------------------------------------------------------------------------------------------------------- | +| global.HF_TOKEN | string | `""` | Your own Hugging Face API token | +| service.port | string | `"7000"` | | +| RETRIEVER_BACKEND | string | `"REDIS"` | vector DB backend to use, one of "REDIS", "MILVUS", "QDRANT" | +| REDIS_HOST | string | `""` | Redis service URL host, only valid for Redis, please see `values.yaml` for other Redis configuration | +| MILVUS_HOST | string | `""` | Milvus service URL host, only valid for Milvus, please see `values.yaml` for other Milvus configuration | +| QDRANT_HOST | string | `""` | Qdrant service URL host, only valid for Qdrant, please see `values.yaml` for other Qdrant configuration | +| TEI_EMBEDDING_ENDPOINT | string | `""` | | +| global.monitoring | bool | `false` | | ## Milvus support diff --git a/helm-charts/common/retriever-usvc/templates/configmap.yaml b/helm-charts/common/retriever-usvc/templates/configmap.yaml index 67d0f994a..64f0c55ef 100644 --- a/helm-charts/common/retriever-usvc/templates/configmap.yaml +++ b/helm-charts/common/retriever-usvc/templates/configmap.yaml @@ -61,7 +61,8 @@ data: no_proxy: {{ tpl .Values.global.no_proxy . | quote }} {{- end }} HF_HOME: "/tmp/.cache/huggingface" - HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}} + HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | default .Values.global.HF_TOKEN | quote }} + HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | default .Values.global.HF_TOKEN | quote }} LOGFLAG: {{ .Values.LOGFLAG | quote }} NUMBA_CACHE_DIR: "/tmp/numba/cache" MPLCONFIGDIR: "/tmp/matplotlib" diff --git a/helm-charts/common/retriever-usvc/values.yaml b/helm-charts/common/retriever-usvc/values.yaml index 2d689d8f2..4857fe5f2 100644 --- a/helm-charts/common/retriever-usvc/values.yaml +++ b/helm-charts/common/retriever-usvc/values.yaml @@ -118,7 +118,7 @@ global: http_proxy: "" https_proxy: "" no_proxy: "" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + HF_TOKEN: "insert-your-huggingface-token-here" # service account name to be shared with all parent/child charts. # If set, it will overwrite serviceAccount.name. 
# If set, and serviceAccount.create is false, it will assume this service account is already created by others. diff --git a/helm-charts/common/speecht5/templates/configmap.yaml b/helm-charts/common/speecht5/templates/configmap.yaml index c153143f7..0a83a28d2 100644 --- a/helm-charts/common/speecht5/templates/configmap.yaml +++ b/helm-charts/common/speecht5/templates/configmap.yaml @@ -18,4 +18,4 @@ data: HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}} {{- end }} HUGGINGFACE_HUB_CACHE: "/data" - HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}} + HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | default .Values.global.HF_TOKEN | quote }} diff --git a/helm-charts/common/speecht5/values.yaml b/helm-charts/common/speecht5/values.yaml index 46861ca64..6b444aba4 100644 --- a/helm-charts/common/speecht5/values.yaml +++ b/helm-charts/common/speecht5/values.yaml @@ -88,7 +88,7 @@ global: http_proxy: "" https_proxy: "" no_proxy: "" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + HF_TOKEN: "insert-your-huggingface-token-here" # service account name to be shared with all parent/child charts. # If set, it will overwrite serviceAccount.name. # If set, and serviceAccount.create is false, it will assume this service account is already created by others. diff --git a/helm-charts/common/tei/README.md b/helm-charts/common/tei/README.md index b21c2eb25..4af59dc97 100644 --- a/helm-charts/common/tei/README.md +++ b/helm-charts/common/tei/README.md @@ -10,7 +10,8 @@ To install the chart, run the following: cd ${GenAIInfro_repo}/helm-charts/common export MODELDIR=/mnt/opea-models export MODELNAME="BAAI/bge-base-en-v1.5" -helm install tei tei --set global.modelUseHostPath=${MODELDIR} --set EMBEDDING_MODEL_ID=${MODELNAME} +export HFTOKEN="insert-your-huggingface-token-here" +helm install tei tei --set global.modelUseHostPath=${MODELDIR} --set EMBEDDING_MODEL_ID=${MODELNAME} --set global.HF_TOKEN=${HFTOKEN} ``` By default, the tei service will downloading the "BAAI/bge-base-en-v1.5" which is about 1.1GB. diff --git a/helm-charts/common/tei/templates/configmap.yaml b/helm-charts/common/tei/templates/configmap.yaml index 7f7c25301..ad2589100 100644 --- a/helm-charts/common/tei/templates/configmap.yaml +++ b/helm-charts/common/tei/templates/configmap.yaml @@ -32,4 +32,4 @@ data: {{- if .Values.MAX_WARMUP_SEQUENCE_LENGTH }} MAX_WARMUP_SEQUENCE_LENGTH: {{ .Values.MAX_WARMUP_SEQUENCE_LENGTH | quote }} {{- end }} - HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}} + HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | default .Values.global.HF_TOKEN | quote }} diff --git a/helm-charts/common/tei/values.yaml b/helm-charts/common/tei/values.yaml index 351f4fc58..749495a25 100644 --- a/helm-charts/common/tei/values.yaml +++ b/helm-charts/common/tei/values.yaml @@ -111,7 +111,7 @@ global: http_proxy: "" https_proxy: "" no_proxy: "" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + HF_TOKEN: "insert-your-huggingface-token-here" # service account name to be shared with all parent/child charts. # If set, it will overwrite serviceAccount.name. # If set, and serviceAccount.create is false, it will assume this service account is already created by others. 
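The `tei` install above now needs a token for gated models; instead of `--set`, the same value can come from a values file, which keeps the token out of shell history. A short sketch, assuming a local file named `token-values.yaml` (the file name is hypothetical):

```
# token-values.yaml
global:
  HF_TOKEN: "insert-your-huggingface-token-here"
```

```
helm install tei tei --set global.modelUseHostPath=${MODELDIR} --set EMBEDDING_MODEL_ID=${MODELNAME} -f token-values.yaml
```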
diff --git a/helm-charts/common/teirerank/README.md b/helm-charts/common/teirerank/README.md index 79202497d..2dc411900 100644 --- a/helm-charts/common/teirerank/README.md +++ b/helm-charts/common/teirerank/README.md @@ -10,7 +10,8 @@ To install the chart, run the following: cd ${GenAIInfro_repo}/helm-charts/common export MODELDIR=/mnt/opea-models export MODELNAME="BAAI/bge-reranker-base" -helm install teirerank teirerank --set global.modelUseHostPath=${MODELDIR} --set RERANK_MODEL_ID=${MODELNAME} +export HFTOKEN="insert-your-huggingface-token-here" +helm install teirerank teirerank --set global.modelUseHostPath=${MODELDIR} --set RERANK_MODEL_ID=${MODELNAME} --set global.HF_TOKEN=${HFTOKEN} ``` By default, the teirerank service will downloading the "BAAI/bge-reranker-base" which is about 1.1GB. diff --git a/helm-charts/common/teirerank/templates/configmap.yaml b/helm-charts/common/teirerank/templates/configmap.yaml index e1a047eb4..8a6599c46 100644 --- a/helm-charts/common/teirerank/templates/configmap.yaml +++ b/helm-charts/common/teirerank/templates/configmap.yaml @@ -32,4 +32,4 @@ data: {{- if .Values.MAX_WARMUP_SEQUENCE_LENGTH }} MAX_WARMUP_SEQUENCE_LENGTH: {{ .Values.MAX_WARMUP_SEQUENCE_LENGTH | quote }} {{- end }} - HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}} + HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | default .Values.global.HF_TOKEN | quote}} diff --git a/helm-charts/common/teirerank/values.yaml b/helm-charts/common/teirerank/values.yaml index b40116ede..2723ad361 100644 --- a/helm-charts/common/teirerank/values.yaml +++ b/helm-charts/common/teirerank/values.yaml @@ -111,7 +111,7 @@ global: http_proxy: "" https_proxy: "" no_proxy: "" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + HF_TOKEN: "insert-your-huggingface-token-here" # service account name to be shared with all parent/child charts. # If set, it will overwrite serviceAccount.name. # If set, and serviceAccount.create is false, it will assume this service account is already created by others. diff --git a/helm-charts/common/text2image/README.md b/helm-charts/common/text2image/README.md index b17087553..6cb0d9b24 100644 --- a/helm-charts/common/text2image/README.md +++ b/helm-charts/common/text2image/README.md @@ -11,9 +11,9 @@ cd GenAIInfra/helm-charts/common export MODELDIR=/mnt/opea-models export MODELNAME=stable-diffusion-v1-5/stable-diffusion-v1-5 export HFTOKEN="insert-your-huggingface-token-here" -helm install text2image text2image --set global.modelUseHostPath=${MODELDIR} --set MODEL=${MODELNAME} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} +helm install text2image text2image --set global.modelUseHostPath=${MODELDIR} --set MODEL=${MODELNAME} --set global.HF_TOKEN=${HFTOKEN} # To deploy on Gaudi enabled kubernetes cluster -# helm install text2image text2image --set global.modelUseHostPath=${MODELDIR} --set MODEL=${MODELNAME} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values gaudi-values.yaml +# helm install text2image text2image --set global.modelUseHostPath=${MODELDIR} --set MODEL=${MODELNAME} --set global.HF_TOKEN=${HFTOKEN} --values gaudi-values.yaml ``` By default, the text2image service will downloading the "stable-diffusion-v1-5/stable-diffusion-v1-5" which is about 45GB. 
@@ -41,10 +41,10 @@ curl http://localhost:9379/v1/text2image \ ## Values -| Key | Type | Default | Description | -| ------------------------------- | ------ | ----------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| MODEL | string | `"stable-diffusion-v1-5/stable-diffusion-v1-5"` | Models id from https://huggingface.co/, or predownloaded model directory | -| global.HUGGINGFACEHUB_API_TOKEN | string | `insert-your-huggingface-token-here` | Hugging Face API token | -| global.modelUseHostPath | string | `""` | Cached models directory, text2image will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. Set this to null/empty will force it to download model. | -| autoscaling.enabled | bool | `false` | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA instructions](../../HPA.md) before enabling! | -| global.monitoring | bool | `false` | Enable usage metrics for the service. Required for HPA. See [monitoring instructions](../../monitoring.md) before enabling! | +| Key | Type | Default | Description | +| ----------------------- | ------ | ----------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| MODEL | string | `"stable-diffusion-v1-5/stable-diffusion-v1-5"` | Model id from https://huggingface.co/, or a predownloaded model directory | +| global.HF_TOKEN | string | `insert-your-huggingface-token-here` | Hugging Face API token | +| global.modelUseHostPath | string | `""` | Cached models directory, text2image will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. Setting this to null/empty forces the model to be downloaded. | +| autoscaling.enabled | bool | `false` | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA instructions](../../HPA.md) before enabling! | +| global.monitoring | bool | `false` | Enable usage metrics for the service. Required for HPA. See [monitoring instructions](../../monitoring.md) before enabling! | diff --git a/helm-charts/common/text2image/templates/configmap.yaml b/helm-charts/common/text2image/templates/configmap.yaml index 205259ce4..adb6ff68c 100644 --- a/helm-charts/common/text2image/templates/configmap.yaml +++ b/helm-charts/common/text2image/templates/configmap.yaml @@ -9,7 +9,7 @@ metadata: {{- include "text2image.labels" . 
| nindent 4 }} data: MODEL: {{ .Values.MODEL | quote }} - HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}} + HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | default .Values.global.HF_TOKEN | quote}} {{- if .Values.global.HF_ENDPOINT }} HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}} {{- end }} diff --git a/helm-charts/common/text2image/values.yaml b/helm-charts/common/text2image/values.yaml index 17e6e0824..eb4a00210 100644 --- a/helm-charts/common/text2image/values.yaml +++ b/helm-charts/common/text2image/values.yaml @@ -120,7 +120,7 @@ global: http_proxy: "" https_proxy: "" no_proxy: "" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + HF_TOKEN: "insert-your-huggingface-token-here" # service account name to be shared with all parent/child charts. # If set, it will overwrite serviceAccount.name. # If set, and serviceAccount.create is false, it will assume this service account is already created by others. diff --git a/helm-charts/common/tgi/README.md b/helm-charts/common/tgi/README.md index d2daff1b5..35b500846 100644 --- a/helm-charts/common/tgi/README.md +++ b/helm-charts/common/tgi/README.md @@ -12,9 +12,9 @@ export MODELDIR=/mnt/opea-models export MODELNAME="Intel/neural-chat-7b-v3-3" export HFTOKEN="insert-your-huggingface-token-here" helm dependency update -helm install tgi . --set global.modelUseHostPath=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} +helm install tgi . --set global.modelUseHostPath=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME} --set global.HF_TOKEN=${HFTOKEN} # To deploy on Gaudi enabled kubernetes cluster -# helm install tgi . --set global.modelUseHostPath=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values gaudi-values.yaml +# helm install tgi . --set global.modelUseHostPath=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME} --set global.HF_TOKEN=${HFTOKEN} --values gaudi-values.yaml ``` By default, the tgi service will downloading the "Intel/neural-chat-7b-v3-3" which is about 54GB. @@ -42,10 +42,10 @@ curl http://localhost:2080/generate \ ## Values -| Key | Type | Default | Description | -| ------------------------------- | ------ | ------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| LLM_MODEL_ID | string | `"Intel/neural-chat-7b-v3-3"` | Models id from https://huggingface.co/, or predownloaded model directory | -| global.HUGGINGFACEHUB_API_TOKEN | string | `insert-your-huggingface-token-here` | Hugging Face API token | -| global.modelUseHostPath | string | `""` | Cached models directory, tgi will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. Set this to null/empty will force it to download model. | -| autoscaling.enabled | bool | `false` | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA instructions](../../HPA.md) before enabling! | -| global.monitoring | bool | `false` | Enable usage metrics for the service. Required for HPA. See [monitoring instructions](../../monitoring.md) before enabling! 
| +| Key | Type | Default | Description | +| ----------------------- | ------ | ------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| LLM_MODEL_ID | string | `"Intel/neural-chat-7b-v3-3"` | Model id from https://huggingface.co/, or a predownloaded model directory | +| global.HF_TOKEN | string | `insert-your-huggingface-token-here` | Hugging Face API token | +| global.modelUseHostPath | string | `""` | Cached models directory, tgi will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. Setting this to null/empty forces the model to be downloaded. | +| autoscaling.enabled | bool | `false` | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA instructions](../../HPA.md) before enabling! | +| global.monitoring | bool | `false` | Enable usage metrics for the service. Required for HPA. See [monitoring instructions](../../monitoring.md) before enabling! | diff --git a/helm-charts/common/tgi/templates/configmap.yaml b/helm-charts/common/tgi/templates/configmap.yaml index c5a5132c9..828201ed3 100644 --- a/helm-charts/common/tgi/templates/configmap.yaml +++ b/helm-charts/common/tgi/templates/configmap.yaml @@ -10,7 +10,7 @@ data: MODEL_ID: {{ .Values.LLM_MODEL_ID | quote }} PORT: {{ .Values.port | quote }} - HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}} + HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | default .Values.global.HF_TOKEN | quote }} {{- if .Values.global.HF_ENDPOINT }} HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}} {{- end }} diff --git a/helm-charts/common/tgi/values.yaml b/helm-charts/common/tgi/values.yaml index c50a3cc7a..891cdb9d9 100644 --- a/helm-charts/common/tgi/values.yaml +++ b/helm-charts/common/tgi/values.yaml @@ -130,7 +130,7 @@ global: http_proxy: "" https_proxy: "" no_proxy: "" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + HF_TOKEN: "insert-your-huggingface-token-here" # service account name to be shared with all parent/child charts. # If set, it will overwrite serviceAccount.name. # If set, and serviceAccount.create is false, it will assume this service account is already created by others. diff --git a/helm-charts/common/vllm/README.md b/helm-charts/common/vllm/README.md index de4e41acc..7be1ea533 100644 --- a/helm-charts/common/vllm/README.md +++ b/helm-charts/common/vllm/README.md @@ -15,11 +15,11 @@ cd GenAIInfra/helm-charts/common/vllm export MODELDIR=/mnt/opea-models export MODELNAME="meta-llama/Meta-Llama-3-8B-Instruct" export HFTOKEN="insert-your-huggingface-token-here" -helm install myvllm . --set global.modelUseHostPath=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} +helm install myvllm . --set global.modelUseHostPath=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME} --set global.HF_TOKEN=${HFTOKEN} # To deploy on Gaudi enabled kubernetes cluster -# helm install myvllm . --set global.modelUseHostPath=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values gaudi-values.yaml +# helm install myvllm . --set global.modelUseHostPath=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME} --set global.HF_TOKEN=${HFTOKEN} --values gaudi-values.yaml # To deploy on AMD ROCm GPU kubernetes cluster -# helm install vllm-rocm . 
--set global.modelUseHostPath=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values rocm-values.yaml +# helm install vllm-rocm . --set global.modelUseHostPath=${MODELDIR} --set LLM_MODEL_ID=${MODELNAME} --set global.HF_TOKEN=${HFTOKEN} --values rocm-values.yaml ``` By default, the vllm service will downloading the "meta-llama/Meta-Llama-3-8B-Instruct". @@ -46,12 +46,12 @@ curl http://localhost:2080/v1/completions \ ## Values -| Key | Type | Default | Description | -| ------------------------------- | ------ | --------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| LLM_MODEL_ID | string | `"meta-llama/Meta-Llama-3-8B-Instruct"` | Models id from https://huggingface.co/, or predownloaded model directory | -| global.HUGGINGFACEHUB_API_TOKEN | string | `insert-your-huggingface-token-here` | Hugging Face API token | -| global.modelUseHostPath | string | `""` | Cached models directory, vllm will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. Set this to null/empty will force it to download model. | -| image.repository | string | `"opea/vllm"` | | -| image.tag | string | `"latest"` | | -| autoscaling.enabled | bool | `false` | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA instructions](../../HPA.md) before enabling! | -| global.monitoring | bool | `false` | Enable usage metrics for the service. Required for HPA. See [monitoring instructions](../../monitoring.md) before enabling! | +| Key | Type | Default | Description | +| ----------------------- | ------ | --------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| LLM_MODEL_ID | string | `"meta-llama/Meta-Llama-3-8B-Instruct"` | Model id from https://huggingface.co/, or a predownloaded model directory | +| global.HF_TOKEN | string | `insert-your-huggingface-token-here` | Hugging Face API token | +| global.modelUseHostPath | string | `""` | Cached models directory, vllm will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. Setting this to null/empty forces the model to be downloaded. | +| image.repository | string | `"opea/vllm"` | | +| image.tag | string | `"latest"` | | +| autoscaling.enabled | bool | `false` | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA instructions](../../HPA.md) before enabling! | +| global.monitoring | bool | `false` | Enable usage metrics for the service. Required for HPA. See [monitoring instructions](../../monitoring.md) before enabling! | diff --git a/helm-charts/common/vllm/templates/configmap.yaml b/helm-charts/common/vllm/templates/configmap.yaml index 1c09a3197..9cfdb4f89 100644 --- a/helm-charts/common/vllm/templates/configmap.yaml +++ b/helm-charts/common/vllm/templates/configmap.yaml @@ -8,7 +8,7 @@ metadata: labels: {{- include "vllm.labels" . 
| nindent 4 }} data: - HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}} + HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | default .Values.global.HF_TOKEN | quote}} {{- if .Values.global.HF_ENDPOINT }} HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}} {{- end }} diff --git a/helm-charts/common/vllm/values.yaml b/helm-charts/common/vllm/values.yaml index bec916ac6..5dc0d12a5 100644 --- a/helm-charts/common/vllm/values.yaml +++ b/helm-charts/common/vllm/values.yaml @@ -112,7 +112,7 @@ global: http_proxy: "" https_proxy: "" no_proxy: "" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + HF_TOKEN: "insert-your-huggingface-token-here" # service account name to be shared with all parent/child charts. # If set, it will overwrite serviceAccount.name. # If set, and serviceAccount.create is false, it will assume this service account is already created by others. diff --git a/helm-charts/common/whisper/templates/configmap.yaml b/helm-charts/common/whisper/templates/configmap.yaml index 39ab3db4e..7d87d80cb 100644 --- a/helm-charts/common/whisper/templates/configmap.yaml +++ b/helm-charts/common/whisper/templates/configmap.yaml @@ -18,4 +18,4 @@ data: HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}} {{- end }} HUGGINGFACE_HUB_CACHE: "/data" - HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}} + HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | default .Values.global.HF_TOKEN | quote }} diff --git a/helm-charts/common/whisper/values.yaml b/helm-charts/common/whisper/values.yaml index b1f3e247b..5ead6ebf8 100644 --- a/helm-charts/common/whisper/values.yaml +++ b/helm-charts/common/whisper/values.yaml @@ -87,7 +87,7 @@ global: http_proxy: "" https_proxy: "" no_proxy: "" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + HF_TOKEN: "insert-your-huggingface-token-here" # service account name to be shared with all parent/child charts. # If set, it will overwrite serviceAccount.name. # If set, and serviceAccount.create is false, it will assume this service account is already created by others. 
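Existing releases installed with the old `global.HUGGINGFACEHUB_API_TOKEN` keep working, since every configmap above falls back through `default`, but new installs should use `global.HF_TOKEN`. A hedged migration sketch for a running release (the release name and chart path are examples only):

```
# Keep all previous overrides, clear the deprecated key, set the new one;
# an empty string is falsey to `default`, so HF_TOKEN takes effect
helm upgrade myvllm ./vllm --reuse-values \
  --set global.HUGGINGFACEHUB_API_TOKEN="" \
  --set global.HF_TOKEN=${HFTOKEN}
```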
diff --git a/helm-charts/docsum/README.md b/helm-charts/docsum/README.md index 76f50fae2..e2ce2e5c7 100644 --- a/helm-charts/docsum/README.md +++ b/helm-charts/docsum/README.md @@ -17,15 +17,15 @@ helm dependency update docsum export HFTOKEN="insert-your-huggingface-token-here" export MODELDIR="/mnt/opea-models" export MODELNAME="Intel/neural-chat-7b-v3-3" -helm install docsum docsum --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set llm-uservice.LLM_MODEL_ID=${MODELNAME} --set vllm.LLM_MODEL_ID=${MODELNAME} +helm install docsum docsum --set global.HF_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set llm-uservice.LLM_MODEL_ID=${MODELNAME} --set vllm.LLM_MODEL_ID=${MODELNAME} # To use Gaudi device with vLLM -# helm install docsum docsum --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values docsum/gaudi-values.yaml +# helm install docsum docsum --set global.HF_TOKEN=${HFTOKEN} --values docsum/gaudi-values.yaml # To use Gaudi device with TGI -# helm install docsum docsum --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values docsum/gaudi-tgi-values.yaml +# helm install docsum docsum --set global.HF_TOKEN=${HFTOKEN} --values docsum/gaudi-tgi-values.yaml # To use AMD ROCm device with vLLM -# helm install docsum docsum --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values docsum/rocm-values.yaml +# helm install docsum docsum --set global.HF_TOKEN=${HFTOKEN} --values docsum/rocm-values.yaml # To use AMD ROCm device with TGI -# helm install docsum docsum --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values docsum/rocm-tgi-values.yaml +# helm install docsum docsum --set global.HF_TOKEN=${HFTOKEN} --values docsum/rocm-tgi-values.yaml ``` diff --git a/helm-charts/docsum/values.yaml b/helm-charts/docsum/values.yaml index 23fcbfcd9..52d9d2c7d 100644 --- a/helm-charts/docsum/values.yaml +++ b/helm-charts/docsum/values.yaml @@ -107,7 +107,7 @@ global: http_proxy: "" https_proxy: "" no_proxy: "" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + HF_TOKEN: "insert-your-huggingface-token-here" # service account name to be shared with all parent/child charts. # If set, it will overwrite serviceAccount.name. # If set, and serviceAccount.create is false, it will assume this service account is already created by others. 
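Because the token lives under `global:`, Helm propagates it to every subchart of a composite chart such as docsum (llm-uservice, vllm, and so on), so one value covers the whole stack. The `--set` flags in the docsum README are equivalent to a values file along these lines (the file name is an assumption):

```
# docsum-values.yaml
global:
  HF_TOKEN: "insert-your-huggingface-token-here"
  modelUseHostPath: "/mnt/opea-models"
llm-uservice:
  LLM_MODEL_ID: "Intel/neural-chat-7b-v3-3"
vllm:
  LLM_MODEL_ID: "Intel/neural-chat-7b-v3-3"
```

```
helm install docsum docsum -f docsum-values.yaml
```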
diff --git a/helm-charts/financeagent/README.md b/helm-charts/financeagent/README.md index 2f1d36222..0607cc205 100644 --- a/helm-charts/financeagent/README.md +++ b/helm-charts/financeagent/README.md @@ -59,12 +59,12 @@ Deploy everything on Gaudi enabled Kubernetes cluster: If you want to try with latest version, use `helm pull oci://ghcr.io/opea-project/charts/financeagent --version 0-latest --untar` ``` -export HUGGINGFACEHUB_API_TOKEN="YourOwnToken" +export HF_TOKEN="YourOwnToken" export FINNHUB_API_KEY="YourOwnToken" export FINANCIAL_DATASETS_API_KEY="YourOwnToken" helm pull oci://ghcr.io/opea-project/charts/financeagent --untar helm install financeagent financeagent -f financeagent/gaudi-values.yaml \ ---set global.HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} \ +--set global.HF_TOKEN=${HF_TOKEN} \ --set research-agent.FINNHUB_API_KEY=${FINNHUB_API_KEY} \ --set research-agent.FINANCIAL_DATASETS_API_KEY=${FINANCIAL_DATASETS_API_KEY} ``` diff --git a/helm-charts/financeagent/values.yaml b/helm-charts/financeagent/values.yaml index b5bdf6251..8f37abe86 100644 --- a/helm-charts/financeagent/values.yaml +++ b/helm-charts/financeagent/values.yaml @@ -106,7 +106,7 @@ global: http_proxy: "" https_proxy: "" no_proxy: "" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + HF_TOKEN: "insert-your-huggingface-token-here" # service account name to be shared with all parent/child charts. # If set, it will overwrite serviceAccount.name. # If set, and serviceAccount.create is false, it will assume this service account is already created by others. diff --git a/helm-charts/searchqna/README.md b/helm-charts/searchqna/README.md index 523890418..5d2bbe9c5 100644 --- a/helm-charts/searchqna/README.md +++ b/helm-charts/searchqna/README.md @@ -29,10 +29,10 @@ export GOOGLE_API_KEY="insert-your-google-api-key-here" export GOOGLE_CSE_ID="insert-your-google-search-engine-id-here" # To run on Xeon -helm install searchqna searchqna --set global.modelUseHostPath=${MODELDIR} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set web-retriever.GOOGLE_API_KEY=${GOOGLE_API_KEY} --web-retriever.GOOGLE_CSE_ID=${GOOGLE_CSE_ID} --set tgi.LLM_MODEL_ID=${MODEL} --set llm-uservice.LLM_MODEL_ID=${MODEL} +helm install searchqna searchqna --set global.modelUseHostPath=${MODELDIR} --set global.HF_TOKEN=${HFTOKEN} --set web-retriever.GOOGLE_API_KEY=${GOOGLE_API_KEY} --set web-retriever.GOOGLE_CSE_ID=${GOOGLE_CSE_ID} --set tgi.LLM_MODEL_ID=${MODEL} --set llm-uservice.LLM_MODEL_ID=${MODEL} # To run on Gaudi -# helm install searchqna searchqna --set global.modelUseHostPath=${MODELDIR} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set web-retriever.GOOGLE_API_KEY=${GOOGLE_API_KEY} --web-retriever.GOOGLE_CSE_ID=${GOOGLE_CSE_ID} --set tgi.LLM_MODEL_ID=${MODEL} --set llm-uservice.LLM_MODEL_ID=${MODEL} -f gaudi-values.yaml +# helm install searchqna searchqna --set global.modelUseHostPath=${MODELDIR} --set global.HF_TOKEN=${HFTOKEN} --set web-retriever.GOOGLE_API_KEY=${GOOGLE_API_KEY} --set web-retriever.GOOGLE_CSE_ID=${GOOGLE_CSE_ID} --set tgi.LLM_MODEL_ID=${MODEL} --set llm-uservice.LLM_MODEL_ID=${MODEL} -f gaudi-values.yaml ``` ### IMPORTANT NOTE diff --git a/helm-charts/searchqna/values.yaml b/helm-charts/searchqna/values.yaml index b995631c9..48c41bc94 100644 --- a/helm-charts/searchqna/values.yaml +++ b/helm-charts/searchqna/values.yaml @@ -82,7 +82,7 @@ global: http_proxy: "" https_proxy: "" no_proxy: "" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + HF_TOKEN: 
"insert-your-huggingface-token-here" # service account name to be shared with all parent/child charts. # If set, it will overwrite serviceAccount.name. # If set, and serviceAccount.create is false, it will assume this service account is already created by others. diff --git a/helm-charts/txt2img/README.md b/helm-charts/txt2img/README.md index 9f178d370..eece4ffcd 100644 --- a/helm-charts/txt2img/README.md +++ b/helm-charts/txt2img/README.md @@ -13,9 +13,9 @@ helm dependency update txt2img export HFTOKEN="insert-your-huggingface-token-here" export MODELDIR="/mnt/opea-models" # To run on Xeon -helm install txt2img txt2img --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} +helm install txt2img txt2img --set global.HF_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} # To run on Gaudi -#helm install txt2img txt2img --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} -f txt2img/gaudi-values.yaml +#helm install txt2img txt2img --set global.HF_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} -f txt2img/gaudi-values.yaml ``` ### IMPORTANT NOTE diff --git a/helm-charts/txt2img/values.yaml b/helm-charts/txt2img/values.yaml index 59b1d7732..76dc54301 100644 --- a/helm-charts/txt2img/values.yaml +++ b/helm-charts/txt2img/values.yaml @@ -31,7 +31,7 @@ global: http_proxy: "" https_proxy: "" no_proxy: "" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + HF_TOKEN: "insert-your-huggingface-token-here" # service account name to be shared with all parent/child charts. # If set, it will overwrite serviceAccount.name. # If set, and serviceAccount.create is false, it will assume this service account is already created by others. diff --git a/helm-charts/visualqna/README.md b/helm-charts/visualqna/README.md index c0f44b305..f9733959a 100644 --- a/helm-charts/visualqna/README.md +++ b/helm-charts/visualqna/README.md @@ -16,9 +16,9 @@ helm dependency update visualqna export HFTOKEN="insert-your-huggingface-token-here" export MODELDIR="/mnt/opea-models" # To use CPU with vLLM -helm install visualqna visualqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} +helm install visualqna visualqna --set global.HF_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} # To use Gaudi with vLLM -# helm install visualqna visualqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} -f visualqna/gaudi-values.yaml +# helm install visualqna visualqna --set global.HF_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} -f visualqna/gaudi-values.yaml ``` diff --git a/helm-charts/visualqna/values.yaml b/helm-charts/visualqna/values.yaml index 6a5e75a32..66a529f2c 100644 --- a/helm-charts/visualqna/values.yaml +++ b/helm-charts/visualqna/values.yaml @@ -90,7 +90,7 @@ global: http_proxy: "" https_proxy: "" no_proxy: "" - HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + HF_TOKEN: "insert-your-huggingface-token-here" # service account name to be shared with all parent/child charts. # If set, it will overwrite serviceAccount.name. # If set, and serviceAccount.create is false, it will assume this service account is already created by others.