diff --git a/helm-charts/chatqna/faqgen-rocm-tgi-values.yaml b/helm-charts/chatqna/faqgen-rocm-tgi-values.yaml index 3951386cb..19ca79f10 100644 --- a/helm-charts/chatqna/faqgen-rocm-tgi-values.yaml +++ b/helm-charts/chatqna/faqgen-rocm-tgi-values.yaml @@ -50,17 +50,3 @@ tgi: failureThreshold: 120 vllm: enabled: false - -# Reranking: second largest bottleneck when reranking is in use -# (i.e. query context docs have been uploaded with data-prep) -# -# TODO: could vLLM be used also for reranking / embedding? -teirerank: - accelDevice: "cpu" - image: - repository: ghcr.io/huggingface/text-embeddings-inference - tag: cpu-1.5 - # securityContext: - # readOnlyRootFilesystem: false - readinessProbe: - timeoutSeconds: 1 diff --git a/helm-charts/chatqna/faqgen-rocm-values.yaml b/helm-charts/chatqna/faqgen-rocm-values.yaml index 279c59721..e8941d815 100644 --- a/helm-charts/chatqna/faqgen-rocm-values.yaml +++ b/helm-charts/chatqna/faqgen-rocm-values.yaml @@ -43,17 +43,3 @@ vllm: readOnlyRootFilesystem: false runAsNonRoot: false runAsUser: 0 - -# Reranking: second largest bottleneck when reranking is in use -# (i.e. query context docs have been uploaded with data-prep) -# -# TODO: could vLLM be used also for reranking / embedding? -teirerank: - accelDevice: "cpu" - image: - repository: ghcr.io/huggingface/text-embeddings-inference - tag: cpu-1.5 - # securityContext: - # readOnlyRootFilesystem: false - readinessProbe: - timeoutSeconds: 1 diff --git a/helm-charts/chatqna/rocm-tgi-values.yaml b/helm-charts/chatqna/rocm-tgi-values.yaml index 35690fbf0..1a76b460d 100644 --- a/helm-charts/chatqna/rocm-tgi-values.yaml +++ b/helm-charts/chatqna/rocm-tgi-values.yaml @@ -45,17 +45,3 @@ tgi: vllm: enabled: false - -# Reranking: second largest bottleneck when reranking is in use -# (i.e. query context docs have been uploaded with data-prep) -# -# TODO: could vLLM be used also for reranking / embedding? -teirerank: - accelDevice: "cpu" - image: - repository: ghcr.io/huggingface/text-embeddings-inference - tag: cpu-1.5 - securityContext: - readOnlyRootFilesystem: false - readinessProbe: - timeoutSeconds: 1 diff --git a/helm-charts/chatqna/rocm-values.yaml b/helm-charts/chatqna/rocm-values.yaml index 085b04408..4d637bd8c 100644 --- a/helm-charts/chatqna/rocm-values.yaml +++ b/helm-charts/chatqna/rocm-values.yaml @@ -37,17 +37,3 @@ vllm: readOnlyRootFilesystem: false runAsNonRoot: false runAsUser: 0 - -# Reranking: second largest bottleneck when reranking is in use -# (i.e. query context docs have been uploaded with data-prep) -# -# TODO: could vLLM be used also for reranking / embedding? -teirerank: - accelDevice: "cpu" - image: - repository: ghcr.io/huggingface/text-embeddings-inference - tag: cpu-1.5 - securityContext: - readOnlyRootFilesystem: false - readinessProbe: - timeoutSeconds: 1 diff --git a/helm-charts/common/tei/README.md b/helm-charts/common/tei/README.md index b21c2eb25..1bafe41c3 100644 --- a/helm-charts/common/tei/README.md +++ b/helm-charts/common/tei/README.md @@ -40,6 +40,6 @@ curl http://localhost:2081/embed -X POST -d '{"inputs":"What is Deep Learning?"} | EMBEDDING_MODEL_ID | string | `"BAAI/bge-base-en-v1.5"` | Models id from https://huggingface.co/, or predownloaded model directory | | global.modelUseHostPath | string | `"/mnt/opea-models"` | Cached models directory, tei will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. Set this to null/empty will force it to download model. | | image.repository | string | `"ghcr.io/huggingface/text-embeddings-inference"` | | -| image.tag | string | `"cpu-1.5"` | | +| image.tag | string | `"cpu-1.7"` | | | autoscaling.enabled | bool | `false` | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA instructions](../../HPA.md) before enabling! | | global.monitoring | bool | `false` | Enable usage metrics for the service. Required for HPA. See [monitoring instructions](../../monitoring.md) before enabling! | diff --git a/helm-charts/common/tei/values.yaml b/helm-charts/common/tei/values.yaml index 351f4fc58..7f9b6c870 100644 --- a/helm-charts/common/tei/values.yaml +++ b/helm-charts/common/tei/values.yaml @@ -30,7 +30,7 @@ image: # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never. # pullPolicy: "" # Overrides the image tag whose default is the chart appVersion. - tag: "cpu-1.5" + tag: "cpu-1.7" # empty for CPU accelDevice: "" diff --git a/helm-charts/common/teirerank/README.md b/helm-charts/common/teirerank/README.md index 79202497d..71d94cd56 100644 --- a/helm-charts/common/teirerank/README.md +++ b/helm-charts/common/teirerank/README.md @@ -43,6 +43,6 @@ curl http://localhost:2082/rerank \ | RERANK_MODEL_ID | string | `"BAAI/bge-reranker-base"` | Models id from https://huggingface.co/, or predownloaded model directory | | global.modelUseHostPath | string | `"/mnt/opea-models"` | Cached models directory, teirerank will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. Set this to null/empty will force it to download model. | | image.repository | string | `"ghcr.io/huggingface/text-embeddings-inference"` | | -| image.tag | string | `"cpu-1.5"` | | +| image.tag | string | `"cpu-1.7"` | | | autoscaling.enabled | bool | `false` | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA instructions](../../HPA.md) before enabling! | | global.monitoring | bool | `false` | Enable usage metrics for the service. Required for HPA. See [monitoring instructions](../../monitoring.md) before enabling! | diff --git a/helm-charts/common/teirerank/values.yaml b/helm-charts/common/teirerank/values.yaml index b40116ede..f86e7a619 100644 --- a/helm-charts/common/teirerank/values.yaml +++ b/helm-charts/common/teirerank/values.yaml @@ -30,7 +30,7 @@ image: # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never. # pullPolicy: "" # Overrides the image tag whose default is the chart appVersion. - tag: "cpu-1.5" + tag: "cpu-1.7" # empty for CPU accelDevice: ""