Merged
14 changes: 0 additions & 14 deletions helm-charts/chatqna/faqgen-rocm-tgi-values.yaml
@@ -50,17 +50,3 @@ tgi:
     failureThreshold: 120
 vllm:
   enabled: false
-
-# Reranking: second largest bottleneck when reranking is in use
-# (i.e. query context docs have been uploaded with data-prep)
-#
-# TODO: could vLLM be used also for reranking / embedding?
-teirerank:
-  accelDevice: "cpu"
-  image:
-    repository: ghcr.io/huggingface/text-embeddings-inference
-    tag: cpu-1.5
-  # securityContext:
-  #   readOnlyRootFilesystem: false
-  readinessProbe:
-    timeoutSeconds: 1
14 changes: 0 additions & 14 deletions helm-charts/chatqna/faqgen-rocm-values.yaml
@@ -43,17 +43,3 @@ vllm:
     readOnlyRootFilesystem: false
     runAsNonRoot: false
     runAsUser: 0
-
-# Reranking: second largest bottleneck when reranking is in use
-# (i.e. query context docs have been uploaded with data-prep)
-#
-# TODO: could vLLM be used also for reranking / embedding?
-teirerank:
-  accelDevice: "cpu"
-  image:
-    repository: ghcr.io/huggingface/text-embeddings-inference
-    tag: cpu-1.5
-  # securityContext:
-  #   readOnlyRootFilesystem: false
-  readinessProbe:
-    timeoutSeconds: 1
14 changes: 0 additions & 14 deletions helm-charts/chatqna/rocm-tgi-values.yaml
@@ -45,17 +45,3 @@ tgi:
 
 vllm:
   enabled: false
-
-# Reranking: second largest bottleneck when reranking is in use
-# (i.e. query context docs have been uploaded with data-prep)
-#
-# TODO: could vLLM be used also for reranking / embedding?
-teirerank:
-  accelDevice: "cpu"
-  image:
-    repository: ghcr.io/huggingface/text-embeddings-inference
-    tag: cpu-1.5
-  securityContext:
-    readOnlyRootFilesystem: false
-  readinessProbe:
-    timeoutSeconds: 1
14 changes: 0 additions & 14 deletions helm-charts/chatqna/rocm-values.yaml
@@ -37,17 +37,3 @@ vllm:
     readOnlyRootFilesystem: false
     runAsNonRoot: false
     runAsUser: 0
-
-# Reranking: second largest bottleneck when reranking is in use
-# (i.e. query context docs have been uploaded with data-prep)
-#
-# TODO: could vLLM be used also for reranking / embedding?
-teirerank:
-  accelDevice: "cpu"
-  image:
-    repository: ghcr.io/huggingface/text-embeddings-inference
-    tag: cpu-1.5
-  securityContext:
-    readOnlyRootFilesystem: false
-  readinessProbe:
-    timeoutSeconds: 1
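All four ROCm values files drop the same CPU teirerank override. A deployment that still needs the reranker forced onto CPU can carry the block in a user-supplied values file instead of the chart defaults; a sketch based on the removed block (file name is illustrative, and bumping the tag to cpu-1.7 to match this PR's new default is an assumption, since the removed block still pinned cpu-1.5):

```yaml
# rocm-overrides.yaml — hypothetical user values file restoring the
# CPU reranker override removed from the chart defaults above.
# Tag is bumped to cpu-1.7 to match the default introduced by this PR.
teirerank:
  accelDevice: "cpu"
  image:
    repository: ghcr.io/huggingface/text-embeddings-inference
    tag: cpu-1.7
  readinessProbe:
    timeoutSeconds: 1
```

Passed alongside the chart's own values (e.g. `helm install ... -f rocm-overrides.yaml`), user-supplied files take precedence over chart defaults, so the override survives chart upgrades.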
2 changes: 1 addition & 1 deletion helm-charts/common/tei/README.md
@@ -40,6 +40,6 @@ curl http://localhost:2081/embed -X POST -d '{"inputs":"What is Deep Learning?"}
 | EMBEDDING_MODEL_ID | string | `"BAAI/bge-base-en-v1.5"` | Models id from https://huggingface.co/, or predownloaded model directory |
 | global.modelUseHostPath | string | `"/mnt/opea-models"` | Cached models directory, tei will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. Set this to null/empty will force it to download model. |
 | image.repository | string | `"ghcr.io/huggingface/text-embeddings-inference"` | |
-| image.tag | string | `"cpu-1.5"` | |
+| image.tag | string | `"cpu-1.7"` | |
 | autoscaling.enabled | bool | `false` | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA instructions](../../HPA.md) before enabling! |
 | global.monitoring | bool | `false` | Enable usage metrics for the service. Required for HPA. See [monitoring instructions](../../monitoring.md) before enabling! |
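Per the parameter table in this README, the image tag and model cache location are ordinary Helm values, so the new default can be overridden without editing the chart; a minimal sketch (file name is illustrative, keys are from the table above, values are examples only):

```yaml
# tei-overrides.yaml — illustrative override file for the tei chart;
# keys come from the chart's documented parameters.
EMBEDDING_MODEL_ID: "BAAI/bge-base-en-v1.5"
image:
  tag: "cpu-1.7"
global:
  modelUseHostPath: "/mnt/opea-models"  # set empty/null to force a model download
```

Applied with e.g. `helm install tei helm-charts/common/tei -f tei-overrides.yaml`, using the chart path shown in this diff.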
2 changes: 1 addition & 1 deletion helm-charts/common/tei/values.yaml
@@ -30,7 +30,7 @@ image:
   # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never.
   # pullPolicy: ""
   # Overrides the image tag whose default is the chart appVersion.
-  tag: "cpu-1.5"
+  tag: "cpu-1.7"
 
 # empty for CPU
 accelDevice: ""
2 changes: 1 addition & 1 deletion helm-charts/common/teirerank/README.md
@@ -43,6 +43,6 @@ curl http://localhost:2082/rerank \
 | RERANK_MODEL_ID | string | `"BAAI/bge-reranker-base"` | Models id from https://huggingface.co/, or predownloaded model directory |
 | global.modelUseHostPath | string | `"/mnt/opea-models"` | Cached models directory, teirerank will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. Set this to null/empty will force it to download model. |
 | image.repository | string | `"ghcr.io/huggingface/text-embeddings-inference"` | |
-| image.tag | string | `"cpu-1.5"` | |
+| image.tag | string | `"cpu-1.7"` | |
 | autoscaling.enabled | bool | `false` | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA instructions](../../HPA.md) before enabling! |
 | global.monitoring | bool | `false` | Enable usage metrics for the service. Required for HPA. See [monitoring instructions](../../monitoring.md) before enabling! |
2 changes: 1 addition & 1 deletion helm-charts/common/teirerank/values.yaml
@@ -30,7 +30,7 @@ image:
   # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never.
   # pullPolicy: ""
   # Overrides the image tag whose default is the chart appVersion.
-  tag: "cpu-1.5"
+  tag: "cpu-1.7"
 
 # empty for CPU
 accelDevice: ""