From 5b4c7e218fcf2e7a1054fa2591c090028f485b7d Mon Sep 17 00:00:00 2001
From: Zhenzhong1 <zhenzhong.xu@intel.com>
Date: Thu, 17 Oct 2024 03:42:28 +0000
Subject: [PATCH 01/18] added helmcharts folder

---
 .../performance/helm_charts/.helmignore       |  23 ++
 .../performance/helm_charts/Chart.yaml        |  27 +++
 .../performance/helm_charts/README.md         |  36 ++++
 .../performance/helm_charts/customize.yaml    |  71 ++++++
 .../helm_charts/templates/configmap.yaml      |  25 +++
 .../helm_charts/templates/deployment.yaml     | 113 ++++++++++
 .../helm_charts/templates/service.yaml        |  24 +++
 .../performance/helm_charts/values.yaml       | 203 ++++++++++++++++++
 8 files changed, 522 insertions(+)
 create mode 100644 FaqGen/benchmark/performance/helm_charts/.helmignore
 create mode 100644 FaqGen/benchmark/performance/helm_charts/Chart.yaml
 create mode 100644 FaqGen/benchmark/performance/helm_charts/README.md
 create mode 100644 FaqGen/benchmark/performance/helm_charts/customize.yaml
 create mode 100644 FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml
 create mode 100644 FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml
 create mode 100644 FaqGen/benchmark/performance/helm_charts/templates/service.yaml
 create mode 100644 FaqGen/benchmark/performance/helm_charts/values.yaml

diff --git a/FaqGen/benchmark/performance/helm_charts/.helmignore b/FaqGen/benchmark/performance/helm_charts/.helmignore
new file mode 100644
index 0000000000..0e8a0eb36f
--- /dev/null
+++ b/FaqGen/benchmark/performance/helm_charts/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/FaqGen/benchmark/performance/helm_charts/Chart.yaml b/FaqGen/benchmark/performance/helm_charts/Chart.yaml
new file mode 100644
index 0000000000..51f94d0879
--- /dev/null
+++ b/FaqGen/benchmark/performance/helm_charts/Chart.yaml
@@ -0,0 +1,27 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v2
+name: chatqna-charts
+description: A Helm chart for Kubernetes
+
+# A chart can be either an 'application' or a 'library' chart.
+#
+# Application charts are a collection of templates that can be packaged into versioned archives
+# to be deployed.
+#
+# Library charts provide useful utilities or functions for the chart developer. They're included as
+# a dependency of application charts to inject those utilities and functions into the rendering
+# pipeline. Library charts do not define any templates and therefore cannot be deployed.
+type: application
+
+# This is the chart version. This version number should be incremented each time you make changes
+# to the chart and its templates, including the app version.
+# It must be a quoted SemVer 2 string (https://semver.org/); an unquoted 1.0 is parsed as a float.
+version: "1.0.0"
+
+# This is the version number of the application being deployed. This version number should be
+# incremented each time you make changes to the application. Versions are not expected to
+# follow Semantic Versioning. They should reflect the version the application is using.
+# It is recommended to use it with quotes.
+appVersion: "1.16.0"
diff --git a/FaqGen/benchmark/performance/helm_charts/README.md b/FaqGen/benchmark/performance/helm_charts/README.md
new file mode 100644
index 0000000000..f6df9ce4fe
--- /dev/null
+++ b/FaqGen/benchmark/performance/helm_charts/README.md
@@ -0,0 +1,36 @@
+# ChatQnA Deployment
+
+This document guides you through deploying ChatQnA pipelines using Helm charts. Helm charts simplify managing Kubernetes applications by packaging configuration and resources.
+
+## Getting Started
+
+### Preparation
+
+```bash
+# on k8s-master node
+cd GenAIExamples/ChatQnA/benchmark/performance/helm_charts
+
+# Replace the key of HUGGINGFACEHUB_API_TOKEN with your actual Hugging Face token:
+# vim customize.yaml
+HUGGINGFACEHUB_API_TOKEN: hf_xxxxx
+```
+
+### Deploy your ChatQnA
+
+```bash
+# Deploy a ChatQnA pipeline using the specified YAML configuration.
+# To deploy with different configurations, simply provide a different YAML file.
+helm install chatqna helm_charts/ -f customize.yaml
+```
+
+Notes: The provided [BKC manifests](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark) for single, two, and four node Kubernetes clusters are generated using this tool.
+
+## Customize your own ChatQnA pipelines. (Optional)
+
+There are two yaml configs you can specify.
+
+- customize.yaml
+  This file can specify image names, the number of replicas and CPU cores to manage your pods.
+
+- values.yaml
+  This file contains the default microservice configurations for ChatQnA. Please review and understand each parameter before making any changes.
diff --git a/FaqGen/benchmark/performance/helm_charts/customize.yaml b/FaqGen/benchmark/performance/helm_charts/customize.yaml
new file mode 100644
index 0000000000..9e7edbeb85
--- /dev/null
+++ b/FaqGen/benchmark/performance/helm_charts/customize.yaml
@@ -0,0 +1,71 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
+
+podSpecs:
+  - name: chatqna-backend-server-deploy
+    spec:
+      image_name: opea/chatqna
+      image_tag: latest
+      replicas: 2
+      resources:
+        limits:
+          cpu: "8"
+          memory: "8000Mi"
+        requests:
+          cpu: "8"
+          memory: "8000Mi"
+
+  - name: embedding-dependency-deploy
+    spec:
+      image_name: ghcr.io/huggingface/text-embeddings-inference
+      image_tag: cpu-1.5
+      replicas: 1
+      resources:
+        limits:
+          cpu: "80"
+          memory: "20000Mi"
+        requests:
+          cpu: "80"
+          memory: "20000Mi"
+
+  - name: reranking-dependency-deploy
+    spec:
+      image_name: opea/tei-gaudi
+      image_tag: latest
+      replicas: 1
+      resources:
+        limits:
+          habana.ai/gaudi: 1
+
+  - name: llm-dependency-deploy
+    spec:
+      image_name: ghcr.io/huggingface/tgi-gaudi
+      image_tag: 2.0.4
+      replicas: 7
+      resources:
+        limits:
+          habana.ai/gaudi: 1
+
+  - name: dataprep-deploy
+    spec:
+      image_name: opea/dataprep-redis
+      image_tag: latest
+      replicas: 1
+
+  - name: vector-db
+    spec:
+      image_name: redis/redis-stack
+      image_tag: 7.2.0-v9
+      replicas: 1
+
+  - name: retriever-deploy
+    spec:
+      image_name: opea/retriever-redis
+      image_tag: latest
+      replicas: 2
+      resources:
+        requests:
+          cpu: "4"
+          memory: "4000Mi"
diff --git a/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml b/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml
new file mode 100644
index 0000000000..2ce795a1ef
--- /dev/null
+++ b/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml
@@ -0,0 +1,25 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: qna-config
+  namespace: default
+data:
+  EMBEDDING_MODEL_ID: {{ .Values.config.EMBEDDING_MODEL_ID }}
+  EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
+  HUGGINGFACEHUB_API_TOKEN: {{ .Values.HUGGINGFACEHUB_API_TOKEN }}
+  INDEX_NAME: rag-redis
+  LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID }}
+  LLM_SERVER_HOST_IP: llm-dependency-svc
+  NODE_SELECTOR: chatqna-opea
+  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
+  RERANK_MODEL_ID: {{ .Values.config.RERANK_MODEL_ID }}
+  RERANK_SERVER_HOST_IP: reranking-dependency-svc
+  RETRIEVER_SERVICE_HOST_IP: retriever-svc
+  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
+  TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
+  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
+  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
+---
diff --git a/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml b/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml
new file mode 100644
index 0000000000..d751d7dfe5
--- /dev/null
+++ b/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml
@@ -0,0 +1,113 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+{{- $global := .Values }}{{/* Root values are captured here because "." is rebound inside the range loops below. */}}
+{{- range $deployment := .Values.deployments }}
+{{- range $podSpec := $global.podSpecs }}
+{{- if eq $podSpec.name $deployment.name }}{{/* Render a Deployment only when a podSpecs entry has the same name as the deployments entry. */}}
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ $deployment.name }}
+  namespace: default
+spec:
+  replicas: {{ $podSpec.spec.replicas }}
+  selector:
+    matchLabels:
+      app: {{ $deployment.name }}
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: {{ $deployment.name }}
+    spec:
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: qna-config
+        {{- if $deployment.spec.args }}
+        args:
+        {{- range $arg := $deployment.spec.args }}
+          {{- if $arg.name }}
+          - {{ $arg.name }}{{/* The flag name becomes its own args item, unquoted. */}}
+          {{- end }}
+          {{- if $arg.value }}
+          - "{{ $arg.value }}"{{/* The optional flag value becomes the next args item, always quoted. */}}
+          {{- end }}
+        {{- end }}
+        {{- end }}
+
+        {{- if $deployment.spec.env }}
+        env:
+        {{- range $env := $deployment.spec.env }}
+          - name: {{ $env.name }}
+            value: "{{ $env.value }}"{{/* Env values are always rendered as quoted strings. */}}
+        {{- end }}
+        {{- end }}
+
+        image: {{ $podSpec.spec.image_name }}:{{ $podSpec.spec.image_tag }}{{/* Image reference is image_name and image_tag of the matching podSpecs entry. */}}
+        imagePullPolicy: IfNotPresent
+        name: {{ $podSpec.name }}
+
+        {{- if $deployment.spec.ports }}
+        ports:
+        {{- range $port := $deployment.spec.ports }}
+          {{- range $port_name, $port_id := $port }}{{/* Each key/value pair of a ports entry is emitted as a separate list item. */}}
+          - {{ $port_name }}: {{ $port_id }}
+          {{- end }}
+        {{- end }}
+        {{- end }}
+
+
+        {{- if $podSpec.spec.resources }}
+        resources:
+        {{- range $resourceType, $resource := $podSpec.spec.resources }}{{/* Outer keys and their inner name/quantity pairs are copied as-is, unquoted. */}}
+          {{ $resourceType }}:
+          {{- range $limitType, $limit := $resource }}
+            {{ $limitType }}: {{ $limit }}
+          {{- end }}
+        {{- end }}
+        {{- end }}
+
+        {{- if $deployment.spec.volumeMounts }}
+        volumeMounts:
+        {{- range $volumeMount := $deployment.spec.volumeMounts }}
+          - mountPath: {{ $volumeMount.mountPath }}
+            name: {{ $volumeMount.name }}
+        {{- end }}
+        {{- end }}
+
+      hostIPC: true
+      nodeSelector:
+        node-type: chatqna-opea
+      serviceAccountName: default
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: {{ $deployment.name }}
+        maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+
+
+      {{- if $deployment.spec.volumes }}
+      volumes:
+      {{- range $index, $volume := $deployment.spec.volumes }}
+        - name: {{ $volume.name }}
+          {{- if $volume.hostPath }}{{/* Only hostPath and emptyDir sources are rendered; any other volume gets just its name. */}}
+          hostPath:
+            path: {{ $volume.hostPath.path }}
+            type: {{ $volume.hostPath.type }}
+          {{- else if $volume.emptyDir }}
+          emptyDir:
+            medium: {{ $volume.emptyDir.medium }}
+            sizeLimit: {{ $volume.emptyDir.sizeLimit }}
+          {{- end }}
+      {{- end }}
+      {{- end }}
+
+---
+{{- end }}
+{{- end }}
+{{- end }}
diff --git a/FaqGen/benchmark/performance/helm_charts/templates/service.yaml b/FaqGen/benchmark/performance/helm_charts/templates/service.yaml
new file mode 100644
index 0000000000..5a5896921d
--- /dev/null
+++ b/FaqGen/benchmark/performance/helm_charts/templates/service.yaml
@@ -0,0 +1,24 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+{{- range $service := .Values.services }}{{/* One Service document is rendered per entry in .Values.services. */}}
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ $service.name }}
+  namespace: default
+spec:
+  ports:
+  {{- range $port := $service.spec.ports }}
+    - name: {{ $port.name }}{{/* The name key is written first so that it opens the list item. */}}
+    {{- range $port_name, $port_id := $port }}
+      {{- if ne $port_name "name"}}{{/* name was emitted above; every other key of the port entry is copied verbatim. */}}
+      {{ $port_name }}: {{ $port_id }}
+      {{- end }}
+    {{- end }}
+  {{- end }}
+  selector:
+    app: {{ $service.spec.selector.app }}
+  type: {{ $service.spec.type }}
+---
+{{- end }}
diff --git a/FaqGen/benchmark/performance/helm_charts/values.yaml b/FaqGen/benchmark/performance/helm_charts/values.yaml
new file mode 100644
index 0000000000..7041e0e8f2
--- /dev/null
+++ b/FaqGen/benchmark/performance/helm_charts/values.yaml
@@ -0,0 +1,203 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+namespace: default
+
+config:
+  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
+  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
+  RERANK_MODEL_ID: BAAI/bge-reranker-base
+
+deployments:
+  - name: chatqna-backend-server-deploy
+    spec:
+      ports:
+        - containerPort: 8888
+
+  - name: dataprep-deploy
+    spec:
+      ports:
+        - containerPort: 6007
+
+  - name: vector-db
+    spec:
+      ports:
+        - containerPort: 6379
+        - containerPort: 8001
+
+  - name: retriever-deploy
+    spec:
+      ports:
+        - containerPort: 7000
+
+  - name: embedding-dependency-deploy
+    spec:
+      ports:
+        - containerPort: 80
+      args:
+        - name: "--model-id"
+          value: $(EMBEDDING_MODEL_ID)
+        - name: "--auto-truncate"
+      volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+      volumes:
+        - hostPath:
+            path: /mnt/models
+            type: Directory
+          name: model-volume
+        - emptyDir:
+            medium: Memory
+            sizeLimit: 1Gi
+          name: shm
+
+  - name: reranking-dependency-deploy
+    spec:
+      args:
+        - name: "--model-id"
+        - value: $(RERANK_MODEL_ID)
+        - name: "--auto-truncate"
+      env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: "true"
+        - name: runtime
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
+        - name: HF_TOKEN
+          value: ${HF_TOKEN}
+        - name: MAX_WARMUP_SEQUENCE_LENGTH
+          value: "512"
+      volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+      volumes:
+        - hostPath:
+            path: /mnt/models
+            type: Directory
+          name: model-volume
+        - emptyDir:
+            medium: Memory
+            sizeLimit: 1Gi
+          name: shm
+
+  - name: llm-dependency-deploy
+    spec:
+      ports:
+        - containerPort: 80
+      resources:
+        limits:
+          habana.ai/gaudi: 1
+      args:
+        - name: "--model-id"
+          value: $(LLM_MODEL_ID)
+        - name: "--max-input-length"
+          value: "2048"
+        - name: "--max-total-tokens"
+          value: "4096"
+      env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: "true"
+        - name: runtime
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
+        - name: HF_TOKEN
+          value: ${HF_TOKEN}
+      volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+      volumes:
+        - hostPath:
+            path: /mnt/models
+            type: Directory
+          name: model-volume
+        - emptyDir:
+            medium: Memory
+            sizeLimit: 1Gi
+          name: shm
+
+services:
+  - name: chatqna-backend-server-svc
+    spec:
+      ports:
+        - name: service
+          nodePort: 30888
+          port: 8888
+          targetPort: 8888
+      selector:
+        app: chatqna-backend-server-deploy
+      type: NodePort
+
+  - name: dataprep-svc
+    spec:
+      ports:
+        - name: port1
+          port: 6007
+          targetPort: 6007
+      selector:
+        app: dataprep-deploy
+      type: ClusterIP
+
+  - name: embedding-dependency-svc
+    spec:
+      ports:
+        - name: service
+          port: 6006
+          targetPort: 80
+      selector:
+        app: embedding-dependency-deploy
+      type: ClusterIP
+
+  - name: llm-dependency-svc
+    spec:
+      ports:
+        - name: service
+          port: 9009
+          targetPort: 80
+      selector:
+        app: llm-dependency-deploy
+      type: ClusterIP
+
+  - name: reranking-dependency-svc
+    spec:
+      ports:
+        - name: service
+          port: 8808
+          targetPort: 80
+      selector:
+        app: reranking-dependency-deploy
+      type: ClusterIP
+
+  - name: retriever-svc
+    spec:
+      ports:
+        - name: service
+          port: 7000
+          targetPort: 7000
+      selector:
+        app: retriever-deploy
+      type: ClusterIP
+
+  - name: vector-db
+    spec:
+      ports:
+        - name: vector-db-service
+          port: 6379
+          targetPort: 6379
+        - name: vector-db-insight
+          port: 8001
+          targetPort: 8001
+      selector:
+        app: vector-db
+      type: ClusterIP

From e33cfb238f9197fb0197afc78fa0bba1b041102e Mon Sep 17 00:00:00 2001
From: Zhenzhong1 <109137058+Zhenzhong1@users.noreply.github.com>
Date: Thu, 17 Oct 2024 12:06:12 +0800
Subject: [PATCH 02/18] initialize values.yaml

---
 .../performance/helm_charts/values.yaml       | 158 ++++--------------
 1 file changed, 28 insertions(+), 130 deletions(-)

diff --git a/FaqGen/benchmark/performance/helm_charts/values.yaml b/FaqGen/benchmark/performance/helm_charts/values.yaml
index 7041e0e8f2..216575aa97 100644
--- a/FaqGen/benchmark/performance/helm_charts/values.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/values.yaml
@@ -4,90 +4,23 @@
 namespace: default
 
 config:
-  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
-  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
-  RERANK_MODEL_ID: BAAI/bge-reranker-base
+  LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
 
 deployments:
-  - name: chatqna-backend-server-deploy
+  - name: faq-mega-server-deploy
     spec:
       ports:
         - containerPort: 8888
 
-  - name: dataprep-deploy
+  - name: faq-micro-deploy
     spec:
       ports:
-        - containerPort: 6007
-
-  - name: vector-db
-    spec:
-      ports:
-        - containerPort: 6379
-        - containerPort: 8001
-
-  - name: retriever-deploy
-    spec:
-      ports:
-        - containerPort: 7000
-
-  - name: embedding-dependency-deploy
-    spec:
-      ports:
-        - containerPort: 80
-      args:
-        - name: "--model-id"
-          value: $(EMBEDDING_MODEL_ID)
-        - name: "--auto-truncate"
-      volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-      volumes:
-        - hostPath:
-            path: /mnt/models
-            type: Directory
-          name: model-volume
-        - emptyDir:
-            medium: Memory
-            sizeLimit: 1Gi
-          name: shm
-
-  - name: reranking-dependency-deploy
-    spec:
-      args:
-        - name: "--model-id"
-        - value: $(RERANK_MODEL_ID)
-        - name: "--auto-truncate"
+        - containerPort: 8888
       env:
-        - name: OMPI_MCA_btl_vader_single_copy_mechanism
-          value: none
-        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
-          value: "true"
-        - name: runtime
-          value: habana
-        - name: HABANA_VISIBLE_DEVICES
-          value: all
-        - name: HF_TOKEN
-          value: ${HF_TOKEN}
-        - name: MAX_WARMUP_SEQUENCE_LENGTH
-          value: "512"
-      volumeMounts:
-        - mountPath: /data
-          name: model-volume
-        - mountPath: /dev/shm
-          name: shm
-      volumes:
-        - hostPath:
-            path: /mnt/models
-            type: Directory
-          name: model-volume
-        - emptyDir:
-            medium: Memory
-            sizeLimit: 1Gi
-          name: shm
+      - name: TGI_LLM_ENDPOINT
+        value: "http://faq-tgi-svc.default.svc.cluster.local:8010"
 
-  - name: llm-dependency-deploy
+  - name: faq-tgi-deploy
     spec:
       ports:
         - containerPort: 80
@@ -112,6 +45,14 @@ deployments:
           value: all
         - name: HF_TOKEN
           value: ${HF_TOKEN}
+        - name: ENABLE_HPU_GRAPH
+          value: 'true'
+        - name: LIMIT_HPU_GRAPH
+          value: 'true'
+        - name: USE_FLASH_ATTENTION
+          value: 'true'
+        - name: FLASH_ATTENTION_RECOMPUTE
+          value: 'true'
       volumeMounts:
         - mountPath: /data
           name: model-volume
@@ -128,76 +69,33 @@ deployments:
           name: shm
 
 services:
-  - name: chatqna-backend-server-svc
-    spec:
-      ports:
-        - name: service
-          nodePort: 30888
-          port: 8888
-          targetPort: 8888
-      selector:
-        app: chatqna-backend-server-deploy
-      type: NodePort
-
-  - name: dataprep-svc
-    spec:
-      ports:
-        - name: port1
-          port: 6007
-          targetPort: 6007
-      selector:
-        app: dataprep-deploy
-      type: ClusterIP
-
-  - name: embedding-dependency-svc
-    spec:
-      ports:
-        - name: service
-          port: 6006
-          targetPort: 80
-      selector:
-        app: embedding-dependency-deploy
-      type: ClusterIP
-
-  - name: llm-dependency-svc
+  - name: faq-micro-svc
     spec:
       ports:
         - name: service
-          port: 9009
-          targetPort: 80
+          port: 9003
+          targetPort: 9000
       selector:
-        app: llm-dependency-deploy
+        app: faq-micro-deploy
       type: ClusterIP
 
-  - name: reranking-dependency-svc
+  - name: faq-tgi-svc
     spec:
       ports:
         - name: service
-          port: 8808
+          port: 8010
           targetPort: 80
       selector:
-        app: reranking-dependency-deploy
+        app: faq-tgi-deploy
       type: ClusterIP
 
-  - name: retriever-svc
+  - name: faq-mega-server-svc
     spec:
       ports:
         - name: service
-          port: 7000
-          targetPort: 7000
-      selector:
-        app: retriever-deploy
-      type: ClusterIP
-
-  - name: vector-db
-    spec:
-      ports:
-        - name: vector-db-service
-          port: 6379
-          targetPort: 6379
-        - name: vector-db-insight
-          port: 8001
-          targetPort: 8001
+          port: 7779
+          targetPort: 7777
+          nodePort: 30779
       selector:
-        app: vector-db
-      type: ClusterIP
+        app: faq-mega-server-deploy
+      type: NodePort

From 113dedc7e00de2d358a02a72c5ee24d709976efa Mon Sep 17 00:00:00 2001
From: Zhenzhong1 <109137058+Zhenzhong1@users.noreply.github.com>
Date: Thu, 17 Oct 2024 13:10:31 +0800
Subject: [PATCH 03/18] Update configmap.yaml

---
 .../helm_charts/templates/configmap.yaml        | 17 +++--------------
 1 file changed, 3 insertions(+), 14 deletions(-)

diff --git a/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml b/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml
index 2ce795a1ef..ff261aa394 100644
--- a/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml
@@ -4,22 +4,11 @@
 apiVersion: v1
 kind: ConfigMap
 metadata:
-  name: qna-config
+  name: faq-config
   namespace: default
 data:
-  EMBEDDING_MODEL_ID: {{ .Values.config.EMBEDDING_MODEL_ID }}
-  EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc
   HUGGINGFACEHUB_API_TOKEN: {{ .Values.HUGGINGFACEHUB_API_TOKEN }}
-  INDEX_NAME: rag-redis
   LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID }}
-  LLM_SERVER_HOST_IP: llm-dependency-svc
-  NODE_SELECTOR: chatqna-opea
-  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
-  RERANK_MODEL_ID: {{ .Values.config.RERANK_MODEL_ID }}
-  RERANK_SERVER_HOST_IP: reranking-dependency-svc
-  RETRIEVER_SERVICE_HOST_IP: retriever-svc
-  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
-  TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
-  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
-  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
+  NODE_SELECTOR: faq-opea
+  TGI_LLM_ENDPOINT: http://faq-tgi-svc.default.svc.cluster.local:8010
 ---

From 90400532548ba8d222afd69edf540c620da83821 Mon Sep 17 00:00:00 2001
From: Zhenzhong1 <109137058+Zhenzhong1@users.noreply.github.com>
Date: Thu, 17 Oct 2024 13:17:13 +0800
Subject: [PATCH 04/18] Update customize.yaml

---
 .../performance/helm_charts/customize.yaml    | 47 ++-----------------
 1 file changed, 5 insertions(+), 42 deletions(-)

diff --git a/FaqGen/benchmark/performance/helm_charts/customize.yaml b/FaqGen/benchmark/performance/helm_charts/customize.yaml
index 9e7edbeb85..5e156db459 100644
--- a/FaqGen/benchmark/performance/helm_charts/customize.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/customize.yaml
@@ -4,7 +4,7 @@
 HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 
 podSpecs:
-  - name: chatqna-backend-server-deploy
+  - name: faq-mega-server-deploy
     spec:
       image_name: opea/chatqna
       image_tag: latest
@@ -17,55 +17,18 @@ podSpecs:
           cpu: "8"
           memory: "8000Mi"
 
-  - name: embedding-dependency-deploy
-    spec:
-      image_name: ghcr.io/huggingface/text-embeddings-inference
-      image_tag: cpu-1.5
-      replicas: 1
-      resources:
-        limits:
-          cpu: "80"
-          memory: "20000Mi"
-        requests:
-          cpu: "80"
-          memory: "20000Mi"
 
-  - name: reranking-dependency-deploy
-    spec:
-      image_name: opea/tei-gaudi
-      image_tag: latest
-      replicas: 1
-      resources:
-        limits:
-          habana.ai/gaudi: 1
-
-  - name: llm-dependency-deploy
+  - name: faq-tgi-deploy
     spec:
       image_name: ghcr.io/huggingface/tgi-gaudi
-      image_tag: 2.0.4
+      image_tag: 2.0.5
       replicas: 7
       resources:
         limits:
           habana.ai/gaudi: 1
 
-  - name: dataprep-deploy
+  - name: faq-micro-deploy
     spec:
-      image_name: opea/dataprep-redis
+      image_name: opea/llm-faqgen-tgi
       image_tag: latest
       replicas: 1
-
-  - name: vector-db
-    spec:
-      image_name: redis/redis-stack
-      image_tag: 7.2.0-v9
-      replicas: 1
-
-  - name: retriever-deploy
-    spec:
-      image_name: opea/retriever-redis
-      image_tag: latest
-      replicas: 2
-      resources:
-        requests:
-          cpu: "4"
-          memory: "4000Mi"

From 9383c226c7052a9ad59863e18d5083a5ae26918e Mon Sep 17 00:00:00 2001
From: Zhenzhong1 <109137058+Zhenzhong1@users.noreply.github.com>
Date: Thu, 17 Oct 2024 13:24:07 +0800
Subject: [PATCH 05/18] Update values.yaml ports

---
 FaqGen/benchmark/performance/helm_charts/values.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/FaqGen/benchmark/performance/helm_charts/values.yaml b/FaqGen/benchmark/performance/helm_charts/values.yaml
index 216575aa97..bd52ef9855 100644
--- a/FaqGen/benchmark/performance/helm_charts/values.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/values.yaml
@@ -10,12 +10,12 @@ deployments:
   - name: faq-mega-server-deploy
     spec:
       ports:
-        - containerPort: 8888
+        - containerPort: 7777
 
   - name: faq-micro-deploy
     spec:
       ports:
-        - containerPort: 8888
+        - containerPort: 9000
       env:
       - name: TGI_LLM_ENDPOINT
         value: "http://faq-tgi-svc.default.svc.cluster.local:8010"

From 6a9d64bd1c1b86bc755c5ce87d1dee3507a68c8e Mon Sep 17 00:00:00 2001
From: Zhenzhong1 <109137058+Zhenzhong1@users.noreply.github.com>
Date: Thu, 17 Oct 2024 13:40:03 +0800
Subject: [PATCH 06/18] Update configmap.yaml

---
 .../benchmark/performance/helm_charts/templates/configmap.yaml  | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml b/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml
index ff261aa394..4bc78fe313 100644
--- a/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml
@@ -11,4 +11,6 @@ data:
   LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID }}
   NODE_SELECTOR: faq-opea
   TGI_LLM_ENDPOINT: http://faq-tgi-svc.default.svc.cluster.local:8010
+  LLM_SERVICE_HOST_IP: faq-micro-svc
+  MEGA_SERVICE_HOST_IP: faq-mega-server-svc  
 ---

From 3a80915269e101ff2bb350620dd9ac29a6b46aaf Mon Sep 17 00:00:00 2001
From: Zhenzhong1 <zhenzhong.xu@intel.com>
Date: Thu, 17 Oct 2024 06:07:45 +0000
Subject: [PATCH 07/18] fixed deployment.yaml

---
 .../performance/helm_charts/templates/deployment.yaml         | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml b/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml
index d751d7dfe5..495c459eb9 100644
--- a/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml
@@ -25,7 +25,7 @@ spec:
       containers:
       - envFrom:
         - configMapRef:
-            name: qna-config
+            name: faq-config
         {{- if $deployment.spec.args }}
         args:
         {{- range $arg := $deployment.spec.args }}
@@ -80,7 +80,7 @@ spec:
 
       hostIPC: true
       nodeSelector:
-        node-type: chatqna-opea
+        node-type: faq-opea
       serviceAccountName: default
       topologySpreadConstraints:
       - labelSelector:

From 43d79d499225c5a57366ca714e655fb0c9ff308e Mon Sep 17 00:00:00 2001
From: Zhenzhong1 <zhenzhong.xu@intel.com>
Date: Thu, 17 Oct 2024 06:23:30 +0000
Subject: [PATCH 08/18] removed HF_TOKEN in values.yaml

---
 FaqGen/benchmark/performance/helm_charts/values.yaml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/FaqGen/benchmark/performance/helm_charts/values.yaml b/FaqGen/benchmark/performance/helm_charts/values.yaml
index bd52ef9855..39f2a4d8e8 100644
--- a/FaqGen/benchmark/performance/helm_charts/values.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/values.yaml
@@ -43,8 +43,6 @@ deployments:
           value: habana
         - name: HABANA_VISIBLE_DEVICES
           value: all
-        - name: HF_TOKEN
-          value: ${HF_TOKEN}
         - name: ENABLE_HPU_GRAPH
           value: 'true'
         - name: LIMIT_HPU_GRAPH

From 084a31ffa55dcf1f57ed683f5d6ba6c26e896694 Mon Sep 17 00:00:00 2001
From: Zhenzhong1 <109137058+Zhenzhong1@users.noreply.github.com>
Date: Thu, 17 Oct 2024 14:29:05 +0800
Subject: [PATCH 09/18] Update values.yaml

---
 FaqGen/benchmark/performance/helm_charts/values.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/FaqGen/benchmark/performance/helm_charts/values.yaml b/FaqGen/benchmark/performance/helm_charts/values.yaml
index 39f2a4d8e8..b583a3fbcc 100644
--- a/FaqGen/benchmark/performance/helm_charts/values.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/values.yaml
@@ -5,6 +5,8 @@ namespace: default
 
 config:
   LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
+  CONFIG_MAP_NAME: faq-config
+  NODE_SELECTOR: faq-opea
 
 deployments:
   - name: faq-mega-server-deploy

From 8d83a453da09397ead92fb8f5110b663b1badc15 Mon Sep 17 00:00:00 2001
From: Zhenzhong1 <109137058+Zhenzhong1@users.noreply.github.com>
Date: Thu, 17 Oct 2024 14:29:59 +0800
Subject: [PATCH 10/18] Update configmap.yaml

---
 .../performance/helm_charts/templates/configmap.yaml          | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml b/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml
index 4bc78fe313..113e96501a 100644
--- a/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml
@@ -4,12 +4,12 @@
 apiVersion: v1
 kind: ConfigMap
 metadata:
-  name: faq-config
+  name: {{ .Values.config.CONFIG_MAP_NAME }}
   namespace: default
 data:
   HUGGINGFACEHUB_API_TOKEN: {{ .Values.HUGGINGFACEHUB_API_TOKEN }}
   LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID }}
-  NODE_SELECTOR: faq-opea
+  NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR }}
   TGI_LLM_ENDPOINT: http://faq-tgi-svc.default.svc.cluster.local:8010
   LLM_SERVICE_HOST_IP: faq-micro-svc
   MEGA_SERVICE_HOST_IP: faq-mega-server-svc  

From 7ca85e1218d5847559d7c40898b29f605633b941 Mon Sep 17 00:00:00 2001
From: Zhenzhong1 <109137058+Zhenzhong1@users.noreply.github.com>
Date: Thu, 17 Oct 2024 14:31:58 +0800
Subject: [PATCH 11/18] Update deployment.yaml

---
 .../performance/helm_charts/templates/deployment.yaml         | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml b/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml
index 495c459eb9..7a9fe0c548 100644
--- a/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml
@@ -25,7 +25,7 @@ spec:
       containers:
       - envFrom:
         - configMapRef:
-            name: faq-config
+            name: {{ $global.config.CONFIG_MAP_NAME }}
         {{- if $deployment.spec.args }}
         args:
         {{- range $arg := $deployment.spec.args }}
@@ -80,7 +80,7 @@ spec:
 
       hostIPC: true
       nodeSelector:
-        node-type: faq-opea
+        node-type: {{ $global.config.NODE_SELECTOR }}
       serviceAccountName: default
       topologySpreadConstraints:
       - labelSelector:

From 1f88b5021efdd5fd60d81d42d46ee2e22b71f56b Mon Sep 17 00:00:00 2001
From: Zhenzhong1 <109137058+Zhenzhong1@users.noreply.github.com>
Date: Thu, 17 Oct 2024 14:38:25 +0800
Subject: [PATCH 12/18] removed TGI_LLM_ENDPOINT in values.yaml

---
 FaqGen/benchmark/performance/helm_charts/values.yaml | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/FaqGen/benchmark/performance/helm_charts/values.yaml b/FaqGen/benchmark/performance/helm_charts/values.yaml
index b583a3fbcc..f26fbace6e 100644
--- a/FaqGen/benchmark/performance/helm_charts/values.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/values.yaml
@@ -18,10 +18,7 @@ deployments:
     spec:
       ports:
         - containerPort: 9000
-      env:
-      - name: TGI_LLM_ENDPOINT
-        value: "http://faq-tgi-svc.default.svc.cluster.local:8010"
-
+  
   - name: faq-tgi-deploy
     spec:
       ports:

From 0c863663657852efb156c5591550245531d1c816 Mon Sep 17 00:00:00 2001
From: Zhenzhong1 <zhenzhong.xu@intel.com>
Date: Thu, 17 Oct 2024 07:38:26 +0000
Subject: [PATCH 13/18] initialize the AudioQnA helm charts

---
 AudioQnA/benchmark/helm_charts/.helmignore    |  23 ++++
 AudioQnA/benchmark/helm_charts/Chart.yaml     |  27 +++++
 AudioQnA/benchmark/helm_charts/README.md      |  36 ++++++
 AudioQnA/benchmark/helm_charts/customize.yaml |  34 ++++++
 .../helm_charts/templates/configmap.yaml      |  16 +++
 .../helm_charts/templates/deployment.yaml     | 113 ++++++++++++++++++
 .../helm_charts/templates/service.yaml        |  24 ++++
 AudioQnA/benchmark/helm_charts/values.yaml    |  98 +++++++++++++++
 8 files changed, 371 insertions(+)
 create mode 100644 AudioQnA/benchmark/helm_charts/.helmignore
 create mode 100644 AudioQnA/benchmark/helm_charts/Chart.yaml
 create mode 100644 AudioQnA/benchmark/helm_charts/README.md
 create mode 100644 AudioQnA/benchmark/helm_charts/customize.yaml
 create mode 100644 AudioQnA/benchmark/helm_charts/templates/configmap.yaml
 create mode 100644 AudioQnA/benchmark/helm_charts/templates/deployment.yaml
 create mode 100644 AudioQnA/benchmark/helm_charts/templates/service.yaml
 create mode 100644 AudioQnA/benchmark/helm_charts/values.yaml

diff --git a/AudioQnA/benchmark/helm_charts/.helmignore b/AudioQnA/benchmark/helm_charts/.helmignore
new file mode 100644
index 0000000000..0e8a0eb36f
--- /dev/null
+++ b/AudioQnA/benchmark/helm_charts/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/AudioQnA/benchmark/helm_charts/Chart.yaml b/AudioQnA/benchmark/helm_charts/Chart.yaml
new file mode 100644
index 0000000000..51f94d0879
--- /dev/null
+++ b/AudioQnA/benchmark/helm_charts/Chart.yaml
@@ -0,0 +1,27 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v2
+name: audioqna-charts
+description: A Helm chart for Kubernetes
+
+# A chart can be either an 'application' or a 'library' chart.
+#
+# Application charts are a collection of templates that can be packaged into versioned archives
+# to be deployed.
+#
+# Library charts provide useful utilities or functions for the chart developer. They're included as
+# a dependency of application charts to inject those utilities and functions into the rendering
+# pipeline. Library charts do not define any templates and therefore cannot be deployed.
+type: application
+
+# This is the chart version. This version number should be incremented each time you make changes
+# to the chart and its templates, including the app version.
+# Versions are expected to follow Semantic Versioning (https://semver.org/)
+version: 1.0.0
+
+# This is the version number of the application being deployed. This version number should be
+# incremented each time you make changes to the application. Versions are not expected to
+# follow Semantic Versioning. They should reflect the version the application is using.
+# It is recommended to use it with quotes.
+appVersion: "1.16.0"
diff --git a/AudioQnA/benchmark/helm_charts/README.md b/AudioQnA/benchmark/helm_charts/README.md
new file mode 100644
index 0000000000..f6df9ce4fe
--- /dev/null
+++ b/AudioQnA/benchmark/helm_charts/README.md
@@ -0,0 +1,36 @@
+# AudioQnA Deployment
+
+This document guides you through deploying AudioQnA pipelines using Helm charts. Helm charts simplify managing Kubernetes applications by packaging configuration and resources.
+
+## Getting Started
+
+### Preparation
+
+```bash
+# on k8s-master node
+cd GenAIExamples/AudioQnA/benchmark/helm_charts
+
+# Replace the value of HUGGINGFACEHUB_API_TOKEN with your actual Hugging Face token:
+# vim customize.yaml
+HUGGINGFACEHUB_API_TOKEN: hf_xxxxx
+```
+
+### Deploy your AudioQnA
+
+```bash
+# Deploy an AudioQnA pipeline using the specified YAML configuration.
+# To deploy with different configurations, simply provide a different YAML file.
+helm install audioqna ../helm_charts/ -f customize.yaml
+```
+
+Notes: The provided [BKC manifests](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark) for single, two, and four node Kubernetes clusters are generated using this tool.
+
+## Customize your own AudioQnA pipelines. (Optional)
+
+There are two YAML configs you can specify.
+
+- customize.yaml
+  This file can specify image names, the number of replicas and CPU cores to manage your pods.
+
+- values.yaml
+  This file contains the default microservice configurations for AudioQnA. Please review and understand each parameter before making any changes.
diff --git a/AudioQnA/benchmark/helm_charts/customize.yaml b/AudioQnA/benchmark/helm_charts/customize.yaml
new file mode 100644
index 0000000000..5e156db459
--- /dev/null
+++ b/AudioQnA/benchmark/helm_charts/customize.yaml
@@ -0,0 +1,34 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
+
+podSpecs:
+  - name: faq-mega-server-deploy
+    spec:
+      image_name: opea/chatqna
+      image_tag: latest
+      replicas: 2
+      resources:
+        limits:
+          cpu: "8"
+          memory: "8000Mi"
+        requests:
+          cpu: "8"
+          memory: "8000Mi"
+
+
+  - name: faq-tgi-deploy
+    spec:
+      image_name: ghcr.io/huggingface/tgi-gaudi
+      image_tag: 2.0.5
+      replicas: 7
+      resources:
+        limits:
+          habana.ai/gaudi: 1
+
+  - name: faq-micro-deploy
+    spec:
+      image_name: opea/llm-faqgen-tgi
+      image_tag: latest
+      replicas: 1
diff --git a/AudioQnA/benchmark/helm_charts/templates/configmap.yaml b/AudioQnA/benchmark/helm_charts/templates/configmap.yaml
new file mode 100644
index 0000000000..113e96501a
--- /dev/null
+++ b/AudioQnA/benchmark/helm_charts/templates/configmap.yaml
@@ -0,0 +1,16 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ .Values.config.CONFIG_MAP_NAME }}
+  namespace: default
+data:
+  HUGGINGFACEHUB_API_TOKEN: {{ .Values.HUGGINGFACEHUB_API_TOKEN | quote }}
+  LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID }}
+  NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR }}
+  TGI_LLM_ENDPOINT: http://faq-tgi-svc.default.svc.cluster.local:8010
+  LLM_SERVICE_HOST_IP: faq-micro-svc
+  MEGA_SERVICE_HOST_IP: faq-mega-server-svc  
+---
diff --git a/AudioQnA/benchmark/helm_charts/templates/deployment.yaml b/AudioQnA/benchmark/helm_charts/templates/deployment.yaml
new file mode 100644
index 0000000000..7a9fe0c548
--- /dev/null
+++ b/AudioQnA/benchmark/helm_charts/templates/deployment.yaml
@@ -0,0 +1,113 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+{{- $global := .Values }}
+{{- range $deployment := .Values.deployments }}
+{{- range $podSpec := $global.podSpecs }}
+{{- if eq $podSpec.name $deployment.name }}
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ $deployment.name }}
+  namespace: default
+spec:
+  replicas: {{ $podSpec.spec.replicas }}
+  selector:
+    matchLabels:
+      app: {{ $deployment.name }}
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: {{ $deployment.name }}
+    spec:
+      containers:
+      - envFrom:
+        - configMapRef:
+            name: {{ $global.config.CONFIG_MAP_NAME }}
+        {{- if $deployment.spec.args }}
+        args:
+        {{- range $arg := $deployment.spec.args }}
+          {{- if $arg.name }}
+          - {{ $arg.name }}
+          {{- end }}
+          {{- if $arg.value }}
+          - "{{ $arg.value }}"
+          {{- end }}
+        {{- end }}
+        {{- end }}
+
+        {{- if $deployment.spec.env }}
+        env:
+        {{- range $env := $deployment.spec.env }}
+          - name: {{ $env.name }}
+            value: "{{ $env.value }}"
+        {{- end }}
+        {{- end }}
+
+        image: {{ $podSpec.spec.image_name }}:{{ $podSpec.spec.image_tag }}
+        imagePullPolicy: IfNotPresent
+        name: {{ $podSpec.name }}
+
+        {{- if $deployment.spec.ports }}
+        ports:
+        {{- range $port := $deployment.spec.ports }}
+          {{- range $port_name, $port_id := $port }}
+          - {{ $port_name }}: {{ $port_id }}
+          {{- end }}
+        {{- end }}
+        {{- end }}
+
+
+        {{- if $podSpec.spec.resources }}
+        resources:
+        {{- range $resourceType, $resource := $podSpec.spec.resources }}
+          {{ $resourceType }}:
+          {{- range $limitType, $limit := $resource }}
+            {{ $limitType }}: {{ $limit }}
+          {{- end }}
+        {{- end }}
+        {{- end }}
+
+        {{- if $deployment.spec.volumeMounts }}
+        volumeMounts:
+        {{- range $volumeMount := $deployment.spec.volumeMounts }}
+          - mountPath: {{ $volumeMount.mountPath }}
+            name: {{ $volumeMount.name }}
+        {{- end }}
+        {{- end }}
+
+      hostIPC: true
+      nodeSelector:
+        node-type: {{ $global.config.NODE_SELECTOR }}
+      serviceAccountName: default
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: {{ $deployment.name }}
+        maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway
+
+
+      {{- if $deployment.spec.volumes }}
+      volumes:
+      {{- range $index, $volume := $deployment.spec.volumes }}
+        - name: {{ $volume.name }}
+          {{- if $volume.hostPath }}
+          hostPath:
+            path: {{ $volume.hostPath.path }}
+            type: {{ $volume.hostPath.type }}
+          {{- else if $volume.emptyDir }}
+          emptyDir:
+            medium: {{ $volume.emptyDir.medium }}
+            sizeLimit: {{ $volume.emptyDir.sizeLimit }}
+          {{- end }}
+      {{- end }}
+      {{- end }}
+
+---
+{{- end }}
+{{- end }}
+{{- end }}
diff --git a/AudioQnA/benchmark/helm_charts/templates/service.yaml b/AudioQnA/benchmark/helm_charts/templates/service.yaml
new file mode 100644
index 0000000000..5a5896921d
--- /dev/null
+++ b/AudioQnA/benchmark/helm_charts/templates/service.yaml
@@ -0,0 +1,24 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+{{- range $service := .Values.services }}
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ $service.name }}
+  namespace: default
+spec:
+  ports:
+  {{- range $port := $service.spec.ports }}
+    - name: {{ $port.name }}
+    {{- range $port_name, $port_id := $port }}
+      {{- if ne $port_name "name"}}
+      {{ $port_name }}: {{ $port_id }}
+      {{- end }}
+    {{- end }}
+  {{- end }}
+  selector:
+    app: {{ $service.spec.selector.app }}
+  type: {{ $service.spec.type }}
+---
+{{- end }}
diff --git a/AudioQnA/benchmark/helm_charts/values.yaml b/AudioQnA/benchmark/helm_charts/values.yaml
new file mode 100644
index 0000000000..f26fbace6e
--- /dev/null
+++ b/AudioQnA/benchmark/helm_charts/values.yaml
@@ -0,0 +1,98 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+namespace: default
+
+config:
+  LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
+  CONFIG_MAP_NAME: faq-config
+  NODE_SELECTOR: faq-opea
+
+deployments:
+  - name: faq-mega-server-deploy
+    spec:
+      ports:
+        - containerPort: 7777
+
+  - name: faq-micro-deploy
+    spec:
+      ports:
+        - containerPort: 9000
+  
+  - name: faq-tgi-deploy
+    spec:
+      ports:
+        - containerPort: 80
+      resources:
+        limits:
+          habana.ai/gaudi: 1
+      args:
+        - name: "--model-id"
+          value: $(LLM_MODEL_ID)
+        - name: "--max-input-length"
+          value: "2048"
+        - name: "--max-total-tokens"
+          value: "4096"
+      env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: "true"
+        - name: runtime
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
+        - name: ENABLE_HPU_GRAPH
+          value: 'true'
+        - name: LIMIT_HPU_GRAPH
+          value: 'true'
+        - name: USE_FLASH_ATTENTION
+          value: 'true'
+        - name: FLASH_ATTENTION_RECOMPUTE
+          value: 'true'
+      volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+      volumes:
+        - hostPath:
+            path: /mnt/models
+            type: Directory
+          name: model-volume
+        - emptyDir:
+            medium: Memory
+            sizeLimit: 1Gi
+          name: shm
+
+services:
+  - name: faq-micro-svc
+    spec:
+      ports:
+        - name: service
+          port: 9003
+          targetPort: 9000
+      selector:
+        app: faq-micro-deploy
+      type: ClusterIP
+
+  - name: faq-tgi-svc
+    spec:
+      ports:
+        - name: service
+          port: 8010
+          targetPort: 80
+      selector:
+        app: faq-tgi-deploy
+      type: ClusterIP
+
+  - name: faq-mega-server-svc
+    spec:
+      ports:
+        - name: service
+          port: 7779
+          targetPort: 7777
+          nodePort: 30779
+      selector:
+        app: faq-mega-server-deploy
+      type: NodePort

From c154ada9c2cf9c97c384fc346ee70f697f935efb Mon Sep 17 00:00:00 2001
From: Zhenzhong1 <109137058+Zhenzhong1@users.noreply.github.com>
Date: Thu, 17 Oct 2024 15:51:17 +0800
Subject: [PATCH 14/18] Update values.yaml audioqna

---
 AudioQnA/benchmark/helm_charts/values.yaml | 128 +++++++++++++++++----
 1 file changed, 108 insertions(+), 20 deletions(-)

diff --git a/AudioQnA/benchmark/helm_charts/values.yaml b/AudioQnA/benchmark/helm_charts/values.yaml
index f26fbace6e..d469003649 100644
--- a/AudioQnA/benchmark/helm_charts/values.yaml
+++ b/AudioQnA/benchmark/helm_charts/values.yaml
@@ -4,22 +4,70 @@
 namespace: default
 
 config:
-  LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
-  CONFIG_MAP_NAME: faq-config
-  NODE_SELECTOR: faq-opea
+  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
+  CONFIG_MAP_NAME: audio-qna-config
+  NODE_SELECTOR: audioqna-opea
+  ASR_ENDPOINT: http://whisper-svc.default.svc.cluster.local:7066
+  TTS_ENDPOINT: http://speecht5-svc.default.svc.cluster.local:7055
+  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:3006
+  MEGA_SERVICE_HOST_IP: audioqna-backend-server-svc
+  ASR_SERVICE_HOST_IP: asr-svc
+  ASR_SERVICE_PORT: "3001"
+  LLM_SERVICE_HOST_IP: llm-svc
+  LLM_SERVICE_PORT: "3007"
+  TTS_SERVICE_HOST_IP: tts-svc
+  TTS_SERVICE_PORT: "3002"
 
 deployments:
-  - name: faq-mega-server-deploy
+  - name: audioqna-backend-server-deploy
     spec:
       ports:
-        - containerPort: 7777
+        - containerPort: 8888
+        
+  - name: asr-deploy
+    spec:
+      ports:
+        - containerPort: 9099
+
+  - name: whisper-deploy
+    spec:
+      ports:
+        - containerPort: 7066
+      env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: 'true'
+        - name: runtime
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
 
-  - name: faq-micro-deploy
+  - name: tts-deploy
+    spec:
+      ports:
+        - containerPort: 9088
+
+  - name: llm-deploy
     spec:
       ports:
         - containerPort: 9000
-  
-  - name: faq-tgi-deploy
+
+  - name: speecht5-deploy
+    spec:
+      ports:
+        - containerPort: 7055
+      env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: 'true'
+        - name: runtime
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
+
+  - name: llm-dependency-deploy
     spec:
       ports:
         - containerPort: 80
@@ -66,33 +114,73 @@ deployments:
           name: shm
 
 services:
-  - name: faq-micro-svc
+  - name: asr-svc
     spec:
       ports:
         - name: service
-          port: 9003
-          targetPort: 9000
+          port: 3001
+          targetPort: 9099
+      selector:
+        app: asr-deploy
+      type: ClusterIP
+
+  - name: whisper-svc
+    spec:
+      ports:
+        - name: service
+          port: 7066
+          targetPort: 7066
       selector:
-        app: faq-micro-deploy
+        app: whisper-deploy
       type: ClusterIP
 
-  - name: faq-tgi-svc
+  - name: tts-svc
     spec:
       ports:
         - name: service
-          port: 8010
+          port: 3002
+          targetPort: 9088
+      selector:
+        app: tts-deploy
+      type: ClusterIP
+
+  - name: speecht5-svc
+    spec:
+      ports:
+        - name: service
+          port: 7055
+          targetPort: 7055
+      selector:
+        app: speecht5-deploy
+      type: ClusterIP
+
+  - name: llm-dependency-svc
+    spec:
+      ports:
+        - name: service
+          port: 3006
           targetPort: 80
       selector:
-        app: faq-tgi-deploy
+        app: llm-dependency-deploy
       type: ClusterIP
 
-  - name: faq-mega-server-svc
+  - name: llm-svc
+    spec:
+      ports:
+        - name: service
+          port: 3007
+          targetPort: 9000
+      selector:
+        app: llm-deploy
+      type: ClusterIP
+      
+  - name: audioqna-backend-server-svc
     spec:
       ports:
         - name: service
-          port: 7779
-          targetPort: 7777
-          nodePort: 30779
+          port: 3088
+          targetPort: 8888
+          nodePort: 30666
       selector:
-        app: faq-mega-server-deploy
+        app: audioqna-backend-server-deploy
       type: NodePort

From 2af3066ec0bd698be2568ba4c348b1a7602bff6d Mon Sep 17 00:00:00 2001
From: Zhenzhong1 <109137058+Zhenzhong1@users.noreply.github.com>
Date: Thu, 17 Oct 2024 15:52:12 +0800
Subject: [PATCH 15/18] Update configmap.yaml audioqna

---
 .../benchmark/helm_charts/templates/configmap.yaml  | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/AudioQnA/benchmark/helm_charts/templates/configmap.yaml b/AudioQnA/benchmark/helm_charts/templates/configmap.yaml
index 113e96501a..79246763fa 100644
--- a/AudioQnA/benchmark/helm_charts/templates/configmap.yaml
+++ b/AudioQnA/benchmark/helm_charts/templates/configmap.yaml
@@ -11,6 +11,14 @@ data:
   LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID }}
   NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR }}
-  TGI_LLM_ENDPOINT: http://faq-tgi-svc.default.svc.cluster.local:8010
-  LLM_SERVICE_HOST_IP: faq-micro-svc
-  MEGA_SERVICE_HOST_IP: faq-mega-server-svc  
+
+  ASR_ENDPOINT: http://whisper-svc.default.svc.cluster.local:7066
+  TTS_ENDPOINT: http://speecht5-svc.default.svc.cluster.local:7055
+  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:3006
+  MEGA_SERVICE_HOST_IP: audioqna-backend-server-svc
+  ASR_SERVICE_HOST_IP: asr-svc
+  ASR_SERVICE_PORT: "3001"
+  LLM_SERVICE_HOST_IP: llm-svc
+  LLM_SERVICE_PORT: "3007"
+  TTS_SERVICE_HOST_IP: tts-svc
+  TTS_SERVICE_PORT: "3002"
 ---

From 4ba047cf0eb3c8149317ff66cacb99c2132d6138 Mon Sep 17 00:00:00 2001
From: Zhenzhong1 <109137058+Zhenzhong1@users.noreply.github.com>
Date: Thu, 17 Oct 2024 15:58:04 +0800
Subject: [PATCH 16/18] Update customize.yaml audioqna

---
 AudioQnA/benchmark/helm_charts/customize.yaml | 56 +++++++++++++------
 1 file changed, 39 insertions(+), 17 deletions(-)

diff --git a/AudioQnA/benchmark/helm_charts/customize.yaml b/AudioQnA/benchmark/helm_charts/customize.yaml
index 5e156db459..2c788c5725 100644
--- a/AudioQnA/benchmark/helm_charts/customize.yaml
+++ b/AudioQnA/benchmark/helm_charts/customize.yaml
@@ -4,31 +4,53 @@
 HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
 
 podSpecs:
-  - name: faq-mega-server-deploy
+  - name: audioqna-backend-server-deploy
     spec:
-      image_name: opea/chatqna
+      image_name: opea/audioqna
       image_tag: latest
-      replicas: 2
+      replicas: 1
+      
+  - name: asr-deploy
+    spec:
+      image_name: opea/asr
+      image_tag: latest
+      replicas: 1
+      
+  - name: whisper-deploy
+    spec:
+      image_name: opea/whisper-gaudi
+      image_tag: latest
+      replicas: 1
       resources:
         limits:
-          cpu: "8"
-          memory: "8000Mi"
-        requests:
-          cpu: "8"
-          memory: "8000Mi"
-
-
-  - name: faq-tgi-deploy
+          habana.ai/gaudi: 1
+      
+  - name: tts-deploy
     spec:
-      image_name: ghcr.io/huggingface/tgi-gaudi
-      image_tag: 2.0.5
-      replicas: 7
+      image_name: opea/tts
+      image_tag: latest
+      replicas: 1
+      
+  - name: speecht5-deploy
+    spec:
+      image_name: opea/speecht5-gaudi
+      image_tag: latest
+      replicas: 1
       resources:
         limits:
           habana.ai/gaudi: 1
 
-  - name: faq-micro-deploy
+  - name: llm-dependency-deploy
     spec:
-      image_name: opea/llm-faqgen-tgi
-      image_tag: latest
+      image_name: ghcr.io/huggingface/tgi-gaudi
+      image_tag: 2.0.5
       replicas: 1
+      resources:
+        limits:
+          habana.ai/gaudi: 1
+
+  - name: llm-deploy
+    spec:
+      image_name: opea/llm-tgi
+      image_tag: latest
+      replicas: 1

From c0f2b47aef6fc288f826c725f4aa74d51f2fccb6 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 21 Oct 2024 02:58:48 +0000
Subject: [PATCH 17/18] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 AudioQnA/benchmark/helm_charts/customize.yaml             | 8 ++++----
 AudioQnA/benchmark/helm_charts/values.yaml                | 4 ++--
 .../performance/helm_charts/templates/configmap.yaml      | 2 +-
 FaqGen/benchmark/performance/helm_charts/values.yaml      | 2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/AudioQnA/benchmark/helm_charts/customize.yaml b/AudioQnA/benchmark/helm_charts/customize.yaml
index 2c788c5725..31e1b6ca65 100644
--- a/AudioQnA/benchmark/helm_charts/customize.yaml
+++ b/AudioQnA/benchmark/helm_charts/customize.yaml
@@ -9,13 +9,13 @@ podSpecs:
       image_name: opea/audioqna
       image_tag: latest
       replicas: 1
-      
+
   - name: asr-deploy
     spec:
       image_name: opea/asr
       image_tag: latest
       replicas: 1
-      
+
   - name: whisper-deploy
     spec:
       image_name: opea/whisper-gaudi
@@ -24,13 +24,13 @@ podSpecs:
       resources:
         limits:
           habana.ai/gaudi: 1
-      
+
   - name: tts-deploy
     spec:
       image_name: opea/tts
       image_tag: latest
       replicas: 1
-      
+
   - name: speecht5-deploy
     spec:
       image_name: opea/speecht5-gaudi
diff --git a/AudioQnA/benchmark/helm_charts/values.yaml b/AudioQnA/benchmark/helm_charts/values.yaml
index d469003649..e2f03da958 100644
--- a/AudioQnA/benchmark/helm_charts/values.yaml
+++ b/AudioQnA/benchmark/helm_charts/values.yaml
@@ -23,7 +23,7 @@ deployments:
     spec:
       ports:
         - containerPort: 8888
-        
+
   - name: asr-deploy
     spec:
       ports:
@@ -173,7 +173,7 @@ services:
       selector:
         app: llm-deploy
       type: ClusterIP
-      
+
   - name: audioqna-backend-server-svc
     spec:
       ports:
diff --git a/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml b/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml
index 113e96501a..df3e61d20a 100644
--- a/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml
@@ -12,5 +12,5 @@ data:
   NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR }}
   TGI_LLM_ENDPOINT: http://faq-tgi-svc.default.svc.cluster.local:8010
   LLM_SERVICE_HOST_IP: faq-micro-svc
-  MEGA_SERVICE_HOST_IP: faq-mega-server-svc  
+  MEGA_SERVICE_HOST_IP: faq-mega-server-svc
 ---
diff --git a/FaqGen/benchmark/performance/helm_charts/values.yaml b/FaqGen/benchmark/performance/helm_charts/values.yaml
index f26fbace6e..eeb206761a 100644
--- a/FaqGen/benchmark/performance/helm_charts/values.yaml
+++ b/FaqGen/benchmark/performance/helm_charts/values.yaml
@@ -18,7 +18,7 @@ deployments:
     spec:
       ports:
         - containerPort: 9000
-  
+
   - name: faq-tgi-deploy
     spec:
       ports:

From 399768e98b60ece41e12405f372758f1ee985d71 Mon Sep 17 00:00:00 2001
From: Zhenzhong1 <zhenzhong.xu@intel.com>
Date: Mon, 21 Oct 2024 03:12:02 +0000
Subject: [PATCH 18/18] modified the README.md

---
 ChatQnA/benchmark/performance/helm_charts/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ChatQnA/benchmark/performance/helm_charts/README.md b/ChatQnA/benchmark/performance/helm_charts/README.md
index f6df9ce4fe..8da6d836e2 100644
--- a/ChatQnA/benchmark/performance/helm_charts/README.md
+++ b/ChatQnA/benchmark/performance/helm_charts/README.md
@@ -20,7 +20,7 @@ HUGGINGFACEHUB_API_TOKEN: hf_xxxxx
 ```bash
 # Deploy a ChatQnA pipeline using the specified YAML configuration.
 # To deploy with different configurations, simply provide a different YAML file.
-helm install chatqna helm_charts/ -f customize.yaml
+helm install chatqna ../helm_charts/ -f customize.yaml
 ```
 
 Notes: The provided [BKC manifests](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark) for single, two, and four node Kubernetes clusters are generated using this tool.