Zhenzhong1 · Zhenzhong1 · Oct 17, 2024 · Oct 17, 2024 · Oct 17, 2024 · Oct 17, 2024
diff --git a/AudioQnA/benchmark/helm_charts/.helmignore b/AudioQnA/benchmark/helm_charts/.helmignore
@@ -0,0 +1,23 @@
+# Patterns for files that Helm should leave out when building the chart package.
+# Shell glob matching, relative path matching, and negation (a pattern
+# prefixed with !) are supported. Put only one pattern on each line.
+.DS_Store
+# Common version-control directories and files
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup and temporary files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# IDE and editor project files
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/AudioQnA/benchmark/helm_charts/Chart.yaml b/AudioQnA/benchmark/helm_charts/Chart.yaml
@@ -0,0 +1,27 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v2
+name: chatqna-charts  # NOTE(review): chart lives under AudioQnA/ - confirm whether this should be audioqna-charts
+description: A Helm chart for Kubernetes
+
+# A chart is either an 'application' chart or a 'library' chart.
+#
+# Application charts bundle templates into versioned archives that can be
+# deployed.
+#
+# Library charts only provide utilities and functions; application charts pull them in as
+# a dependency to inject those helpers into the rendering pipeline. Library charts define
+# no templates of their own and therefore cannot be deployed.
+type: application
+
+# Chart version. Increment it whenever the chart or its templates change, including
+# changes to the app version.
+# Helm requires chart versions to follow Semantic Versioning (https://semver.org/).
+version: 1.0.0
+
+# Version of the application being deployed. Increment it whenever the application
+# changes. It does not have to follow Semantic Versioning; it should reflect the version
+# the application is using. Keep it quoted so it is always read as a string.
+# NOTE(review): "1.16.0" looks like the `helm create` placeholder - confirm.
+appVersion: "1.16.0"
diff --git a/AudioQnA/benchmark/helm_charts/README.md b/AudioQnA/benchmark/helm_charts/README.md
@@ -0,0 +1,36 @@
+# AudioQnA Deployment
+
+This document guides you through deploying AudioQnA pipelines using Helm charts. Helm charts simplify managing Kubernetes applications by packaging configuration and resources.
+
+## Getting Started
+
+### Preparation
+
+```bash
+# on k8s-master node
+cd GenAIExamples/AudioQnA/benchmark/helm_charts
+
+# Replace the value of HUGGINGFACEHUB_API_TOKEN with your actual Hugging Face token:
+# vim customize.yaml
+HUGGINGFACEHUB_API_TOKEN: hf_xxxxx
+```
+
+### Deploy your AudioQnA
+
+```bash
+# From inside the helm_charts directory, deploy an AudioQnA pipeline with the given values file.
+# To deploy with a different configuration, pass a different YAML file to -f.
+helm install audioqna . -f customize.yaml
+```
+
+Notes: The provided [BKC manifests](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark) for single, two, and four node Kubernetes clusters are generated using this tool.
+
+## Customize your own AudioQnA pipelines (optional)
+
+There are two YAML configuration files you can edit:
+
+- customize.yaml
+  This file sets, for each pod, the image name and tag, the number of replicas, and optional resource limits (for example, Gaudi accelerators).
+
+- values.yaml
+  This file contains the default microservice configurations for AudioQnA. Please review and understand each parameter before making any changes.
diff --git a/AudioQnA/benchmark/helm_charts/customize.yaml b/AudioQnA/benchmark/helm_charts/customize.yaml
@@ -0,0 +1,50 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+HUGGINGFACEHUB_API_TOKEN: "${HF_TOKEN}"  # replace with your Hugging Face token
+
+podSpecs:  # one entry per pod: image, tag, replica count, optional resources
+  - name: "audioqna-backend-server-deploy"
+    spec:
+      image_name: "opea/audioqna"
+      image_tag: "latest"
+      replicas: 1
+
+  - name: "asr-deploy"
+    spec:
+      image_name: "opea/asr"
+      image_tag: "latest"
+      replicas: 1
+
+  - name: "whisper-deploy"
+    spec:
+      image_name: "opea/whisper-gaudi"
+      image_tag: "latest"
+      replicas: 1
+      resources:
+        limits:
+          habana.ai/gaudi: 1  # limit on the habana.ai/gaudi resource
+
+  - name: "tts-deploy"
+    spec:
+      image_name: "opea/tts"
+      image_tag: "latest"
+      replicas: 1
+
+  - name: "speecht5-deploy"
+    spec:
+      image_name: "opea/speecht5-gaudi"
+      image_tag: "latest"
+      replicas: 1
+      resources:
+        limits:
+          habana.ai/gaudi: 1  # limit on the habana.ai/gaudi resource
+
+  - name: "llm-dependency-deploy"
+    spec:
+      image_name: "ghcr.io/huggingface/tgi-gaudi"
+      image_tag: "2.0.5"  # quoted so the tag is always read as a string
+      replicas: 1
+      resources:
+        limits:
+          habana.ai/gaudi: 1  # limit on the habana.ai/gaudi resource
diff --git a/AudioQnA/benchmark/helm_charts/templates/configmap.yaml b/AudioQnA/benchmark/helm_charts/templates/configmap.yaml
@@ -0,0 +1,25 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ .Values.config.CONFIG_MAP_NAME }}
+  namespace: default
+data:
+  # ConfigMap data values must be strings, so templated values go through `quote`.
+  HUGGINGFACEHUB_API_TOKEN: {{ .Values.HUGGINGFACEHUB_API_TOKEN | quote }}
+  LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID | quote }}
+  NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR | quote }}
+
+  ASR_ENDPOINT: http://whisper-svc.default.svc.cluster.local:7066
+  TTS_ENDPOINT: http://speecht5-svc.default.svc.cluster.local:7055
+  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:3006
+  MEGA_SERVICE_HOST_IP: audioqna-backend-server-svc
+  ASR_SERVICE_HOST_IP: asr-svc
+  ASR_SERVICE_PORT: "3001"
+  LLM_SERVICE_HOST_IP: llm-svc
+  LLM_SERVICE_PORT: "3007"
+  TTS_SERVICE_HOST_IP: tts-svc
+  TTS_SERVICE_PORT: "3002"
+---
diff --git a/AudioQnA/benchmark/helm_charts/templates/deployment.yaml b/AudioQnA/benchmark/helm_charts/templates/deployment.yaml
@@ -0,0 +1,113 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+{{- /*
+Renders one Deployment for each entry of .Values.deployments that has an entry
+with the same name in .Values.podSpecs; a deployment without a matching podSpec
+renders nothing.
+
+The deployment entry supplies args, env, ports, volumeMounts and volumes; the
+matching podSpec supplies the image name and tag, the replica count and resources.
+*/ -}}
+{{- $global := .Values }}
+{{- range $deployment := .Values.deployments }}
+{{- range $podSpec := $global.podSpecs }}
+{{- if eq $podSpec.name $deployment.name }}
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ $deployment.name }}
+  namespace: default
+spec:
+  replicas: {{ $podSpec.spec.replicas }}
+  selector:
+    matchLabels:
+      app: {{ $deployment.name }}
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: {{ $deployment.name }}
+    spec:
+      containers:
+        - name: {{ $podSpec.name }}
+          image: "{{ $podSpec.spec.image_name }}:{{ $podSpec.spec.image_tag }}"
+          imagePullPolicy: IfNotPresent
+          envFrom:
+            - configMapRef:
+                name: {{ $global.config.CONFIG_MAP_NAME }}
+
+          {{- if $deployment.spec.args }}
+          {{- /* Each entry adds its name (the flag) and/or its value as separate list items.
+                 Empty, 0 and false values are skipped; write them as strings to pass them. */}}
+          args:
+            {{- range $arg := $deployment.spec.args }}
+            {{- if $arg.name }}
+            - {{ $arg.name | quote }}
+            {{- end }}
+            {{- if $arg.value }}
+            - {{ $arg.value | quote }}
+            {{- end }}
+            {{- end }}
+          {{- end }}
+
+          {{- if $deployment.spec.env }}
+          env:
+            {{- range $env := $deployment.spec.env }}
+            - name: {{ $env.name }}
+              value: {{ $env.value | quote }}
+            {{- end }}
+          {{- end }}
+
+          {{- with $deployment.spec.ports }}
+          {{- /* toYaml keeps every key of a port entry (containerPort, protocol, ...) in one list item. */}}
+          ports:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+
+          {{- with $podSpec.spec.resources }}
+          resources:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+
+          {{- if $deployment.spec.volumeMounts }}
+          volumeMounts:
+            {{- range $volumeMount := $deployment.spec.volumeMounts }}
+            - mountPath: {{ $volumeMount.mountPath }}
+              name: {{ $volumeMount.name }}
+            {{- end }}
+          {{- end }}
+
+      hostIPC: true
+      nodeSelector:
+        {{- /* Quoted so boolean- or number-looking selector values stay strings. */}}
+        node-type: {{ $global.config.NODE_SELECTOR | quote }}
+      serviceAccountName: default
+      topologySpreadConstraints:
+        - labelSelector:
+            matchLabels:
+              app: {{ $deployment.name }}
+          maxSkew: 1
+          topologyKey: kubernetes.io/hostname
+          whenUnsatisfiable: ScheduleAnyway
+
+      {{- if $deployment.spec.volumes }}
+      {{- /* Only hostPath and emptyDir sources are rendered; unset optional fields are omitted. */}}
+      volumes:
+        {{- range $volume := $deployment.spec.volumes }}
+        - name: {{ $volume.name }}
+          {{- if $volume.hostPath }}
+          hostPath:
+            {{- toYaml $volume.hostPath | nindent 12 }}
+          {{- else if hasKey $volume "emptyDir" }}
+          emptyDir:
+            {{- toYaml (default (dict) $volume.emptyDir) | nindent 12 }}
+          {{- end }}
+        {{- end }}
+      {{- end }}
+
+---
+{{- end }}
+{{- end }}
+{{- end }}
diff --git a/AudioQnA/benchmark/helm_charts/templates/service.yaml b/AudioQnA/benchmark/helm_charts/templates/service.yaml
@@ -0,0 +1,24 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+{{- /*
+Renders one Service per entry in .Values.services. Each entry supplies the
+Service name, its type, the value of the `app` label to select and the list
+of ports.
+*/ -}}
+{{- range $service := .Values.services }}
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ $service.name }}
+  namespace: default
+spec:
+  type: {{ $service.spec.type }}
+  selector:
+    app: {{ $service.spec.selector.app }}
+  ports:
+    {{- /* toYaml renders each port entry as one list item and keeps value types,
+           e.g. a targetPort given as a string stays a string; absent keys are omitted. */}}
+    {{- toYaml $service.spec.ports | nindent 4 }}
+---
+{{- end }}