Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions AudioQnA/benchmark/helm_charts/.helmignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
27 changes: 27 additions & 0 deletions AudioQnA/benchmark/helm_charts/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: v2
name: chatqna-charts
description: A Helm chart for Kubernetes

# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application

# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 1.0

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.16.0"
36 changes: 36 additions & 0 deletions AudioQnA/benchmark/helm_charts/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# ChatQnA Deployment

This document guides you through deploying ChatQnA pipelines using Helm charts. Helm charts simplify managing Kubernetes applications by packaging configuration and resources.

## Getting Started

### Preparation

```bash
# on k8s-master node
cd GenAIExamples/ChatQnA/benchmark/performance/helm_charts

# Replace the key of HUGGINGFACEHUB_API_TOKEN with your actual Hugging Face token:
# vim customize.yaml
HUGGINGFACEHUB_API_TOKEN: hf_xxxxx
```

### Deploy your ChatQnA

```bash
# Deploy a ChatQnA pipeline using the specified YAML configuration.
# To deploy with different configurations, simply provide a different YAML file.
helm install chatqna helm_charts/ -f customize.yaml
```

Notes: The provided [BKC manifests](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark) for single, two, and four node Kubernetes clusters are generated using this tool.

## Customize your own ChatQnA pipelines. (Optional)

There are two yaml configs you can specify.

- customize.yaml
This file can specify image names, the number of replicas and CPU cores to manage your pods.

- values.yaml
This file contains the default microservice configurations for ChatQnA. Please review and understand each parameter before making any changes.
50 changes: 50 additions & 0 deletions AudioQnA/benchmark/helm_charts/customize.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}

podSpecs:
- name: audioqna-backend-server-deploy
spec:
image_name: opea/audioqna
image_tag: latest
replicas: 1

- name: asr-deploy
spec:
image_name: opea/asr
image_tag: latest
replicas: 1

- name: whisper-deploy
spec:
image_name: opea/whisper-gaudi
image_tag: latest
replicas: 1
resources:
limits:
habana.ai/gaudi: 1

- name: tts-deploy
spec:
image_name: opea/tts
image_tag: latest
replicas: 1

- name: speecht5-deploy
spec:
image_name: opea/speecht5-gaudi
image_tag: latest
replicas: 1
resources:
limits:
habana.ai/gaudi: 1

- name: llm-dependency-deploy
spec:
image_name: ghcr.io/huggingface/tgi-gaudi
image_tag: 2.0.5
replicas: 1
resources:
limits:
habana.ai/gaudi: 1
25 changes: 25 additions & 0 deletions AudioQnA/benchmark/helm_charts/templates/configmap.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: v1
kind: ConfigMap
metadata:
name: {{ .Values.config.CONFIG_MAP_NAME }}
namespace: default
data:
HUGGINGFACEHUB_API_TOKEN: {{ .Values.HUGGINGFACEHUB_API_TOKEN }}
LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID }}
NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR }}
TGI_LLM_ENDPOINT: http://faq-tgi-svc.default.svc.cluster.local:8010

ASR_ENDPOINT: http://whisper-svc.default.svc.cluster.local:7066
TTS_ENDPOINT: http://speecht5-svc.default.svc.cluster.local:7055
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:3006
MEGA_SERVICE_HOST_IP: audioqna-backend-server-svc
ASR_SERVICE_HOST_IP: asr-svc
ASR_SERVICE_PORT: "3001"
LLM_SERVICE_HOST_IP: llm-svc
LLM_SERVICE_PORT: "3007"
TTS_SERVICE_HOST_IP: tts-svc
TTS_SERVICE_PORT: "3002"
---
113 changes: 113 additions & 0 deletions AudioQnA/benchmark/helm_charts/templates/deployment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

{{- $global := .Values }}
{{- range $deployment := .Values.deployments }}
{{- range $podSpec := $global.podSpecs }}
{{- if eq $podSpec.name $deployment.name }}
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ $deployment.name }}
namespace: default
spec:
replicas: {{ $podSpec.spec.replicas }}
selector:
matchLabels:
app: {{ $deployment.name }}
template:
metadata:
annotations:
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
labels:
app: {{ $deployment.name }}
spec:
containers:
- envFrom:
- configMapRef:
name: {{ $global.config.CONFIG_MAP_NAME }}
{{- if $deployment.spec.args }}
args:
{{- range $arg := $deployment.spec.args }}
{{- if $arg.name }}
- {{ $arg.name }}
{{- end }}
{{- if $arg.value }}
- "{{ $arg.value }}"
{{- end }}
{{- end }}
{{- end }}

{{- if $deployment.spec.env }}
env:
{{- range $env := $deployment.spec.env }}
- name: {{ $env.name }}
value: "{{ $env.value }}"
{{- end }}
{{- end }}

image: {{ $podSpec.spec.image_name }}:{{ $podSpec.spec.image_tag }}
imagePullPolicy: IfNotPresent
name: {{ $podSpec.name }}

{{- if $deployment.spec.ports }}
ports:
{{- range $port := $deployment.spec.ports }}
{{- range $port_name, $port_id := $port }}
- {{ $port_name }}: {{ $port_id }}
{{- end }}
{{- end }}
{{- end }}


{{- if $podSpec.spec.resources }}
resources:
{{- range $resourceType, $resource := $podSpec.spec.resources }}
{{ $resourceType }}:
{{- range $limitType, $limit := $resource }}
{{ $limitType }}: {{ $limit }}
{{- end }}
{{- end }}
{{- end }}

{{- if $deployment.spec.volumeMounts }}
volumeMounts:
{{- range $volumeMount := $deployment.spec.volumeMounts }}
- mountPath: {{ $volumeMount.mountPath }}
name: {{ $volumeMount.name }}
{{- end }}
{{- end }}

hostIPC: true
nodeSelector:
node-type: {{ $global.config.NODE_SELECTOR }}
serviceAccountName: default
topologySpreadConstraints:
- labelSelector:
matchLabels:
app: {{ $deployment.name }}
maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway


{{- if $deployment.spec.volumes }}
volumes:
{{- range $index, $volume := $deployment.spec.volumes }}
- name: {{ $volume.name }}
{{- if $volume.hostPath }}
hostPath:
path: {{ $volume.hostPath.path }}
type: {{ $volume.hostPath.type }}
{{- else if $volume.emptyDir }}
emptyDir:
medium: {{ $volume.emptyDir.medium }}
sizeLimit: {{ $volume.emptyDir.sizeLimit }}
{{- end }}
{{- end }}
{{- end }}

---
{{- end }}
{{- end }}
{{- end }}
24 changes: 24 additions & 0 deletions AudioQnA/benchmark/helm_charts/templates/service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

{{- range $service := .Values.services }}
apiVersion: v1
kind: Service
metadata:
name: {{ $service.name }}
namespace: default
spec:
ports:
{{- range $port := $service.spec.ports }}
- name: {{ $port.name }}
{{- range $port_name, $port_id := $port }}
{{- if ne $port_name "name"}}
{{ $port_name }}: {{ $port_id }}
{{- end }}
{{- end }}
{{- end }}
selector:
app: {{ $service.spec.selector.app }}
type: {{ $service.spec.type }}
---
{{- end }}
Loading