From 5b4c7e218fcf2e7a1054fa2591c090028f485b7d Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Thu, 17 Oct 2024 03:42:28 +0000 Subject: [PATCH 01/18] added helmcharts folder --- .../performance/helm_charts/.helmignore | 23 ++ .../performance/helm_charts/Chart.yaml | 27 +++ .../performance/helm_charts/README.md | 36 ++++ .../performance/helm_charts/customize.yaml | 71 ++++++ .../helm_charts/templates/configmap.yaml | 25 +++ .../helm_charts/templates/deployment.yaml | 113 ++++++++++ .../helm_charts/templates/service.yaml | 24 +++ .../performance/helm_charts/values.yaml | 203 ++++++++++++++++++ 8 files changed, 522 insertions(+) create mode 100644 FaqGen/benchmark/performance/helm_charts/.helmignore create mode 100644 FaqGen/benchmark/performance/helm_charts/Chart.yaml create mode 100644 FaqGen/benchmark/performance/helm_charts/README.md create mode 100644 FaqGen/benchmark/performance/helm_charts/customize.yaml create mode 100644 FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml create mode 100644 FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml create mode 100644 FaqGen/benchmark/performance/helm_charts/templates/service.yaml create mode 100644 FaqGen/benchmark/performance/helm_charts/values.yaml diff --git a/FaqGen/benchmark/performance/helm_charts/.helmignore b/FaqGen/benchmark/performance/helm_charts/.helmignore new file mode 100644 index 0000000000..0e8a0eb36f --- /dev/null +++ b/FaqGen/benchmark/performance/helm_charts/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/FaqGen/benchmark/performance/helm_charts/Chart.yaml b/FaqGen/benchmark/performance/helm_charts/Chart.yaml new file mode 100644 index 0000000000..51f94d0879 --- /dev/null +++ b/FaqGen/benchmark/performance/helm_charts/Chart.yaml @@ -0,0 +1,27 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: chatqna-charts +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. +appVersion: "1.16.0" diff --git a/FaqGen/benchmark/performance/helm_charts/README.md b/FaqGen/benchmark/performance/helm_charts/README.md new file mode 100644 index 0000000000..f6df9ce4fe --- /dev/null +++ b/FaqGen/benchmark/performance/helm_charts/README.md @@ -0,0 +1,36 @@ +# ChatQnA Deployment + +This document guides you through deploying ChatQnA pipelines using Helm charts. Helm charts simplify managing Kubernetes applications by packaging configuration and resources. + +## Getting Started + +### Preparation + +```bash +# on k8s-master node +cd GenAIExamples/ChatQnA/benchmark/performance/helm_charts + +# Replace the key of HUGGINGFACEHUB_API_TOKEN with your actual Hugging Face token: +# vim customize.yaml +HUGGINGFACEHUB_API_TOKEN: hf_xxxxx +``` + +### Deploy your ChatQnA + +```bash +# Deploy a ChatQnA pipeline using the specified YAML configuration. +# To deploy with different configurations, simply provide a different YAML file. +helm install chatqna helm_charts/ -f customize.yaml +``` + +Notes: The provided [BKC manifests](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark) for single, two, and four node Kubernetes clusters are generated using this tool. + +## Customize your own ChatQnA pipelines. (Optional) + +There are two yaml configs you can specify. + +- customize.yaml + This file can specify image names, the number of replicas and CPU cores to manage your pods. + +- values.yaml + This file contains the default microservice configurations for ChatQnA. Please review and understand each parameter before making any changes. diff --git a/FaqGen/benchmark/performance/helm_charts/customize.yaml b/FaqGen/benchmark/performance/helm_charts/customize.yaml new file mode 100644 index 0000000000..9e7edbeb85 --- /dev/null +++ b/FaqGen/benchmark/performance/helm_charts/customize.yaml @@ -0,0 +1,71 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + +podSpecs: + - name: chatqna-backend-server-deploy + spec: + image_name: opea/chatqna + image_tag: latest + replicas: 2 + resources: + limits: + cpu: "8" + memory: "8000Mi" + requests: + cpu: "8" + memory: "8000Mi" + + - name: embedding-dependency-deploy + spec: + image_name: ghcr.io/huggingface/text-embeddings-inference + image_tag: cpu-1.5 + replicas: 1 + resources: + limits: + cpu: "80" + memory: "20000Mi" + requests: + cpu: "80" + memory: "20000Mi" + + - name: reranking-dependency-deploy + spec: + image_name: opea/tei-gaudi + image_tag: latest + replicas: 1 + resources: + limits: + habana.ai/gaudi: 1 + + - name: llm-dependency-deploy + spec: + image_name: ghcr.io/huggingface/tgi-gaudi + image_tag: 2.0.4 + replicas: 7 + resources: + limits: + habana.ai/gaudi: 1 + + - name: dataprep-deploy + spec: + image_name: opea/dataprep-redis + image_tag: latest + replicas: 1 + + - name: vector-db + spec: + image_name: redis/redis-stack + image_tag: 7.2.0-v9 + replicas: 1 + + - name: retriever-deploy + spec: + image_name: opea/retriever-redis + image_tag: latest + replicas: 2 + resources: + requests: + cpu: "4" + memory: "4000Mi" diff --git a/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml b/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml new file mode 100644 index 0000000000..2ce795a1ef --- /dev/null +++ b/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml @@ -0,0 +1,25 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: {{ .Values.config.EMBEDDING_MODEL_ID }} + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + HUGGINGFACEHUB_API_TOKEN: {{ .Values.HUGGINGFACEHUB_API_TOKEN }} + INDEX_NAME: rag-redis + LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID }} + LLM_SERVER_HOST_IP: llm-dependency-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + RERANK_MODEL_ID: {{ .Values.config.RERANK_MODEL_ID }} + RERANK_SERVER_HOST_IP: reranking-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 +--- diff --git a/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml b/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml new file mode 100644 index 0000000000..d751d7dfe5 --- /dev/null +++ b/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml @@ -0,0 +1,113 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +{{- $global := .Values }} +{{- range $deployment := .Values.deployments }} +{{- range $podSpec := $global.podSpecs }} +{{- if eq $podSpec.name $deployment.name }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ $deployment.name }} + namespace: default +spec: + replicas: {{ $podSpec.spec.replicas }} + selector: + matchLabels: + app: {{ $deployment.name }} + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: {{ $deployment.name }} + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + {{- if $deployment.spec.args }} + args: + {{- range $arg := $deployment.spec.args }} + {{- if $arg.name }} + - {{ $arg.name }} + {{- end }} + {{- if $arg.value }} + - "{{ $arg.value }}" + {{- end }} + {{- end }} + {{- end }} + + {{- if $deployment.spec.env }} + env: + {{- range $env := $deployment.spec.env }} + - name: {{ $env.name }} + value: "{{ $env.value }}" + {{- end }} + {{- end }} + + image: {{ $podSpec.spec.image_name }}:{{ $podSpec.spec.image_tag }} + imagePullPolicy: IfNotPresent + name: {{ $podSpec.name }} + + {{- if $deployment.spec.ports }} + ports: + {{- range $port := $deployment.spec.ports }} + {{- range $port_name, $port_id := $port }} + - {{ $port_name }}: {{ $port_id }} + {{- end }} + {{- end }} + {{- end }} + + + {{- if $podSpec.spec.resources }} + resources: + {{- range $resourceType, $resource := $podSpec.spec.resources }} + {{ $resourceType }}: + {{- range $limitType, $limit := $resource }} + {{ $limitType }}: {{ $limit }} + {{- end }} + {{- end }} + {{- end }} + + {{- if $deployment.spec.volumeMounts }} + volumeMounts: + {{- range $volumeMount := $deployment.spec.volumeMounts }} + - mountPath: {{ $volumeMount.mountPath }} + name: {{ $volumeMount.name }} + {{- end }} + {{- end }} + + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: {{ $deployment.name }} + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + + + {{- if $deployment.spec.volumes }} + volumes: + {{- range $index, $volume := $deployment.spec.volumes }} + - name: {{ $volume.name }} + {{- if $volume.hostPath }} + hostPath: + path: {{ $volume.hostPath.path }} + type: {{ $volume.hostPath.type }} + {{- else if $volume.emptyDir }} + emptyDir: + medium: {{ $volume.emptyDir.medium }} + sizeLimit: {{ $volume.emptyDir.sizeLimit }} + {{- end }} + {{- end }} + {{- end }} + +--- +{{- end }} +{{- end }} +{{- end }} diff --git a/FaqGen/benchmark/performance/helm_charts/templates/service.yaml b/FaqGen/benchmark/performance/helm_charts/templates/service.yaml new file mode 100644 index 0000000000..5a5896921d --- /dev/null +++ b/FaqGen/benchmark/performance/helm_charts/templates/service.yaml @@ -0,0 +1,24 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +{{- range $service := .Values.services }} +apiVersion: v1 +kind: Service +metadata: + name: {{ $service.name }} + namespace: default +spec: + ports: + {{- range $port := $service.spec.ports }} + - name: {{ $port.name }} + {{- range $port_name, $port_id := $port }} + {{- if ne $port_name "name"}} + {{ $port_name }}: {{ $port_id }} + {{- end }} + {{- end }} + {{- end }} + selector: + app: {{ $service.spec.selector.app }} + type: {{ $service.spec.type }} +--- +{{- end }} diff --git a/FaqGen/benchmark/performance/helm_charts/values.yaml b/FaqGen/benchmark/performance/helm_charts/values.yaml new file mode 100644 index 0000000000..7041e0e8f2 --- /dev/null +++ b/FaqGen/benchmark/performance/helm_charts/values.yaml @@ -0,0 +1,203 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +namespace: default + +config: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + RERANK_MODEL_ID: BAAI/bge-reranker-base + +deployments: + - name: chatqna-backend-server-deploy + spec: + ports: + - containerPort: 8888 + + - name: dataprep-deploy + spec: + ports: + - containerPort: 6007 + + - name: vector-db + spec: + ports: + - containerPort: 6379 + - containerPort: 8001 + + - name: retriever-deploy + spec: + ports: + - containerPort: 7000 + + - name: embedding-dependency-deploy + spec: + ports: + - containerPort: 80 + args: + - name: "--model-id" + value: $(EMBEDDING_MODEL_ID) + - name: "--auto-truncate" + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm + + - name: reranking-dependency-deploy + spec: + args: + - name: "--model-id" + - value: $(RERANK_MODEL_ID) + - name: "--auto-truncate" + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: "true" + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: "512" + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm + + - name: llm-dependency-deploy + spec: + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + args: + - name: "--model-id" + value: $(LLM_MODEL_ID) + - name: "--max-input-length" + value: "2048" + - name: "--max-total-tokens" + value: "4096" + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: "true" + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm + +services: + - name: chatqna-backend-server-svc + spec: + ports: + - name: service + nodePort: 30888 + port: 8888 + targetPort: 8888 + selector: + app: chatqna-backend-server-deploy + type: NodePort + + - name: dataprep-svc + spec: + ports: + - name: port1 + port: 6007 + targetPort: 6007 + selector: + app: dataprep-deploy + type: ClusterIP + + - name: embedding-dependency-svc + spec: + ports: + - name: service + port: 6006 + targetPort: 80 + selector: + app: embedding-dependency-deploy + type: ClusterIP + + - name: llm-dependency-svc + spec: + ports: + - name: service + port: 9009 + targetPort: 80 + selector: + app: llm-dependency-deploy + type: ClusterIP + + - name: reranking-dependency-svc + spec: + ports: + - name: service + port: 8808 + targetPort: 80 + selector: + app: reranking-dependency-deploy + type: ClusterIP + + - name: retriever-svc + spec: + ports: + - name: service + port: 7000 + targetPort: 7000 + selector: + app: retriever-deploy + type: ClusterIP + + - name: vector-db + spec: + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + selector: + app: vector-db + type: ClusterIP From e33cfb238f9197fb0197afc78fa0bba1b041102e Mon Sep 17 00:00:00 2001 From: Zhenzhong1 <109137058+Zhenzhong1@users.noreply.github.com> Date: Thu, 17 Oct 2024 12:06:12 +0800 Subject: [PATCH 02/18] initialize values.yaml --- .../performance/helm_charts/values.yaml | 158 ++++-------------- 1 file changed, 28 insertions(+), 130 deletions(-) diff --git a/FaqGen/benchmark/performance/helm_charts/values.yaml b/FaqGen/benchmark/performance/helm_charts/values.yaml index 7041e0e8f2..216575aa97 100644 --- a/FaqGen/benchmark/performance/helm_charts/values.yaml +++ b/FaqGen/benchmark/performance/helm_charts/values.yaml @@ -4,90 +4,23 @@ namespace: default config: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct deployments: - - name: chatqna-backend-server-deploy + - name: faq-mega-server-deploy spec: ports: - containerPort: 8888 - - name: dataprep-deploy + - name: faq-micro-deploy spec: ports: - - containerPort: 6007 - - - name: vector-db - spec: - ports: - - containerPort: 6379 - - containerPort: 8001 - - - name: retriever-deploy - spec: - ports: - - containerPort: 7000 - - - name: embedding-dependency-deploy - spec: - ports: - - containerPort: 80 - args: - - name: "--model-id" - value: $(EMBEDDING_MODEL_ID) - - name: "--auto-truncate" - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - volumes: - - hostPath: - path: /mnt/models - type: Directory - name: model-volume - - emptyDir: - medium: Memory - sizeLimit: 1Gi - name: shm - - - name: reranking-dependency-deploy - spec: - args: - - name: "--model-id" - - value: $(RERANK_MODEL_ID) - - name: "--auto-truncate" + - containerPort: 8888 env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: "true" - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - - name: MAX_WARMUP_SEQUENCE_LENGTH - value: "512" - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - volumes: - - hostPath: - path: /mnt/models - type: Directory - name: model-volume - - emptyDir: - medium: Memory - sizeLimit: 1Gi - name: shm + - name: TGI_LLM_ENDPOINT + value: "http://faq-tgi-svc.default.svc.cluster.local:8010" - - name: llm-dependency-deploy + - name: faq-tgi-deploy spec: ports: - containerPort: 80 @@ -112,6 +45,14 @@ deployments: value: all - name: HF_TOKEN value: ${HF_TOKEN} + - name: ENABLE_HPU_GRAPH + value: 'true' + - name: LIMIT_HPU_GRAPH + value: 'true' + - name: USE_FLASH_ATTENTION + value: 'true' + - name: FLASH_ATTENTION_RECOMPUTE + value: 'true' volumeMounts: - mountPath: /data name: model-volume @@ -128,76 +69,33 @@ deployments: name: shm services: - - name: chatqna-backend-server-svc - spec: - ports: - - name: service - nodePort: 30888 - port: 8888 - targetPort: 8888 - selector: - app: chatqna-backend-server-deploy - type: NodePort - - - name: dataprep-svc - spec: - ports: - - name: port1 - port: 6007 - targetPort: 6007 - selector: - app: dataprep-deploy - type: ClusterIP - - - name: embedding-dependency-svc - spec: - ports: - - name: service - port: 6006 - targetPort: 80 - selector: - app: embedding-dependency-deploy - type: ClusterIP - - - name: llm-dependency-svc + - name: faq-micro-svc spec: ports: - name: service - port: 9009 - targetPort: 80 + port: 9003 + targetPort: 9000 selector: - app: llm-dependency-deploy + app: faq-micro-deploy type: ClusterIP - - name: reranking-dependency-svc + - name: faq-tgi-svc spec: ports: - name: service - port: 8808 + port: 8010 targetPort: 80 selector: - app: reranking-dependency-deploy + app: faq-tgi-deploy type: ClusterIP - - name: retriever-svc + - name: faq-mega-server-svc spec: ports: - name: service - port: 7000 - targetPort: 7000 - selector: - app: retriever-deploy - type: ClusterIP - - - name: vector-db - spec: - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 + port: 7779 + targetPort: 7777 + nodePort: 30779 selector: - app: vector-db - type: ClusterIP + app: faq-mega-server-deploy + type: NodePort From 113dedc7e00de2d358a02a72c5ee24d709976efa Mon Sep 17 00:00:00 2001 From: Zhenzhong1 <109137058+Zhenzhong1@users.noreply.github.com> Date: Thu, 17 Oct 2024 13:10:31 +0800 Subject: [PATCH 03/18] Update configmap.yaml --- .../helm_charts/templates/configmap.yaml | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml b/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml index 2ce795a1ef..ff261aa394 100644 --- a/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml +++ b/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml @@ -4,22 +4,11 @@ apiVersion: v1 kind: ConfigMap metadata: - name: qna-config + name: faq-config namespace: default data: - EMBEDDING_MODEL_ID: {{ .Values.config.EMBEDDING_MODEL_ID }} - EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc HUGGINGFACEHUB_API_TOKEN: {{ .Values.HUGGINGFACEHUB_API_TOKEN }} - INDEX_NAME: rag-redis LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID }} - LLM_SERVER_HOST_IP: llm-dependency-svc - NODE_SELECTOR: chatqna-opea - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - RERANK_MODEL_ID: {{ .Values.config.RERANK_MODEL_ID }} - RERANK_SERVER_HOST_IP: reranking-dependency-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + NODE_SELECTOR: faq-opea + TGI_LLM_ENDPOINT: http://faq-tgi-svc.default.svc.cluster.local:8010 --- From 90400532548ba8d222afd69edf540c620da83821 Mon Sep 17 00:00:00 2001 From: Zhenzhong1 <109137058+Zhenzhong1@users.noreply.github.com> Date: Thu, 17 Oct 2024 13:17:13 +0800 Subject: [PATCH 04/18] Update customize.yaml --- .../performance/helm_charts/customize.yaml | 47 ++----------------- 1 file changed, 5 insertions(+), 42 deletions(-) diff --git a/FaqGen/benchmark/performance/helm_charts/customize.yaml b/FaqGen/benchmark/performance/helm_charts/customize.yaml index 9e7edbeb85..5e156db459 100644 --- a/FaqGen/benchmark/performance/helm_charts/customize.yaml +++ b/FaqGen/benchmark/performance/helm_charts/customize.yaml @@ -4,7 +4,7 @@ HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} podSpecs: - - name: chatqna-backend-server-deploy + - name: faq-mega-server-deploy spec: image_name: opea/chatqna image_tag: latest @@ -17,55 +17,18 @@ podSpecs: cpu: "8" memory: "8000Mi" - - name: embedding-dependency-deploy - spec: - image_name: ghcr.io/huggingface/text-embeddings-inference - image_tag: cpu-1.5 - replicas: 1 - resources: - limits: - cpu: "80" - memory: "20000Mi" - requests: - cpu: "80" - memory: "20000Mi" - - name: reranking-dependency-deploy - spec: - image_name: opea/tei-gaudi - image_tag: latest - replicas: 1 - resources: - limits: - habana.ai/gaudi: 1 - - - name: llm-dependency-deploy + - name: faq-tgi-deploy spec: image_name: ghcr.io/huggingface/tgi-gaudi - image_tag: 2.0.4 + image_tag: 2.0.5 replicas: 7 resources: limits: habana.ai/gaudi: 1 - - name: dataprep-deploy + - name: faq-micro-deploy spec: - image_name: opea/dataprep-redis + image_name: opea/llm-faqgen-tgi image_tag: latest replicas: 1 - - - name: vector-db - spec: - image_name: redis/redis-stack - image_tag: 7.2.0-v9 - replicas: 1 - - - name: retriever-deploy - spec: - image_name: opea/retriever-redis - image_tag: latest - replicas: 2 - resources: - requests: - cpu: "4" - memory: "4000Mi" From 9383c226c7052a9ad59863e18d5083a5ae26918e Mon Sep 17 00:00:00 2001 From: Zhenzhong1 <109137058+Zhenzhong1@users.noreply.github.com> Date: Thu, 17 Oct 2024 13:24:07 +0800 Subject: [PATCH 05/18] Update values.yaml ports --- FaqGen/benchmark/performance/helm_charts/values.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/FaqGen/benchmark/performance/helm_charts/values.yaml b/FaqGen/benchmark/performance/helm_charts/values.yaml index 216575aa97..bd52ef9855 100644 --- a/FaqGen/benchmark/performance/helm_charts/values.yaml +++ b/FaqGen/benchmark/performance/helm_charts/values.yaml @@ -10,12 +10,12 @@ deployments: - name: faq-mega-server-deploy spec: ports: - - containerPort: 8888 + - containerPort: 7777 - name: faq-micro-deploy spec: ports: - - containerPort: 8888 + - containerPort: 9000 env: - name: TGI_LLM_ENDPOINT value: "http://faq-tgi-svc.default.svc.cluster.local:8010" From 6a9d64bd1c1b86bc755c5ce87d1dee3507a68c8e Mon Sep 17 00:00:00 2001 From: Zhenzhong1 <109137058+Zhenzhong1@users.noreply.github.com> Date: Thu, 17 Oct 2024 13:40:03 +0800 Subject: [PATCH 06/18] Update configmap.yaml --- .../benchmark/performance/helm_charts/templates/configmap.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml b/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml index ff261aa394..4bc78fe313 100644 --- a/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml +++ b/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml @@ -11,4 +11,6 @@ data: LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID }} NODE_SELECTOR: faq-opea TGI_LLM_ENDPOINT: http://faq-tgi-svc.default.svc.cluster.local:8010 + LLM_SERVICE_HOST_IP: faq-micro-svc + MEGA_SERVICE_HOST_IP: faq-mega-server-svc --- From 3a80915269e101ff2bb350620dd9ac29a6b46aaf Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Thu, 17 Oct 2024 06:07:45 +0000 Subject: [PATCH 07/18] fixed deployment.yaml --- .../performance/helm_charts/templates/deployment.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml b/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml index d751d7dfe5..495c459eb9 100644 --- a/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml +++ b/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml @@ -25,7 +25,7 @@ spec: containers: - envFrom: - configMapRef: - name: qna-config + name: faq-config {{- if $deployment.spec.args }} args: {{- range $arg := $deployment.spec.args }} @@ -80,7 +80,7 @@ spec: hostIPC: true nodeSelector: - node-type: chatqna-opea + node-type: faq-opea serviceAccountName: default topologySpreadConstraints: - labelSelector: From 43d79d499225c5a57366ca714e655fb0c9ff308e Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Thu, 17 Oct 2024 06:23:30 +0000 Subject: [PATCH 08/18] removed HF_TOKEN in values.yaml --- FaqGen/benchmark/performance/helm_charts/values.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/FaqGen/benchmark/performance/helm_charts/values.yaml b/FaqGen/benchmark/performance/helm_charts/values.yaml index bd52ef9855..39f2a4d8e8 100644 --- a/FaqGen/benchmark/performance/helm_charts/values.yaml +++ b/FaqGen/benchmark/performance/helm_charts/values.yaml @@ -43,8 +43,6 @@ deployments: value: habana - name: HABANA_VISIBLE_DEVICES value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - name: ENABLE_HPU_GRAPH value: 'true' - name: LIMIT_HPU_GRAPH From 084a31ffa55dcf1f57ed683f5d6ba6c26e896694 Mon Sep 17 00:00:00 2001 From: Zhenzhong1 <109137058+Zhenzhong1@users.noreply.github.com> Date: Thu, 17 Oct 2024 14:29:05 +0800 Subject: [PATCH 09/18] Update values.yaml --- FaqGen/benchmark/performance/helm_charts/values.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/FaqGen/benchmark/performance/helm_charts/values.yaml b/FaqGen/benchmark/performance/helm_charts/values.yaml index 39f2a4d8e8..b583a3fbcc 100644 --- a/FaqGen/benchmark/performance/helm_charts/values.yaml +++ b/FaqGen/benchmark/performance/helm_charts/values.yaml @@ -5,6 +5,8 @@ namespace: default config: LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct + CONFIG_MAP_NAME: faq-config + NODE_SELECTOR: faq-opea deployments: - name: faq-mega-server-deploy From 8d83a453da09397ead92fb8f5110b663b1badc15 Mon Sep 17 00:00:00 2001 From: Zhenzhong1 <109137058+Zhenzhong1@users.noreply.github.com> Date: Thu, 17 Oct 2024 14:29:59 +0800 Subject: [PATCH 10/18] Update configmap.yaml --- .../performance/helm_charts/templates/configmap.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml b/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml index 4bc78fe313..113e96501a 100644 --- a/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml +++ b/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml @@ -4,12 +4,12 @@ apiVersion: v1 kind: ConfigMap metadata: - name: faq-config + name: {{ .Values.config.CONFIG_MAP_NAME }} namespace: default data: HUGGINGFACEHUB_API_TOKEN: {{ .Values.HUGGINGFACEHUB_API_TOKEN }} LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID }} - NODE_SELECTOR: faq-opea + NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR }} TGI_LLM_ENDPOINT: http://faq-tgi-svc.default.svc.cluster.local:8010 LLM_SERVICE_HOST_IP: faq-micro-svc MEGA_SERVICE_HOST_IP: faq-mega-server-svc From 7ca85e1218d5847559d7c40898b29f605633b941 Mon Sep 17 00:00:00 2001 From: Zhenzhong1 <109137058+Zhenzhong1@users.noreply.github.com> Date: Thu, 17 Oct 2024 14:31:58 +0800 Subject: [PATCH 11/18] Update deployment.yaml --- .../performance/helm_charts/templates/deployment.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml b/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml index 495c459eb9..7a9fe0c548 100644 --- a/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml +++ b/FaqGen/benchmark/performance/helm_charts/templates/deployment.yaml @@ -25,7 +25,7 @@ spec: containers: - envFrom: - configMapRef: - name: faq-config + name: {{ $global.config.CONFIG_MAP_NAME }} {{- if $deployment.spec.args }} args: {{- range $arg := $deployment.spec.args }} @@ -80,7 +80,7 @@ spec: hostIPC: true nodeSelector: - node-type: faq-opea + node-type: {{ $global.config.NODE_SELECTOR }} serviceAccountName: default topologySpreadConstraints: - labelSelector: From 1f88b5021efdd5fd60d81d42d46ee2e22b71f56b Mon Sep 17 00:00:00 2001 From: Zhenzhong1 <109137058+Zhenzhong1@users.noreply.github.com> Date: Thu, 17 Oct 2024 14:38:25 +0800 Subject: [PATCH 12/18] removed TEI_ENDPOINT in values.yaml --- FaqGen/benchmark/performance/helm_charts/values.yaml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/FaqGen/benchmark/performance/helm_charts/values.yaml b/FaqGen/benchmark/performance/helm_charts/values.yaml index b583a3fbcc..f26fbace6e 100644 --- a/FaqGen/benchmark/performance/helm_charts/values.yaml +++ b/FaqGen/benchmark/performance/helm_charts/values.yaml @@ -18,10 +18,7 @@ deployments: spec: ports: - containerPort: 9000 - env: - - name: TGI_LLM_ENDPOINT - value: "http://faq-tgi-svc.default.svc.cluster.local:8010" - + - name: faq-tgi-deploy spec: ports: From 0c863663657852efb156c5591550245531d1c816 Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Thu, 17 Oct 2024 07:38:26 +0000 Subject: [PATCH 13/18] initlize the AudioQnA helm charts --- AudioQnA/benchmark/helm_charts/.helmignore | 23 ++++ AudioQnA/benchmark/helm_charts/Chart.yaml | 27 +++++ AudioQnA/benchmark/helm_charts/README.md | 36 ++++++ AudioQnA/benchmark/helm_charts/customize.yaml | 34 ++++++ .../helm_charts/templates/configmap.yaml | 16 +++ .../helm_charts/templates/deployment.yaml | 113 ++++++++++++++++++ .../helm_charts/templates/service.yaml | 24 ++++ AudioQnA/benchmark/helm_charts/values.yaml | 98 +++++++++++++++ 8 files changed, 371 insertions(+) create mode 100644 AudioQnA/benchmark/helm_charts/.helmignore create mode 100644 AudioQnA/benchmark/helm_charts/Chart.yaml create mode 100644 AudioQnA/benchmark/helm_charts/README.md create mode 100644 AudioQnA/benchmark/helm_charts/customize.yaml create mode 100644 AudioQnA/benchmark/helm_charts/templates/configmap.yaml create mode 100644 AudioQnA/benchmark/helm_charts/templates/deployment.yaml create mode 100644 AudioQnA/benchmark/helm_charts/templates/service.yaml create mode 100644 AudioQnA/benchmark/helm_charts/values.yaml diff --git a/AudioQnA/benchmark/helm_charts/.helmignore b/AudioQnA/benchmark/helm_charts/.helmignore new file mode 100644 index 0000000000..0e8a0eb36f --- /dev/null +++ b/AudioQnA/benchmark/helm_charts/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/AudioQnA/benchmark/helm_charts/Chart.yaml b/AudioQnA/benchmark/helm_charts/Chart.yaml new file mode 100644 index 0000000000..51f94d0879 --- /dev/null +++ b/AudioQnA/benchmark/helm_charts/Chart.yaml @@ -0,0 +1,27 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: chatqna-charts +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. +appVersion: "1.16.0" diff --git a/AudioQnA/benchmark/helm_charts/README.md b/AudioQnA/benchmark/helm_charts/README.md new file mode 100644 index 0000000000..f6df9ce4fe --- /dev/null +++ b/AudioQnA/benchmark/helm_charts/README.md @@ -0,0 +1,36 @@ +# ChatQnA Deployment + +This document guides you through deploying ChatQnA pipelines using Helm charts. Helm charts simplify managing Kubernetes applications by packaging configuration and resources. + +## Getting Started + +### Preparation + +```bash +# on k8s-master node +cd GenAIExamples/ChatQnA/benchmark/performance/helm_charts + +# Replace the key of HUGGINGFACEHUB_API_TOKEN with your actual Hugging Face token: +# vim customize.yaml +HUGGINGFACEHUB_API_TOKEN: hf_xxxxx +``` + +### Deploy your ChatQnA + +```bash +# Deploy a ChatQnA pipeline using the specified YAML configuration. +# To deploy with different configurations, simply provide a different YAML file. +helm install chatqna helm_charts/ -f customize.yaml +``` + +Notes: The provided [BKC manifests](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark) for single, two, and four node Kubernetes clusters are generated using this tool. + +## Customize your own ChatQnA pipelines. (Optional) + +There are two yaml configs you can specify. + +- customize.yaml + This file can specify image names, the number of replicas and CPU cores to manage your pods. + +- values.yaml + This file contains the default microservice configurations for ChatQnA. Please review and understand each parameter before making any changes. diff --git a/AudioQnA/benchmark/helm_charts/customize.yaml b/AudioQnA/benchmark/helm_charts/customize.yaml new file mode 100644 index 0000000000..5e156db459 --- /dev/null +++ b/AudioQnA/benchmark/helm_charts/customize.yaml @@ -0,0 +1,34 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + +podSpecs: + - name: faq-mega-server-deploy + spec: + image_name: opea/chatqna + image_tag: latest + replicas: 2 + resources: + limits: + cpu: "8" + memory: "8000Mi" + requests: + cpu: "8" + memory: "8000Mi" + + + - name: faq-tgi-deploy + spec: + image_name: ghcr.io/huggingface/tgi-gaudi + image_tag: 2.0.5 + replicas: 7 + resources: + limits: + habana.ai/gaudi: 1 + + - name: faq-micro-deploy + spec: + image_name: opea/llm-faqgen-tgi + image_tag: latest + replicas: 1 diff --git a/AudioQnA/benchmark/helm_charts/templates/configmap.yaml b/AudioQnA/benchmark/helm_charts/templates/configmap.yaml new file mode 100644 index 0000000000..113e96501a --- /dev/null +++ b/AudioQnA/benchmark/helm_charts/templates/configmap.yaml @@ -0,0 +1,16 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ .Values.config.CONFIG_MAP_NAME }} + namespace: default +data: + HUGGINGFACEHUB_API_TOKEN: {{ .Values.HUGGINGFACEHUB_API_TOKEN }} + LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID }} + NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR }} + TGI_LLM_ENDPOINT: http://faq-tgi-svc.default.svc.cluster.local:8010 + LLM_SERVICE_HOST_IP: faq-micro-svc + MEGA_SERVICE_HOST_IP: faq-mega-server-svc +--- diff --git a/AudioQnA/benchmark/helm_charts/templates/deployment.yaml b/AudioQnA/benchmark/helm_charts/templates/deployment.yaml new file mode 100644 index 0000000000..7a9fe0c548 --- /dev/null +++ b/AudioQnA/benchmark/helm_charts/templates/deployment.yaml @@ -0,0 +1,113 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +{{- $global := .Values }} +{{- range $deployment := .Values.deployments }} +{{- range $podSpec := $global.podSpecs }} +{{- if eq $podSpec.name $deployment.name }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ $deployment.name }} + namespace: default +spec: + replicas: {{ $podSpec.spec.replicas }} + selector: + matchLabels: + app: {{ $deployment.name }} + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: {{ $deployment.name }} + spec: + containers: + - envFrom: + - configMapRef: + name: {{ $global.config.CONFIG_MAP_NAME }} + {{- if $deployment.spec.args }} + args: + {{- range $arg := $deployment.spec.args }} + {{- if $arg.name }} + - {{ $arg.name }} + {{- end }} + {{- if $arg.value }} + - "{{ $arg.value }}" + {{- end }} + {{- end }} + {{- end }} + + {{- if $deployment.spec.env }} + env: + {{- range $env := $deployment.spec.env }} + - name: {{ $env.name }} + value: "{{ $env.value }}" + {{- end }} + {{- end }} + + image: {{ $podSpec.spec.image_name }}:{{ $podSpec.spec.image_tag }} + imagePullPolicy: IfNotPresent + name: {{ $podSpec.name }} + + {{- if $deployment.spec.ports }} + ports: + {{- range $port := $deployment.spec.ports }} + {{- range $port_name, $port_id := $port }} + - {{ $port_name }}: {{ $port_id }} + {{- end }} + {{- end }} + {{- end }} + + + {{- if $podSpec.spec.resources }} + resources: + {{- range $resourceType, $resource := $podSpec.spec.resources }} + {{ $resourceType }}: + {{- range $limitType, $limit := $resource }} + {{ $limitType }}: {{ $limit }} + {{- end }} + {{- end }} + {{- end }} + + {{- if $deployment.spec.volumeMounts }} + volumeMounts: + {{- range $volumeMount := $deployment.spec.volumeMounts }} + - mountPath: {{ $volumeMount.mountPath }} + name: {{ $volumeMount.name }} + {{- end }} + {{- end }} + + hostIPC: true + nodeSelector: + node-type: {{ $global.config.NODE_SELECTOR }} + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: {{ $deployment.name }} + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + + + {{- if $deployment.spec.volumes }} + volumes: + {{- range $index, $volume := $deployment.spec.volumes }} + - name: {{ $volume.name }} + {{- if $volume.hostPath }} + hostPath: + path: {{ $volume.hostPath.path }} + type: {{ $volume.hostPath.type }} + {{- else if $volume.emptyDir }} + emptyDir: + medium: {{ $volume.emptyDir.medium }} + sizeLimit: {{ $volume.emptyDir.sizeLimit }} + {{- end }} + {{- end }} + {{- end }} + +--- +{{- end }} +{{- end }} +{{- end }} diff --git a/AudioQnA/benchmark/helm_charts/templates/service.yaml b/AudioQnA/benchmark/helm_charts/templates/service.yaml new file mode 100644 index 0000000000..5a5896921d --- /dev/null +++ b/AudioQnA/benchmark/helm_charts/templates/service.yaml @@ -0,0 +1,24 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +{{- range $service := .Values.services }} +apiVersion: v1 +kind: Service +metadata: + name: {{ $service.name }} + namespace: default +spec: + ports: + {{- range $port := $service.spec.ports }} + - name: {{ $port.name }} + {{- range $port_name, $port_id := $port }} + {{- if ne $port_name "name"}} + {{ $port_name }}: {{ $port_id }} + {{- end }} + {{- end }} + {{- end }} + selector: + app: {{ $service.spec.selector.app }} + type: {{ $service.spec.type }} +--- +{{- end }} diff --git a/AudioQnA/benchmark/helm_charts/values.yaml b/AudioQnA/benchmark/helm_charts/values.yaml new file mode 100644 index 0000000000..f26fbace6e --- /dev/null +++ b/AudioQnA/benchmark/helm_charts/values.yaml @@ -0,0 +1,98 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +namespace: default + +config: + LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct + CONFIG_MAP_NAME: faq-config + NODE_SELECTOR: faq-opea + +deployments: + - name: faq-mega-server-deploy + spec: + ports: + - containerPort: 7777 + + - name: faq-micro-deploy + spec: + ports: + - containerPort: 9000 + + - name: faq-tgi-deploy + spec: + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + args: + - name: "--model-id" + value: $(LLM_MODEL_ID) + - name: "--max-input-length" + value: "2048" + - name: "--max-total-tokens" + value: "4096" + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: "true" + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: ENABLE_HPU_GRAPH + value: 'true' + - name: LIMIT_HPU_GRAPH + value: 'true' + - name: USE_FLASH_ATTENTION + value: 'true' + - name: FLASH_ATTENTION_RECOMPUTE + value: 'true' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm + +services: + - name: faq-micro-svc + spec: + ports: + - name: service + port: 9003 + targetPort: 9000 + selector: + app: faq-micro-deploy + type: ClusterIP + + - name: faq-tgi-svc + spec: + ports: + - name: service + port: 8010 + targetPort: 80 + selector: + app: faq-tgi-deploy + type: ClusterIP + + - name: faq-mega-server-svc + spec: + ports: + - name: service + port: 7779 + targetPort: 7777 + nodePort: 30779 + selector: + app: faq-mega-server-deploy + type: NodePort From c154ada9c2cf9c97c384fc346ee70f697f935efb Mon Sep 17 00:00:00 2001 From: Zhenzhong1 <109137058+Zhenzhong1@users.noreply.github.com> Date: Thu, 17 Oct 2024 15:51:17 +0800 Subject: [PATCH 14/18] Update values.yaml audioqna --- AudioQnA/benchmark/helm_charts/values.yaml | 128 +++++++++++++++++---- 1 file changed, 108 insertions(+), 20 deletions(-) diff --git a/AudioQnA/benchmark/helm_charts/values.yaml b/AudioQnA/benchmark/helm_charts/values.yaml index f26fbace6e..d469003649 100644 --- a/AudioQnA/benchmark/helm_charts/values.yaml +++ b/AudioQnA/benchmark/helm_charts/values.yaml @@ -4,22 +4,70 @@ namespace: default config: - LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct - CONFIG_MAP_NAME: faq-config - NODE_SELECTOR: faq-opea + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + CONFIG_MAP_NAME: audio-qna-config + NODE_SELECTOR: audioqna-opea + ASR_ENDPOINT: http://whisper-svc.default.svc.cluster.local:7066 + TTS_ENDPOINT: http://speecht5-svc.default.svc.cluster.local:7055 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:3006 + MEGA_SERVICE_HOST_IP: audioqna-backend-server-svc + ASR_SERVICE_HOST_IP: asr-svc + ASR_SERVICE_PORT: "3001" + LLM_SERVICE_HOST_IP: llm-svc + LLM_SERVICE_PORT: "3007" + TTS_SERVICE_HOST_IP: tts-svc + TTS_SERVICE_PORT: "3002" deployments: - - name: faq-mega-server-deploy + - name: audioqna-backend-server-deploy spec: ports: - - containerPort: 7777 + - containerPort: 8888 + + - name: asr-deploy + spec: + ports: + - containerPort: 9099 + + - name: whisper-deploy + spec: + ports: + - containerPort: 7066 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all - - name: faq-micro-deploy + - name: tts-deploy + spec: + ports: + - containerPort: 9088 + + - name: llm-deploy spec: ports: - containerPort: 9000 - - - name: faq-tgi-deploy + + - name: speecht5-deploy + spec: + ports: + - containerPort: 7055 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + + - name: llm-dependency-deploy spec: ports: - containerPort: 80 @@ -66,33 +114,73 @@ deployments: name: shm services: - - name: faq-micro-svc + - name: asr-svc spec: ports: - name: service - port: 9003 - targetPort: 9000 + port: 3001 + targetPort: 9099 + selector: + app: asr-deploy + type: ClusterIP + + - name: whisper-svc + spec: + ports: + - name: service + port: 7066 + targetPort: 7066 selector: - app: faq-micro-deploy + app: whisper-deploy type: ClusterIP - - name: faq-tgi-svc + - name: tts-svc spec: ports: - name: service - port: 8010 + port: 3002 + targetPort: 9088 + selector: + app: tts-deploy + type: ClusterIP + + - name: speecht5-svc + spec: + ports: + - name: service + port: 7055 + targetPort: 7055 + selector: + app: speecht5-deploy + type: ClusterIP + + - name: llm-dependency-svc + spec: + ports: + - name: service + port: 3006 targetPort: 80 selector: - app: faq-tgi-deploy + app: llm-dependency-deploy type: ClusterIP - - name: faq-mega-server-svc + - name: llm-svc + spec: + ports: + - name: service + port: 3007 + targetPort: 9000 + selector: + app: llm-deploy + type: ClusterIP + + - name: audioqna-backend-server-svc spec: ports: - name: service - port: 7779 - targetPort: 7777 - nodePort: 30779 + port: 3088 + targetPort: 8888 + nodePort: 30666 selector: - app: faq-mega-server-deploy + app: audioqna-backend-server-deploy type: NodePort From 2af3066ec0bd698be2568ba4c348b1a7602bff6d Mon Sep 17 00:00:00 2001 From: Zhenzhong1 <109137058+Zhenzhong1@users.noreply.github.com> Date: Thu, 17 Oct 2024 15:52:12 +0800 Subject: [PATCH 15/18] Update configmap.yaml audioqna --- .../benchmark/helm_charts/templates/configmap.yaml | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/AudioQnA/benchmark/helm_charts/templates/configmap.yaml b/AudioQnA/benchmark/helm_charts/templates/configmap.yaml index 113e96501a..79246763fa 100644 --- a/AudioQnA/benchmark/helm_charts/templates/configmap.yaml +++ b/AudioQnA/benchmark/helm_charts/templates/configmap.yaml @@ -11,6 +11,15 @@ data: LLM_MODEL_ID: {{ .Values.config.LLM_MODEL_ID }} NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR }} TGI_LLM_ENDPOINT: http://faq-tgi-svc.default.svc.cluster.local:8010 - LLM_SERVICE_HOST_IP: faq-micro-svc - MEGA_SERVICE_HOST_IP: faq-mega-server-svc + + ASR_ENDPOINT: http://whisper-svc.default.svc.cluster.local:7066 + TTS_ENDPOINT: http://speecht5-svc.default.svc.cluster.local:7055 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:3006 + MEGA_SERVICE_HOST_IP: audioqna-backend-server-svc + ASR_SERVICE_HOST_IP: asr-svc + ASR_SERVICE_PORT: "3001" + LLM_SERVICE_HOST_IP: llm-svc + LLM_SERVICE_PORT: "3007" + TTS_SERVICE_HOST_IP: tts-svc + TTS_SERVICE_PORT: "3002" --- From 4ba047cf0eb3c8149317ff66cacb99c2132d6138 Mon Sep 17 00:00:00 2001 From: Zhenzhong1 <109137058+Zhenzhong1@users.noreply.github.com> Date: Thu, 17 Oct 2024 15:58:04 +0800 Subject: [PATCH 16/18] Update customize.yaml audioqna --- AudioQnA/benchmark/helm_charts/customize.yaml | 50 ++++++++++++------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/AudioQnA/benchmark/helm_charts/customize.yaml b/AudioQnA/benchmark/helm_charts/customize.yaml index 5e156db459..2c788c5725 100644 --- a/AudioQnA/benchmark/helm_charts/customize.yaml +++ b/AudioQnA/benchmark/helm_charts/customize.yaml @@ -4,31 +4,47 @@ HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} podSpecs: - - name: faq-mega-server-deploy + - name: audioqna-backend-server-deploy spec: - image_name: opea/chatqna + image_name: opea/audioqna image_tag: latest - replicas: 2 + replicas: 1 + + - name: asr-deploy + spec: + image_name: opea/asr + image_tag: latest + replicas: 1 + + - name: whisper-deploy + spec: + image_name: opea/whisper-gaudi + image_tag: latest + replicas: 1 resources: limits: - cpu: "8" - memory: "8000Mi" - requests: - cpu: "8" - memory: "8000Mi" - - - - name: faq-tgi-deploy + habana.ai/gaudi: 1 + + - name: tts-deploy spec: - image_name: ghcr.io/huggingface/tgi-gaudi - image_tag: 2.0.5 - replicas: 7 + image_name: opea/tts + image_tag: latest + replicas: 1 + + - name: speecht5-deploy + spec: + image_name: opea/speecht5-gaudi + image_tag: latest + replicas: 1 resources: limits: habana.ai/gaudi: 1 - - name: faq-micro-deploy + - name: llm-dependency-deploy spec: - image_name: opea/llm-faqgen-tgi - image_tag: latest + image_name: ghcr.io/huggingface/tgi-gaudi + image_tag: 2.0.5 replicas: 1 + resources: + limits: + habana.ai/gaudi: 1 From c0f2b47aef6fc288f826c725f4aa74d51f2fccb6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 21 Oct 2024 02:58:48 +0000 Subject: [PATCH 17/18] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- AudioQnA/benchmark/helm_charts/customize.yaml | 8 ++++---- AudioQnA/benchmark/helm_charts/values.yaml | 4 ++-- .../performance/helm_charts/templates/configmap.yaml | 2 +- FaqGen/benchmark/performance/helm_charts/values.yaml | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/AudioQnA/benchmark/helm_charts/customize.yaml b/AudioQnA/benchmark/helm_charts/customize.yaml index 2c788c5725..31e1b6ca65 100644 --- a/AudioQnA/benchmark/helm_charts/customize.yaml +++ b/AudioQnA/benchmark/helm_charts/customize.yaml @@ -9,13 +9,13 @@ podSpecs: image_name: opea/audioqna image_tag: latest replicas: 1 - + - name: asr-deploy spec: image_name: opea/asr image_tag: latest replicas: 1 - + - name: whisper-deploy spec: image_name: opea/whisper-gaudi @@ -24,13 +24,13 @@ podSpecs: resources: limits: habana.ai/gaudi: 1 - + - name: tts-deploy spec: image_name: opea/tts image_tag: latest replicas: 1 - + - name: speecht5-deploy spec: image_name: opea/speecht5-gaudi diff --git a/AudioQnA/benchmark/helm_charts/values.yaml b/AudioQnA/benchmark/helm_charts/values.yaml index d469003649..e2f03da958 100644 --- a/AudioQnA/benchmark/helm_charts/values.yaml +++ b/AudioQnA/benchmark/helm_charts/values.yaml @@ -23,7 +23,7 @@ deployments: spec: ports: - containerPort: 8888 - + - name: asr-deploy spec: ports: @@ -173,7 +173,7 @@ services: selector: app: llm-deploy type: ClusterIP - + - name: audioqna-backend-server-svc spec: ports: diff --git a/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml b/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml index 113e96501a..df3e61d20a 100644 --- a/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml +++ b/FaqGen/benchmark/performance/helm_charts/templates/configmap.yaml @@ -12,5 +12,5 @@ data: NODE_SELECTOR: {{ .Values.config.NODE_SELECTOR }} TGI_LLM_ENDPOINT: http://faq-tgi-svc.default.svc.cluster.local:8010 LLM_SERVICE_HOST_IP: faq-micro-svc - MEGA_SERVICE_HOST_IP: faq-mega-server-svc + MEGA_SERVICE_HOST_IP: faq-mega-server-svc --- diff --git a/FaqGen/benchmark/performance/helm_charts/values.yaml b/FaqGen/benchmark/performance/helm_charts/values.yaml index f26fbace6e..eeb206761a 100644 --- a/FaqGen/benchmark/performance/helm_charts/values.yaml +++ b/FaqGen/benchmark/performance/helm_charts/values.yaml @@ -18,7 +18,7 @@ deployments: spec: ports: - containerPort: 9000 - + - name: faq-tgi-deploy spec: ports: From 399768e98b60ece41e12405f372758f1ee985d71 Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Mon, 21 Oct 2024 03:12:02 +0000 Subject: [PATCH 18/18] modifed the README.md --- ChatQnA/benchmark/performance/helm_charts/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChatQnA/benchmark/performance/helm_charts/README.md b/ChatQnA/benchmark/performance/helm_charts/README.md index f6df9ce4fe..8da6d836e2 100644 --- a/ChatQnA/benchmark/performance/helm_charts/README.md +++ b/ChatQnA/benchmark/performance/helm_charts/README.md @@ -20,7 +20,7 @@ HUGGINGFACEHUB_API_TOKEN: hf_xxxxx ```bash # Deploy a ChatQnA pipeline using the specified YAML configuration. # To deploy with different configurations, simply provide a different YAML file. -helm install chatqna helm_charts/ -f customize.yaml +helm install chatqna ../helm_charts/ -f customize.yaml ``` Notes: The provided [BKC manifests](https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/benchmark) for single, two, and four node Kubernetes clusters are generated using this tool.