diff --git a/helm-charts/docsum/Chart.yaml b/helm-charts/docsum/Chart.yaml index d7cac44c6..8b963c502 100644 --- a/helm-charts/docsum/Chart.yaml +++ b/helm-charts/docsum/Chart.yaml @@ -9,6 +9,11 @@ dependencies: - name: tgi version: 0-latest repository: "file://../common/tgi" + condition: tgi.enabled + - name: vllm + version: 0-latest + repository: "file://../common/vllm" + condition: vllm.enabled - name: llm-uservice version: 0-latest repository: "file://../common/llm-uservice" diff --git a/helm-charts/docsum/README.md b/helm-charts/docsum/README.md index 957c55c25..2864714ce 100644 --- a/helm-charts/docsum/README.md +++ b/helm-charts/docsum/README.md @@ -16,8 +16,10 @@ export HFTOKEN="insert-your-huggingface-token-here" export MODELDIR="/mnt/opea-models" export MODELNAME="Intel/neural-chat-7b-v3-3" helm install docsum docsum --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set tgi.LLM_MODEL_ID=${MODELNAME} -# To use Gaudi device -# helm install docsum docsum --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values docsum/gaudi-values.yaml +# To use Gaudi device with TGI +# helm install docsum docsum --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values docsum/gaudi-tgi-values.yaml ... +# To use Gaudi device with vLLM +# helm install docsum docsum --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values docsum/gaudi-vllm-values.yaml .. 
``` ## Verify diff --git a/helm-charts/docsum/ci-gaudi-tgi-values.yaml b/helm-charts/docsum/ci-gaudi-tgi-values.yaml new file mode 120000 index 000000000..8702c8f68 --- /dev/null +++ b/helm-charts/docsum/ci-gaudi-tgi-values.yaml @@ -0,0 +1 @@ +gaudi-tgi-values.yaml \ No newline at end of file diff --git a/helm-charts/docsum/ci-gaudi-values.yaml b/helm-charts/docsum/ci-gaudi-values.yaml deleted file mode 120000 index 7243d31b2..000000000 --- a/helm-charts/docsum/ci-gaudi-values.yaml +++ /dev/null @@ -1 +0,0 @@ -gaudi-values.yaml \ No newline at end of file diff --git a/helm-charts/docsum/ci-gaudi-vllm-values.yaml b/helm-charts/docsum/ci-gaudi-vllm-values.yaml new file mode 120000 index 000000000..d9ab8c698 --- /dev/null +++ b/helm-charts/docsum/ci-gaudi-vllm-values.yaml @@ -0,0 +1 @@ +gaudi-vllm-values.yaml \ No newline at end of file diff --git a/helm-charts/docsum/gaudi-values.yaml b/helm-charts/docsum/gaudi-tgi-values.yaml similarity index 89% rename from helm-charts/docsum/gaudi-values.yaml rename to helm-charts/docsum/gaudi-tgi-values.yaml index bbd8f36ad..7a09c1f20 100644 --- a/helm-charts/docsum/gaudi-values.yaml +++ b/helm-charts/docsum/gaudi-tgi-values.yaml @@ -1,7 +1,14 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +vllm: + enabled: false + +llm-uservice: + DOCSUM_BACKEND: "TGI" + tgi: + enabled: true accelDevice: "gaudi" image: repository: ghcr.io/huggingface/tgi-gaudi diff --git a/helm-charts/docsum/gaudi-vllm-values.yaml b/helm-charts/docsum/gaudi-vllm-values.yaml new file mode 100644 index 000000000..4ab2c7457 --- /dev/null +++ b/helm-charts/docsum/gaudi-vllm-values.yaml @@ -0,0 +1,43 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Accelerate inferencing in heaviest components to improve performance +# by overriding their subchart values + +tgi: + enabled: false + +llm-uservice: + DOCSUM_BACKEND: "vLLM" + +vllm: + enabled: true + image: + repository: opea/vllm-gaudi + tag: 
"latest" + resources: + limits: + habana.ai/gaudi: 1 + startupProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + failureThreshold: 120 + readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + livenessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + + PT_HPU_ENABLE_LAZY_COLLECTIVES: "true" + OMPI_MCA_btl_vader_single_copy_mechanism: "none" + + extraCmdArgs: [ + "--tensor-parallel-size", "1", + "--block-size", "128", + "--max-num-seqs", "256", + "--max-seq-len-to-capture", "2048" + ] diff --git a/helm-charts/docsum/values.yaml b/helm-charts/docsum/values.yaml index 7cb5330cf..56509b309 100644 --- a/helm-charts/docsum/values.yaml +++ b/helm-charts/docsum/values.yaml @@ -59,16 +59,21 @@ affinity: {} # To override values in subchart llm-uservice llm-uservice: image: - repository: opea/llm-docsum-tgi + repository: opea/llm-docsum + DOCSUM_BACKEND: "TGI" MAX_INPUT_TOKENS: "1024" MAX_TOTAL_TOKENS: "2048" LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 -# To override values in subchart tgi +# To override values in TGI/vLLM subcharts tgi: + enabled: true LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 MAX_INPUT_LENGTH: "1024" MAX_TOTAL_TOKENS: "2048" +vllm: + enabled: false + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 # Use docsum gradio UI nginx: