diff --git a/helm-charts/docsum/README.md b/helm-charts/docsum/README.md
index e2ce2e5c7..8e05c6300 100644
--- a/helm-charts/docsum/README.md
+++ b/helm-charts/docsum/README.md
@@ -16,8 +16,8 @@ scripts/update_dependency.sh
 helm dependency update docsum
 export HFTOKEN="insert-your-huggingface-token-here"
 export MODELDIR="/mnt/opea-models"
-export MODELNAME="Intel/neural-chat-7b-v3-3"
-helm install docsum docsum --set global.HF_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set llm-uservice.LLM_MODEL_ID=${MODELNAME} --set vllm.LLM_MODEL_ID=${MODELNAME}
+export MODELNAME="meta-llama/Meta-Llama-3-8B-Instruct"
+helm install docsum docsum --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set global.modelUseHostPath=${MODELDIR} --set llm-uservice.LLM_MODEL_ID=${MODELNAME} --set vllm.LLM_MODEL_ID=${MODELNAME}
 # To use Gaudi device with vLLM
 # helm install docsum docsum --set global.HF_TOKEN=${HFTOKEN} --values docsum/gaudi-values.yaml
 # To use Gaudi device with TGI
@@ -65,10 +65,10 @@ Open a browser to access `http://:${port}` to play with the

 ## Values

-| Key                       | Type   | Default                       | Description                                                                                                          |
-| ------------------------- | ------ | ----------------------------- | -------------------------------------------------------------------------------------------------------------------- |
-| image.repository          | string | `"opea/docsum"`               |                                                                                                                      |
-| service.port              | string | `"8888"`                      |                                                                                                                      |
-| llm-uservice.LLM_MODEL_ID | string | `"Intel/neural-chat-7b-v3-3"` | Models id from https://huggingface.co/, or predownloaded model directory, must be consistent with vllm.LLM_MODEL_ID |
-| vllm.LLM_MODEL_ID         | string | `"Intel/neural-chat-7b-v3-3"` | Models id from https://huggingface.co/, or predownloaded model directory                                            |
-| global.monitoring         | bool   | `false`                       | Enable usage metrics for the service components. See ../monitoring.md before enabling!                              |
+| Key                       | Type   | Default                                 | Description                                                                                                            |
+| ------------------------- | ------ | --------------------------------------- | ---------------------------------------------------------------------------------------------------------------------- |
+| image.repository          | string | `"opea/docsum"`                         |                                                                                                                        |
+| service.port              | string | `"8888"`                                |                                                                                                                        |
+| llm-uservice.LLM_MODEL_ID | string | `"meta-llama/Meta-Llama-3-8B-Instruct"` | Model ID from https://huggingface.co/, or a pre-downloaded model directory; must be consistent with vllm.LLM_MODEL_ID |
+| vllm.LLM_MODEL_ID         | string | `"meta-llama/Meta-Llama-3-8B-Instruct"` | Model ID from https://huggingface.co/, or a pre-downloaded model directory                                             |
+| global.monitoring         | bool   | `false`                                 | Enable usage metrics for the service components. See ../monitoring.md before enabling!                                 |
diff --git a/helm-charts/docsum/values.yaml b/helm-charts/docsum/values.yaml
index 52d9d2c7d..9cca28e4c 100644
--- a/helm-charts/docsum/values.yaml
+++ b/helm-charts/docsum/values.yaml
@@ -63,17 +63,17 @@ llm-uservice:
   DOCSUM_BACKEND: "vLLM"
   MAX_INPUT_TOKENS: "1024"
   MAX_TOTAL_TOKENS: "2048"
-  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
+  LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct

 # To override values in TGI/vLLM subcharts
 tgi:
   enabled: false
-  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
+  LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
   MAX_INPUT_LENGTH: "1024"
   MAX_TOTAL_TOKENS: "2048"
 vllm:
   enabled: true
-  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
+  LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct

 # Use docsum gradio UI
 nginx:
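
Note: the `--set` flags in the updated README command can equivalently be captured in a values file. A minimal sketch, assuming the chart layout shown in this diff (the `my-values.yaml` file name is illustrative; every key below is taken from the diff):

```yaml
# my-values.yaml -- illustrative override file; keys mirror the chart
# values changed in this diff.
global:
  HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
  modelUseHostPath: /mnt/opea-models
llm-uservice:
  LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
vllm:
  enabled: true
  LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
```

Installing with `helm install docsum docsum -f my-values.yaml` should then behave the same as the `--set` form above.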