diff --git a/comps/llms/text-generation/README.md b/comps/llms/text-generation/README.md
index 824d8a2277..5a8f247adc 100644
--- a/comps/llms/text-generation/README.md
+++ b/comps/llms/text-generation/README.md
@@ -235,7 +235,7 @@ docker run \
   -e no_proxy=${no_proxy} \
   -e vLLM_LLM_ENDPOINT=$vLLM_LLM_ENDPOINT \
   -e HF_TOKEN=$HF_TOKEN \
-  -e LLM_MODEL=$LLM_MODEL \
+  -e LLM_MODEL_ID=$LLM_MODEL \
   opea/llm-vllm:latest
 ```

diff --git a/comps/llms/text-generation/vllm/langchain/docker_compose_llm.yaml b/comps/llms/text-generation/vllm/langchain/docker_compose_llm.yaml
index 077ceee8b1..df911a6f7d 100644
--- a/comps/llms/text-generation/vllm/langchain/docker_compose_llm.yaml
+++ b/comps/llms/text-generation/vllm/langchain/docker_compose_llm.yaml
@@ -38,7 +38,7 @@ services:
       https_proxy: ${https_proxy}
       vLLM_ENDPOINT: ${vLLM_ENDPOINT}
       HF_TOKEN: ${HF_TOKEN}
-      LLM_MODEL: ${LLM_MODEL}
+      LLM_MODEL_ID: ${LLM_MODEL}
     restart: unless-stopped

 networks:
diff --git a/comps/llms/text-generation/vllm/langchain/launch_microservice.sh b/comps/llms/text-generation/vllm/langchain/launch_microservice.sh
index 01bd0f6f58..c364554362 100644
--- a/comps/llms/text-generation/vllm/langchain/launch_microservice.sh
+++ b/comps/llms/text-generation/vllm/langchain/launch_microservice.sh
@@ -9,6 +9,6 @@ docker run -d --rm \
   -e https_proxy=$https_proxy \
   -e vLLM_ENDPOINT=$vLLM_ENDPOINT \
   -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN \
-  -e LLM_MODEL=$LLM_MODEL \
+  -e LLM_MODEL_ID=$LLM_MODEL \
   -e LOGFLAG=$LOGFLAG \
   opea/llm-vllm:latest
diff --git a/comps/llms/text-generation/vllm/langchain/llm.py b/comps/llms/text-generation/vllm/langchain/llm.py
index 143c9b9d0a..e03328d2e6 100644
--- a/comps/llms/text-generation/vllm/langchain/llm.py
+++ b/comps/llms/text-generation/vllm/langchain/llm.py
@@ -83,7 +83,7 @@ async def llm_generate(input: Union[LLMParamsDoc, ChatCompletionRequest, Searche
     headers = {}
     if access_token:
         headers = {"Authorization": f"Bearer {access_token}"}
-    model_name = input.model if input.model else os.getenv("LLM_MODEL", "meta-llama/Meta-Llama-3-8B-Instruct")
+    model_name = input.model if input.model else os.getenv("LLM_MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct")
     llm_endpoint = get_llm_endpoint(model_name)
     llm = VLLMOpenAI(
         openai_api_key="EMPTY", openai_api_base=llm_endpoint + "/v1", model_name=model_name, default_headers=headers
diff --git a/comps/llms/text-generation/vllm/llama_index/docker_compose_llm.yaml b/comps/llms/text-generation/vllm/llama_index/docker_compose_llm.yaml
index 6bfc0d500f..6a26d5074f 100644
--- a/comps/llms/text-generation/vllm/llama_index/docker_compose_llm.yaml
+++ b/comps/llms/text-generation/vllm/llama_index/docker_compose_llm.yaml
@@ -38,7 +38,7 @@ services:
       https_proxy: ${https_proxy}
       vLLM_ENDPOINT: ${vLLM_ENDPOINT}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      LLM_MODEL: ${LLM_MODEL}
+      LLM_MODEL_ID: ${LLM_MODEL}
     restart: unless-stopped

 networks:
diff --git a/comps/llms/text-generation/vllm/llama_index/launch_microservice.sh b/comps/llms/text-generation/vllm/llama_index/launch_microservice.sh
index ef8084f61d..6a6d642f86 100644
--- a/comps/llms/text-generation/vllm/llama_index/launch_microservice.sh
+++ b/comps/llms/text-generation/vllm/llama_index/launch_microservice.sh
@@ -9,6 +9,6 @@ docker run -d --rm \
   -e https_proxy=$https_proxy \
   -e vLLM_ENDPOINT=$vLLM_ENDPOINT \
   -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN \
-  -e LLM_MODEL=$LLM_MODEL \
+  -e LLM_MODEL_ID=$LLM_MODEL \
   -e LOGFLAG=$LOGFLAG \
   opea/llm-vllm-llamaindex:latest
diff --git a/comps/llms/text-generation/vllm/llama_index/llm.py b/comps/llms/text-generation/vllm/llama_index/llm.py
index 335f406295..800cff5d8d 100644
--- a/comps/llms/text-generation/vllm/llama_index/llm.py
+++ b/comps/llms/text-generation/vllm/llama_index/llm.py
@@ -43,7 +43,7 @@ async def llm_generate(input: LLMParamsDoc):
     if logflag:
         logger.info(input)
     llm_endpoint = os.getenv("vLLM_ENDPOINT", "http://localhost:8008")
-    model_name = os.getenv("LLM_MODEL", "meta-llama/Meta-Llama-3-8B-Instruct")
+    model_name = os.getenv("LLM_MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct")
     llm = OpenAILike(
         api_key="fake",
         api_base=llm_endpoint + "/v1",
diff --git a/tests/llms/test_llms_text-generation_vllm_langchain_on_intel_hpu.sh b/tests/llms/test_llms_text-generation_vllm_langchain_on_intel_hpu.sh
index c83799128c..693be8e191 100644
--- a/tests/llms/test_llms_text-generation_vllm_langchain_on_intel_hpu.sh
+++ b/tests/llms/test_llms_text-generation_vllm_langchain_on_intel_hpu.sh
@@ -58,7 +58,7 @@ function start_service() {
         --ipc=host \
         -e vLLM_ENDPOINT=$vLLM_ENDPOINT \
         -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN \
-        -e LLM_MODEL=$LLM_MODEL \
+        -e LLM_MODEL_ID=$LLM_MODEL \
         opea/llm-vllm:comps

     # check whether vllm ray is fully ready
diff --git a/tests/llms/test_llms_text-generation_vllm_llamaindex_on_intel_hpu.sh b/tests/llms/test_llms_text-generation_vllm_llamaindex_on_intel_hpu.sh
index 91a30ed857..12cdd98676 100644
--- a/tests/llms/test_llms_text-generation_vllm_llamaindex_on_intel_hpu.sh
+++ b/tests/llms/test_llms_text-generation_vllm_llamaindex_on_intel_hpu.sh
@@ -57,7 +57,7 @@ function start_service() {
         --ipc=host \
         -e vLLM_ENDPOINT=$vLLM_ENDPOINT \
         -e HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN \
-        -e LLM_MODEL=$LLM_MODEL \
+        -e LLM_MODEL_ID=$LLM_MODEL \
         opea/llm-vllm-llamaindex:comps

     # check whether vllm ray is fully ready
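
Note on the change: the host still exports $LLM_MODEL, but it is now mapped into the container as LLM_MODEL_ID (-e LLM_MODEL_ID=$LLM_MODEL), and the Python services read LLM_MODEL_ID instead of LLM_MODEL. A minimal sketch of the resulting lookup order, mirroring the langchain llm.py hunk above (the helper name resolve_model_name is illustrative and not part of the service code):

    import os

    def resolve_model_name(request_model=None):
        # An explicit model in the request wins; otherwise fall back to the
        # LLM_MODEL_ID environment variable, then to the default shown in the diff.
        return request_model or os.getenv("LLM_MODEL_ID", "meta-llama/Meta-Llama-3-8B-Instruct")

    # Example: with LLM_MODEL_ID unset and no model in the request,
    # resolve_model_name() returns "meta-llama/Meta-Llama-3-8B-Instruct".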