diff --git a/comps/llms/deployment/docker_compose/compose_doc-summarization.yaml b/comps/llms/deployment/docker_compose/compose_doc-summarization.yaml
index 8eb0044247..05ec82d393 100644
--- a/comps/llms/deployment/docker_compose/compose_doc-summarization.yaml
+++ b/comps/llms/deployment/docker_compose/compose_doc-summarization.yaml
@@ -18,7 +18,6 @@ services:
       https_proxy: ${https_proxy}
       LLM_ENDPOINT: ${LLM_ENDPOINT}
       LLM_MODEL_ID: ${LLM_MODEL_ID}
-      HF_TOKEN: ${HF_TOKEN}
       LOGFLAG: ${LOGFLAG:-False}
       MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
       MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
@@ -29,15 +28,32 @@ services:
     container_name: docsum-tgi
     environment:
       DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME:-OpeaDocSumTgi}
+      HF_TOKEN: ${HF_TOKEN}
     depends_on:
       tgi-server:
         condition: service_healthy

+  docsum-tgi-offline:
+    extends: docsum
+    container_name: docsum-tgi
+    depends_on:
+      tgi-server:
+        condition: service_healthy
+    environment:
+      DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME:-OpeaDocSumTgi}
+      # Use non-existing proxy to mimic air gapped environment
+      http_proxy: http://localhost:7777
+      https_proxy: http://localhost:7777
+      no_proxy: localhost,127.0.0.1,${offline_no_proxy}
+    volumes:
+      - "${DATA_PATH:-./data}:/data"
+
   docsum-tgi-gaudi:
     extends: docsum
     container_name: docsum-tgi-gaudi
     environment:
       DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME:-OpeaDocSumTgi}
+      HF_TOKEN: ${HF_TOKEN}
     depends_on:
       tgi-gaudi-server:
         condition: service_healthy
@@ -47,15 +63,32 @@ services:
     container_name: docsum-vllm
     environment:
       DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME:-OpeaDocSumvLLM}
+      HF_TOKEN: ${HF_TOKEN}
+    depends_on:
+      vllm-server:
+        condition: service_healthy
+
+  docsum-vllm-offline:
+    extends: docsum
+    container_name: docsum-vllm
     depends_on:
       vllm-server:
         condition: service_healthy
+    environment:
+      DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME:-OpeaDocSumvLLM}
+      # Use non-existing proxy to mimic air gapped environment
+      http_proxy: http://localhost:7777
+      https_proxy: http://localhost:7777
+      no_proxy: localhost,127.0.0.1,${offline_no_proxy}
+    volumes:
+      - "${DATA_PATH:-./data}:/data"

   docsum-vllm-gaudi:
     extends: docsum
     container_name: docsum-vllm-gaudi
     environment:
       DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME:-OpeaDocSumvLLM}
+      HF_TOKEN: ${HF_TOKEN}
     depends_on:
       vllm-gaudi-server:
         condition: service_healthy
diff --git a/comps/llms/src/doc-summarization/Dockerfile b/comps/llms/src/doc-summarization/Dockerfile
index c1e0686b30..645056e0dd 100644
--- a/comps/llms/src/doc-summarization/Dockerfile
+++ b/comps/llms/src/doc-summarization/Dockerfile
@@ -21,8 +21,17 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \

 ENV PYTHONPATH=$PYTHONPATH:/home/user

+# air gapped support: set model cache dir
+RUN mkdir -p /data && chown -R user /data
+ENV HF_HUB_CACHE=/data
+# air gapped support: pre-download tiktoken bpe files
+RUN mkdir -p /opt/tiktoken_cache
+ENV TIKTOKEN_CACHE_DIR=/opt/tiktoken_cache
+RUN python -c 'import tiktoken; tiktoken.get_encoding("cl100k_base");tiktoken.get_encoding("o200k_base");tiktoken.get_encoding("gpt2");tiktoken.get_encoding("r50k_base");tiktoken.get_encoding("p50k_base");tiktoken.get_encoding("p50k_edit");'
+RUN chown -R user /opt/tiktoken_cache
+
 USER user

 WORKDIR /home/user/comps/llms/src/doc-summarization

-ENTRYPOINT ["bash", "entrypoint.sh"]
+ENTRYPOINT ["python", "opea_docsum_microservice.py"]
diff --git a/comps/llms/src/doc-summarization/README.md b/comps/llms/src/doc-summarization/README.md
index 6c0e36040b..7c8f30f539 100644
--- a/comps/llms/src/doc-summarization/README.md
+++ b/comps/llms/src/doc-summarization/README.md
@@ -199,3 +199,19 @@ curl http://${your_ip}:9000/v1/docsum \
   -d '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "refine", "chunk_size": 2000, "timeout":200}' \
   -H 'Content-Type: application/json'
 ```
+
+## Running in an air gapped environment
+
+The following steps are needed to run the `opea/llm-docsum` microservice in an air gapped environment (i.e. an environment with no internet access).
+
+1. Pre-download the following models, e.g. with `huggingface-cli download --cache-dir <model_path> <model>`:
+
+- gpt2
+- the same model used by the LLM inference backend
+
+2. Launch the `opea/llm-docsum` microservice with the following settings:
+
+- mount the host `<model_path>` directory as the `/data` directory within the microservice container
+- leave the `HF_TOKEN` environment variable unset
+
+e.g. `unset HF_TOKEN; docker run -v <model_path>:/data ... ...`
diff --git a/comps/llms/src/doc-summarization/entrypoint.sh b/comps/llms/src/doc-summarization/entrypoint.sh
deleted file mode 100644
index 64c8df3b4d..0000000000
--- a/comps/llms/src/doc-summarization/entrypoint.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/usr/bin/env bash
-
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-pip --no-cache-dir install -r requirements-runtime.txt
-
-python opea_docsum_microservice.py
diff --git a/comps/llms/src/doc-summarization/integrations/common.py b/comps/llms/src/doc-summarization/integrations/common.py
index e84cd55acb..654fca37cb 100644
--- a/comps/llms/src/doc-summarization/integrations/common.py
+++ b/comps/llms/src/doc-summarization/integrations/common.py
@@ -8,6 +8,7 @@
 from langchain.chains.summarize import load_summarize_chain
 from langchain.docstore.document import Document
 from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
+from langchain_core.load import dumps as langchain_dumps
 from langchain_core.prompts import PromptTemplate
 from transformers import AutoTokenizer

@@ -201,11 +202,8 @@ async def generate(self, input: DocSumChatCompletionRequest, client):
         if input.stream:

             async def stream_generator():
-                from langserve.serialization import WellKnownLCSerializer
-
-                _serializer = WellKnownLCSerializer()
                 async for chunk in llm_chain.astream_log(docs):
-                    data = _serializer.dumps({"ops": chunk.ops}).decode("utf-8")
+                    data = langchain_dumps({"ops": chunk.ops})
                     if logflag:
                         logger.info(data)
                     yield f"data: {data}\n\n"
diff --git a/comps/llms/src/doc-summarization/requirements-runtime.txt b/comps/llms/src/doc-summarization/requirements-runtime.txt
deleted file mode 100644
index 225adde271..0000000000
--- a/comps/llms/src/doc-summarization/requirements-runtime.txt
+++ /dev/null
@@ -1 +0,0 @@
-langserve
diff --git a/tests/llms/llm_utils.sh b/tests/llms/llm_utils.sh
new file mode 100644
index 0000000000..b86f8283aa
--- /dev/null
+++ b/tests/llms/llm_utils.sh
@@ -0,0 +1,25 @@
+# Copyright (C) 2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+function prepare_models() {
+
+    if [ $# -lt 2 ]; then
+        echo "Usage: prepare_models <model_path> <model1> [model2] ..."
+        return
+    fi
+
+    local model_path=$1
+    shift
+    mkdir -p ${model_path}
+    python3 -m pip install huggingface_hub[cli] --user
+    # Workaround: huggingface-cli reports an error when --cache-dir is set to the default cache directory
+    local extra_args=""
+    local default_model_dir=$(readlink -m ~/.cache/huggingface/hub)
+    local real_model_dir=$(echo ${model_path/#\~/$HOME} | xargs readlink -m )
+    if [[ "${default_model_dir}" != "${real_model_dir}" ]]; then
+        extra_args="--cache-dir ${model_path}"
+    fi
+    for m in "$@"; do
+        PATH=~/.local/bin:$PATH huggingface-cli download ${extra_args} $m
+    done
+}
diff --git a/tests/llms/test_llms_doc-summarization_tgi.sh b/tests/llms/test_llms_doc-summarization_tgi.sh
index 289259978e..8df6a8a137 100644
--- a/tests/llms/test_llms_doc-summarization_tgi.sh
+++ b/tests/llms/test_llms_doc-summarization_tgi.sh
@@ -4,6 +4,9 @@

 set -xe

+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
+source ${SCRIPT_DIR}/llm_utils.sh
+
 IMAGE_REPO=${IMAGE_REPO:-"opea"}
 export REGISTRY=${IMAGE_REPO}
 export TAG="comps"
@@ -28,6 +31,7 @@ function build_docker_images() {
 }

 function start_service() {
+    local offline=${1:-false}
     export host_ip=${host_ip}
     export LLM_ENDPOINT_PORT=12105 # 12100-12199
     export DOCSUM_PORT=10505 #10500-10599
@@ -38,6 +42,12 @@ function start_service() {
     export MAX_TOTAL_TOKENS=4096
     export LOGFLAG=True

+    service_name="docsum-tgi"
+    if [[ "$offline" == "true" ]]; then
+        service_name="docsum-tgi-offline"
+        export offline_no_proxy="${host_ip}"
+        prepare_models ${DATA_PATH} ${LLM_MODEL_ID} gpt2
+    fi
     cd $WORKPATH/comps/llms/deployment/docker_compose
     docker compose -f compose_doc-summarization.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log

@@ -72,6 +82,7 @@
     else
         echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
         docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
+        docker exec ${DOCKER_NAME} env
         exit 1
     fi
     sleep 1s
@@ -147,11 +158,21 @@ function main() {

     stop_docker

     build_docker_images

-    start_service
-    validate_microservices
+    trap stop_docker EXIT

+    echo "Test normal env ..."
+    start_service
+    validate_microservices
     stop_docker
+
+    if [[ -n "${DATA_PATH}" ]]; then
+        echo "Test air gapped env ..."
+        start_service true
+        validate_microservices
+        stop_docker
+    fi
+
     echo y | docker system prune

 }
diff --git a/tests/llms/test_llms_doc-summarization_vllm.sh b/tests/llms/test_llms_doc-summarization_vllm.sh
index 148b69b3e0..b4ccde85ec 100644
--- a/tests/llms/test_llms_doc-summarization_vllm.sh
+++ b/tests/llms/test_llms_doc-summarization_vllm.sh
@@ -4,6 +4,9 @@

 set -xe

+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
+source ${SCRIPT_DIR}/llm_utils.sh
+
 IMAGE_REPO=${IMAGE_REPO:-"opea"}
 export REGISTRY=${IMAGE_REPO}
 export TAG="comps"
@@ -42,6 +45,7 @@ function build_docker_images() {
 }

 function start_service() {
+    local offline=${1:-false}
     export host_ip=${host_ip}
     export LLM_ENDPOINT_PORT=12107 # 12100-12199
     export DOCSUM_PORT=10507 #10500-10599
@@ -53,6 +57,12 @@ function start_service() {
     export VLLM_SKIP_WARMUP=true
     export LOGFLAG=True

+    service_name="docsum-vllm"
+    if [[ "$offline" == "true" ]]; then
+        service_name="docsum-vllm-offline"
+        export offline_no_proxy="${host_ip}"
+        prepare_models ${DATA_PATH} ${LLM_MODEL_ID} gpt2
+    fi
     cd $WORKPATH/comps/llms/deployment/docker_compose
     docker compose -f compose_doc-summarization.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log

@@ -162,11 +172,20 @@ function main() {

     stop_docker

     build_docker_images

-    start_service
-    validate_microservices
+    trap stop_docker EXIT

+    echo "Test normal env ..."
+    start_service
+    validate_microservices
     stop_docker
+
+    if [[ -n "${DATA_PATH}" ]]; then
+        echo "Test air gapped env ..."
+        start_service true
+        validate_microservices
+        stop_docker
+    fi
     echo y | docker system prune

 }
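
For reference, below is a minimal sketch of how the air gapped path introduced by this patch could be exercised by hand, mirroring the README steps and the `docsum-vllm-offline` compose service. The model id, host directory, and IP detection are illustrative assumptions only; the base `docsum` service also needs its usual variables (e.g. `LLM_ENDPOINT`, `MAX_INPUT_TOKENS`), which are omitted here.

```bash
#!/usr/bin/env bash
# Sketch only: manual walk-through of the air gapped flow (assumed model id and paths).

export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"   # assumption: same model the inference backend serves
export DATA_PATH="/opt/hf_models"                 # host directory that will be mounted as /data
export offline_no_proxy="$(hostname -I | awk '{print $1}')"

# 1. Pre-download the models while network access is still available
#    (the tests do the same thing via prepare_models in tests/llms/llm_utils.sh).
huggingface-cli download --cache-dir "${DATA_PATH}" gpt2
huggingface-cli download --cache-dir "${DATA_PATH}" "${LLM_MODEL_ID}"

# 2. Start the offline variant; HF_TOKEN stays unset so nothing tries to reach the Hub.
unset HF_TOKEN
cd comps/llms/deployment/docker_compose
docker compose -f compose_doc-summarization.yaml up docsum-vllm-offline -d
```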