comps/llms/deployment/docker_compose/compose_doc-summarization.yaml
@@ -18,7 +18,6 @@ services:
https_proxy: ${https_proxy}
LLM_ENDPOINT: ${LLM_ENDPOINT}
LLM_MODEL_ID: ${LLM_MODEL_ID}
HF_TOKEN: ${HF_TOKEN}
LOGFLAG: ${LOGFLAG:-False}
MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
@@ -29,15 +28,32 @@ services:
container_name: docsum-tgi
environment:
DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME:-OpeaDocSumTgi}
HF_TOKEN: ${HF_TOKEN}
depends_on:
tgi-server:
condition: service_healthy

docsum-tgi-offline:
extends: docsum
container_name: docsum-tgi
depends_on:
tgi-server:
condition: service_healthy
environment:
DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME:-OpeaDocSumTgi}
# Use non-existing proxy to mimic air gapped environment
http_proxy: http://localhost:7777
https_proxy: http://localhost:7777
no_proxy: localhost,127.0.0.1,${offline_no_proxy}
volumes:
- "${DATA_PATH:-./data}:/data"

docsum-tgi-gaudi:
extends: docsum
container_name: docsum-tgi-gaudi
environment:
DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME:-OpeaDocSumTgi}
HF_TOKEN: ${HF_TOKEN}
depends_on:
tgi-gaudi-server:
condition: service_healthy
@@ -47,15 +63,32 @@ services:
container_name: docsum-vllm
environment:
DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME:-OpeaDocSumvLLM}
HF_TOKEN: ${HF_TOKEN}
depends_on:
vllm-server:
condition: service_healthy

docsum-vllm-offline:
extends: docsum
container_name: docsum-vllm
depends_on:
vllm-server:
condition: service_healthy
environment:
DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME:-OpeaDocSumvLLM}
# Use non-existing proxy to mimic air gapped environment
http_proxy: http://localhost:7777
https_proxy: http://localhost:7777
no_proxy: localhost,127.0.0.1,${offline_no_proxy}
volumes:
- "${DATA_PATH:-./data}:/data"

docsum-vllm-gaudi:
extends: docsum
container_name: docsum-vllm-gaudi
environment:
DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME:-OpeaDocSumvLLM}
HF_TOKEN: ${HF_TOKEN}
depends_on:
vllm-gaudi-server:
condition: service_healthy
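
The offline variants above can be brought up the same way the CI tests later in this PR do. A minimal sketch, assuming the usual `LLM_ENDPOINT`/`LLM_MODEL_ID` environment is already exported and that the model cache lives under `/mnt/models` (both values are illustrative):

```bash
# Sketch: start the TGI offline variant; DATA_PATH and offline_no_proxy are placeholders.
export DATA_PATH=/mnt/models            # pre-populated HF cache, mounted into the container at /data
export offline_no_proxy=<host-ip>       # hosts that must remain reachable despite the dummy proxy
cd comps/llms/deployment/docker_compose
docker compose -f compose_doc-summarization.yaml up docsum-tgi-offline -d
```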
11 changes: 10 additions & 1 deletion comps/llms/src/doc-summarization/Dockerfile
@@ -21,8 +21,17 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \

ENV PYTHONPATH=$PYTHONPATH:/home/user

# air gapped support: set model cache dir
RUN mkdir -p /data && chown -R user /data
ENV HF_HUB_CACHE=/data
# air gapped support: pre-download tiktoken bpe files
RUN mkdir -p /opt/tiktoken_cache
ENV TIKTOKEN_CACHE_DIR=/opt/tiktoken_cache
RUN python -c 'import tiktoken; tiktoken.get_encoding("cl100k_base");tiktoken.get_encoding("o200k_base");tiktoken.get_encoding("gpt2");tiktoken.get_encoding("r50k_base");tiktoken.get_encoding("p50k_base");tiktoken.get_encoding("p50k_edit");'
RUN chown -R user /opt/tiktoken_cache

USER user

WORKDIR /home/user/comps/llms/src/doc-summarization

ENTRYPOINT ["bash", "entrypoint.sh"]
ENTRYPOINT ["python", "opea_docsum_microservice.py"]
16 changes: 16 additions & 0 deletions comps/llms/src/doc-summarization/README.md
@@ -199,3 +199,19 @@ curl http://${your_ip}:9000/v1/docsum \
-d '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "refine", "chunk_size": 2000, "timeout":200}' \
-H 'Content-Type: application/json'
```

## Running in an air-gapped environment

The following steps are needed to run the `opea/llm-docsum` microservice in an air-gapped environment (i.e. an environment with no internet access).

1. Pre-download the following models, e.g. `huggingface-cli download --cache-dir <model data directory> <model>`:

- gpt2
- the same model as the LLM inference backend

2. Launch the `opea/llm-docsum` microservice with the following settings:

- mount the host `<model data directory>` as the `/data` directory within the microservice container
- leave the `HF_TOKEN` environment variable unset

e.g. `unset HF_TOKEN; docker run -v <model data directory>:/data ... ...`
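
For reference, a fuller launch sketch follows; the data directory, backend endpoint, port mapping, token limits, and image tag are illustrative assumptions, not required values.

```bash
# Air-gapped launch sketch; every concrete value below is a placeholder.
unset HF_TOKEN                       # no Hugging Face Hub access in an air-gapped setup
export DATA_PATH=/mnt/models         # host directory pre-populated via huggingface-cli download

docker run -d --name llm-docsum \
  -p 9000:9000 \
  -v ${DATA_PATH}:/data \
  -e LLM_ENDPOINT=http://<backend-host>:8008 \
  -e LLM_MODEL_ID=<model> \
  -e MAX_INPUT_TOKENS=2048 \
  -e MAX_TOTAL_TOKENS=4096 \
  -e DocSum_COMPONENT_NAME=OpeaDocSumTgi \
  opea/llm-docsum:latest
```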
8 changes: 0 additions & 8 deletions comps/llms/src/doc-summarization/entrypoint.sh

This file was deleted.

6 changes: 2 additions & 4 deletions comps/llms/src/doc-summarization/integrations/common.py
@@ -8,6 +8,7 @@
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain_core.load import dumps as langchain_dumps
from langchain_core.prompts import PromptTemplate
from transformers import AutoTokenizer

@@ -201,11 +202,8 @@ async def generate(self, input: DocSumChatCompletionRequest, client):
if input.stream:

async def stream_generator():
from langserve.serialization import WellKnownLCSerializer

_serializer = WellKnownLCSerializer()
async for chunk in llm_chain.astream_log(docs):
data = _serializer.dumps({"ops": chunk.ops}).decode("utf-8")
data = langchain_dumps({"ops": chunk.ops})
if logflag:
logger.info(data)
yield f"data: {data}\n\n"

This file was deleted.

25 changes: 25 additions & 0 deletions tests/llms/llm_utils.sh
@@ -0,0 +1,25 @@
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

function prepare_models() {

if [ $# -lt 2 ]; then
echo "Usage: prepare_models <directory> <model1> [model2] ..."
    return 1
fi

local model_path=$1
shift
mkdir -p ${model_path}
python3 -m pip install huggingface_hub[cli] --user
# Workaround for huggingface-cli reporting error when set --cache-dir to same as default
local extra_args=""
local default_model_dir=$(readlink -m ~/.cache/huggingface/hub)
local real_model_dir=$(echo ${model_path/#\~/$HOME} | xargs readlink -m )
if [[ "${default_model_dir}" != "${real_model_dir}" ]]; then
extra_args="--cache-dir ${model_path}"
fi
for m in "$@"; do
PATH=~/.local/bin:$PATH huggingface-cli download ${extra_args} $m
done
}
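
A usage sketch for the helper above; the cache directory and the first model ID are illustrative, while the pattern mirrors how the test scripts below call it with `${DATA_PATH}` and `${LLM_MODEL_ID}`:

```bash
# Illustrative invocation; directory and model ID are placeholders.
source tests/llms/llm_utils.sh
prepare_models /mnt/models Intel/neural-chat-7b-v3-3 gpt2
```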
25 changes: 23 additions & 2 deletions tests/llms/test_llms_doc-summarization_tgi.sh
@@ -4,6 +4,9 @@

set -xe

SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
source ${SCRIPT_DIR}/llm_utils.sh

IMAGE_REPO=${IMAGE_REPO:-"opea"}
export REGISTRY=${IMAGE_REPO}
export TAG="comps"
@@ -28,6 +31,7 @@ function build_docker_images() {
}

function start_service() {
local offline=${1:-false}
export host_ip=${host_ip}
export LLM_ENDPOINT_PORT=12105 # 12100-12199
export DOCSUM_PORT=10505 #10500-10599
@@ -38,6 +42,12 @@
export MAX_TOTAL_TOKENS=4096
export LOGFLAG=True

service_name="docsum-tgi"
if [[ "$offline" == "true" ]]; then
service_name="docsum-tgi-offline"
export offline_no_proxy="${host_ip}"
prepare_models ${DATA_PATH} ${LLM_MODEL_ID} gpt2
fi
cd $WORKPATH/comps/llms/deployment/docker_compose
docker compose -f compose_doc-summarization.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log

@@ -72,6 +82,7 @@ function validate_services() {
else
echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
docker exec ${DOCKER_NAME} env
exit 1
fi
sleep 1s
@@ -147,11 +158,21 @@ function main() {
stop_docker

build_docker_images
start_service

validate_microservices
trap stop_docker EXIT

echo "Test normal env ..."
start_service
validate_microservices
stop_docker

if [[ -n "${DATA_PATH}" ]]; then
echo "Test air gapped env ..."
start_service true
validate_microservices
stop_docker
fi

echo y | docker system prune

}
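
Assuming the usual CI variables (e.g. `host_ip`, `WORKPATH`, `LOG_PATH`) are already set, the air-gapped leg runs only when `DATA_PATH` is non-empty; an illustrative invocation (the path is a placeholder), and the vLLM script below behaves the same way:

```bash
# Exercises both the normal and the air-gapped legs; DATA_PATH is a placeholder.
DATA_PATH=/mnt/models bash tests/llms/test_llms_doc-summarization_tgi.sh
```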
23 changes: 21 additions & 2 deletions tests/llms/test_llms_doc-summarization_vllm.sh
@@ -4,6 +4,9 @@

set -xe

SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
source ${SCRIPT_DIR}/llm_utils.sh

IMAGE_REPO=${IMAGE_REPO:-"opea"}
export REGISTRY=${IMAGE_REPO}
export TAG="comps"
@@ -42,6 +45,7 @@ function build_docker_images() {
}

function start_service() {
local offline=${1:-false}
export host_ip=${host_ip}
export LLM_ENDPOINT_PORT=12107 # 12100-12199
export DOCSUM_PORT=10507 #10500-10599
@@ -53,6 +57,12 @@
export VLLM_SKIP_WARMUP=true
export LOGFLAG=True

service_name="docsum-vllm"
if [[ "$offline" == "true" ]]; then
service_name="docsum-vllm-offline"
export offline_no_proxy="${host_ip}"
prepare_models ${DATA_PATH} ${LLM_MODEL_ID} gpt2
fi
cd $WORKPATH/comps/llms/deployment/docker_compose
docker compose -f compose_doc-summarization.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log

@@ -162,11 +172,20 @@ function main() {
stop_docker

build_docker_images
start_service

validate_microservices
trap stop_docker EXIT

echo "Test normal env ..."
start_service
validate_microservices
stop_docker

if [[ -n "${DATA_PATH}" ]]; then
echo "Test air gapped env ..."
start_service true
validate_microservices
stop_docker
fi
echo y | docker system prune

}