comps/llms/deployment/docker_compose/compose_doc-summarization.yaml
@@ -18,7 +18,6 @@ services:
https_proxy: ${https_proxy}
LLM_ENDPOINT: ${LLM_ENDPOINT}
LLM_MODEL_ID: ${LLM_MODEL_ID}
HF_TOKEN: ${HF_TOKEN}
LOGFLAG: ${LOGFLAG:-False}
MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
@@ -29,15 +28,32 @@ services:
container_name: docsum-tgi
environment:
DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME:-OpeaDocSumTgi}
HF_TOKEN: ${HF_TOKEN}
depends_on:
tgi-server:
condition: service_healthy

docsum-tgi-offline:
extends: docsum
container_name: docsum-tgi
depends_on:
tgi-server:
condition: service_healthy
environment:
DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME:-OpeaDocSumTgi}
# Use non-existing proxy to mimic air gapped environment
http_proxy: http://localhost:7777
https_proxy: http://localhost:7777
no_proxy: localhost,127.0.0.1,${offline_no_proxy}
volumes:
- "${DATA_PATH:-./data}:/data"

docsum-tgi-gaudi:
extends: docsum
container_name: docsum-tgi-gaudi
environment:
DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME:-OpeaDocSumTgi}
HF_TOKEN: ${HF_TOKEN}
depends_on:
tgi-gaudi-server:
condition: service_healthy
@@ -47,15 +63,32 @@ services:
container_name: docsum-vllm
environment:
DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME:-OpeaDocSumvLLM}
HF_TOKEN: ${HF_TOKEN}
depends_on:
vllm-server:
condition: service_healthy

docsum-vllm-offline:
extends: docsum
container_name: docsum-vllm
depends_on:
vllm-server:
condition: service_healthy
environment:
DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME:-OpeaDocSumvLLM}
# Use non-existing proxy to mimic air gapped environment
http_proxy: http://localhost:7777
https_proxy: http://localhost:7777
no_proxy: localhost,127.0.0.1,${offline_no_proxy}
volumes:
- "${DATA_PATH:-./data}:/data"

docsum-vllm-gaudi:
extends: docsum
container_name: docsum-vllm-gaudi
environment:
DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME:-OpeaDocSumvLLM}
HF_TOKEN: ${HF_TOKEN}
depends_on:
vllm-gaudi-server:
condition: service_healthy
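
The offline variants above can be brought up the same way the CI tests later in this PR do. A minimal sketch, assuming the usual `LLM_ENDPOINT`/`LLM_MODEL_ID` environment is already exported and that the model cache lives under `/mnt/models` (both values are illustrative):

```bash
# Sketch: start the TGI offline variant; DATA_PATH and offline_no_proxy are placeholders.
export DATA_PATH=/mnt/models            # pre-populated HF cache, mounted into the container at /data
export offline_no_proxy=<host-ip>       # hosts that must remain reachable despite the dummy proxy
cd comps/llms/deployment/docker_compose
docker compose -f compose_doc-summarization.yaml up docsum-tgi-offline -d
```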
11 changes: 10 additions & 1 deletion comps/llms/src/doc-summarization/Dockerfile
@@ -21,8 +21,17 @@ RUN pip install --no-cache-dir --upgrade pip setuptools && \

ENV PYTHONPATH=$PYTHONPATH:/home/user

# air gapped support: set model cache dir
RUN mkdir -p /data && chown -R user /data
ENV HF_HUB_CACHE=/data
# air gapped support: pre-download tiktoken bpe files
RUN mkdir -p /opt/tiktoken_cache
ENV TIKTOKEN_CACHE_DIR=/opt/tiktoken_cache
RUN python -c 'import tiktoken; tiktoken.get_encoding("cl100k_base");tiktoken.get_encoding("o200k_base");tiktoken.get_encoding("gpt2");tiktoken.get_encoding("r50k_base");tiktoken.get_encoding("p50k_base");tiktoken.get_encoding("p50k_edit");'
RUN chown -R user /opt/tiktoken_cache

USER user

WORKDIR /home/user/comps/llms/src/doc-summarization

ENTRYPOINT ["bash", "entrypoint.sh"]
ENTRYPOINT ["python", "opea_docsum_microservice.py"]
16 changes: 16 additions & 0 deletions comps/llms/src/doc-summarization/README.md
@@ -199,3 +199,19 @@ curl http://${your_ip}:9000/v1/docsum \
-d '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5.", "max_tokens":32, "language":"en", "summary_type": "refine", "chunk_size": 2000, "timeout":200}' \
-H 'Content-Type: application/json'
```

## Running in an air-gapped environment

The following steps are needed to run the `opea/llm-docsum` microservice in an air-gapped environment (i.e. an environment with no internet access).

1. Pre-download the following models, e.g. `huggingface-cli download --cache-dir <model data directory> <model>`:

- gpt2
- the same model as the LLM inference backend

2. Launch the `opea/llm-docsum` microservice with the following settings:

- mount the host `<model data directory>` as the `/data` directory within the microservice container
- leave the `HF_TOKEN` environment variable unset

e.g. `unset HF_TOKEN; docker run -v <model data directory>:/data ... ...`
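
For reference, a fuller launch sketch follows; the data directory, backend endpoint, port mapping, token limits, and image tag are illustrative assumptions, not required values.

```bash
# Air-gapped launch sketch; every concrete value below is a placeholder.
unset HF_TOKEN                       # no Hugging Face Hub access in an air-gapped setup
export DATA_PATH=/mnt/models         # host directory pre-populated via huggingface-cli download

docker run -d --name llm-docsum \
  -p 9000:9000 \
  -v ${DATA_PATH}:/data \
  -e LLM_ENDPOINT=http://<backend-host>:8008 \
  -e LLM_MODEL_ID=<model> \
  -e MAX_INPUT_TOKENS=2048 \
  -e MAX_TOTAL_TOKENS=4096 \
  -e DocSum_COMPONENT_NAME=OpeaDocSumTgi \
  opea/llm-docsum:latest
```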
8 changes: 0 additions & 8 deletions comps/llms/src/doc-summarization/entrypoint.sh

This file was deleted.

6 changes: 2 additions & 4 deletions comps/llms/src/doc-summarization/integrations/common.py
@@ -8,6 +8,7 @@
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain_core.load import dumps as langchain_dumps
from langchain_core.prompts import PromptTemplate
from transformers import AutoTokenizer

@@ -201,11 +202,8 @@ async def generate(self, input: DocSumChatCompletionRequest, client):
if input.stream:

async def stream_generator():
from langserve.serialization import WellKnownLCSerializer

_serializer = WellKnownLCSerializer()
async for chunk in llm_chain.astream_log(docs):
data = _serializer.dumps({"ops": chunk.ops}).decode("utf-8")
data = langchain_dumps({"ops": chunk.ops})
if logflag:
logger.info(data)
yield f"data: {data}\n\n"

This file was deleted.

25 changes: 25 additions & 0 deletions tests/llms/llm_utils.sh
@@ -0,0 +1,25 @@
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

function prepare_models() {

if [ $# -lt 2 ]; then
echo "Usage: prepare_models <directory> <model1> [model2] ..."
    return 1
fi

local model_path=$1
shift
mkdir -p ${model_path}
python3 -m pip install huggingface_hub[cli] --user
# Workaround for huggingface-cli reporting error when set --cache-dir to same as default
local extra_args=""
local default_model_dir=$(readlink -m ~/.cache/huggingface/hub)
local real_model_dir=$(echo ${model_path/#\~/$HOME} | xargs readlink -m )
if [[ "${default_model_dir}" != "${real_model_dir}" ]]; then
extra_args="--cache-dir ${model_path}"
fi
for m in "$@"; do
PATH=~/.local/bin:$PATH huggingface-cli download ${extra_args} $m
done
}
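
A usage sketch for the helper above; the cache directory and the first model ID are illustrative, while the pattern mirrors how the test scripts below call it with `${DATA_PATH}` and `${LLM_MODEL_ID}`:

```bash
# Illustrative invocation; directory and model ID are placeholders.
source tests/llms/llm_utils.sh
prepare_models /mnt/models Intel/neural-chat-7b-v3-3 gpt2
```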
25 changes: 23 additions & 2 deletions tests/llms/test_llms_doc-summarization_tgi.sh
@@ -4,6 +4,9 @@

set -xe

SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
source ${SCRIPT_DIR}/llm_utils.sh

IMAGE_REPO=${IMAGE_REPO:-"opea"}
export REGISTRY=${IMAGE_REPO}
export TAG="comps"
@@ -28,6 +31,7 @@ function build_docker_images() {
}

function start_service() {
local offline=${1:-false}
export host_ip=${host_ip}
export LLM_ENDPOINT_PORT=12105 # 12100-12199
export DOCSUM_PORT=10505 #10500-10599
@@ -38,6 +42,12 @@
export MAX_TOTAL_TOKENS=4096
export LOGFLAG=True

service_name="docsum-tgi"
if [[ "$offline" == "true" ]]; then
service_name="docsum-tgi-offline"
export offline_no_proxy="${host_ip}"
prepare_models ${DATA_PATH} ${LLM_MODEL_ID} gpt2
fi
cd $WORKPATH/comps/llms/deployment/docker_compose
docker compose -f compose_doc-summarization.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log

@@ -72,6 +82,7 @@ function validate_services() {
else
echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
docker exec ${DOCKER_NAME} env
exit 1
fi
sleep 1s
@@ -147,11 +158,21 @@ function main() {
stop_docker

build_docker_images
start_service

validate_microservices
trap stop_docker EXIT

echo "Test normal env ..."
start_service
validate_microservices
stop_docker

if [[ -n "${DATA_PATH}" ]]; then
echo "Test air gapped env ..."
start_service true
validate_microservices
stop_docker
fi

echo y | docker system prune

}
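
Assuming the usual CI variables (e.g. `host_ip`, `WORKPATH`, `LOG_PATH`) are already set, the air-gapped leg runs only when `DATA_PATH` is non-empty; an illustrative invocation (the path is a placeholder), and the vLLM script below behaves the same way:

```bash
# Exercises both the normal and the air-gapped legs; DATA_PATH is a placeholder.
DATA_PATH=/mnt/models bash tests/llms/test_llms_doc-summarization_tgi.sh
```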
23 changes: 21 additions & 2 deletions tests/llms/test_llms_doc-summarization_vllm.sh
@@ -4,6 +4,9 @@

set -xe

SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
source ${SCRIPT_DIR}/llm_utils.sh

IMAGE_REPO=${IMAGE_REPO:-"opea"}
export REGISTRY=${IMAGE_REPO}
export TAG="comps"
@@ -42,6 +45,7 @@ function build_docker_images() {
}

function start_service() {
local offline=${1:-false}
export host_ip=${host_ip}
export LLM_ENDPOINT_PORT=12107 # 12100-12199
export DOCSUM_PORT=10507 #10500-10599
@@ -53,6 +57,12 @@
export VLLM_SKIP_WARMUP=true
export LOGFLAG=True

service_name="docsum-vllm"
if [[ "$offline" == "true" ]]; then
service_name="docsum-vllm-offline"
export offline_no_proxy="${host_ip}"
prepare_models ${DATA_PATH} ${LLM_MODEL_ID} gpt2
fi
cd $WORKPATH/comps/llms/deployment/docker_compose
docker compose -f compose_doc-summarization.yaml up ${service_name} -d > ${LOG_PATH}/start_services_with_compose.log

@@ -162,11 +172,20 @@ function main() {
stop_docker

build_docker_images
start_service

validate_microservices
trap stop_docker EXIT

echo "Test normal env ..."
start_service
validate_microservices
stop_docker

if [[ -n "${DATA_PATH}" ]]; then
echo "Test air gapped env ..."
start_service true
validate_microservices
stop_docker
fi
echo y | docker system prune

}