ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm.yaml (4 changes: 2 additions & 2 deletions)
@@ -86,7 +86,7 @@ services:
       MAX_WARMUP_SEQUENCE_LENGTH: 512
     command: --model-id ${RERANK_MODEL_ID} --auto-truncate
   vllm-service:
-    image: ${REGISTRY:-opea}/llm-vllm-hpu:${TAG:-latest}
+    image: ${REGISTRY:-opea}/vllm-hpu:${TAG:-latest}
     container_name: vllm-gaudi-server
     ports:
       - "8007:80"
@@ -104,7 +104,7 @@ services:
     cap_add:
       - SYS_NICE
     ipc: host
-    command: /bin/bash -c "export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --enforce-eager --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048"
+    command: --enforce-eager --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
   chatqna-gaudi-backend-server:
     image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
     container_name: chatqna-gaudi-backend-server
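Note: the flags-only command suggests the new vllm-hpu image's entrypoint launches the OpenAI-compatible API server itself, so compose only passes server arguments. A quick smoke test of the running service (an illustrative sketch, assuming host_ip holds the host address and LLM_MODEL_ID is exported as in the compose environment; the service is published on host port 8007 per the mapping above):

    curl http://${host_ip}:8007/v1/completions \
      -H "Content-Type: application/json" \
      -d "{\"model\": \"${LLM_MODEL_ID}\", \"prompt\": \"What is deep learning?\", \"max_tokens\": 32}"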
ChatQnA/docker_image_build/build.yaml (12 changes: 6 additions & 6 deletions)
@@ -77,12 +77,6 @@ services:
       dockerfile: comps/llms/text-generation/vllm/langchain/Dockerfile
     extends: chatqna
     image: ${REGISTRY:-opea}/llm-vllm:${TAG:-latest}
-  llm-vllm-hpu:
-    build:
-      context: GenAIComps
-      dockerfile: comps/llms/text-generation/vllm/langchain/dependency/Dockerfile.intel_hpu
-    extends: chatqna
-    image: ${REGISTRY:-opea}/llm-vllm-hpu:${TAG:-latest}
   llm-vllm-ray-hpu:
     build:
       context: GenAIComps
@@ -113,6 +107,12 @@ services:
       dockerfile: Dockerfile.cpu
     extends: chatqna
     image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
+  vllm-hpu:
+    build:
+      context: vllm-fork
+      dockerfile: Dockerfile.hpu
+    extends: chatqna
+    image: ${REGISTRY:-opea}/vllm-hpu:${TAG:-latest}
   nginx:
     build:
       context: GenAIComps
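Note: the new vllm-hpu service builds from a vllm-fork build context with Dockerfile.hpu, so the HabanaAI fork must be checked out next to build.yaml before building. To build just this image (a sketch mirroring the test script change below, assuming the working directory is ChatQnA/docker_image_build):

    git clone https://github.com/HabanaAI/vllm-fork.git
    docker compose -f build.yaml build vllm-hpu --no-cache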
ChatQnA/tests/test_compose_vllm_on_gaudi.sh (3 changes: 2 additions & 1 deletion)
@@ -17,9 +17,10 @@ ip_address=$(hostname -I | awk '{print $1}')
 function build_docker_images() {
     cd $WORKPATH/docker_image_build
     git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
+    git clone https://github.com/HabanaAI/vllm-fork.git

     echo "Build all the images with --no-cache, check docker_image_build.log for details..."
-    service_list="chatqna chatqna-ui dataprep-redis retriever-redis llm-vllm-hpu nginx"
+    service_list="chatqna chatqna-ui dataprep-redis retriever-redis vllm-hpu nginx"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

     docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
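Once the build step completes, the presence of the new image can be checked before the compose deployment is started (an illustrative check; the tag follows the ${REGISTRY:-opea}/vllm-hpu:${TAG:-latest} naming from build.yaml):

    docker images | grep vllm-hpu
    # should list opea/vllm-hpu:latest when REGISTRY and TAG are unset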