@@ -2,13 +2,16 @@
 # SPDX-License-Identifier: Apache-2.0
 
 services:
+
   tgi-service:
     image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
-    container_name: tgi-service
+    container_name: tgi-server
+    # profiles:
+    #   - codegen-xeon-tgi
     ports:
       - "8028:80"
     volumes:
-      - "./data:/data"
+      - "${MODEL_CACHE:-./data}:/data"
     shm_size: 1g
     environment:
       no_proxy: ${no_proxy}
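Aside on the volume change above: ${MODEL_CACHE:-./data} uses Compose default-value interpolation, so the mount falls back to ./data whenever MODEL_CACHE is unset. A minimal sketch of pointing the cache at a shared disk (the path is illustrative, not from this repo):

  # Reuse one host-level model cache across stacks; falls back to ./data if unset.
  $ export MODEL_CACHE=/mnt/opea-models
  $ docker compose up -d tgi-service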
@@ -22,46 +25,82 @@ services:
       timeout: 10s
       retries: 100
     command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
+
+  vllm-service:
+    image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
+    container_name: vllm-server
+    profiles:
+      - codegen-xeon-vllm
+    ports:
+      - "8028:80"
+    volumes:
+      - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub"
+    shm_size: 1g
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      host_ip: ${host_ip}
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://$host_ip:8028/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
+    command: --model ${LLM_MODEL_ID} --host 0.0.0.0 --port 80
 
-  llm:
+  llm-base:
     image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}
     container_name: llm-textgen-server
-    depends_on:
-      tgi-service:
-        condition: service_healthy
-    ports:
-      - "9000:9000"
-    ipc: host
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
+      LLM_ENDPOINT: ${LLM_ENDPOINT}
       LLM_MODEL_ID: ${LLM_MODEL_ID}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
     restart: unless-stopped
-
+
+  llm-tgi-service:
+    extends: llm-base
+    container_name: llm-codegen-tgi-server
+    # profiles:
+    #   - codegen-xeon-tgi
+    ports:
+      - "9000:9000"
+    ipc: host
+    depends_on:
+      tgi-service:
+        condition: service_healthy
+  llm-vllm-service:
+    extends: llm-base
+    container_name: llm-codegen-vllm-server
+    profiles:
+      - codegen-xeon-vllm
+    ports:
+      - "9000:9000"
+    ipc: host
+    depends_on:
+      vllm-service:
+        condition: service_healthy
 
   codegen-xeon-backend-server:
     image: ${REGISTRY:-opea}/codegen:${TAG:-latest}
     container_name: codegen-xeon-backend-server
     depends_on:
-      - llm
+      - llm-base
     ports:
       - "7778:7778"
     environment:
       - no_proxy=${no_proxy}
       - https_proxy=${https_proxy}
       - http_proxy=${http_proxy}
-      - MEGA_SERVICE_HOST_IP=${host_ip} # ${MEGA_SERVICE_HOST_IP}
-      - LLM_SERVICE_HOST_IP=${host_ip} # ${LLM_SERVICE_HOST_IP}
-      # - RETRIEVAL_SERVICE_HOST_IP=${REDIS_RETRIEVER_PORT}
-      - RETRIEVAL_SERVICE_HOST_IP=${host_ip} # ${RETRIEVAL_SERVICE_HOST_IP}
+      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
+      - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
+      - RETRIEVAL_SERVICE_HOST_IP=${RETRIEVAL_SERVICE_HOST_IP}
       - REDIS_RETRIEVER_PORT=${REDIS_RETRIEVER_PORT}
-      # - MM_EMBEDDING_SERVICE_HOST_IP=${MM_EMBEDDING_PORT_MICROSERVICE}
-      - TEI_EMBEDDING_HOST_IP=${host_ip} # ${TEI_EMBEDDING_HOST_IP}
+      - TEI_EMBEDDING_HOST_IP=${TEI_EMBEDDING_HOST_IP}
       - EMBEDDER_PORT=${EMBEDDER_PORT}
-
     ipc: host
     restart: always
   codegen-xeon-ui-server:
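Aside on the profile wiring above: vllm-service and llm-vllm-service are gated behind the codegen-xeon-vllm profile, while the matching codegen-xeon-tgi profile lines are still commented out, so the TGI pair starts unconditionally. Both backends publish host ports 8028 and 9000, so enabling the vLLM profile as committed would start both stacks and collide on those ports; restoring the commented profile lines makes the two paths opt-in and mutually exclusive. A sketch, assuming the commented codegen-xeon-tgi lines are uncommented:

  # Pick exactly one serving backend per bring-up.
  $ docker compose --profile codegen-xeon-tgi up -d    # TGI on host port 8028
  $ docker compose --profile codegen-xeon-vllm up -d   # vLLM on host port 8028

The llm-tgi-service/llm-vllm-service pair inherit the image and environment of llm-base via extends and add only their own ports, ipc, and depends_on, which is why the backend-agnostic LLM_ENDPOINT variable replaces the TGI-specific TGI_LLM_ENDPOINT.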
@@ -85,7 +124,7 @@ services:
     ports:
       - "${REDIS_DB_PORT}:${REDIS_DB_PORT}"
       - "${REDIS_INSIGHTS_PORT}:${REDIS_INSIGHTS_PORT}"
-
+
   dataprep-redis-server:
     image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
     container_name: dataprep-redis-server
@@ -165,6 +204,8 @@ services:
       RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_REDIS}
     restart: unless-stopped
 
+
+
 networks:
   default:
-    driver: bridge
+    driver: bridge
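Once a backend profile is up, a quick smoke test against the megaservice (assuming the usual OPEA CodeGen route /v1/codegen on the published port 7778; the prompt is only an example):

  $ curl http://${host_ip}:7778/v1/codegen \
      -H 'Content-Type: application/json' \
      -d '{"messages": "Implement a Python function that checks whether a string is a palindrome."}'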