 services:
   tgi-service:
     image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
-    container_name: tgi-server
-    profiles:
-      - codegen-xeon-tgi
+    container_name: tgi-service
     ports:
       - "8028:80"
     volumes:
-      - "${MODEL_CACHE:-./data}:/data"
+      - "./data:/data"
     shm_size: 1g
     environment:
       no_proxy: ${no_proxy}
@@ -24,78 +22,41 @@ services:
       timeout: 10s
       retries: 100
     command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
-  vllm-service:
-    image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
-    container_name: vllm-server
-    profiles:
-      - codegen-xeon-vllm
-    ports:
-      - "8028:80"
-    volumes:
-      - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub"
-    shm_size: 1g
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      host_ip: ${host_ip}
-    healthcheck:
-      test: ["CMD-SHELL", "curl -f http://$host_ip:8028/health || exit 1"]
-      interval: 10s
-      timeout: 10s
-      retries: 100
-    command: --model ${LLM_MODEL_ID} --host 0.0.0.0 --port 80
-  llm-base:
+  llm:
     image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}
     container_name: llm-textgen-server
+    depends_on:
+      tgi-service:
+        condition: service_healthy
+    ports:
+      - "9000:9000"
+    ipc: host
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      LLM_ENDPOINT: ${LLM_ENDPOINT}
+      LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
       LLM_MODEL_ID: ${LLM_MODEL_ID}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
     restart: unless-stopped
-  llm-tgi-service:
-    extends: llm-base
-    container_name: llm-codegen-tgi-server
-    profiles:
-      - codegen-xeon-tgi
-    ports:
-      - "9000:9000"
-    ipc: host
-    depends_on:
-      tgi-service:
-        condition: service_healthy
-  llm-vllm-service:
-    extends: llm-base
-    container_name: llm-codegen-vllm-server
-    profiles:
-      - codegen-xeon-vllm
-    ports:
-      - "9000:9000"
-    ipc: host
-    depends_on:
-      vllm-service:
-        condition: service_healthy
   codegen-xeon-backend-server:
     image: ${REGISTRY:-opea}/codegen:${TAG:-latest}
     container_name: codegen-xeon-backend-server
     depends_on:
-      - llm-base
+      - llm
     ports:
       - "7778:7778"
     environment:
       - no_proxy=${no_proxy}
       - https_proxy=${https_proxy}
       - http_proxy=${http_proxy}
-      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
-      - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
-      - RETRIEVAL_SERVICE_HOST_IP=${RETRIEVAL_SERVICE_HOST_IP}
+      - MEGA_SERVICE_HOST_IP=${host_ip} # ${MEGA_SERVICE_HOST_IP}
+      - LLM_SERVICE_HOST_IP=${host_ip} # ${LLM_SERVICE_HOST_IP}
+      # - RETRIEVAL_SERVICE_HOST_IP=${REDIS_RETRIEVER_PORT}
+      - RETRIEVAL_SERVICE_HOST_IP=${host_ip} # ${RETRIEVAL_SERVICE_HOST_IP}
      - REDIS_RETRIEVER_PORT=${REDIS_RETRIEVER_PORT}
-      - TEI_EMBEDDING_HOST_IP=${TEI_EMBEDDING_HOST_IP}
+      # - MM_EMBEDDING_SERVICE_HOST_IP=${MM_EMBEDDING_PORT_MICROSERVICE}
+      - TEI_EMBEDDING_HOST_IP=${host_ip} # ${TEI_EMBEDDING_HOST_IP}
       - EMBEDDER_PORT=${EMBEDDER_PORT}
 
     ipc: host
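As a quick aside on the hunk above: the consolidated `llm` service now waits on `tgi-service` health and publishes port 9000, while TGI itself is published on host port 8028 (mapped to container port 80). A minimal smoke-test sketch in Python, assuming the ports are published on localhost exactly as in the mappings above; `/health` and `/generate` are TGI's standard routes, and the prompt is a placeholder:

```python
import requests

BASE = "http://localhost:8028"  # tgi-service maps "8028:80" in the compose file

# Mirrors the compose healthcheck: TGI answers 200 on /health once the model is loaded.
assert requests.get(f"{BASE}/health", timeout=10).ok

# /generate is TGI's native text-generation route.
resp = requests.post(
    f"{BASE}/generate",
    json={"inputs": "def is_prime(n):", "parameters": {"max_new_tokens": 64}},
    timeout=120,
)
print(resp.json()["generated_text"])
```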
@@ -139,8 +100,7 @@ services:
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       LOGFLAG: true
     restart: unless-stopped
-
-
+
   tei-embedding-serving:
     image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
     container_name: tei-embedding-serving
@@ -204,7 +164,4 @@ services:
 
 networks:
   default:
-    driver: bridge
-
-
-
+    driver: bridge
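Once the stack is up, the end-to-end path can be checked through the gateway. A hedged sketch, assuming the upstream CodeGen example's documented `/v1/codegen` route and `messages` payload on the published port 7778 (the port mapping is in the diff above); the host and prompt are placeholders, and the gateway streams its reply:

```python
import requests

# Hypothetical end-to-end check against codegen-xeon-backend-server (port 7778).
resp = requests.post(
    "http://localhost:7778/v1/codegen",
    json={"messages": "Write a Python function that checks whether a number is prime."},
    timeout=300,
    stream=True,
)
for line in resp.iter_lines():
    if line:
        print(line.decode())
```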