Commit 3efb2f9

compose.yaml updates

Signed-off-by: Mustafa <[email protected]>
Parent: 2a52094

2 files changed, 22 insertions(+), 26 deletions(-)

CodeGen/codegen.py
17 additions, 9 deletions

@@ -5,7 +5,7 @@
 import os
 import ast
 
-from comps import MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceRoleType, ServiceType
+from comps import MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceRoleType, ServiceType, CustomLogger
 from comps.cores.mega.utils import handle_message
 from comps.cores.proto.api_protocol import (
     ChatCompletionRequest,
@@ -19,6 +19,9 @@
 from fastapi.responses import StreamingResponse
 from langchain.prompts import PromptTemplate
 
+logger = CustomLogger("opea_dataprep_microservice")
+logflag = os.getenv("LOGFLAG", False)
+
 MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 7778))
 
 LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0")
@@ -80,7 +83,7 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
         embedding = inputs['data'][0]['embedding']
         # Align the inputs for the retriever service
         inputs = {
-            "index_name": llm_parameters_dict["key_index_name"],
+            "index_name": llm_parameters_dict["index_name"],
             "text": self.input_query,
             "embedding": embedding
         }
@@ -198,14 +201,14 @@ async def handle_request(self, request: Request):
             presence_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 0.0,
             repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03,
             stream=stream_opt,
-            key_index_name=chat_request.key_index_name
+            index_name=chat_request.index_name
         )
 
         # Initialize the initial inputs with the generated prompt
         initial_inputs = {"query": prompt}
 
         # Check if the key index name is provided in the parameters
-        if parameters.key_index_name:
+        if parameters.index_name:
             if agents_flag:
                 # Schedule the retriever microservice
                 result_ret, runtime_graph = await self.megaservice_retriever.schedule(
@@ -248,11 +251,16 @@ async def handle_request(self, request: Request):
                         relevant_docs.append(doc)
 
                 # Update the initial inputs with the relevant documents
-                query = initial_inputs["query"]
-                initial_inputs = {}
-                initial_inputs["retrieved_docs"] = relevant_docs
-                initial_inputs["initial_query"] = query
-                megaservice = self.megaservice_llm
+                if len(relevant_docs) > 0:
+                    logger.info(f"[ CodeGenService - handle_request ] {len(relevant_docs)} relevant document(s) found.")
+                    query = initial_inputs["query"]
+                    initial_inputs = {}
+                    initial_inputs["retrieved_docs"] = relevant_docs
+                    initial_inputs["initial_query"] = query
+
+                else:
+                    logger.info("[ CodeGenService - handle_request ] Could not find any relevant documents. The query will be used as input to the LLM.")
+
             else:
                 # Use the combined retriever and LLM microservice
                 megaservice = self.megaservice_retriever_llm
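
The rename from key_index_name to index_name changes the request field that clients send, and the new logging makes the retrieval outcome visible. Below is a minimal client sketch against the megaservice after this commit; the /v1/codegen route, the host, and every payload field other than index_name are assumptions based on typical OPEA CodeGen setups, not taken from this diff:

import requests

# Hypothetical request after the rename. Only "index_name" (formerly
# "key_index_name") is grounded in this diff; MEGA_SERVICE_PORT defaults
# to 7778 in codegen.py, and the /v1/codegen route is assumed.
payload = {
    "messages": "Write a Python function that reverses a linked list.",
    "index_name": "my_code_index",  # triggers the retriever path above
    "stream": False,
}
resp = requests.post("http://localhost:7778/v1/codegen", json=payload, timeout=120)
print(resp.json())

With this change, an index_name that matches no documents no longer silently rebuilds the inputs: the new else branch logs the miss and the original query is passed to the LLM unchanged.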

CodeGen/docker_compose/intel/cpu/xeon/compose.yaml
5 additions, 17 deletions

@@ -6,8 +6,8 @@ services:
   tgi-service:
     image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-server
-    # profiles:
-    #   - codegen-xeon-tgi
+    profiles:
+      - codegen-xeon-tgi
     ports:
       - "8028:80"
     volumes:
@@ -25,7 +25,6 @@ services:
       timeout: 10s
       retries: 100
     command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
-
   vllm-service:
     image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
     container_name: vllm-server
@@ -48,7 +47,6 @@ services:
       timeout: 10s
       retries: 100
     command: --model ${LLM_MODEL_ID} --host 0.0.0.0 --port 80
-
   llm-base:
     image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}
     container_name: llm-textgen-server
@@ -60,12 +58,11 @@ services:
       LLM_MODEL_ID: ${LLM_MODEL_ID}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
     restart: unless-stopped
-
   llm-tgi-service:
     extends: llm-base
     container_name: llm-codegen-tgi-server
-    # profiles:
-    #   - codegen-xeon-tgi
+    profiles:
+      - codegen-xeon-tgi
     ports:
       - "9000:9000"
     ipc: host
@@ -83,7 +80,6 @@ services:
     depends_on:
       vllm-service:
         condition: service_healthy
-
   codegen-xeon-backend-server:
     image: ${REGISTRY:-opea}/codegen:${TAG:-latest}
     container_name: codegen-xeon-backend-server
@@ -117,14 +113,12 @@ services:
       - BASIC_URL=${BACKEND_SERVICE_ENDPOINT}
     ipc: host
     restart: always
-
   redis-vector-db:
     image: redis/redis-stack:7.2.0-v9
     container_name: redis-vector-db
     ports:
       - "${REDIS_DB_PORT}:${REDIS_DB_PORT}"
       - "${REDIS_INSIGHTS_PORT}:${REDIS_INSIGHTS_PORT}"
-
   dataprep-redis-server:
     image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
     container_name: dataprep-redis-server
@@ -142,7 +136,6 @@ services:
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      LOGFLAG: true
    restart: unless-stopped
-
  tei-embedding-serving:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
    container_name: tei-embedding-serving
@@ -163,7 +156,6 @@ services:
      interval: 10s
      timeout: 6s
      retries: 48
-
  tei-embedding-server:
    image: ${REGISTRY:-opea}/embedding:${TAG:-latest}
    container_name: tei-embedding-server
@@ -180,7 +172,6 @@ services:
      tei-embedding-serving:
        condition: service_healthy
    restart: unless-stopped
-
  retriever-redis:
    image: ${REGISTRY:-opea}/retriever:${TAG:-latest}
    container_name: retriever-redis
@@ -203,9 +194,6 @@ services:
      LOGFLAG: ${LOGFLAG}
      RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_REDIS}
    restart: unless-stopped
-
-
-
 networks:
   default:
-    driver: bridge
\ No newline at end of file
+    driver: bridge
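
Uncommenting the profiles entries means tgi-service and llm-tgi-service no longer start by default, leaving vLLM as the default serving path. Assuming a standard Docker Compose setup, bringing up the stack with docker compose up -d would start only the profile-less services, while opting into the TGI path would look like docker compose --profile codegen-xeon-tgi up -d.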
