Commit 3efb2f9

compose.yaml updates

Signed-off-by: Mustafa <[email protected]>
Parent: 2a52094

2 files changed, 22 insertions(+), 26 deletions(-)

CodeGen/codegen.py
17 additions, 9 deletions

@@ -5,7 +5,7 @@
 import os
 import ast
 
-from comps import MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceRoleType, ServiceType
+from comps import MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceRoleType, ServiceType, CustomLogger
 from comps.cores.mega.utils import handle_message
 from comps.cores.proto.api_protocol import (
     ChatCompletionRequest,
@@ -19,6 +19,9 @@
 from fastapi.responses import StreamingResponse
 from langchain.prompts import PromptTemplate
 
+logger = CustomLogger("opea_dataprep_microservice")
+logflag = os.getenv("LOGFLAG", False)
+
 MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 7778))
 
 LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0")
@@ -80,7 +83,7 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
         embedding = inputs['data'][0]['embedding']
         # Align the inputs for the retriever service
         inputs = {
-            "index_name": llm_parameters_dict["key_index_name"],
+            "index_name": llm_parameters_dict["index_name"],
             "text": self.input_query,
             "embedding": embedding
         }
@@ -198,14 +201,14 @@ async def handle_request(self, request: Request):
             presence_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 0.0,
             repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03,
             stream=stream_opt,
-            key_index_name=chat_request.key_index_name
+            index_name=chat_request.index_name
         )
 
         # Initialize the initial inputs with the generated prompt
         initial_inputs = {"query": prompt}
 
         # Check if the key index name is provided in the parameters
-        if parameters.key_index_name:
+        if parameters.index_name:
             if agents_flag:
                 # Schedule the retriever microservice
                 result_ret, runtime_graph = await self.megaservice_retriever.schedule(
@@ -248,11 +251,16 @@ async def handle_request(self, request: Request):
                         relevant_docs.append(doc)
 
                 # Update the initial inputs with the relevant documents
-                query = initial_inputs["query"]
-                initial_inputs = {}
-                initial_inputs["retrieved_docs"] = relevant_docs
-                initial_inputs["initial_query"] = query
-                megaservice = self.megaservice_llm
+                if len(relevant_docs) > 0:
+                    logger.info(f"[ CodeGenService - handle_request ] {len(relevant_docs)} relevant document(s) found.")
+                    query = initial_inputs["query"]
+                    initial_inputs = {}
+                    initial_inputs["retrieved_docs"] = relevant_docs
+                    initial_inputs["initial_query"] = query
+
+                else:
+                    logger.info("[ CodeGenService - handle_request ] Could not find any relevant documents. The query will be used as input to the LLM.")
+
             else:
                 # Use the combined retriever and LLM microservice
                 megaservice = self.megaservice_retriever_llm
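
The rename from key_index_name to index_name changes the request field that clients send, and the new logging makes the retrieval outcome visible. Below is a minimal client sketch against the megaservice after this commit; the /v1/codegen route, the host, and every payload field other than index_name are assumptions based on typical OPEA CodeGen setups, not taken from this diff:

import requests

# Hypothetical request after the rename. Only "index_name" (formerly
# "key_index_name") is grounded in this diff; MEGA_SERVICE_PORT defaults
# to 7778 in codegen.py, and the /v1/codegen route is assumed.
payload = {
    "messages": "Write a Python function that reverses a linked list.",
    "index_name": "my_code_index",  # triggers the retriever path above
    "stream": False,
}
resp = requests.post("http://localhost:7778/v1/codegen", json=payload, timeout=120)
print(resp.json())

With this change, an index_name that matches no documents no longer silently rebuilds the inputs: the new else branch logs the miss and the original query is passed to the LLM unchanged.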

CodeGen/docker_compose/intel/cpu/xeon/compose.yaml
5 additions, 17 deletions

@@ -6,8 +6,8 @@ services:
   tgi-service:
     image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-server
-    # profiles:
-    #   - codegen-xeon-tgi
+    profiles:
+      - codegen-xeon-tgi
     ports:
       - "8028:80"
     volumes:
@@ -25,7 +25,6 @@ services:
       timeout: 10s
       retries: 100
     command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
-
   vllm-service:
     image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
     container_name: vllm-server
@@ -48,7 +47,6 @@ services:
       timeout: 10s
       retries: 100
     command: --model ${LLM_MODEL_ID} --host 0.0.0.0 --port 80
-
   llm-base:
     image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}
     container_name: llm-textgen-server
@@ -60,12 +58,11 @@ services:
       LLM_MODEL_ID: ${LLM_MODEL_ID}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
     restart: unless-stopped
-
   llm-tgi-service:
     extends: llm-base
     container_name: llm-codegen-tgi-server
-    # profiles:
-    #   - codegen-xeon-tgi
+    profiles:
+      - codegen-xeon-tgi
     ports:
       - "9000:9000"
     ipc: host
@@ -83,7 +80,6 @@ services:
     depends_on:
       vllm-service:
         condition: service_healthy
-
   codegen-xeon-backend-server:
     image: ${REGISTRY:-opea}/codegen:${TAG:-latest}
     container_name: codegen-xeon-backend-server
@@ -117,14 +113,12 @@ services:
       - BASIC_URL=${BACKEND_SERVICE_ENDPOINT}
     ipc: host
     restart: always
-
   redis-vector-db:
     image: redis/redis-stack:7.2.0-v9
     container_name: redis-vector-db
     ports:
       - "${REDIS_DB_PORT}:${REDIS_DB_PORT}"
       - "${REDIS_INSIGHTS_PORT}:${REDIS_INSIGHTS_PORT}"
-
   dataprep-redis-server:
     image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
     container_name: dataprep-redis-server
@@ -142,7 +136,6 @@ services:
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      LOGFLAG: true
    restart: unless-stopped
-
  tei-embedding-serving:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
    container_name: tei-embedding-serving
@@ -163,7 +156,6 @@ services:
      interval: 10s
      timeout: 6s
      retries: 48
-
  tei-embedding-server:
    image: ${REGISTRY:-opea}/embedding:${TAG:-latest}
    container_name: tei-embedding-server
@@ -180,7 +172,6 @@ services:
      tei-embedding-serving:
        condition: service_healthy
    restart: unless-stopped
-
  retriever-redis:
    image: ${REGISTRY:-opea}/retriever:${TAG:-latest}
    container_name: retriever-redis
@@ -203,9 +194,6 @@ services:
      LOGFLAG: ${LOGFLAG}
      RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_REDIS}
    restart: unless-stopped
-
-
-
 networks:
   default:
-    driver: bridge
\ No newline at end of file
+    driver: bridge
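
Uncommenting the profiles entries means tgi-service and llm-tgi-service no longer start by default, leaving vLLM as the default serving path. Assuming a standard Docker Compose setup, bringing up the stack with docker compose up -d would start only the profile-less services, while opting into the TGI path would look like docker compose --profile codegen-xeon-tgi up -d.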
