From cd35fff309619ba910323e25988e0fc264e1069d Mon Sep 17 00:00:00 2001 From: Mustafa Date: Thu, 13 Feb 2025 22:11:53 -0800 Subject: [PATCH 01/22] update the compose file Signed-off-by: Mustafa initial Signed-off-by: Mustafa update codegen Signed-off-by: Mustafa update for codegen Signed-off-by: Mustafa add agents Signed-off-by: Mustafa env updates Signed-off-by: Mustafa update codegen Signed-off-by: Mustafa merged to main Signed-off-by: Mustafa updates Signed-off-by: Mustafa --- CodeGen/codegen.py | 225 +++++++++++++++++- .../intel/cpu/xeon/compose.yaml | 161 ++++++++----- CodeGen/docker_compose/set_env.sh | 66 ++++- 3 files changed, 382 insertions(+), 70 deletions(-) diff --git a/CodeGen/codegen.py b/CodeGen/codegen.py index 16db9aa262..b5cf45bfd9 100644 --- a/CodeGen/codegen.py +++ b/CodeGen/codegen.py @@ -3,6 +3,7 @@ import asyncio import os +import ast from comps import MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceRoleType, ServiceType from comps.cores.mega.utils import handle_message @@ -16,20 +17,113 @@ from comps.cores.proto.docarray import LLMParams from fastapi import Request from fastapi.responses import StreamingResponse +from langchain.prompts import PromptTemplate MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 7778)) + LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0") LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000)) +RETRIEVAL_SERVICE_HOST_IP = os.getenv("RETRIEVAL_SERVICE_HOST_IP", "0.0.0.0") +REDIS_RETRIEVER_PORT = int(os.getenv("REDIS_RETRIEVER_PORT", 7000)) + +TEI_EMBEDDING_HOST_IP = os.getenv("TEI_EMBEDDING_HOST_IP", "0.0.0.0") +EMBEDDER_PORT = int(os.getenv("EMBEDDER_PORT", 6000)) + +print(">>>>>> LLM_SERVICE_HOST_IP:", LLM_SERVICE_HOST_IP) +print(">>>>>> LLM_SERVICE_PORT:", LLM_SERVICE_PORT) + +print(">>>>>> RETRIEVAL_SERVICE_HOST_IP:", RETRIEVAL_SERVICE_HOST_IP) +print(">>>>>> REDIS_RETRIEVER_PORT:", REDIS_RETRIEVER_PORT) + +print(">>>>>> TEI_EMBEDDING_HOST_IP:", TEI_EMBEDDING_HOST_IP) +print(">>>>>> EMBEDDER_PORT:", EMBEDDER_PORT) + +grader_prompt = """You are a grader assessing relevance of a retrieved document to a user question. \n +Here is the user question: {question} \n +Here is the retrieved document: \n\n {document} \n\n + +If the document contains keywords related to the user question, grade it as relevant. +It does not need to be a stringent test. The goal is to filter out erroneous retrievals. +Rules: +- Do not return the question, the provided document or explanation. +- if this document is relevant to the question, return 'yes' otherwise return 'no'. +- Do not include any other details in your response. +""" + +def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs): + """ + Aligns the inputs based on the service type of the current node. + + Parameters: + - self: Reference to the current instance of the class. + - inputs: Dictionary containing the inputs for the current node. + - cur_node: The current node in the service orchestrator. + - runtime_graph: The runtime graph of the service orchestrator. + - llm_parameters_dict: Dictionary containing the LLM parameters. + - kwargs: Additional keyword arguments. + + Returns: + - inputs: The aligned inputs for the current node. 
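+
+    Notes:
+    - For EMBEDDING nodes, the incoming query is cached on self.input_query and forwarded to
+      the embedding service as the "input" field.
+    - For RETRIEVER nodes, the payload is rebuilt with the keys "index_name", "text" and
+      "embedding", where the embedding is taken from inputs["data"][0]["embedding"] returned
+      by the embedding step.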
+ """ + + # Check if the current service type is EMBEDDING + if self.services[cur_node].service_type == ServiceType.EMBEDDING: + # Store the input query for later use + self.input_query = inputs["query"] + # Set the input for the embedding service + inputs["input"] = inputs["query"] + + # Check if the current service type is RETRIEVER + if self.services[cur_node].service_type == ServiceType.RETRIEVER: + # Extract the embedding from the inputs + embedding = inputs['data'][0]['embedding'] + # Align the inputs for the retriever service + inputs = { + "index_name": llm_parameters_dict["key_index_name"], + "text": self.input_query, + "embedding": embedding + } + + return inputs + class CodeGenService: def __init__(self, host="0.0.0.0", port=8000): self.host = host self.port = port - self.megaservice = ServiceOrchestrator() + ServiceOrchestrator.align_inputs = align_inputs + self.megaservice_llm = ServiceOrchestrator() + self.megaservice_retriever = ServiceOrchestrator() + self.megaservice_retriever_llm = ServiceOrchestrator() self.endpoint = str(MegaServiceEndpoint.CODE_GEN) def add_remote_service(self): + """ + Adds remote microservices to the service orchestrators and defines the flow between them. + """ + + # Define the embedding microservice + embedding = MicroService( + name="embedding", + host=TEI_EMBEDDING_HOST_IP, + port=EMBEDDER_PORT, + endpoint="/v1/embeddings", + use_remote_service=True, + service_type=ServiceType.EMBEDDING, + ) + + # Define the retriever microservice + retriever = MicroService( + name="retriever", + host=RETRIEVAL_SERVICE_HOST_IP, + port=REDIS_RETRIEVER_PORT, + endpoint="/v1/retrieval", + use_remote_service=True, + service_type=ServiceType.RETRIEVER, + ) + + # Define the LLM microservice llm = MicroService( name="llm", host=LLM_SERVICE_HOST_IP, @@ -38,13 +132,63 @@ def add_remote_service(self): use_remote_service=True, service_type=ServiceType.LLM, ) - self.megaservice.add(llm) + + # Add the microservices to the megaservice_retriever_llm orchestrator and define the flow + self.megaservice_retriever_llm.add(embedding).add(retriever).add(llm) + self.megaservice_retriever_llm.flow_to(embedding, retriever) + self.megaservice_retriever_llm.flow_to(retriever, llm) + + # Add the microservices to the megaservice_retriever orchestrator and define the flow + self.megaservice_retriever.add(embedding).add(retriever) + self.megaservice_retriever.flow_to(embedding, retriever) + + # Add the LLM microservice to the megaservice_llm orchestrator + self.megaservice_llm.add(llm) + + async def read_streaming_response(self, response: StreamingResponse): + """ + Reads the streaming response from a StreamingResponse object. + + Parameters: + - self: Reference to the current instance of the class. + - response: The StreamingResponse object to read from. + + Returns: + - str: The complete response body as a decoded string. + """ + body = b"" # Initialize an empty byte string to accumulate the response chunks + async for chunk in response.body_iterator: + body += chunk # Append each chunk to the body + return body.decode("utf-8") # Decode the accumulated byte string to a regular string async def handle_request(self, request: Request): + """ + Handles the incoming request, processes it through the appropriate microservices, + and returns the response. + + Parameters: + - self: Reference to the current instance of the class. + - request: The incoming request object. + + Returns: + - ChatCompletionResponse: The response from the LLM microservice. 
+ """ + # Parse the incoming request data data = await request.json() + + # Get the stream option from the request data, default to True if not provided stream_opt = data.get("stream", True) - chat_request = ChatCompletionRequest.parse_obj(data) + + # Validate and parse the chat request data + chat_request = ChatCompletionRequest.model_validate(data) + + # Handle the chat messages to generate the prompt prompt = handle_message(chat_request.messages) + + # Get the agents flag from the request data, default to False if not provided + agents_flag = data.get("agents_flag", False) + + # Define the LLM parameters parameters = LLMParams( max_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024, top_k=chat_request.top_k if chat_request.top_k else 10, @@ -54,18 +198,83 @@ async def handle_request(self, request: Request): presence_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 0.0, repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03, stream=stream_opt, + key_index_name=chat_request.key_index_name ) - result_dict, runtime_graph = await self.megaservice.schedule( - initial_inputs={"query": prompt}, llm_parameters=parameters + + # Initialize the initial inputs with the generated prompt + initial_inputs = {"query": prompt} + + # Check if the key index name is provided in the parameters + if parameters.key_index_name: + if agents_flag: + # Schedule the retriever microservice + result_ret, runtime_graph = await self.megaservice_retriever.schedule( + initial_inputs=initial_inputs, llm_parameters=parameters + ) + + # Switch to the LLM microservice + megaservice = self.megaservice_llm + + relevant_docs = [] + for doc in result_ret["retriever/MicroService"]["retrieved_docs"]: + # Create the PromptTemplate + prompt_agent = PromptTemplate(template=grader_prompt, input_variables=["question", "document"]) + + # Format the template with the input variables + formatted_prompt = prompt_agent.format(question=prompt, document=doc["text"]) + initial_inputs_grader = {"query": formatted_prompt} + + # Schedule the LLM microservice for grading + grade, runtime_graph = await self.megaservice_llm.schedule( + initial_inputs=initial_inputs_grader, llm_parameters=parameters + ) + + for node, response in grade.items(): + if isinstance(response, StreamingResponse): + # Read the streaming response + grader_response = await self.read_streaming_response(response) + + # Replace null with None + grader_response = grader_response.replace("null", "None") + + # Split the response by "data:" and process each part + for i in grader_response.split("data:"): + if '"text":' in i: + # Convert the string to a dictionary + r = ast.literal_eval(i) + # Check if the response text is "yes" + if r["choices"][0]["text"] == "yes": + # Append the document to the relevant_docs list + relevant_docs.append(doc) + + # Update the initial inputs with the relevant documents + query = initial_inputs["query"] + initial_inputs = {} + initial_inputs["retrieved_docs"] = relevant_docs + initial_inputs["initial_query"] = query + megaservice = self.megaservice_llm + else: + # Use the combined retriever and LLM microservice + megaservice = self.megaservice_retriever_llm + else: + # Use the LLM microservice only + megaservice = self.megaservice_llm + + # Schedule the final megaservice + result_dict, runtime_graph = await megaservice.schedule( + initial_inputs=initial_inputs, llm_parameters=parameters ) + for node, response in result_dict.items(): - # Here it suppose the last 
microservice in the megaservice is LLM. + # Check if the last microservice in the megaservice is LLM if ( isinstance(response, StreamingResponse) - and node == list(self.megaservice.services.keys())[-1] - and self.megaservice.services[node].service_type == ServiceType.LLM + and node == list(megaservice.services.keys())[-1] + and megaservice.services[node].service_type == ServiceType.LLM ): return response + + # Get the response from the last node in the runtime graph last_node = runtime_graph.all_leaves()[-1] response = result_dict[last_node]["text"] choices = [] diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml index 5567d9e368..be288975c9 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml +++ b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml @@ -4,13 +4,11 @@ services: tgi-service: image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu - container_name: tgi-server - profiles: - - codegen-xeon-tgi + container_name: tgi-service ports: - "8028:80" volumes: - - "${MODEL_CACHE:-./data}:/data" + - "./data:/data" shm_size: 1g environment: no_proxy: ${no_proxy} @@ -24,74 +22,43 @@ services: timeout: 10s retries: 100 command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 - vllm-service: - image: ${REGISTRY:-opea}/vllm:${TAG:-latest} - container_name: vllm-server - profiles: - - codegen-xeon-vllm - ports: - - "8028:80" - volumes: - - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub" - shm_size: 1g - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - host_ip: ${host_ip} - healthcheck: - test: ["CMD-SHELL", "curl -f http://$host_ip:8028/health || exit 1"] - interval: 10s - timeout: 10s - retries: 100 - command: --model ${LLM_MODEL_ID} --host 0.0.0.0 --port 80 - llm-base: + llm: image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} container_name: llm-textgen-server + depends_on: + tgi-service: + condition: service_healthy + ports: + - "9000:9000" + ipc: host environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - LLM_ENDPOINT: ${LLM_ENDPOINT} + LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped - llm-tgi-service: - extends: llm-base - container_name: llm-codegen-tgi-server - profiles: - - codegen-xeon-tgi - ports: - - "9000:9000" - ipc: host - depends_on: - tgi-service: - condition: service_healthy - llm-vllm-service: - extends: llm-base - container_name: llm-codegen-vllm-server - profiles: - - codegen-xeon-vllm - ports: - - "9000:9000" - ipc: host - depends_on: - vllm-service: - condition: service_healthy codegen-xeon-backend-server: image: ${REGISTRY:-opea}/codegen:${TAG:-latest} container_name: codegen-xeon-backend-server depends_on: - - llm-base + - llm ports: - "7778:7778" environment: - no_proxy=${no_proxy} - https_proxy=${https_proxy} - http_proxy=${http_proxy} - - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} - - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP} + - MEGA_SERVICE_HOST_IP=${host_ip} #${MEGA_SERVICE_HOST_IP} + - LLM_SERVICE_HOST_IP=${host_ip} #${LLM_SERVICE_HOST_IP} + # - RETRIEVAL_SERVICE_HOST_IP=${REDIS_RETRIEVER_PORT} + - RETRIEVAL_SERVICE_HOST_IP=${host_ip} #${RETRIEVAL_SERVICE_HOST_IP} + - REDIS_RETRIEVER_PORT=${REDIS_RETRIEVER_PORT} + # - MM_EMBEDDING_SERVICE_HOST_IP=${MM_EMBEDDING_PORT_MICROSERVICE} + - TEI_EMBEDDING_HOST_IP=${host_ip} #${TEI_EMBEDDING_HOST_IP} + - 
EMBEDDER_PORT=${EMBEDDER_PORT} + ipc: host restart: always codegen-xeon-ui-server: @@ -109,6 +76,92 @@ services: ipc: host restart: always + redis-vector-db: + image: redis/redis-stack:7.2.0-v9 + container_name: redis-vector-db + ports: + - "${REDIS_DB_PORT}:${REDIS_DB_PORT}" + - "${REDIS_INSIGHTS_PORT}:${REDIS_INSIGHTS_PORT}" + + dataprep-redis-server: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-redis-server + depends_on: + - redis-vector-db + ports: + - "${DATAPREP_REDIS_PORT}:5000" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: ${REDIS_URL} + REDIS_HOST: ${host_ip} + INDEX_NAME: ${INDEX_NAME} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + LOGFLAG: true + restart: unless-stopped + + tei-embedding-serving: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + container_name: tei-embedding-serving + entrypoint: /bin/sh -c "apt-get update && apt-get install -y curl && text-embeddings-router --json-output --model-id ${EMBEDDING_MODEL_ID} --auto-truncate" + ports: + - "${TEI_EMBEDDER_PORT:-12000}:80" + volumes: + - "./data:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + host_ip: ${host_ip} + HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + healthcheck: + test: ["CMD", "curl", "-f", "http://${host_ip}:${TEI_EMBEDDER_PORT}/health"] + interval: 10s + timeout: 6s + retries: 48 + + tei-embedding-server: + image: ${REGISTRY:-opea}/embedding:${TAG:-latest} + container_name: tei-embedding-server + ports: + - "${EMBEDDER_PORT:-10201}:6000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + EMBEDDING_COMPONENT_NAME: "OPEA_TEI_EMBEDDING" + depends_on: + tei-embedding-serving: + condition: service_healthy + restart: unless-stopped + + retriever-redis: + image: ${REGISTRY:-opea}/retriever:${TAG:-latest} + container_name: retriever-redis + depends_on: + - redis-vector-db + ports: + - "${REDIS_RETRIEVER_PORT}:${REDIS_RETRIEVER_PORT}" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: ${REDIS_URL} + REDIS_DB_PORT: ${REDIS_DB_PORT} + REDIS_INSIGHTS_PORT: ${REDIS_INSIGHTS_PORT} + REDIS_RETRIEVER_PORT: ${REDIS_RETRIEVER_PORT} + INDEX_NAME: ${INDEX_NAME} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + LOGFLAG: ${LOGFLAG} + RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_REDIS} + restart: unless-stopped + networks: default: - driver: bridge + driver: bridge \ No newline at end of file diff --git a/CodeGen/docker_compose/set_env.sh b/CodeGen/docker_compose/set_env.sh index cb9e742847..e27df5e91a 100644 --- a/CodeGen/docker_compose/set_env.sh +++ b/CodeGen/docker_compose/set_env.sh @@ -2,12 +2,13 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -pushd "../../" > /dev/null -source .set_env.sh -popd > /dev/null +# pushd "../../" > /dev/null +# source .set_env.sh +# popd > /dev/null -export host_ip=$(hostname -I | awk '{print $1}') +export your_ip=$(hostname -I | awk '{print $1}') +export host_ip=$(hostname -I | awk '{print $1}') if [ -z "${HUGGINGFACEHUB_API_TOKEN}" ]; then echo "Error: HUGGINGFACEHUB_API_TOKEN is not set. 
Please set HUGGINGFACEHUB_API_TOKEN" fi @@ -18,9 +19,58 @@ fi export no_proxy=${no_proxy},${host_ip} -export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct" -export LLM_ENDPOINT="http://${host_ip}:8028" +export http_proxy=${http_proxy} +export https_proxy=${https_proxy} + +# export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct" +export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-32B-Instruct" +export TGI_LLM_ENDPOINT="http://${host_ip}:8028" + +export MEGA_SERVICE_PORT=7778 export MEGA_SERVICE_HOST_IP=${host_ip} -export LLM_SERVICE_HOST_IP=${host_ip} +export MEGA_SERVICE_PORT=7778 export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:7778/v1/codegen" -export MODEL_CACHE="./data" + +export REDIS_DB_PORT=6379 +export REDIS_INSIGHTS_PORT=8001 +export REDIS_URL="redis://${host_ip}:${REDIS_DB_PORT}" +export REDIS_HOST=${host_ip} +export INDEX_NAME="test_codeGen_v1" + + +export RETRIEVAL_SERVICE_HOST_IP=${host_ip} + +export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" + +export MMEI_EMBEDDING_ENDPOINT="http://${host_ip}:$EMM_BRIDGETOWER_PORT" +export MM_EMBEDDING_ENDPOINT="http://${host_ip}:$EMM_BRIDGETOWER_PORT" + +export MM_EMBEDDING_PORT_MICROSERVICE=6000 +export MM_EMBEDDING_SERVICE_HOST_IP=${host_ip} + +# export BRIDGE_TOWER_EMBEDDING=true + +# export MULTIMODAL_DATAPREP=true +export DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_REDIS" + +export REDIS_RETRIEVER_PORT=7000 +export DATAPREP_REDIS_PORT=6007 +export LOGFLAG=false + +# Text Retriever +export RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_REDIS" +export INDEX_NAME="CodeGen" + +export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" +export EMBEDDER_PORT=6000 +export TEI_EMBEDDER_PORT=8090 +export TEI_EMBEDDING_HOST_IP=${host_ip} +export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" + +export DATAPREP_REDIS_PORT=6007 +export LOGFLAG=false + + +# export LLM_SERVICE_HOST_IP=${host_ip} +# export RETRIEVAL_SERVICE_HOST_IP=${host_ip} +# export TEI_EMBEDDING_HOST_IP=${host_ip} \ No newline at end of file From 205b38039c7c64144a6c0bd7d34d9f1a464ddc9b Mon Sep 17 00:00:00 2001 From: Mustafa Date: Tue, 25 Mar 2025 10:43:25 -0700 Subject: [PATCH 02/22] update the env variables Signed-off-by: Mustafa --- .../intel/cpu/xeon/compose.yaml | 3 ++ CodeGen/docker_compose/set_env.sh | 41 ++++--------------- 2 files changed, 11 insertions(+), 33 deletions(-) diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml index be288975c9..763f93081b 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml +++ b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml @@ -22,6 +22,7 @@ services: timeout: 10s retries: 100 command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 + llm: image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} container_name: llm-textgen-server @@ -39,6 +40,8 @@ services: LLM_MODEL_ID: ${LLM_MODEL_ID} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped + + codegen-xeon-backend-server: image: ${REGISTRY:-opea}/codegen:${TAG:-latest} container_name: codegen-xeon-backend-server diff --git a/CodeGen/docker_compose/set_env.sh b/CodeGen/docker_compose/set_env.sh index e27df5e91a..dd0b97a551 100644 --- a/CodeGen/docker_compose/set_env.sh +++ b/CodeGen/docker_compose/set_env.sh @@ -2,9 +2,9 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -# pushd "../../" > /dev/null -# source .set_env.sh -# popd > /dev/null +pushd "../../" > /dev/null +source .set_env.sh +popd > /dev/null export your_ip=$(hostname -I | awk '{print $1}') @@ -22,42 +22,21 @@ export 
no_proxy=${no_proxy},${host_ip} export http_proxy=${http_proxy} export https_proxy=${https_proxy} -# export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct" export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-32B-Instruct" +export LLM_SERVICE_PORT=9000 +export LLM_ENDPOINT="http://${host_ip}:8028" +export LLM_SERVICE_HOST_IP=${host_ip} export TGI_LLM_ENDPOINT="http://${host_ip}:8028" export MEGA_SERVICE_PORT=7778 export MEGA_SERVICE_HOST_IP=${host_ip} -export MEGA_SERVICE_PORT=7778 export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:7778/v1/codegen" export REDIS_DB_PORT=6379 export REDIS_INSIGHTS_PORT=8001 export REDIS_URL="redis://${host_ip}:${REDIS_DB_PORT}" -export REDIS_HOST=${host_ip} -export INDEX_NAME="test_codeGen_v1" - - -export RETRIEVAL_SERVICE_HOST_IP=${host_ip} - -export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" - -export MMEI_EMBEDDING_ENDPOINT="http://${host_ip}:$EMM_BRIDGETOWER_PORT" -export MM_EMBEDDING_ENDPOINT="http://${host_ip}:$EMM_BRIDGETOWER_PORT" - -export MM_EMBEDDING_PORT_MICROSERVICE=6000 -export MM_EMBEDDING_SERVICE_HOST_IP=${host_ip} - -# export BRIDGE_TOWER_EMBEDDING=true - -# export MULTIMODAL_DATAPREP=true -export DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_REDIS" - export REDIS_RETRIEVER_PORT=7000 -export DATAPREP_REDIS_PORT=6007 -export LOGFLAG=false - -# Text Retriever +export RETRIEVAL_SERVICE_HOST_IP=${host_ip} export RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_REDIS" export INDEX_NAME="CodeGen" @@ -69,8 +48,4 @@ export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" export DATAPREP_REDIS_PORT=6007 export LOGFLAG=false - - -# export LLM_SERVICE_HOST_IP=${host_ip} -# export RETRIEVAL_SERVICE_HOST_IP=${host_ip} -# export TEI_EMBEDDING_HOST_IP=${host_ip} \ No newline at end of file +export MODEL_CACHE="./data" From 2a520948d0151d6856cc9ada6d91b7b346e45410 Mon Sep 17 00:00:00 2001 From: Mustafa Date: Tue, 25 Mar 2025 12:13:29 -0700 Subject: [PATCH 03/22] update compose.yaml Signed-off-by: Mustafa --- .../intel/cpu/xeon/compose.yaml | 83 ++++++++++++++----- 1 file changed, 62 insertions(+), 21 deletions(-) diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml index 763f93081b..0987b6fa8e 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml +++ b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml @@ -2,13 +2,16 @@ # SPDX-License-Identifier: Apache-2.0 services: + tgi-service: image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu - container_name: tgi-service + container_name: tgi-server + # profiles: + # - codegen-xeon-tgi ports: - "8028:80" volumes: - - "./data:/data" + - "${MODEL_CACHE:-./data}:/data" shm_size: 1g environment: no_proxy: ${no_proxy} @@ -22,46 +25,82 @@ services: timeout: 10s retries: 100 command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 + + vllm-service: + image: ${REGISTRY:-opea}/vllm:${TAG:-latest} + container_name: vllm-server + profiles: + - codegen-xeon-vllm + ports: + - "8028:80" + volumes: + - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + host_ip: ${host_ip} + healthcheck: + test: ["CMD-SHELL", "curl -f http://$host_ip:8028/health || exit 1"] + interval: 10s + timeout: 10s + retries: 100 + command: --model ${LLM_MODEL_ID} --host 0.0.0.0 --port 80 - llm: + llm-base: image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} container_name: llm-textgen-server - depends_on: - tgi-service: - condition: 
service_healthy - ports: - - "9000:9000" - ipc: host environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} + LLM_ENDPOINT: ${LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped - + + llm-tgi-service: + extends: llm-base + container_name: llm-codegen-tgi-server + # profiles: + # - codegen-xeon-tgi + ports: + - "9000:9000" + ipc: host + depends_on: + tgi-service: + condition: service_healthy + llm-vllm-service: + extends: llm-base + container_name: llm-codegen-vllm-server + profiles: + - codegen-xeon-vllm + ports: + - "9000:9000" + ipc: host + depends_on: + vllm-service: + condition: service_healthy codegen-xeon-backend-server: image: ${REGISTRY:-opea}/codegen:${TAG:-latest} container_name: codegen-xeon-backend-server depends_on: - - llm + - llm-base ports: - "7778:7778" environment: - no_proxy=${no_proxy} - https_proxy=${https_proxy} - http_proxy=${http_proxy} - - MEGA_SERVICE_HOST_IP=${host_ip} #${MEGA_SERVICE_HOST_IP} - - LLM_SERVICE_HOST_IP=${host_ip} #${LLM_SERVICE_HOST_IP} - # - RETRIEVAL_SERVICE_HOST_IP=${REDIS_RETRIEVER_PORT} - - RETRIEVAL_SERVICE_HOST_IP=${host_ip} #${RETRIEVAL_SERVICE_HOST_IP} + - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} + - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP} + - RETRIEVAL_SERVICE_HOST_IP=${RETRIEVAL_SERVICE_HOST_IP} - REDIS_RETRIEVER_PORT=${REDIS_RETRIEVER_PORT} - # - MM_EMBEDDING_SERVICE_HOST_IP=${MM_EMBEDDING_PORT_MICROSERVICE} - - TEI_EMBEDDING_HOST_IP=${host_ip} #${TEI_EMBEDDING_HOST_IP} + - TEI_EMBEDDING_HOST_IP=${TEI_EMBEDDING_HOST_IP} - EMBEDDER_PORT=${EMBEDDER_PORT} - ipc: host restart: always codegen-xeon-ui-server: @@ -85,7 +124,7 @@ services: ports: - "${REDIS_DB_PORT}:${REDIS_DB_PORT}" - "${REDIS_INSIGHTS_PORT}:${REDIS_INSIGHTS_PORT}" - + dataprep-redis-server: image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} container_name: dataprep-redis-server @@ -165,6 +204,8 @@ services: RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_REDIS} restart: unless-stopped + + networks: default: - driver: bridge \ No newline at end of file + driver: bridge From 3efb2f9adf04c812e1c3b571035008732859f1a2 Mon Sep 17 00:00:00 2001 From: Mustafa Date: Wed, 26 Mar 2025 11:22:48 -0700 Subject: [PATCH 04/22] compose.yaml updates Signed-off-by: Mustafa --- CodeGen/codegen.py | 26 ++++++++++++------- .../intel/cpu/xeon/compose.yaml | 22 ++++------------ 2 files changed, 22 insertions(+), 26 deletions(-) diff --git a/CodeGen/codegen.py b/CodeGen/codegen.py index b5cf45bfd9..6384efaa47 100644 --- a/CodeGen/codegen.py +++ b/CodeGen/codegen.py @@ -5,7 +5,7 @@ import os import ast -from comps import MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceRoleType, ServiceType +from comps import MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceRoleType, ServiceType, CustomLogger from comps.cores.mega.utils import handle_message from comps.cores.proto.api_protocol import ( ChatCompletionRequest, @@ -19,6 +19,9 @@ from fastapi.responses import StreamingResponse from langchain.prompts import PromptTemplate +logger = CustomLogger("opea_dataprep_microservice") +logflag = os.getenv("LOGFLAG", False) + MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 7778)) LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0") @@ -80,7 +83,7 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k embedding = inputs['data'][0]['embedding'] # Align the inputs for the retriever 
service inputs = { - "index_name": llm_parameters_dict["key_index_name"], + "index_name": llm_parameters_dict["index_name"], "text": self.input_query, "embedding": embedding } @@ -198,14 +201,14 @@ async def handle_request(self, request: Request): presence_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 0.0, repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03, stream=stream_opt, - key_index_name=chat_request.key_index_name + index_name=chat_request.index_name ) # Initialize the initial inputs with the generated prompt initial_inputs = {"query": prompt} # Check if the key index name is provided in the parameters - if parameters.key_index_name: + if parameters.index_name: if agents_flag: # Schedule the retriever microservice result_ret, runtime_graph = await self.megaservice_retriever.schedule( @@ -248,11 +251,16 @@ async def handle_request(self, request: Request): relevant_docs.append(doc) # Update the initial inputs with the relevant documents - query = initial_inputs["query"] - initial_inputs = {} - initial_inputs["retrieved_docs"] = relevant_docs - initial_inputs["initial_query"] = query - megaservice = self.megaservice_llm + if len(relevant_docs)>0: + logger.info(f"[ CodeGenService - handle_request ] {len(relevant_docs)} relevant document\s found.") + query = initial_inputs["query"] + initial_inputs = {} + initial_inputs["retrieved_docs"] = relevant_docs + initial_inputs["initial_query"] = query + + else: + logger.info("[ CodeGenService - handle_request ] Could not find any relevant documents. The query will be used as input to the LLM.") + else: # Use the combined retriever and LLM microservice megaservice = self.megaservice_retriever_llm diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml index 0987b6fa8e..3d132d29f9 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml +++ b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml @@ -6,8 +6,8 @@ services: tgi-service: image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu container_name: tgi-server - # profiles: - # - codegen-xeon-tgi + profiles: + - codegen-xeon-tgi ports: - "8028:80" volumes: @@ -25,7 +25,6 @@ services: timeout: 10s retries: 100 command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 - vllm-service: image: ${REGISTRY:-opea}/vllm:${TAG:-latest} container_name: vllm-server @@ -48,7 +47,6 @@ services: timeout: 10s retries: 100 command: --model ${LLM_MODEL_ID} --host 0.0.0.0 --port 80 - llm-base: image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} container_name: llm-textgen-server @@ -60,12 +58,11 @@ services: LLM_MODEL_ID: ${LLM_MODEL_ID} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped - llm-tgi-service: extends: llm-base container_name: llm-codegen-tgi-server - # profiles: - # - codegen-xeon-tgi + profiles: + - codegen-xeon-tgi ports: - "9000:9000" ipc: host @@ -83,7 +80,6 @@ services: depends_on: vllm-service: condition: service_healthy - codegen-xeon-backend-server: image: ${REGISTRY:-opea}/codegen:${TAG:-latest} container_name: codegen-xeon-backend-server @@ -117,14 +113,12 @@ services: - BASIC_URL=${BACKEND_SERVICE_ENDPOINT} ipc: host restart: always - redis-vector-db: image: redis/redis-stack:7.2.0-v9 container_name: redis-vector-db ports: - "${REDIS_DB_PORT}:${REDIS_DB_PORT}" - "${REDIS_INSIGHTS_PORT}:${REDIS_INSIGHTS_PORT}" - dataprep-redis-server: image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} container_name: 
dataprep-redis-server @@ -142,7 +136,6 @@ services: HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} LOGFLAG: true restart: unless-stopped - tei-embedding-serving: image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 container_name: tei-embedding-serving @@ -163,7 +156,6 @@ services: interval: 10s timeout: 6s retries: 48 - tei-embedding-server: image: ${REGISTRY:-opea}/embedding:${TAG:-latest} container_name: tei-embedding-server @@ -180,7 +172,6 @@ services: tei-embedding-serving: condition: service_healthy restart: unless-stopped - retriever-redis: image: ${REGISTRY:-opea}/retriever:${TAG:-latest} container_name: retriever-redis @@ -203,9 +194,6 @@ services: LOGFLAG: ${LOGFLAG} RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_REDIS} restart: unless-stopped - - - networks: default: - driver: bridge + driver: bridge \ No newline at end of file From 8875802c6ffb9369deae44be25f933def29d47c5 Mon Sep 17 00:00:00 2001 From: Mustafa Date: Thu, 27 Mar 2025 13:07:14 -0700 Subject: [PATCH 05/22] update readme file Signed-off-by: Mustafa --- CodeGen/README.md | 54 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 43 insertions(+), 11 deletions(-) diff --git a/CodeGen/README.md b/CodeGen/README.md index 00d54adbc2..647cad3b65 100644 --- a/CodeGen/README.md +++ b/CodeGen/README.md @@ -1,6 +1,6 @@ # Code Generation Application -Code Generation (CodeGen) Large Language Models (LLMs) are specialized AI models designed for the task of generating computer code. Such models undergo training with datasets that encompass repositories, specialized documentation, programming code, relevant web content, and other related data. They possess a deep understanding of various programming languages, coding patterns, and software development concepts. CodeGen LLMs are engineered to assist developers and programmers. When these LLMs are seamlessly integrated into the developer's Integrated Development Environment (IDE), they possess a comprehensive understanding of the coding context, which includes elements such as comments, function names, and variable names. This contextual awareness empowers them to provide more refined and contextually relevant coding suggestions. +Code Generation (CodeGen) Large Language Models (LLMs) are specialized AI models designed for the task of generating computer code. Such models undergo training with datasets that encompass repositories, specialized documentation, programming code, relevant web content, and other related data. They possess a deep understanding of various programming languages, coding patterns, and software development concepts. CodeGen LLMs are engineered to assist developers and programmers. When these LLMs are seamlessly integrated into the developer's Integrated Development Environment (IDE), they possess a comprehensive understanding of the coding context, which includes elements such as comments, function names, and variable names. This contextual awareness empowers them to provide more refined and contextually relevant coding suggestions. Additionally Retrieval-Augmented Generation (RAG) and Agents are parts of the CodeGen example which provide an additional layer of intelligence and adaptability, ensuring that the generated code is not only relevant but also accurate, efficient, and tailored to the specific needs of the developers and programmers. 
The capabilities of CodeGen LLMs include: @@ -20,6 +20,7 @@ The workflow falls into the following architecture: The CodeGen example is implemented using the component-level microservices defined in [GenAIComps](https://github.com/opea-project/GenAIComps). The flow chart below shows the information flow between different microservices for this example. + ```mermaid --- config: @@ -28,7 +29,7 @@ config: rankSpacing: 100 curve: linear themeVariables: - fontSize: 50px + fontSize: 25px --- flowchart LR %% Colors %% @@ -37,34 +38,56 @@ flowchart LR classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 classDef invisible fill:transparent,stroke:transparent; style CodeGen-MegaService stroke:#000000 - %% Subgraphs %% - subgraph CodeGen-MegaService["CodeGen MegaService "] + subgraph CodeGen-MegaService["CodeGen-MegaService"] direction LR - LLM([LLM MicroService]):::blue + EM([Embedding
MicroService]):::blue
+        RET([Retrieval <br> MicroService]):::blue
+        RER([Agents]):::blue
+        LLM([LLM
MicroService]):::blue end - subgraph UserInterface[" User Interface "] + subgraph User Interface direction LR - a([User Input Query]):::orchid - UI([UI server
]):::orchid + a([Submit Query Tab]):::orchid + UI([UI server]):::orchid + Ingest([Manage Resources]):::orchid end + CLIP_EM{{Embedding
service}} + VDB{{Vector DB}} + V_RET{{Retriever
service}} + Ingest{{Ingest data}} + DP([Data Preparation]):::blue + LLM_gen{{LLM Service}} + GW([CodeGen GateWay]):::orange - LLM_gen{{LLM Service
}} - GW([CodeGen GateWay
]):::orange - + %% Data Preparation flow + %% Ingest data flow + direction LR + Ingest[Ingest data] --> UI + UI --> DP + DP <-.-> CLIP_EM %% Questions interaction direction LR a[User Input Query] --> UI UI --> GW GW <==> CodeGen-MegaService + EM ==> RET + RET ==> RER + RER ==> LLM %% Embedding service flow direction LR + EM <-.-> CLIP_EM + RET <-.-> V_RET LLM <-.-> LLM_gen + direction TB + %% Vector DB interaction + V_RET <-.->VDB + DP <-.->VDB ``` ## Deploy CodeGen Service @@ -161,6 +184,15 @@ Two ways of consuming CodeGen Service: -d '{"messages": "Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception."}' ``` + If the user wants a CodeGen service with RAG and Agents based on dedicated documentation. + + ```bash + curl http://localhost:7778/v1/codegen \ + -H "Content-Type: application/json" \ + -d '{"agents_flag": "True", "index_name": "my_API_document", "messages": "Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception."}' + + ``` + 2. Access via frontend To access the frontend, open the following URL in your browser: http://{host_ip}:5173. From f33ba5ae77d1cdc43f454ebc31b4a8d693f77fb3 Mon Sep 17 00:00:00 2001 From: Mustafa Date: Thu, 27 Mar 2025 13:35:19 -0700 Subject: [PATCH 06/22] update readme Signed-off-by: Mustafa --- CodeGen/README.md | 20 ++++- .../docker_compose/intel/cpu/xeon/README.md | 84 ++++++++++++++++--- .../docker_compose/intel/hpu/gaudi/README.md | 84 ++++++++++++++++--- 3 files changed, 159 insertions(+), 29 deletions(-) diff --git a/CodeGen/README.md b/CodeGen/README.md index 647cad3b65..692e01848b 100644 --- a/CodeGen/README.md +++ b/CodeGen/README.md @@ -58,7 +58,7 @@ flowchart LR V_RET{{Retriever
service}} Ingest{{Ingest data}} DP([Data Preparation]):::blue - LLM_gen{{LLM Service}} + LLM_gen{{TGI Service}} GW([CodeGen GateWay]):::orange %% Data Preparation flow @@ -161,11 +161,25 @@ Refer to the [Gaudi Guide](./docker_compose/intel/hpu/gaudi/README.md) to build Find the corresponding [compose.yaml](./docker_compose/intel/cpu/xeon/compose.yaml). +Start CodeGen based on TGI service: + ```bash -cd GenAIExamples/CodeGen/docker_compose/intel/cpu/xeon -docker compose up -d +cd GenAIExamples/CodeGen/docker_compose +source set_env.sh +cd intel/cpu/xeon +docker compose --profile codegen-xeon-tgi up -d ``` +Start CodeGen based on vLLM service: + +```bash +cd GenAIExamples/CodeGen/docker_compose +source set_env.sh +cd intel/cpu/xeon +docker compose --profile codegen-xeon-vllm up -d +``` + + Refer to the [Xeon Guide](./docker_compose/intel/cpu/xeon/README.md) for more instructions on building docker images from source. ### Deploy CodeGen on Kubernetes using Helm Chart diff --git a/CodeGen/docker_compose/intel/cpu/xeon/README.md b/CodeGen/docker_compose/intel/cpu/xeon/README.md index 3cc7a19b3c..75c7b1851b 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/README.md +++ b/CodeGen/docker_compose/intel/cpu/xeon/README.md @@ -13,28 +13,77 @@ After launching your instance, you can connect to it using SSH (for Linux instan ## 🚀 Start Microservices and MegaService -The CodeGen megaservice manages a single microservice called LLM within a Directed Acyclic Graph (DAG). In the diagram above, the LLM microservice is a language model microservice that generates code snippets based on the user's input query. The TGI service serves as a text generation interface, providing a RESTful API for the LLM microservice. The CodeGen Gateway acts as the entry point for the CodeGen application, invoking the Megaservice to generate code snippets in response to the user's input query. +The CodeGen megaservice manages a several microservices including 'Embedding MicroService', 'Retrieval MicroService' and 'LLM MicroService' within a Directed Acyclic Graph (DAG). In the diagram below, the LLM microservice is a language model microservice that generates code snippets based on the user's input query. The TGI service serves as a text generation interface, providing a RESTful API for the LLM microservice. Data Preparation allows users to save/update documents or online resources to the vector database. Users can upload files or provide URLs, and manage their saved resources. The CodeGen Gateway acts as the entry point for the CodeGen application, invoking the Megaservice to generate code snippets in response to the user's input query. The mega flow of the CodeGen application, from user's input query to the application's output response, is as follows: ```mermaid +--- +config: + flowchart: + nodeSpacing: 400 + rankSpacing: 100 + curve: linear + themeVariables: + fontSize: 25px +--- flowchart LR - subgraph CodeGen + %% Colors %% + classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 + classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 + classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 + classDef invisible fill:transparent,stroke:transparent; + style CodeGen-MegaService stroke:#000000 + %% Subgraphs %% + subgraph CodeGen-MegaService["CodeGen-MegaService"] direction LR - A[User] --> |Input query| B[CodeGen Gateway] - B --> |Invoke| Megaservice - subgraph Megaservice["Megaservice"] - direction TB - C((LLM
9000)) -. Post .-> D{{TGI Service
8028}} - end - Megaservice --> |Output| E[Response] + EM([Embedding
MicroService]):::blue
+        RET([Retrieval <br> MicroService]):::blue
+        RER([Agents]):::blue
+        LLM([LLM
MicroService]):::blue end - - subgraph Legend + subgraph User Interface direction LR - G([Microservice]) ==> H([Microservice]) - I([Microservice]) -.-> J{{Server API}} + a([Submit Query Tab]):::orchid + UI([UI server]):::orchid + Ingest([Manage Resources]):::orchid end + + CLIP_EM{{Embedding
service}} + VDB{{Vector DB}} + V_RET{{Retriever
service}} + Ingest{{Ingest data}} + DP([Data Preparation]):::blue + LLM_gen{{TGI Service}} + GW([CodeGen GateWay]):::orange + + %% Data Preparation flow + %% Ingest data flow + direction LR + Ingest[Ingest data] --> UI + UI --> DP + DP <-.-> CLIP_EM + + %% Questions interaction + direction LR + a[User Input Query] --> UI + UI --> GW + GW <==> CodeGen-MegaService + EM ==> RET + RET ==> RER + RER ==> LLM + + + %% Embedding service flow + direction LR + EM <-.-> CLIP_EM + RET <-.-> V_RET + LLM <-.-> LLM_gen + + direction TB + %% Vector DB interaction + V_RET <-.->VDB + DP <-.->VDB ``` ### Setup Environment Variables @@ -111,6 +160,15 @@ docker compose --profile codegen-xeon-vllm up -d }' ``` + If the user wants a CodeGen service with RAG and Agents based on dedicated documentation. + + ```bash + curl http://localhost:7778/v1/codegen \ + -H "Content-Type: application/json" \ + -d '{"agents_flag": "True", "index_name": "my_API_document", "messages": "Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception."}' + ``` + + ## 🚀 Launch the UI To access the frontend, open the following URL in your browser: `http://{host_ip}:5173`. By default, the UI runs on port 5173 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below: diff --git a/CodeGen/docker_compose/intel/hpu/gaudi/README.md b/CodeGen/docker_compose/intel/hpu/gaudi/README.md index 133b32f09f..5408e33654 100644 --- a/CodeGen/docker_compose/intel/hpu/gaudi/README.md +++ b/CodeGen/docker_compose/intel/hpu/gaudi/README.md @@ -6,28 +6,77 @@ The default pipeline deploys with vLLM as the LLM serving component. It also pro ## 🚀 Start MicroServices and MegaService -The CodeGen megaservice manages a single microservice called LLM within a Directed Acyclic Graph (DAG). In the diagram above, the LLM microservice is a language model microservice that generates code snippets based on the user's input query. The TGI service serves as a text generation interface, providing a RESTful API for the LLM microservice. The CodeGen Gateway acts as the entry point for the CodeGen application, invoking the Megaservice to generate code snippets in response to the user's input query. +The CodeGen megaservice manages a several microservices including 'Embedding MicroService', 'Retrieval MicroService' and 'LLM MicroService' within a Directed Acyclic Graph (DAG). In the diagram below, the LLM microservice is a language model microservice that generates code snippets based on the user's input query. The TGI service serves as a text generation interface, providing a RESTful API for the LLM microservice. Data Preparation allows users to save/update documents or online resources to the vector database. Users can upload files or provide URLs, and manage their saved resources. The CodeGen Gateway acts as the entry point for the CodeGen application, invoking the Megaservice to generate code snippets in response to the user's input query. 
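+
+As a quick reference, the snippet below sketches the two request modes the CodeGen gateway accepts. The endpoint and field names mirror the request examples later in this guide; the prompt text and the `my_API_document` index name are placeholders only, and the index must have been ingested through the data preparation service beforehand.
+
+```bash
+# Plain code generation, handled by the LLM microservice only
+curl http://localhost:7778/v1/codegen \
+  -H "Content-Type: application/json" \
+  -d '{"messages": "Implement a function that checks whether a string is a palindrome."}'
+
+# Retrieval-augmented generation with agent-based relevance grading
+curl http://localhost:7778/v1/codegen \
+  -H "Content-Type: application/json" \
+  -d '{"agents_flag": "True", "index_name": "my_API_document", "messages": "Implement a function that checks whether a string is a palindrome."}'
+```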
The mega flow of the CodeGen application, from user's input query to the application's output response, is as follows: ```mermaid +--- +config: + flowchart: + nodeSpacing: 400 + rankSpacing: 100 + curve: linear + themeVariables: + fontSize: 25px +--- flowchart LR - subgraph CodeGen + %% Colors %% + classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 + classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 + classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 + classDef invisible fill:transparent,stroke:transparent; + style CodeGen-MegaService stroke:#000000 + %% Subgraphs %% + subgraph CodeGen-MegaService["CodeGen-MegaService"] direction LR - A[User] --> |Input query| B[CodeGen Gateway] - B --> |Invoke| Megaservice - subgraph Megaservice["Megaservice"] - direction TB - C((LLM
9000)) -. Post .-> D{{TGI Service
8028}} - end - Megaservice --> |Output| E[Response] + EM([Embedding
MicroService]):::blue
+        RET([Retrieval <br> MicroService]):::blue
+        RER([Agents]):::blue
+        LLM([LLM
MicroService]):::blue end - - subgraph Legend + subgraph User Interface direction LR - G([Microservice]) ==> H([Microservice]) - I([Microservice]) -.-> J{{Server API}} + a([Submit Query Tab]):::orchid + UI([UI server]):::orchid + Ingest([Manage Resources]):::orchid end + + CLIP_EM{{Embedding
service}} + VDB{{Vector DB}} + V_RET{{Retriever
service}} + Ingest{{Ingest data}} + DP([Data Preparation]):::blue + LLM_gen{{TGI Service}} + GW([CodeGen GateWay]):::orange + + %% Data Preparation flow + %% Ingest data flow + direction LR + Ingest[Ingest data] --> UI + UI --> DP + DP <-.-> CLIP_EM + + %% Questions interaction + direction LR + a[User Input Query] --> UI + UI --> GW + GW <==> CodeGen-MegaService + EM ==> RET + RET ==> RER + RER ==> LLM + + + %% Embedding service flow + direction LR + EM <-.-> CLIP_EM + RET <-.-> V_RET + LLM <-.-> LLM_gen + + direction TB + %% Vector DB interaction + V_RET <-.->VDB + DP <-.->VDB ``` ### Setup Environment Variables @@ -104,6 +153,15 @@ docker compose --profile codegen-gaudi-vllm up -d }' ``` + If the user wants a CodeGen service with RAG and Agents based on dedicated documentation. + + ```bash + curl http://localhost:7778/v1/codegen \ + -H "Content-Type: application/json" \ + -d '{"agents_flag": "True", "index_name": "my_API_document", "messages": "Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception."}' + ``` + + ## 🚀 Launch the Svelte Based UI To access the frontend, open the following URL in your browser: `http://{host_ip}:5173`. By default, the UI runs on port 5173 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below: From 301b75da9c36c2637cfa2567be7aa09f3a6b4802 Mon Sep 17 00:00:00 2001 From: Mustafa Date: Thu, 13 Feb 2025 22:11:53 -0800 Subject: [PATCH 07/22] update the compose file Signed-off-by: Mustafa initial Signed-off-by: Mustafa added microservice ports Signed-off-by: okhleif-IL update codegen Signed-off-by: Mustafa update for codegen Signed-off-by: Mustafa Initial commit for Gradio UI Signed-off-by: okhleif-IL New UI Signed-off-by: okhleif-IL prepare for merge Signed-off-by: okhleif-IL add agents Signed-off-by: Mustafa env updates Signed-off-by: Mustafa update codegen Signed-off-by: Mustafa merged to main Signed-off-by: Mustafa updates Signed-off-by: Mustafa UI Updates Signed-off-by: okhleif-IL added dockerfile Signed-off-by: okhleif-IL removed files dataframe Signed-off-by: okhleif-IL updated file upload Signed-off-by: okhleif-IL added checkbox for agent Signed-off-by: okhleif-IL key_index_name --> index_name Signed-off-by: okhleif-IL added / removed print statements Signed-off-by: okhleif-IL Support for data streaming (from Melanie) Signed-off-by: okhleif-IL fixed file not supported bug Signed-off-by: okhleif-IL added refresh button to index Signed-off-by: okhleif-IL simplified README Signed-off-by: okhleif-IL --- CodeGen/Dockerfile | 47 +- CodeGen/codegen.py | 24 +- .../docker_compose/intel/cpu/xeon/README.md | 98 +---- .../intel/cpu/xeon/compose.yaml | 83 ++-- CodeGen/docker_compose/set_env.sh | 27 +- CodeGen/docker_image_build/build.yaml | 6 + CodeGen/ui/docker/Dockerfile.gradio | 33 ++ CodeGen/ui/gradio/README.md | 76 ++++ CodeGen/ui/gradio/codegen_ui_gradio.py | 402 ++++++++++++++++++ CodeGen/ui/gradio/requirements.txt | 6 + 10 files changed, 620 insertions(+), 182 deletions(-) create mode 100644 CodeGen/ui/docker/Dockerfile.gradio create mode 100644 CodeGen/ui/gradio/README.md create mode 100644 CodeGen/ui/gradio/codegen_ui_gradio.py create mode 100644 CodeGen/ui/gradio/requirements.txt diff --git a/CodeGen/Dockerfile b/CodeGen/Dockerfile index 5305a9d89f..b2b4155fd7 100644 --- a/CodeGen/Dockerfile +++ b/CodeGen/Dockerfile @@ -1,8 +1,51 @@ # 
Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -ARG BASE_TAG=latest -FROM opea/comps-base:$BASE_TAG +# Stage 1: base setup used by other stages +FROM python:3.11-slim AS base + +# get security updates +RUN apt-get update && apt-get upgrade -y && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +ENV HOME=/home/user + +RUN useradd -m -s /bin/bash user && \ + mkdir -p $HOME && \ + chown -R user $HOME + +WORKDIR $HOME + + +# Stage 2: latest GenAIComps sources +FROM base AS git + +RUN apt-get update && apt-get install -y --no-install-recommends git +# RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git +COPY GenAIComps GenAIComps + + +# Stage 3: common layer shared by services using GenAIComps +FROM base AS comps-base + +# copy just relevant parts +COPY --from=git $HOME/GenAIComps/comps $HOME/GenAIComps/comps +COPY --from=git $HOME/GenAIComps/*.* $HOME/GenAIComps/LICENSE $HOME/GenAIComps/ + +WORKDIR $HOME/GenAIComps +RUN pip install --no-cache-dir --upgrade pip setuptools && \ + pip install --no-cache-dir -r $HOME/GenAIComps/requirements.txt +WORKDIR $HOME + +ENV PYTHONPATH=$PYTHONPATH:$HOME/GenAIComps + +USER user + + +# Stage 4: unique part +FROM comps-base + +ENV LANG=C.UTF-8 COPY ./codegen.py $HOME/codegen.py diff --git a/CodeGen/codegen.py b/CodeGen/codegen.py index 6384efaa47..a5d79f8dd2 100644 --- a/CodeGen/codegen.py +++ b/CodeGen/codegen.py @@ -19,9 +19,6 @@ from fastapi.responses import StreamingResponse from langchain.prompts import PromptTemplate -logger = CustomLogger("opea_dataprep_microservice") -logflag = os.getenv("LOGFLAG", False) - MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 7778)) LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0") @@ -83,7 +80,7 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k embedding = inputs['data'][0]['embedding'] # Align the inputs for the retriever service inputs = { - "index_name": llm_parameters_dict["index_name"], + "index_name": llm_parameters_dict["key_index_name"], "text": self.input_query, "embedding": embedding } @@ -201,14 +198,14 @@ async def handle_request(self, request: Request): presence_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 0.0, repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03, stream=stream_opt, - index_name=chat_request.index_name + key_index_name=chat_request.key_index_name ) # Initialize the initial inputs with the generated prompt initial_inputs = {"query": prompt} # Check if the key index name is provided in the parameters - if parameters.index_name: + if parameters.key_index_name: if agents_flag: # Schedule the retriever microservice result_ret, runtime_graph = await self.megaservice_retriever.schedule( @@ -251,16 +248,11 @@ async def handle_request(self, request: Request): relevant_docs.append(doc) # Update the initial inputs with the relevant documents - if len(relevant_docs)>0: - logger.info(f"[ CodeGenService - handle_request ] {len(relevant_docs)} relevant document\s found.") - query = initial_inputs["query"] - initial_inputs = {} - initial_inputs["retrieved_docs"] = relevant_docs - initial_inputs["initial_query"] = query - - else: - logger.info("[ CodeGenService - handle_request ] Could not find any relevant documents. 
The query will be used as input to the LLM.") - + query = initial_inputs["query"] + initial_inputs = {} + initial_inputs["retrieved_docs"] = relevant_docs + initial_inputs["initial_query"] = query + megaservice = self.megaservice_llm else: # Use the combined retriever and LLM microservice megaservice = self.megaservice_retriever_llm diff --git a/CodeGen/docker_compose/intel/cpu/xeon/README.md b/CodeGen/docker_compose/intel/cpu/xeon/README.md index 75c7b1851b..e6ed4334a8 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/README.md +++ b/CodeGen/docker_compose/intel/cpu/xeon/README.md @@ -3,89 +3,6 @@ This document outlines the deployment process for a CodeGen application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on Intel Xeon server. The steps include Docker images creation, container deployment via Docker Compose, and service execution to integrate microservices such as `llm`. We will publish the Docker images to Docker Hub soon, further simplifying the deployment process for this service. The default pipeline deploys with vLLM as the LLM serving component. It also provides options of using TGI backend for LLM microservice. -## 🚀 Create an AWS Xeon Instance - -To run the example on an AWS Xeon instance, start by creating an AWS account if you don't have one already. Then, get started with the [EC2 Console](https://console.aws.amazon.com/ec2/v2/home). AWS EC2 M7i, C7i, C7i-flex and M7i-flex instances are 4th Generation Intel Xeon Scalable processors suitable for the task. - -For detailed information about these instance types, you can refer to [m7i](https://aws.amazon.com/ec2/instance-types/m7i/). Once you've chosen the appropriate instance type, proceed with configuring your instance settings, including network configurations, security groups, and storage options. - -After launching your instance, you can connect to it using SSH (for Linux instances) or Remote Desktop Protocol (RDP) (for Windows instances). From there, you'll have full access to your Xeon server, allowing you to install, configure, and manage your applications as needed. - -## 🚀 Start Microservices and MegaService - -The CodeGen megaservice manages a several microservices including 'Embedding MicroService', 'Retrieval MicroService' and 'LLM MicroService' within a Directed Acyclic Graph (DAG). In the diagram below, the LLM microservice is a language model microservice that generates code snippets based on the user's input query. The TGI service serves as a text generation interface, providing a RESTful API for the LLM microservice. Data Preparation allows users to save/update documents or online resources to the vector database. Users can upload files or provide URLs, and manage their saved resources. The CodeGen Gateway acts as the entry point for the CodeGen application, invoking the Megaservice to generate code snippets in response to the user's input query. 
- -The mega flow of the CodeGen application, from user's input query to the application's output response, is as follows: - -```mermaid ---- -config: - flowchart: - nodeSpacing: 400 - rankSpacing: 100 - curve: linear - themeVariables: - fontSize: 25px ---- -flowchart LR - %% Colors %% - classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 - classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 - classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 - classDef invisible fill:transparent,stroke:transparent; - style CodeGen-MegaService stroke:#000000 - %% Subgraphs %% - subgraph CodeGen-MegaService["CodeGen-MegaService"] - direction LR - EM([Embedding
MicroService]):::blue - RET([Retrieval
MicroService]):::blue - RER([Agents]):::blue - LLM([LLM
MicroService]):::blue - end - subgraph User Interface - direction LR - a([Submit Query Tab]):::orchid - UI([UI server]):::orchid - Ingest([Manage Resources]):::orchid - end - - CLIP_EM{{Embedding
service}} - VDB{{Vector DB}} - V_RET{{Retriever
service}} - Ingest{{Ingest data}} - DP([Data Preparation]):::blue - LLM_gen{{TGI Service}} - GW([CodeGen GateWay]):::orange - - %% Data Preparation flow - %% Ingest data flow - direction LR - Ingest[Ingest data] --> UI - UI --> DP - DP <-.-> CLIP_EM - - %% Questions interaction - direction LR - a[User Input Query] --> UI - UI --> GW - GW <==> CodeGen-MegaService - EM ==> RET - RET ==> RER - RER ==> LLM - - - %% Embedding service flow - direction LR - EM <-.-> CLIP_EM - RET <-.-> V_RET - LLM <-.-> LLM_gen - - direction TB - %% Vector DB interaction - V_RET <-.->VDB - DP <-.->VDB -``` - ### Setup Environment Variables Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below. @@ -175,17 +92,12 @@ To access the frontend, open the following URL in your browser: `http://{host_ip ```yaml codegen-xeon-ui-server: - image: opea/codegen-ui:latest + image: opea/codegen-gradio-ui:latest ... ports: - "80:5173" ``` -![project-screenshot](../../../../assets/img/codeGen_ui_init.jpg) - -Here is an example of running CodeGen in the UI: - -![project-screenshot](../../../../assets/img/codeGen_ui_response.png) ## 🚀 Launch the React Based UI (Optional) @@ -314,15 +226,15 @@ cd GenAIExamples/CodeGen/ui docker build -t opea/codegen-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile . ``` -### 4. Build CodeGen React UI Docker Image (Optional) +### 4. Build CodeGen Gradio UI Docker Image -Build react frontend Docker image via below command: +Build gradio frontend Docker image via below command: **Export the value of the public IP address of your Xeon server to the `host_ip` environment variable** ```bash cd GenAIExamples/CodeGen/ui -docker build --no-cache -t opea/codegen-react-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react . +docker build --no-cache -t opea/codegen-gradio-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.gradio . 
``` Then run the command `docker images`, you will have the following Docker Images: @@ -330,4 +242,4 @@ Then run the command `docker images`, you will have the following Docker Images: - `opea/llm-textgen:latest` - `opea/codegen:latest` - `opea/codegen-ui:latest` -- `opea/codegen-react-ui:latest` (optional) +- `opea/codegen-gradio-ui:latest` diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml index 3d132d29f9..3f6573f01e 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml +++ b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml @@ -5,13 +5,11 @@ services: tgi-service: image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu - container_name: tgi-server - profiles: - - codegen-xeon-tgi + container_name: tgi-service ports: - "8028:80" volumes: - - "${MODEL_CACHE:-./data}:/data" + - "./data:/data" shm_size: 1g environment: no_proxy: ${no_proxy} @@ -25,82 +23,47 @@ services: timeout: 10s retries: 100 command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 - vllm-service: - image: ${REGISTRY:-opea}/vllm:${TAG:-latest} - container_name: vllm-server - profiles: - - codegen-xeon-vllm - ports: - - "8028:80" - volumes: - - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub" - shm_size: 1g - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - host_ip: ${host_ip} - healthcheck: - test: ["CMD-SHELL", "curl -f http://$host_ip:8028/health || exit 1"] - interval: 10s - timeout: 10s - retries: 100 - command: --model ${LLM_MODEL_ID} --host 0.0.0.0 --port 80 - llm-base: + llm: image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} container_name: llm-textgen-server + depends_on: + tgi-service: + condition: service_healthy + ports: + - "9000:9000" + ipc: host environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - LLM_ENDPOINT: ${LLM_ENDPOINT} + LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped - llm-tgi-service: - extends: llm-base - container_name: llm-codegen-tgi-server - profiles: - - codegen-xeon-tgi - ports: - - "9000:9000" - ipc: host - depends_on: - tgi-service: - condition: service_healthy - llm-vllm-service: - extends: llm-base - container_name: llm-codegen-vllm-server - profiles: - - codegen-xeon-vllm - ports: - - "9000:9000" - ipc: host - depends_on: - vllm-service: - condition: service_healthy codegen-xeon-backend-server: image: ${REGISTRY:-opea}/codegen:${TAG:-latest} container_name: codegen-xeon-backend-server depends_on: - - llm-base + - llm ports: - "7778:7778" environment: - no_proxy=${no_proxy} - https_proxy=${https_proxy} - http_proxy=${http_proxy} - - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} - - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP} - - RETRIEVAL_SERVICE_HOST_IP=${RETRIEVAL_SERVICE_HOST_IP} + - MEGA_SERVICE_HOST_IP=${host_ip} #${MEGA_SERVICE_HOST_IP} + - LLM_SERVICE_HOST_IP=${host_ip} #${LLM_SERVICE_HOST_IP} + # - RETRIEVAL_SERVICE_HOST_IP=${REDIS_RETRIEVER_PORT} + - RETRIEVAL_SERVICE_HOST_IP=${host_ip} #${RETRIEVAL_SERVICE_HOST_IP} - REDIS_RETRIEVER_PORT=${REDIS_RETRIEVER_PORT} - - TEI_EMBEDDING_HOST_IP=${TEI_EMBEDDING_HOST_IP} + # - MM_EMBEDDING_SERVICE_HOST_IP=${MM_EMBEDDING_PORT_MICROSERVICE} + - TEI_EMBEDDING_HOST_IP=${host_ip} #${TEI_EMBEDDING_HOST_IP} - EMBEDDER_PORT=${EMBEDDER_PORT} + ipc: host restart: always codegen-xeon-ui-server: - image: 
${REGISTRY:-opea}/codegen-ui:${TAG:-latest} + image: ${REGISTRY:-opea}/codegen-gradio-ui:${TAG:-latest} container_name: codegen-xeon-ui-server depends_on: - codegen-xeon-backend-server @@ -111,6 +74,9 @@ services: - https_proxy=${https_proxy} - http_proxy=${http_proxy} - BASIC_URL=${BACKEND_SERVICE_ENDPOINT} + - MEGA_SERVICE_PORT=${MEGA_SERVICE_PORT} + - host_ip=${host_ip} + - DATAPREP_ENDPOINT=${DATAPREP_ENDPOINT} ipc: host restart: always redis-vector-db: @@ -119,6 +85,7 @@ services: ports: - "${REDIS_DB_PORT}:${REDIS_DB_PORT}" - "${REDIS_INSIGHTS_PORT}:${REDIS_INSIGHTS_PORT}" + dataprep-redis-server: image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} container_name: dataprep-redis-server @@ -136,6 +103,7 @@ services: HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} LOGFLAG: true restart: unless-stopped + tei-embedding-serving: image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 container_name: tei-embedding-serving @@ -156,6 +124,7 @@ services: interval: 10s timeout: 6s retries: 48 + tei-embedding-server: image: ${REGISTRY:-opea}/embedding:${TAG:-latest} container_name: tei-embedding-server @@ -172,6 +141,7 @@ services: tei-embedding-serving: condition: service_healthy restart: unless-stopped + retriever-redis: image: ${REGISTRY:-opea}/retriever:${TAG:-latest} container_name: retriever-redis @@ -194,6 +164,7 @@ services: LOGFLAG: ${LOGFLAG} RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_REDIS} restart: unless-stopped + networks: default: driver: bridge \ No newline at end of file diff --git a/CodeGen/docker_compose/set_env.sh b/CodeGen/docker_compose/set_env.sh index dd0b97a551..d4cff7af39 100644 --- a/CodeGen/docker_compose/set_env.sh +++ b/CodeGen/docker_compose/set_env.sh @@ -2,26 +2,18 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -pushd "../../" > /dev/null -source .set_env.sh -popd > /dev/null +# pushd "../../" > /dev/null +# source .set_env.sh +# popd > /dev/null export your_ip=$(hostname -I | awk '{print $1}') export host_ip=$(hostname -I | awk '{print $1}') -if [ -z "${HUGGINGFACEHUB_API_TOKEN}" ]; then - echo "Error: HUGGINGFACEHUB_API_TOKEN is not set. Please set HUGGINGFACEHUB_API_TOKEN" -fi - -if [ -z "${host_ip}" ]; then - echo "Error: host_ip is not set. Please set host_ip first." 
-fi - -export no_proxy=${no_proxy},${host_ip} - +export no_proxy="${no_proxy},${host_ip}" export http_proxy=${http_proxy} export https_proxy=${https_proxy} +# export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct" export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-32B-Instruct" export LLM_SERVICE_PORT=9000 export LLM_ENDPOINT="http://${host_ip}:8028" @@ -30,7 +22,7 @@ export TGI_LLM_ENDPOINT="http://${host_ip}:8028" export MEGA_SERVICE_PORT=7778 export MEGA_SERVICE_HOST_IP=${host_ip} -export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:7778/v1/codegen" +export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${MEGA_SERVICE_PORT}/v1/codegen" export REDIS_DB_PORT=6379 export REDIS_INSIGHTS_PORT=8001 @@ -47,5 +39,10 @@ export TEI_EMBEDDING_HOST_IP=${host_ip} export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" export DATAPREP_REDIS_PORT=6007 +export DATAPREP_ENDPOINT="http://${host_ip}:${DATAPREP_REDIS_PORT}/v1/dataprep" export LOGFLAG=false -export MODEL_CACHE="./data" + + +# export LLM_SERVICE_HOST_IP=${host_ip} +# export RETRIEVAL_SERVICE_HOST_IP=${host_ip} +# export TEI_EMBEDDING_HOST_IP=${host_ip} \ No newline at end of file diff --git a/CodeGen/docker_image_build/build.yaml b/CodeGen/docker_image_build/build.yaml index 3275aa71bf..52ca23b109 100644 --- a/CodeGen/docker_image_build/build.yaml +++ b/CodeGen/docker_image_build/build.yaml @@ -23,6 +23,12 @@ services: dockerfile: ./docker/Dockerfile.react extends: codegen image: ${REGISTRY:-opea}/codegen-react-ui:${TAG:-latest} + codegen-gradio-ui: + build: + context: ../ui + dockerfile: ./docker/Dockerfile.gradio + extends: codegen + image: ${REGISTRY:-opea}/codegen-gradio-ui:${TAG:-latest} llm-textgen: build: context: GenAIComps diff --git a/CodeGen/ui/docker/Dockerfile.gradio b/CodeGen/ui/docker/Dockerfile.gradio new file mode 100644 index 0000000000..11a4f4f581 --- /dev/null +++ b/CodeGen/ui/docker/Dockerfile.gradio @@ -0,0 +1,33 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM python:3.11-slim + +ENV LANG=C.UTF-8 + +ARG ARCH="cpu" + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + default-jre \ + libgl1-mesa-glx \ + libjemalloc-dev \ + wget + +# Install ffmpeg static build +WORKDIR /root +RUN wget https://johnvansickle.com/ffmpeg/builds/ffmpeg-git-amd64-static.tar.xz && \ + mkdir ffmpeg-git-amd64-static && tar -xvf ffmpeg-git-amd64-static.tar.xz -C ffmpeg-git-amd64-static --strip-components 1 && \ + export PATH=/root/ffmpeg-git-amd64-static:$PATH && \ + cp /root/ffmpeg-git-amd64-static/ffmpeg /usr/local/bin/ && \ + cp /root/ffmpeg-git-amd64-static/ffprobe /usr/local/bin/ + +RUN mkdir -p /home/user + +COPY gradio /home/user/gradio + +RUN pip install --no-cache-dir --upgrade pip setuptools && \ +pip install --no-cache-dir -r /home/user/gradio/requirements.txt + +WORKDIR /home/user/gradio +ENTRYPOINT ["python", "codegen_ui_gradio.py"] diff --git a/CodeGen/ui/gradio/README.md b/CodeGen/ui/gradio/README.md new file mode 100644 index 0000000000..38b2e964a3 --- /dev/null +++ b/CodeGen/ui/gradio/README.md @@ -0,0 +1,76 @@ +# Document Summary + +This project provides a user interface for summarizing documents and text using a Dockerized frontend application. Users can upload files or paste text to generate summaries. 
+ +## Docker + +### Build UI Docker Image + +To build the frontend Docker image, navigate to the `GenAIExamples/DocSum/ui` directory and run the following command: + +```bash +cd GenAIExamples/CodeGen/ui +docker build -t opea/codegen-gradio-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile.gradio . +``` + +This command builds the Docker image with the tag `opea/docsum-ui:latest`. It also passes the proxy settings as build arguments to ensure that the build process can access the internet if you are behind a corporate firewall. + +### Run UI Docker Image + +To run the frontend Docker image, navigate to the `GenAIExamples/DocSum/ui/docker` directory and execute the following commands: + +```bash +cd GenAIExamples/CodeGen/ui/docker + +ip_address=$(hostname -I | awk '{print $1}') +docker run -d -p 5173:5173 --ipc=host \ + -e http_proxy=$http_proxy \ + -e https_proxy=$https_proxy \ + -e no_proxy=$no_proxy \ + -e BACKEND_SERVICE_ENDPOINT=http://$ip_address:8888/v1/docsum \ + opea/codegen-gradio-ui:latest +``` + +This command runs the Docker container in interactive mode, mapping port 5173 of the host to port 5173 of the container. It also sets several environment variables, including the backend service endpoint, which is required for the frontend to communicate with the backend service. + +### Python + +To run the frontend application directly using Python, navigate to the `GenAIExamples/DocSum/ui/gradio` directory and run the following command: + +```bash +cd GenAIExamples/CodeGen/ui/gradio +python codegen_ui_gradio.py +``` + +This command starts the frontend application using Python. + +## 📸 Project Screenshots + +![project-screenshot](../../assets/img/docSum_ui_gradio_text.png) + +### 🧐 Features + +Here are some of the project's features: + +- Summarizing Uploaded Files: Users can upload files from their local device. Once a file is uploaded, the summarization of the document will start automatically. The summary will be displayed in the 'Summary' box. +- Summarizing Text via Pasting: Users can paste the text to be summarized into the text box. By clicking the 'Generate Summary' button, a condensed summary of the content will be produced and displayed in the 'Summary' box on the right. + +## Additional Information + +### Prerequisites + +Ensure you have Docker installed and running on your system. Also, make sure you have the necessary proxy settings configured if you are behind a corporate firewall. + +### Environment Variables + +- `http_proxy`: Proxy setting for HTTP connections. +- `https_proxy`: Proxy setting for HTTPS connections. +- `no_proxy`: Comma-separated list of hosts that should be excluded from proxying. +- `BACKEND_SERVICE_ENDPOINT`: The endpoint of the backend service that the frontend will communicate with. + +### Troubleshooting + +- Docker Build Issues: If you encounter issues while building the Docker image, ensure that your proxy settings are correctly configured and that you have internet access. +- Docker Run Issues: If the Docker container fails to start, check the environment variables and ensure that the backend service is running and accessible. + +This README file provides detailed instructions and explanations for building and running the Dockerized frontend application, as well as running it directly using Python. It also highlights the key features of the project and provides additional information for troubleshooting and configuring the environment. 
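The Resource Management tab in the Gradio app that follows talks to the dataprep service rather than the CodeGen gateway. A rough sketch of the equivalent standalone calls, assuming the `DATAPREP_ENDPOINT` default from `set_env.sh` (`http://<host_ip>:6007/v1/dataprep`), is shown below; the file name and index name are invented for illustration.

```python
# Sketch of the ingestion and index-listing calls used by the Resource Management tab.
import requests

host_ip = "localhost"  # placeholder: the address exported as host_ip
dataprep = f"http://{host_ip}:6007/v1/dataprep"

# Ingest a local file into a named Redis index (UI defaults: chunk_size=1500, chunk_overlap=100).
with open("api_reference.txt", "rb") as f:  # hypothetical file
    resp = requests.post(
        f"{dataprep}/ingest",
        files={"files": f},
        data={"index_name": "my_code_docs", "chunk_size": 1500, "chunk_overlap": 100},
    )
print(resp.text)

# List the indices that can then be selected in the Code Generation tab.
print(requests.post(f"{dataprep}/indices").json())
```

The `/get` and `/delete` routes used elsewhere in the UI follow the same POST-with-form-data pattern.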
diff --git a/CodeGen/ui/gradio/codegen_ui_gradio.py b/CodeGen/ui/gradio/codegen_ui_gradio.py new file mode 100644 index 0000000000..873d0c42b4 --- /dev/null +++ b/CodeGen/ui/gradio/codegen_ui_gradio.py @@ -0,0 +1,402 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# This is a Gradio app that includes two tabs: one for code generation and another for resource management. +# The resource management tab has been updated to allow file uploads, deletion, and a table listing all the files. +# Additionally, three small text boxes have been added for managing file dataframe parameters. + +import argparse +import os +from pathlib import Path +import gradio as gr +from gradio_pdf import PDF +import requests +import pandas as pd +import os +import uvicorn +import json +import argparse +# from utils import build_logger, make_temp_image, server_error_msg, split_video +from urllib.parse import urlparse +from pathlib import Path +from fastapi import FastAPI +# from fastapi.responses import JSONResponse, StreamingResponse +from fastapi.staticfiles import StaticFiles + +# logger = build_logger("gradio_web_server", "gradio_web_server.log") +logflag = os.getenv("LOGFLAG", False) + +# create a FastAPI app +app = FastAPI() +cur_dir = os.getcwd() +static_dir = Path(os.path.join(cur_dir, "static/")) +tmp_dir = Path(os.path.join(cur_dir, "split_tmp_videos/")) + +Path(static_dir).mkdir(parents=True, exist_ok=True) +app.mount("/static", StaticFiles(directory=static_dir), name="static") + +tmp_upload_folder = "/tmp/gradio/" + + + +host_ip = os.getenv("host_ip") +DATAPREP_REDIS_PORT = os.getenv("DATAPREP_REDIS_PORT", 6007) +DATAPREP_ENDPOINT = os.getenv("DATAPREP_ENDPOINT", f"http://{host_ip}:{DATAPREP_REDIS_PORT}/v1/dataprep") +MEGA_SERVICE_PORT = os.getenv("MEGA_SERVICE_PORT", 7778) + +backend_service_endpoint = os.getenv( + "BACKEND_SERVICE_ENDPOINT", f"http://{host_ip}:{MEGA_SERVICE_PORT}/v1/codegen" + ) + +dataprep_ingest_endpoint = f"{DATAPREP_ENDPOINT}/ingest" +dataprep_get_files_endpoint = f"{DATAPREP_ENDPOINT}/get" +dataprep_delete_files_endpoint = f"{DATAPREP_ENDPOINT}/delete" +dataprep_get_indices_endpoint = f"{DATAPREP_ENDPOINT}/indices" + + + +# Define the functions that will be used in the app +def conversation_history(prompt, index, use_agent, history): + # Print the language and prompt, and return a placeholder code + print(f"Generating code for prompt: {prompt} using index: {index} and use_agent is {use_agent}") + history.append([prompt, ""]) + response_generator = generate_code(prompt, index, use_agent) + for token in response_generator: + history[-1][-1] += token + yield history + + +def upload_media(media, index=None, chunk_size=1500, chunk_overlap=100): + media = media.strip().split("\n") + print("Files passed is ", media, flush=True) + if not chunk_size: + chunk_size = 1500 + if not chunk_overlap: + chunk_overlap = 100 + + requests = [] + if type(media) is list: + for file in media: + file_ext = os.path.splitext(file)[-1] + if is_valid_url(file): + print(file, " is valid URL") + print("Ingesting URL...") + value = ingest_url(file, index, chunk_size, chunk_overlap) + requests.append(value) + yield value + elif file_ext in ['.pdf', '.txt']: + print("Ingesting File...") + value = ingest_file(file, index, chunk_size, chunk_overlap) + requests.append(value) + yield value + else: + print(file, "File type not supported") + yield ( + gr.Textbox( + visible=True, + value="Your file extension type is not supported.", + ) + ) + return + yield requests + + else: + 
file_ext = os.path.splitext(media)[-1] + if is_valid_url(media): + value = ingest_url(media, index, chunk_size, chunk_overlap) + yield value + elif file_ext in ['.pdf', '.txt']: + print("Ingesting File...") + value = ingest_file(media, index, chunk_size, chunk_overlap) + # print("Return value is: ", value, flush=True) + yield value + else: + print(media, "File type not supported") + yield ( + gr.Textbox( + visible=True, + value="Your file extension type is not supported.", + ) + ) + return + +def generate_code(query, index=None, use_agent=False): + if index is None or index == "None": + input_dict = {"messages": query, "agents_flag": use_agent} + else: + input_dict = {"messages": query, "index_name": index, "agents_flag": use_agent} + + print("Query is ", input_dict) + headers = {"Content-Type": "application/json"} + + response = requests.post(url=backend_service_endpoint, headers=headers, data=json.dumps(input_dict), stream=True) + + for line in response.iter_lines(): + if line: + line = line.decode('utf-8') + if line.startswith("data: "): # Only process lines starting with "data: " + json_part = line[len("data: "):] # Remove the "data: " prefix + if json_part.strip() == "[DONE]": # Ignore the DONE marker + continue + try: + json_obj = json.loads(json_part) # Convert to dictionary + if "choices" in json_obj: + for choice in json_obj["choices"]: + if "text" in choice: + # Yield each token individually + yield choice["text"] + except json.JSONDecodeError: + print("Error parsing JSON:", json_part) + + +def ingest_file(file, index=None, chunk_size=100, chunk_overlap=150): + headers = { + # "Content-Type: multipart/form-data" + } + file_input = {"files": open(file, "rb")} + + if index: + print("Index is", index) + data = {"index_name": index, "chunk_size": chunk_size, "chunk_overlap": chunk_overlap} + else: + data = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap} + + print("Calling Request Now!") + response = requests.post(url=dataprep_ingest_endpoint, headers=headers, files=file_input, data=data) + # print("Ingest Files", response) + print(response.text) + + # table = update_table() + return response.text + +def ingest_url(url, index=None, chunk_size=100, chunk_overlap=150): + print("URL is ", url) + url = str(url) + if not is_valid_url(url): + print("Invalid URL") + # yield ( + # gr.Textbox( + # visible=True, + # value="Invalid URL entered. 
Please enter a valid URL", + # ) + # ) + return + headers = { + # "Content-Type: multipart/form-data" + } + + if index: + url_input = {"link_list": json.dumps([url]), "index_name": index, "chunk_size": chunk_size, "chunk_overlap": chunk_overlap} + else: + url_input = {"link_list": json.dumps([url]), "chunk_size": chunk_size, "chunk_overlap": chunk_overlap} + response = requests.post(url=dataprep_ingest_endpoint, headers=headers, data=url_input) + # print("Ingest URL", response) + # table = update_table() + return response.text + + +def is_valid_url(url): + url = str(url) + try: + result = urlparse(url) + return all([result.scheme, result.netloc]) + except ValueError: + return False + + + +# Initialize the file list +file_list = [] + +# def update_files(file): +# # Add the uploaded file to the file list +# file_list.append(file.name) +# file_df["Files"] = file_list +# return file_df + + +def get_files(index=None): + headers = { + # "Content-Type: multipart/form-data" + } + if index == "All Files": + index = None + + if index: + index = {"index_name": index} + response = requests.post(url=dataprep_get_files_endpoint, headers=headers, data=index) + print("Get files with ", index, response) + table = response.json() + return table + else: + # print("URL IS ", dataprep_get_files_endpoint) + response = requests.post(url=dataprep_get_files_endpoint, headers=headers) + print("Get files ", response) + table = response.json() + return table + +def update_table(index=None): + if index == "All Files": + index = None + files = get_files(index) + print("Files is ", files) + if len(files) == 0: + df = pd.DataFrame(files, columns=["Files"]) + return df + else: + df = pd.DataFrame(files) + return df + +def update_indices(): + indices = get_indices() + df = pd.DataFrame(indices, columns=["File Databases"]) + return df + +def delete_file(file, index=None): + # Remove the selected file from the file list + headers = { + # "Content-Type: application/json" + } + print("URL IS ", dataprep_delete_files_endpoint) + if index: + file_input = {"files": open(file, "rb"), "index_name": index} + else: + file_input = {"files": open(file, "rb")} + response = requests.post(url=dataprep_delete_files_endpoint, headers=headers, data=file_input) + print("Delete file ", response) + table = update_table() + return response.text + +def delete_all_files(index=None): + # Remove all files from the file list + headers = { + # "Content-Type: application/json" + } + response = requests.post(url=dataprep_delete_files_endpoint, headers=headers, data='{"file_path": "all"}') + print("Delete all files ", response) + table = update_table() + + return response.text + +def get_indices(): + headers = { + # "Content-Type: application/json" + } + response = requests.post(url=dataprep_get_indices_endpoint, headers=headers) + print("Get Indices", response) + indices = response.json() + return indices + +def update_indices_dropdown(): + indices = ["None"] + get_indices() + new_dd = gr.update(choices=indices, value="None") + return new_dd + + +def get_file_names(files): + file_str = "" + if not files: + return file_str + + for file in files: + file_str += file + '\n' + file_str.strip() + return file_str + + +# Define UI components +with gr.Blocks() as ui: + with gr.Tab("Code Generation"): + gr.Markdown("### Generate Code from Natural Language") + chatbot = gr.Chatbot(label="Chat History") + prompt_input = gr.Textbox(label="Enter your query") + with gr.Column(): + with gr.Row(scale=8): + # indices = ["None"] + get_indices() + database_dropdown = 
gr.Dropdown(choices=get_indices(), label="Select Index", value="None") + with gr.Row(scale=1): + db_refresh_button = gr.Button("Refresh", variant="primary") + db_refresh_button.click(update_indices_dropdown, outputs=database_dropdown) + use_agent = gr.Checkbox(label="Use Agent", container=False) + + generate_button = gr.Button("Generate Code") + + # Connect the generate button to the conversation_history function + generate_button.click(conversation_history, inputs=[prompt_input, database_dropdown, use_agent, chatbot], outputs=chatbot) + + with gr.Tab("Resource Management"): + # File management components + # url_button = gr.Button("Process") + with gr.Row(): + with gr.Column(scale=1): + index_name_input = gr.Textbox(label="Index Name") + chunk_size_input = gr.Textbox(label="Chunk Size", value="1500", placeholder="Enter an integer (default: 1500)") + chunk_overlap_input = gr.Textbox(label="Chunk Overlap", value="100", placeholder="Enter an integer (default: 100)") + with gr.Column(scale=3): + file_upload = gr.File(label="Upload Files", file_count="multiple") + url_input = gr.Textbox(label="Media to be ingested (Append URL's in a new line)") + upload_button = gr.Button("Upload", variant="primary") + upload_status = gr.Textbox(label="Upload Status") + file_upload.change(get_file_names, inputs=file_upload, outputs=url_input) + with gr.Column(scale=1): + # table_dropdown = gr.Dropdown(indices) + # file_table = gr.Dataframe(interactive=False, value=update_table()) + file_table = gr.Dataframe(interactive=False, value=update_indices()) + refresh_button = gr.Button("Refresh", variant="primary", size="sm") + refresh_button.click(update_indices, outputs=file_table) + # refresh_button.click(update_indices, outputs=database_dropdown) + # table_dropdown.change(fn=update_table, inputs=table_dropdown, outputs=file_table) + # upload_button.click(upload_media, inputs=[file_upload, index_name_input, chunk_size_input, chunk_overlap_input], outputs=file_table) + upload_button.click(upload_media, inputs=[url_input, index_name_input, chunk_size_input, chunk_overlap_input], outputs=upload_status) + + delete_all_button = gr.Button("Delete All", variant="primary", size="sm") + delete_all_button.click(delete_all_files, outputs=upload_status) + + + + # delete_button = gr.Button("Delete Index") + + # selected_file_output = gr.Textbox(label="Selected File") + # delete_button.click(delete_file, inputs=indices, outputs=upload_status) + + + +ui.queue() +app = gr.mount_gradio_app(app, ui, path="/") +share = False +enable_queue = True + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--host", type=str, default="0.0.0.0") + parser.add_argument("--port", type=int, default=os.getenv("UI_PORT", 5173)) + parser.add_argument("--concurrency-count", type=int, default=20) + parser.add_argument("--share", action="store_true") + + host_ip = os.getenv("host_ip") + DATAPREP_REDIS_PORT = os.getenv("DATAPREP_REDIS_PORT", 6007) + DATAPREP_ENDPOINT = os.getenv("DATAPREP_ENDPOINT", f"http://{host_ip}:{DATAPREP_REDIS_PORT}/v1/dataprep") + MEGA_SERVICE_PORT = os.getenv("MEGA_SERVICE_PORT", 7778) + + + backend_service_endpoint = os.getenv( + "BACKEND_SERVICE_ENDPOINT", f"http://{host_ip}:{MEGA_SERVICE_PORT}/v1/codegen" + ) + + # dataprep_ingest_endpoint = f"{DATAPREP_ENDPOINT}/ingest" + # dataprep_get_files_endpoint = f"{DATAPREP_ENDPOINT}/get" + # dataprep_delete_files_endpoint = f"{DATAPREP_ENDPOINT}/delete" + # dataprep_get_indices_endpoint = f"{DATAPREP_ENDPOINT}/indices" + + + args = 
parser.parse_args() + # logger.info(f"args: {args}") + global gateway_addr + gateway_addr = backend_service_endpoint + global dataprep_ingest_addr + dataprep_ingest_addr = dataprep_ingest_endpoint + global dataprep_get_files_addr + dataprep_get_files_addr = dataprep_get_files_endpoint + + + uvicorn.run(app, host=args.host, port=args.port) diff --git a/CodeGen/ui/gradio/requirements.txt b/CodeGen/ui/gradio/requirements.txt new file mode 100644 index 0000000000..41e95a141b --- /dev/null +++ b/CodeGen/ui/gradio/requirements.txt @@ -0,0 +1,6 @@ +gradio==5.22.0 +gradio_pdf==0.0.19 +moviepy==1.0.3 +numpy==1.26.4 +opencv-python==4.10.0.82 +Pillow==10.3.0 From 4205703783d189c2bd126a58a53c246e459f9fc3 Mon Sep 17 00:00:00 2001 From: okhleif-IL Date: Thu, 27 Mar 2025 14:12:08 -0700 Subject: [PATCH 08/22] updated readme and fixed merge Signed-off-by: okhleif-IL --- .../intel/cpu/xeon/compose.yaml | 81 +++++++++++++------ CodeGen/docker_compose/set_env.sh | 26 +++--- CodeGen/ui/gradio/README.md | 21 ++--- 3 files changed, 75 insertions(+), 53 deletions(-) diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml index 3f6573f01e..c932ece069 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml +++ b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml @@ -1,15 +1,14 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - services: tgi-service: image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu - container_name: tgi-service + container_name: tgi-server + profiles: + - codegen-xeon-tgi ports: - "8028:80" volumes: - - "./data:/data" + - "${MODEL_CACHE:-./data}:/data" shm_size: 1g environment: no_proxy: ${no_proxy} @@ -23,43 +22,78 @@ services: timeout: 10s retries: 100 command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 - llm: + vllm-service: + image: ${REGISTRY:-opea}/vllm:${TAG:-latest} + container_name: vllm-server + profiles: + - codegen-xeon-vllm + ports: + - "8028:80" + volumes: + - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + host_ip: ${host_ip} + healthcheck: + test: ["CMD-SHELL", "curl -f http://$host_ip:8028/health || exit 1"] + interval: 10s + timeout: 10s + retries: 100 + command: --model ${LLM_MODEL_ID} --host 0.0.0.0 --port 80 + llm-base: image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} container_name: llm-textgen-server - depends_on: - tgi-service: - condition: service_healthy - ports: - - "9000:9000" - ipc: host environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} + LLM_ENDPOINT: ${LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped + llm-tgi-service: + extends: llm-base + container_name: llm-codegen-tgi-server + profiles: + - codegen-xeon-tgi + ports: + - "9000:9000" + ipc: host + depends_on: + tgi-service: + condition: service_healthy + llm-vllm-service: + extends: llm-base + container_name: llm-codegen-vllm-server + profiles: + - codegen-xeon-vllm + ports: + - "9000:9000" + ipc: host + depends_on: + vllm-service: + condition: service_healthy codegen-xeon-backend-server: image: ${REGISTRY:-opea}/codegen:${TAG:-latest} container_name: codegen-xeon-backend-server depends_on: - - llm + - llm-base ports: - "7778:7778" environment: - no_proxy=${no_proxy} - 
https_proxy=${https_proxy} - http_proxy=${http_proxy} - - MEGA_SERVICE_HOST_IP=${host_ip} #${MEGA_SERVICE_HOST_IP} - - LLM_SERVICE_HOST_IP=${host_ip} #${LLM_SERVICE_HOST_IP} - # - RETRIEVAL_SERVICE_HOST_IP=${REDIS_RETRIEVER_PORT} - - RETRIEVAL_SERVICE_HOST_IP=${host_ip} #${RETRIEVAL_SERVICE_HOST_IP} + - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} + - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP} + - RETRIEVAL_SERVICE_HOST_IP=${RETRIEVAL_SERVICE_HOST_IP} - REDIS_RETRIEVER_PORT=${REDIS_RETRIEVER_PORT} - # - MM_EMBEDDING_SERVICE_HOST_IP=${MM_EMBEDDING_PORT_MICROSERVICE} - - TEI_EMBEDDING_HOST_IP=${host_ip} #${TEI_EMBEDDING_HOST_IP} + - TEI_EMBEDDING_HOST_IP=${TEI_EMBEDDING_HOST_IP} - EMBEDDER_PORT=${EMBEDDER_PORT} - ipc: host restart: always codegen-xeon-ui-server: @@ -85,7 +119,6 @@ services: ports: - "${REDIS_DB_PORT}:${REDIS_DB_PORT}" - "${REDIS_INSIGHTS_PORT}:${REDIS_INSIGHTS_PORT}" - dataprep-redis-server: image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} container_name: dataprep-redis-server @@ -103,7 +136,6 @@ services: HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} LOGFLAG: true restart: unless-stopped - tei-embedding-serving: image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 container_name: tei-embedding-serving @@ -124,7 +156,6 @@ services: interval: 10s timeout: 6s retries: 48 - tei-embedding-server: image: ${REGISTRY:-opea}/embedding:${TAG:-latest} container_name: tei-embedding-server @@ -141,7 +172,6 @@ services: tei-embedding-serving: condition: service_healthy restart: unless-stopped - retriever-redis: image: ${REGISTRY:-opea}/retriever:${TAG:-latest} container_name: retriever-redis @@ -164,7 +194,6 @@ services: LOGFLAG: ${LOGFLAG} RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_REDIS} restart: unless-stopped - networks: default: driver: bridge \ No newline at end of file diff --git a/CodeGen/docker_compose/set_env.sh b/CodeGen/docker_compose/set_env.sh index d4cff7af39..559f00cf2a 100644 --- a/CodeGen/docker_compose/set_env.sh +++ b/CodeGen/docker_compose/set_env.sh @@ -2,18 +2,26 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -# pushd "../../" > /dev/null -# source .set_env.sh -# popd > /dev/null +pushd "../../" > /dev/null +source .set_env.sh +popd > /dev/null export your_ip=$(hostname -I | awk '{print $1}') export host_ip=$(hostname -I | awk '{print $1}') -export no_proxy="${no_proxy},${host_ip}" +if [ -z "${HUGGINGFACEHUB_API_TOKEN}" ]; then + echo "Error: HUGGINGFACEHUB_API_TOKEN is not set. Please set HUGGINGFACEHUB_API_TOKEN" +fi + +if [ -z "${host_ip}" ]; then + echo "Error: host_ip is not set. Please set host_ip first." 
+fi + +export no_proxy=${no_proxy},${host_ip} + export http_proxy=${http_proxy} export https_proxy=${https_proxy} -# export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct" export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-32B-Instruct" export LLM_SERVICE_PORT=9000 export LLM_ENDPOINT="http://${host_ip}:8028" @@ -22,7 +30,7 @@ export TGI_LLM_ENDPOINT="http://${host_ip}:8028" export MEGA_SERVICE_PORT=7778 export MEGA_SERVICE_HOST_IP=${host_ip} -export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${MEGA_SERVICE_PORT}/v1/codegen" +export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:7778/v1/codegen" export REDIS_DB_PORT=6379 export REDIS_INSIGHTS_PORT=8001 @@ -41,8 +49,4 @@ export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" export DATAPREP_REDIS_PORT=6007 export DATAPREP_ENDPOINT="http://${host_ip}:${DATAPREP_REDIS_PORT}/v1/dataprep" export LOGFLAG=false - - -# export LLM_SERVICE_HOST_IP=${host_ip} -# export RETRIEVAL_SERVICE_HOST_IP=${host_ip} -# export TEI_EMBEDDING_HOST_IP=${host_ip} \ No newline at end of file +export MODEL_CACHE="./data" \ No newline at end of file diff --git a/CodeGen/ui/gradio/README.md b/CodeGen/ui/gradio/README.md index 38b2e964a3..9769efb317 100644 --- a/CodeGen/ui/gradio/README.md +++ b/CodeGen/ui/gradio/README.md @@ -13,21 +13,21 @@ cd GenAIExamples/CodeGen/ui docker build -t opea/codegen-gradio-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile.gradio . ``` -This command builds the Docker image with the tag `opea/docsum-ui:latest`. It also passes the proxy settings as build arguments to ensure that the build process can access the internet if you are behind a corporate firewall. +This command builds the Docker image with the tag `opea/codegen-gradio-ui:latest`. It also passes the proxy settings as build arguments to ensure that the build process can access the internet if you are behind a corporate firewall. ### Run UI Docker Image -To run the frontend Docker image, navigate to the `GenAIExamples/DocSum/ui/docker` directory and execute the following commands: +To run the frontend Docker image, navigate to the `GenAIExamples/CodeGen/ui/gradio` directory and execute the following commands: ```bash -cd GenAIExamples/CodeGen/ui/docker +cd GenAIExamples/CodeGen/ui/gradio ip_address=$(hostname -I | awk '{print $1}') docker run -d -p 5173:5173 --ipc=host \ -e http_proxy=$http_proxy \ -e https_proxy=$https_proxy \ -e no_proxy=$no_proxy \ - -e BACKEND_SERVICE_ENDPOINT=http://$ip_address:8888/v1/docsum \ + -e BACKEND_SERVICE_ENDPOINT=http://$ip_address:7778/v1/codegen \ opea/codegen-gradio-ui:latest ``` @@ -35,7 +35,7 @@ This command runs the Docker container in interactive mode, mapping port 5173 of ### Python -To run the frontend application directly using Python, navigate to the `GenAIExamples/DocSum/ui/gradio` directory and run the following command: +To run the frontend application directly using Python, navigate to the `GenAIExamples/CodeGen/ui/gradio` directory and run the following command: ```bash cd GenAIExamples/CodeGen/ui/gradio @@ -44,17 +44,6 @@ python codegen_ui_gradio.py This command starts the frontend application using Python. -## 📸 Project Screenshots - -![project-screenshot](../../assets/img/docSum_ui_gradio_text.png) - -### 🧐 Features - -Here are some of the project's features: - -- Summarizing Uploaded Files: Users can upload files from their local device. Once a file is uploaded, the summarization of the document will start automatically. The summary will be displayed in the 'Summary' box. 
-- Summarizing Text via Pasting: Users can paste the text to be summarized into the text box. By clicking the 'Generate Summary' button, a condensed summary of the content will be produced and displayed in the 'Summary' box on the right. - ## Additional Information ### Prerequisites From 6dc0e89a9d0f2ee6e2b0511fda32e12008590f99 Mon Sep 17 00:00:00 2001 From: okhleif-IL Date: Thu, 27 Mar 2025 14:19:34 -0700 Subject: [PATCH 09/22] reverted changes Signed-off-by: okhleif-IL --- CodeGen/codegen.py | 26 +++-- .../docker_compose/intel/cpu/xeon/README.md | 98 ++++++++++++++++++- CodeGen/ui/gradio/requirements.txt | 2 - 3 files changed, 110 insertions(+), 16 deletions(-) diff --git a/CodeGen/codegen.py b/CodeGen/codegen.py index a5d79f8dd2..00521175f0 100644 --- a/CodeGen/codegen.py +++ b/CodeGen/codegen.py @@ -19,6 +19,9 @@ from fastapi.responses import StreamingResponse from langchain.prompts import PromptTemplate +logger = CustomLogger("opea_dataprep_microservice") +logflag = os.getenv("LOGFLAG", False) + MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 7778)) LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0") @@ -80,7 +83,7 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k embedding = inputs['data'][0]['embedding'] # Align the inputs for the retriever service inputs = { - "index_name": llm_parameters_dict["key_index_name"], + "index_name": llm_parameters_dict["index_name"], "text": self.input_query, "embedding": embedding } @@ -198,14 +201,14 @@ async def handle_request(self, request: Request): presence_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 0.0, repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03, stream=stream_opt, - key_index_name=chat_request.key_index_name + index_name=chat_request.index_name ) # Initialize the initial inputs with the generated prompt initial_inputs = {"query": prompt} # Check if the key index name is provided in the parameters - if parameters.key_index_name: + if parameters.index_name: if agents_flag: # Schedule the retriever microservice result_ret, runtime_graph = await self.megaservice_retriever.schedule( @@ -248,11 +251,16 @@ async def handle_request(self, request: Request): relevant_docs.append(doc) # Update the initial inputs with the relevant documents - query = initial_inputs["query"] - initial_inputs = {} - initial_inputs["retrieved_docs"] = relevant_docs - initial_inputs["initial_query"] = query - megaservice = self.megaservice_llm + if len(relevant_docs)>0: + logger.info(f"[ CodeGenService - handle_request ] {len(relevant_docs)} relevant document\s found.") + query = initial_inputs["query"] + initial_inputs = {} + initial_inputs["retrieved_docs"] = relevant_docs + initial_inputs["initial_query"] = query + + else: + logger.info("[ CodeGenService - handle_request ] Could not find any relevant documents. 
The query will be used as input to the LLM.") + else: # Use the combined retriever and LLM microservice megaservice = self.megaservice_retriever_llm @@ -305,4 +313,4 @@ def start(self): if __name__ == "__main__": chatqna = CodeGenService(port=MEGA_SERVICE_PORT) chatqna.add_remote_service() - chatqna.start() + chatqna.start() \ No newline at end of file diff --git a/CodeGen/docker_compose/intel/cpu/xeon/README.md b/CodeGen/docker_compose/intel/cpu/xeon/README.md index e6ed4334a8..fc8b81b45f 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/README.md +++ b/CodeGen/docker_compose/intel/cpu/xeon/README.md @@ -3,6 +3,89 @@ This document outlines the deployment process for a CodeGen application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on Intel Xeon server. The steps include Docker images creation, container deployment via Docker Compose, and service execution to integrate microservices such as `llm`. We will publish the Docker images to Docker Hub soon, further simplifying the deployment process for this service. The default pipeline deploys with vLLM as the LLM serving component. It also provides options of using TGI backend for LLM microservice. +## 🚀 Create an AWS Xeon Instance + +To run the example on an AWS Xeon instance, start by creating an AWS account if you don't have one already. Then, get started with the [EC2 Console](https://console.aws.amazon.com/ec2/v2/home). AWS EC2 M7i, C7i, C7i-flex and M7i-flex instances are 4th Generation Intel Xeon Scalable processors suitable for the task. + +For detailed information about these instance types, you can refer to [m7i](https://aws.amazon.com/ec2/instance-types/m7i/). Once you've chosen the appropriate instance type, proceed with configuring your instance settings, including network configurations, security groups, and storage options. + +After launching your instance, you can connect to it using SSH (for Linux instances) or Remote Desktop Protocol (RDP) (for Windows instances). From there, you'll have full access to your Xeon server, allowing you to install, configure, and manage your applications as needed. + +## 🚀 Start Microservices and MegaService + +The CodeGen megaservice manages a several microservices including 'Embedding MicroService', 'Retrieval MicroService' and 'LLM MicroService' within a Directed Acyclic Graph (DAG). In the diagram below, the LLM microservice is a language model microservice that generates code snippets based on the user's input query. The TGI service serves as a text generation interface, providing a RESTful API for the LLM microservice. Data Preparation allows users to save/update documents or online resources to the vector database. Users can upload files or provide URLs, and manage their saved resources. The CodeGen Gateway acts as the entry point for the CodeGen application, invoking the Megaservice to generate code snippets in response to the user's input query. 
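Taken together, the `handle_request` changes above route a request down one of three paths depending on which optional fields are present. The dictionaries below summarize the payload shapes as this patch defines them; the field names come from the patch, while the example values are invented.

```python
# Payload shapes accepted by POST /v1/codegen after this patch (example values are invented).

# 1. Plain generation: only the LLM microservice is scheduled.
plain = {"messages": "Refactor this loop into a list comprehension."}

# 2. Retrieval-augmented generation: embedding -> retriever -> LLM,
#    where index_name selects the Redis index created at ingestion time.
rag = {"messages": "Generate a client for our internal API.", "index_name": "my_code_docs"}

# 3. Agent-graded retrieval: the retriever runs first, retrieved documents are
#    graded for relevance, and only the relevant ones are passed to the LLM.
graded = {
    "messages": "Generate a client for our internal API.",
    "index_name": "my_code_docs",
    "agents_flag": True,
}
```

Any of these bodies can be posted to the gateway exactly as the Gradio client earlier in this series does.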
+ +The mega flow of the CodeGen application, from user's input query to the application's output response, is as follows: + +```mermaid +--- +config: + flowchart: + nodeSpacing: 400 + rankSpacing: 100 + curve: linear + themeVariables: + fontSize: 25px +--- +flowchart LR + %% Colors %% + classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 + classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 + classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 + classDef invisible fill:transparent,stroke:transparent; + style CodeGen-MegaService stroke:#000000 + %% Subgraphs %% + subgraph CodeGen-MegaService["CodeGen-MegaService"] + direction LR + EM([Embedding
MicroService]):::blue + RET([Retrieval
MicroService]):::blue + RER([Agents]):::blue + LLM([LLM
MicroService]):::blue + end + subgraph User Interface + direction LR + a([Submit Query Tab]):::orchid + UI([UI server]):::orchid + Ingest([Manage Resources]):::orchid + end + + CLIP_EM{{Embedding
service}} + VDB{{Vector DB}} + V_RET{{Retriever
service}} + Ingest{{Ingest data}} + DP([Data Preparation]):::blue + LLM_gen{{TGI Service}} + GW([CodeGen GateWay]):::orange + + %% Data Preparation flow + %% Ingest data flow + direction LR + Ingest[Ingest data] --> UI + UI --> DP + DP <-.-> CLIP_EM + + %% Questions interaction + direction LR + a[User Input Query] --> UI + UI --> GW + GW <==> CodeGen-MegaService + EM ==> RET + RET ==> RER + RER ==> LLM + + + %% Embedding service flow + direction LR + EM <-.-> CLIP_EM + RET <-.-> V_RET + LLM <-.-> LLM_gen + + direction TB + %% Vector DB interaction + V_RET <-.->VDB + DP <-.->VDB +``` + ### Setup Environment Variables Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below. @@ -92,12 +175,17 @@ To access the frontend, open the following URL in your browser: `http://{host_ip ```yaml codegen-xeon-ui-server: - image: opea/codegen-gradio-ui:latest + image: opea/codegen-ui:latest ... ports: - "80:5173" ``` +![project-screenshot](../../../../assets/img/codeGen_ui_init.jpg) + +Here is an example of running CodeGen in the UI: + +![project-screenshot](../../../../assets/img/codeGen_ui_response.png) ## 🚀 Launch the React Based UI (Optional) @@ -226,15 +314,15 @@ cd GenAIExamples/CodeGen/ui docker build -t opea/codegen-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile . ``` -### 4. Build CodeGen Gradio UI Docker Image +### 4. Build CodeGen React UI Docker Image (Optional) -Build gradio frontend Docker image via below command: +Build react frontend Docker image via below command: **Export the value of the public IP address of your Xeon server to the `host_ip` environment variable** ```bash cd GenAIExamples/CodeGen/ui -docker build --no-cache -t opea/codegen-gradio-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.gradio . +docker build --no-cache -t opea/codegen-react-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react . 
``` Then run the command `docker images`, you will have the following Docker Images: @@ -242,4 +330,4 @@ Then run the command `docker images`, you will have the following Docker Images: - `opea/llm-textgen:latest` - `opea/codegen:latest` - `opea/codegen-ui:latest` -- `opea/codegen-gradio-ui:latest` +- `opea/codegen-react-ui:latest` (optional) \ No newline at end of file diff --git a/CodeGen/ui/gradio/requirements.txt b/CodeGen/ui/gradio/requirements.txt index 41e95a141b..2a4c8e1a30 100644 --- a/CodeGen/ui/gradio/requirements.txt +++ b/CodeGen/ui/gradio/requirements.txt @@ -1,6 +1,4 @@ gradio==5.22.0 -gradio_pdf==0.0.19 -moviepy==1.0.3 numpy==1.26.4 opencv-python==4.10.0.82 Pillow==10.3.0 From 251991a607759748388b984e547d683f8e0eb6c3 Mon Sep 17 00:00:00 2001 From: Omar Khleif Date: Thu, 27 Mar 2025 14:23:03 -0700 Subject: [PATCH 10/22] Gradio UI for CodeGen (#4) * update the compose file Signed-off-by: Mustafa initial Signed-off-by: Mustafa added microservice ports Signed-off-by: okhleif-IL update codegen Signed-off-by: Mustafa update for codegen Signed-off-by: Mustafa Initial commit for Gradio UI Signed-off-by: okhleif-IL New UI Signed-off-by: okhleif-IL prepare for merge Signed-off-by: okhleif-IL add agents Signed-off-by: Mustafa env updates Signed-off-by: Mustafa update codegen Signed-off-by: Mustafa merged to main Signed-off-by: Mustafa updates Signed-off-by: Mustafa UI Updates Signed-off-by: okhleif-IL added dockerfile Signed-off-by: okhleif-IL removed files dataframe Signed-off-by: okhleif-IL updated file upload Signed-off-by: okhleif-IL added checkbox for agent Signed-off-by: okhleif-IL key_index_name --> index_name Signed-off-by: okhleif-IL added / removed print statements Signed-off-by: okhleif-IL Support for data streaming (from Melanie) Signed-off-by: okhleif-IL fixed file not supported bug Signed-off-by: okhleif-IL added refresh button to index Signed-off-by: okhleif-IL simplified README Signed-off-by: okhleif-IL * updated readme and fixed merge Signed-off-by: okhleif-IL * reverted changes Signed-off-by: okhleif-IL --------- Signed-off-by: okhleif-IL Co-authored-by: Mustafa --- CodeGen/Dockerfile | 47 +- CodeGen/codegen.py | 2 +- .../docker_compose/intel/cpu/xeon/README.md | 2 +- .../intel/cpu/xeon/compose.yaml | 8 +- CodeGen/docker_compose/set_env.sh | 3 +- CodeGen/docker_image_build/build.yaml | 6 + CodeGen/ui/docker/Dockerfile.gradio | 33 ++ CodeGen/ui/gradio/README.md | 65 +++ CodeGen/ui/gradio/codegen_ui_gradio.py | 402 ++++++++++++++++++ CodeGen/ui/gradio/requirements.txt | 4 + 10 files changed, 563 insertions(+), 9 deletions(-) create mode 100644 CodeGen/ui/docker/Dockerfile.gradio create mode 100644 CodeGen/ui/gradio/README.md create mode 100644 CodeGen/ui/gradio/codegen_ui_gradio.py create mode 100644 CodeGen/ui/gradio/requirements.txt diff --git a/CodeGen/Dockerfile b/CodeGen/Dockerfile index 5305a9d89f..b2b4155fd7 100644 --- a/CodeGen/Dockerfile +++ b/CodeGen/Dockerfile @@ -1,8 +1,51 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -ARG BASE_TAG=latest -FROM opea/comps-base:$BASE_TAG +# Stage 1: base setup used by other stages +FROM python:3.11-slim AS base + +# get security updates +RUN apt-get update && apt-get upgrade -y && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +ENV HOME=/home/user + +RUN useradd -m -s /bin/bash user && \ + mkdir -p $HOME && \ + chown -R user $HOME + +WORKDIR $HOME + + +# Stage 2: latest GenAIComps sources +FROM base AS git + +RUN apt-get update && apt-get install -y --no-install-recommends git +# RUN git 
clone --depth 1 https://github.com/opea-project/GenAIComps.git +COPY GenAIComps GenAIComps + + +# Stage 3: common layer shared by services using GenAIComps +FROM base AS comps-base + +# copy just relevant parts +COPY --from=git $HOME/GenAIComps/comps $HOME/GenAIComps/comps +COPY --from=git $HOME/GenAIComps/*.* $HOME/GenAIComps/LICENSE $HOME/GenAIComps/ + +WORKDIR $HOME/GenAIComps +RUN pip install --no-cache-dir --upgrade pip setuptools && \ + pip install --no-cache-dir -r $HOME/GenAIComps/requirements.txt +WORKDIR $HOME + +ENV PYTHONPATH=$PYTHONPATH:$HOME/GenAIComps + +USER user + + +# Stage 4: unique part +FROM comps-base + +ENV LANG=C.UTF-8 COPY ./codegen.py $HOME/codegen.py diff --git a/CodeGen/codegen.py b/CodeGen/codegen.py index 6384efaa47..00521175f0 100644 --- a/CodeGen/codegen.py +++ b/CodeGen/codegen.py @@ -313,4 +313,4 @@ def start(self): if __name__ == "__main__": chatqna = CodeGenService(port=MEGA_SERVICE_PORT) chatqna.add_remote_service() - chatqna.start() + chatqna.start() \ No newline at end of file diff --git a/CodeGen/docker_compose/intel/cpu/xeon/README.md b/CodeGen/docker_compose/intel/cpu/xeon/README.md index 75c7b1851b..fc8b81b45f 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/README.md +++ b/CodeGen/docker_compose/intel/cpu/xeon/README.md @@ -330,4 +330,4 @@ Then run the command `docker images`, you will have the following Docker Images: - `opea/llm-textgen:latest` - `opea/codegen:latest` - `opea/codegen-ui:latest` -- `opea/codegen-react-ui:latest` (optional) +- `opea/codegen-react-ui:latest` (optional) \ No newline at end of file diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml index 3d132d29f9..c932ece069 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml +++ b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml @@ -1,6 +1,3 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - services: tgi-service: @@ -100,7 +97,7 @@ services: ipc: host restart: always codegen-xeon-ui-server: - image: ${REGISTRY:-opea}/codegen-ui:${TAG:-latest} + image: ${REGISTRY:-opea}/codegen-gradio-ui:${TAG:-latest} container_name: codegen-xeon-ui-server depends_on: - codegen-xeon-backend-server @@ -111,6 +108,9 @@ services: - https_proxy=${https_proxy} - http_proxy=${http_proxy} - BASIC_URL=${BACKEND_SERVICE_ENDPOINT} + - MEGA_SERVICE_PORT=${MEGA_SERVICE_PORT} + - host_ip=${host_ip} + - DATAPREP_ENDPOINT=${DATAPREP_ENDPOINT} ipc: host restart: always redis-vector-db: diff --git a/CodeGen/docker_compose/set_env.sh b/CodeGen/docker_compose/set_env.sh index dd0b97a551..559f00cf2a 100644 --- a/CodeGen/docker_compose/set_env.sh +++ b/CodeGen/docker_compose/set_env.sh @@ -47,5 +47,6 @@ export TEI_EMBEDDING_HOST_IP=${host_ip} export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" export DATAPREP_REDIS_PORT=6007 +export DATAPREP_ENDPOINT="http://${host_ip}:${DATAPREP_REDIS_PORT}/v1/dataprep" export LOGFLAG=false -export MODEL_CACHE="./data" +export MODEL_CACHE="./data" \ No newline at end of file diff --git a/CodeGen/docker_image_build/build.yaml b/CodeGen/docker_image_build/build.yaml index 3275aa71bf..52ca23b109 100644 --- a/CodeGen/docker_image_build/build.yaml +++ b/CodeGen/docker_image_build/build.yaml @@ -23,6 +23,12 @@ services: dockerfile: ./docker/Dockerfile.react extends: codegen image: ${REGISTRY:-opea}/codegen-react-ui:${TAG:-latest} + codegen-gradio-ui: + build: + context: ../ui + dockerfile: ./docker/Dockerfile.gradio + extends: codegen + image: 
${REGISTRY:-opea}/codegen-gradio-ui:${TAG:-latest} llm-textgen: build: context: GenAIComps diff --git a/CodeGen/ui/docker/Dockerfile.gradio b/CodeGen/ui/docker/Dockerfile.gradio new file mode 100644 index 0000000000..11a4f4f581 --- /dev/null +++ b/CodeGen/ui/docker/Dockerfile.gradio @@ -0,0 +1,33 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM python:3.11-slim + +ENV LANG=C.UTF-8 + +ARG ARCH="cpu" + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + default-jre \ + libgl1-mesa-glx \ + libjemalloc-dev \ + wget + +# Install ffmpeg static build +WORKDIR /root +RUN wget https://johnvansickle.com/ffmpeg/builds/ffmpeg-git-amd64-static.tar.xz && \ + mkdir ffmpeg-git-amd64-static && tar -xvf ffmpeg-git-amd64-static.tar.xz -C ffmpeg-git-amd64-static --strip-components 1 && \ + export PATH=/root/ffmpeg-git-amd64-static:$PATH && \ + cp /root/ffmpeg-git-amd64-static/ffmpeg /usr/local/bin/ && \ + cp /root/ffmpeg-git-amd64-static/ffprobe /usr/local/bin/ + +RUN mkdir -p /home/user + +COPY gradio /home/user/gradio + +RUN pip install --no-cache-dir --upgrade pip setuptools && \ +pip install --no-cache-dir -r /home/user/gradio/requirements.txt + +WORKDIR /home/user/gradio +ENTRYPOINT ["python", "codegen_ui_gradio.py"] diff --git a/CodeGen/ui/gradio/README.md b/CodeGen/ui/gradio/README.md new file mode 100644 index 0000000000..9769efb317 --- /dev/null +++ b/CodeGen/ui/gradio/README.md @@ -0,0 +1,65 @@ +# Document Summary + +This project provides a user interface for summarizing documents and text using a Dockerized frontend application. Users can upload files or paste text to generate summaries. + +## Docker + +### Build UI Docker Image + +To build the frontend Docker image, navigate to the `GenAIExamples/DocSum/ui` directory and run the following command: + +```bash +cd GenAIExamples/CodeGen/ui +docker build -t opea/codegen-gradio-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile.gradio . +``` + +This command builds the Docker image with the tag `opea/codegen-gradio-ui:latest`. It also passes the proxy settings as build arguments to ensure that the build process can access the internet if you are behind a corporate firewall. + +### Run UI Docker Image + +To run the frontend Docker image, navigate to the `GenAIExamples/CodeGen/ui/gradio` directory and execute the following commands: + +```bash +cd GenAIExamples/CodeGen/ui/gradio + +ip_address=$(hostname -I | awk '{print $1}') +docker run -d -p 5173:5173 --ipc=host \ + -e http_proxy=$http_proxy \ + -e https_proxy=$https_proxy \ + -e no_proxy=$no_proxy \ + -e BACKEND_SERVICE_ENDPOINT=http://$ip_address:7778/v1/codegen \ + opea/codegen-gradio-ui:latest +``` + +This command runs the Docker container in interactive mode, mapping port 5173 of the host to port 5173 of the container. It also sets several environment variables, including the backend service endpoint, which is required for the frontend to communicate with the backend service. + +### Python + +To run the frontend application directly using Python, navigate to the `GenAIExamples/CodeGen/ui/gradio` directory and run the following command: + +```bash +cd GenAIExamples/CodeGen/ui/gradio +python codegen_ui_gradio.py +``` + +This command starts the frontend application using Python. + +## Additional Information + +### Prerequisites + +Ensure you have Docker installed and running on your system. 
Also, make sure you have the necessary proxy settings configured if you are behind a corporate firewall. + +### Environment Variables + +- `http_proxy`: Proxy setting for HTTP connections. +- `https_proxy`: Proxy setting for HTTPS connections. +- `no_proxy`: Comma-separated list of hosts that should be excluded from proxying. +- `BACKEND_SERVICE_ENDPOINT`: The endpoint of the backend service that the frontend will communicate with. + +### Troubleshooting + +- Docker Build Issues: If you encounter issues while building the Docker image, ensure that your proxy settings are correctly configured and that you have internet access. +- Docker Run Issues: If the Docker container fails to start, check the environment variables and ensure that the backend service is running and accessible. + +This README file provides detailed instructions and explanations for building and running the Dockerized frontend application, as well as running it directly using Python. It also highlights the key features of the project and provides additional information for troubleshooting and configuring the environment. diff --git a/CodeGen/ui/gradio/codegen_ui_gradio.py b/CodeGen/ui/gradio/codegen_ui_gradio.py new file mode 100644 index 0000000000..873d0c42b4 --- /dev/null +++ b/CodeGen/ui/gradio/codegen_ui_gradio.py @@ -0,0 +1,402 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# This is a Gradio app that includes two tabs: one for code generation and another for resource management. +# The resource management tab has been updated to allow file uploads, deletion, and a table listing all the files. +# Additionally, three small text boxes have been added for managing file dataframe parameters. + +import argparse +import os +from pathlib import Path +import gradio as gr +from gradio_pdf import PDF +import requests +import pandas as pd +import os +import uvicorn +import json +import argparse +# from utils import build_logger, make_temp_image, server_error_msg, split_video +from urllib.parse import urlparse +from pathlib import Path +from fastapi import FastAPI +# from fastapi.responses import JSONResponse, StreamingResponse +from fastapi.staticfiles import StaticFiles + +# logger = build_logger("gradio_web_server", "gradio_web_server.log") +logflag = os.getenv("LOGFLAG", False) + +# create a FastAPI app +app = FastAPI() +cur_dir = os.getcwd() +static_dir = Path(os.path.join(cur_dir, "static/")) +tmp_dir = Path(os.path.join(cur_dir, "split_tmp_videos/")) + +Path(static_dir).mkdir(parents=True, exist_ok=True) +app.mount("/static", StaticFiles(directory=static_dir), name="static") + +tmp_upload_folder = "/tmp/gradio/" + + + +host_ip = os.getenv("host_ip") +DATAPREP_REDIS_PORT = os.getenv("DATAPREP_REDIS_PORT", 6007) +DATAPREP_ENDPOINT = os.getenv("DATAPREP_ENDPOINT", f"http://{host_ip}:{DATAPREP_REDIS_PORT}/v1/dataprep") +MEGA_SERVICE_PORT = os.getenv("MEGA_SERVICE_PORT", 7778) + +backend_service_endpoint = os.getenv( + "BACKEND_SERVICE_ENDPOINT", f"http://{host_ip}:{MEGA_SERVICE_PORT}/v1/codegen" + ) + +dataprep_ingest_endpoint = f"{DATAPREP_ENDPOINT}/ingest" +dataprep_get_files_endpoint = f"{DATAPREP_ENDPOINT}/get" +dataprep_delete_files_endpoint = f"{DATAPREP_ENDPOINT}/delete" +dataprep_get_indices_endpoint = f"{DATAPREP_ENDPOINT}/indices" + + + +# Define the functions that will be used in the app +def conversation_history(prompt, index, use_agent, history): + # Print the language and prompt, and return a placeholder code + print(f"Generating code for prompt: {prompt} using index: 
{index} and use_agent is {use_agent}") + history.append([prompt, ""]) + response_generator = generate_code(prompt, index, use_agent) + for token in response_generator: + history[-1][-1] += token + yield history + + +def upload_media(media, index=None, chunk_size=1500, chunk_overlap=100): + media = media.strip().split("\n") + print("Files passed is ", media, flush=True) + if not chunk_size: + chunk_size = 1500 + if not chunk_overlap: + chunk_overlap = 100 + + requests = [] + if type(media) is list: + for file in media: + file_ext = os.path.splitext(file)[-1] + if is_valid_url(file): + print(file, " is valid URL") + print("Ingesting URL...") + value = ingest_url(file, index, chunk_size, chunk_overlap) + requests.append(value) + yield value + elif file_ext in ['.pdf', '.txt']: + print("Ingesting File...") + value = ingest_file(file, index, chunk_size, chunk_overlap) + requests.append(value) + yield value + else: + print(file, "File type not supported") + yield ( + gr.Textbox( + visible=True, + value="Your file extension type is not supported.", + ) + ) + return + yield requests + + else: + file_ext = os.path.splitext(media)[-1] + if is_valid_url(media): + value = ingest_url(media, index, chunk_size, chunk_overlap) + yield value + elif file_ext in ['.pdf', '.txt']: + print("Ingesting File...") + value = ingest_file(media, index, chunk_size, chunk_overlap) + # print("Return value is: ", value, flush=True) + yield value + else: + print(media, "File type not supported") + yield ( + gr.Textbox( + visible=True, + value="Your file extension type is not supported.", + ) + ) + return + +def generate_code(query, index=None, use_agent=False): + if index is None or index == "None": + input_dict = {"messages": query, "agents_flag": use_agent} + else: + input_dict = {"messages": query, "index_name": index, "agents_flag": use_agent} + + print("Query is ", input_dict) + headers = {"Content-Type": "application/json"} + + response = requests.post(url=backend_service_endpoint, headers=headers, data=json.dumps(input_dict), stream=True) + + for line in response.iter_lines(): + if line: + line = line.decode('utf-8') + if line.startswith("data: "): # Only process lines starting with "data: " + json_part = line[len("data: "):] # Remove the "data: " prefix + if json_part.strip() == "[DONE]": # Ignore the DONE marker + continue + try: + json_obj = json.loads(json_part) # Convert to dictionary + if "choices" in json_obj: + for choice in json_obj["choices"]: + if "text" in choice: + # Yield each token individually + yield choice["text"] + except json.JSONDecodeError: + print("Error parsing JSON:", json_part) + + +def ingest_file(file, index=None, chunk_size=100, chunk_overlap=150): + headers = { + # "Content-Type: multipart/form-data" + } + file_input = {"files": open(file, "rb")} + + if index: + print("Index is", index) + data = {"index_name": index, "chunk_size": chunk_size, "chunk_overlap": chunk_overlap} + else: + data = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap} + + print("Calling Request Now!") + response = requests.post(url=dataprep_ingest_endpoint, headers=headers, files=file_input, data=data) + # print("Ingest Files", response) + print(response.text) + + # table = update_table() + return response.text + +def ingest_url(url, index=None, chunk_size=100, chunk_overlap=150): + print("URL is ", url) + url = str(url) + if not is_valid_url(url): + print("Invalid URL") + # yield ( + # gr.Textbox( + # visible=True, + # value="Invalid URL entered. 
Please enter a valid URL", + # ) + # ) + return + headers = { + # "Content-Type: multipart/form-data" + } + + if index: + url_input = {"link_list": json.dumps([url]), "index_name": index, "chunk_size": chunk_size, "chunk_overlap": chunk_overlap} + else: + url_input = {"link_list": json.dumps([url]), "chunk_size": chunk_size, "chunk_overlap": chunk_overlap} + response = requests.post(url=dataprep_ingest_endpoint, headers=headers, data=url_input) + # print("Ingest URL", response) + # table = update_table() + return response.text + + +def is_valid_url(url): + url = str(url) + try: + result = urlparse(url) + return all([result.scheme, result.netloc]) + except ValueError: + return False + + + +# Initialize the file list +file_list = [] + +# def update_files(file): +# # Add the uploaded file to the file list +# file_list.append(file.name) +# file_df["Files"] = file_list +# return file_df + + +def get_files(index=None): + headers = { + # "Content-Type: multipart/form-data" + } + if index == "All Files": + index = None + + if index: + index = {"index_name": index} + response = requests.post(url=dataprep_get_files_endpoint, headers=headers, data=index) + print("Get files with ", index, response) + table = response.json() + return table + else: + # print("URL IS ", dataprep_get_files_endpoint) + response = requests.post(url=dataprep_get_files_endpoint, headers=headers) + print("Get files ", response) + table = response.json() + return table + +def update_table(index=None): + if index == "All Files": + index = None + files = get_files(index) + print("Files is ", files) + if len(files) == 0: + df = pd.DataFrame(files, columns=["Files"]) + return df + else: + df = pd.DataFrame(files) + return df + +def update_indices(): + indices = get_indices() + df = pd.DataFrame(indices, columns=["File Databases"]) + return df + +def delete_file(file, index=None): + # Remove the selected file from the file list + headers = { + # "Content-Type: application/json" + } + print("URL IS ", dataprep_delete_files_endpoint) + if index: + file_input = {"files": open(file, "rb"), "index_name": index} + else: + file_input = {"files": open(file, "rb")} + response = requests.post(url=dataprep_delete_files_endpoint, headers=headers, data=file_input) + print("Delete file ", response) + table = update_table() + return response.text + +def delete_all_files(index=None): + # Remove all files from the file list + headers = { + # "Content-Type: application/json" + } + response = requests.post(url=dataprep_delete_files_endpoint, headers=headers, data='{"file_path": "all"}') + print("Delete all files ", response) + table = update_table() + + return response.text + +def get_indices(): + headers = { + # "Content-Type: application/json" + } + response = requests.post(url=dataprep_get_indices_endpoint, headers=headers) + print("Get Indices", response) + indices = response.json() + return indices + +def update_indices_dropdown(): + indices = ["None"] + get_indices() + new_dd = gr.update(choices=indices, value="None") + return new_dd + + +def get_file_names(files): + file_str = "" + if not files: + return file_str + + for file in files: + file_str += file + '\n' + file_str.strip() + return file_str + + +# Define UI components +with gr.Blocks() as ui: + with gr.Tab("Code Generation"): + gr.Markdown("### Generate Code from Natural Language") + chatbot = gr.Chatbot(label="Chat History") + prompt_input = gr.Textbox(label="Enter your query") + with gr.Column(): + with gr.Row(scale=8): + # indices = ["None"] + get_indices() + database_dropdown = 
gr.Dropdown(choices=get_indices(), label="Select Index", value="None") + with gr.Row(scale=1): + db_refresh_button = gr.Button("Refresh", variant="primary") + db_refresh_button.click(update_indices_dropdown, outputs=database_dropdown) + use_agent = gr.Checkbox(label="Use Agent", container=False) + + generate_button = gr.Button("Generate Code") + + # Connect the generate button to the conversation_history function + generate_button.click(conversation_history, inputs=[prompt_input, database_dropdown, use_agent, chatbot], outputs=chatbot) + + with gr.Tab("Resource Management"): + # File management components + # url_button = gr.Button("Process") + with gr.Row(): + with gr.Column(scale=1): + index_name_input = gr.Textbox(label="Index Name") + chunk_size_input = gr.Textbox(label="Chunk Size", value="1500", placeholder="Enter an integer (default: 1500)") + chunk_overlap_input = gr.Textbox(label="Chunk Overlap", value="100", placeholder="Enter an integer (default: 100)") + with gr.Column(scale=3): + file_upload = gr.File(label="Upload Files", file_count="multiple") + url_input = gr.Textbox(label="Media to be ingested (Append URL's in a new line)") + upload_button = gr.Button("Upload", variant="primary") + upload_status = gr.Textbox(label="Upload Status") + file_upload.change(get_file_names, inputs=file_upload, outputs=url_input) + with gr.Column(scale=1): + # table_dropdown = gr.Dropdown(indices) + # file_table = gr.Dataframe(interactive=False, value=update_table()) + file_table = gr.Dataframe(interactive=False, value=update_indices()) + refresh_button = gr.Button("Refresh", variant="primary", size="sm") + refresh_button.click(update_indices, outputs=file_table) + # refresh_button.click(update_indices, outputs=database_dropdown) + # table_dropdown.change(fn=update_table, inputs=table_dropdown, outputs=file_table) + # upload_button.click(upload_media, inputs=[file_upload, index_name_input, chunk_size_input, chunk_overlap_input], outputs=file_table) + upload_button.click(upload_media, inputs=[url_input, index_name_input, chunk_size_input, chunk_overlap_input], outputs=upload_status) + + delete_all_button = gr.Button("Delete All", variant="primary", size="sm") + delete_all_button.click(delete_all_files, outputs=upload_status) + + + + # delete_button = gr.Button("Delete Index") + + # selected_file_output = gr.Textbox(label="Selected File") + # delete_button.click(delete_file, inputs=indices, outputs=upload_status) + + + +ui.queue() +app = gr.mount_gradio_app(app, ui, path="/") +share = False +enable_queue = True + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--host", type=str, default="0.0.0.0") + parser.add_argument("--port", type=int, default=os.getenv("UI_PORT", 5173)) + parser.add_argument("--concurrency-count", type=int, default=20) + parser.add_argument("--share", action="store_true") + + host_ip = os.getenv("host_ip") + DATAPREP_REDIS_PORT = os.getenv("DATAPREP_REDIS_PORT", 6007) + DATAPREP_ENDPOINT = os.getenv("DATAPREP_ENDPOINT", f"http://{host_ip}:{DATAPREP_REDIS_PORT}/v1/dataprep") + MEGA_SERVICE_PORT = os.getenv("MEGA_SERVICE_PORT", 7778) + + + backend_service_endpoint = os.getenv( + "BACKEND_SERVICE_ENDPOINT", f"http://{host_ip}:{MEGA_SERVICE_PORT}/v1/codegen" + ) + + # dataprep_ingest_endpoint = f"{DATAPREP_ENDPOINT}/ingest" + # dataprep_get_files_endpoint = f"{DATAPREP_ENDPOINT}/get" + # dataprep_delete_files_endpoint = f"{DATAPREP_ENDPOINT}/delete" + # dataprep_get_indices_endpoint = f"{DATAPREP_ENDPOINT}/indices" + + + args = 
parser.parse_args() + # logger.info(f"args: {args}") + global gateway_addr + gateway_addr = backend_service_endpoint + global dataprep_ingest_addr + dataprep_ingest_addr = dataprep_ingest_endpoint + global dataprep_get_files_addr + dataprep_get_files_addr = dataprep_get_files_endpoint + + + uvicorn.run(app, host=args.host, port=args.port) diff --git a/CodeGen/ui/gradio/requirements.txt b/CodeGen/ui/gradio/requirements.txt new file mode 100644 index 0000000000..2a4c8e1a30 --- /dev/null +++ b/CodeGen/ui/gradio/requirements.txt @@ -0,0 +1,4 @@ +gradio==5.22.0 +numpy==1.26.4 +opencv-python==4.10.0.82 +Pillow==10.3.0 From bc60f95c4a3314ac30b3ff6d060bcd482799b329 Mon Sep 17 00:00:00 2001 From: okhleif-IL Date: Fri, 28 Mar 2025 13:40:17 -0700 Subject: [PATCH 11/22] bug fixes and ui updates Signed-off-by: okhleif-IL --- .../intel/cpu/xeon/compose.yaml | 1 + CodeGen/ui/gradio/README.md | 2 +- CodeGen/ui/gradio/codegen_ui_gradio.py | 45 +++++++++++-------- 3 files changed, 28 insertions(+), 20 deletions(-) diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml index c932ece069..e88b3e4847 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml +++ b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml @@ -111,6 +111,7 @@ services: - MEGA_SERVICE_PORT=${MEGA_SERVICE_PORT} - host_ip=${host_ip} - DATAPREP_ENDPOINT=${DATAPREP_ENDPOINT} + - DATAPREP_REDIS_PORT=${DATAPREP_REDIS_PORT} ipc: host restart: always redis-vector-db: diff --git a/CodeGen/ui/gradio/README.md b/CodeGen/ui/gradio/README.md index 9769efb317..d994729c82 100644 --- a/CodeGen/ui/gradio/README.md +++ b/CodeGen/ui/gradio/README.md @@ -6,7 +6,7 @@ This project provides a user interface for summarizing documents and text using ### Build UI Docker Image -To build the frontend Docker image, navigate to the `GenAIExamples/DocSum/ui` directory and run the following command: +To build the frontend Docker image, navigate to the `GenAIExamples/CodeGen/ui` directory and run the following command: ```bash cd GenAIExamples/CodeGen/ui diff --git a/CodeGen/ui/gradio/codegen_ui_gradio.py b/CodeGen/ui/gradio/codegen_ui_gradio.py index 873d0c42b4..4f7e46d3a6 100644 --- a/CodeGen/ui/gradio/codegen_ui_gradio.py +++ b/CodeGen/ui/gradio/codegen_ui_gradio.py @@ -9,7 +9,6 @@ import os from pathlib import Path import gradio as gr -from gradio_pdf import PDF import requests import pandas as pd import os @@ -81,11 +80,23 @@ def upload_media(media, index=None, chunk_size=1500, chunk_overlap=100): if is_valid_url(file): print(file, " is valid URL") print("Ingesting URL...") + yield ( + gr.Textbox( + visible=True, + value="Ingesting URL...", + ) + ) value = ingest_url(file, index, chunk_size, chunk_overlap) requests.append(value) yield value elif file_ext in ['.pdf', '.txt']: print("Ingesting File...") + yield ( + gr.Textbox( + visible=True, + value="Ingesting file...", + ) + ) value = ingest_file(file, index, chunk_size, chunk_overlap) requests.append(value) yield value @@ -94,7 +105,7 @@ def upload_media(media, index=None, chunk_size=1500, chunk_overlap=100): yield ( gr.Textbox( visible=True, - value="Your file extension type is not supported.", + value="Your media is either an invalid URL or the file extension type is not supported. 
(Supports .pdf, .txt, url)", ) ) return @@ -173,14 +184,8 @@ def ingest_url(url, index=None, chunk_size=100, chunk_overlap=150): print("URL is ", url) url = str(url) if not is_valid_url(url): - print("Invalid URL") - # yield ( - # gr.Textbox( - # visible=True, - # value="Invalid URL entered. Please enter a valid URL", - # ) - # ) - return + return "Invalid URL entered. Please enter a valid URL" + headers = { # "Content-Type: multipart/form-data" } @@ -249,7 +254,7 @@ def update_table(index=None): def update_indices(): indices = get_indices() - df = pd.DataFrame(indices, columns=["File Databases"]) + df = pd.DataFrame(indices, columns=["File Indices"]) return df def delete_file(file, index=None): @@ -276,20 +281,21 @@ def delete_all_files(index=None): print("Delete all files ", response) table = update_table() - return response.text + return "Delete All status: " + response.text def get_indices(): headers = { # "Content-Type: application/json" } + print("URL IS ", dataprep_get_indices_endpoint) response = requests.post(url=dataprep_get_indices_endpoint, headers=headers) + indices = ["None"] print("Get Indices", response) - indices = response.json() + indices += response.json() return indices def update_indices_dropdown(): - indices = ["None"] + get_indices() - new_dd = gr.update(choices=indices, value="None") + new_dd = gr.update(choices=get_indices(), value="None") return new_dd @@ -311,13 +317,14 @@ def get_file_names(files): chatbot = gr.Chatbot(label="Chat History") prompt_input = gr.Textbox(label="Enter your query") with gr.Column(): - with gr.Row(scale=8): + with gr.Row(equal_height=True): # indices = ["None"] + get_indices() - database_dropdown = gr.Dropdown(choices=get_indices(), label="Select Index", value="None") - with gr.Row(scale=1): - db_refresh_button = gr.Button("Refresh", variant="primary") + database_dropdown = gr.Dropdown(choices=get_indices(), label="Select Index", value="None", scale=10) + db_refresh_button = gr.Button("Refresh Dropdown", scale=0.1) db_refresh_button.click(update_indices_dropdown, outputs=database_dropdown) use_agent = gr.Checkbox(label="Use Agent", container=False) + # with gr.Row(scale=1): + generate_button = gr.Button("Generate Code") From 56ad272b088917b0f55875ffc03ce718846b5eef Mon Sep 17 00:00:00 2001 From: Mustafa Date: Fri, 28 Mar 2025 14:42:45 -0700 Subject: [PATCH 12/22] add cpu xeon test Signed-off-by: Mustafa --- .../intel/cpu/xeon/compose.yaml | 1 + CodeGen/tests/test_compose_on_xeon.sh | 57 ++++++++++++++----- CodeGen/ui/gradio/codegen_ui_gradio.py | 1 - 3 files changed, 44 insertions(+), 15 deletions(-) diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml index c932ece069..e88b3e4847 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml +++ b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml @@ -111,6 +111,7 @@ services: - MEGA_SERVICE_PORT=${MEGA_SERVICE_PORT} - host_ip=${host_ip} - DATAPREP_ENDPOINT=${DATAPREP_ENDPOINT} + - DATAPREP_REDIS_PORT=${DATAPREP_REDIS_PORT} ipc: host restart: always redis-vector-db: diff --git a/CodeGen/tests/test_compose_on_xeon.sh b/CodeGen/tests/test_compose_on_xeon.sh index 6fc25963ac..14e01a0899 100644 --- a/CodeGen/tests/test_compose_on_xeon.sh +++ b/CodeGen/tests/test_compose_on_xeon.sh @@ -29,7 +29,7 @@ function build_docker_images() { fi cd $WORKPATH/docker_image_build - git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + # git clone --depth 1 --branch ${opea_branch} 
https://github.com/opea-project/GenAIComps.git git clone https://github.com/vllm-project/vllm.git && cd vllm VLLM_VER="$(git describe --tags "$(git rev-list --tags --max-count=1)" )" @@ -82,23 +82,35 @@ function validate_services() { local DOCKER_NAME="$4" local INPUT_DATA="$5" - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + if [[ "$SERVICE_NAME" == "ingest" ]]; then + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$INPUT_DATA" -F index_name=test_redis -H 'Content-Type: multipart/form-data' "$URL") - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Data preparation succeeded..." + else + echo "[ $SERVICE_NAME ] Data preparation failed..." + fi + + else + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi else - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log exit 1 fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. 
Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 fi sleep 5s } @@ -122,6 +134,14 @@ function validate_microservices() { "llm-textgen-server" \ '{"query":"def print_hello_world():", "max_tokens": 256}' + # Data ingest microservice + validate_services \ + "${ip_address}:6007/v1/dataprep/ingest" \ + "Data preparation succeeded" \ + "ingest" \ + "dataprep-redis-server" \ + 'link_list=["https://www.ces.tech/", "https://modin.readthedocs.io/en/latest/index.html"]' + } function validate_megaservice() { @@ -133,6 +153,14 @@ function validate_megaservice() { "codegen-xeon-backend-server" \ '{"messages": "def print_hello_world():", "max_tokens": 256}' + # Curl the Mega Service with index_name and agents_flag + validate_services \ + "${ip_address}:7778/v1/codegen" \ + "print" \ + "mega-codegen" \ + "codegen-xeon-backend-server" \ + '{ "index_name": "test_redis", "agents_flag": "True", "messages": "def print_hello_world():", "max_tokens": 256}' + } function validate_frontend() { @@ -191,7 +219,7 @@ function main() { stop_docker "${docker_compose_profiles[${i}]}" done - # build docker images + # # build docker images if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi # loop all profiles @@ -202,7 +230,7 @@ function main() { validate_microservices "${docker_llm_container_names[${i}]}" validate_megaservice - validate_frontend + # validate_frontend stop_docker "${docker_compose_profiles[${i}]}" sleep 5s @@ -212,3 +240,4 @@ function main() { } main + diff --git a/CodeGen/ui/gradio/codegen_ui_gradio.py b/CodeGen/ui/gradio/codegen_ui_gradio.py index 873d0c42b4..cb90288cc5 100644 --- a/CodeGen/ui/gradio/codegen_ui_gradio.py +++ b/CodeGen/ui/gradio/codegen_ui_gradio.py @@ -9,7 +9,6 @@ import os from pathlib import Path import gradio as gr -from gradio_pdf import PDF import requests import pandas as pd import os From d7275c2b7c87c1d2ee989c1f4e9e42c273195575 Mon Sep 17 00:00:00 2001 From: Mustafa Date: Fri, 28 Mar 2025 15:25:50 -0700 Subject: [PATCH 13/22] add cpu xeon test Signed-off-by: Mustafa --- CodeGen/tests/test_compose_on_gaudi.sh | 3 ++- CodeGen/tests/test_compose_on_xeon.sh | 2 +- CodeGen/ui/svelte/.env | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CodeGen/tests/test_compose_on_gaudi.sh b/CodeGen/tests/test_compose_on_gaudi.sh index c7b6b83f7e..60bd1b6782 100644 --- a/CodeGen/tests/test_compose_on_gaudi.sh +++ b/CodeGen/tests/test_compose_on_gaudi.sh @@ -29,7 +29,8 @@ function build_docker_images() { fi cd $WORKPATH/docker_image_build - git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + # git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + # Download Gaudi vllm of latest tag git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork VLLM_VER=$(git describe --tags "$(git rev-list --tags --max-count=1)") diff --git a/CodeGen/tests/test_compose_on_xeon.sh b/CodeGen/tests/test_compose_on_xeon.sh index 14e01a0899..aa517f7c98 100644 --- a/CodeGen/tests/test_compose_on_xeon.sh +++ b/CodeGen/tests/test_compose_on_xeon.sh @@ -219,7 +219,7 @@ function main() { stop_docker "${docker_compose_profiles[${i}]}" done - # # build docker images + # build docker images if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi # loop all profiles diff --git a/CodeGen/ui/svelte/.env b/CodeGen/ui/svelte/.env index 0bf85fa876..2efb56c8f5 100644 --- a/CodeGen/ui/svelte/.env +++ b/CodeGen/ui/svelte/.env @@ -1 +1 @@ -BASIC_URL = 
'http://backend_address:7778/v1/codegen' +BASIC_URL = 'http://10.98.56.44:7778/v1/codegen' From 815083eacf81e28ea33fcf5415b6c5857ac635f4 Mon Sep 17 00:00:00 2001 From: Mustafa Date: Fri, 28 Mar 2025 18:36:27 -0700 Subject: [PATCH 14/22] update gaudi test and compose Signed-off-by: Mustafa --- .../intel/hpu/gaudi/compose.yaml | 96 ++++++++++++++++++- CodeGen/docker_compose/set_env.sh | 3 +- CodeGen/tests/test_compose_on_gaudi.sh | 52 +++++++--- CodeGen/tests/test_compose_on_xeon.sh | 2 +- 4 files changed, 136 insertions(+), 17 deletions(-) diff --git a/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml b/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml index 2f669e9465..dab03fe080 100644 --- a/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml @@ -108,11 +108,15 @@ services: - http_proxy=${http_proxy} - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP} + - RETRIEVAL_SERVICE_HOST_IP=${RETRIEVAL_SERVICE_HOST_IP} + - REDIS_RETRIEVER_PORT=${REDIS_RETRIEVER_PORT} + - TEI_EMBEDDING_HOST_IP=${TEI_EMBEDDING_HOST_IP} + - EMBEDDER_PORT=${EMBEDDER_PORT} ipc: host restart: always - codegen-gaudi-ui-server: - image: ${REGISTRY:-opea}/codegen-ui:${TAG:-latest} - container_name: codegen-gaudi-ui-server + codegen-xeon-ui-server: + image: ${REGISTRY:-opea}/codegen-gradio-ui:${TAG:-latest} + container_name: codegen-xeon-ui-server depends_on: - codegen-gaudi-backend-server ports: @@ -122,9 +126,95 @@ services: - https_proxy=${https_proxy} - http_proxy=${http_proxy} - BASIC_URL=${BACKEND_SERVICE_ENDPOINT} + - MEGA_SERVICE_PORT=${MEGA_SERVICE_PORT} + - host_ip=${host_ip} + - DATAPREP_ENDPOINT=${DATAPREP_ENDPOINT} + - DATAPREP_REDIS_PORT=${DATAPREP_REDIS_PORT} ipc: host restart: always + redis-vector-db: + image: redis/redis-stack:7.2.0-v9 + container_name: redis-vector-db + ports: + - "${REDIS_DB_PORT}:${REDIS_DB_PORT}" + - "${REDIS_INSIGHTS_PORT}:${REDIS_INSIGHTS_PORT}" + dataprep-redis-server: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-redis-server + depends_on: + - redis-vector-db + ports: + - "${DATAPREP_REDIS_PORT}:5000" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: ${REDIS_URL} + REDIS_HOST: ${host_ip} + INDEX_NAME: ${INDEX_NAME} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + LOGFLAG: true + restart: unless-stopped + tei-embedding-serving: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + container_name: tei-embedding-serving + entrypoint: /bin/sh -c "apt-get update && apt-get install -y curl && text-embeddings-router --json-output --model-id ${EMBEDDING_MODEL_ID} --auto-truncate" + ports: + - "${TEI_EMBEDDER_PORT:-12000}:80" + volumes: + - "./data:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + host_ip: ${host_ip} + HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + healthcheck: + test: ["CMD", "curl", "-f", "http://${host_ip}:${TEI_EMBEDDER_PORT}/health"] + interval: 10s + timeout: 6s + retries: 48 + tei-embedding-server: + image: ${REGISTRY:-opea}/embedding:${TAG:-latest} + container_name: tei-embedding-server + ports: + - "${EMBEDDER_PORT:-10201}:6000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + EMBEDDING_COMPONENT_NAME: "OPEA_TEI_EMBEDDING" + depends_on: + tei-embedding-serving: + condition: 
service_healthy + restart: unless-stopped + retriever-redis: + image: ${REGISTRY:-opea}/retriever:${TAG:-latest} + container_name: retriever-redis + depends_on: + - redis-vector-db + ports: + - "${REDIS_RETRIEVER_PORT}:${REDIS_RETRIEVER_PORT}" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: ${REDIS_URL} + REDIS_DB_PORT: ${REDIS_DB_PORT} + REDIS_INSIGHTS_PORT: ${REDIS_INSIGHTS_PORT} + REDIS_RETRIEVER_PORT: ${REDIS_RETRIEVER_PORT} + INDEX_NAME: ${INDEX_NAME} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + LOGFLAG: ${LOGFLAG} + RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_REDIS} + restart: unless-stopped + networks: default: driver: bridge diff --git a/CodeGen/docker_compose/set_env.sh b/CodeGen/docker_compose/set_env.sh index 559f00cf2a..73805e6095 100644 --- a/CodeGen/docker_compose/set_env.sh +++ b/CodeGen/docker_compose/set_env.sh @@ -49,4 +49,5 @@ export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" export DATAPREP_REDIS_PORT=6007 export DATAPREP_ENDPOINT="http://${host_ip}:${DATAPREP_REDIS_PORT}/v1/dataprep" export LOGFLAG=false -export MODEL_CACHE="./data" \ No newline at end of file +export MODEL_CACHE="./data" +export NUM_CARDS=1 \ No newline at end of file diff --git a/CodeGen/tests/test_compose_on_gaudi.sh b/CodeGen/tests/test_compose_on_gaudi.sh index 60bd1b6782..1944b78e16 100644 --- a/CodeGen/tests/test_compose_on_gaudi.sh +++ b/CodeGen/tests/test_compose_on_gaudi.sh @@ -30,6 +30,7 @@ function build_docker_images() { cd $WORKPATH/docker_image_build # git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + git clone --depth 1 --branch codegen_rag_agent_v2 https://github.com/MSCetin37/GenAIComps.git # Download Gaudi vllm of latest tag git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork @@ -83,23 +84,34 @@ function validate_services() { local DOCKER_NAME="$4" local INPUT_DATA="$5" - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + if [[ "$SERVICE_NAME" == "ingest" ]]; then + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$INPUT_DATA" -F index_name=test_redis -H 'Content-Type: multipart/form-data' "$URL") - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Data preparation succeeded..." + else + echo "[ $SERVICE_NAME ] Data preparation failed..." + fi - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." + else + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." 
+ else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi else - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log exit 1 fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 fi sleep 5s } @@ -123,6 +135,14 @@ function validate_microservices() { "llm-textgen-gaudi-server" \ '{"query":"def print_hello_world():"}' + # Data ingest microservice + validate_services \ + "${ip_address}:6007/v1/dataprep/ingest" \ + "Data preparation succeeded" \ + "ingest" \ + "dataprep-redis-server" \ + 'link_list=["https://www.ces.tech/", "https://modin.readthedocs.io/en/latest/index.html"]' + } function validate_megaservice() { @@ -134,6 +154,14 @@ function validate_megaservice() { "codegen-gaudi-backend-server" \ '{"messages": "def print_hello_world():"}' + # Curl the Mega Service with index_name and agents_flag + validate_services \ + "${ip_address}:7778/v1/codegen" \ + "print" \ + "mega-codegen" \ + "codegen-xeon-backend-server" \ + '{ "index_name": "test_redis", "agents_flag": "True", "messages": "def print_hello_world():", "max_tokens": 256}' + } function validate_frontend() { @@ -202,7 +230,7 @@ function main() { validate_microservices "${docker_llm_container_names[${i}]}" validate_megaservice - validate_frontend + # validate_frontend stop_docker "${docker_compose_profiles[${i}]}" sleep 5s diff --git a/CodeGen/tests/test_compose_on_xeon.sh b/CodeGen/tests/test_compose_on_xeon.sh index aa517f7c98..4e210bf540 100644 --- a/CodeGen/tests/test_compose_on_xeon.sh +++ b/CodeGen/tests/test_compose_on_xeon.sh @@ -30,6 +30,7 @@ function build_docker_images() { cd $WORKPATH/docker_image_build # git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + git clone --depth 1 --branch codegen_rag_agent_v2 https://github.com/MSCetin37/GenAIComps.git git clone https://github.com/vllm-project/vllm.git && cd vllm VLLM_VER="$(git describe --tags "$(git rev-list --tags --max-count=1)" )" @@ -92,7 +93,6 @@ function validate_services() { fi else - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") if [ "$HTTP_STATUS" -eq 200 ]; then echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." 
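For reference, the two request shapes exercised by the updated `validate_services` helper look roughly like the following (a sketch only; the host IP variable, index name, and sample payloads mirror the values already used in these test scripts and are illustrative):

```bash
# "ingest" case: multipart/form-data POST to the dataprep service (port 6007 per set_env.sh)
curl -s -o /dev/null -w "%{http_code}" -X POST \
  -F 'link_list=["https://www.ces.tech/"]' \
  -F index_name=test_redis \
  -H 'Content-Type: multipart/form-data' \
  "http://${ip_address}:6007/v1/dataprep/ingest"

# default case: JSON POST whose response body is then grepped for the expected substring
curl -s -X POST \
  -d '{"messages": "def print_hello_world():", "max_tokens": 256}' \
  -H 'Content-Type: application/json' \
  "http://${ip_address}:7778/v1/codegen"
```
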
From dc88a1f24956784b9f52db07036cc80636326aa7 Mon Sep 17 00:00:00 2001 From: Mustafa Date: Fri, 28 Mar 2025 18:40:36 -0700 Subject: [PATCH 15/22] update gaudi test and compose Signed-off-by: Mustafa --- CodeGen/ui/svelte/.env | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CodeGen/ui/svelte/.env b/CodeGen/ui/svelte/.env index 2efb56c8f5..0bf85fa876 100644 --- a/CodeGen/ui/svelte/.env +++ b/CodeGen/ui/svelte/.env @@ -1 +1 @@ -BASIC_URL = 'http://10.98.56.44:7778/v1/codegen' +BASIC_URL = 'http://backend_address:7778/v1/codegen' From e11d51668c5f2d1c30af2aa72871f72a0df70d4f Mon Sep 17 00:00:00 2001 From: okhleif-IL Date: Mon, 31 Mar 2025 12:23:10 -0700 Subject: [PATCH 16/22] fixed output for index uses Signed-off-by: okhleif-IL --- CodeGen/ui/gradio/codegen_ui_gradio.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/CodeGen/ui/gradio/codegen_ui_gradio.py b/CodeGen/ui/gradio/codegen_ui_gradio.py index 4f7e46d3a6..770608a0c9 100644 --- a/CodeGen/ui/gradio/codegen_ui_gradio.py +++ b/CodeGen/ui/gradio/codegen_ui_gradio.py @@ -147,17 +147,19 @@ def generate_code(query, index=None, use_agent=False): line = line.decode('utf-8') if line.startswith("data: "): # Only process lines starting with "data: " json_part = line[len("data: "):] # Remove the "data: " prefix - if json_part.strip() == "[DONE]": # Ignore the DONE marker - continue - try: - json_obj = json.loads(json_part) # Convert to dictionary - if "choices" in json_obj: - for choice in json_obj["choices"]: - if "text" in choice: - # Yield each token individually - yield choice["text"] - except json.JSONDecodeError: - print("Error parsing JSON:", json_part) + else: + json_part = line + if json_part.strip() == "[DONE]": # Ignore the DONE marker + continue + try: + json_obj = json.loads(json_part) # Convert to dictionary + if "choices" in json_obj: + for choice in json_obj["choices"]: + if "text" in choice: + # Yield each token individually + yield choice["text"] + except json.JSONDecodeError: + print("Error parsing JSON:", json_part) def ingest_file(file, index=None, chunk_size=100, chunk_overlap=150): From 7bf2eb5895e933843a01b10a5d10efb46c43ba9a Mon Sep 17 00:00:00 2001 From: okhleif-IL Date: Mon, 31 Mar 2025 15:34:09 -0700 Subject: [PATCH 17/22] added services for gaudi Signed-off-by: okhleif-IL --- .../intel/hpu/gaudi/compose.yaml | 59 ++++++++++++++++++- 1 file changed, 56 insertions(+), 3 deletions(-) diff --git a/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml b/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml index 2f669e9465..dd0e5a2b29 100644 --- a/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml @@ -108,10 +108,15 @@ services: - http_proxy=${http_proxy} - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP} + - RETRIEVAL_SERVICE_HOST_IP=${RETRIEVAL_SERVICE_HOST_IP} + - REDIS_RETRIEVER_PORT=${REDIS_RETRIEVER_PORT} + - TEI_EMBEDDING_HOST_IP=${TEI_EMBEDDING_HOST_IP} + - EMBEDDER_PORT=${EMBEDDER_PORT} + - host_ip=${host_ip} ipc: host restart: always codegen-gaudi-ui-server: - image: ${REGISTRY:-opea}/codegen-ui:${TAG:-latest} + image: ${REGISTRY:-opea}/codegen-gradio-ui:${TAG:-latest} container_name: codegen-gaudi-ui-server depends_on: - codegen-gaudi-backend-server @@ -122,9 +127,57 @@ services: - https_proxy=${https_proxy} - http_proxy=${http_proxy} - BASIC_URL=${BACKEND_SERVICE_ENDPOINT} + - MEGA_SERVICE_PORT=${MEGA_SERVICE_PORT} + - host_ip=${host_ip} + - 
DATAPREP_ENDPOINT=${DATAPREP_ENDPOINT} + - DATAPREP_REDIS_PORT=${DATAPREP_REDIS_PORT} ipc: host restart: always - + redis-vector-db: + image: redis/redis-stack:7.2.0-v9 + container_name: redis-vector-db + ports: + - "${REDIS_DB_PORT}:${REDIS_DB_PORT}" + - "${REDIS_INSIGHTS_PORT}:${REDIS_INSIGHTS_PORT}" + dataprep-redis-server: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-redis-server + depends_on: + - redis-vector-db + ports: + - "${DATAPREP_REDIS_PORT}:5000" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: ${REDIS_URL} + REDIS_HOST: ${host_ip} + INDEX_NAME: ${INDEX_NAME} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + LOGFLAG: true + restart: unless-stopped + retriever-redis: + image: ${REGISTRY:-opea}/retriever:${TAG:-latest} + container_name: retriever-redis + depends_on: + - redis-vector-db + ports: + - "${REDIS_RETRIEVER_PORT}:${REDIS_RETRIEVER_PORT}" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: ${REDIS_URL} + REDIS_DB_PORT: ${REDIS_DB_PORT} + REDIS_INSIGHTS_PORT: ${REDIS_INSIGHTS_PORT} + REDIS_RETRIEVER_PORT: ${REDIS_RETRIEVER_PORT} + INDEX_NAME: ${INDEX_NAME} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + LOGFLAG: ${LOGFLAG} + RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_REDIS} + restart: unless-stopped networks: default: - driver: bridge + driver: bridge \ No newline at end of file From 9e84e73b72a98051e316a7eb19fda95e3044f30a Mon Sep 17 00:00:00 2001 From: Mustafa Date: Thu, 13 Feb 2025 22:11:53 -0800 Subject: [PATCH 18/22] update the compose file Signed-off-by: Mustafa initial Signed-off-by: Mustafa update codegen Signed-off-by: Mustafa update for codegen Signed-off-by: Mustafa add agents Signed-off-by: Mustafa env updates Signed-off-by: Mustafa update codegen Signed-off-by: Mustafa merged to main Signed-off-by: Mustafa updates Signed-off-by: Mustafa update the env variables Signed-off-by: Mustafa update compose.yaml Signed-off-by: Mustafa compose.yaml updates Signed-off-by: Mustafa update readme file Signed-off-by: Mustafa update readme Signed-off-by: Mustafa Gradio UI for CodeGen (#4) * update the compose file Signed-off-by: Mustafa initial Signed-off-by: Mustafa added microservice ports Signed-off-by: okhleif-IL update codegen Signed-off-by: Mustafa update for codegen Signed-off-by: Mustafa Initial commit for Gradio UI Signed-off-by: okhleif-IL New UI Signed-off-by: okhleif-IL prepare for merge Signed-off-by: okhleif-IL add agents Signed-off-by: Mustafa env updates Signed-off-by: Mustafa update codegen Signed-off-by: Mustafa merged to main Signed-off-by: Mustafa updates Signed-off-by: Mustafa UI Updates Signed-off-by: okhleif-IL added dockerfile Signed-off-by: okhleif-IL removed files dataframe Signed-off-by: okhleif-IL updated file upload Signed-off-by: okhleif-IL added checkbox for agent Signed-off-by: okhleif-IL key_index_name --> index_name Signed-off-by: okhleif-IL added / removed print statements Signed-off-by: okhleif-IL Support for data streaming (from Melanie) Signed-off-by: okhleif-IL fixed file not supported bug Signed-off-by: okhleif-IL added refresh button to index Signed-off-by: okhleif-IL simplified README Signed-off-by: okhleif-IL * updated readme and fixed merge Signed-off-by: okhleif-IL * reverted changes Signed-off-by: okhleif-IL --------- Signed-off-by: okhleif-IL Co-authored-by: Mustafa add 
cpu xeon test Signed-off-by: Mustafa add cpu xeon test Signed-off-by: Mustafa codegen code cleaning Signed-off-by: Mustafa --- CodeGen/Dockerfile | 47 +- CodeGen/README.md | 72 +++- CodeGen/codegen.py | 228 +++++++++- .../docker_compose/intel/cpu/xeon/README.md | 86 +++- .../intel/cpu/xeon/compose.yaml | 98 ++++- .../docker_compose/intel/hpu/gaudi/README.md | 84 +++- CodeGen/docker_compose/set_env.sh | 34 +- CodeGen/docker_image_build/build.yaml | 6 + CodeGen/tests/test_compose_on_gaudi.sh | 3 +- CodeGen/tests/test_compose_on_xeon.sh | 55 ++- CodeGen/ui/docker/Dockerfile.gradio | 33 ++ CodeGen/ui/gradio/README.md | 65 +++ CodeGen/ui/gradio/codegen_ui_gradio.py | 401 ++++++++++++++++++ CodeGen/ui/gradio/requirements.txt | 4 + CodeGen/ui/svelte/.env | 2 +- 15 files changed, 1141 insertions(+), 77 deletions(-) create mode 100644 CodeGen/ui/docker/Dockerfile.gradio create mode 100644 CodeGen/ui/gradio/README.md create mode 100644 CodeGen/ui/gradio/codegen_ui_gradio.py create mode 100644 CodeGen/ui/gradio/requirements.txt diff --git a/CodeGen/Dockerfile b/CodeGen/Dockerfile index 5305a9d89f..b2b4155fd7 100644 --- a/CodeGen/Dockerfile +++ b/CodeGen/Dockerfile @@ -1,8 +1,51 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -ARG BASE_TAG=latest -FROM opea/comps-base:$BASE_TAG +# Stage 1: base setup used by other stages +FROM python:3.11-slim AS base + +# get security updates +RUN apt-get update && apt-get upgrade -y && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +ENV HOME=/home/user + +RUN useradd -m -s /bin/bash user && \ + mkdir -p $HOME && \ + chown -R user $HOME + +WORKDIR $HOME + + +# Stage 2: latest GenAIComps sources +FROM base AS git + +RUN apt-get update && apt-get install -y --no-install-recommends git +# RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git +COPY GenAIComps GenAIComps + + +# Stage 3: common layer shared by services using GenAIComps +FROM base AS comps-base + +# copy just relevant parts +COPY --from=git $HOME/GenAIComps/comps $HOME/GenAIComps/comps +COPY --from=git $HOME/GenAIComps/*.* $HOME/GenAIComps/LICENSE $HOME/GenAIComps/ + +WORKDIR $HOME/GenAIComps +RUN pip install --no-cache-dir --upgrade pip setuptools && \ + pip install --no-cache-dir -r $HOME/GenAIComps/requirements.txt +WORKDIR $HOME + +ENV PYTHONPATH=$PYTHONPATH:$HOME/GenAIComps + +USER user + + +# Stage 4: unique part +FROM comps-base + +ENV LANG=C.UTF-8 COPY ./codegen.py $HOME/codegen.py diff --git a/CodeGen/README.md b/CodeGen/README.md index 00d54adbc2..692e01848b 100644 --- a/CodeGen/README.md +++ b/CodeGen/README.md @@ -1,6 +1,6 @@ # Code Generation Application -Code Generation (CodeGen) Large Language Models (LLMs) are specialized AI models designed for the task of generating computer code. Such models undergo training with datasets that encompass repositories, specialized documentation, programming code, relevant web content, and other related data. They possess a deep understanding of various programming languages, coding patterns, and software development concepts. CodeGen LLMs are engineered to assist developers and programmers. When these LLMs are seamlessly integrated into the developer's Integrated Development Environment (IDE), they possess a comprehensive understanding of the coding context, which includes elements such as comments, function names, and variable names. This contextual awareness empowers them to provide more refined and contextually relevant coding suggestions. 
+Code Generation (CodeGen) Large Language Models (LLMs) are specialized AI models designed for the task of generating computer code. Such models undergo training with datasets that encompass repositories, specialized documentation, programming code, relevant web content, and other related data. They possess a deep understanding of various programming languages, coding patterns, and software development concepts. CodeGen LLMs are engineered to assist developers and programmers. When these LLMs are seamlessly integrated into the developer's Integrated Development Environment (IDE), they possess a comprehensive understanding of the coding context, which includes elements such as comments, function names, and variable names. This contextual awareness empowers them to provide more refined and contextually relevant coding suggestions. Additionally Retrieval-Augmented Generation (RAG) and Agents are parts of the CodeGen example which provide an additional layer of intelligence and adaptability, ensuring that the generated code is not only relevant but also accurate, efficient, and tailored to the specific needs of the developers and programmers. The capabilities of CodeGen LLMs include: @@ -20,6 +20,7 @@ The workflow falls into the following architecture: The CodeGen example is implemented using the component-level microservices defined in [GenAIComps](https://github.com/opea-project/GenAIComps). The flow chart below shows the information flow between different microservices for this example. + ```mermaid --- config: @@ -28,7 +29,7 @@ config: rankSpacing: 100 curve: linear themeVariables: - fontSize: 50px + fontSize: 25px --- flowchart LR %% Colors %% @@ -37,34 +38,56 @@ flowchart LR classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 classDef invisible fill:transparent,stroke:transparent; style CodeGen-MegaService stroke:#000000 - %% Subgraphs %% - subgraph CodeGen-MegaService["CodeGen MegaService "] + subgraph CodeGen-MegaService["CodeGen-MegaService"] direction LR - LLM([LLM MicroService]):::blue + EM([Embedding
MicroService]):::blue + RET([Retrieval
MicroService]):::blue + RER([Agents]):::blue + LLM([LLM
MicroService]):::blue end - subgraph UserInterface[" User Interface "] + subgraph User Interface direction LR - a([User Input Query]):::orchid - UI([UI server
]):::orchid + a([Submit Query Tab]):::orchid + UI([UI server]):::orchid + Ingest([Manage Resources]):::orchid end + CLIP_EM{{Embedding
service}} + VDB{{Vector DB}} + V_RET{{Retriever
service}} + Ingest{{Ingest data}} + DP([Data Preparation]):::blue + LLM_gen{{TGI Service}} + GW([CodeGen GateWay]):::orange - LLM_gen{{LLM Service
}} - GW([CodeGen GateWay
]):::orange - + %% Data Preparation flow + %% Ingest data flow + direction LR + Ingest[Ingest data] --> UI + UI --> DP + DP <-.-> CLIP_EM %% Questions interaction direction LR a[User Input Query] --> UI UI --> GW GW <==> CodeGen-MegaService + EM ==> RET + RET ==> RER + RER ==> LLM %% Embedding service flow direction LR + EM <-.-> CLIP_EM + RET <-.-> V_RET LLM <-.-> LLM_gen + direction TB + %% Vector DB interaction + V_RET <-.->VDB + DP <-.->VDB ``` ## Deploy CodeGen Service @@ -138,11 +161,25 @@ Refer to the [Gaudi Guide](./docker_compose/intel/hpu/gaudi/README.md) to build Find the corresponding [compose.yaml](./docker_compose/intel/cpu/xeon/compose.yaml). +Start CodeGen based on TGI service: + ```bash -cd GenAIExamples/CodeGen/docker_compose/intel/cpu/xeon -docker compose up -d +cd GenAIExamples/CodeGen/docker_compose +source set_env.sh +cd intel/cpu/xeon +docker compose --profile codegen-xeon-tgi up -d +``` + +Start CodeGen based on vLLM service: + +```bash +cd GenAIExamples/CodeGen/docker_compose +source set_env.sh +cd intel/cpu/xeon +docker compose --profile codegen-xeon-vllm up -d ``` + Refer to the [Xeon Guide](./docker_compose/intel/cpu/xeon/README.md) for more instructions on building docker images from source. ### Deploy CodeGen on Kubernetes using Helm Chart @@ -161,6 +198,15 @@ Two ways of consuming CodeGen Service: -d '{"messages": "Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception."}' ``` + If the user wants a CodeGen service with RAG and Agents based on dedicated documentation. + + ```bash + curl http://localhost:7778/v1/codegen \ + -H "Content-Type: application/json" \ + -d '{"agents_flag": "True", "index_name": "my_API_document", "messages": "Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception."}' + + ``` + 2. Access via frontend To access the frontend, open the following URL in your browser: http://{host_ip}:5173. diff --git a/CodeGen/codegen.py b/CodeGen/codegen.py index 16db9aa262..af520211a6 100644 --- a/CodeGen/codegen.py +++ b/CodeGen/codegen.py @@ -3,8 +3,9 @@ import asyncio import os +import ast -from comps import MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceRoleType, ServiceType +from comps import MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceRoleType, ServiceType, CustomLogger from comps.cores.mega.utils import handle_message from comps.cores.proto.api_protocol import ( ChatCompletionRequest, @@ -16,20 +17,107 @@ from comps.cores.proto.docarray import LLMParams from fastapi import Request from fastapi.responses import StreamingResponse +from langchain.prompts import PromptTemplate + +logger = CustomLogger("opea_dataprep_microservice") +logflag = os.getenv("LOGFLAG", False) MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 7778)) + LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0") LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000)) +RETRIEVAL_SERVICE_HOST_IP = os.getenv("RETRIEVAL_SERVICE_HOST_IP", "0.0.0.0") +REDIS_RETRIEVER_PORT = int(os.getenv("REDIS_RETRIEVER_PORT", 7000)) + +TEI_EMBEDDING_HOST_IP = os.getenv("TEI_EMBEDDING_HOST_IP", "0.0.0.0") +EMBEDDER_PORT = int(os.getenv("EMBEDDER_PORT", 6000)) + +grader_prompt = """You are a grader assessing relevance of a retrieved document to a user question. 
\n +Here is the user question: {question} \n +Here is the retrieved document: \n\n {document} \n\n + +If the document contains keywords related to the user question, grade it as relevant. +It does not need to be a stringent test. The goal is to filter out erroneous retrievals. +Rules: +- Do not return the question, the provided document or explanation. +- if this document is relevant to the question, return 'yes' otherwise return 'no'. +- Do not include any other details in your response. +""" + +def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs): + """ + Aligns the inputs based on the service type of the current node. + + Parameters: + - self: Reference to the current instance of the class. + - inputs: Dictionary containing the inputs for the current node. + - cur_node: The current node in the service orchestrator. + - runtime_graph: The runtime graph of the service orchestrator. + - llm_parameters_dict: Dictionary containing the LLM parameters. + - kwargs: Additional keyword arguments. + + Returns: + - inputs: The aligned inputs for the current node. + """ + + # Check if the current service type is EMBEDDING + if self.services[cur_node].service_type == ServiceType.EMBEDDING: + # Store the input query for later use + self.input_query = inputs["query"] + # Set the input for the embedding service + inputs["input"] = inputs["query"] + + # Check if the current service type is RETRIEVER + if self.services[cur_node].service_type == ServiceType.RETRIEVER: + # Extract the embedding from the inputs + embedding = inputs['data'][0]['embedding'] + # Align the inputs for the retriever service + inputs = { + "index_name": llm_parameters_dict["index_name"], + "text": self.input_query, + "embedding": embedding + } + + return inputs + class CodeGenService: def __init__(self, host="0.0.0.0", port=8000): self.host = host self.port = port - self.megaservice = ServiceOrchestrator() + ServiceOrchestrator.align_inputs = align_inputs + self.megaservice_llm = ServiceOrchestrator() + self.megaservice_retriever = ServiceOrchestrator() + self.megaservice_retriever_llm = ServiceOrchestrator() self.endpoint = str(MegaServiceEndpoint.CODE_GEN) def add_remote_service(self): + """ + Adds remote microservices to the service orchestrators and defines the flow between them. 
+ """ + + # Define the embedding microservice + embedding = MicroService( + name="embedding", + host=TEI_EMBEDDING_HOST_IP, + port=EMBEDDER_PORT, + endpoint="/v1/embeddings", + use_remote_service=True, + service_type=ServiceType.EMBEDDING, + ) + + # Define the retriever microservice + retriever = MicroService( + name="retriever", + host=RETRIEVAL_SERVICE_HOST_IP, + port=REDIS_RETRIEVER_PORT, + endpoint="/v1/retrieval", + use_remote_service=True, + service_type=ServiceType.RETRIEVER, + ) + + # Define the LLM microservice llm = MicroService( name="llm", host=LLM_SERVICE_HOST_IP, @@ -38,13 +126,63 @@ def add_remote_service(self): use_remote_service=True, service_type=ServiceType.LLM, ) - self.megaservice.add(llm) + + # Add the microservices to the megaservice_retriever_llm orchestrator and define the flow + self.megaservice_retriever_llm.add(embedding).add(retriever).add(llm) + self.megaservice_retriever_llm.flow_to(embedding, retriever) + self.megaservice_retriever_llm.flow_to(retriever, llm) + + # Add the microservices to the megaservice_retriever orchestrator and define the flow + self.megaservice_retriever.add(embedding).add(retriever) + self.megaservice_retriever.flow_to(embedding, retriever) + + # Add the LLM microservice to the megaservice_llm orchestrator + self.megaservice_llm.add(llm) + + async def read_streaming_response(self, response: StreamingResponse): + """ + Reads the streaming response from a StreamingResponse object. + + Parameters: + - self: Reference to the current instance of the class. + - response: The StreamingResponse object to read from. + + Returns: + - str: The complete response body as a decoded string. + """ + body = b"" # Initialize an empty byte string to accumulate the response chunks + async for chunk in response.body_iterator: + body += chunk # Append each chunk to the body + return body.decode("utf-8") # Decode the accumulated byte string to a regular string async def handle_request(self, request: Request): + """ + Handles the incoming request, processes it through the appropriate microservices, + and returns the response. + + Parameters: + - self: Reference to the current instance of the class. + - request: The incoming request object. + + Returns: + - ChatCompletionResponse: The response from the LLM microservice. 
+        """
+        # Parse the incoming request data
         data = await request.json()
+
+        # Get the stream option from the request data, default to True if not provided
         stream_opt = data.get("stream", True)
-        chat_request = ChatCompletionRequest.parse_obj(data)
+
+        # Validate and parse the chat request data
+        chat_request = ChatCompletionRequest.model_validate(data)
+
+        # Handle the chat messages to generate the prompt
         prompt = handle_message(chat_request.messages)
+
+        # Get the agents flag from the request data, default to False if not provided
+        agents_flag = data.get("agents_flag", False)
+
+        # Define the LLM parameters
         parameters = LLMParams(
             max_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024,
             top_k=chat_request.top_k if chat_request.top_k else 10,
@@ -54,18 +192,88 @@ async def handle_request(self, request: Request):
             presence_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 0.0,
             repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03,
             stream=stream_opt,
+            index_name=chat_request.index_name,
         )
-        result_dict, runtime_graph = await self.megaservice.schedule(
-            initial_inputs={"query": prompt}, llm_parameters=parameters
+
+        # Initialize the initial inputs with the generated prompt
+        initial_inputs = {"query": prompt}
+
+        # Check if an index name is provided in the parameters
+        if parameters.index_name:
+            if agents_flag:
+                # Schedule the retriever megaservice (embedding + retriever)
+                result_ret, runtime_graph = await self.megaservice_retriever.schedule(
+                    initial_inputs=initial_inputs, llm_parameters=parameters
+                )
+
+                # Switch to the LLM-only megaservice for the final generation step
+                megaservice = self.megaservice_llm
+
+                relevant_docs = []
+                for doc in result_ret["retriever/MicroService"]["retrieved_docs"]:
+                    # Create the PromptTemplate for the relevance grader
+                    prompt_agent = PromptTemplate(template=grader_prompt, input_variables=["question", "document"])
+
+                    # Format the template with the input variables
+                    formatted_prompt = prompt_agent.format(question=prompt, document=doc["text"])
+                    initial_inputs_grader = {"query": formatted_prompt}
+
+                    # Schedule the LLM microservice for grading
+                    grade, runtime_graph = await self.megaservice_llm.schedule(
+                        initial_inputs=initial_inputs_grader, llm_parameters=parameters
+                    )
+
+                    for node, response in grade.items():
+                        if isinstance(response, StreamingResponse):
+                            # Read the streaming response
+                            grader_response = await self.read_streaming_response(response)
+
+                            # Replace null with None so the payload can be parsed with ast.literal_eval
+                            grader_response = grader_response.replace("null", "None")
+
+                            # Split the response by "data:" and process each part
+                            for i in grader_response.split("data:"):
+                                if '"text":' in i:
+                                    # Convert the string to a dictionary
+                                    r = ast.literal_eval(i)
+                                    # Check if the grader marked the document as relevant
+                                    if r["choices"][0]["text"] == "yes":
+                                        # Append the document to the relevant_docs list
+                                        relevant_docs.append(doc)
+
+                # Update the initial inputs with the relevant documents
+                if len(relevant_docs) > 0:
+                    print(f"[ CodeGenService - handle_request ] {len(relevant_docs)} relevant document(s) found.")
+                    query = initial_inputs["query"]
+                    initial_inputs = {}
+                    initial_inputs["retrieved_docs"] = relevant_docs
+                    initial_inputs["initial_query"] = query
+
+                else:
+                    print("[ CodeGenService - handle_request ] Could not find any relevant documents. The query will be used as input to the LLM.")
+
+            else:
+                # Use the combined retriever and LLM megaservice
+                megaservice = self.megaservice_retriever_llm
+        else:
+            # Use the LLM megaservice only
+            megaservice = self.megaservice_llm
+
+        # Schedule the final megaservice
+        result_dict, runtime_graph = await megaservice.schedule(
+            initial_inputs=initial_inputs, llm_parameters=parameters
         )
+
         for node, response in result_dict.items():
-            # Here it suppose the last microservice in the megaservice is LLM.
+            # Check if the last microservice in the megaservice is the LLM
             if (
                 isinstance(response, StreamingResponse)
-                and node == list(self.megaservice.services.keys())[-1]
-                and self.megaservice.services[node].service_type == ServiceType.LLM
+                and node == list(megaservice.services.keys())[-1]
+                and megaservice.services[node].service_type == ServiceType.LLM
             ):
                 return response
+
+        # Get the response from the last node in the runtime graph
         last_node = runtime_graph.all_leaves()[-1]
         response = result_dict[last_node]["text"]
         choices = []
@@ -96,4 +304,4 @@ def start(self):
 if __name__ == "__main__":
     chatqna = CodeGenService(port=MEGA_SERVICE_PORT)
     chatqna.add_remote_service()
-    chatqna.start()
+    chatqna.start()
\ No newline at end of file
diff --git a/CodeGen/docker_compose/intel/cpu/xeon/README.md b/CodeGen/docker_compose/intel/cpu/xeon/README.md
index 3cc7a19b3c..fc8b81b45f 100644
--- a/CodeGen/docker_compose/intel/cpu/xeon/README.md
+++ b/CodeGen/docker_compose/intel/cpu/xeon/README.md
@@ -13,28 +13,77 @@ After launching your instance, you can connect to it using SSH (for Linux instan
 
 ## 🚀 Start Microservices and MegaService
 
-The CodeGen megaservice manages a single microservice called LLM within a Directed Acyclic Graph (DAG). In the diagram above, the LLM microservice is a language model microservice that generates code snippets based on the user's input query. The TGI service serves as a text generation interface, providing a RESTful API for the LLM microservice. The CodeGen Gateway acts as the entry point for the CodeGen application, invoking the Megaservice to generate code snippets in response to the user's input query.
+The CodeGen megaservice manages several microservices, including the 'Embedding MicroService', 'Retrieval MicroService' and 'LLM MicroService', within a Directed Acyclic Graph (DAG). In the diagram below, the LLM microservice generates code snippets based on the user's input query. The TGI service serves as a text generation interface, providing a RESTful API for the LLM microservice. Data Preparation allows users to save or update documents and online resources in the vector database; users can upload files or provide URLs, and manage their saved resources. The CodeGen Gateway acts as the entry point for the CodeGen application, invoking the Megaservice to generate code snippets in response to the user's input query.
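+
+Which of these services handle a given request depends on the request body: a plain request goes straight to the LLM, a request that supplies an `index_name` runs the Embedding → Retrieval → LLM flow, and adding `agents_flag` additionally grades each retrieved document for relevance before generation. The requests below are only a sketch of the three shapes; the host, port, and index name are placeholders to adjust for your deployment:
+
+```bash
+# LLM only (no index)
+curl http://localhost:7778/v1/codegen \
+  -H "Content-Type: application/json" \
+  -d '{"messages": "def print_hello_world():"}'
+
+# RAG: embed the query, retrieve from an index, then generate
+curl http://localhost:7778/v1/codegen \
+  -H "Content-Type: application/json" \
+  -d '{"index_name": "my_API_document", "messages": "def print_hello_world():"}'
+
+# RAG + agents: retrieved documents are graded for relevance before generation
+curl http://localhost:7778/v1/codegen \
+  -H "Content-Type: application/json" \
+  -d '{"agents_flag": "True", "index_name": "my_API_document", "messages": "def print_hello_world():"}'
+```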
The mega flow of the CodeGen application, from user's input query to the application's output response, is as follows: ```mermaid +--- +config: + flowchart: + nodeSpacing: 400 + rankSpacing: 100 + curve: linear + themeVariables: + fontSize: 25px +--- flowchart LR - subgraph CodeGen + %% Colors %% + classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 + classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 + classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 + classDef invisible fill:transparent,stroke:transparent; + style CodeGen-MegaService stroke:#000000 + %% Subgraphs %% + subgraph CodeGen-MegaService["CodeGen-MegaService"] direction LR - A[User] --> |Input query| B[CodeGen Gateway] - B --> |Invoke| Megaservice - subgraph Megaservice["Megaservice"] - direction TB - C((LLM
9000)) -. Post .-> D{{TGI Service
8028}} - end - Megaservice --> |Output| E[Response] + EM([Embedding
MicroService]):::blue + RET([Retrieval
MicroService]):::blue + RER([Agents]):::blue + LLM([LLM
MicroService]):::blue end - - subgraph Legend + subgraph User Interface direction LR - G([Microservice]) ==> H([Microservice]) - I([Microservice]) -.-> J{{Server API}} + a([Submit Query Tab]):::orchid + UI([UI server]):::orchid + Ingest([Manage Resources]):::orchid end + + CLIP_EM{{Embedding
service}} + VDB{{Vector DB}} + V_RET{{Retriever
service}} + Ingest{{Ingest data}} + DP([Data Preparation]):::blue + LLM_gen{{TGI Service}} + GW([CodeGen GateWay]):::orange + + %% Data Preparation flow + %% Ingest data flow + direction LR + Ingest[Ingest data] --> UI + UI --> DP + DP <-.-> CLIP_EM + + %% Questions interaction + direction LR + a[User Input Query] --> UI + UI --> GW + GW <==> CodeGen-MegaService + EM ==> RET + RET ==> RER + RER ==> LLM + + + %% Embedding service flow + direction LR + EM <-.-> CLIP_EM + RET <-.-> V_RET + LLM <-.-> LLM_gen + + direction TB + %% Vector DB interaction + V_RET <-.->VDB + DP <-.->VDB ``` ### Setup Environment Variables @@ -111,6 +160,15 @@ docker compose --profile codegen-xeon-vllm up -d }' ``` + If the user wants a CodeGen service with RAG and Agents based on dedicated documentation. + + ```bash + curl http://localhost:7778/v1/codegen \ + -H "Content-Type: application/json" \ + -d '{"agents_flag": "True", "index_name": "my_API_document", "messages": "Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception."}' + ``` + + ## 🚀 Launch the UI To access the frontend, open the following URL in your browser: `http://{host_ip}:5173`. By default, the UI runs on port 5173 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below: @@ -272,4 +330,4 @@ Then run the command `docker images`, you will have the following Docker Images: - `opea/llm-textgen:latest` - `opea/codegen:latest` - `opea/codegen-ui:latest` -- `opea/codegen-react-ui:latest` (optional) +- `opea/codegen-react-ui:latest` (optional) \ No newline at end of file diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml index 5567d9e368..e88b3e4847 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml +++ b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml @@ -1,7 +1,5 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - services: + tgi-service: image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu container_name: tgi-server @@ -92,10 +90,14 @@ services: - http_proxy=${http_proxy} - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP} + - RETRIEVAL_SERVICE_HOST_IP=${RETRIEVAL_SERVICE_HOST_IP} + - REDIS_RETRIEVER_PORT=${REDIS_RETRIEVER_PORT} + - TEI_EMBEDDING_HOST_IP=${TEI_EMBEDDING_HOST_IP} + - EMBEDDER_PORT=${EMBEDDER_PORT} ipc: host restart: always codegen-xeon-ui-server: - image: ${REGISTRY:-opea}/codegen-ui:${TAG:-latest} + image: ${REGISTRY:-opea}/codegen-gradio-ui:${TAG:-latest} container_name: codegen-xeon-ui-server depends_on: - codegen-xeon-backend-server @@ -106,9 +108,93 @@ services: - https_proxy=${https_proxy} - http_proxy=${http_proxy} - BASIC_URL=${BACKEND_SERVICE_ENDPOINT} + - MEGA_SERVICE_PORT=${MEGA_SERVICE_PORT} + - host_ip=${host_ip} + - DATAPREP_ENDPOINT=${DATAPREP_ENDPOINT} + - DATAPREP_REDIS_PORT=${DATAPREP_REDIS_PORT} ipc: host restart: always - + redis-vector-db: + image: redis/redis-stack:7.2.0-v9 + container_name: redis-vector-db + ports: + - "${REDIS_DB_PORT}:${REDIS_DB_PORT}" + - "${REDIS_INSIGHTS_PORT}:${REDIS_INSIGHTS_PORT}" + dataprep-redis-server: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-redis-server + depends_on: + - redis-vector-db + ports: + - "${DATAPREP_REDIS_PORT}:5000" + environment: + no_proxy: 
${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: ${REDIS_URL} + REDIS_HOST: ${host_ip} + INDEX_NAME: ${INDEX_NAME} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + LOGFLAG: true + restart: unless-stopped + tei-embedding-serving: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + container_name: tei-embedding-serving + entrypoint: /bin/sh -c "apt-get update && apt-get install -y curl && text-embeddings-router --json-output --model-id ${EMBEDDING_MODEL_ID} --auto-truncate" + ports: + - "${TEI_EMBEDDER_PORT:-12000}:80" + volumes: + - "./data:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + host_ip: ${host_ip} + HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + healthcheck: + test: ["CMD", "curl", "-f", "http://${host_ip}:${TEI_EMBEDDER_PORT}/health"] + interval: 10s + timeout: 6s + retries: 48 + tei-embedding-server: + image: ${REGISTRY:-opea}/embedding:${TAG:-latest} + container_name: tei-embedding-server + ports: + - "${EMBEDDER_PORT:-10201}:6000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + EMBEDDING_COMPONENT_NAME: "OPEA_TEI_EMBEDDING" + depends_on: + tei-embedding-serving: + condition: service_healthy + restart: unless-stopped + retriever-redis: + image: ${REGISTRY:-opea}/retriever:${TAG:-latest} + container_name: retriever-redis + depends_on: + - redis-vector-db + ports: + - "${REDIS_RETRIEVER_PORT}:${REDIS_RETRIEVER_PORT}" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: ${REDIS_URL} + REDIS_DB_PORT: ${REDIS_DB_PORT} + REDIS_INSIGHTS_PORT: ${REDIS_INSIGHTS_PORT} + REDIS_RETRIEVER_PORT: ${REDIS_RETRIEVER_PORT} + INDEX_NAME: ${INDEX_NAME} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + LOGFLAG: ${LOGFLAG} + RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_REDIS} + restart: unless-stopped networks: default: - driver: bridge + driver: bridge \ No newline at end of file diff --git a/CodeGen/docker_compose/intel/hpu/gaudi/README.md b/CodeGen/docker_compose/intel/hpu/gaudi/README.md index 133b32f09f..5408e33654 100644 --- a/CodeGen/docker_compose/intel/hpu/gaudi/README.md +++ b/CodeGen/docker_compose/intel/hpu/gaudi/README.md @@ -6,28 +6,77 @@ The default pipeline deploys with vLLM as the LLM serving component. It also pro ## 🚀 Start MicroServices and MegaService -The CodeGen megaservice manages a single microservice called LLM within a Directed Acyclic Graph (DAG). In the diagram above, the LLM microservice is a language model microservice that generates code snippets based on the user's input query. The TGI service serves as a text generation interface, providing a RESTful API for the LLM microservice. The CodeGen Gateway acts as the entry point for the CodeGen application, invoking the Megaservice to generate code snippets in response to the user's input query. +The CodeGen megaservice manages a several microservices including 'Embedding MicroService', 'Retrieval MicroService' and 'LLM MicroService' within a Directed Acyclic Graph (DAG). In the diagram below, the LLM microservice is a language model microservice that generates code snippets based on the user's input query. The TGI service serves as a text generation interface, providing a RESTful API for the LLM microservice. 
Data Preparation allows users to save/update documents or online resources to the vector database. Users can upload files or provide URLs, and manage their saved resources. The CodeGen Gateway acts as the entry point for the CodeGen application, invoking the Megaservice to generate code snippets in response to the user's input query. The mega flow of the CodeGen application, from user's input query to the application's output response, is as follows: ```mermaid +--- +config: + flowchart: + nodeSpacing: 400 + rankSpacing: 100 + curve: linear + themeVariables: + fontSize: 25px +--- flowchart LR - subgraph CodeGen + %% Colors %% + classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 + classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 + classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 + classDef invisible fill:transparent,stroke:transparent; + style CodeGen-MegaService stroke:#000000 + %% Subgraphs %% + subgraph CodeGen-MegaService["CodeGen-MegaService"] direction LR - A[User] --> |Input query| B[CodeGen Gateway] - B --> |Invoke| Megaservice - subgraph Megaservice["Megaservice"] - direction TB - C((LLM
9000)) -. Post .-> D{{TGI Service
8028}} - end - Megaservice --> |Output| E[Response] + EM([Embedding
MicroService]):::blue + RET([Retrieval
MicroService]):::blue + RER([Agents]):::blue + LLM([LLM
MicroService]):::blue end - - subgraph Legend + subgraph User Interface direction LR - G([Microservice]) ==> H([Microservice]) - I([Microservice]) -.-> J{{Server API}} + a([Submit Query Tab]):::orchid + UI([UI server]):::orchid + Ingest([Manage Resources]):::orchid end + + CLIP_EM{{Embedding
service}} + VDB{{Vector DB}} + V_RET{{Retriever
service}} + Ingest{{Ingest data}} + DP([Data Preparation]):::blue + LLM_gen{{TGI Service}} + GW([CodeGen GateWay]):::orange + + %% Data Preparation flow + %% Ingest data flow + direction LR + Ingest[Ingest data] --> UI + UI --> DP + DP <-.-> CLIP_EM + + %% Questions interaction + direction LR + a[User Input Query] --> UI + UI --> GW + GW <==> CodeGen-MegaService + EM ==> RET + RET ==> RER + RER ==> LLM + + + %% Embedding service flow + direction LR + EM <-.-> CLIP_EM + RET <-.-> V_RET + LLM <-.-> LLM_gen + + direction TB + %% Vector DB interaction + V_RET <-.->VDB + DP <-.->VDB ``` ### Setup Environment Variables @@ -104,6 +153,15 @@ docker compose --profile codegen-gaudi-vllm up -d }' ``` + If the user wants a CodeGen service with RAG and Agents based on dedicated documentation. + + ```bash + curl http://localhost:7778/v1/codegen \ + -H "Content-Type: application/json" \ + -d '{"agents_flag": "True", "index_name": "my_API_document", "messages": "Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception."}' + ``` + + ## 🚀 Launch the Svelte Based UI To access the frontend, open the following URL in your browser: `http://{host_ip}:5173`. By default, the UI runs on port 5173 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below: diff --git a/CodeGen/docker_compose/set_env.sh b/CodeGen/docker_compose/set_env.sh index cb9e742847..559f00cf2a 100644 --- a/CodeGen/docker_compose/set_env.sh +++ b/CodeGen/docker_compose/set_env.sh @@ -6,8 +6,9 @@ pushd "../../" > /dev/null source .set_env.sh popd > /dev/null -export host_ip=$(hostname -I | awk '{print $1}') +export your_ip=$(hostname -I | awk '{print $1}') +export host_ip=$(hostname -I | awk '{print $1}') if [ -z "${HUGGINGFACEHUB_API_TOKEN}" ]; then echo "Error: HUGGINGFACEHUB_API_TOKEN is not set. 
Please set HUGGINGFACEHUB_API_TOKEN" fi @@ -18,9 +19,34 @@ fi export no_proxy=${no_proxy},${host_ip} -export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct" +export http_proxy=${http_proxy} +export https_proxy=${https_proxy} + +export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-32B-Instruct" +export LLM_SERVICE_PORT=9000 export LLM_ENDPOINT="http://${host_ip}:8028" -export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} +export TGI_LLM_ENDPOINT="http://${host_ip}:8028" + +export MEGA_SERVICE_PORT=7778 +export MEGA_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:7778/v1/codegen" -export MODEL_CACHE="./data" + +export REDIS_DB_PORT=6379 +export REDIS_INSIGHTS_PORT=8001 +export REDIS_URL="redis://${host_ip}:${REDIS_DB_PORT}" +export REDIS_RETRIEVER_PORT=7000 +export RETRIEVAL_SERVICE_HOST_IP=${host_ip} +export RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_REDIS" +export INDEX_NAME="CodeGen" + +export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" +export EMBEDDER_PORT=6000 +export TEI_EMBEDDER_PORT=8090 +export TEI_EMBEDDING_HOST_IP=${host_ip} +export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" + +export DATAPREP_REDIS_PORT=6007 +export DATAPREP_ENDPOINT="http://${host_ip}:${DATAPREP_REDIS_PORT}/v1/dataprep" +export LOGFLAG=false +export MODEL_CACHE="./data" \ No newline at end of file diff --git a/CodeGen/docker_image_build/build.yaml b/CodeGen/docker_image_build/build.yaml index 3275aa71bf..52ca23b109 100644 --- a/CodeGen/docker_image_build/build.yaml +++ b/CodeGen/docker_image_build/build.yaml @@ -23,6 +23,12 @@ services: dockerfile: ./docker/Dockerfile.react extends: codegen image: ${REGISTRY:-opea}/codegen-react-ui:${TAG:-latest} + codegen-gradio-ui: + build: + context: ../ui + dockerfile: ./docker/Dockerfile.gradio + extends: codegen + image: ${REGISTRY:-opea}/codegen-gradio-ui:${TAG:-latest} llm-textgen: build: context: GenAIComps diff --git a/CodeGen/tests/test_compose_on_gaudi.sh b/CodeGen/tests/test_compose_on_gaudi.sh index c7b6b83f7e..60bd1b6782 100644 --- a/CodeGen/tests/test_compose_on_gaudi.sh +++ b/CodeGen/tests/test_compose_on_gaudi.sh @@ -29,7 +29,8 @@ function build_docker_images() { fi cd $WORKPATH/docker_image_build - git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + # git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + # Download Gaudi vllm of latest tag git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork VLLM_VER=$(git describe --tags "$(git rev-list --tags --max-count=1)") diff --git a/CodeGen/tests/test_compose_on_xeon.sh b/CodeGen/tests/test_compose_on_xeon.sh index 6fc25963ac..aa517f7c98 100644 --- a/CodeGen/tests/test_compose_on_xeon.sh +++ b/CodeGen/tests/test_compose_on_xeon.sh @@ -29,7 +29,7 @@ function build_docker_images() { fi cd $WORKPATH/docker_image_build - git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + # git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git git clone https://github.com/vllm-project/vllm.git && cd vllm VLLM_VER="$(git describe --tags "$(git rev-list --tags --max-count=1)" )" @@ -82,23 +82,35 @@ function validate_services() { local DOCKER_NAME="$4" local INPUT_DATA="$5" - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." 
+ if [[ "$SERVICE_NAME" == "ingest" ]]; then + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$INPUT_DATA" -F index_name=test_redis -H 'Content-Type: multipart/form-data' "$URL") - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Data preparation succeeded..." + else + echo "[ $SERVICE_NAME ] Data preparation failed..." + fi + + else + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi else - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log exit 1 fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 fi sleep 5s } @@ -122,6 +134,14 @@ function validate_microservices() { "llm-textgen-server" \ '{"query":"def print_hello_world():", "max_tokens": 256}' + # Data ingest microservice + validate_services \ + "${ip_address}:6007/v1/dataprep/ingest" \ + "Data preparation succeeded" \ + "ingest" \ + "dataprep-redis-server" \ + 'link_list=["https://www.ces.tech/", "https://modin.readthedocs.io/en/latest/index.html"]' + } function validate_megaservice() { @@ -133,6 +153,14 @@ function validate_megaservice() { "codegen-xeon-backend-server" \ '{"messages": "def print_hello_world():", "max_tokens": 256}' + # Curl the Mega Service with index_name and agents_flag + validate_services \ + "${ip_address}:7778/v1/codegen" \ + "print" \ + "mega-codegen" \ + "codegen-xeon-backend-server" \ + '{ "index_name": "test_redis", "agents_flag": "True", "messages": "def print_hello_world():", "max_tokens": 256}' + } function validate_frontend() { @@ -202,7 +230,7 @@ function main() { validate_microservices "${docker_llm_container_names[${i}]}" validate_megaservice - validate_frontend + # validate_frontend stop_docker "${docker_compose_profiles[${i}]}" sleep 5s @@ -212,3 +240,4 @@ function main() { } main + diff --git a/CodeGen/ui/docker/Dockerfile.gradio b/CodeGen/ui/docker/Dockerfile.gradio new file mode 100644 index 0000000000..11a4f4f581 --- /dev/null +++ b/CodeGen/ui/docker/Dockerfile.gradio @@ -0,0 +1,33 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM python:3.11-slim + +ENV LANG=C.UTF-8 + +ARG ARCH="cpu" + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + default-jre \ + libgl1-mesa-glx \ + libjemalloc-dev \ + wget + +# Install ffmpeg static build +WORKDIR /root +RUN wget https://johnvansickle.com/ffmpeg/builds/ffmpeg-git-amd64-static.tar.xz && \ + mkdir 
ffmpeg-git-amd64-static && tar -xvf ffmpeg-git-amd64-static.tar.xz -C ffmpeg-git-amd64-static --strip-components 1 && \
+    export PATH=/root/ffmpeg-git-amd64-static:$PATH && \
+    cp /root/ffmpeg-git-amd64-static/ffmpeg /usr/local/bin/ && \
+    cp /root/ffmpeg-git-amd64-static/ffprobe /usr/local/bin/
+
+RUN mkdir -p /home/user
+
+COPY gradio /home/user/gradio
+
+RUN pip install --no-cache-dir --upgrade pip setuptools && \
+pip install --no-cache-dir -r /home/user/gradio/requirements.txt
+
+WORKDIR /home/user/gradio
+ENTRYPOINT ["python", "codegen_ui_gradio.py"]
diff --git a/CodeGen/ui/gradio/README.md b/CodeGen/ui/gradio/README.md
new file mode 100644
index 0000000000..9769efb317
--- /dev/null
+++ b/CodeGen/ui/gradio/README.md
@@ -0,0 +1,65 @@
+# CodeGen Gradio UI
+
+This project provides a Gradio-based user interface for the CodeGen application. Users can submit code generation queries and manage the documents and URLs that are used for retrieval.
+
+## Docker
+
+### Build UI Docker Image
+
+To build the frontend Docker image, navigate to the `GenAIExamples/CodeGen/ui` directory and run the following command:
+
+```bash
+cd GenAIExamples/CodeGen/ui
+docker build -t opea/codegen-gradio-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile.gradio .
+```
+
+This command builds the Docker image with the tag `opea/codegen-gradio-ui:latest`. It also passes the proxy settings as build arguments to ensure that the build process can access the internet if you are behind a corporate firewall.
+
+### Run UI Docker Image
+
+To run the frontend Docker image, navigate to the `GenAIExamples/CodeGen/ui/gradio` directory and execute the following commands:
+
+```bash
+cd GenAIExamples/CodeGen/ui/gradio
+
+ip_address=$(hostname -I | awk '{print $1}')
+docker run -d -p 5173:5173 --ipc=host \
+  -e http_proxy=$http_proxy \
+  -e https_proxy=$https_proxy \
+  -e no_proxy=$no_proxy \
+  -e BACKEND_SERVICE_ENDPOINT=http://$ip_address:7778/v1/codegen \
+  opea/codegen-gradio-ui:latest
+```
+
+This command runs the Docker container in detached mode, mapping port 5173 of the host to port 5173 of the container. It also sets several environment variables, including the backend service endpoint, which is required for the frontend to communicate with the backend service.
+
+### Python
+
+To run the frontend application directly using Python, navigate to the `GenAIExamples/CodeGen/ui/gradio` directory and run the following command:
+
+```bash
+cd GenAIExamples/CodeGen/ui/gradio
+python codegen_ui_gradio.py
+```
+
+This command starts the frontend application using Python.
+
+## Additional Information
+
+### Prerequisites
+
+Ensure you have Docker installed and running on your system. Also, make sure you have the necessary proxy settings configured if you are behind a corporate firewall.
+
+### Environment Variables
+
+- `http_proxy`: Proxy setting for HTTP connections.
+- `https_proxy`: Proxy setting for HTTPS connections.
+- `no_proxy`: Comma-separated list of hosts that should be excluded from proxying.
+- `BACKEND_SERVICE_ENDPOINT`: The endpoint of the backend service that the frontend will communicate with.
+
+### Troubleshooting
+
+- Docker Build Issues: If you encounter issues while building the Docker image, ensure that your proxy settings are correctly configured and that you have internet access.
+- Docker Run Issues: If the Docker container fails to start, check the environment variables and ensure that the backend service is running and accessible (see the quick check below).
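+
+If the container is up but the UI cannot reach the backend, a quick way to isolate the problem is to query the backend endpoint directly. This is only a sketch; substitute your own host and port if they differ from the defaults used above:
+
+```bash
+ip_address=$(hostname -I | awk '{print $1}')
+curl http://$ip_address:7778/v1/codegen \
+  -H "Content-Type: application/json" \
+  -d '{"messages": "def print_hello_world():"}'
+```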
+ +This README file provides detailed instructions and explanations for building and running the Dockerized frontend application, as well as running it directly using Python. It also highlights the key features of the project and provides additional information for troubleshooting and configuring the environment. diff --git a/CodeGen/ui/gradio/codegen_ui_gradio.py b/CodeGen/ui/gradio/codegen_ui_gradio.py new file mode 100644 index 0000000000..cb90288cc5 --- /dev/null +++ b/CodeGen/ui/gradio/codegen_ui_gradio.py @@ -0,0 +1,401 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# This is a Gradio app that includes two tabs: one for code generation and another for resource management. +# The resource management tab has been updated to allow file uploads, deletion, and a table listing all the files. +# Additionally, three small text boxes have been added for managing file dataframe parameters. + +import argparse +import os +from pathlib import Path +import gradio as gr +import requests +import pandas as pd +import os +import uvicorn +import json +import argparse +# from utils import build_logger, make_temp_image, server_error_msg, split_video +from urllib.parse import urlparse +from pathlib import Path +from fastapi import FastAPI +# from fastapi.responses import JSONResponse, StreamingResponse +from fastapi.staticfiles import StaticFiles + +# logger = build_logger("gradio_web_server", "gradio_web_server.log") +logflag = os.getenv("LOGFLAG", False) + +# create a FastAPI app +app = FastAPI() +cur_dir = os.getcwd() +static_dir = Path(os.path.join(cur_dir, "static/")) +tmp_dir = Path(os.path.join(cur_dir, "split_tmp_videos/")) + +Path(static_dir).mkdir(parents=True, exist_ok=True) +app.mount("/static", StaticFiles(directory=static_dir), name="static") + +tmp_upload_folder = "/tmp/gradio/" + + + +host_ip = os.getenv("host_ip") +DATAPREP_REDIS_PORT = os.getenv("DATAPREP_REDIS_PORT", 6007) +DATAPREP_ENDPOINT = os.getenv("DATAPREP_ENDPOINT", f"http://{host_ip}:{DATAPREP_REDIS_PORT}/v1/dataprep") +MEGA_SERVICE_PORT = os.getenv("MEGA_SERVICE_PORT", 7778) + +backend_service_endpoint = os.getenv( + "BACKEND_SERVICE_ENDPOINT", f"http://{host_ip}:{MEGA_SERVICE_PORT}/v1/codegen" + ) + +dataprep_ingest_endpoint = f"{DATAPREP_ENDPOINT}/ingest" +dataprep_get_files_endpoint = f"{DATAPREP_ENDPOINT}/get" +dataprep_delete_files_endpoint = f"{DATAPREP_ENDPOINT}/delete" +dataprep_get_indices_endpoint = f"{DATAPREP_ENDPOINT}/indices" + + + +# Define the functions that will be used in the app +def conversation_history(prompt, index, use_agent, history): + # Print the language and prompt, and return a placeholder code + print(f"Generating code for prompt: {prompt} using index: {index} and use_agent is {use_agent}") + history.append([prompt, ""]) + response_generator = generate_code(prompt, index, use_agent) + for token in response_generator: + history[-1][-1] += token + yield history + + +def upload_media(media, index=None, chunk_size=1500, chunk_overlap=100): + media = media.strip().split("\n") + print("Files passed is ", media, flush=True) + if not chunk_size: + chunk_size = 1500 + if not chunk_overlap: + chunk_overlap = 100 + + requests = [] + if type(media) is list: + for file in media: + file_ext = os.path.splitext(file)[-1] + if is_valid_url(file): + print(file, " is valid URL") + print("Ingesting URL...") + value = ingest_url(file, index, chunk_size, chunk_overlap) + requests.append(value) + yield value + elif file_ext in ['.pdf', '.txt']: + print("Ingesting File...") + value = 
ingest_file(file, index, chunk_size, chunk_overlap) + requests.append(value) + yield value + else: + print(file, "File type not supported") + yield ( + gr.Textbox( + visible=True, + value="Your file extension type is not supported.", + ) + ) + return + yield requests + + else: + file_ext = os.path.splitext(media)[-1] + if is_valid_url(media): + value = ingest_url(media, index, chunk_size, chunk_overlap) + yield value + elif file_ext in ['.pdf', '.txt']: + print("Ingesting File...") + value = ingest_file(media, index, chunk_size, chunk_overlap) + # print("Return value is: ", value, flush=True) + yield value + else: + print(media, "File type not supported") + yield ( + gr.Textbox( + visible=True, + value="Your file extension type is not supported.", + ) + ) + return + +def generate_code(query, index=None, use_agent=False): + if index is None or index == "None": + input_dict = {"messages": query, "agents_flag": use_agent} + else: + input_dict = {"messages": query, "index_name": index, "agents_flag": use_agent} + + print("Query is ", input_dict) + headers = {"Content-Type": "application/json"} + + response = requests.post(url=backend_service_endpoint, headers=headers, data=json.dumps(input_dict), stream=True) + + for line in response.iter_lines(): + if line: + line = line.decode('utf-8') + if line.startswith("data: "): # Only process lines starting with "data: " + json_part = line[len("data: "):] # Remove the "data: " prefix + if json_part.strip() == "[DONE]": # Ignore the DONE marker + continue + try: + json_obj = json.loads(json_part) # Convert to dictionary + if "choices" in json_obj: + for choice in json_obj["choices"]: + if "text" in choice: + # Yield each token individually + yield choice["text"] + except json.JSONDecodeError: + print("Error parsing JSON:", json_part) + + +def ingest_file(file, index=None, chunk_size=100, chunk_overlap=150): + headers = { + # "Content-Type: multipart/form-data" + } + file_input = {"files": open(file, "rb")} + + if index: + print("Index is", index) + data = {"index_name": index, "chunk_size": chunk_size, "chunk_overlap": chunk_overlap} + else: + data = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap} + + print("Calling Request Now!") + response = requests.post(url=dataprep_ingest_endpoint, headers=headers, files=file_input, data=data) + # print("Ingest Files", response) + print(response.text) + + # table = update_table() + return response.text + +def ingest_url(url, index=None, chunk_size=100, chunk_overlap=150): + print("URL is ", url) + url = str(url) + if not is_valid_url(url): + print("Invalid URL") + # yield ( + # gr.Textbox( + # visible=True, + # value="Invalid URL entered. 
Please enter a valid URL", + # ) + # ) + return + headers = { + # "Content-Type: multipart/form-data" + } + + if index: + url_input = {"link_list": json.dumps([url]), "index_name": index, "chunk_size": chunk_size, "chunk_overlap": chunk_overlap} + else: + url_input = {"link_list": json.dumps([url]), "chunk_size": chunk_size, "chunk_overlap": chunk_overlap} + response = requests.post(url=dataprep_ingest_endpoint, headers=headers, data=url_input) + # print("Ingest URL", response) + # table = update_table() + return response.text + + +def is_valid_url(url): + url = str(url) + try: + result = urlparse(url) + return all([result.scheme, result.netloc]) + except ValueError: + return False + + + +# Initialize the file list +file_list = [] + +# def update_files(file): +# # Add the uploaded file to the file list +# file_list.append(file.name) +# file_df["Files"] = file_list +# return file_df + + +def get_files(index=None): + headers = { + # "Content-Type: multipart/form-data" + } + if index == "All Files": + index = None + + if index: + index = {"index_name": index} + response = requests.post(url=dataprep_get_files_endpoint, headers=headers, data=index) + print("Get files with ", index, response) + table = response.json() + return table + else: + # print("URL IS ", dataprep_get_files_endpoint) + response = requests.post(url=dataprep_get_files_endpoint, headers=headers) + print("Get files ", response) + table = response.json() + return table + +def update_table(index=None): + if index == "All Files": + index = None + files = get_files(index) + print("Files is ", files) + if len(files) == 0: + df = pd.DataFrame(files, columns=["Files"]) + return df + else: + df = pd.DataFrame(files) + return df + +def update_indices(): + indices = get_indices() + df = pd.DataFrame(indices, columns=["File Databases"]) + return df + +def delete_file(file, index=None): + # Remove the selected file from the file list + headers = { + # "Content-Type: application/json" + } + print("URL IS ", dataprep_delete_files_endpoint) + if index: + file_input = {"files": open(file, "rb"), "index_name": index} + else: + file_input = {"files": open(file, "rb")} + response = requests.post(url=dataprep_delete_files_endpoint, headers=headers, data=file_input) + print("Delete file ", response) + table = update_table() + return response.text + +def delete_all_files(index=None): + # Remove all files from the file list + headers = { + # "Content-Type: application/json" + } + response = requests.post(url=dataprep_delete_files_endpoint, headers=headers, data='{"file_path": "all"}') + print("Delete all files ", response) + table = update_table() + + return response.text + +def get_indices(): + headers = { + # "Content-Type: application/json" + } + response = requests.post(url=dataprep_get_indices_endpoint, headers=headers) + print("Get Indices", response) + indices = response.json() + return indices + +def update_indices_dropdown(): + indices = ["None"] + get_indices() + new_dd = gr.update(choices=indices, value="None") + return new_dd + + +def get_file_names(files): + file_str = "" + if not files: + return file_str + + for file in files: + file_str += file + '\n' + file_str.strip() + return file_str + + +# Define UI components +with gr.Blocks() as ui: + with gr.Tab("Code Generation"): + gr.Markdown("### Generate Code from Natural Language") + chatbot = gr.Chatbot(label="Chat History") + prompt_input = gr.Textbox(label="Enter your query") + with gr.Column(): + with gr.Row(scale=8): + # indices = ["None"] + get_indices() + database_dropdown = 
gr.Dropdown(choices=get_indices(), label="Select Index", value="None") + with gr.Row(scale=1): + db_refresh_button = gr.Button("Refresh", variant="primary") + db_refresh_button.click(update_indices_dropdown, outputs=database_dropdown) + use_agent = gr.Checkbox(label="Use Agent", container=False) + + generate_button = gr.Button("Generate Code") + + # Connect the generate button to the conversation_history function + generate_button.click(conversation_history, inputs=[prompt_input, database_dropdown, use_agent, chatbot], outputs=chatbot) + + with gr.Tab("Resource Management"): + # File management components + # url_button = gr.Button("Process") + with gr.Row(): + with gr.Column(scale=1): + index_name_input = gr.Textbox(label="Index Name") + chunk_size_input = gr.Textbox(label="Chunk Size", value="1500", placeholder="Enter an integer (default: 1500)") + chunk_overlap_input = gr.Textbox(label="Chunk Overlap", value="100", placeholder="Enter an integer (default: 100)") + with gr.Column(scale=3): + file_upload = gr.File(label="Upload Files", file_count="multiple") + url_input = gr.Textbox(label="Media to be ingested (Append URL's in a new line)") + upload_button = gr.Button("Upload", variant="primary") + upload_status = gr.Textbox(label="Upload Status") + file_upload.change(get_file_names, inputs=file_upload, outputs=url_input) + with gr.Column(scale=1): + # table_dropdown = gr.Dropdown(indices) + # file_table = gr.Dataframe(interactive=False, value=update_table()) + file_table = gr.Dataframe(interactive=False, value=update_indices()) + refresh_button = gr.Button("Refresh", variant="primary", size="sm") + refresh_button.click(update_indices, outputs=file_table) + # refresh_button.click(update_indices, outputs=database_dropdown) + # table_dropdown.change(fn=update_table, inputs=table_dropdown, outputs=file_table) + # upload_button.click(upload_media, inputs=[file_upload, index_name_input, chunk_size_input, chunk_overlap_input], outputs=file_table) + upload_button.click(upload_media, inputs=[url_input, index_name_input, chunk_size_input, chunk_overlap_input], outputs=upload_status) + + delete_all_button = gr.Button("Delete All", variant="primary", size="sm") + delete_all_button.click(delete_all_files, outputs=upload_status) + + + + # delete_button = gr.Button("Delete Index") + + # selected_file_output = gr.Textbox(label="Selected File") + # delete_button.click(delete_file, inputs=indices, outputs=upload_status) + + + +ui.queue() +app = gr.mount_gradio_app(app, ui, path="/") +share = False +enable_queue = True + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--host", type=str, default="0.0.0.0") + parser.add_argument("--port", type=int, default=os.getenv("UI_PORT", 5173)) + parser.add_argument("--concurrency-count", type=int, default=20) + parser.add_argument("--share", action="store_true") + + host_ip = os.getenv("host_ip") + DATAPREP_REDIS_PORT = os.getenv("DATAPREP_REDIS_PORT", 6007) + DATAPREP_ENDPOINT = os.getenv("DATAPREP_ENDPOINT", f"http://{host_ip}:{DATAPREP_REDIS_PORT}/v1/dataprep") + MEGA_SERVICE_PORT = os.getenv("MEGA_SERVICE_PORT", 7778) + + + backend_service_endpoint = os.getenv( + "BACKEND_SERVICE_ENDPOINT", f"http://{host_ip}:{MEGA_SERVICE_PORT}/v1/codegen" + ) + + # dataprep_ingest_endpoint = f"{DATAPREP_ENDPOINT}/ingest" + # dataprep_get_files_endpoint = f"{DATAPREP_ENDPOINT}/get" + # dataprep_delete_files_endpoint = f"{DATAPREP_ENDPOINT}/delete" + # dataprep_get_indices_endpoint = f"{DATAPREP_ENDPOINT}/indices" + + + args = 
parser.parse_args() + # logger.info(f"args: {args}") + global gateway_addr + gateway_addr = backend_service_endpoint + global dataprep_ingest_addr + dataprep_ingest_addr = dataprep_ingest_endpoint + global dataprep_get_files_addr + dataprep_get_files_addr = dataprep_get_files_endpoint + + + uvicorn.run(app, host=args.host, port=args.port) diff --git a/CodeGen/ui/gradio/requirements.txt b/CodeGen/ui/gradio/requirements.txt new file mode 100644 index 0000000000..2a4c8e1a30 --- /dev/null +++ b/CodeGen/ui/gradio/requirements.txt @@ -0,0 +1,4 @@ +gradio==5.22.0 +numpy==1.26.4 +opencv-python==4.10.0.82 +Pillow==10.3.0 diff --git a/CodeGen/ui/svelte/.env b/CodeGen/ui/svelte/.env index 0bf85fa876..2efb56c8f5 100644 --- a/CodeGen/ui/svelte/.env +++ b/CodeGen/ui/svelte/.env @@ -1 +1 @@ -BASIC_URL = 'http://backend_address:7778/v1/codegen' +BASIC_URL = 'http://10.98.56.44:7778/v1/codegen' From b8ec015ed47fdb29661dd00c617c4ae3bead9c23 Mon Sep 17 00:00:00 2001 From: okhleif-IL Date: Wed, 2 Apr 2025 23:14:39 +0000 Subject: [PATCH 19/22] added error handling for exceeded token size Signed-off-by: okhleif-IL --- CodeGen/ui/gradio/codegen_ui_gradio.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/CodeGen/ui/gradio/codegen_ui_gradio.py b/CodeGen/ui/gradio/codegen_ui_gradio.py index 770608a0c9..d0e351960a 100644 --- a/CodeGen/ui/gradio/codegen_ui_gradio.py +++ b/CodeGen/ui/gradio/codegen_ui_gradio.py @@ -140,9 +140,11 @@ def generate_code(query, index=None, use_agent=False): print("Query is ", input_dict) headers = {"Content-Type": "application/json"} - response = requests.post(url=backend_service_endpoint, headers=headers, data=json.dumps(input_dict), stream=True) + response = requests.post(url=backend_service_endpoint, headers=headers, data=json.dumps(input_dict), stream=True) + line_count = 0 for line in response.iter_lines(): + line_count += 1 if line: line = line.decode('utf-8') if line.startswith("data: "): # Only process lines starting with "data: " @@ -160,6 +162,10 @@ def generate_code(query, index=None, use_agent=False): yield choice["text"] except json.JSONDecodeError: print("Error parsing JSON:", json_part) + + if line_count == 0: + yield f"Something went wrong, No Response Generated! \nIf you are using an Index, try uploading your media again with a smaller chunk size to avoid exceeding the token max. \ + \nOr, check the Use Agent box and try again." 
def ingest_file(file, index=None, chunk_size=100, chunk_overlap=150): @@ -289,7 +295,6 @@ def get_indices(): headers = { # "Content-Type: application/json" } - print("URL IS ", dataprep_get_indices_endpoint) response = requests.post(url=dataprep_get_indices_endpoint, headers=headers) indices = ["None"] print("Get Indices", response) From 18445c5625feef1cbb1ce7ae9119c9aad365167f Mon Sep 17 00:00:00 2001 From: okhleif-IL Date: Thu, 3 Apr 2025 17:47:35 +0000 Subject: [PATCH 20/22] xeon --> gaudi Signed-off-by: okhleif-IL --- CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml b/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml index c2732fcc96..afe6d7ab35 100644 --- a/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml @@ -114,7 +114,7 @@ services: - EMBEDDER_PORT=${EMBEDDER_PORT} ipc: host restart: always - codegen-xeon-ui-server: + codegen-gaudi-ui-server: image: ${REGISTRY:-opea}/codegen-gradio-ui:${TAG:-latest} container_name: codegen-xeon-ui-server depends_on: From f8178a59fdce16c7d3e478699086751b8ee3d9cd Mon Sep 17 00:00:00 2001 From: okhleif-IL Date: Thu, 3 Apr 2025 17:53:05 +0000 Subject: [PATCH 21/22] made tests like codegen ragagents branch Signed-off-by: okhleif-IL --- CodeGen/tests/test_compose_on_gaudi.sh | 53 +++++++------------------- CodeGen/tests/test_compose_on_xeon.sh | 4 +- 2 files changed, 15 insertions(+), 42 deletions(-) diff --git a/CodeGen/tests/test_compose_on_gaudi.sh b/CodeGen/tests/test_compose_on_gaudi.sh index 2ce5b0ec87..a64f6431a9 100644 --- a/CodeGen/tests/test_compose_on_gaudi.sh +++ b/CodeGen/tests/test_compose_on_gaudi.sh @@ -83,34 +83,23 @@ function validate_services() { local DOCKER_NAME="$4" local INPUT_DATA="$5" - if [[ "$SERVICE_NAME" == "ingest" ]]; then - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$INPUT_DATA" -F index_name=test_redis -H 'Content-Type: multipart/form-data' "$URL") + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Data preparation succeeded..." - else - echo "[ $SERVICE_NAME ] Data preparation failed..." - fi + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - else - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." else - echo "[ $SERVICE_NAME ] HTTP status is not 200. 
Received status was $HTTP_STATUS" + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log exit 1 fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 fi sleep 5s } @@ -134,14 +123,6 @@ function validate_microservices() { "llm-textgen-gaudi-server" \ '{"query":"def print_hello_world():"}' - # Data ingest microservice - validate_services \ - "${ip_address}:6007/v1/dataprep/ingest" \ - "Data preparation succeeded" \ - "ingest" \ - "dataprep-redis-server" \ - 'link_list=["https://www.ces.tech/", "https://modin.readthedocs.io/en/latest/index.html"]' - } function validate_megaservice() { @@ -153,14 +134,6 @@ function validate_megaservice() { "codegen-gaudi-backend-server" \ '{"messages": "def print_hello_world():"}' - # Curl the Mega Service with index_name and agents_flag - validate_services \ - "${ip_address}:7778/v1/codegen" \ - "print" \ - "mega-codegen" \ - "codegen-xeon-backend-server" \ - '{ "index_name": "test_redis", "agents_flag": "True", "messages": "def print_hello_world():", "max_tokens": 256}' - } function validate_frontend() { @@ -229,7 +202,7 @@ function main() { validate_microservices "${docker_llm_container_names[${i}]}" validate_megaservice - # validate_frontend + validate_frontend stop_docker "${docker_compose_profiles[${i}]}" sleep 5s @@ -238,4 +211,4 @@ function main() { echo y | docker system prune } -main +main \ No newline at end of file diff --git a/CodeGen/tests/test_compose_on_xeon.sh b/CodeGen/tests/test_compose_on_xeon.sh index 670d071600..1049308724 100644 --- a/CodeGen/tests/test_compose_on_xeon.sh +++ b/CodeGen/tests/test_compose_on_xeon.sh @@ -92,6 +92,7 @@ function validate_services() { fi else + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") if [ "$HTTP_STATUS" -eq 200 ]; then echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." @@ -238,5 +239,4 @@ function main() { echo y | docker system prune } -main - +main \ No newline at end of file From 21cf358ec1d0b47926420f11c5c92f70aaffbbe7 Mon Sep 17 00:00:00 2001 From: okhleif-IL Date: Thu, 3 Apr 2025 17:55:08 +0000 Subject: [PATCH 22/22] added back \n Signed-off-by: okhleif-IL --- CodeGen/tests/test_compose_on_gaudi.sh | 2 +- CodeGen/tests/test_compose_on_xeon.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CodeGen/tests/test_compose_on_gaudi.sh b/CodeGen/tests/test_compose_on_gaudi.sh index a64f6431a9..60bd1b6782 100644 --- a/CodeGen/tests/test_compose_on_gaudi.sh +++ b/CodeGen/tests/test_compose_on_gaudi.sh @@ -211,4 +211,4 @@ function main() { echo y | docker system prune } -main \ No newline at end of file +main diff --git a/CodeGen/tests/test_compose_on_xeon.sh b/CodeGen/tests/test_compose_on_xeon.sh index 1049308724..a74b78f6cd 100644 --- a/CodeGen/tests/test_compose_on_xeon.sh +++ b/CodeGen/tests/test_compose_on_xeon.sh @@ -239,4 +239,4 @@ function main() { echo y | docker system prune } -main \ No newline at end of file +main