From cd35fff309619ba910323e25988e0fc264e1069d Mon Sep 17 00:00:00 2001 From: Mustafa Date: Thu, 13 Feb 2025 22:11:53 -0800 Subject: [PATCH 01/22] update the compose file Signed-off-by: Mustafa initial Signed-off-by: Mustafa update codegen Signed-off-by: Mustafa update for codegen Signed-off-by: Mustafa add agents Signed-off-by: Mustafa env updates Signed-off-by: Mustafa update codegen Signed-off-by: Mustafa merged to main Signed-off-by: Mustafa updates Signed-off-by: Mustafa --- CodeGen/codegen.py | 225 +++++++++++++++++- .../intel/cpu/xeon/compose.yaml | 161 ++++++++----- CodeGen/docker_compose/set_env.sh | 66 ++++- 3 files changed, 382 insertions(+), 70 deletions(-) diff --git a/CodeGen/codegen.py b/CodeGen/codegen.py index 16db9aa262..b5cf45bfd9 100644 --- a/CodeGen/codegen.py +++ b/CodeGen/codegen.py @@ -3,6 +3,7 @@ import asyncio import os +import ast from comps import MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceRoleType, ServiceType from comps.cores.mega.utils import handle_message @@ -16,20 +17,113 @@ from comps.cores.proto.docarray import LLMParams from fastapi import Request from fastapi.responses import StreamingResponse +from langchain.prompts import PromptTemplate MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 7778)) + LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0") LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000)) +RETRIEVAL_SERVICE_HOST_IP = os.getenv("RETRIEVAL_SERVICE_HOST_IP", "0.0.0.0") +REDIS_RETRIEVER_PORT = int(os.getenv("REDIS_RETRIEVER_PORT", 7000)) + +TEI_EMBEDDING_HOST_IP = os.getenv("TEI_EMBEDDING_HOST_IP", "0.0.0.0") +EMBEDDER_PORT = int(os.getenv("EMBEDDER_PORT", 6000)) + +print(">>>>>> LLM_SERVICE_HOST_IP:", LLM_SERVICE_HOST_IP) +print(">>>>>> LLM_SERVICE_PORT:", LLM_SERVICE_PORT) + +print(">>>>>> RETRIEVAL_SERVICE_HOST_IP:", RETRIEVAL_SERVICE_HOST_IP) +print(">>>>>> REDIS_RETRIEVER_PORT:", REDIS_RETRIEVER_PORT) + +print(">>>>>> TEI_EMBEDDING_HOST_IP:", TEI_EMBEDDING_HOST_IP) +print(">>>>>> EMBEDDER_PORT:", EMBEDDER_PORT) + +grader_prompt = """You are a grader assessing relevance of a retrieved document to a user question. \n +Here is the user question: {question} \n +Here is the retrieved document: \n\n {document} \n\n + +If the document contains keywords related to the user question, grade it as relevant. +It does not need to be a stringent test. The goal is to filter out erroneous retrievals. +Rules: +- Do not return the question, the provided document or explanation. +- if this document is relevant to the question, return 'yes' otherwise return 'no'. +- Do not include any other details in your response. +""" + +def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs): + """ + Aligns the inputs based on the service type of the current node. + + Parameters: + - self: Reference to the current instance of the class. + - inputs: Dictionary containing the inputs for the current node. + - cur_node: The current node in the service orchestrator. + - runtime_graph: The runtime graph of the service orchestrator. + - llm_parameters_dict: Dictionary containing the LLM parameters. + - kwargs: Additional keyword arguments. + + Returns: + - inputs: The aligned inputs for the current node. 
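+
+    Notes:
+    - For EMBEDDING nodes, the incoming query is cached on self.input_query and forwarded to
+      the embedding service as the "input" field.
+    - For RETRIEVER nodes, the payload is rebuilt with the keys "index_name", "text" and
+      "embedding", where the embedding is taken from inputs["data"][0]["embedding"] returned
+      by the embedding step.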
+ """ + + # Check if the current service type is EMBEDDING + if self.services[cur_node].service_type == ServiceType.EMBEDDING: + # Store the input query for later use + self.input_query = inputs["query"] + # Set the input for the embedding service + inputs["input"] = inputs["query"] + + # Check if the current service type is RETRIEVER + if self.services[cur_node].service_type == ServiceType.RETRIEVER: + # Extract the embedding from the inputs + embedding = inputs['data'][0]['embedding'] + # Align the inputs for the retriever service + inputs = { + "index_name": llm_parameters_dict["key_index_name"], + "text": self.input_query, + "embedding": embedding + } + + return inputs + class CodeGenService: def __init__(self, host="0.0.0.0", port=8000): self.host = host self.port = port - self.megaservice = ServiceOrchestrator() + ServiceOrchestrator.align_inputs = align_inputs + self.megaservice_llm = ServiceOrchestrator() + self.megaservice_retriever = ServiceOrchestrator() + self.megaservice_retriever_llm = ServiceOrchestrator() self.endpoint = str(MegaServiceEndpoint.CODE_GEN) def add_remote_service(self): + """ + Adds remote microservices to the service orchestrators and defines the flow between them. + """ + + # Define the embedding microservice + embedding = MicroService( + name="embedding", + host=TEI_EMBEDDING_HOST_IP, + port=EMBEDDER_PORT, + endpoint="/v1/embeddings", + use_remote_service=True, + service_type=ServiceType.EMBEDDING, + ) + + # Define the retriever microservice + retriever = MicroService( + name="retriever", + host=RETRIEVAL_SERVICE_HOST_IP, + port=REDIS_RETRIEVER_PORT, + endpoint="/v1/retrieval", + use_remote_service=True, + service_type=ServiceType.RETRIEVER, + ) + + # Define the LLM microservice llm = MicroService( name="llm", host=LLM_SERVICE_HOST_IP, @@ -38,13 +132,63 @@ def add_remote_service(self): use_remote_service=True, service_type=ServiceType.LLM, ) - self.megaservice.add(llm) + + # Add the microservices to the megaservice_retriever_llm orchestrator and define the flow + self.megaservice_retriever_llm.add(embedding).add(retriever).add(llm) + self.megaservice_retriever_llm.flow_to(embedding, retriever) + self.megaservice_retriever_llm.flow_to(retriever, llm) + + # Add the microservices to the megaservice_retriever orchestrator and define the flow + self.megaservice_retriever.add(embedding).add(retriever) + self.megaservice_retriever.flow_to(embedding, retriever) + + # Add the LLM microservice to the megaservice_llm orchestrator + self.megaservice_llm.add(llm) + + async def read_streaming_response(self, response: StreamingResponse): + """ + Reads the streaming response from a StreamingResponse object. + + Parameters: + - self: Reference to the current instance of the class. + - response: The StreamingResponse object to read from. + + Returns: + - str: The complete response body as a decoded string. + """ + body = b"" # Initialize an empty byte string to accumulate the response chunks + async for chunk in response.body_iterator: + body += chunk # Append each chunk to the body + return body.decode("utf-8") # Decode the accumulated byte string to a regular string async def handle_request(self, request: Request): + """ + Handles the incoming request, processes it through the appropriate microservices, + and returns the response. + + Parameters: + - self: Reference to the current instance of the class. + - request: The incoming request object. + + Returns: + - ChatCompletionResponse: The response from the LLM microservice. 
+ """ + # Parse the incoming request data data = await request.json() + + # Get the stream option from the request data, default to True if not provided stream_opt = data.get("stream", True) - chat_request = ChatCompletionRequest.parse_obj(data) + + # Validate and parse the chat request data + chat_request = ChatCompletionRequest.model_validate(data) + + # Handle the chat messages to generate the prompt prompt = handle_message(chat_request.messages) + + # Get the agents flag from the request data, default to False if not provided + agents_flag = data.get("agents_flag", False) + + # Define the LLM parameters parameters = LLMParams( max_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024, top_k=chat_request.top_k if chat_request.top_k else 10, @@ -54,18 +198,83 @@ async def handle_request(self, request: Request): presence_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 0.0, repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03, stream=stream_opt, + key_index_name=chat_request.key_index_name ) - result_dict, runtime_graph = await self.megaservice.schedule( - initial_inputs={"query": prompt}, llm_parameters=parameters + + # Initialize the initial inputs with the generated prompt + initial_inputs = {"query": prompt} + + # Check if the key index name is provided in the parameters + if parameters.key_index_name: + if agents_flag: + # Schedule the retriever microservice + result_ret, runtime_graph = await self.megaservice_retriever.schedule( + initial_inputs=initial_inputs, llm_parameters=parameters + ) + + # Switch to the LLM microservice + megaservice = self.megaservice_llm + + relevant_docs = [] + for doc in result_ret["retriever/MicroService"]["retrieved_docs"]: + # Create the PromptTemplate + prompt_agent = PromptTemplate(template=grader_prompt, input_variables=["question", "document"]) + + # Format the template with the input variables + formatted_prompt = prompt_agent.format(question=prompt, document=doc["text"]) + initial_inputs_grader = {"query": formatted_prompt} + + # Schedule the LLM microservice for grading + grade, runtime_graph = await self.megaservice_llm.schedule( + initial_inputs=initial_inputs_grader, llm_parameters=parameters + ) + + for node, response in grade.items(): + if isinstance(response, StreamingResponse): + # Read the streaming response + grader_response = await self.read_streaming_response(response) + + # Replace null with None + grader_response = grader_response.replace("null", "None") + + # Split the response by "data:" and process each part + for i in grader_response.split("data:"): + if '"text":' in i: + # Convert the string to a dictionary + r = ast.literal_eval(i) + # Check if the response text is "yes" + if r["choices"][0]["text"] == "yes": + # Append the document to the relevant_docs list + relevant_docs.append(doc) + + # Update the initial inputs with the relevant documents + query = initial_inputs["query"] + initial_inputs = {} + initial_inputs["retrieved_docs"] = relevant_docs + initial_inputs["initial_query"] = query + megaservice = self.megaservice_llm + else: + # Use the combined retriever and LLM microservice + megaservice = self.megaservice_retriever_llm + else: + # Use the LLM microservice only + megaservice = self.megaservice_llm + + # Schedule the final megaservice + result_dict, runtime_graph = await megaservice.schedule( + initial_inputs=initial_inputs, llm_parameters=parameters ) + for node, response in result_dict.items(): - # Here it suppose the last 
microservice in the megaservice is LLM. + # Check if the last microservice in the megaservice is LLM if ( isinstance(response, StreamingResponse) - and node == list(self.megaservice.services.keys())[-1] - and self.megaservice.services[node].service_type == ServiceType.LLM + and node == list(megaservice.services.keys())[-1] + and megaservice.services[node].service_type == ServiceType.LLM ): return response + + # Get the response from the last node in the runtime graph last_node = runtime_graph.all_leaves()[-1] response = result_dict[last_node]["text"] choices = [] diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml index 5567d9e368..be288975c9 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml +++ b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml @@ -4,13 +4,11 @@ services: tgi-service: image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu - container_name: tgi-server - profiles: - - codegen-xeon-tgi + container_name: tgi-service ports: - "8028:80" volumes: - - "${MODEL_CACHE:-./data}:/data" + - "./data:/data" shm_size: 1g environment: no_proxy: ${no_proxy} @@ -24,74 +22,43 @@ services: timeout: 10s retries: 100 command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 - vllm-service: - image: ${REGISTRY:-opea}/vllm:${TAG:-latest} - container_name: vllm-server - profiles: - - codegen-xeon-vllm - ports: - - "8028:80" - volumes: - - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub" - shm_size: 1g - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - host_ip: ${host_ip} - healthcheck: - test: ["CMD-SHELL", "curl -f http://$host_ip:8028/health || exit 1"] - interval: 10s - timeout: 10s - retries: 100 - command: --model ${LLM_MODEL_ID} --host 0.0.0.0 --port 80 - llm-base: + llm: image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} container_name: llm-textgen-server + depends_on: + tgi-service: + condition: service_healthy + ports: + - "9000:9000" + ipc: host environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - LLM_ENDPOINT: ${LLM_ENDPOINT} + LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped - llm-tgi-service: - extends: llm-base - container_name: llm-codegen-tgi-server - profiles: - - codegen-xeon-tgi - ports: - - "9000:9000" - ipc: host - depends_on: - tgi-service: - condition: service_healthy - llm-vllm-service: - extends: llm-base - container_name: llm-codegen-vllm-server - profiles: - - codegen-xeon-vllm - ports: - - "9000:9000" - ipc: host - depends_on: - vllm-service: - condition: service_healthy codegen-xeon-backend-server: image: ${REGISTRY:-opea}/codegen:${TAG:-latest} container_name: codegen-xeon-backend-server depends_on: - - llm-base + - llm ports: - "7778:7778" environment: - no_proxy=${no_proxy} - https_proxy=${https_proxy} - http_proxy=${http_proxy} - - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} - - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP} + - MEGA_SERVICE_HOST_IP=${host_ip} #${MEGA_SERVICE_HOST_IP} + - LLM_SERVICE_HOST_IP=${host_ip} #${LLM_SERVICE_HOST_IP} + # - RETRIEVAL_SERVICE_HOST_IP=${REDIS_RETRIEVER_PORT} + - RETRIEVAL_SERVICE_HOST_IP=${host_ip} #${RETRIEVAL_SERVICE_HOST_IP} + - REDIS_RETRIEVER_PORT=${REDIS_RETRIEVER_PORT} + # - MM_EMBEDDING_SERVICE_HOST_IP=${MM_EMBEDDING_PORT_MICROSERVICE} + - TEI_EMBEDDING_HOST_IP=${host_ip} #${TEI_EMBEDDING_HOST_IP} + - 
EMBEDDER_PORT=${EMBEDDER_PORT} + ipc: host restart: always codegen-xeon-ui-server: @@ -109,6 +76,92 @@ services: ipc: host restart: always + redis-vector-db: + image: redis/redis-stack:7.2.0-v9 + container_name: redis-vector-db + ports: + - "${REDIS_DB_PORT}:${REDIS_DB_PORT}" + - "${REDIS_INSIGHTS_PORT}:${REDIS_INSIGHTS_PORT}" + + dataprep-redis-server: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-redis-server + depends_on: + - redis-vector-db + ports: + - "${DATAPREP_REDIS_PORT}:5000" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: ${REDIS_URL} + REDIS_HOST: ${host_ip} + INDEX_NAME: ${INDEX_NAME} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + LOGFLAG: true + restart: unless-stopped + + tei-embedding-serving: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + container_name: tei-embedding-serving + entrypoint: /bin/sh -c "apt-get update && apt-get install -y curl && text-embeddings-router --json-output --model-id ${EMBEDDING_MODEL_ID} --auto-truncate" + ports: + - "${TEI_EMBEDDER_PORT:-12000}:80" + volumes: + - "./data:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + host_ip: ${host_ip} + HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + healthcheck: + test: ["CMD", "curl", "-f", "http://${host_ip}:${TEI_EMBEDDER_PORT}/health"] + interval: 10s + timeout: 6s + retries: 48 + + tei-embedding-server: + image: ${REGISTRY:-opea}/embedding:${TAG:-latest} + container_name: tei-embedding-server + ports: + - "${EMBEDDER_PORT:-10201}:6000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + EMBEDDING_COMPONENT_NAME: "OPEA_TEI_EMBEDDING" + depends_on: + tei-embedding-serving: + condition: service_healthy + restart: unless-stopped + + retriever-redis: + image: ${REGISTRY:-opea}/retriever:${TAG:-latest} + container_name: retriever-redis + depends_on: + - redis-vector-db + ports: + - "${REDIS_RETRIEVER_PORT}:${REDIS_RETRIEVER_PORT}" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: ${REDIS_URL} + REDIS_DB_PORT: ${REDIS_DB_PORT} + REDIS_INSIGHTS_PORT: ${REDIS_INSIGHTS_PORT} + REDIS_RETRIEVER_PORT: ${REDIS_RETRIEVER_PORT} + INDEX_NAME: ${INDEX_NAME} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + LOGFLAG: ${LOGFLAG} + RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_REDIS} + restart: unless-stopped + networks: default: - driver: bridge + driver: bridge \ No newline at end of file diff --git a/CodeGen/docker_compose/set_env.sh b/CodeGen/docker_compose/set_env.sh index cb9e742847..e27df5e91a 100644 --- a/CodeGen/docker_compose/set_env.sh +++ b/CodeGen/docker_compose/set_env.sh @@ -2,12 +2,13 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -pushd "../../" > /dev/null -source .set_env.sh -popd > /dev/null +# pushd "../../" > /dev/null +# source .set_env.sh +# popd > /dev/null -export host_ip=$(hostname -I | awk '{print $1}') +export your_ip=$(hostname -I | awk '{print $1}') +export host_ip=$(hostname -I | awk '{print $1}') if [ -z "${HUGGINGFACEHUB_API_TOKEN}" ]; then echo "Error: HUGGINGFACEHUB_API_TOKEN is not set. 
Please set HUGGINGFACEHUB_API_TOKEN" fi @@ -18,9 +19,58 @@ fi export no_proxy=${no_proxy},${host_ip} -export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct" -export LLM_ENDPOINT="http://${host_ip}:8028" +export http_proxy=${http_proxy} +export https_proxy=${https_proxy} + +# export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct" +export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-32B-Instruct" +export TGI_LLM_ENDPOINT="http://${host_ip}:8028" + +export MEGA_SERVICE_PORT=7778 export MEGA_SERVICE_HOST_IP=${host_ip} -export LLM_SERVICE_HOST_IP=${host_ip} +export MEGA_SERVICE_PORT=7778 export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:7778/v1/codegen" -export MODEL_CACHE="./data" + +export REDIS_DB_PORT=6379 +export REDIS_INSIGHTS_PORT=8001 +export REDIS_URL="redis://${host_ip}:${REDIS_DB_PORT}" +export REDIS_HOST=${host_ip} +export INDEX_NAME="test_codeGen_v1" + + +export RETRIEVAL_SERVICE_HOST_IP=${host_ip} + +export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" + +export MMEI_EMBEDDING_ENDPOINT="http://${host_ip}:$EMM_BRIDGETOWER_PORT" +export MM_EMBEDDING_ENDPOINT="http://${host_ip}:$EMM_BRIDGETOWER_PORT" + +export MM_EMBEDDING_PORT_MICROSERVICE=6000 +export MM_EMBEDDING_SERVICE_HOST_IP=${host_ip} + +# export BRIDGE_TOWER_EMBEDDING=true + +# export MULTIMODAL_DATAPREP=true +export DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_REDIS" + +export REDIS_RETRIEVER_PORT=7000 +export DATAPREP_REDIS_PORT=6007 +export LOGFLAG=false + +# Text Retriever +export RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_REDIS" +export INDEX_NAME="CodeGen" + +export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" +export EMBEDDER_PORT=6000 +export TEI_EMBEDDER_PORT=8090 +export TEI_EMBEDDING_HOST_IP=${host_ip} +export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" + +export DATAPREP_REDIS_PORT=6007 +export LOGFLAG=false + + +# export LLM_SERVICE_HOST_IP=${host_ip} +# export RETRIEVAL_SERVICE_HOST_IP=${host_ip} +# export TEI_EMBEDDING_HOST_IP=${host_ip} \ No newline at end of file From 205b38039c7c64144a6c0bd7d34d9f1a464ddc9b Mon Sep 17 00:00:00 2001 From: Mustafa Date: Tue, 25 Mar 2025 10:43:25 -0700 Subject: [PATCH 02/22] update the env variables Signed-off-by: Mustafa --- .../intel/cpu/xeon/compose.yaml | 3 ++ CodeGen/docker_compose/set_env.sh | 41 ++++--------------- 2 files changed, 11 insertions(+), 33 deletions(-) diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml index be288975c9..763f93081b 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml +++ b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml @@ -22,6 +22,7 @@ services: timeout: 10s retries: 100 command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 + llm: image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} container_name: llm-textgen-server @@ -39,6 +40,8 @@ services: LLM_MODEL_ID: ${LLM_MODEL_ID} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped + + codegen-xeon-backend-server: image: ${REGISTRY:-opea}/codegen:${TAG:-latest} container_name: codegen-xeon-backend-server diff --git a/CodeGen/docker_compose/set_env.sh b/CodeGen/docker_compose/set_env.sh index e27df5e91a..dd0b97a551 100644 --- a/CodeGen/docker_compose/set_env.sh +++ b/CodeGen/docker_compose/set_env.sh @@ -2,9 +2,9 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -# pushd "../../" > /dev/null -# source .set_env.sh -# popd > /dev/null +pushd "../../" > /dev/null +source .set_env.sh +popd > /dev/null export your_ip=$(hostname -I | awk '{print $1}') @@ -22,42 +22,21 @@ export 
no_proxy=${no_proxy},${host_ip} export http_proxy=${http_proxy} export https_proxy=${https_proxy} -# export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct" export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-32B-Instruct" +export LLM_SERVICE_PORT=9000 +export LLM_ENDPOINT="http://${host_ip}:8028" +export LLM_SERVICE_HOST_IP=${host_ip} export TGI_LLM_ENDPOINT="http://${host_ip}:8028" export MEGA_SERVICE_PORT=7778 export MEGA_SERVICE_HOST_IP=${host_ip} -export MEGA_SERVICE_PORT=7778 export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:7778/v1/codegen" export REDIS_DB_PORT=6379 export REDIS_INSIGHTS_PORT=8001 export REDIS_URL="redis://${host_ip}:${REDIS_DB_PORT}" -export REDIS_HOST=${host_ip} -export INDEX_NAME="test_codeGen_v1" - - -export RETRIEVAL_SERVICE_HOST_IP=${host_ip} - -export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" - -export MMEI_EMBEDDING_ENDPOINT="http://${host_ip}:$EMM_BRIDGETOWER_PORT" -export MM_EMBEDDING_ENDPOINT="http://${host_ip}:$EMM_BRIDGETOWER_PORT" - -export MM_EMBEDDING_PORT_MICROSERVICE=6000 -export MM_EMBEDDING_SERVICE_HOST_IP=${host_ip} - -# export BRIDGE_TOWER_EMBEDDING=true - -# export MULTIMODAL_DATAPREP=true -export DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_REDIS" - export REDIS_RETRIEVER_PORT=7000 -export DATAPREP_REDIS_PORT=6007 -export LOGFLAG=false - -# Text Retriever +export RETRIEVAL_SERVICE_HOST_IP=${host_ip} export RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_REDIS" export INDEX_NAME="CodeGen" @@ -69,8 +48,4 @@ export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" export DATAPREP_REDIS_PORT=6007 export LOGFLAG=false - - -# export LLM_SERVICE_HOST_IP=${host_ip} -# export RETRIEVAL_SERVICE_HOST_IP=${host_ip} -# export TEI_EMBEDDING_HOST_IP=${host_ip} \ No newline at end of file +export MODEL_CACHE="./data" From 2a520948d0151d6856cc9ada6d91b7b346e45410 Mon Sep 17 00:00:00 2001 From: Mustafa Date: Tue, 25 Mar 2025 12:13:29 -0700 Subject: [PATCH 03/22] update compose.yaml Signed-off-by: Mustafa --- .../intel/cpu/xeon/compose.yaml | 83 ++++++++++++++----- 1 file changed, 62 insertions(+), 21 deletions(-) diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml index 763f93081b..0987b6fa8e 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml +++ b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml @@ -2,13 +2,16 @@ # SPDX-License-Identifier: Apache-2.0 services: + tgi-service: image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu - container_name: tgi-service + container_name: tgi-server + # profiles: + # - codegen-xeon-tgi ports: - "8028:80" volumes: - - "./data:/data" + - "${MODEL_CACHE:-./data}:/data" shm_size: 1g environment: no_proxy: ${no_proxy} @@ -22,46 +25,82 @@ services: timeout: 10s retries: 100 command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 + + vllm-service: + image: ${REGISTRY:-opea}/vllm:${TAG:-latest} + container_name: vllm-server + profiles: + - codegen-xeon-vllm + ports: + - "8028:80" + volumes: + - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + host_ip: ${host_ip} + healthcheck: + test: ["CMD-SHELL", "curl -f http://$host_ip:8028/health || exit 1"] + interval: 10s + timeout: 10s + retries: 100 + command: --model ${LLM_MODEL_ID} --host 0.0.0.0 --port 80 - llm: + llm-base: image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} container_name: llm-textgen-server - depends_on: - tgi-service: - condition: 
service_healthy - ports: - - "9000:9000" - ipc: host environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} + LLM_ENDPOINT: ${LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped - + + llm-tgi-service: + extends: llm-base + container_name: llm-codegen-tgi-server + # profiles: + # - codegen-xeon-tgi + ports: + - "9000:9000" + ipc: host + depends_on: + tgi-service: + condition: service_healthy + llm-vllm-service: + extends: llm-base + container_name: llm-codegen-vllm-server + profiles: + - codegen-xeon-vllm + ports: + - "9000:9000" + ipc: host + depends_on: + vllm-service: + condition: service_healthy codegen-xeon-backend-server: image: ${REGISTRY:-opea}/codegen:${TAG:-latest} container_name: codegen-xeon-backend-server depends_on: - - llm + - llm-base ports: - "7778:7778" environment: - no_proxy=${no_proxy} - https_proxy=${https_proxy} - http_proxy=${http_proxy} - - MEGA_SERVICE_HOST_IP=${host_ip} #${MEGA_SERVICE_HOST_IP} - - LLM_SERVICE_HOST_IP=${host_ip} #${LLM_SERVICE_HOST_IP} - # - RETRIEVAL_SERVICE_HOST_IP=${REDIS_RETRIEVER_PORT} - - RETRIEVAL_SERVICE_HOST_IP=${host_ip} #${RETRIEVAL_SERVICE_HOST_IP} + - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} + - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP} + - RETRIEVAL_SERVICE_HOST_IP=${RETRIEVAL_SERVICE_HOST_IP} - REDIS_RETRIEVER_PORT=${REDIS_RETRIEVER_PORT} - # - MM_EMBEDDING_SERVICE_HOST_IP=${MM_EMBEDDING_PORT_MICROSERVICE} - - TEI_EMBEDDING_HOST_IP=${host_ip} #${TEI_EMBEDDING_HOST_IP} + - TEI_EMBEDDING_HOST_IP=${TEI_EMBEDDING_HOST_IP} - EMBEDDER_PORT=${EMBEDDER_PORT} - ipc: host restart: always codegen-xeon-ui-server: @@ -85,7 +124,7 @@ services: ports: - "${REDIS_DB_PORT}:${REDIS_DB_PORT}" - "${REDIS_INSIGHTS_PORT}:${REDIS_INSIGHTS_PORT}" - + dataprep-redis-server: image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} container_name: dataprep-redis-server @@ -165,6 +204,8 @@ services: RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_REDIS} restart: unless-stopped + + networks: default: - driver: bridge \ No newline at end of file + driver: bridge From 3efb2f9adf04c812e1c3b571035008732859f1a2 Mon Sep 17 00:00:00 2001 From: Mustafa Date: Wed, 26 Mar 2025 11:22:48 -0700 Subject: [PATCH 04/22] compose.yaml updates Signed-off-by: Mustafa --- CodeGen/codegen.py | 26 ++++++++++++------- .../intel/cpu/xeon/compose.yaml | 22 ++++------------ 2 files changed, 22 insertions(+), 26 deletions(-) diff --git a/CodeGen/codegen.py b/CodeGen/codegen.py index b5cf45bfd9..6384efaa47 100644 --- a/CodeGen/codegen.py +++ b/CodeGen/codegen.py @@ -5,7 +5,7 @@ import os import ast -from comps import MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceRoleType, ServiceType +from comps import MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceRoleType, ServiceType, CustomLogger from comps.cores.mega.utils import handle_message from comps.cores.proto.api_protocol import ( ChatCompletionRequest, @@ -19,6 +19,9 @@ from fastapi.responses import StreamingResponse from langchain.prompts import PromptTemplate +logger = CustomLogger("opea_dataprep_microservice") +logflag = os.getenv("LOGFLAG", False) + MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 7778)) LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0") @@ -80,7 +83,7 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k embedding = inputs['data'][0]['embedding'] # Align the inputs for the retriever 
service inputs = { - "index_name": llm_parameters_dict["key_index_name"], + "index_name": llm_parameters_dict["index_name"], "text": self.input_query, "embedding": embedding } @@ -198,14 +201,14 @@ async def handle_request(self, request: Request): presence_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 0.0, repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03, stream=stream_opt, - key_index_name=chat_request.key_index_name + index_name=chat_request.index_name ) # Initialize the initial inputs with the generated prompt initial_inputs = {"query": prompt} # Check if the key index name is provided in the parameters - if parameters.key_index_name: + if parameters.index_name: if agents_flag: # Schedule the retriever microservice result_ret, runtime_graph = await self.megaservice_retriever.schedule( @@ -248,11 +251,16 @@ async def handle_request(self, request: Request): relevant_docs.append(doc) # Update the initial inputs with the relevant documents - query = initial_inputs["query"] - initial_inputs = {} - initial_inputs["retrieved_docs"] = relevant_docs - initial_inputs["initial_query"] = query - megaservice = self.megaservice_llm + if len(relevant_docs)>0: + logger.info(f"[ CodeGenService - handle_request ] {len(relevant_docs)} relevant document\s found.") + query = initial_inputs["query"] + initial_inputs = {} + initial_inputs["retrieved_docs"] = relevant_docs + initial_inputs["initial_query"] = query + + else: + logger.info("[ CodeGenService - handle_request ] Could not find any relevant documents. The query will be used as input to the LLM.") + else: # Use the combined retriever and LLM microservice megaservice = self.megaservice_retriever_llm diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml index 0987b6fa8e..3d132d29f9 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml +++ b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml @@ -6,8 +6,8 @@ services: tgi-service: image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu container_name: tgi-server - # profiles: - # - codegen-xeon-tgi + profiles: + - codegen-xeon-tgi ports: - "8028:80" volumes: @@ -25,7 +25,6 @@ services: timeout: 10s retries: 100 command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 - vllm-service: image: ${REGISTRY:-opea}/vllm:${TAG:-latest} container_name: vllm-server @@ -48,7 +47,6 @@ services: timeout: 10s retries: 100 command: --model ${LLM_MODEL_ID} --host 0.0.0.0 --port 80 - llm-base: image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} container_name: llm-textgen-server @@ -60,12 +58,11 @@ services: LLM_MODEL_ID: ${LLM_MODEL_ID} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped - llm-tgi-service: extends: llm-base container_name: llm-codegen-tgi-server - # profiles: - # - codegen-xeon-tgi + profiles: + - codegen-xeon-tgi ports: - "9000:9000" ipc: host @@ -83,7 +80,6 @@ services: depends_on: vllm-service: condition: service_healthy - codegen-xeon-backend-server: image: ${REGISTRY:-opea}/codegen:${TAG:-latest} container_name: codegen-xeon-backend-server @@ -117,14 +113,12 @@ services: - BASIC_URL=${BACKEND_SERVICE_ENDPOINT} ipc: host restart: always - redis-vector-db: image: redis/redis-stack:7.2.0-v9 container_name: redis-vector-db ports: - "${REDIS_DB_PORT}:${REDIS_DB_PORT}" - "${REDIS_INSIGHTS_PORT}:${REDIS_INSIGHTS_PORT}" - dataprep-redis-server: image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} container_name: 
dataprep-redis-server @@ -142,7 +136,6 @@ services: HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} LOGFLAG: true restart: unless-stopped - tei-embedding-serving: image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 container_name: tei-embedding-serving @@ -163,7 +156,6 @@ services: interval: 10s timeout: 6s retries: 48 - tei-embedding-server: image: ${REGISTRY:-opea}/embedding:${TAG:-latest} container_name: tei-embedding-server @@ -180,7 +172,6 @@ services: tei-embedding-serving: condition: service_healthy restart: unless-stopped - retriever-redis: image: ${REGISTRY:-opea}/retriever:${TAG:-latest} container_name: retriever-redis @@ -203,9 +194,6 @@ services: LOGFLAG: ${LOGFLAG} RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_REDIS} restart: unless-stopped - - - networks: default: - driver: bridge + driver: bridge \ No newline at end of file From 8875802c6ffb9369deae44be25f933def29d47c5 Mon Sep 17 00:00:00 2001 From: Mustafa Date: Thu, 27 Mar 2025 13:07:14 -0700 Subject: [PATCH 05/22] update readme file Signed-off-by: Mustafa --- CodeGen/README.md | 54 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 43 insertions(+), 11 deletions(-) diff --git a/CodeGen/README.md b/CodeGen/README.md index 00d54adbc2..647cad3b65 100644 --- a/CodeGen/README.md +++ b/CodeGen/README.md @@ -1,6 +1,6 @@ # Code Generation Application -Code Generation (CodeGen) Large Language Models (LLMs) are specialized AI models designed for the task of generating computer code. Such models undergo training with datasets that encompass repositories, specialized documentation, programming code, relevant web content, and other related data. They possess a deep understanding of various programming languages, coding patterns, and software development concepts. CodeGen LLMs are engineered to assist developers and programmers. When these LLMs are seamlessly integrated into the developer's Integrated Development Environment (IDE), they possess a comprehensive understanding of the coding context, which includes elements such as comments, function names, and variable names. This contextual awareness empowers them to provide more refined and contextually relevant coding suggestions. +Code Generation (CodeGen) Large Language Models (LLMs) are specialized AI models designed for the task of generating computer code. Such models undergo training with datasets that encompass repositories, specialized documentation, programming code, relevant web content, and other related data. They possess a deep understanding of various programming languages, coding patterns, and software development concepts. CodeGen LLMs are engineered to assist developers and programmers. When these LLMs are seamlessly integrated into the developer's Integrated Development Environment (IDE), they possess a comprehensive understanding of the coding context, which includes elements such as comments, function names, and variable names. This contextual awareness empowers them to provide more refined and contextually relevant coding suggestions. Additionally Retrieval-Augmented Generation (RAG) and Agents are parts of the CodeGen example which provide an additional layer of intelligence and adaptability, ensuring that the generated code is not only relevant but also accurate, efficient, and tailored to the specific needs of the developers and programmers. 
The capabilities of CodeGen LLMs include: @@ -20,6 +20,7 @@ The workflow falls into the following architecture: The CodeGen example is implemented using the component-level microservices defined in [GenAIComps](https://github.com/opea-project/GenAIComps). The flow chart below shows the information flow between different microservices for this example. + ```mermaid --- config: @@ -28,7 +29,7 @@ config: rankSpacing: 100 curve: linear themeVariables: - fontSize: 50px + fontSize: 25px --- flowchart LR %% Colors %% @@ -37,34 +38,56 @@ flowchart LR classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 classDef invisible fill:transparent,stroke:transparent; style CodeGen-MegaService stroke:#000000 - %% Subgraphs %% - subgraph CodeGen-MegaService["CodeGen MegaService "] + subgraph CodeGen-MegaService["CodeGen-MegaService"] direction LR - LLM([LLM MicroService]):::blue + EM([Embedding
MicroService]):::blue
+        RET([Retrieval <br> MicroService]):::blue
+        RER([Agents]):::blue
+        LLM([LLM
MicroService]):::blue end - subgraph UserInterface[" User Interface "] + subgraph User Interface direction LR - a([User Input Query]):::orchid - UI([UI server
]):::orchid + a([Submit Query Tab]):::orchid + UI([UI server]):::orchid + Ingest([Manage Resources]):::orchid end + CLIP_EM{{Embedding
service}} + VDB{{Vector DB}} + V_RET{{Retriever
service}} + Ingest{{Ingest data}} + DP([Data Preparation]):::blue + LLM_gen{{LLM Service}} + GW([CodeGen GateWay]):::orange - LLM_gen{{LLM Service
}} - GW([CodeGen GateWay
]):::orange - + %% Data Preparation flow + %% Ingest data flow + direction LR + Ingest[Ingest data] --> UI + UI --> DP + DP <-.-> CLIP_EM %% Questions interaction direction LR a[User Input Query] --> UI UI --> GW GW <==> CodeGen-MegaService + EM ==> RET + RET ==> RER + RER ==> LLM %% Embedding service flow direction LR + EM <-.-> CLIP_EM + RET <-.-> V_RET LLM <-.-> LLM_gen + direction TB + %% Vector DB interaction + V_RET <-.->VDB + DP <-.->VDB ``` ## Deploy CodeGen Service @@ -161,6 +184,15 @@ Two ways of consuming CodeGen Service: -d '{"messages": "Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception."}' ``` + If the user wants a CodeGen service with RAG and Agents based on dedicated documentation. + + ```bash + curl http://localhost:7778/v1/codegen \ + -H "Content-Type: application/json" \ + -d '{"agents_flag": "True", "index_name": "my_API_document", "messages": "Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception."}' + + ``` + 2. Access via frontend To access the frontend, open the following URL in your browser: http://{host_ip}:5173. From f33ba5ae77d1cdc43f454ebc31b4a8d693f77fb3 Mon Sep 17 00:00:00 2001 From: Mustafa Date: Thu, 27 Mar 2025 13:35:19 -0700 Subject: [PATCH 06/22] update readme Signed-off-by: Mustafa --- CodeGen/README.md | 20 ++++- .../docker_compose/intel/cpu/xeon/README.md | 84 ++++++++++++++++--- .../docker_compose/intel/hpu/gaudi/README.md | 84 ++++++++++++++++--- 3 files changed, 159 insertions(+), 29 deletions(-) diff --git a/CodeGen/README.md b/CodeGen/README.md index 647cad3b65..692e01848b 100644 --- a/CodeGen/README.md +++ b/CodeGen/README.md @@ -58,7 +58,7 @@ flowchart LR V_RET{{Retriever
service}} Ingest{{Ingest data}} DP([Data Preparation]):::blue - LLM_gen{{LLM Service}} + LLM_gen{{TGI Service}} GW([CodeGen GateWay]):::orange %% Data Preparation flow @@ -161,11 +161,25 @@ Refer to the [Gaudi Guide](./docker_compose/intel/hpu/gaudi/README.md) to build Find the corresponding [compose.yaml](./docker_compose/intel/cpu/xeon/compose.yaml). +Start CodeGen based on TGI service: + ```bash -cd GenAIExamples/CodeGen/docker_compose/intel/cpu/xeon -docker compose up -d +cd GenAIExamples/CodeGen/docker_compose +source set_env.sh +cd intel/cpu/xeon +docker compose --profile codegen-xeon-tgi up -d ``` +Start CodeGen based on vLLM service: + +```bash +cd GenAIExamples/CodeGen/docker_compose +source set_env.sh +cd intel/cpu/xeon +docker compose --profile codegen-xeon-vllm up -d +``` + + Refer to the [Xeon Guide](./docker_compose/intel/cpu/xeon/README.md) for more instructions on building docker images from source. ### Deploy CodeGen on Kubernetes using Helm Chart diff --git a/CodeGen/docker_compose/intel/cpu/xeon/README.md b/CodeGen/docker_compose/intel/cpu/xeon/README.md index 3cc7a19b3c..75c7b1851b 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/README.md +++ b/CodeGen/docker_compose/intel/cpu/xeon/README.md @@ -13,28 +13,77 @@ After launching your instance, you can connect to it using SSH (for Linux instan ## 🚀 Start Microservices and MegaService -The CodeGen megaservice manages a single microservice called LLM within a Directed Acyclic Graph (DAG). In the diagram above, the LLM microservice is a language model microservice that generates code snippets based on the user's input query. The TGI service serves as a text generation interface, providing a RESTful API for the LLM microservice. The CodeGen Gateway acts as the entry point for the CodeGen application, invoking the Megaservice to generate code snippets in response to the user's input query. +The CodeGen megaservice manages a several microservices including 'Embedding MicroService', 'Retrieval MicroService' and 'LLM MicroService' within a Directed Acyclic Graph (DAG). In the diagram below, the LLM microservice is a language model microservice that generates code snippets based on the user's input query. The TGI service serves as a text generation interface, providing a RESTful API for the LLM microservice. Data Preparation allows users to save/update documents or online resources to the vector database. Users can upload files or provide URLs, and manage their saved resources. The CodeGen Gateway acts as the entry point for the CodeGen application, invoking the Megaservice to generate code snippets in response to the user's input query. The mega flow of the CodeGen application, from user's input query to the application's output response, is as follows: ```mermaid +--- +config: + flowchart: + nodeSpacing: 400 + rankSpacing: 100 + curve: linear + themeVariables: + fontSize: 25px +--- flowchart LR - subgraph CodeGen + %% Colors %% + classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 + classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 + classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 + classDef invisible fill:transparent,stroke:transparent; + style CodeGen-MegaService stroke:#000000 + %% Subgraphs %% + subgraph CodeGen-MegaService["CodeGen-MegaService"] direction LR - A[User] --> |Input query| B[CodeGen Gateway] - B --> |Invoke| Megaservice - subgraph Megaservice["Megaservice"] - direction TB - C((LLM
9000)) -. Post .-> D{{TGI Service
8028}} - end - Megaservice --> |Output| E[Response] + EM([Embedding
MicroService]):::blue
+        RET([Retrieval <br> MicroService]):::blue
+        RER([Agents]):::blue
+        LLM([LLM
MicroService]):::blue end - - subgraph Legend + subgraph User Interface direction LR - G([Microservice]) ==> H([Microservice]) - I([Microservice]) -.-> J{{Server API}} + a([Submit Query Tab]):::orchid + UI([UI server]):::orchid + Ingest([Manage Resources]):::orchid end + + CLIP_EM{{Embedding
service}} + VDB{{Vector DB}} + V_RET{{Retriever
service}} + Ingest{{Ingest data}} + DP([Data Preparation]):::blue + LLM_gen{{TGI Service}} + GW([CodeGen GateWay]):::orange + + %% Data Preparation flow + %% Ingest data flow + direction LR + Ingest[Ingest data] --> UI + UI --> DP + DP <-.-> CLIP_EM + + %% Questions interaction + direction LR + a[User Input Query] --> UI + UI --> GW + GW <==> CodeGen-MegaService + EM ==> RET + RET ==> RER + RER ==> LLM + + + %% Embedding service flow + direction LR + EM <-.-> CLIP_EM + RET <-.-> V_RET + LLM <-.-> LLM_gen + + direction TB + %% Vector DB interaction + V_RET <-.->VDB + DP <-.->VDB ``` ### Setup Environment Variables @@ -111,6 +160,15 @@ docker compose --profile codegen-xeon-vllm up -d }' ``` + If the user wants a CodeGen service with RAG and Agents based on dedicated documentation. + + ```bash + curl http://localhost:7778/v1/codegen \ + -H "Content-Type: application/json" \ + -d '{"agents_flag": "True", "index_name": "my_API_document", "messages": "Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception."}' + ``` + + ## 🚀 Launch the UI To access the frontend, open the following URL in your browser: `http://{host_ip}:5173`. By default, the UI runs on port 5173 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below: diff --git a/CodeGen/docker_compose/intel/hpu/gaudi/README.md b/CodeGen/docker_compose/intel/hpu/gaudi/README.md index 133b32f09f..5408e33654 100644 --- a/CodeGen/docker_compose/intel/hpu/gaudi/README.md +++ b/CodeGen/docker_compose/intel/hpu/gaudi/README.md @@ -6,28 +6,77 @@ The default pipeline deploys with vLLM as the LLM serving component. It also pro ## 🚀 Start MicroServices and MegaService -The CodeGen megaservice manages a single microservice called LLM within a Directed Acyclic Graph (DAG). In the diagram above, the LLM microservice is a language model microservice that generates code snippets based on the user's input query. The TGI service serves as a text generation interface, providing a RESTful API for the LLM microservice. The CodeGen Gateway acts as the entry point for the CodeGen application, invoking the Megaservice to generate code snippets in response to the user's input query. +The CodeGen megaservice manages a several microservices including 'Embedding MicroService', 'Retrieval MicroService' and 'LLM MicroService' within a Directed Acyclic Graph (DAG). In the diagram below, the LLM microservice is a language model microservice that generates code snippets based on the user's input query. The TGI service serves as a text generation interface, providing a RESTful API for the LLM microservice. Data Preparation allows users to save/update documents or online resources to the vector database. Users can upload files or provide URLs, and manage their saved resources. The CodeGen Gateway acts as the entry point for the CodeGen application, invoking the Megaservice to generate code snippets in response to the user's input query. 
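+
+As a quick reference, the snippet below sketches the two request modes the CodeGen gateway accepts. The endpoint and field names mirror the request examples later in this guide; the prompt text and the `my_API_document` index name are placeholders only, and the index must have been ingested through the data preparation service beforehand.
+
+```bash
+# Plain code generation, handled by the LLM microservice only
+curl http://localhost:7778/v1/codegen \
+  -H "Content-Type: application/json" \
+  -d '{"messages": "Implement a function that checks whether a string is a palindrome."}'
+
+# Retrieval-augmented generation with agent-based relevance grading
+curl http://localhost:7778/v1/codegen \
+  -H "Content-Type: application/json" \
+  -d '{"agents_flag": "True", "index_name": "my_API_document", "messages": "Implement a function that checks whether a string is a palindrome."}'
+```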
The mega flow of the CodeGen application, from user's input query to the application's output response, is as follows: ```mermaid +--- +config: + flowchart: + nodeSpacing: 400 + rankSpacing: 100 + curve: linear + themeVariables: + fontSize: 25px +--- flowchart LR - subgraph CodeGen + %% Colors %% + classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 + classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 + classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 + classDef invisible fill:transparent,stroke:transparent; + style CodeGen-MegaService stroke:#000000 + %% Subgraphs %% + subgraph CodeGen-MegaService["CodeGen-MegaService"] direction LR - A[User] --> |Input query| B[CodeGen Gateway] - B --> |Invoke| Megaservice - subgraph Megaservice["Megaservice"] - direction TB - C((LLM
9000)) -. Post .-> D{{TGI Service
8028}} - end - Megaservice --> |Output| E[Response] + EM([Embedding
MicroService]):::blue
+        RET([Retrieval <br> MicroService]):::blue
+        RER([Agents]):::blue
+        LLM([LLM
MicroService]):::blue end - - subgraph Legend + subgraph User Interface direction LR - G([Microservice]) ==> H([Microservice]) - I([Microservice]) -.-> J{{Server API}} + a([Submit Query Tab]):::orchid + UI([UI server]):::orchid + Ingest([Manage Resources]):::orchid end + + CLIP_EM{{Embedding
service}} + VDB{{Vector DB}} + V_RET{{Retriever
service}} + Ingest{{Ingest data}} + DP([Data Preparation]):::blue + LLM_gen{{TGI Service}} + GW([CodeGen GateWay]):::orange + + %% Data Preparation flow + %% Ingest data flow + direction LR + Ingest[Ingest data] --> UI + UI --> DP + DP <-.-> CLIP_EM + + %% Questions interaction + direction LR + a[User Input Query] --> UI + UI --> GW + GW <==> CodeGen-MegaService + EM ==> RET + RET ==> RER + RER ==> LLM + + + %% Embedding service flow + direction LR + EM <-.-> CLIP_EM + RET <-.-> V_RET + LLM <-.-> LLM_gen + + direction TB + %% Vector DB interaction + V_RET <-.->VDB + DP <-.->VDB ``` ### Setup Environment Variables @@ -104,6 +153,15 @@ docker compose --profile codegen-gaudi-vllm up -d }' ``` + If the user wants a CodeGen service with RAG and Agents based on dedicated documentation. + + ```bash + curl http://localhost:7778/v1/codegen \ + -H "Content-Type: application/json" \ + -d '{"agents_flag": "True", "index_name": "my_API_document", "messages": "Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception."}' + ``` + + ## 🚀 Launch the Svelte Based UI To access the frontend, open the following URL in your browser: `http://{host_ip}:5173`. By default, the UI runs on port 5173 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below: From 301b75da9c36c2637cfa2567be7aa09f3a6b4802 Mon Sep 17 00:00:00 2001 From: Mustafa Date: Thu, 13 Feb 2025 22:11:53 -0800 Subject: [PATCH 07/22] update the compose file Signed-off-by: Mustafa initial Signed-off-by: Mustafa added microservice ports Signed-off-by: okhleif-IL update codegen Signed-off-by: Mustafa update for codegen Signed-off-by: Mustafa Initial commit for Gradio UI Signed-off-by: okhleif-IL New UI Signed-off-by: okhleif-IL prepare for merge Signed-off-by: okhleif-IL add agents Signed-off-by: Mustafa env updates Signed-off-by: Mustafa update codegen Signed-off-by: Mustafa merged to main Signed-off-by: Mustafa updates Signed-off-by: Mustafa UI Updates Signed-off-by: okhleif-IL added dockerfile Signed-off-by: okhleif-IL removed files dataframe Signed-off-by: okhleif-IL updated file upload Signed-off-by: okhleif-IL added checkbox for agent Signed-off-by: okhleif-IL key_index_name --> index_name Signed-off-by: okhleif-IL added / removed print statements Signed-off-by: okhleif-IL Support for data streaming (from Melanie) Signed-off-by: okhleif-IL fixed file not supported bug Signed-off-by: okhleif-IL added refresh button to index Signed-off-by: okhleif-IL simplified README Signed-off-by: okhleif-IL --- CodeGen/Dockerfile | 47 +- CodeGen/codegen.py | 24 +- .../docker_compose/intel/cpu/xeon/README.md | 98 +---- .../intel/cpu/xeon/compose.yaml | 83 ++-- CodeGen/docker_compose/set_env.sh | 27 +- CodeGen/docker_image_build/build.yaml | 6 + CodeGen/ui/docker/Dockerfile.gradio | 33 ++ CodeGen/ui/gradio/README.md | 76 ++++ CodeGen/ui/gradio/codegen_ui_gradio.py | 402 ++++++++++++++++++ CodeGen/ui/gradio/requirements.txt | 6 + 10 files changed, 620 insertions(+), 182 deletions(-) create mode 100644 CodeGen/ui/docker/Dockerfile.gradio create mode 100644 CodeGen/ui/gradio/README.md create mode 100644 CodeGen/ui/gradio/codegen_ui_gradio.py create mode 100644 CodeGen/ui/gradio/requirements.txt diff --git a/CodeGen/Dockerfile b/CodeGen/Dockerfile index 5305a9d89f..b2b4155fd7 100644 --- a/CodeGen/Dockerfile +++ b/CodeGen/Dockerfile @@ -1,8 +1,51 @@ # 
Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -ARG BASE_TAG=latest -FROM opea/comps-base:$BASE_TAG +# Stage 1: base setup used by other stages +FROM python:3.11-slim AS base + +# get security updates +RUN apt-get update && apt-get upgrade -y && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +ENV HOME=/home/user + +RUN useradd -m -s /bin/bash user && \ + mkdir -p $HOME && \ + chown -R user $HOME + +WORKDIR $HOME + + +# Stage 2: latest GenAIComps sources +FROM base AS git + +RUN apt-get update && apt-get install -y --no-install-recommends git +# RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git +COPY GenAIComps GenAIComps + + +# Stage 3: common layer shared by services using GenAIComps +FROM base AS comps-base + +# copy just relevant parts +COPY --from=git $HOME/GenAIComps/comps $HOME/GenAIComps/comps +COPY --from=git $HOME/GenAIComps/*.* $HOME/GenAIComps/LICENSE $HOME/GenAIComps/ + +WORKDIR $HOME/GenAIComps +RUN pip install --no-cache-dir --upgrade pip setuptools && \ + pip install --no-cache-dir -r $HOME/GenAIComps/requirements.txt +WORKDIR $HOME + +ENV PYTHONPATH=$PYTHONPATH:$HOME/GenAIComps + +USER user + + +# Stage 4: unique part +FROM comps-base + +ENV LANG=C.UTF-8 COPY ./codegen.py $HOME/codegen.py diff --git a/CodeGen/codegen.py b/CodeGen/codegen.py index 6384efaa47..a5d79f8dd2 100644 --- a/CodeGen/codegen.py +++ b/CodeGen/codegen.py @@ -19,9 +19,6 @@ from fastapi.responses import StreamingResponse from langchain.prompts import PromptTemplate -logger = CustomLogger("opea_dataprep_microservice") -logflag = os.getenv("LOGFLAG", False) - MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 7778)) LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0") @@ -83,7 +80,7 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k embedding = inputs['data'][0]['embedding'] # Align the inputs for the retriever service inputs = { - "index_name": llm_parameters_dict["index_name"], + "index_name": llm_parameters_dict["key_index_name"], "text": self.input_query, "embedding": embedding } @@ -201,14 +198,14 @@ async def handle_request(self, request: Request): presence_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 0.0, repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03, stream=stream_opt, - index_name=chat_request.index_name + key_index_name=chat_request.key_index_name ) # Initialize the initial inputs with the generated prompt initial_inputs = {"query": prompt} # Check if the key index name is provided in the parameters - if parameters.index_name: + if parameters.key_index_name: if agents_flag: # Schedule the retriever microservice result_ret, runtime_graph = await self.megaservice_retriever.schedule( @@ -251,16 +248,11 @@ async def handle_request(self, request: Request): relevant_docs.append(doc) # Update the initial inputs with the relevant documents - if len(relevant_docs)>0: - logger.info(f"[ CodeGenService - handle_request ] {len(relevant_docs)} relevant document\s found.") - query = initial_inputs["query"] - initial_inputs = {} - initial_inputs["retrieved_docs"] = relevant_docs - initial_inputs["initial_query"] = query - - else: - logger.info("[ CodeGenService - handle_request ] Could not find any relevant documents. 
The query will be used as input to the LLM.") - + query = initial_inputs["query"] + initial_inputs = {} + initial_inputs["retrieved_docs"] = relevant_docs + initial_inputs["initial_query"] = query + megaservice = self.megaservice_llm else: # Use the combined retriever and LLM microservice megaservice = self.megaservice_retriever_llm diff --git a/CodeGen/docker_compose/intel/cpu/xeon/README.md b/CodeGen/docker_compose/intel/cpu/xeon/README.md index 75c7b1851b..e6ed4334a8 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/README.md +++ b/CodeGen/docker_compose/intel/cpu/xeon/README.md @@ -3,89 +3,6 @@ This document outlines the deployment process for a CodeGen application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on Intel Xeon server. The steps include Docker images creation, container deployment via Docker Compose, and service execution to integrate microservices such as `llm`. We will publish the Docker images to Docker Hub soon, further simplifying the deployment process for this service. The default pipeline deploys with vLLM as the LLM serving component. It also provides options of using TGI backend for LLM microservice. -## 🚀 Create an AWS Xeon Instance - -To run the example on an AWS Xeon instance, start by creating an AWS account if you don't have one already. Then, get started with the [EC2 Console](https://console.aws.amazon.com/ec2/v2/home). AWS EC2 M7i, C7i, C7i-flex and M7i-flex instances are 4th Generation Intel Xeon Scalable processors suitable for the task. - -For detailed information about these instance types, you can refer to [m7i](https://aws.amazon.com/ec2/instance-types/m7i/). Once you've chosen the appropriate instance type, proceed with configuring your instance settings, including network configurations, security groups, and storage options. - -After launching your instance, you can connect to it using SSH (for Linux instances) or Remote Desktop Protocol (RDP) (for Windows instances). From there, you'll have full access to your Xeon server, allowing you to install, configure, and manage your applications as needed. - -## 🚀 Start Microservices and MegaService - -The CodeGen megaservice manages a several microservices including 'Embedding MicroService', 'Retrieval MicroService' and 'LLM MicroService' within a Directed Acyclic Graph (DAG). In the diagram below, the LLM microservice is a language model microservice that generates code snippets based on the user's input query. The TGI service serves as a text generation interface, providing a RESTful API for the LLM microservice. Data Preparation allows users to save/update documents or online resources to the vector database. Users can upload files or provide URLs, and manage their saved resources. The CodeGen Gateway acts as the entry point for the CodeGen application, invoking the Megaservice to generate code snippets in response to the user's input query. 
- -The mega flow of the CodeGen application, from user's input query to the application's output response, is as follows: - -```mermaid ---- -config: - flowchart: - nodeSpacing: 400 - rankSpacing: 100 - curve: linear - themeVariables: - fontSize: 25px ---- -flowchart LR - %% Colors %% - classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 - classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 - classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 - classDef invisible fill:transparent,stroke:transparent; - style CodeGen-MegaService stroke:#000000 - %% Subgraphs %% - subgraph CodeGen-MegaService["CodeGen-MegaService"] - direction LR - EM([Embedding
MicroService]):::blue - RET([Retrieval
MicroService]):::blue - RER([Agents]):::blue - LLM([LLM
MicroService]):::blue - end - subgraph User Interface - direction LR - a([Submit Query Tab]):::orchid - UI([UI server]):::orchid - Ingest([Manage Resources]):::orchid - end - - CLIP_EM{{Embedding
service}} - VDB{{Vector DB}} - V_RET{{Retriever
service}} - Ingest{{Ingest data}} - DP([Data Preparation]):::blue - LLM_gen{{TGI Service}} - GW([CodeGen GateWay]):::orange - - %% Data Preparation flow - %% Ingest data flow - direction LR - Ingest[Ingest data] --> UI - UI --> DP - DP <-.-> CLIP_EM - - %% Questions interaction - direction LR - a[User Input Query] --> UI - UI --> GW - GW <==> CodeGen-MegaService - EM ==> RET - RET ==> RER - RER ==> LLM - - - %% Embedding service flow - direction LR - EM <-.-> CLIP_EM - RET <-.-> V_RET - LLM <-.-> LLM_gen - - direction TB - %% Vector DB interaction - V_RET <-.->VDB - DP <-.->VDB -``` - ### Setup Environment Variables Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below. @@ -175,17 +92,12 @@ To access the frontend, open the following URL in your browser: `http://{host_ip ```yaml codegen-xeon-ui-server: - image: opea/codegen-ui:latest + image: opea/codegen-gradio-ui:latest ... ports: - "80:5173" ``` -![project-screenshot](../../../../assets/img/codeGen_ui_init.jpg) - -Here is an example of running CodeGen in the UI: - -![project-screenshot](../../../../assets/img/codeGen_ui_response.png) ## 🚀 Launch the React Based UI (Optional) @@ -314,15 +226,15 @@ cd GenAIExamples/CodeGen/ui docker build -t opea/codegen-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile . ``` -### 4. Build CodeGen React UI Docker Image (Optional) +### 4. Build CodeGen Gradio UI Docker Image -Build react frontend Docker image via below command: +Build gradio frontend Docker image via below command: **Export the value of the public IP address of your Xeon server to the `host_ip` environment variable** ```bash cd GenAIExamples/CodeGen/ui -docker build --no-cache -t opea/codegen-react-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react . +docker build --no-cache -t opea/codegen-gradio-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.gradio . 
``` Then run the command `docker images`, you will have the following Docker Images: @@ -330,4 +242,4 @@ Then run the command `docker images`, you will have the following Docker Images: - `opea/llm-textgen:latest` - `opea/codegen:latest` - `opea/codegen-ui:latest` -- `opea/codegen-react-ui:latest` (optional) +- `opea/codegen-gradio-ui:latest` diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml index 3d132d29f9..3f6573f01e 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml +++ b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml @@ -5,13 +5,11 @@ services: tgi-service: image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu - container_name: tgi-server - profiles: - - codegen-xeon-tgi + container_name: tgi-service ports: - "8028:80" volumes: - - "${MODEL_CACHE:-./data}:/data" + - "./data:/data" shm_size: 1g environment: no_proxy: ${no_proxy} @@ -25,82 +23,47 @@ services: timeout: 10s retries: 100 command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 - vllm-service: - image: ${REGISTRY:-opea}/vllm:${TAG:-latest} - container_name: vllm-server - profiles: - - codegen-xeon-vllm - ports: - - "8028:80" - volumes: - - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub" - shm_size: 1g - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - host_ip: ${host_ip} - healthcheck: - test: ["CMD-SHELL", "curl -f http://$host_ip:8028/health || exit 1"] - interval: 10s - timeout: 10s - retries: 100 - command: --model ${LLM_MODEL_ID} --host 0.0.0.0 --port 80 - llm-base: + llm: image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} container_name: llm-textgen-server + depends_on: + tgi-service: + condition: service_healthy + ports: + - "9000:9000" + ipc: host environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - LLM_ENDPOINT: ${LLM_ENDPOINT} + LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped - llm-tgi-service: - extends: llm-base - container_name: llm-codegen-tgi-server - profiles: - - codegen-xeon-tgi - ports: - - "9000:9000" - ipc: host - depends_on: - tgi-service: - condition: service_healthy - llm-vllm-service: - extends: llm-base - container_name: llm-codegen-vllm-server - profiles: - - codegen-xeon-vllm - ports: - - "9000:9000" - ipc: host - depends_on: - vllm-service: - condition: service_healthy codegen-xeon-backend-server: image: ${REGISTRY:-opea}/codegen:${TAG:-latest} container_name: codegen-xeon-backend-server depends_on: - - llm-base + - llm ports: - "7778:7778" environment: - no_proxy=${no_proxy} - https_proxy=${https_proxy} - http_proxy=${http_proxy} - - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} - - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP} - - RETRIEVAL_SERVICE_HOST_IP=${RETRIEVAL_SERVICE_HOST_IP} + - MEGA_SERVICE_HOST_IP=${host_ip} #${MEGA_SERVICE_HOST_IP} + - LLM_SERVICE_HOST_IP=${host_ip} #${LLM_SERVICE_HOST_IP} + # - RETRIEVAL_SERVICE_HOST_IP=${REDIS_RETRIEVER_PORT} + - RETRIEVAL_SERVICE_HOST_IP=${host_ip} #${RETRIEVAL_SERVICE_HOST_IP} - REDIS_RETRIEVER_PORT=${REDIS_RETRIEVER_PORT} - - TEI_EMBEDDING_HOST_IP=${TEI_EMBEDDING_HOST_IP} + # - MM_EMBEDDING_SERVICE_HOST_IP=${MM_EMBEDDING_PORT_MICROSERVICE} + - TEI_EMBEDDING_HOST_IP=${host_ip} #${TEI_EMBEDDING_HOST_IP} - EMBEDDER_PORT=${EMBEDDER_PORT} + ipc: host restart: always codegen-xeon-ui-server: - image: 
${REGISTRY:-opea}/codegen-ui:${TAG:-latest} + image: ${REGISTRY:-opea}/codegen-gradio-ui:${TAG:-latest} container_name: codegen-xeon-ui-server depends_on: - codegen-xeon-backend-server @@ -111,6 +74,9 @@ services: - https_proxy=${https_proxy} - http_proxy=${http_proxy} - BASIC_URL=${BACKEND_SERVICE_ENDPOINT} + - MEGA_SERVICE_PORT=${MEGA_SERVICE_PORT} + - host_ip=${host_ip} + - DATAPREP_ENDPOINT=${DATAPREP_ENDPOINT} ipc: host restart: always redis-vector-db: @@ -119,6 +85,7 @@ services: ports: - "${REDIS_DB_PORT}:${REDIS_DB_PORT}" - "${REDIS_INSIGHTS_PORT}:${REDIS_INSIGHTS_PORT}" + dataprep-redis-server: image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} container_name: dataprep-redis-server @@ -136,6 +103,7 @@ services: HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} LOGFLAG: true restart: unless-stopped + tei-embedding-serving: image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 container_name: tei-embedding-serving @@ -156,6 +124,7 @@ services: interval: 10s timeout: 6s retries: 48 + tei-embedding-server: image: ${REGISTRY:-opea}/embedding:${TAG:-latest} container_name: tei-embedding-server @@ -172,6 +141,7 @@ services: tei-embedding-serving: condition: service_healthy restart: unless-stopped + retriever-redis: image: ${REGISTRY:-opea}/retriever:${TAG:-latest} container_name: retriever-redis @@ -194,6 +164,7 @@ services: LOGFLAG: ${LOGFLAG} RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_REDIS} restart: unless-stopped + networks: default: driver: bridge \ No newline at end of file diff --git a/CodeGen/docker_compose/set_env.sh b/CodeGen/docker_compose/set_env.sh index dd0b97a551..d4cff7af39 100644 --- a/CodeGen/docker_compose/set_env.sh +++ b/CodeGen/docker_compose/set_env.sh @@ -2,26 +2,18 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -pushd "../../" > /dev/null -source .set_env.sh -popd > /dev/null +# pushd "../../" > /dev/null +# source .set_env.sh +# popd > /dev/null export your_ip=$(hostname -I | awk '{print $1}') export host_ip=$(hostname -I | awk '{print $1}') -if [ -z "${HUGGINGFACEHUB_API_TOKEN}" ]; then - echo "Error: HUGGINGFACEHUB_API_TOKEN is not set. Please set HUGGINGFACEHUB_API_TOKEN" -fi - -if [ -z "${host_ip}" ]; then - echo "Error: host_ip is not set. Please set host_ip first." 
-fi - -export no_proxy=${no_proxy},${host_ip} - +export no_proxy="${no_proxy},${host_ip}" export http_proxy=${http_proxy} export https_proxy=${https_proxy} +# export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct" export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-32B-Instruct" export LLM_SERVICE_PORT=9000 export LLM_ENDPOINT="http://${host_ip}:8028" @@ -30,7 +22,7 @@ export TGI_LLM_ENDPOINT="http://${host_ip}:8028" export MEGA_SERVICE_PORT=7778 export MEGA_SERVICE_HOST_IP=${host_ip} -export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:7778/v1/codegen" +export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${MEGA_SERVICE_PORT}/v1/codegen" export REDIS_DB_PORT=6379 export REDIS_INSIGHTS_PORT=8001 @@ -47,5 +39,10 @@ export TEI_EMBEDDING_HOST_IP=${host_ip} export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" export DATAPREP_REDIS_PORT=6007 +export DATAPREP_ENDPOINT="http://${host_ip}:${DATAPREP_REDIS_PORT}/v1/dataprep" export LOGFLAG=false -export MODEL_CACHE="./data" + + +# export LLM_SERVICE_HOST_IP=${host_ip} +# export RETRIEVAL_SERVICE_HOST_IP=${host_ip} +# export TEI_EMBEDDING_HOST_IP=${host_ip} \ No newline at end of file diff --git a/CodeGen/docker_image_build/build.yaml b/CodeGen/docker_image_build/build.yaml index 3275aa71bf..52ca23b109 100644 --- a/CodeGen/docker_image_build/build.yaml +++ b/CodeGen/docker_image_build/build.yaml @@ -23,6 +23,12 @@ services: dockerfile: ./docker/Dockerfile.react extends: codegen image: ${REGISTRY:-opea}/codegen-react-ui:${TAG:-latest} + codegen-gradio-ui: + build: + context: ../ui + dockerfile: ./docker/Dockerfile.gradio + extends: codegen + image: ${REGISTRY:-opea}/codegen-gradio-ui:${TAG:-latest} llm-textgen: build: context: GenAIComps diff --git a/CodeGen/ui/docker/Dockerfile.gradio b/CodeGen/ui/docker/Dockerfile.gradio new file mode 100644 index 0000000000..11a4f4f581 --- /dev/null +++ b/CodeGen/ui/docker/Dockerfile.gradio @@ -0,0 +1,33 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM python:3.11-slim + +ENV LANG=C.UTF-8 + +ARG ARCH="cpu" + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + default-jre \ + libgl1-mesa-glx \ + libjemalloc-dev \ + wget + +# Install ffmpeg static build +WORKDIR /root +RUN wget https://johnvansickle.com/ffmpeg/builds/ffmpeg-git-amd64-static.tar.xz && \ + mkdir ffmpeg-git-amd64-static && tar -xvf ffmpeg-git-amd64-static.tar.xz -C ffmpeg-git-amd64-static --strip-components 1 && \ + export PATH=/root/ffmpeg-git-amd64-static:$PATH && \ + cp /root/ffmpeg-git-amd64-static/ffmpeg /usr/local/bin/ && \ + cp /root/ffmpeg-git-amd64-static/ffprobe /usr/local/bin/ + +RUN mkdir -p /home/user + +COPY gradio /home/user/gradio + +RUN pip install --no-cache-dir --upgrade pip setuptools && \ +pip install --no-cache-dir -r /home/user/gradio/requirements.txt + +WORKDIR /home/user/gradio +ENTRYPOINT ["python", "codegen_ui_gradio.py"] diff --git a/CodeGen/ui/gradio/README.md b/CodeGen/ui/gradio/README.md new file mode 100644 index 0000000000..38b2e964a3 --- /dev/null +++ b/CodeGen/ui/gradio/README.md @@ -0,0 +1,76 @@ +# Document Summary + +This project provides a user interface for summarizing documents and text using a Dockerized frontend application. Users can upload files or paste text to generate summaries. 
+ +## Docker + +### Build UI Docker Image + +To build the frontend Docker image, navigate to the `GenAIExamples/DocSum/ui` directory and run the following command: + +```bash +cd GenAIExamples/CodeGen/ui +docker build -t opea/codegen-gradio-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile.gradio . +``` + +This command builds the Docker image with the tag `opea/docsum-ui:latest`. It also passes the proxy settings as build arguments to ensure that the build process can access the internet if you are behind a corporate firewall. + +### Run UI Docker Image + +To run the frontend Docker image, navigate to the `GenAIExamples/DocSum/ui/docker` directory and execute the following commands: + +```bash +cd GenAIExamples/CodeGen/ui/docker + +ip_address=$(hostname -I | awk '{print $1}') +docker run -d -p 5173:5173 --ipc=host \ + -e http_proxy=$http_proxy \ + -e https_proxy=$https_proxy \ + -e no_proxy=$no_proxy \ + -e BACKEND_SERVICE_ENDPOINT=http://$ip_address:8888/v1/docsum \ + opea/codegen-gradio-ui:latest +``` + +This command runs the Docker container in interactive mode, mapping port 5173 of the host to port 5173 of the container. It also sets several environment variables, including the backend service endpoint, which is required for the frontend to communicate with the backend service. + +### Python + +To run the frontend application directly using Python, navigate to the `GenAIExamples/DocSum/ui/gradio` directory and run the following command: + +```bash +cd GenAIExamples/CodeGen/ui/gradio +python codegen_ui_gradio.py +``` + +This command starts the frontend application using Python. + +## 📸 Project Screenshots + +![project-screenshot](../../assets/img/docSum_ui_gradio_text.png) + +### 🧐 Features + +Here are some of the project's features: + +- Summarizing Uploaded Files: Users can upload files from their local device. Once a file is uploaded, the summarization of the document will start automatically. The summary will be displayed in the 'Summary' box. +- Summarizing Text via Pasting: Users can paste the text to be summarized into the text box. By clicking the 'Generate Summary' button, a condensed summary of the content will be produced and displayed in the 'Summary' box on the right. + +## Additional Information + +### Prerequisites + +Ensure you have Docker installed and running on your system. Also, make sure you have the necessary proxy settings configured if you are behind a corporate firewall. + +### Environment Variables + +- `http_proxy`: Proxy setting for HTTP connections. +- `https_proxy`: Proxy setting for HTTPS connections. +- `no_proxy`: Comma-separated list of hosts that should be excluded from proxying. +- `BACKEND_SERVICE_ENDPOINT`: The endpoint of the backend service that the frontend will communicate with. + +### Troubleshooting + +- Docker Build Issues: If you encounter issues while building the Docker image, ensure that your proxy settings are correctly configured and that you have internet access. +- Docker Run Issues: If the Docker container fails to start, check the environment variables and ensure that the backend service is running and accessible. + +This README file provides detailed instructions and explanations for building and running the Dockerized frontend application, as well as running it directly using Python. It also highlights the key features of the project and provides additional information for troubleshooting and configuring the environment. 
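The Resource Management tab in the Gradio app that follows talks to the dataprep service rather than the CodeGen gateway. A rough sketch of the equivalent standalone calls, assuming the `DATAPREP_ENDPOINT` default from `set_env.sh` (`http://<host_ip>:6007/v1/dataprep`), is shown below; the file name and index name are invented for illustration.

```python
# Sketch of the ingestion and index-listing calls used by the Resource Management tab.
import requests

host_ip = "localhost"  # placeholder: the address exported as host_ip
dataprep = f"http://{host_ip}:6007/v1/dataprep"

# Ingest a local file into a named Redis index (UI defaults: chunk_size=1500, chunk_overlap=100).
with open("api_reference.txt", "rb") as f:  # hypothetical file
    resp = requests.post(
        f"{dataprep}/ingest",
        files={"files": f},
        data={"index_name": "my_code_docs", "chunk_size": 1500, "chunk_overlap": 100},
    )
print(resp.text)

# List the indices that can then be selected in the Code Generation tab.
print(requests.post(f"{dataprep}/indices").json())
```

The `/get` and `/delete` routes used elsewhere in the UI follow the same POST-with-form-data pattern.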
diff --git a/CodeGen/ui/gradio/codegen_ui_gradio.py b/CodeGen/ui/gradio/codegen_ui_gradio.py new file mode 100644 index 0000000000..873d0c42b4 --- /dev/null +++ b/CodeGen/ui/gradio/codegen_ui_gradio.py @@ -0,0 +1,402 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# This is a Gradio app that includes two tabs: one for code generation and another for resource management. +# The resource management tab has been updated to allow file uploads, deletion, and a table listing all the files. +# Additionally, three small text boxes have been added for managing file dataframe parameters. + +import argparse +import os +from pathlib import Path +import gradio as gr +from gradio_pdf import PDF +import requests +import pandas as pd +import os +import uvicorn +import json +import argparse +# from utils import build_logger, make_temp_image, server_error_msg, split_video +from urllib.parse import urlparse +from pathlib import Path +from fastapi import FastAPI +# from fastapi.responses import JSONResponse, StreamingResponse +from fastapi.staticfiles import StaticFiles + +# logger = build_logger("gradio_web_server", "gradio_web_server.log") +logflag = os.getenv("LOGFLAG", False) + +# create a FastAPI app +app = FastAPI() +cur_dir = os.getcwd() +static_dir = Path(os.path.join(cur_dir, "static/")) +tmp_dir = Path(os.path.join(cur_dir, "split_tmp_videos/")) + +Path(static_dir).mkdir(parents=True, exist_ok=True) +app.mount("/static", StaticFiles(directory=static_dir), name="static") + +tmp_upload_folder = "/tmp/gradio/" + + + +host_ip = os.getenv("host_ip") +DATAPREP_REDIS_PORT = os.getenv("DATAPREP_REDIS_PORT", 6007) +DATAPREP_ENDPOINT = os.getenv("DATAPREP_ENDPOINT", f"http://{host_ip}:{DATAPREP_REDIS_PORT}/v1/dataprep") +MEGA_SERVICE_PORT = os.getenv("MEGA_SERVICE_PORT", 7778) + +backend_service_endpoint = os.getenv( + "BACKEND_SERVICE_ENDPOINT", f"http://{host_ip}:{MEGA_SERVICE_PORT}/v1/codegen" + ) + +dataprep_ingest_endpoint = f"{DATAPREP_ENDPOINT}/ingest" +dataprep_get_files_endpoint = f"{DATAPREP_ENDPOINT}/get" +dataprep_delete_files_endpoint = f"{DATAPREP_ENDPOINT}/delete" +dataprep_get_indices_endpoint = f"{DATAPREP_ENDPOINT}/indices" + + + +# Define the functions that will be used in the app +def conversation_history(prompt, index, use_agent, history): + # Print the language and prompt, and return a placeholder code + print(f"Generating code for prompt: {prompt} using index: {index} and use_agent is {use_agent}") + history.append([prompt, ""]) + response_generator = generate_code(prompt, index, use_agent) + for token in response_generator: + history[-1][-1] += token + yield history + + +def upload_media(media, index=None, chunk_size=1500, chunk_overlap=100): + media = media.strip().split("\n") + print("Files passed is ", media, flush=True) + if not chunk_size: + chunk_size = 1500 + if not chunk_overlap: + chunk_overlap = 100 + + requests = [] + if type(media) is list: + for file in media: + file_ext = os.path.splitext(file)[-1] + if is_valid_url(file): + print(file, " is valid URL") + print("Ingesting URL...") + value = ingest_url(file, index, chunk_size, chunk_overlap) + requests.append(value) + yield value + elif file_ext in ['.pdf', '.txt']: + print("Ingesting File...") + value = ingest_file(file, index, chunk_size, chunk_overlap) + requests.append(value) + yield value + else: + print(file, "File type not supported") + yield ( + gr.Textbox( + visible=True, + value="Your file extension type is not supported.", + ) + ) + return + yield requests + + else: + 
file_ext = os.path.splitext(media)[-1] + if is_valid_url(media): + value = ingest_url(media, index, chunk_size, chunk_overlap) + yield value + elif file_ext in ['.pdf', '.txt']: + print("Ingesting File...") + value = ingest_file(media, index, chunk_size, chunk_overlap) + # print("Return value is: ", value, flush=True) + yield value + else: + print(media, "File type not supported") + yield ( + gr.Textbox( + visible=True, + value="Your file extension type is not supported.", + ) + ) + return + +def generate_code(query, index=None, use_agent=False): + if index is None or index == "None": + input_dict = {"messages": query, "agents_flag": use_agent} + else: + input_dict = {"messages": query, "index_name": index, "agents_flag": use_agent} + + print("Query is ", input_dict) + headers = {"Content-Type": "application/json"} + + response = requests.post(url=backend_service_endpoint, headers=headers, data=json.dumps(input_dict), stream=True) + + for line in response.iter_lines(): + if line: + line = line.decode('utf-8') + if line.startswith("data: "): # Only process lines starting with "data: " + json_part = line[len("data: "):] # Remove the "data: " prefix + if json_part.strip() == "[DONE]": # Ignore the DONE marker + continue + try: + json_obj = json.loads(json_part) # Convert to dictionary + if "choices" in json_obj: + for choice in json_obj["choices"]: + if "text" in choice: + # Yield each token individually + yield choice["text"] + except json.JSONDecodeError: + print("Error parsing JSON:", json_part) + + +def ingest_file(file, index=None, chunk_size=100, chunk_overlap=150): + headers = { + # "Content-Type: multipart/form-data" + } + file_input = {"files": open(file, "rb")} + + if index: + print("Index is", index) + data = {"index_name": index, "chunk_size": chunk_size, "chunk_overlap": chunk_overlap} + else: + data = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap} + + print("Calling Request Now!") + response = requests.post(url=dataprep_ingest_endpoint, headers=headers, files=file_input, data=data) + # print("Ingest Files", response) + print(response.text) + + # table = update_table() + return response.text + +def ingest_url(url, index=None, chunk_size=100, chunk_overlap=150): + print("URL is ", url) + url = str(url) + if not is_valid_url(url): + print("Invalid URL") + # yield ( + # gr.Textbox( + # visible=True, + # value="Invalid URL entered. 
Please enter a valid URL", + # ) + # ) + return + headers = { + # "Content-Type: multipart/form-data" + } + + if index: + url_input = {"link_list": json.dumps([url]), "index_name": index, "chunk_size": chunk_size, "chunk_overlap": chunk_overlap} + else: + url_input = {"link_list": json.dumps([url]), "chunk_size": chunk_size, "chunk_overlap": chunk_overlap} + response = requests.post(url=dataprep_ingest_endpoint, headers=headers, data=url_input) + # print("Ingest URL", response) + # table = update_table() + return response.text + + +def is_valid_url(url): + url = str(url) + try: + result = urlparse(url) + return all([result.scheme, result.netloc]) + except ValueError: + return False + + + +# Initialize the file list +file_list = [] + +# def update_files(file): +# # Add the uploaded file to the file list +# file_list.append(file.name) +# file_df["Files"] = file_list +# return file_df + + +def get_files(index=None): + headers = { + # "Content-Type: multipart/form-data" + } + if index == "All Files": + index = None + + if index: + index = {"index_name": index} + response = requests.post(url=dataprep_get_files_endpoint, headers=headers, data=index) + print("Get files with ", index, response) + table = response.json() + return table + else: + # print("URL IS ", dataprep_get_files_endpoint) + response = requests.post(url=dataprep_get_files_endpoint, headers=headers) + print("Get files ", response) + table = response.json() + return table + +def update_table(index=None): + if index == "All Files": + index = None + files = get_files(index) + print("Files is ", files) + if len(files) == 0: + df = pd.DataFrame(files, columns=["Files"]) + return df + else: + df = pd.DataFrame(files) + return df + +def update_indices(): + indices = get_indices() + df = pd.DataFrame(indices, columns=["File Databases"]) + return df + +def delete_file(file, index=None): + # Remove the selected file from the file list + headers = { + # "Content-Type: application/json" + } + print("URL IS ", dataprep_delete_files_endpoint) + if index: + file_input = {"files": open(file, "rb"), "index_name": index} + else: + file_input = {"files": open(file, "rb")} + response = requests.post(url=dataprep_delete_files_endpoint, headers=headers, data=file_input) + print("Delete file ", response) + table = update_table() + return response.text + +def delete_all_files(index=None): + # Remove all files from the file list + headers = { + # "Content-Type: application/json" + } + response = requests.post(url=dataprep_delete_files_endpoint, headers=headers, data='{"file_path": "all"}') + print("Delete all files ", response) + table = update_table() + + return response.text + +def get_indices(): + headers = { + # "Content-Type: application/json" + } + response = requests.post(url=dataprep_get_indices_endpoint, headers=headers) + print("Get Indices", response) + indices = response.json() + return indices + +def update_indices_dropdown(): + indices = ["None"] + get_indices() + new_dd = gr.update(choices=indices, value="None") + return new_dd + + +def get_file_names(files): + file_str = "" + if not files: + return file_str + + for file in files: + file_str += file + '\n' + file_str.strip() + return file_str + + +# Define UI components +with gr.Blocks() as ui: + with gr.Tab("Code Generation"): + gr.Markdown("### Generate Code from Natural Language") + chatbot = gr.Chatbot(label="Chat History") + prompt_input = gr.Textbox(label="Enter your query") + with gr.Column(): + with gr.Row(scale=8): + # indices = ["None"] + get_indices() + database_dropdown = 
gr.Dropdown(choices=get_indices(), label="Select Index", value="None") + with gr.Row(scale=1): + db_refresh_button = gr.Button("Refresh", variant="primary") + db_refresh_button.click(update_indices_dropdown, outputs=database_dropdown) + use_agent = gr.Checkbox(label="Use Agent", container=False) + + generate_button = gr.Button("Generate Code") + + # Connect the generate button to the conversation_history function + generate_button.click(conversation_history, inputs=[prompt_input, database_dropdown, use_agent, chatbot], outputs=chatbot) + + with gr.Tab("Resource Management"): + # File management components + # url_button = gr.Button("Process") + with gr.Row(): + with gr.Column(scale=1): + index_name_input = gr.Textbox(label="Index Name") + chunk_size_input = gr.Textbox(label="Chunk Size", value="1500", placeholder="Enter an integer (default: 1500)") + chunk_overlap_input = gr.Textbox(label="Chunk Overlap", value="100", placeholder="Enter an integer (default: 100)") + with gr.Column(scale=3): + file_upload = gr.File(label="Upload Files", file_count="multiple") + url_input = gr.Textbox(label="Media to be ingested (Append URL's in a new line)") + upload_button = gr.Button("Upload", variant="primary") + upload_status = gr.Textbox(label="Upload Status") + file_upload.change(get_file_names, inputs=file_upload, outputs=url_input) + with gr.Column(scale=1): + # table_dropdown = gr.Dropdown(indices) + # file_table = gr.Dataframe(interactive=False, value=update_table()) + file_table = gr.Dataframe(interactive=False, value=update_indices()) + refresh_button = gr.Button("Refresh", variant="primary", size="sm") + refresh_button.click(update_indices, outputs=file_table) + # refresh_button.click(update_indices, outputs=database_dropdown) + # table_dropdown.change(fn=update_table, inputs=table_dropdown, outputs=file_table) + # upload_button.click(upload_media, inputs=[file_upload, index_name_input, chunk_size_input, chunk_overlap_input], outputs=file_table) + upload_button.click(upload_media, inputs=[url_input, index_name_input, chunk_size_input, chunk_overlap_input], outputs=upload_status) + + delete_all_button = gr.Button("Delete All", variant="primary", size="sm") + delete_all_button.click(delete_all_files, outputs=upload_status) + + + + # delete_button = gr.Button("Delete Index") + + # selected_file_output = gr.Textbox(label="Selected File") + # delete_button.click(delete_file, inputs=indices, outputs=upload_status) + + + +ui.queue() +app = gr.mount_gradio_app(app, ui, path="/") +share = False +enable_queue = True + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--host", type=str, default="0.0.0.0") + parser.add_argument("--port", type=int, default=os.getenv("UI_PORT", 5173)) + parser.add_argument("--concurrency-count", type=int, default=20) + parser.add_argument("--share", action="store_true") + + host_ip = os.getenv("host_ip") + DATAPREP_REDIS_PORT = os.getenv("DATAPREP_REDIS_PORT", 6007) + DATAPREP_ENDPOINT = os.getenv("DATAPREP_ENDPOINT", f"http://{host_ip}:{DATAPREP_REDIS_PORT}/v1/dataprep") + MEGA_SERVICE_PORT = os.getenv("MEGA_SERVICE_PORT", 7778) + + + backend_service_endpoint = os.getenv( + "BACKEND_SERVICE_ENDPOINT", f"http://{host_ip}:{MEGA_SERVICE_PORT}/v1/codegen" + ) + + # dataprep_ingest_endpoint = f"{DATAPREP_ENDPOINT}/ingest" + # dataprep_get_files_endpoint = f"{DATAPREP_ENDPOINT}/get" + # dataprep_delete_files_endpoint = f"{DATAPREP_ENDPOINT}/delete" + # dataprep_get_indices_endpoint = f"{DATAPREP_ENDPOINT}/indices" + + + args = 
parser.parse_args() + # logger.info(f"args: {args}") + global gateway_addr + gateway_addr = backend_service_endpoint + global dataprep_ingest_addr + dataprep_ingest_addr = dataprep_ingest_endpoint + global dataprep_get_files_addr + dataprep_get_files_addr = dataprep_get_files_endpoint + + + uvicorn.run(app, host=args.host, port=args.port) diff --git a/CodeGen/ui/gradio/requirements.txt b/CodeGen/ui/gradio/requirements.txt new file mode 100644 index 0000000000..41e95a141b --- /dev/null +++ b/CodeGen/ui/gradio/requirements.txt @@ -0,0 +1,6 @@ +gradio==5.22.0 +gradio_pdf==0.0.19 +moviepy==1.0.3 +numpy==1.26.4 +opencv-python==4.10.0.82 +Pillow==10.3.0 From 4205703783d189c2bd126a58a53c246e459f9fc3 Mon Sep 17 00:00:00 2001 From: okhleif-IL Date: Thu, 27 Mar 2025 14:12:08 -0700 Subject: [PATCH 08/22] updated readme and fixed merge Signed-off-by: okhleif-IL --- .../intel/cpu/xeon/compose.yaml | 81 +++++++++++++------ CodeGen/docker_compose/set_env.sh | 26 +++--- CodeGen/ui/gradio/README.md | 21 ++--- 3 files changed, 75 insertions(+), 53 deletions(-) diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml index 3f6573f01e..c932ece069 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml +++ b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml @@ -1,15 +1,14 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - services: tgi-service: image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu - container_name: tgi-service + container_name: tgi-server + profiles: + - codegen-xeon-tgi ports: - "8028:80" volumes: - - "./data:/data" + - "${MODEL_CACHE:-./data}:/data" shm_size: 1g environment: no_proxy: ${no_proxy} @@ -23,43 +22,78 @@ services: timeout: 10s retries: 100 command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 - llm: + vllm-service: + image: ${REGISTRY:-opea}/vllm:${TAG:-latest} + container_name: vllm-server + profiles: + - codegen-xeon-vllm + ports: + - "8028:80" + volumes: + - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + host_ip: ${host_ip} + healthcheck: + test: ["CMD-SHELL", "curl -f http://$host_ip:8028/health || exit 1"] + interval: 10s + timeout: 10s + retries: 100 + command: --model ${LLM_MODEL_ID} --host 0.0.0.0 --port 80 + llm-base: image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} container_name: llm-textgen-server - depends_on: - tgi-service: - condition: service_healthy - ports: - - "9000:9000" - ipc: host environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} + LLM_ENDPOINT: ${LLM_ENDPOINT} LLM_MODEL_ID: ${LLM_MODEL_ID} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped + llm-tgi-service: + extends: llm-base + container_name: llm-codegen-tgi-server + profiles: + - codegen-xeon-tgi + ports: + - "9000:9000" + ipc: host + depends_on: + tgi-service: + condition: service_healthy + llm-vllm-service: + extends: llm-base + container_name: llm-codegen-vllm-server + profiles: + - codegen-xeon-vllm + ports: + - "9000:9000" + ipc: host + depends_on: + vllm-service: + condition: service_healthy codegen-xeon-backend-server: image: ${REGISTRY:-opea}/codegen:${TAG:-latest} container_name: codegen-xeon-backend-server depends_on: - - llm + - llm-base ports: - "7778:7778" environment: - no_proxy=${no_proxy} - 
https_proxy=${https_proxy} - http_proxy=${http_proxy} - - MEGA_SERVICE_HOST_IP=${host_ip} #${MEGA_SERVICE_HOST_IP} - - LLM_SERVICE_HOST_IP=${host_ip} #${LLM_SERVICE_HOST_IP} - # - RETRIEVAL_SERVICE_HOST_IP=${REDIS_RETRIEVER_PORT} - - RETRIEVAL_SERVICE_HOST_IP=${host_ip} #${RETRIEVAL_SERVICE_HOST_IP} + - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} + - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP} + - RETRIEVAL_SERVICE_HOST_IP=${RETRIEVAL_SERVICE_HOST_IP} - REDIS_RETRIEVER_PORT=${REDIS_RETRIEVER_PORT} - # - MM_EMBEDDING_SERVICE_HOST_IP=${MM_EMBEDDING_PORT_MICROSERVICE} - - TEI_EMBEDDING_HOST_IP=${host_ip} #${TEI_EMBEDDING_HOST_IP} + - TEI_EMBEDDING_HOST_IP=${TEI_EMBEDDING_HOST_IP} - EMBEDDER_PORT=${EMBEDDER_PORT} - ipc: host restart: always codegen-xeon-ui-server: @@ -85,7 +119,6 @@ services: ports: - "${REDIS_DB_PORT}:${REDIS_DB_PORT}" - "${REDIS_INSIGHTS_PORT}:${REDIS_INSIGHTS_PORT}" - dataprep-redis-server: image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} container_name: dataprep-redis-server @@ -103,7 +136,6 @@ services: HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} LOGFLAG: true restart: unless-stopped - tei-embedding-serving: image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 container_name: tei-embedding-serving @@ -124,7 +156,6 @@ services: interval: 10s timeout: 6s retries: 48 - tei-embedding-server: image: ${REGISTRY:-opea}/embedding:${TAG:-latest} container_name: tei-embedding-server @@ -141,7 +172,6 @@ services: tei-embedding-serving: condition: service_healthy restart: unless-stopped - retriever-redis: image: ${REGISTRY:-opea}/retriever:${TAG:-latest} container_name: retriever-redis @@ -164,7 +194,6 @@ services: LOGFLAG: ${LOGFLAG} RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_REDIS} restart: unless-stopped - networks: default: driver: bridge \ No newline at end of file diff --git a/CodeGen/docker_compose/set_env.sh b/CodeGen/docker_compose/set_env.sh index d4cff7af39..559f00cf2a 100644 --- a/CodeGen/docker_compose/set_env.sh +++ b/CodeGen/docker_compose/set_env.sh @@ -2,18 +2,26 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -# pushd "../../" > /dev/null -# source .set_env.sh -# popd > /dev/null +pushd "../../" > /dev/null +source .set_env.sh +popd > /dev/null export your_ip=$(hostname -I | awk '{print $1}') export host_ip=$(hostname -I | awk '{print $1}') -export no_proxy="${no_proxy},${host_ip}" +if [ -z "${HUGGINGFACEHUB_API_TOKEN}" ]; then + echo "Error: HUGGINGFACEHUB_API_TOKEN is not set. Please set HUGGINGFACEHUB_API_TOKEN" +fi + +if [ -z "${host_ip}" ]; then + echo "Error: host_ip is not set. Please set host_ip first." 
+fi + +export no_proxy=${no_proxy},${host_ip} + export http_proxy=${http_proxy} export https_proxy=${https_proxy} -# export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct" export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-32B-Instruct" export LLM_SERVICE_PORT=9000 export LLM_ENDPOINT="http://${host_ip}:8028" @@ -22,7 +30,7 @@ export TGI_LLM_ENDPOINT="http://${host_ip}:8028" export MEGA_SERVICE_PORT=7778 export MEGA_SERVICE_HOST_IP=${host_ip} -export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${MEGA_SERVICE_PORT}/v1/codegen" +export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:7778/v1/codegen" export REDIS_DB_PORT=6379 export REDIS_INSIGHTS_PORT=8001 @@ -41,8 +49,4 @@ export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" export DATAPREP_REDIS_PORT=6007 export DATAPREP_ENDPOINT="http://${host_ip}:${DATAPREP_REDIS_PORT}/v1/dataprep" export LOGFLAG=false - - -# export LLM_SERVICE_HOST_IP=${host_ip} -# export RETRIEVAL_SERVICE_HOST_IP=${host_ip} -# export TEI_EMBEDDING_HOST_IP=${host_ip} \ No newline at end of file +export MODEL_CACHE="./data" \ No newline at end of file diff --git a/CodeGen/ui/gradio/README.md b/CodeGen/ui/gradio/README.md index 38b2e964a3..9769efb317 100644 --- a/CodeGen/ui/gradio/README.md +++ b/CodeGen/ui/gradio/README.md @@ -13,21 +13,21 @@ cd GenAIExamples/CodeGen/ui docker build -t opea/codegen-gradio-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile.gradio . ``` -This command builds the Docker image with the tag `opea/docsum-ui:latest`. It also passes the proxy settings as build arguments to ensure that the build process can access the internet if you are behind a corporate firewall. +This command builds the Docker image with the tag `opea/codegen-gradio-ui:latest`. It also passes the proxy settings as build arguments to ensure that the build process can access the internet if you are behind a corporate firewall. ### Run UI Docker Image -To run the frontend Docker image, navigate to the `GenAIExamples/DocSum/ui/docker` directory and execute the following commands: +To run the frontend Docker image, navigate to the `GenAIExamples/CodeGen/ui/gradio` directory and execute the following commands: ```bash -cd GenAIExamples/CodeGen/ui/docker +cd GenAIExamples/CodeGen/ui/gradio ip_address=$(hostname -I | awk '{print $1}') docker run -d -p 5173:5173 --ipc=host \ -e http_proxy=$http_proxy \ -e https_proxy=$https_proxy \ -e no_proxy=$no_proxy \ - -e BACKEND_SERVICE_ENDPOINT=http://$ip_address:8888/v1/docsum \ + -e BACKEND_SERVICE_ENDPOINT=http://$ip_address:7778/v1/codegen \ opea/codegen-gradio-ui:latest ``` @@ -35,7 +35,7 @@ This command runs the Docker container in interactive mode, mapping port 5173 of ### Python -To run the frontend application directly using Python, navigate to the `GenAIExamples/DocSum/ui/gradio` directory and run the following command: +To run the frontend application directly using Python, navigate to the `GenAIExamples/CodeGen/ui/gradio` directory and run the following command: ```bash cd GenAIExamples/CodeGen/ui/gradio @@ -44,17 +44,6 @@ python codegen_ui_gradio.py This command starts the frontend application using Python. -## 📸 Project Screenshots - -![project-screenshot](../../assets/img/docSum_ui_gradio_text.png) - -### 🧐 Features - -Here are some of the project's features: - -- Summarizing Uploaded Files: Users can upload files from their local device. Once a file is uploaded, the summarization of the document will start automatically. The summary will be displayed in the 'Summary' box. 
-- Summarizing Text via Pasting: Users can paste the text to be summarized into the text box. By clicking the 'Generate Summary' button, a condensed summary of the content will be produced and displayed in the 'Summary' box on the right. - ## Additional Information ### Prerequisites From 6dc0e89a9d0f2ee6e2b0511fda32e12008590f99 Mon Sep 17 00:00:00 2001 From: okhleif-IL Date: Thu, 27 Mar 2025 14:19:34 -0700 Subject: [PATCH 09/22] reverted changes Signed-off-by: okhleif-IL --- CodeGen/codegen.py | 26 +++-- .../docker_compose/intel/cpu/xeon/README.md | 98 ++++++++++++++++++- CodeGen/ui/gradio/requirements.txt | 2 - 3 files changed, 110 insertions(+), 16 deletions(-) diff --git a/CodeGen/codegen.py b/CodeGen/codegen.py index a5d79f8dd2..00521175f0 100644 --- a/CodeGen/codegen.py +++ b/CodeGen/codegen.py @@ -19,6 +19,9 @@ from fastapi.responses import StreamingResponse from langchain.prompts import PromptTemplate +logger = CustomLogger("opea_dataprep_microservice") +logflag = os.getenv("LOGFLAG", False) + MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 7778)) LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0") @@ -80,7 +83,7 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k embedding = inputs['data'][0]['embedding'] # Align the inputs for the retriever service inputs = { - "index_name": llm_parameters_dict["key_index_name"], + "index_name": llm_parameters_dict["index_name"], "text": self.input_query, "embedding": embedding } @@ -198,14 +201,14 @@ async def handle_request(self, request: Request): presence_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 0.0, repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03, stream=stream_opt, - key_index_name=chat_request.key_index_name + index_name=chat_request.index_name ) # Initialize the initial inputs with the generated prompt initial_inputs = {"query": prompt} # Check if the key index name is provided in the parameters - if parameters.key_index_name: + if parameters.index_name: if agents_flag: # Schedule the retriever microservice result_ret, runtime_graph = await self.megaservice_retriever.schedule( @@ -248,11 +251,16 @@ async def handle_request(self, request: Request): relevant_docs.append(doc) # Update the initial inputs with the relevant documents - query = initial_inputs["query"] - initial_inputs = {} - initial_inputs["retrieved_docs"] = relevant_docs - initial_inputs["initial_query"] = query - megaservice = self.megaservice_llm + if len(relevant_docs)>0: + logger.info(f"[ CodeGenService - handle_request ] {len(relevant_docs)} relevant document\s found.") + query = initial_inputs["query"] + initial_inputs = {} + initial_inputs["retrieved_docs"] = relevant_docs + initial_inputs["initial_query"] = query + + else: + logger.info("[ CodeGenService - handle_request ] Could not find any relevant documents. 
The query will be used as input to the LLM.") + else: # Use the combined retriever and LLM microservice megaservice = self.megaservice_retriever_llm @@ -305,4 +313,4 @@ def start(self): if __name__ == "__main__": chatqna = CodeGenService(port=MEGA_SERVICE_PORT) chatqna.add_remote_service() - chatqna.start() + chatqna.start() \ No newline at end of file diff --git a/CodeGen/docker_compose/intel/cpu/xeon/README.md b/CodeGen/docker_compose/intel/cpu/xeon/README.md index e6ed4334a8..fc8b81b45f 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/README.md +++ b/CodeGen/docker_compose/intel/cpu/xeon/README.md @@ -3,6 +3,89 @@ This document outlines the deployment process for a CodeGen application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on Intel Xeon server. The steps include Docker images creation, container deployment via Docker Compose, and service execution to integrate microservices such as `llm`. We will publish the Docker images to Docker Hub soon, further simplifying the deployment process for this service. The default pipeline deploys with vLLM as the LLM serving component. It also provides options of using TGI backend for LLM microservice. +## 🚀 Create an AWS Xeon Instance + +To run the example on an AWS Xeon instance, start by creating an AWS account if you don't have one already. Then, get started with the [EC2 Console](https://console.aws.amazon.com/ec2/v2/home). AWS EC2 M7i, C7i, C7i-flex and M7i-flex instances are 4th Generation Intel Xeon Scalable processors suitable for the task. + +For detailed information about these instance types, you can refer to [m7i](https://aws.amazon.com/ec2/instance-types/m7i/). Once you've chosen the appropriate instance type, proceed with configuring your instance settings, including network configurations, security groups, and storage options. + +After launching your instance, you can connect to it using SSH (for Linux instances) or Remote Desktop Protocol (RDP) (for Windows instances). From there, you'll have full access to your Xeon server, allowing you to install, configure, and manage your applications as needed. + +## 🚀 Start Microservices and MegaService + +The CodeGen megaservice manages a several microservices including 'Embedding MicroService', 'Retrieval MicroService' and 'LLM MicroService' within a Directed Acyclic Graph (DAG). In the diagram below, the LLM microservice is a language model microservice that generates code snippets based on the user's input query. The TGI service serves as a text generation interface, providing a RESTful API for the LLM microservice. Data Preparation allows users to save/update documents or online resources to the vector database. Users can upload files or provide URLs, and manage their saved resources. The CodeGen Gateway acts as the entry point for the CodeGen application, invoking the Megaservice to generate code snippets in response to the user's input query. 
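Taken together, the `handle_request` changes above route a request down one of three paths depending on which optional fields are present. The dictionaries below summarize the payload shapes as this patch defines them; the field names come from the patch, while the example values are invented.

```python
# Payload shapes accepted by POST /v1/codegen after this patch (example values are invented).

# 1. Plain generation: only the LLM microservice is scheduled.
plain = {"messages": "Refactor this loop into a list comprehension."}

# 2. Retrieval-augmented generation: embedding -> retriever -> LLM,
#    where index_name selects the Redis index created at ingestion time.
rag = {"messages": "Generate a client for our internal API.", "index_name": "my_code_docs"}

# 3. Agent-graded retrieval: the retriever runs first, retrieved documents are
#    graded for relevance, and only the relevant ones are passed to the LLM.
graded = {
    "messages": "Generate a client for our internal API.",
    "index_name": "my_code_docs",
    "agents_flag": True,
}
```

Any of these bodies can be posted to the gateway exactly as the Gradio client earlier in this series does.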
+ +The mega flow of the CodeGen application, from user's input query to the application's output response, is as follows: + +```mermaid +--- +config: + flowchart: + nodeSpacing: 400 + rankSpacing: 100 + curve: linear + themeVariables: + fontSize: 25px +--- +flowchart LR + %% Colors %% + classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 + classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 + classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 + classDef invisible fill:transparent,stroke:transparent; + style CodeGen-MegaService stroke:#000000 + %% Subgraphs %% + subgraph CodeGen-MegaService["CodeGen-MegaService"] + direction LR + EM([Embedding
MicroService]):::blue + RET([Retrieval
MicroService]):::blue + RER([Agents]):::blue + LLM([LLM
MicroService]):::blue + end + subgraph User Interface + direction LR + a([Submit Query Tab]):::orchid + UI([UI server]):::orchid + Ingest([Manage Resources]):::orchid + end + + CLIP_EM{{Embedding
service}} + VDB{{Vector DB}} + V_RET{{Retriever
service}} + Ingest{{Ingest data}} + DP([Data Preparation]):::blue + LLM_gen{{TGI Service}} + GW([CodeGen GateWay]):::orange + + %% Data Preparation flow + %% Ingest data flow + direction LR + Ingest[Ingest data] --> UI + UI --> DP + DP <-.-> CLIP_EM + + %% Questions interaction + direction LR + a[User Input Query] --> UI + UI --> GW + GW <==> CodeGen-MegaService + EM ==> RET + RET ==> RER + RER ==> LLM + + + %% Embedding service flow + direction LR + EM <-.-> CLIP_EM + RET <-.-> V_RET + LLM <-.-> LLM_gen + + direction TB + %% Vector DB interaction + V_RET <-.->VDB + DP <-.->VDB +``` + ### Setup Environment Variables Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below. @@ -92,12 +175,17 @@ To access the frontend, open the following URL in your browser: `http://{host_ip ```yaml codegen-xeon-ui-server: - image: opea/codegen-gradio-ui:latest + image: opea/codegen-ui:latest ... ports: - "80:5173" ``` +![project-screenshot](../../../../assets/img/codeGen_ui_init.jpg) + +Here is an example of running CodeGen in the UI: + +![project-screenshot](../../../../assets/img/codeGen_ui_response.png) ## 🚀 Launch the React Based UI (Optional) @@ -226,15 +314,15 @@ cd GenAIExamples/CodeGen/ui docker build -t opea/codegen-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile . ``` -### 4. Build CodeGen Gradio UI Docker Image +### 4. Build CodeGen React UI Docker Image (Optional) -Build gradio frontend Docker image via below command: +Build react frontend Docker image via below command: **Export the value of the public IP address of your Xeon server to the `host_ip` environment variable** ```bash cd GenAIExamples/CodeGen/ui -docker build --no-cache -t opea/codegen-gradio-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.gradio . +docker build --no-cache -t opea/codegen-react-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react . 
``` Then run the command `docker images`, you will have the following Docker Images: @@ -242,4 +330,4 @@ Then run the command `docker images`, you will have the following Docker Images: - `opea/llm-textgen:latest` - `opea/codegen:latest` - `opea/codegen-ui:latest` -- `opea/codegen-gradio-ui:latest` +- `opea/codegen-react-ui:latest` (optional) \ No newline at end of file diff --git a/CodeGen/ui/gradio/requirements.txt b/CodeGen/ui/gradio/requirements.txt index 41e95a141b..2a4c8e1a30 100644 --- a/CodeGen/ui/gradio/requirements.txt +++ b/CodeGen/ui/gradio/requirements.txt @@ -1,6 +1,4 @@ gradio==5.22.0 -gradio_pdf==0.0.19 -moviepy==1.0.3 numpy==1.26.4 opencv-python==4.10.0.82 Pillow==10.3.0 From 251991a607759748388b984e547d683f8e0eb6c3 Mon Sep 17 00:00:00 2001 From: Omar Khleif Date: Thu, 27 Mar 2025 14:23:03 -0700 Subject: [PATCH 10/22] Gradio UI for CodeGen (#4) * update the compose file Signed-off-by: Mustafa initial Signed-off-by: Mustafa added microservice ports Signed-off-by: okhleif-IL update codegen Signed-off-by: Mustafa update for codegen Signed-off-by: Mustafa Initial commit for Gradio UI Signed-off-by: okhleif-IL New UI Signed-off-by: okhleif-IL prepare for merge Signed-off-by: okhleif-IL add agents Signed-off-by: Mustafa env updates Signed-off-by: Mustafa update codegen Signed-off-by: Mustafa merged to main Signed-off-by: Mustafa updates Signed-off-by: Mustafa UI Updates Signed-off-by: okhleif-IL added dockerfile Signed-off-by: okhleif-IL removed files dataframe Signed-off-by: okhleif-IL updated file upload Signed-off-by: okhleif-IL added checkbox for agent Signed-off-by: okhleif-IL key_index_name --> index_name Signed-off-by: okhleif-IL added / removed print statements Signed-off-by: okhleif-IL Support for data streaming (from Melanie) Signed-off-by: okhleif-IL fixed file not supported bug Signed-off-by: okhleif-IL added refresh button to index Signed-off-by: okhleif-IL simplified README Signed-off-by: okhleif-IL * updated readme and fixed merge Signed-off-by: okhleif-IL * reverted changes Signed-off-by: okhleif-IL --------- Signed-off-by: okhleif-IL Co-authored-by: Mustafa --- CodeGen/Dockerfile | 47 +- CodeGen/codegen.py | 2 +- .../docker_compose/intel/cpu/xeon/README.md | 2 +- .../intel/cpu/xeon/compose.yaml | 8 +- CodeGen/docker_compose/set_env.sh | 3 +- CodeGen/docker_image_build/build.yaml | 6 + CodeGen/ui/docker/Dockerfile.gradio | 33 ++ CodeGen/ui/gradio/README.md | 65 +++ CodeGen/ui/gradio/codegen_ui_gradio.py | 402 ++++++++++++++++++ CodeGen/ui/gradio/requirements.txt | 4 + 10 files changed, 563 insertions(+), 9 deletions(-) create mode 100644 CodeGen/ui/docker/Dockerfile.gradio create mode 100644 CodeGen/ui/gradio/README.md create mode 100644 CodeGen/ui/gradio/codegen_ui_gradio.py create mode 100644 CodeGen/ui/gradio/requirements.txt diff --git a/CodeGen/Dockerfile b/CodeGen/Dockerfile index 5305a9d89f..b2b4155fd7 100644 --- a/CodeGen/Dockerfile +++ b/CodeGen/Dockerfile @@ -1,8 +1,51 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -ARG BASE_TAG=latest -FROM opea/comps-base:$BASE_TAG +# Stage 1: base setup used by other stages +FROM python:3.11-slim AS base + +# get security updates +RUN apt-get update && apt-get upgrade -y && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +ENV HOME=/home/user + +RUN useradd -m -s /bin/bash user && \ + mkdir -p $HOME && \ + chown -R user $HOME + +WORKDIR $HOME + + +# Stage 2: latest GenAIComps sources +FROM base AS git + +RUN apt-get update && apt-get install -y --no-install-recommends git +# RUN git 
clone --depth 1 https://github.com/opea-project/GenAIComps.git +COPY GenAIComps GenAIComps + + +# Stage 3: common layer shared by services using GenAIComps +FROM base AS comps-base + +# copy just relevant parts +COPY --from=git $HOME/GenAIComps/comps $HOME/GenAIComps/comps +COPY --from=git $HOME/GenAIComps/*.* $HOME/GenAIComps/LICENSE $HOME/GenAIComps/ + +WORKDIR $HOME/GenAIComps +RUN pip install --no-cache-dir --upgrade pip setuptools && \ + pip install --no-cache-dir -r $HOME/GenAIComps/requirements.txt +WORKDIR $HOME + +ENV PYTHONPATH=$PYTHONPATH:$HOME/GenAIComps + +USER user + + +# Stage 4: unique part +FROM comps-base + +ENV LANG=C.UTF-8 COPY ./codegen.py $HOME/codegen.py diff --git a/CodeGen/codegen.py b/CodeGen/codegen.py index 6384efaa47..00521175f0 100644 --- a/CodeGen/codegen.py +++ b/CodeGen/codegen.py @@ -313,4 +313,4 @@ def start(self): if __name__ == "__main__": chatqna = CodeGenService(port=MEGA_SERVICE_PORT) chatqna.add_remote_service() - chatqna.start() + chatqna.start() \ No newline at end of file diff --git a/CodeGen/docker_compose/intel/cpu/xeon/README.md b/CodeGen/docker_compose/intel/cpu/xeon/README.md index 75c7b1851b..fc8b81b45f 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/README.md +++ b/CodeGen/docker_compose/intel/cpu/xeon/README.md @@ -330,4 +330,4 @@ Then run the command `docker images`, you will have the following Docker Images: - `opea/llm-textgen:latest` - `opea/codegen:latest` - `opea/codegen-ui:latest` -- `opea/codegen-react-ui:latest` (optional) +- `opea/codegen-react-ui:latest` (optional) \ No newline at end of file diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml index 3d132d29f9..c932ece069 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml +++ b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml @@ -1,6 +1,3 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - services: tgi-service: @@ -100,7 +97,7 @@ services: ipc: host restart: always codegen-xeon-ui-server: - image: ${REGISTRY:-opea}/codegen-ui:${TAG:-latest} + image: ${REGISTRY:-opea}/codegen-gradio-ui:${TAG:-latest} container_name: codegen-xeon-ui-server depends_on: - codegen-xeon-backend-server @@ -111,6 +108,9 @@ services: - https_proxy=${https_proxy} - http_proxy=${http_proxy} - BASIC_URL=${BACKEND_SERVICE_ENDPOINT} + - MEGA_SERVICE_PORT=${MEGA_SERVICE_PORT} + - host_ip=${host_ip} + - DATAPREP_ENDPOINT=${DATAPREP_ENDPOINT} ipc: host restart: always redis-vector-db: diff --git a/CodeGen/docker_compose/set_env.sh b/CodeGen/docker_compose/set_env.sh index dd0b97a551..559f00cf2a 100644 --- a/CodeGen/docker_compose/set_env.sh +++ b/CodeGen/docker_compose/set_env.sh @@ -47,5 +47,6 @@ export TEI_EMBEDDING_HOST_IP=${host_ip} export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" export DATAPREP_REDIS_PORT=6007 +export DATAPREP_ENDPOINT="http://${host_ip}:${DATAPREP_REDIS_PORT}/v1/dataprep" export LOGFLAG=false -export MODEL_CACHE="./data" +export MODEL_CACHE="./data" \ No newline at end of file diff --git a/CodeGen/docker_image_build/build.yaml b/CodeGen/docker_image_build/build.yaml index 3275aa71bf..52ca23b109 100644 --- a/CodeGen/docker_image_build/build.yaml +++ b/CodeGen/docker_image_build/build.yaml @@ -23,6 +23,12 @@ services: dockerfile: ./docker/Dockerfile.react extends: codegen image: ${REGISTRY:-opea}/codegen-react-ui:${TAG:-latest} + codegen-gradio-ui: + build: + context: ../ui + dockerfile: ./docker/Dockerfile.gradio + extends: codegen + image: 
${REGISTRY:-opea}/codegen-gradio-ui:${TAG:-latest} llm-textgen: build: context: GenAIComps diff --git a/CodeGen/ui/docker/Dockerfile.gradio b/CodeGen/ui/docker/Dockerfile.gradio new file mode 100644 index 0000000000..11a4f4f581 --- /dev/null +++ b/CodeGen/ui/docker/Dockerfile.gradio @@ -0,0 +1,33 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM python:3.11-slim + +ENV LANG=C.UTF-8 + +ARG ARCH="cpu" + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + default-jre \ + libgl1-mesa-glx \ + libjemalloc-dev \ + wget + +# Install ffmpeg static build +WORKDIR /root +RUN wget https://johnvansickle.com/ffmpeg/builds/ffmpeg-git-amd64-static.tar.xz && \ + mkdir ffmpeg-git-amd64-static && tar -xvf ffmpeg-git-amd64-static.tar.xz -C ffmpeg-git-amd64-static --strip-components 1 && \ + export PATH=/root/ffmpeg-git-amd64-static:$PATH && \ + cp /root/ffmpeg-git-amd64-static/ffmpeg /usr/local/bin/ && \ + cp /root/ffmpeg-git-amd64-static/ffprobe /usr/local/bin/ + +RUN mkdir -p /home/user + +COPY gradio /home/user/gradio + +RUN pip install --no-cache-dir --upgrade pip setuptools && \ +pip install --no-cache-dir -r /home/user/gradio/requirements.txt + +WORKDIR /home/user/gradio +ENTRYPOINT ["python", "codegen_ui_gradio.py"] diff --git a/CodeGen/ui/gradio/README.md b/CodeGen/ui/gradio/README.md new file mode 100644 index 0000000000..9769efb317 --- /dev/null +++ b/CodeGen/ui/gradio/README.md @@ -0,0 +1,65 @@ +# Document Summary + +This project provides a user interface for summarizing documents and text using a Dockerized frontend application. Users can upload files or paste text to generate summaries. + +## Docker + +### Build UI Docker Image + +To build the frontend Docker image, navigate to the `GenAIExamples/DocSum/ui` directory and run the following command: + +```bash +cd GenAIExamples/CodeGen/ui +docker build -t opea/codegen-gradio-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile.gradio . +``` + +This command builds the Docker image with the tag `opea/codegen-gradio-ui:latest`. It also passes the proxy settings as build arguments to ensure that the build process can access the internet if you are behind a corporate firewall. + +### Run UI Docker Image + +To run the frontend Docker image, navigate to the `GenAIExamples/CodeGen/ui/gradio` directory and execute the following commands: + +```bash +cd GenAIExamples/CodeGen/ui/gradio + +ip_address=$(hostname -I | awk '{print $1}') +docker run -d -p 5173:5173 --ipc=host \ + -e http_proxy=$http_proxy \ + -e https_proxy=$https_proxy \ + -e no_proxy=$no_proxy \ + -e BACKEND_SERVICE_ENDPOINT=http://$ip_address:7778/v1/codegen \ + opea/codegen-gradio-ui:latest +``` + +This command runs the Docker container in interactive mode, mapping port 5173 of the host to port 5173 of the container. It also sets several environment variables, including the backend service endpoint, which is required for the frontend to communicate with the backend service. + +### Python + +To run the frontend application directly using Python, navigate to the `GenAIExamples/CodeGen/ui/gradio` directory and run the following command: + +```bash +cd GenAIExamples/CodeGen/ui/gradio +python codegen_ui_gradio.py +``` + +This command starts the frontend application using Python. + +## Additional Information + +### Prerequisites + +Ensure you have Docker installed and running on your system. 
Also, make sure you have the necessary proxy settings configured if you are behind a corporate firewall. + +### Environment Variables + +- `http_proxy`: Proxy setting for HTTP connections. +- `https_proxy`: Proxy setting for HTTPS connections. +- `no_proxy`: Comma-separated list of hosts that should be excluded from proxying. +- `BACKEND_SERVICE_ENDPOINT`: The endpoint of the backend service that the frontend will communicate with. + +### Troubleshooting + +- Docker Build Issues: If you encounter issues while building the Docker image, ensure that your proxy settings are correctly configured and that you have internet access. +- Docker Run Issues: If the Docker container fails to start, check the environment variables and ensure that the backend service is running and accessible. + +This README file provides detailed instructions and explanations for building and running the Dockerized frontend application, as well as running it directly using Python. It also highlights the key features of the project and provides additional information for troubleshooting and configuring the environment. diff --git a/CodeGen/ui/gradio/codegen_ui_gradio.py b/CodeGen/ui/gradio/codegen_ui_gradio.py new file mode 100644 index 0000000000..873d0c42b4 --- /dev/null +++ b/CodeGen/ui/gradio/codegen_ui_gradio.py @@ -0,0 +1,402 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# This is a Gradio app that includes two tabs: one for code generation and another for resource management. +# The resource management tab has been updated to allow file uploads, deletion, and a table listing all the files. +# Additionally, three small text boxes have been added for managing file dataframe parameters. + +import argparse +import os +from pathlib import Path +import gradio as gr +from gradio_pdf import PDF +import requests +import pandas as pd +import os +import uvicorn +import json +import argparse +# from utils import build_logger, make_temp_image, server_error_msg, split_video +from urllib.parse import urlparse +from pathlib import Path +from fastapi import FastAPI +# from fastapi.responses import JSONResponse, StreamingResponse +from fastapi.staticfiles import StaticFiles + +# logger = build_logger("gradio_web_server", "gradio_web_server.log") +logflag = os.getenv("LOGFLAG", False) + +# create a FastAPI app +app = FastAPI() +cur_dir = os.getcwd() +static_dir = Path(os.path.join(cur_dir, "static/")) +tmp_dir = Path(os.path.join(cur_dir, "split_tmp_videos/")) + +Path(static_dir).mkdir(parents=True, exist_ok=True) +app.mount("/static", StaticFiles(directory=static_dir), name="static") + +tmp_upload_folder = "/tmp/gradio/" + + + +host_ip = os.getenv("host_ip") +DATAPREP_REDIS_PORT = os.getenv("DATAPREP_REDIS_PORT", 6007) +DATAPREP_ENDPOINT = os.getenv("DATAPREP_ENDPOINT", f"http://{host_ip}:{DATAPREP_REDIS_PORT}/v1/dataprep") +MEGA_SERVICE_PORT = os.getenv("MEGA_SERVICE_PORT", 7778) + +backend_service_endpoint = os.getenv( + "BACKEND_SERVICE_ENDPOINT", f"http://{host_ip}:{MEGA_SERVICE_PORT}/v1/codegen" + ) + +dataprep_ingest_endpoint = f"{DATAPREP_ENDPOINT}/ingest" +dataprep_get_files_endpoint = f"{DATAPREP_ENDPOINT}/get" +dataprep_delete_files_endpoint = f"{DATAPREP_ENDPOINT}/delete" +dataprep_get_indices_endpoint = f"{DATAPREP_ENDPOINT}/indices" + + + +# Define the functions that will be used in the app +def conversation_history(prompt, index, use_agent, history): + # Print the language and prompt, and return a placeholder code + print(f"Generating code for prompt: {prompt} using index: 
{index} and use_agent is {use_agent}") + history.append([prompt, ""]) + response_generator = generate_code(prompt, index, use_agent) + for token in response_generator: + history[-1][-1] += token + yield history + + +def upload_media(media, index=None, chunk_size=1500, chunk_overlap=100): + media = media.strip().split("\n") + print("Files passed is ", media, flush=True) + if not chunk_size: + chunk_size = 1500 + if not chunk_overlap: + chunk_overlap = 100 + + requests = [] + if type(media) is list: + for file in media: + file_ext = os.path.splitext(file)[-1] + if is_valid_url(file): + print(file, " is valid URL") + print("Ingesting URL...") + value = ingest_url(file, index, chunk_size, chunk_overlap) + requests.append(value) + yield value + elif file_ext in ['.pdf', '.txt']: + print("Ingesting File...") + value = ingest_file(file, index, chunk_size, chunk_overlap) + requests.append(value) + yield value + else: + print(file, "File type not supported") + yield ( + gr.Textbox( + visible=True, + value="Your file extension type is not supported.", + ) + ) + return + yield requests + + else: + file_ext = os.path.splitext(media)[-1] + if is_valid_url(media): + value = ingest_url(media, index, chunk_size, chunk_overlap) + yield value + elif file_ext in ['.pdf', '.txt']: + print("Ingesting File...") + value = ingest_file(media, index, chunk_size, chunk_overlap) + # print("Return value is: ", value, flush=True) + yield value + else: + print(media, "File type not supported") + yield ( + gr.Textbox( + visible=True, + value="Your file extension type is not supported.", + ) + ) + return + +def generate_code(query, index=None, use_agent=False): + if index is None or index == "None": + input_dict = {"messages": query, "agents_flag": use_agent} + else: + input_dict = {"messages": query, "index_name": index, "agents_flag": use_agent} + + print("Query is ", input_dict) + headers = {"Content-Type": "application/json"} + + response = requests.post(url=backend_service_endpoint, headers=headers, data=json.dumps(input_dict), stream=True) + + for line in response.iter_lines(): + if line: + line = line.decode('utf-8') + if line.startswith("data: "): # Only process lines starting with "data: " + json_part = line[len("data: "):] # Remove the "data: " prefix + if json_part.strip() == "[DONE]": # Ignore the DONE marker + continue + try: + json_obj = json.loads(json_part) # Convert to dictionary + if "choices" in json_obj: + for choice in json_obj["choices"]: + if "text" in choice: + # Yield each token individually + yield choice["text"] + except json.JSONDecodeError: + print("Error parsing JSON:", json_part) + + +def ingest_file(file, index=None, chunk_size=100, chunk_overlap=150): + headers = { + # "Content-Type: multipart/form-data" + } + file_input = {"files": open(file, "rb")} + + if index: + print("Index is", index) + data = {"index_name": index, "chunk_size": chunk_size, "chunk_overlap": chunk_overlap} + else: + data = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap} + + print("Calling Request Now!") + response = requests.post(url=dataprep_ingest_endpoint, headers=headers, files=file_input, data=data) + # print("Ingest Files", response) + print(response.text) + + # table = update_table() + return response.text + +def ingest_url(url, index=None, chunk_size=100, chunk_overlap=150): + print("URL is ", url) + url = str(url) + if not is_valid_url(url): + print("Invalid URL") + # yield ( + # gr.Textbox( + # visible=True, + # value="Invalid URL entered. 
Please enter a valid URL", + # ) + # ) + return + headers = { + # "Content-Type: multipart/form-data" + } + + if index: + url_input = {"link_list": json.dumps([url]), "index_name": index, "chunk_size": chunk_size, "chunk_overlap": chunk_overlap} + else: + url_input = {"link_list": json.dumps([url]), "chunk_size": chunk_size, "chunk_overlap": chunk_overlap} + response = requests.post(url=dataprep_ingest_endpoint, headers=headers, data=url_input) + # print("Ingest URL", response) + # table = update_table() + return response.text + + +def is_valid_url(url): + url = str(url) + try: + result = urlparse(url) + return all([result.scheme, result.netloc]) + except ValueError: + return False + + + +# Initialize the file list +file_list = [] + +# def update_files(file): +# # Add the uploaded file to the file list +# file_list.append(file.name) +# file_df["Files"] = file_list +# return file_df + + +def get_files(index=None): + headers = { + # "Content-Type: multipart/form-data" + } + if index == "All Files": + index = None + + if index: + index = {"index_name": index} + response = requests.post(url=dataprep_get_files_endpoint, headers=headers, data=index) + print("Get files with ", index, response) + table = response.json() + return table + else: + # print("URL IS ", dataprep_get_files_endpoint) + response = requests.post(url=dataprep_get_files_endpoint, headers=headers) + print("Get files ", response) + table = response.json() + return table + +def update_table(index=None): + if index == "All Files": + index = None + files = get_files(index) + print("Files is ", files) + if len(files) == 0: + df = pd.DataFrame(files, columns=["Files"]) + return df + else: + df = pd.DataFrame(files) + return df + +def update_indices(): + indices = get_indices() + df = pd.DataFrame(indices, columns=["File Databases"]) + return df + +def delete_file(file, index=None): + # Remove the selected file from the file list + headers = { + # "Content-Type: application/json" + } + print("URL IS ", dataprep_delete_files_endpoint) + if index: + file_input = {"files": open(file, "rb"), "index_name": index} + else: + file_input = {"files": open(file, "rb")} + response = requests.post(url=dataprep_delete_files_endpoint, headers=headers, data=file_input) + print("Delete file ", response) + table = update_table() + return response.text + +def delete_all_files(index=None): + # Remove all files from the file list + headers = { + # "Content-Type: application/json" + } + response = requests.post(url=dataprep_delete_files_endpoint, headers=headers, data='{"file_path": "all"}') + print("Delete all files ", response) + table = update_table() + + return response.text + +def get_indices(): + headers = { + # "Content-Type: application/json" + } + response = requests.post(url=dataprep_get_indices_endpoint, headers=headers) + print("Get Indices", response) + indices = response.json() + return indices + +def update_indices_dropdown(): + indices = ["None"] + get_indices() + new_dd = gr.update(choices=indices, value="None") + return new_dd + + +def get_file_names(files): + file_str = "" + if not files: + return file_str + + for file in files: + file_str += file + '\n' + file_str.strip() + return file_str + + +# Define UI components +with gr.Blocks() as ui: + with gr.Tab("Code Generation"): + gr.Markdown("### Generate Code from Natural Language") + chatbot = gr.Chatbot(label="Chat History") + prompt_input = gr.Textbox(label="Enter your query") + with gr.Column(): + with gr.Row(scale=8): + # indices = ["None"] + get_indices() + database_dropdown = 
gr.Dropdown(choices=get_indices(), label="Select Index", value="None") + with gr.Row(scale=1): + db_refresh_button = gr.Button("Refresh", variant="primary") + db_refresh_button.click(update_indices_dropdown, outputs=database_dropdown) + use_agent = gr.Checkbox(label="Use Agent", container=False) + + generate_button = gr.Button("Generate Code") + + # Connect the generate button to the conversation_history function + generate_button.click(conversation_history, inputs=[prompt_input, database_dropdown, use_agent, chatbot], outputs=chatbot) + + with gr.Tab("Resource Management"): + # File management components + # url_button = gr.Button("Process") + with gr.Row(): + with gr.Column(scale=1): + index_name_input = gr.Textbox(label="Index Name") + chunk_size_input = gr.Textbox(label="Chunk Size", value="1500", placeholder="Enter an integer (default: 1500)") + chunk_overlap_input = gr.Textbox(label="Chunk Overlap", value="100", placeholder="Enter an integer (default: 100)") + with gr.Column(scale=3): + file_upload = gr.File(label="Upload Files", file_count="multiple") + url_input = gr.Textbox(label="Media to be ingested (Append URL's in a new line)") + upload_button = gr.Button("Upload", variant="primary") + upload_status = gr.Textbox(label="Upload Status") + file_upload.change(get_file_names, inputs=file_upload, outputs=url_input) + with gr.Column(scale=1): + # table_dropdown = gr.Dropdown(indices) + # file_table = gr.Dataframe(interactive=False, value=update_table()) + file_table = gr.Dataframe(interactive=False, value=update_indices()) + refresh_button = gr.Button("Refresh", variant="primary", size="sm") + refresh_button.click(update_indices, outputs=file_table) + # refresh_button.click(update_indices, outputs=database_dropdown) + # table_dropdown.change(fn=update_table, inputs=table_dropdown, outputs=file_table) + # upload_button.click(upload_media, inputs=[file_upload, index_name_input, chunk_size_input, chunk_overlap_input], outputs=file_table) + upload_button.click(upload_media, inputs=[url_input, index_name_input, chunk_size_input, chunk_overlap_input], outputs=upload_status) + + delete_all_button = gr.Button("Delete All", variant="primary", size="sm") + delete_all_button.click(delete_all_files, outputs=upload_status) + + + + # delete_button = gr.Button("Delete Index") + + # selected_file_output = gr.Textbox(label="Selected File") + # delete_button.click(delete_file, inputs=indices, outputs=upload_status) + + + +ui.queue() +app = gr.mount_gradio_app(app, ui, path="/") +share = False +enable_queue = True + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--host", type=str, default="0.0.0.0") + parser.add_argument("--port", type=int, default=os.getenv("UI_PORT", 5173)) + parser.add_argument("--concurrency-count", type=int, default=20) + parser.add_argument("--share", action="store_true") + + host_ip = os.getenv("host_ip") + DATAPREP_REDIS_PORT = os.getenv("DATAPREP_REDIS_PORT", 6007) + DATAPREP_ENDPOINT = os.getenv("DATAPREP_ENDPOINT", f"http://{host_ip}:{DATAPREP_REDIS_PORT}/v1/dataprep") + MEGA_SERVICE_PORT = os.getenv("MEGA_SERVICE_PORT", 7778) + + + backend_service_endpoint = os.getenv( + "BACKEND_SERVICE_ENDPOINT", f"http://{host_ip}:{MEGA_SERVICE_PORT}/v1/codegen" + ) + + # dataprep_ingest_endpoint = f"{DATAPREP_ENDPOINT}/ingest" + # dataprep_get_files_endpoint = f"{DATAPREP_ENDPOINT}/get" + # dataprep_delete_files_endpoint = f"{DATAPREP_ENDPOINT}/delete" + # dataprep_get_indices_endpoint = f"{DATAPREP_ENDPOINT}/indices" + + + args = 
parser.parse_args() + # logger.info(f"args: {args}") + global gateway_addr + gateway_addr = backend_service_endpoint + global dataprep_ingest_addr + dataprep_ingest_addr = dataprep_ingest_endpoint + global dataprep_get_files_addr + dataprep_get_files_addr = dataprep_get_files_endpoint + + + uvicorn.run(app, host=args.host, port=args.port) diff --git a/CodeGen/ui/gradio/requirements.txt b/CodeGen/ui/gradio/requirements.txt new file mode 100644 index 0000000000..2a4c8e1a30 --- /dev/null +++ b/CodeGen/ui/gradio/requirements.txt @@ -0,0 +1,4 @@ +gradio==5.22.0 +numpy==1.26.4 +opencv-python==4.10.0.82 +Pillow==10.3.0 From bc60f95c4a3314ac30b3ff6d060bcd482799b329 Mon Sep 17 00:00:00 2001 From: okhleif-IL Date: Fri, 28 Mar 2025 13:40:17 -0700 Subject: [PATCH 11/22] bug fixes and ui updates Signed-off-by: okhleif-IL --- .../intel/cpu/xeon/compose.yaml | 1 + CodeGen/ui/gradio/README.md | 2 +- CodeGen/ui/gradio/codegen_ui_gradio.py | 45 +++++++++++-------- 3 files changed, 28 insertions(+), 20 deletions(-) diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml index c932ece069..e88b3e4847 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml +++ b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml @@ -111,6 +111,7 @@ services: - MEGA_SERVICE_PORT=${MEGA_SERVICE_PORT} - host_ip=${host_ip} - DATAPREP_ENDPOINT=${DATAPREP_ENDPOINT} + - DATAPREP_REDIS_PORT=${DATAPREP_REDIS_PORT} ipc: host restart: always redis-vector-db: diff --git a/CodeGen/ui/gradio/README.md b/CodeGen/ui/gradio/README.md index 9769efb317..d994729c82 100644 --- a/CodeGen/ui/gradio/README.md +++ b/CodeGen/ui/gradio/README.md @@ -6,7 +6,7 @@ This project provides a user interface for summarizing documents and text using ### Build UI Docker Image -To build the frontend Docker image, navigate to the `GenAIExamples/DocSum/ui` directory and run the following command: +To build the frontend Docker image, navigate to the `GenAIExamples/CodeGen/ui` directory and run the following command: ```bash cd GenAIExamples/CodeGen/ui diff --git a/CodeGen/ui/gradio/codegen_ui_gradio.py b/CodeGen/ui/gradio/codegen_ui_gradio.py index 873d0c42b4..4f7e46d3a6 100644 --- a/CodeGen/ui/gradio/codegen_ui_gradio.py +++ b/CodeGen/ui/gradio/codegen_ui_gradio.py @@ -9,7 +9,6 @@ import os from pathlib import Path import gradio as gr -from gradio_pdf import PDF import requests import pandas as pd import os @@ -81,11 +80,23 @@ def upload_media(media, index=None, chunk_size=1500, chunk_overlap=100): if is_valid_url(file): print(file, " is valid URL") print("Ingesting URL...") + yield ( + gr.Textbox( + visible=True, + value="Ingesting URL...", + ) + ) value = ingest_url(file, index, chunk_size, chunk_overlap) requests.append(value) yield value elif file_ext in ['.pdf', '.txt']: print("Ingesting File...") + yield ( + gr.Textbox( + visible=True, + value="Ingesting file...", + ) + ) value = ingest_file(file, index, chunk_size, chunk_overlap) requests.append(value) yield value @@ -94,7 +105,7 @@ def upload_media(media, index=None, chunk_size=1500, chunk_overlap=100): yield ( gr.Textbox( visible=True, - value="Your file extension type is not supported.", + value="Your media is either an invalid URL or the file extension type is not supported. 
(Supports .pdf, .txt, url)", ) ) return @@ -173,14 +184,8 @@ def ingest_url(url, index=None, chunk_size=100, chunk_overlap=150): print("URL is ", url) url = str(url) if not is_valid_url(url): - print("Invalid URL") - # yield ( - # gr.Textbox( - # visible=True, - # value="Invalid URL entered. Please enter a valid URL", - # ) - # ) - return + return "Invalid URL entered. Please enter a valid URL" + headers = { # "Content-Type: multipart/form-data" } @@ -249,7 +254,7 @@ def update_table(index=None): def update_indices(): indices = get_indices() - df = pd.DataFrame(indices, columns=["File Databases"]) + df = pd.DataFrame(indices, columns=["File Indices"]) return df def delete_file(file, index=None): @@ -276,20 +281,21 @@ def delete_all_files(index=None): print("Delete all files ", response) table = update_table() - return response.text + return "Delete All status: " + response.text def get_indices(): headers = { # "Content-Type: application/json" } + print("URL IS ", dataprep_get_indices_endpoint) response = requests.post(url=dataprep_get_indices_endpoint, headers=headers) + indices = ["None"] print("Get Indices", response) - indices = response.json() + indices += response.json() return indices def update_indices_dropdown(): - indices = ["None"] + get_indices() - new_dd = gr.update(choices=indices, value="None") + new_dd = gr.update(choices=get_indices(), value="None") return new_dd @@ -311,13 +317,14 @@ def get_file_names(files): chatbot = gr.Chatbot(label="Chat History") prompt_input = gr.Textbox(label="Enter your query") with gr.Column(): - with gr.Row(scale=8): + with gr.Row(equal_height=True): # indices = ["None"] + get_indices() - database_dropdown = gr.Dropdown(choices=get_indices(), label="Select Index", value="None") - with gr.Row(scale=1): - db_refresh_button = gr.Button("Refresh", variant="primary") + database_dropdown = gr.Dropdown(choices=get_indices(), label="Select Index", value="None", scale=10) + db_refresh_button = gr.Button("Refresh Dropdown", scale=0.1) db_refresh_button.click(update_indices_dropdown, outputs=database_dropdown) use_agent = gr.Checkbox(label="Use Agent", container=False) + # with gr.Row(scale=1): + generate_button = gr.Button("Generate Code") From 56ad272b088917b0f55875ffc03ce718846b5eef Mon Sep 17 00:00:00 2001 From: Mustafa Date: Fri, 28 Mar 2025 14:42:45 -0700 Subject: [PATCH 12/22] add cpu xeon test Signed-off-by: Mustafa --- .../intel/cpu/xeon/compose.yaml | 1 + CodeGen/tests/test_compose_on_xeon.sh | 57 ++++++++++++++----- CodeGen/ui/gradio/codegen_ui_gradio.py | 1 - 3 files changed, 44 insertions(+), 15 deletions(-) diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml index c932ece069..e88b3e4847 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml +++ b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml @@ -111,6 +111,7 @@ services: - MEGA_SERVICE_PORT=${MEGA_SERVICE_PORT} - host_ip=${host_ip} - DATAPREP_ENDPOINT=${DATAPREP_ENDPOINT} + - DATAPREP_REDIS_PORT=${DATAPREP_REDIS_PORT} ipc: host restart: always redis-vector-db: diff --git a/CodeGen/tests/test_compose_on_xeon.sh b/CodeGen/tests/test_compose_on_xeon.sh index 6fc25963ac..14e01a0899 100644 --- a/CodeGen/tests/test_compose_on_xeon.sh +++ b/CodeGen/tests/test_compose_on_xeon.sh @@ -29,7 +29,7 @@ function build_docker_images() { fi cd $WORKPATH/docker_image_build - git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + # git clone --depth 1 --branch ${opea_branch} 
https://github.com/opea-project/GenAIComps.git git clone https://github.com/vllm-project/vllm.git && cd vllm VLLM_VER="$(git describe --tags "$(git rev-list --tags --max-count=1)" )" @@ -82,23 +82,35 @@ function validate_services() { local DOCKER_NAME="$4" local INPUT_DATA="$5" - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + if [[ "$SERVICE_NAME" == "ingest" ]]; then + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$INPUT_DATA" -F index_name=test_redis -H 'Content-Type: multipart/form-data' "$URL") - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Data preparation succeeded..." + else + echo "[ $SERVICE_NAME ] Data preparation failed..." + fi + + else + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi else - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log exit 1 fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. 
Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 fi sleep 5s } @@ -122,6 +134,14 @@ function validate_microservices() { "llm-textgen-server" \ '{"query":"def print_hello_world():", "max_tokens": 256}' + # Data ingest microservice + validate_services \ + "${ip_address}:6007/v1/dataprep/ingest" \ + "Data preparation succeeded" \ + "ingest" \ + "dataprep-redis-server" \ + 'link_list=["https://www.ces.tech/", "https://modin.readthedocs.io/en/latest/index.html"]' + } function validate_megaservice() { @@ -133,6 +153,14 @@ function validate_megaservice() { "codegen-xeon-backend-server" \ '{"messages": "def print_hello_world():", "max_tokens": 256}' + # Curl the Mega Service with index_name and agents_flag + validate_services \ + "${ip_address}:7778/v1/codegen" \ + "print" \ + "mega-codegen" \ + "codegen-xeon-backend-server" \ + '{ "index_name": "test_redis", "agents_flag": "True", "messages": "def print_hello_world():", "max_tokens": 256}' + } function validate_frontend() { @@ -191,7 +219,7 @@ function main() { stop_docker "${docker_compose_profiles[${i}]}" done - # build docker images + # # build docker images if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi # loop all profiles @@ -202,7 +230,7 @@ function main() { validate_microservices "${docker_llm_container_names[${i}]}" validate_megaservice - validate_frontend + # validate_frontend stop_docker "${docker_compose_profiles[${i}]}" sleep 5s @@ -212,3 +240,4 @@ function main() { } main + diff --git a/CodeGen/ui/gradio/codegen_ui_gradio.py b/CodeGen/ui/gradio/codegen_ui_gradio.py index 873d0c42b4..cb90288cc5 100644 --- a/CodeGen/ui/gradio/codegen_ui_gradio.py +++ b/CodeGen/ui/gradio/codegen_ui_gradio.py @@ -9,7 +9,6 @@ import os from pathlib import Path import gradio as gr -from gradio_pdf import PDF import requests import pandas as pd import os From d7275c2b7c87c1d2ee989c1f4e9e42c273195575 Mon Sep 17 00:00:00 2001 From: Mustafa Date: Fri, 28 Mar 2025 15:25:50 -0700 Subject: [PATCH 13/22] add cpu xeon test Signed-off-by: Mustafa --- CodeGen/tests/test_compose_on_gaudi.sh | 3 ++- CodeGen/tests/test_compose_on_xeon.sh | 2 +- CodeGen/ui/svelte/.env | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CodeGen/tests/test_compose_on_gaudi.sh b/CodeGen/tests/test_compose_on_gaudi.sh index c7b6b83f7e..60bd1b6782 100644 --- a/CodeGen/tests/test_compose_on_gaudi.sh +++ b/CodeGen/tests/test_compose_on_gaudi.sh @@ -29,7 +29,8 @@ function build_docker_images() { fi cd $WORKPATH/docker_image_build - git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + # git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + # Download Gaudi vllm of latest tag git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork VLLM_VER=$(git describe --tags "$(git rev-list --tags --max-count=1)") diff --git a/CodeGen/tests/test_compose_on_xeon.sh b/CodeGen/tests/test_compose_on_xeon.sh index 14e01a0899..aa517f7c98 100644 --- a/CodeGen/tests/test_compose_on_xeon.sh +++ b/CodeGen/tests/test_compose_on_xeon.sh @@ -219,7 +219,7 @@ function main() { stop_docker "${docker_compose_profiles[${i}]}" done - # # build docker images + # build docker images if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi # loop all profiles diff --git a/CodeGen/ui/svelte/.env b/CodeGen/ui/svelte/.env index 0bf85fa876..2efb56c8f5 100644 --- a/CodeGen/ui/svelte/.env +++ b/CodeGen/ui/svelte/.env @@ -1 +1 @@ -BASIC_URL = 
'http://backend_address:7778/v1/codegen' +BASIC_URL = 'http://10.98.56.44:7778/v1/codegen' From 815083eacf81e28ea33fcf5415b6c5857ac635f4 Mon Sep 17 00:00:00 2001 From: Mustafa Date: Fri, 28 Mar 2025 18:36:27 -0700 Subject: [PATCH 14/22] update gaudi test and compose Signed-off-by: Mustafa --- .../intel/hpu/gaudi/compose.yaml | 96 ++++++++++++++++++- CodeGen/docker_compose/set_env.sh | 3 +- CodeGen/tests/test_compose_on_gaudi.sh | 52 +++++++--- CodeGen/tests/test_compose_on_xeon.sh | 2 +- 4 files changed, 136 insertions(+), 17 deletions(-) diff --git a/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml b/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml index 2f669e9465..dab03fe080 100644 --- a/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml @@ -108,11 +108,15 @@ services: - http_proxy=${http_proxy} - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP} + - RETRIEVAL_SERVICE_HOST_IP=${RETRIEVAL_SERVICE_HOST_IP} + - REDIS_RETRIEVER_PORT=${REDIS_RETRIEVER_PORT} + - TEI_EMBEDDING_HOST_IP=${TEI_EMBEDDING_HOST_IP} + - EMBEDDER_PORT=${EMBEDDER_PORT} ipc: host restart: always - codegen-gaudi-ui-server: - image: ${REGISTRY:-opea}/codegen-ui:${TAG:-latest} - container_name: codegen-gaudi-ui-server + codegen-xeon-ui-server: + image: ${REGISTRY:-opea}/codegen-gradio-ui:${TAG:-latest} + container_name: codegen-xeon-ui-server depends_on: - codegen-gaudi-backend-server ports: @@ -122,9 +126,95 @@ services: - https_proxy=${https_proxy} - http_proxy=${http_proxy} - BASIC_URL=${BACKEND_SERVICE_ENDPOINT} + - MEGA_SERVICE_PORT=${MEGA_SERVICE_PORT} + - host_ip=${host_ip} + - DATAPREP_ENDPOINT=${DATAPREP_ENDPOINT} + - DATAPREP_REDIS_PORT=${DATAPREP_REDIS_PORT} ipc: host restart: always + redis-vector-db: + image: redis/redis-stack:7.2.0-v9 + container_name: redis-vector-db + ports: + - "${REDIS_DB_PORT}:${REDIS_DB_PORT}" + - "${REDIS_INSIGHTS_PORT}:${REDIS_INSIGHTS_PORT}" + dataprep-redis-server: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-redis-server + depends_on: + - redis-vector-db + ports: + - "${DATAPREP_REDIS_PORT}:5000" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: ${REDIS_URL} + REDIS_HOST: ${host_ip} + INDEX_NAME: ${INDEX_NAME} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + LOGFLAG: true + restart: unless-stopped + tei-embedding-serving: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + container_name: tei-embedding-serving + entrypoint: /bin/sh -c "apt-get update && apt-get install -y curl && text-embeddings-router --json-output --model-id ${EMBEDDING_MODEL_ID} --auto-truncate" + ports: + - "${TEI_EMBEDDER_PORT:-12000}:80" + volumes: + - "./data:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + host_ip: ${host_ip} + HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + healthcheck: + test: ["CMD", "curl", "-f", "http://${host_ip}:${TEI_EMBEDDER_PORT}/health"] + interval: 10s + timeout: 6s + retries: 48 + tei-embedding-server: + image: ${REGISTRY:-opea}/embedding:${TAG:-latest} + container_name: tei-embedding-server + ports: + - "${EMBEDDER_PORT:-10201}:6000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + EMBEDDING_COMPONENT_NAME: "OPEA_TEI_EMBEDDING" + depends_on: + tei-embedding-serving: + condition: 
service_healthy + restart: unless-stopped + retriever-redis: + image: ${REGISTRY:-opea}/retriever:${TAG:-latest} + container_name: retriever-redis + depends_on: + - redis-vector-db + ports: + - "${REDIS_RETRIEVER_PORT}:${REDIS_RETRIEVER_PORT}" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: ${REDIS_URL} + REDIS_DB_PORT: ${REDIS_DB_PORT} + REDIS_INSIGHTS_PORT: ${REDIS_INSIGHTS_PORT} + REDIS_RETRIEVER_PORT: ${REDIS_RETRIEVER_PORT} + INDEX_NAME: ${INDEX_NAME} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + LOGFLAG: ${LOGFLAG} + RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_REDIS} + restart: unless-stopped + networks: default: driver: bridge diff --git a/CodeGen/docker_compose/set_env.sh b/CodeGen/docker_compose/set_env.sh index 559f00cf2a..73805e6095 100644 --- a/CodeGen/docker_compose/set_env.sh +++ b/CodeGen/docker_compose/set_env.sh @@ -49,4 +49,5 @@ export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" export DATAPREP_REDIS_PORT=6007 export DATAPREP_ENDPOINT="http://${host_ip}:${DATAPREP_REDIS_PORT}/v1/dataprep" export LOGFLAG=false -export MODEL_CACHE="./data" \ No newline at end of file +export MODEL_CACHE="./data" +export NUM_CARDS=1 \ No newline at end of file diff --git a/CodeGen/tests/test_compose_on_gaudi.sh b/CodeGen/tests/test_compose_on_gaudi.sh index 60bd1b6782..1944b78e16 100644 --- a/CodeGen/tests/test_compose_on_gaudi.sh +++ b/CodeGen/tests/test_compose_on_gaudi.sh @@ -30,6 +30,7 @@ function build_docker_images() { cd $WORKPATH/docker_image_build # git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + git clone --depth 1 --branch codegen_rag_agent_v2 https://github.com/MSCetin37/GenAIComps.git # Download Gaudi vllm of latest tag git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork @@ -83,23 +84,34 @@ function validate_services() { local DOCKER_NAME="$4" local INPUT_DATA="$5" - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + if [[ "$SERVICE_NAME" == "ingest" ]]; then + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$INPUT_DATA" -F index_name=test_redis -H 'Content-Type: multipart/form-data' "$URL") - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Data preparation succeeded..." + else + echo "[ $SERVICE_NAME ] Data preparation failed..." + fi - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." + else + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." 
+ else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi else - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log exit 1 fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 fi sleep 5s } @@ -123,6 +135,14 @@ function validate_microservices() { "llm-textgen-gaudi-server" \ '{"query":"def print_hello_world():"}' + # Data ingest microservice + validate_services \ + "${ip_address}:6007/v1/dataprep/ingest" \ + "Data preparation succeeded" \ + "ingest" \ + "dataprep-redis-server" \ + 'link_list=["https://www.ces.tech/", "https://modin.readthedocs.io/en/latest/index.html"]' + } function validate_megaservice() { @@ -134,6 +154,14 @@ function validate_megaservice() { "codegen-gaudi-backend-server" \ '{"messages": "def print_hello_world():"}' + # Curl the Mega Service with index_name and agents_flag + validate_services \ + "${ip_address}:7778/v1/codegen" \ + "print" \ + "mega-codegen" \ + "codegen-xeon-backend-server" \ + '{ "index_name": "test_redis", "agents_flag": "True", "messages": "def print_hello_world():", "max_tokens": 256}' + } function validate_frontend() { @@ -202,7 +230,7 @@ function main() { validate_microservices "${docker_llm_container_names[${i}]}" validate_megaservice - validate_frontend + # validate_frontend stop_docker "${docker_compose_profiles[${i}]}" sleep 5s diff --git a/CodeGen/tests/test_compose_on_xeon.sh b/CodeGen/tests/test_compose_on_xeon.sh index aa517f7c98..4e210bf540 100644 --- a/CodeGen/tests/test_compose_on_xeon.sh +++ b/CodeGen/tests/test_compose_on_xeon.sh @@ -30,6 +30,7 @@ function build_docker_images() { cd $WORKPATH/docker_image_build # git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + git clone --depth 1 --branch codegen_rag_agent_v2 https://github.com/MSCetin37/GenAIComps.git git clone https://github.com/vllm-project/vllm.git && cd vllm VLLM_VER="$(git describe --tags "$(git rev-list --tags --max-count=1)" )" @@ -92,7 +93,6 @@ function validate_services() { fi else - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") if [ "$HTTP_STATUS" -eq 200 ]; then echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." 
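For reference, the two request shapes exercised by the updated `validate_services` helper look roughly like the following (a sketch only; the host IP variable, index name, and sample payloads mirror the values already used in these test scripts and are illustrative):

```bash
# "ingest" case: multipart/form-data POST to the dataprep service (port 6007 per set_env.sh)
curl -s -o /dev/null -w "%{http_code}" -X POST \
  -F 'link_list=["https://www.ces.tech/"]' \
  -F index_name=test_redis \
  -H 'Content-Type: multipart/form-data' \
  "http://${ip_address}:6007/v1/dataprep/ingest"

# default case: JSON POST whose response body is then grepped for the expected substring
curl -s -X POST \
  -d '{"messages": "def print_hello_world():", "max_tokens": 256}' \
  -H 'Content-Type: application/json' \
  "http://${ip_address}:7778/v1/codegen"
```
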
From dc88a1f24956784b9f52db07036cc80636326aa7 Mon Sep 17 00:00:00 2001 From: Mustafa Date: Fri, 28 Mar 2025 18:40:36 -0700 Subject: [PATCH 15/22] update gaudi test and compose Signed-off-by: Mustafa --- CodeGen/ui/svelte/.env | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CodeGen/ui/svelte/.env b/CodeGen/ui/svelte/.env index 2efb56c8f5..0bf85fa876 100644 --- a/CodeGen/ui/svelte/.env +++ b/CodeGen/ui/svelte/.env @@ -1 +1 @@ -BASIC_URL = 'http://10.98.56.44:7778/v1/codegen' +BASIC_URL = 'http://backend_address:7778/v1/codegen' From e11d51668c5f2d1c30af2aa72871f72a0df70d4f Mon Sep 17 00:00:00 2001 From: okhleif-IL Date: Mon, 31 Mar 2025 12:23:10 -0700 Subject: [PATCH 16/22] fixed output for index uses Signed-off-by: okhleif-IL --- CodeGen/ui/gradio/codegen_ui_gradio.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/CodeGen/ui/gradio/codegen_ui_gradio.py b/CodeGen/ui/gradio/codegen_ui_gradio.py index 4f7e46d3a6..770608a0c9 100644 --- a/CodeGen/ui/gradio/codegen_ui_gradio.py +++ b/CodeGen/ui/gradio/codegen_ui_gradio.py @@ -147,17 +147,19 @@ def generate_code(query, index=None, use_agent=False): line = line.decode('utf-8') if line.startswith("data: "): # Only process lines starting with "data: " json_part = line[len("data: "):] # Remove the "data: " prefix - if json_part.strip() == "[DONE]": # Ignore the DONE marker - continue - try: - json_obj = json.loads(json_part) # Convert to dictionary - if "choices" in json_obj: - for choice in json_obj["choices"]: - if "text" in choice: - # Yield each token individually - yield choice["text"] - except json.JSONDecodeError: - print("Error parsing JSON:", json_part) + else: + json_part = line + if json_part.strip() == "[DONE]": # Ignore the DONE marker + continue + try: + json_obj = json.loads(json_part) # Convert to dictionary + if "choices" in json_obj: + for choice in json_obj["choices"]: + if "text" in choice: + # Yield each token individually + yield choice["text"] + except json.JSONDecodeError: + print("Error parsing JSON:", json_part) def ingest_file(file, index=None, chunk_size=100, chunk_overlap=150): From 7bf2eb5895e933843a01b10a5d10efb46c43ba9a Mon Sep 17 00:00:00 2001 From: okhleif-IL Date: Mon, 31 Mar 2025 15:34:09 -0700 Subject: [PATCH 17/22] added services for gaudi Signed-off-by: okhleif-IL --- .../intel/hpu/gaudi/compose.yaml | 59 ++++++++++++++++++- 1 file changed, 56 insertions(+), 3 deletions(-) diff --git a/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml b/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml index 2f669e9465..dd0e5a2b29 100644 --- a/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml @@ -108,10 +108,15 @@ services: - http_proxy=${http_proxy} - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP} + - RETRIEVAL_SERVICE_HOST_IP=${RETRIEVAL_SERVICE_HOST_IP} + - REDIS_RETRIEVER_PORT=${REDIS_RETRIEVER_PORT} + - TEI_EMBEDDING_HOST_IP=${TEI_EMBEDDING_HOST_IP} + - EMBEDDER_PORT=${EMBEDDER_PORT} + - host_ip=${host_ip} ipc: host restart: always codegen-gaudi-ui-server: - image: ${REGISTRY:-opea}/codegen-ui:${TAG:-latest} + image: ${REGISTRY:-opea}/codegen-gradio-ui:${TAG:-latest} container_name: codegen-gaudi-ui-server depends_on: - codegen-gaudi-backend-server @@ -122,9 +127,57 @@ services: - https_proxy=${https_proxy} - http_proxy=${http_proxy} - BASIC_URL=${BACKEND_SERVICE_ENDPOINT} + - MEGA_SERVICE_PORT=${MEGA_SERVICE_PORT} + - host_ip=${host_ip} + - 
DATAPREP_ENDPOINT=${DATAPREP_ENDPOINT} + - DATAPREP_REDIS_PORT=${DATAPREP_REDIS_PORT} ipc: host restart: always - + redis-vector-db: + image: redis/redis-stack:7.2.0-v9 + container_name: redis-vector-db + ports: + - "${REDIS_DB_PORT}:${REDIS_DB_PORT}" + - "${REDIS_INSIGHTS_PORT}:${REDIS_INSIGHTS_PORT}" + dataprep-redis-server: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-redis-server + depends_on: + - redis-vector-db + ports: + - "${DATAPREP_REDIS_PORT}:5000" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: ${REDIS_URL} + REDIS_HOST: ${host_ip} + INDEX_NAME: ${INDEX_NAME} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + LOGFLAG: true + restart: unless-stopped + retriever-redis: + image: ${REGISTRY:-opea}/retriever:${TAG:-latest} + container_name: retriever-redis + depends_on: + - redis-vector-db + ports: + - "${REDIS_RETRIEVER_PORT}:${REDIS_RETRIEVER_PORT}" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: ${REDIS_URL} + REDIS_DB_PORT: ${REDIS_DB_PORT} + REDIS_INSIGHTS_PORT: ${REDIS_INSIGHTS_PORT} + REDIS_RETRIEVER_PORT: ${REDIS_RETRIEVER_PORT} + INDEX_NAME: ${INDEX_NAME} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + LOGFLAG: ${LOGFLAG} + RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_REDIS} + restart: unless-stopped networks: default: - driver: bridge + driver: bridge \ No newline at end of file From 9e84e73b72a98051e316a7eb19fda95e3044f30a Mon Sep 17 00:00:00 2001 From: Mustafa Date: Thu, 13 Feb 2025 22:11:53 -0800 Subject: [PATCH 18/22] update the compose file Signed-off-by: Mustafa initial Signed-off-by: Mustafa update codegen Signed-off-by: Mustafa update for codegen Signed-off-by: Mustafa add agents Signed-off-by: Mustafa env updates Signed-off-by: Mustafa update codegen Signed-off-by: Mustafa merged to main Signed-off-by: Mustafa updates Signed-off-by: Mustafa update the env variables Signed-off-by: Mustafa update compose.yaml Signed-off-by: Mustafa compose.yaml updates Signed-off-by: Mustafa update readme file Signed-off-by: Mustafa update readme Signed-off-by: Mustafa Gradio UI for CodeGen (#4) * update the compose file Signed-off-by: Mustafa initial Signed-off-by: Mustafa added microservice ports Signed-off-by: okhleif-IL update codegen Signed-off-by: Mustafa update for codegen Signed-off-by: Mustafa Initial commit for Gradio UI Signed-off-by: okhleif-IL New UI Signed-off-by: okhleif-IL prepare for merge Signed-off-by: okhleif-IL add agents Signed-off-by: Mustafa env updates Signed-off-by: Mustafa update codegen Signed-off-by: Mustafa merged to main Signed-off-by: Mustafa updates Signed-off-by: Mustafa UI Updates Signed-off-by: okhleif-IL added dockerfile Signed-off-by: okhleif-IL removed files dataframe Signed-off-by: okhleif-IL updated file upload Signed-off-by: okhleif-IL added checkbox for agent Signed-off-by: okhleif-IL key_index_name --> index_name Signed-off-by: okhleif-IL added / removed print statements Signed-off-by: okhleif-IL Support for data streaming (from Melanie) Signed-off-by: okhleif-IL fixed file not supported bug Signed-off-by: okhleif-IL added refresh button to index Signed-off-by: okhleif-IL simplified README Signed-off-by: okhleif-IL * updated readme and fixed merge Signed-off-by: okhleif-IL * reverted changes Signed-off-by: okhleif-IL --------- Signed-off-by: okhleif-IL Co-authored-by: Mustafa add 
cpu xeon test Signed-off-by: Mustafa add cpu xeon test Signed-off-by: Mustafa codegen code cleaning Signed-off-by: Mustafa --- CodeGen/Dockerfile | 47 +- CodeGen/README.md | 72 +++- CodeGen/codegen.py | 228 +++++++++- .../docker_compose/intel/cpu/xeon/README.md | 86 +++- .../intel/cpu/xeon/compose.yaml | 98 ++++- .../docker_compose/intel/hpu/gaudi/README.md | 84 +++- CodeGen/docker_compose/set_env.sh | 34 +- CodeGen/docker_image_build/build.yaml | 6 + CodeGen/tests/test_compose_on_gaudi.sh | 3 +- CodeGen/tests/test_compose_on_xeon.sh | 55 ++- CodeGen/ui/docker/Dockerfile.gradio | 33 ++ CodeGen/ui/gradio/README.md | 65 +++ CodeGen/ui/gradio/codegen_ui_gradio.py | 401 ++++++++++++++++++ CodeGen/ui/gradio/requirements.txt | 4 + CodeGen/ui/svelte/.env | 2 +- 15 files changed, 1141 insertions(+), 77 deletions(-) create mode 100644 CodeGen/ui/docker/Dockerfile.gradio create mode 100644 CodeGen/ui/gradio/README.md create mode 100644 CodeGen/ui/gradio/codegen_ui_gradio.py create mode 100644 CodeGen/ui/gradio/requirements.txt diff --git a/CodeGen/Dockerfile b/CodeGen/Dockerfile index 5305a9d89f..b2b4155fd7 100644 --- a/CodeGen/Dockerfile +++ b/CodeGen/Dockerfile @@ -1,8 +1,51 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -ARG BASE_TAG=latest -FROM opea/comps-base:$BASE_TAG +# Stage 1: base setup used by other stages +FROM python:3.11-slim AS base + +# get security updates +RUN apt-get update && apt-get upgrade -y && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +ENV HOME=/home/user + +RUN useradd -m -s /bin/bash user && \ + mkdir -p $HOME && \ + chown -R user $HOME + +WORKDIR $HOME + + +# Stage 2: latest GenAIComps sources +FROM base AS git + +RUN apt-get update && apt-get install -y --no-install-recommends git +# RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git +COPY GenAIComps GenAIComps + + +# Stage 3: common layer shared by services using GenAIComps +FROM base AS comps-base + +# copy just relevant parts +COPY --from=git $HOME/GenAIComps/comps $HOME/GenAIComps/comps +COPY --from=git $HOME/GenAIComps/*.* $HOME/GenAIComps/LICENSE $HOME/GenAIComps/ + +WORKDIR $HOME/GenAIComps +RUN pip install --no-cache-dir --upgrade pip setuptools && \ + pip install --no-cache-dir -r $HOME/GenAIComps/requirements.txt +WORKDIR $HOME + +ENV PYTHONPATH=$PYTHONPATH:$HOME/GenAIComps + +USER user + + +# Stage 4: unique part +FROM comps-base + +ENV LANG=C.UTF-8 COPY ./codegen.py $HOME/codegen.py diff --git a/CodeGen/README.md b/CodeGen/README.md index 00d54adbc2..692e01848b 100644 --- a/CodeGen/README.md +++ b/CodeGen/README.md @@ -1,6 +1,6 @@ # Code Generation Application -Code Generation (CodeGen) Large Language Models (LLMs) are specialized AI models designed for the task of generating computer code. Such models undergo training with datasets that encompass repositories, specialized documentation, programming code, relevant web content, and other related data. They possess a deep understanding of various programming languages, coding patterns, and software development concepts. CodeGen LLMs are engineered to assist developers and programmers. When these LLMs are seamlessly integrated into the developer's Integrated Development Environment (IDE), they possess a comprehensive understanding of the coding context, which includes elements such as comments, function names, and variable names. This contextual awareness empowers them to provide more refined and contextually relevant coding suggestions. 
+Code Generation (CodeGen) Large Language Models (LLMs) are specialized AI models designed for the task of generating computer code. Such models undergo training with datasets that encompass repositories, specialized documentation, programming code, relevant web content, and other related data. They possess a deep understanding of various programming languages, coding patterns, and software development concepts. CodeGen LLMs are engineered to assist developers and programmers. When these LLMs are seamlessly integrated into the developer's Integrated Development Environment (IDE), they possess a comprehensive understanding of the coding context, which includes elements such as comments, function names, and variable names. This contextual awareness empowers them to provide more refined and contextually relevant coding suggestions. Additionally Retrieval-Augmented Generation (RAG) and Agents are parts of the CodeGen example which provide an additional layer of intelligence and adaptability, ensuring that the generated code is not only relevant but also accurate, efficient, and tailored to the specific needs of the developers and programmers. The capabilities of CodeGen LLMs include: @@ -20,6 +20,7 @@ The workflow falls into the following architecture: The CodeGen example is implemented using the component-level microservices defined in [GenAIComps](https://github.com/opea-project/GenAIComps). The flow chart below shows the information flow between different microservices for this example. + ```mermaid --- config: @@ -28,7 +29,7 @@ config: rankSpacing: 100 curve: linear themeVariables: - fontSize: 50px + fontSize: 25px --- flowchart LR %% Colors %% @@ -37,34 +38,56 @@ flowchart LR classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 classDef invisible fill:transparent,stroke:transparent; style CodeGen-MegaService stroke:#000000 - %% Subgraphs %% - subgraph CodeGen-MegaService["CodeGen MegaService "] + subgraph CodeGen-MegaService["CodeGen-MegaService"] direction LR - LLM([LLM MicroService]):::blue + EM([Embedding
MicroService]):::blue + RET([Retrieval
MicroService]):::blue + RER([Agents]):::blue + LLM([LLM
MicroService]):::blue end - subgraph UserInterface[" User Interface "] + subgraph User Interface direction LR - a([User Input Query]):::orchid - UI([UI server
]):::orchid + a([Submit Query Tab]):::orchid + UI([UI server]):::orchid + Ingest([Manage Resources]):::orchid end + CLIP_EM{{Embedding
service}} + VDB{{Vector DB}} + V_RET{{Retriever
service}} + Ingest{{Ingest data}} + DP([Data Preparation]):::blue + LLM_gen{{TGI Service}} + GW([CodeGen GateWay]):::orange - LLM_gen{{LLM Service
}} - GW([CodeGen GateWay
]):::orange - + %% Data Preparation flow + %% Ingest data flow + direction LR + Ingest[Ingest data] --> UI + UI --> DP + DP <-.-> CLIP_EM %% Questions interaction direction LR a[User Input Query] --> UI UI --> GW GW <==> CodeGen-MegaService + EM ==> RET + RET ==> RER + RER ==> LLM %% Embedding service flow direction LR + EM <-.-> CLIP_EM + RET <-.-> V_RET LLM <-.-> LLM_gen + direction TB + %% Vector DB interaction + V_RET <-.->VDB + DP <-.->VDB ``` ## Deploy CodeGen Service @@ -138,11 +161,25 @@ Refer to the [Gaudi Guide](./docker_compose/intel/hpu/gaudi/README.md) to build Find the corresponding [compose.yaml](./docker_compose/intel/cpu/xeon/compose.yaml). +Start CodeGen based on TGI service: + ```bash -cd GenAIExamples/CodeGen/docker_compose/intel/cpu/xeon -docker compose up -d +cd GenAIExamples/CodeGen/docker_compose +source set_env.sh +cd intel/cpu/xeon +docker compose --profile codegen-xeon-tgi up -d +``` + +Start CodeGen based on vLLM service: + +```bash +cd GenAIExamples/CodeGen/docker_compose +source set_env.sh +cd intel/cpu/xeon +docker compose --profile codegen-xeon-vllm up -d ``` + Refer to the [Xeon Guide](./docker_compose/intel/cpu/xeon/README.md) for more instructions on building docker images from source. ### Deploy CodeGen on Kubernetes using Helm Chart @@ -161,6 +198,15 @@ Two ways of consuming CodeGen Service: -d '{"messages": "Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception."}' ``` + If the user wants a CodeGen service with RAG and Agents based on dedicated documentation. + + ```bash + curl http://localhost:7778/v1/codegen \ + -H "Content-Type: application/json" \ + -d '{"agents_flag": "True", "index_name": "my_API_document", "messages": "Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception."}' + + ``` + 2. Access via frontend To access the frontend, open the following URL in your browser: http://{host_ip}:5173. diff --git a/CodeGen/codegen.py b/CodeGen/codegen.py index 16db9aa262..af520211a6 100644 --- a/CodeGen/codegen.py +++ b/CodeGen/codegen.py @@ -3,8 +3,9 @@ import asyncio import os +import ast -from comps import MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceRoleType, ServiceType +from comps import MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceRoleType, ServiceType, CustomLogger from comps.cores.mega.utils import handle_message from comps.cores.proto.api_protocol import ( ChatCompletionRequest, @@ -16,20 +17,107 @@ from comps.cores.proto.docarray import LLMParams from fastapi import Request from fastapi.responses import StreamingResponse +from langchain.prompts import PromptTemplate + +logger = CustomLogger("opea_dataprep_microservice") +logflag = os.getenv("LOGFLAG", False) MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 7778)) + LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0") LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000)) +RETRIEVAL_SERVICE_HOST_IP = os.getenv("RETRIEVAL_SERVICE_HOST_IP", "0.0.0.0") +REDIS_RETRIEVER_PORT = int(os.getenv("REDIS_RETRIEVER_PORT", 7000)) + +TEI_EMBEDDING_HOST_IP = os.getenv("TEI_EMBEDDING_HOST_IP", "0.0.0.0") +EMBEDDER_PORT = int(os.getenv("EMBEDDER_PORT", 6000)) + +grader_prompt = """You are a grader assessing relevance of a retrieved document to a user question. 
\n +Here is the user question: {question} \n +Here is the retrieved document: \n\n {document} \n\n + +If the document contains keywords related to the user question, grade it as relevant. +It does not need to be a stringent test. The goal is to filter out erroneous retrievals. +Rules: +- Do not return the question, the provided document or explanation. +- if this document is relevant to the question, return 'yes' otherwise return 'no'. +- Do not include any other details in your response. +""" + +def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs): + """ + Aligns the inputs based on the service type of the current node. + + Parameters: + - self: Reference to the current instance of the class. + - inputs: Dictionary containing the inputs for the current node. + - cur_node: The current node in the service orchestrator. + - runtime_graph: The runtime graph of the service orchestrator. + - llm_parameters_dict: Dictionary containing the LLM parameters. + - kwargs: Additional keyword arguments. + + Returns: + - inputs: The aligned inputs for the current node. + """ + + # Check if the current service type is EMBEDDING + if self.services[cur_node].service_type == ServiceType.EMBEDDING: + # Store the input query for later use + self.input_query = inputs["query"] + # Set the input for the embedding service + inputs["input"] = inputs["query"] + + # Check if the current service type is RETRIEVER + if self.services[cur_node].service_type == ServiceType.RETRIEVER: + # Extract the embedding from the inputs + embedding = inputs['data'][0]['embedding'] + # Align the inputs for the retriever service + inputs = { + "index_name": llm_parameters_dict["index_name"], + "text": self.input_query, + "embedding": embedding + } + + return inputs + class CodeGenService: def __init__(self, host="0.0.0.0", port=8000): self.host = host self.port = port - self.megaservice = ServiceOrchestrator() + ServiceOrchestrator.align_inputs = align_inputs + self.megaservice_llm = ServiceOrchestrator() + self.megaservice_retriever = ServiceOrchestrator() + self.megaservice_retriever_llm = ServiceOrchestrator() self.endpoint = str(MegaServiceEndpoint.CODE_GEN) def add_remote_service(self): + """ + Adds remote microservices to the service orchestrators and defines the flow between them. 
+ """ + + # Define the embedding microservice + embedding = MicroService( + name="embedding", + host=TEI_EMBEDDING_HOST_IP, + port=EMBEDDER_PORT, + endpoint="/v1/embeddings", + use_remote_service=True, + service_type=ServiceType.EMBEDDING, + ) + + # Define the retriever microservice + retriever = MicroService( + name="retriever", + host=RETRIEVAL_SERVICE_HOST_IP, + port=REDIS_RETRIEVER_PORT, + endpoint="/v1/retrieval", + use_remote_service=True, + service_type=ServiceType.RETRIEVER, + ) + + # Define the LLM microservice llm = MicroService( name="llm", host=LLM_SERVICE_HOST_IP, @@ -38,13 +126,63 @@ def add_remote_service(self): use_remote_service=True, service_type=ServiceType.LLM, ) - self.megaservice.add(llm) + + # Add the microservices to the megaservice_retriever_llm orchestrator and define the flow + self.megaservice_retriever_llm.add(embedding).add(retriever).add(llm) + self.megaservice_retriever_llm.flow_to(embedding, retriever) + self.megaservice_retriever_llm.flow_to(retriever, llm) + + # Add the microservices to the megaservice_retriever orchestrator and define the flow + self.megaservice_retriever.add(embedding).add(retriever) + self.megaservice_retriever.flow_to(embedding, retriever) + + # Add the LLM microservice to the megaservice_llm orchestrator + self.megaservice_llm.add(llm) + + async def read_streaming_response(self, response: StreamingResponse): + """ + Reads the streaming response from a StreamingResponse object. + + Parameters: + - self: Reference to the current instance of the class. + - response: The StreamingResponse object to read from. + + Returns: + - str: The complete response body as a decoded string. + """ + body = b"" # Initialize an empty byte string to accumulate the response chunks + async for chunk in response.body_iterator: + body += chunk # Append each chunk to the body + return body.decode("utf-8") # Decode the accumulated byte string to a regular string async def handle_request(self, request: Request): + """ + Handles the incoming request, processes it through the appropriate microservices, + and returns the response. + + Parameters: + - self: Reference to the current instance of the class. + - request: The incoming request object. + + Returns: + - ChatCompletionResponse: The response from the LLM microservice. 
+        """
+        # Parse the incoming request data
         data = await request.json()
+
+        # Get the stream option from the request data, default to True if not provided
         stream_opt = data.get("stream", True)
-        chat_request = ChatCompletionRequest.parse_obj(data)
+
+        # Validate and parse the chat request data
+        chat_request = ChatCompletionRequest.model_validate(data)
+
+        # Handle the chat messages to generate the prompt
         prompt = handle_message(chat_request.messages)
+
+        # Get the agents flag from the request data, default to False if not provided
+        agents_flag = data.get("agents_flag", False)
+
+        # Define the LLM parameters
         parameters = LLMParams(
             max_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024,
             top_k=chat_request.top_k if chat_request.top_k else 10,
@@ -54,18 +192,88 @@ async def handle_request(self, request: Request):
             presence_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 0.0,
             repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03,
             stream=stream_opt,
+            index_name=chat_request.index_name,
         )
-        result_dict, runtime_graph = await self.megaservice.schedule(
-            initial_inputs={"query": prompt}, llm_parameters=parameters
+
+        # Initialize the initial inputs with the generated prompt
+        initial_inputs = {"query": prompt}
+
+        # Check if an index name is provided in the parameters
+        if parameters.index_name:
+            if agents_flag:
+                # Schedule the retriever megaservice (embedding + retriever)
+                result_ret, runtime_graph = await self.megaservice_retriever.schedule(
+                    initial_inputs=initial_inputs, llm_parameters=parameters
+                )
+
+                # Switch to the LLM-only megaservice for the final generation step
+                megaservice = self.megaservice_llm
+
+                relevant_docs = []
+                for doc in result_ret["retriever/MicroService"]["retrieved_docs"]:
+                    # Create the PromptTemplate for the relevance grader
+                    prompt_agent = PromptTemplate(template=grader_prompt, input_variables=["question", "document"])
+
+                    # Format the template with the input variables
+                    formatted_prompt = prompt_agent.format(question=prompt, document=doc["text"])
+                    initial_inputs_grader = {"query": formatted_prompt}
+
+                    # Schedule the LLM microservice for grading
+                    grade, runtime_graph = await self.megaservice_llm.schedule(
+                        initial_inputs=initial_inputs_grader, llm_parameters=parameters
+                    )
+
+                    for node, response in grade.items():
+                        if isinstance(response, StreamingResponse):
+                            # Read the streaming response
+                            grader_response = await self.read_streaming_response(response)
+
+                            # Replace null with None so the payload can be parsed with ast.literal_eval
+                            grader_response = grader_response.replace("null", "None")
+
+                            # Split the response by "data:" and process each part
+                            for i in grader_response.split("data:"):
+                                if '"text":' in i:
+                                    # Convert the string to a dictionary
+                                    r = ast.literal_eval(i)
+                                    # Check if the grader marked the document as relevant
+                                    if r["choices"][0]["text"] == "yes":
+                                        # Append the document to the relevant_docs list
+                                        relevant_docs.append(doc)
+
+                # Update the initial inputs with the relevant documents
+                if len(relevant_docs) > 0:
+                    print(f"[ CodeGenService - handle_request ] {len(relevant_docs)} relevant document(s) found.")
+                    query = initial_inputs["query"]
+                    initial_inputs = {}
+                    initial_inputs["retrieved_docs"] = relevant_docs
+                    initial_inputs["initial_query"] = query
+
+                else:
+                    print("[ CodeGenService - handle_request ] Could not find any relevant documents. The query will be used as input to the LLM.")
+
+            else:
+                # Use the combined retriever and LLM megaservice
+                megaservice = self.megaservice_retriever_llm
+        else:
+            # Use the LLM megaservice only
+            megaservice = self.megaservice_llm
+
+        # Schedule the final megaservice
+        result_dict, runtime_graph = await megaservice.schedule(
+            initial_inputs=initial_inputs, llm_parameters=parameters
         )
+
         for node, response in result_dict.items():
-            # Here it suppose the last microservice in the megaservice is LLM.
+            # Check if the last microservice in the megaservice is the LLM
             if (
                 isinstance(response, StreamingResponse)
-                and node == list(self.megaservice.services.keys())[-1]
-                and self.megaservice.services[node].service_type == ServiceType.LLM
+                and node == list(megaservice.services.keys())[-1]
+                and megaservice.services[node].service_type == ServiceType.LLM
             ):
                 return response
+
+        # Get the response from the last node in the runtime graph
         last_node = runtime_graph.all_leaves()[-1]
         response = result_dict[last_node]["text"]
         choices = []
@@ -96,4 +304,4 @@ def start(self):
 if __name__ == "__main__":
     chatqna = CodeGenService(port=MEGA_SERVICE_PORT)
     chatqna.add_remote_service()
-    chatqna.start()
+    chatqna.start()
\ No newline at end of file
diff --git a/CodeGen/docker_compose/intel/cpu/xeon/README.md b/CodeGen/docker_compose/intel/cpu/xeon/README.md
index 3cc7a19b3c..fc8b81b45f 100644
--- a/CodeGen/docker_compose/intel/cpu/xeon/README.md
+++ b/CodeGen/docker_compose/intel/cpu/xeon/README.md
@@ -13,28 +13,77 @@ After launching your instance, you can connect to it using SSH (for Linux instan
 
 ## 🚀 Start Microservices and MegaService
 
-The CodeGen megaservice manages a single microservice called LLM within a Directed Acyclic Graph (DAG). In the diagram above, the LLM microservice is a language model microservice that generates code snippets based on the user's input query. The TGI service serves as a text generation interface, providing a RESTful API for the LLM microservice. The CodeGen Gateway acts as the entry point for the CodeGen application, invoking the Megaservice to generate code snippets in response to the user's input query.
+The CodeGen megaservice manages several microservices, including the 'Embedding MicroService', 'Retrieval MicroService' and 'LLM MicroService', within a Directed Acyclic Graph (DAG). In the diagram below, the LLM microservice generates code snippets based on the user's input query. The TGI service serves as a text generation interface, providing a RESTful API for the LLM microservice. Data Preparation allows users to save or update documents and online resources in the vector database; users can upload files or provide URLs, and manage their saved resources. The CodeGen Gateway acts as the entry point for the CodeGen application, invoking the Megaservice to generate code snippets in response to the user's input query.
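+
+Which of these services handle a given request depends on the request body: a plain request goes straight to the LLM, a request that supplies an `index_name` runs the Embedding → Retrieval → LLM flow, and adding `agents_flag` additionally grades each retrieved document for relevance before generation. The requests below are only a sketch of the three shapes; the host, port, and index name are placeholders to adjust for your deployment:
+
+```bash
+# LLM only (no index)
+curl http://localhost:7778/v1/codegen \
+  -H "Content-Type: application/json" \
+  -d '{"messages": "def print_hello_world():"}'
+
+# RAG: embed the query, retrieve from an index, then generate
+curl http://localhost:7778/v1/codegen \
+  -H "Content-Type: application/json" \
+  -d '{"index_name": "my_API_document", "messages": "def print_hello_world():"}'
+
+# RAG + agents: retrieved documents are graded for relevance before generation
+curl http://localhost:7778/v1/codegen \
+  -H "Content-Type: application/json" \
+  -d '{"agents_flag": "True", "index_name": "my_API_document", "messages": "def print_hello_world():"}'
+```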
The mega flow of the CodeGen application, from user's input query to the application's output response, is as follows: ```mermaid +--- +config: + flowchart: + nodeSpacing: 400 + rankSpacing: 100 + curve: linear + themeVariables: + fontSize: 25px +--- flowchart LR - subgraph CodeGen + %% Colors %% + classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 + classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 + classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 + classDef invisible fill:transparent,stroke:transparent; + style CodeGen-MegaService stroke:#000000 + %% Subgraphs %% + subgraph CodeGen-MegaService["CodeGen-MegaService"] direction LR - A[User] --> |Input query| B[CodeGen Gateway] - B --> |Invoke| Megaservice - subgraph Megaservice["Megaservice"] - direction TB - C((LLM
9000)) -. Post .-> D{{TGI Service
8028}} - end - Megaservice --> |Output| E[Response] + EM([Embedding
MicroService]):::blue + RET([Retrieval
MicroService]):::blue + RER([Agents]):::blue + LLM([LLM
MicroService]):::blue end - - subgraph Legend + subgraph User Interface direction LR - G([Microservice]) ==> H([Microservice]) - I([Microservice]) -.-> J{{Server API}} + a([Submit Query Tab]):::orchid + UI([UI server]):::orchid + Ingest([Manage Resources]):::orchid end + + CLIP_EM{{Embedding
service}} + VDB{{Vector DB}} + V_RET{{Retriever
service}} + Ingest{{Ingest data}} + DP([Data Preparation]):::blue + LLM_gen{{TGI Service}} + GW([CodeGen GateWay]):::orange + + %% Data Preparation flow + %% Ingest data flow + direction LR + Ingest[Ingest data] --> UI + UI --> DP + DP <-.-> CLIP_EM + + %% Questions interaction + direction LR + a[User Input Query] --> UI + UI --> GW + GW <==> CodeGen-MegaService + EM ==> RET + RET ==> RER + RER ==> LLM + + + %% Embedding service flow + direction LR + EM <-.-> CLIP_EM + RET <-.-> V_RET + LLM <-.-> LLM_gen + + direction TB + %% Vector DB interaction + V_RET <-.->VDB + DP <-.->VDB ``` ### Setup Environment Variables @@ -111,6 +160,15 @@ docker compose --profile codegen-xeon-vllm up -d }' ``` + If the user wants a CodeGen service with RAG and Agents based on dedicated documentation. + + ```bash + curl http://localhost:7778/v1/codegen \ + -H "Content-Type: application/json" \ + -d '{"agents_flag": "True", "index_name": "my_API_document", "messages": "Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception."}' + ``` + + ## 🚀 Launch the UI To access the frontend, open the following URL in your browser: `http://{host_ip}:5173`. By default, the UI runs on port 5173 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below: @@ -272,4 +330,4 @@ Then run the command `docker images`, you will have the following Docker Images: - `opea/llm-textgen:latest` - `opea/codegen:latest` - `opea/codegen-ui:latest` -- `opea/codegen-react-ui:latest` (optional) +- `opea/codegen-react-ui:latest` (optional) \ No newline at end of file diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml index 5567d9e368..e88b3e4847 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml +++ b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml @@ -1,7 +1,5 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - services: + tgi-service: image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu container_name: tgi-server @@ -92,10 +90,14 @@ services: - http_proxy=${http_proxy} - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP} + - RETRIEVAL_SERVICE_HOST_IP=${RETRIEVAL_SERVICE_HOST_IP} + - REDIS_RETRIEVER_PORT=${REDIS_RETRIEVER_PORT} + - TEI_EMBEDDING_HOST_IP=${TEI_EMBEDDING_HOST_IP} + - EMBEDDER_PORT=${EMBEDDER_PORT} ipc: host restart: always codegen-xeon-ui-server: - image: ${REGISTRY:-opea}/codegen-ui:${TAG:-latest} + image: ${REGISTRY:-opea}/codegen-gradio-ui:${TAG:-latest} container_name: codegen-xeon-ui-server depends_on: - codegen-xeon-backend-server @@ -106,9 +108,93 @@ services: - https_proxy=${https_proxy} - http_proxy=${http_proxy} - BASIC_URL=${BACKEND_SERVICE_ENDPOINT} + - MEGA_SERVICE_PORT=${MEGA_SERVICE_PORT} + - host_ip=${host_ip} + - DATAPREP_ENDPOINT=${DATAPREP_ENDPOINT} + - DATAPREP_REDIS_PORT=${DATAPREP_REDIS_PORT} ipc: host restart: always - + redis-vector-db: + image: redis/redis-stack:7.2.0-v9 + container_name: redis-vector-db + ports: + - "${REDIS_DB_PORT}:${REDIS_DB_PORT}" + - "${REDIS_INSIGHTS_PORT}:${REDIS_INSIGHTS_PORT}" + dataprep-redis-server: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-redis-server + depends_on: + - redis-vector-db + ports: + - "${DATAPREP_REDIS_PORT}:5000" + environment: + no_proxy: 
${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: ${REDIS_URL} + REDIS_HOST: ${host_ip} + INDEX_NAME: ${INDEX_NAME} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + LOGFLAG: true + restart: unless-stopped + tei-embedding-serving: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + container_name: tei-embedding-serving + entrypoint: /bin/sh -c "apt-get update && apt-get install -y curl && text-embeddings-router --json-output --model-id ${EMBEDDING_MODEL_ID} --auto-truncate" + ports: + - "${TEI_EMBEDDER_PORT:-12000}:80" + volumes: + - "./data:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + host_ip: ${host_ip} + HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + healthcheck: + test: ["CMD", "curl", "-f", "http://${host_ip}:${TEI_EMBEDDER_PORT}/health"] + interval: 10s + timeout: 6s + retries: 48 + tei-embedding-server: + image: ${REGISTRY:-opea}/embedding:${TAG:-latest} + container_name: tei-embedding-server + ports: + - "${EMBEDDER_PORT:-10201}:6000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + EMBEDDING_COMPONENT_NAME: "OPEA_TEI_EMBEDDING" + depends_on: + tei-embedding-serving: + condition: service_healthy + restart: unless-stopped + retriever-redis: + image: ${REGISTRY:-opea}/retriever:${TAG:-latest} + container_name: retriever-redis + depends_on: + - redis-vector-db + ports: + - "${REDIS_RETRIEVER_PORT}:${REDIS_RETRIEVER_PORT}" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: ${REDIS_URL} + REDIS_DB_PORT: ${REDIS_DB_PORT} + REDIS_INSIGHTS_PORT: ${REDIS_INSIGHTS_PORT} + REDIS_RETRIEVER_PORT: ${REDIS_RETRIEVER_PORT} + INDEX_NAME: ${INDEX_NAME} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + LOGFLAG: ${LOGFLAG} + RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_REDIS} + restart: unless-stopped networks: default: - driver: bridge + driver: bridge \ No newline at end of file diff --git a/CodeGen/docker_compose/intel/hpu/gaudi/README.md b/CodeGen/docker_compose/intel/hpu/gaudi/README.md index 133b32f09f..5408e33654 100644 --- a/CodeGen/docker_compose/intel/hpu/gaudi/README.md +++ b/CodeGen/docker_compose/intel/hpu/gaudi/README.md @@ -6,28 +6,77 @@ The default pipeline deploys with vLLM as the LLM serving component. It also pro ## 🚀 Start MicroServices and MegaService -The CodeGen megaservice manages a single microservice called LLM within a Directed Acyclic Graph (DAG). In the diagram above, the LLM microservice is a language model microservice that generates code snippets based on the user's input query. The TGI service serves as a text generation interface, providing a RESTful API for the LLM microservice. The CodeGen Gateway acts as the entry point for the CodeGen application, invoking the Megaservice to generate code snippets in response to the user's input query. +The CodeGen megaservice manages a several microservices including 'Embedding MicroService', 'Retrieval MicroService' and 'LLM MicroService' within a Directed Acyclic Graph (DAG). In the diagram below, the LLM microservice is a language model microservice that generates code snippets based on the user's input query. The TGI service serves as a text generation interface, providing a RESTful API for the LLM microservice. 
Data Preparation allows users to save/update documents or online resources to the vector database. Users can upload files or provide URLs, and manage their saved resources. The CodeGen Gateway acts as the entry point for the CodeGen application, invoking the Megaservice to generate code snippets in response to the user's input query. The mega flow of the CodeGen application, from user's input query to the application's output response, is as follows: ```mermaid +--- +config: + flowchart: + nodeSpacing: 400 + rankSpacing: 100 + curve: linear + themeVariables: + fontSize: 25px +--- flowchart LR - subgraph CodeGen + %% Colors %% + classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 + classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 + classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 + classDef invisible fill:transparent,stroke:transparent; + style CodeGen-MegaService stroke:#000000 + %% Subgraphs %% + subgraph CodeGen-MegaService["CodeGen-MegaService"] direction LR - A[User] --> |Input query| B[CodeGen Gateway] - B --> |Invoke| Megaservice - subgraph Megaservice["Megaservice"] - direction TB - C((LLM
9000)) -. Post .-> D{{TGI Service
8028}} - end - Megaservice --> |Output| E[Response] + EM([Embedding
MicroService]):::blue + RET([Retrieval
MicroService]):::blue + RER([Agents]):::blue + LLM([LLM
MicroService]):::blue end - - subgraph Legend + subgraph User Interface direction LR - G([Microservice]) ==> H([Microservice]) - I([Microservice]) -.-> J{{Server API}} + a([Submit Query Tab]):::orchid + UI([UI server]):::orchid + Ingest([Manage Resources]):::orchid end + + CLIP_EM{{Embedding
service}} + VDB{{Vector DB}} + V_RET{{Retriever
service}} + Ingest{{Ingest data}} + DP([Data Preparation]):::blue + LLM_gen{{TGI Service}} + GW([CodeGen GateWay]):::orange + + %% Data Preparation flow + %% Ingest data flow + direction LR + Ingest[Ingest data] --> UI + UI --> DP + DP <-.-> CLIP_EM + + %% Questions interaction + direction LR + a[User Input Query] --> UI + UI --> GW + GW <==> CodeGen-MegaService + EM ==> RET + RET ==> RER + RER ==> LLM + + + %% Embedding service flow + direction LR + EM <-.-> CLIP_EM + RET <-.-> V_RET + LLM <-.-> LLM_gen + + direction TB + %% Vector DB interaction + V_RET <-.->VDB + DP <-.->VDB ``` ### Setup Environment Variables @@ -104,6 +153,15 @@ docker compose --profile codegen-gaudi-vllm up -d }' ``` + If the user wants a CodeGen service with RAG and Agents based on dedicated documentation. + + ```bash + curl http://localhost:7778/v1/codegen \ + -H "Content-Type: application/json" \ + -d '{"agents_flag": "True", "index_name": "my_API_document", "messages": "Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception."}' + ``` + + ## 🚀 Launch the Svelte Based UI To access the frontend, open the following URL in your browser: `http://{host_ip}:5173`. By default, the UI runs on port 5173 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below: diff --git a/CodeGen/docker_compose/set_env.sh b/CodeGen/docker_compose/set_env.sh index cb9e742847..559f00cf2a 100644 --- a/CodeGen/docker_compose/set_env.sh +++ b/CodeGen/docker_compose/set_env.sh @@ -6,8 +6,9 @@ pushd "../../" > /dev/null source .set_env.sh popd > /dev/null -export host_ip=$(hostname -I | awk '{print $1}') +export your_ip=$(hostname -I | awk '{print $1}') +export host_ip=$(hostname -I | awk '{print $1}') if [ -z "${HUGGINGFACEHUB_API_TOKEN}" ]; then echo "Error: HUGGINGFACEHUB_API_TOKEN is not set. 
Please set HUGGINGFACEHUB_API_TOKEN" fi @@ -18,9 +19,34 @@ fi export no_proxy=${no_proxy},${host_ip} -export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct" +export http_proxy=${http_proxy} +export https_proxy=${https_proxy} + +export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-32B-Instruct" +export LLM_SERVICE_PORT=9000 export LLM_ENDPOINT="http://${host_ip}:8028" -export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} +export TGI_LLM_ENDPOINT="http://${host_ip}:8028" + +export MEGA_SERVICE_PORT=7778 +export MEGA_SERVICE_HOST_IP=${host_ip} export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:7778/v1/codegen" -export MODEL_CACHE="./data" + +export REDIS_DB_PORT=6379 +export REDIS_INSIGHTS_PORT=8001 +export REDIS_URL="redis://${host_ip}:${REDIS_DB_PORT}" +export REDIS_RETRIEVER_PORT=7000 +export RETRIEVAL_SERVICE_HOST_IP=${host_ip} +export RETRIEVER_COMPONENT_NAME="OPEA_RETRIEVER_REDIS" +export INDEX_NAME="CodeGen" + +export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" +export EMBEDDER_PORT=6000 +export TEI_EMBEDDER_PORT=8090 +export TEI_EMBEDDING_HOST_IP=${host_ip} +export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}" + +export DATAPREP_REDIS_PORT=6007 +export DATAPREP_ENDPOINT="http://${host_ip}:${DATAPREP_REDIS_PORT}/v1/dataprep" +export LOGFLAG=false +export MODEL_CACHE="./data" \ No newline at end of file diff --git a/CodeGen/docker_image_build/build.yaml b/CodeGen/docker_image_build/build.yaml index 3275aa71bf..52ca23b109 100644 --- a/CodeGen/docker_image_build/build.yaml +++ b/CodeGen/docker_image_build/build.yaml @@ -23,6 +23,12 @@ services: dockerfile: ./docker/Dockerfile.react extends: codegen image: ${REGISTRY:-opea}/codegen-react-ui:${TAG:-latest} + codegen-gradio-ui: + build: + context: ../ui + dockerfile: ./docker/Dockerfile.gradio + extends: codegen + image: ${REGISTRY:-opea}/codegen-gradio-ui:${TAG:-latest} llm-textgen: build: context: GenAIComps diff --git a/CodeGen/tests/test_compose_on_gaudi.sh b/CodeGen/tests/test_compose_on_gaudi.sh index c7b6b83f7e..60bd1b6782 100644 --- a/CodeGen/tests/test_compose_on_gaudi.sh +++ b/CodeGen/tests/test_compose_on_gaudi.sh @@ -29,7 +29,8 @@ function build_docker_images() { fi cd $WORKPATH/docker_image_build - git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + # git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + # Download Gaudi vllm of latest tag git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork VLLM_VER=$(git describe --tags "$(git rev-list --tags --max-count=1)") diff --git a/CodeGen/tests/test_compose_on_xeon.sh b/CodeGen/tests/test_compose_on_xeon.sh index 6fc25963ac..aa517f7c98 100644 --- a/CodeGen/tests/test_compose_on_xeon.sh +++ b/CodeGen/tests/test_compose_on_xeon.sh @@ -29,7 +29,7 @@ function build_docker_images() { fi cd $WORKPATH/docker_image_build - git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + # git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git git clone https://github.com/vllm-project/vllm.git && cd vllm VLLM_VER="$(git describe --tags "$(git rev-list --tags --max-count=1)" )" @@ -82,23 +82,35 @@ function validate_services() { local DOCKER_NAME="$4" local INPUT_DATA="$5" - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." 
+ if [[ "$SERVICE_NAME" == "ingest" ]]; then + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$INPUT_DATA" -F index_name=test_redis -H 'Content-Type: multipart/form-data' "$URL") - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Data preparation succeeded..." + else + echo "[ $SERVICE_NAME ] Data preparation failed..." + fi + + else + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi else - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log exit 1 fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 fi sleep 5s } @@ -122,6 +134,14 @@ function validate_microservices() { "llm-textgen-server" \ '{"query":"def print_hello_world():", "max_tokens": 256}' + # Data ingest microservice + validate_services \ + "${ip_address}:6007/v1/dataprep/ingest" \ + "Data preparation succeeded" \ + "ingest" \ + "dataprep-redis-server" \ + 'link_list=["https://www.ces.tech/", "https://modin.readthedocs.io/en/latest/index.html"]' + } function validate_megaservice() { @@ -133,6 +153,14 @@ function validate_megaservice() { "codegen-xeon-backend-server" \ '{"messages": "def print_hello_world():", "max_tokens": 256}' + # Curl the Mega Service with index_name and agents_flag + validate_services \ + "${ip_address}:7778/v1/codegen" \ + "print" \ + "mega-codegen" \ + "codegen-xeon-backend-server" \ + '{ "index_name": "test_redis", "agents_flag": "True", "messages": "def print_hello_world():", "max_tokens": 256}' + } function validate_frontend() { @@ -202,7 +230,7 @@ function main() { validate_microservices "${docker_llm_container_names[${i}]}" validate_megaservice - validate_frontend + # validate_frontend stop_docker "${docker_compose_profiles[${i}]}" sleep 5s @@ -212,3 +240,4 @@ function main() { } main + diff --git a/CodeGen/ui/docker/Dockerfile.gradio b/CodeGen/ui/docker/Dockerfile.gradio new file mode 100644 index 0000000000..11a4f4f581 --- /dev/null +++ b/CodeGen/ui/docker/Dockerfile.gradio @@ -0,0 +1,33 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM python:3.11-slim + +ENV LANG=C.UTF-8 + +ARG ARCH="cpu" + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + default-jre \ + libgl1-mesa-glx \ + libjemalloc-dev \ + wget + +# Install ffmpeg static build +WORKDIR /root +RUN wget https://johnvansickle.com/ffmpeg/builds/ffmpeg-git-amd64-static.tar.xz && \ + mkdir 
ffmpeg-git-amd64-static && tar -xvf ffmpeg-git-amd64-static.tar.xz -C ffmpeg-git-amd64-static --strip-components 1 && \
+    export PATH=/root/ffmpeg-git-amd64-static:$PATH && \
+    cp /root/ffmpeg-git-amd64-static/ffmpeg /usr/local/bin/ && \
+    cp /root/ffmpeg-git-amd64-static/ffprobe /usr/local/bin/
+
+RUN mkdir -p /home/user
+
+COPY gradio /home/user/gradio
+
+RUN pip install --no-cache-dir --upgrade pip setuptools && \
+pip install --no-cache-dir -r /home/user/gradio/requirements.txt
+
+WORKDIR /home/user/gradio
+ENTRYPOINT ["python", "codegen_ui_gradio.py"]
diff --git a/CodeGen/ui/gradio/README.md b/CodeGen/ui/gradio/README.md
new file mode 100644
index 0000000000..9769efb317
--- /dev/null
+++ b/CodeGen/ui/gradio/README.md
@@ -0,0 +1,65 @@
+# CodeGen Gradio UI
+
+This project provides a Gradio-based user interface for the CodeGen application. Users can submit code generation queries and manage the documents and URLs that are used for retrieval.
+
+## Docker
+
+### Build UI Docker Image
+
+To build the frontend Docker image, navigate to the `GenAIExamples/CodeGen/ui` directory and run the following command:
+
+```bash
+cd GenAIExamples/CodeGen/ui
+docker build -t opea/codegen-gradio-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile.gradio .
+```
+
+This command builds the Docker image with the tag `opea/codegen-gradio-ui:latest`. It also passes the proxy settings as build arguments to ensure that the build process can access the internet if you are behind a corporate firewall.
+
+### Run UI Docker Image
+
+To run the frontend Docker image, navigate to the `GenAIExamples/CodeGen/ui/gradio` directory and execute the following commands:
+
+```bash
+cd GenAIExamples/CodeGen/ui/gradio
+
+ip_address=$(hostname -I | awk '{print $1}')
+docker run -d -p 5173:5173 --ipc=host \
+  -e http_proxy=$http_proxy \
+  -e https_proxy=$https_proxy \
+  -e no_proxy=$no_proxy \
+  -e BACKEND_SERVICE_ENDPOINT=http://$ip_address:7778/v1/codegen \
+  opea/codegen-gradio-ui:latest
+```
+
+This command runs the Docker container in detached mode, mapping port 5173 of the host to port 5173 of the container. It also sets several environment variables, including the backend service endpoint, which is required for the frontend to communicate with the backend service.
+
+### Python
+
+To run the frontend application directly using Python, navigate to the `GenAIExamples/CodeGen/ui/gradio` directory and run the following command:
+
+```bash
+cd GenAIExamples/CodeGen/ui/gradio
+python codegen_ui_gradio.py
+```
+
+This command starts the frontend application using Python.
+
+## Additional Information
+
+### Prerequisites
+
+Ensure you have Docker installed and running on your system. Also, make sure you have the necessary proxy settings configured if you are behind a corporate firewall.
+
+### Environment Variables
+
+- `http_proxy`: Proxy setting for HTTP connections.
+- `https_proxy`: Proxy setting for HTTPS connections.
+- `no_proxy`: Comma-separated list of hosts that should be excluded from proxying.
+- `BACKEND_SERVICE_ENDPOINT`: The endpoint of the backend service that the frontend will communicate with.
+
+### Troubleshooting
+
+- Docker Build Issues: If you encounter issues while building the Docker image, ensure that your proxy settings are correctly configured and that you have internet access.
+- Docker Run Issues: If the Docker container fails to start, check the environment variables and ensure that the backend service is running and accessible (see the quick check below).
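+
+If the container is up but the UI cannot reach the backend, a quick way to isolate the problem is to query the backend endpoint directly. This is only a sketch; substitute your own host and port if they differ from the defaults used above:
+
+```bash
+ip_address=$(hostname -I | awk '{print $1}')
+curl http://$ip_address:7778/v1/codegen \
+  -H "Content-Type: application/json" \
+  -d '{"messages": "def print_hello_world():"}'
+```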
+ +This README file provides detailed instructions and explanations for building and running the Dockerized frontend application, as well as running it directly using Python. It also highlights the key features of the project and provides additional information for troubleshooting and configuring the environment. diff --git a/CodeGen/ui/gradio/codegen_ui_gradio.py b/CodeGen/ui/gradio/codegen_ui_gradio.py new file mode 100644 index 0000000000..cb90288cc5 --- /dev/null +++ b/CodeGen/ui/gradio/codegen_ui_gradio.py @@ -0,0 +1,401 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# This is a Gradio app that includes two tabs: one for code generation and another for resource management. +# The resource management tab has been updated to allow file uploads, deletion, and a table listing all the files. +# Additionally, three small text boxes have been added for managing file dataframe parameters. + +import argparse +import os +from pathlib import Path +import gradio as gr +import requests +import pandas as pd +import os +import uvicorn +import json +import argparse +# from utils import build_logger, make_temp_image, server_error_msg, split_video +from urllib.parse import urlparse +from pathlib import Path +from fastapi import FastAPI +# from fastapi.responses import JSONResponse, StreamingResponse +from fastapi.staticfiles import StaticFiles + +# logger = build_logger("gradio_web_server", "gradio_web_server.log") +logflag = os.getenv("LOGFLAG", False) + +# create a FastAPI app +app = FastAPI() +cur_dir = os.getcwd() +static_dir = Path(os.path.join(cur_dir, "static/")) +tmp_dir = Path(os.path.join(cur_dir, "split_tmp_videos/")) + +Path(static_dir).mkdir(parents=True, exist_ok=True) +app.mount("/static", StaticFiles(directory=static_dir), name="static") + +tmp_upload_folder = "/tmp/gradio/" + + + +host_ip = os.getenv("host_ip") +DATAPREP_REDIS_PORT = os.getenv("DATAPREP_REDIS_PORT", 6007) +DATAPREP_ENDPOINT = os.getenv("DATAPREP_ENDPOINT", f"http://{host_ip}:{DATAPREP_REDIS_PORT}/v1/dataprep") +MEGA_SERVICE_PORT = os.getenv("MEGA_SERVICE_PORT", 7778) + +backend_service_endpoint = os.getenv( + "BACKEND_SERVICE_ENDPOINT", f"http://{host_ip}:{MEGA_SERVICE_PORT}/v1/codegen" + ) + +dataprep_ingest_endpoint = f"{DATAPREP_ENDPOINT}/ingest" +dataprep_get_files_endpoint = f"{DATAPREP_ENDPOINT}/get" +dataprep_delete_files_endpoint = f"{DATAPREP_ENDPOINT}/delete" +dataprep_get_indices_endpoint = f"{DATAPREP_ENDPOINT}/indices" + + + +# Define the functions that will be used in the app +def conversation_history(prompt, index, use_agent, history): + # Print the language and prompt, and return a placeholder code + print(f"Generating code for prompt: {prompt} using index: {index} and use_agent is {use_agent}") + history.append([prompt, ""]) + response_generator = generate_code(prompt, index, use_agent) + for token in response_generator: + history[-1][-1] += token + yield history + + +def upload_media(media, index=None, chunk_size=1500, chunk_overlap=100): + media = media.strip().split("\n") + print("Files passed is ", media, flush=True) + if not chunk_size: + chunk_size = 1500 + if not chunk_overlap: + chunk_overlap = 100 + + requests = [] + if type(media) is list: + for file in media: + file_ext = os.path.splitext(file)[-1] + if is_valid_url(file): + print(file, " is valid URL") + print("Ingesting URL...") + value = ingest_url(file, index, chunk_size, chunk_overlap) + requests.append(value) + yield value + elif file_ext in ['.pdf', '.txt']: + print("Ingesting File...") + value = 
ingest_file(file, index, chunk_size, chunk_overlap) + requests.append(value) + yield value + else: + print(file, "File type not supported") + yield ( + gr.Textbox( + visible=True, + value="Your file extension type is not supported.", + ) + ) + return + yield requests + + else: + file_ext = os.path.splitext(media)[-1] + if is_valid_url(media): + value = ingest_url(media, index, chunk_size, chunk_overlap) + yield value + elif file_ext in ['.pdf', '.txt']: + print("Ingesting File...") + value = ingest_file(media, index, chunk_size, chunk_overlap) + # print("Return value is: ", value, flush=True) + yield value + else: + print(media, "File type not supported") + yield ( + gr.Textbox( + visible=True, + value="Your file extension type is not supported.", + ) + ) + return + +def generate_code(query, index=None, use_agent=False): + if index is None or index == "None": + input_dict = {"messages": query, "agents_flag": use_agent} + else: + input_dict = {"messages": query, "index_name": index, "agents_flag": use_agent} + + print("Query is ", input_dict) + headers = {"Content-Type": "application/json"} + + response = requests.post(url=backend_service_endpoint, headers=headers, data=json.dumps(input_dict), stream=True) + + for line in response.iter_lines(): + if line: + line = line.decode('utf-8') + if line.startswith("data: "): # Only process lines starting with "data: " + json_part = line[len("data: "):] # Remove the "data: " prefix + if json_part.strip() == "[DONE]": # Ignore the DONE marker + continue + try: + json_obj = json.loads(json_part) # Convert to dictionary + if "choices" in json_obj: + for choice in json_obj["choices"]: + if "text" in choice: + # Yield each token individually + yield choice["text"] + except json.JSONDecodeError: + print("Error parsing JSON:", json_part) + + +def ingest_file(file, index=None, chunk_size=100, chunk_overlap=150): + headers = { + # "Content-Type: multipart/form-data" + } + file_input = {"files": open(file, "rb")} + + if index: + print("Index is", index) + data = {"index_name": index, "chunk_size": chunk_size, "chunk_overlap": chunk_overlap} + else: + data = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap} + + print("Calling Request Now!") + response = requests.post(url=dataprep_ingest_endpoint, headers=headers, files=file_input, data=data) + # print("Ingest Files", response) + print(response.text) + + # table = update_table() + return response.text + +def ingest_url(url, index=None, chunk_size=100, chunk_overlap=150): + print("URL is ", url) + url = str(url) + if not is_valid_url(url): + print("Invalid URL") + # yield ( + # gr.Textbox( + # visible=True, + # value="Invalid URL entered. 
Please enter a valid URL", + # ) + # ) + return + headers = { + # "Content-Type: multipart/form-data" + } + + if index: + url_input = {"link_list": json.dumps([url]), "index_name": index, "chunk_size": chunk_size, "chunk_overlap": chunk_overlap} + else: + url_input = {"link_list": json.dumps([url]), "chunk_size": chunk_size, "chunk_overlap": chunk_overlap} + response = requests.post(url=dataprep_ingest_endpoint, headers=headers, data=url_input) + # print("Ingest URL", response) + # table = update_table() + return response.text + + +def is_valid_url(url): + url = str(url) + try: + result = urlparse(url) + return all([result.scheme, result.netloc]) + except ValueError: + return False + + + +# Initialize the file list +file_list = [] + +# def update_files(file): +# # Add the uploaded file to the file list +# file_list.append(file.name) +# file_df["Files"] = file_list +# return file_df + + +def get_files(index=None): + headers = { + # "Content-Type: multipart/form-data" + } + if index == "All Files": + index = None + + if index: + index = {"index_name": index} + response = requests.post(url=dataprep_get_files_endpoint, headers=headers, data=index) + print("Get files with ", index, response) + table = response.json() + return table + else: + # print("URL IS ", dataprep_get_files_endpoint) + response = requests.post(url=dataprep_get_files_endpoint, headers=headers) + print("Get files ", response) + table = response.json() + return table + +def update_table(index=None): + if index == "All Files": + index = None + files = get_files(index) + print("Files is ", files) + if len(files) == 0: + df = pd.DataFrame(files, columns=["Files"]) + return df + else: + df = pd.DataFrame(files) + return df + +def update_indices(): + indices = get_indices() + df = pd.DataFrame(indices, columns=["File Databases"]) + return df + +def delete_file(file, index=None): + # Remove the selected file from the file list + headers = { + # "Content-Type: application/json" + } + print("URL IS ", dataprep_delete_files_endpoint) + if index: + file_input = {"files": open(file, "rb"), "index_name": index} + else: + file_input = {"files": open(file, "rb")} + response = requests.post(url=dataprep_delete_files_endpoint, headers=headers, data=file_input) + print("Delete file ", response) + table = update_table() + return response.text + +def delete_all_files(index=None): + # Remove all files from the file list + headers = { + # "Content-Type: application/json" + } + response = requests.post(url=dataprep_delete_files_endpoint, headers=headers, data='{"file_path": "all"}') + print("Delete all files ", response) + table = update_table() + + return response.text + +def get_indices(): + headers = { + # "Content-Type: application/json" + } + response = requests.post(url=dataprep_get_indices_endpoint, headers=headers) + print("Get Indices", response) + indices = response.json() + return indices + +def update_indices_dropdown(): + indices = ["None"] + get_indices() + new_dd = gr.update(choices=indices, value="None") + return new_dd + + +def get_file_names(files): + file_str = "" + if not files: + return file_str + + for file in files: + file_str += file + '\n' + file_str.strip() + return file_str + + +# Define UI components +with gr.Blocks() as ui: + with gr.Tab("Code Generation"): + gr.Markdown("### Generate Code from Natural Language") + chatbot = gr.Chatbot(label="Chat History") + prompt_input = gr.Textbox(label="Enter your query") + with gr.Column(): + with gr.Row(scale=8): + # indices = ["None"] + get_indices() + database_dropdown = 
gr.Dropdown(choices=get_indices(), label="Select Index", value="None") + with gr.Row(scale=1): + db_refresh_button = gr.Button("Refresh", variant="primary") + db_refresh_button.click(update_indices_dropdown, outputs=database_dropdown) + use_agent = gr.Checkbox(label="Use Agent", container=False) + + generate_button = gr.Button("Generate Code") + + # Connect the generate button to the conversation_history function + generate_button.click(conversation_history, inputs=[prompt_input, database_dropdown, use_agent, chatbot], outputs=chatbot) + + with gr.Tab("Resource Management"): + # File management components + # url_button = gr.Button("Process") + with gr.Row(): + with gr.Column(scale=1): + index_name_input = gr.Textbox(label="Index Name") + chunk_size_input = gr.Textbox(label="Chunk Size", value="1500", placeholder="Enter an integer (default: 1500)") + chunk_overlap_input = gr.Textbox(label="Chunk Overlap", value="100", placeholder="Enter an integer (default: 100)") + with gr.Column(scale=3): + file_upload = gr.File(label="Upload Files", file_count="multiple") + url_input = gr.Textbox(label="Media to be ingested (Append URL's in a new line)") + upload_button = gr.Button("Upload", variant="primary") + upload_status = gr.Textbox(label="Upload Status") + file_upload.change(get_file_names, inputs=file_upload, outputs=url_input) + with gr.Column(scale=1): + # table_dropdown = gr.Dropdown(indices) + # file_table = gr.Dataframe(interactive=False, value=update_table()) + file_table = gr.Dataframe(interactive=False, value=update_indices()) + refresh_button = gr.Button("Refresh", variant="primary", size="sm") + refresh_button.click(update_indices, outputs=file_table) + # refresh_button.click(update_indices, outputs=database_dropdown) + # table_dropdown.change(fn=update_table, inputs=table_dropdown, outputs=file_table) + # upload_button.click(upload_media, inputs=[file_upload, index_name_input, chunk_size_input, chunk_overlap_input], outputs=file_table) + upload_button.click(upload_media, inputs=[url_input, index_name_input, chunk_size_input, chunk_overlap_input], outputs=upload_status) + + delete_all_button = gr.Button("Delete All", variant="primary", size="sm") + delete_all_button.click(delete_all_files, outputs=upload_status) + + + + # delete_button = gr.Button("Delete Index") + + # selected_file_output = gr.Textbox(label="Selected File") + # delete_button.click(delete_file, inputs=indices, outputs=upload_status) + + + +ui.queue() +app = gr.mount_gradio_app(app, ui, path="/") +share = False +enable_queue = True + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--host", type=str, default="0.0.0.0") + parser.add_argument("--port", type=int, default=os.getenv("UI_PORT", 5173)) + parser.add_argument("--concurrency-count", type=int, default=20) + parser.add_argument("--share", action="store_true") + + host_ip = os.getenv("host_ip") + DATAPREP_REDIS_PORT = os.getenv("DATAPREP_REDIS_PORT", 6007) + DATAPREP_ENDPOINT = os.getenv("DATAPREP_ENDPOINT", f"http://{host_ip}:{DATAPREP_REDIS_PORT}/v1/dataprep") + MEGA_SERVICE_PORT = os.getenv("MEGA_SERVICE_PORT", 7778) + + + backend_service_endpoint = os.getenv( + "BACKEND_SERVICE_ENDPOINT", f"http://{host_ip}:{MEGA_SERVICE_PORT}/v1/codegen" + ) + + # dataprep_ingest_endpoint = f"{DATAPREP_ENDPOINT}/ingest" + # dataprep_get_files_endpoint = f"{DATAPREP_ENDPOINT}/get" + # dataprep_delete_files_endpoint = f"{DATAPREP_ENDPOINT}/delete" + # dataprep_get_indices_endpoint = f"{DATAPREP_ENDPOINT}/indices" + + + args = 
parser.parse_args() + # logger.info(f"args: {args}") + global gateway_addr + gateway_addr = backend_service_endpoint + global dataprep_ingest_addr + dataprep_ingest_addr = dataprep_ingest_endpoint + global dataprep_get_files_addr + dataprep_get_files_addr = dataprep_get_files_endpoint + + + uvicorn.run(app, host=args.host, port=args.port) diff --git a/CodeGen/ui/gradio/requirements.txt b/CodeGen/ui/gradio/requirements.txt new file mode 100644 index 0000000000..2a4c8e1a30 --- /dev/null +++ b/CodeGen/ui/gradio/requirements.txt @@ -0,0 +1,4 @@ +gradio==5.22.0 +numpy==1.26.4 +opencv-python==4.10.0.82 +Pillow==10.3.0 diff --git a/CodeGen/ui/svelte/.env b/CodeGen/ui/svelte/.env index 0bf85fa876..2efb56c8f5 100644 --- a/CodeGen/ui/svelte/.env +++ b/CodeGen/ui/svelte/.env @@ -1 +1 @@ -BASIC_URL = 'http://backend_address:7778/v1/codegen' +BASIC_URL = 'http://10.98.56.44:7778/v1/codegen' From b8ec015ed47fdb29661dd00c617c4ae3bead9c23 Mon Sep 17 00:00:00 2001 From: okhleif-IL Date: Wed, 2 Apr 2025 23:14:39 +0000 Subject: [PATCH 19/22] added error handling for exceeded token size Signed-off-by: okhleif-IL --- CodeGen/ui/gradio/codegen_ui_gradio.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/CodeGen/ui/gradio/codegen_ui_gradio.py b/CodeGen/ui/gradio/codegen_ui_gradio.py index 770608a0c9..d0e351960a 100644 --- a/CodeGen/ui/gradio/codegen_ui_gradio.py +++ b/CodeGen/ui/gradio/codegen_ui_gradio.py @@ -140,9 +140,11 @@ def generate_code(query, index=None, use_agent=False): print("Query is ", input_dict) headers = {"Content-Type": "application/json"} - response = requests.post(url=backend_service_endpoint, headers=headers, data=json.dumps(input_dict), stream=True) + response = requests.post(url=backend_service_endpoint, headers=headers, data=json.dumps(input_dict), stream=True) + line_count = 0 for line in response.iter_lines(): + line_count += 1 if line: line = line.decode('utf-8') if line.startswith("data: "): # Only process lines starting with "data: " @@ -160,6 +162,10 @@ def generate_code(query, index=None, use_agent=False): yield choice["text"] except json.JSONDecodeError: print("Error parsing JSON:", json_part) + + if line_count == 0: + yield f"Something went wrong, No Response Generated! \nIf you are using an Index, try uploading your media again with a smaller chunk size to avoid exceeding the token max. \ + \nOr, check the Use Agent box and try again." 
def ingest_file(file, index=None, chunk_size=100, chunk_overlap=150): @@ -289,7 +295,6 @@ def get_indices(): headers = { # "Content-Type: application/json" } - print("URL IS ", dataprep_get_indices_endpoint) response = requests.post(url=dataprep_get_indices_endpoint, headers=headers) indices = ["None"] print("Get Indices", response) From 18445c5625feef1cbb1ce7ae9119c9aad365167f Mon Sep 17 00:00:00 2001 From: okhleif-IL Date: Thu, 3 Apr 2025 17:47:35 +0000 Subject: [PATCH 20/22] xeon --> gaudi Signed-off-by: okhleif-IL --- CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml b/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml index c2732fcc96..afe6d7ab35 100644 --- a/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml @@ -114,7 +114,7 @@ services: - EMBEDDER_PORT=${EMBEDDER_PORT} ipc: host restart: always - codegen-xeon-ui-server: + codegen-gaudi-ui-server: image: ${REGISTRY:-opea}/codegen-gradio-ui:${TAG:-latest} container_name: codegen-xeon-ui-server depends_on: From f8178a59fdce16c7d3e478699086751b8ee3d9cd Mon Sep 17 00:00:00 2001 From: okhleif-IL Date: Thu, 3 Apr 2025 17:53:05 +0000 Subject: [PATCH 21/22] made tests like codegen ragagents branch Signed-off-by: okhleif-IL --- CodeGen/tests/test_compose_on_gaudi.sh | 53 +++++++------------------- CodeGen/tests/test_compose_on_xeon.sh | 4 +- 2 files changed, 15 insertions(+), 42 deletions(-) diff --git a/CodeGen/tests/test_compose_on_gaudi.sh b/CodeGen/tests/test_compose_on_gaudi.sh index 2ce5b0ec87..a64f6431a9 100644 --- a/CodeGen/tests/test_compose_on_gaudi.sh +++ b/CodeGen/tests/test_compose_on_gaudi.sh @@ -83,34 +83,23 @@ function validate_services() { local DOCKER_NAME="$4" local INPUT_DATA="$5" - if [[ "$SERVICE_NAME" == "ingest" ]]; then - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$INPUT_DATA" -F index_name=test_redis -H 'Content-Type: multipart/form-data' "$URL") + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Data preparation succeeded..." - else - echo "[ $SERVICE_NAME ] Data preparation failed..." - fi + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - else - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." else - echo "[ $SERVICE_NAME ] HTTP status is not 200. 
Received status was $HTTP_STATUS" + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log exit 1 fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 fi sleep 5s } @@ -134,14 +123,6 @@ function validate_microservices() { "llm-textgen-gaudi-server" \ '{"query":"def print_hello_world():"}' - # Data ingest microservice - validate_services \ - "${ip_address}:6007/v1/dataprep/ingest" \ - "Data preparation succeeded" \ - "ingest" \ - "dataprep-redis-server" \ - 'link_list=["https://www.ces.tech/", "https://modin.readthedocs.io/en/latest/index.html"]' - } function validate_megaservice() { @@ -153,14 +134,6 @@ function validate_megaservice() { "codegen-gaudi-backend-server" \ '{"messages": "def print_hello_world():"}' - # Curl the Mega Service with index_name and agents_flag - validate_services \ - "${ip_address}:7778/v1/codegen" \ - "print" \ - "mega-codegen" \ - "codegen-xeon-backend-server" \ - '{ "index_name": "test_redis", "agents_flag": "True", "messages": "def print_hello_world():", "max_tokens": 256}' - } function validate_frontend() { @@ -229,7 +202,7 @@ function main() { validate_microservices "${docker_llm_container_names[${i}]}" validate_megaservice - # validate_frontend + validate_frontend stop_docker "${docker_compose_profiles[${i}]}" sleep 5s @@ -238,4 +211,4 @@ function main() { echo y | docker system prune } -main +main \ No newline at end of file diff --git a/CodeGen/tests/test_compose_on_xeon.sh b/CodeGen/tests/test_compose_on_xeon.sh index 670d071600..1049308724 100644 --- a/CodeGen/tests/test_compose_on_xeon.sh +++ b/CodeGen/tests/test_compose_on_xeon.sh @@ -92,6 +92,7 @@ function validate_services() { fi else + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") if [ "$HTTP_STATUS" -eq 200 ]; then echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." @@ -238,5 +239,4 @@ function main() { echo y | docker system prune } -main - +main \ No newline at end of file From 21cf358ec1d0b47926420f11c5c92f70aaffbbe7 Mon Sep 17 00:00:00 2001 From: okhleif-IL Date: Thu, 3 Apr 2025 17:55:08 +0000 Subject: [PATCH 22/22] added back \n Signed-off-by: okhleif-IL --- CodeGen/tests/test_compose_on_gaudi.sh | 2 +- CodeGen/tests/test_compose_on_xeon.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CodeGen/tests/test_compose_on_gaudi.sh b/CodeGen/tests/test_compose_on_gaudi.sh index a64f6431a9..60bd1b6782 100644 --- a/CodeGen/tests/test_compose_on_gaudi.sh +++ b/CodeGen/tests/test_compose_on_gaudi.sh @@ -211,4 +211,4 @@ function main() { echo y | docker system prune } -main \ No newline at end of file +main diff --git a/CodeGen/tests/test_compose_on_xeon.sh b/CodeGen/tests/test_compose_on_xeon.sh index 1049308724..a74b78f6cd 100644 --- a/CodeGen/tests/test_compose_on_xeon.sh +++ b/CodeGen/tests/test_compose_on_xeon.sh @@ -239,4 +239,4 @@ function main() { echo y | docker system prune } -main \ No newline at end of file +main