diff --git a/GraphRAG/docker_compose/intel/cpu/xeon/GraphRAG_LLM_notes.md b/GraphRAG/docker_compose/intel/cpu/xeon/GraphRAG_LLM_notes.md
new file mode 100644
index 0000000000..e51a5022e4
--- /dev/null
+++ b/GraphRAG/docker_compose/intel/cpu/xeon/GraphRAG_LLM_notes.md
@@ -0,0 +1,109 @@
+# About GraphRAG LLMs
+
+## Overview
+
+GraphRAG uses three distinct LLMs, each optimized for different tasks in the pipeline:
+
+1. Dataprep LLM
+2. Retriever LLM
+3. Final LLM
+
+## 1. Dataprep LLM
+
+Used during the data ingestion phase to:
+
+- Process and understand document structure
+- Extract entities and relationships between entities
+- Generate and store community summaries in Neo4j:
+
+```python
+# neo4j_llamaindex.py
+async def generate_community_summary(self, text):
+ """Generate summary for a given text using an LLM."""
+ messages = [
+ ChatMessage(
+ role="system",
+ content=(
+ "You are provided with a set of relationships from a knowledge graph... "
+ "Your task is to create a summary of these relationships..."
+ ),
+        ),
+        ChatMessage(role="user", content=text),
+    ]
+    trimmed_messages = messages  # context-window trimming of the messages is elided in this excerpt
+    response = await self.llm.achat(trimmed_messages)
+```
+
+**Key Requirements:**
+
+- High-quality model for accurate relationship understanding
+- Larger context window for document processing
+- Can be slower since it's one-time processing
+
+## 2. Retriever LLM
+
+Used during query processing to:
+
+- Evaluate relevance of pre-computed community summaries
+- Generate specific answers from relevant communities
+- Process multiple communities in parallel
+
+```python
+def generate_answer_from_summary(self, community_summary, query):
+ """Generate an answer from a community summary based on a given query using LLM."""
+ prompt = (
+ f"Given the community summary: {community_summary}, "
+ f"how would you answer the following query? Query: {query}"
+    )
+    # The full implementation wraps the prompt into ChatMessage objects before the call.
+    messages = [ChatMessage(role="system", content=prompt)]
+    response = self._llm.chat(messages)
+```
+
+**Key Requirements:**
+
+- Fast inference for real-time processing
+- Efficient batch processing capabilities
+- Balance between quality and speed
+
+## 3. Final LLM
+
+Used as the last step to:
+
+- Process all retriever-generated answers
+- Synthesize information from multiple communities
+- Generate coherent final response
+
+```python
+# In graphrag.py
+llm = MicroService(
+ name="llm",
+ host=LLM_SERVER_HOST_IP,
+ port=LLM_SERVER_PORT,
+ endpoint="/v1/chat/completions",
+ service_type=ServiceType.LLM,
+)
+```
+
+**Key Requirements:**
+
+- Good at synthesizing multiple sources
+- Strong natural language generation
+- Maintains context across multiple inputs
+
+## Data Flow
+
+1. **Ingestion Phase**
+
+ - Documents → Dataprep LLM → Community Summaries
+ - Summaries stored in Neo4j
+
+2. **Query Phase**
+ - Query → Retriever LLM → Individual Community Answers
+ - Answers → Final LLM → Coherent Response
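+
+As a concrete end-to-end sketch (assuming the default Xeon deployment from the accompanying README, with dataprep exposed on port 6004 and the backend on port 8888; `my_document.txt` is a placeholder file name), the two phases map to two requests:
+
+```bash
+# Ingestion phase: dataprep extracts the knowledge graph and stores community summaries in Neo4j
+curl -X POST "http://${host_ip}:6004/v1/dataprep/ingest" \
+  -H "Content-Type: multipart/form-data" \
+  -F "files=@./my_document.txt"
+
+# Query phase: the backend runs retrieval (retriever LLM) and final generation (final LLM)
+curl "http://${host_ip}:8888/v1/graphrag" \
+  -H "Content-Type: application/json" \
+  -d '{"messages": [{"role": "user", "content": "What are the main themes in the dataset?"}]}'
+```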
+
+## Configuration
+
+Each LLM can be configured independently through environment variables:
+
+- `DATAPREP_LLM_ENDPOINT` and `DATAPREP_LLM_MODEL_ID`
+- `RETRIEVER_LLM_ENDPOINT` and `RETRIEVER_LLM_MODEL_ID`
+- `FINAL_LLM_ENDPOINT` and `FINAL_LLM_MODEL_ID`
+
+This allows for optimization of each LLM for its specific task in the pipeline.
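+
+For example, the Xeon quick start in the README points the dataprep and retriever LLMs at a remote OpenAI-compatible endpoint while the final LLM is served locally by vLLM. A minimal sketch (the endpoint URL and model IDs below are the illustrative defaults from the README):
+
+```bash
+# Dataprep and retriever LLMs: remote OpenAI-compatible endpoint (illustrative values)
+export DATAPREP_LLM_ENDPOINT="https://openrouter.ai/api"
+export DATAPREP_LLM_MODEL_ID="anthropic/claude-3-haiku"
+export RETRIEVER_LLM_ENDPOINT="https://openrouter.ai/api"
+export RETRIEVER_LLM_MODEL_ID="anthropic/claude-3-haiku"
+
+# Final LLM: served locally by vLLM on Xeon, so only the model ID is set here
+export FINAL_LLM_MODEL_ID="Qwen/Qwen2.5-0.5B-Instruct"
+```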
diff --git a/GraphRAG/docker_compose/intel/cpu/xeon/README.md b/GraphRAG/docker_compose/intel/cpu/xeon/README.md
new file mode 100644
index 0000000000..28afaa1453
--- /dev/null
+++ b/GraphRAG/docker_compose/intel/cpu/xeon/README.md
@@ -0,0 +1,309 @@
+# GraphRAG Application
+
+While naive RAG works well at fetching precise information, it fails on global questions directed at an entire text corpus, such as "What are the main themes in the dataset?".
+GraphRAG was introduced in the Microsoft paper "From Local to Global: A Graph RAG Approach to Query-Focused Summarization". The key elements are:
+
+- Uses an LLM to derive an entity knowledge graph from the source documents
+- Uses the hierarchical Leiden algorithm to identify communities of closely related entities, and extracts a summary for each community
+- For an input query, the relevant communities are identified and partial answers are generated from each community summary with a retriever LLM (query-focused summarization, QFS)
+- A final generation stage (the last LLM) responds to the query based on the intermediate community answers. See [GraphRAG Model Notes](GraphRAG_LLM_notes.md)
+- This app uses three LLMs: dataprep (knowledge graph extraction), retriever (query-focused summaries), and final generation. The final generation LLM and the embedding model run locally on CPU (Xeon), while the dataprep and retriever LLMs are served by OpenAI-like endpoints.
+
+## Deploy GraphRAG Service
+
+Quick Start Deployment Steps:
+
+1. Set up the environment variables.
+2. Run Docker Compose.
+3. Consume the GraphRAG Service.
+
+Note: If you do not have Docker installed, you can install it by running: `bash docker_compose/install_docker.sh`
+
+## Prerequisites
+
+Build images:
+
+```bash
+cd ~/
+git clone https://github.com/opea-project/GenAIExamples.git
+git clone https://github.com/vllm-project/vllm.git
+git clone https://github.com/opea-project/GenAIComps.git
+
+# vllm-service
+cd vllm/
+VLLM_VER=v0.8.3
+git checkout "${VLLM_VER}"
+docker build --no-cache -f docker/Dockerfile.cpu -t opea/vllm-cpu:"${TAG:-latest}" --shm-size=128g .
+
+# opea/dataprep
+cd ~/GenAIComps
+docker build -t opea/dataprep:latest \
+ --build-arg "no_proxy=${no_proxy}" \
+ --build-arg "https_proxy=${https_proxy}" \
+ --build-arg "http_proxy=${http_proxy}" \
+ -f comps/dataprep/src/Dockerfile .
+
+# opea/retrievers
+cd ~/GenAIComps
+docker build -t opea/retriever:latest \
+ --build-arg "no_proxy=${no_proxy}" \
+ --build-arg "https_proxy=${https_proxy}" \
+ --build-arg "http_proxy=${http_proxy}" \
+ -f comps/retrievers/src/Dockerfile .
+
+# opea/graphrag-ui
+cd ~/GenAIExamples/GraphRAG/ui
+docker build -t opea/graphrag-ui:latest \
+ --build-arg "no_proxy=${no_proxy}" \
+ --build-arg "https_proxy=${https_proxy}" \
+ --build-arg "http_proxy=${http_proxy}" \
+ -f docker/Dockerfile .
+
+# opea/graphrag
+cd ~/GenAIExamples/GraphRAG
+docker build -t opea/graphrag:latest .
+
+# Note: run each docker build from the directory shown above so that Docker has the correct build context to COPY the relevant code into the image.
+```
+
+### Quick Start: 1. Set Up Environment Variables
+
+To set up environment variables for deploying GraphRAG services, follow these steps:
+
+1. Set the required private environment variables:
+
+ ```bash
+   # For simplicity, OpenRouter.ai is used as the endpoint for both the dataprep and retriever components.
+   # These could point to any OpenAI-compatible endpoint.
+ export OPENROUTER_KEY="mykey"
+ export HUGGINGFACEHUB_API_TOKEN="mytoken"
+
+ source set_env.sh
+
+ # Below will override some of these defaults in set_env.sh
+ export host_ip=$(hostname -I | awk '{print $1}')
+
+ export NEO4J_PORT1=11631
+ export NEO4J_PORT2=11632
+ export NEO4J_URI="bolt://${host_ip}:${NEO4J_PORT2}"
+ export NEO4J_URL="bolt://${host_ip}:${NEO4J_PORT2}"
+
+ export NEO4J_USERNAME="neo4j"
+ export NEO4J_PASSWORD="neo4jtest"
+
+ export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/ingest"
+
+ # Must explicitly override default to not use OpenAI.
+ export OPENAI_LLM_MODEL=""
+ export OPENAI_EMBEDDING_MODEL=""
+
+ # Embedder endpoint
+ export TEI_EMBEDDER_PORT=6006
+ export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:${TEI_EMBEDDER_PORT}"
+
+ # LLM for dataprep is used to extract knowledge graph
+ export DATAPREP_LLM_ENDPOINT="https://openrouter.ai/api"
+ export DATAPREP_LLM_MODEL_ID="anthropic/claude-3-haiku"
+ export DATAPREP_LLM_ENDPOINT_KEY=${OPENROUTER_KEY}
+
+   # The retriever LLM generates answers from the relevant community summaries at retrieval time
+ export RETRIEVER_LLM_ENDPOINT="https://openrouter.ai/api"
+ export RETRIEVER_LLM_MODEL_ID="anthropic/claude-3-haiku"
+ export RETRIEVER_LLM_ENDPOINT_KEY=${OPENROUTER_KEY}
+
+   # The final LLM formulates the response based on the relevant community summaries.
+ export FINAL_LLM_MODEL_ID="Qwen/Qwen2.5-0.5B-Instruct"
+
+ export LOGFLAG=True
+ export MAX_INPUT_TOKENS=4096
+ export MAX_TOTAL_TOKENS=8192
+ export DATAPREP_PORT=11103
+ export RETRIEVER_PORT=11635
+ export MEGA_SERVICE_PORT=8888
+ ```
+
+2. If you are in a proxy environment, also set the proxy-related environment variables:
+
+ ```bash
+ export http_proxy="Your_HTTP_Proxy"
+ export https_proxy="Your_HTTPs_Proxy"
+ export no_proxy=$no_proxy,${host_ip} #important to add {host_ip} for containers communication
+ ```
+
+### Quick Start: 2. Run Docker Compose
+
+If the microservice images are available on Docker Hub they will be pulled; otherwise you will need to build the container images manually. Please refer to the 'Build Docker Images' section in the [Guide](../../../../../ChatQnA/docker_compose/intel/cpu/xeon/README.md). [test_compose_on_xeon.sh](../../../../../ChatQnA/tests/test_compose_on_xeon.sh) is also a good resource, as it shows how to build the images, start the services, and validate each microservice and the megaservice. This is what is used in CI/CD.
+
+```bash
+cd GraphRAG/docker_compose/intel/cpu/xeon
+NGINX_PORT=8080 docker compose -f compose.yaml up -d
+```
+
+Here `NGINX_PORT=8080` is used because port 80 is typically reserved for regular web browsing on the host.
+
+#### Check the Deployment Status
+
+After running docker compose, check if all the containers launched via docker compose have started:
+
+```bash
+docker ps -a
+```
+
+The following containers should have started:
+
+```bash
+CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
+740d0061fce2 opea/nginx:latest "/docker-entrypoint.…" 3 hours ago Up 3 hours 0.0.0.0:8080->80/tcp, [::]:8080->80/tcp graphrag-xeon-nginx-server
+3010243786cd opea/graphrag-ui:latest "docker-entrypoint.s…" 3 hours ago Up 3 hours 0.0.0.0:5173->5173/tcp, :::5173->5173/tcp graphrag-ui-server
+f63d10453e22 opea/graphrag:latest "python graphrag.py" 3 hours ago Up 3 hours 0.0.0.0:8888->8888/tcp, :::8888->8888/tcp graphrag-xeon-backend-server
+a48d0fba13e6 opea/dataprep:latest "sh -c 'python $( [ …" 3 hours ago Up 3 hours 0.0.0.0:6004->5000/tcp, [::]:6004->5000/tcp dataprep-neo4j-server
+9301a833f220 opea/retriever:latest "python opea_retriev…" 3 hours ago Up 3 hours 0.0.0.0:7000->7000/tcp, :::7000->7000/tcp retriever-neo4j-server
+eda369268406 ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 "text-embeddings-rou…" 3 hours ago Up 3 hours 0.0.0.0:6006->80/tcp, [::]:6006->80/tcp tei-embedding-server
+f21e82efa1fa opea/vllm-cpu:latest "python3 -m vllm.ent…" 3 hours ago Up 3 hours (healthy) 0.0.0.0:9009->80/tcp, [::]:9009->80/tcp vllm-service
+3b541ceeaf9f neo4j:latest "tini -g -- /startup…" 3 hours ago Up 3 hours 7473/tcp, 0.0.0.0:11631->7474/tcp, [::]:11631->7474/tcp, 0.0.0.0:11632->7687/tcp, [::]:11632->7687/tcp neo4j-apoc
+```
+
+##### Test Final vLLM
+
+```bash
+curl http://localhost:9009/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -d '{"model":"'${FINAL_LLM_MODEL_ID}'","messages":[{"role":"user","content":"Tell me a joke?"}]}'
+```
+
+### Quick Start: 3. Upload RAG Files and Consume the GraphRAG Service
+
+To chat with retrieved information, you first need to upload a file using the `Dataprep` service.
+
+Here is an example of uploading sample graph data (which can also be uploaded via the UI):
+
+```bash
+cd ~/GenAIExamples/GraphRAG/example_data
+
+# First file
+curl -X POST "http://${host_ip}:6004/v1/dataprep/ingest" \
+ -H "Content-Type: multipart/form-data" \
+ -F "files=@./programming_languages.txt"
+
+# Second file
+curl -X POST "http://${host_ip}:6004/v1/dataprep/ingest" \
+ -H "Content-Type: multipart/form-data" \
+ -F "files=@./programming_languages2.txt"
+```
+
+To log in to the Neo4j UI, browse to http://localhost:${NEO4J_PORT1}/browser and sign in with the Neo4j username and password defined in the environment variables section.
+The backend GraphRAG service can be queried via curl:
+
+```bash
+curl http://${host_ip}:8888/v1/graphrag \
+ -H "Content-Type: application/json" \
+ -d '{"messages": [{"role": "user","content": "what are the main themes of the programming dataset?"}]}'
+```
+
+## Architecture and Deployment Details
+
+The GraphRAG example is implemented using the component-level microservices defined in [GenAIComps](https://github.com/opea-project/GenAIComps). The flow chart below shows the information flow between different microservices for this example.
+
+```mermaid
+---
+config:
+ flowchart:
+ nodeSpacing: 400
+ rankSpacing: 100
+ curve: linear
+ themeVariables:
+ fontSize: 50px
+---
+flowchart LR
+ %% Colors %%
+ classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
+ classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
+ classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
+ classDef invisible fill:transparent,stroke:transparent;
+ style GraphRAG-MegaService stroke:#000000
+
+ %% Subgraphs %%
+ subgraph GraphRAG-MegaService["GraphRAG MegaService "]
+ direction LR
+ RET([Retrieval MicroService]):::blue
+ LLM([LLM MicroService]):::blue
+ EM([Embedding MicroService]):::blue
+ end
+ subgraph UserInterface[" User Interface "]
+ direction LR
+ a([User Input Query]):::orchid
+ Ingest([Ingest data]):::orchid
+ UI([UI server
]):::orchid
+ end
+
+
+ GDB{{Graph DB
}}
+ DP([Data Preparation MicroService]):::blue
+ GW([GraphRAG GateWay
]):::orange
+
+
+ %% Data Preparation flow
+ %% Ingest data flow
+ direction LR
+ Ingest[Ingest data] --> UI
+ UI --> DP
+
+ %% interactions buried inside the DP and RET microservice implementations
+ DP <-.-> EM
+ DP <-.-> LLM
+ RET <-.-> EM
+ RET <-.-> LLM
+
+
+ %% Questions interaction
+ direction LR
+ a[User Input Query] --> UI
+ UI --> GW
+ GW <==> GraphRAG-MegaService
+ RET ==> LLM
+
+
+ direction TB
+ %% Graph DB interaction
+ RET <-.-> |d|GDB
+ DP <-.-> |d|GDB
+
+ linkStyle 2 stroke:#000000,stroke-width:2px;
+ linkStyle 3 stroke:#000000,stroke-width:2px;
+ linkStyle 4 stroke:#000000,stroke-width:2px;
+ linkStyle 5 stroke:#000000,stroke-width:2px;
+
+
+```
+
+Xeon default configuration:
+| MicroService | Open Source Project | HW | Default Port | Endpoint |
+| ------------ | ------------------- | --- | ------------ | -------- |
+| Dataprep | Neo4j, LlamaIndex | OpenAI-like Endpoint | 6004 | /v1/dataprep/ingest |
+| Embedding | Llama-index, TEI | Xeon or CPU | 6006 | /v1/embeddings |
+| Retriever | Llama-index, Neo4j | OpenAI-like Endpoint | 7000 | /v1/retrieval |
+| Final LLM | vLLM | Xeon or CPU | 9009 | /v1/chat/completions |
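+
+To spot-check an individual component after deployment, its endpoint can be queried directly. For example, a minimal request to the TEI embedding service (a sketch assuming the default port from the table above; the exact request schema may vary between TEI versions):
+
+```bash
+curl http://${host_ip}:6006/v1/embeddings \
+  -H "Content-Type: application/json" \
+  -d '{"input": "What is GraphRAG?"}'
+```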
+
+### Models Selection
+
+[GraphRAG Model Notes](GraphRAG_LLM_notes.md)
+
+## Consume GraphRAG Service with RAG
+
+### 1. Check Service Status
+
+Before consuming GraphRAG Service, make sure each microservice is ready by checking the docker logs of each microservice.
+
+```bash
+docker logs container_name
+```
+
+### 2. Access via frontend
+
+To access the frontend, open the following URL in your browser: `http://${host_ip}:${NGINX_PORT}`
+
+In the example above, `NGINX_PORT` was set to 8080, so the UI is served through NGINX at `http://${host_ip}:8080` (the UI server itself listens internally on port 5173).
+
+## Monitoring OPEA Service with Prometheus and Grafana dashboard
+
+OPEA microservice deployments can easily be monitored through Grafana dashboards in conjunction with Prometheus data collection. Follow the [README](https://github.com/opea-project/GenAIEval/blob/main/evals/benchmark/grafana/README.md) to set up the Prometheus and Grafana servers and import dashboards to monitor the OPEA services.
diff --git a/GraphRAG/docker_compose/intel/cpu/xeon/compose.yaml b/GraphRAG/docker_compose/intel/cpu/xeon/compose.yaml
new file mode 100644
index 0000000000..29b2ec0802
--- /dev/null
+++ b/GraphRAG/docker_compose/intel/cpu/xeon/compose.yaml
@@ -0,0 +1,203 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+ neo4j-apoc:
+ image: neo4j:latest
+ container_name: neo4j-apoc
+ volumes:
+ - /$HOME/neo4j/logs:/logs
+ - /$HOME/neo4j/config:/config
+ - /$HOME/neo4j/data:/data
+ - /$HOME/neo4j/plugins:/plugins
+ ipc: host
+ environment:
+ - NEO4J_AUTH=${NEO4J_USERNAME}/${NEO4J_PASSWORD}
+ - NEO4J_PLUGINS=["apoc"]
+ - NEO4J_apoc_export_file_enabled=true
+ - NEO4J_apoc_import_file_enabled=true
+ - NEO4J_apoc_import_file_use__neo4j__config=true
+ - NEO4J_dbms_security_procedures_unrestricted=apoc.\*
+ ports:
+ - "${NEO4J_PORT1:-7474}:7474"
+ - "${NEO4J_PORT2:-7687}:7687"
+ restart: always
+
+ tei-embedding-service:
+ image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+ container_name: tei-embedding-server
+ ports:
+ - "6006:80"
+ volumes:
+ - "./data:/data"
+ shm_size: 1g
+ environment:
+ no_proxy: ${no_proxy}
+ NO_PROXY: ${no_proxy}
+ http_proxy: ${http_proxy}
+ https_proxy: ${https_proxy}
+ HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+ ipc: host
+ command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
+
+ dataprep-neo4j-llamaindex:
+ image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
+ container_name: dataprep-neo4j-server
+ depends_on:
+ - neo4j-apoc
+ - vllm-service
+ - tei-embedding-service
+ ports:
+ - "6004:5000"
+ ipc: host
+ environment:
+ no_proxy: ${no_proxy}
+ http_proxy: ${http_proxy}
+ https_proxy: ${https_proxy}
+ host_ip: ${host_ip}
+ DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_NEO4J_LLAMAINDEX"
+ HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+ NEO4J_URL: ${NEO4J_URL}
+ NEO4J_USERNAME: ${NEO4J_USERNAME}
+ NEO4J_PASSWORD: ${NEO4J_PASSWORD}
+ TGI_LLM_ENDPOINT: ${DATAPREP_LLM_ENDPOINT}
+ TGI_LLM_ENDPOINT_KEY: ${DATAPREP_LLM_ENDPOINT_KEY}
+ LLM_MODEL_ID: ${DATAPREP_LLM_MODEL_ID}
+ TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
+ OPENAI_API_KEY: ${OPENAI_API_KEY}
+ OPENAI_EMBEDDING_MODEL: ${OPENAI_EMBEDDING_MODEL}
+ OPENAI_LLM_MODEL: ${OPENAI_LLM_MODEL}
+ EMBEDDING_MODEL_ID: ${EMBEDDING_MODEL_ID}
+ LOGFLAG: ${LOGFLAG}
+ restart: unless-stopped
+
+ retriever-neo4j-llamaindex:
+ image: ${REGISTRY:-opea}/retriever:${TAG:-latest}
+ container_name: retriever-neo4j-server
+ ports:
+ - "7000:7000"
+ ipc: host
+ environment:
+ no_proxy: ${no_proxy}
+ http_proxy: ${http_proxy}
+ https_proxy: ${https_proxy}
+ host_ip: ${host_ip}
+ HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+ LOGFLAG: ${LOGFLAG}
+ RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_NEO4J"
+ # Embedding endpoint
+ TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
+ EMBEDDING_MODEL_ID: ${EMBEDDING_MODEL_ID}
+ # Retriever LLM
+ TGI_LLM_ENDPOINT: ${RETRIEVER_LLM_ENDPOINT}
+ TGI_LLM_ENDPOINT_KEY: ${RETRIEVER_LLM_ENDPOINT_KEY}
+ LLM_MODEL_ID: ${RETRIEVER_LLM_MODEL_ID} # This is used for graph indexing and different from vLLM model ID.
+
+ # Only used if using OpenAI models
+ OPENAI_API_KEY: ${OPENAI_API_KEY}
+ OPENAI_LLM_MODEL: ${OPENAI_LLM_MODEL}
+ OPENAI_EMBEDDING_MODEL: ${OPENAI_EMBEDDING_MODEL}
+ VDMS_USE_CLIP: 0
+ NEO4J_URL: ${NEO4J_URL}
+ NEO4J_URI: ${NEO4J_URI}
+ NEO4J_USERNAME: ${NEO4J_USERNAME}
+ NEO4J_PASSWORD: ${NEO4J_PASSWORD}
+ depends_on:
+ - neo4j-apoc
+ - vllm-service
+ - tei-embedding-service
+ restart: unless-stopped
+
+  # vllm-service serves the final LLM, which summarizes the retriever results for the backend.
+ vllm-service:
+    image: ${REGISTRY:-opea}/vllm-cpu:${TAG:-latest} # must be built locally; see the Prerequisites section in the README.
+ container_name: vllm-service
+ ports:
+ - "9009:80" # this can be accessed by graphrag-xeon-backend-server on internal port 80.
+ ipc: host
+ volumes:
+ - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub"
+ shm_size: 128g
+ environment:
+ no_proxy: ${no_proxy}
+ http_proxy: ${http_proxy}
+ https_proxy: ${https_proxy}
+ HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+ LLM_MODEL_ID: ${FINAL_LLM_MODEL_ID}
+ VLLM_TORCH_PROFILER_DIR: "/mnt"
+ # Laptop environment variables
+ VLLM_USE_CPU: 1
+ VLLM_CPU_OMP_THREADS_BIND: all
+ VLLM_CPU_KVCACHE_SPACE: 4
+ VLLM_MLA_DISABLE: 1
+ healthcheck:
+ test: ["CMD-SHELL", "curl -f http://$host_ip:9009/health || exit 1"]
+ interval: 10s
+ timeout: 10s
+ retries: 100
+ command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80 --block-size ${BLOCK_SIZE:-16} # for Qwen on laptop set to 16
+
+ graphrag-xeon-backend-server:
+ image: ${REGISTRY:-opea}/graphrag:${TAG:-latest}
+ container_name: graphrag-xeon-backend-server
+ depends_on:
+ - neo4j-apoc
+ - tei-embedding-service
+ - retriever-neo4j-llamaindex
+ - vllm-service
+ ports:
+ - "8888:8888"
+ environment:
+ - no_proxy=${no_proxy}
+ - https_proxy=${https_proxy}
+ - http_proxy=${http_proxy}
+ - MEGA_SERVICE_HOST_IP=graphrag-xeon-backend-server
+ - RETRIEVER_SERVICE_HOST_IP=retriever-neo4j-llamaindex
+ - RETRIEVER_SERVICE_PORT=7000
+ - LLM_SERVER_HOST_IP=vllm-service # this is the final LLM server that will be used by the backend.
+ - LLM_SERVER_PORT=80
+ - LLM_MODEL_ID=${FINAL_LLM_MODEL_ID} # backend will format the input and provide model to vLLM
+ - LOGFLAG=${LOGFLAG}
+ ipc: host
+ restart: always
+
+ graphrag-ui-server:
+ image: ${REGISTRY:-opea}/graphrag-ui:${TAG:-latest}
+ container_name: graphrag-ui-server
+ depends_on:
+ - graphrag-xeon-backend-server
+ ports:
+ - "5173:5173"
+ environment:
+ - no_proxy=${no_proxy}
+ - https_proxy=${https_proxy}
+ - http_proxy=${http_proxy}
+ - LLM_MODEL_ID=${FINAL_LLM_MODEL_ID} # this is the LLM model ID for payload request
+ ipc: host
+ restart: always
+
+ graphrag-xeon-nginx-server:
+ image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
+ container_name: graphrag-xeon-nginx-server
+ depends_on:
+ - graphrag-xeon-backend-server
+ - graphrag-ui-server
+ ports:
+ - "${NGINX_PORT:-80}:80"
+ environment:
+ - no_proxy=${no_proxy}
+ - https_proxy=${https_proxy}
+ - http_proxy=${http_proxy}
+      # Note: use the internal container ports here since the services communicate over the Docker network.
+ - FRONTEND_SERVICE_IP=graphrag-ui-server
+ - FRONTEND_SERVICE_PORT=5173
+ - BACKEND_SERVICE_NAME=graphrag
+ - BACKEND_SERVICE_IP=graphrag-xeon-backend-server
+ - BACKEND_SERVICE_PORT=8888
+ - DATAPREP_SERVICE_IP=dataprep-neo4j-llamaindex
+ - DATAPREP_SERVICE_PORT=5000
+ ipc: host
+ restart: always
+networks:
+ default:
+ driver: bridge
diff --git a/GraphRAG/docker_compose/intel/cpu/xeon/set_env.sh b/GraphRAG/docker_compose/intel/cpu/xeon/set_env.sh
new file mode 100644
index 0000000000..6ae93314fc
--- /dev/null
+++ b/GraphRAG/docker_compose/intel/cpu/xeon/set_env.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Remember to set your private variables mentioned in README
+
+# host_ip, OPENAI_API_KEY, HUGGINGFACEHUB_API_TOKEN, proxies...
+pushd "../../../../../" > /dev/null
+source .set_env.sh
+popd > /dev/null
+
+export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
+export OPENAI_EMBEDDING_MODEL="text-embedding-3-small"
+export LLM_MODEL_ID="Qwen/Qwen2.5-0.5B-Instruct" # Will use smaller model for Xeon.
+export OPENAI_LLM_MODEL="gpt-4o"
+export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
+export TGI_LLM_ENDPOINT="http://${host_ip}:6005"
+export NEO4J_URL="bolt://${host_ip}:7687"
+export NEO4J_USERNAME=neo4j
+export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6004/v1/dataprep"
+export LOGFLAG=True
+export RETRIEVER_SERVICE_PORT=80
diff --git a/GraphRAG/docker_compose/intel/hpu/gaudi/compose.yaml b/GraphRAG/docker_compose/intel/hpu/gaudi/compose.yaml
index 7107a560fe..50ba3bd981 100644
--- a/GraphRAG/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/GraphRAG/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -199,6 +199,7 @@ services:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
+ - LLM_MODEL_ID=${LLM_MODEL_ID} # must be set for the UI to make request.
ipc: host
restart: always
chatqna-gaudi-nginx-server:
diff --git a/GraphRAG/example_data/README.md b/GraphRAG/example_data/README.md
new file mode 100644
index 0000000000..87dacf06b6
--- /dev/null
+++ b/GraphRAG/example_data/README.md
@@ -0,0 +1,87 @@
+# GraphRAG Example Datasets
+
+This directory contains example datasets carefully crafted to demonstrate GraphRAG's capabilities for knowledge graph construction and querying.
+
+## Programming Languages Dataset (`programming_languages.txt`)
+
+A concise dataset that showcases GraphRAG's ability to handle:
+
+1. **Entity Extraction**
+
+ - People (Guido van Rossum, James Gosling)
+ - Organizations (CWI, Sun Microsystems, Google)
+ - Programming Languages (Python, Java, ABC, Go)
+ - Technologies (REPL, var keyword)
+
+2. **Relationship Types**
+
+ - Creation relationships ("created by")
+ - Influence relationships ("influenced by")
+ - Employment relationships ("worked at")
+ - Usage relationships ("used by")
+ - Feature relationships ("borrowed ideas from")
+
+3. **Temporal Information**
+
+ - Creation dates (1991, 1995, 2009)
+ - Sequential influences (ABC → Python → Java)
+
+4. **Complex Reasoning Capabilities**
+ - Bidirectional influences (Java ↔ Python)
+ - Multi-hop relationships (ABC → Python → Java's features)
+ - Organizational relationships (Google's use of multiple languages)
+
+### Example Queries
+
+This dataset is ideal for testing queries like:
+
+1. "What are the main themes of the programming dataset?"
+2. "What's the relationship between Google and these programming languages?"
+3. "How did early teaching languages influence modern programming languages?"
+4. "Trace the evolution of programming language features through these languages."
+5. "What role did corporate entities play in language development?"
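+
+For instance, assuming the default Xeon deployment described in `GraphRAG/docker_compose/intel/cpu/xeon/README.md` (backend exposed on port 8888), the first query above can be sent to the GraphRAG backend as follows:
+
+```bash
+curl http://${host_ip}:8888/v1/graphrag \
+  -H "Content-Type: application/json" \
+  -d '{"messages": [{"role": "user", "content": "What are the main themes of the programming dataset?"}]}'
+```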
+
+### Community Detection
+
+The dataset is structured to form natural communities around:
+
+- Language Development (Python, ABC, Guido)
+- Corporate Influence (Google, Java, Go)
+- Language Features (OOP, REPL, var keyword)
+
+This makes it perfect for testing GraphRAG's community detection and summarization capabilities.
+
+### Why Traditional RAG Falls Short
+
+For the example queries above, traditional RAG approaches would struggle in several ways:
+
+1. **Multi-hop Relationships**
+
+ - Traditional RAG: Can only find direct relationships within single documents
+ - Example: For "How did ABC influence Java's features?", traditional RAG might miss the connection because it can't trace ABC → Python → Java
+ - GraphRAG: Can traverse multiple relationship hops to uncover indirect influences
+
+2. **Community Analysis**
+
+ - Traditional RAG: Limited to keyword matching and proximity-based relationships
+ - Example: "What programming language communities formed around Google?" requires understanding organizational and temporal relationships
+ - GraphRAG: Can detect and analyze communities through relationship patterns and clustering
+
+3. **Bidirectional Relationships**
+
+ - Traditional RAG: Typically treats relationships as unidirectional text mentions
+ - Example: Understanding how Java and Python mutually influenced each other requires tracking bidirectional relationships
+ - GraphRAG: Explicitly models bidirectional relationships and their evolution over time
+
+4. **Complex Entity Relationships**
+
+ - Traditional RAG: Struggles to maintain consistency across multiple entity mentions
+ - Example: "Trace the evolution of REPL features" requires understanding how the feature moved across languages
+ - GraphRAG: Maintains consistent entity relationships across the entire knowledge graph
+
+5. **Temporal Evolution**
+ - Traditional RAG: Limited ability to track changes over time
+ - Example: Understanding how language features evolved requires tracking temporal relationships
+ - GraphRAG: Can model and query temporal relationships between entities
+
+These limitations make traditional RAG less effective for complex queries that require understanding relationships, community structures, and temporal evolution. GraphRAG's knowledge graph approach provides a more complete and accurate representation of these complex relationships.
diff --git a/GraphRAG/example_data/programming_languages.txt b/GraphRAG/example_data/programming_languages.txt
new file mode 100644
index 0000000000..5eb839d47a
--- /dev/null
+++ b/GraphRAG/example_data/programming_languages.txt
@@ -0,0 +1,15 @@
+"Python was conceived in the late 1980s, and its implementation began in December 1989 by Guido van Rossum at Centrum Wiskunde & Informatica (CWI) in the Netherlands as a successor to the ABC programming language capable of exception handling and interfacing with the Amoeba operating system." (Wikipedia: https://en.wikipedia.org/wiki/Python_(programming_language))
+
+"Python was designed to be highly extensible. Python can also be embedded in existing applications that need a programmable interface. This design of a small core language with a large standard library and an easily extensible interpreter was intended by Van Rossum from the start because of his frustrations with ABC, which espoused the opposite approach." (Wikipedia: https://en.wikipedia.org/wiki/Python_(programming_language)#Features_and_philosophy)
+
+"Java was originally developed by James Gosling at Sun Microsystems. It was released in May 1995 as a core component of Sun Microsystems' Java platform. The original and reference implementation Java compilers, virtual machines, and class libraries were originally released by Sun under proprietary licenses." (Wikipedia: https://en.wikipedia.org/wiki/Java_(programming_language))
+
+"In 2007, three Google employees, Robert Griesemer, Rob Pike, and Ken Thompson, started sketching the goals for a new language to address criticisms of other languages in use at Google. The new language was initially called 'Go!' but due to trademark issues, it was later renamed 'Go'." (Wikipedia: https://en.wikipedia.org/wiki/Go_(programming_language)#History)
+
+"Go was designed at Google in 2007 to improve programming productivity in an era of multicore, networked machines and large codebases. The designers wanted to address criticism of other languages in use at Google, but keep their useful characteristics." (Wikipedia: https://en.wikipedia.org/wiki/Go_(programming_language))
+
+"The designers were primarily motivated by their shared dislike of C++. Go is influenced by C, but with an emphasis on greater simplicity and safety... Go was publicly announced in November 2009, and version 1.0 was released in March 2012." (Wikipedia: https://en.wikipedia.org/wiki/Go_(programming_language)#History)
+
+"Google has invested significant resources in creating custom versions of the Java virtual machine (JVM) and Python runtime for its production systems. The company's Gotz JVM includes custom garbage collection and monitoring features optimized for large-scale applications. Similarly, Google maintains an internal Python implementation with performance enhancements for its specific use cases." (Google Engineering Blog: https://engineering.google.com/blog/)
+
+"Go addresses issues that make large-scale software development difficult, including slow builds, uncontrolled dependencies, different subsets of languages being used in different places, poor program understanding (code hard to read, poorly documented, and so on), duplicate efforts, and high update costs." (The Go Blog: https://go.dev/blog/go-at-google-language-design-in-the-service-of-software-engineering)
diff --git a/GraphRAG/example_data/programming_languages2.txt b/GraphRAG/example_data/programming_languages2.txt
new file mode 100644
index 0000000000..e0f0302a21
--- /dev/null
+++ b/GraphRAG/example_data/programming_languages2.txt
@@ -0,0 +1,41 @@
+"Python was conceived in the late 1980s, and its implementation began in December 1989 by Guido van Rossum at Centrum Wiskunde & Informatica (CWI) in the Netherlands as a successor to the ABC programming language capable of exception handling and interfacing with the Amoeba operating system." (Wikipedia: https://en.wikipedia.org/wiki/Python_(programming_language))
+
+"TensorFlow is a free and open-source software library for machine learning and artificial intelligence. It can be used across a range of tasks but has a particular focus on training and inference of deep neural networks. TensorFlow was developed by the Google Brain team for internal Google use in research and production." (Wikipedia: https://en.wikipedia.org/wiki/TensorFlow)
+
+"PyTorch is an open source machine learning framework based on the Torch library, used for applications such as computer vision and natural language processing, originally developed by Meta AI and now part of the Linux Foundation umbrella. It is free and open-source software released under the Modified BSD license." (Wikipedia: https://en.wikipedia.org/wiki/PyTorch)
+
+"R is a programming language for statistical computing and graphics supported by the R Core Team and the R Foundation for Statistical Computing. Created by statisticians Ross Ihaka and Robert Gentleman, R is used among data miners, bioinformaticians and statisticians for data analysis and developing statistical software." (Wikipedia: https://en.wikipedia.org/wiki/R_(programming_language))
+
+"Julia is a high-level, dynamic programming language. Its features are well suited for numerical analysis and computational science. Distinctive aspects of Julia's design include a type system with parametric polymorphism and multiple dispatch as its core programming paradigm." (Wikipedia: https://en.wikipedia.org/wiki/Julia_(programming_language))
+
+"Julia was designed from the beginning for high performance. Julia programs compile to efficient native code for multiple platforms via LLVM. The language uses multiple dispatch as a paradigm, making it easy to express many object-oriented and functional programming patterns." (Julia Documentation: https://docs.julialang.org/en/v1/)
+
+"JavaScript, often abbreviated as JS, is a programming language that is one of the core technologies of the World Wide Web, alongside HTML and CSS. As of 2022, 98% of websites use JavaScript on the client side for webpage behavior, often incorporating third-party libraries. All major web browsers have a dedicated JavaScript engine to execute the code on users' devices." (Wikipedia: https://en.wikipedia.org/wiki/JavaScript)
+
+"TensorFlow.js is an open-source hardware-accelerated JavaScript library for training and deploying machine learning models in the browser and on Node.js." (TensorFlow.js Documentation: https://www.tensorflow.org/js)
+
+"Rust is a multi-paradigm, general-purpose programming language that emphasizes performance, type safety, and concurrency. It enforces memory safety—ensuring that all references point to valid memory—without requiring the use of a garbage collector or reference counting present in other memory-safe languages." (Wikipedia: https://en.wikipedia.org/wiki/Rust_(programming_language))
+
+"The Rust Foundation is an independent non-profit organization dedicated to stewarding the Rust programming language, nurturing the Rust ecosystem, and supporting the set of maintainers governing and developing the project. The Rust Foundation was established in February 2021 by its founding corporate members AWS, Google, Huawei, Microsoft, and Mozilla." (Rust Foundation: https://foundation.rust-lang.org/about/)
+
+"Lisp (historically LISP) is a family of programming languages with a long history and a distinctive, fully parenthesized prefix notation. Originally specified in 1958, Lisp is the second-oldest high-level programming language in widespread use today. Only Fortran is older, by one year." (Wikipedia: https://en.wikipedia.org/wiki/Lisp_(programming_language))
+
+"John McCarthy founded the MIT Artificial Intelligence Project in 1959, and the Stanford Artificial Intelligence Laboratory (SAIL) in 1963. During the 1970s and 1980s, Lisp became the preferred programming language for artificial intelligence (AI) research." (Wikipedia: https://en.wikipedia.org/wiki/John_McCarthy_(computer_scientist))
+
+"C++ is a high-level, general-purpose programming language created by Danish computer scientist Bjarne Stroustrup as an extension of the C programming language, or 'C with Classes'. The language has expanded significantly over time, and modern C++ now has object-oriented, generic, and functional features in addition to facilities for low-level memory manipulation." (Wikipedia: https://en.wikipedia.org/wiki/C%2B%2B)
+
+"ONNX Runtime is a cross-platform inference and training machine-learning accelerator. ONNX Runtime inference can be deployed to the cloud, edge devices, or IoT devices. ONNX Runtime training can be used for distributed training on Linux." (Microsoft Documentation: https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models-onnx)
+
+"MATLAB (an abbreviation of 'MATrix LABoratory') is a proprietary multi-paradigm programming language and numeric computing environment developed by MathWorks. MATLAB allows matrix manipulations, plotting of functions and data, implementation of algorithms, creation of user interfaces, and interfacing with programs written in other languages." (Wikipedia: https://en.wikipedia.org/wiki/MATLAB)
+
+"The Neural Network Toolbox provides algorithms, pretrained models, and apps to create, train, visualize, and simulate neural networks. You can perform classification, regression, clustering, dimensionality reduction, time-series forecasting, and dynamic system modeling and control." (MathWorks Documentation: https://www.mathworks.com/products/deep-learning.html)
+
+"Scala is a strong statically typed high-level general-purpose programming language that supports both object-oriented programming and functional programming. Designed to be concise, many of Scala's design decisions are aimed to address criticisms of Java." (Wikipedia: https://en.wikipedia.org/wiki/Scala_(programming_language))
+
+"Scala was created by Martin Odersky and he released the first version in 2003. Scala runs on the Java platform (Java virtual machine) and is compatible with existing Java programs." (Wikipedia: https://en.wikipedia.org/wiki/Scala_(programming_language))
+
+"Apache Spark is an open-source unified analytics engine for large-scale data processing. Spark provides an interface for programming clusters with implicit data parallelism and fault tolerance. Originally developed at the University of California, Berkeley's AMPLab, the Spark codebase was later donated to the Apache Software Foundation, which has maintained it since." (Wikipedia: https://en.wikipedia.org/wiki/Apache_Spark)
+
+"Go is a statically typed, compiled programming language designed at Google by Robert Griesemer, Rob Pike, and Ken Thompson. It is syntactically similar to C, but with memory safety, garbage collection, structural typing, and CSP-style concurrency." (Wikipedia: https://en.wikipedia.org/wiki/Go_(programming_language))
+
+"Go was designed at Google in 2007 to improve programming productivity in an era of multicore, networked machines and large codebases. The designers wanted to address criticism of other languages in use at Google, but keep their useful characteristics." (Wikipedia: https://en.wikipedia.org/wiki/Go_(programming_language))
diff --git a/GraphRAG/graphrag.py b/GraphRAG/graphrag.py
index 77a912418c..c3833d97e4 100644
--- a/GraphRAG/graphrag.py
+++ b/GraphRAG/graphrag.py
@@ -3,8 +3,17 @@
import argparse
import json
+import logging
import os
import re
+import time
+import uuid
+from typing import Dict, List, Union
+
+# Configure logging
+logger = logging.getLogger(__name__)
+log_level = logging.DEBUG if os.getenv("LOGFLAG", "").lower() == "true" else logging.INFO
+logging.basicConfig(level=log_level, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
from comps import MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceRoleType, ServiceType
from comps.cores.mega.utils import handle_message
@@ -57,7 +66,7 @@ def generate_rag_prompt(question, documents):
def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
if self.services[cur_node].service_type == ServiceType.RETRIEVER:
- print("make no changes for retriever inputs. AlreadyCheckCompletionRequest")
+ logger.debug("No changes needed for retriever inputs - already a CompletionRequest")
elif self.services[cur_node].service_type == ServiceType.LLM:
# convert TGI/vLLM to unified OpenAI /v1/chat/completions format
next_inputs = {}
@@ -71,7 +80,15 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k
# next_inputs["repetition_penalty"] = inputs["repetition_penalty"]
next_inputs["temperature"] = inputs["temperature"]
inputs = next_inputs
- print("inputs after align:\n", inputs)
+
+ # Convert Pydantic models to dict before logging
+ log_inputs = inputs
+ if hasattr(inputs, "model_dump"): # Pydantic v2
+ log_inputs = inputs.model_dump()
+ elif hasattr(inputs, "dict"): # Pydantic v1
+ log_inputs = inputs.dict()
+
+ logger.debug(f"Inputs after alignment:\n{json.dumps(log_inputs, indent=2)}")
return inputs
@@ -96,10 +113,12 @@ def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_di
elif input_variables == ["question"]:
prompt = prompt_template.format(question=prompt)
else:
- print(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']")
+ logger.warning(
+ f"Template {prompt_template} not used - only supporting input variables ['question', 'context']"
+ )
prompt = ChatTemplate.generate_rag_prompt(prompt, docs)
else:
- print("no rerank no chat template")
+ logger.debug("Using default chat template (no rerank or custom template provided)")
prompt = ChatTemplate.generate_rag_prompt(prompt, docs)
next_data["inputs"] = prompt
@@ -110,22 +129,36 @@ def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_di
def align_generator(self, gen, **kwargs):
- # OpenAI response format
- # b'data:{"id":"","object":"text_completion","created":1725530204,"model":"meta-llama/Meta-Llama-3-8B-Instruct","system_fingerprint":"2.0.1-native","choices":[{"index":0,"delta":{"role":"assistant","content":"?"},"logprobs":null,"finish_reason":null}]}\n\n'
- print("generator in align generator:\n", gen)
+ """Aligns the generator output to match ChatQnA's format of sending bytes.
+
+ The UI expects messages in the format: b'content' which it can then decode.
+ """
for line in gen:
line = line.decode("utf-8")
start = line.find("{")
end = line.rfind("}") + 1
+ if start == -1 or end <= start:
+ # Skip lines with invalid json structure
+ continue
+
json_str = line[start:end]
try:
- # sometimes yield empty chunk, do a fallback here
json_data = json.loads(json_str)
- if json_data["choices"][0]["finish_reason"] != "eos_token":
- yield f"data: {repr(json_data['choices'][0]['delta']['content'].encode('utf-8'))}\n\n"
+ if "ops" in json_data and "op" in json_data["ops"][0]:
+ if "value" in json_data["ops"][0] and isinstance(json_data["ops"][0]["value"], str):
+ yield f"data: {repr(json_data['ops'][0]['value'].encode('utf-8'))}\n\n"
+ elif (
+ "choices" in json_data
+ and "delta" in json_data["choices"][0]
+ and "content" in json_data["choices"][0]["delta"]
+ ):
+ content = json_data["choices"][0]["delta"]["content"]
+ yield f"data: {repr(content.encode('utf-8'))}\n\n"
except Exception as e:
+ # If JSON parsing fails, send the raw string as bytes
yield f"data: {repr(json_str.encode('utf-8'))}\n\n"
+
yield "data: [DONE]\n\n"
@@ -163,12 +196,12 @@ def add_remote_service(self):
async def handle_request(self, request: Request):
data = await request.json()
stream_opt = data.get("stream", True)
- chat_request = ChatCompletionRequest.parse_obj(data)
+ chat_request = ChatCompletionRequest.model_validate(data)
def parser_input(data, TypeClass, key):
chat_request = None
try:
- chat_request = TypeClass.parse_obj(data)
+ chat_request = TypeClass.model_validate(data)
query = getattr(chat_request, key)
except:
query = None
diff --git a/GraphRAG/ui/svelte/src/lib/network/chat/Network.ts b/GraphRAG/ui/svelte/src/lib/network/chat/Network.ts
index d0ae7b701d..e78c97c344 100644
--- a/GraphRAG/ui/svelte/src/lib/network/chat/Network.ts
+++ b/GraphRAG/ui/svelte/src/lib/network/chat/Network.ts
@@ -5,13 +5,17 @@ import { env } from "$env/dynamic/public";
import { SSE } from "sse.js";
const CHAT_BASE_URL = env.CHAT_BASE_URL;
+if (!env.LLM_MODEL_ID) {
+ throw new Error("LLM_MODEL_ID environment variable must be set");
+}
+const LLM_MODEL_ID = env.LLM_MODEL_ID;
export async function fetchTextStream(query: string) {
let payload = {};
let url = "";
payload = {
- model: "Intel/neural-chat-7b-v3-3",
+ model: LLM_MODEL_ID,
messages: query,
};
url = `${CHAT_BASE_URL}`;