38 commits
8217203
A compose file to use openrouter, openai style endpoint instead of a …
edlee123 Mar 17, 2025
304b2da
Merge branch 'main' into chatqna_with_openai_endpoint
edlee123 May 19, 2025
af09744
First working version of ChatQnA with openai compatible endpoint
edlee123 May 21, 2025
770ee94
First version of GraphRAG Xeon for LLM, and open endpoint for graph i…
edlee123 May 23, 2025
ca90df5
Updated align_generator to be like ChatQnA bytes format. Added loggin…
edlee123 May 25, 2025
1658169
Working GraphRAG on laptop cpu with help of three LLM endpoints for d…
edlee123 May 25, 2025
42d9d41
The GraphRAG gaudi compose.yaml had to be updated so the UI can pass …
edlee123 May 25, 2025
fa1bc52
Provided build instructions of GraphRAG images for Xeon
edlee123 May 25, 2025
b6e7729
Small edits to GraphRAG xeon readme
edlee123 May 25, 2025
70330c5
Merge branch 'main' into graphrag_workshop
edlee123 May 25, 2025
6606ab0
Add architecture container diagram
edlee123 May 25, 2025
89dac5a
Fix to compose.yaml dataprep-neorj-llamaindex port and dataprep compo…
edlee123 May 25, 2025
5c97a0f
Added sample graphdata and updated README.md to provide instruction
edlee123 May 25, 2025
7493aec
Improving documentation
edlee123 May 25, 2025
f5fcb1e
Made fixes to README
edlee123 May 25, 2025
cb1e441
Small edits to readme and yaml for documentation
edlee123 May 25, 2025
27bd1d8
Merge branch 'opea-project:main' into graphrag_workshop
edlee123 May 27, 2025
88ede23
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 27, 2025
afb61ef
Adding instruction to browse the Neo4J db
edlee123 May 28, 2025
12ab0c5
Merge branch 'main' into graphrag_workshop
ZePan110 May 30, 2025
41cd535
Add missing environment variables so the UI can upload files
edlee123 Jun 5, 2025
881dc8a
Fix typos in readmes and change example queries
edlee123 Jun 5, 2025
5c20c18
Merge branch 'graphrag_workshop' of github.com:edlee123/GenAIExamples…
edlee123 Jun 5, 2025
f2506ca
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 5, 2025
6c0e87b
Fix relative paths in README.md
edlee123 Jun 5, 2025
342d8bc
Fix README formatting
edlee123 Jun 5, 2025
1750552
Merge branch 'graphrag_workshop' of github.com:edlee123/GenAIExamples…
edlee123 Jun 5, 2025
076a3ec
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 5, 2025
ea8371d
Merge branch 'main' into graphrag_workshop
edlee123 Jun 5, 2025
7dd705b
Merge branch 'main' into chatqna_with_openai_endpoint
edlee123 Jun 9, 2025
1f7e1f9
Fix to not show null content json strings for OpenAI-like api. Also i…
edlee123 Jun 9, 2025
907616e
Fix to use correct environment variable for HF token in README.md and…
edlee123 Jun 9, 2025
495169e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 10, 2025
dd03f1b
Remove instruction about checking out particular version
edlee123 Jun 10, 2025
9d2534a
Merge branch 'main' into chatqna_with_openai_endpoint
edlee123 Jun 10, 2025
0d354f6
Merge branch 'chatqna_with_openai_endpoint' of github.com:edlee123/Ge…
edlee123 Jun 10, 2025
87979cb
Exclude graphrag updates
edlee123 Jun 10, 2025
0ff59ce
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 10, 2025
83 changes: 65 additions & 18 deletions ChatQnA/chatqna.py
@@ -3,9 +3,15 @@

import argparse
import json
import logging
import os
import re

# Configure logging
logger = logging.getLogger(__name__)
log_level = logging.DEBUG if os.getenv("LOGFLAG", "").lower() == "true" else logging.INFO
logging.basicConfig(level=log_level, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
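
The stanza above gates verbosity behind the LOGFLAG environment variable. A minimal, self-contained sketch of the same toggle (the logger name and messages are illustrative, not part of the PR):

```python
import logging
import os

# Same pattern as the diff: LOGFLAG=true switches the level to DEBUG, anything else keeps INFO.
logger = logging.getLogger("logflag_demo")
log_level = logging.DEBUG if os.getenv("LOGFLAG", "").lower() == "true" else logging.INFO
logging.basicConfig(level=log_level, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")

logger.debug("emitted only when LOGFLAG=true")
logger.info("always emitted")
```

Running this as `LOGFLAG=true python logflag_demo.py` prints both lines; without the variable, only the INFO line appears.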

from comps import MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceRoleType, ServiceType
from comps.cores.mega.utils import handle_message
from comps.cores.proto.api_protocol import (
@@ -62,6 +68,8 @@ def generate_rag_prompt(question, documents):


def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
logger.debug(f"Aligning inputs for service: {self.services[cur_node].name}, type: {self.services[cur_node].service_type}")

if self.services[cur_node].service_type == ServiceType.EMBEDDING:
inputs["inputs"] = inputs["text"]
del inputs["text"]
@@ -83,6 +91,9 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k
# next_inputs["repetition_penalty"] = inputs["repetition_penalty"]
next_inputs["temperature"] = inputs["temperature"]
inputs = next_inputs

# Log the aligned inputs (be careful with sensitive data)
logger.debug(f"Aligned inputs for {self.services[cur_node].name}: {type(inputs)}")
return inputs
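
The new debug lines bracket the existing remapping logic: each downstream service gets its payload fields renamed to the schema it expects (embedding servers take the text under "inputs"; LLM servers take a trimmed generation payload). A hedged sketch of that idea, with a simplified enum and field names drawn from this hunk (remap_inputs itself is illustrative, not the orchestrator's API):

```python
from enum import Enum

class ServiceType(Enum):
    EMBEDDING = "embedding"
    LLM = "llm"

def remap_inputs(service_type: ServiceType, inputs: dict) -> dict:
    # Embedding endpoints expect the raw text under "inputs".
    if service_type is ServiceType.EMBEDDING:
        return {"inputs": inputs["text"]}
    # LLM endpoints expect a generation payload; defaults here are assumptions.
    if service_type is ServiceType.LLM:
        return {
            "query": inputs["inputs"],
            "max_new_tokens": inputs.get("max_new_tokens", 1024),
            "temperature": inputs.get("temperature", 0.7),
        }
    return inputs

print(remap_inputs(ServiceType.EMBEDDING, {"text": "What is OPEA?"}))
```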


@@ -123,7 +134,7 @@ def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_di
elif input_variables == ["question"]:
prompt = prompt_template.format(question=data["initial_query"])
else:
print(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']")
logger.warning(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']")
prompt = ChatTemplate.generate_rag_prompt(data["initial_query"], docs)
else:
prompt = ChatTemplate.generate_rag_prompt(data["initial_query"], docs)
@@ -152,7 +163,7 @@ def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_di
elif input_variables == ["question"]:
prompt = prompt_template.format(question=prompt)
else:
print(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']")
logger.warning(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']")
prompt = ChatTemplate.generate_rag_prompt(prompt, reranked_docs)
else:
prompt = ChatTemplate.generate_rag_prompt(prompt, reranked_docs)
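
Both hunks above apply the same rule: a custom template is used only when its placeholders are exactly ["question", "context"] or ["question"]; anything else logs a warning and falls back to the built-in RAG prompt. A standalone sketch of that dispatch, with a simplified stand-in for ChatTemplate.generate_rag_prompt and a regex-based placeholder scan (both are assumptions, not the comps implementation):

```python
import logging
import re

logger = logging.getLogger(__name__)

def default_rag_prompt(question: str, context: str) -> str:
    # Stand-in for ChatTemplate.generate_rag_prompt.
    return f"Answer using only the context.\nContext: {context}\nQuestion: {question}"

def build_prompt(template: str, question: str, context: str) -> str:
    # Collect placeholder names like {question} and {context} from the template.
    input_variables = sorted(set(re.findall(r"{(\w+)}", template)))
    if input_variables == ["context", "question"]:
        return template.format(question=question, context=context)
    if input_variables == ["question"]:
        return template.format(question=question)
    logger.warning("%s not used, we only support ['question', 'context']", template)
    return default_rag_prompt(question, context)

print(build_prompt("Q: {question}\nDocs: {context}", "What is GraphRAG?", "sample docs"))
```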
@@ -171,29 +182,65 @@ def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_di


def align_generator(self, gen, **kwargs):
# OpenAI response format
# b'data:{"id":"","object":"text_completion","created":1725530204,"model":"meta-llama/Meta-Llama-3-8B-Instruct","system_fingerprint":"2.0.1-native","choices":[{"index":0,"delta":{"role":"assistant","content":"?"},"logprobs":null,"finish_reason":null}]}\n\n'
"""Aligns the generator output to match ChatQnA's format of sending bytes.

Handles different LLM output formats (TGI, OpenAI) and properly filters
empty or null content chunks to avoid UI display issues.
"""
# OpenAI response format example:
# b'data:{"id":"","object":"text_completion","created":1725530204,"model":"meta-llama/Meta-Llama-3-8B-Instruct",
# "system_fingerprint":"2.0.1-native","choices":[{"index":0,"delta":{"role":"assistant","content":"?"},
# "logprobs":null,"finish_reason":null}]}\n\n'

for line in gen:
line = line.decode("utf-8")
start = line.find("{")
end = line.rfind("}") + 1

json_str = line[start:end]
try:
# sometimes yield empty chunk, do a fallback here
line = line.decode("utf-8")
start = line.find("{")
end = line.rfind("}") + 1

# Skip lines with invalid JSON structure
if start == -1 or end <= start:
logger.debug("Skipping line with invalid JSON structure")
continue

json_str = line[start:end]

# Parse the JSON data
json_data = json.loads(json_str)

# Handle TGI format responses
if "ops" in json_data and "op" in json_data["ops"][0]:
if "value" in json_data["ops"][0] and isinstance(json_data["ops"][0]["value"], str):
yield f"data: {repr(json_data['ops'][0]['value'].encode('utf-8'))}\n\n"
else:
pass
elif (
json_data["choices"][0]["finish_reason"] != "eos_token"
and "content" in json_data["choices"][0]["delta"]
):
yield f"data: {repr(json_data['choices'][0]['delta']['content'].encode('utf-8'))}\n\n"
# Empty value chunks are silently skipped

# Handle OpenAI format responses
elif "choices" in json_data and len(json_data["choices"]) > 0:
# Only yield content if it exists and is not null
if (
"delta" in json_data["choices"][0] and
"content" in json_data["choices"][0]["delta"] and
json_data["choices"][0]["delta"]["content"] is not None
):
content = json_data["choices"][0]["delta"]["content"]
yield f"data: {repr(content.encode('utf-8'))}\n\n"
# Null content chunks are silently skipped
elif (
"delta" in json_data["choices"][0] and
"content" in json_data["choices"][0]["delta"] and
json_data["choices"][0]["delta"]["content"] is None
):
logger.debug("Skipping null content chunk")

except json.JSONDecodeError as e:
# Log the error with the problematic JSON string for better debugging
logger.error(f"JSON parsing error in align_generator: {e}\nProblematic JSON: {json_str[:200]}")
# Skip sending invalid JSON to avoid UI issues
continue
except Exception as e:
yield f"data: {repr(json_str.encode('utf-8'))}\n\n"
logger.error(f"Unexpected error in align_generator: {e}, line snippet: {line[:100]}...")
# Skip sending to avoid UI issues
continue
yield "data: [DONE]\n\n"

