Merged
Changes from all commits · 24 commits
11eb8d8
Allow OpeaNeo4jLlamaIndexDataprep to use any openailike endpoint with…
edlee123 May 23, 2025
d88ab61
Updated log comment
edlee123 May 23, 2025
506648f
Allow neo4j retriever to use
edlee123 May 25, 2025
2942abc
Fixed openai dependency problem for both dataprep and retrievers comp…
edlee123 May 25, 2025
5a53bb5
Adjusted generate_community_summary Openai-like apis can handle token…
edlee123 May 25, 2025
f0084b5
Merge branch 'main' into graphrag_workshop
edlee123 May 27, 2025
f93944c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 27, 2025
0d908a3
Dependency fixes for llama-index vs openai
edlee123 May 28, 2025
2f820f4
Merge branch 'graphrag_workshop' of github.com:edlee123/GenAIComps in…
edlee123 May 28, 2025
8e3e7ef
Merge branch 'main' into graphrag_workshop
edlee123 May 28, 2025
34f27b4
Merge branch 'main' into graphrag_workshop
edlee123 May 29, 2025
86ee521
Merge branch 'main' into graphrag_workshop
ashahba Jun 2, 2025
a688703
Merge branch 'main' into graphrag_workshop
edlee123 Jun 3, 2025
a79720e
Added logic for trimming entite/relationship text below max_input_tok…
edlee123 Jun 5, 2025
f432c4c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 5, 2025
0318f38
Made methods private
edlee123 Jun 5, 2025
9ef0458
Merge branch 'graphrag_workshop' of github.com:edlee123/GenAIComps in…
edlee123 Jun 5, 2025
4adf116
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 5, 2025
6adc0b9
Merge branch 'main' into graphrag_workshop
edlee123 Jun 5, 2025
f6e2270
Delete redundant comment
edlee123 Jun 5, 2025
5baa7a4
Merge branch 'graphrag_workshop' of github.com:edlee123/GenAIComps in…
edlee123 Jun 5, 2025
b9fd61d
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 5, 2025
2bf7596
Merge branch 'main' into graphrag_workshop
edlee123 Jun 6, 2025
2f571dc
Merge branch 'main' into graphrag_workshop
edlee123 Jun 9, 2025
264 changes: 204 additions & 60 deletions comps/dataprep/src/integrations/neo4j_llamaindex.py
@@ -33,7 +33,6 @@
from llama_index.llms.openai import OpenAI
from llama_index.llms.openai_like import OpenAILike
from neo4j import GraphDatabase
from transformers import AutoTokenizer

from comps import CustomLogger, DocPath, OpeaComponent, OpeaComponentRegistry, ServiceType
from comps.cores.proto.api_protocol import DataprepRequest, Neo4jDataprepRequest
@@ -68,7 +67,9 @@
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "neo4jtest")

# LLM/Embedding endpoints
# TGI_LLM_ENDPOINT allows indexing graph nodes using openai-like endpoints (not just TGI)
TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", f"http://{host_ip}:6005")
TGI_LLM_ENDPOINT_KEY = os.getenv("TGI_LLM_ENDPOINT_KEY", "fake")
TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_EMBEDDING_ENDPOINT", f"http://{host_ip}:6006")

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
@@ -80,6 +81,23 @@
MAX_OUTPUT_TOKENS = os.getenv("MAX_OUTPUT_TOKENS", "1024")


def get_hf_tokenizer(model_name):
"""Attempt to load a HuggingFace tokenizer from string.

Args:
model_name (str): Name of the model to load tokenizer for

Returns:
AutoTokenizer or None: The loaded tokenizer if successful, None otherwise
"""
try:
from transformers import AutoTokenizer

return AutoTokenizer.from_pretrained(model_name)
except Exception:
return None
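
# Illustrative usage (not part of this change): the helper returns None instead of
# raising, so callers can choose between exact and estimated token counting, e.g.:
#
#     tokenizer = get_hf_tokenizer(LLM_MODEL_ID)
#     if tokenizer is not None:
#         n_tokens = len(tokenizer.tokenize(text))  # exact count via the model's tokenizer
#     else:
#         n_tokens = len(text) // 4  # rough character-based estimate (~4 chars per token)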


class GraphRAGStore(Neo4jPropertyGraphStore):
# https://github.com/run-llama/llama_index/blob/main/docs/docs/examples/cookbooks/GraphRAG_v2.ipynb
community_summary = {}
@@ -92,39 +110,192 @@ def __init__(self, username: str, password: str, url: str, llm: LLM):
self.driver = GraphDatabase.driver(NEO4J_URL, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))

async def generate_community_summary(self, text):
"""Generate summary for a given text using an LLM."""
"""Generate summary of extracted entity/relationships using an LLM.

This method may trim the relationships if tokens exceed LLM context length with the following logic:
1. For Hugging Face models: Uses AutoTokenizer to accurately count tokens
2. For other models: Uses character-based token estimation with a conservative ratio. If
the the text is still longer than the max input tokens, it will just use first 5 relationships as
a summary.
"""
model_name = LLM_MODEL_ID
max_input_length = int(MAX_INPUT_TOKENS)
if not model_name or not max_input_length:
raise ValueError(f"Could not retrieve model information from TGI endpoint: {TGI_LLM_ENDPOINT}")

tokenizer = AutoTokenizer.from_pretrained(model_name)

messages = [
ChatMessage(
role="system",
content=(
"You are provided with a set of relationships from a knowledge graph, each represented as "
"entity1->entity2->relation->relationship_description. Your task is to create a summary of these "
"relationships. The summary should include the names of the entities involved and a concise synthesis "
"of the relationship descriptions. The goal is to capture the most critical and relevant details that "
"highlight the nature and significance of each relationship. Ensure that the summary is coherent and "
"integrates the information in a way that emphasizes the key aspects of the relationships."
),
),
ChatMessage(role="user", content=text),
]
# Trim the messages to fit within the token limit
# Microsoft does more sophisticated content optimization
trimmed_messages = trim_messages_to_token_limit(tokenizer, messages, max_input_length)
max_input_tokens = int(MAX_INPUT_TOKENS)

# Define system prompt
system_prompt = (
"You are provided with a set of relationships from a knowledge graph, each represented as "
"entity1->entity2->relation->relationship_description. Your task is to create a summary of these "
"relationships. The summary should include the names of the entities involved and a concise synthesis "
"of the relationship descriptions. The goal is to capture the most critical and relevant details that "
"highlight the nature and significance of each relationship. Ensure that the summary is coherent and "
"integrates the information in a way that emphasizes the key aspects of the relationships."
)

if OPENAI_API_KEY:
response = OpenAI().achat(messages)
else:
response = await self.llm.achat(trimmed_messages)
messages = None
was_trimmed = False

try:
tokenizer = get_hf_tokenizer(model_name)
if tokenizer:
logger.info(f"Using Hugging Face tokenizer to check context limit for model: {model_name}")
original_messages = [
ChatMessage(role="system", content=system_prompt),
ChatMessage(role="user", content=text),
]

messages = self._trim_messages_to_token_limit(tokenizer, original_messages, max_input_tokens)
was_trimmed = (
len(messages) < len(original_messages) or messages[-1].content != original_messages[-1].content
)
if was_trimmed:
logger.info(
f"Content trimmed using {model_name} tokenizer to fit within max {max_input_tokens} tokens."
)
messages[0].content += " Note: Due to token limits, only a subset of the relationships are summarized."
else:
raise ValueError(
f"Could not load Hugging Face tokenizer for model {model_name} for trimming - using character-based token estimation"
)

except Exception as e:
logger.info(f"Using character-based token estimation and will trim if needed: {str(e)}")
trimmed_text, was_trimmed = self._trim_messages_with_estimated_tokens(text, system_prompt, max_input_tokens)

system_content = system_prompt
if was_trimmed:
system_content += " Note: Due to token limits, only a subset of the relationships are provided."

messages = [
ChatMessage(role="system", content=system_content),
ChatMessage(role="user", content=trimmed_text),
]

clean_response = re.sub(r"^assistant:\s*", "", str(response)).strip()
return clean_response
try:
# Create the relationship summaries.
if OPENAI_API_KEY:
response = await OpenAI().achat(messages)
else:
response = await self.llm.achat(messages)

clean_response = re.sub(r"^assistant:\s*", "", str(response.message.content)).strip()
return clean_response

except Exception as e:
logger.error(f"Error generating community summary: {str(e)}")
# Use a fallback summary by listing the first 5 relationships
fallback_relationships = 5
relationships = text.split("\n")
important_rels = relationships[: min(fallback_relationships, len(relationships))]
fallback_summary = "Summary could not be generated. Key relationships include:\n\n"
for rel in important_rels:
fallback_summary += f"- {rel}\n"

logger.warning("Returning fallback summary with important relationships due to LLM error")
return fallback_summary

def _trim_messages_with_estimated_tokens(self, text, system_prompt, max_input_tokens):
"""Trim input text to fit within token limits (if needed) using character-based
token count estimation.

A fall-back method can be used if there is an LLM error still i.e. this method
should have trimmed more.

This method:
1. Uses character count to approximate token count, with conservative buffer.
2. Splits text by relationships (newlines)
3. Keeps the first relationship lines before estimated tokens exceeds the token limit.

Args:
text: The input text, typically containing relationship descriptions
system_prompt: The system prompt text
max_input_tokens: Maximum allowed input tokens

Returns:
Tuple of (trimmed_text, was_trimmed)
"""
# Character-based token estimation (conservative 4 chars per token)
chars_per_token = 4
estimated_system_tokens = len(system_prompt) // chars_per_token

# Reserve tokens for system prompt and safety buffer
available_tokens = max_input_tokens - estimated_system_tokens - 100 # 100 token buffer
available_chars = available_tokens * chars_per_token

# If text fits within limits, return as-is
if len(text) <= available_chars:
return text, False

# Otherwise the text exceeds the limit and needs trimming.
# Trim by relationships (typically one per line).
relationships = text.split("\n")

# Start with empty trimmed text and add relationships until we approach the limit
trimmed_text = []
current_chars = 0

for rel in relationships:
# Add 1 for the newline character
rel_chars = len(rel) + 1
if current_chars + rel_chars > available_chars:
# We've reached the limit
break

trimmed_text.append(rel)
current_chars += rel_chars

result = "\n".join(trimmed_text)
warning = "\n\n[NOTE: Some relationships were omitted due to estimated max token limits.]"

# Make sure adding the warning doesn't exceed the limit
if len(result) + len(warning) <= available_chars:
result += warning

percentage_kept = (len(result) / len(text)) * 100
logger.info(
f"Text trimmed to fit token limits. Kept {percentage_kept:.1f}% of original content ({len(result)} chars of {len(text)})."
)

return result, True
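
# Worked example of the estimation above (illustrative numbers only): with
# max_input_tokens = 2048 and a system prompt of ~600 characters,
#     estimated_system_tokens = 600 // 4 = 150
#     available_tokens        = 2048 - 150 - 100 = 1798
#     available_chars         = 1798 * 4 = 7192
# so relationship lines are kept, in order, until their cumulative length
# (including newlines) would exceed 7192 characters.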

def _trim_messages_to_token_limit(self, tokenizer, messages, max_tokens):
"""Trim the messages to fit within the token limit using a HuggingFace tokenizer.

This method is used when a HuggingFace model is available, allowing for
precise token counting and trimming based on the model's specific tokenizer.

Args:
tokenizer: A HuggingFace tokenizer instance
messages: List of ChatMessage objects
max_tokens: Maximum allowed input tokens

Returns:
List of trimmed ChatMessage objects
"""
total_tokens = 0
trimmed_messages = []
buffer = 100
effective_max_tokens = max_tokens - buffer

for message in messages:
tokens = tokenizer.tokenize(message.content)
message_token_count = len(tokens)
if total_tokens + message_token_count > effective_max_tokens:
# Trim the message to fit within the remaining token limit
logger.info(f"Trimming messages: {total_tokens + message_token_count} > {effective_max_tokens}")
logger.info(f"message_token_count: {message_token_count}")
remaining_tokens = effective_max_tokens - total_tokens
logger.info(f"remaining_tokens: {remaining_tokens}")
tokens = tokens[:remaining_tokens]
message.content = tokenizer.convert_tokens_to_string(tokens)
trimmed_messages.append(message)
break
else:
total_tokens += message_token_count
trimmed_messages.append(message)

return trimmed_messages

async def build_communities(self):
"""Builds communities from the graph and summarizes them."""
@@ -490,33 +661,6 @@ def get_attribute_from_tgi_endpoint(url, attribute_name):
return None


def trim_messages_to_token_limit(tokenizer, messages, max_tokens):
"""Trim the messages to fit within the token limit."""
total_tokens = 0
trimmed_messages = []
buffer = 100
effective_max_tokens = max_tokens - buffer

for message in messages:
tokens = tokenizer.tokenize(message.content)
message_token_count = len(tokens)
if total_tokens + message_token_count > effective_max_tokens:
# Trim the message to fit within the remaining token limit
logger.info(f"Trimming messages: {total_tokens + message_token_count} > {effective_max_tokens}")
logger.info(f"message_token_count: {message_token_count}")
remaining_tokens = effective_max_tokens - total_tokens
logger.info(f"remaining_tokens: {remaining_tokens}")
tokens = tokens[:remaining_tokens]
message.content = tokenizer.convert_tokens_to_string(tokens)
trimmed_messages.append(message)
break
else:
total_tokens += message_token_count
trimmed_messages.append(message)

return trimmed_messages


logger = CustomLogger("opea_dataprep_neo4j_llamaindex")
logflag = os.getenv("LOGFLAG", False)

@@ -549,12 +693,12 @@ def initialize_graph_store_and_models(self):
except Exception as e:
logger.info(f"An error occurred while verifying the API Key: {e}")
else:
logger.info("NO OpenAI API Key. TGI/VLLM/TEI endpoints will be used.")
logger.info("OpenAI-like endpoint will be used.")
# works with TGI and VLLM endpoints
self.llm = OpenAILike(
model=LLM_MODEL_ID,
api_base=TGI_LLM_ENDPOINT + "/v1",
api_key="fake",
api_key=TGI_LLM_ENDPOINT_KEY,
temperature=0.7,
max_tokens=int(MAX_OUTPUT_TOKENS), # 1512
timeout=1200,  # timeout in seconds
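
For reference, a minimal sketch of how the updated environment variables are intended to be used (the endpoint URL, API key, and default model below are placeholders, not values from this change): any OpenAI-compatible server can back the component by exporting TGI_LLM_ENDPOINT and the new TGI_LLM_ENDPOINT_KEY, which are passed to the OpenAILike client.

import os
from llama_index.llms.openai_like import OpenAILike

# Placeholder values; any OpenAI-compatible serving endpoint (TGI, vLLM, ...) works.
os.environ.setdefault("TGI_LLM_ENDPOINT", "http://localhost:6005")
os.environ.setdefault("TGI_LLM_ENDPOINT_KEY", "my-api-key")  # falls back to "fake" when unset

llm = OpenAILike(
    model=os.environ.get("LLM_MODEL_ID", "meta-llama/Meta-Llama-3.1-8B-Instruct"),
    api_base=os.environ["TGI_LLM_ENDPOINT"] + "/v1",
    api_key=os.environ["TGI_LLM_ENDPOINT_KEY"],
    temperature=0.7,
    max_tokens=1024,
    timeout=1200,  # seconds
)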
1 change: 1 addition & 0 deletions comps/dataprep/src/requirements.txt
@@ -39,6 +39,7 @@ markdown
moviepy
neo4j
numpy
# https://github.com/run-llama/llama_index/issues/18823
openai==1.81.0
openai-whisper
opencv-python
2 changes: 2 additions & 0 deletions comps/retrievers/src/integrations/config.py
@@ -72,6 +72,8 @@ def get_boolean_env_var(var_name, default_value=False):
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "test")
host_ip = os.getenv("host_ip")
TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", f"http://{host_ip}:6005")
TGI_LLM_ENDPOINT_KEY = os.getenv("TGI_LLM_ENDPOINT_KEY", "fake")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
OPENAI_EMBEDDING_MODEL = os.getenv("OPENAI_EMBEDDING_MODEL", "text-embedding-3-small")
OPENAI_LLM_MODEL = os.getenv("OPENAI_LLM_MODEL", "gpt-4o")
LLM_MODEL_ID = os.getenv("LLM_MODEL_ID", "meta-llama/Meta-Llama-3.1-8B-Instruct")
3 changes: 2 additions & 1 deletion comps/retrievers/src/integrations/neo4j.py
@@ -34,6 +34,7 @@
OPENAI_LLM_MODEL,
TEI_EMBEDDING_ENDPOINT,
TGI_LLM_ENDPOINT,
TGI_LLM_ENDPOINT_KEY,
)

logger = CustomLogger("neo4j_retrievers")
@@ -239,7 +240,7 @@ def _initialize_client(self):
llm = OpenAILike(
model=LLM_MODEL_ID,
api_base=TGI_LLM_ENDPOINT + "/v1",
api_key="fake",
api_key=TGI_LLM_ENDPOINT_KEY,
timeout=600,
temperature=0.7,
max_tokens=int(MAX_OUTPUT_TOKENS),