Skip to content

Commit

Permalink
Fix Community ID loading for DRIFT search over existing indexes (#1360)
Browse files Browse the repository at this point in the history
  • Loading branch information
AlonsoGuevara authored Nov 6, 2024
1 parent 80c0c7b commit 635c211
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 34 deletions.
4 changes: 4 additions & 0 deletions .semversioner/next-release/patch-20241106000820148024.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"type": "patch",
"description": "Fix Community ID loading for DRIFT search over existing indexes"
}
66 changes: 33 additions & 33 deletions graphrag/api/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,39 +437,39 @@ def _patch_vector_store(
entities=_entities, vectorstore=description_embedding_store
)

if with_reports is not None:
from graphrag.query.input.loaders.dfs import (
store_reports_semantic_embeddings,
)
from graphrag.vector_stores.lancedb import LanceDBVectorStore

community_reports = with_reports
collection_name = (
config.embeddings.vector_store.get("container_name", "default")
if config.embeddings.vector_store
else "default"
)
# Store report embeddings
_reports = read_indexer_reports(
community_reports,
nodes,
community_level,
content_embedding_col="full_content_embedding",
config=config,
)

full_content_embedding_store = LanceDBVectorStore(
db_uri=config.embeddings.vector_store["db_uri"],
collection_name=f"{collection_name}-community-full_content",
overwrite=config.embeddings.vector_store["overwrite"],
)
full_content_embedding_store.connect(
db_uri=config.embeddings.vector_store["db_uri"]
)
# dump embeddings from the reports list to the full_content_embedding_store
store_reports_semantic_embeddings(
reports=_reports, vectorstore=full_content_embedding_store
)
if with_reports is not None:
from graphrag.query.input.loaders.dfs import (
store_reports_semantic_embeddings,
)
from graphrag.vector_stores.lancedb import LanceDBVectorStore

community_reports = with_reports
collection_name = (
config.embeddings.vector_store.get("container_name", "default")
if config.embeddings.vector_store
else "default"
)
# Store report embeddings
_reports = read_indexer_reports(
community_reports,
nodes,
community_level,
content_embedding_col="full_content_embedding",
config=config,
)

full_content_embedding_store = LanceDBVectorStore(
db_uri=config.embeddings.vector_store["db_uri"],
collection_name=f"{collection_name}-community-full_content",
overwrite=config.embeddings.vector_store["overwrite"],
)
full_content_embedding_store.connect(
db_uri=config.embeddings.vector_store["db_uri"]
)
# dump embeddings from the reports list to the full_content_embedding_store
store_reports_semantic_embeddings(
reports=_reports, vectorstore=full_content_embedding_store
)

return config

Expand Down
2 changes: 1 addition & 1 deletion graphrag/query/indexer_adapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def read_indexer_reports(

return read_community_reports(
df=report_df,
- id_col="community",
+ id_col="id",
short_id_col="community",
summary_embedding_col=None,
content_embedding_col=content_embedding_col,
Expand Down

0 comments on commit 635c211

Please sign in to comment.