)
from graphrag.cache.pipeline_cache import PipelineCache
-from graphrag.index.operations.cluster_graph import cluster_graph
from graphrag.index.operations.create_graph import create_graph
from graphrag.index.operations.extract_entities import extract_entities
from graphrag.index.operations.snapshot import snapshot
@@ -25,13 +24,11 @@
from graphrag.storage.pipeline_storage import PipelineStorage


-async def create_base_entity_graph(
+async def extract_graph(
    text_units: pd.DataFrame,
    callbacks: VerbCallbacks,
    cache: PipelineCache,
    storage: PipelineStorage,
-    runtime_storage: PipelineStorage,
-    clustering_strategy: dict[str, Any],
    extraction_strategy: dict[str, Any] | None = None,
    extraction_num_threads: int = 4,
    extraction_async_mode: AsyncType = AsyncType.AsyncIO,
@@ -40,7 +37,7 @@ async def create_base_entity_graph(
    summarization_num_threads: int = 4,
    snapshot_graphml_enabled: bool = False,
    snapshot_transient_enabled: bool = False,
-) -> None:
+) -> tuple[pd.DataFrame, pd.DataFrame]:
    """All the steps to create the base entity graph."""
    # this returns a graph for each text unit, to be merged later
    entity_dfs, relationship_dfs = await extract_entities(
@@ -73,17 +70,6 @@ async def create_base_entity_graph(

    base_entity_nodes = _prep_nodes(merged_entities, entity_summaries, graph)

-    communities = cluster_graph(
-        graph,
-        strategy=clustering_strategy,
-    )
-
-    base_communities = _prep_communities(communities)
-
-    await runtime_storage.set("base_entity_nodes", base_entity_nodes)
-    await runtime_storage.set("base_relationship_edges", base_relationship_edges)
-    await runtime_storage.set("base_communities", base_communities)
-
    if snapshot_graphml_enabled:
        # todo: extract graphs at each level, and add in meta like descriptions
        await snapshot_graphml(
@@ -105,12 +91,8 @@ async def create_base_entity_graph(
            storage=storage,
            formats=["parquet"],
        )
-        await snapshot(
-            base_communities,
-            name="base_communities",
-            storage=storage,
-            formats=["parquet"],
-        )
+
+    return (base_entity_nodes, base_relationship_edges)


def _merge_entities(entity_dfs) -> pd.DataFrame:
@@ -158,13 +140,6 @@ def _prep_edges(relationships, summaries) -> pd.DataFrame:
    return edges


-def _prep_communities(communities) -> pd.DataFrame:
-    # Convert the input into a DataFrame and explode the title column
-    return pd.DataFrame(
-        communities, columns=pd.Index(["level", "community", "parent", "title"])
-    ).explode("title")
-
-
def _compute_degree(graph: nx.Graph) -> pd.DataFrame:
    return pd.DataFrame([
        {"name": node, "degree": int(degree)}
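For reference, a minimal sketch of how a caller might consume the workflow after this change. It assumes the caller lives in the same module as `extract_graph` above (so no import of it is shown), uses only parameter names visible in the diffed signature, leaves the `VerbCallbacks` argument untyped because its import is not part of this diff, and relies on defaults for every parameter not passed explicitly; the wrapper name `run_extract_graph` is hypothetical.

```python
import pandas as pd

from graphrag.cache.pipeline_cache import PipelineCache
from graphrag.storage.pipeline_storage import PipelineStorage


async def run_extract_graph(
    text_units: pd.DataFrame,
    callbacks,  # VerbCallbacks instance supplied by the pipeline; import not shown in this diff
    cache: PipelineCache,
    storage: PipelineStorage,
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Hypothetical caller: run the renamed workflow and pass its outputs along."""
    # `extract_graph` is assumed to be defined in this same module (see diff above).
    # It now returns its outputs directly instead of writing base_entity_nodes,
    # base_relationship_edges, and base_communities into runtime_storage.
    base_entity_nodes, base_relationship_edges = await extract_graph(
        text_units=text_units,
        callbacks=callbacks,
        cache=cache,
        storage=storage,
        snapshot_graphml_enabled=False,
        snapshot_transient_enabled=False,
    )
    # Community clustering is no longer performed here; whatever step builds
    # communities would now take these two frames as its input.
    return base_entity_nodes, base_relationship_edges
```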