Skip to content

Commit

Permalink
Correct _format_refs to only include unique documents
Browse files (browse the repository at this point in the history)
  • Loading branch information
spodgorny9 committed Sep 27, 2024
1 parent 882ffb1 commit 27b0c86
Showing 1 changed file with 9 additions and 3 deletions.
12 changes: 9 additions & 3 deletions elm/wizard.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,11 +450,14 @@ def __init__(self, db_host, db_port, db_name,
self.psycopg2 = try_import('psycopg2')

if meta_columns is None:
self.meta_columns = ['title', 'url', 'id']
self.meta_columns = ['title', 'url', 'nrel_id', 'id']
else:
self.meta_columns = meta_columns

assert 'id' in self.meta_columns, "Please include the 'id' column!"
assert 'id' in self.meta_columns, ("Please include the chunk id column: "
"'id'!")
assert 'nrel_id' in self.meta_columns, ("Please include the document id "
"column: 'nrel_id'!")

if cursor is None:
db_user = os.getenv("EWIZ_DB_USER")
Expand Down Expand Up @@ -652,10 +655,13 @@ def _format_refs(self, refs, ids):
"connection or query.")

unique_ref_list = []
unique_nrel_ids = set()
for ref_dict in ref_list:
if any(ref_dict == d for d in unique_ref_list):
if ref_dict['nrel_id'] in unique_nrel_ids:
continue
unique_ref_list.append(ref_dict)
unique_nrel_ids.add(ref_dict['nrel_id'])

ref_list = unique_ref_list

if 'id' in ref_list[0]:
Expand Down

0 comments on commit 27b0c86

Please sign in to comment.