Skip to content

Commit

Permalink
Correct _format_refs to only include unique documents
Browse files (browse the repository at this point in the history)
  • Loading branch information
spodgorny9 committed Sep 30, 2024
1 parent 882ffb1 commit d9f9721
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 27 deletions.
12 changes: 9 additions & 3 deletions elm/wizard.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,11 +450,14 @@ def __init__(self, db_host, db_port, db_name,
self.psycopg2 = try_import('psycopg2')

if meta_columns is None:
self.meta_columns = ['title', 'url', 'id']
self.meta_columns = ['title', 'url', 'nrel_id', 'id']
else:
self.meta_columns = meta_columns

assert 'id' in self.meta_columns, "Please include the 'id' column!"
assert 'id' in self.meta_columns, ("Please include the chunk id "
"column: 'id'!")
assert 'nrel_id' in self.meta_columns, ("Please include the document "
"id column: 'nrel_id'!")

if cursor is None:
db_user = os.getenv("EWIZ_DB_USER")
Expand Down Expand Up @@ -652,10 +655,13 @@ def _format_refs(self, refs, ids):
"connection or query.")

unique_ref_list = []
unique_nrel_ids = set()
for ref_dict in ref_list:
if any(ref_dict == d for d in unique_ref_list):
if ref_dict['nrel_id'] in unique_nrel_ids:
continue
unique_ref_list.append(ref_dict)
unique_nrel_ids.add(ref_dict['nrel_id'])

ref_list = unique_ref_list

if 'id' in ref_list[0]:
Expand Down
2 changes: 1 addition & 1 deletion tests/data/postgres_query_db.txt

Large diffs are not rendered by default.

13 changes: 1 addition & 12 deletions tests/data/postgres_ref_output.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1 @@
[
( "Adapting Agrivoltaics for Solar Mini-Grids in Haiti", "https://research-hub.nrel.gov/en/publications/adapting-agrivoltaics-for-solar-mini-grids-in-haiti"),
("2023 International Energy Workshop Summary (NREL Internal Use Only)", "https://research-hub.nrel.gov/en/publications/2023-international-energy-workshop-summary-nrel-internal-use-only"),
("A 2023 Perspective: What Is the Value of Hybridization?", "https://research-hub.nrel.gov/en/publications/a-2023-perspective-what-is-the-value-of-hybridization"),
("2022 Bioenergy Industry Status Report", "https://research-hub.nrel.gov/en/publications/2022-bioenergy-industry-status-report"),
("Bashar Anwar", "https://research-hub.nrel.gov/en/persons/bashar-anwar"),
("Rajendra Adhikari", "https://research-hub.nrel.gov/en/persons/rajendra-adhikari"),
("2024 JISEA Annual Meeting: Session 4", "https://research-hub.nrel.gov/en/publications/2024-jisea-annual-meeting-session-4"),
("Ad-Mat: Adaptations of Mature Manufacturing Strategies for Accelerated Redox Flow Battery Deployment", "https://research-hub.nrel.gov/en/publications/ad-mat-adaptations-of-mature-manufacturing-strategies-for-acceler"),
("Achieving an 80% Renewable Portfolio in Alaska\'s Railbelt: Cost Analysis", "https://research-hub.nrel.gov/en/publications/achieving-an-80-renewable-portfolio-in-alaskaaposs-railbelt-cost-"),
("Doug Arent", "https://research-hub.nrel.gov/en/persons/doug-arent")
]
[('LA100 Equity Strategies. Chapter 10: Household Transportation Electrification', 'https://research-hub.nrel.gov/en/publications/5d165039-68a9-4a5d-8c48-17261f3cf02e', 'NREL-TP-5400-85957', '0b8ab085-df53-45d7-b338-6fef10bde449'), ('LA100 Equity Strategies. Chapter 10: Household Transportation Electrification', 'https://research-hub.nrel.gov/en/publications/5d165039-68a9-4a5d-8c48-17261f3cf02e', 'NREL-TP-5400-85957', '103d49b6-9273-47ff-924b-253225c444a0'), ('The Los Angeles 100% Renewable Energy Study (LA100): Chapter 12. Synthesis', 'https://research-hub.nrel.gov/en/publications/132a7f1b-3d16-4a8f-944b-f4ebcd358c8d', 'NREL-TP-6A20-85433', '8f0c4221-1710-4381-b084-c9afc4dd8a53'), ('LA100 Equity Strategies. Chapter 11: Truck Electrification for Improved Air Quality and Health', 'https://research-hub.nrel.gov/en/publications/99001a6f-21af-4fb1-ab1d-62a2d2ec8a76', 'NREL-TP-6A20-85958', '8fe6d830-68bf-43ec-85f4-23f911f21c5b'), ('The Los Angeles 100% Renewable Energy Study (LA100): Chapter 10. Environmental Justice', 'https://research-hub.nrel.gov/en/publications/de86d778-6183-46a2-8748-404068d1e6e1', 'NREL-TP-6A20-85431', 'ea6f2998-4c2a-459b-b75b-8dc93d219f4c')]
27 changes: 16 additions & 11 deletions tests/test_wizard_postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,11 @@ def test_ref_replace():
db_name='Dummy', db_schema='Dummy',
db_table='Dummy', cursor=Cursor(),
boto_client=BotoClient(),
meta_columns=['title', 'url', 'id'])
meta_columns=['title', 'url',
'nrel_id', 'id'])

refs = [(chr(34), 'test.com', '5a'),
('remove "double" quotes', 'test_2.com', '7b')]
refs = [(chr(34), 'test.com', 'nrel-1', '5a'),
('remove "double" quotes', 'test_2.com', 'nrel-2', '7b')]

ids = np.array(['7b', '5a'])

Expand All @@ -128,10 +129,11 @@ def test_ids():
db_name='Dummy', db_schema='Dummy',
db_table='Dummy', cursor=Cursor(),
boto_client=BotoClient(),
meta_columns=['title', 'url', 'id'])
meta_columns=['title', 'url',
'nrel_id', 'id'])

refs = [('title', 'test.com', '5a'),
('title2', 'test_2.com', '7b')]
refs = [('title', 'test.com', 'nrel-1', '5a'),
('title2', 'test_2.com', 'nrel-2', '7b')]

ids = np.array(['7c', '5a'])

Expand All @@ -148,15 +150,18 @@ def test_sorted_refs():
db_name='Dummy', db_schema='Dummy',
db_table='Dummy', cursor=Cursor(),
boto_client=BotoClient(),
meta_columns=['title', 'url', 'id'])
meta_columns=['title', 'url',
'nrel_id', 'id'])

refs = [('title', 'test.com', '5a'),
('title2', 'test_2.com', '7b')]
refs = [('title', 'test.com', 'nrel-1', '5a'),
('title2', 'test_2.com', 'nrel-2', '7b')]

ids = np.array(['7b', '5a'])

expected = ['{"title": "title2", "url": "test_2.com", "id": "7b"}',
'{"title": "title", "url": "test.com", "id": "5a"}']
expected = [('{"title": "title2", "url": "test_2.com", '
'"nrel_id": "nrel-2", "id": "7b"}'),
('{"title": "title", "url": "test.com", '
'"nrel_id": "nrel-1", "id": "5a"}')]

out = wizard._format_refs(refs, ids)

Expand Down

0 comments on commit d9f9721

Please sign in to comment.