Skip to content

Commit

Permalink
Merge pull request #20 from NREL/sp/rhub_bug_fixes
Browse files Browse the repository at this point in the history
Rhub bug fixes: handling for missing links and skipping lastName:NREL
  • Loading branch information
spodgorny9 authored Jun 11, 2024
2 parents f4ee815 + 30acc43 commit 21bd276
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 11 deletions.
66 changes: 57 additions & 9 deletions elm/web/rhub.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,34 @@ def clean_text(html_text):

return clean

@property
def first_name(self):
    """Get the first name of this researcher.

    Returns
    -------
    first : str | None
        First name of researcher, or None if the record has no 'name'
        entry or that entry has no 'firstName' key.
    """
    # A record without a 'name' entry yields None from .get(); fall back
    # to an empty mapping so the lookup below returns None instead of
    # raising AttributeError.
    names = self.get('name') or {}

    return names.get('firstName')

@property
def last_name(self):
    """Get the last name of this researcher.

    Returns
    -------
    last : str | None
        Last name of researcher, or None if the record has no 'name'
        entry or that entry has no 'lastName' key.
    """
    # A record without a 'name' entry yields None from .get(); fall back
    # to an empty mapping so the lookup below returns None instead of
    # raising AttributeError.
    names = self.get('name') or {}

    return names.get('lastName')

@property
def title(self):
"""Get the full name of this researcher.
Expand Down Expand Up @@ -329,10 +357,9 @@ def __init__(self, url, n_pages=1):
self._n_pages = 0
self._iter = 0

records = self._get_first()
for page in self._get_pages(n_pages=n_pages):
records += page
records = self._get_all(n_pages)
records = [ProfilesRecord(single) for single in records]
records = [prof for prof in records if prof.last_name != 'NREL']
super().__init__(records)

def _get_first(self):
Expand Down Expand Up @@ -398,6 +425,27 @@ def _get_pages(self, n_pages):
else:
break

def _get_all(self, n_pages):
    """Collect the first page of profiles plus any follow-on pages.

    Parameters
    ----------
    n_pages : int
        Number of pages to retrieve

    Returns
    -------
    all_records : list
        Profile records from the first page followed by the records of
        each page yielded by ``_get_pages``.
    """
    all_records = self._get_first()

    # Grow the first-page list in place, one page at a time.
    for page_records in self._get_pages(n_pages):
        all_records.extend(page_records)

    return all_records

def meta(self):
"""Get a meta dataframe with details on all of the profiles.
Expand Down Expand Up @@ -583,12 +631,12 @@ def links(self):

doi = None
pdf_url = None

for link in ev:
if link.get('doi'):
doi = link.get('doi')
if link.get('link'):
pdf_url = link.get('link')
if ev:
for link in ev:
if link.get('doi'):
doi = link.get('doi')
if link.get('link'):
pdf_url = link.get('link')

return doi, pdf_url

Expand Down
4 changes: 2 additions & 2 deletions elm/wizard.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def engineer_query(self, query, token_budget=None, new_info_threshold=0.7,
used_index = np.array(used_index)
references = self.make_ref_list(used_index)

return message, references
return message, references, used_index

@abstractmethod
def make_ref_list(self, idx):
Expand Down Expand Up @@ -200,7 +200,7 @@ def chat(self, query,
out = self.engineer_query(query, token_budget=token_budget,
new_info_threshold=new_info_threshold,
convo=convo)
query, references = out
query, references, _ = out

messages = [{"role": "system", "content": self.MODEL_ROLE},
{"role": "user", "content": query}]
Expand Down

0 comments on commit 21bd276

Please sign in to comment.