Skip to content

Commit

Permalink
fix extraction of hidden invoice summary links
Browse files Browse the repository at this point in the history
  • Loading branch information
moritzj29 committed Nov 5, 2023
1 parent 6795b4c commit 966b437
Showing 1 changed file with 11 additions and 15 deletions.
26 changes: 11 additions & 15 deletions finance_dl/amazon.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ def get_invoice_path(self, year, order_id):
return os.path.join(self.output_directory, order_id + '.html')

def get_order_id(self, href) -> str:
m = re.match('.*[&?]orderID=((?:D)?[0-9\\-]+)(?:&.*)?$', href)
m = re.match('.*[&?]orderI[Dd]=((?:D)?[0-9\\-]+)(?:&.*)?$', href)
if m is None:
raise RuntimeError(
'Failed to parse order ID from href %r' % (href, ))
Expand Down Expand Up @@ -402,25 +402,21 @@ def invoice_link_finder(invoice_link):
href = "/".join(tokens)
return (order_id, href)

def invoice_link_finder_hidden():
    """Look up the order summary link revealed by clicking an invoice link.

    The submenu that contains the order summary link needs some time to
    load after the click, so a single lookup may still see nothing, or may
    see the link belonging to the previously handled order. This function
    is therefore written as a polling predicate: it reports failure until
    a summary link with a new order id shows up.
    (Presumably the caller re-invokes it until it returns a truthy
    value -- confirm against ``wait_and_return``.)

    Returns:
        ``(order_id, href)`` taken from the first link whose text equals
        ``self.domain.order_summary``, provided its order id differs from
        ``last_order_id``; ``False`` when no such link is present yet or
        the id is unchanged.
    """
    candidates = self.driver.find_elements(
        By.LINK_TEXT, self.domain.order_summary)
    # Submenu not rendered yet: nothing to inspect on this attempt.
    if not candidates:
        return False
    link_href = candidates[0].get_attribute('href')
    found_id = self.get_order_id(link_href)
    # Same id as the last order means the link is stale from before the click.
    if found_id == last_order_id:
        return False
    return (found_id, link_href)

for invoice_link in invoices:
if not self.domain.order_summary_hidden:
(order_id, href) = invoice_link_finder(invoice_link)
else:
# get order id to find the correct summary link
order_id=self.get_order_id(invoice_link.get_attribute('href'))
invoice_link.click()
(order_id, href), = self.wait_and_return(invoice_link_finder_hidden)
# submenu containing order summary takes some time to load after click
summary_link, = self.wait_and_locate(
(By.XPATH,'//a[contains(@href,"{}") and contains(@href,"/gp/css/summary")]'.format(order_id)))
if summary_link:
href = summary_link.get_attribute('href')
else:
logger.info('Link extraction failed for order id: %r', order_id)
order_id = False
if order_id:
if order_id in order_ids_seen:
logger.info('Skipping already-seen order id: %r', order_id)
Expand Down

0 comments on commit 966b437

Please sign in to comment.