Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert "[amazon] fixes for DE" #102

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 40 additions & 54 deletions finance_dl/amazon.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,6 @@ class Domain():

# Find invoices.
your_orders: str
archived_orders: str
invoice: str
invoice_link: List[str]
order_summary: str
Expand Down Expand Up @@ -140,7 +139,6 @@ def __init__(self) -> None:
sign_out='Sign Out',

your_orders='Your Orders',
archived_orders='Archived Orders',
invoice='Invoice',
invoice_link=["View order", "View invoice"],
# View invoice -> regular/digital order, View order -> Amazon Fresh
Expand Down Expand Up @@ -173,7 +171,6 @@ def __init__(self) -> None:
sign_out='Sign out',

your_orders='Your Orders',
archived_orders='Archived Orders',
invoice='Invoice',
invoice_link=["View order", "View invoice"],
# View invoice -> regular/digital order, View order -> Amazon Fresh
Expand Down Expand Up @@ -205,7 +202,6 @@ def __init__(self) -> None:
sign_out='Abmelden',

your_orders='Meine Bestellungen',
archived_orders='Archivierte Bestellungen',
invoice='Rechnung',
invoice_link=["Bestelldetails anzeigen"],
fresh_fallback=None,
Expand Down Expand Up @@ -329,7 +325,7 @@ def get_invoice_path(self, year, order_id):
return os.path.join(self.output_directory, order_id + '.html')

def get_order_id(self, href) -> str:
m = re.match('.*[&?]orderI[Dd]=((?:D)?[0-9\\-]+)(?:&.*)?$', href)
m = re.match('.*[&?]orderID=((?:D)?[0-9\\-]+)(?:&.*)?$', href)
if m is None:
raise RuntimeError(
'Failed to parse order ID from href %r' % (href, ))
Expand Down Expand Up @@ -363,30 +359,34 @@ def get_invoice_urls():
# order summary is hidden behind submenu which requires a click to be visible

def invoice_finder():
# order summary link is visible on page
elements_raw = self.driver.find_elements(
By.XPATH, '//a[contains(@href, "orderID=")]')
elements = []
for invoice_link in elements_raw:
if invoice_link.text not in self.domain.invoice_link:
# skip invoice if label is not known
# different labels are possible e.g. for regular orders vs. Amazon fresh
if invoice_link.text != "":
# log non-empty link texts -> may be new type
logger.debug(
'Skipping invoice due to unknown invoice_link.text: %s',
invoice_link.text)
else:
elements.append(invoice_link)
return elements

if not self.domain.order_summary_hidden:
# order summary link is visible on page
return self.driver.find_elements(
By.XPATH, '//a[contains(@href, "orderID=")]')
else:
# order summary link is hidden in submenu for each order
elements = self.driver.find_elements(By.XPATH,
'//a[@class="a-popover-trigger a-declarative"]')
return [a for a in elements if a.text == self.domain.invoice]

if initial_iteration:
invoices = invoice_finder()
else:
invoices, = self.wait_and_return(invoice_finder)
initial_iteration = False

last_order_id = None

def invoice_link_finder(invoice_link):
if invoice_link.text not in self.domain.invoice_link:
# skip invoice if label is not known
# different labels are possible e.g. for regular orders vs. Amazon fresh
if invoice_link.text != "":
# log non-empty link texts -> may be new type
logger.debug(
'Skipping invoice due to unknown invoice_link.text: %s',
invoice_link.text)
return (False, False)
href = invoice_link.get_attribute('href')
order_id = self.get_order_id(href)
if self.domain.fresh_fallback is not None and invoice_link.text == self.domain.fresh_fallback:
Expand All @@ -397,39 +397,26 @@ def invoice_link_finder(invoice_link):
tokens[-1] = f"gp/css/summary/print.html?orderID={order_id}"
href = "/".join(tokens)
return (order_id, href)

def invoice_link_finder_hidden(invoice_link):
# get order id to later find the correct summary link
order_id=self.get_order_id(invoice_link.get_attribute('href'))

# get parent element to search for invoice menu button (has no orderID specified)
parent=invoice_link.find_element(By.XPATH,"./..")
# leading dot in './/' specifies to only search in children
popover=parent.find_elements(By.XPATH,'.//a[contains(@href, "invoice/invoice.html")]')
# depending on the order group the XPATH may be different
if len(popover) == 0:
popover=parent.find_elements(
By.XPATH,
f'.//a[contains(text(), {self.domain.invoice}) and @class="a-popover-trigger a-declarative"]')

# open invoice popover to extract invoice link
popover[0].click()

# submenu containing order summary takes some time to load after click
summary_link, = self.wait_and_locate(
(By.XPATH,'//a[contains(@href,"{}") and contains(text(),"{}")]'.format(order_id, self.domain.order_summary)))
if summary_link:
href = summary_link.get_attribute('href')
return (order_id, href)
else:
logger.info('Link extraction failed for order id: %r', order_id)
return (False, False)

def invoice_link_finder_hidden():
# submenu containing order summary takes some time to load after click
# search for order summary link and compare order_id
# repeat until order_id is different to last order_id
summary_links = self.driver.find_elements(By.LINK_TEXT,
self.domain.order_summary)
if summary_links:
href = summary_links[0].get_attribute('href')
order_id = self.get_order_id(href)
if order_id != last_order_id:
return (order_id, href)
return False

for invoice_link in invoices:
if not self.domain.order_summary_hidden:
(order_id, href) = invoice_link_finder(invoice_link)
else:
(order_id, href) = invoice_link_finder_hidden(invoice_link)
invoice_link.click()
(order_id, href), = self.wait_and_return(invoice_link_finder_hidden)
if order_id:
if order_id in order_ids_seen:
logger.info('Skipping already-seen order id: %r', order_id)
Expand All @@ -440,6 +427,7 @@ def invoice_link_finder_hidden(invoice_link):
logger.info('Found order \'{}\''.format(order_id))
invoice_hrefs.append((href, order_id))
order_ids_seen.add(order_id)
last_order_id = order_id

# Find next link
next_links = self.find_elements_by_descendant_text_match(
Expand All @@ -457,9 +445,7 @@ def retrieve_all_order_groups():
order_select_index = 0

while True:
(order_filter,), = self.wait_and_return(
lambda: self.find_visible_elements(By.XPATH, '//select[@name="timeFilter"]')
)
order_filter, = self.wait_and_locate((By.CSS_SELECTOR, '#time-filter, #orderFilter'))
order_select = Select(order_filter)
num_options = len(order_select.options)
if order_select_index >= num_options:
Expand All @@ -468,7 +454,7 @@ def retrieve_all_order_groups():
order_select_index]
option_text = option.text.strip()
order_select_index += 1
if option_text == self.domain.archived_orders:
if option_text == 'Archived Orders':
continue
if self.order_groups is not None and option_text not in self.order_groups:
logger.info('Skipping order group: %r', option_text)
Expand Down
Loading