Skip to content

Commit

Permalink
Amazon menu selector change. And some string changes.
Browse files Browse the repository at this point in the history
  • Loading branch information
Zburatorul committed Oct 20, 2023
1 parent 4b8e28a commit a540017
Showing 1 changed file with 25 additions and 16 deletions.
41 changes: 25 additions & 16 deletions finance_dl/amazon.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,15 +127,15 @@ class Domain():
# other domains: digital orders are in the regular order list
digital_orders_menu: bool
digital_orders_menu_text: Optional[str] = None

fresh_fallback: Optional[str] = None


class DOT_COM(Domain):
def __init__(self) -> None:
super().__init__(
top_level='com',
sign_in='Sign In',
sign_in='Hello, sign in',
sign_out='Sign Out',

your_orders='Your Orders',
Expand Down Expand Up @@ -227,14 +227,14 @@ class _parserinfo(dateutil.parser.parserinfo):
('Jul', 'Juli'), ('Aug', 'August'), ('Sep', 'September'),
('Okt', 'Oktober'), ('Nov', 'November'), ('Dez', 'Dezember')
]

@staticmethod
def parse_date(date_str) -> datetime.date:
return dateutil.parser.parse(date_str, parserinfo=DOT_DE._parserinfo(dayfirst=True)).date()

DOMAINS = {
".com": DOT_COM,
".co.uk": DOT_CO_UK,
".co.uk": DOT_CO_UK,
".de": DOT_DE
}

Expand Down Expand Up @@ -279,7 +279,7 @@ def login(self):
if self.logged_in:
return

sign_out_links = self.find_elements_by_descendant_partial_text(self.domain.sign_out, 'a')
sign_out_links = self.find_elements_by_descendant_partial_text(self.domain.sign_out, 'span')
if len(sign_out_links) > 0:
logger.info('You must be already logged in!')
self.logged_in = True
Expand Down Expand Up @@ -341,18 +341,22 @@ def get_orders(self, regular=True, digital_orders_menu=True):
if name.endswith('.html')
])

def get_return_order_ids():
elements = self.driver.find_elements(By.XPATH, "//a[contains(text(),'View return/refund status')]/ancestor::div[@class='a-box-group a-spacing-base']/div[@class='a-box a-color-offset-background order-header']/div/div/div/div/div/div/span[1]")


def get_invoice_urls():
initial_iteration = True
while True:
# break when there is no "next page"

# Problem: different site structures depending on country

# .com / .uk
# Order Summary buttons are directly visible and can be
# identified with href containing "orderID="
# but order summary may have different names, e.g. for Amazon Fresh orders

# .de
# only link with href containing "orderID=" is "Bestelldetails anzeigen" (=Order Details)
# which is not helpful
Expand All @@ -365,10 +369,10 @@ def invoice_finder():
By.XPATH, '//a[contains(@href, "orderID=")]')
else:
# order summary link is hidden in submenu for each order
elements = self.driver.find_elements(By.XPATH,
elements = self.driver.find_elements(By.XPATH,
'//a[@class="a-popover-trigger a-declarative"]')
return [a for a in elements if a.text == self.domain.invoice]

if initial_iteration:
invoices = invoice_finder()
else:
Expand Down Expand Up @@ -402,7 +406,7 @@ def invoice_link_finder_hidden():
# submenu containing order summary takes some time to load after click
# search for order summary link and compare order_id
# repeat until order_id is different to last order_id
summary_links = self.driver.find_elements(By.LINK_TEXT,
summary_links = self.driver.find_elements(By.LINK_TEXT,
self.domain.order_summary)
if summary_links:
href = summary_links[0].get_attribute('href')
Expand Down Expand Up @@ -441,12 +445,17 @@ def invoice_link_finder_hidden():
logging.info("Next page.")
self.click(next_links[0])

def retrieve_all_order_groups():
def retrieve_all_order_groups(digital=False):
order_select_index = 0

if digital:
selector = '//select[@name="orderFilter"]'
else:
selector = '//select[@name="timeFilter"]'

while True:
(order_filter,), = self.wait_and_return(
lambda: self.find_visible_elements(By.XPATH, '//select[@name="orderFilter"]')
lambda: self.find_visible_elements(By.XPATH, selector)
)
order_select = Select(order_filter)
num_options = len(order_select.options)
Expand Down Expand Up @@ -486,7 +495,7 @@ def retrieve_all_order_groups():
)
scrape_lib.retry(lambda: self.click(digital_orders_link),
retry_delay=2)
retrieve_all_order_groups()
retrieve_all_order_groups(digital=True)

self.retrieve_invoices(invoice_hrefs)

Expand Down Expand Up @@ -530,7 +539,7 @@ def is_order_placed_node(node):
# order placed information in page header (top left)
m = re.fullmatch(self.domain.regular_order_placed, node.text.strip())
return m is not None

def is_digital_order_row(node):
# information in heading of order table
if node.name != 'tr':
Expand All @@ -552,15 +561,15 @@ def is_digital_order_row(node):
# regular order
node = soup.find(is_order_placed_node)
regex = self.domain.regular_order_placed

m = re.fullmatch(regex, node.text.strip())
if m is None:
return None
order_date = self.domain.parse_date(m.group(1))
return order_date

order_date = get_date(page_source, order_id)
if order_date is None:
if order_date is None:
if self.dir_per_year:
raise ValueError(f'Failed to get date for order {order_id}')
else:
Expand Down

0 comments on commit a540017

Please sign in to comment.