diff --git a/finance_dl/paypal.py b/finance_dl/paypal.py index 31b120d..3148c38 100644 --- a/finance_dl/paypal.py +++ b/finance_dl/paypal.py @@ -175,8 +175,8 @@ def get_csrf_token(self): logging.info('Getting CSRF token') self.driver.get('https://www.paypal.com/myaccount/transactions/') # Get CSRF token - body_element, = self.wait_and_locate((By.ID, "__react_data__")) - attribute_object = json.loads(body_element.get_attribute("data")) + body_element, = self.wait_and_locate((By.ID, "__APP_DATA__")) + attribute_object = json.loads(body_element.get_attribute("innerHTML")) self.csrf_token = attribute_object["_csrf"] return self.csrf_token diff --git a/finance_dl/pge.py b/finance_dl/pge.py index dcf2a8f..689390d 100644 --- a/finance_dl/pge.py +++ b/finance_dl/pge.py @@ -104,15 +104,15 @@ def login(self): if self.logged_in: return logger.info('Initiating log in') - self.driver.get('https://www.pge.com/en/myhome/myaccount/index.page') + self.driver.get('https://m.pge.com/') (username, password), = self.wait_and_return( self.find_username_and_password_in_any_frame) logger.info('Entering username and password') username.send_keys(self.credentials['username']) password.send_keys(self.credentials['password']) - with self.wait_for_page_load(): - password.send_keys(Keys.ENTER) + password.send_keys(Keys.ENTER) + self.wait_and_return(lambda: self.find_visible_elements(By.ID, 'arrowBillPaymentHistory')) logger.info('Logged in') self.logged_in = True @@ -136,7 +136,7 @@ def process_download(self, download_result, output_dir): new_path = self.get_output_path(output_dir, date) if os.path.exists(new_path): logger.info('Skipping duplicate download: %s', date) - return True + return False tmp_path = new_path.replace('.pdf', '.tmp.pdf') with open(tmp_path, 'wb') as f: download_data = download_result[1] @@ -157,15 +157,11 @@ def get_bills(self, output_dir): actions.send_keys(Keys.ESCAPE) actions.perform() logger.info('Looking for download link') - (bills_link, ), = self.wait_and_return( - lambda: self.find_visible_elements_by_descendant_partial_text('BILL & PAYMENT HISTORY', 'h2')) + (bills_link, ), = self.wait_and_return(lambda: self.find_visible_elements(By.ID, 'arrowBillPaymentHistory')) scrape_lib.retry(lambda: self.click(bills_link), retry_delay=2) - (more_link, ), = self.wait_and_return( - lambda: self.find_visible_elements_by_descendant_partial_text('View up to 24 months of activity', 'a')) + (more_link, ), = self.wait_and_return(lambda: self.find_visible_elements(By.ID, 'href-view-24month-history')) scrape_lib.retry(lambda: self.click(more_link), retry_delay=2) - links, = self.wait_and_return( - lambda: self.find_visible_elements(By.PARTIAL_LINK_TEXT, "View Bill PDF") - ) + links, = self.wait_and_return(lambda: self.find_visible_elements(By.CSS_SELECTOR, ".utag-bill-history-view-bill-pdf")) for link in links: if not self.do_download_from_link(link, output_dir) and self.stop_early: diff --git a/finance_dl/scrape_lib.py b/finance_dl/scrape_lib.py index fa4ca58..cfb4e1d 100644 --- a/finance_dl/scrape_lib.py +++ b/finance_dl/scrape_lib.py @@ -195,7 +195,7 @@ def get_downloaded_file(self): partial_names = [] other_names = [] for name in names: - if name.endswith('.part') or name.endswith('.crdownload'): + if name.endswith('.part') or name.endswith('.crdownload') or name.startswith('.com.google.Chrome'): partial_names.append(name) else: other_names.append(name) diff --git a/finance_dl/venmo.py b/finance_dl/venmo.py index 6f576e5..bff76ea 100644 --- a/finance_dl/venmo.py +++ b/finance_dl/venmo.py @@ -82,7 +82,7 @@ def CONFIG_venmo(): import os import time from selenium.webdriver.common.by import By -from selenium.common.exceptions import NoSuchElementException +from selenium.common.exceptions import NoSuchElementException, ElementNotInteractableException, StaleElementReferenceException from selenium.webdriver.support.ui import Select from selenium.webdriver.common.keys import Keys @@ -146,19 +146,67 @@ def __init__(self, credentials, output_directory, def check_after_wait(self): check_url(self.driver.current_url) + def find_venmo_username(self): + for frame in self.for_each_frame(): + try: + return self.driver.find_elements(By.XPATH, '//input[@type="text" or @type="email"]') + except NoSuchElementException: + pass + raise NoSuchElementException() + + def find_venmo_password(self): + for frame in self.for_each_frame(): + try: + return self.driver.find_elements(By.XPATH, '//input[@type="password"]') + except NoSuchElementException: + pass + raise NoSuchElementException() + + def wait_for(self, condition_function): + start_time = time.time() + while time.time() < start_time + 3: + if condition_function(): + return True + else: + time.sleep(0.1) + raise Exception( + 'Timeout waiting for {}'.format(condition_function.__name__) + ) + + def click_through_to_new_page(self, button_text): + link = self.driver.find_element(By.XPATH, f'//button[@name="{button_text}"]') + link.click() + + def link_has_gone_stale(): + try: + # poll the link with an arbitrary call + link.find_elements(By.XPATH, 'doesnt-matter') + return False + except StaleElementReferenceException: + return True + + self.wait_for(link_has_gone_stale) + def login(self): if self.logged_in: return logger.info('Initiating log in') self.driver.get('https://venmo.com/account/sign-in') - (username, password), = self.wait_and_return( - self.find_username_and_password_in_any_frame) - logger.info('Entering username and password') - username.send_keys(self.credentials['username']) + #(username, password), = self.wait_and_return( + # self.find_username_and_password_in_any_frame) + username = self.wait_and_return(self.find_venmo_username)[0][0] + try: + logger.info('Entering username') + username.send_keys(self.credentials['username']) + username.send_keys(Keys.ENTER) + except ElementNotInteractableException: + # indicates that username already filled in + logger.info("Skipped") + password = self.wait_and_return(self.find_venmo_password)[0][0] + logger.info('Entering password') password.send_keys(self.credentials['password']) - with self.wait_for_page_load(): - password.send_keys(Keys.ENTER) + self.click_through_to_new_page("Sign in") logger.info('Logged in') self.logged_in = True @@ -173,7 +221,7 @@ def goto_statement(self, start_date, end_date): def download_csv(self): logger.info('Looking for CSV link') download_button, = self.wait_and_locate( - (By.XPATH, '//a[text() = "Download CSV"]')) + (By.XPATH, '//*[text() = "Download CSV"]')) self.click(download_button) logger.info('Waiting for CSV download') download_result, = self.wait_and_return(self.get_downloaded_file) @@ -182,8 +230,8 @@ def download_csv(self): def get_balance(self, balance_type): try: - balance_node = self.driver.find_element( - By.XPATH, '//*[@class="%s"]/child::*[@class="balance-amt"]' % + balance_node = self.driver.find_element( + By.XPATH, '//*[text() = "%s"]/following-sibling::*' % balance_type) return balance_node.text except NoSuchElementException: @@ -191,9 +239,11 @@ def get_balance(self, balance_type): def get_balances(self): def maybe_get_balance(): - start_balance = self.get_balance('start-balance') - end_balance = self.get_balance('end-balance') + start_balance = self.get_balance('Beginning amount') + end_balance = self.get_balance('Ending amount') if start_balance is not None and end_balance is not None: + start_balance = start_balance.replace("\n", "") + end_balance = end_balance.replace("\n", "") return (start_balance, end_balance) try: error_node = self.driver.find_element( @@ -303,13 +353,20 @@ def fetch_history(self): while start_date <= self.latest_history_date: end_date = min(self.latest_history_date, - start_date + datetime.timedelta(days=89)) + self.last_day_of_month(start_date)) self.fetch_statement(start_date, end_date) start_date = end_date + datetime.timedelta(days=1) logger.debug('Venmo hack: waiting 5 seconds between requests') time.sleep(5) + + def last_day_of_month(self, any_day): + # The day 28 exists in every month. 4 days later, it's always next month + next_month = any_day.replace(day=28) + datetime.timedelta(days=4) + # subtracting the number of the current day brings us back one month + return next_month - datetime.timedelta(days=next_month.day) + def run(self): self.login() self.fetch_history()