Skip to content

Commit

Permalink
Merge remote-tracking branch 'Original_Repo/master' into dev/amazon
Browse files Browse the repository at this point in the history
  • Loading branch information
moritzj29 committed May 19, 2024
2 parents 9347eec + a87f8f7 commit 03940e7
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 27 deletions.
4 changes: 2 additions & 2 deletions finance_dl/paypal.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,8 @@ def get_csrf_token(self):
logging.info('Getting CSRF token')
self.driver.get('https://www.paypal.com/myaccount/transactions/')
# Get CSRF token
body_element, = self.wait_and_locate((By.ID, "__react_data__"))
attribute_object = json.loads(body_element.get_attribute("data"))
body_element, = self.wait_and_locate((By.ID, "__APP_DATA__"))
attribute_object = json.loads(body_element.get_attribute("innerHTML"))
self.csrf_token = attribute_object["_csrf"]
return self.csrf_token

Expand Down
18 changes: 7 additions & 11 deletions finance_dl/pge.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,15 +104,15 @@ def login(self):
if self.logged_in:
return
logger.info('Initiating log in')
self.driver.get('https://www.pge.com/en/myhome/myaccount/index.page')
self.driver.get('https://m.pge.com/')

(username, password), = self.wait_and_return(
self.find_username_and_password_in_any_frame)
logger.info('Entering username and password')
username.send_keys(self.credentials['username'])
password.send_keys(self.credentials['password'])
with self.wait_for_page_load():
password.send_keys(Keys.ENTER)
password.send_keys(Keys.ENTER)
self.wait_and_return(lambda: self.find_visible_elements(By.ID, 'arrowBillPaymentHistory'))
logger.info('Logged in')
self.logged_in = True

Expand All @@ -136,7 +136,7 @@ def process_download(self, download_result, output_dir):
new_path = self.get_output_path(output_dir, date)
if os.path.exists(new_path):
logger.info('Skipping duplicate download: %s', date)
return True
return False
tmp_path = new_path.replace('.pdf', '.tmp.pdf')
with open(tmp_path, 'wb') as f:
download_data = download_result[1]
Expand All @@ -157,15 +157,11 @@ def get_bills(self, output_dir):
actions.send_keys(Keys.ESCAPE)
actions.perform()
logger.info('Looking for download link')
(bills_link, ), = self.wait_and_return(
lambda: self.find_visible_elements_by_descendant_partial_text('BILL & PAYMENT HISTORY', 'h2'))
(bills_link, ), = self.wait_and_return(lambda: self.find_visible_elements(By.ID, 'arrowBillPaymentHistory'))
scrape_lib.retry(lambda: self.click(bills_link), retry_delay=2)
(more_link, ), = self.wait_and_return(
lambda: self.find_visible_elements_by_descendant_partial_text('View up to 24 months of activity', 'a'))
(more_link, ), = self.wait_and_return(lambda: self.find_visible_elements(By.ID, 'href-view-24month-history'))
scrape_lib.retry(lambda: self.click(more_link), retry_delay=2)
links, = self.wait_and_return(
lambda: self.find_visible_elements(By.PARTIAL_LINK_TEXT, "View Bill PDF")
)
links, = self.wait_and_return(lambda: self.find_visible_elements(By.CSS_SELECTOR, ".utag-bill-history-view-bill-pdf"))

for link in links:
if not self.do_download_from_link(link, output_dir) and self.stop_early:
Expand Down
2 changes: 1 addition & 1 deletion finance_dl/scrape_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def get_downloaded_file(self):
partial_names = []
other_names = []
for name in names:
if name.endswith('.part') or name.endswith('.crdownload'):
if name.endswith('.part') or name.endswith('.crdownload') or name.startswith('.com.google.Chrome'):
partial_names.append(name)
else:
other_names.append(name)
Expand Down
83 changes: 70 additions & 13 deletions finance_dl/venmo.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def CONFIG_venmo():
import os
import time
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import NoSuchElementException, ElementNotInteractableException, StaleElementReferenceException
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.keys import Keys

Expand Down Expand Up @@ -146,19 +146,67 @@ def __init__(self, credentials, output_directory,
def check_after_wait(self):
    """Validate the browser's current URL by handing it to ``check_url``."""
    current_location = self.driver.current_url
    check_url(current_location)

def find_venmo_username(self):
    """Find the text/e-mail ``<input>`` elements used for the username.

    Walks the frames yielded by ``self.for_each_frame()`` and returns the
    matches from the first frame that contains at least one.

    Returns:
        A non-empty list of matching elements.

    Raises:
        NoSuchElementException: if no frame contains a matching input.
    """
    for _frame in self.for_each_frame():
        try:
            candidates = self.driver.find_elements(
                By.XPATH, '//input[@type="text" or @type="email"]')
        except NoSuchElementException:
            # Kept defensively; find_elements() normally signals "no match"
            # with an empty list rather than by raising.
            continue
        # An empty result must not end the search: returning it would hide
        # matches in later frames and break callers that index the result.
        if candidates:
            return candidates
    raise NoSuchElementException()

def find_venmo_password(self):
    """Find the password ``<input>`` elements of the sign-in form.

    Walks the frames yielded by ``self.for_each_frame()`` and returns the
    matches from the first frame that contains at least one.

    Returns:
        A non-empty list of matching elements.

    Raises:
        NoSuchElementException: if no frame contains a password input.
    """
    for _frame in self.for_each_frame():
        try:
            candidates = self.driver.find_elements(
                By.XPATH, '//input[@type="password"]')
        except NoSuchElementException:
            # Kept defensively; find_elements() normally signals "no match"
            # with an empty list rather than by raising.
            continue
        # An empty result must not end the search: returning it would hide
        # matches in later frames and break callers that index the result.
        if candidates:
            return candidates
    raise NoSuchElementException()

def wait_for(self, condition_function, timeout=3, poll_interval=0.1):
    """Poll ``condition_function`` until it returns a truthy value.

    Args:
        condition_function: zero-argument callable evaluated repeatedly.
        timeout: maximum number of seconds to keep polling (default 3).
        poll_interval: seconds to sleep between evaluations (default 0.1).

    Returns:
        True as soon as the condition holds.

    Raises:
        TimeoutError: if the condition is still falsy once ``timeout``
            seconds have elapsed.  ``TimeoutError`` is an ``Exception``
            subclass, so ``except Exception`` handlers keep working.
    """
    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = time.monotonic() + timeout
    while True:
        # Evaluate before checking the deadline so the condition is always
        # tested at least once, even with a zero timeout.
        if condition_function():
            return True
        if time.monotonic() >= deadline:
            break
        time.sleep(poll_interval)
    # Not every callable has __name__ (e.g. functools.partial objects or
    # instances defining __call__); fall back to repr() so the timeout is
    # reported instead of an unrelated AttributeError.
    label = getattr(condition_function, '__name__', None)
    if not label:
        label = repr(condition_function)
    raise TimeoutError('Timeout waiting for {}'.format(label))

def click_through_to_new_page(self, button_text):
    """Click a button and wait until its element reference goes stale.

    The button is selected by its ``name`` attribute, which must equal
    ``button_text``.  After the click, the element is polled through
    ``self.wait_for`` until Selenium reports it as stale.
    """
    button = self.driver.find_element(
        By.XPATH, f'//button[@name="{button_text}"]')
    button.click()

    def link_has_gone_stale():
        # The name is significant: wait_for() embeds it in its timeout
        # message.  The lookup itself is arbitrary; it only serves to make
        # the driver touch the (possibly stale) element.
        try:
            button.find_elements(By.XPATH, 'doesnt-matter')
        except StaleElementReferenceException:
            return True
        return False

    self.wait_for(link_has_gone_stale)

def login(self):
if self.logged_in:
return
logger.info('Initiating log in')
self.driver.get('https://venmo.com/account/sign-in')

(username, password), = self.wait_and_return(
self.find_username_and_password_in_any_frame)
logger.info('Entering username and password')
username.send_keys(self.credentials['username'])
#(username, password), = self.wait_and_return(
# self.find_username_and_password_in_any_frame)
username = self.wait_and_return(self.find_venmo_username)[0][0]
try:
logger.info('Entering username')
username.send_keys(self.credentials['username'])
username.send_keys(Keys.ENTER)
except ElementNotInteractableException:
# indicates that username already filled in
logger.info("Skipped")
password = self.wait_and_return(self.find_venmo_password)[0][0]
logger.info('Entering password')
password.send_keys(self.credentials['password'])
with self.wait_for_page_load():
password.send_keys(Keys.ENTER)
self.click_through_to_new_page("Sign in")
logger.info('Logged in')
self.logged_in = True

Expand All @@ -173,7 +221,7 @@ def goto_statement(self, start_date, end_date):
def download_csv(self):
logger.info('Looking for CSV link')
download_button, = self.wait_and_locate(
(By.XPATH, '//a[text() = "Download CSV"]'))
(By.XPATH, '//*[text() = "Download CSV"]'))
self.click(download_button)
logger.info('Waiting for CSV download')
download_result, = self.wait_and_return(self.get_downloaded_file)
Expand All @@ -182,18 +230,20 @@ def download_csv(self):

def get_balance(self, balance_type):
try:
balance_node = self.driver.find_element(
By.XPATH, '//*[@class="%s"]/child::*[@class="balance-amt"]' %
balance_node = self.driver.find_element(
By.XPATH, '//*[text() = "%s"]/following-sibling::*' %
balance_type)
return balance_node.text
except NoSuchElementException:
return None

def get_balances(self):
def maybe_get_balance():
start_balance = self.get_balance('start-balance')
end_balance = self.get_balance('end-balance')
start_balance = self.get_balance('Beginning amount')
end_balance = self.get_balance('Ending amount')
if start_balance is not None and end_balance is not None:
start_balance = start_balance.replace("\n", "")
end_balance = end_balance.replace("\n", "")
return (start_balance, end_balance)
try:
error_node = self.driver.find_element(
Expand Down Expand Up @@ -303,13 +353,20 @@ def fetch_history(self):

while start_date <= self.latest_history_date:
end_date = min(self.latest_history_date,
start_date + datetime.timedelta(days=89))
self.last_day_of_month(start_date))
self.fetch_statement(start_date, end_date)
start_date = end_date + datetime.timedelta(days=1)

logger.debug('Venmo hack: waiting 5 seconds between requests')
time.sleep(5)


def last_day_of_month(self, any_day):
    """Return the final day of the month that contains ``any_day``.

    ``any_day`` may be a ``date`` or ``datetime``; the result has the same
    type, with only the day (and, if needed, nothing else) changed.
    """
    # Every month has a 28th and none runs more than three days past it,
    # so the 28th plus four days always falls on day 1-4 of the next month.
    into_following_month = (
        any_day.replace(day=28) + datetime.timedelta(days=4))
    # Stepping back by that date's day-of-month lands on the last day of
    # the month we started in.
    overshoot = datetime.timedelta(days=into_following_month.day)
    return into_following_month - overshoot

def run(self):
self.login()
self.fetch_history()
Expand Down

0 comments on commit 03940e7

Please sign in to comment.