diff --git a/README.md b/README.md
index 1935e3a..fdc5645 100644
--- a/README.md
+++ b/README.md
@@ -76,10 +76,12 @@ pip install -e .
 Configuration
 ==

-Create a Python file like `example_finance_dl_config.py`.
+Create a configuration file called something like `finance_dl_config.py`.
+For a complete example of this file and some documentation,
+see [example_finance_dl_config.py](example_finance_dl_config.py).

 Refer to the documentation of the individual scraper modules for
-details.
+further details.

 Basic Usage
 ==
@@ -162,7 +164,7 @@ In this event, you have a few options:
    `chromedriver_binary` somewhere other than your system's default Chrome
    version, and set the environment variable `CHROMEDRIVER_CHROME_BINARY` to
    point to it. (You can do this from within your finance_dl config script,
-   e.g. with a line like `os.environ[CHROMEDRIVER_CHROME_BINARY] = "/usr/bin/google-chrome-beta"`).
+   e.g. with a line like `os.environ["CHROMEDRIVER_CHROME_BINARY"] = "/usr/bin/google-chrome-beta"`).

 License
 ==
diff --git a/example_finance_dl_config.py b/example_finance_dl_config.py
index 2d4158f..72c3a22 100644
--- a/example_finance_dl_config.py
+++ b/example_finance_dl_config.py
@@ -7,6 +7,20 @@
 Rather than hard code your usernames and passwords into this
 configuration file, you may instead wish to write some code to
 retrieve them from some external password store.
+
+For example, you can input the username/password interactively like so:
+
+    from getpass import getpass
+
+    def CONFIG_paypal():
+        return dict(
+            module='finance_dl.paypal',
+            credentials={
+                'username': input('PayPal username: '),  # <----
+                'password': getpass('PayPal password: '),  # <----
+            },
+            output_directory=os.path.join(data_dir, 'paypal'),
+        )
 """

 import os
diff --git a/finance_dl/amazon.py b/finance_dl/amazon.py
index df5fd34..e02ee78 100644
--- a/finance_dl/amazon.py
+++ b/finance_dl/amazon.py
@@ -43,6 +43,13 @@
   order page "order groups" that will be scanned for orders to download.
   Order groups include years (e.g. '2020'), as well as 'last 30 days' and
   'past 3 months'.
+- `download_preorder_invoices`: Optional. If specified and True, invoices for
+  preorders (i.e. orders that have not actually been charged yet) will be
+  downloaded. Such preorder invoices are not typically useful for accounting
+  since they claim a card was charged even though it actually has not yet
+  been charged; they get replaced with invoices containing the correct
+  information when the order is actually fulfilled.
+ Output format: ============== @@ -112,6 +119,7 @@ class Domain(): grand_total: str grand_total_digital: str order_cancelled: str + pre_order: str digital_order: str regular_order_placed: str @@ -144,6 +152,7 @@ def __init__(self) -> None: grand_total='Grand Total:', grand_total_digital='Grand Total:', order_cancelled='Order Canceled', + pre_order='Pre-order', digital_order='Digital Order: (.*)', regular_order_placed=r'(?:Subscribe and Save )?Order Placed:\s+([^\s]+ \d+, \d{4})', @@ -176,6 +185,7 @@ def __init__(self) -> None: grand_total='Grand Total:', grand_total_digital='Grand Total:', order_cancelled='Order Canceled', + pre_order='Pre-order', digital_order='Digital Order: (.*)', regular_order_placed=r'(?:Subscribe and Save )?Order Placed:\s+([^\s]+ \d+, \d{4})', @@ -206,6 +216,7 @@ def __init__(self) -> None: grand_total='Gesamtsumme:', grand_total_digital='Endsumme:', order_cancelled='Order Canceled', + pre_order='Pre-order', digital_order='Digitale Bestellung: (.*)', regular_order_placed=r'(?:GetÃĪtigte Spar-Abo-Bestellung|Bestellung aufgegeben am):\s+(\d+\. [^\s]+ \d{4})', @@ -241,6 +252,7 @@ def __init__(self, regular: bool = True, digital: Optional[bool] = None, order_groups: Optional[List[str]] = None, + download_preorder_invoices: bool = False, **kwargs): super().__init__(**kwargs) if amazon_domain not in DOMAINS: @@ -254,6 +266,7 @@ def __init__(self, self.regular = regular self.digital_orders_menu = digital if digital is not None else self.domain.digital_orders_menu self.order_groups = order_groups + self.download_preorder_invoices = download_preorder_invoices def check_url(self, url): netloc_re = r'^([^\.@]+\.)*amazon.' + self.domain.top_level + '$' @@ -312,7 +325,7 @@ def finish_login(self): def get_invoice_path(self, year, order_id): if self.dir_per_year: - return os.path.join(self.output_directory, year, order_id + '.html') + return os.path.join(self.output_directory, str(year), order_id + '.html') return os.path.join(self.output_directory, order_id + '.html') def get_order_id(self, href) -> str: @@ -393,7 +406,7 @@ def invoice_link_finder_hidden(): # submenu containing order summary takes some time to load after click # search for order summary link and compare order_id # repeat until order_id is different to last order_id - summary_links = self.driver.find_elements_by_link_text( + summary_links = self.driver.find_elements(By.LINK_TEXT, self.domain.order_summary) if summary_links: href = summary_links[0].get_attribute('href') @@ -505,6 +518,10 @@ def get_source(): return None page_source, = self.wait_and_return(get_source) + if self.domain.pre_order in page_source and not self.download_preorder_invoices: + # Pre-orders don't have enough information to download yet. Skip them. 
+ logger.info(f'Skipping pre-order invoice {order_id}') + return if order_id not in page_source: raise ValueError(f'Failed to retrieve information for order {order_id}') diff --git a/finance_dl/comcast.py b/finance_dl/comcast.py index d4c4b90..0a90f55 100644 --- a/finance_dl/comcast.py +++ b/finance_dl/comcast.py @@ -150,7 +150,7 @@ def get_bills_link(): pass bills_link = get_bills_link() - self.driver.find_element_by_tag_name('body').send_keys(Keys.ESCAPE) + self.driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.ESCAPE) bills_link.click() def get_links(): @@ -168,7 +168,7 @@ def get_links(): cur_el = link bill_date = None while True: - parent = cur_el.find_element_by_xpath('..') + parent = cur_el.find_element(By.XPATH, '..') if parent == cur_el: break try: diff --git a/finance_dl/discover.py b/finance_dl/discover.py index a50bcca..bf3afa8 100644 --- a/finance_dl/discover.py +++ b/finance_dl/discover.py @@ -57,6 +57,7 @@ def CONFIG_discover(): import os import shutil from selenium.common.exceptions import NoSuchElementException, TimeoutException +from selenium.webdriver.common.by import By from selenium.webdriver.common.keys import Keys from . import scrape_lib @@ -85,11 +86,11 @@ def check_after_wait(self): check_url(self.driver.current_url) def find_account_last4(self): - return self.driver.find_element_by_xpath(XPATH_OF_LAST_FOUR_DIGITS).text + return self.driver.find_element(By.XPATH, XPATH_OF_LAST_FOUR_DIGITS).text def login(self): try: - account = self.driver.find_element_by_xpath(XPATH_OF_LAST_FOUR_DIGITS) + account = self.driver.find_element(By.XPATH, XPATH_OF_LAST_FOUR_DIGITS) logger.info("Already logged in") except NoSuchElementException: logger.info('Initiating log in') diff --git a/finance_dl/gemini.py b/finance_dl/gemini.py index ec48fa6..14cced9 100644 --- a/finance_dl/gemini.py +++ b/finance_dl/gemini.py @@ -187,8 +187,9 @@ def get_balances_and_prices(requester, api_key, api_secret, data_dir): logger.info(f"Got balances. Found {len(balances)} currencies.") # Prices - tickers = [b['currency'] + "USD" for b in balances if b['currency'] != 'USD'] + tickers = [b['currency'] + "USD" for b in balances if b['currency'] != 'USD' and b['currency'] != 'GUSD'] prices = {} + prices['GUSD'] = 1 for t in tickers: obj = requester.make_request(TICKERS_URL+"/"+t.lower(), None, get = True) price = (float(obj['ask']) + float(obj['bid']))/2 diff --git a/finance_dl/healthequity.py b/finance_dl/healthequity.py index 5d79319..cdacd16 100644 --- a/finance_dl/healthequity.py +++ b/finance_dl/healthequity.py @@ -74,6 +74,9 @@ def CONFIG_healthequity(): import logging import os import bs4 +import tempfile +import openpyxl +from openpyxl.cell.cell import MergedCell from selenium.webdriver.common.by import By from selenium.webdriver.support.ui import Select from selenium.webdriver.common.keys import Keys @@ -101,43 +104,66 @@ def find_first_matching_date(lines, date_format): FUND_ACTIVITY_HEADERS = [ - 'Fund', 'Name', 'Shares (#)', 'Closing Price', 'Closing Value' + 'Fund', 'Name', 'Class', 'Target %\nallocation', 'Est. %\nholding', 'Shares\nheld', 'Closing\nprice', 'Closing\nvalue' ] +# For compatibility with beancount-import's healthequity plugin, write the old +# format for balances.csv files. The three new columns are fairly useless +# anyway, and the new (multiline) column titles are unambiguously worse even if +# a human were to actually ever read these CSVs. 
+OLD_FUND_ACTIVITY_HEADERS = [ + 'Fund','Name',None,None,None,'Shares (#)','Closing Price','Closing Value' +] def write_balances(data, path): rows = [] for entry in data: - keys = [x[0] for x in entry] - if keys == FUND_ACTIVITY_HEADERS: + keys, values = zip(*entry) + if list(keys) == FUND_ACTIVITY_HEADERS: + entry = [ + (k, v.strip().split('\n')[0].strip('$')) + for (k, v) in zip(OLD_FUND_ACTIVITY_HEADERS, values) + if k + ] row_values = dict(entry) row_values['Fund'] = row_values['Fund'].strip().split()[0] + row_values['Name'] = row_values['Name'].strip().split('\n')[0] rows.append(row_values) - csv_merge.write_csv(FUND_ACTIVITY_HEADERS, rows, path) + csv_merge.write_csv([h for h in OLD_FUND_ACTIVITY_HEADERS if h], rows, path) def write_fund_activity(raw_transactions_data, path): - input_date_format = '%m/%d/%Y' - output_date_format = '%Y-%m-%d' - soup = bs4.BeautifulSoup(raw_transactions_data.decode('utf-8'), 'lxml') + def format_cell(c): + if c.is_date: + return c.value.strftime('%Y-%m-%d') + if c.number_format[0] == '$': + base = '${:,.2f}'.format(abs(c.value)) + if c.value >= 0: + return base + else: + return '(%s)' % base + return str(c.value) + + wb = None + with tempfile.NamedTemporaryFile(suffix='.xlsx') as xlsx: + xlsx.write(raw_transactions_data) + xlsx.flush() + wb = openpyxl.load_workbook(xlsx.name) + + ws = wb.worksheets[0] headers = [ 'Date', 'Fund', 'Category', 'Description', 'Price', 'Amount', 'Shares', 'Total Shares', 'Total Value' ] rows = [] - for row in soup.find_all('tr'): - cells = [str(x.text).strip() for x in row.find_all('td')] - while cells and not cells[-1].strip(): - del cells[-1] - if len(cells) == 1: + for row in ws.rows: + if any([isinstance(c, MergedCell) for c in row]): continue - assert len(cells) == len(headers) + assert len(row) == len(headers) + cells = [format_cell(c) for c in row] if cells == headers: continue - row_values = dict(zip(headers, cells)) - row_values['Date'] = datetime.datetime.strptime( - row_values['Date'], input_date_format).strftime(output_date_format) - rows.append(row_values) + rows.append(dict(zip(headers, cells))) csv_merge.merge_into_file(filename=path, field_names=headers, data=rows, sort_by=lambda x: x['Date']) @@ -157,12 +183,14 @@ def write_transactions(raw_transactions_data, path): continue if cells[0] == 'TOTAL': continue - assert len(cells) == len(headers) - if cells == headers: + assert len(cells) >= len(headers), (cells, headers) + if cells[:len(headers)] == headers: continue row_values = dict(zip(headers, cells)) # Sanitize whitespace in description row_values['Transaction'] = ' '.join(row_values['Transaction'].split()) + # Remove duplicate tax year in description + row_values['Transaction'] = re.sub(r'(\(Tax year: \d+\)) *\1', r'\1', row_values['Transaction']) row_values['Cash Balance'] = row_values.pop('HSA Cash Balance') # Sanitize date_str @@ -174,7 +202,11 @@ def write_transactions(raw_transactions_data, path): rows.append(row_values) rows.reverse() csv_merge.merge_into_file(filename=path, field_names=output_headers, - data=rows, sort_by=lambda x: x['Date']) + data=rows, sort_by=lambda x: x['Date'], + # Don't consider balance-after in comparing rows, + # because txn order (and therefore running + # balance) is not stable across visits + compare_fields = output_headers[0:3]) class Scraper(scrape_lib.Scraper): @@ -205,7 +237,7 @@ def login(self): def download_transaction_history(self): (transactions_link, ), = self.wait_and_return( - lambda: self.find_visible_elements_by_descendant_partial_text('Transaction 
History', 'td')) + lambda: self.find_visible_elements(By.ID, 'viewAllLink')) scrape_lib.retry(transactions_link.click, retry_delay=2) (date_select, ), = self.wait_and_return( lambda: self.find_visible_elements_by_descendant_partial_text('All dates', 'select')) @@ -244,7 +276,7 @@ def download_transaction_history(self): def get_investment_balance(self): headers = FUND_ACTIVITY_HEADERS (table, ), = self.wait_and_return( - lambda: scrape_lib.find_table_by_headers(self, headers)) + lambda: self.driver.find_elements(By.TAG_NAME, 'table')) data = scrape_lib.extract_table_data(table, headers) return data @@ -256,16 +288,16 @@ def go_to_investment_history(self): def download_fund_activity(self): logger.info('Looking for fund activity link') (fund_activity_link,), = self.wait_and_return( - lambda: self.find_visible_elements(By.XPATH, '//a[contains(@href, "FundActivity")]')) + lambda: self.find_visible_elements(By.ID, 'EditPortfolioTab')) scrape_lib.retry(fund_activity_link.click, retry_delay=2) - logger.info('Selecting date ranage for fund activity') + logger.info('Selecting date range for fund activity') (start_date,), = self.wait_and_return( - lambda: self.find_visible_elements(By.XPATH, '//input[@type="text" and contains(@id, "dateSelectStart")]')) + lambda: self.find_visible_elements(By.XPATH, '//input[@type="text" and contains(@id, "startDate")]')) start_date.clear() - start_date.send_keys('01011900') + start_date.send_keys('01/01/1900\n') logger.info('Downloading fund activity') (download_link, ), = self.wait_and_return( - lambda: self.driver.find_elements_by_link_text('Download')) + lambda: self.find_visible_elements(By.ID, 'fundPerformanceDownload')) scrape_lib.retry(download_link.click, retry_delay=2) logger.info('Waiting for fund activity download') download_result, = self.wait_and_return(self.get_downloaded_file) diff --git a/finance_dl/ofx.py b/finance_dl/ofx.py index 0b076f1..c08b741 100644 --- a/finance_dl/ofx.py +++ b/finance_dl/ofx.py @@ -132,7 +132,43 @@ def CONFIG_vanguard(): import ofxclient.institution import ofxclient -from beancount.ingest.importers.ofx import parse_ofx_time, find_child +# find_child and parse_ofx_time were derived from implementation in beancount/ingest/importers/ofx.py{,test} +# Copyright (C) 2016 Martin Blais +# GNU GPLv2 +def find_child(node, name, conversion=None): + """Find a child under the given node and return its value. + + Args: + node: A bs4.element.Tag. + name: A string, the name of the child node. + conversion: A callable object used to convert the value to a new data type. + Returns: + A string, or None. + """ + child = node.find(name) + if not child: + return None + if not child.contents: + value = '' + else: + value = child.contents[0].strip() + if conversion: + value = conversion(value) + return value + + +def parse_ofx_time(date_str): + """Parse an OFX time string and return a datetime object. + + Args: + date_str: A string, the date to be parsed. + Returns: + A datetime.datetime instance. 
+ """ + if len(date_str) < 14: + return datetime.datetime.strptime(date_str[:8], '%Y%m%d') + return datetime.datetime.strptime(date_str[:14], '%Y%m%d%H%M%S') + warnings.filterwarnings('ignore', message='split()', module='re') diff --git a/finance_dl/paypal.py b/finance_dl/paypal.py index 1f3ae11..31b120d 100644 --- a/finance_dl/paypal.py +++ b/finance_dl/paypal.py @@ -51,6 +51,7 @@ from selenium.webdriver.support.ui import Select from selenium.webdriver.common.keys import Keys from selenium.common.exceptions import NoSuchElementException +from requests.exceptions import HTTPError import jsonschema from atomicwrites import atomic_write from . import scrape_lib @@ -68,24 +69,30 @@ 'properties': { 'data': { 'type': 'object', - 'required': ['activity'], + 'required': ['data'], 'properties': { - 'activity': { + 'data': { 'type': 'object', - 'required': ['transactions'], + 'required': ['activity'], 'properties': { - 'transactions': { - 'type': 'array', - 'items': { - 'type': 'object', - 'required': ['id'], - 'properties': { - 'id': { - 'type': 'string', - 'pattern': r'^[A-Za-z0-9\-]+$', - }, + 'activity': { + 'type': 'object', + 'required': ['transactions'], + 'properties': { + 'transactions': { + 'type': 'array', + 'items': { + 'type': 'object', + 'required': ['id'], + 'properties': { + 'id': { + 'type': 'string', + 'pattern': r'^[A-Za-z0-9\-]+$', + }, + }, + } }, - } + }, }, }, }, @@ -102,9 +109,9 @@ 'properties': { 'data': { 'type': 'object', - 'required': ['details'], + 'required': ['amount'], 'properties': { - 'details': { + 'amount': { 'type': 'object', }, }, @@ -168,9 +175,9 @@ def get_csrf_token(self): logging.info('Getting CSRF token') self.driver.get('https://www.paypal.com/myaccount/transactions/') # Get CSRF token - body_element, = self.wait_and_locate((By.XPATH, - '//body[@data-token!=""]')) - self.csrf_token = body_element.get_attribute('data-token') + body_element, = self.wait_and_locate((By.ID, "__react_data__")) + attribute_object = json.loads(body_element.get_attribute("data")) + self.csrf_token = attribute_object["_csrf"] return self.csrf_token def get_transaction_list(self): @@ -188,7 +195,7 @@ def get_transaction_list(self): resp.raise_for_status() j = resp.json() jsonschema.validate(j, transaction_list_schema) - return j['data']['activity']['transactions'] + return j['data']['data']['activity']['transactions'] def save_transactions(self): transaction_list = self.get_transaction_list() @@ -226,15 +233,6 @@ def save_transactions(self): + transaction_id) html_path = output_prefix + '.html' json_path = output_prefix + '.json' - if not os.path.exists(html_path): - logging.info('Retrieving HTML %s', details_url) - html_resp = self.driver.request('GET', details_url) - html_resp.raise_for_status() - with atomic_write( - html_path, mode='w', encoding='utf-8', - newline='\n', overwrite=True) as f: - # Write with Unicode Byte Order Mark to ensure content will be properly interpreted as UTF-8 - f.write('\ufeff' + html_resp.text) if not os.path.exists(json_path): logging.info('Retrieving JSON %s', inline_details_url) json_resp = self.make_json_request(inline_details_url) @@ -243,7 +241,26 @@ def save_transactions(self): jsonschema.validate(j, transaction_details_schema) with atomic_write(json_path, mode='wb', overwrite=True) as f: f.write( - json.dumps(j['data']['details'], indent=' ').encode()) + json.dumps(j['data'], indent=' ', sort_keys=True).encode()) + if not os.path.exists(html_path): + logging.info('Retrieving HTML %s', details_url) + html_resp = self.driver.request('GET', 
details_url) + try: + html_resp.raise_for_status() + except HTTPError as e: + # in rare cases no HTML detail page exists but JSON could be extracted + # if JSON is present gracefully skip HTML download if it fails + if os.path.exists(json_path): + # HTML download failed but JSON present -> only log warning + logging.warning('Retrieving HTML %s failed due to %s but JSON is already present. Continuing...', details_url, e) + else: + logging.error('Retrieving HTML %s failed due to %s and no JSON is present. Aborting...', details_url, e) + raise e + with atomic_write( + html_path, mode='w', encoding='utf-8', + newline='\n', overwrite=True) as f: + # Write with Unicode Byte Order Mark to ensure content will be properly interpreted as UTF-8 + f.write('\ufeff' + html_resp.text) def run(self): if not os.path.exists(self.output_directory): diff --git a/finance_dl/scrape_lib.py b/finance_dl/scrape_lib.py index 3cdf123..fa4ca58 100644 --- a/finance_dl/scrape_lib.py +++ b/finance_dl/scrape_lib.py @@ -23,12 +23,12 @@ def all_conditions(*conditions): def extract_table_data(table, header_names, single_header=False): - rows = table.find_elements_by_xpath('thead/tr | tbody/tr | tr') + rows = table.find_elements(By.XPATH, 'thead/tr | tbody/tr | tr') headers = [] seen_data = False data = [] for row in rows: - cell_elements = row.find_elements_by_xpath('th | td') + cell_elements = row.find_elements(By.XPATH, 'th | td') cell_values = [x.text.strip() for x in cell_elements] is_header_values = [x in header_names for x in cell_values if x] if len(is_header_values) == 0: @@ -217,7 +217,7 @@ def get_downloaded_file(self): # See http://www.obeythetestinggoat.com/how-to-get-selenium-to-wait-for-page-load-after-a-click.html @contextlib.contextmanager def wait_for_page_load(self, timeout=30): - old_page = self.driver.find_element_by_tag_name('html') + old_page = self.driver.find_element(By.TAG_NAME, 'html') yield WebDriverWait(self.driver, timeout).until( expected_conditions.staleness_of(old_page), @@ -355,7 +355,7 @@ def find_visible_elements_by_descendant_partial_text( def find_elements_by_descendant_partial_text(self, text, element_name, only_displayed=False): - all_elements = self.driver.find_elements_by_xpath( + all_elements = self.driver.find_elements(By.XPATH, "//text()[contains(.,%r)]/ancestor::*[self::%s][1]" % (text, element_name)) if only_displayed: @@ -364,7 +364,7 @@ def find_elements_by_descendant_partial_text(self, text, element_name, def find_elements_by_descendant_text_match(self, text_match, element_name, only_displayed=False): - all_elements = self.driver.find_elements_by_xpath( + all_elements = self.driver.find_elements(By.XPATH, "//text()[%s]/ancestor::*[self::%s][1]" % (text_match, element_name)) if only_displayed: @@ -372,7 +372,7 @@ def find_elements_by_descendant_text_match(self, text_match, element_name, return all_elements def find_visible_elements_by_partial_text(self, text, element_name): - all_elements = self.driver.find_elements_by_xpath( + all_elements = self.driver.find_elements(By.XPATH, "//%s[contains(.,%r)]" % (element_name, text)) return [x for x in all_elements if is_displayed(x)] diff --git a/finance_dl/usbank.py b/finance_dl/usbank.py index ebf49dc..b9a2678 100644 --- a/finance_dl/usbank.py +++ b/finance_dl/usbank.py @@ -146,7 +146,7 @@ def login(self): def find_account_link_in_any_frame(self): for frame in self.for_each_frame(): try: - return self.driver.find_element_by_partial_link_text(self.account_name) + return self.driver.find_element(By.PARTIAL_LINK_TEXT, self.account_name) 
except: pass raise NoSuchElementException() @@ -155,7 +155,7 @@ def find_account_link_in_any_frame(self): def find_download_page_in_any_frame(self): for frame in self.for_each_frame(): try: - return self.driver.find_element_by_partial_link_text("Download Transactions") + return self.driver.find_element(By.PARTIAL_LINK_TEXT, "Download Transactions") except: pass raise NoSuchElementException() @@ -164,8 +164,8 @@ def find_download_page_in_any_frame(self): def find_date_fields(self): for frame in self.for_each_frame(): try: - fromDate = self.driver.find_element_by_id("FromDateInput") - toDate = self.driver.find_element_by_id("ToDateInput") + fromDate = self.driver.find_element(By.ID, "FromDateInput") + toDate = self.driver.find_element(By.ID, "ToDateInput") return (fromDate, toDate) except: pass @@ -175,7 +175,7 @@ def find_date_fields(self): def find_download_link(self): for frame in self.for_each_frame(): try: - return self.driver.find_elements_by_id("DTLLink")[0] + return self.driver.find_elements(By.ID, "DTLLink")[0] except: pass raise NoSuchElementException() diff --git a/finance_dl/waveapps.py b/finance_dl/waveapps.py index 4f1e40c..33188fd 100644 --- a/finance_dl/waveapps.py +++ b/finance_dl/waveapps.py @@ -75,7 +75,7 @@ def CONFIG_waveapps(): """ -from typing import List, Any +from typing import List, Any, Optional import contextlib import logging import json @@ -154,7 +154,7 @@ def get_receipts(self, business_id: str): receipts.extend(cur_list) return receipts - def save_receipts(self, receipts: List[Any], output_directory: str = None): + def save_receipts(self, receipts: List[Any], output_directory: Optional[str] = None): if not output_directory: output_directory = self.output_directory if not os.path.exists(output_directory):
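The new `download_preorder_invoices` option added to `finance_dl.amazon` can be enabled from a configuration function like the one shown in `example_finance_dl_config.py`. The following is a minimal sketch only, combining that interactive-credentials pattern with the new option; `data_dir` is a placeholder, and omitting the option leaves the default (False), so pre-order invoices are skipped until the order is fulfilled:

    import os
    from getpass import getpass

    data_dir = os.path.join(os.path.dirname(__file__), 'data')  # placeholder

    def CONFIG_amazon():
        return dict(
            module='finance_dl.amazon',
            credentials={
                'username': input('Amazon username: '),
                'password': getpass('Amazon password: '),
            },
            # Also fetch invoices for orders that have not been charged yet
            # (skipped by default, since they show incorrect charge data).
            download_preorder_invoices=True,
            output_directory=os.path.join(data_dir, 'amazon'),
        )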