Skip to content

Commit

Permalink
[Paypal] handle case gracefully when only JSON can be extracted but no HTML (#81)
Browse files Browse the repository at this point in the history

* [Paypal] handle case gracefully when only JSON can be extracted but no HTML
  • Loading branch information
moritzj29 committed Jan 9, 2023
1 parent aabfa12 commit 4b8e28a
Showing 1 changed file with 20 additions and 9 deletions.
29 changes: 20 additions & 9 deletions finance_dl/paypal.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
from requests.exceptions import HTTPError
import jsonschema
from atomicwrites import atomic_write
from . import scrape_lib
Expand Down Expand Up @@ -232,15 +233,6 @@ def save_transactions(self):
+ transaction_id)
html_path = output_prefix + '.html'
json_path = output_prefix + '.json'
if not os.path.exists(html_path):
logging.info('Retrieving HTML %s', details_url)
html_resp = self.driver.request('GET', details_url)
html_resp.raise_for_status()
with atomic_write(
html_path, mode='w', encoding='utf-8',
newline='\n', overwrite=True) as f:
# Write with Unicode Byte Order Mark to ensure content will be properly interpreted as UTF-8
f.write('\ufeff' + html_resp.text)
if not os.path.exists(json_path):
logging.info('Retrieving JSON %s', inline_details_url)
json_resp = self.make_json_request(inline_details_url)
Expand All @@ -250,6 +242,25 @@ def save_transactions(self):
with atomic_write(json_path, mode='wb', overwrite=True) as f:
f.write(
json.dumps(j['data'], indent=' ', sort_keys=True).encode())
if not os.path.exists(html_path):
logging.info('Retrieving HTML %s', details_url)
html_resp = self.driver.request('GET', details_url)
try:
html_resp.raise_for_status()
except HTTPError as e:
# in rare cases no HTML detail page exists but JSON could be extracted
# if JSON is present gracefully skip HTML download if it fails
if os.path.exists(json_path):
# HTML download failed but JSON present -> only log warning
logging.warning('Retrieving HTML %s failed due to %s but JSON is already present. Continuing...', details_url, e)
else:
logging.error('Retrieving HTML %s failed due to %s and no JSON is present. Aborting...', details_url, e)
raise e
with atomic_write(
html_path, mode='w', encoding='utf-8',
newline='\n', overwrite=True) as f:
# Write with Unicode Byte Order Mark to ensure content will be properly interpreted as UTF-8
f.write('\ufeff' + html_resp.text)

def run(self):
if not os.path.exists(self.output_directory):
Expand Down

0 comments on commit 4b8e28a

Please sign in to comment.