From f4367a88dce9ee181fe2fdcdb14bf27495be96b1 Mon Sep 17 00:00:00 2001 From: "Bart van Blokland (on MECHANINJA)" Date: Tue, 11 Jul 2017 09:15:35 +0200 Subject: [PATCH] Fixed a crash on some strangely formatted images --- scrape.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/scrape.py b/scrape.py index 39eeaa3..35000fb 100644 --- a/scrape.py +++ b/scrape.py @@ -378,12 +378,15 @@ def download_file(institution, url, destination_directory, session, index=None, except Exception: # Can occur in a case of an encoded image. If so, dump it. if base64_png_image_url[institution] in url or base64_jpeg_image_url[institution] in url: - extension = url.split(':')[2].split(';')[0].split('/')[1] - print('\tDownloaded Base64 encoded {} image'.format(extension).encode('ascii', 'ignore')) - start_index = url.index(',') + 1 - base64_encoded_file_contents = url[start_index:] - decoded_bytes = base64.b64decode(base64_encoded_file_contents) - bytesToTextFile(decoded_bytes, destination_directory + '/' + base64_encoded_file_contents[0:10] + '.' + extension) + try: + extension = url.split(':')[2].split(';')[0].split('/')[1] + print('\tDownloaded Base64 encoded {} image'.format(extension).encode('ascii', 'ignore')) + start_index = url.index(',') + 1 + base64_encoded_file_contents = url[start_index:] + decoded_bytes = base64.b64decode(base64_encoded_file_contents) + bytesToTextFile(decoded_bytes, destination_directory + '/' + base64_encoded_file_contents[0:10] + '.' + extension) + except Exception: + print('Base64 Image Download Failed: unknown umage formatting. Skipping.') return elif url.startswith('/'): try: