From f4367a88dce9ee181fe2fdcdb14bf27495be96b1 Mon Sep 17 00:00:00 2001
From: "Bart van Blokland (on MECHANINJA)" <bart.van.blokland@idi.ntnu.no>
Date: Tue, 11 Jul 2017 09:15:35 +0200
Subject: [PATCH] Fixed a crash on some strangely formatted images

---
 scrape.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/scrape.py b/scrape.py
index 39eeaa3..35000fb 100644
--- a/scrape.py
+++ b/scrape.py
@@ -378,12 +378,15 @@ def download_file(institution, url, destination_directory, session, index=None,
 	except Exception:
 		# Can occur in a case of an encoded image. If so, dump it.
 		if base64_png_image_url[institution] in url or base64_jpeg_image_url[institution] in url:
-			extension = url.split(':')[2].split(';')[0].split('/')[1]
-			print('\tDownloaded Base64 encoded {} image'.format(extension).encode('ascii', 'ignore'))
-			start_index = url.index(',') + 1
-			base64_encoded_file_contents = url[start_index:]
-			decoded_bytes = base64.b64decode(base64_encoded_file_contents)
-			bytesToTextFile(decoded_bytes, destination_directory + '/' + base64_encoded_file_contents[0:10] + '.' + extension)
+			try:
+				extension = url.split(':')[2].split(';')[0].split('/')[1]
+				print('\tDownloaded Base64 encoded {} image'.format(extension).encode('ascii', 'ignore'))
+				start_index = url.index(',') + 1
+				base64_encoded_file_contents = url[start_index:]
+				decoded_bytes = base64.b64decode(base64_encoded_file_contents)
+				bytesToTextFile(decoded_bytes, destination_directory + '/' + base64_encoded_file_contents[0:10] + '.' + extension)
+			except Exception:
+				print('Base64 Image Download Failed: unknown image formatting. Skipping.')
 			return
 		elif url.startswith('/'):
 			try: