Fixed crash when dumping an online test as a teacher

bartvbl · Jun 26, 2017 · 9eb9ee3 · 9eb9ee3
1 parent e9b7e01
commit 9eb9ee3
Showing 1 changed file with 137 additions and 131 deletions.
diff --git a/scrape.py b/scrape.py
@@ -1021,170 +1021,176 @@ def processOnlineTest(institution, pathThusFar, nttUrl, nttID, session):
 	dumpDirectory = sanitisePath(dumpDirectory)
 	dumpDirectory = makeDirectories(dumpDirectory)
 
-	results_root_element = online_test_document.get_element_by_id('ctl39_ResultsTable_table')
-	test_info_elements = online_test_document.find_class('itsl-detailed-info')
+	has_submitted_answer = True
+	try:
+		results_root_element = online_test_document.get_element_by_id('ctl39_ResultsTable_table')
+	except Exception:
+		has_submitted_answer = False
 
-
-	# Extracting test information
-	info_file_contents = ''
+	if has_submitted_answer:
+		test_info_elements = online_test_document.find_class('itsl-detailed-info')
 
-	for info_element in test_info_elements:
-		for info_list_element in info_element:
-			info_file_contents += info_list_element[0].text_content() + ' ' + info_list_element[1].text_content() + '\n'
+		
+		# Extracting test information
+		info_file_contents = ''
 
-	bytesToTextFile(info_file_contents.encode('utf-8'), dumpDirectory + '/Test Information' + output_text_extension)
+		for info_element in test_info_elements:
+			for info_list_element in info_element:
+				info_file_contents += info_list_element[0].text_content() + ' ' + info_list_element[1].text_content() + '\n'
 
-	# Download test answers
+		bytesToTextFile(info_file_contents.encode('utf-8'), dumpDirectory + '/Test Information' + output_text_extension)
 
-	table_headers = []
-	for index, table_row in enumerate(results_root_element):
-		# results_root_element[0] is a <caption> element
-		if index == 0:
-			continue
-		# results_root_element[1] contains table headers
-		elif index == 1:
-			for table_header in results_root_element[1]:
-				table_headers.append(table_header.text_content())
-			continue
-		# The remainder of the table rows are attempts we want to save.
-		attempt_file_contents = ''
-
-		attempt_index = index - 1
-		details_URL = None
-
-		for cell_index, table_cell in enumerate(table_row):
-			table_cell_name = table_headers[cell_index]
-			table_cell_content = table_cell.text_content()
-			if table_cell_name == 'Details':
-				details_URL = table_cell[0].get('href')
-			else:
-				attempt_file_contents += table_cell_name + ': ' + table_cell_content + '\n'
-
-		# Only dumping the details afterwards so that we get a nice header in the output file containing the attempt details.
-		if details_URL is not None:
-			details_page_response = session.get(itslearning_root_url[institution] + details_URL, allow_redirects=True)
-			details_page_document = fromstring(details_page_response.text)
+		# Download test answers
+
+		table_headers = []
+		for index, table_row in enumerate(results_root_element):
+			# results_root_element[0] is a <caption> element
+			if index == 0:
+				continue
+			# results_root_element[1] contains table headers
+			elif index == 1:
+				for table_header in results_root_element[1]:
+					table_headers.append(table_header.text_content())
+				continue
+			# The remainder of the table rows are attempts we want to save.
+			attempt_file_contents = ''
 
-			attempt_file_contents += '\n'
+			attempt_index = index - 1
+			details_URL = None
+
+			for cell_index, table_cell in enumerate(table_row):
+				table_cell_name = table_headers[cell_index]
+				table_cell_content = table_cell.text_content()
+				if table_cell_name == 'Details':
+					details_URL = table_cell[0].get('href')
+				else:
+					attempt_file_contents += table_cell_name + ': ' + table_cell_content + '\n'
+
+			# Only dumping the details afterwards so that we get a nice header in the output file containing the attempt details.
+			if details_URL is not None:
+				details_page_response = session.get(itslearning_root_url[institution] + details_URL, allow_redirects=True)
+				details_page_document = fromstring(details_page_response.text)
+
+				attempt_file_contents += '\n'
 
-			# Dumping result data shown on page
-			for result_details_element in details_page_document.find_class('ntt-test-result-status-label'):
-				result_details_label = result_details_element.text_content().strip()
-				result_details_value = result_details_element.getnext().text_content().strip().replace('  ', ' ').replace('\n', '')
-				attempt_file_contents += result_details_label + ' ' + result_details_value + '\n'
+				# Dumping result data shown on page
+				for result_details_element in details_page_document.find_class('ntt-test-result-status-label'):
+					result_details_label = result_details_element.text_content().strip()
+					result_details_value = result_details_element.getnext().text_content().strip().replace('  ', ' ').replace('\n', '')
+					attempt_file_contents += result_details_label + ' ' + result_details_value + '\n'
 
-			total_assessment_element = details_page_document.find_class('ccl-assess-badge')
-			if len(total_assessment_element) > 0:
-				attempt_file_contents += 'Assessment: ' + total_assessment_element[0].text_content().strip() + '\n'
+				total_assessment_element = details_page_document.find_class('ccl-assess-badge')
+				if len(total_assessment_element) > 0:
+					attempt_file_contents += 'Assessment: ' + total_assessment_element[0].text_content().strip() + '\n'
 
-			last_page_dumped = False
+				last_page_dumped = False
 
-			attempt_file_contents += '\n-------------- Answers --------------\n\n'
+				attempt_file_contents += '\n-------------- Answers --------------\n\n'
 
-			question_index = 1
-			page_index = 1
-			question_table_body = details_page_document.get_element_by_id('ctl00_ContentPlaceHolder_ResultsGrid_TB')
+				question_index = 1
+				page_index = 1
+				question_table_body = details_page_document.get_element_by_id('ctl00_ContentPlaceHolder_ResultsGrid_TB')
 
-			attemptDirectory = dumpDirectory + '/Attempt ' + str(attempt_index)
-			attemptDirectory = makeDirectories(attemptDirectory)
+				attemptDirectory = dumpDirectory + '/Attempt ' + str(attempt_index)
+				attemptDirectory = makeDirectories(attemptDirectory)
 
-			while len(question_table_body) > 0:
-				for question_element in question_table_body:
-					print('\tSaving question', question_index)
-					question_link = itslearning_root_url[institution] + question_element[0][1].get('href')
-
-					question_response = session.get(question_link, allow_redirects=True)
-					question_document = fromstring(question_response.text)
+				while len(question_table_body) > 0:
+					for question_element in question_table_body:
+						print('\tSaving question', question_index)
+						question_link = itslearning_root_url[institution] + question_element[0][1].get('href')
+						
+						question_response = session.get(question_link, allow_redirects=True)
+						question_document = fromstring(question_response.text)
 
-					question_title = question_element[1].text_content()
-					question_result = question_document.find_class('question-result')[0].text_content()
+						question_title = question_element[1].text_content()
+						question_result = question_document.find_class('question-result')[0].text_content()
 
-					attempt_file_contents += 'Question ' + str(question_index) + ': ' + question_title + '\n\n'
-					attempt_file_contents += question_result + '\n'
+						attempt_file_contents += 'Question ' + str(question_index) + ': ' + question_title + '\n\n'
+						attempt_file_contents += question_result + '\n'
 
-					try:
-						question_options_table = question_document.get_element_by_id('qti-choiceinteraction-container')
+						try:
+							question_options_table = question_document.get_element_by_id('qti-choiceinteraction-container')
 
-						for option_index, question_options_row in enumerate(question_options_table):
-							if option_index == 0:
-								attempt_file_contents += question_options_row.text_content() + '\n'
-								continue
-							if question_options_row.get('class') is not None and 'checkedrow' in question_options_row.get('class'):
-								attempt_file_contents += 'Option (selected): ' + question_options_row.text_content() + '\n'
-							else:
-								attempt_file_contents += 'Option: ' + question_options_row.text_content() + '\n'
-					except Exception:
-						attempt_file_contents += 'Non Multiple Choice question. You can find the entire page in the folder titled "Attempt {}".\n'.format(question_index)
-
-					# Need to download images from hotspot questions
-					content_divs = question_document.find_class('content')
-					for content_div in content_divs:
-						for image_tag in content_div.iterfind(".//img"):
-
-							image_URL = image_tag.get('src')
-
-							# For some reason there can be images containing nothing on a page. No idea why.
-							if image_URL is None: 
-								continue
+							for option_index, question_options_row in enumerate(question_options_table):
+								if option_index == 0:
+									attempt_file_contents += question_options_row.text_content() + '\n'
+									continue
+								if question_options_row.get('class') is not None and 'checkedrow' in question_options_row.get('class'):
+									attempt_file_contents += 'Option (selected): ' + question_options_row.text_content() + '\n'
+								else:
+									attempt_file_contents += 'Option: ' + question_options_row.text_content() + '\n'
+						except Exception:
+							attempt_file_contents += 'Non Multiple Choice question. You can find the entire page in the folder titled "Attempt {}".\n'.format(question_index)
 
-							# Special case for relative URL's: drop the It's Learning root URL in front of it
-							if not image_URL.startswith('http'):
-								image_URL = itslearning_root_url[institution] + image_URL
+						# Need to download images from hotspot questions
+						content_divs = question_document.find_class('content')
+						for content_div in content_divs:
+							for image_tag in content_div.iterfind(".//img"):
 
-							filename = download_file(institution, image_URL, attemptDirectory, session)
-							attempt_file_contents += 'Attachment image: ' + filename + '\n'
+								image_URL = image_tag.get('src')
 
-							delay()
-
-					bytesToTextFile(question_response.text.encode('utf-8'), attemptDirectory + '/Question ' + str(question_index) + '.html')
+								# For some reason there can be images containing nothing on a page. No idea why.
+								if image_URL is None: 
+									continue
 
-					attempt_file_contents += '\n'
+								# Special case for relative URLs: prepend the It's Learning root URL to them
+								if not image_URL.startswith('http'):
+									image_URL = itslearning_root_url[institution] + image_URL
 
+								filename = download_file(institution, image_URL, attemptDirectory, session)
+								attempt_file_contents += 'Attachment image: ' + filename + '\n'
 
-					delay()
-					question_index += 1
+								delay()
+
+						bytesToTextFile(question_response.text.encode('utf-8'), attemptDirectory + '/Question ' + str(question_index) + '.html')
 
-				print('\tPage finished, loading next one.')
-				postback_form = None
-				for form in details_page_document.forms:
-					if '__EVENTTARGET' in form.fields:
-						postback_form = form
-						break
+						attempt_file_contents += '\n'
 
-				if postback_form is None:
-					raise Error('No postback form found on page!\nURL: ' + details_URL)
 
-				# Extracting the destination URL from the form action field.
-				# The URL starts with ./, so I'm removing the dot to obtain a complete URL
-				form_action_url = itslearning_root_url[institution] + details_URL
+						delay()
+						question_index += 1
 
-				page_index += 1
+					print('\tPage finished, loading next one.')
+					postback_form = None
+					for form in details_page_document.forms:
+						if '__EVENTTARGET' in form.fields:
+							postback_form = form
+							break
 
-				# This specific page requires these additional elements to do the postback
-				postback_form = details_page_document.forms[0]
-				postback_form.fields['__EVENTTARGET'] = 'ctl00$ContentPlaceHolder$ResultsGrid'
-				postback_form.fields['ctl00$ContentPlaceHolder$ResultsGrid$HPN'] = str(page_index)
-				postback_form.fields['ctl00$ContentPlaceHolder$ResultsGrid$HSE'] = ''
-				postback_form.fields['ctl00$ContentPlaceHolder$ResultsGrid$HGC'] = ''
-				postback_form.fields['ctl00$ContentPlaceHolder$ResultsGrid$HFI'] = ''
+					if postback_form is None:
+						raise Error('No postback form found on page!\nURL: ' + details_URL)
 
-				post_data = convert_lxml_form_to_requests(postback_form)
+					# The postback is submitted to the details page itself, so the destination URL
+					# is built from the It's Learning root URL and the details URL of this attempt.
+					form_action_url = itslearning_root_url[institution] + details_URL
 
-				# Submitting the form to obtain the next page
-				headers = {}
-				headers['Referer'] = form_action_url
+					page_index += 1
 
-				details_page_response = session.post(form_action_url, headers=headers, data=post_data, allow_redirects=True)
-				details_page_document = fromstring(details_page_response.text)
-				question_table_body = details_page_document.get_element_by_id('ctl00_ContentPlaceHolder_ResultsGrid_TB')
-				delay()
-			print('\tAll pages have been loaded.')
-		else:
-			print('ERROR: COULD NOT FIND DETAILS PAGE OF ONLINE TEST.')
-			print('NO DETAILS WILL BE SAVED OF THIS TEST.')
+					# This specific page requires these additional elements to do the postback
+					postback_form = details_page_document.forms[0]
+					postback_form.fields['__EVENTTARGET'] = 'ctl00$ContentPlaceHolder$ResultsGrid'
+					postback_form.fields['ctl00$ContentPlaceHolder$ResultsGrid$HPN'] = str(page_index)
+					postback_form.fields['ctl00$ContentPlaceHolder$ResultsGrid$HSE'] = ''
+					postback_form.fields['ctl00$ContentPlaceHolder$ResultsGrid$HGC'] = ''
+					postback_form.fields['ctl00$ContentPlaceHolder$ResultsGrid$HFI'] = ''
+
+					post_data = convert_lxml_form_to_requests(postback_form)
+
+					# Submitting the form to obtain the next page
+					headers = {}
+					headers['Referer'] = form_action_url
+
+					details_page_response = session.post(form_action_url, headers=headers, data=post_data, allow_redirects=True)
+					details_page_document = fromstring(details_page_response.text)
+					question_table_body = details_page_document.get_element_by_id('ctl00_ContentPlaceHolder_ResultsGrid_TB')
+					delay()
+				print('\tAll pages have been loaded.')
+			else:
+				print('ERROR: COULD NOT FIND DETAILS PAGE OF ONLINE TEST.')
+				print('NO DETAILS WILL BE SAVED OF THIS TEST.')
 
 
-		bytesToTextFile(attempt_file_contents.encode('utf-8'), dumpDirectory + '/Attempt ' + str(attempt_index) + output_text_extension)
+			bytesToTextFile(attempt_file_contents.encode('utf-8'), dumpDirectory + '/Attempt ' + str(attempt_index) + output_text_extension)
 
 
 def processFile(institution, pathThusFar, fileURL, session):