diff --git a/youtube_callback/sftp_id_retrieve.py b/youtube_callback/sftp_id_retrieve.py index d4e638f..3e86f95 100644 --- a/youtube_callback/sftp_id_retrieve.py +++ b/youtube_callback/sftp_id_retrieve.py @@ -2,10 +2,12 @@ Check SFTP dropboxes for YT Video ID XML information """ +import csv import datetime import fnmatch import logging import os +import re import shutil import sys import xml.etree.ElementTree as ET @@ -48,21 +50,22 @@ def callfunction(course): shutil.rmtree(workdir) os.mkdir(workdir) - xml_downloader(course) + xml_csv_downloader(course) for file in os.listdir(workdir): - upload_data = domxml_parser(file) + if 'report-' in file: + upload_data = domxml_parser(file) if is_xml_file(file) else csv_parser(file) - if upload_data is not None: - LOGGER.info('[YOUTUBE_CALLBACK] : {inst}{clss} {upload_data}'.format( - inst=course.institution, - clss=course.edx_classid, - upload_data=upload_data - )) - urlpatch(upload_data) + if upload_data is not None: + LOGGER.info('[YOUTUBE CALLBACK] : {inst}{clss} {upload_data}'.format( + inst=course.institution, + clss=course.edx_classid, + upload_data=upload_data + )) + urlpatch(upload_data) -def xml_downloader(course): +def xml_csv_downloader(course): """ :param course: @@ -89,17 +92,17 @@ def xml_downloader(course): for d in s1.listdir_attr(): crawl_sftp(d=d, s1=s1) except AuthenticationException: - LOGGER.error("[YOUTUBE_CALLBACK] : {inst}{clss} : Authentication Failed".format( + LOGGER.error("[YOUTUBE CALLBACK] : {inst}{clss} : Authentication Failed".format( inst=course.institution, clss=course.edx_classid )) except SSHException: - LOGGER.error("[YOUTUBE_CALLBACK] : {inst}{clss} : Authentication Failed".format( + LOGGER.error("[YOUTUBE CALLBACK] : {inst}{clss} : Authentication Failed".format( inst=course.institution, clss=course.edx_classid )) except IOError: - LOGGER.error("[YOUTUBE_CALLBACK] : {inst}{clss} : List Dir Failed".format( + LOGGER.error("[YOUTUBE CALLBACK] : {inst}{clss} : List Dir Failed".format( 
inst=course.institution, clss=course.edx_classid )) @@ -155,6 +158,8 @@ def crawl_sftp(d, s1): return except SSHException: return + except OSError: + return s1.cwd('..') @@ -164,10 +169,6 @@ def domxml_parser(file): :param file: :return: """ - - if 'status-' not in file: - return - upload_data = { 'datetime': None, 'status': None, @@ -176,11 +177,18 @@ 'file_suffix': None, 'youtube_id': None } + try: tree = ET.parse(os.path.join(workdir, file)) except ET.ParseError: + LOGGER.error('[YOUTUBE CALLBACK] : Parse Error in domxml parser : file {filename}'.format( + filename=file + )) return except IOError: + LOGGER.error('[YOUTUBE CALLBACK] : IO Error in domxml parser : file {filename}'.format( + filename=file + )) return root = tree.getroot() for child in root: @@ -221,6 +229,105 @@ return upload_data +def csv_parser(filename): + """ + :param filename: string + :return: upload_data : dict + """ + upload_data = { + 'datetime': None, + 'status': None, + 'duplicate_url': None, + 'edx_id': filename.replace('report-', '').split('_')[0], + 'file_suffix': None, + 'youtube_id': None + } + + status_index = file_suffix_index = youtube_id_index = 0 + + if not os.path.exists(os.path.join(workdir, filename)): + LOGGER.info('[YOUTUBE CALLBACK] : CSV file {filename} does not exist'.format( + filename=filename + )) + return + + with open(os.path.join(workdir, filename), 'rb') as csvfile: + file_reader = csv.reader(csvfile, delimiter=',') + try: + headers = next(file_reader) + except StopIteration: + LOGGER.info('[YOUTUBE CALLBACK] : CSV file {filename} exists but is empty'.format( + filename=filename + )) + return + + for column in headers: + if column == "Status": + status_index = headers.index(column) + elif column == "Video file": + file_suffix_index = headers.index(column) + elif column == "Video ID": + youtube_id_index = headers.index(column) + + for row in file_reader: + video_url = row[file_suffix_index] + upload_data['status'] = 
row[status_index] + if upload_data['status'] == "Errors": + upload_data = _process_errors(upload_data, filename) + + upload_data['youtube_id'] = row[youtube_id_index] + + try: + upload_data['file_suffix'] = video_url.split("_")[1].split(".")[0] + except IndexError: + upload_data['file_suffix'] = '100' + + return upload_data + + +def _process_errors(upload_data, reports_file): + """ + :param upload_data : dict + reports_file : string + :return: upload_data : dict + """ + errors_file = os.path.join(workdir, reports_file.replace("report-", "errors-")) + + error_code_index = error_message_index = 0 + error_message_pattern = re.compile('Duplicate video ID is \[(?P<youtube_id>[0-9a-zA-Z_-]*)\]') + + try: + with open(errors_file, 'rb') as csvfile: + file_reader = csv.reader(csvfile, delimiter=',') + headers = next(file_reader) + for column in headers: + if column == "Error code": + error_code_index = headers.index(column) + elif column == "Error message": + error_message_index = headers.index(column) + + for row in file_reader: + if row[error_code_index] == "VIDEO_REJECTED_DUPLICATE": + upload_data['status'] = "Duplicate" + error_message = row[error_message_index] + youtube_id_search = error_message_pattern.search(error_message) + if youtube_id_search: + upload_data['duplicate_url'] = youtube_id_search.groups()[0] + else: + LOGGER.error( + '[YOUTUBE CALLBACK] : Youtube callback returned Duplicate Video error but ' + + 'duplicate video ID could not be found. Upload data: {upload_data}. 
' + + 'CSV: {csv}'.format( + upload_data=upload_data, + csv=row + )) + except IOError: + LOGGER.error('[YOUTUBE CALLBACK] : Could not open error file {file}'.format( + file=errors_file + )) + return upload_data + + def urlpatch(upload_data): """ @@ -235,7 +342,10 @@ def urlpatch(upload_data): upload_data['status'] = 'Failure' return - if upload_data['status'] == 'Success': + if upload_data['status'] == 'Successful': + LOGGER.info('[YOUTUBE CALLBACK] : Urlpatch : Upload status is successful : {upload_data}'.format( + upload_data=upload_data + )) url_query = URL.objects.filter( encode_url=upload_data['youtube_id'] ) @@ -321,6 +431,10 @@ def urlpatch(upload_data): elif upload_data['status'] == 'Duplicate' and \ upload_data['file_suffix'] == '100': + LOGGER.info('[YOUTUBE CALLBACK] : Urlpatch : Upload status is duplicate : {upload_data}'.format( + upload_data=upload_data + )) + url_query = URL.objects.filter( videoID=Video.objects.filter( edx_id=upload_data['edx_id'] @@ -354,3 +468,7 @@ def urlpatch(upload_data): encode_profile='youtube' ) ApiConn.call() + + +def is_xml_file(file): + return file.lower().endswith(('.xml'))