This repository has been archived by the owner on Oct 1, 2020. It is now read-only.

Merge pull request #100 from edx/sofiya/youtube-bug
Multiple courses stuck in YouTube renditions
ssemenova authored Apr 20, 2018
2 parents 5c67dfa + e7b3838 commit 2f0d8d4
Showing 1 changed file with 136 additions and 18 deletions.

youtube_callback/sftp_id_retrieve.py
@@ -2,10 +2,12 @@
 Check SFTP dropboxes for YT Video ID XML information
 """
+import csv
 import datetime
 import fnmatch
 import logging
 import os
+import re
 import shutil
 import sys
 import xml.etree.ElementTree as ET
@@ -48,21 +50,22 @@ def callfunction(course):
     shutil.rmtree(workdir)
     os.mkdir(workdir)

-    xml_downloader(course)
+    xml_csv_downloader(course)

     for file in os.listdir(workdir):
-        upload_data = domxml_parser(file)
+        if 'report-' in file:
+            upload_data = domxml_parser(file) if is_xml_file(file) else csv_parser(file)

-        if upload_data is not None:
-            LOGGER.info('[YOUTUBE_CALLBACK] : {inst}{clss} {upload_data}'.format(
-                inst=course.institution,
-                clss=course.edx_classid,
-                upload_data=upload_data
-            ))
-            urlpatch(upload_data)
+            if upload_data is not None:
+                LOGGER.info('[YOUTUBE CALLBACK] : {inst}{clss} {upload_data}'.format(
+                    inst=course.institution,
+                    clss=course.edx_classid,
+                    upload_data=upload_data
+                ))
+                urlpatch(upload_data)


-def xml_downloader(course):
+def xml_csv_downloader(course):
     """
     :param course:
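Note on the callfunction change above: files are now parsed only when their names contain 'report-', and the parser is chosen by extension via is_xml_file (added at the bottom of this diff). A minimal sketch of the routing, assuming this module's names are in scope; the filenames are hypothetical examples:

    # Hypothetical filenames, for illustration only
    for name in ['report-HARXXX_100.xml', 'report-HARXXX_100.csv', 'status-HARXXX.xml']:
        if 'report-' in name:
            # .xml reports go to domxml_parser, everything else to csv_parser
            parser = domxml_parser if is_xml_file(name) else csv_parser
        else:
            parser = None  # files without 'report-' in the name are skipped entirely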
@@ -89,17 +92,17 @@ def xml_downloader(course):
         for d in s1.listdir_attr():
             crawl_sftp(d=d, s1=s1)
     except AuthenticationException:
-        LOGGER.error("[YOUTUBE_CALLBACK] : {inst}{clss} : Authentication Failed".format(
+        LOGGER.error("[YOUTUBE CALLBACK] : {inst}{clss} : Authentication Failed".format(
             inst=course.institution,
             clss=course.edx_classid
         ))
     except SSHException:
-        LOGGER.error("[YOUTUBE_CALLBACK] : {inst}{clss} : Authentication Failed".format(
+        LOGGER.error("[YOUTUBE CALLBACK] : {inst}{clss} : Authentication Failed".format(
             inst=course.institution,
             clss=course.edx_classid
         ))
     except IOError:
-        LOGGER.error("[YOUTUBE_CALLBACK] : {inst}{clss} : List Dir Failed".format(
+        LOGGER.error("[YOUTUBE CALLBACK] : {inst}{clss} : List Dir Failed".format(
             inst=course.institution,
             clss=course.edx_classid
         ))
@@ -155,6 +158,8 @@ def crawl_sftp(d, s1):
         return
     except SSHException:
         return
+    except OSError:
+        return
     s1.cwd('..')


@@ -164,10 +169,6 @@ def domxml_parser(file):
     :param file:
     :return:
     """
-
-    if 'status-' not in file:
-        return
-
     upload_data = {
         'datetime': None,
         'status': None,
@@ -176,11 +177,18 @@
         'file_suffix': None,
         'youtube_id': None
     }
-
     try:
         tree = ET.parse(os.path.join(workdir, file))
     except ET.ParseError:
+        LOGGER.error('[YOUTUBE CALLBACK] : Parse Error in domxml parser : file {filename}'.format(
+            filename=file
+        ))
         return
+    except IOError:
+        LOGGER.error('[YOUTUBE CALLBACK] : IO Error in domxml parser : file {filename}'.format(
+            filename=file
+        ))
+        return
     root = tree.getroot()
     for child in root:
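With the two except blocks above, a malformed or unreadable status XML is logged and the parser returns None, which callfunction's 'is not None' guard then skips; previously only ET.ParseError was caught, and silently. A short summary in comment form (the filename is hypothetical):

    # domxml_parser('report-HARXXX_100.xml') now returns None when:
    #   - ET.ParseError: the file exists but is not well-formed XML (logged)
    #   - IOError: the file cannot be opened or read (logged)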
@@ -221,6 +229,105 @@ def domxml_parser(file):
     return upload_data


+def csv_parser(filename):
+    """
+    :param filename: string
+    :return: upload_data : dict
+    """
+    upload_data = {
+        'datetime': None,
+        'status': None,
+        'duplicate_url': None,
+        'edx_id': filename.replace('report-', '', 1).split('_')[0],
+        'file_suffix': None,
+        'youtube_id': None
+    }
+
+    status_index = file_suffix_index = youtube_id_index = 0
+
+    if not os.path.exists(os.path.join(workdir, filename)):
+        LOGGER.info('[YOUTUBE CALLBACK] : CSV file {filename} does not exist'.format(
+            filename=filename
+        ))
+        return
+
+    with open(os.path.join(workdir, filename), 'rb') as csvfile:
+        file_reader = csv.reader(csvfile, delimiter=',')
+        try:
+            headers = next(file_reader)
+        except StopIteration:
+            LOGGER.info('[YOUTUBE CALLBACK] : CSV file {filename} exists but is empty'.format(
+                filename=filename
+            ))
+            return
+
+        for column in headers:
+            if column == "Status":
+                status_index = headers.index(column)
+            elif column == "Video file":
+                file_suffix_index = headers.index(column)
+            elif column == "Video ID":
+                youtube_id_index = headers.index(column)
+
+        for row in file_reader:
+            video_url = row[file_suffix_index]
+            upload_data['status'] = row[status_index]
+            if upload_data['status'] == "Errors":
+                upload_data = _process_errors(upload_data, filename)
+
+            upload_data['youtube_id'] = row[youtube_id_index]
+
+            try:
+                upload_data['file_suffix'] = video_url.split("_")[1].split(".")[0]
+            except IndexError:
+                upload_data['file_suffix'] = '100'
+
+    return upload_data
+
+
+def _process_errors(upload_data, reports_file):
+    """
+    :param upload_data : dict
+           reports_file : string
+    :return: upload_data : dict
+    """
+    errors_file = os.path.join(workdir, reports_file.replace("report-", "errors-"))
+
+    error_code_index = error_message_index = 0
+    error_message_pattern = re.compile(r'Duplicate video ID is \[(?P<thing>[0-9a-zA-Z_-]*)\]')
+
+    try:
+        with open(errors_file, 'rb') as csvfile:
+            file_reader = csv.reader(csvfile, delimiter=',')
+            headers = next(file_reader)
+            for column in headers:
+                if column == "Error code":
+                    error_code_index = headers.index(column)
+                elif column == "Error message":
+                    error_message_index = headers.index(column)
+
+            for row in file_reader:
+                if row[error_code_index] == "VIDEO_REJECTED_DUPLICATE":
+                    upload_data['status'] = "Duplicate"
+                    error_message = row[error_message_index]
+                    youtube_id_search = error_message_pattern.search(error_message)
+                    if youtube_id_search:
+                        upload_data['duplicate_url'] = youtube_id_search.groups()[0]
+                    else:
+                        LOGGER.error(
+                            '[YOUTUBE CALLBACK] : Youtube callback returned Duplicate Video error but '
+                            'duplicate video ID could not be found. Upload data: {upload_data}. '
+                            'CSV: {csv}'.format(
+                                upload_data=upload_data,
+                                csv=row
+                            ))
+    except IOError:
+        LOGGER.error('[YOUTUBE CALLBACK] : Could not open error file {file}'.format(
+            file=errors_file
+        ))
+    return upload_data
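For reference, a sketch of the report pair these two new parsers expect. The filenames and values are hypothetical; the column names, the 'Errors' status, and the VIDEO_REJECTED_DUPLICATE code are the literals the code above matches on, and column order does not matter because indices are looked up from the header row:

    # report-HARXXX_100.csv (hypothetical contents)
    #   Status,Video file,Video ID
    #   Successful,HARXXX_100.mp4,AbCdEfGhIjK
    #
    # errors-HARXXX_100.csv (hypothetical; only read when Status is 'Errors')
    #   Error code,Error message
    #   VIDEO_REJECTED_DUPLICATE,Duplicate video ID is [AbCdEfGhIjK]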


 def urlpatch(upload_data):
     """
@@ -235,7 +342,10 @@ def urlpatch(upload_data):
         upload_data['status'] = 'Failure'
         return

-    if upload_data['status'] == 'Success':
+    if upload_data['status'] == 'Successful':
+        LOGGER.info('[YOUTUBE CALLBACK] : Urlpatch : Upload status is successful : {upload_data}'.format(
+            upload_data=upload_data
+        ))
         url_query = URL.objects.filter(
             encode_url=upload_data['youtube_id']
         )
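The renamed status string matters here: csv_parser copies the report's 'Status' column verbatim, and the reports say 'Successful' rather than 'Success', which appears to be the mismatch behind the stuck renditions this PR addresses. The values urlpatch can see at this branch, all taken from code visible in this diff:

    # upload_data['status'] at this point:
    #   'Successful' - copied from the report CSV's Status column
    #   'Duplicate'  - set by _process_errors on VIDEO_REJECTED_DUPLICATE
    #   'Failure'    - set just above (its condition is collapsed in this view)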
@@ -321,6 +431,10 @@ def urlpatch(upload_data):
     elif upload_data['status'] == 'Duplicate' and \
             upload_data['file_suffix'] == '100':

+        LOGGER.info('[YOUTUBE CALLBACK] : Urlpatch : Upload status is duplicate : {upload_data}'.format(
+            upload_data=upload_data
+        ))
+
         url_query = URL.objects.filter(
             videoID=Video.objects.filter(
                 edx_id=upload_data['edx_id']
@@ -354,3 +468,7 @@ def urlpatch(upload_data):
         encode_profile='youtube'
     )
     ApiConn.call()
+
+
+def is_xml_file(file):
+    return file.lower().endswith('.xml')
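The new is_xml_file helper is a case-insensitive extension check; for example:

    is_xml_file('report-HARXXX_100.XML')   # True  (the name is lowercased first)
    is_xml_file('report-HARXXX_100.csv')   # False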
