This repository has been archived by the owner on Oct 1, 2020. It is now read-only.

Merge pull request #100 from edx/sofiya/youtube-bug
Multiple courses stuck in YouTube renditions
ssemenova authored Apr 20, 2018
2 parents 5c67dfa + e7b3838 commit 2f0d8d4
Showing 1 changed file with 136 additions and 18 deletions.

youtube_callback/sftp_id_retrieve.py
@@ -2,10 +2,12 @@
 Check SFTP dropboxes for YT Video ID XML information
 """
+import csv
 import datetime
 import fnmatch
 import logging
 import os
+import re
 import shutil
 import sys
 import xml.etree.ElementTree as ET
@@ -48,21 +50,22 @@ def callfunction(course):
     shutil.rmtree(workdir)
     os.mkdir(workdir)

-    xml_downloader(course)
+    xml_csv_downloader(course)

     for file in os.listdir(workdir):
-        upload_data = domxml_parser(file)
+        if 'report-' in file:
+            upload_data = domxml_parser(file) if is_xml_file(file) else csv_parser(file)

-        if upload_data is not None:
-            LOGGER.info('[YOUTUBE_CALLBACK] : {inst}{clss} {upload_data}'.format(
-                inst=course.institution,
-                clss=course.edx_classid,
-                upload_data=upload_data
-            ))
-            urlpatch(upload_data)
+            if upload_data is not None:
+                LOGGER.info('[YOUTUBE CALLBACK] : {inst}{clss} {upload_data}'.format(
+                    inst=course.institution,
+                    clss=course.edx_classid,
+                    upload_data=upload_data
+                ))
+                urlpatch(upload_data)


-def xml_downloader(course):
+def xml_csv_downloader(course):
     """
     :param course:
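Note on the callfunction change above: files are now parsed only when their names contain 'report-', and the parser is chosen by extension via is_xml_file (added at the bottom of this diff). A minimal sketch of the routing, assuming this module's names are in scope; the filenames are hypothetical examples:

    # Hypothetical filenames, for illustration only
    for name in ['report-HARXXX_100.xml', 'report-HARXXX_100.csv', 'status-HARXXX.xml']:
        if 'report-' in name:
            # .xml reports go to domxml_parser, everything else to csv_parser
            parser = domxml_parser if is_xml_file(name) else csv_parser
        else:
            parser = None  # files without 'report-' in the name are skipped entirely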
@@ -89,17 +92,17 @@ def xml_downloader(course):
         for d in s1.listdir_attr():
             crawl_sftp(d=d, s1=s1)
     except AuthenticationException:
-        LOGGER.error("[YOUTUBE_CALLBACK] : {inst}{clss} : Authentication Failed".format(
+        LOGGER.error("[YOUTUBE CALLBACK] : {inst}{clss} : Authentication Failed".format(
             inst=course.institution,
             clss=course.edx_classid
         ))
     except SSHException:
-        LOGGER.error("[YOUTUBE_CALLBACK] : {inst}{clss} : Authentication Failed".format(
+        LOGGER.error("[YOUTUBE CALLBACK] : {inst}{clss} : Authentication Failed".format(
             inst=course.institution,
             clss=course.edx_classid
         ))
     except IOError:
-        LOGGER.error("[YOUTUBE_CALLBACK] : {inst}{clss} : List Dir Failed".format(
+        LOGGER.error("[YOUTUBE CALLBACK] : {inst}{clss} : List Dir Failed".format(
             inst=course.institution,
             clss=course.edx_classid
         ))
@@ -155,6 +158,8 @@ def crawl_sftp(d, s1):
         return
     except SSHException:
         return
+    except OSError:
+        return
     s1.cwd('..')


@@ -164,10 +169,6 @@ def domxml_parser(file):
     :param file:
     :return:
     """
-
-    if 'status-' not in file:
-        return
-
     upload_data = {
         'datetime': None,
         'status': None,
@@ -176,11 +177,18 @@
         'file_suffix': None,
         'youtube_id': None
     }
-
     try:
         tree = ET.parse(os.path.join(workdir, file))
     except ET.ParseError:
+        LOGGER.error('[YOUTUBE CALLBACK] : Parse Error in domxml parser : file {filename}'.format(
+            filename=file
+        ))
         return
+    except IOError:
+        LOGGER.error('[YOUTUBE CALLBACK] : IO Error in domxml parser : file {filename}'.format(
+            filename=file
+        ))
+        return
     root = tree.getroot()
     for child in root:
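With the two except blocks above, a malformed or unreadable status XML is logged and the parser returns None, which callfunction's 'is not None' guard then skips; previously only ET.ParseError was caught, and silently. A short summary in comment form (the filename is hypothetical):

    # domxml_parser('report-HARXXX_100.xml') now returns None when:
    #   - ET.ParseError: the file exists but is not well-formed XML (logged)
    #   - IOError: the file cannot be opened or read (logged)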
@@ -221,6 +229,105 @@ def domxml_parser(file):
     return upload_data


+def csv_parser(filename):
+    """
+    :param filename: string
+    :return: upload_data : dict
+    """
+    upload_data = {
+        'datetime': None,
+        'status': None,
+        'duplicate_url': None,
+        'edx_id': filename.replace('report-', '', 1).split('_')[0],
+        'file_suffix': None,
+        'youtube_id': None
+    }
+
+    status_index = file_suffix_index = youtube_id_index = 0
+
+    if not os.path.exists(os.path.join(workdir, filename)):
+        LOGGER.info('[YOUTUBE CALLBACK] : CSV file {filename} does not exist'.format(
+            filename=filename
+        ))
+        return
+
+    with open(os.path.join(workdir, filename), 'rb') as csvfile:
+        file_reader = csv.reader(csvfile, delimiter=',')
+        try:
+            headers = next(file_reader)
+        except StopIteration:
+            LOGGER.info('[YOUTUBE CALLBACK] : CSV file {filename} exists but is empty'.format(
+                filename=filename
+            ))
+            return
+
+        for column in headers:
+            if column == "Status":
+                status_index = headers.index(column)
+            elif column == "Video file":
+                file_suffix_index = headers.index(column)
+            elif column == "Video ID":
+                youtube_id_index = headers.index(column)
+
+        for row in file_reader:
+            video_url = row[file_suffix_index]
+            upload_data['status'] = row[status_index]
+            if upload_data['status'] == "Errors":
+                upload_data = _process_errors(upload_data, filename)
+
+            upload_data['youtube_id'] = row[youtube_id_index]
+
+            try:
+                upload_data['file_suffix'] = video_url.split("_")[1].split(".")[0]
+            except IndexError:
+                upload_data['file_suffix'] = '100'
+
+    return upload_data
+
+
+def _process_errors(upload_data, reports_file):
+    """
+    :param upload_data : dict
+           reports_file : string
+    :return: upload_data : dict
+    """
+    errors_file = os.path.join(workdir, reports_file.replace("report-", "errors-"))
+
+    error_code_index = error_message_index = 0
+    error_message_pattern = re.compile(r'Duplicate video ID is \[(?P<thing>[0-9a-zA-Z_-]*)\]')
+
+    try:
+        with open(errors_file, 'rb') as csvfile:
+            file_reader = csv.reader(csvfile, delimiter=',')
+            headers = next(file_reader)
+            for column in headers:
+                if column == "Error code":
+                    error_code_index = headers.index(column)
+                elif column == "Error message":
+                    error_message_index = headers.index(column)
+
+            for row in file_reader:
+                if row[error_code_index] == "VIDEO_REJECTED_DUPLICATE":
+                    upload_data['status'] = "Duplicate"
+                    error_message = row[error_message_index]
+                    youtube_id_search = error_message_pattern.search(error_message)
+                    if youtube_id_search:
+                        upload_data['duplicate_url'] = youtube_id_search.groups()[0]
+                    else:
+                        LOGGER.error(
+                            '[YOUTUBE CALLBACK] : Youtube callback returned Duplicate Video error but '
+                            'duplicate video ID could not be found. Upload data: {upload_data}. '
+                            'CSV: {csv}'.format(
+                                upload_data=upload_data,
+                                csv=row
+                            ))
+    except IOError:
+        LOGGER.error('[YOUTUBE CALLBACK] : Could not open error file {file}'.format(
+            file=errors_file
+        ))
+    return upload_data
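For reference, a sketch of the report pair these two new parsers expect. The filenames and values are hypothetical; the column names, the 'Errors' status, and the VIDEO_REJECTED_DUPLICATE code are the literals the code above matches on, and column order does not matter because indices are looked up from the header row:

    # report-HARXXX_100.csv (hypothetical contents)
    #   Status,Video file,Video ID
    #   Successful,HARXXX_100.mp4,AbCdEfGhIjK
    #
    # errors-HARXXX_100.csv (hypothetical; only read when Status is 'Errors')
    #   Error code,Error message
    #   VIDEO_REJECTED_DUPLICATE,Duplicate video ID is [AbCdEfGhIjK]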


 def urlpatch(upload_data):
     """
@@ -235,7 +342,10 @@ def urlpatch(upload_data):
         upload_data['status'] = 'Failure'
         return

-    if upload_data['status'] == 'Success':
+    if upload_data['status'] == 'Successful':
+        LOGGER.info('[YOUTUBE CALLBACK] : Urlpatch : Upload status is successful : {upload_data}'.format(
+            upload_data=upload_data
+        ))
         url_query = URL.objects.filter(
             encode_url=upload_data['youtube_id']
         )
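The renamed status string matters here: csv_parser copies the report's 'Status' column verbatim, and the reports say 'Successful' rather than 'Success', which appears to be the mismatch behind the stuck renditions this PR addresses. The values urlpatch can see at this branch, all taken from code visible in this diff:

    # upload_data['status'] at this point:
    #   'Successful' - copied from the report CSV's Status column
    #   'Duplicate'  - set by _process_errors on VIDEO_REJECTED_DUPLICATE
    #   'Failure'    - set just above (its condition is collapsed in this view)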
@@ -321,6 +431,10 @@ def urlpatch(upload_data):
     elif upload_data['status'] == 'Duplicate' and \
             upload_data['file_suffix'] == '100':

+        LOGGER.info('[YOUTUBE CALLBACK] : Urlpatch : Upload status is duplicate : {upload_data}'.format(
+            upload_data=upload_data
+        ))
+
         url_query = URL.objects.filter(
             videoID=Video.objects.filter(
                 edx_id=upload_data['edx_id']
@@ -354,3 +468,7 @@ def urlpatch(upload_data):
         encode_profile='youtube'
     )
     ApiConn.call()
+
+
+def is_xml_file(file):
+    return file.lower().endswith('.xml')
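The new is_xml_file helper is a case-insensitive extension check; for example:

    is_xml_file('report-HARXXX_100.XML')   # True  (the name is lowercased first)
    is_xml_file('report-HARXXX_100.csv')   # False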
