worked on rewrite

jonnotjohn · Jul 9, 2018 · f8f967b · f8f967b
1 parent e5e787e
commit f8f967b
Showing 1 changed file with 189 additions and 26 deletions.
diff --git a/RedditMirrorBot.py b/RedditMirrorBot.py
@@ -2,10 +2,12 @@
 
 from configparser import ConfigParser
 from json import load, dump
-from os import getpid, listdir, remove, path, makedirs
+from os import getpid, listdir, remove, path, makedirs, rename
+from shutil import rmtree
 from prawcore.exceptions import RequestException, ServerError
 from time import sleep, ctime, time
 from requests import get
+from copy import copy
 
 import logging
 import praw
@@ -57,32 +59,36 @@ def error(self, msg):
+# Base youtube-dl options. 'outtmpl' is not set here: download_video()
+# copies this dict and assigns its own output path for each download.
 ydl_opts = {
     'format': 'best[ext=webm]/bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
     'logger': MyLogger(),
-    'outtmpl': "Media/%(id)s.mp4",
 }
 
-def cleanup_network():
-    logger.info("Cleanup")
-    if not path.exists("Media/network"):
-        makedirs("Media/network")
+def cleanup_init():
+    """Reset the top-level "Media/" directory to an empty state.
+
+    Creates the directory when it is missing; otherwise deletes it together
+    with everything inside it and creates it again.
+    """
+    media_existed = path.exists("Media/")
+    if media_existed:
+        rmtree("Media/")
+    makedirs("Media/")
 
-    for file in listdir("Media/network"):
-        remove("Media/network/" + file)
+    if media_existed:
+        logger.debug("Cleaned Media path")
+    else:
+        logger.debug("Media path doesn't exist, created it.")
+
 
-def cleanup_submissions():
-    logger.info("Cleanup")
-    if not path.exists("Media/submission"):
-        makedirs("Media/submission")
+def cleanup(path):
+    """Remove the directory tree at `path`, or create it if it is missing.
+
+    A trailing "/" is appended to `path` when absent. If the directory does
+    not exist it is created; if it does exist it is deleted along with its
+    contents and is NOT recreated.
+
+    Raises ValueError for an empty path, which would otherwise become "/".
+    """
+    # The `path` parameter shadows the module-level `os.path` import, so the
+    # existence check is imported locally under its own name.
+    from os.path import exists
+
+    if not path:
+        raise ValueError("cleanup() requires a non-empty path")
+    if not path.endswith("/"):
+        path += "/"
 
-    for file in listdir("Media/submission"):
-        remove("Media/submission/" + file)
-
-def cleanup_comments():
-    logger.info("Cleanup")
-    if not path.exists("Media/comment"):
-        makedirs("Media/comment")
+    logger.debug("Cleanup " + path)
 
-    for file in listdir("Media/comment"):
-        remove("Media/comment/" + file)
+    if not exists(path):
+        logger.debug("Cleanup: " + path + " does not exist. Creating it.")
+        makedirs(path)
+        return
+
+    rmtree(path)
+    logger.debug("Cleanup: " + path + " successfully removed.")
 
 # Returns true if a video for the submission already exists, false otherwise
 def is_submission_mirrored(submission):
@@ -113,18 +119,172 @@ def get_bucket(bucket_name):
     return resource.Bucket(bucket_name)
 
 # Takes in a url and attempts to download the video
-def download_video(url):
+# Returns true if successful
+def download_video(url, sub_id=None, working_dir="Media/"):
+    """Download the video at `url` to `working_dir + "video.mp4"`.
+
+    url         -- page or media URL to fetch.
+    sub_id      -- reddit submission id; v.redd.it links are only handed to
+                   download_reddit_video() when this is given.
+    working_dir -- output directory prefix, expected to end with "/".
+                   Defaults to "Media/" so the signature is valid Python
+                   (a non-default parameter may not follow `sub_id=None`).
+
+    Returns True when the download succeeded, False otherwise.
+    """
     logger.info("Downloading " + url)
-    logger.debug("Got next post: ", submission.title, " ", "https://reddit.com" + submission.permalink + "\n" + submission.url)
+    output_filename = working_dir + "video.mp4"
+    # Work on a copy so the per-download output path never leaks into ydl_opts.
+    opts = copy(ydl_opts)
+    opts['outtmpl'] = output_filename
+    yt = youtube_dl.YoutubeDL(opts)
+
+    # Twitter post: follow the chain of intermediate URLs to the real page,
+    # then let the normal download at the bottom fetch it.
+    if "twitter" in url:
+        logger.info('Twitter video detected.')
+        try:
+            response = yt.extract_info(url, process=False)
+            while response.get("url"):
+                response = yt.extract_info(response["url"], process=False)
+        except youtube_dl.utils.DownloadError:
+            logger.error("Twitter Youtube-dl download error. Unable to download video from URL.")
+            return False
+
+        url = response["webpage_url"]
+
+    # Reddit video, only entered if sub_id exists (won't be used for comment mirroring)
+    if "v.redd.it" in url and sub_id:
+        if download_reddit_video(url, sub_id, working_dir, output_filename):
+            logger.info(url + " successfully downloaded.")
+            return True
+        logger.error(url + " was unable to be downloaded.")
+        return False
 
-    #Don't wanna mirror nazi propaganda
-    if "Antifa" in submission.title or "antifa" in submission.title:
-        return
+    # Otherwise, download normally
+    try:
+        yt.download([url])
+    except (youtube_dl.utils.DownloadError,
+            youtube_dl.utils.SameFileError,
+            UnicodeDecodeError) as e:
+        logger.error("Download error: " + str(e))
+        return False
+
+    return True
+
+# Attempts to download a reddit-hosted video.
+# Returns true if successful
+def download_reddit_video(url, sub_id, working_dir, output_filename):
+    """Fetch a reddit-hosted video for submission `sub_id` into `output_filename`.
+
+    The video stream (and an "audio" stream derived from the same base URL)
+    are downloaded into `working_dir`, then either renamed or combined into
+    `output_filename`.
+
+    Returns True on success, False when no reddit video media can be found.
+    """
+    sub = reddit.submission(id=sub_id)
+    if sub.media is None:
+        if hasattr(sub, "crosspost_parent"):
+            # Drops the first three characters of crosspost_parent
+            # (presumably the "t3_" fullname prefix) to get the parent id.
+            sub.media = reddit.submission(sub.crosspost_parent[3:]).media
+        else:
+            # Resolve sub.url through an HTTP GET and look the submission up
+            # by the URL the response ended on.
+            url = get(sub.url).url
+            _id = praw.models.reddit.submission.Submission.id_from_url(url)
+            sub.media = reddit.submission(_id).media
+
+    if not sub.media or "reddit_video" not in sub.media:
+        logger.error("No reddit video media found for submission " + str(sub_id))
+        return False
+
+    # Time added to filename to avoid processes writing over each other
+    stamp = str(time())
+    video_url = sub.media["reddit_video"]["fallback_url"]
+    video_name = working_dir + stamp + "_video"
+    download(video_name, video_url)
+
+    audio_url = video_url.rsplit("/", 1)[0] + "/audio"
+    audio_name = working_dir + stamp + "_audio"
+    download(audio_name, audio_url)
+
+    if sub.media["reddit_video"]["is_gif"]:
+        logger.debug("Reddit video is a gif.")
+        rename(video_name, output_filename)
+
+    #if not gif but still no audio
+    # Inspect the audio file that was just downloaded, not a fixed path.
+    # NOTE(review): confirm 'octet-stream' is the right marker for a real track.
+    elif 'octet-stream' not in magic.Magic(mime=True, uncompress=True).from_file(audio_name):
+        logger.debug("Reddit video has no audio.")
+        rename(video_name, output_filename)
 
+    #audio exists
+    else:
+        logger.debug("Running combine_media() on reddit video.")
+        combine_media(video_name, audio_name, output_filename)
+        logger.debug("Media combining complete.")
+
+    return True
+
+def combine_media(video, audio, output_filename):
+    """Mux `video` and `audio` into one mp4 written to `output_filename`.
+
+    Runs ffmpeg with stream copy (no re-encoding). Each of the three paths
+    is used as given when it already starts with "Media/"; a bare name is
+    placed under "Media/".
+    """
+    def _in_media(name):
+        # Callers pass paths that already include "Media/"; avoid doubling it.
+        return name if name.startswith("Media/") else "Media/" + name
+
+    command = [
+        "ffmpeg",
+        # Overwrite without prompting. Placed first because ffmpeg may
+        # ignore options that trail the output file.
+        "-y",
+        "-v", "quiet",
+        "-i", _in_media(video),
+        "-i", _in_media(audio),
+        "-c", "copy",
+        "-f", "mp4",
+        # Write where the caller asked, so the result can actually be found.
+        _in_media(output_filename),
+    ]
+
+    subprocess.run(command)
+
+
+def download(filename, url):
+    """Fetch `url` and write the response body to `filename` under Media/.
+
+    A `filename` that already starts with "Media/" is used as given; a bare
+    name is placed under "Media/". The body is written whatever the HTTP
+    status is.
+    """
+    # download_reddit_video() passes paths that already include "Media/";
+    # prefixing them again would point at a directory that does not exist.
+    target = filename if filename.startswith("Media/") else "Media/" + filename
+    with open(target, "wb") as file:
+        file.write(get(url).content)
+
+#converts given file to mp4, and returns new filename
+def conv_to_mp4(file_name):
+    """Convert an .mkv file to .mp4 with ffmpeg and return the resulting path.
+
+    Files whose extension is not .mkv are returned unchanged and ffmpeg is
+    not run. The check looks at the real extension, so ".mkv" appearing
+    elsewhere in the path does not trigger a conversion.
+    """
+    stem, extension = path.splitext(file_name)
+    if extension.lower() != ".mkv":
+        return file_name
+
+    vid_name = stem + ".mp4"
+    ffmpeg_subproc = [
+        "ffmpeg",
+        "-i", file_name,
+        "-strict", "-2", #fixes opus experimental error
+        "-vcodec", "copy",
+        "-y",
+        vid_name
+        ]
+    subprocess.run(ffmpeg_subproc)
+    return vid_name
+
+def upload_video(file_name, _id):
+    """Upload a video to the 'pf-mirror-1' Space and return its public URL.
+
+    file_name -- local path of the video; .mkv input is converted by
+                 conv_to_mp4() first.
+    _id       -- identifier used to build the object key "videos/<_id>.mp4".
+
+    Returns the mirror URL as a string, or None when the upload failed.
+    """
+    file_name = conv_to_mp4(file_name)
+    logger.debug("Uploading to DO...")
+    save_file_size(file_name)
+    # Build one message string: extra positional args without a matching
+    # placeholder make the logging call fail at format time.
+    logger.debug("Size: " + str(path.getsize(file_name)/1024/1024) + "MB")
+    client = session.client('s3',
+        region_name='nyc3',
+        endpoint_url="https://nyc3.digitaloceanspaces.com",
+        aws_access_key_id=do_access_id,
+        aws_secret_access_key=do_secret_key)
+    key = "videos/" + str(_id) + ".mp4"
+
+    try:
+        client.upload_file(file_name, 'pf-mirror-1', key)
+    # The exception class lives in boto3.exceptions, not on the boto3 package.
+    except boto3.exceptions.S3UploadFailedError as e:
+        logger.error(str(_id) + " failed to upload: " + str(e))
+        return None
+
+    logger.debug(str(_id) + " successfully uploaded.")
+
+    logger.debug("DO key: " + key)
+    client.put_object_acl(ACL='public-read', Bucket='pf-mirror-1', Key=key)
+    # The key already starts with "videos/"; the URL must use that same key.
+    mirror_url = "https://pf-mirror-1.nyc3.digitaloceanspaces.com/" + key
+
+    logger.info("Upload complete!")
+    return mirror_url
+
 
 # Takes in a submission object and attempts to mirror it
 # Returns true if mirror was successful
 def mirror_submission(submission):
+    """Download the submission's video and upload it to the mirror.
+
+    A fresh, timestamped working directory under Media/ is prepared for the
+    download so that concurrent mirrors do not share files.
+
+    Returns True when both the download and the upload succeeded,
+    False otherwise.
+    """
+    working_dir = "Media/" + str(time()) + "_dl/"
+    cleanup(working_dir)
+    # Nothing to upload when the download failed.
+    if not download_video(submission.url, submission.id, working_dir):
+        return False
+    mirror_url = upload_video(working_dir + "video.mp4", submission.id)
+    # upload_video() returns None when the upload failed.
+    return mirror_url is not None
 
 
 # Listens for POST requests to create new mirrors
@@ -141,6 +301,8 @@ def sub_watcher():
         try:
             # Get next post
             submission = next(stream)
+            logger.debug("Got next post: ", submission.title, " ", "https://reddit.com" + submission.permalink + "\n" + submission.url)
+
         except RequestException:
             # Client side error
             logger.error("RequestException in sub_watcher")
@@ -197,6 +359,7 @@ def main():
         job.start()
 
 if __name__ == "__main__":
+    cleanup_init()
     if path.exists("/usr/bin/ffmpeg"):
         print(main())
     else: