Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extra functionality #11

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
182 changes: 97 additions & 85 deletions start_kit/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,29 @@
# 2. Extract YouTube frames and create video instances.

import os
import sys
import glob
import json
import cv2

import shutil
import re

import logging
# Root logger: record everything from DEBUG upwards in preProc.log (the file
# is opened in 'w' mode, so each run starts with an empty log) and attach a
# second handler that writes the records to stdout as well.
logging.basicConfig(filename="preProc.log", filemode="w", level=logging.DEBUG)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))


def convert_everything_to_mp4():
    """Run the ``scripts/swf2mp4.sh`` conversion script with bash.

    The script path is relative, so the call only works when the current
    working directory contains ``scripts/``. The exit status of the script
    is not inspected.
    """
    cmd = 'bash scripts/swf2mp4.sh'

    os.system(cmd)


def convert_frames_to_video(frame_array, path_out, size, fps=25):
    """Write a sequence of frames to ``path_out`` using the 'mp4v' codec.

    Args:
        frame_array: iterable of frames, written in iteration order.
        path_out: path of the video file to create.
        size: frame size handed to ``cv2.VideoWriter``.
        fps: frame rate of the written video (default 25).
    """
    out = cv2.VideoWriter(path_out, cv2.VideoWriter_fourcc(*'mp4v'), fps, size)

    try:
        for frame in frame_array:
            out.write(frame)
    finally:
        # Release the writer even if a write fails, so the file is closed.
        out.release()


def video_to_frames(video_path, size=None):
Expand All @@ -20,13 +34,14 @@ def video_to_frames(video_path, size=None):
size -> (int, int), width, height.
"""

print(f"video_path: {video_path} size: {size}")
cap = cv2.VideoCapture(video_path)

frames = []

while True:
ret, frame = cap.read()

if ret:
if size:
frame = cv2.resize(frame, size)
Expand All @@ -39,91 +54,88 @@ def video_to_frames(video_path, size=None):
return frames


def convert_frames_to_video(frame_array, path_out, size, fps=25):
    """Write a sequence of frames to ``path_out`` using the 'mp4v' codec.

    Args:
        frame_array: iterable of frames, written in iteration order.
        path_out: path of the video file to create.
        size: frame size handed to ``cv2.VideoWriter``.
        fps: frame rate of the written video (default 25).
    """
    out = cv2.VideoWriter(path_out, cv2.VideoWriter_fourcc(*'mp4v'), fps, size)

    try:
        for frame in frame_array:
            out.write(frame)
    finally:
        # Release the writer even if a write fails, so the file is closed.
        out.release()


def extract_frame_as_video(src_video_path, start_frame, end_frame):
    """Return the frames of a video from ``start_frame`` to ``end_frame``.

    Both indices are 0-based positions in the list returned by
    ``video_to_frames`` and the range is inclusive of ``end_frame``.

    Args:
        src_video_path: path of the video to read.
        start_frame: index of the first frame to keep; values below 0 are
            treated as 0.
        end_frame: index of the last frame to keep.

    Returns:
        The selected frames as a list; empty when the range matches nothing.
    """
    frames = video_to_frames(src_video_path)

    # A negative start would be interpreted by the slice as "count from the
    # end of the list", silently selecting the wrong frames.
    first = max(start_frame, 0)

    return frames[first: end_frame + 1]


def extract_all_yt_instances(content):
    """Create ``videos/<video_id>.mp4`` for every instance listed in ``content``.

    Sources are looked up in ``raw_videos_mp4/`` relative to the current
    working directory. Instances whose source file is missing, or whose
    destination already exists, are skipped.

    * YouTube instances (URL contains 'youtube' or 'youtu.be') read
      ``raw_videos_mp4/<last 11 characters of the URL>.mp4`` and are trimmed
      to ``frame_start``..``frame_end`` unless ``frame_end`` is 1 or less,
      in which case the source is copied unchanged.
    * All other instances copy ``raw_videos_mp4/<video_id>.mp4`` unchanged.

    Args:
        content: iterable of entries, each with an 'instances' list whose
            items provide 'url' and 'video_id' (plus 'frame_start' and
            'frame_end' for YouTube items).
    """
    # Progress counter used only in the printed messages.
    cnt = 1

    os.makedirs('videos', exist_ok=True)

    for entry in content:
        for inst in entry['instances']:
            url = inst['url']
            video_id = inst['video_id']
            dst_video_path = os.path.join('videos', video_id + '.mp4')

            cnt += 1

            if 'youtube' in url or 'youtu.be' in url:
                # assumes the URL ends with the 11-character YouTube id
                # under which the download was saved -- TODO confirm
                yt_identifier = url[-11:]
                src_video_path = os.path.join(
                    'raw_videos_mp4', yt_identifier + '.mp4')

                if not os.path.exists(src_video_path):
                    continue

                if os.path.exists(dst_video_path):
                    print('{} exists.'.format(dst_video_path))
                    continue

                # because the JSON file indexes from 1.
                start_frame = inst['frame_start'] - 1
                end_frame = inst['frame_end'] - 1

                # No usable end frame: keep the whole source video.
                if end_frame <= 0:
                    shutil.copyfile(src_video_path, dst_video_path)
                    continue

                selected_frames = extract_frame_as_video(
                    src_video_path, start_frame, end_frame)

                # The range can lie outside the video (or the video may be
                # unreadable); there is then no first frame to take a size
                # from, so skip instead of raising IndexError.
                if not selected_frames:
                    print('{} has no frames in range {}-{}, skipped.'.format(
                        src_video_path, start_frame, end_frame))
                    continue

                # when OpenCV reads an image, it returns size in (h, w, c)
                # when OpenCV creates a writer, it requires size in (w, h).
                size = selected_frames[0].shape[:2][::-1]

                convert_frames_to_video(selected_frames, dst_video_path, size)

                print(cnt, dst_video_path)
            else:
                src_video_path = os.path.join(
                    'raw_videos_mp4', video_id + '.mp4')

                if os.path.exists(dst_video_path):
                    print('{} exists.'.format(dst_video_path))
                    continue

                if not os.path.exists(src_video_path):
                    continue

                print(cnt, dst_video_path)
                shutil.copyfile(src_video_path, dst_video_path)


def main():
    """Convert the raw downloads to mp4, then build the per-instance videos.

    Reads the instance index from ``WLASL_v0.3.json`` in the current working
    directory.
    """
    # 1. Convert .swf, .mkv file to mp4.
    convert_everything_to_mp4()

    # Close the index file as soon as it has been parsed.
    with open('WLASL_v0.3.json') as index_file:
        content = json.load(index_file)
    extract_all_yt_instances(content)
class Preproc:
    """Preprocessing steps that operate on a single directory of videos.

    Instance attributes:
        indexFile -- path of the JSON index that lists the instances.
        vd -- directory that holds the source videos and receives the output.
    """

    def __init__(self,
                 idxf="WLASL_v0.3.json",
                 videoDir="data"):
        self.indexFile = idxf
        self.vd = videoDir

    def convertTomp4(self):
        """Convert every non-mp4 file in the video directory to mp4 with ffmpeg.

        A file is converted only when no ``<stem>.mp4`` exists next to it.
        The outcome of each conversion is logged; a missing ``ffmpeg``
        executable is reported as a failed conversion.
        """
        # Local import: the file's import block does not provide subprocess.
        import subprocess

        # Snapshot the listing first; conversions add files to the directory.
        with os.scandir(self.vd) as listing:
            entries = list(listing)

        for f in entries:
            # Sub-directories cannot be converted.
            if not f.is_file():
                continue

            dest = os.path.join(self.vd,
                                os.path.splitext(f.name)[0] + '.mp4')

            if not f.path.endswith(".mp4") and not os.path.exists(dest):
                # Argument list instead of a shell string: paths containing
                # spaces or shell metacharacters are passed through verbatim.
                # Both dimensions are padded to even values, matching
                # scripts/swf2mp4.sh.
                cmd = [
                    "ffmpeg", "-loglevel", "panic",
                    "-i", f.path,
                    "-vf", "pad=width=ceil(iw/2)*2:height=ceil(ih/2)*2",
                    dest,
                ]
                try:
                    converted = subprocess.run(cmd).returncode == 0
                except OSError:
                    # ffmpeg could not be started at all.
                    converted = False

                if converted:
                    logging.info(f"Conversion Successful\t-\t{f.name}")
                else:
                    logging.error(f"Conversion Failed\t\t-\t{f.name}")
            elif f.path.endswith(".swf"):
                logging.info(f"{f.name} already converted - Skipping")

    def extractVideo(self):
        """Create ``<video_id>.mp4`` from ``<video_id>.yt.mp4`` for YouTube instances.

        Only instances whose URL matches ``youtu\\.?be`` are handled.
        Instances are skipped when the source is missing or the destination
        already exists. When ``frame_end`` is 1 or less the source is copied
        unchanged; otherwise the frames ``frame_start``..``frame_end``
        (1-based in the index) are written to the destination.
        """
        with open(self.indexFile) as index_file:
            idx = json.load(index_file)

        for entry in idx:
            for inst in entry["instances"]:
                if not re.search(r"youtu\.?be", inst["url"]):
                    continue

                src = os.path.join(self.vd, inst["video_id"] + '.yt.mp4')
                dst = os.path.join(self.vd, inst["video_id"] + '.mp4')

                if not os.path.exists(src):
                    continue
                if os.path.exists(dst):
                    logging.info(f"{src} already extracted - Skipping ")
                    continue

                # No usable end frame: keep the whole source video.
                if inst["frame_end"] - 1 <= 0:
                    shutil.copyfile(src, dst)
                    continue

                print(f"src: {src}")
                selected_frames = extract_frame_as_video(
                    src,
                    inst["frame_start"] - 1,
                    inst["frame_end"] - 1
                )

                # Without frames there is no first frame to read a size
                # from; report and move on instead of raising IndexError.
                if not selected_frames:
                    logging.error(f"{src} has no frames in requested range")
                    continue

                # shape is (h, w, c); the writer expects (w, h).
                size = selected_frames[0].shape[:2][::-1]
                convert_frames_to_video(selected_frames, dst, size)

    def main(self):
        """Print the directory tree below the video directory.

        NOTE: the conversion and extraction steps are disabled below; only
        the ``os.walk`` listing runs.
        """
        # logging.info(">>>Converting files to mp4")
        # self.convertTomp4()
        # logging.info(">>>Extracting youtube videos")
        # self.extractVideo()
        for r, d, f in os.walk(self.vd):
            print(r)
            print(d)
            print(f)
            print("==============")


if __name__ == "__main__":
    # Script entry point: run the function-based pipeline, then the
    # class-based one with its default index file and video directory.
    main()

    preproc = Preproc()
    preproc.main()
2 changes: 1 addition & 1 deletion start_kit/scripts/swf2mp4.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ do
fi

echo "${i} / ${total}, ${filename}"

if [ ${extension} != "mp4" ];
then
ffmpeg -loglevel panic -i ${src_file} -vf pad="width=ceil(iw/2)*2:height=ceil(ih/2)*2" ${dst_file}
Expand Down
Loading