From 0bb152aee63b71f44f4f7920cbc179f1ded32692 Mon Sep 17 00:00:00 2001
From: DreamGallery <2511462508@qq.com>
Date: Sat, 9 Sep 2023 15:17:09 +0800
Subject: [PATCH] CPU Version

---
 README.md       |  4 ++--
 main.py         |  8 ++++----
 src/adv_text.py | 10 +++++-----
 src/events.py   |  2 +-
 src/frame.py    | 27 +++++++++++++++++++--------
 src/match.py    | 31 +++++++++++++++++--------------
 6 files changed, 48 insertions(+), 34 deletions(-)

diff --git a/README.md b/README.md
index 40fc82a..8734918 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
 Also using `OPENCV` to fix timeline of subtitle frame by frame.
 ※This tool is based on [MalitsPlus/HoshimiToolkit](https://github.com/MalitsPlus/HoshimiToolkit), you need to get subtitle files in the game through this project first, the names of required files are usually `adv_***.txt`.
 
-# Usage
+# Usage (CPU Version)
 
 ## Install from the repository
 
@@ -35,7 +35,7 @@ python generate.py
 ## Generate .ass file with time-fix
 
 To use time-fix option you need to put the recorded video in `adv/video`, and the recommended resolution is `[1920x1080]` or you can change the `[Font Config]` in `config.ini` to fit your video(compare in PS is a good idea).
-If your resolution ratio is not `16:9`, you may also have to modify the cutting area of frames near line `26~29` in `src/frame.py`.
+If your resolution ratio is not `16:9`, you may also have to modify the cutting area of frames in `src/frame.py`.
 
 ```
 python main.py
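The cutting area mentioned in the README change is defined in `src/frame.py` as fixed fractions of the frame size (see the hunk further down). A minimal sketch of that crop, assuming the frame is a NumPy array (the function name here is illustrative; the fractions are the patch's 16:9 defaults):

```python
import numpy as np

def crop_subtitle_band(frame: np.ndarray, width: int, height: int) -> np.ndarray:
    # Keep rows 29/36..8/9 and columns 1/16..15/16 of the frame --
    # the lower band where dialogue text is drawn on a 16:9 recording.
    return frame[
        (height * 29 // 36) : (height * 8 // 9),
        (width * 1 // 16) : (width * 15 // 16),
    ]
```

For a different aspect ratio, these fractions are what needs retuning.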
diff --git a/main.py b/main.py
index 6ea65c1..b5e0599 100644
--- a/main.py
+++ b/main.py
@@ -37,9 +37,9 @@ for dial in extract(sub_file_name):
     dial_list.append(dial)
 
-files = []
-current_count = 0
-start_file_index = 0
+files: list[str] = []
+current_count = int(0)
+start_file_index = int(0)
 
 content = script_info + "\n" + garbage + "\n" + style + "\n" + event
 
 print("ASS-Generate-Progress start")
@@ -49,7 +49,7 @@ for dial in dial_list:
     if "SkipTime" in dial:
-        start_file_index = start_file_index + int(float(str(dial).split(":")[1]) * stream.fps)
+        start_file_index = start_file_index + int(float(dial.split(":")[1]) * stream.fps)
         continue
     dial_event = AssEvents()
     dial_event.from_dialogue(dial)
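The `SkipTime` change drops a redundant `str()` call: `extract` is now annotated `-> list[str]`, so `dial` is already a string. The surrounding arithmetic converts a timestamp in seconds into a frame offset via the video's fps. A worked sketch with illustrative values (the `SkipTime:<seconds>` layout is inferred from the `split(":")` call):

```python
fps = 29.97                    # e.g. cv2.VideoCapture(path).get(cv2.CAP_PROP_FPS)
dial = "SkipTime:2.5"          # hypothetical entry: skip 2.5 seconds of video
offset = int(float(dial.split(":")[1]) * fps)
print(offset)                  # 74 -> start_file_index advances by 74 frames
```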
diff --git a/src/adv_text.py b/src/adv_text.py
index 03bc5f3..1d58d70 100644
--- a/src/adv_text.py
+++ b/src/adv_text.py
@@ -12,8 +12,8 @@
 _KEY_TITLE = config.get("Text KEY", "KEY_TITLE")
 
 
-def extract(input: str) -> list:
-    dial_list = []
+def extract(input: str) -> list[str]:
+    dial_list: list[str] = []
     with open(f"{_TXT_PATH}/{input}", "r", encoding="utf8") as f:
         for line in f:
             if "text" in line:
@@ -30,7 +30,7 @@ def get_title(input: str) -> str:
     return title
 
 
-def get_text(input: str) -> [str, bool]:
+def get_text(input: str) -> tuple[str, bool]:
     if _KEY_MASSAGE in input:
         text = (
             input[1:-2].split(_KEY_MASSAGE)[1].split(f"\u0020{_KEY_NAME}")[0].replace("{user}", _player_name)
@@ -43,7 +43,7 @@
     elif _KEY_NARRATION in input:
         text = input[1:-2].split(_KEY_NARRATION)[1].split(f"\u0020{_KEY_CLIP}")[0]
         gray = True
-    return text, gray
+    return (text, gray)
 
 
 def get_name(input: str) -> str:
@@ -54,7 +54,7 @@
     return name
 
 
-def get_clip(input: str) -> any:
+def get_clip(input: str):
     clip = input[1:-2].split(f"\u0020{_KEY_CLIP}")[1].replace("\\", "")
     data = json.loads(clip)
     return data
diff --git a/src/events.py b/src/events.py
index 1a3fab1..72dcb95 100644
--- a/src/events.py
+++ b/src/events.py
@@ -31,7 +31,7 @@ def __init__(
         self.Effect = Effect
         self.Text = Text
 
-    def from_dialogue(self, input: str):
+    def from_dialogue(self, input: str) -> None:
         self.Start = to_time(get_clip(input)["_startTime"])
         self.Duration = get_clip(input)["_duration"]
         self.End = end_time(get_clip(input)["_startTime"], get_clip(input)["_duration"])
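The `get_text` signature fix is a correctness fix as much as a style one: the old `-> [str, bool]` is a list expression, not a type, so type checkers reject it, while `tuple[str, bool]` (built-in generic syntax, Python 3.9+) describes the actual return value. A minimal illustration with a hypothetical function:

```python
def get_text_annotated(line: str) -> tuple[str, bool]:
    # tuple[str, bool] is a real type; the old "[str, bool]" evaluates to
    # a plain list at definition time and means nothing to a type checker.
    text, gray = line, False
    return (text, gray)
```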
diff --git a/src/frame.py b/src/frame.py
index 2778d6c..85e93e1 100644
--- a/src/frame.py
+++ b/src/frame.py
@@ -1,4 +1,4 @@
-import cv2
+import cv2, cv2.typing
 import os, sys
 import threading
 import numpy as np
@@ -11,18 +11,25 @@
 _VIDEO_PATH = config.get("File PATH", "VIDEO_PATH")
 
 _lock = threading.Lock()
-_current_count = 0
+_current_count = int(0)
 
 
 class FrameProcess(object):
     fps: float
 
-    def one_task(self, image_folder_path: str, frame: any, milliseconds: float, total_fps: int):
+    def one_task(
+        self,
+        image_folder_path: str,
+        frame: cv2.typing.MatLike,
+        width: int,
+        height: int,
+        milliseconds: float,
+        total_fps: int,
+    ) -> None:
         global _current_count
         seconds = "%.4f" % (milliseconds // 1000 + (milliseconds % 1000) / 1000)
         name = seconds[:-1].replace(".", "_")
-        height = len(frame)
-        width = len(frame[0])
+        # Modify the following content if your resolution ratio is not 16:9
         img = frame[
             (height * 29 // 36) : (height * 8 // 9),
             (width * 1 // 16) : (width * 15 // 16),
@@ -43,12 +50,14 @@ def one_task(self, image_folder_path: str, frame: any, milliseconds: float, tota
         sys.stdout.flush()
         _lock.release()
 
-    def to_frame(self, input: str):
+    def to_frame(self, input: str) -> None:
         image_folder_path = f"{_CACHE_PATH}/{input.split('.')[0]}"
         os.makedirs(image_folder_path, exist_ok=True)
         video_path = f"{_VIDEO_PATH}/{input}"
         vc = cv2.VideoCapture(video_path)
         self.fps = vc.get(cv2.CAP_PROP_FPS)
+        width = int(vc.get(cv2.CAP_PROP_FRAME_WIDTH))
+        height = int(vc.get(cv2.CAP_PROP_FRAME_HEIGHT))
         total_fps = int(vc.get(cv2.CAP_PROP_FRAME_COUNT))
         executor = ThreadPoolExecutor(max_workers=20)
         frame_tasks = []
@@ -58,13 +67,15 @@ def to_frame(self, input: str):
                 break
             milliseconds = vc.get(cv2.CAP_PROP_POS_MSEC)
             frame_tasks.append(
-                executor.submit(self.one_task, image_folder_path, frame, milliseconds, total_fps)
+                executor.submit(
+                    self.one_task, image_folder_path, frame, width, height, milliseconds, total_fps
+                )
             )
         vc.release()
         wait(frame_tasks, return_when="ALL_COMPLETED")
         print("\u0020", "Pre-Progress finished")
 
-    def get_fps(self, input: str):
+    def get_fps(self, input: str) -> None:
         video_path = f"{_VIDEO_PATH}/{input}"
         vc = cv2.VideoCapture(video_path)
         self.fps = vc.get(cv2.CAP_PROP_FPS)
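The central change in `src/frame.py`: frame dimensions are now read once from the capture's properties and passed to every worker, instead of being recomputed from `len(frame)` inside each task. A minimal sketch of that query pattern (standard OpenCV calls; the path is a placeholder):

```python
import cv2

vc = cv2.VideoCapture("adv/video/example.mp4")  # placeholder path
fps = vc.get(cv2.CAP_PROP_FPS)
width = int(vc.get(cv2.CAP_PROP_FRAME_WIDTH))   # queried once, not per frame
height = int(vc.get(cv2.CAP_PROP_FRAME_HEIGHT))
total = int(vc.get(cv2.CAP_PROP_FRAME_COUNT))
while True:
    ret, frame = vc.read()
    if not ret:
        break
    assert frame.shape[:2] == (height, width)   # BGR frame: (rows, cols, 3)
vc.release()
```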
diff --git a/src/match.py b/src/match.py
index 14bdfb2..691288d 100644
--- a/src/match.py
+++ b/src/match.py
@@ -1,18 +1,19 @@
-import cv2
+import cv2, cv2.typing
 import numpy as np
 from src.read_ini import config
 from PIL import Image, ImageDraw, ImageFont
+from PIL.ImageFont import FreeTypeFont
 
 _half_split_length = config.getint("Arg", "half_split_length")
 
 
-def to_binary(img: any, thresh: float) -> any:
+def to_binary(img: cv2.typing.MatLike, thresh: float) -> cv2.typing.MatLike:
     gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
     ret, binary = cv2.threshold(gray, thresh, 255, cv2.THRESH_BINARY)
     return binary
 
 
-def to_binary_adaptive(img: any, blocksize: int, C: float) -> any:
+def to_binary_adaptive(img: cv2.typing.MatLike, blocksize: int, C: float) -> cv2.typing.MatLike:
     gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
     binary = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, blocksize, C)
     return binary
@@ -20,13 +21,13 @@ def to_binary_adaptive(img: any, blocksize: int, C: float) -> any:
 
 def draw_text(
     text: str, font_path: list[str], fontsize: list[int], strokewidth: int, kerning: int
-) -> [list[any], list[any]]:
+) -> tuple[list[cv2.typing.MatLike], list[cv2.typing.MatLike]]:
     font_japan = ImageFont.truetype(font_path[0], fontsize[0])
     font_alpha = ImageFont.truetype(font_path[1], fontsize[1])
     font_digit = ImageFont.truetype(font_path[2], fontsize[2])
 
-    char_info = []
-    text_height = 0
+    char_info: list[tuple[FreeTypeFont, int]] = []
+    text_height = int(0)
     for char in text:
         if char.encode("utf-8").isalpha():
             font = font_alpha
@@ -39,14 +40,14 @@
             text_height = max((char_bbox[3] - char_bbox[1]), text_height)
         char_info.append([font, char_width])
 
-    text_width = 0
+    text_width = int(0)
     for info in char_info:
         text_width += info[1]
     text_size = ((text_width + (len(text) - 1) * kerning), text_height)
     text_img = Image.new("RGBA", text_size)
     draw = ImageDraw.Draw(text_img)
 
-    tmp_width = 0
+    tmp_width = int(0)
     for index, char in enumerate(text):
         draw.text(
             (((char_info[index][1]) // 2 + tmp_width), (text_size[1] // 2)),
@@ -57,8 +58,8 @@
             stroke_fill=(32, 32, 32),
         )
         tmp_width = tmp_width + char_info[index][1] + kerning
-    binary = []
-    mask = []
+    binary: list[cv2.typing.MatLike] = []
+    mask: list[cv2.typing.MatLike] = []
     kernel = np.ones((3, 3), np.uint8)
     if len(text) >= _half_split_length:
         spilt_pixel = sum(
@@ -76,15 +77,17 @@
         binary.append(to_binary(np.asarray(text_img), 127))
         mask.append(cv2.erode(to_binary(np.asarray(text_img), 30), kernel, iterations=1))
 
-    return binary, mask
+    return (binary, mask)
 
 
-def compare(img_path: str, binary: list[any], threshold: float, mask: list[any]) -> bool:
-    img = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2GRAY)
+def compare(
+    img_path: str, binary: list[cv2.typing.MatLike], threshold: float, mask: list[cv2.typing.MatLike]
+) -> bool:
+    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
     white_pixels = cv2.countNonZero(img)
     if white_pixels < 100:
         return False
-    part_max = []
+    part_max: list[float] = []
     for image in zip(binary, mask):
         res = cv2.matchTemplate(img, image[0], cv2.TM_CCORR_NORMED, mask=image[1])
         res[np.isinf(res)] = 0
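On the `compare` changes: `cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)` loads grayscale directly instead of converting after a color load, and the `np.isinf` guard matters because `TM_CCORR_NORMED` with a `mask` normalizes by the masked region's energy, which can blow up to `inf` for near-empty regions. A minimal standalone sketch of that matching step (array contents are illustrative):

```python
import cv2
import numpy as np

img = np.random.randint(0, 256, (64, 256), np.uint8)    # grayscale frame crop
templ = np.random.randint(0, 256, (32, 128), np.uint8)  # rendered-text image
mask = np.full(templ.shape, 255, np.uint8)              # keep every pixel

res = cv2.matchTemplate(img, templ, cv2.TM_CCORR_NORMED, mask=mask)
res[np.isinf(res)] = 0            # masked matching can produce inf scores
score = float(res.max())          # compared against the threshold in compare()
```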