From 0bb152aee63b71f44f4f7920cbc179f1ded32692 Mon Sep 17 00:00:00 2001
From: DreamGallery <2511462508@qq.com>
Date: Sat, 9 Sep 2023 15:17:09 +0800
Subject: [PATCH] CPU Version

---
 README.md       |  4 ++--
 main.py         |  8 ++++----
 src/adv_text.py | 10 +++++-----
 src/events.py   |  2 +-
 src/frame.py    | 27 +++++++++++++++++++--------
 src/match.py    | 31 +++++++++++++++++--------------
 6 files changed, 48 insertions(+), 34 deletions(-)

diff --git a/README.md b/README.md
index 40fc82a..8734918 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
 Also using `OPENCV` to fix timeline of subtitle frame by frame.
 ※This tool is based on [MalitsPlus/HoshimiToolkit](https://github.com/MalitsPlus/HoshimiToolkit), you need to get subtitle files in the game through this project first, the names of required files are usually `adv_***.txt`.
 
-# Usage
+# Usage (CPU Version)
 
 ## Install from the repository
 
@@ -35,7 +35,7 @@ python generate.py
 ## Generate .ass file with time-fix
 
 To use time-fix option you need to put the recorded video in `adv/video`, and the recommended resolution is `[1920x1080]` or you can change the `[Font Config]` in `config.ini` to fit your video(compare in PS is a good idea).
-If your resolution ratio is not `16:9`, you may also have to modify the cutting area of frames near line `26~29` in `src/frame.py`.
+If your resolution ratio is not `16:9`, you may also have to modify the cutting area of frames in `src/frame.py`.
 
 ```
 python main.py
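The cutting area mentioned in the README change is defined in `src/frame.py` as fixed fractions of the frame size (see the hunk further down). A minimal sketch of that crop, assuming the frame is a NumPy array (the function name here is illustrative; the fractions are the patch's 16:9 defaults):

```python
import numpy as np

def crop_subtitle_band(frame: np.ndarray, width: int, height: int) -> np.ndarray:
    # Keep rows 29/36..8/9 and columns 1/16..15/16 of the frame --
    # the lower band where dialogue text is drawn on a 16:9 recording.
    return frame[
        (height * 29 // 36) : (height * 8 // 9),
        (width * 1 // 16) : (width * 15 // 16),
    ]
```

For a different aspect ratio, these fractions are what needs retuning.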
diff --git a/main.py b/main.py
index 6ea65c1..b5e0599 100644
--- a/main.py
+++ b/main.py
@@ -37,9 +37,9 @@ for dial in extract(sub_file_name):
     dial_list.append(dial)
 
-files = []
-current_count = 0
-start_file_index = 0
+files: list[str] = []
+current_count = int(0)
+start_file_index = int(0)
 
 content = script_info + "\n" + garbage + "\n" + style + "\n" + event
 
 print("ASS-Generate-Progress start")
@@ -49,7 +49,7 @@ for dial in dial_list:
     if "SkipTime" in dial:
-        start_file_index = start_file_index + int(float(str(dial).split(":")[1]) * stream.fps)
+        start_file_index = start_file_index + int(float(dial.split(":")[1]) * stream.fps)
         continue
     dial_event = AssEvents()
     dial_event.from_dialogue(dial)
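The `SkipTime` change drops a redundant `str()` call: `extract` is now annotated `-> list[str]`, so `dial` is already a string. The surrounding arithmetic converts a timestamp in seconds into a frame offset via the video's fps. A worked sketch with illustrative values (the `SkipTime:<seconds>` layout is inferred from the `split(":")` call):

```python
fps = 29.97                    # e.g. cv2.VideoCapture(path).get(cv2.CAP_PROP_FPS)
dial = "SkipTime:2.5"          # hypothetical entry: skip 2.5 seconds of video
offset = int(float(dial.split(":")[1]) * fps)
print(offset)                  # 74 -> start_file_index advances by 74 frames
```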
diff --git a/src/adv_text.py b/src/adv_text.py
index 03bc5f3..1d58d70 100644
--- a/src/adv_text.py
+++ b/src/adv_text.py
@@ -12,8 +12,8 @@
 _KEY_TITLE = config.get("Text KEY", "KEY_TITLE")
 
 
-def extract(input: str) -> list:
-    dial_list = []
+def extract(input: str) -> list[str]:
+    dial_list: list[str] = []
     with open(f"{_TXT_PATH}/{input}", "r", encoding="utf8") as f:
         for line in f:
             if "text" in line:
@@ -30,7 +30,7 @@ def get_title(input: str) -> str:
     return title
 
 
-def get_text(input: str) -> [str, bool]:
+def get_text(input: str) -> tuple[str, bool]:
     if _KEY_MASSAGE in input:
         text = (
             input[1:-2].split(_KEY_MASSAGE)[1].split(f"\u0020{_KEY_NAME}")[0].replace("{user}", _player_name)
@@ -43,7 +43,7 @@
     elif _KEY_NARRATION in input:
         text = input[1:-2].split(_KEY_NARRATION)[1].split(f"\u0020{_KEY_CLIP}")[0]
         gray = True
-    return text, gray
+    return (text, gray)
 
 
 def get_name(input: str) -> str:
@@ -54,7 +54,7 @@
     return name
 
 
-def get_clip(input: str) -> any:
+def get_clip(input: str):
     clip = input[1:-2].split(f"\u0020{_KEY_CLIP}")[1].replace("\\", "")
     data = json.loads(clip)
     return data
diff --git a/src/events.py b/src/events.py
index 1a3fab1..72dcb95 100644
--- a/src/events.py
+++ b/src/events.py
@@ -31,7 +31,7 @@ def __init__(
         self.Effect = Effect
         self.Text = Text
 
-    def from_dialogue(self, input: str):
+    def from_dialogue(self, input: str) -> None:
         self.Start = to_time(get_clip(input)["_startTime"])
         self.Duration = get_clip(input)["_duration"]
         self.End = end_time(get_clip(input)["_startTime"], get_clip(input)["_duration"])
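The `get_text` signature fix is a correctness fix as much as a style one: the old `-> [str, bool]` is a list expression, not a type, so type checkers reject it, while `tuple[str, bool]` (built-in generic syntax, Python 3.9+) describes the actual return value. A minimal illustration with a hypothetical function:

```python
def get_text_annotated(line: str) -> tuple[str, bool]:
    # tuple[str, bool] is a real type; the old "[str, bool]" evaluates to
    # a plain list at definition time and means nothing to a type checker.
    text, gray = line, False
    return (text, gray)
```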
diff --git a/src/frame.py b/src/frame.py
index 2778d6c..85e93e1 100644
--- a/src/frame.py
+++ b/src/frame.py
@@ -1,4 +1,4 @@
-import cv2
+import cv2, cv2.typing
 import os, sys
 import threading
 import numpy as np
@@ -11,18 +11,25 @@
 _VIDEO_PATH = config.get("File PATH", "VIDEO_PATH")
 
 _lock = threading.Lock()
-_current_count = 0
+_current_count = int(0)
 
 
 class FrameProcess(object):
     fps: float
 
-    def one_task(self, image_folder_path: str, frame: any, milliseconds: float, total_fps: int):
+    def one_task(
+        self,
+        image_folder_path: str,
+        frame: cv2.typing.MatLike,
+        width: int,
+        height: int,
+        milliseconds: float,
+        total_fps: int,
+    ) -> None:
         global _current_count
         seconds = "%.4f" % (milliseconds // 1000 + (milliseconds % 1000) / 1000)
         name = seconds[:-1].replace(".", "_")
-        height = len(frame)
-        width = len(frame[0])
+        # Modify the following content if your resolution ratio is not 16:9
         img = frame[
             (height * 29 // 36) : (height * 8 // 9),
             (width * 1 // 16) : (width * 15 // 16),
@@ -43,12 +50,14 @@ def one_task(self, image_folder_path: str, frame: any, milliseconds: float, tota
         sys.stdout.flush()
         _lock.release()
 
-    def to_frame(self, input: str):
+    def to_frame(self, input: str) -> None:
         image_folder_path = f"{_CACHE_PATH}/{input.split('.')[0]}"
         os.makedirs(image_folder_path, exist_ok=True)
         video_path = f"{_VIDEO_PATH}/{input}"
         vc = cv2.VideoCapture(video_path)
         self.fps = vc.get(cv2.CAP_PROP_FPS)
+        width = int(vc.get(cv2.CAP_PROP_FRAME_WIDTH))
+        height = int(vc.get(cv2.CAP_PROP_FRAME_HEIGHT))
         total_fps = int(vc.get(cv2.CAP_PROP_FRAME_COUNT))
         executor = ThreadPoolExecutor(max_workers=20)
         frame_tasks = []
@@ -58,13 +67,15 @@ def to_frame(self, input: str):
                 break
             milliseconds = vc.get(cv2.CAP_PROP_POS_MSEC)
             frame_tasks.append(
-                executor.submit(self.one_task, image_folder_path, frame, milliseconds, total_fps)
+                executor.submit(
+                    self.one_task, image_folder_path, frame, width, height, milliseconds, total_fps
+                )
             )
         vc.release()
         wait(frame_tasks, return_when="ALL_COMPLETED")
         print("\u0020", "Pre-Progress finished")
 
-    def get_fps(self, input: str):
+    def get_fps(self, input: str) -> None:
         video_path = f"{_VIDEO_PATH}/{input}"
         vc = cv2.VideoCapture(video_path)
         self.fps = vc.get(cv2.CAP_PROP_FPS)
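The central change in `src/frame.py`: frame dimensions are now read once from the capture's properties and passed to every worker, instead of being recomputed from `len(frame)` inside each task. A minimal sketch of that query pattern (standard OpenCV calls; the path is a placeholder):

```python
import cv2

vc = cv2.VideoCapture("adv/video/example.mp4")  # placeholder path
fps = vc.get(cv2.CAP_PROP_FPS)
width = int(vc.get(cv2.CAP_PROP_FRAME_WIDTH))   # queried once, not per frame
height = int(vc.get(cv2.CAP_PROP_FRAME_HEIGHT))
total = int(vc.get(cv2.CAP_PROP_FRAME_COUNT))
while True:
    ret, frame = vc.read()
    if not ret:
        break
    assert frame.shape[:2] == (height, width)   # BGR frame: (rows, cols, 3)
vc.release()
```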
diff --git a/src/match.py b/src/match.py
index 14bdfb2..691288d 100644
--- a/src/match.py
+++ b/src/match.py
@@ -1,18 +1,19 @@
-import cv2
+import cv2, cv2.typing
 import numpy as np
 from src.read_ini import config
 from PIL import Image, ImageDraw, ImageFont
+from PIL.ImageFont import FreeTypeFont
 
 _half_split_length = config.getint("Arg", "half_split_length")
 
 
-def to_binary(img: any, thresh: float) -> any:
+def to_binary(img: cv2.typing.MatLike, thresh: float) -> cv2.typing.MatLike:
     gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
     ret, binary = cv2.threshold(gray, thresh, 255, cv2.THRESH_BINARY)
     return binary
 
 
-def to_binary_adaptive(img: any, blocksize: int, C: float) -> any:
+def to_binary_adaptive(img: cv2.typing.MatLike, blocksize: int, C: float) -> cv2.typing.MatLike:
     gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
     binary = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, blocksize, C)
     return binary
@@ -20,13 +21,13 @@ def to_binary_adaptive(img: any, blocksize: int, C: float) -> any:
 
 def draw_text(
     text: str, font_path: list[str], fontsize: list[int], strokewidth: int, kerning: int
-) -> [list[any], list[any]]:
+) -> tuple[list[cv2.typing.MatLike], list[cv2.typing.MatLike]]:
     font_japan = ImageFont.truetype(font_path[0], fontsize[0])
     font_alpha = ImageFont.truetype(font_path[1], fontsize[1])
     font_digit = ImageFont.truetype(font_path[2], fontsize[2])
 
-    char_info = []
-    text_height = 0
+    char_info: list[tuple[FreeTypeFont, int]] = []
+    text_height = int(0)
     for char in text:
         if char.encode("utf-8").isalpha():
             font = font_alpha
@@ -39,14 +40,14 @@
             text_height = max((char_bbox[3] - char_bbox[1]), text_height)
         char_info.append([font, char_width])
 
-    text_width = 0
+    text_width = int(0)
     for info in char_info:
         text_width += info[1]
     text_size = ((text_width + (len(text) - 1) * kerning), text_height)
     text_img = Image.new("RGBA", text_size)
     draw = ImageDraw.Draw(text_img)
 
-    tmp_width = 0
+    tmp_width = int(0)
     for index, char in enumerate(text):
         draw.text(
             (((char_info[index][1]) // 2 + tmp_width), (text_size[1] // 2)),
@@ -57,8 +58,8 @@
             stroke_fill=(32, 32, 32),
         )
         tmp_width = tmp_width + char_info[index][1] + kerning
-    binary = []
-    mask = []
+    binary: list[cv2.typing.MatLike] = []
+    mask: list[cv2.typing.MatLike] = []
     kernel = np.ones((3, 3), np.uint8)
     if len(text) >= _half_split_length:
         spilt_pixel = sum(
@@ -76,15 +77,17 @@
         binary.append(to_binary(np.asarray(text_img), 127))
         mask.append(cv2.erode(to_binary(np.asarray(text_img), 30), kernel, iterations=1))
 
-    return binary, mask
+    return (binary, mask)
 
 
-def compare(img_path: str, binary: list[any], threshold: float, mask: list[any]) -> bool:
-    img = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2GRAY)
+def compare(
+    img_path: str, binary: list[cv2.typing.MatLike], threshold: float, mask: list[cv2.typing.MatLike]
+) -> bool:
+    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
     white_pixels = cv2.countNonZero(img)
     if white_pixels < 100:
         return False
-    part_max = []
+    part_max: list[float] = []
     for image in zip(binary, mask):
         res = cv2.matchTemplate(img, image[0], cv2.TM_CCORR_NORMED, mask=image[1])
         res[np.isinf(res)] = 0
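On the `compare` changes: `cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)` loads grayscale directly instead of converting after a color load, and the `np.isinf` guard matters because `TM_CCORR_NORMED` with a `mask` normalizes by the masked region's energy, which can blow up to `inf` for near-empty regions. A minimal standalone sketch of that matching step (array contents are illustrative):

```python
import cv2
import numpy as np

img = np.random.randint(0, 256, (64, 256), np.uint8)    # grayscale frame crop
templ = np.random.randint(0, 256, (32, 128), np.uint8)  # rendered-text image
mask = np.full(templ.shape, 255, np.uint8)              # keep every pixel

res = cv2.matchTemplate(img, templ, cv2.TM_CCORR_NORMED, mask=mask)
res[np.isinf(res)] = 0            # masked matching can produce inf scores
score = float(res.max())          # compared against the threshold in compare()
```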