From 0bb152aee63b71f44f4f7920cbc179f1ded32692 Mon Sep 17 00:00:00 2001
From: DreamGallery <2511462508@qq.com>
Date: Sat, 9 Sep 2023 15:17:09 +0800
Subject: [PATCH] CPU Version
---
README.md | 4 ++--
main.py | 8 ++++----
src/adv_text.py | 10 +++++-----
src/events.py | 2 +-
src/frame.py | 27 +++++++++++++++++++--------
src/match.py | 31 +++++++++++++++++--------------
6 files changed, 48 insertions(+), 34 deletions(-)
diff --git a/README.md b/README.md
index 40fc82a..8734918 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@ Also using `OPENCV` to fix timeline of subtitle frame by frame.
※This tool is based on [MalitsPlus/HoshimiToolkit](https://github.com/MalitsPlus/HoshimiToolkit); you need to obtain the in-game subtitle files through that project first. The required files are usually named `adv_***.txt`.
-# Usage
+# Usage (CPU Version)
## Install from the repository
@@ -35,7 +35,7 @@ python generate.py
## Generate .ass file with time-fix
To use the time-fix option, put the recorded video in `adv/video`. The recommended resolution is `[1920x1080]`, or you can change the `[Font Config]` in `config.ini` to fit your video (comparing in PS is a good idea).
-If your resolution ratio is not `16:9`, you may also have to modify the cutting area of frames near line `26~29` in `src/frame.py`.
+If your aspect ratio is not `16:9`, you may also need to adjust the frame cropping area in `src/frame.py` (see the sketch below).
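+For reference, the default `16:9` crop lives in `FrameProcess.one_task`; this one-line sketch (using the patch's own `height`/`width` variables) shows the numbers to adjust:
+```
+# keep the subtitle band: rows 29/36..8/9 of the height, columns 1/16..15/16 of the width
+img = frame[(height * 29 // 36):(height * 8 // 9), (width * 1 // 16):(width * 15 // 16)]
+```
+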
```
python main.py
diff --git a/main.py b/main.py
index 6ea65c1..b5e0599 100644
--- a/main.py
+++ b/main.py
@@ -37,9 +37,9 @@
for dial in extract(sub_file_name):
dial_list.append(dial)
-files = []
-current_count = 0
-start_file_index = 0
+files: list[str] = []
+current_count: int = 0
+start_file_index: int = 0
content = script_info + "\n" + garbage + "\n" + style + "\n" + event
print("ASS-Generate-Progress start")
@@ -49,7 +49,7 @@
for dial in dial_list:
if "SkipTime" in dial:
- start_file_index = start_file_index + int(float(str(dial).split(":")[1]) * stream.fps)
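+            # a "SkipTime:<seconds>" entry shifts the search start by seconds * fps frames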
+ start_file_index = start_file_index + int(float(dial.split(":")[1]) * stream.fps)
continue
dial_event = AssEvents()
dial_event.from_dialogue(dial)
diff --git a/src/adv_text.py b/src/adv_text.py
index 03bc5f3..1d58d70 100644
--- a/src/adv_text.py
+++ b/src/adv_text.py
@@ -12,8 +12,8 @@
_KEY_TITLE = config.get("Text KEY", "KEY_TITLE")
-def extract(input: str) -> list:
- dial_list = []
+def extract(input: str) -> list[str]:
+ dial_list: list[str] = []
with open(f"{_TXT_PATH}/{input}", "r", encoding="utf8") as f:
for line in f:
if "text" in line:
@@ -30,7 +30,7 @@ def get_title(input: str) -> str:
return title
-def get_text(input: str) -> [str, bool]:
+def get_text(input: str) -> tuple[str, bool]:
if _KEY_MASSAGE in input:
text = (
input[1:-2].split(_KEY_MASSAGE)[1].split(f"\u0020{_KEY_NAME}")[0].replace("{user}", _player_name)
@@ -43,7 +43,7 @@ def get_text(input: str) -> [str, bool]:
elif _KEY_NARRATION in input:
text = input[1:-2].split(_KEY_NARRATION)[1].split(f"\u0020{_KEY_CLIP}")[0]
gray = True
- return text, gray
+ return (text, gray)
def get_name(input: str) -> str:
@@ -54,7 +54,7 @@ def get_name(input: str) -> str:
return name
-def get_clip(input: str) -> any:
+def get_clip(input: str):
clip = input[1:-2].split(f"\u0020{_KEY_CLIP}")[1].replace("\\", "")
data = json.loads(clip)
return data
diff --git a/src/events.py b/src/events.py
index 1a3fab1..72dcb95 100644
--- a/src/events.py
+++ b/src/events.py
@@ -31,7 +31,7 @@ def __init__(
self.Effect = Effect
self.Text = Text
- def from_dialogue(self, input: str):
+ def from_dialogue(self, input: str) -> None:
self.Start = to_time(get_clip(input)["_startTime"])
self.Duration = get_clip(input)["_duration"]
self.End = end_time(get_clip(input)["_startTime"], get_clip(input)["_duration"])
diff --git a/src/frame.py b/src/frame.py
index 2778d6c..85e93e1 100644
--- a/src/frame.py
+++ b/src/frame.py
@@ -1,4 +1,4 @@
-import cv2
+import cv2, cv2.typing
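+# NOTE: cv2.typing (for MatLike annotations) assumes a recent opencv-python (4.8 or newer)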
import os, sys
import threading
import numpy as np
@@ -11,18 +11,25 @@
_VIDEO_PATH = config.get("File PATH", "VIDEO_PATH")
_lock = threading.Lock()
-_current_count = 0
+_current_count: int = 0
class FrameProcess(object):
fps: float
- def one_task(self, image_folder_path: str, frame: any, milliseconds: float, total_fps: int):
+ def one_task(
+ self,
+ image_folder_path: str,
+ frame: cv2.typing.MatLike,
+ width: int,
+ height: int,
+ milliseconds: float,
+ total_fps: int,
+ ) -> None:
global _current_count
seconds = "%.4f" % (milliseconds // 1000 + (milliseconds % 1000) / 1000)
name = seconds[:-1].replace(".", "_")
- height = len(frame)
- width = len(frame[0])
+        # Adjust the crop below if your aspect ratio is not 16:9
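+        # (for 16:9 it keeps rows ~81%..89% of the height and the middle 7/8 of the width, i.e. the subtitle band)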
img = frame[
(height * 29 // 36) : (height * 8 // 9),
(width * 1 // 16) : (width * 15 // 16),
@@ -43,12 +50,14 @@ def one_task(self, image_folder_path: str, frame: any, milliseconds: float, tota
sys.stdout.flush()
_lock.release()
- def to_frame(self, input: str):
+ def to_frame(self, input: str) -> None:
image_folder_path = f"{_CACHE_PATH}/{input.split('.')[0]}"
os.makedirs(image_folder_path, exist_ok=True)
video_path = f"{_VIDEO_PATH}/{input}"
vc = cv2.VideoCapture(video_path)
self.fps = vc.get(cv2.CAP_PROP_FPS)
+ width = int(vc.get(cv2.CAP_PROP_FRAME_WIDTH))
+ height = int(vc.get(cv2.CAP_PROP_FRAME_HEIGHT))
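+        # frame size is read once from the capture properties instead of via len(frame) in every task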
total_fps = int(vc.get(cv2.CAP_PROP_FRAME_COUNT))
executor = ThreadPoolExecutor(max_workers=20)
frame_tasks = []
@@ -58,13 +67,15 @@ def to_frame(self, input: str):
break
milliseconds = vc.get(cv2.CAP_PROP_POS_MSEC)
frame_tasks.append(
- executor.submit(self.one_task, image_folder_path, frame, milliseconds, total_fps)
+ executor.submit(
+ self.one_task, image_folder_path, frame, width, height, milliseconds, total_fps
+ )
)
vc.release()
wait(frame_tasks, return_when="ALL_COMPLETED")
print("\u0020", "Pre-Progress finished")
- def get_fps(self, input: str):
+ def get_fps(self, input: str) -> None:
video_path = f"{_VIDEO_PATH}/{input}"
vc = cv2.VideoCapture(video_path)
self.fps = vc.get(cv2.CAP_PROP_FPS)
diff --git a/src/match.py b/src/match.py
index 14bdfb2..691288d 100644
--- a/src/match.py
+++ b/src/match.py
@@ -1,18 +1,19 @@
-import cv2
+import cv2, cv2.typing
import numpy as np
from src.read_ini import config
from PIL import Image, ImageDraw, ImageFont
+from PIL.ImageFont import FreeTypeFont
_half_split_length = config.getint("Arg", "half_split_length")
-def to_binary(img: any, thresh: float) -> any:
+def to_binary(img: cv2.typing.MatLike, thresh: float) -> cv2.typing.MatLike:
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret, binary = cv2.threshold(gray, thresh, 255, cv2.THRESH_BINARY)
return binary
-def to_binary_adaptive(img: any, blocksize: int, C: float) -> any:
+def to_binary_adaptive(img: cv2.typing.MatLike, blocksize: int, C: float) -> cv2.typing.MatLike:
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
binary = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, blocksize, C)
return binary
@@ -20,13 +21,13 @@ def to_binary_adaptive(img: any, blocksize: int, C: float) -> any:
def draw_text(
text: str, font_path: list[str], fontsize: list[int], strokewidth: int, kerning: int
-) -> [list[any], list[any]]:
+) -> tuple[list[cv2.typing.MatLike], list[cv2.typing.MatLike]]:
font_japan = ImageFont.truetype(font_path[0], fontsize[0])
font_alpha = ImageFont.truetype(font_path[1], fontsize[1])
font_digit = ImageFont.truetype(font_path[2], fontsize[2])
- char_info = []
- text_height = 0
+ char_info: list[tuple[FreeTypeFont, int]] = []
+    text_height: int = 0
for char in text:
if char.encode("utf-8").isalpha():
font = font_alpha
@@ -39,14 +40,14 @@ def draw_text(
text_height = max((char_bbox[3] - char_bbox[1]), text_height)
-        char_info.append([font, char_width])
+        char_info.append((font, char_width))
- text_width = 0
+    text_width: int = 0
for info in char_info:
text_width += info[1]
text_size = ((text_width + (len(text) - 1) * kerning), text_height)
text_img = Image.new("RGBA", text_size)
draw = ImageDraw.Draw(text_img)
- tmp_width = 0
+    tmp_width: int = 0
for index, char in enumerate(text):
draw.text(
(((char_info[index][1]) // 2 + tmp_width), (text_size[1] // 2)),
@@ -57,8 +58,8 @@ def draw_text(
stroke_fill=(32, 32, 32),
)
tmp_width = tmp_width + char_info[index][1] + kerning
- binary = []
- mask = []
+ binary: list[cv2.typing.MatLike] = []
+ mask: list[cv2.typing.MatLike] = []
kernel = np.ones((3, 3), np.uint8)
if len(text) >= _half_split_length:
spilt_pixel = sum(
@@ -76,15 +77,17 @@ def draw_text(
binary.append(to_binary(np.asarray(text_img), 127))
mask.append(cv2.erode(to_binary(np.asarray(text_img), 30), kernel, iterations=1))
- return binary, mask
+ return (binary, mask)
-def compare(img_path: str, binary: list[any], threshold: float, mask: list[any]) -> bool:
- img = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2GRAY)
+def compare(
+ img_path: str, binary: list[cv2.typing.MatLike], threshold: float, mask: list[cv2.typing.MatLike]
+) -> bool:
+ img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
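+    # decode directly to grayscale rather than imread() followed by cvtColor()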
white_pixels = cv2.countNonZero(img)
if white_pixels < 100:
return False
- part_max = []
+ part_max: list[float] = []
for image in zip(binary, mask):
res = cv2.matchTemplate(img, image[0], cv2.TM_CCORR_NORMED, mask=image[1])
res[np.isinf(res)] = 0