"""Unit tests for translation_tool.checkers.color_char_checker.

Purpose: exercise the illegal colour-character check applied to KubeJS
translation files.
Minecraft formatting codes: 0-9, a-f (colours), k-o (styles), r (reset).
"""

from __future__ import annotations

import json
import sys
from pathlib import Path

ROOT = Path(__file__).resolve().parents[1]
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

from translation_tool.checkers.color_char_checker import (  # noqa: E402
    COLOR_PATTERN,
    check_color_chars,
    _check_value,
    check_json_file,
    check_directory,
)


class TestColorPattern:
    """Tests for the COLOR_PATTERN regular expression.

    Codes accepted after ``&``:
    - 0-9 and a-f: colours
    - k-o: styles (obfuscated, bold, strikethrough, underline, italic)
    - r: reset

    Every other character is reported; among lowercase letters that means
    g, h, i, j, p, q, s, t, u, v, w, x, y, z.
    """

    def test_valid_color_codes_allowed(self):
        """Colour codes 0-9 and a-f must not be reported."""
        valid_chars = "0123456789abcdef"
        for ch in valid_chars:
            text = f"Hello &{ch}world"
            assert COLOR_PATTERN.search(text) is None, f"&{ch} should be valid"

    def test_valid_formatting_codes_allowed(self):
        """Style codes k-o and the reset code r must not be reported."""
        valid_chars = "klmnor"
        for ch in valid_chars:
            text = f"Hello &{ch}bold"
            assert COLOR_PATTERN.search(text) is None, f"&{ch} should be valid"

    def test_invalid_wxyz_rejected(self):
        """The codes w, x, y, z must be reported."""
        invalid_chars = "wxyz"
        for ch in invalid_chars:
            text = f"Hello &{ch}invalid"
            match = COLOR_PATTERN.search(text)
            assert match is not None, f"&{ch} should be flagged as invalid"
            assert match.group(1) == ch

    def test_invalid_ghijpqstuv_rejected(self):
        """The remaining illegal lowercase letters must be reported.

        Together with ``test_invalid_wxyz_rejected`` this covers every
        lowercase letter outside a-f, k-o and r, including ``v``.
        """
        invalid_chars = "ghijpqstuv"
        for ch in invalid_chars:
            text = f"Test &{ch}text"
            match = COLOR_PATTERN.search(text)
            assert match is not None, f"&{ch} should be flagged as invalid"
            assert match.group(1) == ch

    def test_multiple_illegal_chars_all_found(self):
        """Every illegal code in a string is found, not just the first."""
        text = "&w &x &y &z"
        matches = COLOR_PATTERN.findall(text)
        assert sorted(matches) == sorted(["w", "x", "y", "z"])


class TestCheckColorChars:
    """Tests for check_color_chars."""

    def test_no_errors_returns_none(self):
        """A clean string yields None rather than an empty list."""
        result = check_color_chars("Hello &aworld &6color")
        assert result is None

    def test_single_error_returns_list(self):
        """A single illegal code yields a one-element list."""
        result = check_color_chars("Hello &zbad")
        assert result is not None
        assert len(result) == 1
        assert result[0].illegal_char == "z"
        assert result[0].position == 6

    def test_multiple_errors(self):
        """All illegal codes are returned, in order of appearance."""
        result = check_color_chars("&w&x&y&z")
        assert result is not None
        assert len(result) == 4
        assert [e.illegal_char for e in result] == ["w", "x", "y", "z"]

    def test_error_position_correct(self):
        """The reported position is the index of the ``&``."""
        result = check_color_chars("prefix &w illegal")
        assert result is not None
        assert result[0].position == 7  # index of "&"

    def test_error_message_format(self):
        """The message names the allowed codes and the offending sequence."""
        result = check_color_chars("&w")
        assert result is not None
        assert "0-9, a-f, k-o, r" in result[0].message
        assert "&w" in result[0].message

    def test_empty_string_no_errors(self):
        """Empty and whitespace-only strings are clean."""
        assert check_color_chars("") is None
        assert check_color_chars("   ") is None

    def test_valid_formatting_preserved(self):
        """Legal style and colour codes never produce errors."""
        valid_texts = [
            "&k random",
            "&l bold",
            "&m strike",
            "&n underline",
            "&o italic",
            "&r reset",
            "&0 black",
            "&f white",
        ]
        for text in valid_texts:
            assert check_color_chars(text) is None, f"{text} should be valid"


class TestCheckValue:
    """Tests for the recursive _check_value walker."""

    def test_nested_dict_key_path(self):
        """A nested dict reports the dotted path ``parent.child``.

        ``parent_path=""`` mirrors how check_json_file calls _check_value
        for each top-level entry.
        """
        data = {"child": "&z invalid"}
        errors = list(_check_value("test.json", "parent", data, parent_path=""))
        assert len(errors) == 1
        assert errors[0].key == "parent.child"

    def test_deeply_nested_dict(self):
        """Deeply nested dicts report the full dotted path."""
        data = {"a": {"b": {"c": "&x deep"}}}
        errors = list(_check_value("test.json", "root", data))
        assert len(errors) == 1
        assert errors[0].key == "root.a.b.c"

    def test_list_item_index_in_key(self):
        """List elements carry their index in the reported key."""
        data = {"items": ["&w item0", "ok item1"]}
        errors = list(_check_value("test.json", "root", data))
        assert len(errors) == 1
        assert "items[0]" in errors[0].key

    def test_dict_inside_list(self):
        """A dict inside a list reports ``list[index].key``."""
        data = {"list": [{"key": "&y bad"}]}
        errors = list(_check_value("test.json", "root", data))
        assert len(errors) == 1
        assert "list[0].key" in errors[0].key

    def test_mixed_valid_invalid(self):
        """Only the invalid values are reported."""
        data = {
            "valid": "&a ok",
            "invalid": "&w bad",
        }
        errors = list(_check_value("test.json", "root", data))
        assert len(errors) == 1
        assert errors[0].key == "root.invalid"
        assert errors[0].illegal_char == "w"

    def test_string_value_file_path_propagated(self):
        """The file path is copied onto every reported error."""
        data = {"key": "&z bad"}
        errors = list(_check_value("/path/to/file.json", "root", data))
        assert all(e.file_path == "/path/to/file.json" for e in errors)


class TestCheckJsonFile:
    """Tests for check_json_file."""

    def test_valid_file_no_errors(self, tmp_path: Path):
        """A JSON file without illegal codes yields no errors."""
        file_path = tmp_path / "valid.json"
        file_path.write_text(json.dumps({"key": "Hello &aworld"}), encoding="utf-8")
        errors = list(check_json_file(str(file_path)))
        assert len(errors) == 0

    def test_invalid_color_char_reported(self, tmp_path: Path):
        """A file containing an illegal code yields one error for it."""
        file_path = tmp_path / "invalid.json"
        file_path.write_text(json.dumps({"key": "Hello &zbad"}), encoding="utf-8")
        errors = list(check_json_file(str(file_path)))
        assert len(errors) == 1
        assert errors[0].file_path == str(file_path)
        assert errors[0].key == "key"

    def test_malformed_json_skipped(self, tmp_path: Path):
        """Malformed JSON is skipped instead of aborting the scan."""
        file_path = tmp_path / "malformed.json"
        file_path.write_text("{invalid json", encoding="utf-8")
        errors = list(check_json_file(str(file_path)))
        assert len(errors) == 0  # must not raise

    def test_nested_json_structure(self, tmp_path: Path):
        """Nested JSON structures are walked and report the nested path."""
        file_path = tmp_path / "nested.json"
        nested = {"top": {"middle": {"bottom": "&x bad"}}}
        file_path.write_text(json.dumps(nested), encoding="utf-8")
        errors = list(check_json_file(str(file_path)))
        assert len(errors) == 1
        assert "top.middle.bottom" in errors[0].key


class TestCheckDirectory:
    """Tests for check_directory."""

    def test_multiple_files_checked(self, tmp_path: Path):
        """Every JSON file in the directory is checked."""
        (tmp_path / "file1.json").write_text(
            json.dumps({"k": "&w bad"}), encoding="utf-8"
        )
        (tmp_path / "file2.json").write_text(
            json.dumps({"k": "&y bad"}), encoding="utf-8"
        )
        (tmp_path / "file3.json").write_text(
            json.dumps({"k": "&a ok"}), encoding="utf-8"
        )

        errors = list(check_directory(str(tmp_path)))
        assert len(errors) == 2
        file_paths = {e.file_path for e in errors}
        assert len(file_paths) == 2

    def test_subdirectory_files_checked(self, tmp_path: Path):
        """Files in subdirectories are checked too."""
        sub_dir = tmp_path / "sub"
        sub_dir.mkdir()
        (sub_dir / "nested.json").write_text(
            json.dumps({"key": "&z bad"}), encoding="utf-8"
        )

        errors = list(check_directory(str(tmp_path)))
        assert len(errors) == 1
        assert "nested.json" in errors[0].file_path

    def test_non_json_files_ignored(self, tmp_path: Path):
        """Files without a .json suffix are ignored."""
        (tmp_path / "file.txt").write_text("not json &w", encoding="utf-8")
        (tmp_path / "file.json").write_text(
            json.dumps({"key": "&w bad"}), encoding="utf-8"
        )

        errors = list(check_directory(str(tmp_path)))
        assert len(errors) == 1
        assert errors[0].file_path.endswith("file.json")
from translation_tool.core.kubejs_translator_clean import (  # noqa: E402
    is_filled_text_impl,
    deep_merge_3way_flat_impl,
    prune_en_by_tw_flat_impl,
    clean_kubejs_from_raw_impl,
    _build_reverse_index_impl,
    _dedup_pending_en_impl,
    _shielded_convert,
)


class TestBuildReverseIndexImpl:
    """Tests for _build_reverse_index_impl (deterministic reverse index)."""

    def test_empty_lookup_returns_empty(self):
        """An empty lookup produces an empty index."""
        assert _build_reverse_index_impl({}) == {}

    def test_single_entry(self):
        """A single entry is simply inverted."""
        lookup = {"key1": "value1"}
        result = _build_reverse_index_impl(lookup)
        assert result == {"value1": "key1"}

    def test_deterministic_tiebreaker_sorted(self):
        """With several candidate keys the alphabetically first one wins."""
        lookup = {"key1": "shared", "key2": "shared", "key3": "shared"}
        result = _build_reverse_index_impl(lookup)
        assert result == {"shared": "key1"}

    def test_translated_key_preferred(self):
        """Keys whose value differs from the key name count as translated."""
        # key1 is translated ("翻譯值" != "key1"); key2 is not ("key2" == "key2").
        lookup = {"key1": "翻譯值", "key2": "key2"}
        result = _build_reverse_index_impl(lookup)
        assert result == {"翻譯值": "key1", "key2": "key2"}

    def test_translated_preferred_over_untranslated_same_value(self):
        """A translated key beats an untranslated one sharing its value.

        The untranslated key sorts first alphabetically, so this fails if
        the implementation falls back to the plain alphabetical tiebreak.
        """
        lookup = {
            "Copper Ingot": "Copper Ingot",  # value == key -> untranslated
            "zz.item.copper_ingot": "Copper Ingot",  # value != key -> translated
        }
        result = _build_reverse_index_impl(lookup)
        assert result["Copper Ingot"] == "zz.item.copper_ingot"

    def test_case_insensitive_translation_detection(self):
        """For ASCII text, a value differing from its key only by case is untranslated."""
        lookup = {
            "Item_Copper": "item_copper",  # casefold-equal -> untranslated
            "Item_Copper_Translated": "item_copper",  # differs -> translated
        }
        result = _build_reverse_index_impl(lookup)
        assert result == {"item_copper": "Item_Copper_Translated"}

    def test_deterministic_across_runs(self):
        """Repeated calls on the same input give identical results."""
        lookup = {
            "aaa": "x",
            "bbb": "x",
            "ccc": "x",
            "ddd": "y",
        }
        results = [_build_reverse_index_impl(lookup) for _ in range(10)]
        assert all(r == results[0] for r in results)


class TestDedupPendingEnImpl:
    """Tests for _dedup_pending_en_impl (cross-namespace de-duplication)."""

    def test_empty_pending_returns_empty(self):
        """An empty pending_en stays empty."""
        result = _dedup_pending_en_impl({}, {"x": "y"})
        assert result == {}

    def test_values_in_reverse_index_removed(self):
        """Entries whose text is a key of reverse_index are dropped."""
        pending_en = {"key1": "翻譯過的值", "key2": "新值"}
        reverse_index = {"翻譯過的值": "canonical_key"}
        result = _dedup_pending_en_impl(pending_en, reverse_index)
        assert result == {"key2": "新值"}

    def test_cross_namespace_bug_fixed(self):
        """Membership is decided by text alone, never by comparing keys.

        Old logic: ``k != reverse_index[v]`` compared a raw/pending key with
        a final/zh_tw key. New logic: ``v in reverse_index``.
        """
        # The pending key has nothing in common with the final key, but the
        # text matches, so the entry counts as already handled.
        pending_en = {"completely_different_key": "已翻譯文本"}
        reverse_index = {"已翻譯文本": "final_key"}
        result = _dedup_pending_en_impl(pending_en, reverse_index)
        assert result == {}

    def test_non_filled_text_preserved(self):
        """Entries without real content (blank, lang reference) are kept."""
        pending_en = {
            "key1": "",
            "key2": "   ",
            "key3": "{placeholder}",
            "key4": "valid text",
        }
        reverse_index = {"valid text": "some_key"}
        result = _dedup_pending_en_impl(pending_en, reverse_index)
        # key1-key3 are not filled text, so they bypass the lookup and stay;
        # key4's text is in reverse_index, so it is removed.
        assert result == {"key1": "", "key2": "   ", "key3": "{placeholder}"}

    def test_empty_reverse_index_keeps_all(self):
        """An empty reverse_index removes nothing."""
        pending_en = {"key1": "text1", "key2": "text2"}
        result = _dedup_pending_en_impl(pending_en, {})
        assert result == pending_en


class TestShieldedConvert:
    """Tests for _shielded_convert (Rich Text Shield wrapper)."""

    def _fake_convert(self, text: str) -> str:
        """Stand-in for the s2t conversion."""
        return text.replace("简", "繁")

    def test_no_shield_converts(self, monkeypatch):
        """Without rich_text_shield the text goes straight to convert_fn."""
        import sys

        # A None entry in sys.modules makes the function-local import raise
        # ImportError, which is the fallback path under test.
        monkeypatch.setitem(
            sys.modules, "translation_tool.plugins.shared.rich_text_shield", None
        )
        # The input must change under _fake_convert, otherwise a skipped
        # conversion would go unnoticed.
        result = _shielded_convert("简体", self._fake_convert)
        assert result == "繁体"

    def test_empty_string(self):
        """An empty string comes back unchanged."""
        result = _shielded_convert("", self._fake_convert)
        assert result == ""

    def test_whitespace_only(self):
        """A whitespace-only string comes back unchanged."""
        result = _shielded_convert("   ", self._fake_convert)
        assert result == "   "
mode 100644 index 0000000..16b71d8 --- /dev/null +++ b/translation_tool/checkers/color_char_checker.py @@ -0,0 +1,151 @@ +"""translation_tool/checkers/color_char_checker.py 模組。 + +用途:檢查翻譯檔案中的非法顏色字元(& 後接非法的 Minecraft 顏色代碼字元)。 +維護注意:本檔案的函式 docstring 用於維護說明,不代表行為變更。 +""" + +import json +import os +import re +from dataclasses import dataclass +from typing import Any, Generator + +# 核心檢查:& 後只能接 Minecraft 合法的格式化代碼字元 +# 合法範圍:0-9(數字), a-f(顏色代碼), k-o(格式代碼), r(重置) +# 違法:& 後面接了合法範圍 0-9a-fk-or 以外的任何字元 +COLOR_PATTERN = re.compile(r"&([^0-9a-fk-or])") + + +@dataclass +class ColorCharError: + """單一顏色字元錯誤。""" + + file_path: str + key: str + value: str + illegal_char: str + position: int # 在 value 中的位置 + message: str + + +def check_color_chars(value: str) -> list[ColorCharError] | None: + """檢查單一字串中的非法顏色字元。 + + Args: + value: 要檢查的字串。 + + Returns: + 錯誤列表(若無錯誤則回傳 None)。 + """ + errors: list[ColorCharError] = [] + for match in COLOR_PATTERN.finditer(value): + illegal_char = match.group(1) + pos = match.start() + errors.append( + ColorCharError( + file_path="", + key="", + value=value, + illegal_char=illegal_char, + position=pos, + message=f"在位置 {pos} 發現非法顏色字元 '&{illegal_char}'," + f"& 後只能接 0-9, a-f, k-o, r。", + ) + ) + return errors if errors else None + + +def _check_value( + file_path: str, + key: str, + value: Any, + parent_path: str = "", + *, + include_current_key_in_parent: bool = False, +) -> Generator[ColorCharError, None, None]: + """遞迴檢查單一值,若為字串則檢查顏色字元。 + + Args: + file_path: 檔案路徑。 + key: 目前檢查的 key。 + value: 目前檢查的值。 + parent_path: 父層的完整路徑(用於 nested dict key path 報告)。 + """ + # 建立完整的 nested dict key path(用於 nested dict 巢狀回報) + if parent_path and include_current_key_in_parent: + full_key = f"{parent_path}.{key}" + elif parent_path: + full_key = parent_path + else: + full_key = key + + if isinstance(value, str): + errors = check_color_chars(value) + if errors: + for err in errors: + # 補足 file_path 與 key(從上層傳入) + yield ColorCharError( + file_path=err.file_path or file_path, + 
key=full_key, + value=err.value, + illegal_char=err.illegal_char, + position=err.position, + message=err.message, + ) + elif isinstance(value, dict): + for k, v in value.items(): + # 進入 nested dict:之後的子層 key 要接在目前完整路徑後面 + yield from _check_value( + file_path, + k, + v, + parent_path=full_key, + include_current_key_in_parent=True, + ) + elif isinstance(value, list): + for i, item in enumerate(value): + # list index 視為目前 key 的子路徑一部分 + yield from _check_value( + file_path, + f"{key}[{i}]", + item, + parent_path=parent_path, + include_current_key_in_parent=True, + ) + + +def check_json_file(file_path: str) -> Generator[ColorCharError, None, None]: + """讀取 JSON 檔並遞迴檢查所有 string value。 + + Args: + file_path: JSON 檔案路徑。 + + Yields: + 找到的 ColorCharError。 + """ + try: + with open(file_path, encoding="utf-8") as f: + data = json.load(f) + except Exception: + # 不阻断,继续检查其他文件 + return + + if isinstance(data, dict): + for key, value in data.items(): + # 傳入空字串作為 initial key,避免頂層 key 被重複附加到 nested path + yield from _check_value(file_path, key, value, parent_path="") + + +def check_directory(dir_path: str) -> Generator[ColorCharError, None, None]: + """遞迴檢查目錄下所有 .json 檔。 + + Args: + dir_path: 目錄路徑。 + + Yields: + 找到的 ColorCharError。 + """ + for root, _, files in os.walk(dir_path): + for file in files: + if file.endswith(".json"): + yield from check_json_file(os.path.join(root, file)) diff --git a/translation_tool/core/kubejs_translator_clean.py b/translation_tool/core/kubejs_translator_clean.py index 76bf25a..af452bb 100644 --- a/translation_tool/core/kubejs_translator_clean.py +++ b/translation_tool/core/kubejs_translator_clean.py @@ -14,6 +14,83 @@ _LANG_REF_RE = re.compile(r"^\{.+\}$") +def _build_reverse_index_impl(final_tw_lookup: dict[str, str]) -> dict[str, str]: + """建立 reverse_index:{英文文字: 選擇的 canonical key}。 + + 選擇策略(確定性): + 1. 優先取「已翻譯的 key」(即 zh_tw 值與英文 key 名不同,表示有真正翻譯) + 2. 若多個已翻譯,取字母序第一個(確定性 tiebreaker) + 3. 
def _build_reverse_index_impl(final_tw_lookup: dict[str, str]) -> dict[str, str]:
    """Invert ``final_tw_lookup`` into ``{value: canonical key}``.

    Only values accepted by ``is_filled_text_impl`` are indexed. When
    several keys share a value, the canonical key is chosen
    deterministically:

    1. Keys that count as translated (value differs from the key name) win
       over untranslated ones. When both key and value are ASCII the
       comparison is case-insensitive (``casefold``); otherwise it is exact.
    2. Within the winning group the alphabetically first key is taken.

    Args:
        final_tw_lookup: Mapping of key to text, as merged from the final
            zh_tw.json files by the caller.

    Returns:
        dict[str, str]: one key per distinct value (never a list), in order
        of each value's first appearance in ``final_tw_lookup``.
    """
    # For each value keep only the best (rank, key) seen so far. The rank
    # tuple sorts translated keys (False) ahead of untranslated ones (True),
    # then by key, so a single running minimum replaces sorting two lists.
    best: dict[str, tuple[bool, str]] = {}
    for key, value in final_tw_lookup.items():
        if not is_filled_text_impl(value):
            continue
        if value.isascii() and key.isascii():
            is_translated = value.casefold() != key.casefold()
        else:
            is_translated = value != key
        rank = (not is_translated, key)
        current = best.get(value)
        if current is None or rank < current:
            best[value] = rank

    return {value: rank[1] for value, rank in best.items()}


def _dedup_pending_en_impl(
    pending_en: dict[str, str], reverse_index: dict[str, str]
) -> dict[str, str]:
    """Drop pending entries whose text is already a key of ``reverse_index``.

    This replaces the earlier ``k != reverse_index[v]`` check, which
    compared a raw/pending key with a final/zh_tw key; keys from those two
    namespaces are not comparable. Membership is now decided by the text
    alone: ``v in reverse_index``.

    Entries whose text is not filled (per ``is_filled_text_impl``) are
    always kept, whatever ``reverse_index`` contains.

    Args:
        pending_en: Pending en_us data (key -> English text).
        reverse_index: Mapping produced by ``_build_reverse_index_impl``
            (text -> canonical key).

    Returns:
        dict[str, str]: the surviving entries of ``pending_en``, in their
        original order.
    """
    kept: dict[str, str] = {}
    for key, text in pending_en.items():
        if is_filled_text_impl(text) and text in reverse_index:
            continue  # text already present in the final output
        kept[key] = text
    return kept


def _shielded_convert(text: str, convert_fn: Callable[[str], str]) -> str:
    """Run ``convert_fn`` on ``text`` behind shield -> convert -> unshield.

    Used around the OpenCC s2t conversion so that KubeJS markup (colour
    codes, item IDs, ...) is not altered by the conversion.

    Args:
        text: The text to convert.
        convert_fn: The conversion applied to the shielded text.

    Returns:
        The converted text with shielded spans restored. If
        ``rich_text_shield`` cannot be imported, or it produced no shields,
        this is simply ``convert_fn(text)``.
    """
    # Imported lazily: the shield lives in
    # translation_tool.plugins.shared.rich_text_shield; when it is missing
    # this function degrades to a plain conversion.
    try:
        from translation_tool.plugins.shared.rich_text_shield import (
            shield_text,
            unshield_text,
        )
    except ImportError:
        return convert_fn(text)

    shielded = shield_text(text)
    if shielded.skip_reason is not None:
        # NOTE(review): a skipped text is returned WITHOUT calling
        # convert_fn - confirm that every skip_reason really means
        # "nothing to convert".
        return text
    if not shielded.shields:
        return convert_fn(text)
    converted = convert_fn(shielded.clean)
    return unshield_text(converted, shielded.shields)