Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

docs: ✨ doc update #661

Draft
wants to merge 12 commits into
base: main
Choose a base branch
from
Previous commit
Next commit
feat: ✨ docker supports custom config.
hellofinch committed Feb 24, 2025
commit 6e59fd64d2516dd59b21cb864cd4a12a884eb885
4 changes: 3 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -6,6 +6,8 @@ WORKDIR /app
EXPOSE 7860

ENV PYTHONUNBUFFERED=1
ENV NOTO_FONT_PATH=/app
ENV DOCKER_CONFIG=1

# Download all required fonts
ADD "https://github.com/satbyy/go-noto-universal/releases/download/v7.0/GoNotoKurrent-Regular.ttf" /app/
@@ -26,4 +28,4 @@ COPY . .

RUN uv pip install --system --no-cache .

CMD ["pdf2zh", "-i"]
CMD ["pdf2zh", "--config", "/app/config.json", "-i"]
5 changes: 4 additions & 1 deletion pdf2zh/config.py
Original file line number Diff line number Diff line change
@@ -81,7 +81,10 @@ def custome_config(cls, file_path):
"""使用自定义路径加载配置文件"""
custom_path = Path(file_path)
if not custom_path.exists():
raise ValueError(f"Config file {custom_path} not found!")
if "DOCKER_CONFIG" not in os.environ:
raise ValueError(f"Config file {custom_path} not found!")
with open("config.json", "w") as file:
json.dump({}, file, indent=4, ensure_ascii=False)
# 加锁
with cls._lock:
instance = cls()
38 changes: 28 additions & 10 deletions pdf2zh/high_level.py
Original file line number Diff line number Diff line change
@@ -380,7 +380,7 @@ def translate(


def download_remote_fonts(lang: str):
URL_PREFIX = "https://github.com/timelic/source-han-serif/releases/download/main/"
URL_PREFIX = ConfigManager.get("FONT_URL_PREFIX")
LANG_NAME_MAP = {
**{la: "GoNotoKurrent-Regular.ttf" for la in noto_list},
**{
@@ -396,14 +396,32 @@ def download_remote_fonts(lang: str):
}
font_name = LANG_NAME_MAP.get(lang, "GoNotoKurrent-Regular.ttf")

cache_folder = os.path.join(os.path.expanduser("~"), ".cache", "pdf2zh")
os.makedirs(cache_folder, exist_ok=True)
# docker
font_path = ConfigManager.get("NOTO_FONT_PATH", Path("/app", font_name).as_posix())
if not Path(font_path).exists():
font_path = Path(cache_folder, font_name).as_posix()
if not Path(font_path).exists():
font_path = ConfigManager.get(
"NOTO_FONT_PATH", os.path.join(os.path.expanduser("~"), ".cache", "pdf2zh")
)
if not Path(font_path, font_name).exists():
font_path = Path(font_path, font_name).as_posix()
print(f"Downloading {font_name}...")
urllib.request.urlretrieve(f"{URL_PREFIX}{font_name}", font_path)

return font_path
with tqdm.tqdm(
unit="B", unit_scale=True, leave=False, unit_divisor=1024, desc=font_path
) as t:
last_downloaded = [0] # 使用列表保存上一次的下载量

def reporthook(block_num, block_size, total_size):
if total_size > 0:
t.total = total_size
downloaded = block_num * block_size
# 计算本次的增量
delta = downloaded - last_downloaded[0]
t.update(delta)
last_downloaded[0] = downloaded

urllib.request.urlretrieve(
f"{URL_PREFIX}{font_name}", font_path, reporthook=reporthook
)

print(f"Downloaded {font_path}...")
return font_path
print(font_path)
return Path(font_path, font_name).as_posix()
19 changes: 19 additions & 0 deletions pdf2zh/pdf2zh.py
Original file line number Diff line number Diff line change
@@ -174,6 +174,13 @@ def create_parser() -> argparse.ArgumentParser:
help="Use experimental backend babeldoc.",
)

parse_params.add_argument(
"--CN",
default=False,
action="store_true",
help="download in mainland China.",
)

return parser


@@ -229,6 +236,18 @@ def main(args: Optional[List[str]] = None) -> int:
if parsed_args.debug:
log.setLevel(logging.DEBUG)

if parsed_args.CN:
ConfigManager.set(
"FONT_URL_PREFIX",
"https://gitee.com/xzk1234/source-han-serif/releases/download/0.1/",
)
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
else:
ConfigManager.set(
"FONT_URL_PREFIX",
"https://github.com/timelic/source-han-serif/releases/download/main/",
)

if parsed_args.onnx:
ModelInstance.value = OnnxModel(parsed_args.onnx)
else: