Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .precommit/check_imports.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
"GPUtil": "GPUtil",
"huggingface_hub": "huggingface-hub",
"imagesize": "imagesize",
"jieba": "jieba",
"jinja2": "Jinja2",
"joblib": "joblib",
"langchain": "langchain",
Expand All @@ -60,6 +61,7 @@
"modelscope": "modelscope",
"numpy": "numpy",
"openai": "openai",
"opencc": "OpenCC",
"cv2": "opencv-contrib-python",
"openpyxl": "openpyxl",
"packaging": "packaging",
Expand All @@ -73,6 +75,7 @@
"pycocotools": "pycocotools",
"pydantic": "pydantic",
"pypdfium2": "pypdfium2",
"pypinyin": "pypinyin",
"yaml": "PyYAML",
"regex": "regex",
"requests": "requests",
Expand Down Expand Up @@ -120,6 +123,7 @@
"paddle_custom_device",
"ultra_infer",
"fastdeploy",
"onnxruntime",
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ comments: true
```python
from paddlex import create_model
model = create_model(model_name="fastspeech2_csmsc")
output = model.predict(input=[151, 120, 182, 82, 182, 82, 174, 75, 262, 51, 37, 186, 38, 233]. , batch_size=1)
output = model.predict(input=[[151, 120, 182, 82, 182, 82, 174, 75, 262, 51, 37, 186, 38, 233]] , batch_size=1)
for res in output:
res.print()
res.save_to_json(save_path="./output/res.json")
Expand Down
2 changes: 1 addition & 1 deletion paddlex/configs/modules/text_to_pinyin/G2PWModel.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ Predict:
batch_size: 1
input: "欢迎使用飞桨"
kernel_option:
run_mode: paddle
run_mode: paddle
2 changes: 1 addition & 1 deletion paddlex/inference/common/batch_sampler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,6 @@
from .doc_vlm_batch_sampler import DocVLMBatchSampler
from .image_batch_sampler import ImageBatchSampler
from .markdown_batch_sampler import MarkDownBatchSampler
from .text_batch_sampler import TextBatchSampler
from .ts_batch_sampler import TSBatchSampler
from .video_batch_sampler import VideoBatchSampler
from .text_batch_sampler import TextBatchSampler
4 changes: 2 additions & 2 deletions paddlex/inference/common/batch_sampler/text_batch_sampler.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -58,4 +58,4 @@ def batch_size(self, batch_size):
f"audio batch sampler only support batch size 1, but got {batch_size}."
)
else:
self._batch_size = batch_size
self._batch_size = batch_size
2 changes: 1 addition & 1 deletion paddlex/inference/common/result/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from .base_audio_result import BaseAudioResult
from .base_cv_result import BaseCVResult
from .base_result import BaseResult
from .base_ts_result import BaseTSResult
from .base_video_result import BaseVideoResult
from .base_audio_result import BaseAudioResult
from .mixin import (
Base64Mixin,
CSVMixin,
Expand Down
17 changes: 11 additions & 6 deletions paddlex/inference/common/result/mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

from ....utils import logging
from ...utils.io import (
AudioWriter,
CSVWriter,
HtmlWriter,
ImageWriter,
Expand All @@ -36,7 +37,6 @@
TextWriter,
VideoWriter,
XlsxWriter,
AudioWriter,
)


Expand Down Expand Up @@ -1062,7 +1062,8 @@ def _is_video_file(file_path):
f"The result has multiple video files need to be saved. But the `save_path` has been specified as `{save_path}`!"
)
video_writer.write(save_path, video[list(video.keys())[0]], *args, **kwargs)



class AudioMixin:
"""Mixin class for adding Audio handling capabilities."""

Expand Down Expand Up @@ -1107,8 +1108,7 @@ def save_to_audio(self, save_path: str, *args: List, **kwargs: Dict) -> None:
def _is_audio_file(file_path):
mime_type, _ = mimetypes.guess_type(file_path)
return mime_type is not None and mime_type.startswith("audio/")



audio = self._to_audio()
if not _is_audio_file(save_path):
fn = Path(self._get_input_fn())
Expand All @@ -1117,13 +1117,18 @@ def _is_audio_file(file_path):
base_save_path = Path(save_path)
for key in audio:
save_path = base_save_path / f"{stem}_{key}{suffix}"
self._audio_writer.write(save_path.as_posix(), audio[key], *args, **kwargs)
self._audio_writer.write(
save_path.as_posix(), audio[key], *args, **kwargs
)
else:
if len(audio) > 1:
logging.warning(
f"The result has multiple audio files need to be saved. But the `save_path` has been specified as `{save_path}`!"
)
self._audio_writer.write(save_path, audio[list(audio.keys())[0]], *args, **kwargs)
self._audio_writer.write(
save_path, audio[list(audio.keys())[0]], *args, **kwargs
)


class MarkdownMixin:
"""Mixin class for adding Markdown handling capabilities."""
Expand Down
7 changes: 4 additions & 3 deletions paddlex/inference/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,15 @@
from .table_structure_recognition import TablePredictor
from .text_detection import TextDetPredictor
from .text_recognition import TextRecPredictor
from .text_to_pinyin import TextToPinyinPredictor
from .text_to_speech_acoustic import Fastspeech2Predictor
from .text_to_speech_vocoder import PwganPredictor
from .ts_anomaly_detection import TSAdPredictor
from .ts_classification import TSClsPredictor
from .ts_forecasting import TSFcPredictor
from .video_classification import VideoClasPredictor
from .video_detection import VideoDetPredictor
from .text_to_speech_acoustic import Fastspeech2Predictor
from .text_to_speech_vocoder import PwganPredictor
from .text_to_pinyin import TextToPinyinPredictor


def create_predictor(
model_name: str,
Expand Down
3 changes: 2 additions & 1 deletion paddlex/inference/models/common/static_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,8 @@ def _create(
logging.debug("`device_id` has been set to None")

if (
self._option.device_type in ("gpu", "dcu", "npu", "mlu", "gcu", "xpu", "iluvatar_gpu")
self._option.device_type
in ("gpu", "dcu", "npu", "mlu", "gcu", "xpu", "iluvatar_gpu")
and self._option.device_id is None
):
self._option.device_id = 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@
import numpy as np

from .....utils import logging
from .....utils.download import download
from .....utils.cache import CACHE_DIR
from .....utils.download import download

__all__ = [
"AddedToken",
Expand Down
2 changes: 1 addition & 1 deletion paddlex/inference/models/text_to_pinyin/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# copyright (c) 2025 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down
16 changes: 4 additions & 12 deletions paddlex/inference/models/text_to_pinyin/predictor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# copyright (c) 2025 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -12,14 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

from ....utils.func_register import FuncRegister
from ....modules.text_to_pinyin.model_list import MODELS
from ...common.batch_sampler import TextBatchSampler

from ..base import BasePredictor
from .result import TextToPinyinResult
from ....modules.text_to_pinyin.model_list import MODELS


class TextToPinyinPredictor(BasePredictor):
Expand Down Expand Up @@ -58,9 +54,7 @@ def _build(self):
Returns:
G2PWOnnxConverter: An instance of G2PWOnnxConverter.
"""
from .processors import (
G2PWOnnxConverter,
)
from .processors import G2PWOnnxConverter

# build model
model = G2PWOnnxConverter(
Expand All @@ -79,6 +73,4 @@ def process(self, batch_data):
dict: A dictionary containing the input path and result. The result include the output pinyin dict.
"""
result = self.model(batch_data[0])
return {
"result": [result]
}
return {"result": [result]}
240 changes: 159 additions & 81 deletions paddlex/inference/models/text_to_pinyin/processors.py

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions paddlex/inference/models/text_to_pinyin/result.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# copyright (c) 2025 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -13,7 +13,7 @@
# limitations under the License.

from ...common.result import BaseResult
import copy


class TextToPinyinResult(BaseResult):

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# copyright (c) 2025 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down
2 changes: 1 addition & 1 deletion paddlex/inference/models/text_to_speech_acoustic/result.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# copyright (c) 2025 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# copyright (c) 2025 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down
2 changes: 1 addition & 1 deletion paddlex/inference/models/text_to_speech_vocoder/result.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# copyright (c) 2025 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ def generate_prompt(

after_rule = "9. 请在翻译完成后添加特殊标记 <<END>>,确保翻译完整。"
prompt = f"""{task_description}{rules_str}{after_rule}{output_format}{few_shot_demo_text_content}{few_shot_demo_key_value_list}"""

language_name = language_map.get(language, language)
task_type = self.task_type
if task_type == "translate_prompt":
Expand Down
17 changes: 11 additions & 6 deletions paddlex/inference/pipelines/pp_doctranslation/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,9 +173,10 @@ def translate_html_block(html_block, chunk_size, translate_func, results):
Returns:
None
"""
from bs4 import BeautifulSoup
import copy

from bs4 import BeautifulSoup

# If the HTML is short and simple, translate directly
if (
html_block.count("<") < 5
Expand Down Expand Up @@ -203,7 +204,7 @@ def translate_html_block(html_block, chunk_size, translate_func, results):
td_batch_nodes.append(parent_td)
td_batch_texts.append(td_text)
td_seen.add(id(parent_td))

# Process <td>/<th> nodes in batches
batch_size = chunk_size
i = 0
Expand All @@ -212,12 +213,15 @@ def translate_html_block(html_block, chunk_size, translate_func, results):
batch_nodes = []
batch_texts = []
current_length = 0
while i < len(td_batch_nodes) and current_length + len(td_batch_texts[i]) <= batch_size:
while (
i < len(td_batch_nodes)
and current_length + len(td_batch_texts[i]) <= batch_size
):
batch_nodes.append(td_batch_nodes[i])
batch_texts.append(td_batch_texts[i])
current_length += len(td_batch_texts[i])
i += 1

# Translate the batch and reinsert translated content
placeholder = "__TD__"
batch_text = placeholder.join(batch_texts)
Expand All @@ -230,7 +234,6 @@ def translate_html_block(html_block, chunk_size, translate_func, results):
for child in frag.contents:
td_node.append(copy.deepcopy(child))


text_nodes = []
for node in soup.find_all(string=True, recursive=True):
if not node.find_parent(["td", "th"]) and node.strip():
Expand All @@ -245,7 +248,9 @@ def translate_html_block(html_block, chunk_size, translate_func, results):
while idx < total:
node_text = text_nodes[idx].strip()
if len(node_text) > chunk_size:
translated_text = split_text_recursive(node_text, chunk_size, translate_func)
translated_text = split_text_recursive(
node_text, chunk_size, translate_func
)
text_nodes[idx].replace_with(translated_text)
idx += 1
continue
Expand Down
2 changes: 1 addition & 1 deletion paddlex/inference/utils/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
YAMLReader,
)
from .writers import (
AudioWriter,
CSVWriter,
HtmlWriter,
ImageWriter,
Expand All @@ -34,5 +35,4 @@
WriterType,
XlsxWriter,
YAMLWriter,
AudioWriter,
)
4 changes: 3 additions & 1 deletion paddlex/inference/utils/io/writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@

import numpy as np
import pandas as pd
import soundfile as sf
import yaml
from PIL import Image

Expand All @@ -29,6 +28,9 @@
if is_dep_available("opencv-contrib-python"):
import cv2

if is_dep_available("soundfile"):
import soundfile as sf


__all__ = [
"WriterType",
Expand Down
7 changes: 5 additions & 2 deletions paddlex/inference/utils/pp_option.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

from ...utils import logging
from ...utils.device import get_default_device, parse_device, set_env_for_device_type
from ...utils.flags import ENABLE_MKLDNN_BYDEFAULT, USE_PIR_TRT, DISABLE_DEVICE_FALLBACK
from ...utils.flags import DISABLE_DEVICE_FALLBACK, ENABLE_MKLDNN_BYDEFAULT, USE_PIR_TRT
from .misc import is_mkldnn_available
from .mkldnn_blocklist import MKLDNN_BLOCKLIST
from .new_ir_blocklist import NEWIR_BLOCKLIST
Expand Down Expand Up @@ -84,7 +84,10 @@ def setdefault_by_model_name(self, model_name):
if self.device_type == "gpu":
import paddle

if not (paddle.device.is_compiled_with_cuda() and paddle.device.cuda.device_count() > 0):
if not (
paddle.device.is_compiled_with_cuda()
and paddle.device.cuda.device_count() > 0
):
if DISABLE_DEVICE_FALLBACK:
raise RuntimeError(
"Device fallback is disabled and the specified device (GPU) is not available. "
Expand Down
Loading