diff --git a/deploy/hps/server_env/requirements/app.in b/deploy/hps/server_env/requirements/app.in
index 2cb9003a5f..a32c621604 100644
--- a/deploy/hps/server_env/requirements/app.in
+++ b/deploy/hps/server_env/requirements/app.in
@@ -4,4 +4,5 @@ numpy >= 1.24
 opencv-contrib-python == 4.10.0.84
 pycocotools >= 2
 pydantic >= 2
+safetensors @ https://paddle-whl.bj.bcebos.com/nightly/cu126/safetensors/safetensors-0.6.2.dev0-cp38-abi3-linux_x86_64.whl
 typing-extensions >= 4.11
diff --git a/deploy/hps/server_env/requirements/cpu.txt b/deploy/hps/server_env/requirements/cpu.txt
index c24a8fc5af..18e88dd165 100644
--- a/deploy/hps/server_env/requirements/cpu.txt
+++ b/deploy/hps/server_env/requirements/cpu.txt
@@ -174,6 +174,7 @@ lxml==5.3.1
     # paddlex (../../../setup.py)
     # premailer
+    # python-docx
 markupsafe==3.0.2
     # via jinja2
 marshmallow==3.26.1
@@ -243,6 +244,7 @@ packaging==24.2
     # matplotlib
     # paddlex (../../../setup.py)
     # scikit-image
+paddlepaddle @ https://paddle-whl.bj.bcebos.com/stable/cpu/paddlepaddle/paddlepaddle-3.2.1-cp310-cp310-linux_x86_64.whl
     # via -r requirements/cpu.in
 pandas==1.3.5
@@ -304,6 +306,8 @@ python-dateutil==2.9.0.post0
     # pandas
+python-docx==1.2.0
+    # via paddlex (../../../setup.py)
 pytz==2025.1
     # via pandas
 pyyaml==6.0.2
@@ -408,6 +412,7 @@ typing-extensions==4.12.2
     # pydantic
     # pydantic-core
+    # python-docx
     # sqlalchemy
     # typing-inspect
     # uvicorn
diff --git a/deploy/hps/server_env/requirements/gpu.txt b/deploy/hps/server_env/requirements/gpu.txt
index caa9a8fbc0..9d63afdd87 100644
--- a/deploy/hps/server_env/requirements/gpu.txt
+++ b/deploy/hps/server_env/requirements/gpu.txt
@@ -174,6 +174,7 @@ lxml==5.3.1
     # paddlex (../../../setup.py)
     # premailer
+    # python-docx
 markupsafe==3.0.2
     # via jinja2
 marshmallow==3.26.1
@@ -243,6 +244,7 @@ packaging==24.2
     # matplotlib
     # paddlex (../../../setup.py)
     # scikit-image
+paddlepaddle-gpu @ https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/deps/paddlepaddle/paddlepaddle_gpu-3.2.1%2Bfc-cp310-cp310-linux_x86_64.whl
     # via -r requirements/gpu.in
 pandas==1.3.5
@@ -304,6 +306,8 @@ python-dateutil==2.9.0.post0
     # pandas
+python-docx==1.2.0
+    # via paddlex (../../../setup.py)
 pytz==2025.1
     # via pandas
 pyyaml==6.0.2
@@ -408,6 +412,7 @@ typing-extensions==4.12.2
     # pydantic
     # pydantic-core
+    # python-docx
     # sqlalchemy
     # starlette
     # typing-inspect
diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md
index 442f2f2805..b832ef3327 100644
--- a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md
+++ b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md
@@ -1087,8 +1087,8 @@ paddlex --get_pipeline_config PaddleOCR-VL
 VLRecognition:
   ...
   genai_config:
-    backend: vllm-server
-    server_url: http://127.0.0.1:8118/v1
+    backend: vllm
+    server_url: http://127.0.0.1:8118
 ```
 
 之后，可以使用修改好的配置文件进行产线调用。例如通过 CLI 调用：
diff --git a/paddlex/.version b/paddlex/.version
index 15a2799817..5f6fc5edc2 100644
--- a/paddlex/.version
+++ b/paddlex/.version
@@ -1 +1 @@
-3.3.0
+3.3.10
diff --git a/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_siglip.py b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_siglip.py
index 3b77910ba9..6257eaf12e 100644
--- a/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_siglip.py
+++ b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_siglip.py
@@ -111,17 +111,27 @@ def eager_attention_forward(
+    origin_dtype = query.dtype
+
+    attn_weights = paddle.matmul(x=query.scale(scaling), y=key, transpose_y=True)
+    attn_weights = attn_weights.cast(paddle.float32)
+
     if attention_mask is not None:
+        attention_mask = attention_mask.cast(paddle.float32)
         attn_weights = attn_weights + attention_mask
+
+    attn_weights = F.softmax(attn_weights, axis=-1)
+    attn_weights = attn_weights.cast(origin_dtype)
+
     attn_weights = F.dropout(attn_weights, p=dropout, training=module.training)
     attn_output = paddle.matmul(attn_weights, value)
     attn_output = attn_output.transpose((0, 2, 1, 3))
     return attn_output, attn_weights
@@ -182,6 +192,11 @@ def forward(
         cos, sin = rope_emb
         q, k = apply_rotary_pos_emb_vision(q, k, cos, sin)
+        if not self._supports_sdpa or q.dtype == paddle.float32:
+            # → [B, H, L, Dh]
+            q = q.transpose([0, 2, 1, 3])
+            k = k.transpose([0, 2, 1, 3])
+            v = v.transpose([0, 2, 1, 3])
@@ -210,6 +225,28 @@ def forward(
                 training=self.training,
             )
             attn_output = attn_output.reshape([B, L, D])
+            attn_output, _ = eager_attention_forward(
+                self,
+                q,
+                k,
+                v,
+                attention_mask,
+                is_causal=self.is_causal,
+                scaling=self.scale,
+                dropout=0.0 if not self.training else self.dropout,
+            )
+            attn_output = attn_output.reshape([B, L, D])
+        else:
+            attn_output = paddle.nn.functional.scaled_dot_product_attention(
+                q,
+                k,
+                v,
+                attention_mask,
+                dropout_p=self.dropout,
+                is_causal=self.is_causal,
+                training=self.training,
+            )
+            attn_output = attn_output.reshape([B, L, D])
 
         attn_output = self.out_proj(attn_output)
diff --git a/paddlex/inference/pipelines/components/retriever/base.py b/paddlex/inference/pipelines/components/retriever/base.py
index a348836836..a35b5577ae 100644
--- a/paddlex/inference/pipelines/components/retriever/base.py
+++ b/paddlex/inference/pipelines/components/retriever/base.py
@@ -22,8 +22,8 @@
 from .....utils.subclass_register import AutoRegisterABCMetaClass
 
 if is_dep_available("langchain"):
-    from langchain.docstore.document import Document
-    from langchain.text_splitter import RecursiveCharacterTextSplitter
+    from langchain_core.documents.base import Document
+    from langchain_text_splitters.character import RecursiveCharacterTextSplitter
 if is_dep_available("langchain-community"):
     from langchain_community import vectorstores
     from langchain_community.vectorstores import FAISS
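
A minimal standalone sketch (not part of the patch itself) for checking the relocated langchain imports used in `paddlex/inference/pipelines/components/retriever/base.py`. It assumes `langchain-core` and `langchain-text-splitters` are installed; the sample text and chunk sizes are arbitrary placeholders.

```python
# Sketch: verify the new import locations resolve and still yield Document chunks.
from langchain_core.documents.base import Document
from langchain_text_splitters.character import RecursiveCharacterTextSplitter

# Arbitrary splitter settings chosen for illustration only.
splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)
chunks = splitter.create_documents(
    ["PaddleOCR-VL extracts text, tables, and formulas from document images."]
)

# Each chunk should be a langchain_core Document, as the retriever expects.
assert all(isinstance(chunk, Document) for chunk in chunks)
print(len(chunks), chunks[0].page_content[:40])
```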