diff --git a/deploy/hps/server_env/requirements/app.in b/deploy/hps/server_env/requirements/app.in
index 2cb9003a5f..a32c621604 100644
--- a/deploy/hps/server_env/requirements/app.in
+++ b/deploy/hps/server_env/requirements/app.in
@@ -4,4 +4,5 @@ numpy >= 1.24
 opencv-contrib-python == 4.10.0.84
 pycocotools >= 2
 pydantic >= 2
+safetensors @ https://paddle-whl.bj.bcebos.com/nightly/cu126/safetensors/safetensors-0.6.2.dev0-cp38-abi3-linux_x86_64.whl
 typing-extensions >= 4.11
diff --git a/deploy/hps/server_env/requirements/cpu.txt b/deploy/hps/server_env/requirements/cpu.txt
index c24a8fc5af..18e88dd165 100644
--- a/deploy/hps/server_env/requirements/cpu.txt
+++ b/deploy/hps/server_env/requirements/cpu.txt
@@ -174,6 +174,7 @@ lxml==5.3.1
     # paddlex (../../../setup.py)
     # premailer
+    # python-docx
 markupsafe==3.0.2
     # via jinja2
 marshmallow==3.26.1
@@ -243,6 +244,7 @@ packaging==24.2
     # matplotlib
     # paddlex (../../../setup.py)
     # scikit-image
+paddlepaddle @ https://paddle-whl.bj.bcebos.com/stable/cpu/paddlepaddle/paddlepaddle-3.2.1-cp310-cp310-linux_x86_64.whl
     # via -r requirements/cpu.in
 pandas==1.3.5
@@ -304,6 +306,8 @@ python-dateutil==2.9.0.post0
     # pandas
+python-docx==1.2.0
+    # via paddlex (../../../setup.py)
 pytz==2025.1
     # via pandas
 pyyaml==6.0.2
@@ -408,6 +412,7 @@ typing-extensions==4.12.2
     # pydantic
     # pydantic-core
+    # python-docx
     # sqlalchemy
     # typing-inspect
     # uvicorn
diff --git a/deploy/hps/server_env/requirements/gpu.txt b/deploy/hps/server_env/requirements/gpu.txt
index caa9a8fbc0..9d63afdd87 100644
--- a/deploy/hps/server_env/requirements/gpu.txt
+++ b/deploy/hps/server_env/requirements/gpu.txt
@@ -174,6 +174,7 @@ lxml==5.3.1
     # paddlex (../../../setup.py)
     # premailer
+    # python-docx
 markupsafe==3.0.2
     # via jinja2
 marshmallow==3.26.1
@@ -243,6 +244,7 @@ packaging==24.2
     # matplotlib
     # paddlex (../../../setup.py)
     # scikit-image
+paddlepaddle-gpu @ https://paddle-model-ecology.bj.bcebos.com/paddlex/PaddleX3.0/deploy/deps/paddlepaddle/paddlepaddle_gpu-3.2.1%2Bfc-cp310-cp310-linux_x86_64.whl
     # via -r requirements/gpu.in
 pandas==1.3.5
@@ -304,6 +306,8 @@ python-dateutil==2.9.0.post0
     # pandas
+python-docx==1.2.0
+    # via paddlex (../../../setup.py)
 pytz==2025.1
     # via pandas
 pyyaml==6.0.2
@@ -408,6 +412,7 @@ typing-extensions==4.12.2
     # pydantic
     # pydantic-core
+    # python-docx
     # sqlalchemy
     # starlette
     # typing-inspect
diff --git a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md
index 442f2f2805..b832ef3327 100644
--- a/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md
+++ b/docs/pipeline_usage/tutorials/ocr_pipelines/PaddleOCR-VL.md
@@ -1087,8 +1087,8 @@ paddlex --get_pipeline_config PaddleOCR-VL
 VLRecognition:
   ...
   genai_config:
-    backend: vllm-server
-    server_url: http://127.0.0.1:8118/v1
+    backend: vllm
+    server_url: http://127.0.0.1:8118
 ```
 
 之后，可以使用修改好的配置文件进行产线调用。例如通过 CLI 调用：
diff --git a/paddlex/.version b/paddlex/.version
index 15a2799817..5f6fc5edc2 100644
--- a/paddlex/.version
+++ b/paddlex/.version
@@ -1 +1 @@
-3.3.0
+3.3.10
diff --git a/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_siglip.py b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_siglip.py
index 3b77910ba9..6257eaf12e 100644
--- a/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_siglip.py
+++ b/paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_siglip.py
@@ -111,17 +111,27 @@ def eager_attention_forward(
+    origin_dtype = query.dtype
+
+    attn_weights = paddle.matmul(x=query.scale(scaling), y=key, transpose_y=True)
+    attn_weights = attn_weights.cast(paddle.float32)
+
     if attention_mask is not None:
+        attention_mask = attention_mask.cast(paddle.float32)
         attn_weights = attn_weights + attention_mask
+
+    attn_weights = F.softmax(attn_weights, axis=-1)
+    attn_weights = attn_weights.cast(origin_dtype)
+
     attn_weights = F.dropout(attn_weights, p=dropout, training=module.training)
     attn_output = paddle.matmul(attn_weights, value)
     attn_output = attn_output.transpose((0, 2, 1, 3))
     return attn_output, attn_weights
@@ -182,6 +192,11 @@ def forward(
         cos, sin = rope_emb
         q, k = apply_rotary_pos_emb_vision(q, k, cos, sin)
+        if not self._supports_sdpa or q.dtype == paddle.float32:
+            # → [B, H, L, Dh]
+            q = q.transpose([0, 2, 1, 3])
+            k = k.transpose([0, 2, 1, 3])
+            v = v.transpose([0, 2, 1, 3])
@@ -210,6 +225,28 @@ def forward(
                 training=self.training,
             )
             attn_output = attn_output.reshape([B, L, D])
+            attn_output, _ = eager_attention_forward(
+                self,
+                q,
+                k,
+                v,
+                attention_mask,
+                is_causal=self.is_causal,
+                scaling=self.scale,
+                dropout=0.0 if not self.training else self.dropout,
+            )
+            attn_output = attn_output.reshape([B, L, D])
+        else:
+            attn_output = paddle.nn.functional.scaled_dot_product_attention(
+                q,
+                k,
+                v,
+                attention_mask,
+                dropout_p=self.dropout,
+                is_causal=self.is_causal,
+                training=self.training,
+            )
+            attn_output = attn_output.reshape([B, L, D])
 
         attn_output = self.out_proj(attn_output)
diff --git a/paddlex/inference/pipelines/components/retriever/base.py b/paddlex/inference/pipelines/components/retriever/base.py
index a348836836..a35b5577ae 100644
--- a/paddlex/inference/pipelines/components/retriever/base.py
+++ b/paddlex/inference/pipelines/components/retriever/base.py
@@ -22,8 +22,8 @@
 from .....utils.subclass_register import AutoRegisterABCMetaClass
 
 if is_dep_available("langchain"):
-    from langchain.docstore.document import Document
-    from langchain.text_splitter import RecursiveCharacterTextSplitter
+    from langchain_core.documents.base import Document
+    from langchain_text_splitters.character import RecursiveCharacterTextSplitter
 if is_dep_available("langchain-community"):
     from langchain_community import vectorstores
     from langchain_community.vectorstores import FAISS
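
A minimal standalone sketch (not part of the patch itself) for checking the relocated langchain imports used in `paddlex/inference/pipelines/components/retriever/base.py`. It assumes `langchain-core` and `langchain-text-splitters` are installed; the sample text and chunk sizes are arbitrary placeholders.

```python
# Sketch: verify the new import locations resolve and still yield Document chunks.
from langchain_core.documents.base import Document
from langchain_text_splitters.character import RecursiveCharacterTextSplitter

# Arbitrary splitter settings chosen for illustration only.
splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)
chunks = splitter.create_documents(
    ["PaddleOCR-VL extracts text, tables, and formulas from document images."]
)

# Each chunk should be a langchain_core Document, as the retriever expects.
assert all(isinstance(chunk, Document) for chunk in chunks)
print(len(chunks), chunks[0].page_content[:40])
```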