diff --git a/paddlex/inference/models/doc_vlm/predictor.py b/paddlex/inference/models/doc_vlm/predictor.py
index 2dd5c84d4..2f212310f 100644
--- a/paddlex/inference/models/doc_vlm/predictor.py
+++ b/paddlex/inference/models/doc_vlm/predictor.py
@@ -405,110 +405,123 @@ def _genai_client_process(
         min_pixels,
         max_pixels,
     ):
+        import ctypes
+        import gc
+        import platform
+
+        def _force_memory_compact():
+            # Collect Python-level garbage, then ask glibc to return freed
+            # heap pages to the OS; malloc_trim is glibc-only, hence the
+            # Linux guard and the broad except.
+            gc.collect()
+            if platform.system() == "Linux":
+                try:
+                    libc = ctypes.CDLL("libc.so.6")
+                    libc.malloc_trim(0)
+                except Exception:
+                    pass
+
         lock = Lock()
 
         def _process(item):
             image = item["image"]
-            if isinstance(image, str):
-                if image.startswith("http://") or image.startswith("https://"):
-                    image_url = image
+            image_url = None
+
+            try:
+                if isinstance(image, str):
+                    if image.startswith("http://") or image.startswith("https://"):
+                        image_url = image
+                    else:
+                        from PIL import Image
+
+                        with Image.open(image) as img:
+                            img = img.convert("RGB")
+                            with io.BytesIO() as buf:
+                                img.save(buf, format="JPEG")
+                                image_url = "data:image/jpeg;base64," + base64.b64encode(
+                                    buf.getvalue()
+                                ).decode("ascii")
+                elif isinstance(image, np.ndarray):
+                    import cv2
+
+                    # Encode the BGR array straight to JPEG; cv2.imencode
+                    # expects BGR input, so the old cvtColor/PIL round-trip
+                    # and its extra copies are unnecessary.
+                    success, buffer = cv2.imencode(
+                        ".jpg", image, [int(cv2.IMWRITE_JPEG_QUALITY), 95]
+                    )
+                    if not success:
+                        raise ValueError("Encode failed")
+
+                    b64_str = base64.b64encode(buffer).decode("ascii")
+                    image_url = f"data:image/jpeg;base64,{b64_str}"
+
+                    del buffer
+                    del b64_str
+                else:
-                else:
-                    from PIL import Image
-
-                    with Image.open(image) as img:
-                        img = img.convert("RGB")
-                        with io.BytesIO() as buf:
-                            img.save(buf, format="JPEG")
-                            image_url = "data:image/jpeg;base64," + base64.b64encode(
-                                buf.getvalue()
-                            ).decode("ascii")
-            elif isinstance(image, np.ndarray):
-                import cv2
-                from PIL import Image
-
-                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-                img = Image.fromarray(image)
-                with io.BytesIO() as buf:
-                    img.save(buf, format="JPEG")
-                    image_url = "data:image/jpeg;base64," + base64.b64encode(
-                        buf.getvalue()
-                    ).decode("ascii")
-            else:
-                raise TypeError(f"Not supported image type: {type(image)}")
+                    raise TypeError(f"Not supported image type: {type(image)}")
 
-            if self._genai_client.backend == "fastdeploy-server":
-                kwargs = {
-                    "temperature": 1 if temperature is None else temperature,
-                    "top_p": 0 if top_p is None else top_p,
-                }
-            else:
-                kwargs = {
-                    "temperature": 0 if temperature is None else temperature,
-                }
-                if top_p is not None:
-                    kwargs["top_p"] = top_p
+                # Release the decoded image as soon as the data URL exists so
+                # the worker thread does not keep it alive for the request.
+                del image
+                item["image"] = None
 
-            if max_new_tokens is not None:
-                kwargs["max_completion_tokens"] = max_new_tokens
-            elif self.model_name in self.model_group["PaddleOCR-VL"]:
-                kwargs["max_completion_tokens"] = 8192
-
-            kwargs["extra_body"] = {}
-            if skip_special_tokens is not None:
-                if self._genai_client.backend in (
-                    "fastdeploy-server",
-                    "vllm-server",
-                    "sglang-server",
-                ):
-                    kwargs["extra_body"]["skip_special_tokens"] = skip_special_tokens
+                if self._genai_client.backend == "fastdeploy-server":
+                    kwargs = {
+                        "temperature": 1 if temperature is None else temperature,
+                        "top_p": 0 if top_p is None else top_p,
+                    }
                 else:
-                    raise ValueError("Not supported")
+                    kwargs = {
+                        "temperature": 0 if temperature is None else temperature,
+                    }
+                    if top_p is not None:
+                        kwargs["top_p"] = top_p
+
+                if max_new_tokens is not None:
+                    kwargs["max_completion_tokens"] = max_new_tokens
+                elif self.model_name in self.model_group["PaddleOCR-VL"]:
+                    kwargs["max_completion_tokens"] = 8192
+
+                kwargs["extra_body"] = {}
+                if skip_special_tokens is not None:
+                    if self._genai_client.backend in (
+                        "fastdeploy-server",
+                        "vllm-server",
+                        "sglang-server",
+                    ):
+                        kwargs["extra_body"]["skip_special_tokens"] = skip_special_tokens
+                    else:
+                        raise ValueError("Not supported")
+
+                if repetition_penalty is not None:
+                    kwargs["extra_body"]["repetition_penalty"] = repetition_penalty
 
-            if repetition_penalty is not None:
-                kwargs["extra_body"]["repetition_penalty"] = repetition_penalty
-
-            if min_pixels is not None:
-                if self._genai_client.backend == "vllm-server":
-                    kwargs["extra_body"]["mm_processor_kwargs"] = kwargs[
-                        "extra_body"
-                    ].get("mm_processor_kwargs", {})
-                    kwargs["extra_body"]["mm_processor_kwargs"][
-                        "min_pixels"
-                    ] = min_pixels
-                else:
-                    warnings.warn(
-                        f"{repr(self._genai_client.backend)} does not support `min_pixels`."
-                    )
+                if min_pixels is not None:
+                    if self._genai_client.backend == "vllm-server":
+                        kwargs["extra_body"].setdefault("mm_processor_kwargs", {})[
+                            "min_pixels"
+                        ] = min_pixels
+                    else:
+                        warnings.warn(
+                            f"{repr(self._genai_client.backend)} does not support `min_pixels`."
+                        )
 
-            if max_pixels is not None:
-                if self._genai_client.backend == "vllm-server":
-                    kwargs["extra_body"]["mm_processor_kwargs"] = kwargs[
-                        "extra_body"
-                    ].get("mm_processor_kwargs", {})
-                    kwargs["extra_body"]["mm_processor_kwargs"][
-                        "max_pixels"
-                    ] = max_pixels
-                else:
-                    warnings.warn(
-                        f"{repr(self._genai_client.backend)} does not support `max_pixels`."
+                if max_pixels is not None:
+                    if self._genai_client.backend == "vllm-server":
+                        kwargs["extra_body"].setdefault("mm_processor_kwargs", {})[
+                            "max_pixels"
+                        ] = max_pixels
+                    else:
+                        warnings.warn(
+                            f"{repr(self._genai_client.backend)} does not support `max_pixels`."
+                        )
+
+                with lock:
+                    future = self._genai_client.create_chat_completion(
+                        [
+                            {
+                                "role": "user",
+                                "content": [
+                                    {"type": "image_url", "image_url": {"url": image_url}},
+                                    {"type": "text", "text": item["query"]},
+                                ],
+                            }
+                        ],
+                        return_future=True,
+                        timeout=600,
+                        **kwargs,
                     )
-
-            with lock:
-                future = self._genai_client.create_chat_completion(
-                    [
-                        {
-                            "role": "user",
-                            "content": [
-                                {"type": "image_url", "image_url": {"url": image_url}},
-                                {"type": "text", "text": item["query"]},
-                            ],
-                        }
-                    ],
-                    return_future=True,
-                    timeout=600,
-                    **kwargs,
-                )
-            return future
+                return future
+            except Exception as e:
+                logging.error(f"Processing error: {e}")
+                raise
+            finally:
+                # Best-effort cleanup for error paths; on the success path
+                # these names were already unbound above.
+                if "image" in locals():
+                    del image
+                if "buffer" in locals():
+                    del buffer
+                if "b64_str" in locals():
+                    del b64_str
 
     if len(data) > 1:
         futures = list(self._thread_pool.map(_process, data))
@@ -519,5 +532,7 @@ def _process(item):
         for future in futures:
             result = future.result()
             results.append(result.choices[0].message.content)
+
+        _force_memory_compact()
 
         return results