diff --git a/docs/pipeline_usage/tutorials/cv_pipelines/open_vocabulary_detection.md b/docs/pipeline_usage/tutorials/cv_pipelines/open_vocabulary_detection.md
index dc2405342..bdcbd0792 100644
--- a/docs/pipeline_usage/tutorials/cv_pipelines/open_vocabulary_detection.md
+++ b/docs/pipeline_usage/tutorials/cv_pipelines/open_vocabulary_detection.md
@@ -71,7 +71,7 @@ paddlex --pipeline open_vocabulary_detection \
 
 ```python
 from paddlex import create_pipeline
-pipeline = create_pipeline(pipeline_name="open_vocabulary_detection")
+pipeline = create_pipeline(pipeline="open_vocabulary_detection")
 output = pipeline.predict(input="open_vocabulary_detection.jpg", prompt="bus . walking man . rearview mirror .")
 for res in output:
     res.print()
diff --git a/docs/pipeline_usage/tutorials/cv_pipelines/open_vocabulary_segmentation.md b/docs/pipeline_usage/tutorials/cv_pipelines/open_vocabulary_segmentation.md
index 1bca24289..b64d4363a 100644
--- a/docs/pipeline_usage/tutorials/cv_pipelines/open_vocabulary_segmentation.md
+++ b/docs/pipeline_usage/tutorials/cv_pipelines/open_vocabulary_segmentation.md
@@ -73,7 +73,7 @@ paddlex --pipeline open_vocabulary_segmentation \
 
 ```python
 from paddlex import create_pipeline
-pipeline = create_pipeline(pipeline_name="open_vocabulary_segmentation")
+pipeline = create_pipeline(pipeline="open_vocabulary_segmentation")
 output = pipeline.predict(input="open_vocabulary_segmentation.jpg", prompt_type="box", prompt=[[112.9,118.4,513.8,382.1],[4.6,263.6,92.2,336.6],[592.4,260.9,607.2,294.2]])
 for res in output:
     res.print()
diff --git a/paddlex/inference/models/open_vocabulary_detection/predictor.py b/paddlex/inference/models/open_vocabulary_detection/predictor.py
index 7cf0cc3cf..6faa23d7f 100644
--- a/paddlex/inference/models/open_vocabulary_detection/predictor.py
+++ b/paddlex/inference/models/open_vocabulary_detection/predictor.py
@@ -94,8 +94,8 @@ def process(
             dict: A dictionary containing the input path, raw image, class IDs, scores, and label names
                 for every instance of the batch. Keys include 'input_path', 'input_img', 'class_ids', 'scores', and 'label_names'.
         """
-        image_paths = batch_data
-        src_images = self.pre_ops[0](batch_data)
+        image_paths = batch_data.input_paths
+        src_images = self.pre_ops[0](batch_data.instances)
         datas = src_images
         # preprocess
         for pre_op in self.pre_ops[1:-1]:
@@ -117,7 +117,7 @@ def process(
 
         return {
             "input_path": image_paths,
-            "input_img": src_images,
+            "input_img": [img[..., ::-1] for img in src_images],
             "boxes": boxes,
         }
 
diff --git a/paddlex/inference/models/open_vocabulary_segmentation/predictor.py b/paddlex/inference/models/open_vocabulary_segmentation/predictor.py
index c0fe7503f..25a462228 100644
--- a/paddlex/inference/models/open_vocabulary_segmentation/predictor.py
+++ b/paddlex/inference/models/open_vocabulary_segmentation/predictor.py
@@ -90,8 +90,8 @@ def process(self, batch_data: List[Any], prompts: Dict[str, Any]):
             dict: A dictionary containing the input path, raw image, class IDs, scores, and label names
                 for every instance of the batch. Keys include 'input_path', 'input_img', 'class_ids', 'scores', and 'label_names'.
         """
-        image_paths = batch_data
-        src_images = self.pre_ops[0](batch_data)
+        image_paths = batch_data.input_paths
+        src_images = self.pre_ops[0](batch_data.instances)
         datas = src_images
         # preprocess
         for pre_op in self.pre_ops[1:-1]: