diff --git a/paddlex/inference/pipelines/paddleocr_vl/pipeline.py b/paddlex/inference/pipelines/paddleocr_vl/pipeline.py index c854f21e5..1c24a26d3 100644 --- a/paddlex/inference/pipelines/paddleocr_vl/pipeline.py +++ b/paddlex/inference/pipelines/paddleocr_vl/pipeline.py @@ -253,12 +253,14 @@ def get_layout_parsing_results( blocks.append(blocks_for_img) for j, block in enumerate(blocks_for_img): block_img = block["img"] - block_label = block["label"] + text_prompt = block["label"] + block_label = block["label"].lower() if block_label not in image_labels and block_img is not None: figure_token_map = {} - text_prompt = "OCR:" drop_figures = [] - if block_label == "table": + if block_label == "ocr": + text_prompt = "OCR:" + elif block_label == "table": text_prompt = "Table Recognition:" block_img, figure_token_map, drop_figures = ( tokenize_figure_of_table( @@ -308,7 +310,7 @@ def get_layout_parsing_results( for j, block in enumerate(blocks_for_img): block_img = block["img"] block_bbox = block["box"] - block_label = block["label"] + block_label = block["label"].lower() block_content = "" if curr_vlm_block_idx < len(vlm_block_ids) and vlm_block_ids[ curr_vlm_block_idx @@ -447,12 +449,17 @@ def predict( prompt_label = prompt_label if prompt_label else "ocr" if prompt_label.lower() == "chart": model_settings["use_chart_recognition"] = True - assert prompt_label.lower() in [ + if prompt_label.lower() not in [ "ocr", "formula", "table", "chart", - ], f"Layout detection is disabled (use_layout_detection=False). 'prompt_label' must be one of ['ocr', 'formula', 'table', 'chart'], but got '{prompt_label}'." + ]: + logging.warning( + f"Layout detection is disabled (use_layout_detection=False). " + f"'prompt_label' must be one of ['ocr', 'formula', 'table', 'chart'], " + f"but got '{prompt_label}'. Program will continue anyway." + ) def _process_cv(batch_data, new_batch_size=None): if not new_batch_size: @@ -510,7 +517,7 @@ def _process_cv(batch_data, new_batch_size=None): "boxes": [ { "cls_id": 0, - "label": prompt_label.lower(), + "label": prompt_label, "score": 1, "coordinate": [ 0, diff --git a/paddlex/inference/pipelines/paddleocr_vl/result.py b/paddlex/inference/pipelines/paddleocr_vl/result.py index 85e4c422b..8d0a32848 100644 --- a/paddlex/inference/pipelines/paddleocr_vl/result.py +++ b/paddlex/inference/pipelines/paddleocr_vl/result.py @@ -510,7 +510,7 @@ def _to_markdown(self, pretty=True, show_formula_number=False) -> dict: markdown_info["markdown_images"][block.image["path"]] = block.image[ "img" ] - handle_func = handle_funcs_dict.get(label, None) + handle_func = handle_funcs_dict.get(label, lambda block: block.content) if ( show_formula_number and (label == "display_formula" or label == "formula")