Skip to content

Commit f06a321

Browse files
committed
make多模态markdown时图片地址更改为fullpath
1 parent 59b0b0c commit f06a321

File tree

1 file changed

+9
-1
lines changed

1 file changed

+9
-1
lines changed

magic_pdf/dict2md/ocr_mkcontent.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def ocr_mk_mm_markdown(pdf_info_dict: dict):
4242
if not span.get('image_path'):
4343
continue
4444
else:
45-
content = f"![]({span['image_path']})"
45+
content = f"![](s3://mllm-raw-media/pdf2md_img/{span['image_path']})"
4646
else:
4747
content = ocr_escape_special_markdown_char(span['content']) # 转义特殊符号
4848
if span['type'] == ContentType.InlineEquation:
@@ -53,3 +53,11 @@ def ocr_mk_mm_markdown(pdf_info_dict: dict):
5353
# Append two spaces at the end of the line to force a Markdown line break
5454
markdown.append(line_text.strip() + ' ')
5555
return '\n'.join(markdown)
56+
57+
def ocr_mk_mm_standard_format():
58+
'''
59+
content_list
60+
type string image/text/table/equation (display equations are extracted as separate items; inline equations are merged into the surrounding text)
61+
62+
'''
63+
pass

0 commit comments

Comments
 (0)