4
4
from loguru import logger
5
5
from pathlib import Path
6
6
7
- from magic_pdf .dict2md .ocr_mkcontent import mk_nlp_markdown , mk_mm_markdown
7
+ from magic_pdf .dict2md .ocr_mkcontent import ocr_mk_nlp_markdown , ocr_mk_mm_markdown
8
8
from magic_pdf .libs .commons import join_path
9
9
from magic_pdf .pdf_parse_by_ocr import parse_pdf_by_ocr
10
10
@@ -30,12 +30,12 @@ def read_json_file(file_path):
30
30
31
31
32
32
if __name__ == '__main__' :
33
- # ocr_pdf_path = r"D:\project\20231108code-clean\ocr\new\双栏\s0043-1354(02)00581-x.pdf"
34
- # ocr_json_file_path = r"D:\project\20231108code-clean\ocr\new\双栏\s0043-1354(02)00581-x.json"
33
+ ocr_pdf_path = r"D:\project\20231108code-clean\ocr\new\双栏\s0043-1354(02)00581-x.pdf"
34
+ ocr_json_file_path = r"D:\project\20231108code-clean\ocr\new\双栏\s0043-1354(02)00581-x.json"
35
35
# ocr_pdf_path = r"D:\project\20231108code-clean\ocr\new\双栏\j.1540-627x.2006.00176.x.pdf"
36
36
# ocr_json_file_path = r"D:\project\20231108code-clean\ocr\new\双栏\j.1540-627x.2006.00176.x.json"
37
- ocr_pdf_path = r"D:\project\20231108code-clean\ocr\new\demo_4\ocr_demo\ocr_1_org.pdf"
38
- ocr_json_file_path = r"D:\project\20231108code-clean\ocr\new\demo_4\ocr_demo\ocr_1 .json"
37
+ # ocr_pdf_path = r"D:\project\20231108code-clean\ocr\new\demo_4\ocr_demo\ocr_1_org.pdf"
38
+ # ocr_json_file_path = r"D:\project\20231108code-clean\ocr\new\demo_4\ocr_demo\ocr_1_fix .json"
39
39
try :
40
40
ocr_pdf_model_info = read_json_file (ocr_json_file_path )
41
41
pth = Path (ocr_json_file_path )
@@ -56,8 +56,8 @@ def read_json_file(file_path):
56
56
if not os .path .exists (parent_dir ):
57
57
os .makedirs (parent_dir )
58
58
59
- # markdown_content = mk_nlp_markdown (pdf_info_dict)
60
- markdown_content = mk_mm_markdown (pdf_info_dict )
59
+ # markdown_content = ocr_mk_nlp_markdown (pdf_info_dict)
60
+ markdown_content = ocr_mk_mm_markdown (pdf_info_dict )
61
61
62
62
with open (text_content_save_path , "w" , encoding = "utf-8" ) as f :
63
63
f .write (markdown_content )
0 commit comments