Skip to content

Commit df9d43b

Browse files
delete 无用字段
1 parent 5db9102 commit df9d43b

File tree

1 file changed

+5
-4
lines changed

1 file changed

+5
-4
lines changed

magic_pdf/pipeline_txt.py

+5 -4
Original file line number | Diff line number | Diff line change
@@ -57,12 +57,13 @@ def txt_pdf_to_mm_markdown_format(jso: dict, debug_mode=False) -> dict:
5757
pdf_intermediate_dict = JsonCompressor.decompress_json(pdf_intermediate_dict)
5858
standard_format = mk_universal_format(pdf_intermediate_dict)
5959
mm_content = mk_mm_markdown(standard_format)
60-
jso["content_list"] = mm_content
60+
jso["content"] = mm_content
6161
logger.info(f"book_name is:{get_data_source(jso)}/{jso['file_id']},content_list length is {len(standard_format)}",)
6262
# 把无用的信息清空
63-
jso["doc_layout_result"] = ""
64-
jso["pdf_intermediate_dict"] = ""
65-
jso["pdf_meta"] = ""
63+
to_del_keys = ["doc_layout_result", "pdf_intermediate_dict", "pdf_meta", "parsed_result"]
64+
for key in to_del_keys:
65+
if jso.get(key):
66+
del jso[key]
6667
except Exception as e:
6768
jso = exception_handler(jso, e)
6869
return jso

0 commit comments

Comments
 (0)