DS4SD · cau-git · Sep 16, 2024 · Sep 18, 2024 · Sep 18, 2024 · Sep 18, 2024
diff --git a/docling/backend/docling_parse_backend.py b/docling/backend/docling_parse_backend.py
@@ -14,6 +14,10 @@
 
 _log = logging.getLogger(__name__)
 
+# Single parser instance reused by the document backend below. It is bound to
+# its own name so the imported ``pdf_parser`` callable is not shadowed.
+_shared_pdf_parser = pdf_parser()
+
 
 class DoclingParsePageBackend(PdfPageBackend):
     def __init__(
@@ -190,7 +194,7 @@ def __init__(self, path_or_stream: Union[BytesIO, Path], document_hash: str):
         super().__init__(path_or_stream, document_hash)
 
         self._pdoc = pdfium.PdfDocument(path_or_stream)
-        self.parser = pdf_parser()
+        self.parser = _shared_pdf_parser
 
         success = False
         if isinstance(path_or_stream, BytesIO):

diff --git a/tests/test_backend_docling_parse.py b/tests/test_backend_docling_parse.py
@@ -17,7 +17,7 @@ def test_doc_path():
 def test_text_cell_counts():
     pdf_doc = Path("./tests/data/redp5695.pdf")
 
-    doc_backend = DoclingParseDocumentBackend(pdf_doc, "123456xyz")
+    doc_backend = DoclingParseDocumentBackend(pdf_doc, "123456xyz5")
 
     for page_index in range(0, doc_backend.page_count()):
         last_cell_count = None
@@ -36,7 +36,7 @@ def test_text_cell_counts():
 
 
 def test_get_text_from_rect(test_doc_path):
-    doc_backend = DoclingParseDocumentBackend(test_doc_path, "123456xyz")
+    doc_backend = DoclingParseDocumentBackend(test_doc_path, "123456xyz4")
     page_backend: DoclingParsePageBackend = doc_backend.load_page(0)
 
     # Get the title text of the DocLayNet paper
@@ -46,19 +46,22 @@ def test_get_text_from_rect(test_doc_path):
     ref = "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis"
 
     assert textpiece.strip() == ref
+    doc_backend.unload()
 
 
 def test_crop_page_image(test_doc_path):
-    doc_backend = DoclingParseDocumentBackend(test_doc_path, "123456xyz")
+    doc_backend = DoclingParseDocumentBackend(test_doc_path, "123456xyz3")
     page_backend: DoclingParsePageBackend = doc_backend.load_page(0)
 
     # Crop out "Figure 1" from the DocLayNet paper
     im = page_backend.get_page_image(
         scale=2, cropbox=BoundingBox(l=317, t=246, r=574, b=527)
     )
     # im.show()
+    doc_backend.unload()
 
 
 def test_num_pages(test_doc_path):
-    doc_backend = DoclingParseDocumentBackend(test_doc_path, "123456xyz")
-    doc_backend.page_count() == 9
+    doc_backend = DoclingParseDocumentBackend(test_doc_path, "123456xyz2")
+    assert doc_backend.page_count() == 9
+    doc_backend.unload()