|
2 | 2 | from collections import namedtuple
|
3 | 3 | from typing import Dict, Iterator, List, Optional, Set, Tuple
|
4 | 4 |
|
5 |
| -import numpy as np |
6 | 5 | from dedocutils.data_structures.bbox import BBox
|
7 | 6 | from numpy import ndarray
|
8 | 7 |
|
|
13 | 12 | from dedoc.readers.pdf_reader.data_classes.line_with_location import LineWithLocation
|
14 | 13 | from dedoc.readers.pdf_reader.data_classes.pdf_image_attachment import PdfImageAttachment
|
15 | 14 | from dedoc.readers.pdf_reader.data_classes.tables.scantable import ScanTable
|
16 |
| -from dedoc.readers.pdf_reader.pdf_image_reader.table_recognizer.gost_frame_recognizer import GOSTFrameRecognizer |
17 | 15 |
|
18 | 16 |
|
19 | 17 | ParametersForParseDoc = namedtuple("ParametersForParseDoc", [
|
@@ -164,7 +162,7 @@ def _process_document_with_gost_frame(self, images: Iterator[ndarray], first_pag
|
164 | 162 | page_range = range(first_page, first_page + len(gost_analyzed_images))
|
165 | 163 | gost_analyzed_images = dict(zip(page_range, gost_analyzed_images))
|
166 | 164 | if isinstance(self, PdfTxtlayerReader):
|
167 |
| - self.gost_frame_boxes = dict(zip(page_range, [item[1] for item in gost_analyzed_images.values()])) |
| 165 | + self.gost_frame_boxes = dict(zip(page_range, [(item[1], item[2]) for item in gost_analyzed_images.values()])) |
168 | 166 | result = Parallel(n_jobs=self.config["n_jobs"])(
|
169 | 167 | delayed(self._process_one_page)(image, parameters, page_number, path) for page_number, (image, box, original_image_shape) in
|
170 | 168 | gost_analyzed_images.items()
|
|
0 commit comments