Skip to content
This repository has been archived by the owner on Sep 11, 2024. It is now read-only.

Commit

Permalink
Merge pull request #42 from climatepolicyradar/bugfix/add-logging-to-index-data
Browse files (browse the repository at this point in the history)

Bugfix/add logging to index data
  • Loading branch information
THOR300 authored Sep 20, 2023
2 parents 767a229 + bd3a3dd commit 94f134c
Showing 1 changed file with 21 additions and 15 deletions.
36 changes: 21 additions & 15 deletions src/cpr_data_access/parser_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,21 +328,27 @@ def vertically_flip_text_block_coords(self: _PO) -> _PO:
page.page_number: page.dimensions[1] for page in self.pdf_data.page_metadata
}

for text_block in self.pdf_data.text_blocks:
if text_block.coords is not None and text_block.page_number is not None:
text_block.coords = [
(x, page_height_map[text_block.page_number] - y)
for x, y in text_block.coords
]

# flip top and bottom so y values are still increasing as you go
# through the coordinates list
text_block.coords = [
text_block.coords[3],
text_block.coords[2],
text_block.coords[1],
text_block.coords[0],
]
try:
for text_block in self.pdf_data.text_blocks:
if text_block.coords is not None and text_block.page_number is not None:
text_block.coords = [
(x, page_height_map[text_block.page_number] - y)
for x, y in text_block.coords
]

# flip top and bottom so y values are still increasing as you go
# through the coordinates list
text_block.coords = [
text_block.coords[3],
text_block.coords[2],
text_block.coords[1],
text_block.coords[0],
]
except Exception:
logger.exception(
"Error flipping text block coordinates.",
extra={"props": {"document_id": self.document_id}},
)

return self

Expand Down

0 comments on commit 94f134c

Please sign in to comment.