Skip to content

Commit

Permalink
formatting
Browse files (browse the repository at this point in the history)
  • Loading branch information
brambg committed Nov 21, 2023
1 parent 2bee853 commit 7480925
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 19 deletions.
1 change: 0 additions & 1 deletion pagexml/helper/file_helper.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import glob
import os
import tarfile
import zipfile
Expand Down
17 changes: 8 additions & 9 deletions pagexml/helper/pagexml_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ def read_line_format_file(line_format_files: Union[str, List[str]],
else:
if len(row) > len(headers):
raise IndexError(
f"Missing columns. Header has {len(headers)} columns while line {li+1} in row "
f"Missing columns. Header has {len(headers)} columns while line {li + 1} in row "
f"has {len(row)} columns")
yield {header: row[hi] if len(row) > hi else None for hi, header in enumerate(headers)}

Expand Down Expand Up @@ -380,19 +380,20 @@ def get_custom_tags(doc: pdm.PageXMLDoc) -> List[Dict[str, any]]:
offset = tag_el["offset"]
length = tag_el["length"]

value = line.text[offset:offset+length]
value = line.text[offset:offset + length]

custom_tags.append({
"type": tag,
"value": value,
"region_id": region.id,
"line_id": line.id,
"offset": offset,
"type": tag,
"value": value,
"region_id": region.id,
"line_id": line.id,
"offset": offset,
"length": length,
})

return custom_tags


class LineIterable:

def __init__(self, line_format_files: Union[str, List[str]], headers: List[str] = None):
Expand Down Expand Up @@ -532,5 +533,3 @@ def merge_lines(lines: List[pdm.PageXMLTextLine], remove_word_break: bool = Fals
text += curr_line.text
return pdm.PageXMLTextLine(metadata=copy.deepcopy(lines[0].metadata),
coords=coords, text=text)


17 changes: 8 additions & 9 deletions tests/physical_document_model_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from unittest.mock import Mock

import pagexml.model.physical_document_model as pdm
# from pagexml.model.physical_document_model import pdm.Coords, pdm.StructureDoc, PhysicalStructureDoc, pdm.LogicalStructureDoc


class TestCoords(unittest.TestCase):
Expand Down Expand Up @@ -52,14 +51,14 @@ def test_list_of_line_point_coords_to_hull_of_coords(self):

def test_valid_points_from_str(self):
coords = pdm.Coords('1216,1119 1205,1109 1202,1109 1198,1112 1195,1112 1191,1116 1164,1116 1160,1119 1147,1119'
' 1143,1123 1126,1123 1123,1126 1102,1126 1098,1130 1074,1130 1071,1133 1016,1133 1012,1136'
' 964,1136 961,1140 957,1140 954,1143 940,1143 937,1147 930,1147 926,1150 916,1150 912,1154'
' 899,1154 895,1157 888,1157 885,1160 882,1160 878,1164 875,1164 857,1181 847,1181 840,1188'
' 837,1188 833,1191 830,1191 826,1195 823,1195 820,1198 816,1198 813,1202 809,1202 795,1216'
' 795,1229 799,1229 802,1233 813,1233 816,1236 875,1236 878,1240 895,1240 899,1243 923,1243'
' 926,1247 1036,1247 1040,1243 1147,1243 1150,1240 1181,1240 1185,1236 1209,1236 1212,1233'
' 1216,1233 1219,1229 1219,1226 1222,1222 1222,1216 1219,1212 1219,1209 1216,1205 1216,1150'
' 1219,1147 1219,1143 1216,1140')
' 1143,1123 1126,1123 1123,1126 1102,1126 1098,1130 1074,1130 1071,1133 1016,1133 1012,1136'
' 964,1136 961,1140 957,1140 954,1143 940,1143 937,1147 930,1147 926,1150 916,1150 912,1154'
' 899,1154 895,1157 888,1157 885,1160 882,1160 878,1164 875,1164 857,1181 847,1181 840,1188'
' 837,1188 833,1191 830,1191 826,1195 823,1195 820,1198 816,1198 813,1202 809,1202 795,1216'
' 795,1229 799,1229 802,1233 813,1233 816,1236 875,1236 878,1240 895,1240 899,1243 923,1243'
' 926,1247 1036,1247 1040,1243 1147,1243 1150,1240 1181,1240 1185,1236 1209,1236 1212,1233'
' 1216,1233 1219,1229 1219,1226 1222,1222 1222,1216 1219,1212 1219,1209 1216,1205 1216,1150'
' 1219,1147 1219,1143 1216,1140')
x = [p[0] for p in coords.points]
print(min(x), max(x))
y = [p[1] for p in coords.points]
Expand Down

0 comments on commit 7480925

Please sign in to comment.