|
17 | 17 | from pdfminer.pdfpage import PDFPage
|
18 | 18 |
|
19 | 19 | from dedoc.common.exceptions.bad_file_error import BadFileFormatError
|
20 |
| -from dedoc.data_structures import BBoxAnnotation |
21 | 20 | from dedoc.data_structures.annotation import Annotation
|
| 21 | +from dedoc.data_structures.concrete_annotations.bbox_annotation import BBoxAnnotation |
22 | 22 | from dedoc.data_structures.concrete_annotations.bold_annotation import BoldAnnotation
|
23 | 23 | from dedoc.data_structures.concrete_annotations.italic_annotation import ItalicAnnotation
|
24 | 24 | from dedoc.data_structures.concrete_annotations.size_annotation import SizeAnnotation
|
@@ -175,7 +175,6 @@ def __get_line_annotations(self, lobj: LTTextLineHorizontal, k_w: float, k_h: fl
|
175 | 175 | chars_with_style = []
|
176 | 176 | rand_weight = self._get_new_weight()
|
177 | 177 | prev_style = ""
|
178 |
| - annotations: List[Annotation] |
179 | 178 |
|
180 | 179 | for lobj_char in lobj:
|
181 | 180 | if isinstance(lobj_char, LTChar) or isinstance(lobj_char, LTAnno):
|
@@ -207,7 +206,7 @@ def __get_line_annotations(self, lobj: LTTextLineHorizontal, k_w: float, k_h: fl
|
207 | 206 |
|
208 | 207 | return annotations
|
209 | 208 |
|
210 |
| - def __extract_words_bbox_annotation(self, lobj: LTTextContainer, k_w: float, k_h: float, height: int, width: int) -> List[BBoxAnnotation]: |
| 209 | + def __extract_words_bbox_annotation(self, lobj: LTTextContainer, k_w: float, k_h: float, height: int, width: int) -> List[Annotation]: |
211 | 210 | words: List[WordObj] = []
|
212 | 211 | word: WordObj = WordObj(start=0, end=0, value=LTTextContainer())
|
213 | 212 | if isinstance(lobj, LTTextLineHorizontal):
|
@@ -240,11 +239,13 @@ def __parse_style_string(self, chars_with_meta: str, begin: int, end: int) -> Li
|
240 | 239 | font, size, *_ = prev_style.split("_")
|
241 | 240 | fontname_wo_rand = font.split("+")[-1]
|
242 | 241 | styles = fontname_wo_rand.split("-")[-1]
|
| 242 | + annotations.append(StyleAnnotation(begin, end, value=fontname_wo_rand)) |
| 243 | + |
243 | 244 | if "Bold" in styles:
|
244 | 245 | annotations.append(BoldAnnotation(begin, end, value="True"))
|
245 | 246 | if "Italic" in styles:
|
246 | 247 | annotations.append(ItalicAnnotation(begin, end, value="True"))
|
247 |
| - annotations.append(StyleAnnotation(begin, end, value=fontname_wo_rand)) |
| 248 | + |
248 | 249 | if size.replace(".", "", 1).isnumeric():
|
249 | 250 | annotations.append(SizeAnnotation(begin, end, value=size))
|
250 | 251 |
|
|
0 commit comments