From 19ee077ce1d2904c7eb4bc770e076f9249649492 Mon Sep 17 00:00:00 2001 From: Bram Buitendijk Date: Tue, 14 Mar 2023 17:35:51 +0100 Subject: [PATCH] formatting --- pagexml/helper/file_helper.py | 4 ++-- pagexml/parser.py | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pagexml/helper/file_helper.py b/pagexml/helper/file_helper.py index a9e8795..71a194f 100644 --- a/pagexml/helper/file_helper.py +++ b/pagexml/helper/file_helper.py @@ -187,8 +187,8 @@ def __iter__(self): yield file_info, file_data -def get_archiver_mode(page_archive_file: str) -> Tuple[Literal["tar", "zip", "py7zr"], -Literal["r", "r:", "r:gz", "r:bz2"]]: +def get_archiver_mode(page_archive_file: str) -> \ + Tuple[Literal["tar", "zip", "py7zr"], Literal["r", "r:", "r:gz", "r:bz2"]]: archived_fname_dir, archived_fname_file, archived_fname_ext = parse_archived_filename(page_archive_file) if archived_fname_ext in {".tar.gz", ".tgz"}: return "tar", "r:gz" diff --git a/pagexml/parser.py b/pagexml/parser.py index 041e971..18ed523 100644 --- a/pagexml/parser.py +++ b/pagexml/parser.py @@ -7,9 +7,9 @@ import xmltodict from dateutil.parser import parse as date_parse +import pagexml.model.physical_document_model as pdm from pagexml.helper.file_helper import read_page_archive_file from pagexml.model.physical_document_model import Baseline, Coords, parse_derived_coords -import pagexml.model.physical_document_model as pdm def parse_coords(coords: dict) -> Union[Coords, None]: @@ -52,10 +52,10 @@ def parse_line_words(textline: dict) -> List[pdm.PageXMLWord]: if "@conf" in word_dict["TextEquiv"]: conf = word_dict["TextEquiv"]["@conf"] word = pdm.PageXMLWord(text=unicode_string, - doc_id=word_dict['@id'] if '@id' in word_dict else None, - metadata=parse_custom_metadata(word_dict) if '@custom' in word_dict else None, - coords=parse_coords(word_dict["Coords"]), - conf=conf) + doc_id=word_dict['@id'] if '@id' in word_dict else None, + metadata=parse_custom_metadata(word_dict) if '@custom' in word_dict else None, + coords=parse_coords(word_dict["Coords"]), + conf=conf) words.append(word) except TypeError: print('Unexpected format for Word Unicode representation:', word_dict)