From 45cd340d9619029de6e30a2dda1ba49b6e88e741 Mon Sep 17 00:00:00 2001 From: DeltaDaniel <139119540+DeltaDaniel@users.noreply.github.com> Date: Wed, 10 Jul 2024 23:32:24 +0200 Subject: [PATCH] ignore ahb tables where no pruefi is provided --- src/kohlrahbi/ahb/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/kohlrahbi/ahb/__init__.py b/src/kohlrahbi/ahb/__init__.py index 5a4c6609..fd2c6c03 100644 --- a/src/kohlrahbi/ahb/__init__.py +++ b/src/kohlrahbi/ahb/__init__.py @@ -189,7 +189,9 @@ def extract_pruefis_from_table(table: Table) -> list[str]: def table_header_contains_text_pruefidentifikator(table: Table) -> bool: """Checks if the table header contains the text 'Prüfidentifikator'.""" - return table.row_cells(0)[-1].paragraphs[-1].text.startswith("Prüfidentifikator") # type:ignore[no-any-return] + pattern = r"Prüfidentifikator(?:\t){0,10}\t\d+" + # matches "Prüfidentifikator" followed by at least one tab-separated number; the maximum of 11 pruefis is chosen arbitrarily + return bool(re.search(pattern, table.row_cells(0)[-1].text)) def get_pruefi_to_file_mapping(basic_input_path: Path, format_version: EdifactFormatVersion) -> dict[str, str]: