Skip to content

Commit

Permalink
✨ Allow Minimal Sanity Check for XML/CSV Files via MAUS (#91)
Browse files Browse the repository at this point in the history
  • Loading branch information
hf-kklein authored May 9, 2022
1 parent 74b033e commit 1965511
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 2 deletions.
9 changes: 9 additions & 0 deletions src/maus/reader/flat_ahb_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,15 @@ def to_flat_ahb(self) -> FlatAnwendungshandbuch:
raise NotImplementedError("The inheriting class has to implement this method")


def check_file_can_be_parsed_as_ahb_csv(file_path: Path) -> None:
    """
    Run a basic, minimal sanity check on a file that is supposed to be an AHB CSV.

    The check consists solely of instantiating a FlatAhbCsvReader for the given path;
    it is deliberately not a sophisticated analysis.
    Nothing is returned when the check passes. If the reader cannot be constructed,
    the exception it raises is propagated to the caller.
    """
    # The instance itself is not needed: constructing it is the entire check.
    FlatAhbCsvReader(file_path)


class FlatAhbCsvReader(FlatAhbReader):
"""
reads csv files and returns AHBs
Expand Down
11 changes: 11 additions & 0 deletions src/maus/reader/mig_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from lxml import etree # type:ignore[import]

from maus import SegmentGroupHierarchy
from maus.edifact import EdifactFormat
from maus.models._internal import EdifactStackSearchStrategy, MigFilterResult
from maus.models.edifact_components import EdifactStack, EdifactStackLevel, EdifactStackQuery
from maus.reader.etree_element_helpers import (
Expand Down Expand Up @@ -47,6 +48,16 @@ def get_edifact_stack(self, query: EdifactStackQuery) -> Optional[EdifactStack]:
Result = TypeVar("Result")  #: type variable standing for an "arbitrary but same" type within a generic function


def check_file_can_be_parsed_as_mig_xml(file_path: Path) -> None:
    """
    Run a basic, minimal sanity check on a file that is supposed to be a MIG XML.

    A MigXmlReader is created for the given path and the format name it reports is
    converted into an EdifactFormat. This is deliberately not a sophisticated analysis.
    Nothing is returned when the check passes. Any exception raised along the way
    (by the reader or by the EdifactFormat conversion) is propagated to the caller.
    """
    format_name = MigXmlReader(file_path).get_format_name()
    # The conversion result is discarded; it raises if the format name is not a valid value.
    EdifactFormat(format_name)


# pylint:disable=c-extension-no-member
class MigXmlReader(MigReader):
"""
Expand Down
7 changes: 6 additions & 1 deletion tests/unit_tests/test_ahb_csv_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import pytest # type:ignore[import]

from maus.reader.flat_ahb_reader import FlatAhbCsvReader
from maus.reader.flat_ahb_reader import FlatAhbCsvReader, check_file_can_be_parsed_as_ahb_csv


class TestAhbCsvReader:
Expand Down Expand Up @@ -235,3 +235,8 @@ def test_csv_file_reading_11042(self, datafiles):
def test_merging_of_section_only_lines(self, input_lines: List[dict], expected_lines: List[dict]):
actual = FlatAhbCsvReader.merge_section_only_lines(input_lines)
assert actual == expected_lines

@pytest.mark.datafiles("./ahbs/FV2204/UTILMD/11042.csv")
def test_is_parsable(self, datafiles):
    """The minimal sanity check accepts the 11042 AHB CSV without raising."""
    csv_path = Path(datafiles) / "11042.csv"
    # There is nothing to assert: the test succeeds as long as no exception is raised.
    check_file_can_be_parsed_as_ahb_csv(csv_path)
7 changes: 6 additions & 1 deletion tests/unit_tests/test_mig_xml_reader_real_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from lxml.etree import Element # type:ignore[import]

from maus.models.edifact_components import EdifactStackQuery
from maus.reader.mig_reader import MigXmlReader
from maus.reader.mig_reader import MigXmlReader, check_file_can_be_parsed_as_mig_xml

ALL_MIG_XML_FILES = pytest.mark.datafiles(
"./migs/FV2204/template_xmls/utilmd_1131.xml",
Expand Down Expand Up @@ -261,3 +261,8 @@ def test_simple_paths(
actual_stack = reader.get_edifact_stack(query)
assert actual_stack is not None
assert actual_stack.to_json_path() == expected_path # type:ignore[union-attr]

@ALL_MIG_XML_FILES
def test_is_parsable(self, datafiles):
    """The minimal sanity check accepts the utilmd_3225 MIG XML without raising."""
    xml_path = Path(datafiles) / "utilmd_3225.xml"
    # There is nothing to assert: the test succeeds as long as no exception is raised.
    check_file_can_be_parsed_as_mig_xml(xml_path)

0 comments on commit 1965511

Please sign in to comment.