Skip to content

Commit

Permalink
filter for most recent file (#321)
Browse files Browse the repository at this point in the history
* filter for most recent file

* Added test
  • Loading branch information
DeltaDaniel committed May 21, 2024
1 parent 73e4e0c commit 6f162a7
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 6 deletions.
18 changes: 12 additions & 6 deletions src/kohlrahbi/docxfilefinder.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,14 +112,20 @@ def filter_latest_version(groups: dict[str, list[Path]]) -> list[Path]:
if len(group_items) == 1:
result.append(group_items[0])
else:
for path in group_items:
if (
"KonsolidierteLesefassungmitFehlerkorrekturen" in path.name
most_recent_file = max(
(
path
for path in group_items
if "KonsolidierteLesefassungmitFehlerkorrekturen" in path.name
or "AußerordentlicheVeröffentlichung" in path.name
):
result.append(path)
else:
),
key=lambda path: (int(path.stem.split("_")[-1])),
)
for path in group_items:
if path != most_recent_file:
logger.debug("Ignoring file %s", path.name)
else:
result.append(most_recent_file)
return result

def filter_for_latest_mig_and_ahb_docx_files(self) -> None:
Expand Down
37 changes: 37 additions & 0 deletions unittests/test_docx_extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
tests all the features the kohlrahbi package provides to process Docx files (by using the docx package)
"""

from pathlib import Path
from typing import Generator

import pytest
Expand All @@ -11,6 +12,7 @@
from docx.table import Table
from docx.text.paragraph import Paragraph

from kohlrahbi.docxfilefinder import DocxFileFinder
from kohlrahbi.read_functions import get_all_paragraphs_and_tables


Expand Down Expand Up @@ -40,3 +42,38 @@ def test_get_all_paragraphs_and_tables(self, create_docx_from_filename: Document
actual = list(get_all_paragraphs_and_tables(create_docx_from_filename))
assert len(actual) == expected_length
assert all([isinstance(x, Table) or isinstance(x, Paragraph) for x in actual]) is True

@pytest.mark.parametrize(
    "all_file_paths, filtered_file_paths",
    [
        # A group holding a single document: the expected result is that same document.
        pytest.param(
            [Path("IFTSTAAHB-informatorischeLesefassung2.0e_99991231_20231001.docx")],
            [Path("IFTSTAAHB-informatorischeLesefassung2.0e_99991231_20231001.docx")],
            id="One file",
        ),
        # A group holding four documents: the expected result is only the one
        # whose file name ends in the largest trailing number (20240311).
        pytest.param(
            [
                Path("IFTSTAAHB-informatorischeLesefassung2.0e_99991231_20231001.docx"),
                Path(
                    "IFTSTAAHB-informatorischeLesefassung2.0e-AußerordentlicheVeröffentlichung_20231211_20231001.docx"
                ),
                Path(
                    "IFTSTAAHB-informatorischeLesefassung2.0eKonsolidierteLesefassungmitFehlerkorrekturenStand11.03.2024_99991231_20240311.docx"
                ),
                Path(
                    "IFTSTAAHB-informatorischeLesefassung2.0eKonsolidierteLesefassungmitFehlerkorrekturenStand12.12.2023_20240310_20231212.docx"
                ),
            ],
            [
                Path(
                    "IFTSTAAHB-informatorischeLesefassung2.0eKonsolidierteLesefassungmitFehlerkorrekturenStand11.03.2024_99991231_20240311.docx"
                )
            ],
            id="Several files",
        ),
    ],
)
def test_filter_lastest_version(self, all_file_paths: list[Path], filtered_file_paths: list[Path]) -> None:
    """
    Check that ``DocxFileFinder.filter_latest_version`` returns the expected paths.

    The input paths are wrapped in a one-entry mapping under the key ``"FORMAT"``
    and the returned list is compared against ``filtered_file_paths``.
    """
    grouped_paths = {"FORMAT": all_file_paths}
    latest_paths = DocxFileFinder.filter_latest_version(grouped_paths)
    assert latest_paths == filtered_file_paths

0 comments on commit 6f162a7

Please sign in to comment.