Skip to content

Commit

Permalink
Feature/pdct 1418 Make skeleton for GCF event data (#10)
Browse files Browse the repository at this point in the history
* Add events skeleton

* Check events df has required fields

* Move has_required_fields into separate helpers file

* Add tests for has_required_fields helper

* Add typehints for has_required_fields tests

* Raise an attribute error if required fields not present

* Bump to 0.1.6

* Add raise in docstring for verify_required_fields_present
  • Loading branch information
katybaulch authored Sep 3, 2024
1 parent 6fb760c commit e26532a
Show file tree
Hide file tree
Showing 7 changed files with 161 additions and 2 deletions.
3 changes: 2 additions & 1 deletion gcf_data_mapper/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from gcf_data_mapper.parsers.collection import collection
from gcf_data_mapper.parsers.document import document
from gcf_data_mapper.parsers.event import event
from gcf_data_mapper.parsers.family import family
from gcf_data_mapper.read import read

Expand Down Expand Up @@ -91,7 +92,7 @@ def wrangle_to_json(
"collections": collection(debug),
"families": family(project_info, debug),
"documents": document(doc_info, debug),
"events": [],
"events": event(project_info, debug),
}


Expand Down
32 changes: 32 additions & 0 deletions gcf_data_mapper/parsers/event.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from enum import Enum
from typing import Any, Optional

import click
import pandas as pd

from gcf_data_mapper.parsers.helpers import verify_required_fields_present


class RequiredColumns(Enum):
    """Names of the project-data columns the event parser requires.

    Each member's value is a DataFrame column name. The `event` parser
    collects these values into a set and verifies that every one of them
    is present in the projects DataFrame.
    """

    # NOTE(review): member names presumably correspond to the event type
    # each date column feeds -- confirm against the mapping sheet.
    APPROVED = "ApprovalDate"
    UNDER_IMPLEMENTATION = "StartDate"
    COMPLETED = "DateCompletion"


def event(projects_data: pd.DataFrame, debug: bool) -> list[Optional[dict[str, Any]]]:
    """Map the GCF event info to new structure.

    Currently a skeleton: the input columns are validated and an empty
    list is returned.

    :param pd.DataFrame projects_data: The MCF and GCF project data,
        joined on FP num.
    :param bool debug: Whether debug mode is on.
    :raise AttributeError: If any column named in RequiredColumns is
        missing from projects_data.
    :return list[Optional[dict[str, Any]]]: A list of GCF events in
        the 'destination' format described in the GCF Data Mapper Google
        Sheet.
    """
    if debug:
        click.echo("📝 Wrangling GCF event data.")

    required_fields = {str(column.value) for column in RequiredColumns}
    # Fail fast: this raises before any mapping is attempted.
    verify_required_fields_present(projects_data, required_fields)

    return []
22 changes: 22 additions & 0 deletions gcf_data_mapper/parsers/helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import pandas as pd


def verify_required_fields_present(
    data: pd.DataFrame, required_fields: set[str]
) -> bool:
    """Check that a DataFrame contains all of the required columns.

    :param pd.DataFrame data: The DataFrame to check.
    :param set[str] required_fields: The required DataFrame columns.
    :raise AttributeError: If any of the required fields are missing.
    :return bool: True if the DataFrame contains the required fields.
    """
    cols = set(data.columns)
    missing = set(required_fields) - cols
    if not missing:
        return True

    # An empty set would render as 'set()'; show '{}' for a frame with no
    # columns instead.
    shown_cols = cols if cols else "{}"
    raise AttributeError(
        f"Required fields '{missing}' not present in df columns '{shown_cols}'"
    )
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "gcf-data-mapper"
version = "0.1.5"
version = "0.1.6"
description = "A CLI tool to wrangle GCF data into format recognised by the bulk-import tool."
authors = ["CPR-dev-team <[email protected]>"]
license = "Apache-2.0"
Expand Down
31 changes: 31 additions & 0 deletions tests/unit_tests/parsers/event/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import pandas as pd
import pytest


@pytest.fixture(
    params=[
        {"col1": ["record1"]},
        {"ApprovalDate": ["some_approval"]},
        {"ApprovalDate": ["some_ref"], "StartDate": ["some_start"]},
    ]
)
def required_cols_missing(request):
    """Provide a DataFrame that lacks at least one required event column.

    Parametrised: a dependent test runs once per column layout above.
    """
    incomplete_frame = pd.DataFrame(request.param)
    return incomplete_frame


@pytest.fixture()
def valid_data():
    """Provide a one-row DataFrame holding all three required event columns."""
    columns = {
        "ApprovalDate": ["some_approval"],
        "StartDate": ["some_start"],
        "DateCompletion": ["some_end"],
    }
    return pd.DataFrame(columns)
13 changes: 13 additions & 0 deletions tests/unit_tests/parsers/event/test_event.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import pytest

from gcf_data_mapper.parsers.event import event


def test_returns_empty_when_cols_missing(required_cols_missing):
    """Check that event() raises AttributeError when required columns are missing.

    Despite the test name, nothing is returned in this case: the call is
    expected to raise.
    """
    with pytest.raises(AttributeError):
        event(required_cols_missing, debug=False)


def test_success_with_valid_data(valid_data):
    """With every required column present, event() returns an empty list."""
    assert event(valid_data, debug=False) == []
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import pandas as pd
import pytest

from gcf_data_mapper.parsers.helpers import verify_required_fields_present


@pytest.mark.parametrize(
    ("test_df", "expected_fields", "expected_error"),
    [
        (
            pd.DataFrame(
                {
                    "fruits": ["apple", "banana", "cherry"],
                }
            ),
            {"fruits", "vegetables"},
            "Required fields '{'vegetables'}' not present in df columns '{'fruits'}'",
        ),
        (
            pd.DataFrame(),
            {"cars"},
            "Required fields '{'cars'}' not present in df columns '{}'",
        ),
    ],
)
def test_returns_false_when_missing_fields(
    test_df: pd.DataFrame, expected_fields: set[str], expected_error: str
):
    """Check the AttributeError raised, and its message, when columns are missing.

    Despite the test name, the helper raises rather than returning False.
    """
    with pytest.raises(AttributeError) as e:
        verify_required_fields_present(test_df, expected_fields)
    # Kept outside the `with` block: a statement placed after the raising
    # call inside the block would never execute.
    assert str(e.value) == expected_error


@pytest.mark.parametrize(
    ("test_df", "expected_fields"),
    [
        (
            pd.DataFrame(
                {
                    "fruits": ["date", "elderberry", "fig"],
                    "vegetables": ["asparagus", "beetroot", "carrot"],
                }
            ),
            {"fruits", "vegetables"},
        ),
        (
            pd.DataFrame(
                {
                    "cars": ["Ford", "Renault", "Audi"],
                }
            ),
            {"cars"},
        ),
    ],
)
def test_returns_true_when_no_missing_fields(
    test_df: pd.DataFrame, expected_fields: set[str]
):
    """Check the helper returns True when every expected column is present."""
    assert verify_required_fields_present(test_df, expected_fields) is True

0 comments on commit e26532a

Please sign in to comment.