-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Feature/pdct 1418 Make skeleton for GCF event data (#10)
* Add events skeleton * Check events df has required fields * Move has_required_fields into separate helpers file * Add tests for has_required_fields helper * Add typehints for has_required_fields tests * Raise an attribute error if required fields not present * Bump to 0.1.6 * Add raise in docstring for verify_required_fields_present
- Loading branch information
1 parent
6fb760c
commit e26532a
Showing
7 changed files
with
161 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
from enum import Enum | ||
from typing import Any, Optional | ||
|
||
import click | ||
import pandas as pd | ||
|
||
from gcf_data_mapper.parsers.helpers import verify_required_fields_present | ||
|
||
|
||
class RequiredColumns(Enum):
    """Column names the event parser requires in the projects DataFrame.

    Each member's value is a source column name; ``event`` gathers these
    values and verifies that every one of them is a column of its input.
    """

    # NOTE(review): the member names look like project lifecycle stages,
    # each paired with the date column for that stage -- confirm against
    # the mapping spec.
    APPROVED = "ApprovalDate"
    UNDER_IMPLEMENTATION = "StartDate"
    COMPLETED = "DateCompletion"
|
||
|
||
def event(projects_data: pd.DataFrame, debug: bool) -> list[Optional[dict[str, Any]]]:
    """Map the GCF event info to new structure.

    :param pd.DataFrame projects_data: The MCF and GCF project data,
        joined on FP num.
    :param bool debug: Whether debug mode is on.
    :raise AttributeError: Propagated from the required-field check when
        a column named in RequiredColumns is missing.
    :return list[Optional[dict[str, Any]]]: The mapped GCF events; this
        skeleton currently always returns an empty list.
    """
    if debug:
        click.echo("📝 Wrangling GCF event data.")

    # Fail fast if any column named in RequiredColumns is absent.
    verify_required_fields_present(
        projects_data, {str(column.value) for column in RequiredColumns}
    )

    return []
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
import pandas as pd | ||
|
||
|
||
def verify_required_fields_present(
    data: pd.DataFrame, required_fields: set[str]
) -> bool:
    """Check that a DataFrame contains all of the required columns.

    :param pd.DataFrame data: The DataFrame to check.
    :param set[str] required_fields: The required DataFrame columns.
    :raise AttributeError: If any of the required fields are missing.
    :return bool: True if the DataFrame contains the required fields.
    """
    cols = set(data.columns)
    missing = set(required_fields) - cols
    if not missing:
        return True

    def _fmt(names: set) -> str:
        # Same shape as a set's repr, but sorted so the message is stable
        # across runs (string hashing is randomised per process). An empty
        # set renders as '{}' rather than 'set()'.
        return "{" + ", ".join(sorted(map(repr, names))) + "}"

    raise AttributeError(
        f"Required fields '{_fmt(missing)}' not present in df columns '"
        f"{_fmt(cols)}'"
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
[tool.poetry] | ||
name = "gcf-data-mapper" | ||
version = "0.1.5" | ||
version = "0.1.6" | ||
description = "A CLI tool to wrangle GCF data into format recognised by the bulk-import tool." | ||
authors = ["CPR-dev-team <[email protected]>"] | ||
license = "Apache-2.0" | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
import pandas as pd | ||
import pytest | ||
|
||
|
||
@pytest.fixture(
    params=[
        {"col1": ["record1"]},
        {"ApprovalDate": ["some_approval"]},
        {"ApprovalDate": ["some_ref"], "StartDate": ["some_start"]},
    ]
)
def required_cols_missing(request):
    """Yield one DataFrame per parameter set; none has every required column."""
    incomplete_frame = pd.DataFrame(request.param)
    yield incomplete_frame
|
||
|
||
@pytest.fixture()
def valid_data():
    """Yield a one-row DataFrame that has all three required date columns."""
    columns = {
        "ApprovalDate": ["some_approval"],
        "StartDate": ["some_start"],
        "DateCompletion": ["some_end"],
    }
    yield pd.DataFrame(columns)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
import pytest | ||
|
||
from gcf_data_mapper.parsers.event import event | ||
|
||
|
||
def test_returns_empty_when_cols_missing(required_cols_missing):
    """Despite the name, this checks that missing columns raise AttributeError."""
    expect_failure = pytest.raises(AttributeError)
    with expect_failure:
        event(required_cols_missing, debug=False)
|
||
|
||
def test_success_with_valid_data(valid_data):
    """With every required column present, ``event`` returns an empty list."""
    assert event(valid_data, debug=False) == []
60 changes: 60 additions & 0 deletions
60
tests/unit_tests/parsers/helpers/test_verify_required_fields_present.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
import pandas as pd | ||
import pytest | ||
|
||
from gcf_data_mapper.parsers.helpers import verify_required_fields_present | ||
|
||
|
||
@pytest.mark.parametrize(
    ("test_df", "expected_fields", "expected_error"),
    [
        (
            pd.DataFrame({"fruits": ["apple", "banana", "cherry"]}),
            {"fruits", "vegetables"},
            "Required fields '{'vegetables'}' not present in df columns '{'fruits'}'",
        ),
        (
            pd.DataFrame(),
            {"cars"},
            "Required fields '{'cars'}' not present in df columns '{}'",
        ),
    ],
)
def test_returns_false_when_missing_fields(
    test_df: pd.DataFrame, expected_fields: set[str], expected_error: str
):
    """Despite the name, this checks the AttributeError and its exact message."""
    with pytest.raises(AttributeError) as excinfo:
        verify_required_fields_present(test_df, expected_fields)
    # Checked after the ``with`` block: a statement placed after the raising
    # call inside the block would never execute.
    assert str(excinfo.value) == expected_error
|
||
|
||
@pytest.mark.parametrize(
    ("test_df", "expected_fields"),
    [
        (
            pd.DataFrame(
                {
                    "fruits": ["date", "elderberry", "fig"],
                    "vegetables": ["asparagus", "beetroot", "carrot"],
                }
            ),
            {"fruits", "vegetables"},
        ),
        (
            pd.DataFrame({"cars": ["Ford", "Renault", "Audi"]}),
            {"cars"},
        ),
    ],
)
def test_returns_true_when_no_missing_fields(
    test_df: pd.DataFrame, expected_fields: set[str]
):
    """The helper returns exactly ``True`` when every required column exists."""
    assert verify_required_fields_present(test_df, expected_fields) is True