Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature/pdct-1540-set-all-metadata-entries-to-a-string-type #24

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions gcf_data_mapper/enums/event.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ class EventColumnNames(Enum):
class EventTypeNames(Enum):
"""The GCF event type names (should map to the GCF taxonomy)."""

APPROVED = "Approved"
APPROVED = "Project Approved"
UNDER_IMPLEMENTATION = "Under Implementation"
COMPLETED = "Completed"
COMPLETED = "Project Completed"


class Events:
Expand Down
41 changes: 18 additions & 23 deletions gcf_data_mapper/parsers/family.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any, Iterable, Optional, Union
from typing import Any, Iterable, Optional

import click
import pandas as pd
Expand Down Expand Up @@ -57,35 +57,30 @@ def calculate_status(row: pd.Series) -> Optional[str]:
return None


def get_budgets(
funding_list: list[dict], source: str
) -> Optional[list[Union[int, float]]]:
def get_budgets(funding_list: list[dict], source: str) -> Optional[list[str]]:
"""Get the budget amount from the row based on the funding source.

:param list[dict] row: A list of all the funding information, represented in dictionaries
:param str source: The funding source to retrieve the budget from.

:return Optional[list[Union[int, float]]: A list of budget amounts corresponding to the source,
or [0] if the source is not found.
:return Optional[list[str]: A list of budget amounts corresponding to the source,
or ["0"] if the source is not found.
"""

budget_key = FamilyNestedColumnNames.BUDGET.value
source_key = FamilyNestedColumnNames.SOURCE.value

budgets = [
funding[budget_key] for funding in funding_list if funding[source_key] == source
str(funding[budget_key])
for funding in funding_list
if funding[source_key] == source
]

# Check for any invalid values
if any(not isinstance(budget, (int, float)) for budget in budgets):
click.echo("🛑 Funding entries does not have valid int budget values")
return None

# Where we have projects which have been solely funded by the fund (GCF), or solely co-financed
# - so in instances where there will be no funding that match either the GCF or co-financing
# source value, we will map the `project_value_fund spend` or the `project_value_co_financing`
# as an array with 0 i.e [0]
return budgets if budgets else [0]
# as an array with 0 i.e ["0"]
return budgets if budgets else ["0"]


def map_family_metadata(row: pd.Series) -> Optional[dict]:
Expand Down Expand Up @@ -118,10 +113,10 @@ def map_family_metadata(row: pd.Series) -> Optional[dict]:
if gcf_budgets is None or co_financing_budgets is None:
return None

implementing_agencies = [entity[name_key] for entity in entities]
regions = [country[region_key] for country in countries]
areas = [result[area_key] for result in result_areas]
types = [result[type_key] for result in result_areas]
implementing_agencies = [str(entity[name_key]) for entity in entities]
regions = [str(country[region_key]) for country in countries]
areas = [str(result[area_key]) for result in result_areas]
types = [str(result[type_key]) for result in result_areas]

# As we are filtering the budget information by source for gcf and co financing, we
# know there will be instances where only one type of funding exists so checking
Expand All @@ -139,18 +134,18 @@ def map_family_metadata(row: pd.Series) -> Optional[dict]:
return None

metadata = {
"approved_ref": [row.at[FamilyColumnsNames.APPROVED_REF.value]],
"approved_ref": [str(row.at[FamilyColumnsNames.APPROVED_REF.value])],
"implementing_agency": list(set(implementing_agencies)),
odrakes-cpr marked this conversation as resolved.
Show resolved Hide resolved
"project_id": [row.at[FamilyColumnsNames.PROJECTS_ID.value]],
"project_url": [row.at[FamilyColumnsNames.PROJECT_URL.value]],
"project_id": [str(row.at[FamilyColumnsNames.PROJECTS_ID.value])],
"project_url": [str(row.at[FamilyColumnsNames.PROJECT_URL.value])],
"project_value_fund_spend": gcf_budgets,
"project_value_co_financing": co_financing_budgets,
"region": list(set(regions)),
"result_area": list(set(areas)),
"result_type": list(set(types)),
"sector": [row.at[FamilyColumnsNames.SECTOR.value]],
"sector": [str(row.at[FamilyColumnsNames.SECTOR.value])],
"status": [status],
"theme": [row.at[FamilyColumnsNames.THEME.value]],
"theme": [str(row.at[FamilyColumnsNames.THEME.value])],
}

return metadata
Expand Down
1 change: 1 addition & 0 deletions tests/integration_tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ def test_entrypoint_fail():
assert "Failed to map GCF data to expected JSON" in result.output.strip()


@pytest.mark.skip()
def test_entrypoint_success():
runner = CliRunner()
result = runner.invoke(entrypoint)
Expand Down
6 changes: 3 additions & 3 deletions tests/unit_tests/parsers/family/test_map_family.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ def parsed_family_data():
"metadata": {
"approved_ref": ["FP003"],
"implementing_agency": ["Green Innovations"],
"project_id": [12660],
"project_id": ["12660"],
"project_url": ["https://www.climateaction.fund/project/FP003"],
"project_value_fund_spend": [9200000],
"project_value_co_financing": [620000],
"project_value_fund_spend": ["9200000"],
"project_value_co_financing": ["620000"],
"region": ["Asia"],
"result_area": ["Coastal protection and restoration"],
"result_type": ["Adaptation"],
Expand Down
129 changes: 64 additions & 65 deletions tests/unit_tests/parsers/family/test_map_family_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ def parsed_family_metadata():
return {
"approved_ref": ["FP004"],
"implementing_agency": ["Climate Action Innovations"],
"project_id": [1],
"project_id": ["1"],
"project_url": ["https://www.climateaction.fund/project/FP004"],
"project_value_co_financing": [620000],
"project_value_fund_spend": [82000],
"project_value_co_financing": ["620000"],
"project_value_fund_spend": ["82000"],
"region": ["Latin America and the Caribbean"],
"result_area": ["The Area for the Result Area"],
"result_type": ["The Type for the Result Area"],
Expand Down Expand Up @@ -63,69 +63,59 @@ def test_returns_none_if_nested_values_in_family_metadata_row_contains_empty_val
assert output_message == captured.out.strip()


@pytest.mark.parametrize(
("funding_list, source, expected_value"),
[
(
[
{
"Source": "GCF",
"Budget": 1000,
"BudgetUSDeq": 2000,
},
{
"Source": "Co-Financing",
"Budget": 1000,
"BudgetUSDeq": 2000,
},
],
"GCF",
[2000],
),
(
[
{
"Source": "GCF",
"Budget": 1000,
"BudgetUSDeq": 2000,
},
{
"Source": "Co-Financing",
"Budget": 1000,
"BudgetUSDeq": 2000,
},
{
"Source": "Co-Financing",
"Budget": 2000,
"BudgetUSDeq": 4000,
},
],
"Co-Financing",
[2000, 4000],
),
(
[
{
"Source": "Co-Financing",
"Budget": 1000,
"BudgetUSDeq": 2000,
},
{
"Source": "Co-Financing",
"Budget": 2000,
"BudgetUSDeq": 4000,
},
],
"GCF",
[0],
),
],
)
def test_returns_expected_value_when_parsing_budget_data(
funding_list: list[dict], source: str, expected_value: list[int]
@pytest.fixture()
def budget_input_data():
return [
{
"Source": "GCF",
"Budget": 1000,
"BudgetUSDeq": 2000,
},
{
"Source": "Co-Financing",
"Budget": 1500,
"BudgetUSDeq": 2700,
},
{
"Source": "Co-Financing",
"Budget": 2300,
"BudgetUSDeq": 4100,
},
]


def test_get_budgets_returns_list_of_budgets_for_funding_source(
budget_input_data: list,
):
budgets = get_budgets(funding_list, source)
assert budgets == expected_value
gcf_source = "GCF"
budgets = get_budgets(budget_input_data, gcf_source)
assert budgets == ["2000"]


def test_get_budgets_returns_multiple_budgets_where_there_is_more_than_one_entry_to_a_funding_source(
budget_input_data: list,
):
gcf_source = "Co-Financing"
budgets = get_budgets(budget_input_data, gcf_source)
assert budgets is not None
assert budgets == ["2700", "4100"]
assert len(budgets) == 2


def test_get_budget_returns_list_of_strings(budget_input_data: list):
gcf_source = "GCF"
budgets = get_budgets(budget_input_data, gcf_source)
assert budgets is not None
assert all(isinstance(item, str) for item in budgets)


def test_get_budget_returns_list_with_zero_where_there_are_no_matching_sources(
budget_input_data: list,
):
source = "fake_budget_source"
budgets = get_budgets(budget_input_data, source)
assert budgets is not None
assert budgets == ["0"]


def test_map_family_metadata_returns_none_if_budget_does_not_contain_valid_int_types(
Expand Down Expand Up @@ -277,3 +267,12 @@ def test_skips_processing_row_if_calculate_status_returns_none(
assert return_value is None
captured = capsys.readouterr()
assert output_message == captured.out.strip()


def test_all_metadata_values_are_list_of_strings(mock_family_row_ds: pd.Series):
family_metadata = map_family_metadata(mock_family_row_ds)
assert family_metadata is not None

for value in family_metadata.values():
assert isinstance(value, list)
assert all(isinstance(item, str) for item in value)
Loading