Skip to content

Commit

Permalink
merge main for some reason
Browse files Browse the repository at this point in the history
  • Loading branch information
Osneil Drakes authored and Osneil Drakes committed Sep 11, 2024
2 parents c65278c + b3348a2 commit 6140263
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 0 deletions.
58 changes: 58 additions & 0 deletions gcf_data_mapper/parsers/family.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from typing import Any, Iterable, Optional
from typing import Any, Iterable, Optional, Union

import click
import pandas as pd

from gcf_data_mapper.enums.event import EventColumnNames, Events
from gcf_data_mapper.enums.event import EventColumnNames, Events
from gcf_data_mapper.enums.family import (
FamilyColumnsNames,
Expand All @@ -25,6 +27,48 @@ def contains_invalid_date_entries(list_of_dates: Iterable[pd.Timestamp]) -> bool
return any(date is pd.NaT for date in list_of_dates)


def calculate_status(row: pd.Series) -> Optional[str]:
    """Calculate the status of a project from its event dates.

    The status is the latest lifecycle stage whose date has been reached:
        Completed            : NOW is at or past the completion date
        Under implementation : NOW is at or past the start date
        Approved             : NOW is at or past the approval date

    :param pd.Series row: The row containing the event information
    :return Optional[str]: The status of the project, or None if any event
        date is NaT or if none of the event dates has been reached
    """
    # Parse with utc=True so that timezone-naive values are interpreted as UTC.
    # Without it, comparing a naive timestamp against the timezone-aware `now`
    # below raises a TypeError.
    completed_date = pd.to_datetime(row.at[Events.COMPLETED.column_name], utc=True)
    start_date = pd.to_datetime(
        row.at[Events.UNDER_IMPLEMENTATION.column_name], utc=True
    )
    approved_date = pd.to_datetime(row.at[Events.APPROVED.column_name], utc=True)

    if contains_invalid_date_entries([completed_date, start_date, approved_date]):
        click.echo("🛑 Row contains invalid date entries")
        return None

    now = pd.Timestamp.now(tz="UTC")

    # This block is arranged to reflect the project lifecycle in reverse order,
    # from the final stage to the initial stage, so the most advanced stage
    # that has been reached wins. pd.notna skips events that have no date.
    if pd.notna(completed_date) and now >= completed_date:
        return Events.COMPLETED.type
    if pd.notna(start_date) and now >= start_date:
        return Events.UNDER_IMPLEMENTATION.type
    if pd.notna(approved_date) and now >= approved_date:
        return Events.APPROVED.type

    click.echo("🛑 Row missing event date information to calculate status")
    return None


def get_budgets(funding_list: list[dict], source: str) -> Optional[list[int]]:
def contains_invalid_date_entries(list_of_dates: Iterable[pd.Timestamp]) -> bool:
    """Check if any of the values in the list of dates are NaT (Not a Time).

    Only the pd.NaT singleton counts as invalid. None and NaN entries are
    treated as valid (they represent an event without a date), which is why
    this uses an identity check rather than pd.isna.

    :param Iterable[pd.Timestamp] list_of_dates: An iterable of pd.Timestamps,
        which may also include None values
    :return bool: True if any of the values is pd.NaT, otherwise False
    """
    return any(date is pd.NaT for date in list_of_dates)


def calculate_status(row: pd.Series) -> Optional[str]:
"""Calculate status of project based on the event types and dates
The status is calculated per the below:
Expand Down Expand Up @@ -97,6 +141,11 @@ def map_family_metadata(row: pd.Series) -> Optional[dict]:

status = calculate_status(row)

if status is None:
return None

status = calculate_status(row)

if status is None:
return None

Expand Down Expand Up @@ -150,6 +199,7 @@ def map_family_metadata(row: pd.Series) -> Optional[dict]:
"result_types": list(set(types)),
"sector": [row.at[FamilyColumnsNames.SECTOR.value]],
"status": status,
"status": status,
"theme": [row.at[FamilyColumnsNames.THEME.value]],
}

Expand Down Expand Up @@ -248,6 +298,8 @@ def family(

mapped_families = []

family_columns = set(str(e.value) for e in FamilyColumnsNames)
required_fields = family_columns.union(set(str(e.value) for e in EventColumnNames))
family_columns = set(str(e.value) for e in FamilyColumnsNames)
required_fields = family_columns.union(set(str(e.value) for e in EventColumnNames))

Expand All @@ -258,6 +310,12 @@ def family(
required_fields -= set(
str(e.value) for e in EventColumnNames if str(e.value) not in family_columns
)
# Whilst we expect the event columns to be present, some of the events in the data may have empty values.
# We therefore want to exclude these from the `row_contains_columns_with_empty_values` function,
# and handle any empty event values in the `calculate_status` function.
required_fields -= set(
str(e.value) for e in EventColumnNames if str(e.value) not in family_columns
)

for _, row in gcf_projects_data.iterrows():
projects_id = row.at[FamilyColumnsNames.PROJECTS_ID.value]
Expand Down
9 changes: 9 additions & 0 deletions tests/unit_tests/parsers/family/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ def mock_family_doc_df():
"ApprovalDate": "2016-06-30T00:00:00.000Z",
"StartDate": "2024-06-28T00:00:00.000Z",
"DateCompletion": None,
"ApprovalDate": "2016-06-30T00:00:00.000Z",
"StartDate": "2024-06-28T00:00:00.000Z",
"DateCompletion": None,
}
]
)
Expand Down Expand Up @@ -100,6 +103,9 @@ def mock_family_row_ds():
"ApprovalDate": "2016-06-30T00:00:00.000Z",
"StartDate": "2024-06-28T00:00:00.000Z",
"DateCompletion": None,
"ApprovalDate": "2016-06-30T00:00:00.000Z",
"StartDate": "2024-06-28T00:00:00.000Z",
"DateCompletion": None,
}
)

Expand Down Expand Up @@ -146,6 +152,9 @@ def mock_family_row_no_result_areas():
"ApprovalDate": "2016-06-30T00:00:00.000Z",
"StartDate": "2024-06-28T00:00:00.000Z",
"DateCompletion": None,
"ApprovalDate": "2016-06-30T00:00:00.000Z",
"StartDate": "2024-06-28T00:00:00.000Z",
"DateCompletion": None,
}
)

Expand Down
9 changes: 9 additions & 0 deletions tests/unit_tests/parsers/family/test_map_family_metadata.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from typing import Optional

from typing import Optional

import pandas as pd
import pytest

Expand All @@ -10,6 +12,13 @@
get_budgets,
map_family_metadata,
)
from gcf_data_mapper.enums.event import Events
from gcf_data_mapper.parsers.family import (
calculate_status,
contains_invalid_date_entries,
get_budgets,
map_family_metadata,
)


@pytest.fixture()
Expand Down

0 comments on commit 6140263

Please sign in to comment.