Skip to content

Commit

Permalink
Feature/pdct 1402 validate collection exists before creating family (#…
Browse files Browse the repository at this point in the history
…214)

* Validate that a collection exists before saving a family

* Refactor to remove duplication

* Bump patch version
  • Loading branch information
annaCPR authored Sep 24, 2024
1 parent 4eef7ca commit 993a404
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 24 deletions.
103 changes: 83 additions & 20 deletions app/service/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import of data and other services for validation etc.
"""

from enum import Enum
from typing import Any, Optional

from fastapi import HTTPException, status
Expand All @@ -28,6 +29,15 @@
)


class IngestEntityList(str, Enum):
"""Name of the list of entities that can be ingested."""

Collections = "collections"
Families = "families"
Documents = "documents"
Events = "events"


@validate_call(config=ConfigDict(arbitrary_types_allowed=True))
def save_collections(
collection_data: list[dict[str, Any]],
Expand Down Expand Up @@ -154,37 +164,90 @@ def save_events(
return event_import_ids


def validate_entity_relationships(data: dict[str, Any]) -> None:
def _collect_import_ids(
entity_list_name: IngestEntityList,
data: dict[str, Any],
import_id_type_name: Optional[str] = None,
) -> list[str]:
"""
Validates relationships between entities contained in data based on import_ids.
For documents, it validates that the family the document is linked to exists.
Extracts a list of import_ids (or family_import_ids if specified) for the specified entity list in data.
:param IngestEntityList entity_list_name: The name of the entity list from which the import_ids are to be extracted.
:param dict[str, Any] data: The data structure containing the entity lists used for extraction.
:param Optional[str] import_id_type_name: the name of the type of import_id to be extracted or None.
:return list[str]: A list of extracted import_ids for the specified entity list.
"""
import_id_key = import_id_type_name or "import_id"
import_ids = []
if entity_list_name.value in data:
for entity in data[entity_list_name.value]:
import_ids.append(entity[import_id_key])
return import_ids


def _match_import_ids(
parent_references: list[str], parent_import_ids: set[str]
) -> None:
"""
Validates that all the references to parent entities exist in the set of parent import_ids passed in
:param list[str] parent_references: List of import_ids referencing parent entities to be validated.
:param set[str] parent_import_ids: Set of parent import_ids to validate against.
:raises ValidationError: raised if a parent reference is not found in the parent_import_ids.
"""
for id in parent_references:
if id not in parent_import_ids:
raise ValidationError(f"No entity with id {id} found")


def _validate_collections_exist_for_families(data: dict[str, Any]) -> None:
"""
Validates that collections the families are linked to exist based on import_id links in data.
:param dict[str, Any] data: The data object containing entities to be validated.
:raises ValidationError: raised should there be any unmatched relationships.
"""
families = []
collections = _collect_import_ids(IngestEntityList.Collections, data)
collections_set = set(collections)

family_collection_import_ids = []
if "families" in data:
for fam in data["families"]:
families.append(fam["import_id"])
family_collection_import_ids.extend(fam["collections"])

_match_import_ids(family_collection_import_ids, collections_set)

document_family_import_ids = []
if "documents" in data:
for entity in data["documents"]:
document_family_import_ids.append(entity["family_import_id"])

event_family_import_ids = []
if "events" in data:
for event in data["events"]:
event_family_import_ids.append(event["family_import_id"])
def _validate_families_exist_for_events_and_documents(data: dict[str, Any]) -> None:
"""
Validates that families the documents and events are linked to exist
based on import_id links in data.
:param dict[str, Any] data: The data object containing entities to be validated.
"""
families = _collect_import_ids(IngestEntityList.Families, data)
families_set = set(families)
for doc_fam in document_family_import_ids:
if doc_fam not in families_set:
raise ValidationError(f"No family with id {doc_fam} found for document")

for event_fam in event_family_import_ids:
if event_fam not in families_set:
raise ValidationError(f"No family with id {event_fam} found for event")
document_family_import_ids = _collect_import_ids(
IngestEntityList.Documents, data, "family_import_id"
)
event_family_import_ids = _collect_import_ids(
IngestEntityList.Events, data, "family_import_id"
)

_match_import_ids(document_family_import_ids, families_set)
_match_import_ids(event_family_import_ids, families_set)


def validate_entity_relationships(data: dict[str, Any]) -> None:
"""
Validates relationships between entities contained in data.
For documents, it validates that the family the document is linked to exists.
:param dict[str, Any] data: The data object containing entities to be validated.
"""

_validate_collections_exist_for_families(data)
_validate_families_exist_for_events_and_documents(data)


@validate_call(config=ConfigDict(arbitrary_types_allowed=True))
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "admin_backend"
version = "2.15.2"
version = "2.15.3"
description = ""
authors = ["CPR-dev-team <[email protected]>"]
packages = [{ include = "app" }, { include = "tests" }]
Expand Down
17 changes: 14 additions & 3 deletions tests/unit_tests/service/ingest/test_ingest_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def test_validate_entity_relationships_when_no_family_matching_document():

with pytest.raises(ValidationError) as e:
ingest_service.validate_entity_relationships(test_data)
assert f"No family with id {fam_import_id} found for document" == e.value.message
assert f"No entity with id {fam_import_id} found" == e.value.message


def test_validate_entity_relationships_when_no_family_matching_event():
Expand All @@ -157,7 +157,18 @@ def test_validate_entity_relationships_when_no_family_matching_event():

with pytest.raises(ValidationError) as e:
ingest_service.validate_entity_relationships(test_data)
assert f"No family with id {fam_import_id} found for event" == e.value.message
assert f"No entity with id {fam_import_id} found" == e.value.message


def test_validate_entity_relationships_when_no_collection_matching_family():
coll_import_id = "test.new.collection.0"
test_data = {
"families": [{"import_id": "test.new.event.0", "collections": [coll_import_id]}]
}

with pytest.raises(ValidationError) as e:
ingest_service.validate_entity_relationships(test_data)
assert f"No entity with id {coll_import_id} found" == e.value.message


def test_save_documents_when_no_family():
Expand All @@ -170,7 +181,7 @@ def test_save_documents_when_no_family():

with pytest.raises(ValidationError) as e:
ingest_service.import_data(test_data, "test")
assert f"No family with id {fam_import_id} found for document" == e.value.message
assert f"No entity with id {fam_import_id} found" == e.value.message


def test_save_events_when_data_invalid(validation_service_mock):
Expand Down

0 comments on commit 993a404

Please sign in to comment.