Skip to content

Commit

Permalink
Bulk import fixes (#213)
Browse files Browse the repository at this point in the history
* Give user_language_name a default value on IngestDocumentDto

* Do not look up entity counter if import_id already provided when saving documents

* Do not look up entity counter if import_id already provided when saving events

* Validate family exists before saving events

* Bump patch version

* Move comment
  • Loading branch information
annaCPR authored Sep 12, 2024
1 parent 7c860be commit 4eef7ca
Show file tree
Hide file tree
Showing 8 changed files with 51 additions and 29 deletions.
2 changes: 1 addition & 1 deletion app/model/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ class IngestDocumentDTO(BaseModel):
metadata: Json
title: str
source_url: Optional[AnyHttpUrl] = None
user_language_name: Optional[str]
user_language_name: Optional[str] = None

def to_document_create_dto(self) -> DocumentCreateDTO:
"""
Expand Down
17 changes: 9 additions & 8 deletions app/repository/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,16 +404,17 @@ def create(db: Session, document: DocumentCreateDTO) -> str:
# Update the FamilyDocument with the new PhysicalDocument id
family_doc.physical_document_id = phys_doc.id

# Generate the import_id for the new document
org = family_repo.get_organisation(db, cast(str, family_doc.family_import_id))
if org is None:
raise ValidationError(
f"Cannot find counter to generate id for {family_doc.family_import_id}"
if not family_doc.import_id:
org = family_repo.get_organisation(
db, cast(str, family_doc.family_import_id)
)
if org is None:
raise ValidationError(
f"Cannot find counter to generate id for {family_doc.family_import_id}"
)
org_name = cast(str, org.name)

org_name = cast(str, org.name)

if not family_doc.import_id:
# Generate the import_id for the new document
family_doc.import_id = cast(
Column, generate_import_id(db, CountedEntity.Document, org_name)
)
Expand Down
17 changes: 8 additions & 9 deletions app/repository/event.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,16 +172,15 @@ def create(db: Session, event: EventCreateDTO) -> str:

family_import_id = new_family_event.family_import_id

# Generate the import_id for the new event
org = family_repo.get_organisation(db, cast(str, family_import_id))
if org is None:
raise ValidationError(
f"Cannot find counter to generate id for {family_import_id}"
)

org_name = cast(str, org.name)

if not new_family_event.import_id:
org = family_repo.get_organisation(db, cast(str, family_import_id))
if org is None:
raise ValidationError(
f"Cannot find counter to generate id for {family_import_id}"
)
org_name = cast(str, org.name)

# Generate the import_id for the new event
new_family_event.import_id = cast(
Column, generate_import_id(db, CountedEntity.Event, org_name)
)
Expand Down
25 changes: 17 additions & 8 deletions app/service/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,15 +167,24 @@ def validate_entity_relationships(data: dict[str, Any]) -> None:
for fam in data["families"]:
families.append(fam["import_id"])

documents = []
document_family_import_ids = []
if "documents" in data:
for doc in data["documents"]:
documents.append(doc["family_import_id"])

family_document_set = set(families)
unmatched = [x for x in documents if x not in family_document_set]
if unmatched:
raise ValidationError(f"No family with id {unmatched} found")
for entity in data["documents"]:
document_family_import_ids.append(entity["family_import_id"])

event_family_import_ids = []
if "events" in data:
for event in data["events"]:
event_family_import_ids.append(event["family_import_id"])

families_set = set(families)
for doc_fam in document_family_import_ids:
if doc_fam not in families_set:
raise ValidationError(f"No family with id {doc_fam} found for document")

for event_fam in event_family_import_ids:
if event_fam not in families_set:
raise ValidationError(f"No family with id {event_fam} found for event")


@validate_call(config=ConfigDict(arbitrary_types_allowed=True))
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "admin_backend"
version = "2.15.1"
version = "2.15.2"
description = ""
authors = ["CPR-dev-team <[email protected]>"]
packages = [{ include = "app" }, { include = "tests" }]
Expand Down
1 change: 1 addition & 0 deletions tests/integration_tests/ingest/test_ingest_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ def test_get_template_unfcc(
"user_language_name": {
"anyOf": [{"type": "string"}, {"type": "null"}],
"title": "User Language Name",
"default": None,
},
}
],
Expand Down
1 change: 1 addition & 0 deletions tests/unit_tests/routers/ingest/test_ingest_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ def test_ingest_template_when_ok(
"user_language_name": {
"anyOf": [{"type": "string"}, {"type": "null"}],
"title": "User Language Name",
"default": None,
},
}
],
Expand Down
15 changes: 13 additions & 2 deletions tests/unit_tests/service/ingest/test_ingest_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,18 @@ def test_validate_entity_relationships_when_no_family_matching_document():

with pytest.raises(ValidationError) as e:
ingest_service.validate_entity_relationships(test_data)
assert f"No family with id ['{fam_import_id}'] found" == e.value.message
assert f"No family with id {fam_import_id} found for document" == e.value.message


def test_validate_entity_relationships_when_no_family_matching_event():
fam_import_id = "test.new.family.0"
test_data = {
"events": [{"import_id": "test.new.event.0", "family_import_id": fam_import_id}]
}

with pytest.raises(ValidationError) as e:
ingest_service.validate_entity_relationships(test_data)
assert f"No family with id {fam_import_id} found for event" == e.value.message


def test_save_documents_when_no_family():
Expand All @@ -159,7 +170,7 @@ def test_save_documents_when_no_family():

with pytest.raises(ValidationError) as e:
ingest_service.import_data(test_data, "test")
assert f"No family with id ['{fam_import_id}'] found" == e.value.message
assert f"No family with id {fam_import_id} found for document" == e.value.message


def test_save_events_when_data_invalid(validation_service_mock):
Expand Down

0 comments on commit 4eef7ca

Please sign in to comment.