diff --git a/pyproject.toml b/pyproject.toml index 53a74289..14f12eea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "admin_backend" -version = "2.17.15" +version = "2.17.16" description = "" authors = ["CPR-dev-team "] packages = [{ include = "app" }, { include = "tests" }] diff --git a/tests/helpers/ingest.py b/tests/helpers/ingest.py new file mode 100644 index 00000000..dcc87090 --- /dev/null +++ b/tests/helpers/ingest.py @@ -0,0 +1,48 @@ +import json +import logging +from io import BytesIO +from typing import Any + +logger = logging.getLogger(__name__) + + +default_collection = { + "import_id": "test.new.collection.0", + "title": "Test title", + "description": "Test description", +} + + +default_family = { + "import_id": "test.new.family.0", + "title": "Test", + "summary": "Test", + "geographies": ["South Asia"], + "category": "UNFCCC", + "metadata": {"author_type": ["Non-Party"], "author": ["Test"]}, + "collections": ["test.new.collection.0"], +} + + +default_document = { + "import_id": "test.new.document.0", + "family_import_id": "test.new.family.0", + "metadata": {"role": ["MAIN"], "type": ["Law"]}, + "variant_name": None, + "title": "", + "user_language_name": "", +} + + +default_event = { + "import_id": "test.new.event.0", + "family_import_id": "test.new.family.0", + "event_title": "Test", + "date": "2024-01-01", + "event_type_value": "Amended", +} + + +def build_json_file(data: dict[str, Any]) -> BytesIO: + json_data = json.dumps(data).encode("utf-8") + return BytesIO(json_data) diff --git a/tests/integration_tests/ingest/test_bulk_data.json b/tests/integration_tests/ingest/test_bulk_data.json deleted file mode 100644 index c85d73ba..00000000 --- a/tests/integration_tests/ingest/test_bulk_data.json +++ /dev/null @@ -1,74 +0,0 @@ -{ - "collections": [ - { - "import_id": "test.new.collection.0", - "title": "Test title", - "description": "Test description" - }, - { - "import_id": "test.new.collection.1", - "title": "Test title", - "description": "Test description" - } - ], - "families": [ - { - "import_id": "test.new.family.0", - "title": "Test", - "summary": "Test", - "geographies": ["South Asia"], - "category": "UNFCCC", - "metadata": { - "author_type": ["Non-Party"], - "author": ["Test"] - }, - "collections": ["test.new.collection.0"] - }, - { - "import_id": "test.new.family.1", - "title": "Test", - "summary": "Test", - "geographies": ["South Asia"], - "category": "UNFCCC", - "metadata": { - "author_type": ["Party"], - "author": ["Test"] - }, - "collections": ["test.new.collection.1"] - } - ], - "documents": [ - { - "import_id": "test.new.document.0", - "family_import_id": "test.new.family.0", - "metadata": { "role": ["MAIN"], "type": ["Law"] }, - "variant_name": "Original Language", - "title": "", - "user_language_name": "" - }, - { - "import_id": "test.new.document.1", - "family_import_id": "test.new.family.1", - "metadata": { "role": ["MAIN"], "type": ["Law"] }, - "variant_name": "Original Language", - "title": "", - "user_language_name": "" - } - ], - "events": [ - { - "import_id": "test.new.event.0", - "family_import_id": "test.new.family.0", - "event_title": "Test", - "date": "2024-01-01", - "event_type_value": "Amended" - }, - { - "import_id": "test.new.event.1", - "family_import_id": "test.new.family.1", - "event_title": "Test", - "date": "2024-01-01", - "event_type_value": "Amended" - } - ] -} diff --git a/tests/integration_tests/ingest/test_bulk_data_with_invalid_event_type.json b/tests/integration_tests/ingest/test_bulk_data_with_invalid_event_type.json deleted file mode 100644 index bc2c00b4..00000000 --- a/tests/integration_tests/ingest/test_bulk_data_with_invalid_event_type.json +++ /dev/null @@ -1,42 +0,0 @@ -{ - "collections": [ - { - "import_id": "test.new.collection.0", - "title": "Test title", - "description": "Test description" - } - ], - "families": [ - { - "import_id": "test.new.family.0", - "title": "Test", - "summary": "Test", - "geographies": ["South Asia"], - "category": "UNFCCC", - "metadata": { - "author_type": ["Non-Party"], - "author": ["Test"] - }, - "collections": ["test.new.collection.0"] - } - ], - "documents": [ - { - "import_id": "test.new.document.0", - "family_import_id": "test.new.family.0", - "metadata": { "role": ["MAIN"], "type": ["Law"] }, - "variant_name": "Original Language", - "title": "", - "user_language_name": "" - } - ], - "events": [ - { - "import_id": "test.new.event.0", - "family_import_id": "test.new.family.0", - "event_title": "Test", - "date": "2024-01-01", - "event_type_value": "Invalid" - } - ] -} diff --git a/tests/integration_tests/ingest/test_ingest.py b/tests/integration_tests/ingest/test_ingest.py index 738db92a..4121addd 100644 --- a/tests/integration_tests/ingest/test_ingest.py +++ b/tests/integration_tests/ingest/test_ingest.py @@ -1,5 +1,3 @@ -import io -import json import logging import os from unittest.mock import patch @@ -12,23 +10,60 @@ from sqlalchemy import update from sqlalchemy.orm import Session +from tests.helpers.ingest import ( + build_json_file, + default_collection, + default_document, + default_event, + default_family, +) from tests.integration_tests.setup_db import setup_db +def create_input_json_with_two_of_each_entity(): + return build_json_file( + { + "collections": [ + default_collection, + {**default_collection, "import_id": "test.new.collection.1"}, + ], + "families": [ + default_family, + { + **default_family, + "import_id": "test.new.family.1", + "collections": ["test.new.collection.1"], + }, + ], + "documents": [ + default_document, + { + **default_document, + "import_id": "test.new.document.1", + "family_import_id": "test.new.family.1", + }, + ], + "events": [ + default_event, + { + **default_event, + "import_id": "test.new.event.1", + "family_import_id": "test.new.family.1", + }, + ], + } + ) + + @patch.dict(os.environ, {"BULK_IMPORT_BUCKET": "test_bucket"}) def test_ingest_when_ok( data_db: Session, client: TestClient, superuser_header_token, basic_s3_client ): + input_json = create_input_json_with_two_of_each_entity() + response = client.post( "/api/v1/ingest/UNFCCC.corpus.i00000001.n0000", - files={ - "new_data": open( - os.path.join( - "tests", "integration_tests", "ingest", "test_bulk_data.json" - ), - "rb", - ) - }, + files={"new_data": input_json}, headers=superuser_header_token, ) @@ -95,18 +130,12 @@ def test_import_data_rollback( basic_s3_client, ): setup_db(data_db) + input_json = create_input_json_with_two_of_each_entity() with caplog.at_level(logging.ERROR): response = client.post( "/api/v1/ingest/UNFCCC.corpus.i00000001.n0000", - files={ - "new_data": open( - os.path.join( - "tests", "integration_tests", "ingest", "test_bulk_data.json" - ), - "rb", - ) - }, + files={"new_data": input_json}, headers=superuser_header_token, ) @@ -132,56 +161,22 @@ def test_ingest_idempotency( superuser_header_token, basic_s3_client, ): - family_import_id = "test.new.family.0" - event_import_id = "test.new.event.0" - collection_import_id = "test.new.collection.0" - test_data = { - "collections": [ - { - "import_id": collection_import_id, - "title": "Test title", - "description": "Test description", - }, - ], - "families": [ - { - "import_id": family_import_id, - "title": "Test", - "summary": "Test", - "geographies": ["South Asia"], - "category": "UNFCCC", - "metadata": {"author_type": ["Non-Party"], "author": ["Test"]}, - "collections": [collection_import_id], - } - ], - "documents": [ - { - "import_id": f"test.new.document.{i}", - "family_import_id": family_import_id, - "metadata": {"role": ["MAIN"], "type": ["Law"]}, - "variant_name": "Original Language", - "title": f"Document{i}", - "user_language_name": "", - } - for i in range(1001) - ], - "events": [ - { - "import_id": event_import_id, - "family_import_id": family_import_id, - "event_title": "Test", - "date": "2024-01-01", - "event_type_value": "Amended", - } - ], - } - test_json = json.dumps(test_data).encode("utf-8") - test_data_file = io.BytesIO(test_json) + input_json = build_json_file( + { + "collections": [default_collection], + "families": [default_family], + "documents": [ + {**default_document, "import_id": f"test.new.document.{i}"} + for i in range(1001) + ], + "events": [default_event], + } + ) with caplog.at_level(logging.ERROR): first_response = client.post( "/api/v1/ingest/UNFCCC.corpus.i00000001.n0000", - files={"new_data": test_data_file}, + files={"new_data": input_json}, headers=superuser_header_token, ) @@ -214,7 +209,7 @@ def test_ingest_idempotency( with caplog.at_level(logging.ERROR): second_response = client.post( "/api/v1/ingest/UNFCCC.corpus.i00000001.n0000", - files={"new_data": test_json}, + files={"new_data": input_json}, headers=superuser_header_token, ) @@ -255,45 +250,26 @@ def test_generates_unique_slugs_for_documents_with_identical_titles( of bulk import. However, the current length of the suffix added to the slug to ensure uniqueness (6), means that the likelihood of a collision is extremely low. """ - family_import_id = "test.new.family.0" - test_data = { - "collections": [], - "families": [ - { - "import_id": family_import_id, - "title": "Test", - "summary": "Test", - "geographies": ["South Asia"], - "category": "UNFCCC", - "metadata": {"author_type": ["Non-Party"], "author": ["Test"]}, - "collections": [], - } - ], - "documents": [ - { - "import_id": f"test.new.document.{i}", - "family_import_id": family_import_id, - "metadata": {"role": ["MAIN"], "type": ["Law"]}, - "variant_name": "Original Language", - "title": "Project Document", - "user_language_name": "", - } - for i in range(1000) - ], - "events": [], - } - test_json = json.dumps(test_data).encode("utf-8") - test_data_file = io.BytesIO(test_json) + + input_json = build_json_file( + { + "families": [{**default_family, "collections": []}], + "documents": [ + {**default_document, "import_id": f"test.new.document.{i}"} + for i in range(1000) + ], + } + ) with caplog.at_level(logging.ERROR): - first_response = client.post( + response = client.post( "/api/v1/ingest/UNFCCC.corpus.i00000001.n0000", - files={"new_data": test_data_file}, + files={"new_data": input_json}, headers=superuser_header_token, ) - assert first_response.status_code == status.HTTP_202_ACCEPTED - assert first_response.json() == { + assert response.status_code == status.HTTP_202_ACCEPTED + assert response.json() == { "message": "Bulk import request accepted. Check Cloudwatch logs for result." } @@ -315,18 +291,12 @@ def test_ingest_when_corpus_import_id_invalid( basic_s3_client, ): invalid_corpus = "test" + input_json = create_input_json_with_two_of_each_entity() with caplog.at_level(logging.ERROR): response = client.post( f"/api/v1/ingest/{invalid_corpus}", - files={ - "new_data": open( - os.path.join( - "tests", "integration_tests", "ingest", "test_bulk_data.json" - ), - "rb", - ) - }, + files={"new_data": input_json}, headers=superuser_header_token, ) @@ -346,20 +316,19 @@ def test_ingest_events_when_event_type_invalid( superuser_header_token, basic_s3_client, ): + + input_json = build_json_file( + { + "families": [{**default_family, "collections": []}], + "documents": [default_document], + "events": [{**default_event, "event_type_value": "Invalid"}], + } + ) + with caplog.at_level(logging.ERROR): response = client.post( "/api/v1/ingest/UNFCCC.corpus.i00000001.n0000", - files={ - "new_data": open( - os.path.join( - "tests", - "integration_tests", - "ingest", - "test_bulk_data_with_invalid_event_type.json", - ), - "rb", - ) - }, + files={"new_data": input_json}, headers=superuser_header_token, ) diff --git a/tests/mocks/repos/family_repo.py b/tests/mocks/repos/family_repo.py index a766bfcd..57eda0bd 100644 --- a/tests/mocks/repos/family_repo.py +++ b/tests/mocks/repos/family_repo.py @@ -1,5 +1,6 @@ from typing import Optional, Union +from db_client.models.organisation.users import Organisation from sqlalchemy.orm import Session from app.errors import RepositoryError @@ -75,8 +76,15 @@ def count(db: Session, org_id: Optional[int]) -> Optional[int]: return 11 -def get_organisation(db: Session, family_import_id: str) -> Optional[int]: +def get_organisation(db: Session, family_import_id: str) -> Optional[Organisation]: _maybe_throw() if family_repo.no_org: return None - return ALTERNATIVE_ORG_ID if family_repo.alternative_org else STANDARD_ORG_ID + org = Organisation( + id=ALTERNATIVE_ORG_ID if family_repo.alternative_org else STANDARD_ORG_ID, + name="", + display_name="", + description="", + organisation_type="", + ) + return org diff --git a/tests/unit_tests/routers/ingest/test_bulk_data.json b/tests/unit_tests/routers/ingest/test_bulk_data.json deleted file mode 100644 index a5d56793..00000000 --- a/tests/unit_tests/routers/ingest/test_bulk_data.json +++ /dev/null @@ -1,74 +0,0 @@ -{ - "collections": [ - { - "import_id": "test.new.collection.0", - "title": "Test title", - "description": "Test description" - }, - { - "import_id": "test.new.collection.1", - "title": "Test title", - "description": "Test description" - } - ], - "families": [ - { - "import_id": "test.new.family.0", - "title": "Test", - "summary": "Test", - "geographies": ["Test"], - "category": "UNFCCC", - "metadata": { - "color": ["blue"], - "size": [] - }, - "collections": ["test.new.collection.0"] - }, - { - "import_id": "test.new.family.1", - "title": "Test", - "summary": "Test", - "geographies": ["Test"], - "category": "UNFCCC", - "metadata": { - "color": ["pink"], - "size": [] - }, - "collections": ["test.new.collection.1"] - } - ], - "documents": [ - { - "import_id": "test.new.document.0", - "family_import_id": "test.new.family.0", - "metadata": { "color": ["pink"] }, - "variant_name": "Test", - "title": "", - "user_language_name": "" - }, - { - "import_id": "test.new.document.1", - "family_import_id": "test.new.family.1", - "metadata": { "color": ["pink"] }, - "variant_name": "Test", - "title": "", - "user_language_name": "" - } - ], - "events": [ - { - "import_id": "test.new.event.0", - "family_import_id": "test.new.family.0", - "event_title": "Test", - "date": "2000-01-01T00:00:00.000Z", - "event_type_value": "Amended" - }, - { - "import_id": "test.new.event.1", - "family_import_id": "test.new.family.1", - "event_title": "Test", - "date": "2000-01-01T00:00:00.000Z", - "event_type_value": "Amended" - } - ] -} diff --git a/tests/unit_tests/routers/ingest/test_bulk_ingest.py b/tests/unit_tests/routers/ingest/test_bulk_ingest.py index b2b700c8..b1dd8291 100644 --- a/tests/unit_tests/routers/ingest/test_bulk_ingest.py +++ b/tests/unit_tests/routers/ingest/test_bulk_ingest.py @@ -1,26 +1,58 @@ -""" -Tests the route for bulk import of data. - -This uses service mocks and ensures the endpoint calls into each service. -""" - import io import json -import os from unittest.mock import patch -import pytest from fastapi import status from fastapi.testclient import TestClient -from app.errors import ValidationError -from app.service.validation import validate_entity_relationships +from tests.helpers.ingest import ( + build_json_file, + default_collection, + default_document, + default_event, + default_family, +) -def test_ingest_when_not_authenticated(client: TestClient): - response = client.post( - "/api/v1/ingest/test", +def create_input_json_with_two_of_each_entity(): + return build_json_file( + { + "collections": [ + default_collection, + {**default_collection, "import_id": "test.new.collection.1"}, + ], + "families": [ + {**default_family, "metadata": {"color": ["blue"], "size": []}}, + { + **default_family, + "import_id": "test.new.family.1", + "collections": ["test.new.collection.1"], + "metadata": {"color": ["blue"], "size": []}, + }, + ], + "documents": [ + {**default_document, "metadata": {"color": ["pink"], "size": []}}, + { + **default_document, + "import_id": "test.new.document.1", + "family_import_id": "test.new.family.1", + "metadata": {"color": ["pink"], "size": []}, + }, + ], + "events": [ + default_event, + { + **default_event, + "import_id": "test.new.event.1", + "family_import_id": "test.new.family.1", + }, + ], + } ) + + +def test_ingest_when_not_authenticated(client: TestClient): + response = client.post("/api/v1/ingest/test") assert response.status_code == status.HTTP_401_UNAUTHORIZED @@ -36,22 +68,12 @@ def test_ingest_when_admin_non_super(client: TestClient, admin_user_header_token def test_ingest_data_when_ok(client: TestClient, superuser_header_token): corpus_import_id = "test" + input_json = create_input_json_with_two_of_each_entity() with patch("fastapi.BackgroundTasks.add_task") as background_task_mock: response = client.post( f"/api/v1/ingest/{corpus_import_id}", - files={ - "new_data": open( - os.path.join( - "tests", - "unit_tests", - "routers", - "ingest", - "test_bulk_data.json", - ), - "rb", - ) - }, + files={"new_data": input_json}, headers=superuser_header_token, ) @@ -84,56 +106,13 @@ def test_ingest_when_no_data( def test_ingest_documents_when_no_family(client: TestClient, superuser_header_token): - fam_import_id = "test.new.family.0" - test_data = json.dumps( - { - "documents": [ - {"import_id": "test.new.document.0", "family_import_id": fam_import_id} - ] - } - ).encode("utf-8") - test_data_file = io.BytesIO(test_data) + json_input = build_json_file({"documents": [default_document]}) response = client.post( "/api/v1/ingest/test", - files={"new_data": test_data_file}, + files={"new_data": json_input}, headers=superuser_header_token, ) assert response.status_code == status.HTTP_400_BAD_REQUEST - assert response.json().get("detail") == f"No entity with id {fam_import_id} found" - - -def test_validate_entity_relationships_when_no_family_matching_document(): - fam_import_id = "test.new.family.0" - test_data = { - "documents": [ - {"import_id": "test.new.document.0", "family_import_id": fam_import_id} - ] - } - - with pytest.raises(ValidationError) as e: - validate_entity_relationships(test_data) - assert f"No entity with id {fam_import_id} found" == e.value.message - - -def test_validate_entity_relationships_when_no_family_matching_event(): - fam_import_id = "test.new.family.0" - test_data = { - "events": [{"import_id": "test.new.event.0", "family_import_id": fam_import_id}] - } - - with pytest.raises(ValidationError) as e: - validate_entity_relationships(test_data) - assert f"No entity with id {fam_import_id} found" == e.value.message - - -def test_validate_entity_relationships_when_no_collection_matching_family(): - coll_import_id = "test.new.collection.0" - test_data = { - "families": [{"import_id": "test.new.event.0", "collections": [coll_import_id]}] - } - - with pytest.raises(ValidationError) as e: - validate_entity_relationships(test_data) - assert f"No entity with id {coll_import_id} found" == e.value.message + assert response.json().get("detail") == "No entity with id test.new.family.0 found" diff --git a/tests/unit_tests/service/validation/test_entity_relationship_validation.py b/tests/unit_tests/service/validation/test_entity_relationship_validation.py new file mode 100644 index 00000000..7e1cf3c6 --- /dev/null +++ b/tests/unit_tests/service/validation/test_entity_relationship_validation.py @@ -0,0 +1,39 @@ +import pytest + +from app.errors import ValidationError +from app.service.validation import validate_entity_relationships + + +def test_validate_entity_relationships_when_no_family_matching_document(): + fam_import_id = "test.new.family.0" + test_data = { + "documents": [ + {"import_id": "test.new.document.0", "family_import_id": fam_import_id} + ] + } + + with pytest.raises(ValidationError) as e: + validate_entity_relationships(test_data) + assert f"No entity with id {fam_import_id} found" == e.value.message + + +def test_validate_entity_relationships_when_no_family_matching_event(): + fam_import_id = "test.new.family.0" + test_data = { + "events": [{"import_id": "test.new.event.0", "family_import_id": fam_import_id}] + } + + with pytest.raises(ValidationError) as e: + validate_entity_relationships(test_data) + assert f"No entity with id {fam_import_id} found" == e.value.message + + +def test_validate_entity_relationships_when_no_collection_matching_family(): + coll_import_id = "test.new.collection.0" + test_data = { + "families": [{"import_id": "test.new.event.0", "collections": [coll_import_id]}] + } + + with pytest.raises(ValidationError) as e: + validate_entity_relationships(test_data) + assert f"No entity with id {coll_import_id} found" == e.value.message