From 7959e557687aa87fbb47d4ee491062224e768deb Mon Sep 17 00:00:00 2001 From: Katy Baulch <46493669+katybaulch@users.noreply.github.com> Date: Thu, 9 May 2024 14:02:14 +0100 Subject: [PATCH] Add corpus info to config (#134) * Add corpus info to config * Push new lockfile to fix dependency error * Test corpora in config keys * Readd tests for checking corpora and taxonomy * Bump to 2.6.0 * Fix tests --- app/api/api_v1/routers/config.py | 2 +- app/model/config.py | 16 +- app/repository/config.py | 59 ++++- app/repository/organisation.py | 4 + app/service/config.py | 12 +- pyproject.toml | 2 +- tests/integration_tests/config/test_config.py | 224 +++++++++++++++++- tests/integration_tests/setup_db.py | 8 +- tests/mocks/repos/config_repo.py | 1 + tests/mocks/services/config_service.py | 3 +- .../routers/config/test_get_config.py | 1 + 11 files changed, 317 insertions(+), 15 deletions(-) diff --git a/app/api/api_v1/routers/config.py b/app/api/api_v1/routers/config.py index 6957f2f5..e342a469 100644 --- a/app/api/api_v1/routers/config.py +++ b/app/api/api_v1/routers/config.py @@ -16,7 +16,7 @@ async def get_config(request: Request) -> ConfigReadDTO: user = request.state.user _LOGGER.info(f"User {user.email} is getting config") try: - config = config_service.get() + config = config_service.get(user.email) except RepositoryError as e: raise HTTPException( status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=e.message diff --git a/app/model/config.py b/app/model/config.py index ab920fdf..e29a4d53 100644 --- a/app/model/config.py +++ b/app/model/config.py @@ -19,11 +19,25 @@ class DocumentConfig(BaseModel): variants: Sequence[str] +class CorpusData(BaseModel): + """Contains the Corpus and CorpusType info""" + + corpus_import_id: str + title: str + description: str + corpus_type: str + corpus_type_description: str + taxonomy: TaxonomyData + + class ConfigReadDTO(BaseModel): """Definition of the new Config which just includes taxonomy.""" geographies: Sequence[dict] - taxonomies: Mapping[str, TaxonomyData] + taxonomies: Mapping[ + str, TaxonomyData + ] # TODO: Will be Mapping[str, Sequence[CorpusData]] after PDCT-1052 finished + corpora: Sequence[CorpusData] languages: Mapping[str, str] document: DocumentConfig event: EventConfig diff --git a/app/repository/config.py b/app/repository/config.py index bbe61658..54a9092d 100644 --- a/app/repository/config.py +++ b/app/repository/config.py @@ -1,5 +1,6 @@ import logging -from typing import Any, Optional +from dataclasses import asdict +from typing import Any, Optional, Sequence from db_client.models.base import AnyModel from db_client.models.dfce.family import ( @@ -9,11 +10,18 @@ Variant, ) from db_client.models.dfce.geography import Geography +from db_client.models.dfce.taxonomy_entry import TaxonomyEntry from db_client.models.document.physical_document import Language from db_client.models.organisation import Corpus, CorpusType, Organisation from sqlalchemy.orm import Session -from app.model.config import ConfigReadDTO, DocumentConfig, EventConfig, TaxonomyData +from app.model.config import ( + ConfigReadDTO, + CorpusData, + DocumentConfig, + EventConfig, + TaxonomyData, +) _LOGGER = logging.getLogger(__name__) @@ -67,7 +75,49 @@ def _get_organisation_taxonomy_by_name( return metadata[0] -def get(db: Session) -> ConfigReadDTO: +def _to_corpus_data(row, event_types) -> CorpusData: + return CorpusData( + corpus_import_id=row.corpus_import_id, + title=row.title, + description=row.description, + corpus_type=row.corpus_type, + corpus_type_description=row.corpus_type_description, + taxonomy={ + **row.taxonomy, + "event_types": asdict(event_types), + }, + ) + + +def get_corpora_for_org(db: Session, org_id: int) -> Sequence[CorpusData]: + corpora = ( + db.query( + Corpus.import_id.label("corpus_import_id"), + Corpus.title.label("title"), + Corpus.description.label("description"), + Corpus.corpus_type_name.label("corpus_type"), + CorpusType.description.label("corpus_type_description"), + CorpusType.valid_metadata.label("taxonomy"), + ) + .join( + Corpus, + Corpus.corpus_type_name == CorpusType.name, + ) + .join(Organisation, Organisation.id == Corpus.organisation_id) + .filter(Organisation.id == org_id) + .all() + ) + + event_types = db.query(FamilyEventType).all() + entry = TaxonomyEntry( + allow_blanks=False, + allowed_values=[r.name for r in event_types], + allow_any=False, + ) + return [_to_corpus_data(row, entry) for row in corpora] + + +def get(db: Session, org_id: int) -> ConfigReadDTO: """ Returns the configuration for the admin service. @@ -84,6 +134,8 @@ def get(db: Session) -> ConfigReadDTO: if tax is not None: taxonomies[org.name] = tax + corpora = get_corpora_for_org(db, org_id) + languages = {lang.language_code: lang.name for lang in db.query(Language).all()} # Now Document config @@ -118,6 +170,7 @@ def get(db: Session) -> ConfigReadDTO: return ConfigReadDTO( geographies=geographies, taxonomies=taxonomies, + corpora=corpora, languages=languages, document=doc_config, event=event_config, diff --git a/app/repository/organisation.py b/app/repository/organisation.py index dd8283c0..2a50d2c8 100644 --- a/app/repository/organisation.py +++ b/app/repository/organisation.py @@ -6,3 +6,7 @@ def get_id_from_name(db: Session, org_name: str) -> Optional[int]: return db.query(Organisation.id).filter_by(name=org_name).scalar() + + +def get_name_from_id(db: Session, org_id: int) -> Optional[str]: + return db.query(Organisation.name).filter_by(id=org_id).scalar() diff --git a/app/service/config.py b/app/service/config.py index ce6fb24a..6a93e781 100644 --- a/app/service/config.py +++ b/app/service/config.py @@ -1,25 +1,33 @@ import logging +from pydantic import ConfigDict, validate_call from sqlalchemy import exc import app.clients.db.session as db_session import app.repository.config as config_repo from app.errors import RepositoryError from app.model.config import ConfigReadDTO +from app.service import app_user _LOGGER = logging.getLogger(__name__) -def get() -> ConfigReadDTO: +@validate_call(config=ConfigDict(arbitrary_types_allowed=True)) +def get(user_email: str) -> ConfigReadDTO: """ Gets the config :raises RepositoryError: If there is an issue getting the config :return ConfigReadDTO: The config for the application """ + try: with db_session.get_db() as db: - return config_repo.get(db) + # Get the organisation from the user's email + org_id = app_user.get_organisation(db, user_email) + + return config_repo.get(db, org_id) + except exc.SQLAlchemyError: _LOGGER.exception("Error while getting config") raise RepositoryError("Could not get the config") diff --git a/pyproject.toml b/pyproject.toml index b8842f31..5e6f3a1d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "admin_backend" -version = "2.5.1" +version = "2.6.0" description = "" authors = ["CPR-dev-team "] packages = [{ include = "app" }, { include = "tests" }] diff --git a/tests/integration_tests/config/test_config.py b/tests/integration_tests/config/test_config.py index 40bd9b82..afce399f 100644 --- a/tests/integration_tests/config/test_config.py +++ b/tests/integration_tests/config/test_config.py @@ -4,8 +4,14 @@ from tests.integration_tests.setup_db import setup_db +EXPECTED_CCLW_TAXONOMY = {"color", "size"} +EXPECTED_CCLW_COLOURS = ["green", "red", "pink", "blue"] +EXPECTED_UNFCCC_TAXONOMY = {"author", "author_type"} -def test_get_config(client: TestClient, data_db: Session, user_header_token): + +def test_get_config_has_expected_keys( + client: TestClient, data_db: Session, user_header_token +): setup_db(data_db) response = client.get( @@ -15,21 +21,235 @@ def test_get_config(client: TestClient, data_db: Session, user_header_token): assert response.status_code == status.HTTP_200_OK data = response.json() keys = data.keys() + assert "geographies" in keys assert "taxonomies" in keys + assert "corpora" in keys assert "languages" in keys assert "document" in keys assert "event" in keys + +def test_get_config_has_correct_organisations( + client: TestClient, data_db: Session, user_header_token +): + setup_db(data_db) + + response = client.get( + "/api/v1/config", + headers=user_header_token, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json() + # Now sanity check the data - assert data["geographies"][1]["node"]["slug"] == "south-asia" + # + # Organisations. + LEN_ORG_CONFIG = 2 assert "CCLW" in data["taxonomies"].keys() + cclw_org = data["taxonomies"]["CCLW"] + assert len(cclw_org) == LEN_ORG_CONFIG + + assert "UNFCCC" in data["taxonomies"] + unfccc_org = data["taxonomies"]["UNFCCC"] + assert len(unfccc_org) == LEN_ORG_CONFIG + + +# TODO: Remove as part of PDCT-1052 +def test_get_config_cclw_old_taxonomy_correct( + client: TestClient, data_db: Session, user_header_token +): + setup_db(data_db) + + response = client.get( + "/api/v1/config", + headers=user_header_token, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json() + + # Now sanity check the old taxonomy data + assert "CCLW" in data["taxonomies"].keys() + cclw_taxonomy = data["taxonomies"]["CCLW"] + assert set(cclw_taxonomy) == EXPECTED_CCLW_TAXONOMY + cclw_taxonomy_colours = cclw_taxonomy["color"]["allowed_values"] + assert set(cclw_taxonomy_colours) ^ set(EXPECTED_CCLW_COLOURS) == set() + + +# TODO: Remove as part of PDCT-1052 +def test_get_config_unfccc_old_taxonomy_correct( + client: TestClient, data_db: Session, user_header_token +): + setup_db(data_db) + + response = client.get( + "/api/v1/config", + headers=user_header_token, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json() + + # Now sanity check the old taxonomy data + assert "UNFCCC" in data["taxonomies"] + unfccc_taxonomy = data["taxonomies"]["UNFCCC"] + assert set(unfccc_taxonomy) == EXPECTED_UNFCCC_TAXONOMY + assert set(unfccc_taxonomy["author_type"]["allowed_values"]) == { + "Party", + "Non-Party", + } + + +def test_get_config_has_correct_number_corpora_cclw( + client: TestClient, data_db: Session, user_header_token +): + setup_db(data_db) + + response = client.get( + "/api/v1/config", + headers=user_header_token, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json() + + # Now sanity check the data + assert len(data["corpora"]) == 1 + + +def test_get_config_has_correct_number_corpora_unfccc( + client: TestClient, data_db: Session, non_cclw_user_header_token +): + setup_db(data_db) + + response = client.get( + "/api/v1/config", + headers=non_cclw_user_header_token, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json() + + # Now sanity check the data + assert len(data["corpora"]) == 1 + + +def test_get_config_cclw_corpora_correct( + client: TestClient, data_db: Session, user_header_token +): + setup_db(data_db) + + response = client.get( + "/api/v1/config", + headers=user_header_token, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json() + + # Now sanity check the new corpora data + cclw_corporas = data["corpora"] + + assert cclw_corporas[0]["corpus_import_id"] == "CCLW.corpus.i00000001.n0000" + assert cclw_corporas[0]["corpus_type"] == "Laws and Policies" + assert cclw_corporas[0]["corpus_type_description"] == "Laws and policies" + assert cclw_corporas[0]["description"] == "CCLW national policies" + assert cclw_corporas[0]["title"] == "CCLW national policies" + + cclw_taxonomy = cclw_corporas[0]["taxonomy"] + expected_cclw_taxonomy = {"color", "size"} + expected_cclw_taxonomy.add("event_types") + assert set(cclw_taxonomy) ^ expected_cclw_taxonomy == set() + + expected_cclw_colours = ["green", "red", "pink", "blue"] + cclw_taxonomy_colours = cclw_taxonomy["color"]["allowed_values"] + assert set(cclw_taxonomy_colours) ^ set(expected_cclw_colours) == set() + + +def test_get_config_unfccc_corpora_correct( + client: TestClient, data_db: Session, non_cclw_user_header_token +): + setup_db(data_db) + + response = client.get( + "/api/v1/config", + headers=non_cclw_user_header_token, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json() + # Now sanity check the new corpora data + unfccc_corporas = data["corpora"] + + assert unfccc_corporas[0]["corpus_import_id"] == "UNFCCC.corpus.1.0" + assert unfccc_corporas[0]["corpus_type"] == "Intl. agreements" + assert unfccc_corporas[0]["corpus_type_description"] == "Intl. agreements" + assert unfccc_corporas[0]["description"] == "UNFCCC Submissions" + assert unfccc_corporas[0]["title"] == "UNFCCC Submissions" + + expected_unfccc_taxonomy = {"author", "author_type"} + expected_unfccc_taxonomy.add("event_types") + assert set(unfccc_corporas[0]["taxonomy"]) ^ expected_unfccc_taxonomy == set() + + +def test_config_languages(client: TestClient, data_db: Session, user_header_token): + setup_db(data_db) + + response = client.get( + "/api/v1/config", + headers=user_header_token, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json() + + # Now sanity check the data + # + # Languages. assert "aaa" in data["languages"].keys() + +def test_config_documents(client: TestClient, data_db: Session, user_header_token): + setup_db(data_db) + + response = client.get( + "/api/v1/config", + headers=user_header_token, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json() + + # Now sanity check the data + # + # Documents.. assert "AMENDMENT" in data["document"]["roles"] assert "Action Plan" in data["document"]["types"] assert "Translation" in data["document"]["variants"] + +def test_config_events(client: TestClient, data_db: Session, user_header_token): + setup_db(data_db) + + response = client.get( + "/api/v1/config", + headers=user_header_token, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json() + + # Now sanity check the data + # + # Events. assert "Appealed" in data["event"]["types"] + + +def test_config_geographies(client: TestClient, data_db: Session, user_header_token): + setup_db(data_db) + + response = client.get( + "/api/v1/config", + headers=user_header_token, + ) + assert response.status_code == status.HTTP_200_OK + data = response.json() + + # Now sanity check the data + # + # Geographies. + assert data["geographies"][1]["node"]["slug"] == "south-asia" diff --git a/tests/integration_tests/setup_db.py b/tests/integration_tests/setup_db.py index c96b0601..41e8a26e 100644 --- a/tests/integration_tests/setup_db.py +++ b/tests/integration_tests/setup_db.py @@ -380,11 +380,11 @@ def _setup_family_data( test_db.flush() test_db.add( Corpus( - import_id="OTHER.corpus.1.0", - title="Test Corpus", - description="", + import_id="UNFCCC.corpus.1.0", + title="UNFCCC Submissions", + description="UNFCCC Submissions", organisation_id=other_org_id, - corpus_type_name="other-type", + corpus_type_name="Intl. agreements", ) ) test_db.flush() diff --git a/tests/mocks/repos/config_repo.py b/tests/mocks/repos/config_repo.py index 84c12594..80c5e071 100644 --- a/tests/mocks/repos/config_repo.py +++ b/tests/mocks/repos/config_repo.py @@ -19,6 +19,7 @@ def mock_get(_) -> Optional[ConfigReadDTO]: return ConfigReadDTO( geographies=[], taxonomies={}, + corpora=[], languages={}, document=DocumentConfig(roles=[], types=[], variants=[]), event=EventConfig(types=[]), diff --git a/tests/mocks/services/config_service.py b/tests/mocks/services/config_service.py index 14f77ad5..5a7e5a2d 100644 --- a/tests/mocks/services/config_service.py +++ b/tests/mocks/services/config_service.py @@ -11,11 +11,12 @@ def maybe_throw(): if config_service.throw_repository_error: raise RepositoryError("bad repo") - def mock_get_config() -> ConfigReadDTO: + def mock_get_config(_) -> ConfigReadDTO: maybe_throw() return ConfigReadDTO( geographies=[], taxonomies={}, + corpora=[], languages={}, document=DocumentConfig(roles=[], types=[], variants=[]), event=EventConfig(types=[]), diff --git a/tests/unit_tests/routers/config/test_get_config.py b/tests/unit_tests/routers/config/test_get_config.py index 6fe596ef..65c85324 100644 --- a/tests/unit_tests/routers/config/test_get_config.py +++ b/tests/unit_tests/routers/config/test_get_config.py @@ -9,6 +9,7 @@ def test_get_when_ok(client: TestClient, user_header_token, config_service_mock) keys = data.keys() assert "geographies" in keys assert "taxonomies" in keys + assert "corpora" in keys assert "languages" in keys assert "event" in keys assert "document" in keys