diff --git a/app/api/api_v1/routers/documents.py b/app/api/api_v1/routers/documents.py
index 0cf92c19..1d48c278 100644
--- a/app/api/api_v1/routers/documents.py
+++ b/app/api/api_v1/routers/documents.py
@@ -1,8 +1,8 @@
 import logging
 from http.client import NOT_FOUND
-from typing import Union
+from typing import Annotated, Union
 
-from fastapi import APIRouter, Depends, HTTPException
+from fastapi import APIRouter, Depends, Header, HTTPException, Request
 
 from app.clients.db.session import get_db
 from app.models.document import (
@@ -14,6 +14,7 @@
     get_family_document_and_context,
     get_slugged_objects,
 )
+from app.service.custom_app import AppTokenFactory
 
 _LOGGER = logging.getLogger(__file__)
 
@@ -28,20 +29,23 @@
     ],
 )
 async def family_or_document_detail(
-    slug: str,
-    db=Depends(get_db),
+    slug: str, request: Request, app_token: Annotated[str, Header()], db=Depends(get_db)
 ):
     """Get details of the family or document associated with the slug."""
     _LOGGER.info(
         f"Getting detailed information for family or document '{slug}'",
         extra={
-            "props": {
-                "import_id_or_slug": slug,
-            },
+            "props": {"import_id_or_slug": slug, "app_token": str(app_token)},
         },
     )
 
-    family_document_import_id, family_import_id = get_slugged_objects(db, slug)
+    # Decode the app token and validate it.
+    token = AppTokenFactory()
+    token.decode_and_validate(db, request, app_token)
+
+    family_document_import_id, family_import_id = get_slugged_objects(
+        db, slug, token.allowed_corpora_ids
+    )
 
     if family_document_import_id is None and family_import_id is None:
         raise HTTPException(status_code=NOT_FOUND, detail=f"Nothing found for {slug}")
diff --git a/app/api/api_v1/routers/search.py b/app/api/api_v1/routers/search.py
index cb5ebe87..a7749c33 100644
--- a/app/api/api_v1/routers/search.py
+++ b/app/api/api_v1/routers/search.py
@@ -277,8 +277,6 @@ def download_all_search_documents(
     request: Request, app_token: Annotated[str, Header()], db=Depends(get_db)
 ) -> RedirectResponse:
     """Download a CSV containing details of all the documents in the corpus."""
-    token = AppTokenFactory()
-
     _LOGGER.info(
         "Whole data download request",
         extra={
diff --git a/app/repository/document.py b/app/repository/document.py
index 519735dd..57579a26 100644
--- a/app/repository/document.py
+++ b/app/repository/document.py
@@ -5,6 +5,7 @@
 """
 
 import logging
+import os
 from datetime import datetime
 from typing import Optional, Sequence, cast
 
@@ -34,29 +35,69 @@
     LinkableFamily,
 )
 from app.repository.geography import get_geo_subquery
+from app.repository.helpers import get_query_template
 from app.repository.lookups import doc_type_from_family_document_metadata
 from app.service.util import to_cdn_url
 
 _LOGGER = logging.getLogger(__file__)
 
 
-def get_slugged_objects(db: Session, slug: str) -> tuple[Optional[str], Optional[str]]:
+def get_slugged_object_from_allowed_corpora_query(
+    template_query, slug_name: str, allowed_corpora_ids: list[str]
+) -> str:
+    """Create the slug lookup query for the given allowed corpora.
+
+    :param str template_query: The SQL query template to populate.
+    :param str slug_name: The slug name to look up.
+    :param list[str] allowed_corpora_ids: The corpora IDs within which
+        to search for the slugged object.
+    :return str: The SQL query to perform on the database session.
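+
+    For example (a sketch of the substitution this function performs):
+    slug_name "fam-slug-1" with allowed_corpora_ids ["CCLW.corpus.1.0",
+    "CCLW.corpus.2.0"] replaces "{slug_name}" in the template with
+    fam-slug-1 and "{allowed_corpora_ids}" with the single-quoted,
+    comma-joined string 'CCLW.corpus.1.0','CCLW.corpus.2.0'.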
""" - Matches the slug name to a FamilyDocument or Family import_id + corpora_ids = "'" + "','".join(allowed_corpora_ids) + "'" + return template_query.replace("{slug_name}", slug_name).replace( # type: ignore + "{allowed_corpora_ids}", corpora_ids + ) # type: ignore + + +def get_slugged_objects( + db: Session, slug: str, allowed_corpora: Optional[list[str]] = None +) -> tuple[Optional[str], Optional[str]]: + """Match the slug name to a FamilyDocument or Family import ID. + + This function also contains logic to only get the import ID for the + family or document if the slug given is associated with a family + that belongs to the list of allowed corpora. :param Session db: connection to db :param str slug: slug name to match - :return tuple[Optional[str], Optional[str]]: the FamilyDocument import id or - the Family import_id + :param Optional[list[str]] allowed_corpora: The corpora IDs to look + for the slugged object in. + :return tuple[Optional[str], Optional[str]]: the FamilyDocument + import id or the Family import_id. """ - result = ( - db.query(Slug.family_document_import_id, Slug.family_import_id).filter( + if allowed_corpora is not None: + query_template = get_query_template( + os.path.join("app", "repository", "sql", "slug_lookup.sql") + ) + query = get_slugged_object_from_allowed_corpora_query( + query_template, slug, allowed_corpora + ) + query = db.execute(query) + else: + query = db.query(Slug.family_document_import_id, Slug.family_import_id).filter( Slug.name == slug ) - ).one_or_none() + + result = query.one_or_none() if result is None: return (None, None) - return result + + DOC_INDEX = 0 + doc_id = cast(str, result[DOC_INDEX]) if result[DOC_INDEX] is not None else None + + FAM_INDEX = 1 + fam_id = cast(str, result[FAM_INDEX]) if result[FAM_INDEX] is not None else None + + return doc_id, fam_id def get_family_document_and_context( diff --git a/app/repository/download.py b/app/repository/download.py index 69938182..1ed90396 100644 --- a/app/repository/download.py +++ b/app/repository/download.py @@ -1,24 +1,43 @@ """Functions to support browsing the RDS document structure""" import os -from functools import lru_cache from logging import getLogger import pandas as pd from fastapi import Depends from app.clients.db.session import get_db +from app.repository.helpers import get_query_template _LOGGER = getLogger(__name__) -@lru_cache() -def _get_query_template(): - with open(os.path.join("app", "repository", "sql", "download.sql"), "r") as file: - return file.read() +def create_query( + template_query, ingest_cycle_start: str, allowed_corpora_ids: list[str] +) -> str: + """Create download whole database query, replacing variables. + + :param str ingest_cycle_start: The current ingest cycle date. + :param list[str] allowed_corpora_ids: The corpora from which we + should allow the data to be dumped. + :return str: The SQL query to perform on the database session. 
+    """
+    corpora_ids = "'" + "','".join(allowed_corpora_ids) + "'"
+    return template_query.replace(  # type: ignore
+        "{ingest_cycle_start}", ingest_cycle_start
+    ).replace(
+        "{allowed_corpora_ids}", corpora_ids
+    )  # type: ignore
+
+
+def get_whole_database_dump(
+    ingest_cycle_start: str, allowed_corpora_ids: list[str], db=Depends(get_db)
+):
+    query_template = get_query_template(
+        os.path.join("app", "repository", "sql", "download.sql")
+    )
+    query = create_query(query_template, ingest_cycle_start, allowed_corpora_ids)
 
-
-def get_whole_database_dump(query, db=Depends(get_db)):
     with db.connection() as conn:
         df = pd.read_sql(query, conn.connection)
         return df
diff --git a/app/repository/helpers.py b/app/repository/helpers.py
new file mode 100644
index 00000000..e976683b
--- /dev/null
+++ b/app/repository/helpers.py
@@ -0,0 +1,14 @@
+"""
+Functions to support the documents endpoints.
+
+Old functions (non-DFC) have been moved to the deprecated_documents.py file.
+"""
+
+from functools import lru_cache
+
+
+@lru_cache()
+def get_query_template(filepath: str) -> str:
+    """Read and cache the SQL query template at the given filepath."""
+    with open(filepath, "r") as file:
+        return file.read()
diff --git a/app/repository/pipeline.py b/app/repository/pipeline.py
index 0eebaff4..d9eac447 100644
--- a/app/repository/pipeline.py
+++ b/app/repository/pipeline.py
@@ -1,7 +1,6 @@
 import logging
 import os
 from datetime import datetime, timezone
-from functools import lru_cache
 from typing import Sequence, cast
 
 import pandas as pd
@@ -11,19 +10,13 @@
 
 from app.clients.db.session import get_db
 from app.models.document import DocumentParserInput
+from app.repository.helpers import get_query_template
 
 _LOGGER = logging.getLogger(__name__)
 
 MetadataType = dict[str, list[str]]
 
 
-@lru_cache()
-def generate_pipeline_ingest_input_query():
-    """Read query for non-deleted docs and their associated data."""
-    with open(os.path.join("app", "repository", "sql", "pipeline.sql"), "r") as file:
-        return file.read()
-
-
 def get_pipeline_data(db=Depends(get_db)) -> pd.DataFrame:
     """Get non-deleted docs and their associated data from the db.
 
@@ -39,7 +32,7 @@ def get_pipeline_data(db=Depends(get_db)) -> pd.DataFrame:
     in database.
""" _LOGGER.info("Running pipeline query") - query = generate_pipeline_ingest_input_query() + query = get_query_template(os.path.join("app", "repository", "sql", "pipeline.sql")) df = pd.read_sql(query, db.connection().connection) return df diff --git a/app/repository/sql/slug_lookup.sql b/app/repository/sql/slug_lookup.sql new file mode 100644 index 00000000..9d649067 --- /dev/null +++ b/app/repository/sql/slug_lookup.sql @@ -0,0 +1,20 @@ +SELECT + slug.family_document_import_id, slug.family_import_id +FROM slug +LEFT JOIN family ON family.import_id = slug.family_import_id +LEFT JOIN family_corpus ON family_corpus.family_import_id = family.import_id +LEFT JOIN corpus ON corpus.import_id = family_corpus.corpus_import_id +WHERE slug.name = '{slug_name}' +AND corpus.import_id IN ({allowed_corpora_ids}) + +UNION + +SELECT + slug.family_document_import_id, slug.family_import_id +FROM slug +LEFT JOIN family_document ON family_document.import_id = slug.family_document_import_id +LEFT JOIN family ON family.import_id = family_document.family_import_id +LEFT JOIN family_corpus ON family_corpus.family_import_id = family.import_id +LEFT JOIN corpus ON corpus.import_id = family_corpus.corpus_import_id +WHERE slug.name = '{slug_name}' +AND corpus.import_id IN ({allowed_corpora_ids}); diff --git a/app/service/download.py b/app/service/download.py index dad6078d..d5110b01 100644 --- a/app/service/download.py +++ b/app/service/download.py @@ -9,29 +9,11 @@ from fastapi import Depends from app.clients.db.session import get_db -from app.repository.download import _get_query_template, get_whole_database_dump +from app.repository.download import get_whole_database_dump _LOGGER = getLogger(__name__) -def create_query( - template_query, ingest_cycle_start: str, allowed_corpora_ids: list[str] -) -> str: - """Create download whole database query, replacing variables. - - :param str ingest_cycle_start: The current ingest cycle date. - :param list[str] allowed_corpora_ids: The corpora from which we - should allow the data to be dumped. - :return str: The SQL query to perform on the database session. 
-    """
-    corpora_ids = "'" + "','".join(allowed_corpora_ids) + "'"
-    return template_query.replace(  # type: ignore
-        "{ingest_cycle_start}", ingest_cycle_start
-    ).replace(
-        "{allowed_corpora_ids}", corpora_ids
-    )  # type: ignore
-
-
 def replace_slug_with_qualified_url(
     df: pd.DataFrame,
     public_app_url: str,
@@ -61,8 +43,10 @@ def convert_dump_to_csv(df: pd.DataFrame):
     return csv_buffer
 
 
-def generate_data_dump_as_csv(query, db=Depends(get_db)):
-    df = get_whole_database_dump(query, db)
+def generate_data_dump_as_csv(
+    ingest_cycle_start: str, allowed_corpora_ids: list[str], db=Depends(get_db)
+):
+    df = get_whole_database_dump(ingest_cycle_start, allowed_corpora_ids, db)
     csv = convert_dump_to_csv(df)
     csv.seek(0)
     return csv
@@ -90,9 +74,8 @@ def create_data_download_zip_archive(
     ingest_cycle_start: str, allowed_corpora_ids: list[str], db=Depends(get_db)
 ):
     readme_buffer = generate_data_dump_readme(ingest_cycle_start)
-    query_template = _get_query_template()
-    query = create_query(query_template, ingest_cycle_start, allowed_corpora_ids)
-    csv_buffer = generate_data_dump_as_csv(query, db)
+
+    csv_buffer = generate_data_dump_as_csv(ingest_cycle_start, allowed_corpora_ids, db)
 
     zip_buffer = BytesIO()
     with zipfile.ZipFile(zip_buffer, "a", zipfile.ZIP_DEFLATED, False) as zip_file:
diff --git a/pyproject.toml b/pyproject.toml
index 85e6896a..0a2c8f23 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "navigator_backend"
-version = "1.19.10"
+version = "1.19.11"
 description = ""
 authors = ["CPR-dev-team "]
 packages = [{ include = "app" }, { include = "tests" }]
diff --git a/tests/conftest.py b/tests/conftest.py
index d897290d..6d6fb4cb 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -117,7 +117,7 @@ def valid_token(monkeypatch):
     def mock_return(_, __, ___):
         return True
 
-    corpora_ids = "CCLW.corpus.1.0,CCLW.corpus.2.0"
+    corpora_ids = "CCLW.corpus.1.0,CCLW.corpus.2.0,CCLW.corpus.i00000001.n0000"
     subject = "CCLW"
     audience = "localhost"
     input_str = f"{corpora_ids};{subject};{audience}"
@@ -127,6 +127,30 @@ def mock_return(_, __, ___):
     return af.create_configuration_token(input_str)
 
 
+@pytest.fixture
+def alternative_token(monkeypatch):
+    """Generate a valid alternative config token using TOKEN_SECRET_KEY.
+
+    The config token must be generated using the token secret key from
+    your local env file. For tests in CI, this will be the secret key
+    in the .env.example file, but for local development this key may
+    differ (e.g., the staging secret key). This fixture works around
+    that difference.
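+
+    Note: the corpus ID below (UNFCCC) does not overlap with the CCLW
+    corpora used by the test data setup helpers, so this token can be
+    used to exercise the corpora-mismatch (404) paths.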
+ """ + + def mock_return(_, __, ___): + return True + + corpora_ids = "UNFCCC.corpus.i00000001.n0000" + subject = "CPR" + audience = "localhost" + input_str = f"{corpora_ids};{subject};{audience}" + + af = AppTokenFactory() + monkeypatch.setattr(custom_app.AppTokenFactory, "validate", mock_return) + return af.create_configuration_token(input_str) + + @pytest.fixture def create_test_db(): """Create a test database and use it for the whole test session.""" diff --git a/tests/non_search/routers/documents/setup_doc_fam_lookup.py b/tests/non_search/routers/documents/setup_doc_fam_lookup.py new file mode 100644 index 00000000..7530e88c --- /dev/null +++ b/tests/non_search/routers/documents/setup_doc_fam_lookup.py @@ -0,0 +1,24 @@ +from typing import Optional + +from fastapi import status + +DOCUMENTS_ENDPOINT = "/api/v1/documents" +TEST_HOST = "http://localhost:3000/" + + +def _make_doc_fam_lookup_request( + client, + token, + slug: str, + expected_status_code: int = status.HTTP_200_OK, + origin: Optional[str] = TEST_HOST, +): + headers = ( + {"app-token": token} + if origin is None + else {"app-token": token, "origin": origin} + ) + + response = client.get(f"{DOCUMENTS_ENDPOINT}/{slug}", headers=headers) + assert response.status_code == expected_status_code, response.text + return response.json() diff --git a/tests/non_search/routers/documents/test_document_families.py b/tests/non_search/routers/documents/test_admin_doc_routes.py similarity index 94% rename from tests/non_search/routers/documents/test_document_families.py rename to tests/non_search/routers/documents/test_admin_doc_routes.py index c46bc4a3..4674620c 100644 --- a/tests/non_search/routers/documents/test_document_families.py +++ b/tests/non_search/routers/documents/test_admin_doc_routes.py @@ -10,7 +10,6 @@ ) from fastapi import status from fastapi.testclient import TestClient -from sqlalchemy import update from sqlalchemy.orm import Session from tests.non_search.setup_helpers import ( @@ -18,72 +17,10 @@ setup_docs_with_two_orgs_no_langs, setup_with_two_docs, setup_with_two_docs_bad_ids, - setup_with_two_docs_multiple_languages, setup_with_two_unpublished_docs, ) -def test_physical_doc_languages( - data_client: TestClient, - data_db: Session, -): - setup_with_two_docs(data_db) - - response = data_client.get( - "/api/v1/documents/DocSlug1", - ) - json_response = response.json() - document = json_response["document"] - - assert response.status_code == 200 - print(json_response) - assert document["languages"] == ["eng"] - - response = data_client.get( - "/api/v1/documents/DocSlug2", - ) - json_response = response.json() - document = json_response["document"] - - assert response.status_code == 200 - assert document["languages"] == [] - - -def test_physical_doc_languages_not_visible( - data_client: TestClient, - data_db: Session, -): - setup_with_two_docs(data_db) - data_db.execute(update(PhysicalDocumentLanguage).values(visible=False)) - - response = data_client.get( - "/api/v1/documents/DocSlug1", - ) - json_response = response.json() - document = json_response["document"] - - assert response.status_code == 200 - print(json_response) - assert document["languages"] == [] - - -def test_physical_doc_multiple_languages( - data_client: TestClient, - data_db: Session, -): - setup_with_two_docs_multiple_languages(data_db) - - response = data_client.get( - "/api/v1/documents/DocSlug1", - ) - json_response = response.json() - document = json_response["document"] - - assert response.status_code == 200 - print(json_response) - assert 
set(document["languages"]) == set(["fra", "eng"]) - - def test_update_document_status__is_secure( data_client: TestClient, data_db: Session, diff --git a/tests/non_search/routers/documents/test_get_document.py b/tests/non_search/routers/documents/test_get_document.py new file mode 100644 index 00000000..dafa1642 --- /dev/null +++ b/tests/non_search/routers/documents/test_get_document.py @@ -0,0 +1,193 @@ +import pytest +from db_client.models.dfce.family import Family, FamilyDocument, FamilyEvent +from db_client.models.document.physical_document import PhysicalDocumentLanguage +from fastapi import status +from fastapi.testclient import TestClient +from sqlalchemy import update +from sqlalchemy.orm import Session + +from tests.non_search.routers.documents.setup_doc_fam_lookup import ( + _make_doc_fam_lookup_request, +) +from tests.non_search.setup_helpers import ( + setup_with_docs, + setup_with_two_docs, + setup_with_two_docs_multiple_languages, +) + +N_FAMILY_OVERVIEW_KEYS = 8 +N_DOCUMENT_KEYS = 12 + + +def test_physical_doc_languages(data_client: TestClient, data_db: Session, valid_token): + setup_with_two_docs(data_db) + + json_response = _make_doc_fam_lookup_request(data_client, valid_token, "DocSlug1") + document = json_response["document"] + print(json_response) + assert document["languages"] == ["eng"] + + json_response = _make_doc_fam_lookup_request(data_client, valid_token, "DocSlug2") + document = json_response["document"] + assert document["languages"] == [] + + +def test_physical_doc_languages_not_visible( + data_client: TestClient, data_db: Session, valid_token +): + setup_with_two_docs(data_db) + data_db.execute(update(PhysicalDocumentLanguage).values(visible=False)) + + json_response = _make_doc_fam_lookup_request(data_client, valid_token, "DocSlug1") + document = json_response["document"] + print(json_response) + assert document["languages"] == [] + + +def test_physical_doc_multiple_languages( + data_client: TestClient, data_db: Session, valid_token +): + setup_with_two_docs_multiple_languages(data_db) + + json_response = _make_doc_fam_lookup_request(data_client, valid_token, "DocSlug1") + document = json_response["document"] + print(json_response) + assert set(document["languages"]) == set(["fra", "eng"]) + + +def test_documents_doc_slug_returns_not_found( + data_client: TestClient, data_db: Session, valid_token +): + setup_with_docs(data_db) + assert data_db.query(Family).count() == 1 + assert data_db.query(FamilyEvent).count() == 1 + + # Test associations + json_response = _make_doc_fam_lookup_request( + data_client, + valid_token, + "DocSlug100", + expected_status_code=status.HTTP_404_NOT_FOUND, + ) + assert json_response["detail"] == "Nothing found for DocSlug100" + + +@pytest.mark.parametrize( + ("slug", "expected_fam", "expected_doc"), + [ + ( + "DocSlug1", + { + "title": "Fam1", + "import_id": "CCLW.family.1001.0", + "geographies": ["South Asia"], + "category": "Executive", + "slug": "FamSlug1", + "corpus_id": "CCLW.corpus.i00000001.n0000", + "published_date": "2019-12-25T00:00:00Z", + "last_updated_date": "2019-12-25T00:00:00Z", + }, + { + "import_id": "CCLW.executive.1.2", + "variant": "Original Language", + "slug": "DocSlug1", + "title": "Document1", + "md5_sum": "111", + "cdn_object": None, + "content_type": "application/pdf", + "source_url": "http://somewhere1", + "language": "eng", + "languages": ["eng"], + "document_type": "Plan", + "document_role": "MAIN", + }, + ), + ( + "DocSlug2", + { + "title": "Fam2", + "import_id": "CCLW.family.2002.0", + "geographies": 
["AFG", "IND"], + "category": "Executive", + "slug": "FamSlug2", + "corpus_id": "CCLW.corpus.i00000001.n0000", + "published_date": "2019-12-25T00:00:00Z", + "last_updated_date": "2019-12-25T00:00:00Z", + }, + { + "import_id": "CCLW.executive.2.2", + "variant": None, + "slug": "DocSlug2", + "title": "Document2", + "md5_sum": None, + "cdn_object": None, + "content_type": None, + "source_url": "http://another_somewhere", + "language": "", + "languages": [], + "document_type": "Order", + "document_role": "MAIN", + }, + ), + ], +) +def test_documents_doc_slug_preexisting_objects( + data_client: TestClient, + data_db: Session, + slug, + expected_fam, + expected_doc, + valid_token, +): + setup_with_two_docs(data_db) + + json_response = _make_doc_fam_lookup_request( + data_client, + valid_token, + slug, + ) + assert len(json_response) == 2 + + family = json_response["family"] + assert family + assert len(family.keys()) == N_FAMILY_OVERVIEW_KEYS + assert family == expected_fam + + doc = json_response["document"] + assert doc + assert len(doc) == N_DOCUMENT_KEYS + assert doc == expected_doc + + +def test_documents_doc_slug_when_deleted( + data_client: TestClient, data_db: Session, valid_token +): + setup_with_two_docs(data_db) + data_db.execute( + update(FamilyDocument) + .where(FamilyDocument.import_id == "CCLW.executive.2.2") + .values(document_status="Deleted") + ) + + json_response = _make_doc_fam_lookup_request( + data_client, + valid_token, + "DocSlug2", + expected_status_code=status.HTTP_404_NOT_FOUND, + ) + assert json_response["detail"] == "The document CCLW.executive.2.2 is not published" + + +def test_documents_doc_slug_returns_404_when_corpora_mismatch( + data_client: TestClient, data_db: Session, alternative_token +): + setup_with_two_docs(data_db) + + # Test associations + json_response = _make_doc_fam_lookup_request( + data_client, + alternative_token, + "DocSlug1", + expected_status_code=status.HTTP_404_NOT_FOUND, + ) + assert json_response["detail"] == "Nothing found for DocSlug1" diff --git a/tests/non_search/routers/documents/test_get_document_families.py b/tests/non_search/routers/documents/test_get_family.py similarity index 53% rename from tests/non_search/routers/documents/test_get_document_families.py rename to tests/non_search/routers/documents/test_get_family.py index 828ba603..e149f2ca 100644 --- a/tests/non_search/routers/documents/test_get_document_families.py +++ b/tests/non_search/routers/documents/test_get_family.py @@ -1,9 +1,13 @@ import pytest from db_client.models.dfce.family import Family, FamilyDocument, FamilyEvent +from fastapi import status from fastapi.testclient import TestClient from sqlalchemy import update from sqlalchemy.orm import Session +from tests.non_search.routers.documents.setup_doc_fam_lookup import ( + _make_doc_fam_lookup_request, +) from tests.non_search.setup_helpers import ( setup_with_docs, setup_with_two_docs, @@ -11,48 +15,45 @@ ) N_FAMILY_KEYS = 15 -N_FAMILY_OVERVIEW_KEYS = 8 N_DOCUMENT_KEYS = 12 def test_documents_family_slug_returns_not_found( - data_db: Session, - data_client: TestClient, + data_db: Session, data_client: TestClient, valid_token ): setup_with_docs(data_db) assert data_db.query(Family).count() == 1 assert data_db.query(FamilyEvent).count() == 1 # Test by slug - response = data_client.get( - "/api/v1/documents/FamSlug100", + json_response = _make_doc_fam_lookup_request( + data_client, + valid_token, + "FamSlug100", + expected_status_code=status.HTTP_404_NOT_FOUND, ) - assert response.status_code == 404 - assert 
response.json()["detail"] == "Nothing found for FamSlug100" + assert json_response["detail"] == "Nothing found for FamSlug100" def test_documents_family_slug_returns_correct_family( - data_db: Session, - data_client: TestClient, + data_db: Session, data_client: TestClient, valid_token ): setup_with_two_docs(data_db) # Test by slug - response = data_client.get( - "/api/v1/documents/FamSlug1", + json_response = _make_doc_fam_lookup_request( + data_client, + valid_token, + "FamSlug1", ) - - json_response = response.json() - assert response.status_code == 200 assert json_response["import_id"] == "CCLW.family.1001.0" # Ensure a different family is returned - response = data_client.get( - "/api/v1/documents/FamSlug2", + json_response = _make_doc_fam_lookup_request( + data_client, + valid_token, + "FamSlug2", ) - - json_response = response.json() - assert response.status_code == 200 assert json_response["import_id"] == "CCLW.family.2002.0" @@ -124,17 +125,21 @@ def test_documents_family_slug_returns_correct_family( ], ) def test_documents_family_slug_returns_correct_json( - data_client: TestClient, data_db: Session, slug, expected_fam, expected_doc + data_client: TestClient, + data_db: Session, + slug, + expected_fam, + expected_doc, + valid_token, ): setup_with_two_docs(data_db) # Test associations - response = data_client.get( - f"/api/v1/documents/{slug}", + json_response = _make_doc_fam_lookup_request( + data_client, + valid_token, + slug, ) - json_response = response.json() - - assert response.status_code == 200 # Verify family data correct. assert len(json_response) == N_FAMILY_KEYS @@ -170,23 +175,20 @@ def test_documents_family_slug_returns_correct_json( def test_documents_family_slug_returns_multiple_docs( - data_client: TestClient, - data_db: Session, + data_client: TestClient, data_db: Session, valid_token ): setup_with_two_docs_one_family(data_db) - response = data_client.get( - "/api/v1/documents/FamSlug1", + json_response = _make_doc_fam_lookup_request( + data_client, + valid_token, + "FamSlug1", ) - json_response = response.json() - - assert response.status_code == 200 assert len(json_response["documents"]) == 2 def test_documents_family_slug_returns_only_published_docs( - data_client: TestClient, - data_db: Session, + data_client: TestClient, data_db: Session, valid_token ): setup_with_two_docs_one_family(data_db) data_db.execute( @@ -196,18 +198,16 @@ def test_documents_family_slug_returns_only_published_docs( ) # Test associations - response = data_client.get( - "/api/v1/documents/FamSlug1", + json_response = _make_doc_fam_lookup_request( + data_client, + valid_token, + "FamSlug1", ) - json_response = response.json() - - assert response.status_code == 200 assert len(json_response["documents"]) == 1 def test_documents_family_slug_returns_404_when_all_docs_deleted( - data_client: TestClient, - data_db: Session, + data_client: TestClient, data_db: Session, valid_token ): setup_with_two_docs_one_family(data_db) data_db.execute( @@ -222,126 +222,25 @@ def test_documents_family_slug_returns_404_when_all_docs_deleted( ) # Test associations - response = data_client.get( - "/api/v1/documents/FamSlug1", + json_response = _make_doc_fam_lookup_request( + data_client, + valid_token, + "FamSlug1", + expected_status_code=status.HTTP_404_NOT_FOUND, ) - json_response = response.json() - - assert response.status_code == 404 assert json_response["detail"] == "Family CCLW.family.1001.0 is not published" -def test_documents_doc_slug_returns_not_found( - data_client: TestClient, - data_db: Session, 
+def test_documents_family_slug_returns_404_when_corpora_mismatch( + data_client: TestClient, data_db: Session, alternative_token ): - setup_with_docs(data_db) - assert data_db.query(Family).count() == 1 - assert data_db.query(FamilyEvent).count() == 1 + setup_with_two_docs_one_family(data_db) # Test associations - response = data_client.get( - "/api/v1/documents/DocSlug100", - ) - assert response.status_code == 404 - assert response.json()["detail"] == "Nothing found for DocSlug100" - - -@pytest.mark.parametrize( - ("slug", "expected_fam", "expected_doc"), - [ - ( - "DocSlug1", - { - "title": "Fam1", - "import_id": "CCLW.family.1001.0", - "geographies": ["South Asia"], - "category": "Executive", - "slug": "FamSlug1", - "corpus_id": "CCLW.corpus.i00000001.n0000", - "published_date": "2019-12-25T00:00:00Z", - "last_updated_date": "2019-12-25T00:00:00Z", - }, - { - "import_id": "CCLW.executive.1.2", - "variant": "Original Language", - "slug": "DocSlug1", - "title": "Document1", - "md5_sum": "111", - "cdn_object": None, - "content_type": "application/pdf", - "source_url": "http://somewhere1", - "language": "eng", - "languages": ["eng"], - "document_type": "Plan", - "document_role": "MAIN", - }, - ), - ( - "DocSlug2", - { - "title": "Fam2", - "import_id": "CCLW.family.2002.0", - "geographies": ["AFG", "IND"], - "category": "Executive", - "slug": "FamSlug2", - "corpus_id": "CCLW.corpus.i00000001.n0000", - "published_date": "2019-12-25T00:00:00Z", - "last_updated_date": "2019-12-25T00:00:00Z", - }, - { - "import_id": "CCLW.executive.2.2", - "variant": None, - "slug": "DocSlug2", - "title": "Document2", - "md5_sum": None, - "cdn_object": None, - "content_type": None, - "source_url": "http://another_somewhere", - "language": "", - "languages": [], - "document_type": "Order", - "document_role": "MAIN", - }, - ), - ], -) -def test_documents_doc_slug_preexisting_objects( - data_client: TestClient, data_db: Session, slug, expected_fam, expected_doc -): - setup_with_two_docs(data_db) - - response = data_client.get( - f"/api/v1/documents/{slug}", - ) - json_response = response.json() - assert response.status_code == 200 - assert len(json_response) == 2 - - family = json_response["family"] - assert family - assert len(family.keys()) == N_FAMILY_OVERVIEW_KEYS - assert family == expected_fam - - doc = json_response["document"] - assert doc - assert len(doc) == N_DOCUMENT_KEYS - assert doc == expected_doc - - -def test_documents_doc_slug_when_deleted( - data_client: TestClient, - data_db: Session, -): - setup_with_two_docs(data_db) - data_db.execute( - update(FamilyDocument) - .where(FamilyDocument.import_id == "CCLW.executive.2.2") - .values(document_status="Deleted") - ) - response = data_client.get( - "/api/v1/documents/DocSlug2", + json_response = _make_doc_fam_lookup_request( + data_client, + alternative_token, + "FamSlug1", + expected_status_code=status.HTTP_404_NOT_FOUND, ) - json_response = response.json() - assert response.status_code == 404 - assert json_response["detail"] == "The document CCLW.executive.2.2 is not published" + assert json_response["detail"] == "Nothing found for FamSlug1" diff --git a/tests/non_search/routers/lookups/test_cors.py b/tests/non_search/routers/lookups/test_cors.py index b7e15002..3a67e483 100644 --- a/tests/non_search/routers/lookups/test_cors.py +++ b/tests/non_search/routers/lookups/test_cors.py @@ -19,6 +19,13 @@ ("https://app.devclimatepolicyradar.com", False), # prefixed wrong domain ("https://app-climatepolicyradar.com", False), # prefixed wrong domain 
("https://prefix-climate-laws.org", False), # climate laws prefixed domain + ("https://.climateprojectexplorer.org", False), # empty subdomain + ("https://prefix-climateprojectexplorer.org", False), # MCF prefixed domain + ("https://climateprojectexplorer.org", True), # base MCF URL + ( + "https://preview.climateprojectexplorer.org", + True, + ), # MCF subdomain URL ], ) def test_cors_regex(test_client, origin, should_be_allowed):