Spike/get concepts associated with a document from vespa (#459)

* Add spike endpoint for doc from vespa
* Update pyproject.toml
climatepolicyradar · Feb 6, 2025 · 153955f · 153955f
1 parent 07ada5b
commit 153955f
Show file tree

Hide file tree

Showing 3 changed files with 70 additions and 2 deletions.
diff --git a/app/api/api_v1/routers/documents.py b/app/api/api_v1/routers/documents.py
@@ -16,7 +16,7 @@
     get_slugged_objects,
 )
 from app.service.custom_app import AppTokenFactory
-from app.service.search import get_family_from_vespa
+from app.service.search import get_document_from_vespa, get_family_from_vespa
 
 _LOGGER = logging.getLogger(__file__)
 
@@ -105,3 +105,49 @@ async def family_detail_from_vespa(
         return hits
     except ValueError as err:
         raise HTTPException(status_code=NOT_FOUND, detail=str(err))
+
+
+@documents_router.get("/document/{import_id}", response_model=SearchResponse)
+async def doc_detail_from_vespa(
+    import_id: str,
+    request: Request,
+    app_token: Annotated[str, Header()],
+    db=Depends(get_db),
+):
+    """Get details of the document with the given import id from vespa.
+
+    NOTE: As part of our concepts spike, we're going to use this endpoint
+    to get the document data from Vespa. The frontend will use this
+    endpoint alongside the `/documents` endpoint if feature flags are
+    enabled.
+
+    :param str import_id: Import id of the document to fetch from vespa.
+    :param Request request: Request object.
+    :param Annotated[str, Header()] app_token: App token containing
+        allowed corpora.
+    :param Depends[get_db] db: Database session to query against.
+    :return SearchResponse: An object representing the document in
+        Vespa - including concepts.
+    :raises HTTPException: NOT_FOUND if no hits or on a ValueError.
+    """
+    _LOGGER.info(
+        f"Getting detailed information for vespa document '{import_id}'",
+        extra={
+            "props": {"import_id_or_slug": import_id, "app_token": str(app_token)},
+        },
+    )
+
+    # Decode the app token and validate it.
+    token = AppTokenFactory()
+    token.decode_and_validate(db, request, app_token)
+
+    try:
+        # TODO: Make this respect the allowed corpora from the decoded token.
+        hits = get_document_from_vespa(document_id=import_id, db=db)
+        if hits.total_family_hits == 0:
+            raise HTTPException(
+                status_code=NOT_FOUND, detail=f"Nothing found for {import_id} in Vespa"
+            )
+        return hits
+    except ValueError as err:
+        raise HTTPException(status_code=NOT_FOUND, detail=str(err))
diff --git a/app/service/search.py b/app/service/search.py
@@ -614,6 +614,28 @@ def get_family_from_vespa(family_id: str, db: Session) -> CprSdkSearchResponse:
     return result
 
 
+def get_document_from_vespa(document_id: str, db: Session) -> CprSdkSearchResponse:
+    """Get a document from vespa by its document id.
+
+    :param str document_id: The id of the document to get.
+    :param Session db: Database session; not referenced in this function.
+    :return CprSdkSearchResponse: Result of the vespa search for that id.
+    """
+    search_body = SearchParameters(
+        document_ids=[document_id], documents_only=True, all_results=True
+    )
+
+    _LOGGER.info(
+        f"Getting vespa document '{document_id}'",
+        extra={"props": {"search_body": search_body.model_dump()}},
+    )
+    try:
+        result = _VESPA_CONNECTION.search(parameters=search_body)
+    except QueryError as e:
+        raise ValidationError(e)  # QueryError is re-raised as ValidationError
+    return result
+
+
 def get_s3_doc_url_from_cdn(
     s3_client: S3Client, s3_document: S3Document, data_dump_s3_key: str
 ) -> Optional[str]:

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "navigator_backend"
-version = "1.23.3"
+version = "1.23.4"
 description = ""
 authors = ["CPR-dev-team <[email protected]>"]
 packages = [{ include = "app" }, { include = "tests" }]