Skip to content

Commit

Permalink
Spike/get concepts associated with a document from vespa (#459)
Browse files Browse the repository at this point in the history
* Add spike endpoint for doc from vespa

* Update pyproject.toml
  • Loading branch information
katybaulch authored Feb 6, 2025
1 parent 07ada5b commit 153955f
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 2 deletions.
48 changes: 47 additions & 1 deletion app/api/api_v1/routers/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
get_slugged_objects,
)
from app.service.custom_app import AppTokenFactory
from app.service.search import get_family_from_vespa
from app.service.search import get_document_from_vespa, get_family_from_vespa

_LOGGER = logging.getLogger(__file__)

Expand Down Expand Up @@ -105,3 +105,49 @@ async def family_detail_from_vespa(
return hits
except ValueError as err:
raise HTTPException(status_code=NOT_FOUND, detail=str(err))


@documents_router.get("/document/{import_id}", response_model=SearchResponse)
async def doc_detail_from_vespa(
    import_id: str,
    request: Request,
    app_token: Annotated[str, Header()],
    db=Depends(get_db),
):
    """Get details of the document associated with an import id from vespa.

    NOTE: As part of our concepts spike, we're going to use this endpoint
    to get the document data from Vespa. The frontend will use this
    endpoint alongside the `/documents` endpoint if feature flags are
    enabled.

    :param str import_id: Document import id to get vespa representation
        for.
    :param Request request: Request object.
    :param Annotated[str, Header()] app_token: App token containing
        allowed corpora.
    :param Depends[get_db] db: Database session, passed to the token
        validation and to the vespa lookup.
    :raises HTTPException: With status NOT_FOUND when the lookup raises a
        ValueError or when the response reports zero family hits.
    :return SearchResponse: An object representing the document in
        Vespa - including concepts.
    """
    # NOTE(review): the raw app token is written to the log props here;
    # confirm this is acceptable for the log sink in use.
    _LOGGER.info(
        f"Getting detailed information for vespa document '{import_id}'",
        extra={
            "props": {"import_id_or_slug": import_id, "app_token": str(app_token)},
        },
    )

    # Decode the app token and validate it.
    token = AppTokenFactory()
    token.decode_and_validate(db, request, app_token)

    # Only the lookup is guarded: the ValueError is translated into a 404
    # and chained so the original cause stays visible in tracebacks.
    try:
        # TODO: Make this respect the allowed corpora from the decoded token.
        hits = get_document_from_vespa(document_id=import_id, db=db)
    except ValueError as err:
        raise HTTPException(status_code=NOT_FOUND, detail=str(err)) from err

    # An empty result set is reported to the client as "not found" too.
    if hits.total_family_hits == 0:
        raise HTTPException(
            status_code=NOT_FOUND, detail=f"Nothing found for {import_id} in Vespa"
        )
    return hits
22 changes: 22 additions & 0 deletions app/service/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,6 +614,28 @@ def get_family_from_vespa(family_id: str, db: Session) -> CprSdkSearchResponse:
return result


def get_document_from_vespa(document_id: str, db: Session) -> CprSdkSearchResponse:
    """Get a document from vespa.

    Builds a search restricted to the single given document id and runs it
    against the module-level vespa connection.

    :param str document_id: The id of the document to get.
    :param Session db: Database session. Not used by this function's body;
        kept so the signature stays compatible with existing callers.
    :raises ValidationError: If the vespa search raises a QueryError; the
        original error is attached as the cause.
    :return CprSdkSearchResponse: The document from vespa.
    """
    search_body = SearchParameters(
        document_ids=[document_id], documents_only=True, all_results=True
    )

    _LOGGER.info(
        f"Getting vespa document '{document_id}'",
        extra={"props": {"search_body": search_body.model_dump()}},
    )
    try:
        result = _VESPA_CONNECTION.search(parameters=search_body)
    except QueryError as e:
        # Chain explicitly so the underlying query failure is preserved.
        raise ValidationError(e) from e
    return result


def get_s3_doc_url_from_cdn(
s3_client: S3Client, s3_document: S3Document, data_dump_s3_key: str
) -> Optional[str]:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "navigator_backend"
version = "1.23.3"
version = "1.23.4"
description = ""
authors = ["CPR-dev-team <[email protected]>"]
packages = [{ include = "app" }, { include = "tests" }]
Expand Down

0 comments on commit 153955f

Please sign in to comment.