-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Self serve corpus for superusers (#243)
* Update CorpusData to handle entity specific taxonomies * Add corpora get, all, search and update endpoints * Add searching within corpus_text * Fix corpus update * Remove rogue debug * Add search tests for /corpora * Add get tests for /corpora * Add all tests for /corpora * Remove debug * Add corpora info to setup_db * Change function signatures from doc to corpus * Linting fixes * Bump to 2.17.8 * Use validate instead of verify * Fix repo function name * Remove unused code * Bump ruff * Remove created and last modified for now * Add corpus auth * Ingest auth access should be admin not user * Make test tokens non admin * Driveby fix tests for ingest * Add todos for timestamps * Revert "Ingest auth access should be admin not user" This reverts commit 347a6eb. * Update test_ingest.py
- Loading branch information
1 parent
6b79bb1
commit 86cc83b
Showing
19 changed files
with
1,051 additions
and
209 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
import logging | ||
|
||
from fastapi import APIRouter, HTTPException, Request, status | ||
|
||
from app.api.api_v1.query_params import ( | ||
get_query_params_as_dict, | ||
set_default_query_params, | ||
validate_query_params, | ||
) | ||
from app.errors import AuthorisationError, RepositoryError, ValidationError | ||
from app.model.corpus import CorpusReadDTO, CorpusWriteDTO | ||
from app.service import corpus as corpus_service | ||
|
||
# Router collecting the corpus endpoints defined in this module. ``r`` is a
# short alias used by the route decorators below.
corpora_router = r = APIRouter()

# Name the logger after the module's dotted import path rather than its
# filesystem path (``__file__``), so it takes part in the normal hierarchical
# logging configuration.
_LOGGER = logging.getLogger(__name__)
|
||
|
||
@r.get(
    "/corpora/{import_id}",
    response_model=CorpusReadDTO,
)
async def get_corpus(import_id: str) -> CorpusReadDTO:
    """
    Fetch a single corpus by its import id.

    :param str import_id: Import id of the corpus to look up.
    :raises HTTPException: 400 if the service raises a ValidationError.
    :raises HTTPException: 503 if the service raises a RepositoryError.
    :raises HTTPException: 404 if the service returns no corpus.
    :return CorpusReadDTO: The corpus returned by the service.
    """
    try:
        found = corpus_service.get(import_id)
    except ValidationError as err:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=err.message,
        )
    except RepositoryError as err:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail=err.message,
        )

    if found is not None:
        return found

    # The service signals "no such corpus" by returning None.
    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail=f"Corpus not found: {import_id}",
    )
|
||
|
||
@r.get("/corpora", response_model=list[CorpusReadDTO])
async def get_all_corpora(request: Request) -> list[CorpusReadDTO]:
    """
    List corpora for the requesting user.

    :param Request request: Request object; ``request.state.user`` is
        passed to the corpus service.
    :raises HTTPException: 503 if the service raises a RepositoryError.
    :return list[CorpusReadDTO]: The corpora returned by the service.
    """
    try:
        corpora = corpus_service.all(request.state.user)
    except RepositoryError as err:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail=err.message,
        )
    return corpora
|
||
|
||
@r.get("/corpora/", response_model=list[CorpusReadDTO])
async def search_corpora(request: Request) -> list[CorpusReadDTO]:
    """
    Search for corpora using the URL query parameters.

    Only the ``q`` and ``max_results`` parameters are accepted; defaults
    for anything missing are filled in by ``set_default_query_params``.

    :param Request request: Request carrying the query parameters and the
        current user (``request.state.user``).
    :raises HTTPException: 400 if the service raises a ValidationError.
    :raises HTTPException: 503 if the service raises a RepositoryError.
    :raises HTTPException: 408 if the search raises a TimeoutError.
    :return list[CorpusReadDTO]: The matching corpora (which can be
        empty).
    """
    allowed_params = ["q", "max_results"]

    query_params = set_default_query_params(
        get_query_params_as_dict(request.query_params)
    )
    validate_query_params(query_params, allowed_params)

    try:
        matches = corpus_service.search(query_params, request.state.user)
    except ValidationError as err:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=err.message,
        )
    except RepositoryError as err:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail=err.message,
        )
    except TimeoutError:
        timeout_msg = (
            "Request timed out fetching matching corpora. Try adjusting your query."
        )
        _LOGGER.error(timeout_msg)
        raise HTTPException(
            status_code=status.HTTP_408_REQUEST_TIMEOUT,
            detail=timeout_msg,
        )

    # An empty result is not an error; it is only noted in the log.
    if not len(matches):
        _LOGGER.info(f"Corpora not found for terms: {query_params}")

    return matches
|
||
|
||
@r.put(
    "/corpora/{import_id}",
    response_model=CorpusReadDTO,
)
async def update_corpus(
    request: Request, import_id: str, new_corpus: CorpusWriteDTO
) -> CorpusReadDTO:
    """
    Updates a specific corpus given the import id.

    :param Request request: Request object; ``request.state.user`` is
        passed to the corpus service.
    :param str import_id: Specified corpus import_id.
    :param CorpusWriteDTO new_corpus: New values for the corpus.
    :raises HTTPException: 403 if the service raises an AuthorisationError.
    :raises HTTPException: 400 if the service raises a ValidationError.
    :raises HTTPException: 503 if the service raises a RepositoryError.
    :raises HTTPException: 404 if the service returns no corpus.
    :return CorpusReadDTO: returns a CorpusReadDTO of the corpus updated.
    """
    user = request.state.user
    # This is the update endpoint, so the log line says "updating" (it used
    # to say "getting") and includes the target corpus id.
    _LOGGER.info(f"User {user.email} is updating corpus {import_id}")
    try:
        corpus = corpus_service.update(import_id, new_corpus, user)
    except AuthorisationError as e:
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail=e.message)
    except ValidationError as e:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=e.message)
    except RepositoryError as e:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=e.message
        )

    if corpus is None:
        detail = f"Corpus not updated: {import_id}"
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=detail)

    return corpus
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
from typing import Optional | ||
|
||
from pydantic import BaseModel | ||
|
||
from app.model.general import Json | ||
|
||
|
||
class CorpusReadDTO(BaseModel):
    """Corpus as returned by the corpora read, search and update endpoints."""

    # Identity and descriptive content.
    import_id: str
    title: str
    description: str
    # NOTE(review): nullable but declared without an explicit default, unlike
    # corpus_image_url below - confirm this difference is intended.
    corpus_text: Optional[str]
    corpus_image_url: Optional[str] = None

    # Organisation fields.
    organisation_id: int
    organisation_name: str

    # Corpus type fields and metadata.
    corpus_type_name: str
    corpus_type_description: str
    metadata: Json

    # TODO: Add create and last modified timestamps.
|
||
|
||
class CorpusWriteDTO(BaseModel):
    """Request body accepted by the corpus update endpoint."""

    # Descriptive content.
    title: str
    description: str
    # Both fields below are nullable and declared without an explicit default.
    corpus_text: Optional[str]
    corpus_image_url: Optional[str]

    # Corpus type fields.
    corpus_type_name: str
    corpus_type_description: str
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.