Skip to content

Commit

Permalink
Self serve corpus for superusers (#243)
Browse files Browse the repository at this point in the history
* Update CorpusData to handle entity specific taxonomies

* Add corpora get, all, search and update endpoints

* Add searching within corpus_text

* Fix corpus update

* Remove rogue debug

* Add search tests for /corpora

* Add get tests for /corpora

* Add all tests for /corpora

* Remove debug

* Add corpora info to setup_db

* Change function signatures from doc to corpus

* Linting fixes

* Bump to 2.17.8

* Use validate instead of verify

* Fix repo function name

* Remove unused code

* Bump ruff

* Remove created and last modified for now

* Add corpus auth

* Ingest auth access should be admin not user

* Make test tokens non admin

* Driveby fix tests for ingest

* Add todos for timestamps

* Revert "Ingest auth access should be admin not user"

This reverts commit 347a6eb.

* Update test_ingest.py
  • Loading branch information
katybaulch authored Nov 14, 2024
1 parent 6b79bb1 commit 86cc83b
Show file tree
Hide file tree
Showing 19 changed files with 1,051 additions and 209 deletions.
2 changes: 2 additions & 0 deletions app/api/api_v1/routers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from app.api.api_v1.routers.auth import auth_router
from app.api.api_v1.routers.collection import collections_router
from app.api.api_v1.routers.config import config_router
from app.api.api_v1.routers.corpus import corpora_router
from app.api.api_v1.routers.document import document_router
from app.api.api_v1.routers.event import event_router
from app.api.api_v1.routers.family import families_router
Expand All @@ -10,6 +11,7 @@
__all__ = (
"analytics_router",
"auth_router",
"corpora_router",
"collections_router",
"config_router",
"document_router",
Expand Down
141 changes: 141 additions & 0 deletions app/api/api_v1/routers/corpus.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
import logging

from fastapi import APIRouter, HTTPException, Request, status

from app.api.api_v1.query_params import (
get_query_params_as_dict,
set_default_query_params,
validate_query_params,
)
from app.errors import AuthorisationError, RepositoryError, ValidationError
from app.model.corpus import CorpusReadDTO, CorpusWriteDTO
from app.service import corpus as corpus_service

# Router holding the /corpora endpoints; `r` is a short alias used by the
# route decorators in this module.
corpora_router = r = APIRouter()

# NOTE(review): the logger is keyed on this module's file path (__file__);
# logging.getLogger(__name__) is the usual convention - confirm nothing
# depends on the current logger name before changing it.
_LOGGER = logging.getLogger(__file__)


@r.get(
    "/corpora/{import_id}",
    response_model=CorpusReadDTO,
)
async def get_corpus(import_id: str) -> CorpusReadDTO:
    """
    Return a specific corpus given its import id.

    :param str import_id: Specified corpus import_id.
    :raises HTTPException: If the service raises a ValidationError a 400
        is returned.
    :raises HTTPException: If the service raises a RepositoryError a 503
        is returned.
    :raises HTTPException: If the corpus is not found a 404 is returned.
    :return CorpusReadDTO: returns a CorpusReadDTO of the corpus found.
    """
    try:
        corpus = corpus_service.get(import_id)
    except ValidationError as e:
        # Chain the cause so the original service error stays attached to
        # the HTTP error in tracebacks.
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST, detail=e.message
        ) from e
    except RepositoryError as e:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=e.message
        ) from e

    if corpus is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Corpus not found: {import_id}",
        )

    return corpus


@r.get("/corpora", response_model=list[CorpusReadDTO])
async def get_all_corpora(request: Request) -> list[CorpusReadDTO]:
    """
    Return all corpora.

    :param Request request: Request object; the user stored on
        ``request.state`` is passed to the corpus service.
    :raises HTTPException: If the service raises a RepositoryError a 503
        is returned.
    :return list[CorpusReadDTO]: The corpora returned by the service.
    """
    try:
        return corpus_service.all(request.state.user)
    except RepositoryError as e:
        # Chain the cause so the repository error stays attached to the
        # HTTP error in tracebacks.
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=e.message
        ) from e


@r.get("/corpora/", response_model=list[CorpusReadDTO])
async def search_corpora(request: Request) -> list[CorpusReadDTO]:
    """
    Search for corpora matching URL parameters ("q" by default).

    :param Request request: The fields to match against and the values
        to search for. Defaults to searching for "" in corpus titles and
        descriptions.
    :raises HTTPException: If invalid fields passed a 400 is returned.
    :raises HTTPException: If a DB error occurs a 503 is returned.
    :raises HTTPException: If the search request times out a 408 is
        returned.
    :return list[CorpusReadDTO]: A list of matching corpora (which
        can be empty).
    """
    query_params = get_query_params_as_dict(request.query_params)
    query_params = set_default_query_params(query_params)

    # Only these query parameters are accepted by this endpoint.
    valid_params = ["q", "max_results"]
    validate_query_params(query_params, valid_params)

    try:
        corpora = corpus_service.search(query_params, request.state.user)
    except ValidationError as e:
        # Chain the cause on every handler so the underlying error stays
        # attached to the HTTP error in tracebacks.
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST, detail=e.message
        ) from e
    except RepositoryError as e:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=e.message
        ) from e
    except TimeoutError as e:
        msg = "Request timed out fetching matching corpora. Try adjusting your query."
        _LOGGER.error(msg)
        raise HTTPException(
            status_code=status.HTTP_408_REQUEST_TIMEOUT,
            detail=msg,
        ) from e

    # An empty result is not an error; it is only logged.
    if not corpora:
        _LOGGER.info(f"Corpora not found for terms: {query_params}")

    return corpora


@r.put(
    "/corpora/{import_id}",
    response_model=CorpusReadDTO,
)
async def update_corpus(
    request: Request, import_id: str, new_corpus: CorpusWriteDTO
) -> CorpusReadDTO:
    """
    Update a specific corpus given its import id.

    :param Request request: Request object; the user stored on
        ``request.state`` is passed to the corpus service.
    :param str import_id: Specified corpus import_id.
    :param CorpusWriteDTO new_corpus: New values for the corpus.
    :raises HTTPException: If the service raises an AuthorisationError a
        403 is returned.
    :raises HTTPException: If the service raises a ValidationError a 400
        is returned.
    :raises HTTPException: If the service raises a RepositoryError a 503
        is returned.
    :raises HTTPException: If the service returns no corpus a 404 is
        returned.
    :return CorpusReadDTO: returns a CorpusReadDTO of the corpus updated.
    """
    user = request.state.user
    # This is the update endpoint, so the log line says "updating".
    _LOGGER.info(f"User {user.email} is updating corpus")
    try:
        corpus = corpus_service.update(import_id, new_corpus, user)
    except AuthorisationError as e:
        # Chain the cause on every handler so the underlying error stays
        # attached to the HTTP error in tracebacks.
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN, detail=e.message
        ) from e
    except ValidationError as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST, detail=e.message
        ) from e
    except RepositoryError as e:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=e.message
        ) from e

    if corpus is None:
        detail = f"Corpus not updated: {import_id}"
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=detail)

    return corpus
7 changes: 7 additions & 0 deletions app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
auth_router,
collections_router,
config_router,
corpora_router,
document_router,
event_router,
families_router,
Expand Down Expand Up @@ -104,6 +105,12 @@ async def lifespan(app_: FastAPI):

app.include_router(auth_router, prefix="/api", tags=["Authentication"])

app.include_router(
corpora_router,
prefix="/api/v1",
tags=["corpora"],
dependencies=[Depends(check_user_auth)],
)
# Add CORS middleware to allow cross origin requests from any port
app.add_middleware(
CORSMiddleware,
Expand Down
34 changes: 8 additions & 26 deletions app/model/authorisation.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,7 @@
import enum
from typing import Mapping


class AuthOperation(str, enum.Enum):
"""An operation that can be authorized"""

CREATE = "CREATE"
READ = "READ"
UPDATE = "UPDATE"
DELETE = "DELETE"


HTTP_MAP_TO_OPERATION = {
"POST": AuthOperation.CREATE,
"GET": AuthOperation.READ,
"HEAD": AuthOperation.READ,
"PUT": AuthOperation.UPDATE,
"PATCH": AuthOperation.UPDATE,
"DELETE": AuthOperation.DELETE,
}
from db_client.models.organisation.authorisation import AuthAccess, AuthOperation


class AuthEndpoint(str, enum.Enum):
Expand All @@ -36,14 +19,7 @@ class AuthEndpoint(str, enum.Enum):
ANALYTICS = "ANALYTICS"
EVENT = "EVENTS"
INGEST = "INGEST"


class AuthAccess(str, enum.Enum):
"""The level of access needed"""

USER = "USER"
ADMIN = "ADMIN"
SUPER = "SUPER"
CORPUS = "CORPORA"


AuthMap = Mapping[AuthEndpoint, Mapping[AuthOperation, AuthAccess]]
Expand Down Expand Up @@ -90,4 +66,10 @@ class AuthAccess(str, enum.Enum):
AuthOperation.CREATE: AuthAccess.USER,
AuthOperation.READ: AuthAccess.USER,
},
# Corpus
AuthEndpoint.CORPUS: {
AuthOperation.CREATE: AuthAccess.ADMIN,
AuthOperation.READ: AuthAccess.ADMIN,
AuthOperation.UPDATE: AuthAccess.ADMIN,
},
}
35 changes: 35 additions & 0 deletions app/model/corpus.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from typing import Optional

from pydantic import BaseModel

from app.model.general import Json


class CorpusReadDTO(BaseModel):
    """Corpus fields returned by the corpora read endpoints."""

    # Identity and descriptive content of the corpus.
    import_id: str
    title: str
    description: str
    corpus_text: Optional[str]
    corpus_image_url: Optional[str] = None

    # Organisation the corpus is associated with.
    organisation_id: int
    organisation_name: str

    # Corpus type details and the accompanying metadata.
    corpus_type_name: str
    corpus_type_description: str
    metadata: Json

    # TODO: Add create and last modified timestamps.


class CorpusWriteDTO(BaseModel):
    """Corpus fields accepted as the request body when updating a corpus."""

    # Descriptive content of the corpus.
    title: str
    description: str
    corpus_text: Optional[str]
    corpus_image_url: Optional[str]

    # Corpus type details.
    corpus_type_name: str
    corpus_type_description: str
1 change: 0 additions & 1 deletion app/repository/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,6 @@ def count(db: Session, org_id: Optional[int]) -> Optional[int]:
if org_id is not None:
query = query.filter(Organisation.id == org_id)
n_collections = query.count()
_LOGGER.error(n_collections)
except Exception as e:
_LOGGER.error(e)
return
Expand Down
Loading

0 comments on commit 86cc83b

Please sign in to comment.