Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/pdct 1759 make backend config endpoint focused on corpora not #430

Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion app/api/api_v1/routers/lookups/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from app.api.api_v1.routers.lookups.router import lookups_router
from app.clients.db.session import get_db
from app.models.metadata import ApplicationConfig
from app.models.config import ApplicationConfig
from app.repository.lookups import get_config
from app.service.custom_app import AppTokenFactory

Expand Down
27 changes: 27 additions & 0 deletions app/models/metadata.py → app/models/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,37 @@ class OrganisationConfig(BaseModel):
count_by_category: Mapping[str, int]


class CorpusConfig(BaseModel):
    """Contains the Corpus and Organisation info as well as stats used on homepage"""

    # --- Values read from the corpus record ---
    corpus_import_id: str
    title: str
    description: str
    image_url: str
    text: str

    # --- Values read from the organisation record ---
    organisation_name: str
    organisation_id: int

    # --- Family statistics for the corpus ---
    # `total` is the number of families in the corpus; `count_by_category`
    # holds a family count per category key.
    total: int
    count_by_category: Mapping[str, int]


class CorpusTypeConfig(BaseModel):
    """Contains the CorpusType info as well as data of any corpora of that type"""

    # Name and description of the corpus type itself.
    corpus_type_name: str
    corpus_type_description: str

    # Taxonomy associated with this corpus type.
    taxonomy: TaxonomyData

    # One entry per corpus of this type.
    corpora: Sequence[CorpusConfig]


class ApplicationConfig(BaseModel):
    """Definition of the application Config.

    Holds the geographies, languages and document variants lookups together
    with the per-organisation config and the per-corpus-type config.
    """

    geographies: Sequence[dict]
    # Organisation config entries, keyed by a string identifier.
    organisations: Mapping[str, OrganisationConfig]
    languages: Mapping[str, str]
    document_variants: Sequence[str]
    # Corpus type config entries, keyed by a string identifier.
    corpus_types: Mapping[str, CorpusTypeConfig]
30 changes: 30 additions & 0 deletions app/repository/corpus.py
annaCPR marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from db_client.models.dfce.family import Corpus, Family, FamilyCorpus
from sqlalchemy import func
from sqlalchemy.orm import Session


def get_total_families_per_corpus(db: Session, corpus_import_id: str) -> int:
    """Count the families that are linked to a single corpus.

    :param db: Database session used to run the query.
    :param corpus_import_id: Import ID of the corpus whose families are counted.
    :return: Number of Family rows joined to the corpus through FamilyCorpus.
    """
    joined_on_family = FamilyCorpus.family_import_id == Family.import_id
    belongs_to_corpus = FamilyCorpus.corpus_import_id == corpus_import_id

    families = db.query(Family)
    families = families.join(FamilyCorpus, joined_on_family)
    families = families.filter(belongs_to_corpus)
    return families.count()


def get_family_count_by_category_per_corpus(db: Session, corpus_import_id: str):
    """Count the families of a single corpus, grouped by family category.

    :param db: Database session used to run the query.
    :param corpus_import_id: Import ID of the corpus whose families are counted.
    :return: All result rows of the grouped query; each row holds a family
        category followed by the count for that category.
    """
    joined_on_family = FamilyCorpus.family_import_id == Family.import_id
    belongs_to_corpus = FamilyCorpus.corpus_import_id == corpus_import_id

    per_category = db.query(Family.family_category, func.count())
    per_category = per_category.join(FamilyCorpus, joined_on_family)
    per_category = per_category.filter(belongs_to_corpus)
    per_category = per_category.group_by(Family.family_category)
    return per_category.all()


def get_allowed_corpora(db: Session, allowed_corpora: list[str]) -> list[Corpus]:
    """Fetch the corpora selected by a list of corpus import IDs.

    :param db: Database session used to run the query.
    :param allowed_corpora: Corpus import IDs to restrict the result to. An
        empty list applies no restriction.
    :return: Every Corpus when the list is empty, otherwise only the corpora
        whose import ID appears in the list.
    """
    every_corpus = db.query(Corpus)

    # An empty list means "no filter", so hand back everything.
    if allowed_corpora == []:
        return every_corpus.all()

    return every_corpus.filter(Corpus.import_id.in_(allowed_corpora)).all()
6 changes: 6 additions & 0 deletions app/repository/corpus_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from db_client.models.organisation import CorpusType
from sqlalchemy.orm import Session


def get(db: Session, corpus_type_name: str) -> CorpusType:
    """Look up a corpus type by its name.

    :param db: Database session used to run the query.
    :param corpus_type_name: Name of the corpus type to fetch.
    :return: The CorpusType whose name equals `corpus_type_name`; the query
        is resolved with `.one()`, so exactly one match is expected.
    """
    matching_types = db.query(CorpusType).filter(
        CorpusType.name == corpus_type_name
    )
    return matching_types.one()
5 changes: 3 additions & 2 deletions app/repository/lookups.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,16 @@
from sqlalchemy.exc import MultipleResultsFound
from sqlalchemy.orm import Session

from app.models.metadata import ApplicationConfig
from app.models.config import ApplicationConfig
from app.repository.organisation import get_organisation_config, get_organisations
from app.service.config import get_corpus_type_config_for_allowed_corpora
from app.service.pipeline import IMPORT_ID_MATCHER
from app.service.util import tree_table_to_json

_LOGGER = logging.getLogger(__name__)


def get_config(db: Session, allowed_corpora: list[str]) -> ApplicationConfig:
# First get the CCLW stats
return ApplicationConfig(
geographies=tree_table_to_json(table=Geography, db=db),
organisations={
Expand All @@ -28,6 +28,7 @@ def get_config(db: Session, allowed_corpora: list[str]) -> ApplicationConfig:
variant.variant_name
for variant in db.query(Variant).order_by(Variant.variant_name).all()
],
corpus_types=get_corpus_type_config_for_allowed_corpora(db, allowed_corpora),
)


Expand Down
6 changes: 5 additions & 1 deletion app/repository/organisation.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from sqlalchemy.orm import Session

from app import config
from app.models.metadata import CorpusData, OrganisationConfig
from app.models.config import CorpusData, OrganisationConfig


def _to_corpus_data(row) -> CorpusData:
Expand Down Expand Up @@ -94,3 +94,7 @@ def get_organisations(db: Session, allowed_corpora: list[str]) -> list[Organisat
if allowed_corpora != []:
query = query.filter(Corpus.import_id.in_(allowed_corpora))
return query.all()


def get(db: Session, org_id: int) -> Organisation:
    """Look up an organisation by its ID.

    :param db: Database session used to run the query.
    :param org_id: ID of the organisation to fetch.
    :return: The Organisation whose id equals `org_id`; the query is resolved
        with `.one()`, so exactly one match is expected.
    """
    matching_orgs = db.query(Organisation).filter(Organisation.id == org_id)
    return matching_orgs.one()
93 changes: 93 additions & 0 deletions app/service/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
from typing import Any, Mapping

from db_client.models.dfce.family import FamilyCategory
from db_client.models.organisation import Corpus, CorpusType, Organisation
from sqlalchemy.orm import Session

from app import config
from app.models.config import CorpusConfig, CorpusTypeConfig
from app.repository import corpus_type as corpus_type_repo
from app.repository import organisation as org_repo
from app.repository.corpus import (
get_allowed_corpora,
get_family_count_by_category_per_corpus,
get_total_families_per_corpus,
)


def _get_family_stats_per_corpus(db: Session, corpus_import_id: str) -> dict[str, Any]:
    """Collect the family statistics for a single corpus.

    :param db: Database session passed through to the corpus repository.
    :param corpus_import_id: Import ID of the corpus to gather stats for.
    :return: A dict with two keys: "total", the family count for the corpus,
        and "count_by_category", a dict with one entry per FamilyCategory
        value holding the family count for that category.
    """
    total = get_total_families_per_corpus(db, corpus_import_id)

    counts = get_family_count_by_category_per_corpus(db, corpus_import_id)
    found_categories = {category.value: count for category, count in counts}

    # Report every known category, supplying zero for those the query did not
    # return any rows for.
    count_by_category = {
        category.value: found_categories.get(category.value, 0)
        for category in FamilyCategory
    }

    return {"total": total, "count_by_category": count_by_category}


def _to_corpus_type_config(
    corpus: Corpus,
    corpus_type: CorpusType,
    organisation: Organisation,
    stats: dict[str, Any],
) -> CorpusTypeConfig:
    """Build the config for a corpus type, containing a single corpus.

    :param corpus: Corpus supplying the title, description, import ID, text
        and image path.
    :param corpus_type: Corpus type supplying the name, description and
        taxonomy (its `valid_metadata`).
    :param organisation: Organisation supplying the organisation ID and name.
    :param stats: Family statistics for the corpus; must contain the keys
        "total" and "count_by_category".
    :return: A CorpusTypeConfig whose `corpora` list holds exactly one
        CorpusConfig, built from the arguments.
    """
    # Function-scope import: the module-level typing import only brings in
    # Any and Mapping.
    from typing import cast

    # The image URL is only built when the corpus has a non-empty image path;
    # otherwise an empty string is used.
    image_url = (
        f"https://{config.CDN_DOMAIN}/{corpus.corpus_image_url}"
        if corpus.corpus_image_url is not None and len(str(corpus.corpus_image_url)) > 0
        else ""
    )
    corpus_text = corpus.corpus_text if corpus.corpus_text is not None else ""

    # pyright (reportGeneralTypeIssues) rejected unpacking
    # `corpus_type.valid_metadata` directly with "Expected mapping for
    # dictionary unpack operator". The cast tells the type checker the value
    # is a mapping; it has no effect at runtime.
    valid_metadata = cast(Mapping[str, Any], corpus_type.valid_metadata)

    return CorpusTypeConfig(
        corpus_type_name=str(corpus_type.name),
        corpus_type_description=str(corpus_type.description),
        taxonomy={**valid_metadata},
        corpora=[
            CorpusConfig(
                title=str(corpus.title),
                description=str(corpus.description),
                corpus_import_id=str(corpus.import_id),
                text=str(corpus_text),
                image_url=image_url,
                organisation_id=int(str(organisation.id)),
                organisation_name=str(organisation.name),
                total=stats["total"],
                count_by_category=stats["count_by_category"],
            )
        ],
    )


def _get_config_for_corpus_type(
    db: Session, corpus: Corpus
) -> dict[str, CorpusTypeConfig]:
    """Build a single-entry mapping of corpus type name to its config.

    :param db: Database session used for the stats, corpus type and
        organisation lookups.
    :param corpus: Corpus to build the config for.
    :return: A dict with one key, the name of the corpus's type, whose value
        is the CorpusTypeConfig built for this corpus.
    """
    family_stats = _get_family_stats_per_corpus(db, str(corpus.import_id))
    type_of_corpus = corpus_type_repo.get(db, str(corpus.corpus_type_name))
    owning_org = org_repo.get(db, int(str(corpus.organisation_id)))

    type_name = str(type_of_corpus.name)
    type_config = _to_corpus_type_config(
        corpus, type_of_corpus, owning_org, family_stats
    )
    return {type_name: type_config}


def get_corpus_type_config_for_allowed_corpora(
    db: Session, allowed_corpora: list[str]
) -> Mapping[str, CorpusTypeConfig]:
    """Build the corpus type config for every allowed corpus.

    :param db: Database session used for all lookups.
    :param allowed_corpora: Corpus import IDs to include; passed unchanged to
        `get_allowed_corpora`.
    :return: A mapping of corpus type name to CorpusTypeConfig. When several
        allowed corpora share a corpus type, that type's `corpora` list
        contains all of them, in the order the corpora were returned.
    """
    corpora = get_allowed_corpora(db, allowed_corpora)

    corpus_type_config_for_allowed_corpora: dict[str, CorpusTypeConfig] = {}
    for corpus in corpora:
        config_for_corpus = _get_config_for_corpus_type(db, corpus)
        for type_name, type_config in config_for_corpus.items():
            existing = corpus_type_config_for_allowed_corpora.get(type_name)
            if existing is None:
                corpus_type_config_for_allowed_corpora[type_name] = type_config
                continue

            # Each per-corpus config carries only its own corpus. Plainly
            # overwriting the key would drop the corpora already collected
            # for this type, so extend the list instead.
            corpus_type_config_for_allowed_corpora[type_name] = CorpusTypeConfig(
                corpus_type_name=existing.corpus_type_name,
                corpus_type_description=existing.corpus_type_description,
                taxonomy=existing.taxonomy,
                corpora=[*existing.corpora, *type_config.corpora],
            )

    return corpus_type_config_for_allowed_corpora
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "navigator_backend"
version = "1.19.21"
version = "1.20.0"
description = ""
authors = ["CPR-dev-team <[email protected]>"]
packages = [{ include = "app" }, { include = "tests" }]
Expand Down
Loading
Loading