Skip to content

Commit

Permalink
Refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
annaCPR committed Dec 19, 2024
1 parent 74d8c35 commit ed58a1b
Show file tree
Hide file tree
Showing 5 changed files with 115 additions and 90 deletions.
96 changes: 9 additions & 87 deletions app/repository/corpus.py
Original file line number Diff line number Diff line change
@@ -1,108 +1,30 @@
from typing import Any, Mapping

from db_client.models.dfce.family import Corpus, Family, FamilyCategory, FamilyCorpus
from db_client.models.organisation import CorpusType, Organisation
from db_client.models.dfce.family import Corpus, Family, FamilyCorpus
from sqlalchemy import func
from sqlalchemy.orm import Session

from app import config
from app.models.config import CorpusConfig, CorpusTypeConfig


def _get_family_stats_per_corpus(db: Session, corpus_import_id: str) -> dict[str, Any]:
total = (
def get_total_families_per_corpus(db: Session, corpus_import_id: str) -> int:
return (
db.query(Family)
.join(FamilyCorpus, FamilyCorpus.family_import_id == Family.import_id)
.filter(FamilyCorpus.corpus_import_id == corpus_import_id)
.count()
)

counts = (

def get_family_count_by_category_per_corpus(db: Session, corpus_import_id: str):
return (
db.query(Family.family_category, func.count())
.join(FamilyCorpus, FamilyCorpus.family_import_id == Family.import_id)
.filter(FamilyCorpus.corpus_import_id == corpus_import_id)
.group_by(Family.family_category)
.all()
)
found_categories = {c[0].value: c[1] for c in counts}
count_by_category = {}

# Supply zeros when there aren't any
for category in [e.value for e in FamilyCategory]:
if category in found_categories.keys():
count_by_category[category] = found_categories[category]
else:
count_by_category[category] = 0

return {"total": total, "count_by_category": count_by_category}


def _to_corpus_type_config(row, stats: dict[str, Any]) -> dict[str, CorpusTypeConfig]:
image_url = (
f"https://{config.CDN_DOMAIN}/{row.image_url}"
if row.image_url is not None and len(row.image_url) > 0
else ""
)
corpus_text = row.text if row.text is not None else ""
return {
row.corpus_type: CorpusTypeConfig(
corpus_type_name=row.corpus_type_name,
corpus_type_description=row.corpus_type_description,
taxonomy={**row.taxonomy},
corpora=[
CorpusConfig(
title=row.title,
description=row.description,
corpus_import_id=row.corpus_import_id,
text=corpus_text,
image_url=image_url,
organisation_id=row.organisation_id,
organisation_name=row.organisation_name,
total=stats["total"],
count_by_category=stats["count_by_category"],
)
],
)
}


def _get_config_for_corpus(db: Session, row) -> dict[str, CorpusTypeConfig]:
stats = _get_family_stats_per_corpus(db, row.corpus_import_id)
return _to_corpus_type_config(row, stats)


def get_config_for_allowed_corpora(
db: Session, allowed_corpora: list[str]
) -> Mapping[str, CorpusTypeConfig]:
query = (
db.query(
Corpus.import_id.label("corpus_import_id"),
Corpus.title.label("title"),
Corpus.description.label("description"),
Corpus.corpus_image_url.label("image_url"),
Corpus.corpus_text.label("text"),
Corpus.corpus_type_name.label("corpus_type"),
CorpusType.name.label("corpus_type_name"),
CorpusType.description.label("corpus_type_description"),
CorpusType.valid_metadata.label("taxonomy"),
Organisation.id.label("organisation_id"),
Organisation.name.label("organisation_name"),
)
.join(
CorpusType,
Corpus.corpus_type_name == CorpusType.name,
)
.join(Organisation, Corpus.organisation_id == Organisation.id)
)
def get_allowed_corpora(db: Session, allowed_corpora: list[str]) -> list[Corpus]:
query = db.query(Corpus)
if allowed_corpora != []:
query = query.filter(Corpus.import_id.in_(allowed_corpora))

corpora = query.all()
configs_for_each_allowed_corpus = (
_get_config_for_corpus(db, row) for row in corpora
)
config_for_allowed_corpora = {
k: v for d in configs_for_each_allowed_corpus for k, v in d.items()
}

return config_for_allowed_corpora
return query.all()
6 changes: 6 additions & 0 deletions app/repository/corpus_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from db_client.models.organisation import CorpusType
from sqlalchemy.orm import Session


def get(db: Session, corpus_type_name: str) -> CorpusType:
    """Fetch the corpus type whose name equals ``corpus_type_name``.

    :param db: Database session used to run the query.
    :param corpus_type_name: Name of the corpus type to look up.
    :return: The single matching ``CorpusType`` row.
    :raises sqlalchemy.exc.NoResultFound: Raised by ``Query.one`` when no
        row matches.
    :raises sqlalchemy.exc.MultipleResultsFound: Raised by ``Query.one``
        when more than one row matches.
    """
    name_matches = CorpusType.name == corpus_type_name
    matching_types = db.query(CorpusType).filter(name_matches)
    return matching_types.one()
5 changes: 2 additions & 3 deletions app/repository/lookups.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,15 @@
from sqlalchemy.orm import Session

from app.models.config import ApplicationConfig
from app.repository.corpus import get_config_for_allowed_corpora
from app.repository.organisation import get_organisation_config, get_organisations
from app.service.config import get_corpus_type_config_for_allowed_corpora
from app.service.pipeline import IMPORT_ID_MATCHER
from app.service.util import tree_table_to_json

_LOGGER = logging.getLogger(__name__)


def get_config(db: Session, allowed_corpora: list[str]) -> ApplicationConfig:
# First get the CCLW stats
return ApplicationConfig(
geographies=tree_table_to_json(table=Geography, db=db),
organisations={
Expand All @@ -29,7 +28,7 @@ def get_config(db: Session, allowed_corpora: list[str]) -> ApplicationConfig:
variant.variant_name
for variant in db.query(Variant).order_by(Variant.variant_name).all()
],
corpus_types=get_config_for_allowed_corpora(db, allowed_corpora),
corpus_types=get_corpus_type_config_for_allowed_corpora(db, allowed_corpora),
)


Expand Down
4 changes: 4 additions & 0 deletions app/repository/organisation.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,7 @@ def get_organisations(db: Session, allowed_corpora: list[str]) -> list[Organisat
if allowed_corpora != []:
query = query.filter(Corpus.import_id.in_(allowed_corpora))
return query.all()


def get(db: Session, org_id: int) -> Organisation:
    """Fetch the organisation whose id equals ``org_id``.

    :param db: Database session used to run the query.
    :param org_id: Id of the organisation to look up.
    :return: The single matching ``Organisation`` row.
    :raises sqlalchemy.exc.NoResultFound: Raised by ``Query.one`` when no
        row matches.
    :raises sqlalchemy.exc.MultipleResultsFound: Raised by ``Query.one``
        when more than one row matches.
    """
    id_matches = Organisation.id == org_id
    matching_orgs = db.query(Organisation).filter(id_matches)
    return matching_orgs.one()
94 changes: 94 additions & 0 deletions app/service/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
from typing import Any, Mapping

from db_client.models.dfce.family import FamilyCategory
from db_client.models.organisation import Corpus, CorpusType, Organisation
from sqlalchemy.orm import Session

from app import config
from app.models.config import CorpusConfig, CorpusTypeConfig
from app.repository import corpus_type as corpus_type_repo
from app.repository import organisation as org_repo
from app.repository.corpus import (
get_allowed_corpora,
get_family_count_by_category_per_corpus,
get_total_families_per_corpus,
)


def _get_family_stats_per_corpus(db: Session, corpus_import_id: str) -> dict[str, Any]:
    """Summarise how many families a corpus holds, overall and per category.

    :param db: Database session passed through to the repository queries.
    :param corpus_import_id: Import id of the corpus to summarise.
    :return: A dict with two keys: ``"total"`` (the family count for the
        corpus) and ``"count_by_category"`` (a dict with one entry per
        ``FamilyCategory`` value).
    """
    total = get_total_families_per_corpus(db, corpus_import_id)
    counts = get_family_count_by_category_per_corpus(db, corpus_import_id)

    # Each row is a (category, count) pair; key it by the enum's value.
    found_categories = {category.value: count for category, count in counts}

    # Supply zeros for categories that have no families in this corpus.
    count_by_category = {
        category.value: found_categories.get(category.value, 0)
        for category in FamilyCategory
    }

    return {"total": total, "count_by_category": count_by_category}


def _to_corpus_type_config(
    corpus: Corpus,
    corpus_type: CorpusType,
    organisation: Organisation,
    stats: dict[str, Any],
) -> CorpusTypeConfig:
    """Build the config entry for a corpus type containing a single corpus.

    :param corpus: Corpus row supplying title, description, import id, text
        and image path.
    :param corpus_type: Corpus type row supplying name, description and
        taxonomy (``valid_metadata``).
    :param organisation: Organisation row supplying id and name.
    :param stats: Dict with ``"total"`` and ``"count_by_category"`` entries
        for the corpus.
    :return: A ``CorpusTypeConfig`` whose ``corpora`` list holds exactly one
        ``CorpusConfig`` built from ``corpus``.
    """
    # Local import keeps this block self-contained; ``cast`` only informs the
    # type checker and returns its argument unchanged at runtime.
    from typing import cast

    # The type checker sees ORM attributes as ``Column`` objects (see the
    # pyright reportArgumentType failures on this function), so narrow them
    # to the value types the config models declare.
    image_path = cast("str | None", corpus.corpus_image_url)
    image_url = (
        f"https://{config.CDN_DOMAIN}/{image_path}"
        if image_path is not None and len(image_path) > 0
        else ""
    )

    raw_text = cast("str | None", corpus.corpus_text)
    corpus_text = raw_text if raw_text is not None else ""

    return CorpusTypeConfig(
        corpus_type_name=cast(str, corpus_type.name),
        corpus_type_description=cast(str, corpus_type.description),
        # Shallow copy so the config does not share the ORM attribute's dict.
        taxonomy={**cast(Mapping[str, Any], corpus_type.valid_metadata)},
        corpora=[
            CorpusConfig(
                title=cast(str, corpus.title),
                description=cast(str, corpus.description),
                corpus_import_id=cast(str, corpus.import_id),
                text=corpus_text,
                image_url=image_url,
                organisation_id=cast(int, organisation.id),
                organisation_name=cast(str, organisation.name),
                total=stats["total"],
                count_by_category=stats["count_by_category"],
            )
        ],
    )


def _get_config_for_corpus_type(
    db: Session, corpus: Corpus
) -> dict[str, CorpusTypeConfig]:
    """Build a one-entry mapping from a corpus's type name to its config.

    Looks up the family stats, the corpus type and the organisation for
    ``corpus`` and combines them into a ``CorpusTypeConfig``.

    :param db: Database session used for the stats and repository lookups.
    :param corpus: Corpus row to build the config for.
    :return: A dict with a single key, the corpus type's name, whose value
        is the config built for ``corpus``.
    """
    # Local import keeps this block self-contained; ``cast`` only informs the
    # type checker and returns its argument unchanged at runtime.
    from typing import cast

    # The type checker sees ORM attributes as ``Column`` objects (see the
    # pyright reportArgumentType / reportReturnType failures on this
    # function), so narrow them to the declared parameter and key types.
    corpus_import_id = cast(str, corpus.import_id)
    corpus_type_name = cast(str, corpus.corpus_type_name)
    organisation_id = cast(int, corpus.organisation_id)

    stats = _get_family_stats_per_corpus(db, corpus_import_id)
    corpus_type = corpus_type_repo.get(db, corpus_type_name)
    organisation = org_repo.get(db, organisation_id)

    return {
        cast(str, corpus_type.name): _to_corpus_type_config(
            corpus, corpus_type, organisation, stats
        )
    }


def get_corpus_type_config_for_allowed_corpora(
    db: Session, allowed_corpora: list[str]
) -> Mapping[str, CorpusTypeConfig]:
    """Build the corpus type config for every allowed corpus.

    :param db: Database session used for all lookups.
    :param allowed_corpora: Corpus import ids passed to
        ``get_allowed_corpora`` to select which corpora are included.
    :return: Mapping from corpus type name to its ``CorpusTypeConfig``. When
        several corpora share a corpus type, their ``CorpusConfig`` entries
        are combined into that type's ``corpora`` list.
    """
    corpus_type_config_for_allowed_corpora: dict[str, CorpusTypeConfig] = {}

    for corpus in get_allowed_corpora(db, allowed_corpora):
        config_for_corpus = _get_config_for_corpus_type(db, corpus)
        for type_name, type_config in config_for_corpus.items():
            existing = corpus_type_config_for_allowed_corpora.get(type_name)
            if existing is None:
                corpus_type_config_for_allowed_corpora[type_name] = type_config
                continue
            # A plain dict merge keyed by type name would overwrite the
            # earlier entry and drop its corpus; append to it instead.
            # NOTE(review): assumes CorpusTypeConfig allows attribute
            # assignment (i.e. is not frozen) - confirm against the model.
            existing.corpora = [*existing.corpora, *type_config.corpora]

    return corpus_type_config_for_allowed_corpora

0 comments on commit ed58a1b

Please sign in to comment.