-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
115 additions
and
90 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,108 +1,30 @@ | ||
from typing import Any, Mapping | ||
|
||
from db_client.models.dfce.family import Corpus, Family, FamilyCategory, FamilyCorpus | ||
from db_client.models.organisation import CorpusType, Organisation | ||
from db_client.models.dfce.family import Corpus, Family, FamilyCorpus | ||
from sqlalchemy import func | ||
from sqlalchemy.orm import Session | ||
|
||
from app import config | ||
from app.models.config import CorpusConfig, CorpusTypeConfig | ||
|
||
|
||
def _get_family_stats_per_corpus(db: Session, corpus_import_id: str) -> dict[str, Any]: | ||
total = ( | ||
def get_total_families_per_corpus(db: Session, corpus_import_id: str) -> int: | ||
return ( | ||
db.query(Family) | ||
.join(FamilyCorpus, FamilyCorpus.family_import_id == Family.import_id) | ||
.filter(FamilyCorpus.corpus_import_id == corpus_import_id) | ||
.count() | ||
) | ||
|
||
counts = ( | ||
|
||
def get_family_count_by_category_per_corpus(db: Session, corpus_import_id: str): | ||
return ( | ||
db.query(Family.family_category, func.count()) | ||
.join(FamilyCorpus, FamilyCorpus.family_import_id == Family.import_id) | ||
.filter(FamilyCorpus.corpus_import_id == corpus_import_id) | ||
.group_by(Family.family_category) | ||
.all() | ||
) | ||
found_categories = {c[0].value: c[1] for c in counts} | ||
count_by_category = {} | ||
|
||
# Supply zeros when there aren't any | ||
for category in [e.value for e in FamilyCategory]: | ||
if category in found_categories.keys(): | ||
count_by_category[category] = found_categories[category] | ||
else: | ||
count_by_category[category] = 0 | ||
|
||
return {"total": total, "count_by_category": count_by_category} | ||
|
||
|
||
def _to_corpus_type_config(row, stats: dict[str, Any]) -> dict[str, CorpusTypeConfig]:
    """Wrap one corpus query row in a single-entry corpus-type mapping.

    :param row: result row exposing the corpus, corpus type and organisation
        fields read below (``title``, ``image_url``, ``taxonomy``, ...).
    :param stats: mapping with ``"total"`` and ``"count_by_category"`` entries
        for the corpus.
    :return: ``{row.corpus_type: CorpusTypeConfig}`` whose ``corpora`` list
        holds exactly this corpus.
    """
    # Only a non-empty image path is turned into a CDN URL.
    if row.image_url is not None and len(row.image_url) > 0:
        image_url = f"https://{config.CDN_DOMAIN}/{row.image_url}"
    else:
        image_url = ""

    # A missing corpus text is reported as an empty string.
    corpus_text = "" if row.text is None else row.text

    corpus_config = CorpusConfig(
        title=row.title,
        description=row.description,
        corpus_import_id=row.corpus_import_id,
        text=corpus_text,
        image_url=image_url,
        organisation_id=row.organisation_id,
        organisation_name=row.organisation_name,
        total=stats["total"],
        count_by_category=stats["count_by_category"],
    )
    corpus_type_config = CorpusTypeConfig(
        corpus_type_name=row.corpus_type_name,
        corpus_type_description=row.corpus_type_description,
        # Shallow copy so the config does not share the row's mapping object.
        taxonomy={**row.taxonomy},
        corpora=[corpus_config],
    )
    return {row.corpus_type: corpus_type_config}
|
||
|
||
def _get_config_for_corpus(db: Session, row) -> dict[str, CorpusTypeConfig]:
    """Compute the family stats for ``row``'s corpus and build its config entry.

    :param db: database session used for the family statistics lookup.
    :param row: corpus result row; only ``corpus_import_id`` is read here, the
        rest is consumed by ``_to_corpus_type_config``.
    :return: single-entry mapping produced by ``_to_corpus_type_config``.
    """
    return _to_corpus_type_config(
        row, _get_family_stats_per_corpus(db, row.corpus_import_id)
    )
|
||
|
||
def get_config_for_allowed_corpora( | ||
db: Session, allowed_corpora: list[str] | ||
) -> Mapping[str, CorpusTypeConfig]: | ||
query = ( | ||
db.query( | ||
Corpus.import_id.label("corpus_import_id"), | ||
Corpus.title.label("title"), | ||
Corpus.description.label("description"), | ||
Corpus.corpus_image_url.label("image_url"), | ||
Corpus.corpus_text.label("text"), | ||
Corpus.corpus_type_name.label("corpus_type"), | ||
CorpusType.name.label("corpus_type_name"), | ||
CorpusType.description.label("corpus_type_description"), | ||
CorpusType.valid_metadata.label("taxonomy"), | ||
Organisation.id.label("organisation_id"), | ||
Organisation.name.label("organisation_name"), | ||
) | ||
.join( | ||
CorpusType, | ||
Corpus.corpus_type_name == CorpusType.name, | ||
) | ||
.join(Organisation, Corpus.organisation_id == Organisation.id) | ||
) | ||
def get_allowed_corpora(db: Session, allowed_corpora: list[str]) -> list[Corpus]: | ||
query = db.query(Corpus) | ||
if allowed_corpora != []: | ||
query = query.filter(Corpus.import_id.in_(allowed_corpora)) | ||
|
||
corpora = query.all() | ||
configs_for_each_allowed_corpus = ( | ||
_get_config_for_corpus(db, row) for row in corpora | ||
) | ||
config_for_allowed_corpora = { | ||
k: v for d in configs_for_each_allowed_corpus for k, v in d.items() | ||
} | ||
|
||
return config_for_allowed_corpora | ||
return query.all() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
from db_client.models.organisation import CorpusType | ||
from sqlalchemy.orm import Session | ||
|
||
|
||
def get(db: Session, corpus_type_name: str) -> CorpusType:
    """Fetch the corpus type whose name equals ``corpus_type_name``.

    The lookup ends in ``Query.one()``, so a name that matches no row (or more
    than one) raises instead of returning ``None``.

    :param db: database session to query.
    :param corpus_type_name: exact corpus type name to match.
    :return: the matching ``CorpusType``.
    """
    matching = db.query(CorpusType).filter(CorpusType.name == corpus_type_name)
    return matching.one()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
from typing import Any, Mapping, cast

from db_client.models.dfce.family import FamilyCategory
from db_client.models.organisation import Corpus, CorpusType, Organisation
from sqlalchemy.orm import Session

from app import config
from app.models.config import CorpusConfig, CorpusTypeConfig
from app.repository import corpus_type as corpus_type_repo
from app.repository import organisation as org_repo
from app.repository.corpus import (
    get_allowed_corpora,
    get_family_count_by_category_per_corpus,
    get_total_families_per_corpus,
)
|
||
|
||
def _get_family_stats_per_corpus(db: Session, corpus_import_id: str) -> dict[str, Any]:
    """Collect the family total and per-category family counts for one corpus.

    :param db: database session passed through to the repository queries.
    :param corpus_import_id: import id of the corpus to report on.
    :return: ``{"total": <int>, "count_by_category": {<category value>: <int>}}``
        with one entry for every ``FamilyCategory`` member.
    """
    total = get_total_families_per_corpus(db, corpus_import_id)
    category_rows = get_family_count_by_category_per_corpus(db, corpus_import_id)

    # Each row is a (category, count) pair; key the counts by the enum's value.
    found = {category.value: count for category, count in category_rows}

    # Categories without any families are reported as 0 rather than omitted.
    count_by_category = {
        member.value: found.get(member.value, 0) for member in FamilyCategory
    }

    return {"total": total, "count_by_category": count_by_category}
|
||
|
||
def _to_corpus_type_config(
    corpus: Corpus,
    corpus_type: CorpusType,
    organisation: Organisation,
    stats: dict[str, Any],
) -> CorpusTypeConfig:
    """Build the corpus-type config entry describing a single corpus.

    :param corpus: corpus whose title, description, import id, text and image
        are exposed.
    :param corpus_type: corpus type supplying the name, description and
        taxonomy (``valid_metadata``).
    :param organisation: organisation supplying the id and name.
    :param stats: mapping with ``"total"`` and ``"count_by_category"`` entries
        for the corpus.
    :return: a ``CorpusTypeConfig`` whose ``corpora`` list holds exactly this
        corpus.
    """
    # cast() does nothing at runtime. It only narrows the ORM attributes that
    # pyright rejected in CI (reportArgumentType) to the plain types this code
    # already treats them as.
    # NOTE(review): assumes these columns are string-typed -- confirm against
    # the db_client models.
    raw_image_url = cast("str | None", corpus.corpus_image_url)
    raw_text = cast("str | None", corpus.corpus_text)

    # Only a non-empty image path is turned into a CDN URL.
    image_url = (
        f"https://{config.CDN_DOMAIN}/{raw_image_url}"
        if raw_image_url is not None and len(raw_image_url) > 0
        else ""
    )
    # A missing corpus text is reported as an empty string.
    corpus_text = raw_text if raw_text is not None else ""

    return CorpusTypeConfig(
        corpus_type_name=corpus_type.name,
        corpus_type_description=corpus_type.description,
        # Shallow copy so the config does not share the model's mapping object.
        taxonomy={**corpus_type.valid_metadata},
        corpora=[
            CorpusConfig(
                title=corpus.title,
                description=corpus.description,
                corpus_import_id=cast(str, corpus.import_id),
                text=corpus_text,
                image_url=image_url,
                organisation_id=organisation.id,
                organisation_name=cast(str, organisation.name),
                total=stats["total"],
                count_by_category=stats["count_by_category"],
            )
        ],
    )
|
||
|
||
def _get_config_for_corpus_type(
    db: Session, corpus: Corpus
) -> dict[str, CorpusTypeConfig]:
    """Assemble the config entry for one corpus, keyed by its corpus type name.

    Looks up the corpus's family statistics, corpus type and organisation, then
    delegates to ``_to_corpus_type_config``.

    :param db: database session used for all three lookups.
    :param corpus: corpus to describe.
    :return: single-entry mapping ``{corpus_type.name: CorpusTypeConfig}``.
    """
    # cast() does nothing at runtime. pyright rejected passing the ORM attribute
    # where a ``str`` parameter is declared (reportArgumentType in CI).
    # NOTE(review): assumes import_id is a string column -- confirm.
    stats = _get_family_stats_per_corpus(db, cast(str, corpus.import_id))
    corpus_type = corpus_type_repo.get(db, corpus.corpus_type_name)
    organisation = org_repo.get(db, corpus.organisation_id)
    return {
        corpus_type.name: _to_corpus_type_config(
            corpus, corpus_type, organisation, stats
        )
    }
|
||
|
||
def get_corpus_type_config_for_allowed_corpora(
    db: Session, allowed_corpora: list[str]
) -> Mapping[str, CorpusTypeConfig]:
    """Build the corpus-type config for every allowed corpus.

    :param db: database session used for all lookups.
    :param allowed_corpora: corpus import ids forwarded to
        ``get_allowed_corpora`` to select the corpora.
    :return: mapping of corpus type name to its ``CorpusTypeConfig``. When
        several corpora share a corpus type, their corpus entries are gathered
        under that one type.
    """
    merged: dict[str, CorpusTypeConfig] = {}

    for corpus in get_allowed_corpora(db, allowed_corpora):
        per_corpus = _get_config_for_corpus_type(db, corpus)
        for type_name, type_config in per_corpus.items():
            existing = merged.get(type_name)
            if existing is None:
                merged[type_name] = type_config
            else:
                # Each per-corpus config lists a single corpus. Overwriting the
                # entry (as a plain dict merge does on a repeated key) would
                # drop the corpora seen earlier for this type, so append.
                # NOTE(review): assumes CorpusTypeConfig.corpora is the mutable
                # list passed at construction -- confirm against the model.
                existing.corpora.extend(type_config.corpora)

    return merged