Skip to content

Commit

Permalink
Improve generating the polypredicate question.
Browse files Browse the repository at this point in the history
  • Loading branch information
ielis committed Sep 3, 2024
1 parent af302c0 commit 21caddc
Show file tree
Hide file tree
Showing 11 changed files with 43 additions and 31 deletions.
2 changes: 1 addition & 1 deletion docs/tutorial.rst
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ in the individuals of the *TBX5* cohort.
... ),
... group_names=('Missense', 'Frameshift'),
... )
>>> gt_predicate.get_question()
>>> gt_predicate.display_question()
'Genotype group: Missense, Frameshift'

.. note::
Expand Down
8 changes: 4 additions & 4 deletions docs/user-guide/predicates.rst
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ for assigning a patient into a genotype group:

>>> from gpsea.analysis.predicate.genotype import ModeOfInheritancePredicate
>>> gt_predicate = ModeOfInheritancePredicate.autosomal_recessive(is_frameshift_or_stop_gain)
>>> gt_predicate.get_question()
>>> gt_predicate.display_question()
'Which genotype group does the patient fit in: HOM_REF, HET, BIALLELIC_ALT'

The `gt_predicate` can be used in downstream analysis, such as in :class:
Expand Down Expand Up @@ -337,7 +337,7 @@ for testing if the individual has at least one missense vs. frameshift vs. synon
... ),
... group_names=('Missense', 'Frameshift', 'Synonymous'),
... )
>>> gt_predicate.get_question()
>>> gt_predicate.display_question()
'Genotype group: Missense, Frameshift, Synonymous'


Expand Down Expand Up @@ -380,8 +380,8 @@ to test for a presence of `Abnormal lens morphology <https://hpo.jax.org/browse/
... hpo=hpo,
... query=query,
... )
>>> pheno_predicate.get_question()
'Is Abnormal lens morphology present in the patient?'
>>> pheno_predicate.display_question()
'Is Abnormal lens morphology present in the patient: Yes, No'


TODO: explain ``missing_implies_phenotype_excluded``
Expand Down
4 changes: 2 additions & 2 deletions docs/user-guide/stats.rst
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ we expect the autosomal dominant mode of inheritance:

>>> from gpsea.analysis.predicate.genotype import ModeOfInheritancePredicate
>>> gt_predicate = ModeOfInheritancePredicate.autosomal_dominant(is_frameshift)
>>> gt_predicate.get_question()
>>> gt_predicate.display_question()
'Which genotype group does the patient fit in: HOM_REF, HET'

`gt_predicate` will assign the patients with no frameshift variant allele into `HOM_REF` group
Expand Down Expand Up @@ -409,7 +409,7 @@ The genotype predicate will bin the patient into two groups: a point mutation gr
... predicates=(point_mutation, lof_mutation),
... group_names=('Point', 'LoF'),
... )
>>> gt_predicate.get_question()
>>> gt_predicate.display_question()
'Genotype group: Point, LoF'


Expand Down
2 changes: 1 addition & 1 deletion src/gpsea/analysis/_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ def summarize(
# Column index: multiindex of counts and percentages for all genotype predicate groups
geno_idx = pd.MultiIndex.from_product(
iterables=(self._geno_predicate.get_categories(), ('Count', 'Percent')),
names=(self._geno_predicate.get_question(), None),
names=(self._geno_predicate.get_question_base(), None),
)

# We'll fill this frame with data
Expand Down
4 changes: 2 additions & 2 deletions src/gpsea/analysis/_gp_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,11 @@ def apply_predicates_on_patients(
data=0,
index=pd.Index(
data=ph_predicate.get_categories(),
name=ph_predicate.get_question(),
name=ph_predicate.get_question_base(),
),
columns=pd.Index(
data=gt_predicate.get_categories(),
name=gt_predicate.get_question(),
name=gt_predicate.get_question_base(),
),
)

Expand Down
6 changes: 3 additions & 3 deletions src/gpsea/analysis/pcats/_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,11 @@ def apply_predicates_on_patients(
data=0,
index=pd.Index(
data=ph_predicate.get_categories(),
name=ph_predicate.get_question(),
name=ph_predicate.get_question_base(),
),
columns=pd.Index(
data=gt_predicate.get_categories(),
name=gt_predicate.get_question(),
name=gt_predicate.get_question_base(),
),
)

Expand Down Expand Up @@ -198,7 +198,7 @@ def summarize(
gt_idx = pd.MultiIndex.from_product(
# TODO: fix the below
iterables=(self._gt_predicate.get_categories(), ("Count", "Percent")),
names=(self._gt_predicate.get_question(), None),
names=(self._gt_predicate.get_question_base(), None),
)

# We'll fill this frame with data
Expand Down
17 changes: 16 additions & 1 deletion src/gpsea/analysis/predicate/_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,12 @@ def get_categories(self) -> typing.Iterator[PatientCategory]:
"""
return (c.category for c in self.get_categorizations())

def get_category_names(self) -> typing.Iterator[str]:
    """
    Iterate over the names of the categories that the predicate can produce.

    The categories are obtained from :meth:`get_categories` and the `name`
    attribute of each category is yielded in the same order.
    """
    categories = self.get_categories()
    return (category.name for category in categories)

def get_category(
self,
cat_id: int,
Expand Down Expand Up @@ -182,12 +188,21 @@ def get_category_name(
return self.get_category(cat_id).name

@abc.abstractmethod
def get_question(self) -> str:
def get_question_base(self) -> str:
"""
Prepare a `str` with the question the predicate can answer.
"""
pass

def display_question(self) -> str:
    """
    Build the full question that the predicate can answer.

    The result is the question base (see `get_question_base`) followed by
    a colon and the comma-separated category names
    (see `get_category_names`).
    """
    # Join the names first, then fetch the base, to keep the call order.
    joined_names = ', '.join(self.get_category_names())
    question_base = self.get_question_base()
    return f'{question_base}: {joined_names}'

@abc.abstractmethod
def test(self, patient: Patient) -> typing.Optional[C]:
"""
Expand Down
4 changes: 2 additions & 2 deletions src/gpsea/analysis/predicate/genotype/_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ def __init__(
def get_categorizations(self) -> typing.Sequence[Categorization]:
return self._allowed

def get_question(self) -> str:
return self._predicate.get_question()
def get_question_base(self) -> str:
return self._predicate.get_question_base()

def test(self, patient: Patient) -> typing.Optional[Categorization]:
cat = self._predicate.test(patient)
Expand Down
17 changes: 7 additions & 10 deletions src/gpsea/analysis/predicate/genotype/_gt_predicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def get_categorizations(self) -> typing.Sequence[Categorization]:
AlleleCountingGenotypeBooleanPredicate.NO,
)

def get_question(self) -> str:
def get_question_base(self) -> str:
return self._allele_counter.get_question()

def test(self, patient: Patient) -> typing.Optional[Categorization]:
Expand Down Expand Up @@ -92,13 +92,12 @@ def __init__(
):
self._counters = tuple(counters)
self._categorizations = tuple(categorizations)
group_names = ", ".join(c.category.name for c in self._categorizations)
self._question = f"Genotype group: {group_names}"
self._question = "Genotype group"

def get_categorizations(self) -> typing.Sequence[Categorization]:
return self._categorizations

def get_question(self) -> str:
def get_question_base(self) -> str:
return self._question

def test(self, patient: Patient) -> typing.Optional[Categorization]:
Expand Down Expand Up @@ -132,7 +131,7 @@ def __hash__(self) -> int:
)

def __str__(self) -> str:
return self.get_question()
return self.get_question_base()

def __repr__(self) -> str:
return (
Expand Down Expand Up @@ -194,7 +193,7 @@ def __init__(
):
self._allele_counter = allele_counter

def get_question(self) -> str:
def get_question_base(self) -> str:
return self._allele_counter.get_question()

def test(self, patient: Patient) -> typing.Optional[Categorization]:
Expand Down Expand Up @@ -558,14 +557,12 @@ def __init__(
issues = ModeOfInheritancePredicate._check_categorizations(self._categorizations)
if issues:
raise ValueError('Cannot create predicate: {}'.format(', '.join(issues)))
self._question = 'Which genotype group does the patient fit in: {}'.format(
', '.join(cat.category.name for cat in self._categorizations),
)
self._question = 'Which genotype group does the patient fit in'

def get_categorizations(self) -> typing.Sequence[Categorization]:
return self._categorizations

def get_question(self) -> str:
def get_question_base(self) -> str:
return self._question

def test(
Expand Down
6 changes: 3 additions & 3 deletions src/gpsea/analysis/predicate/phenotype/_pheno.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,8 @@ def __init__(
# Some tests depend on the order of `self._categorizations`.
self._categorizations = (self._phenotype_observed, self._phenotype_excluded)

def get_question(self) -> str:
return f"Is {self._query_label} present in the patient?"
def get_question_base(self) -> str:
return f"Is {self._query_label} present in the patient"

@property
def phenotype(self) -> hpotk.TermId:
Expand Down Expand Up @@ -222,7 +222,7 @@ def __init__(self, disease_id_query: hpotk.TermId):
phenotype=disease_id_query,
)

def get_question(self) -> str:
def get_question_base(self) -> str:
return f"Was {self._query} diagnosed in the patient"

@property
Expand Down
4 changes: 2 additions & 2 deletions tests/analysis/predicate/genotype/test_gt_predicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ def test_get_question(
self,
predicate: GenotypePolyPredicate,
):
question = predicate.get_question()
assert question == "Genotype group: Point, LoF"
question = predicate.get_question_base()
assert question == "Genotype group"

def test_get_categorizations(
self,
Expand Down

0 comments on commit 21caddc

Please sign in to comment.