Skip to content

Commit

Permalink
Merge pull request #336 from monarch-initiative/report-the-analysis-c…
Browse files Browse the repository at this point in the history
…omponents

Expose more analysis components
  • Loading branch information
ielis authored Oct 29, 2024
2 parents e1f7ab3 + 7390305 commit f20af85
Show file tree
Hide file tree
Showing 14 changed files with 103 additions and 43 deletions.
2 changes: 1 addition & 1 deletion docs/user-guide/analyses/measurements.rst
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ We use the measurement of `Testosterone [Mass/volume] in Serum or Plasma <https:
... label="Testosterone [Mass/volume] in Serum or Plasma",
... )
>>> pheno_scorer.description
'The value of Testosterone [Mass/volume] in Serum or Plasma [LOINC:2986-8]'
'Value of Testosterone [Mass/volume] in Serum or Plasma [LOINC:2986-8]'


Statistical test
Expand Down
2 changes: 1 addition & 1 deletion docs/user-guide/predicates/hpo_predicate.rst
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ and now we can set up a predicate to test for presence of *Abnormal lens morphol
>>> pheno_predicate.name
'HPO Predicate'
>>> pheno_predicate.description
'Test for presence of Abnormal lens morphology'
'Test for presence of Abnormal lens morphology [HP:0000517]'
>>> pheno_predicate.group_labels
('Yes', 'No')

Expand Down
14 changes: 14 additions & 0 deletions src/gpsea/analysis/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from .predicate.phenotype import PhenotypePolyPredicate, P
from .predicate.genotype import GenotypePolyPredicate
from ._partition import Partitioning


class Statistic(metaclass=abc.ABCMeta):
Expand Down Expand Up @@ -266,12 +267,16 @@ class MonoPhenotypeAnalysisResult(AnalysisResult, metaclass=abc.ABCMeta):
def __init__(
self,
gt_predicate: GenotypePolyPredicate,
phenotype: Partitioning,
statistic: Statistic,
data: pd.DataFrame,
pval: float,
):
super().__init__(gt_predicate, statistic)

assert isinstance(phenotype, Partitioning)
self._phenotype = phenotype

assert isinstance(data, pd.DataFrame) and all(
col in data.columns for col in MonoPhenotypeAnalysisResult.DATA_COLUMNS
)
Expand All @@ -283,6 +288,13 @@ def __init__(
raise ValueError(
f"`pval` must be a finite float in range [0, 1] but it was {pval}"
)

@property
def phenotype(self) -> Partitioning:
    """
    Get the :class:`~gpsea.analysis.Partitioning` that produced the phenotype.

    This is the `phenotype` object that was passed to the constructor.
    """
    return self._phenotype

@property
def data(self) -> pd.DataFrame:
Expand Down Expand Up @@ -325,12 +337,14 @@ def pval(self) -> float:
def __eq__(self, value: object) -> bool:
    """
    Test if `value` is equal to this result.

    The results are equal if `value` is a `MonoPhenotypeAnalysisResult`,
    the parent class considers both objects equal, and the phenotype,
    the p value, and the data frame are equal.
    """
    # Use the zero-argument `super()` to delegate to the parent class.
    # `super(AnalysisResult, self)` starts the lookup *after* `AnalysisResult`
    # in the MRO, hence the parent's state was never compared.
    # NOTE(review): assumes `AnalysisResult` implements `__eq__` and `__hash__`.
    return (
        isinstance(value, MonoPhenotypeAnalysisResult)
        and super().__eq__(value)
        and self._phenotype == value._phenotype
        and self._pval == value._pval
        and self._data.equals(value._data)
    )

def __hash__(self) -> int:
    """
    Compute the hash consistently with `__eq__`.

    The data frame itself is not hashable, so only its shape contributes
    to the hash. Data frames that are equal have the same shape,
    therefore equal results still produce equal hashes.
    """
    return hash((
        super().__hash__(),
        self._phenotype,
        self._pval,
        # `hash(self._data)` raises `TypeError` for a `pd.DataFrame`.
        self._data.shape,
    ))
2 changes: 1 addition & 1 deletion src/gpsea/analysis/_partition.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def variable_name(self) -> str:
"""
Get a `str` with the name of the variable investigated by the partitioning.
For instance `Sex`, `Allele groups`, `HPO term`, `Diagnosis`
For instance `Sex`, `Allele groups`, `HP:0001250`, `OMIM:256000`
"""
pass

Expand Down
3 changes: 2 additions & 1 deletion src/gpsea/analysis/pcats/_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,11 +387,12 @@ def _compute_result(
)

# 2 - Apply MTC filter and select p values to MTC
cohort_size = sum(1 for _ in cohort)
mtc_filter_results = self._mtc_filter.filter(
gt_predicate=gt_predicate,
ph_predicates=pheno_predicates,
counts=all_counts,
cohort_size=len(cohort),
cohort_size=cohort_size,
)

pvals = np.full(shape=(len(n_usable),), fill_value=np.nan)
Expand Down
25 changes: 13 additions & 12 deletions src/gpsea/analysis/predicate/genotype/_gt_predicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ def description(self) -> str:

@property
def variable_name(self) -> str:
return "Allele groups"
return "Allele group"

def test(self, patient: Patient) -> typing.Optional[Categorization]:
self._check_patient(patient)
Expand Down Expand Up @@ -357,13 +357,13 @@ def allele_count(
>>> from gpsea.analysis.predicate.genotype import allele_count
>>> zero_vs_one = allele_count(counts=({0,}, {1,}))
>>> zero_vs_one.summarize_groups()
'Allele counts: 0, 1'
'Allele count: 0, 1'
These counts will create three groups for individuals with zero, one or two alleles:
>>> zero_vs_one_vs_two = allele_count(counts=({0,}, {1,}, {2,}))
>>> zero_vs_one_vs_two.summarize_groups()
'Allele counts: 0, 1, 2'
'Allele count: 0, 1, 2'
:param counts: a sequence with allele count partitions.
:param target: a predicate for choosing the variants for testing
Expand Down Expand Up @@ -415,7 +415,7 @@ def description(self) -> str:

@property
def variable_name(self) -> str:
return "Allele counts"
return "Allele count"

def test(self, patient: Patient) -> typing.Optional[Categorization]:
self._check_patient(patient)
Expand Down Expand Up @@ -540,14 +540,6 @@ def create(
# Last, put the predicate together.
return DiagnosisPredicate(categorizations)

@property
def name(self) -> str:
return "Diagnosis Predicate"

@property
def description(self) -> str:
return "Partition the individual by diagnosis"

def __init__(
self,
categorizations: typing.Mapping[hpotk.TermId, Categorization],
Expand All @@ -558,6 +550,15 @@ def __init__(
)
self._hash = hash(tuple(categorizations.items()))

@property
def name(self) -> str:
    """
    Get the name of the predicate, a constant `str`.
    """
    return "Diagnosis Predicate"

@property
def description(self) -> str:
    """
    Get a description that lists the names of the diagnosis categories,
    separated by a comma.
    """
    # NOTE(review): assumes that iterating `self._categorizations` yields
    # `Categorization` objects and not the keys of a mapping - confirm
    # against the assignment in `__init__`.
    diagnoses = ", ".join(cat.category.name for cat in self._categorizations)
    return f"Partition the individual by presence of {diagnoses}"

@property
def variable_name(self) -> str:
    """
    Get the name of the variable investigated by this predicate.
    """
    return "Diagnosis"
Expand Down
8 changes: 4 additions & 4 deletions src/gpsea/analysis/predicate/phenotype/_pheno.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,11 +152,11 @@ def name(self) -> str:

@property
def description(self) -> str:
return f"Test for presence of {self._query_label}"
return f"Test for presence of {self._query_label} [{self._query.value}]"

@property
def variable_name(self) -> str:
return f"{self._query_label} is present"
return self._query.value

@property
def phenotype(self) -> hpotk.TermId:
Expand Down Expand Up @@ -257,11 +257,11 @@ def name(self) -> str:

@property
def description(self) -> str:
return "Partition based on a diagnosis"
return f"Partition based on a diagnosis of {self._query.value}"

@property
def variable_name(self) -> str:
return f"{self._query.value} was diagnosed"
return self._query.value

@property
def phenotype(self) -> hpotk.TermId:
Expand Down
35 changes: 29 additions & 6 deletions src/gpsea/analysis/pscore/_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from ..predicate.genotype import GenotypePolyPredicate
from .stats import PhenotypeScoreStatistic

from .._base import MonoPhenotypeAnalysisResult
from .._base import MonoPhenotypeAnalysisResult, Statistic
from .._partition import ContinuousPartitioning


Expand Down Expand Up @@ -71,7 +71,7 @@ def description(self) -> str:

@property
def variable_name(self) -> str:
return "Score"
return "Phenotype score"

def __init__(
self,
Expand Down Expand Up @@ -116,6 +116,26 @@ class PhenotypeScoreAnalysisResult(MonoPhenotypeAnalysisResult):
if the phenotype score is impossible to compute.
"""

def __init__(
    self,
    gt_predicate: GenotypePolyPredicate,
    phenotype: PhenotypeScorer,
    statistic: Statistic,
    data: pd.DataFrame,
    pval: float,
):
    """
    Create the result of a phenotype score analysis.

    All arguments are forwarded to the parent class.

    :param gt_predicate: the genotype predicate used in the analysis.
    :param phenotype: the scorer that computed the phenotype score.
    :param statistic: the statistic used in the analysis.
    :param data: the data frame with the analysis data.
    :param pval: the p value.
    """
    super().__init__(gt_predicate, phenotype, statistic, data, pval)
    # The parent class accepts any `Partitioning`. Here we narrow
    # the phenotype to `PhenotypeScorer`, which `phenotype_scorer` relies on.
    assert isinstance(phenotype, PhenotypeScorer)

def phenotype_scorer(self) -> PhenotypeScorer:
    """
    Get the scorer that computed the phenotype score.
    """
    # NOTE(review): this accessor is not decorated with `@property`,
    # unlike `SurvivalAnalysisResult.endpoint`, so it must be *called*:
    # `result.phenotype_scorer()`. Confirm that this is intended.

    # We are sure that `self._phenotype` is a `PhenotypeScorer`
    # because of the instance check in `__init__` and `PhenotypeScorer`
    # being a subclass of `Partitioning`.
    return self._phenotype  # type: ignore

def plot_boxplots(
self,
ax,
Expand Down Expand Up @@ -168,10 +188,11 @@ def __hash__(self) -> int:
def __str__(self) -> str:
return (
"PhenotypeScoreAnalysisResult("
"gt_predicate={self._gt_predicate}, "
"statistic={self._statistic}, "
"data={self._data}, "
"pval={self._pval})"
f"gt_predicate={self._gt_predicate}, "
f"phenotype_scorer={self._phenotype}, "
f"statistic={self._statistic}, "
f"data={self._data}, "
f"pval={self._pval})"
)

def __repr__(self) -> str:
Expand Down Expand Up @@ -213,6 +234,7 @@ def compare_genotype_vs_phenotype_score(
assert (
gt_predicate.n_categorizations() == 2
), "We only support 2 genotype categories at this point"
assert isinstance(pheno_scorer, PhenotypeScorer)

idx = pd.Index((patient.patient_id for patient in cohort), name="patient_id")
data = pd.DataFrame(
Expand Down Expand Up @@ -244,6 +266,7 @@ def compare_genotype_vs_phenotype_score(

return PhenotypeScoreAnalysisResult(
gt_predicate=gt_predicate,
phenotype=pheno_scorer,
statistic=self._statistic,
data=data,
pval=pval,
Expand Down
12 changes: 10 additions & 2 deletions src/gpsea/analysis/pscore/_measurement.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,19 @@ def name(self) -> str:

@property
def description(self) -> str:
return self.variable_name
return f"Value of {self._label} [{self._identifier.value}]"

@property
def variable_name(self) -> str:
return f"The value of {self._label} [{self._identifier.value}]"
return self._identifier.value

@property
def term_id(self) -> hpotk.TermId:
    """
    Get the term ID of the measurement.

    The `value` of the same identifier is reported as the variable name.
    """
    return self._identifier

@property
def label(self) -> str:
    """
    Get the measurement label, the same `str` that is used in the description.
    """
    return self._label

def score(
self,
Expand Down
17 changes: 10 additions & 7 deletions src/gpsea/analysis/temporal/_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,20 +73,22 @@ def __init__(
):
super().__init__(
gt_predicate=gt_predicate,
phenotype=endpoint,
statistic=statistic,
data=data,
pval=pval,
)

assert isinstance(endpoint, Endpoint)
self._endpoint = endpoint

@property
def endpoint(self) -> Endpoint:
"""
Get the endpoint used to compute the survival of the individuals.
"""
return self._endpoint
# We are sure that `self._phenotype` is assignable to `Endpoint`
# because of the instance check in `__init__` and `Endpoint`
# being a subclass of `Partitioning`.
return self._phenotype # type: ignore

def plot_kaplan_meier_curves(
self,
Expand Down Expand Up @@ -125,10 +127,11 @@ def __hash__(self) -> int:
def __str__(self) -> str:
return (
"SurvivalAnalysisResult("
"gt_predicate={self._gt_predicate}, "
"statistic={self._statistic}, "
"data={self._data}, "
"pval={self._pval})"
f"gt_predicate={self._gt_predicate}, "
f"endpoint={self._phenotype}, "
f"statistic={self._statistic}, "
f"data={self._data}, "
f"pval={self._pval})"
)

def __repr__(self) -> str:
Expand Down
6 changes: 3 additions & 3 deletions src/gpsea/analysis/temporal/endpoint/_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class Death(EndpointBase):

@property
def name(self) -> str:
return "Death"
return "Age of death"

@property
def description(self) -> str:
Expand Down Expand Up @@ -111,7 +111,7 @@ def description(self) -> str:

@property
def variable_name(self) -> str:
return "Phenotype onset"
return f"Onset of {self._term_id.value}"

def compute_survival(
self,
Expand Down Expand Up @@ -194,7 +194,7 @@ def description(self) -> str:

@property
def variable_name(self) -> str:
return "Disease onset"
return f"Onset of {self._disease_id.value}"

def compute_survival(
self,
Expand Down
6 changes: 3 additions & 3 deletions tests/analysis/predicate/genotype/test_gt_predicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def test_eq_and_hash(self):
def test_summarize_groups(self):
a = allele_count(counts=((0, 1), (2,)))

assert a.summarize_groups() == "Allele counts: 0 OR 1, 2"
assert a.summarize_groups() == "Allele count: 0 OR 1, 2"


class TestAllelePredicates:
Expand Down Expand Up @@ -164,7 +164,7 @@ def test_monoallelic_predicate__general_stuff(

gt_predicate = monoallelic_predicate(is_missense, is_synonymous)

assert gt_predicate.summarize_groups() == 'Allele groups: A, B'
assert gt_predicate.summarize_groups() == 'Allele group: A, B'

@pytest.mark.parametrize(
"individual_name,expected_name",
Expand Down Expand Up @@ -199,7 +199,7 @@ def test_biallelic_predicate__general_stuff(

gt_predicate = biallelic_predicate(is_missense, is_synonymous)

assert gt_predicate.summarize_groups() == 'Allele groups: A/A, A/B, B/B'
assert gt_predicate.summarize_groups() == 'Allele group: A/A, A/B, B/B'


class TestSexPredicate:
Expand Down
Loading

0 comments on commit f20af85

Please sign in to comment.