From c6b817809eeb6893f9b65315d807a55517d7167a Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Tue, 27 Aug 2024 14:39:09 +0200 Subject: [PATCH 1/9] Allow to wrap a scoring function and produce `PhenotypeScorer`. --- src/gpsea/analysis/pscore/_api.py | 46 +++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/src/gpsea/analysis/pscore/_api.py b/src/gpsea/analysis/pscore/_api.py index a2899734f..5ddce1210 100644 --- a/src/gpsea/analysis/pscore/_api.py +++ b/src/gpsea/analysis/pscore/_api.py @@ -11,8 +11,35 @@ class PhenotypeScorer(metaclass=abc.ABCMeta): """ `PhenotypeScorer` assigns the patient with a phenotype score. + + The score can be :attr:`math.nan` if it is not possible to compute the score for a patient. + + The scorer can be created by wrapping a scoring function (see :func:`~PhenotypeScorer.wrap_scoring_function`). """ + @staticmethod + def wrap_scoring_function( + func: typing.Callable[[Patient], float], + ) -> "PhenotypeScorer": + """ + Create a `PhenotypeScorer` by wrapping the provided scoring function `func`. + + The function must take exactly one argument of type :class:`~gpsea.model.Patient` + and return a `float` with the corresponding phenotype score. + + Example + ^^^^^^^ + + >>> from gpsea.analysis.pscore import PhenotypeScorer + >>> def f(p): return 123.4 + >>> phenotype_scorer = PhenotypeScorer.wrap_scoring_function(f) + + `phenotype_scorer` will assign all patients a score of `123.4`. + + :param func: the scoring function. + """ + return FunctionPhenotypeScorer(func=func) + def score(self, patient: Patient) -> float: """ Compute the score for the `patient`. @@ -20,6 +47,25 @@ def score(self, patient: Patient) -> float: pass + +class FunctionPhenotypeScorer(PhenotypeScorer): + """ + `FunctionPhenotypeScorer` computes the phenotype score using the provided function/closure. 
+ """ + # NOT PART OF THE PUBLIC API + + def __init__( + self, + func: typing.Callable[[Patient], float], + ): + self._func = func + + def score(self, patient: Patient) -> float: + """ + Apply the function to compute the phenotype score. + """ + return self._func(patient) + + class PhenotypeScoreAnalysisResult: """ `PhenotypeScoreAnalysisResult` is a container for :class:`PhenotypeScoreAnalysis` results. From e0c70b612830d912c5776f03edcc91ba48fc2bff Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Tue, 27 Aug 2024 14:39:28 +0200 Subject: [PATCH 2/9] Create `DeVriesPhenotypeScorer` stub. --- src/gpsea/analysis/pscore/__init__.py | 4 ++-- src/gpsea/analysis/pscore/_impl.py | 13 +++++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/gpsea/analysis/pscore/__init__.py b/src/gpsea/analysis/pscore/__init__.py index d03f6fd99..ce9c0e89d 100644 --- a/src/gpsea/analysis/pscore/__init__.py +++ b/src/gpsea/analysis/pscore/__init__.py @@ -1,7 +1,7 @@ from ._api import PhenotypeScorer, PhenotypeScoreAnalysis, PhenotypeScoreAnalysisResult -from ._impl import CountingPhenotypeScorer +from ._impl import CountingPhenotypeScorer, DeVriesPhenotypeScorer __all__ = [ 'PhenotypeScorer', 'PhenotypeScoreAnalysis', 'PhenotypeScoreAnalysisResult', - 'CountingPhenotypeScorer', + 'CountingPhenotypeScorer', 'DeVriesPhenotypeScorer', ] diff --git a/src/gpsea/analysis/pscore/_impl.py b/src/gpsea/analysis/pscore/_impl.py index b2ce8bd24..f7cf248cf 100644 --- a/src/gpsea/analysis/pscore/_impl.py +++ b/src/gpsea/analysis/pscore/_impl.py @@ -133,3 +133,16 @@ def score( # assert len(args) == 1 and isinstance(args[0], Patient), 'The first argument must be an instance of `Patient`' # assert len(kwds) == 0, 'We do not take any key-word arguments' # return self.score(args[0]) + + +class DeVriesPhenotypeScorer(PhenotypeScorer): + """ + `DeVriesPhenotypeScorer` computes "adapted De Vries Score" + as described in `Feenstra et al `_. 
+ """ + + def score(self, patient: Patient) -> float: + """ + Compute the score for the `patient`. + """ + raise NotImplementedError() From 303858f36cd2fe5edbb0a9f306fbeb7e6c94c534 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Tue, 27 Aug 2024 14:44:42 +0200 Subject: [PATCH 3/9] Check genotype categories in boxplot function. --- src/gpsea/analysis/pscore/_api.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/gpsea/analysis/pscore/_api.py b/src/gpsea/analysis/pscore/_api.py index 5ddce1210..991036de4 100644 --- a/src/gpsea/analysis/pscore/_api.py +++ b/src/gpsea/analysis/pscore/_api.py @@ -122,8 +122,13 @@ def plot_boxplots( # skip the patients with unassigned genotype group not_na_gts = self._genotype_phenotype_scores["genotype"].notna() data = self._genotype_phenotype_scores.loc[not_na_gts] + + # Check that the provided genotype predicate defines the same categories + # as those found in `data.` actual = set(data["genotype"].unique()) - expected = gt_predicate.get_categorizations() + expected = set(gt_predicate.get_categorizations()) + assert actual == expected, 'Mismatch in the genotype categories' + x = [ data.loc[data["genotype"] == c.category.cat_id, "phenotype"].to_list() for c in gt_predicate.get_categorizations() From 92800cb752b0371080b55a50cd3281fbe0996d79 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Tue, 27 Aug 2024 14:49:42 +0200 Subject: [PATCH 4/9] Fix bug in the check. 
--- src/gpsea/analysis/pscore/_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gpsea/analysis/pscore/_api.py b/src/gpsea/analysis/pscore/_api.py index 991036de4..36caa0e93 100644 --- a/src/gpsea/analysis/pscore/_api.py +++ b/src/gpsea/analysis/pscore/_api.py @@ -126,7 +126,7 @@ def plot_boxplots( # Check that the provided genotype predicate defines the same categories # as those found in `data.` actual = set(data["genotype"].unique()) - expected = set(gt_predicate.get_categorizations()) + expected = set(c.cat_id for c in gt_predicate.get_categories()) assert actual == expected, 'Mismatch in the genotype categories' x = [ From 1e3c4635ca9538fc44f7ead67995baed2de1dfca Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Thu, 29 Aug 2024 08:26:56 +0200 Subject: [PATCH 5/9] WIP, de Vries score and documentation --- docs/user-guide/devries.rst | 139 +++++++++++++++++++++++ docs/user-guide/index.rst | 1 + docs/user-guide/phenotype_predicates.rst | 18 +++ src/gpsea/analysis/pscore/_impl.py | 75 ++++++++++-- 4 files changed, 225 insertions(+), 8 deletions(-) create mode 100644 docs/user-guide/devries.rst create mode 100644 docs/user-guide/phenotype_predicates.rst diff --git a/docs/user-guide/devries.rst b/docs/user-guide/devries.rst new file mode 100644 index 000000000..dd62d13d2 --- /dev/null +++ b/docs/user-guide/devries.rst @@ -0,0 +1,139 @@ +.. _devries: + +============== +De Vries Score +============== + + +The De Vries score was developed as a relatively simple phenotypic severity +score for individuals with intellectual disability in which points are given +for (severity of) intellectual disability, +growth abnormalities (prenatal and postnatal), facial dysmorphisms, +nonfacial dysmorphisms, and other congenital anomalies +(`Dingemans et al. (2022) `_). +Statistical significance of a difference in the De Vries score between groups can be +determined using the Mann–Whitney-U test. + +We refer to `Feenstra et al. 
(2011) `_ for +te original description of the adjusted De Vries score. Here we offer a version of the +score that leverages the structure of the Human Phenotype Ontology to assess the phenotype. + +The De Vries score has several sections, each of which is scored on a point system. The +final score is obtained as the sum of the points of each of the sections. + +Developmental delay +~~~~~~~~~~~~~~~~~~~ + +The original score assigns one point for mild or moderate developmental delay +and two points for severe developmental delay. + ++----------------------------------------------------------------------------------------------------------+-----------+ +| HPO term | Score | ++==========================================================================================================+===========+ +| `Mild global developmental delay (HP:0011342) `_ | 1 | ++----------------------------------------------------------------------------------------------------------+-----------+ +| `Moderate global developmental delay (HP:0011343) `_ | 1 | ++----------------------------------------------------------------------------------------------------------+-----------+ +| `Severe global developmental delay (HP:0011344) `_ | 2 | ++----------------------------------------------------------------------------------------------------------+-----------+ +| `Profound global developmental delay (HP:0012736) `_ | 2 | ++----------------------------------------------------------------------------------------------------------+-----------+ +| `Global developmental delay (HP:0001263) `_ | 1 | ++----------------------------------------------------------------------------------------------------------+-----------+ + +Note that one point is assigned for the term Global developmental delay (HP:0001263), which is the +parent of the other terms, because no information was provided about the degree of delay. 
+ +If none of the above terms is found, then the scorer assigns terms based on the Intellectual Disability terms. + ++----------------------------------------------------------------------------------------------------------+-----------+ +| HPO term | Score | ++==========================================================================================================+===========+ +| `Intellectual disability, borderline (HP:0006889) `_ | 0.5 | ++----------------------------------------------------------------------------------------------------------+-----------+ +| `Intellectual disability, mild (HP:0001256) `_ | 1 | ++----------------------------------------------------------------------------------------------------------+-----------+ +| `Intellectual disability, moderate (HP:0002342) `_ | 1 | ++----------------------------------------------------------------------------------------------------------+-----------+ +| `Intellectual disability, severe (HP:0010864) `_ | 2 | ++----------------------------------------------------------------------------------------------------------+-----------+ +| `Intellectual disability, profound (HP:0002187) `_ | 2 | ++----------------------------------------------------------------------------------------------------------+-----------+ +| `Intellectual disability (HP:0001249) `_ | 1 | ++----------------------------------------------------------------------------------------------------------+-----------+ + +If none of these terms is found, a score of zero is assigned for this section. + + +Postnatal growth abnormalities +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In the original score, one point is assigned for each of Microcephaly, Short stature, Macrocephaly, and Tall stature, +with the maximum score for the section being limited to 2 points. 
We implement this as adding one point for either the +corresponding HPO terms or any of their descendents (for instance, `Disproportionate short stature (HP:0003498) `_ would +be counted for `Short stature (HP:0004322) `_). + ++----------------------------------------------------------------------------------------------------------+-----------+ +| HPO term | Score | ++==========================================================================================================+===========+ +| `Microcephaly (HP:0000252) `_ | 1 | ++----------------------------------------------------------------------------------------------------------+-----------+ +| `Short stature (HP:0004322) `_ | 1 | ++----------------------------------------------------------------------------------------------------------+-----------+ +| `Macrocephaly (HP:0000256) `_ | 1 | ++----------------------------------------------------------------------------------------------------------+-----------+ +| `Tall stature (HP:0000098) `_ | 1 | ++----------------------------------------------------------------------------------------------------------+-----------+ + +If none of these terms is found, a score of zero is assigned for this section. Logically, the maximum score obtainable +is 2 because the same individual cannot have both tall and short stature or both micro- and macrocephaly. + + +Facial dysmorphic features +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This section assigns two points if two or more anomalies are identified in the following +categories: hypertelorism, nasal anomalies and ear anomalies. Our implementation of this feature counts the total +number of terms or descendents of the following HPO terms. 
+ ++----------------------------------------------------------------------------------------------------------+-----------+ +| HPO term | Score | ++==========================================================================================================+===========+ +| `Hypertelorism (HP:0000316) `_ | 1 | ++----------------------------------------------------------------------------------------------------------+-----------+ +| `Abnormal external nose morphology (HP:0010938) `_ | 1 each | ++----------------------------------------------------------------------------------------------------------+-----------+ +| `Abnormal pinna morphology (HP:0000377) `_ | 1 each | ++----------------------------------------------------------------------------------------------------------+-----------+ + +If two or more terms are found, the score is 2, otherwise a score of zero is assigned. + + +Non-facial dysmorphism and congenital abnormalities +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +One point is assigned for for either the +corresponding HPO terms or any of their descendents up to a maximum of two points. 
+ ++----------------------------------------------------------------------------------------------------------+-----------+ +| HPO term | Score | ++==========================================================================================================+===========+ +| `Abnormal hand morphology (HP:0005922) `_ | 1 each | ++----------------------------------------------------------------------------------------------------------+-----------+ +| `Abnormal heart morphology (HP:0001627) `_ | 1 each | ++----------------------------------------------------------------------------------------------------------+-----------+ +| `Hypospadias (HP:0000047) `_ | 1 | ++----------------------------------------------------------------------------------------------------------+-----------+ + +Final score +~~~~~~~~~~~ + +The final score is obtained by summing the scores from each of the sections. The final score ranges from 0 to 10, with +higher scores being considered a proxy for higher clinical severity. + + +Using the De Vries Scorer in code +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +TODO + + diff --git a/docs/user-guide/index.rst b/docs/user-guide/index.rst index 9f7933eea..f3e0569ab 100644 --- a/docs/user-guide/index.rst +++ b/docs/user-guide/index.rst @@ -13,6 +13,7 @@ TODO - write high level overview and bridge to individual sections. input-data exploratory predicates + phenotype_predicates stats mtc glossary diff --git a/docs/user-guide/phenotype_predicates.rst b/docs/user-guide/phenotype_predicates.rst new file mode 100644 index 000000000..86572657f --- /dev/null +++ b/docs/user-guide/phenotype_predicates.rst @@ -0,0 +1,18 @@ +.. _phenotype_predicates: + +==================== +Phenotype Predicates +==================== + +GPSEA offers several phenotype predicates. + +TODO -- separate explanations for HPO (Fisher), scores (Mann Whitney) and survival (log rank test). + + + + +.. 
toctree:: + :maxdepth: 1 + :caption: Contents: + + devries diff --git a/src/gpsea/analysis/pscore/_impl.py b/src/gpsea/analysis/pscore/_impl.py index b6bd6d5bf..8a2807f76 100644 --- a/src/gpsea/analysis/pscore/_impl.py +++ b/src/gpsea/analysis/pscore/_impl.py @@ -20,8 +20,8 @@ class CountingPhenotypeScorer(PhenotypeScorer): @staticmethod def from_query_curies( - hpo: hpotk.MinimalOntology, - query: typing.Iterable[typing.Union[str, hpotk.TermId]], + hpo: hpotk.MinimalOntology, + query: typing.Iterable[typing.Union[str, hpotk.TermId]], ): """ Create a scorer to test for the number of phenotype terms that fall into the phenotype groups. @@ -65,9 +65,9 @@ def from_query_curies( ) def __init__( - self, - hpo: hpotk.MinimalOntology, - query: typing.Iterable[hpotk.TermId], + self, + hpo: hpotk.MinimalOntology, + query: typing.Iterable[hpotk.TermId], ): self._hpo = hpo self._query = set(query) @@ -76,8 +76,8 @@ def get_question(self) -> str: return "How many of the query HPO terms (or their descendants) does the individual display" def score( - self, - patient: Patient, + self, + patient: Patient, ) -> float: """ Get the count (number) of terms in the query set @@ -118,8 +118,67 @@ class DeVriesPhenotypeScorer(PhenotypeScorer): as described in `Feenstra et al `_. 
""" + def __init__( + self, + hpo: hpotk.MinimalOntology, + ): + self._hpo = hpo + + def _developmental_delay_score(self, observed_term_ids: typing.List[str]) -> float: + """ + calculate the dev delay component of the score + Args: + observed_term_ids: terms observed in patient + + Returns: a score between 0 and 2 + """ + gdd_tids = {'HP:0011344': 2, 'HP:0011344': 2, + 'HP:0011342': 1, 'HP:0011343': 1, 'HP:0001263': 1} # severe and profound GDD + idd_tids = {'HP:0010864': 2, 'HP:0002187': 2,'HP:0001256': 1, 'HP:0002342': 1, 'HP:0001249': 1, + 'HP:0006889': 0.5} # mild, moderate, and unspecified GDD (borderline has 0.5) + # check GDD terms with higher priority than ID terms + for t in observed_term_ids: + if t in gdd_tids: + return gdd_tids.get(t) + for t in observed_term_ids: + if t in idd_tids: + return idd_tids.get(t) + return 0 + + + def _postnatal_growth_score(self, observed_term_ids: typing.List[str]) -> float: + """ + calculate the postnatal growth component of the score + Args: + observed_term_ids: + + Returns: + + """ + microcephaly = 'HP:0000252' + short_stature = 'HP:0004322' + macrocephaly = 'HP: 0000256' + tall_stature = 'HP:0000098' + # to do -- implement me + + def _calculate_score(self, observed_term_ids: typing.List[str]) -> float: + """ + calculate score based on list of strings with term identifiers or observed HPO terms. + Args: + observed_term_ids: list of strings with term identifiers or observed HPO terms + + Returns: de Vries score between 0 and 10 + + """ + delay_score = self._developmental_delay_score(observed_term_ids) + growth_score = self._postnatal_growth_score(observed_term_ids) + ## todo -- complete + return delay_score + growth_score + def score(self, patient: Patient) -> float: """ Compute the score for the `patient`. 
""" - raise NotImplementedError() + # collect term identifiers as strings for all observed phenotypes + observed_term_ids = [tid.identifier.value for tid in patient.present_phenotypes()] + return self._calculate_score(observed_term_ids) From 57a2a7d422eb5fa59d9246c176c125395342aa3f Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Thu, 29 Aug 2024 14:13:21 +0200 Subject: [PATCH 6/9] de Vries score and documentation and unit tests --- docs/user-guide/devries.rst | 16 ++++ src/gpsea/analysis/pscore/_impl.py | 125 ++++++++++++++++++++++--- tests/analysis/test_de_vries_scorer.py | 85 +++++++++++++++++ 3 files changed, 215 insertions(+), 11 deletions(-) create mode 100644 tests/analysis/test_de_vries_scorer.py diff --git a/docs/user-guide/devries.rst b/docs/user-guide/devries.rst index dd62d13d2..60585b5c9 100644 --- a/docs/user-guide/devries.rst +++ b/docs/user-guide/devries.rst @@ -65,6 +65,22 @@ If none of the above terms is found, then the scorer assigns terms based on the If none of these terms is found, a score of zero is assigned for this section. +Prenatal-onset growth retardation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +In the original score, two points are assigned if Prenatal-onset growth retardation is present. In our implementation, +we assign two points if either of the following terms is present (the score is thus either zero or two). 
+ ++----------------------------------------------------------------------------------------------------------+-----------+ +| HPO term | Score | ++==========================================================================================================+===========+ +| `Small for gestational age (HP:0001518) `_ | 2 | ++----------------------------------------------------------------------------------------------------------+-----------+ +| `Intrauterine growth retardation (HP:0001511) `_ | 2 | ++----------------------------------------------------------------------------------------------------------+-----------+ + + + + Postnatal growth abnormalities ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/src/gpsea/analysis/pscore/_impl.py b/src/gpsea/analysis/pscore/_impl.py index 8a2807f76..6c7248b98 100644 --- a/src/gpsea/analysis/pscore/_impl.py +++ b/src/gpsea/analysis/pscore/_impl.py @@ -12,7 +12,7 @@ class CountingPhenotypeScorer(PhenotypeScorer): that is equivalent to the count of present phenotypes that are either an exact match to the `query` terms or their descendants. - For instance, we may want to count whether an individual has brain, liver, kidney, and skin abormalities. + For instance, we may want to count whether an individual has brain, liver, kidney, and skin abnormalities. In the case, the query would include the corresponding terms (e.g., Abnormal brain morphology HP:0012443). An individual can then have between 0 and 4 phenotype group abnormalities. This predicate is intended to be used with the Mann Whitney U test. @@ -42,7 +42,7 @@ def from_query_curies( f"query argument must be iterable of hpotk TermId's or strings but we found {type(q)}" ) - # Now chack that the term IDs are HPO term IDs. + # Now check that the term IDs are HPO term IDs. 
if q not in hpo: raise ValueError(f"The query {q} was not found in the HPO") query_term_ids.add(q) @@ -115,7 +115,7 @@ def score( class DeVriesPhenotypeScorer(PhenotypeScorer): """ `DeVriesPhenotypeScorer` computes "adapted De Vries Score" - as described in `Feenstra et al `_. + as described in `Feenstra et al. `_. """ def __init__( @@ -132,10 +132,10 @@ def _developmental_delay_score(self, observed_term_ids: typing.List[str]) -> flo Returns: a score between 0 and 2 """ - gdd_tids = {'HP:0011344': 2, 'HP:0011344': 2, + gdd_tids = {'HP:0011344': 2, 'HP:0012736': 2, 'HP:0011342': 1, 'HP:0011343': 1, 'HP:0001263': 1} # severe and profound GDD - idd_tids = {'HP:0010864': 2, 'HP:0002187': 2,'HP:0001256': 1, 'HP:0002342': 1, 'HP:0001249': 1, - 'HP:0006889': 0.5} # mild, moderate, and unspecified GDD (borderline has 0.5) + idd_tids = {'HP:0010864': 2, 'HP:0002187': 2, 'HP:0001256': 1, 'HP:0002342': 1, 'HP:0001249': 1, + 'HP:0006889': 0.5} # mild, moderate, and unspecified GDD (borderline has 0.5) # check GDD terms with higher priority than ID terms for t in observed_term_ids: if t in gdd_tids: @@ -145,21 +145,122 @@ def _developmental_delay_score(self, observed_term_ids: typing.List[str]) -> flo return idd_tids.get(t) return 0 + def _term_or_descendant(self, + target_tid: str, + observed_term_ids: typing.List[str]): + """ + Args: + target_tid: term of interest + observed_term_ids: all terms observed in patient + + Returns: + 1 if the term or any descendant is present in the patient, otherwise 0 + """ + for term_id in observed_term_ids: + for desc_tid in self._hpo.graph.get_ancestors(term_id, include_source=True): + if desc_tid.value == target_tid: + return 1 + return 0 + + def _term_or_descendant_count(self, + target_tid: str, + observed_term_ids: typing.List[str]): + """ + Args: + target_tid: term of interest + observed_term_ids: all terms observed in patient + + Returns: + 1 if the term or any descendant is present in the patient, otherwise 0 + """ + total_count = 0 + 
for term_id in observed_term_ids: + for desc_tid in self._hpo.graph.get_ancestors(term_id, include_source=True): + if desc_tid.value == target_tid: + total_count += 1 + return total_count def _postnatal_growth_score(self, observed_term_ids: typing.List[str]) -> float: """ calculate the postnatal growth component of the score Args: - observed_term_ids: + observed_term_ids: terms observed in patient Returns: """ microcephaly = 'HP:0000252' short_stature = 'HP:0004322' - macrocephaly = 'HP: 0000256' + macrocephaly = 'HP:0000256' tall_stature = 'HP:0000098' - # to do -- implement me + total_count = 0 + for tid in [microcephaly, short_stature, macrocephaly, tall_stature]: + total_count += self._term_or_descendant(tid, observed_term_ids) + if total_count > 2: + raise ValueError(f"Inconsistent annotations for postnatal growth score {total_count}: {observed_term_ids}") + return total_count + + def _facial_dysmorphism_score(self, observed_term_ids: typing.List[str]) -> float: + """ + This section assigns two points if two or more anomalies are identified in the following + categories: hypertelorism, nasal anomalies and ear anomalies. Our implementation counts the total + number of terms or descendents of the hypertelorism, Abnormal external nose morphology, and + Abnormal pinna morphology. 
+ + Args: + observed_term_ids: terms observed in patient + + Returns: facial dysmorphism score (between 0 and 2) + + """ + hypertelorism = 'HP:0000316' + external_nose = 'HP:0010938' + pinna_morphology = 'HP:0000377' + total_count = len([t for t in observed_term_ids if t == hypertelorism]) + total_count += self._term_or_descendant_count(target_tid=external_nose, observed_term_ids=observed_term_ids) + total_count += self._term_or_descendant_count(target_tid=pinna_morphology, observed_term_ids=observed_term_ids) + if total_count > 1: + return 2 + else: + return 0 + + def _congenital_score(self, observed_term_ids: typing.List[str]) -> float: + """ + Non-facial dysmorphism and congenital abnormalities component + One point is assigned for either the corresponding HPO terms or any of their descendents up to a maximum of 2. + Args: + observed_term_ids: terms observed in patient + + Returns: Non-facial dysmorphism and congenital abnormalities score (between 0 and 2) + + """ + hypospadias = 'HP:0000047' + abnormal_hand_morphology = 'HP:0005922' + abnormal_heart_morphology = 'HP:0001627' + total_count = len([t for t in observed_term_ids if t == hypospadias]) + total_count += self._term_or_descendant_count(target_tid=abnormal_hand_morphology, + observed_term_ids=observed_term_ids) + total_count += self._term_or_descendant_count(target_tid=abnormal_heart_morphology, + observed_term_ids=observed_term_ids) + return min(2, total_count) + + def _prenatal_growth_score(self, observed_term_ids: typing.List[str]) -> float: + """ + two points are assigned if Prenatal-onset growth retardation is present + + Args: + observed_term_ids: list of strings with term identifiers or observed HPO terms + + Returns: score between 0 and 2 + + """ + small_for_gestational_age = 'HP:0001518' + intrauterine_growth_retardation = 'HP:0001511' + targets = {small_for_gestational_age, intrauterine_growth_retardation} + for tid in observed_term_ids: + if tid in targets: + return 2 + return 0 def 
_calculate_score(self, observed_term_ids: typing.List[str]) -> float: """ @@ -172,8 +273,10 @@ def _calculate_score(self, observed_term_ids: typing.List[str]) -> float: """ delay_score = self._developmental_delay_score(observed_term_ids) growth_score = self._postnatal_growth_score(observed_term_ids) - ## todo -- complete - return delay_score + growth_score + facial_score = self._facial_dysmorphism_score(observed_term_ids) + congen_score = self._congenital_score(observed_term_ids) + prenatal_score = self._prenatal_growth_score(observed_term_ids) + return delay_score + growth_score + facial_score + congen_score + prenatal_score def score(self, patient: Patient) -> float: """ diff --git a/tests/analysis/test_de_vries_scorer.py b/tests/analysis/test_de_vries_scorer.py new file mode 100644 index 000000000..6057c247c --- /dev/null +++ b/tests/analysis/test_de_vries_scorer.py @@ -0,0 +1,85 @@ +import typing + +import hpotk +import pytest + +from gpsea.analysis.pscore import DeVriesPhenotypeScorer +from gpsea.model import Patient, SampleLabels, Phenotype + +intrauterine_growth_retardation = 'HP:0001511' +small_for_gestational_age = 'HP:0001518' +arachnodactyly = "HP:0001166" +seizure = "HP:0001250" +sensorineural_hearing_impairment = 'HP:0000407' +intellectual_disability_mild = 'HP:0001256' +intellectual_disability_profound = 'HP:0002187' +microcephaly = 'HP:0000252' +short_stature = 'HP:0004322' +hypertelorism = 'HP:0000316' +posteriorly_rotated_ears = 'HP:0000358' +underdeveloped_crus_of_the_helix = 'HP:0009898' # external ear morphology +ventricular_septal_defect = 'HP:0001629' +metacarpal_synostosis = 'HP:0009701' # hand morphology +hypospadias = 'HP:0000047' + + +class TestDeVriesScorer: + + @pytest.fixture + def devries_scorer( + self, + hpo: hpotk.MinimalOntology, + ) -> DeVriesPhenotypeScorer: + return DeVriesPhenotypeScorer(hpo=hpo) + + @pytest.mark.parametrize( + "term_set, expected", + [ + ([intrauterine_growth_retardation], 2,), + 
([intrauterine_growth_retardation, small_for_gestational_age], 2,), # superfluous, still should be 2 + ([sensorineural_hearing_impairment, ], 0,), # Unrelated + ([seizure, arachnodactyly], 0,), # Unrelated + ([intrauterine_growth_retardation, intellectual_disability_mild], 3,), + ([intrauterine_growth_retardation, intellectual_disability_profound], 4,), + ([intrauterine_growth_retardation, intellectual_disability_profound, microcephaly], 5,), + ([intrauterine_growth_retardation, intellectual_disability_profound, microcephaly, short_stature], 6,), + ([intrauterine_growth_retardation, intellectual_disability_profound, microcephaly, short_stature, + hypertelorism], 6,), + ([intrauterine_growth_retardation, intellectual_disability_profound, microcephaly, short_stature, + hypertelorism, posteriorly_rotated_ears], 8,), + ([intrauterine_growth_retardation, intellectual_disability_profound, microcephaly, short_stature, + hypertelorism, posteriorly_rotated_ears, underdeveloped_crus_of_the_helix], 8,), + ([intrauterine_growth_retardation, intellectual_disability_profound, microcephaly, short_stature, + hypertelorism, posteriorly_rotated_ears, underdeveloped_crus_of_the_helix, + ventricular_septal_defect], 9,), + ([intrauterine_growth_retardation, intellectual_disability_profound, microcephaly, short_stature, + hypertelorism, posteriorly_rotated_ears, underdeveloped_crus_of_the_helix, + ventricular_septal_defect, hypospadias], 10,), + ([intrauterine_growth_retardation, intellectual_disability_profound, microcephaly, short_stature, + hypertelorism, posteriorly_rotated_ears, underdeveloped_crus_of_the_helix, + ventricular_septal_defect, hypospadias, metacarpal_synostosis], 10,), + + ], + ) + def test_a_patient( + self, + term_set: typing.Sequence[str], + expected: int, + devries_scorer: DeVriesPhenotypeScorer, + ): + patient = Patient( + labels=SampleLabels("test"), + phenotypes=( + Phenotype( + hpotk.TermId.from_curie(curie), + is_observed=True, + ) + for curie in term_set + 
), + diseases=(), + variants=(), + ) + + actual = devries_scorer.score(patient) + + assert actual == expected From 1b9ed118a40e77826673d32a0641e7883e38d0bf Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Thu, 29 Aug 2024 16:40:36 +0200 Subject: [PATCH 7/9] Fix typos, rearrange few things. --- docs/user-guide/devries.rst | 12 +-- src/gpsea/analysis/pscore/_impl.py | 140 +++++++++++++++---------- src/gpsea/model/_phenotype.py | 25 ++++- tests/analysis/test_de_vries_scorer.py | 8 +- 4 files changed, 116 insertions(+), 69 deletions(-) diff --git a/docs/user-guide/devries.rst b/docs/user-guide/devries.rst index 60585b5c9..74e5cd26e 100644 --- a/docs/user-guide/devries.rst +++ b/docs/user-guide/devries.rst @@ -15,7 +15,7 @@ Statistical significance of a difference in the De Vries score between groups ca determined using the Mann–Whitney-U test. We refer to `Feenstra et al. (2011) `_ for -te original description of the adjusted De Vries score. Here we offer a version of the +the original description of the adjusted De Vries score. Here we offer a version of the score that leverages the structure of the Human Phenotype Ontology to assess the phenotype. The De Vries score has several sections, each of which is scored on a point system. The @@ -119,7 +119,7 @@ number of terms or descendents of the following HPO terms. +----------------------------------------------------------------------------------------------------------+-----------+ | `Abnormal external nose morphology (HP:0010938) `_ | 1 each | +----------------------------------------------------------------------------------------------------------+-----------+ -| `Abnormal pinna morphology (HP:0000377) `_ | 1 each | +| `Abnormal pinna morphology (HP:0000377) `_ | 1 each | +----------------------------------------------------------------------------------------------------------+-----------+ If two or more terms are found, the score is 2, otherwise a score of zero is assigned. 
@@ -127,17 +127,17 @@ If two or more terms are found, the score is 2, otherwise a score of zero is ass Non-facial dysmorphism and congenital abnormalities ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -One point is assigned for for either the +One point is assigned for either the corresponding HPO terms or any of their descendents up to a maximum of two points. +----------------------------------------------------------------------------------------------------------+-----------+ | HPO term | Score | +==========================================================================================================+===========+ -| `Abnormal hand morphology (HP:0005922) `_ | 1 each | +| `Abnormal hand morphology (HP:0005922) `_ | 1 each | +----------------------------------------------------------------------------------------------------------+-----------+ -| `Abnormal heart morphology (HP:0001627) `_ | 1 each | +| `Abnormal heart morphology (HP:0001627) `_ | 1 each | +----------------------------------------------------------------------------------------------------------+-----------+ -| `Hypospadias (HP:0000047) `_ | 1 | +| `Hypospadias (HP:0000047) `_ | 1 | +----------------------------------------------------------------------------------------------------------+-----------+ Final score diff --git a/src/gpsea/analysis/pscore/_impl.py b/src/gpsea/analysis/pscore/_impl.py index 6c7248b98..0520cd2f7 100644 --- a/src/gpsea/analysis/pscore/_impl.py +++ b/src/gpsea/analysis/pscore/_impl.py @@ -101,16 +101,6 @@ def score( return count - # def __call__( - # self, - # *args: typing.Any, - # **kwds: typing.Any, - # ) -> float: - # # TODO: move to `PhenotypeScorer` API. 
- # assert len(args) == 1 and isinstance(args[0], Patient), 'The first argument must be an instance of `Patient`' - # assert len(kwds) == 0, 'We do not take any key-word arguments' - # return self.score(args[0]) - class DeVriesPhenotypeScorer(PhenotypeScorer): """ @@ -119,35 +109,54 @@ class DeVriesPhenotypeScorer(PhenotypeScorer): """ def __init__( - self, - hpo: hpotk.MinimalOntology, + self, + hpo: hpotk.MinimalOntology, ): self._hpo = hpo - def _developmental_delay_score(self, observed_term_ids: typing.List[str]) -> float: + # severe and profound GDD + self._gdd_tids = { + 'HP:0011344': 2, 'HP:0012736': 2, + 'HP:0011342': 1, 'HP:0011343': 1, 'HP:0001263': 1, + } + + # mild, moderate, and unspecified GDD (borderline has 0.5) + self._idd_tids = { + 'HP:0010864': 2, 'HP:0002187': 2, + 'HP:0001256': 1, 'HP:0002342': 1, 'HP:0001249': 1, + 'HP:0006889': 0.5, + } + + def _developmental_delay_score( + self, + observed_term_ids: typing.Iterable[str], + ) -> float: """ - calculate the dev delay component of the score + Calculate the dev delay component of the score + Args: observed_term_ids: terms observed in patient Returns: a score between 0 and 2 """ - gdd_tids = {'HP:0011344': 2, 'HP:0012736': 2, - 'HP:0011342': 1, 'HP:0011343': 1, 'HP:0001263': 1} # severe and profound GDD - idd_tids = {'HP:0010864': 2, 'HP:0002187': 2, 'HP:0001256': 1, 'HP:0002342': 1, 'HP:0001249': 1, - 'HP:0006889': 0.5} # mild, moderate, and unspecified GDD (borderline has 0.5) - # check GDD terms with higher priority than ID terms + # Check GDD terms with higher priority than ID terms. 
+ # Global developmental delay for t in observed_term_ids: - if t in gdd_tids: - return gdd_tids.get(t) + if t in self._gdd_tids: + return self._gdd_tids[t] + + # Intellectual disability for t in observed_term_ids: - if t in idd_tids: - return idd_tids.get(t) + if t in self._idd_tids: + return self._idd_tids[t] + return 0 - def _term_or_descendant(self, - target_tid: str, - observed_term_ids: typing.List[str]): + def _term_or_descendant( + self, + target_tid: str, + observed_term_ids: typing.Iterable[str], + ) -> int: """ Args: target_tid: term of interest @@ -157,14 +166,17 @@ def _term_or_descendant(self, 1 if the term or any descendant is present in the patient, otherwise 0 """ for term_id in observed_term_ids: - for desc_tid in self._hpo.graph.get_ancestors(term_id, include_source=True): - if desc_tid.value == target_tid: - return 1 + if term_id == target_tid \ + or any(ancestor == target_tid for ancestor in self._hpo.graph.get_ancestors(term_id)): + return 1 + return 0 - def _term_or_descendant_count(self, - target_tid: str, - observed_term_ids: typing.List[str]): + def _term_or_descendant_count( + self, + target_tid: str, + observed_term_ids: typing.Iterable[str], + ) -> int: """ Args: target_tid: term of interest @@ -180,31 +192,37 @@ def _term_or_descendant_count(self, total_count += 1 return total_count - def _postnatal_growth_score(self, observed_term_ids: typing.List[str]) -> float: + def _postnatal_growth_score( + self, + observed_term_ids: typing.Iterable[str], + ) -> int: """ - calculate the postnatal growth component of the score + Calculate the postnatal growth component of the score. 
+ Args: observed_term_ids: terms observed in patient - Returns: - + Returns: an `int` (between 0 and 2) """ microcephaly = 'HP:0000252' short_stature = 'HP:0004322' macrocephaly = 'HP:0000256' tall_stature = 'HP:0000098' total_count = 0 - for tid in [microcephaly, short_stature, macrocephaly, tall_stature]: + for tid in (microcephaly, short_stature, macrocephaly, tall_stature): total_count += self._term_or_descendant(tid, observed_term_ids) if total_count > 2: raise ValueError(f"Inconsistent annotations for postnatal growth score {total_count}: {observed_term_ids}") return total_count - def _facial_dysmorphism_score(self, observed_term_ids: typing.List[str]) -> float: + def _facial_dysmorphism_score( + self, + observed_term_ids: typing.Collection[str], + ) -> int: """ This section assigns two points if two or more anomalies are identified in the following categories: hypertelorism, nasal anomalies and ear anomalies. Our implementation counts the total - number of terms or descendents of the hypertelorism, Abnormal external nose morphology, and + number of terms or descendants of the hypertelorism, Abnormal external nose morphology, and Abnormal pinna morphology. 
Args: @@ -216,7 +234,7 @@ def _facial_dysmorphism_score(self, observed_term_ids: typing.List[str]) -> floa hypertelorism = 'HP:0000316' external_nose = 'HP:0010938' pinna_morphology = 'HP:0000377' - total_count = len([t for t in observed_term_ids if t == hypertelorism]) + total_count = self._term_or_descendant_count(target_tid=hypertelorism, observed_term_ids=observed_term_ids) total_count += self._term_or_descendant_count(target_tid=external_nose, observed_term_ids=observed_term_ids) total_count += self._term_or_descendant_count(target_tid=pinna_morphology, observed_term_ids=observed_term_ids) if total_count > 1: @@ -224,10 +242,14 @@ def _facial_dysmorphism_score(self, observed_term_ids: typing.List[str]) -> floa else: return 0 - def _congenital_score(self, observed_term_ids: typing.List[str]) -> float: + def _congenital_score( + self, + observed_term_ids: typing.Iterable[str], + ) -> int: """ - Non-facial dysmorphism and congenital abnormalities component + Non-facial dysmorphism and congenital abnormalities component. One point is assigned for either the corresponding HPO terms or any of their descendents up to a maximum of 2. 
+ Args: observed_term_ids: terms observed in patient @@ -237,51 +259,53 @@ def _congenital_score(self, observed_term_ids: typing.List[str]) -> float: hypospadias = 'HP:0000047' abnormal_hand_morphology = 'HP:0005922' abnormal_heart_morphology = 'HP:0001627' - total_count = len([t for t in observed_term_ids if t == hypospadias]) + # total_count = len([t for t in observed_term_ids if t == hypospadias]) + total_count = self._term_or_descendant_count( + target_tid=hypospadias, observed_term_ids=observed_term_ids, + ) total_count += self._term_or_descendant_count(target_tid=abnormal_hand_morphology, observed_term_ids=observed_term_ids) total_count += self._term_or_descendant_count(target_tid=abnormal_heart_morphology, observed_term_ids=observed_term_ids) return min(2, total_count) - def _prenatal_growth_score(self, observed_term_ids: typing.List[str]) -> float: + def _prenatal_growth_score( + self, + observed_term_ids: typing.Iterable[str], + ) -> int: """ - two points are assigned if Prenatal-onset growth retardation is present + Two points are assigned if Prenatal-onset growth retardation is present. Args: - observed_term_ids: list of strings with term identifiers or observed HPO terms + observed_term_ids: list of strings with term identifiers or observed HPO terms Returns: score between 0 and 2 """ small_for_gestational_age = 'HP:0001518' intrauterine_growth_retardation = 'HP:0001511' - targets = {small_for_gestational_age, intrauterine_growth_retardation} + targets = (small_for_gestational_age, intrauterine_growth_retardation) for tid in observed_term_ids: if tid in targets: return 2 return 0 - def _calculate_score(self, observed_term_ids: typing.List[str]) -> float: + def score(self, patient: Patient) -> float: """ - calculate score based on list of strings with term identifiers or observed HPO terms. + Calculate score based on list of strings with term identifiers or observed HPO terms. 
+ Args: - observed_term_ids: list of strings with term identifiers or observed HPO terms + patient: list of strings with term identifiers or observed HPO terms Returns: de Vries score between 0 and 10 """ + observed_term_ids = tuple(tid.identifier.value for tid in patient.present_phenotypes()) + delay_score = self._developmental_delay_score(observed_term_ids) growth_score = self._postnatal_growth_score(observed_term_ids) facial_score = self._facial_dysmorphism_score(observed_term_ids) congen_score = self._congenital_score(observed_term_ids) prenatal_score = self._prenatal_growth_score(observed_term_ids) + return delay_score + growth_score + facial_score + congen_score + prenatal_score - - def score(self, patient: Patient) -> float: - """ - Compute the score for the `patient`. - """ - # collect term identifiers as strings for all observed phenotypes - observed_term_ids = [tid.identifier.value for tid in patient.present_phenotypes()] - return self._calculate_score(observed_term_ids) diff --git a/src/gpsea/model/_phenotype.py b/src/gpsea/model/_phenotype.py index def0ec9c4..267f0ec48 100644 --- a/src/gpsea/model/_phenotype.py +++ b/src/gpsea/model/_phenotype.py @@ -13,7 +13,30 @@ class Phenotype(hpotk.model.Identified, hpotk.model.ObservableFeature): @staticmethod def from_term(term: hpotk.model.MinimalTerm, is_observed: bool): - return Phenotype(term.identifier, is_observed) + return Phenotype.from_raw_parts(term.identifier, is_observed) + + @staticmethod + def from_raw_parts( + term_id: typing.Union[str, hpotk.TermId], + is_observed: bool, + ) -> "Phenotype": + """ + Create `Phenotype` from a term ID and observation state. + + :param term_id: a `str` with CURIE (e.g. `HP:0001250`) or a :class:`~hpotk.TermId`. + :param is_observed: `True` if the term ID was observed in patient or `False` if it was explicitly excluded. 
+ """ + if isinstance(term_id, str): + term_id = hpotk.TermId.from_curie(term_id) + elif isinstance(term_id, hpotk.TermId): + pass + else: + raise ValueError('`term_id` must be either a `str` or a `hpotk.TermId`') + + return Phenotype( + term_id, + is_observed, + ) def __init__( self, diff --git a/tests/analysis/test_de_vries_scorer.py b/tests/analysis/test_de_vries_scorer.py index 6057c247c..220dd18b0 100644 --- a/tests/analysis/test_de_vries_scorer.py +++ b/tests/analysis/test_de_vries_scorer.py @@ -27,8 +27,8 @@ class TestDeVriesScorer: @pytest.fixture def devries_scorer( - self, - hpo: hpotk.MinimalOntology, + self, + hpo: hpotk.MinimalOntology, ) -> DeVriesPhenotypeScorer: return DeVriesPhenotypeScorer(hpo=hpo) @@ -70,8 +70,8 @@ def test_a_patient( patient = Patient( labels=SampleLabels("test"), phenotypes=( - Phenotype( - hpotk.TermId.from_curie(curie), + Phenotype.from_raw_parts( + term_id=curie, is_observed=True, ) for curie in term_set From eb08b3040a6608800007419f3b41c3311c987ba9 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Thu, 29 Aug 2024 17:05:28 +0200 Subject: [PATCH 8/9] foxing documentation --- src/gpsea/analysis/pscore/_impl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gpsea/analysis/pscore/_impl.py b/src/gpsea/analysis/pscore/_impl.py index 6c7248b98..aba4546f8 100644 --- a/src/gpsea/analysis/pscore/_impl.py +++ b/src/gpsea/analysis/pscore/_impl.py @@ -171,7 +171,7 @@ def _term_or_descendant_count(self, observed_term_ids: all terms observed in patient Returns: - 1 if the term or any descendant is present in the patient, otherwise 0 + the total count of the terms equal to or descending from the target_tid """ total_count = 0 for term_id in observed_term_ids: From 900922f42810fb767d29011065a8fb3fb56622b2 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Thu, 29 Aug 2024 17:19:24 +0200 Subject: [PATCH 9/9] Revert the way of testing for hypertelorism. 
--- src/gpsea/analysis/pscore/_impl.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/gpsea/analysis/pscore/_impl.py b/src/gpsea/analysis/pscore/_impl.py index 3e40287d3..16931c398 100644 --- a/src/gpsea/analysis/pscore/_impl.py +++ b/src/gpsea/analysis/pscore/_impl.py @@ -234,7 +234,9 @@ def _facial_dysmorphism_score( hypertelorism = 'HP:0000316' external_nose = 'HP:0010938' pinna_morphology = 'HP:0000377' - total_count = self._term_or_descendant_count(target_tid=hypertelorism, observed_term_ids=observed_term_ids) + + # No need to inspect descendants since Hypertelorism has none. + total_count = 1 if hypertelorism in observed_term_ids else 0 total_count += self._term_or_descendant_count(target_tid=external_nose, observed_term_ids=observed_term_ids) total_count += self._term_or_descendant_count(target_tid=pinna_morphology, observed_term_ids=observed_term_ids) if total_count > 1: