Skip to content

Commit

Permalink
Merge pull request #406 from monarch-initiative/config-for-default-vi…
Browse files Browse the repository at this point in the history
…sualizer

Simplify drawing of protein diagrams, use complement notation in mono-biallelic classifiers, simplify HPO analysis report
  • Loading branch information
ielis authored Jan 21, 2025
2 parents 90be023 + cfb9bc8 commit ae863ee
Show file tree
Hide file tree
Showing 24 changed files with 617 additions and 302 deletions.
Binary file modified docs/img/tutorial/tbx5_protein_diagram.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
37 changes: 18 additions & 19 deletions docs/report/tbx5_truncating_vs_missense.csv
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@
Allele group,Missense,Missense,Truncating,Truncating,,
,Count,Percent,Count,Percent,Corrected p values,p values
Ventricular septal defect [HP:0001629],31/60,52%,29/29,100%,9.550859422122477e-06,5.618152601248516e-07
Hypoplasia of the radius [HP:0002984],30/62,48%,10/27,37%,1.0,0.3614379675325876
Atrial septal defect [HP:0001631],42/44,95%,38/38,100%,1.0,0.49653718759409815
Secundum atrial septal defect [HP:0001684],14/35,40%,13/40,32%,1.0,0.6304400561799244
Abnormal cardiac septum morphology [HP:0001671],62/62,100%,50/50,100%,1.0,1.0
Abnormal hand morphology [HP:0005922],53/53,100%,41/41,100%,1.0,1.0
Abnormal atrial septum morphology [HP:0011994],43/43,100%,38/38,100%,1.0,1.0
Abnormal cardiac atrium morphology [HP:0005120],43/43,100%,38/38,100%,1.0,1.0
Abnormal appendicular skeleton morphology [HP:0011844],64/64,100%,60/60,100%,1.0,1.0
Aplasia/hypoplasia of the extremities [HP:0009815],55/55,100%,44/44,100%,1.0,1.0
Aplasia/hypoplasia involving the skeleton [HP:0009115],56/56,100%,45/45,100%,1.0,1.0
Aplasia/hypoplasia involving bones of the upper limbs [HP:0006496],55/55,100%,44/44,100%,1.0,1.0
Aplasia/hypoplasia involving bones of the extremities [HP:0045060],55/55,100%,44/44,100%,1.0,1.0
Abnormal long bone morphology [HP:0011314],44/44,100%,19/19,100%,1.0,1.0
Abnormal finger morphology [HP:0001167],36/36,100%,56/56,100%,1.0,1.0
Abnormal digit morphology [HP:0011297],38/38,100%,59/59,100%,1.0,1.0
Abnormal thumb morphology [HP:0001172],30/30,100%,56/56,100%,1.0,1.0
,Missense,Truncating,Corrected p values,p values
Ventricular septal defect [HP:0001629],31/60 (52%),29/29 (100%),9.550859422122477e-06,5.618152601248516e-07
Hypoplasia of the radius [HP:0002984],30/62 (48%),10/27 (37%),1.0,0.3614379675325876
Atrial septal defect [HP:0001631],42/44 (95%),38/38 (100%),1.0,0.49653718759409815
Secundum atrial septal defect [HP:0001684],14/35 (40%),13/40 (32%),1.0,0.6304400561799244
Abnormal thumb morphology [HP:0001172],30/30 (100%),56/56 (100%),1.0,1.0
Abnormal finger morphology [HP:0001167],36/36 (100%),56/56 (100%),1.0,1.0
Abnormal digit morphology [HP:0011297],38/38 (100%),59/59 (100%),1.0,1.0
Abnormal atrial septum morphology [HP:0011994],43/43 (100%),38/38 (100%),1.0,1.0
Abnormal cardiac atrium morphology [HP:0005120],43/43 (100%),38/38 (100%),1.0,1.0
Abnormal long bone morphology [HP:0011314],44/44 (100%),19/19 (100%),1.0,1.0
Abnormal hand morphology [HP:0005922],53/53 (100%),41/41 (100%),1.0,1.0
Aplasia/hypoplasia of the extremities [HP:0009815],55/55 (100%),44/44 (100%),1.0,1.0
Aplasia/hypoplasia involving bones of the upper limbs [HP:0006496],55/55 (100%),44/44 (100%),1.0,1.0
Aplasia/hypoplasia involving bones of the extremities [HP:0045060],55/55 (100%),44/44 (100%),1.0,1.0
Aplasia/hypoplasia involving the skeleton [HP:0009115],56/56 (100%),45/45 (100%),1.0,1.0
Abnormal cardiac septum morphology [HP:0001671],62/62 (100%),50/50 (100%),1.0,1.0
Abnormal appendicular skeleton morphology [HP:0011844],64/64 (100%),60/60 (100%),1.0,1.0
31 changes: 8 additions & 23 deletions docs/tutorial.rst
Original file line number Diff line number Diff line change
Expand Up @@ -166,32 +166,17 @@ the most common HPO terms, variants, diseases, and variant effects:
Plot distribution of variants with respect to the protein sequence
------------------------------------------------------------------

We can also show the distribution of variants with respect to the encoded protein.
We first obtain ``tx_coordinates`` (:class:`~gpsea.model.TranscriptCoordinates`)
with genomic coordinates of the transcript, including e.g. untranslated regions or exons:
We can use :class:`~gpsea.view.CohortArtist` to plot the distribution of variants
with respect to the encoded protein on
a Matplotlib `Axes <https://matplotlib.org/stable/api/_as_gen/matplotlib.axes.Axes.html>`_:

>>> from gpsea.preprocessing import configure_default_tx_coordinate_service
>>> tx_service = configure_default_tx_coordinate_service(genome_build="GRCh38.p13")
>>> tx_coordinates = tx_service.fetch(tx_id)


and we also get ``protein_meta`` (:class:`~gpsea.model.ProteinMetadata`)
with the domains and regions of the encoded protein:

>>> from gpsea.preprocessing import configure_default_protein_metadata_service
>>> pms = configure_default_protein_metadata_service()
>>> protein_meta = pms.annotate(px_id)

Now we can plot a diagram of the mutations on the protein:

>>> from gpsea.view import ProteinVisualizer
>>> import matplotlib.pyplot as plt
>>> from gpsea.view import configure_default_cohort_artist
>>> cohort_artist = configure_default_cohort_artist()
>>> fig, ax = plt.subplots(figsize=(15, 8))
>>> visualizer = ProteinVisualizer()
>>> visualizer.draw_protein_diagram(
... tx_coordinates,
... protein_meta,
... cohort,
>>> cohort_artist.draw_protein(
... cohort=cohort,
... protein_id=px_id,
... ax=ax,
... )

Expand Down
Binary file modified docs/user-guide/analyses/report/rere_phenotype_score_boxplot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
51 changes: 25 additions & 26 deletions docs/user-guide/analyses/report/tbx5_frameshift.csv
Original file line number Diff line number Diff line change
@@ -1,26 +1,25 @@
Allele group,Frameshift,Frameshift,Other,Other,,
,Count,Percent,Count,Percent,Corrected p values,p values
Ventricular septal defect [HP:0001629],19/19,100%,42/71,59%,0.005806240832840839,0.00024192670136836828
Absent thumb [HP:0009777],14/31,45%,18/100,18%,0.04456405819223913,0.0037136715160199273
Secundum atrial septal defect [HP:0001684],4/22,18%,23/55,42%,0.4065940176561687,0.06544319142266644
Triphalangeal thumb [HP:0001199],13/32,41%,23/99,23%,0.4065940176561687,0.06932119159387057
Muscular ventricular septal defect [HP:0011623],6/25,24%,8/84,10%,0.4065940176561687,0.08470708701170182
Short thumb [HP:0009778],8/30,27%,25/69,36%,1.0,0.4870099714553749
Absent radius [HP:0003974],6/25,24%,9/43,21%,1.0,0.7703831604944444
Atrial septal defect [HP:0001631],20/20,100%,63/65,97%,1.0,1.0
Abnormal atrial septum morphology [HP:0011994],20/20,100%,64/64,100%,1.0,1.0
Abnormal cardiac septum morphology [HP:0001671],28/28,100%,89/89,100%,1.0,1.0
Abnormal cardiac atrium morphology [HP:0005120],20/20,100%,64/64,100%,1.0,1.0
Hypoplasia of the radius [HP:0002984],6/14,43%,34/75,45%,1.0,1.0
Abnormal appendicular skeleton morphology [HP:0011844],34/34,100%,93/93,100%,1.0,1.0
Aplasia/hypoplasia of the extremities [HP:0009815],22/22,100%,78/78,100%,1.0,1.0
Aplasia/hypoplasia involving the skeleton [HP:0009115],23/23,100%,80/80,100%,1.0,1.0
Aplasia/hypoplasia involving bones of the upper limbs [HP:0006496],22/22,100%,78/78,100%,1.0,1.0
Aplasia/hypoplasia involving bones of the extremities [HP:0045060],22/22,100%,78/78,100%,1.0,1.0
Abnormal long bone morphology [HP:0011314],13/13,100%,50/50,100%,1.0,1.0
Abnormal hand morphology [HP:0005922],20/20,100%,75/75,100%,1.0,1.0
Abnormal thumb morphology [HP:0001172],31/31,100%,58/58,100%,1.0,1.0
Abnormal finger morphology [HP:0001167],31/31,100%,64/64,100%,1.0,1.0
Abnormal digit morphology [HP:0011297],33/33,100%,67/67,100%,1.0,1.0
Aplasia/Hypoplasia of fingers [HP:0006265],19/19,100%,44/44,100%,1.0,1.0
Aplasia/hypoplasia involving bones of the hand [HP:0005927],19/19,100%,44/44,100%,1.0,1.0
,Frameshift,Other,Corrected p values,p values
Ventricular septal defect [HP:0001629],19/19 (100%),42/71 (59%),0.005806240832840839,0.00024192670136836828
Absent thumb [HP:0009777],14/31 (45%),18/100 (18%),0.04456405819223913,0.0037136715160199273
Secundum atrial septal defect [HP:0001684],4/22 (18%),23/55 (42%),0.4065940176561687,0.06544319142266644
Triphalangeal thumb [HP:0001199],13/32 (41%),23/99 (23%),0.4065940176561687,0.06932119159387057
Muscular ventricular septal defect [HP:0011623],6/25 (24%),8/84 (10%),0.4065940176561687,0.08470708701170182
Short thumb [HP:0009778],8/30 (27%),25/69 (36%),1.0,0.4870099714553749
Absent radius [HP:0003974],6/25 (24%),9/43 (21%),1.0,0.7703831604944444
Abnormal long bone morphology [HP:0011314],13/13 (100%),50/50 (100%),1.0,1.0
Aplasia/Hypoplasia of fingers [HP:0006265],19/19 (100%),44/44 (100%),1.0,1.0
Aplasia/hypoplasia involving bones of the hand [HP:0005927],19/19 (100%),44/44 (100%),1.0,1.0
Atrial septal defect [HP:0001631],20/20 (100%),63/65 (97%),1.0,1.0
Abnormal atrial septum morphology [HP:0011994],20/20 (100%),64/64 (100%),1.0,1.0
Abnormal cardiac atrium morphology [HP:0005120],20/20 (100%),64/64 (100%),1.0,1.0
Abnormal hand morphology [HP:0005922],20/20 (100%),75/75 (100%),1.0,1.0
Aplasia/hypoplasia of the extremities [HP:0009815],22/22 (100%),78/78 (100%),1.0,1.0
Aplasia/hypoplasia involving bones of the upper limbs [HP:0006496],22/22 (100%),78/78 (100%),1.0,1.0
Aplasia/hypoplasia involving bones of the extremities [HP:0045060],22/22 (100%),78/78 (100%),1.0,1.0
Aplasia/hypoplasia involving the skeleton [HP:0009115],23/23 (100%),80/80 (100%),1.0,1.0
Abnormal cardiac septum morphology [HP:0001671],28/28 (100%),89/89 (100%),1.0,1.0
Abnormal thumb morphology [HP:0001172],31/31 (100%),58/58 (100%),1.0,1.0
Abnormal finger morphology [HP:0001167],31/31 (100%),64/64 (100%),1.0,1.0
Abnormal digit morphology [HP:0011297],33/33 (100%),67/67 (100%),1.0,1.0
Abnormal appendicular skeleton morphology [HP:0011844],34/34 (100%),93/93 (100%),1.0,1.0
Hypoplasia of the radius [HP:0002984],6/14 (43%),34/75 (45%),1.0,1.0
Binary file modified docs/user-guide/analyses/report/umod_km_curves.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
58 changes: 48 additions & 10 deletions docs/user-guide/exploratory.rst
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,11 @@ Then we choose the transcript and protein identifiers, and we fetch the correspo

>>> from gpsea.preprocessing import configure_default_tx_coordinate_service, configure_default_protein_metadata_service
>>> tx_id = "NM_181486.4"
>>> pt_id = "NP_852259.1"
>>> px_id = "NP_852259.1"
>>> tx_service = configure_default_tx_coordinate_service(genome_build="GRCh38.p13")
>>> tx_coordinates = tx_service.fetch(tx_id)
>>> pm_service = configure_default_protein_metadata_service()
>>> protein_meta = pm_service.annotate(pt_id)
>>> protein_meta = pm_service.annotate(px_id)


Last, we load HPO `v2024-07-01` to use in the exploratory analysis:
Expand Down Expand Up @@ -164,20 +164,59 @@ with variants in the *TBX5* gene:
Plot distribution of variants with respect to the protein sequence
------------------------------------------------------------------

We use Matplotlib to plot the distribution of variants on a protein diagram:
Cohort artist
^^^^^^^^^^^^^

The simplest way to plot the variant distribution is to use the :class:`~gpsea.view.CohortArtist` API:

>>> import matplotlib.pyplot as plt
>>> from gpsea.view import configure_default_cohort_artist
>>> cohort_artist = configure_default_cohort_artist()
>>> fig, ax = plt.subplots(figsize=(15, 8))
>>> cohort_artist.draw_protein(
... cohort=cohort,
... protein_id=px_id,
... ax=ax,
... )


.. image:: img/TBX5_protein_diagram.from_artist.png
:alt: TBX5 protein diagram
:align: center
:width: 600px

.. doctest:: exploratory
:hide:

>>> if _overwrite:
... fig.tight_layout()
... fig.savefig('docs/user-guide/img/TBX5_protein_diagram.from_artist.png')

The :func:`~gpsea.view.configure_default_cohort_artist` function gets the default artist
which we use to plot the diagram with the variant distribution across the protein sequence
on Matplotlib axes.


Protein visualizer
^^^^^^^^^^^^^^^^^^

Sometimes, however, things do not work out of the box, e.g. because protein metadata
is not available from UniProt (the default), and we may need to use the lower-level components.

The :class:`~gpsea.view.ProteinVisualizer` takes the cohort and the protein metadata
to plot the distribution of variants on a protein diagram:

>>> from gpsea.view import ProteinVisualizer
>>> fig, ax = plt.subplots(figsize=(15, 8))
>>> visualizer = ProteinVisualizer()
>>> visualizer.draw_protein_diagram(
... tx_coordinates,
... protein_meta,
... cohort,
>>> visualizer.draw_protein(
... cohort=cohort,
... protein_metadata=protein_meta,
... ax=ax,
... )

.. image:: img/TBX5_protein_diagram.png

.. image:: img/TBX5_protein_diagram.from_protein_visualizer.png
:alt: TBX5 protein diagram
:align: center
:width: 600px
Expand All @@ -187,5 +226,4 @@ We use Matplotlib to plot the distribution of variants on a protein diagram:

>>> if _overwrite:
... fig.tight_layout()
... fig.savefig('docs/user-guide/img/TBX5_protein_diagram.png')

... fig.savefig('docs/user-guide/img/TBX5_protein_diagram.from_protein_visualizer.png')
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file removed docs/user-guide/img/TBX5_protein_diagram.png
Binary file not shown.
64 changes: 46 additions & 18 deletions src/gpsea/analysis/clf/_gt_classifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ def monoallelic_classifier(
a_predicate: VariantPredicate,
b_predicate: typing.Optional[VariantPredicate] = None,
a_label: str = "A",
b_label: str = "B",
b_label: typing.Optional[str] = None,
) -> GenotypeClassifier:
"""
Monoallelic classifier bins patient into one of two groups, `A` and `B`,
Expand All @@ -260,17 +260,20 @@ def monoallelic_classifier(
:param a_predicate: predicate to test if the variants
meet the criteria of the first group (named `A` by default).
:param b_predicate: predicate to test if the variants
meet the criteria of the second group or `None`
if the inverse of `a_predicate` should be used (named `B` by default).
:param b_predicate: predicate to test if the variants meet
the criteria of the second group or `None` if the complement
of the `a_predicate` should be used (named ``A^C`` by default).
:param a_label: display name of the `a_predicate` (default ``"A"``).
:param b_label: display name of the `b_predicate` (default ``"B"``).
:param b_label: display name of the `b_predicate`.
If `b_label` is not provided, then set to ``"{a_label}^C"`` (e.g. ``A^C`` if ``a_label=A``).
"""
assert isinstance(a_label, str)
assert isinstance(b_label, str)

if b_predicate is None:
b_predicate = ~a_predicate
a_predicate, b_predicate, b_label = _validate_b_predicate(
a_predicate=a_predicate,
b_predicate=b_predicate,
a_label=a_label,
b_label=b_label,
)

return PolyCountingGenotypeClassifier.monoallelic(
a_predicate=a_predicate,
Expand All @@ -284,7 +287,7 @@ def biallelic_classifier(
a_predicate: VariantPredicate,
b_predicate: typing.Optional[VariantPredicate] = None,
a_label: str = "A",
b_label: str = "B",
b_label: typing.Optional[str] = None,
partitions: typing.Collection[typing.Union[int, typing.Collection[int]]] = (
0,
1,
Expand All @@ -302,22 +305,25 @@ def biallelic_classifier(
:param a_predicate: predicate to test if the variants meet
the criteria of the first group (named `A` by default).
:param b_predicate: predicate to test if the variants meet
the criteria of the second group or `None` if an inverse
of `a_predicate` should be used (named `B` by default).
the criteria of the second group or `None` if the complement
of the `a_predicate` should be used (named ``A^C`` by default).
:param a_label: display name of the `a_predicate` (default ``"A"``).
:param b_label: display name of the `b_predicate` (default ``"B"``).
:param b_label: display name of the `b_predicate`.
If `b_label` is not provided, then set to ``"{a_label}^C"`` (e.g. ``A^C`` if ``a_label=A``).
:param partitions: a sequence with partition identifiers (default ``(0, 1, 2)``).
"""
# Q/C
assert isinstance(a_label, str)
assert isinstance(b_label, str)

a_predicate, b_predicate, b_label = _validate_b_predicate(
a_predicate=a_predicate,
b_predicate=b_predicate,
a_label=a_label,
b_label=b_label,
)

partitions = _fixate_partitions(partitions)
_qc_partitions(partitions)

if b_predicate is None:
b_predicate = ~a_predicate

return PolyCountingGenotypeClassifier.biallelic(
a_predicate=a_predicate,
b_predicate=b_predicate,
Expand All @@ -326,6 +332,28 @@ def biallelic_classifier(
partitions=partitions,
)

def _validate_b_predicate(
a_predicate: VariantPredicate,
b_predicate: typing.Optional[VariantPredicate],
a_label: str,
b_label: typing.Optional[str],
) -> typing.Tuple[
VariantPredicate, VariantPredicate, str,
]:
if b_predicate is None:
b_predicate = ~a_predicate
if b_label is None:
# Using a regular uppercase `C` instead of Unicode complement (`∁`)
# to reduce the 😕 factor.
b_label = f"{a_label}^C" # complement of A
else:
assert isinstance(b_label, str)
else:
if b_label is None:
b_label = f"{a_label}^C" # complement of A

return a_predicate, b_predicate, b_label


def _build_ac_to_cat(
partitions: typing.Collection[typing.Collection[int]],
Expand Down
11 changes: 8 additions & 3 deletions src/gpsea/view/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from ._base import GpseaReport, BaseViewer, BaseProteinVisualizer, CohortArtist
from ._config import configure_default_protein_visualizer, configure_default_cohort_artist
from ._formatter import Formatter, VariantFormatter
from ._phenotype_analysis import summarize_hpo_analysis
from ._protein_visualizable import ProteinVisualizable
from ._base import GpseaReport, BaseViewer
from ._protein_visualizer import ProteinVisualizer
from ._txp import VariantTranscriptVisualizer
from ._viewers import (
CohortVariantViewer,
Expand All @@ -9,13 +12,15 @@
MtcStatsViewer,
ProteinVariantViewer,
)
from ._protein_visualizer import ProteinVisualizer
from ._formatter import Formatter, VariantFormatter

__all__ = [
"GpseaReport",
"CohortVariantViewer",
"CohortViewer",
"BaseProteinVisualizer",
"configure_default_protein_visualizer",
"CohortArtist",
"configure_default_cohort_artist",
"ProteinVisualizer",
"ProteinVisualizable",
"ProteinVariantViewer",
Expand Down
Loading

0 comments on commit ae863ee

Please sign in to comment.