From 37ddf801eb027ca177c49cf0263e22a40c17adb2 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Fri, 6 Sep 2024 17:10:10 +0200 Subject: [PATCH 1/5] all variants viewable --- src/gpsea/view/_all_variant_viewable.py | 119 +++++++++++++++ src/gpsea/view/templates/all_variants.html | 170 +++++++++++++++++++++ 2 files changed, 289 insertions(+) create mode 100644 src/gpsea/view/_all_variant_viewable.py create mode 100644 src/gpsea/view/templates/all_variants.html diff --git a/src/gpsea/view/_all_variant_viewable.py b/src/gpsea/view/_all_variant_viewable.py new file mode 100644 index 000000000..ffe5c9433 --- /dev/null +++ b/src/gpsea/view/_all_variant_viewable.py @@ -0,0 +1,119 @@ +import typing + +from hpotk import MinimalOntology +from jinja2 import Environment, PackageLoader +from collections import namedtuple, defaultdict + +from gpsea.model import Cohort +from gpsea.model._variant import Variant +from ._formatter import VariantFormatter + + +ToDisplay = namedtuple('ToDisplay', ['hgvs_cdna', 'hgvsp', 'variant_effects']) + +VariantData = namedtuple('VariantData', ['variant_key', 'hgvs_cdna', 'hgvsp', 'chromosomal_description', 'variant_effects'] ) + +class AllVariantViewable: + """ + Class to create a viewable object that is uses a Jinja2 template to create an HTML element + for display in the Jupyter notebook. 
+ """ + + def __init__( + self, + transcript_id: str + ): + """ + Args: + hpo(MinimalOntology): An HPO ontology object from hpo-toolkit + top_phenotype_count(int): Maximum number of HPO terms to display in the HTML table (default: 10) + top_variant_count(int): Maximum number of variants to display in the HTML table (default: 10) + """ + environment = Environment(loader=PackageLoader('gpsea.view', 'templates')) + self._cohort_template = environment.get_template("all_variants.html") + self._var_formatter = VariantFormatter(transcript_id) + + def process( + self, + cohort: Cohort, + only_hgvs: bool = True + ) -> str: + """ + Create an HTML that should be shown with display(HTML(..)) of the ipython package. + + Args: + cohort (Cohort): The cohort being analyzed in the current Notebook + transcript_id (str): the transcript that we map variants onto + + Returns: + str: an HTML string with parameterized template for rendering + """ + context = self._prepare_context(cohort, transcript_id=self._transcript_id, only_hgvs=only_hgvs) + return self._cohort_template.render(context) + + def _prepare_context( + self, + cohort: Cohort, + transcript_id: typing.Optional[str], + only_hgvs + ) -> typing.Mapping[str, typing.Any]: + variant_count_dictionary = defaultdict() + for var in cohort.all_variants(): + vdata = self._get_variant_data(var, only_hgvs) + variant_count_dictionary[vdata] += 1 + variant_counts = list() + for var_data, count in variant_count_dictionary.items(): + variant_counts.append( + { + "variant_key": var_data.variant_key, + "variant": var_data.hgvs_cdna, + "variant_name": var_data.hgvs_cdna, + "protein_name": var_data.hgvsp, + "variant_effects": var_data.variant_effects, + "count": count, + } + ) + # The following dictionary is used by the Jinja2 HTML template + return { + "variant_count_list": variant_counts, + "total_unique_allele_count": len(variant_counts) + } + + + def _get_variant_description( + self, + variant: Variant, + only_hgvs: bool + ) -> VariantData: + 
""" + Get user-friendly strings (e.g., HGVS for our target transcript) to match to the chromosomal strings + Args: + cohort (Cohort): The cohort being analyzed in the current Notebook + transcript_id (str): the transcript that we map variants onto + only_hgvs (bool): do not show the transcript ID part of the HGVS annotation, just the annotation. + + Returns: + typing.Mapping[str, ToDisplay]: key: variant key, value: namedtuple(display (e.g. HGVS) string of variant, hgvsp protein string of variant) + """ + variant_key = variant.variant_info.variant_key + display = self._var_formatter.format_as_string(variant) + tx_annotation = variant.get_tx_anno_by_tx_id(self._transcript_id) + if tx_annotation is not None: + hgvsp = tx_annotation.hgvsp + var_effects = [var_eff.name for var_eff in tx_annotation.variant_effects] + else: + hgvsp = None + var_effects = None + if only_hgvs: + # do not show the transcript id + fields_dna = display.split(":") + fields_ps = hgvsp.split(":") if hgvsp is not None else [None] + if len(fields_dna) > 1: + display = fields_dna[1] + else: + display = fields_dna[0] + if len(fields_ps) > 1: + hgvsp = fields_ps[1] + else: + hgvsp = fields_ps[0] + return VariantData(variant_key=variant_key, hgvs_cdna = fields_dna, hgvsp = hgvsp, chromosomal_description=None, variant_effects=var_effects) diff --git a/src/gpsea/view/templates/all_variants.html b/src/gpsea/view/templates/all_variants.html new file mode 100644 index 000000000..0bbe94e59 --- /dev/null +++ b/src/gpsea/view/templates/all_variants.html @@ -0,0 +1,170 @@ + + + + + Cohort + + + + +

GPSEA cohort analysis: All variant alleles

+ + + + + + + "variant": var_data.hgvs_cdna, + "variant_name": var_data.hgvs_cdna, + "protein_name": var_data.hgvsp, + "variant_effects": var_data.variant_effects, + "count": count, + + + + + + + + + + + {% for vdata in variant_counts %} + + + + + + + + {% endfor %} + +
+

Variant alleles

+ A total of {{ total_unique_allele_count }} unique alleles were identified in the cohort. +
Varianthgvs_cdnaproteinEffectscount
{{ vdata.variant }}{{ vdata.variant_name }}{{ vdata.protein_name }}{{ vdata.variant_effects }}{{ vdata.count }}
+ + + + + + + + + + + + {% for var_count in var_counts %} + + + + + + + + {% endfor %} + +
+

Top {{top_var_count}} Variants

+ Variants are shown according to {{ transcript_id }}. A total of {{ unique_variant_count}} unique variants were identified in the cohort. +
CountVariant keyVariant NameProtein VariantVariant Class
{{ var_count.Count }}{{ var_count.variant }}{{ var_count.variant_name }}{{ var_count.protein_name }}{{ var_count.variant_effects }}
+ + + + + + + + + {% for disease_count in disease_counts %} + + + + + + {% endfor %} + +
+

Diseases

+
Disease NameDisease IDAnnotation Count
{{ disease_count.disease_name }}{{ disease_count.disease_id }}{{ disease_count.count }}
+ {% if has_transcript > 0 %} + + + + + + + + {% for var_effect in var_effects_list %} + + + + + {% endfor %} + +
+

Variant categories for {{ transcript_id }}

+
Variant effectAnnotation Count
{{ var_effect.effect }}{{ var_effect.count }}
+ {% else %} +

Call this function with transcript to see table with variant effect counts.

+ {% endif %} + + + + From 1d4ebd1b52700cad772c5c321584e33791806724 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Mon, 9 Sep 2024 15:14:32 +0200 Subject: [PATCH 2/5] viewable for all variants --- src/gpsea/model/_variant_effects.py | 65 +++++++++++++++- src/gpsea/view/__init__.py | 2 + src/gpsea/view/_all_variant_viewable.py | 89 ++++++++++++++-------- src/gpsea/view/templates/all_variants.html | 75 +++--------------- 4 files changed, 136 insertions(+), 95 deletions(-) diff --git a/src/gpsea/model/_variant_effects.py b/src/gpsea/model/_variant_effects.py index e15452e08..d104f178a 100644 --- a/src/gpsea/model/_variant_effects.py +++ b/src/gpsea/model/_variant_effects.py @@ -1,5 +1,6 @@ from enum import Enum - +import typing +from hpotk import TermId class VariantEffect(Enum): """ @@ -62,6 +63,68 @@ class VariantEffect(Enum): INTERGENIC_VARIANT = "SO:0001628" SEQUENCE_VARIANT = "SO:0001060" + @staticmethod + def to_display(veffect: "VariantEffect") -> str: + effect_to_display = { + VariantEffect.TRANSCRIPT_ABLATION : "transcript ablation", + VariantEffect.SPLICE_ACCEPTOR_VARIANT : "splice acceptor", + VariantEffect.SPLICE_DONOR_VARIANT : "splice donor", + VariantEffect.STOP_GAINED : "stop gained", + VariantEffect.FRAMESHIFT_VARIANT : "frameshift", + VariantEffect.STOP_LOST : "stop lost", + VariantEffect.START_LOST : "start lost", + VariantEffect.TRANSCRIPT_AMPLIFICATION : "transcript amplification", + VariantEffect.INFRAME_INSERTION : "inframe insertion", + VariantEffect.INFRAME_DELETION : "inframe deletion", + VariantEffect. MISSENSE_VARIANT : "missense", + VariantEffect. PROTEIN_ALTERING_VARIANT : "protein altering", + VariantEffect.SPLICE_REGION_VARIANT : "splice region", + VariantEffect.SPLICE_DONOR_5TH_BASE_VARIANT : "splice donor 5th base", + VariantEffect.SPLICE_DONOR_REGION_VARIANT : "splice donor", + VariantEffect. 
SPLICE_POLYPYRIMIDINE_TRACT_VARIANT : "splice polypyrimidine", + VariantEffect.INCOMPLETE_TERMINAL_CODON_VARIANT : "incomplete terminal codon", + VariantEffect.START_RETAINED_VARIANT : "start retained", + VariantEffect.STOP_RETAINED_VARIANT : "stop retainined", + VariantEffect.SYNONYMOUS_VARIANT : "synonymous", + VariantEffect.CODING_SEQUENCE_VARIANT : "coding sequence", + VariantEffect. MATURE_MIRNA_VARIANT : "mature miRNA", + VariantEffect. FIVE_PRIME_UTR_VARIANT : "5UTR", + VariantEffect. THREE_PRIME_UTR_VARIANT : "3UTR", + VariantEffect. NON_CODING_TRANSCRIPT_EXON_VARIANT : "non-coding transcript exon", + VariantEffect.INTRON_VARIANT : "intronic", + VariantEffect. NMD_TRANSCRIPT_VARIANT : "NMD transcript", + VariantEffect.NON_CODING_TRANSCRIPT_VARIANT : "non-coding transcript", + VariantEffect.UPSTREAM_GENE_VARIANT : "upstream of gene", + VariantEffect. DOWNSTREAM_GENE_VARIANT : "downstream of gene", + VariantEffect.TFBS_ABLATION : "TFBS ablation", + VariantEffect. TFBS_AMPLIFICATION : "TFBS amplification", + VariantEffect.TF_BINDING_SITE_VARIANT : "TFBS binding site", + VariantEffect.REGULATORY_REGION_ABLATION : "regulatory region ablation", + VariantEffect. 
REGULATORY_REGION_AMPLIFICATION : "regulatory region amplification", + VariantEffect.FEATURE_ELONGATION : "feature elongation", + VariantEffect.REGULATORY_REGION_VARIANT : "regulatory region", + VariantEffect.FEATURE_TRUNCATION : "feature truncation", + VariantEffect.INTERGENIC_VARIANT : "intergenic", + VariantEffect.SEQUENCE_VARIANT : "sequence variant" + } + return effect_to_display.get(veffect, "n/a") + + @staticmethod + def structural_so_id_to_display(so_term: typing.Union[TermId, str]) -> str: + """ + get a string to represent a Sequence Ontology (SO) term identifier + """ + if isinstance(so_term, TermId): + so_term = so_term.value + so_id_to_display = {"SO:1000029": "chromosomal deletion", + "SO:1000037": "chromosomal duplication", + "SO:1000030": "chromosomal_inversion", + "SO:1000044": "chromosomal_translocation", + } + return so_id_to_display.get(so_term, "n/a") + + + def __init__(self, curie: str): self._curie = curie diff --git a/src/gpsea/view/__init__.py b/src/gpsea/view/__init__.py index 6eb13dd61..95507abc3 100644 --- a/src/gpsea/view/__init__.py +++ b/src/gpsea/view/__init__.py @@ -1,4 +1,5 @@ from ._cohort import CohortViewable +from ._all_variant_viewable import AllVariantViewable from ._disease import DiseaseViewable from ._protein_viewer import ProteinViewable from ._protein_visualizable import ProteinVisualizable @@ -8,6 +9,7 @@ from ._formatter import VariantFormatter __all__ = [ + 'AllVariantViewable', 'CohortViewable', 'ProteinVisualizer', 'ProteinVisualizable', 'ProteinViewable', 'DiseaseViewable', diff --git a/src/gpsea/view/_all_variant_viewable.py b/src/gpsea/view/_all_variant_viewable.py index ffe5c9433..a941c58e5 100644 --- a/src/gpsea/view/_all_variant_viewable.py +++ b/src/gpsea/view/_all_variant_viewable.py @@ -1,17 +1,16 @@ import typing -from hpotk import MinimalOntology from jinja2 import Environment, PackageLoader from collections import namedtuple, defaultdict from gpsea.model import Cohort -from gpsea.model._variant import 
Variant +from gpsea.model._variant import Variant, VariantEffect from ._formatter import VariantFormatter ToDisplay = namedtuple('ToDisplay', ['hgvs_cdna', 'hgvsp', 'variant_effects']) -VariantData = namedtuple('VariantData', ['variant_key', 'hgvs_cdna', 'hgvsp', 'chromosomal_description', 'variant_effects'] ) +VariantData = namedtuple('VariantData', ['variant_key', 'hgvs_cdna', 'hgvsp', 'variant_effects'] ) class AllVariantViewable: """ @@ -25,13 +24,14 @@ def __init__( ): """ Args: - hpo(MinimalOntology): An HPO ontology object from hpo-toolkit - top_phenotype_count(int): Maximum number of HPO terms to display in the HTML table (default: 10) - top_variant_count(int): Maximum number of variants to display in the HTML table (default: 10) + transcript_id(str): The transcript identifier (Usually, the MANE RefSeq transcript, that should start with "NM_") """ environment = Environment(loader=PackageLoader('gpsea.view', 'templates')) self._cohort_template = environment.get_template("all_variants.html") self._var_formatter = VariantFormatter(transcript_id) + if not transcript_id.startswith("NM"): + print(f"[WARNING] Non-RefSeq transcript id: {transcript_id}") + self._transcript_id = transcript_id def process( self, @@ -57,30 +57,51 @@ def _prepare_context( transcript_id: typing.Optional[str], only_hgvs ) -> typing.Mapping[str, typing.Any]: - variant_count_dictionary = defaultdict() + variant_count_dictionary = defaultdict(int) + variant_effect_count_dictionary = defaultdict(int) + variant_key_to_variant = dict() for var in cohort.all_variants(): + var_key = var.variant_info.variant_key vdata = self._get_variant_data(var, only_hgvs) - variant_count_dictionary[vdata] += 1 + variant_key_to_variant[var_key] = vdata + variant_count_dictionary[var_key] += 1 + for v_eff in vdata.variant_effects: + variant_effect_count_dictionary[v_eff] += 1 variant_counts = list() - for var_data, count in variant_count_dictionary.items(): + variant_effect_counts = list() + for var_key, count in 
variant_count_dictionary.items(): + var_data = variant_key_to_variant[var_key] variant_counts.append( { "variant_key": var_data.variant_key, "variant": var_data.hgvs_cdna, "variant_name": var_data.hgvs_cdna, "protein_name": var_data.hgvsp, - "variant_effects": var_data.variant_effects, + "variant_effects": ", ".join(var_data.variant_effects), "count": count, } ) + for v_effect, count in variant_effect_count_dictionary.items(): + variant_effect_counts.append( + { + "effect": v_effect, + "count": count + } + ) + print(f"variants eee {len(variant_effect_counts)}") + variant_counts = sorted(variant_counts, key=lambda row: row.get("count"), reverse=True) + variant_effect_counts = sorted(variant_effect_counts, key=lambda row: row.get("count"), reverse=True) + # The following dictionary is used by the Jinja2 HTML template return { + "has_transcript": False, "variant_count_list": variant_counts, + "variant_effect_count_list": variant_effect_counts, "total_unique_allele_count": len(variant_counts) } - def _get_variant_description( + def _get_variant_data( self, variant: Variant, only_hgvs: bool @@ -96,24 +117,32 @@ def _get_variant_description( typing.Mapping[str, ToDisplay]: key: variant key, value: namedtuple(display (e.g. 
HGVS) string of variant, hgvsp protein string of variant) """ variant_key = variant.variant_info.variant_key - display = self._var_formatter.format_as_string(variant) - tx_annotation = variant.get_tx_anno_by_tx_id(self._transcript_id) - if tx_annotation is not None: - hgvsp = tx_annotation.hgvsp - var_effects = [var_eff.name for var_eff in tx_annotation.variant_effects] + if variant.variant_info.has_sv_info(): + sv_info = variant.variant_info.sv_info + gene_symbol = sv_info.gene_symbol + display = f"SV involving {gene_symbol}" + effect = VariantEffect.structural_so_id_to_display(so_term=sv_info.structural_type) + return VariantData(variant_key=variant_key, hgvs_cdna = display, hgvsp = "p.?", variant_effects=[effect]) else: - hgvsp = None - var_effects = None - if only_hgvs: - # do not show the transcript id - fields_dna = display.split(":") - fields_ps = hgvsp.split(":") if hgvsp is not None else [None] - if len(fields_dna) > 1: - display = fields_dna[1] - else: - display = fields_dna[0] - if len(fields_ps) > 1: - hgvsp = fields_ps[1] + variant_key = variant.variant_info.variant_key + display = self._var_formatter.format_as_string(variant) + tx_annotation = variant.get_tx_anno_by_tx_id(self._transcript_id) + if tx_annotation is not None: + hgvsp = tx_annotation.hgvsp + var_effects = [VariantEffect.to_display(var_eff) for var_eff in tx_annotation.variant_effects] else: - hgvsp = fields_ps[0] - return VariantData(variant_key=variant_key, hgvs_cdna = fields_dna, hgvsp = hgvsp, chromosomal_description=None, variant_effects=var_effects) + hgvsp = None + var_effects = [] + if only_hgvs: + # do not show the transcript id + fields_dna = display.split(":") + fields_ps = hgvsp.split(":") if hgvsp is not None else [None] + if len(fields_dna) > 1: + display_hgvs_cDNA = fields_dna[1] + else: + display_hgvs_cDNA = fields_dna[0] + if len(fields_ps) > 1: + hgvsp = fields_ps[1] + else: + hgvsp = fields_ps[0] + return VariantData(variant_key=variant_key, hgvs_cdna = 
display_hgvs_cDNA, hgvsp = hgvsp, variant_effects=var_effects) diff --git a/src/gpsea/view/templates/all_variants.html b/src/gpsea/view/templates/all_variants.html index 0bbe94e59..9640bd1af 100644 --- a/src/gpsea/view/templates/all_variants.html +++ b/src/gpsea/view/templates/all_variants.html @@ -73,24 +73,17 @@

Variant alleles

- "variant": var_data.hgvs_cdna, - "variant_name": var_data.hgvs_cdna, - "protein_name": var_data.hgvsp, - "variant_effects": var_data.variant_effects, - "count": count, - - - Variant - hgvs_cdna - protein + Variant key + Variant (cDNA) + Variant (protein) Effects - count + Count - {% for vdata in variant_counts %} + {% for vdata in variant_count_list %} - {{ vdata.variant }} + {{ vdata.variant_key }} {{ vdata.variant_name }} {{ vdata.protein_name }} {{ vdata.variant_effects }} @@ -99,61 +92,17 @@

Variant alleles

{% endfor %} - - - - - - - - - - - - {% for var_count in var_counts %} - - - - - - - - {% endfor %} - -
-

Top {{top_var_count}} Variants

- Variants are shown according to {{ transcript_id }}. A total of {{ unique_variant_count}} unique variants were identified in the cohort. -
CountVariant keyVariant NameProtein VariantVariant Class
{{ var_count.Count }}{{ var_count.variant }}{{ var_count.variant_name }}{{ var_count.protein_name }}{{ var_count.variant_effects }}
- - - - - - - - - {% for disease_count in disease_counts %} - - - - - - {% endfor %} - -
-

Diseases

-
Disease NameDisease IDAnnotation Count
{{ disease_count.disease_name }}{{ disease_count.disease_id }}{{ disease_count.count }}
- {% if has_transcript > 0 %} - + From 381fd548e5c87bc2f4303430afacb8c390446ee2 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 9 Sep 2024 21:33:55 +0200 Subject: [PATCH 3/5] Refactor, rename to `CohortVariantViewer` and add a test. --- src/gpsea/model/_variant_effects.py | 148 +++++++++++------- src/gpsea/view/__init__.py | 4 +- ..._viewable.py => _cohort_variant_viewer.py} | 58 ++++--- tests/conftest.py | 13 +- tests/view/test_variant_viewer.py | 16 ++ 5 files changed, 149 insertions(+), 90 deletions(-) rename src/gpsea/view/{_all_variant_viewable.py => _cohort_variant_viewer.py} (76%) create mode 100644 tests/view/test_variant_viewer.py diff --git a/src/gpsea/model/_variant_effects.py b/src/gpsea/model/_variant_effects.py index d104f178a..632d56a46 100644 --- a/src/gpsea/model/_variant_effects.py +++ b/src/gpsea/model/_variant_effects.py @@ -1,8 +1,10 @@ -from enum import Enum +import enum import typing -from hpotk import TermId -class VariantEffect(Enum): +import hpotk + + +class VariantEffect(enum.Enum): """ `VariantEffect` represents consequences of a variant on transcript that are supported by GPSEA. @@ -48,7 +50,7 @@ class VariantEffect(Enum): THREE_PRIME_UTR_VARIANT = "SO:0001624" NON_CODING_TRANSCRIPT_EXON_VARIANT = "SO:0001792" INTRON_VARIANT = "SO:0001627" - NMD_TRANSCRIPT_VARIANT = "SO:0001621", + NMD_TRANSCRIPT_VARIANT = "SO:0001621" NON_CODING_TRANSCRIPT_VARIANT = "SO:0001619" UPSTREAM_GENE_VARIANT = "SO:0001631" DOWNSTREAM_GENE_VARIANT = "SO:0001632" @@ -63,67 +65,42 @@ class VariantEffect(Enum): INTERGENIC_VARIANT = "SO:0001628" SEQUENCE_VARIANT = "SO:0001060" + def to_display(self) -> str: + """ + Get a concise name of the variant effect that is suitable for showing to humans. 
+ + Example + ^^^^^^^ + + >>> from gpsea.model import VariantEffect + >>> VariantEffect.MISSENSE_VARIANT + 'missense' + >>> VariantEffect.SPLICE_DONOR_5TH_BASE_VARIANT + 'splice donor 5th base' + + :returns: a `str` with the name or `'n/a'` if the variant effect was not assigned a concise name. + """ + return effect_to_display.get(self, "n/a") + @staticmethod - def to_display(veffect: "VariantEffect") -> str: - effect_to_display = { - VariantEffect.TRANSCRIPT_ABLATION : "transcript ablation", - VariantEffect.SPLICE_ACCEPTOR_VARIANT : "splice acceptor", - VariantEffect.SPLICE_DONOR_VARIANT : "splice donor", - VariantEffect.STOP_GAINED : "stop gained", - VariantEffect.FRAMESHIFT_VARIANT : "frameshift", - VariantEffect.STOP_LOST : "stop lost", - VariantEffect.START_LOST : "start lost", - VariantEffect.TRANSCRIPT_AMPLIFICATION : "transcript amplification", - VariantEffect.INFRAME_INSERTION : "inframe insertion", - VariantEffect.INFRAME_DELETION : "inframe deletion", - VariantEffect. MISSENSE_VARIANT : "missense", - VariantEffect. PROTEIN_ALTERING_VARIANT : "protein altering", - VariantEffect.SPLICE_REGION_VARIANT : "splice region", - VariantEffect.SPLICE_DONOR_5TH_BASE_VARIANT : "splice donor 5th base", - VariantEffect.SPLICE_DONOR_REGION_VARIANT : "splice donor", - VariantEffect. SPLICE_POLYPYRIMIDINE_TRACT_VARIANT : "splice polypyrimidine", - VariantEffect.INCOMPLETE_TERMINAL_CODON_VARIANT : "incomplete terminal codon", - VariantEffect.START_RETAINED_VARIANT : "start retained", - VariantEffect.STOP_RETAINED_VARIANT : "stop retainined", - VariantEffect.SYNONYMOUS_VARIANT : "synonymous", - VariantEffect.CODING_SEQUENCE_VARIANT : "coding sequence", - VariantEffect. MATURE_MIRNA_VARIANT : "mature miRNA", - VariantEffect. FIVE_PRIME_UTR_VARIANT : "5UTR", - VariantEffect. THREE_PRIME_UTR_VARIANT : "3UTR", - VariantEffect. NON_CODING_TRANSCRIPT_EXON_VARIANT : "non-coding transcript exon", - VariantEffect.INTRON_VARIANT : "intronic", - VariantEffect. 
NMD_TRANSCRIPT_VARIANT : "NMD transcript", - VariantEffect.NON_CODING_TRANSCRIPT_VARIANT : "non-coding transcript", - VariantEffect.UPSTREAM_GENE_VARIANT : "upstream of gene", - VariantEffect. DOWNSTREAM_GENE_VARIANT : "downstream of gene", - VariantEffect.TFBS_ABLATION : "TFBS ablation", - VariantEffect. TFBS_AMPLIFICATION : "TFBS amplification", - VariantEffect.TF_BINDING_SITE_VARIANT : "TFBS binding site", - VariantEffect.REGULATORY_REGION_ABLATION : "regulatory region ablation", - VariantEffect. REGULATORY_REGION_AMPLIFICATION : "regulatory region amplification", - VariantEffect.FEATURE_ELONGATION : "feature elongation", - VariantEffect.REGULATORY_REGION_VARIANT : "regulatory region", - VariantEffect.FEATURE_TRUNCATION : "feature truncation", - VariantEffect.INTERGENIC_VARIANT : "intergenic", - VariantEffect.SEQUENCE_VARIANT : "sequence variant" - } - return effect_to_display.get(veffect, "n/a") - - @staticmethod - def structural_so_id_to_display(so_term: typing.Union[TermId, str]) -> str: + def structural_so_id_to_display(so_term: typing.Union[hpotk.TermId, str]) -> str: """ - get a string to represent a Sequence Ontology (SO) term identifier + Get a `str` with a concise name for representing a Sequence Ontology (SO) term identifier. + + Example + ^^^^^^^ + + >>> from gpsea.model import VariantEffect + >>> VariantEffect.structural_so_id_to_display('SO:1000029') + 'chromosomal deletion' + + :param so_term: a CURIE `str` or a :class:`~hpotk.TermId` with the query SO term. + :returns: a `str` with the concise name for the SO term or `'n/a'` if a name has not been assigned yet. 
""" - if isinstance(so_term, TermId): + if isinstance(so_term, hpotk.TermId): so_term = so_term.value - so_id_to_display = {"SO:1000029": "chromosomal deletion", - "SO:1000037": "chromosomal duplication", - "SO:1000030": "chromosomal_inversion", - "SO:1000044": "chromosomal_translocation", - } - return so_id_to_display.get(so_term, "n/a") - + return so_id_to_display.get(so_term, "n/a") def __init__(self, curie: str): self._curie = curie @@ -138,3 +115,54 @@ def curie(self) -> str: def __str__(self) -> str: return self.name.lower() + + +effect_to_display = { + VariantEffect.TRANSCRIPT_ABLATION: "transcript ablation", + VariantEffect.SPLICE_ACCEPTOR_VARIANT: "splice acceptor", + VariantEffect.SPLICE_DONOR_VARIANT: "splice donor", + VariantEffect.STOP_GAINED: "stop gained", + VariantEffect.FRAMESHIFT_VARIANT: "frameshift", + VariantEffect.STOP_LOST: "stop lost", + VariantEffect.START_LOST: "start lost", + VariantEffect.TRANSCRIPT_AMPLIFICATION: "transcript amplification", + VariantEffect.INFRAME_INSERTION: "inframe insertion", + VariantEffect.INFRAME_DELETION: "inframe deletion", + VariantEffect.MISSENSE_VARIANT: "missense", + VariantEffect.PROTEIN_ALTERING_VARIANT: "protein altering", + VariantEffect.SPLICE_REGION_VARIANT: "splice region", + VariantEffect.SPLICE_DONOR_5TH_BASE_VARIANT: "splice donor 5th base", + VariantEffect.SPLICE_DONOR_REGION_VARIANT: "splice donor", + VariantEffect.SPLICE_POLYPYRIMIDINE_TRACT_VARIANT: "splice polypyrimidine", + VariantEffect.INCOMPLETE_TERMINAL_CODON_VARIANT: "incomplete terminal codon", + VariantEffect.START_RETAINED_VARIANT: "start retained", + VariantEffect.STOP_RETAINED_VARIANT: "stop retainined", + VariantEffect.SYNONYMOUS_VARIANT: "synonymous", + VariantEffect.CODING_SEQUENCE_VARIANT: "coding sequence", + VariantEffect.MATURE_MIRNA_VARIANT: "mature miRNA", + VariantEffect.FIVE_PRIME_UTR_VARIANT: "5UTR", + VariantEffect.THREE_PRIME_UTR_VARIANT: "3UTR", + VariantEffect.NON_CODING_TRANSCRIPT_EXON_VARIANT: "non-coding 
transcript exon", + VariantEffect.INTRON_VARIANT: "intronic", + VariantEffect.NMD_TRANSCRIPT_VARIANT: "NMD transcript", + VariantEffect.NON_CODING_TRANSCRIPT_VARIANT: "non-coding transcript", + VariantEffect.UPSTREAM_GENE_VARIANT: "upstream of gene", + VariantEffect.DOWNSTREAM_GENE_VARIANT: "downstream of gene", + VariantEffect.TFBS_ABLATION: "TFBS ablation", + VariantEffect.TFBS_AMPLIFICATION: "TFBS amplification", + VariantEffect.TF_BINDING_SITE_VARIANT: "TFBS binding site", + VariantEffect.REGULATORY_REGION_ABLATION: "regulatory region ablation", + VariantEffect.REGULATORY_REGION_AMPLIFICATION: "regulatory region amplification", + VariantEffect.FEATURE_ELONGATION: "feature elongation", + VariantEffect.REGULATORY_REGION_VARIANT: "regulatory region", + VariantEffect.FEATURE_TRUNCATION: "feature truncation", + VariantEffect.INTERGENIC_VARIANT: "intergenic", + VariantEffect.SEQUENCE_VARIANT: "sequence variant", +} + +so_id_to_display = { + "SO:1000029": "chromosomal deletion", + "SO:1000037": "chromosomal duplication", + "SO:1000030": "chromosomal_inversion", + "SO:1000044": "chromosomal_translocation", +} diff --git a/src/gpsea/view/__init__.py b/src/gpsea/view/__init__.py index 74d580e0f..d6a6c22ec 100644 --- a/src/gpsea/view/__init__.py +++ b/src/gpsea/view/__init__.py @@ -1,5 +1,5 @@ from ._cohort import CohortViewable -from ._all_variant_viewable import AllVariantViewable +from ._cohort_variant_viewer import CohortVariantViewer from ._disease import DiseaseViewable from ._phenotype_analysis import summarize_hpo_analysis from ._protein_viewer import ProteinViewable @@ -10,7 +10,7 @@ from ._formatter import VariantFormatter __all__ = [ - 'AllVariantViewable', + 'CohortVariantViewer', 'CohortViewable', 'ProteinVisualizer', 'ProteinVisualizable', 'ProteinViewable', 'DiseaseViewable', diff --git a/src/gpsea/view/_all_variant_viewable.py b/src/gpsea/view/_cohort_variant_viewer.py similarity index 76% rename from src/gpsea/view/_all_variant_viewable.py rename to 
src/gpsea/view/_cohort_variant_viewer.py index a941c58e5..737a2c20f 100644 --- a/src/gpsea/view/_all_variant_viewable.py +++ b/src/gpsea/view/_cohort_variant_viewer.py @@ -3,24 +3,25 @@ from jinja2 import Environment, PackageLoader from collections import namedtuple, defaultdict -from gpsea.model import Cohort -from gpsea.model._variant import Variant, VariantEffect +from gpsea.model import Cohort, Variant, VariantEffect from ._formatter import VariantFormatter ToDisplay = namedtuple('ToDisplay', ['hgvs_cdna', 'hgvsp', 'variant_effects']) -VariantData = namedtuple('VariantData', ['variant_key', 'hgvs_cdna', 'hgvsp', 'variant_effects'] ) +VariantData = namedtuple('VariantData', ['variant_key', 'hgvs_cdna', 'hgvsp', 'variant_effects']) -class AllVariantViewable: + +class CohortVariantViewer: """ - Class to create a viewable object that is uses a Jinja2 template to create an HTML element - for display in the Jupyter notebook. + `AllVariantViewer` creates an HTML report with the cohort variants. + + The report can be either written into an HTML file or displayed in a Jupyter notebook. """ def __init__( - self, - transcript_id: str + self, + transcript_id: str ): """ Args: @@ -39,23 +40,22 @@ def process( only_hgvs: bool = True ) -> str: """ - Create an HTML that should be shown with display(HTML(..)) of the ipython package. + Create an HTML that should be shown with ``display(HTML(..))`` of the ipython package. Args: - cohort (Cohort): The cohort being analyzed in the current Notebook - transcript_id (str): the transcript that we map variants onto + cohort (Cohort): The cohort being analyzed in the current notebook. + only_hgvs (bool): Do not show the transcript ID part of the HGVS annotation, just the annotation. 
Returns: str: an HTML string with parameterized template for rendering """ - context = self._prepare_context(cohort, transcript_id=self._transcript_id, only_hgvs=only_hgvs) + context = self._prepare_context(cohort, only_hgvs=only_hgvs) return self._cohort_template.render(context) def _prepare_context( self, cohort: Cohort, - transcript_id: typing.Optional[str], - only_hgvs + only_hgvs: bool, ) -> typing.Mapping[str, typing.Any]: variant_count_dictionary = defaultdict(int) variant_effect_count_dictionary = defaultdict(int) @@ -74,9 +74,9 @@ def _prepare_context( variant_counts.append( { "variant_key": var_data.variant_key, - "variant": var_data.hgvs_cdna, - "variant_name": var_data.hgvs_cdna, - "protein_name": var_data.hgvsp, + "variant": var_data.hgvs_cdna, + "variant_name": var_data.hgvs_cdna, + "protein_name": var_data.hgvsp, "variant_effects": ", ".join(var_data.variant_effects), "count": count, } @@ -88,7 +88,6 @@ def _prepare_context( "count": count } ) - print(f"variants eee {len(variant_effect_counts)}") variant_counts = sorted(variant_counts, key=lambda row: row.get("count"), reverse=True) variant_effect_counts = sorted(variant_effect_counts, key=lambda row: row.get("count"), reverse=True) @@ -100,7 +99,6 @@ def _prepare_context( "total_unique_allele_count": len(variant_counts) } - def _get_variant_data( self, variant: Variant, @@ -109,12 +107,11 @@ def _get_variant_data( """ Get user-friendly strings (e.g., HGVS for our target transcript) to match to the chromosomal strings Args: - cohort (Cohort): The cohort being analyzed in the current Notebook - transcript_id (str): the transcript that we map variants onto + variant (Variant): The variant to be formatted. only_hgvs (bool): do not show the transcript ID part of the HGVS annotation, just the annotation. Returns: - typing.Mapping[str, ToDisplay]: key: variant key, value: namedtuple(display (e.g. 
HGVS) string of variant, hgvsp protein string of variant) + VariantData: a named tuple with variant data formatted for human consumption. """ variant_key = variant.variant_info.variant_key if variant.variant_info.has_sv_info(): @@ -122,7 +119,12 @@ def _get_variant_data( gene_symbol = sv_info.gene_symbol display = f"SV involving {gene_symbol}" effect = VariantEffect.structural_so_id_to_display(so_term=sv_info.structural_type) - return VariantData(variant_key=variant_key, hgvs_cdna = display, hgvsp = "p.?", variant_effects=[effect]) + return VariantData( + variant_key=variant_key, + hgvs_cdna=display, + hgvsp="p.?", + variant_effects=[effect], + ) else: variant_key = variant.variant_info.variant_key display = self._var_formatter.format_as_string(variant) @@ -136,13 +138,19 @@ def _get_variant_data( if only_hgvs: # do not show the transcript id fields_dna = display.split(":") - fields_ps = hgvsp.split(":") if hgvsp is not None else [None] if len(fields_dna) > 1: display_hgvs_cDNA = fields_dna[1] else: display_hgvs_cDNA = fields_dna[0] + + fields_ps = hgvsp.split(":") if hgvsp is not None else (None,) if len(fields_ps) > 1: hgvsp = fields_ps[1] else: hgvsp = fields_ps[0] - return VariantData(variant_key=variant_key, hgvs_cdna = display_hgvs_cDNA, hgvsp = hgvsp, variant_effects=var_effects) + return VariantData( + variant_key=variant_key, + hgvs_cdna=display_hgvs_cDNA, + hgvsp=hgvsp, + variant_effects=var_effects, + ) diff --git a/tests/conftest.py b/tests/conftest.py index 64ed2e817..907b7d838 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -111,12 +111,19 @@ def suox_cohort( @pytest.fixture(scope='session') -def suox_gt_predicate() -> GenotypePolyPredicate: +def suox_mane_tx_id() -> str: + return 'NM_001032386.2' + + +@pytest.fixture(scope='session') +def suox_gt_predicate( + suox_mane_tx_id: str, +) -> GenotypePolyPredicate: # To bin the patients to a group with >1 MISSENSE variant or 0 MISSENSE variants. 
- suox_mane_tx_id = 'NM_001032386.2' + return boolean_predicate( variant_predicate=VariantPredicates.variant_effect( - effect=VariantEffect.MISSENSE_VARIANT, + effect=VariantEffect.MISSENSE_VARIANT, tx_id=suox_mane_tx_id ) ) diff --git a/tests/view/test_variant_viewer.py b/tests/view/test_variant_viewer.py new file mode 100644 index 000000000..169940dac --- /dev/null +++ b/tests/view/test_variant_viewer.py @@ -0,0 +1,16 @@ +import pytest + +from gpsea.model import Cohort +from gpsea.view import CohortVariantViewer + + +@pytest.mark.skip("For manual run only") +def test_viewer( + suox_mane_tx_id: str, + suox_cohort: Cohort, +): + viewer = CohortVariantViewer(transcript_id=suox_mane_tx_id) + html = viewer.process(suox_cohort) + + with open("all_variants.html", "w") as fh: + fh.write(html) From f0edb42dba0d26db376e84e8a9c5083c1c27cb8a Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 9 Sep 2024 21:47:49 +0200 Subject: [PATCH 4/5] Include the `CohortVariantViewer` in the tutorial. --- docs/report/tbx5_all_variants.html | 583 +++++++++++++++++++++++ docs/tutorial.rst | 33 +- src/gpsea/view/_cohort_variant_viewer.py | 14 +- tests/view/test_variant_viewer.py | 2 +- 4 files changed, 624 insertions(+), 8 deletions(-) create mode 100644 docs/report/tbx5_all_variants.html diff --git a/docs/report/tbx5_all_variants.html b/docs/report/tbx5_all_variants.html new file mode 100644 index 000000000..a96181629 --- /dev/null +++ b/docs/report/tbx5_all_variants.html @@ -0,0 +1,583 @@ + + + + + Cohort + + + + +

GPSEA cohort analysis: All variant alleles

+ + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+

Variant alleles

+ A total of 53 unique alleles were identified in the cohort. +
Variant keyVariant (cDNA)Variant (protein)EffectsCount
12_114385521_114385521_C_Tc.710G>Ap.Arg237Glnmissense22
12_114401830_114401830_C_Tc.238G>Ap.Gly80Argmissense20
12_114385563_114385563_G_Ac.668C>Tp.Thr223Metmissense8
12_114398675_114398675_G_Tc.408C>Ap.Tyr136Terstop gained6
12_114399514_114399514_A_Cc.361T>Gp.Trp121Glymissense, splice region5
12_114403792_114403792_C_CGc.106_107insCp.Ser36ThrfsTer25frameshift5
12_114398682_114398682_C_CGc.400dupp.Arg134ProfsTer49frameshift5
12_114385474_114385474_A_Gc.755+2T>CNonesplice donor4
12_114398656_114398656_C_CGc.426dupp.Ala143ArgfsTer40frameshift4
12_114366360_114366360_C_Tc.787G>Ap.Val263Metmissense4
12_114385522_114385522_G_Ac.709C>Tp.Arg237Trpmissense4
12_114403798_114403798_G_GCc.100dupp.Ala34GlyfsTer27frameshift4
12_114399613_114399613_T_Ac.262A>Tp.Lys88Terstop gained3
12_114401827_114401827_T_Ac.241A>Tp.Arg81Trpmissense, splice region3
12_114366312_114366312_G_Ac.835C>Tp.Arg279Terstop gained3
12_114366366_114366366_T_Ac.781A>Tp.Ser261Cysmissense3
12_114403798_114403799_GC_Gc.100delp.Ala34ProfsTer32frameshift3
12_114385475_114385475_C_Tc.755+1G>ANonesplice donor3
12_114401853_114401853_G_Tc.215C>Ap.Thr72Lysmissense3
12_114385521_114385521_C_Gc.710G>Cp.Arg237Promissense2
12_114398568_114398568_C_Ac.510+5G>TNonesplice donor 5th base, intronic2
12_114366207_114366208_GC_Gc.939delp.Gln315ArgfsTer79frameshift2
12_114366274_114366274_G_Tc.873C>Ap.Tyr291Terstop gained2
12_114366267_114366267_C_Ac.880G>Tp.Glu294Terstop gained2
12_114398578_114398579_CA_Cc.504delp.Phe168LeufsTer6frameshift2
12_114394762_114394763_CA_Cc.641delp.Val214GlyfsTer12frameshift2
12_114398666_114398667_TG_Tc.416delp.Pro139GlnfsTer11frameshift2
12_114403754_114403754_G_Tc.145C>Ap.Gln49Lysmissense, splice region2
12_114385550_114385550_A_AATTATTCTCAGc.680_681insCTGAGAATAATp.Ile227_Glu228insTerinframe insertion, stop retained2
12_114366348_114366349_CT_Cc.798delp.Val267TrpfsTer127frameshift1
12_114399559_114399559_T_Cc.316A>Gp.Ile106Valmissense1
12_114356064_114356065_TA_Tc.1024delp.Tyr342ThrfsTer52frameshift1
12_114355755_114355756_TG_Tc.1333delp.His445MetfsTer137frameshift1
12_114398632_114398632_G_Ac.451C>Tp.Gln151Terstop gained1
12_114401907_114401907_A_Gc.161T>Cp.Ile54Thrmissense1
12_114355723_114355723_G_Ac.1366C>Tp.Gln456Terstop gained1
12_114398602_114398602_T_Gc.481A>Cp.Thr161Promissense1
12_114398708_114398709_GC_Gc.374delp.Gly125AlafsTer25frameshift1
12_114385553_114385553_C_Ac.678G>Tp.Lys226Asnmissense1
12_114399633_114399633_C_Gc.243-1G>CNonesplice acceptor1
12_114401873_114401874_TA_Tc.194delp.Leu65GlnfsTer10frameshift1
12_114399594_114399594_A_Cc.281T>Gp.Leu94Argmissense1
12_114399622_114399622_G_Tc.253C>Ap.Pro85Thrmissense1
12_114394743_114394746_TGTG_Tc.658_660delp.His220delinframe deletion1
12_114394820_114394820_C_Gc.584G>Cp.Gly195Alamissense1
12_114401846_114401846_C_Gc.222G>Cp.Met74Ilemissense1
12_114366241_114366242_CT_Cc.905delp.Gln302ArgfsTer92frameshift1
12_114355784_114355785_CA_Cc.1304delp.Leu435ArgfsTer147frameshift1
12_114398626_114398627_CG_Cc.456delp.Val153SerfsTer21frameshift1
12_114403859_114403859_G_Tc.40C>Ap.Pro14Thrmissense1
12_114399625_114399629_ACATC_Ac.246_249delp.Met83PhefsTer6frameshift1
12_114394817_114394817_G_Cc.587C>Gp.Ser196Terstop gained1
12_114401921_114401921_C_Gc.148-1G>CNonesplice acceptor1
+ + + + + \ No newline at end of file diff --git a/docs/tutorial.rst b/docs/tutorial.rst index dcecee150..687969ca4 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -103,7 +103,13 @@ We loaded the patient data into a `cohort` which is ready for the next steps. Explore cohort ^^^^^^^^^^^^^^ -We can now explore the cohort to see how many patients are included. +GPSEA helps with gaining insight into the cohort by providing several reports and plots. + + +Show cohort summary +------------------- + +The summary report provides an overview of the HPO terms, variants, diseases, and variant effects that occur most frequently: >>> from gpsea.view import CohortViewable >>> viewer = CohortViewable(hpo) @@ -121,6 +127,10 @@ We can now explore the cohort to see how many patients are included. from IPython.display import HTML, display display(HTML(report)) + +Plot distribution of variants with respect to the protein sequence +------------------------------------------------------------------ + Now we can show the distribution of variants with respect to the encoded protein. We first obtain `tx_coordinates` (:class:`~gpsea.model.TranscriptCoordinates`) and `protein_meta` (:class:`~gpsea.model.ProteinMetadata`) @@ -154,6 +164,27 @@ and we follow with plotting the diagram of the mutations on the protein: :width: 600px +.. _show-cohort-variants: + +Summarize all variant alleles +----------------------------- + +We can prepare a table of all variant alleles that occur in the cohort. +Each table row corresponds to a single allele and lists the variant key, +the predicted effect on the transcript (*cDNA*) and protein of interest, +the variant effects, and the number of patients who present +with one or more variant alleles (*Count*): + +>>> from gpsea.view import CohortVariantViewer +>>> viewer = CohortVariantViewer(tx_id=tx_id) +>>> report = viewer.process(cohort=cohort) +>>> with open('docs/report/tbx5_all_variants.html', 'w') as fh: # doctest: +SKIP +... _ = fh.write(report) + +.. 
raw:: html + :file: report/tbx5_all_variants.html + + Prepare genotype and phenotype predicates ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/src/gpsea/view/_cohort_variant_viewer.py b/src/gpsea/view/_cohort_variant_viewer.py index 737a2c20f..016c8cf99 100644 --- a/src/gpsea/view/_cohort_variant_viewer.py +++ b/src/gpsea/view/_cohort_variant_viewer.py @@ -17,22 +17,24 @@ class CohortVariantViewer: `AllVariantViewer` creates an HTML report with the cohort variants. The report can be either written into an HTML file or displayed in a Jupyter notebook. + + See :ref:`show-cohort-variants` for an example usage. """ def __init__( self, - transcript_id: str + tx_id: str ): """ Args: - transcript_id(str): The transcript identifier (Usually, the MANE RefSeq transcript, that should start with "NM_") + tx_id (str): The transcript identifier (Usually, the MANE RefSeq transcript, that should start with "NM_") """ environment = Environment(loader=PackageLoader('gpsea.view', 'templates')) self._cohort_template = environment.get_template("all_variants.html") - self._var_formatter = VariantFormatter(transcript_id) - if not transcript_id.startswith("NM"): - print(f"[WARNING] Non-RefSeq transcript id: {transcript_id}") - self._transcript_id = transcript_id + self._var_formatter = VariantFormatter(tx_id) + if not tx_id.startswith("NM"): + print(f"[WARNING] Non-RefSeq transcript id: {tx_id}") + self._transcript_id = tx_id def process( self, diff --git a/tests/view/test_variant_viewer.py b/tests/view/test_variant_viewer.py index 169940dac..31fe12bbd 100644 --- a/tests/view/test_variant_viewer.py +++ b/tests/view/test_variant_viewer.py @@ -9,7 +9,7 @@ def test_viewer( suox_mane_tx_id: str, suox_cohort: Cohort, ): - viewer = CohortVariantViewer(transcript_id=suox_mane_tx_id) + viewer = CohortVariantViewer(tx_id=suox_mane_tx_id) html = viewer.process(suox_cohort) with open("all_variants.html", "w") as fh: From 8ccfed60e4ab4b4a6c80f7bc3c31228d49c07703 Mon Sep 17 00:00:00 2001 From: 
Daniel Danis Date: Mon, 9 Sep 2024 21:51:42 +0200 Subject: [PATCH 5/5] Fix doctest, tweak pydoc. --- src/gpsea/model/_variant_effects.py | 4 ++-- src/gpsea/view/_cohort_variant_viewer.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gpsea/model/_variant_effects.py b/src/gpsea/model/_variant_effects.py index 632d56a46..9567592d4 100644 --- a/src/gpsea/model/_variant_effects.py +++ b/src/gpsea/model/_variant_effects.py @@ -73,9 +73,9 @@ def to_display(self) -> str: ^^^^^^^ >>> from gpsea.model import VariantEffect - >>> VariantEffect.MISSENSE_VARIANT + >>> VariantEffect.MISSENSE_VARIANT.to_display() 'missense' - >>> VariantEffect.SPLICE_DONOR_5TH_BASE_VARIANT + >>> VariantEffect.SPLICE_DONOR_5TH_BASE_VARIANT.to_display() 'splice donor 5th base' :returns: a `str` with the name or `'n/a'` if the variant effect was not assigned a concise name. diff --git a/src/gpsea/view/_cohort_variant_viewer.py b/src/gpsea/view/_cohort_variant_viewer.py index 016c8cf99..0933ce02e 100644 --- a/src/gpsea/view/_cohort_variant_viewer.py +++ b/src/gpsea/view/_cohort_variant_viewer.py @@ -14,7 +14,7 @@ class CohortVariantViewer: """ - `AllVariantViewer` creates an HTML report with the cohort variants. + `CohortVariantViewer` creates an HTML report with the cohort variants. The report can be either written into an HTML file or displayed in a Jupyter notebook. @@ -133,7 +133,7 @@ def _get_variant_data( tx_annotation = variant.get_tx_anno_by_tx_id(self._transcript_id) if tx_annotation is not None: hgvsp = tx_annotation.hgvsp - var_effects = [VariantEffect.to_display(var_eff) for var_eff in tx_annotation.variant_effects] + var_effects = [var_eff.to_display() for var_eff in tx_annotation.variant_effects] else: hgvsp = None var_effects = []