").
"""
return self._alt
@@ -568,7 +543,7 @@ def __init__(
@property
def structural_type(self) -> hpotk.TermId:
"""
- Get term ID of the structural type (e.g. `SO:1000029` for chromosomal deletion).
+ Get term ID of the structural type (e.g. ``SO:1000029`` for chromosomal deletion).
"""
return self._structural_type
@@ -582,7 +557,7 @@ def variant_class(self) -> VariantClass:
@property
def gene_id(self) -> str:
"""
- Get a `str` with gene identifier CURIE (e.g. `HGNC:3603`) or `None` if the identifier is not available.
+ Get a `str` with gene identifier CURIE (e.g. ``HGNC:3603``) or `None` if the identifier is not available.
"""
return self._gene_id
diff --git a/src/genophenocorr/preprocessing/_api.py b/src/genophenocorr/preprocessing/_api.py
index 29b9b635a..8a1a1cfb9 100644
--- a/src/genophenocorr/preprocessing/_api.py
+++ b/src/genophenocorr/preprocessing/_api.py
@@ -74,7 +74,8 @@ class TranscriptCoordinateService(metaclass=abc.ABCMeta):
@abc.abstractmethod
def fetch(
- self, tx: typing.Union[str, TranscriptInfoAware]
+ self,
+ tx: typing.Union[str, TranscriptInfoAware],
) -> TranscriptCoordinates:
"""
Get tx coordinates for a tx ID or an entity that knows about the tx ID.
@@ -83,7 +84,7 @@ def fetch(
Args:
tx: a `str` with tx ID (e.g. `NM_002834.5`) or an entity that knows about the transcript ID
- (e.g. :class:`genophenocorr.model.TranscriptAnnotation`).
+ (e.g. :class:`genophenocorr.model.TranscriptAnnotation`).
Returns: the transcript coordinates.
"""
@@ -105,7 +106,8 @@ def fetch_for_gene(self, gene: str) -> typing.Sequence[TranscriptCoordinates]:
Args:
gene: a `str` with tx ID (e.g. `HGNC:3603`)
- Returns: a sequence of transcript coordinates for the gene.
+ Returns:
+ typing.Sequence[TranscriptCoordinates]: a sequence of transcript coordinates for the gene.
"""
pass
@@ -223,5 +225,3 @@ def summarize(
else:
file.write("No errors or warnings were found")
file.write(os.linesep)
- l_pad = " " * (self._notepad.level * indent)
- file.write(os.linesep)
diff --git a/src/genophenocorr/preprocessing/_config.py b/src/genophenocorr/preprocessing/_config.py
index bd4069f6a..12eef36cf 100644
--- a/src/genophenocorr/preprocessing/_config.py
+++ b/src/genophenocorr/preprocessing/_config.py
@@ -154,10 +154,6 @@ def configure_patient_creator(
timeout: float = 30.,
) -> PhenopacketPatientCreator: # Rename to something more understandable by user
"""
- ^^^ none, lenient, strict -
- none = run unless unrunnable
- lenient = fix what we can, abort unfixable
- strict = abort at any issue
A convenience function for configuring a non-caching :class:`genophenocorr.preprocessing.PhenopacketPatientCreator`.
To create the patient creator, we need hpo-toolkit's representation of HPO. Other options are optional
@@ -246,10 +242,10 @@ def _setup_phenotype_creator(hpo: hpotk.MinimalOntology,
def _configure_functional_annotator(
- cache_dir: str,
- variant_fallback: str,
- timeout: float,
- ) -> FunctionalAnnotator:
+ cache_dir: str,
+ variant_fallback: str,
+ timeout: float,
+) -> FunctionalAnnotator:
# (2) FunctionalAnnotator
# Setup fallback
@@ -265,9 +261,9 @@ def _configure_functional_annotator(
def _configure_fallback_functional(
- variant_fallback: str,
- timeout: float,
- ) -> FunctionalAnnotator:
+ variant_fallback: str,
+ timeout: float,
+) -> FunctionalAnnotator:
if variant_fallback == 'VEP':
fallback = VepFunctionalAnnotator(timeout=timeout)
else:
diff --git a/src/genophenocorr/preprocessing/_phenopacket.py b/src/genophenocorr/preprocessing/_phenopacket.py
index 12bba8457..be24228d4 100644
--- a/src/genophenocorr/preprocessing/_phenopacket.py
+++ b/src/genophenocorr/preprocessing/_phenopacket.py
@@ -56,10 +56,10 @@ class PhenopacketVariantCoordinateFinder(
VariantCoordinateFinder[GenomicInterpretation]
):
"""
- `PhenopacketVariantCoordinateFinder` figures out :class:`genophenocorr.model.VariantCoordinates`
- and :class:`genophenocorr.model.Genotype` from `GenomicInterpretation` element of Phenopacket Schema.
+ `PhenopacketVariantCoordinateFinder` figures out :class:`~genophenocorr.model.VariantCoordinates`
+ and :class:`~genophenocorr.model.Genotype` from `GenomicInterpretation` element of Phenopacket Schema.
- :param build: genome build to use in `VariantCoordinates
+ :param build: genome build to use in `VariantCoordinates`
:param hgvs_coordinate_finder: the coordinate finder to use for parsing HGVS expressions
"""
@@ -85,8 +85,9 @@ def find_coordinates(
Args:
item (GenomicInterpretation): a genomic interpretation element from Phenopacket Schema
+
Returns:
- VariantCoordinates: variant coordinates
+ typing.Optional[VariantCoordinates]: variant coordinates
"""
if not isinstance(item, GenomicInterpretation):
raise ValueError(
@@ -233,12 +234,12 @@ def __init__(
# Set of sequence ontology IDs that we will treat as a deletion (`DEL`)
# for the purpose of assigning imprecise SV info with a variant class.
self._so_deletions = {
- '1000029', # chromosomal deletion: An incomplete chromosome.
- '0001893', # transcript ablation: A feature ablation whereby the deleted region includes a transcript feature.
- '0001879', # feature_ablation: A sequence variant, caused by an alteration of the genomic sequence, where the deletion, is greater than the extent of the underlying genomic features.
+ '1000029', # chromosomal deletion: An incomplete chromosome.
+ '0001893', # transcript ablation: A feature ablation whereby the deleted region includes a transcript feature.
+ '0001879', # feature_ablation: A sequence variant, caused by an alteration of the genomic sequence, where the deletion is greater than the extent of the underlying genomic features.
}
self._so_duplications = {
- '1000037', # chromosomal_duplication
+ '1000037', # chromosomal_duplication
}
def process(self, inputs: Phenopacket, notepad: Notepad) -> Patient:
diff --git a/src/genophenocorr/preprocessing/_vv.py b/src/genophenocorr/preprocessing/_vv.py
index 777fb1e38..6242ea754 100644
--- a/src/genophenocorr/preprocessing/_vv.py
+++ b/src/genophenocorr/preprocessing/_vv.py
@@ -136,7 +136,7 @@ class VariantValidatorDecodeException(BaseException):
class VVMultiCoordinateService(TranscriptCoordinateService, GeneCoordinateService):
"""
- `VVMultiCoordinateService` uses the Variant Validator REST API to fetch transcript coordinates for
+ `VVMultiCoordinateService` uses the Variant Validator REST API to fetch transcript coordinates for
both a *gene* ID and a specific *transcript* ID.
:param genome_build: the genome build for constructing the transcript coordinates.
@@ -370,7 +370,7 @@ def _parse_cds(coding_start: int, coding_end: int, exons: typing.Iterable[Genomi
processed += exon_len
- raise ValueError(f'Could not parse CDS start and end from given coordinates')
+ raise ValueError('Could not parse CDS start and end from given coordinates')
@staticmethod
def _parse_is_preferred(
@@ -379,7 +379,7 @@ def _parse_is_preferred(
if 'annotations' in tx_data:
annotations = tx_data['annotations']
if 'mane_select' in annotations:
- assert type(annotations['mane_select']) == bool, '\'mane_select\' field must be `bool`'
+ assert isinstance(annotations['mane_select'], bool), '\'mane_select\' field must be `bool`'
return annotations['mane_select']
return None
diff --git a/src/genophenocorr/view/__init__.py b/src/genophenocorr/view/__init__.py
index c29b2c564..6eb13dd61 100644
--- a/src/genophenocorr/view/__init__.py
+++ b/src/genophenocorr/view/__init__.py
@@ -2,7 +2,7 @@
from ._disease import DiseaseViewable
from ._protein_viewer import ProteinViewable
from ._protein_visualizable import ProteinVisualizable
-from ._stats import StatsViewer
+from ._stats import MtcStatsViewer
from ._txp import VariantTranscriptVisualizer
from ._protein_visualizer import ProteinVisualizer
from ._formatter import VariantFormatter
@@ -11,7 +11,7 @@
'CohortViewable',
'ProteinVisualizer', 'ProteinVisualizable', 'ProteinViewable',
'DiseaseViewable',
- 'StatsViewer',
+ 'MtcStatsViewer',
'VariantTranscriptVisualizer',
'VariantFormatter',
]
diff --git a/src/genophenocorr/view/_protein_visualizer.py b/src/genophenocorr/view/_protein_visualizer.py
index e805221d9..7dfa1c452 100644
--- a/src/genophenocorr/view/_protein_visualizer.py
+++ b/src/genophenocorr/view/_protein_visualizer.py
@@ -9,7 +9,7 @@
import matplotlib.colors as mcolors
import numpy as np
-from genophenocorr.model import VariantEffect
+from genophenocorr.model import Cohort, ProteinMetadata, TranscriptCoordinates, VariantEffect
from ._protein_visualizable import ProteinVisualizable
@@ -53,11 +53,22 @@ def __init__(self, random_seed: int = 42) -> None:
self.legend2_max_x = 0.3
self.legend2_max_y = 0.75
+ def draw_protein_diagram(
+ self,
+ tx_coordinates: TranscriptCoordinates,
+ protein_metadata: ProteinMetadata,
+ cohort: Cohort,
+ ax: typing.Optional[plt.Axes] = None,
+ labeling_method: typing.Literal['abbreviate', 'enumerate'] = 'abbreviate'
+ ) -> typing.Optional[plt.Axes]:
+ pvis = ProteinVisualizable(tx_coordinates, protein_metadata, cohort)
+ return self.draw_fig(pvis, ax, labeling_method)
+
def draw_fig(
- self,
- pvis: ProteinVisualizable,
- ax: typing.Optional[plt.Axes] = None,
- labeling_method: typing.Literal['abbreviate', 'enumerate'] = 'abbreviate'
+ self,
+ pvis: ProteinVisualizable,
+ ax: typing.Optional[plt.Axes] = None,
+ labeling_method: typing.Literal['abbreviate', 'enumerate'] = 'abbreviate'
) -> typing.Optional[plt.Axes]:
"""
Visualize the cohort variants on a protein diagram.
diff --git a/src/genophenocorr/view/_stats.py b/src/genophenocorr/view/_stats.py
index 22fd75d95..dd17c6fd3 100644
--- a/src/genophenocorr/view/_stats.py
+++ b/src/genophenocorr/view/_stats.py
@@ -1,15 +1,12 @@
import typing
-from hpotk import MinimalOntology
from jinja2 import Environment, PackageLoader
from genophenocorr.analysis import HpoMtcReport
-from genophenocorr.model import Cohort
-
-class StatsViewer:
+class MtcStatsViewer:
"""
- `StatsViewer` uses a Jinja2 template to create an HTML element for showing in the Jupyter notebook
+ `MtcStatsViewer` uses a Jinja2 template to create an HTML element for showing in the Jupyter notebook
or for writing into a standalone HTML file.
"""
@@ -18,14 +15,14 @@ def __init__(self):
self._cohort_template = environment.get_template("stats.html")
def process(
- self,
- hpo_mtc_report: HpoMtcReport,
+ self,
+ hpo_mtc_report: HpoMtcReport,
) -> str:
"""
- Create an HTML that should be shown with `display(HTML(..))` of the ipython package.
+ Create an HTML string that should be displayed with `display(HTML(..))` from the IPython package.
Args:
- hpo_mtc_report(HpoMtcReport): summary of heuristic term filtering procedure
+ hpo_mtc_report (HpoMtcReport): summary of heuristic term filtering procedure
Returns:
str: an HTML string with parameterized template for rendering or writing into a standalone HTML file.
@@ -35,7 +32,7 @@ def process(
@staticmethod
def _prepare_context(
- hpo_mtc_report: HpoMtcReport,
+ hpo_mtc_report: HpoMtcReport,
) -> typing.Mapping[str, typing.Any]:
results_map = hpo_mtc_report.skipped_terms_dict
if not isinstance(results_map, dict):
@@ -50,7 +47,7 @@ def _prepare_context(
reason_to_count.append({"reason": reason, "count": count})
n_skipped += count
- n_tested = hpo_mtc_report.n_terms_tested - n_skipped
+ n_tested = hpo_mtc_report.n_terms_before_filtering - n_skipped
# The following dictionary is used by the Jinja2 HTML template
return {
@@ -58,6 +55,6 @@ def _prepare_context(
"hpo_mtc_filter_name": hpo_mtc_report.filter_method,
"skipped_hpo_count": n_skipped,
"tested_hpo_count": n_tested,
- "total_hpo_count": hpo_mtc_report.n_terms_tested,
+ "total_hpo_count": hpo_mtc_report.n_terms_before_filtering,
"reason_to_count": reason_to_count,
}
diff --git a/src/genophenocorr/view/templates/cohort.html b/src/genophenocorr/view/templates/cohort.html
index f8ef9379f..d6a3dc866 100644
--- a/src/genophenocorr/view/templates/cohort.html
+++ b/src/genophenocorr/view/templates/cohort.html
@@ -64,7 +64,7 @@
genophenocorr cohort analysis
- Successfully ingested {{ n_individuals }} phenopackets.
+ Successfully ingested {{ n_individuals }} individuals.
{% if n_excluded > 0 %}
Not able to ingest {{ n_excluded }} individuals.
{% else %}
@@ -80,7 +80,7 @@ Top {{top_hpo_count}} HPO Terms
HPO Term |
ID |
- Annotation Count |
+ Seen in n individuals |
{% for hpo_count in hpo_counts %}
@@ -99,20 +99,20 @@ Top {{top_var_count}} Variants
- Variant |
+ Count |
+ Variant key |
Variant Name |
Protein Variant |
Variant Class |
- Variant Count |
{% for var_count in var_counts %}
+ {{ var_count.Count }} |
{{ var_count.variant }} |
{{ var_count.variant_name }} |
{{ var_count.protein_name }} |
{{ var_count.variant_effects }} |
- {{ var_count.Count }} |
-
+
{% endfor %}
diff --git a/src/genophenocorr/view/templates/stats.html b/src/genophenocorr/view/templates/stats.html
index c95fe7a39..2a75c055a 100644
--- a/src/genophenocorr/view/templates/stats.html
+++ b/src/genophenocorr/view/templates/stats.html
@@ -45,17 +45,22 @@
- Statistical analysis: {{mtc_name}} ({{ hpo_mtc_filter_name }})
+ Phenotype testing report
+ Phenotype MTC filter: {{ hpo_mtc_filter_name }}
+ Multiple testing correction: {{ mtc_name }}
Performed statistical tests for {{ tested_hpo_count }} out of the total of {{ total_hpo_count }} HPO terms.
- Using {{ hpo_mtc_filter_name }}, {{ skipped_hpo_count }} term(s) were omitted from statistical analysis.
+ Using {{ hpo_mtc_filter_name }}, {{ skipped_hpo_count }} term(s) were omitted from statistical analysis.
+ Code |
Reason |
Count |
{% for skipped in reason_to_count %}
+
+ TODO |
{{ skipped.reason }} |
{{ skipped.count }} |
diff --git a/tests/analysis/predicate/genotype/test_gt_predicates.py b/tests/analysis/predicate/genotype/test_gt_predicates.py
index e626ae0e9..defa998ab 100644
--- a/tests/analysis/predicate/genotype/test_gt_predicates.py
+++ b/tests/analysis/predicate/genotype/test_gt_predicates.py
@@ -34,7 +34,7 @@ def test_get_question(
predicate: GenotypePolyPredicate,
):
question = predicate.get_question()
- assert question == "What group does the patient belong to: Point, LoF"
+ assert question == "Genotype group: Point, LoF"
def test_get_categorizations(
self,
diff --git a/tests/analysis/test_config.py b/tests/analysis/test_config.py
index b66224175..d730da892 100644
--- a/tests/analysis/test_config.py
+++ b/tests/analysis/test_config.py
@@ -8,7 +8,6 @@ class TestCohortAnalysisConfiguration:
def test_default_values(self):
config = CohortAnalysisConfiguration()
- assert config.missing_implies_excluded is False
assert config.pval_correction == 'bonferroni'
assert config.min_patients_w_hpo is None
assert config.include_sv is False
diff --git a/tests/analysis/test_mtc_filter.py b/tests/analysis/test_mtc_filter.py
index e1899fc6c..c95ebec48 100644
--- a/tests/analysis/test_mtc_filter.py
+++ b/tests/analysis/test_mtc_filter.py
@@ -40,6 +40,23 @@ def patient_counts(
)
return n_usable, all_counts
+ @pytest.fixture(scope='class')
+ def gt_categories(self) -> pd.Index:
+ return pd.Index([PatientCategories.YES, PatientCategories.NO])
+
+ @pytest.fixture(scope='class')
+ def pheno_categories(self) -> pd.Index:
+ return pd.Index([PatientCategories.YES, PatientCategories.NO])
+
+ @staticmethod
+ def prepare_counts_df(
+ counts,
+ index: pd.Index,
+ columns: pd.Index,
+ ):
+ values = np.array(counts).reshape((2, 2))
+ return pd.DataFrame(data=values, index=index, columns=columns)
+
@pytest.mark.parametrize(
"counts, expected",
[
@@ -53,10 +70,15 @@ def test_one_genotype_has_zero_hpo_observations(
self,
counts: typing.Tuple[int],
expected: bool,
+ gt_categories: pd.Index,
+ pheno_categories: pd.Index,
):
- counts_df = self.prepare_counts_df(counts)
+ counts_df = TestHeuristicSamplerMtcFilter.prepare_counts_df(counts, gt_categories, pheno_categories)
- actual = HpoMtcFilter.one_genotype_has_zero_hpo_observations(counts=counts_df)
+ actual = HpoMtcFilter.one_genotype_has_zero_hpo_observations(
+ counts=counts_df,
+ gt_categories=gt_categories,
+ )
assert actual == expected
@@ -77,8 +99,10 @@ def test_some_cell_has_greater_than_one_count(
self,
counts: typing.Tuple[int],
expected: bool,
+ gt_categories: pd.Index,
+ pheno_categories: pd.Index,
):
- counts_df = self.prepare_counts_df(counts)
+ counts_df = TestHeuristicSamplerMtcFilter.prepare_counts_df(counts, gt_categories, pheno_categories)
actual = HpoMtcFilter.some_cell_has_greater_than_one_count(counts=counts_df)
@@ -96,23 +120,33 @@ def test_genotypes_have_same_hpo_proportions(
self,
counts: typing.Tuple[int],
expected: bool,
+ gt_categories: pd.Index,
+ pheno_categories: pd.Index,
):
- counts_df = self.prepare_counts_df(counts)
+ counts_df = TestHeuristicSamplerMtcFilter.prepare_counts_df(counts, gt_categories, pheno_categories)
- actual = HpoMtcFilter.genotypes_have_same_hpo_proportions(counts=counts_df)
+ actual = HpoMtcFilter.genotypes_have_same_hpo_proportions(
+ counts=counts_df,
+ gt_categories=gt_categories,
+ )
assert actual == expected
def test_filter_terms_to_test(
self,
mtc_filter: HpoMtcFilter,
+ suox_gt_predicate: GenotypePolyPredicate,
patient_counts: typing.Tuple[
typing.Mapping[hpotk.TermId, int],
typing.Mapping[hpotk.TermId, pd.DataFrame],
],
):
n_usable, all_counts = patient_counts
- mtc_report = mtc_filter.filter_terms_to_test(n_usable, all_counts)
+ mtc_report = mtc_filter.filter_terms_to_test(
+ suox_gt_predicate,
+ n_usable,
+ all_counts,
+ )
assert isinstance(mtc_report, tuple)
assert len(mtc_report) == 3
@@ -130,6 +164,7 @@ def test_filter_terms_to_test(
def test_specified_term_mtc_filter(
self,
hpo: hpotk.MinimalOntology,
+ suox_gt_predicate: GenotypePolyPredicate,
patient_counts: typing.Tuple[
typing.Mapping[hpotk.TermId, int],
typing.Mapping[hpotk.TermId, pd.DataFrame],
@@ -143,7 +178,11 @@ def test_specified_term_mtc_filter(
"""
specified_filter = SpecifiedTermsMtcFilter(hpo=hpo, terms_to_test={hpotk.TermId.from_curie("HP:0032350")})
n_usable, all_counts = patient_counts
- mtc_report = specified_filter.filter_terms_to_test(n_usable, all_counts)
+ mtc_report = specified_filter.filter_terms_to_test(
+ suox_gt_predicate,
+ n_usable,
+ all_counts,
+ )
assert isinstance(mtc_report, tuple)
assert len(mtc_report) == 3 # # Skipping non-specified term (n=5)
@@ -152,14 +191,6 @@ def test_specified_term_mtc_filter(
assert len(filtered_n_usable) == 1
assert reason_for_filtering_out['Skipping non-specified term'] == 4
- @staticmethod
- def prepare_counts_df(counts):
- index = pd.Index([PatientCategories.YES, PatientCategories.NO])
- columns = pd.Index([PatientCategories.YES, PatientCategories.NO])
- values = np.array(counts).reshape((2, 2))
-
- return pd.DataFrame(data=values, index=index, columns=columns)
-
def test_min_observed_HPO_threshold(
self,
patient_counts: typing.Tuple[
diff --git a/tests/view/test_stats.py b/tests/view/test_stats.py
index 2ce006940..939d29353 100644
--- a/tests/view/test_stats.py
+++ b/tests/view/test_stats.py
@@ -1,19 +1,19 @@
import pytest
from genophenocorr.analysis import HpoMtcReport
-from genophenocorr.view import StatsViewer
+from genophenocorr.view import MtcStatsViewer
class TestStatsViewable:
@pytest.fixture
- def stats_viewer(self) -> StatsViewer:
- return StatsViewer()
+ def stats_viewer(self) -> MtcStatsViewer:
+ return MtcStatsViewer()
@pytest.mark.skip('Until we design a more reasonable test')
def test_process(
- self,
- stats_viewer: StatsViewer,
+ self,
+ stats_viewer: MtcStatsViewer,
):
mtc_report = HpoMtcReport(
filter_name='identity filter',
@@ -25,7 +25,7 @@ def test_process(
'Life is a conspiracy': 80,
'I need coffee': 7,
},
- term_count=100, # The filtered out (80 + 7 + 5) + the unfiltered
+ n_terms_before_filtering=100, # The filtered-out terms (80 + 7 + 5) plus the unfiltered ones
)
report = stats_viewer.process(hpo_mtc_report=mtc_report)