diff --git a/docs/conf.py b/docs/conf.py index 1accede13..7d77904d6 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -64,7 +64,7 @@ # The short X.Y version. version = u'0.9' # The full version, including alpha/beta/rc tags. -release = u'0.9.3' +release = u'0.9.4' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/tutorial.rst b/docs/tutorial.rst index c25f7263c..898758f1c 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -314,7 +314,7 @@ For general use, we recommend using a combination of a *phenotype MT filter* (:class:`~gpsea.analysis.mtc_filter.PhenotypeMtcFilter`) with a *multiple testing correction*. Phenotype MT filter chooses the HPO terms to test according to several heuristics, which reduce the multiple testing burden and focus the analysis -on the most interesting terms (see :ref:`HPO MT filter <hpo-mt-filter>` for more info). +on the most interesting terms (see :ref:`Independent filtering for HPO <hpo-if-filter>` for more info). Then the multiple testing correction, such as Bonferroni or Benjamini-Hochberg, is used to control the family-wise error rate or the false discovery rate. See :ref:`mtc` for more information. @@ -323,7 +323,7 @@ See :ref:`mtc` for more information. >>> analysis = configure_hpo_term_analysis(hpo) :func:`~gpsea.analysis.pcats.configure_hpo_term_analysis` configures the analysis -that uses HPO MTC filter (:class:`~gpsea.analysis.mtc_filter.HpoMtcFilter`) for selecting HPO terms of interest, +that uses Independent filtering for HPO (:class:`~gpsea.analysis.mtc_filter.IfHpoFilter`) for selecting HPO terms of interest, Fisher Exact test for computing nominal p values, and Benjamini-Hochberg for multiple testing correction. diff --git a/docs/user-guide/analyses/mtc.rst b/docs/user-guide/analyses/mtc.rst index 893b634ce..225245bb8 100644 --- a/docs/user-guide/analyses/mtc.rst +++ b/docs/user-guide/analyses/mtc.rst @@ -171,31 +171,37 @@ we pass an iterable (e.g.
a tuple) with these two terms as an argument: 2 -.. _hpo-mt-filter: +.. _hpo-if-filter: -HPO MT filter -------------- +Independent filtering for HPO +----------------------------- + +Independent filtering for HPO involves making several domain judgments +and taking advantage of the HPO structure +in order to reduce the number of HPO terms for testing. +The filter's logic is made up of 8 individual heuristics +to skip testing the terms that are unlikely to yield significant or interesting results (see below). -The HPO MT filter involves making several domain judgments and takes advantage of the HPO structure. -The strategy needs access to HPO: +Some of the heuristics need access to the HPO hierarchy, +so let's load HPO: >>> import hpotk >>> store = hpotk.configure_ontology_store() >>> hpo = store.load_minimal_hpo(release='v2024-07-01') -and it is implemented in the :class:`~gpsea.analysis.mtc_filter.HpoMtcFilter` class: +and let's create an instance of :class:`~gpsea.analysis.mtc_filter.IfHpoFilter` +using the static constructor +:func:`~gpsea.analysis.mtc_filter.IfHpoFilter.default_filter`: ->>> from gpsea.analysis.mtc_filter import HpoMtcFilter ->>> hpo_mtc = HpoMtcFilter.default_filter(hpo=hpo) +>>> from gpsea.analysis.mtc_filter import IfHpoFilter +>>> hpo_mtc = IfHpoFilter.default_filter(hpo=hpo) -We use static constructor :func:`~gpsea.analysis.mtc_filter.HpoMtcFilter.default_filter` -for creating :class:`~gpsea.analysis.mtc_filter.HpoMtcFilter`. -The constructor takes a ``term_frequency_threshold`` option (40% by default) -and the method's logic is made up of 8 individual heuristics -designed to skip testing the HPO terms that are unlikely to yield significant or interesting results. +The constructor takes HPO and two optional thresholds. +See the API documentation and the explanations below for more details. -..
contents:: Independent filtering for HPO :depth: 1 :local: @@ -281,6 +287,8 @@ that if there is a signal from the nervous system, it will lead to at least one of the descendents of *Abnormality of the nervous system* being significant. +See :ref:`general-hpo-terms` section for details. + `HMF09` - Skipping terms that are rare on the cohort level ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -294,8 +302,6 @@ and we have explicit observed observations for 20 and excluded for 10 individual then the annotation frequency is `0.3`. The threshold is set as ``annotation_frequency_threshold`` option -of the :func:`~gpsea.analysis.mtc_filter.HpoMtcFilter.default_filter` constructor, +of the :func:`~gpsea.analysis.mtc_filter.IfHpoFilter.default_filter` constructor, with the default value of `0.4` (40%). - -See :ref:`general-hpo-terms` section for details. diff --git a/docs/user-guide/analyses/phenotype-classes.rst b/docs/user-guide/analyses/phenotype-classes.rst index af1c879eb..56d5b3366 100644 --- a/docs/user-guide/analyses/phenotype-classes.rst +++ b/docs/user-guide/analyses/phenotype-classes.rst @@ -207,7 +207,7 @@ a phenotype multiple testing (MT) filter and multiple testing correction (MTC). Phenotype MT filter selects a (sub)set of HPO terms for testing, for instance only the user-selected terms (see :class:`~gpsea.analysis.mtc_filter.SpecifiedTermsMtcFilter`) -or the terms selected by :class:`~gpsea.analysis.mtc_filter.HpoMtcFilter`. +or the terms selected by :class:`~gpsea.analysis.mtc_filter.IfHpoFilter`. MTC then adjusts the nominal p values for the increased risk of false positive G/P associations. @@ -221,8 +221,8 @@ We must choose a phenotype MT filter as well as a MTC procedure to perform genot Default analysis ^^^^^^^^^^^^^^^^ -We recommend using HPO MT filter (:class:`~gpsea.analysis.mtc_filter.HpoMtcFilter`) as a phenotype MT filter -and Benjamini-Hochberg for MTC. 
+We recommend using Independent filtering for HPO (:class:`~gpsea.analysis.mtc_filter.IfHpoFilter`) +and Benjamini-Hochberg MT correction. The default analysis can be configured with :func:`~gpsea.analysis.pcats.configure_hpo_term_analysis` convenience method. >>> from gpsea.analysis.pcats import configure_hpo_term_analysis @@ -240,10 +240,10 @@ Custom analysis If the default selection of phenotype MT filter and multiple testing correction is not an option, we can configure the analysis manually. -First, we choose a phenotype MT filter (e.g. :class:`~gpsea.analysis.mtc_filter.HpoMtcFilter`): +First, we choose a phenotype MT filter (e.g. :class:`~gpsea.analysis.mtc_filter.IfHpoFilter`): ->>> from gpsea.analysis.mtc_filter import HpoMtcFilter ->>> mtc_filter = HpoMtcFilter.default_filter(hpo, term_frequency_threshold=.2) +>>> from gpsea.analysis.mtc_filter import IfHpoFilter +>>> mtc_filter = IfHpoFilter.default_filter(hpo, term_frequency_threshold=.2) .. note:: diff --git a/src/gpsea/__init__.py b/src/gpsea/__init__.py index 85c5f0b81..923b6f603 100644 --- a/src/gpsea/__init__.py +++ b/src/gpsea/__init__.py @@ -2,7 +2,7 @@ GPSEA is a library for finding genotype-phenotype associations. 
""" -__version__ = "0.9.3" +__version__ = "0.9.4" _overwrite = False """ diff --git a/src/gpsea/analysis/mtc_filter/__init__.py b/src/gpsea/analysis/mtc_filter/__init__.py index 57f0d6149..ddbdb47b1 100644 --- a/src/gpsea/analysis/mtc_filter/__init__.py +++ b/src/gpsea/analysis/mtc_filter/__init__.py @@ -6,9 +6,15 @@ """ from ._impl import PhenotypeMtcFilter, PhenotypeMtcResult, PhenotypeMtcIssue -from ._impl import UseAllTermsMtcFilter, SpecifiedTermsMtcFilter, HpoMtcFilter +from ._impl import UseAllTermsMtcFilter, SpecifiedTermsMtcFilter, IfHpoFilter +from ._impl import HpoMtcFilter __all__ = [ - 'PhenotypeMtcFilter', 'PhenotypeMtcResult', 'PhenotypeMtcIssue', - 'UseAllTermsMtcFilter', 'SpecifiedTermsMtcFilter', 'HpoMtcFilter', + "PhenotypeMtcFilter", + "PhenotypeMtcResult", + "PhenotypeMtcIssue", + "UseAllTermsMtcFilter", + "SpecifiedTermsMtcFilter", + "IfHpoFilter", + "HpoMtcFilter", ] diff --git a/src/gpsea/analysis/mtc_filter/_impl.py b/src/gpsea/analysis/mtc_filter/_impl.py index 83387ead1..4959a7fae 100644 --- a/src/gpsea/analysis/mtc_filter/_impl.py +++ b/src/gpsea/analysis/mtc_filter/_impl.py @@ -3,6 +3,7 @@ import typing from collections import deque +import warnings import hpotk import pandas as pd @@ -252,14 +253,14 @@ def verify_term_id(val: typing.Union[str, hpotk.TermId]) -> hpotk.TermId: raise ValueError(f"{val} is neither `str` nor `hpotk.TermId`") -class HpoMtcFilter(PhenotypeMtcFilter[hpotk.TermId]): +class IfHpoFilter(PhenotypeMtcFilter[hpotk.TermId]): """ - `HpoMtcFilter` decides which phenotypes should be tested and which phenotypes are not worth testing. + `IfHpoFilter` decides which phenotypes should be tested and which phenotypes are not worth testing. The class leverages a number of heuristics and domain decisions. - See :ref:`hpo-mt-filter` section for more info. + See :ref:`hpo-if-filter` section for more info. - We recommend creating an instance using the :func:`default_filter` static factory method. 
+ We recommend creating an instance using the :func:`~gpsea.analysis.mtc_filter.IfHpoFilter.default_filter` static factory method. """ NO_GENOTYPE_HAS_MORE_THAN_ONE_HPO = PhenotypeMtcResult.fail( @@ -293,7 +294,7 @@ def default_filter( (e.g., 22% in missense and 3% in nonsense genotypes would be OK, but not 13% missense and 10% nonsense genotypes if the threshold is 0.2). The default threshold is `0.4` (40%). - annotation_frequency_threshold: a `float` in range :math:`(0, 1) with the minimum frequency of + annotation_frequency_threshold: a `float` in range :math:`(0, 1]` with the minimum frequency of annotation in the cohort. For instance, if the cohort consists of 100 individuals, and we have explicit observed observations for 20 and excluded for 10 individuals, then the annotation frequency is `0.3`. The purpose of this threshold is to omit terms for which @@ -340,7 +341,7 @@ def default_filter( general_hpo_term_set.update(second_level_terms) general_hpo_term_set.update(third_level_terms) - return HpoMtcFilter( + return IfHpoFilter( hpo=hpo, term_frequency_threshold=term_frequency_threshold, annotation_frequency_threshold=annotation_frequency_threshold, @@ -355,7 +356,15 @@ def __init__( general_hpo_terms: typing.Iterable[hpotk.TermId], ): self._hpo = hpo + assert ( + isinstance(term_frequency_threshold, (int, float)) + and 0.0 < term_frequency_threshold <= 1.0 + ), "The term_frequency_threshold must be in the range (0, 1]" self._hpo_term_frequency_filter = term_frequency_threshold + assert ( + isinstance(annotation_frequency_threshold, (int, float)) + and 0.0 < annotation_frequency_threshold <= 1.0 + ), "The annotation_frequency_threshold must be in the range (0, 1]" self._hpo_annotation_frequency_threshold = annotation_frequency_threshold self._general_hpo_terms = set(general_hpo_terms) @@ -423,17 +432,17 @@ def filter( continue if term_id in self._general_hpo_terms: - results[idx] = HpoMtcFilter.SKIPPING_GENERAL_TERM + results[idx] = 
IfHpoFilter.SKIPPING_GENERAL_TERM continue if not self._hpo.graph.is_ancestor_of(PHENOTYPIC_ABNORMALITY, term_id): - results[idx] = HpoMtcFilter.SKIPPING_NON_PHENOTYPE_TERM + results[idx] = IfHpoFilter.SKIPPING_NON_PHENOTYPE_TERM continue ph_clf = pheno_clfs[idx] contingency_matrix = counts[idx] - max_freq = HpoMtcFilter.get_maximum_group_observed_HPO_frequency( + max_freq = IfHpoFilter.get_maximum_group_observed_HPO_frequency( contingency_matrix, ph_clf=ph_clf, ) @@ -459,19 +468,19 @@ def filter( results[idx] = self._not_powered_for_2_by_3 continue - if not HpoMtcFilter.some_cell_has_greater_than_one_count( + if not IfHpoFilter.some_cell_has_greater_than_one_count( counts=contingency_matrix, ph_clf=ph_clf, ): - results[idx] = HpoMtcFilter.NO_GENOTYPE_HAS_MORE_THAN_ONE_HPO + results[idx] = IfHpoFilter.NO_GENOTYPE_HAS_MORE_THAN_ONE_HPO continue - elif HpoMtcFilter.one_genotype_has_zero_hpo_observations( + elif IfHpoFilter.one_genotype_has_zero_hpo_observations( counts=contingency_matrix, gt_clf=gt_clf, ): results[idx] = ( - HpoMtcFilter.SKIPPING_SINCE_ONE_GENOTYPE_HAD_ZERO_OBSERVATIONS + IfHpoFilter.SKIPPING_SINCE_ONE_GENOTYPE_HAD_ZERO_OBSERVATIONS ) continue @@ -495,7 +504,7 @@ def filter( axis=None ) < 1: # Do not test if the count is exactly the same to the counts in the only child term. 
- results[idx] = HpoMtcFilter.SAME_COUNT_AS_THE_ONLY_CHILD + results[idx] = IfHpoFilter.SAME_COUNT_AS_THE_ONLY_CHILD continue # ## @@ -520,18 +529,18 @@ def possible_results(self) -> typing.Collection[PhenotypeMtcResult]: return ( PhenotypeMtcFilter.OK, self._below_frequency_threshold, # HMF01 - HpoMtcFilter.NO_GENOTYPE_HAS_MORE_THAN_ONE_HPO, # HMF02 - HpoMtcFilter.SAME_COUNT_AS_THE_ONLY_CHILD, # HMF03 - HpoMtcFilter.SKIPPING_SINCE_ONE_GENOTYPE_HAD_ZERO_OBSERVATIONS, # HMF05 + IfHpoFilter.NO_GENOTYPE_HAS_MORE_THAN_ONE_HPO, # HMF02 + IfHpoFilter.SAME_COUNT_AS_THE_ONLY_CHILD, # HMF03 + IfHpoFilter.SKIPPING_SINCE_ONE_GENOTYPE_HAD_ZERO_OBSERVATIONS, # HMF05 self._not_powered_for_2_by_2, # HMF06 self._not_powered_for_2_by_3, # HMF06 - HpoMtcFilter.SKIPPING_NON_PHENOTYPE_TERM, # HMF07 - HpoMtcFilter.SKIPPING_GENERAL_TERM, # HMF08 + IfHpoFilter.SKIPPING_NON_PHENOTYPE_TERM, # HMF07 + IfHpoFilter.SKIPPING_GENERAL_TERM, # HMF08 self._below_annotation_frequency_threshold, # HMF09 ) def filter_method_name(self) -> str: - return "HPO MTC filter" + return "Independent filtering HPO filter" @staticmethod def get_number_of_observed_hpo_observations( @@ -623,3 +632,65 @@ def _get_ordered_terms( # now, ordered_term_list is ordered from leaves to root return ordered_term_list + + +class HpoMtcFilter(IfHpoFilter): + """ + `HpoMtcFilter` is deprecated and will be removed in `1.0.0`. + + Use :class:`gpsea.analysis.mtc_filter.IfHpoFilter` instead. 
+    """
+
+    @staticmethod
+    def default_filter(
+        hpo: hpotk.MinimalOntology,
+        term_frequency_threshold: float = 0.4,
+        annotation_frequency_threshold: float = 0.4,
+        phenotypic_abnormality: hpotk.TermId = PHENOTYPIC_ABNORMALITY,
+    ):
+        """
+        Args:
+            hpo: HPO
+            term_frequency_threshold: a `float` in range :math:`(0, 1]` with the minimum frequency
+              for an HPO term to have in at least one of the genotype groups
+              (e.g., 22% in missense and 3% in nonsense genotypes would be OK,
+              but not 13% missense and 10% nonsense genotypes if the threshold is 0.2).
+              The default threshold is `0.4` (40%).
+            annotation_frequency_threshold: a `float` in range :math:`(0, 1]` with the minimum frequency of
+              annotation in the cohort. For instance, if the cohort consists of 100 individuals, and
+              we have explicit observed observations for 20 and excluded for 10 individuals, then the
+              annotation frequency is `0.3`. The purpose of this threshold is to omit terms for which
+              we simply do not have much data overall. By default, we set a threshold to `0.4` (40%).
+            phenotypic_abnormality: a :class:`~hpotk.TermId` corresponding to the root of HPO phenotype hierarchy.
+              Having to specify this option should be needed very rarely, if ever.
+        """
+        warnings.warn(
+            "HpoMtcFilter has been deprecated and will be removed in 1.0.0. Use `IfHpoFilter` instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return IfHpoFilter.default_filter(  # hand the configured filter back; it was silently dropped before
+            hpo=hpo,
+            term_frequency_threshold=term_frequency_threshold,
+            annotation_frequency_threshold=annotation_frequency_threshold,
+            phenotypic_abnormality=phenotypic_abnormality,
+        )
+
+    def __init__(
+        self,
+        hpo: hpotk.MinimalOntology,
+        term_frequency_threshold: float,
+        annotation_frequency_threshold: float,
+        general_hpo_terms: typing.Iterable[hpotk.TermId],
+    ):
+        super().__init__(
+            hpo,
+            term_frequency_threshold,
+            annotation_frequency_threshold,
+            general_hpo_terms,
+        )
+        warnings.warn(
+            "HpoMtcFilter has been deprecated and will be removed in 1.0.0.
Use `IfHpoFilter` instead.", + DeprecationWarning, + stacklevel=2, + ) diff --git a/src/gpsea/analysis/pcats/_config.py b/src/gpsea/analysis/pcats/_config.py index 2a06fc3e1..0638564c8 100644 --- a/src/gpsea/analysis/pcats/_config.py +++ b/src/gpsea/analysis/pcats/_config.py @@ -2,7 +2,7 @@ import hpotk -from ..mtc_filter import HpoMtcFilter +from ..mtc_filter import IfHpoFilter from ._impl import HpoTermAnalysis from .stats import CountStatistic, FisherExactTest @@ -16,13 +16,13 @@ def configure_hpo_term_analysis( """ Configure HPO term analysis with default parameters. - The default analysis will pre-filter HPO terms with :class:`~gpsea.analysis.mtc_filter.HpoMtcFilter`, + The default analysis will pre-filter HPO terms with :class:`~gpsea.analysis.mtc_filter.IfHpoFilter`, then compute nominal p values using `count_statistic` (default Fisher exact test), and apply multiple testing correction (default Benjamini/Hochberg (`fdr_bh`)) with target `mtc_alpha` (default `0.05`). """ return HpoTermAnalysis( - mtc_filter=HpoMtcFilter.default_filter(hpo), + mtc_filter=IfHpoFilter.default_filter(hpo), count_statistic=count_statistic, mtc_correction=mtc_correction, mtc_alpha=mtc_alpha, diff --git a/tests/analysis/pcats/test_hpo_term_analysis.py b/tests/analysis/pcats/test_hpo_term_analysis.py index 45278867e..5413e5660 100644 --- a/tests/analysis/pcats/test_hpo_term_analysis.py +++ b/tests/analysis/pcats/test_hpo_term_analysis.py @@ -6,7 +6,7 @@ from gpsea.model import Cohort -from gpsea.analysis.mtc_filter import PhenotypeMtcFilter, HpoMtcFilter +from gpsea.analysis.mtc_filter import PhenotypeMtcFilter, IfHpoFilter from gpsea.analysis.pcats import HpoTermAnalysis from gpsea.analysis.pcats.stats import CountStatistic, FisherExactTest from gpsea.analysis.clf import GenotypeClassifier, PhenotypeClassifier @@ -22,7 +22,7 @@ def phenotype_mtc_filter( self, hpo: hpotk.MinimalOntology, ) -> PhenotypeMtcFilter: - return HpoMtcFilter.default_filter( + return 
IfHpoFilter.default_filter( hpo=hpo, term_frequency_threshold=0.2, annotation_frequency_threshold=0.25, diff --git a/tests/analysis/test_mtc_filter.py b/tests/analysis/test_mtc_filter.py index 62664b082..4afe81bcf 100644 --- a/tests/analysis/test_mtc_filter.py +++ b/tests/analysis/test_mtc_filter.py @@ -5,7 +5,7 @@ import pandas as pd import pytest -from gpsea.analysis.mtc_filter import HpoMtcFilter, SpecifiedTermsMtcFilter +from gpsea.analysis.mtc_filter import IfHpoFilter, SpecifiedTermsMtcFilter from gpsea.analysis.clf import GenotypeClassifier, PhenotypeClassifier, HpoClassifier from gpsea.analysis.pcats import apply_classifiers_on_individuals from gpsea.model import Cohort @@ -25,13 +25,13 @@ def patient_counts( return counts -class TestHpoMtcFilter: +class TestIfHpoFilter: @pytest.fixture def mtc_filter( self, hpo: hpotk.MinimalOntology, - ) -> HpoMtcFilter: - return HpoMtcFilter.default_filter( + ) -> IfHpoFilter: + return IfHpoFilter.default_filter( hpo=hpo, term_frequency_threshold=0.2, annotation_frequency_threshold=0.1, @@ -86,9 +86,9 @@ def test_one_genotype_has_zero_hpo_observations( gt_clf: GenotypeClassifier, ph_predicate: PhenotypeClassifier[hpotk.TermId], ): - counts_df = TestHpoMtcFilter.prepare_counts_df(counts, gt_clf, ph_predicate) + counts_df = TestIfHpoFilter.prepare_counts_df(counts, gt_clf, ph_predicate) - actual = HpoMtcFilter.one_genotype_has_zero_hpo_observations( + actual = IfHpoFilter.one_genotype_has_zero_hpo_observations( counts=counts_df, gt_clf=gt_clf, ) @@ -113,9 +113,9 @@ def test_some_cell_has_greater_than_one_count( gt_clf: GenotypeClassifier, ph_predicate: PhenotypeClassifier[hpotk.TermId], ): - counts_df = TestHpoMtcFilter.prepare_counts_df(counts, gt_clf, ph_predicate) + counts_df = TestIfHpoFilter.prepare_counts_df(counts, gt_clf, ph_predicate) - actual = HpoMtcFilter.some_cell_has_greater_than_one_count( + actual = IfHpoFilter.some_cell_has_greater_than_one_count( counts=counts_df, ph_clf=ph_predicate, ) @@ -139,9 +139,9 
@@ def test_get_maximum_group_observed_HPO_frequency( gt_clf: GenotypeClassifier, ph_predicate: PhenotypeClassifier[hpotk.TermId], ): - counts_df = TestHpoMtcFilter.prepare_counts_df(counts, gt_clf, ph_predicate) + counts_df = TestIfHpoFilter.prepare_counts_df(counts, gt_clf, ph_predicate) - actual = HpoMtcFilter.get_maximum_group_observed_HPO_frequency( + actual = IfHpoFilter.get_maximum_group_observed_HPO_frequency( counts_frame=counts_df, ph_clf=ph_predicate, ) @@ -150,7 +150,7 @@ def test_get_maximum_group_observed_HPO_frequency( def test_filter_terms_to_test( self, - mtc_filter: HpoMtcFilter, + mtc_filter: IfHpoFilter, suox_gt_clf: GenotypeClassifier, suox_pheno_clfs: typing.Sequence[PhenotypeClassifier[hpotk.TermId]], patient_counts: typing.Sequence[pd.DataFrame], @@ -199,7 +199,7 @@ def test_min_observed_HPO_threshold( idx = curie2idx["HP:0001083"] ectopia = patient_counts[idx] ectopia_predicate = suox_pheno_clfs[idx] - max_f = HpoMtcFilter.get_maximum_group_observed_HPO_frequency( + max_f = IfHpoFilter.get_maximum_group_observed_HPO_frequency( ectopia, ph_clf=ectopia_predicate, ) @@ -209,7 +209,7 @@ def test_min_observed_HPO_threshold( idx = curie2idx["HP:0001250"] seizure = patient_counts[idx] seizure_predicate = suox_pheno_clfs[idx] - max_f = HpoMtcFilter.get_maximum_group_observed_HPO_frequency( + max_f = IfHpoFilter.get_maximum_group_observed_HPO_frequency( seizure, ph_clf=seizure_predicate ) assert max_f == pytest.approx(1.0, abs=EPSILON) @@ -218,7 +218,7 @@ def test_min_observed_HPO_threshold( idx = curie2idx["HP:0032350"] sulfocysteinuria = patient_counts[idx] sulfocysteinuria_predicate = suox_pheno_clfs[idx] - max_f = HpoMtcFilter.get_maximum_group_observed_HPO_frequency( + max_f = IfHpoFilter.get_maximum_group_observed_HPO_frequency( sulfocysteinuria, ph_clf=sulfocysteinuria_predicate, ) @@ -228,7 +228,7 @@ def test_min_observed_HPO_threshold( idx = curie2idx["HP:0012758"] ndelay = patient_counts[idx] ndelay_predicate = suox_pheno_clfs[idx] - max_f 
= HpoMtcFilter.get_maximum_group_observed_HPO_frequency( + max_f = IfHpoFilter.get_maximum_group_observed_HPO_frequency( ndelay, ph_clf=ndelay_predicate, ) @@ -238,12 +238,40 @@ def test_min_observed_HPO_threshold( idx = curie2idx["HP:0001276"] hypertonia = patient_counts[idx] hypertonia_predicate = suox_pheno_clfs[idx] - max_f = HpoMtcFilter.get_maximum_group_observed_HPO_frequency( + max_f = IfHpoFilter.get_maximum_group_observed_HPO_frequency( hypertonia, ph_clf=hypertonia_predicate, ) assert max_f == pytest.approx(0.5714, abs=EPSILON) + def test_mtc_filter_term_frequency_threshold_raises( + self, + hpo: hpotk.MinimalOntology, + ): + with pytest.raises(AssertionError) as e: + IfHpoFilter.default_filter( + hpo=hpo, + term_frequency_threshold=1.1, + annotation_frequency_threshold=0.1, + ) + assert e.value.args == ( + "The term_frequency_threshold must be in the range (0, 1]", + ) + + def test_mtc_filter_annotation_frequency_threshold_raises( + self, + hpo: hpotk.MinimalOntology, + ): + with pytest.raises(AssertionError) as e: + IfHpoFilter.default_filter( + hpo=hpo, + term_frequency_threshold=0.1, + annotation_frequency_threshold=1.1, + ) + assert e.value.args == ( + "The annotation_frequency_threshold must be in the range (0, 1]", + ) + class TestSpecifyTermsMtcFilter: def test_specified_term_mtc_filter(