From 7241b08bbfffa4502c1ddaa4fcb96bd034e1e64c Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Wed, 4 Sep 2024 13:26:44 +0200 Subject: [PATCH 1/2] Remove obsolete analysis code. --- src/gpsea/analysis/__init__.py | 15 - src/gpsea/analysis/_api.py | 472 ------------------ src/gpsea/analysis/_config.py | 352 ------------- src/gpsea/analysis/_gp_analysis.py | 202 -------- src/gpsea/analysis/_gp_impl.py | 195 -------- src/gpsea/analysis/_stats.py | 154 ------ src/gpsea/analysis/_test_fisherExact.py | 28 -- src/gpsea/analysis/_util.py | 61 --- .../analysis/predicate/genotype/__init__.py | 4 +- src/gpsea/analysis/predicate/genotype/_api.py | 11 +- .../predicate/genotype/_gt_predicates.py | 66 +-- .../test_counting_scorer.py} | 0 tests/analysis/test_config.py | 52 -- tests/analysis/test_examples.py | 137 ----- 14 files changed, 4 insertions(+), 1745 deletions(-) delete mode 100644 src/gpsea/analysis/_api.py delete mode 100644 src/gpsea/analysis/_config.py delete mode 100644 src/gpsea/analysis/_gp_analysis.py delete mode 100644 src/gpsea/analysis/_gp_impl.py delete mode 100644 src/gpsea/analysis/_stats.py delete mode 100644 src/gpsea/analysis/_test_fisherExact.py delete mode 100644 src/gpsea/analysis/_util.py rename tests/analysis/{test_pscore.py => pscore/test_counting_scorer.py} (100%) delete mode 100644 tests/analysis/test_config.py delete mode 100644 tests/analysis/test_examples.py diff --git a/src/gpsea/analysis/__init__.py b/src/gpsea/analysis/__init__.py index 1adf3c2ee..e69de29bb 100644 --- a/src/gpsea/analysis/__init__.py +++ b/src/gpsea/analysis/__init__.py @@ -1,15 +0,0 @@ -from ._api import CohortAnalysis, GenotypePhenotypeAnalysisResult, HpoMtcReport -# TODO This should go away -from ._config import CohortAnalysisConfiguration, configure_cohort_analysis, configure_default_protein_metadata_service, MtcStrategy -from ._gp_analysis import apply_predicates_on_patients -from ._util import prepare_hpo_terms_of_interest - -__all__ = [ - 'configure_cohort_analysis', - 'CohortAnalysis', 'GenotypePhenotypeAnalysisResult', - 'CohortAnalysisConfiguration', 'MtcStrategy', - 'HpoMtcReport', - 'apply_predicates_on_patients', - 'configure_default_protein_metadata_service', - 'prepare_hpo_terms_of_interest', -] diff --git a/src/gpsea/analysis/_api.py b/src/gpsea/analysis/_api.py deleted file mode 100644 index b2b7d41d7..000000000 --- a/src/gpsea/analysis/_api.py +++ /dev/null @@ -1,472 +0,0 @@ -import abc -import typing -from collections import namedtuple, defaultdict - -import hpotk -import pandas as pd - -from gpsea.model import Patient -from gpsea.preprocessing import ProteinMetadataService -from .predicate import PolyPredicate, PatientCategory -from .predicate.genotype import GenotypePolyPredicate, VariantPredicate, ProteinPredicates -from .predicate.phenotype import P, PhenotypePolyPredicate -from .pscore import PhenotypeScorer, CountingPhenotypeScorer - -PatientsByHPO = namedtuple('PatientsByHPO', field_names=['all_with_hpo', 'all_without_hpo']) - - -class HpoMtcReport: - """ - Class to simplify reporting results of multiple testing filtering by HpoMtcFilter subclasses. - """ - # TODO: delete with no replacement. - - def __init__( - self, - filter_name: str, - mtc_name: str, - filter_results_map: typing.Mapping[str, int], - n_terms_before_filtering: int, - ): - """ - Args: - filter_name: name of the MTC filter strategy (e.g. `heuristic sampler`) - mtc_name: name of the MTC function (e.g. 
`bonferroni`) - filter_results_map: mapping with reasons for filtering out a term as keys, and counts of filtered terms as values - n_terms_before_filtering: the number of HPO terms before filtering - """ - self._filter_name = filter_name - self._mtc_name = mtc_name - self._results_map = filter_results_map - self._n_terms_before_filtering = n_terms_before_filtering - - @property - def filter_method(self) -> str: - """ - Returns: - the name of the HpoMtcFilter method used. - """ - return self._filter_name - - @property - def skipped_terms_dict(self) -> typing.Mapping[str, int]: - """ - Returns: - a mapping with reasons why an HPO term was skipped as keys and counts of the skipped terms as values. - """ - return self._results_map - - @property - def mtc_method(self) -> str: - """ - Returns: - the name of the multiple testing correction method used (e.g. `bonferroni`). - """ - return self._mtc_name - - @property - def n_terms_before_filtering(self) -> int: - """ - Get the number of terms before filtering. - """ - return self._n_terms_before_filtering - - -class GenotypePhenotypeAnalysisResult: - """ - `GenotypePhenotypeAnalysisResult` summarizes results of genotype-phenotype correlation analysis of a cohort. - """ - # TODO: delete and use `gpsea.analysis.pcats.MultiPhenotypeAnalysisResult`. - - def __init__( - self, - n_usable: typing.Mapping[P, int], - all_counts: typing.Mapping[P, pd.DataFrame], - pvals: pd.Series, - corrected_pvals: typing.Optional[pd.Series], - phenotype_categories: typing.Iterable[PatientCategory], - geno_predicate: PolyPredicate, - mtc_filter_report: typing.Optional[HpoMtcReport] = None - ): - self._n_usable = n_usable - self._all_counts = all_counts - self._pvals = pvals - self._corrected_pvals = corrected_pvals - self._phenotype_categories = tuple(phenotype_categories) - self._geno_predicate = geno_predicate - self._mtc_filter_report = mtc_filter_report - - @property - def n_usable(self) -> typing.Mapping[P, int]: - """ - Get a mapping from a phenotype `P` (either an HPO term or a disease ID) - to an `int` with the number of patients where the phenotype was assessable, - and are, thus, usable for genotype-phenotype correlation analysis. - """ - return self._n_usable - - @property - def all_counts(self) -> typing.Mapping[P, pd.DataFrame]: - """ - Get a mapping from the phenotype item to :class:`pandas.DataFrame` with counts of patients - in genotype and phenotype groups. - - An example for a genotype predicate that bins into two categories (`Yes` and `No`) based on presence - of a missense variant in transcript `NM_123456.7`, and phenotype predicate that checks - presence/absence of `HP:0001166` (a phenotype term):: - - Has MISSENSE_VARIANT in NM_123456.7 - No Yes - Present - Yes 1 13 - No 7 5 - - The rows correspond to the phenotype categories, and the columns represent the genotype categories. - """ - return self._all_counts - - @property - def pvals(self) -> pd.Series: - """ - Get a :class:`pandas.Series` with p values for each tested HPO term. - """ - return self._pvals - - @property - def corrected_pvals(self) -> typing.Optional[pd.Series]: - """ - Get an optional :class:`pandas.Series` with p values for each tested HPO term after multiple testing correction. - """ - return self._corrected_pvals - - @property - def phenotype_categories(self) -> typing.Sequence[PatientCategory]: - """ - Get a sequence of phenotype patient categories that can be investigated. 
- """ - return self._phenotype_categories - - @property - def total_tests(self) -> int: - """ - Get total count of tests that were run for this analysis. - """ - return len(self._all_counts) - - @property - def mtc_filter_report(self) -> typing.Optional[HpoMtcReport]: - return self._mtc_filter_report - - def summarize( - self, hpo: hpotk.MinimalOntology, - category: PatientCategory, - ) -> pd.DataFrame: - """ - Create a data frame with summary of the genotype phenotype analysis. - - The *rows* of the frame correspond to the analyzed HPO terms. - - The columns of the data frame have `Count` and `Percentage` per used genotype predicate. - - **Example** - - If we use :class:`~gpsea.analysis.predicate.genotype.VariantEffectPredicate` - which can compare phenotype with and without a missense variant, we will have a data frame - that looks like this:: - - MISSENSE_VARIANT on `NM_1234.5` No Yes - Count Percent Count Percent p value Corrected p value - Arachnodactyly [HP:0001166] 1/10 10% 13/16 81% 0.000781 0.020299 - Abnormality of the musculature [HP:0003011] 6/6 100% 11/11 100% 1.000000 1.000000 - Abnormal nervous system physiology [HP:0012638] 9/9 100% 15/15 100% 1.000000 1.000000 - ... ... ... ... ... ... ... - """ - if category not in self._phenotype_categories: - raise ValueError(f'Unknown phenotype category: {category}. Use one of {self._phenotype_categories}') - - # Row index: a list of tested HPO terms - pheno_idx = pd.Index(self._n_usable.keys()) - # Column index: multiindex of counts and percentages for all genotype predicate groups - geno_idx = pd.MultiIndex.from_product( - iterables=(self._geno_predicate.get_categories(), ('Count', 'Percent')), - names=(self._geno_predicate.get_question_base(), None), - ) - - # We'll fill this frame with data - df = pd.DataFrame(index=pheno_idx, columns=geno_idx) - - for pf, count in self._all_counts.items(): - gt_totals = count.sum() # Sum across the phenotype categories (collapse the rows). - for gt_cat in count.columns: - cnt = count.loc[category, gt_cat] - total = gt_totals[gt_cat] - df.loc[pf, (gt_cat, 'Count')] = f'{cnt}/{total}' - pct = 0 if total == 0 else round(cnt * 100 / total) - df.loc[pf, (gt_cat, 'Percent')] = f'{pct}%' - - # Add columns with p values and corrected p values (if present) - df.insert(df.shape[1], ('', self._pvals.name), self._pvals) - if self._corrected_pvals is not None: - df.insert(df.shape[1], ('', self._corrected_pvals.name), self._corrected_pvals) - - # Format the index values: `HP:0001250` -> `Seizure [HP:0001250]` if the index members are HPO terms - # or just use the term ID CURIE otherwise (e.g. `OMIM:123000`). - labeled_idx = df.index.map(lambda term_id: GenotypePhenotypeAnalysisResult._format_term_id(hpo, term_id)) - - # Last, sort by corrected p value or just p value - df = df.set_index(labeled_idx) - if self._corrected_pvals is not None: - return df.sort_values(by=[('', self._corrected_pvals.name), ('', self._pvals.name)]) - else: - return df.sort_values(by=('', self._pvals.name)) - - @staticmethod - def _format_term_id( - hpo: hpotk.MinimalOntology, - term_id: hpotk.TermId, - ) -> str: - """ - Format a `term_id` as a `str`. HPO term ID is formatted as ` []` whereas other term IDs - are formatted as CURIEs (e.g. `OMIM:123000`). - """ - if term_id.prefix == 'HP': - min_onto = hpo.get_term(term_id) - return f'{min_onto.name} [{term_id.value}]' - else: - return term_id.value - - -class PhenotypeScoreAnalysisResult: - """ - `PhenotypeScoreAnalysisResult` includes results of testing genotypes vs. phenotype scores. 
- - See :ref:`Mann Whitney U Test for phenotype score ` for more background. - """ - # TODO: delete and use `gpsea.analysis.pscore.PhenotypeScoreAnalysisResult` - - def __init__( - self, - genotype_phenotype_scores: pd.DataFrame, - p_value: float, - ): - self._genotype_phenotype_scores = genotype_phenotype_scores - self._p_value = p_value - - @property - def genotype_phenotype_scores( - self, - ) -> pd.DataFrame: - """ - Get the DataFrame with the genotype group and the phenotype score for each patient. - - The DataFrame has the following structure: - - ========== ======== ========= - patient_id genotype phenotype - ========== ======== ========= - patient_1 0 1 - patient_2 0 3 - patient_3 1 2 - ... ... ... - ========== ======== ========= - - The DataFrame index includes the patient IDs, and then there are 2 columns - with the `genotype` group id (:attr:`~gpsea.analysis.predicate.PatientCategory.cat_id`) - and the `phenotype` score. - """ - return self._genotype_phenotype_scores - - @property - def p_value(self) -> float: - return self._p_value - - def __str__(self) -> str: - return 'PhenotypeGroupAnalysisResult(' \ - f'genotype_phenotype_scores={self._genotype_phenotype_scores}, ' \ - f'p_value={self._p_value})' - - def __repr__(self) -> str: - return str(self) - - -class CohortAnalysis(metaclass=abc.ABCMeta): - """ - `CohortAnalysis` is a driver class for running genotype-phenotype correlation analyses. - - The class provides various methods to test genotype-phenotype correlations. All methods wrap results - into :class:`GenotypePhenotypeAnalysisResult`. - """ - # TODO: remove and use the analyses described in `User Guide > Statistical tests`. - - def __init__( - self, - hpo: hpotk.MinimalOntology, - protein_service: ProteinMetadataService, - ): - self._hpo = hpotk.util.validate_instance(hpo, hpotk.MinimalOntology, 'hpo') - self._protein_service = protein_service - self._protein_predicates = ProteinPredicates(self._protein_service) - - @abc.abstractmethod - def compare_hpo_vs_genotype( - self, - predicate: VariantPredicate, - ) -> GenotypePhenotypeAnalysisResult: - """ - Bin patients according to a presence of at least one allele that matches `predicate` - and test for genotype-phenotype correlations. - """ - pass - - @abc.abstractmethod - def compare_hpo_vs_recessive_genotype( - self, - predicate: VariantPredicate, - ) -> GenotypePhenotypeAnalysisResult: - """ - Bin patients according to a presence of zero, one, or two alleles that matche the `predicate` - and test for genotype-phenotype correlations. - """ - pass - - @abc.abstractmethod - def compare_hpo_vs_genotype_groups( - self, - predicates: typing.Iterable[VariantPredicate], - group_names: typing.Iterable[str], - ) -> GenotypePhenotypeAnalysisResult: - """ - Bin patients according to a presence of at least one allele that matches - any of the provided `predicates` and test for genotype-phenotype correlations - between the groups. - - Note, the patients that pass testing by >1 genotype predicate are *OMITTED* from the analysis! 
- """ - pass - - @abc.abstractmethod - def compare_disease_vs_genotype( - self, - predicate: VariantPredicate, - disease_ids: typing.Optional[typing.Sequence[typing.Union[str, hpotk.TermId]]] = None, - ) -> GenotypePhenotypeAnalysisResult: - pass - - def compare_genotype_vs_phenotype_group_count( - self, - gt_predicate: GenotypePolyPredicate, - phenotype_group_terms: typing.Iterable[typing.Union[str, hpotk.TermId]], - ) -> PhenotypeScoreAnalysisResult: - # TODO: separate into pscore module - assert isinstance(gt_predicate, GenotypePolyPredicate) - assert gt_predicate.n_categorizations() == 2 - - counting_scorer = CountingPhenotypeScorer.from_query_curies( - hpo=self._hpo, - query=phenotype_group_terms, - ) - - return self.compare_genotype_vs_phenotype_score( - gt_predicate=gt_predicate, - phenotype_scorer=counting_scorer, - ) - - @abc.abstractmethod - def compare_genotype_vs_phenotype_score( - self, - gt_predicate: GenotypePolyPredicate, - phenotype_scorer: PhenotypeScorer, - ) -> PhenotypeScoreAnalysisResult: - """ - Score the patients with a phenotype scoring method and test for correlation between the genotype group - and the phenotype score. - - Args: - gt_predicate: a genotype predicate for binning the patients along the genotype axis. - phenotype_scorer: a callable that computes a phenotype score for a given `Patient`. - """ - pass - - @abc.abstractmethod - def compare_genotype_vs_cohort_phenotypes( - self, - gt_predicate: GenotypePolyPredicate, - ) -> GenotypePhenotypeAnalysisResult: - pass - - @abc.abstractmethod - def compare_genotype_vs_phenotypes( - self, - gt_predicate: GenotypePolyPredicate, - pheno_predicates: typing.Iterable[PhenotypePolyPredicate[P]], - ): - """ - All analysis functions go through this function. - - The genotype predicate will partition the individuals into non-overlapping groups - along the genotype axis. - The phenotype predicates represent the phenotypes we want to test. - Less phenotypes may actually be tested thanks to :class:`~gpsea.analysis.PhenotypeMtcFilter`. - - Args: - gt_predicate: a predicate for binning the individuals along the genotype axis - pheno_predicates: phenotype predicates for test the individuals along the phenotype axis - """ - pass - - @staticmethod - def _check_min_perc_patients_w_hpo(min_perc_patients_w_hpo: typing.Union[int, float], - cohort_size: int) -> float: - """ - Check if the input meets the requirements. 
- """ - if isinstance(min_perc_patients_w_hpo, int): - if min_perc_patients_w_hpo > 0: - return min_perc_patients_w_hpo / cohort_size - else: - raise ValueError(f'`min_perc_patients_w_hpo` must be a positive `int` ' - f'but got {min_perc_patients_w_hpo}') - elif isinstance(min_perc_patients_w_hpo, float): - if 0 < min_perc_patients_w_hpo <= 1: - return min_perc_patients_w_hpo - else: - raise ValueError(f'`min_perc_patients_w_hpo` must be a `float` in range (0, 1] ' - f'but got {min_perc_patients_w_hpo}') - else: - raise ValueError(f'`min_perc_patients_w_hpo` must be a positive `int` or a `float` in range (0, 1] ' - f'but got {type(min_perc_patients_w_hpo)}') - - @staticmethod - def _group_patients_by_hpo(phenotypic_features: typing.Iterable[hpotk.TermId], - patients: typing.Iterable[Patient], - hpo: hpotk.GraphAware, - missing_implies_excluded: bool) -> PatientsByHPO: - all_with_hpo = defaultdict(list) - all_without_hpo = defaultdict(list) - for hpo_term in phenotypic_features: - for patient in patients: - found = False - for pf in patient.present_phenotypes(): - if hpo_term == pf.identifier or hpo.graph.is_ancestor_of(hpo_term, pf): - # Patient is annotated with `hpo_term` because `pf` is equal to `hpo_term` - # or it is a descendant of `hpo_term`. - all_with_hpo[hpo_term].append(patient) - - # If one `pf` of the patient is found to be a descendant of `hpo`, we must break to prevent - # adding the patient to `present_hpo` more than once due to another descendant! - found = True - break - if not found: - # The patient is not annotated by the `hpo_term`. - - if missing_implies_excluded: - # The `hpo_term` annotation is missing, hence implicitly excluded. - all_without_hpo[hpo_term].append(patient) - else: - # The `hpo_term` must be explicitly excluded patient to be accounted for. - for ef in patient.excluded_phenotypes(): - if hpo_term == ef.identifier or hpo.graph.is_descendant_of(hpo_term, ef): - all_with_hpo[hpo_term].append(patient) - break - - return PatientsByHPO(all_with_hpo, all_without_hpo) diff --git a/src/gpsea/analysis/_config.py b/src/gpsea/analysis/_config.py deleted file mode 100644 index 2c6d68c63..000000000 --- a/src/gpsea/analysis/_config.py +++ /dev/null @@ -1,352 +0,0 @@ -import enum -import logging -import os -import typing -import warnings - -import hpotk - -from gpsea.config import get_cache_dir_path -from gpsea.model import Cohort -from gpsea.preprocessing import ProteinMetadataService -from gpsea.preprocessing import configure_default_protein_metadata_service as backup_pms -from .mtc_filter import PhenotypeMtcFilter, UseAllTermsMtcFilter, SpecifiedTermsMtcFilter, HpoMtcFilter -from ._api import CohortAnalysis -from ._gp_analysis import FisherExactAnalyzer -from ._gp_impl import GpCohortAnalysis - -P_VAL_OPTIONS = ( - 'bonferroni', 'b', - 'sidak', 's', - 'holm-sidak', 'hs', - 'holm', 'h', - 'simes-hochberg', 'sh', - 'hommel', 'ho', - 'fdr_bh', 'fdr_by', - 'fdr_tsbh', 'fdr_tsbky', - 'fdr_gbs', - None, -) - - -class MtcStrategy(enum.Enum): - """ - A strategy for mitigating the multiple testing correction (MTC) burden. - """ - - ALL_PHENOTYPE_TERMS = 0 - """ - All phenotype terms (HPO or disease IDs) will be tested. - """ - - SPECIFY_TERMS = 1 - """ - Only the manually provided HPO terms will be tested. - """ - - HPO_MTC = 2 - """ - Only HPO terms present in at least a certain fraction of patients will be tested. 
- """ - - -class CohortAnalysisConfiguration: - """ - `CohortAnalysisConfiguration` is a value class for storing :class:`~gpsea.analysis.CohortAnalysis` - configuration options. - - The class contains the default values upon creation and the configuration option values can be set as properties. - - If an invalid value option is passed to the property setter, a warning is logged and the previous value is retained. - Therefore, it is *impossible* to mis-configure the analysis. - - Default values - ^^^^^^^^^^^^^^ - - ============================== ======================= ========================================= - Option Type Default value - ============================== ======================= ========================================= - ``pval_correction`` `str` `bonferroni` - ``min_n_patients_with_term`` `int` `2` - ``mtc_alpha`` `float` `0.05` - ``include_sv`` `bool` `False` - ``mtc_strategy`` :class:`MtcStrategy` :class:`MtcStrategy.ALL_PHENOTYPE_TERMS` - ============================== ======================= ========================================= - - """ - - def __init__(self): - self._logger = logging.getLogger(__name__) - self._pval_correction = 'bonferroni' - self._mtc_alpha = .05 - self._min_n_patients_with_term = 2 - self._include_sv = False - self._mtc_strategy = MtcStrategy.ALL_PHENOTYPE_TERMS - self._terms_to_test = None # # only relevant for SPECIFIED_TERMS strategy - self._min_patients_w_hpo = None # # only relevant for HPO_MTC strategy - - @property - def pval_correction(self) -> typing.Optional[str]: - """ - Get method for multiple testing p value correction. Default: `bonferroni`. - """ - return self._pval_correction - - @pval_correction.setter - def pval_correction(self, pval_correction: typing.Optional[str]): - """ - Set the method for p value correction. - - See Statsmodels' - `documentation `_ - for the acceptable values. - - :param pval_correction: a `str` with the name of the desired multiple testing correction method or `None` - if no MTC should be applied. - """ - if pval_correction in P_VAL_OPTIONS: - self._pval_correction = pval_correction - else: - self._logger.warning('Ignoring invalid `pval_correction` value %s. Using %s correction.', pval_correction, - self._pval_correction) - - @property - def min_n_patients_with_term(self) -> int: - """ - Get the minimum number of patients that must be annotated with an HPO term - for including the term in the analysis. - """ - return self._min_n_patients_with_term - - @min_n_patients_with_term.setter - def min_n_patients_with_term(self, value: int): - if isinstance(value, int) and value >= 0: - self._min_n_patients_with_term = value - else: - self._logger.warning( - 'Ignoring invalid `min_n_patients_with_term` value %s. Using %s', - value, self._min_n_patients_with_term, - ) - - @property - def mtc_alpha(self) -> float: - """ - The alpha value for multiple testing correction. - """ - return self._mtc_alpha - - @mtc_alpha.setter - def mtc_alpha(self, mtc_alpha: float): - """ - Set new multiple testing correction alpha value. - - :param mtc_alpha: a `float` in range :math:`(0,1]`. - """ - if isinstance(mtc_alpha, float) and 0. < mtc_alpha <= 1.: - self._mtc_alpha = mtc_alpha - else: - self._logger.warning( - '`mtc_alpha` should be a `float` in range `(0, 1]` but was %s. Keeping the previous value %s', - mtc_alpha, self._mtc_alpha, - ) - - @property - def include_sv(self) -> bool: - """ - `True` if we want to include structural variants in the analysis - (i.e. the variants that use symbolic VCF notation). 
- """ - return self._include_sv - - @include_sv.setter - def include_sv(self, include_sv: bool): - """ - Set `include_sv` option. - - :param include_sv: a `bool` with the value. - """ - if isinstance(include_sv, bool): - self._include_sv = include_sv - else: - self._logger.warning('Ignoring invalid `include_sv` value %s. Using %s', include_sv, self._include_sv) - - @property - def mtc_strategy(self) -> MtcStrategy: - """ - Get the MTC filtering strategy to be used. - """ - return self._mtc_strategy - - def all_terms_strategy(self): - """ - Test all phenotype terms. - - See :ref:`use-all-terms-strategy` for more info. - """ - self._mtc_strategy = MtcStrategy.ALL_PHENOTYPE_TERMS - self._min_patients_w_hpo = None - self._terms_to_test = None - - def hpo_mtc_strategy( - self, - min_patients_w_hpo: float = 0.2, - ): - """ - Only test the HPO terms that pass all rules of the HPO filter strategy. - - See :ref:`hpo-mtc-filter-strategy` section for more info on the rules. - - :param threshold_HPO_observed_frequency: a float in range :math:`(0, 1]` to represent - the minimum fraction of patients for an HPO term to be included. - """ - if not isinstance(min_patients_w_hpo, float): - raise ValueError(f'`min_patients_w_hpo` is not a `float`: {min_patients_w_hpo}') - if not 0 < min_patients_w_hpo <= 1: - raise ValueError(f'`min_patients_w_hpo` must be in range (0, 1] but was {min_patients_w_hpo}') - - self._mtc_strategy = MtcStrategy.HPO_MTC - self._min_patients_w_hpo = min_patients_w_hpo - self._terms_to_test = None - - def specify_terms_strategy( - self, - terms_to_test: typing.Iterable[typing.Union[str, hpotk.TermId]], - ): - """ - Mitigate the MTC burden by only testing the specified HPO terms. - - The HPO terms are validated before running the analysis, - to point out invalid CURIE (e.g. `WHATEVER`) values, - or HPO term IDs that are not in the currently used HPO. - - Calling this method will clear any previously specified terms. - - See :ref:`specify-terms-strategy` for more info. - - :param terms_to_test: an iterable with CURIEs (e.g. `HP:0001250`) - or :class:`hpotk.TermId` instances representing the terms to test. - """ - self._mtc_strategy = MtcStrategy.SPECIFY_TERMS - self._min_patients_w_hpo = None - self._terms_to_test = tuple(terms_to_test) - - @property - def terms_to_test(self) -> typing.Optional[typing.Iterable[typing.Union[str, hpotk.TermId]]]: - """ - Get the ids of the terms to be tested in `specify_terms_strategy` - or `None` if :class:`MtcStrategy.SPECIFY_TERMS` will *not* be used. - """ - return self._terms_to_test - - @property - def min_patients_w_hpo(self) -> typing.Optional[float]: - """ - Get the minimum fraction of patients needed to be annotated with an HPO term - to justify being tested or `None` if :class:`MtcStrategy.HPO_MTC` will *not* be used. - """ - return self._min_patients_w_hpo - - -def configure_cohort_analysis( - cohort: Cohort, - hpo: hpotk.MinimalOntology, - protein_source: str = 'UNIPROT', - cache_dir: typing.Optional[str] = None, - config: typing.Optional[CohortAnalysisConfiguration] = None, -) -> CohortAnalysis: - """ - Configure :class:`~gpsea.analysis.CohortAnalysis` for given `cohort`. - - :param cohort: a :class:`~gpsea.model.Cohort` to analyze - :param hpo: a :class:`~hpotk.MinimalOntology` with HPO to use in the analysis - :param protein_source: the resource to retrieve protein annotations from if we cannot find the annotations locally. - Choose from ``{'UNIPROT'}`` (just one fallback implementation is available at the moment). 
- :param config: an optional :class:`CohortAnalysisConfiguration` to parameterize the analysis. - The default parameters will be used if `None`. - """ - if config is None: - config = CohortAnalysisConfiguration() # Use the default config - - cache_path = get_cache_dir_path(cache_dir) - os.makedirs(cache_path, exist_ok=True) - - cache_dir = str(cache_path) - protein_metadata_service = backup_pms(protein_source, cache_dir) - - mtc_filter: PhenotypeMtcFilter - if config.mtc_strategy == MtcStrategy.HPO_MTC: - assert config.min_patients_w_hpo is not None, '`min_patients_w_hpo` must be set if using `HPO_MTC` strategy' - mtc_filter = HpoMtcFilter.default_filter( - hpo=hpo, - term_frequency_threshold=config.min_patients_w_hpo, - ) - elif config.mtc_strategy == MtcStrategy.SPECIFY_TERMS: - assert config.terms_to_test is not None, '`terms_to_test` must be set if using `SPECIFY_TERMS` strategy' - validated_terms_to_test = _validate_terms_to_test(hpo, config.terms_to_test) - mtc_filter = SpecifiedTermsMtcFilter(hpo=hpo, terms_to_test=validated_terms_to_test) - elif config.mtc_strategy == MtcStrategy.ALL_PHENOTYPE_TERMS: - mtc_filter = UseAllTermsMtcFilter() - else: - raise ValueError(f"Did not recognize MtcStrategy {config.mtc_strategy}") - - # Choosing a simple Fisher's exact test for now. - gp_analyzer = FisherExactAnalyzer( - mtc_filter=mtc_filter, - p_val_correction=config.pval_correction, - mtc_alpha=config.mtc_alpha, - ) - - return GpCohortAnalysis( - cohort=cohort, - hpo=hpo, - protein_service=protein_metadata_service, - gp_analyzer=gp_analyzer, - min_n_of_patients_with_term=config.min_n_patients_with_term, - include_sv=config.include_sv, - ) - - -def _validate_terms_to_test( - hpo: hpotk.MinimalOntology, - terms_to_test: typing.Iterable[typing.Union[hpotk.TermId, str]], -) -> typing.Iterable[hpotk.TermId]: - """ - Check that: - * all terms to test are valid TermIds/CURIES, - * the term IDs are in used HPO, and - * there is at least one term to test - """ - validated_terms_to_test = set() - - for term in terms_to_test: - if isinstance(term, hpotk.TermId): - pass - elif isinstance(term, str): - term = hpotk.TermId.from_curie(term) - else: - raise ValueError(f'{term} is neither a TermId nor a CURIE `str`!') - - if term not in hpo: - raise ValueError(f"HPO ID {term} not in HPO ontology {hpo.version}") - validated_terms_to_test.add(term) - if len(validated_terms_to_test) == 0: - raise ValueError('Cannot run use {MTC_Strategy.SPECIFY_TERMS} with no HPO terms!') - - return validated_terms_to_test - - -def configure_default_protein_metadata_service( - protein_source: str = 'UNIPROT', - cache_dir: typing.Optional[str] = None, -) -> ProteinMetadataService: - """ - Create default protein metadata service that will cache the protein metadata - in current working directory under `.gpsea_cache/protein_cache` - and reach out to UNIPROT REST API if a cache entry is missing. - """ - # TODO: remove at some point. 
- warnings.warn( - 'Use gpsea.preprocessing.configure_default_protein_metadata_service` instead', - DeprecationWarning, stacklevel=2, - ) - return backup_pms(protein_source=protein_source, cache_dir=cache_dir) diff --git a/src/gpsea/analysis/_gp_analysis.py b/src/gpsea/analysis/_gp_analysis.py deleted file mode 100644 index f786049ce..000000000 --- a/src/gpsea/analysis/_gp_analysis.py +++ /dev/null @@ -1,202 +0,0 @@ -import abc -import typing - -import pandas as pd - -from statsmodels.stats import multitest - -from gpsea.model import Patient -from .mtc_filter import PhenotypeMtcFilter -from .predicate import PatientCategory -from .predicate.genotype import GenotypePolyPredicate -from .predicate.phenotype import PhenotypePolyPredicate, P -from ._api import GenotypePhenotypeAnalysisResult, HpoMtcReport -from ._stats import run_fisher_exact, run_recessive_fisher_exact - - -def apply_predicates_on_patients( - patients: typing.Iterable[Patient], - pheno_predicates: typing.Iterable[PhenotypePolyPredicate[P]], - gt_predicate: GenotypePolyPredicate, -) -> typing.Tuple[ - typing.Collection[PatientCategory], - typing.Mapping[P, int], - typing.Mapping[P, pd.DataFrame], -]: - """ - Apply the phenotype predicates `pheno_predicates` and the genotype predicate `gt_predicate` - to bin the `patients` into categories. - - Note, it may not be possible to bin *all* patients with a genotype/phenotype pair, - since a predicate is allowed to return `None` (e.g. if it bins the patient into MISSENSE or NONSENSE groups - but the patient has no MISSENSE or NONSENSE variants). If this happens, the patient will not be "usable" - for the phenotype `P`. - - Args: - patients: an iterable with the patients to bin into categories - pheno_predicates: an iterable with the phenotype predicates to apply - gt_predicate: a genotype predicate to apply - - Returns: - a tuple with 3 items: - - a collection of unique :class:`PatientCategory` items that the patients were binned into - - a mapping from a phenotype :class:`P` (e.g. an HPO term or a disease) - to an `int` with count of patients that could be binned according to the phenotype `P` - - a mapping from phenotype :class:`P` to a data frame with counts of patients - in i-th phenotype category and j-th genotype category where i and j are rows and columns of the data frame - """ - # TODO: delete with no replacement. - phenotypes = set() - categories = set() - for predicate in pheno_predicates: - categories.update(predicate.get_categories()) - phenotypes.add(predicate.phenotype) - - n_usable_patients = pd.Series(data=0, index=pd.Index(phenotypes)) - - # Apply genotype and phenotype predicates - counts = {} - for ph_predicate in pheno_predicates: - if ph_predicate.phenotype not in counts: - # Make an empty frame for keeping track of the counts. 
- counts[ph_predicate.phenotype] = pd.DataFrame( - data=0, - index=pd.Index( - data=ph_predicate.get_categories(), - name=ph_predicate.get_question_base(), - ), - columns=pd.Index( - data=gt_predicate.get_categories(), - name=gt_predicate.get_question_base(), - ), - ) - - for patient in patients: - pheno_cat = ph_predicate.test(patient) - geno_cat = gt_predicate.test(patient) - - if pheno_cat is not None and geno_cat is not None: - counts[pheno_cat.phenotype].loc[pheno_cat.category, geno_cat.category] += 1 - n_usable_patients[pheno_cat.phenotype] += 1 - - return categories, n_usable_patients, counts - - -class GPAnalyzer(typing.Generic[P], metaclass=abc.ABCMeta): - """ - `GPAnalyzer` calculates p values for genotype-phenotype correlation of phenotypic features of interest. - """ - # TODO: delete with no replacement. - - @abc.abstractmethod - def analyze( - self, - patients: typing.Iterable[Patient], - pheno_predicates: typing.Iterable[PhenotypePolyPredicate[P]], - gt_predicate: GenotypePolyPredicate, - ) -> GenotypePhenotypeAnalysisResult: - """ - Test for association between `phenotypic_features` of interest groups or `patients` determined - by the `predicate`. - - :return: :class:`GenotypePhenotypeAnalysisResult` object with the results. - """ - pass - - -class FisherExactAnalyzer(typing.Generic[P], GPAnalyzer[P]): - """ - `FisherExactAnalyzer` uses Fisher's exact test to calculate p value for phenotypic features of interest. - - Following the test, the code applies one of the multiple testing corrections provided by - :func:`statsmodels.stats.multitest.multipletests` function at given `mtc_alpha`. - - :param p_val_correction: a `str` with name of the multiple testing correction method or `None` if no correction - should be applied. - :param mtc_alpha: a `float` in range :math:`(0, 1]` with the multiple testing correction alpha value. - """ - # TODO: delete and use `gpsea.analysis.pcats.MultiPhenotypeAnalysis`. - - def __init__( - self, - mtc_filter: PhenotypeMtcFilter, - p_val_correction: typing.Optional[str] = None, - mtc_alpha: float = .05, - ): - self._correction = p_val_correction - self._mtc_alpha = mtc_alpha - self._mtc_filter = mtc_filter - - def analyze( - self, - patients: typing.Iterable[Patient], - pheno_predicates: typing.Iterable[PhenotypePolyPredicate[P]], - gt_predicate: GenotypePolyPredicate, - ) -> GenotypePhenotypeAnalysisResult: - pheno_predicates = tuple(pheno_predicates) - if len(pheno_predicates) == 0: - raise ValueError('No phenotype predicates were provided') - - # 1) Count the patients - categories, n_usable, all_counts = apply_predicates_on_patients( - patients, pheno_predicates, gt_predicate, - ) - n_terms_before_filtering = len(n_usable) - - # 1.5) Filter terms for MTC - n_usable_filtered, all_counts_filtered, reason2count = self._mtc_filter.filter_terms_to_test( - gt_predicate=gt_predicate, - n_usable=n_usable, - all_counts=all_counts, - ) - if len(n_usable_filtered) == 0: - raise ValueError("No phenotypes are left for the analysis after MTC filtering step") - - assert len(n_usable_filtered) == len(all_counts_filtered) - - # 2) Statistical tests - pheno_idx = pd.Index(all_counts_filtered.keys(), name='p_val') - pvals = pd.Series(float('nan'), index=pheno_idx, name='p value') - for phenotype in pheno_idx: - counts = all_counts_filtered[phenotype] - # TODO - this is where we must fail unless we have the contingency table of the right size! 
- if counts.shape == (2, 2): - pvals[phenotype] = run_fisher_exact(counts) - elif counts.shape == (2, 3): - pvals[phenotype] = run_recessive_fisher_exact(counts) - else: - raise ValueError( - "Invalid number of categories. " - f"A {counts.shape} table was created. Only (2, 2) and (2, 3) are valid sizes." - ) - - # 3) Multiple correction - if self._correction is not None: - _, pvals_corrected, _, _ = multitest.multipletests(pvals, alpha=self._mtc_alpha, method=self._correction) - corrected_idx = pd.Index(all_counts_filtered.keys(), name='p_val_corrected') - corrected_pvals_series = pd.Series( - data=pvals_corrected, index=corrected_idx, name='Corrected p value', - ) - else: - corrected_pvals_series = None - - mtc_method = "none" - if self._correction is not None: - mtc_method = self._correction - mtc_filter_report = HpoMtcReport( - filter_name=self._mtc_filter.filter_method_name(), - mtc_name=mtc_method, - filter_results_map=reason2count, - n_terms_before_filtering=n_terms_before_filtering, - ) - - # 4) Wrap up - return GenotypePhenotypeAnalysisResult( - n_usable=n_usable_filtered, - all_counts=all_counts_filtered, - pvals=pvals, - corrected_pvals=corrected_pvals_series, - phenotype_categories=categories, - geno_predicate=gt_predicate, - mtc_filter_report=mtc_filter_report - ) diff --git a/src/gpsea/analysis/_gp_impl.py b/src/gpsea/analysis/_gp_impl.py deleted file mode 100644 index 1d01045c0..000000000 --- a/src/gpsea/analysis/_gp_impl.py +++ /dev/null @@ -1,195 +0,0 @@ -import logging -import typing - -import hpotk -import pandas as pd - -from scipy.stats import mannwhitneyu - -from gpsea.model import Cohort -from gpsea.preprocessing import ProteinMetadataService -from .pscore import PhenotypeScorer -from .predicate.genotype import GenotypePolyPredicate, VariantPredicate -from .predicate.genotype import boolean_predicate as wrap_as_boolean_predicate -from .predicate.genotype import groups_predicate as wrap_as_groups_predicate -from .predicate.genotype import recessive_predicate as wrap_as_recessive_predicate -from .predicate.phenotype import PhenotypePolyPredicate, P, PropagatingPhenotypePredicate, DiseasePresencePredicate - -from ._api import CohortAnalysis, GenotypePhenotypeAnalysisResult, PhenotypeScoreAnalysisResult -from ._gp_analysis import GPAnalyzer -from ._util import prepare_hpo_terms_of_interest - - -class GpCohortAnalysis(CohortAnalysis): - - def __init__( - self, cohort: Cohort, - hpo: hpotk.MinimalOntology, - protein_service: ProteinMetadataService, - gp_analyzer: GPAnalyzer, - min_n_of_patients_with_term: int, - include_sv: bool = False, - ): - super().__init__( - hpo, - protein_service, - ) - if not isinstance(cohort, Cohort): - raise ValueError(f"cohort must be type Cohort but was type {type(cohort)}") - - self._logger = logging.getLogger(__name__) - self._cohort = cohort - # self._phenotype_filter = hpotk.util.validate_instance(phenotype_filter, PhenotypeFilter, 'phenotype_filter') - self._gp_analyzer = hpotk.util.validate_instance(gp_analyzer, GPAnalyzer, 'gp_analyzer') - - self._patient_list = list(cohort.all_patients) \ - if include_sv \ - else [pat for pat in cohort.all_patients if not all(var.variant_info.is_structural() for var in pat.variants)] - if len(self._patient_list) == 0: - raise ValueError('No patients left for analysis!') - - self._hpo_terms_of_interest = prepare_hpo_terms_of_interest( - hpo=self._hpo, - patients=self._patient_list, - min_n_of_patients_with_term=min_n_of_patients_with_term, - ) - - def compare_hpo_vs_genotype( - self, - predicate: 
VariantPredicate, - ) -> GenotypePhenotypeAnalysisResult: - """ - Bin patients according to a presence of at least one allele that matches `predicate` - and test for genotype-phenotype correlations. - """ - assert isinstance(predicate, VariantPredicate), \ - f'{type(predicate)} is not an instance of `VariantPredicate`' - bool_predicate = wrap_as_boolean_predicate(predicate) - return self.compare_genotype_vs_cohort_phenotypes(bool_predicate) - - def compare_hpo_vs_recessive_genotype( - self, - predicate: VariantPredicate, - ) -> GenotypePhenotypeAnalysisResult: - """ - Bin patients according to a presence of zero, one, or two alleles that matche the `predicate` - and test for genotype-phenotype correlations. - """ - rec_predicate = wrap_as_recessive_predicate(predicate) - return self.compare_genotype_vs_cohort_phenotypes(rec_predicate) - - def compare_hpo_vs_genotype_groups( - self, - predicates: typing.Iterable[VariantPredicate], - group_names: typing.Iterable[str], - ) -> GenotypePhenotypeAnalysisResult: - """ - Bin patients according to a presence of at least one allele that matches - any of the provided `predicates` and test for genotype-phenotype correlations - between the groups. - - Note, the patients that pass testing by >1 genotype predicate are *OMITTED* from the analysis! - """ - predicate = wrap_as_groups_predicate( - predicates=predicates, - group_names=group_names, - ) - return self.compare_genotype_vs_cohort_phenotypes(predicate) - - def compare_disease_vs_genotype( - self, - predicate: VariantPredicate, - disease_ids: typing.Optional[typing.Sequence[typing.Union[str, hpotk.TermId]]] = None, - ) -> GenotypePhenotypeAnalysisResult: - pheno_predicates = self._prepare_disease_predicates(disease_ids) - - # This can be updated to any genotype poly predicate in future, if necessary. - genotype_predicate = wrap_as_boolean_predicate(predicate) - return self.compare_genotype_vs_phenotypes(genotype_predicate, pheno_predicates) - - def _prepare_disease_predicates( - self, - disease_ids: typing.Optional[typing.Sequence[typing.Union[str, hpotk.TermId]]], - ) -> typing.Sequence[DiseasePresencePredicate]: - testing_diseases = [] - if disease_ids is None: - disease_ids = [dis.identifier for dis in self._cohort.all_diseases()] - if len(disease_ids) < 1: - raise ValueError("No diseases available for testing.") - for disease_id in disease_ids: - if isinstance(disease_id, str): - testing_diseases.append(hpotk.TermId.from_curie(disease_id)) - elif isinstance(disease_id, hpotk.TermId): - testing_diseases.append(disease_id) - else: - raise ValueError(f'{disease_id} must be a `str` or a `hpotk.TermId`') - pheno_predicates = [] - for disease in testing_diseases: - pheno_predicates.append(DiseasePresencePredicate(disease)) - return pheno_predicates - - def compare_genotype_vs_phenotype_score( - self, - gt_predicate: GenotypePolyPredicate, - phenotype_scorer: PhenotypeScorer, - ) -> PhenotypeScoreAnalysisResult: - idx = pd.Index(tuple(p.patient_id for p in self._patient_list), name='patient_id') - data = pd.DataFrame( - None, - index=idx, - columns=['genotype', 'phenotype'], - ) - - # Apply the predicates on the patients - for patient in self._patient_list: - gt_cat = gt_predicate.test(patient) - data.loc[patient.patient_id, 'genotype'] = None if gt_cat is None else gt_cat.category.cat_id - data.loc[patient.patient_id, 'phenotype'] = phenotype_scorer.score(patient) - - # To improve the determinism - data.sort_index(inplace=True) - - # Sort by PatientCategory.cat_id and unpack. 
- # For now, we only allow to have up to 2 groups. - x_key, y_key = sorted(data['genotype'].dropna().unique()) - x = data.loc[data['genotype'] == x_key, 'phenotype'].to_numpy(dtype=float) - y = data.loc[data['genotype'] == y_key, 'phenotype'].to_numpy(dtype=float) - result = mannwhitneyu( - x=x, - y=y, - alternative='two-sided', - ) - - return PhenotypeScoreAnalysisResult( - genotype_phenotype_scores=data, - p_value=float(result.pvalue), - ) - - def compare_genotype_vs_cohort_phenotypes( - self, - gt_predicate: GenotypePolyPredicate, - ) -> GenotypePhenotypeAnalysisResult: - assert isinstance(gt_predicate, GenotypePolyPredicate), \ - f'{type(gt_predicate)} is not an instance of `GenotypePolyPredicate`' - - pheno_predicates = self._prepare_phenotype_predicates() - return self.compare_genotype_vs_phenotypes(gt_predicate, pheno_predicates) - - def _prepare_phenotype_predicates(self) -> typing.Sequence[PhenotypePolyPredicate[P]]: - return tuple( - PropagatingPhenotypePredicate( - hpo=self._hpo, - query=query, - ) - for query in self._hpo_terms_of_interest) - - def compare_genotype_vs_phenotypes( - self, - gt_predicate: GenotypePolyPredicate, - pheno_predicates: typing.Iterable[PhenotypePolyPredicate[P]], - ) -> GenotypePhenotypeAnalysisResult: - return self._gp_analyzer.analyze( - patients=self._patient_list, - pheno_predicates=pheno_predicates, - gt_predicate=gt_predicate, - ) diff --git a/src/gpsea/analysis/_stats.py b/src/gpsea/analysis/_stats.py deleted file mode 100644 index 1364e7d56..000000000 --- a/src/gpsea/analysis/_stats.py +++ /dev/null @@ -1,154 +0,0 @@ -import abc -import math -import typing -from decimal import Decimal - -import numpy as np -import scipy - -# TODO: delete the module - - -class MultiFisherExact(metaclass=abc.ABCMeta): - - @abc.abstractmethod - def calculate(self, a: np.ndarray) -> float: - """ - :param a: a 2x3 int array with counts - :returns: a p value calculated with Fisher's exact test - """ - pass - - @staticmethod - def _check_input(a: np.ndarray): - if not isinstance(a, np.ndarray): - raise ValueError(f'Expected a numpy array but got {type(a)}') - if not a.shape == (2, 3): - raise ValueError(f'Shape of the array must be (2, 3) but got {a.shape}') - if np.array_equal(a, np.zeros_like(a)): - raise ValueError(f'Array is all zeros, cannot run analysis') - - -class PythonMultiFisherExact(MultiFisherExact): - - def calculate(self, a: np.ndarray) -> float: - MultiFisherExact._check_input(a) - return self._fisher_exact(a) - - def _fisher_exact(self, table): - row_sum = [] - col_sum = [] - - for i in range(len(table)): - temp = 0 - for j in range(len(table[0])): - temp += table[i][j] - row_sum.append(temp) - - for j in range(len(table[0])): - temp = 0 - for i in range(len(table)): - temp += table[i][j] - col_sum.append(temp) - - mat = [[0] * len(col_sum)] * len(row_sum) - pos = (0, 0) - - p_0 = 1 - - for x in row_sum: - p_0 *= math.factorial(x) - for y in col_sum: - p_0 *= math.factorial(y) - - n = 0 - for x in row_sum: - n += x - p_0 /= Decimal(math.factorial(n)) - - for i in range(len(table)): - for j in range(len(table[0])): - p_0 /= Decimal(math.factorial(table[i][j])) - - p = [0] - self._dfs(mat, pos, row_sum, col_sum, p_0, p) - - return float(p[0]) - - def _dfs(self, mat, pos, r_sum, c_sum, p_0, p): - - (xx, yy) = pos - (r, c) = (len(r_sum), len(c_sum)) - - mat_new = [] - - for i in range(len(mat)): - temp = [] - for j in range(len(mat[0])): - temp.append(mat[i][j]) - mat_new.append(temp) - - if xx == -1 and yy == -1: - for i in range(r - 1): - temp = 
r_sum[i] - for j in range(c - 1): - temp -= mat_new[i][j] - mat_new[i][c - 1] = temp - for j in range(c - 1): - temp = c_sum[j] - for i in range(r - 1): - temp -= mat_new[i][j] - mat_new[r - 1][j] = temp - temp = r_sum[r - 1] - for j in range(c - 1): - temp -= mat_new[r - 1][j] - if temp < 0: - return - mat_new[r - 1][c - 1] = temp - - p_1 = 1 - for x in r_sum: - p_1 *= math.factorial(x) - for y in c_sum: - p_1 *= math.factorial(y) - - n = 0 - for x in r_sum: - n += x - p_1 /= Decimal(math.factorial(n)) - - for i in range(len(mat_new)): - for j in range(len(mat_new[0])): - p_1 /= Decimal(math.factorial(mat_new[i][j])) - if p_1 <= p_0 + Decimal(0.00000001): - # print(mat_new) - # print(p_1) - p[0] += p_1 - else: - max_1 = r_sum[xx] - max_2 = c_sum[yy] - for j in range(c): - max_1 -= mat_new[xx][j] - for i in range(r): - max_2 -= mat_new[i][yy] - for k in range(min(max_1, max_2) + 1): - mat_new[xx][yy] = k - if xx == r - 2 and yy == c - 2: - pos_new = (-1, -1) - elif xx == r - 2: - pos_new = (0, yy + 1) - else: - pos_new = (xx + 1, yy) - self._dfs(mat_new, pos_new, r_sum, c_sum, p_0, p) - - -def run_recessive_fisher_exact(two_by_three_table: typing.Sequence[typing.Sequence[int]]): - a = np.array(two_by_three_table, dtype=np.int64) - test_class = PythonMultiFisherExact() - val = test_class.calculate(a) - return val - - -def run_fisher_exact(two_by_two_table: typing.Sequence[typing.Sequence[int]]): - oddsr, p = scipy.stats.fisher_exact(two_by_two_table, alternative='two-sided') - return p diff --git a/src/gpsea/analysis/_test_fisherExact.py b/src/gpsea/analysis/_test_fisherExact.py deleted file mode 100644 index fd530f5c6..000000000 --- a/src/gpsea/analysis/_test_fisherExact.py +++ /dev/null @@ -1,28 +0,0 @@ -import pytest -import numpy as np -from contextlib import nullcontext as does_not_raise -from ._stats import PythonMultiFisherExact - - -@pytest.fixture -def MultiExact() -> PythonMultiFisherExact: - return PythonMultiFisherExact() - -# TODO: remove - -@pytest.mark.parametrize('table, raise_error, pVal', - ([[[0,0,0],[0,0,0]], pytest.raises(ValueError), None], - [[[2, 1, 0],[3, 0, 2]], does_not_raise(), 0.6429], - #[[[100, 150], [500, 460], [420, 400]], pytest.raises(OverflowError), None], - [[[5,5,5],[5,5,5]], does_not_raise(), 1], - [[[10, 5, 20], [15, 5]], pytest.raises(ValueError), None], - [[[10, 2, 3],[1, 3, 4]], does_not_raise(), 0.0395], - [[[6, 0, 0],[6, 5, 8]], does_not_raise(), 0.0233], - [[[],[]], pytest.raises(ValueError), None] -)) -def test_multiFisherExact(table, raise_error, pVal, MultiExact): - with raise_error: - np_table = np.array(table, dtype=np.int64) - final_pval = MultiExact.calculate(np_table) - assert round(final_pval, 4) == pVal - \ No newline at end of file diff --git a/src/gpsea/analysis/_util.py b/src/gpsea/analysis/_util.py deleted file mode 100644 index 2c3445b56..000000000 --- a/src/gpsea/analysis/_util.py +++ /dev/null @@ -1,61 +0,0 @@ -import typing - -from collections import Counter - -import hpotk - - -from gpsea.model import Patient - - -def prepare_hpo_terms_of_interest( - hpo: hpotk.graph.GraphAware, - patients: typing.Iterable[Patient], - min_n_of_patients_with_term: int, -) -> typing.Collection[hpotk.TermId]: - """ - Prepare a collection of HPO terms to test. - - This includes the direct HPO patient annotations - as well as the ancestors of the present terms and the descendants of the excluded terms. - - :param hpo: an entity with an HPO graph (e.g. :class:`hpotk.MinimalOntology`). - :param patients: an iterable with patients. 
- :param min_n_of_patients_with_term: the minimum number of patients that must feature an HPO term - (either directly or indirectly) for the term to be included in the analysis. - """ - # TODO remove in favor of `gpsea.analysis.predicate.phenotype` - present_count = Counter() - excluded_count = Counter() - - for patient in patients: - for pf in patient.phenotypes: - if pf.is_present: - # A present phenotypic feature must be counted in. - present_count[pf.identifier] += 1 - # and it also implies presence of its ancestors. - for anc in hpo.graph.get_ancestors(pf): - present_count[anc] += 1 - else: - # An excluded phenotypic feature - excluded_count[pf.identifier] += 1 - for desc in hpo.graph.get_descendants(pf): - # implies exclusion of its descendants. - excluded_count[desc] += 1 - - total_count = Counter() - for term_id, count in present_count.items(): - total_count[term_id] += count - - for term_id, count in excluded_count.items(): - total_count[term_id] += count - - final_hpo = [] - for term_id in present_count: - # Keep the term if it is mentioned at least *n* times (incl. being excluded) - # in the cohort - n_all = total_count[term_id] - if n_all >= min_n_of_patients_with_term: - final_hpo.append(term_id) - - return tuple(final_hpo) diff --git a/src/gpsea/analysis/predicate/genotype/__init__.py b/src/gpsea/analysis/predicate/genotype/__init__.py index 86dd261a2..82d2819d5 100644 --- a/src/gpsea/analysis/predicate/genotype/__init__.py +++ b/src/gpsea/analysis/predicate/genotype/__init__.py @@ -1,13 +1,13 @@ from ._api import GenotypePolyPredicate from ._api import VariantPredicate from ._counter import AlleleCounter -from ._gt_predicates import boolean_predicate, groups_predicate, filtering_predicate, recessive_predicate +from ._gt_predicates import boolean_predicate, groups_predicate, filtering_predicate from ._gt_predicates import ModeOfInheritancePredicate from ._variant import VariantPredicates, ProteinPredicates __all__ = [ 'GenotypePolyPredicate', - 'boolean_predicate', 'groups_predicate', 'filtering_predicate', 'recessive_predicate', + 'boolean_predicate', 'groups_predicate', 'filtering_predicate', 'ModeOfInheritancePredicate', 'AlleleCounter', 'VariantPredicate', 'VariantPredicates', 'ProteinPredicates', diff --git a/src/gpsea/analysis/predicate/genotype/_api.py b/src/gpsea/analysis/predicate/genotype/_api.py index 39cf020be..07d6fa078 100644 --- a/src/gpsea/analysis/predicate/genotype/_api.py +++ b/src/gpsea/analysis/predicate/genotype/_api.py @@ -2,7 +2,7 @@ import typing from gpsea.model import Variant -from .._api import PolyPredicate, Categorization, PatientCategory +from .._api import PolyPredicate, Categorization class GenotypePolyPredicate(PolyPredicate[Categorization], metaclass=abc.ABCMeta): @@ -13,15 +13,6 @@ class GenotypePolyPredicate(PolyPredicate[Categorization], metaclass=abc.ABCMeta pass -class RecessiveGroupingPredicate(GenotypePolyPredicate, metaclass=abc.ABCMeta): - BOTH = Categorization(PatientCategory(0, 'Both', 'The patient belongs in both groups.')) - ONE = Categorization(PatientCategory(1, 'One', 'The patient belongs in one of the two groups.')) - NEITHER = Categorization(PatientCategory(2, 'Neither', 'The patient does not belong in either group.')) - - def get_categorizations(self) -> typing.Sequence[Categorization]: - return RecessiveGroupingPredicate.BOTH, RecessiveGroupingPredicate.ONE, RecessiveGroupingPredicate.NEITHER - - class VariantPredicate(metaclass=abc.ABCMeta): """ `VariantPredicate` tests if a variant meets a certain criterion. 
diff --git a/src/gpsea/analysis/predicate/genotype/_gt_predicates.py b/src/gpsea/analysis/predicate/genotype/_gt_predicates.py index 43a39594a..8693b67bd 100644 --- a/src/gpsea/analysis/predicate/genotype/_gt_predicates.py +++ b/src/gpsea/analysis/predicate/genotype/_gt_predicates.py @@ -7,7 +7,7 @@ from gpsea.model import Patient, Sex from .._api import Categorization, PatientCategory, PatientCategories -from ._api import GenotypePolyPredicate, RecessiveGroupingPredicate +from ._api import GenotypePolyPredicate from ._api import VariantPredicate from ._counter import AlleleCounter @@ -277,70 +277,6 @@ def filtering_predicate( ) -class AlleleCountingRecessivePredicate(RecessiveGroupingPredicate): - # NOT PART OF THE PUBLIC API - # TODO: this predicate is a bit weird and I think it should eventually go away. - # Therefore, I do not write any tests at this point. - - def __init__( - self, - allele_counter: AlleleCounter, - ): - self._allele_counter = allele_counter - - def get_question_base(self) -> str: - return self._allele_counter.get_question() - - def test(self, patient: Patient) -> typing.Optional[Categorization]: - self._check_patient(patient) - - allele_count = self._allele_counter.count(patient) - if allele_count == 0: - return RecessiveGroupingPredicate.NEITHER - elif allele_count == 1: - return RecessiveGroupingPredicate.ONE - elif allele_count == 2: - return RecessiveGroupingPredicate.BOTH - else: - return None - - def __eq__(self, value: object) -> bool: - return ( - isinstance(value, AlleleCountingRecessivePredicate) - and self._allele_counter == value._allele_counter - ) - - def __hash__(self) -> int: - return hash((self._allele_counter,)) - - def __str__(self) -> str: - return ( - f"AlleleCountingRecessivePredicate(allele_counter={self._allele_counter})" - ) - - def __repr__(self) -> str: - return str(self) - - -def recessive_predicate( - variant_predicate: VariantPredicate, -) -> GenotypePolyPredicate: - """ - Create a recessive grouping predicate from given `variant_predicate` - to bin the patient into :class:`RecessiveGroupingPredicate.NEITHER`, - :class:`RecessiveGroupingPredicate.ONE`, or :class:`RecessiveGroupingPredicate.BOTH`, - depending on the number of variant alleles matching the variant predicate. 
- - The patient is assigned into a group in the following manner: - * 0 alleles: :class:`RecessiveGroupingPredicate.NEITHER` - * 1 alleles: :class:`RecessiveGroupingPredicate.ONE` - * 2 alleles: :class:`RecessiveGroupingPredicate.BOTH` - * other: `None` - """ - allele_counter = AlleleCounter(predicate=variant_predicate) - return AlleleCountingRecessivePredicate(allele_counter=allele_counter) - - @dataclasses.dataclass(eq=True, frozen=True) class GenotypeGroup: allele_count: int diff --git a/tests/analysis/test_pscore.py b/tests/analysis/pscore/test_counting_scorer.py similarity index 100% rename from tests/analysis/test_pscore.py rename to tests/analysis/pscore/test_counting_scorer.py diff --git a/tests/analysis/test_config.py b/tests/analysis/test_config.py deleted file mode 100644 index afff55e1e..000000000 --- a/tests/analysis/test_config.py +++ /dev/null @@ -1,52 +0,0 @@ -import pytest - -from gpsea.analysis import CohortAnalysisConfiguration, MtcStrategy - - -class TestCohortAnalysisConfiguration: - - def test_default_values(self): - config = CohortAnalysisConfiguration() - - assert config.pval_correction == 'bonferroni' - assert config.min_patients_w_hpo is None - assert config.include_sv is False - assert config.mtc_alpha == pytest.approx(.05) - assert config.mtc_strategy == MtcStrategy.ALL_PHENOTYPE_TERMS - assert config.terms_to_test is None - - def test_set_all_terms_strategy(self): - config = CohortAnalysisConfiguration() - assert config.mtc_strategy == MtcStrategy.ALL_PHENOTYPE_TERMS - - config.specify_terms_strategy(('HP:0001250', 'HP:0001166')) - - assert config.mtc_strategy == MtcStrategy.SPECIFY_TERMS - assert config.terms_to_test == ('HP:0001250', 'HP:0001166') - - def test_set_hpo_mtc_strategy(self): - config = CohortAnalysisConfiguration() - assert config.mtc_strategy == MtcStrategy.ALL_PHENOTYPE_TERMS - - config.hpo_mtc_strategy() - - assert config.mtc_strategy == MtcStrategy.HPO_MTC - assert config.min_patients_w_hpo == pytest.approx(0.2) - - @pytest.mark.parametrize( - 'value', - [ - -.0, - 1.01, - ] - ) - def test_cannot_set_invalid_threshold_in_hpo_mtc_strategy( - self, - value: float, - ): - config = CohortAnalysisConfiguration() - - with pytest.raises(ValueError) as e: - config.hpo_mtc_strategy(value) - - assert e.value.args[0] == f'`min_patients_w_hpo` must be in range (0, 1] but was {value}' diff --git a/tests/analysis/test_examples.py b/tests/analysis/test_examples.py deleted file mode 100644 index 8f17161d6..000000000 --- a/tests/analysis/test_examples.py +++ /dev/null @@ -1,137 +0,0 @@ -import typing - -import hpotk -import pytest - -import pandas as pd - -from gpsea.analysis import configure_cohort_analysis, GenotypePhenotypeAnalysisResult -from gpsea.analysis.predicate import PatientCategories -from gpsea.analysis.predicate.genotype import VariantPredicates, boolean_predicate -from gpsea.model import Cohort, VariantEffect - - -# TODO: remove at some point! 
diff --git a/tests/analysis/test_examples.py b/tests/analysis/test_examples.py
deleted file mode 100644
index 8f17161d6..000000000
--- a/tests/analysis/test_examples.py
+++ /dev/null
@@ -1,137 +0,0 @@
-import typing
-
-import hpotk
-import pytest
-
-import pandas as pd
-
-from gpsea.analysis import configure_cohort_analysis, GenotypePhenotypeAnalysisResult
-from gpsea.analysis.predicate import PatientCategories
-from gpsea.analysis.predicate.genotype import VariantPredicates, boolean_predicate
-from gpsea.model import Cohort, VariantEffect
-
-
-# TODO: remove at some point!
-@pytest.mark.skip('Obsolete tests')
-class TestCohortAnalysis:
-
-    def test_compare_by_variant_effect(
-        self,
-        suox_cohort: Cohort,
-        hpo: hpotk.MinimalOntology,
-    ):
-        pd.set_option('expand_frame_repr', False)
-        cohort_analysis = configure_cohort_analysis(suox_cohort, hpo)
-        is_missense = VariantPredicates.variant_effect(VariantEffect.MISSENSE_VARIANT, tx_id='NM_001032386.2')
-        results = cohort_analysis.compare_hpo_vs_genotype(is_missense)
-        print(results)
-        summary = results.summarize(hpo, PatientCategories.YES)
-        print(summary)
-
-    def test_get_count(
-        self,
-        suox_cohort: Cohort,
-        hpo: hpotk.MinimalOntology,
-    ):
-        """
-        This test shows how to manipulate the results object to get the counts we need
-        Let's use Arachnodactyly [HP:0001166] Yes (Genotype NO 1/10); No (Genotype YES 13/16) as an example
-        that is
-        Arachnodactyly (+) MISSENSE (-) = 1
-        Arachnodactyly (-) MISSENSE (-) = 9
-        Arachnodactyly (+) MISSENSE (+) = 13
-        Arachnodactyly (-) MISSENSE (+) = 3
-        In the Toy file, we have
-        Arachnodactyly TRUE, MISSENSE (snv) TRUE: A,B,D,E;G;J;M;P;Q;R;T;V;Y = 13
-        Arachnodactyly FALSE, MISSENSE (snv) TRUE: C,K,N=3
-        Arachnodactyly TRUE, MISSENSE (snv) FALSE: H = 1
-        Arachnodactyly FALSE, MISSENSE (snv) FALSE: F,I,L,O,S;U,W,X,Z=9
-        This is the output
-        MISSENSE_VARIANT on NM_1234.5                 No                Yes
-                                            Count  Percent    Count  Percent   p value  Corrected p value
-        Arachnodactyly [HP:0001166]          1/10    10.0%    13/16   81.25%  0.000781           0.020299
-        """
-        pd.set_option('expand_frame_repr', False)
-        cohort_analysis = configure_cohort_analysis(suox_cohort, hpo)
-        is_missense = VariantPredicates.variant_effect(VariantEffect.MISSENSE_VARIANT, tx_id='NM_001032386.2')
-        results = cohort_analysis.compare_hpo_vs_genotype(is_missense)
-
-        # Let's make sure we know what class we have
-        assert isinstance(results, GenotypePhenotypeAnalysisResult)
-
-        # We expect that we have YES and NO as phenotype categories
-        phenotype_categories_tuple = results.phenotype_categories
-        assert len(phenotype_categories_tuple) == 2
-        assert PatientCategories.YES in phenotype_categories_tuple
-        assert PatientCategories.NO in phenotype_categories_tuple
-
-        # The all_counts field has the counts of the phenotypes
-        all_counts = results.all_counts
-        assert isinstance(all_counts, typing.Mapping)
-
-        # We tested 66 HPO terms
-        assert len(all_counts) == 66
-
-        # The index of all_counts is a Tuple with (HPO TermId, BooleanPredicate
-        # Let's test Seizure - we should have one row for each Patient Predicate
-        counts = all_counts[hpotk.TermId.from_curie("HP:0001250")]
-
-        # The YES row is Seizure YES -- according to the above, we have 11 (MISSENSE NO) and 17 (MISSENSE YES)
-        assert counts.loc[PatientCategories.YES, PatientCategories.NO] == 11
-        assert counts.loc[PatientCategories.YES, PatientCategories.YES] == 17
-
-        # The NO row is Seizure NO -- according to the above, we have 0 (MISSENSE NO) and 7 (MISSENSE YES)
-        assert counts.loc[PatientCategories.NO, PatientCategories.NO] == 0
-        assert counts.loc[PatientCategories.NO, PatientCategories.YES] == 7
-
-        # In total, 35 patients were categorized
-        assert counts.sum().sum() == 35
-
-    def test_get_positive_count(
-        self,
-        suox_cohort: Cohort,
-        hpo: hpotk.MinimalOntology,
-    ):
-        """
-        This test shows how to get the counts for positive HPO terms
-        Let's use Seizure [HP:0001250] Yes (Genotype NO 11/11); No (Genotype YES 17/24) as an example
-        that is
-        Seizure (+) MISSENSE (-) = 11
-        Seizure (-) MISSENSE (-) = 0
-        Seizure (+) MISSENSE (+) = 17
-        Seizure (-) MISSENSE (+) = 7
-        This means we expect 11+17=28
-        See the previous test for further information
-        """
-        pd.set_option('expand_frame_repr', False)
-        cohort_analysis = configure_cohort_analysis(suox_cohort, hpo)
-        is_missense = VariantPredicates.variant_effect(VariantEffect.MISSENSE_VARIANT, tx_id='NM_001032386.2')
-        results = cohort_analysis.compare_hpo_vs_genotype(is_missense)
-        all_counts = results.all_counts
-        # The index of all_counts is a Tuple with (HPO TermId, BooleanPredicate
-        # Let's test Arachnodactyly - we should have one row for each Patient Predicate
-        counts = all_counts[hpotk.TermId.from_curie("HP:0001250")]
-        # total_observed_HPO = HeuristicSamplerMtcFilter.get_number_of_positive_observations(arachnodactyly_counts)
-        total_observed_HPO = counts.loc[PatientCategories.YES, PatientCategories.NO] + counts.loc[PatientCategories.YES, PatientCategories.YES]
-        assert total_observed_HPO == 28
-
-    def test_compare_symptom_count_vs_genotype(
-        self,
-        suox_cohort: Cohort,
-        hpo: hpotk.MinimalOntology,
-    ):
-        cohort_analysis = configure_cohort_analysis(suox_cohort, hpo)
-
-        phenotype_group_terms = (
-            'HP:0012638',  # Abnormal nervous system physiology
-            'HP:0001939',  # Abnormality of metabolism/homeostasis
-        )
-        variant_predicate = VariantPredicates.variant_effect(VariantEffect.MISSENSE_VARIANT, 'NM_001032386.2')
-        gt_predicate = boolean_predicate(variant_predicate)
-
-        phenotype_group_results = cohort_analysis.compare_genotype_vs_phenotype_group_count(
-            gt_predicate=gt_predicate,
-            phenotype_group_terms=phenotype_group_terms,
-        )
-
-        assert phenotype_group_results.p_value == pytest.approx(0.9345982107594922)
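The docstring of the removed `test_get_count` spells out a 2x2 contingency table for Arachnodactyly versus a missense genotype (13, 1, 3, 9) and quotes a p value of 0.000781. That number can be reproduced with a plain Fisher exact test, independently of the deleted `CohortAnalysis` machinery; the sketch below assumes only `pandas` and `scipy` are available.

```python
import pandas as pd
from scipy.stats import fisher_exact

# Counts taken from the removed test's docstring:
# rows = Arachnodactyly present/absent, columns = missense variant yes/no.
counts = pd.DataFrame(
    [[13, 1],   # Arachnodactyly (+): 13 with a missense variant, 1 without
     [3, 9]],   # Arachnodactyly (-): 3 with a missense variant, 9 without
    index=['HP:0001166 present', 'HP:0001166 absent'],
    columns=['missense yes', 'missense no'],
)

odds_ratio, p_value = fisher_exact(counts.to_numpy(), alternative='two-sided')
print(round(p_value, 6))  # ~0.000781, matching the value quoted in the docstring
```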
From e9ff7d45689895719b821abcddb1eddd8f8a352c Mon Sep 17 00:00:00 2001
From: Daniel Danis
Date: Wed, 4 Sep 2024 13:46:18 +0200
Subject: [PATCH 2/2] Update `README.md`.

---
 README.md | 43 ++++---------------------------------------
 1 file changed, 4 insertions(+), 39 deletions(-)

diff --git a/README.md b/README.md
index 9ceab1808..2a498eb42 100644
--- a/README.md
+++ b/README.md
@@ -5,46 +5,11 @@
 
 GPSEA is a Python library for discovery of genotype-phenotype associations.
 
-An example of simple genotype-phenotype association analysis
+See the [Tutorial](https://monarch-initiative.github.io/gpsea/stable/tutorial.html)
+and a comprehensive [User guide](https://monarch-initiative.github.io/gpsea/stable/user-guide/index.html)
+for more information.
 
-```python
-# Load HPO
-import hpotk
-
-store = hpotk.configure_ontology_store()
-hpo = store.load_minimal_hpo()
-
-# Load a cohort of phenopackets
-from gpsea.data import get_toy_cohort
-
-cohort = get_toy_cohort()
-
-# Analyze genotype-phenotype associations
-from gpsea.analysis import configure_cohort_analysis
-from gpsea.analysis.predicate import PatientCategories
-
-from gpsea.model import VariantEffect
-
-cohort_analysis = configure_cohort_analysis(cohort, hpo)
-frameshift = cohort_analysis.compare_by_variant_effect(VariantEffect.FRAMESHIFT_VARIANT, tx_id='NM_1234.5')
-
-frameshift.summarize(hpo, category=PatientCategories.YES)
-```
-
-provides a pandas data frame with genotype-phenotype correlations:
-
-```text
-FRAMESHIFT_VARIANT on NM_1234.5                       No              Yes
-                                                  Count  Percent  Count  Percent   p value  Corrected p value
- Arachnodactyly [HP:0001166]                        1/10     10%   13/16     81%   0.000781           0.020299
- Abnormality of the musculature [HP:0003011]         6/6    100%   11/11    100%   1.000000           1.000000
- Abnormal nervous system physiology [HP:0012638]     9/9    100%   15/15    100%   1.000000           1.000000
- ...                                                 ...     ...     ...     ...        ...                ...
-```
-
-## Documentation
-
-Check out the User guide and the API reference for more info:
+The documentation comes in two flavors:
 
 - [Stable documentation](https://monarch-initiative.github.io/gpsea/stable/) (last release on `main` branch)
 - [Latest documentation](https://monarch-initiative.github.io/gpsea/latest) (bleeding edge, latest commit on `develop` branch)