diff --git a/src/gpsea/analysis/clf/_gt_classifiers.py b/src/gpsea/analysis/clf/_gt_classifiers.py index c89bc4895..991aa8151 100644 --- a/src/gpsea/analysis/clf/_gt_classifiers.py +++ b/src/gpsea/analysis/clf/_gt_classifiers.py @@ -249,7 +249,7 @@ def monoallelic_classifier( a_predicate: VariantPredicate, b_predicate: typing.Optional[VariantPredicate] = None, a_label: str = "A", - b_label: str = "B", + b_label: typing.Optional[str] = None, ) -> GenotypeClassifier: """ Monoallelic classifier bins patient into one of two groups, `A` and `B`, @@ -260,17 +260,20 @@ def monoallelic_classifier( :param a_predicate: predicate to test if the variants meet the criteria of the first group (named `A` by default). - :param b_predicate: predicate to test if the variants - meet the criteria of the second group or `None` - if the inverse of `a_predicate` should be used (named `B` by default). + :param b_predicate: predicate to test if the variants meet + the criteria of the second group or `None` if the complement + of the `a_predicate` should be used (named ``A^C`` by default). :param a_label: display name of the `a_predicate` (default ``"A"``). - :param b_label: display name of the `b_predicate` (default ``"B"``). + :param b_label: display name of the `b_predicate`. + If `b_label` is not provided, then set to ``"{a_label}^C"`` (e.g. ``A^C`` if ``a_label=A``). """ assert isinstance(a_label, str) - assert isinstance(b_label, str) - - if b_predicate is None: - b_predicate = ~a_predicate + a_predicate, b_predicate, b_label = _validate_b_predicate( + a_predicate=a_predicate, + b_predicate=b_predicate, + a_label=a_label, + b_label=b_label, + ) return PolyCountingGenotypeClassifier.monoallelic( a_predicate=a_predicate, @@ -284,7 +287,7 @@ def biallelic_classifier( a_predicate: VariantPredicate, b_predicate: typing.Optional[VariantPredicate] = None, a_label: str = "A", - b_label: str = "B", + b_label: typing.Optional[str] = None, partitions: typing.Collection[typing.Union[int, typing.Collection[int]]] = ( 0, 1, @@ -302,22 +305,25 @@ def biallelic_classifier( :param a_predicate: predicate to test if the variants meet the criteria of the first group (named `A` by default). :param b_predicate: predicate to test if the variants meet - the criteria of the second group or `None` if an inverse - of `a_predicate` should be used (named `B` by default). + the criteria of the second group or `None` if the complement + of the `a_predicate` should be used (named ``A^C`` by default). :param a_label: display name of the `a_predicate` (default ``"A"``). - :param b_label: display name of the `b_predicate` (default ``"B"``). + :param b_label: display name of the `b_predicate`. + If `b_label` is not provided, then set to ``"{a_label}^C"`` (e.g. ``A^C`` if ``a_label=A``). :param partitions: a sequence with partition identifiers (default ``(0, 1, 2)``). """ # Q/C assert isinstance(a_label, str) - assert isinstance(b_label, str) - + a_predicate, b_predicate, b_label = _validate_b_predicate( + a_predicate=a_predicate, + b_predicate=b_predicate, + a_label=a_label, + b_label=b_label, + ) + partitions = _fixate_partitions(partitions) _qc_partitions(partitions) - if b_predicate is None: - b_predicate = ~a_predicate - return PolyCountingGenotypeClassifier.biallelic( a_predicate=a_predicate, b_predicate=b_predicate, @@ -326,6 +332,28 @@ def biallelic_classifier( partitions=partitions, ) +def _validate_b_predicate( + a_predicate: VariantPredicate, + b_predicate: typing.Optional[VariantPredicate], + a_label: str, + b_label: typing.Optional[str], +) -> typing.Tuple[ + VariantPredicate, VariantPredicate, str, +]: + if b_predicate is None: + b_predicate = ~a_predicate + if b_label is None: + # Using a regular uppercase `C` instead of Unicode complement (`∁`) + # to reduce the 😕 factor. + b_label = f"{a_label}^C" # complement of A + else: + assert isinstance(b_label, str) + else: + if b_label is None: + b_label = f"{a_label}^C" # complement of A + + return a_predicate, b_predicate, b_label + def _build_ac_to_cat( partitions: typing.Collection[typing.Collection[int]], diff --git a/tests/analysis/clf/test_gt_predicates.py b/tests/analysis/clf/test_gt_predicates.py index bd00577e2..800ff0a3c 100644 --- a/tests/analysis/clf/test_gt_predicates.py +++ b/tests/analysis/clf/test_gt_predicates.py @@ -133,7 +133,7 @@ class TestAllelePredicates: @pytest.mark.parametrize( "individual_name,expected_name", [ - ("adam", "B"), # 0/0 & 0/1 + ("adam", "A^C"), # 0/0 & 0/1 ("eve", "A"), # 0/1 & 0/0 ("cain", "A"), # 0/1 & 0/0 ], @@ -162,15 +162,15 @@ def test_monoallelic_predicate__general_stuff( gt_predicate = monoallelic_classifier(is_missense, is_synonymous) - assert gt_predicate.summarize_classes() == "Allele group: A, B" + assert gt_predicate.summarize_classes() == "Allele group: A, A^C" @pytest.mark.parametrize( "individual_name,expected_name", [ - ("walt", "A/B"), # 0/1 & 0/1 - ("skyler", "A/B"), # 0/1 & 0/1 + ("walt", "A/A^C"), # 0/1 & 0/1 + ("skyler", "A/A^C"), # 0/1 & 0/1 ("flynn", "A/A"), # 1/1 & 0/0 - ("holly", "B/B"), # 0/0 & 1/1 + ("holly", "A^C/A^C"), # 0/0 & 1/1 ], ) def test_biallelic_predicate( @@ -197,7 +197,7 @@ def test_biallelic_predicate__general_stuff( gt_predicate = biallelic_classifier(is_missense, is_synonymous) - assert gt_predicate.summarize_classes() == "Allele group: A/A, A/B, B/B" + assert gt_predicate.summarize_classes() == "Allele group: A/A, A/A^C, A^C/A^C" class TestSexPredicate: