Skip to content

Commit

Permalink
Use the complement notation in monoallelic and biallelic classifiers.
Browse files Browse the repository at this point in the history
  • Loading branch information
ielis committed Jan 21, 2025
1 parent 3fc7788 commit cfb9bc8
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 24 deletions.
64 changes: 46 additions & 18 deletions src/gpsea/analysis/clf/_gt_classifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ def monoallelic_classifier(
a_predicate: VariantPredicate,
b_predicate: typing.Optional[VariantPredicate] = None,
a_label: str = "A",
b_label: str = "B",
b_label: typing.Optional[str] = None,
) -> GenotypeClassifier:
"""
Monoallelic classifier bins patient into one of two groups, `A` and `B`,
Expand All @@ -260,17 +260,20 @@ def monoallelic_classifier(
:param a_predicate: predicate to test if the variants
meet the criteria of the first group (named `A` by default).
:param b_predicate: predicate to test if the variants
meet the criteria of the second group or `None`
if the inverse of `a_predicate` should be used (named `B` by default).
:param b_predicate: predicate to test if the variants meet
the criteria of the second group or `None` if the complement
of the `a_predicate` should be used (named ``A^C`` by default).
:param a_label: display name of the `a_predicate` (default ``"A"``).
:param b_label: display name of the `b_predicate` (default ``"B"``).
:param b_label: display name of the `b_predicate`.
If `b_label` is not provided, then set to ``"{a_label}^C"`` (e.g. ``A^C`` if ``a_label=A``).
"""
assert isinstance(a_label, str)
assert isinstance(b_label, str)

if b_predicate is None:
b_predicate = ~a_predicate
a_predicate, b_predicate, b_label = _validate_b_predicate(
a_predicate=a_predicate,
b_predicate=b_predicate,
a_label=a_label,
b_label=b_label,
)

return PolyCountingGenotypeClassifier.monoallelic(
a_predicate=a_predicate,
Expand All @@ -284,7 +287,7 @@ def biallelic_classifier(
a_predicate: VariantPredicate,
b_predicate: typing.Optional[VariantPredicate] = None,
a_label: str = "A",
b_label: str = "B",
b_label: typing.Optional[str] = None,
partitions: typing.Collection[typing.Union[int, typing.Collection[int]]] = (
0,
1,
Expand All @@ -302,22 +305,25 @@ def biallelic_classifier(
:param a_predicate: predicate to test if the variants meet
the criteria of the first group (named `A` by default).
:param b_predicate: predicate to test if the variants meet
the criteria of the second group or `None` if an inverse
of `a_predicate` should be used (named `B` by default).
the criteria of the second group or `None` if the complement
of the `a_predicate` should be used (named ``A^C`` by default).
:param a_label: display name of the `a_predicate` (default ``"A"``).
:param b_label: display name of the `b_predicate` (default ``"B"``).
:param b_label: display name of the `b_predicate`.
If `b_label` is not provided, then set to ``"{a_label}^C"`` (e.g. ``A^C`` if ``a_label=A``).
:param partitions: a sequence with partition identifiers (default ``(0, 1, 2)``).
"""
# Q/C
assert isinstance(a_label, str)
assert isinstance(b_label, str)

a_predicate, b_predicate, b_label = _validate_b_predicate(
a_predicate=a_predicate,
b_predicate=b_predicate,
a_label=a_label,
b_label=b_label,
)

partitions = _fixate_partitions(partitions)
_qc_partitions(partitions)

if b_predicate is None:
b_predicate = ~a_predicate

return PolyCountingGenotypeClassifier.biallelic(
a_predicate=a_predicate,
b_predicate=b_predicate,
Expand All @@ -326,6 +332,28 @@ def biallelic_classifier(
partitions=partitions,
)

def _validate_b_predicate(
a_predicate: VariantPredicate,
b_predicate: typing.Optional[VariantPredicate],
a_label: str,
b_label: typing.Optional[str],
) -> typing.Tuple[
VariantPredicate, VariantPredicate, str,
]:
if b_predicate is None:
b_predicate = ~a_predicate
if b_label is None:
# Using a regular uppercase `C` instead of Unicode complement (`∁`)
# to reduce the 😕 factor.
b_label = f"{a_label}^C" # complement of A
else:
assert isinstance(b_label, str)
else:
if b_label is None:
b_label = f"{a_label}^C" # complement of A

return a_predicate, b_predicate, b_label


def _build_ac_to_cat(
partitions: typing.Collection[typing.Collection[int]],
Expand Down
12 changes: 6 additions & 6 deletions tests/analysis/clf/test_gt_predicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ class TestAllelePredicates:
@pytest.mark.parametrize(
"individual_name,expected_name",
[
("adam", "B"), # 0/0 & 0/1
("adam", "A^C"), # 0/0 & 0/1
("eve", "A"), # 0/1 & 0/0
("cain", "A"), # 0/1 & 0/0
],
Expand Down Expand Up @@ -162,15 +162,15 @@ def test_monoallelic_predicate__general_stuff(

gt_predicate = monoallelic_classifier(is_missense, is_synonymous)

assert gt_predicate.summarize_classes() == "Allele group: A, B"
assert gt_predicate.summarize_classes() == "Allele group: A, A^C"

@pytest.mark.parametrize(
"individual_name,expected_name",
[
("walt", "A/B"), # 0/1 & 0/1
("skyler", "A/B"), # 0/1 & 0/1
("walt", "A/A^C"), # 0/1 & 0/1
("skyler", "A/A^C"), # 0/1 & 0/1
("flynn", "A/A"), # 1/1 & 0/0
("holly", "B/B"), # 0/0 & 1/1
("holly", "A^C/A^C"), # 0/0 & 1/1
],
)
def test_biallelic_predicate(
Expand All @@ -197,7 +197,7 @@ def test_biallelic_predicate__general_stuff(

gt_predicate = biallelic_classifier(is_missense, is_synonymous)

assert gt_predicate.summarize_classes() == "Allele group: A/A, A/B, B/B"
assert gt_predicate.summarize_classes() == "Allele group: A/A, A/A^C, A^C/A^C"


class TestSexPredicate:
Expand Down

0 comments on commit cfb9bc8

Please sign in to comment.