From 8bffca18e65d6a4d180d3808196bce885ccc115f Mon Sep 17 00:00:00 2001 From: Erick Matsen Date: Tue, 18 Jun 2024 04:37:49 -0700 Subject: [PATCH] new file: tests/test_molevol.py --- tests/test_molevol.py | 133 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 133 insertions(+) create mode 100644 tests/test_molevol.py diff --git a/tests/test_molevol.py b/tests/test_molevol.py new file mode 100644 index 00000000..40aba5fd --- /dev/null +++ b/tests/test_molevol.py @@ -0,0 +1,133 @@ +import torch +import netam.molevol as molevol +from netam import framework + +from netam.sequences import ( + nt_idx_tensor_of_str, + translate_sequence, + AA_STR_SORTED, + CODONS, + NT_STR_SORTED, +) + +# These happen to be the same as some examples in test_models.py but that's fine. +# If it was important that they were shared, we should put them in a conftest.py. +ex_mut_probs = torch.tensor([0.01, 0.02, 0.03]) +ex_sub_probs = torch.tensor( + [[0.0, 0.3, 0.5, 0.2], [0.4, 0.0, 0.1, 0.5], [0.2, 0.3, 0.0, 0.5]] +) +ex_parent_codon_idxs = nt_idx_tensor_of_str("ACG") +parent_nt_seq = "CAGGTGCAGCTGGTGGAG" # QVQLVE +weights_path = "data/shmple_weights/my_shmoof" + + +def test_build_mutation_matrix(): + correct_tensor = torch.tensor( + [ + # probability of mutation to each nucleotide (first entry in the first row + # is probability of no mutation) + [0.99, 0.003, 0.005, 0.002], + [0.008, 0.98, 0.002, 0.01], + [0.006, 0.009, 0.97, 0.015], + ] + ) + + computed_tensor = molevol.build_mutation_matrices( + ex_parent_codon_idxs.unsqueeze(0), + ex_mut_probs.unsqueeze(0), + ex_sub_probs.unsqueeze(0), + ).squeeze() + + assert torch.allclose(correct_tensor, computed_tensor) + + +def test_neutral_aa_mut_probs(): + # This is the probability of a mutation to a codon that translates to the + # same. In this case, ACG is the codon, and it's fourfold degenerate. Thus + # we just multiply the probability of A and C staying the same from the + # correct_tensor just above. + correct_tensor = torch.tensor([1 - 0.99 * 0.98]) + + computed_tensor = molevol.neutral_aa_mut_probs( + ex_parent_codon_idxs.unsqueeze(0), + ex_mut_probs.unsqueeze(0), + ex_sub_probs.unsqueeze(0), + ).squeeze() + + assert torch.allclose(correct_tensor, computed_tensor) + + +def test_normalize_sub_probs(): + parent_idxs = nt_idx_tensor_of_str("AC") + sub_probs = torch.tensor([[0.2, 0.3, 0.4, 0.1], [0.1, 0.2, 0.3, 0.4]]) + + expected_normalized = torch.tensor( + [[0.0, 0.375, 0.5, 0.125], [0.125, 0.0, 0.375, 0.5]] + ) + normalized_sub_probs = molevol.normalize_sub_probs(parent_idxs, sub_probs) + + assert normalized_sub_probs.shape == (2, 4), "Result has incorrect shape" + assert torch.allclose( + normalized_sub_probs, expected_normalized + ), "Unexpected normalized values" + + +def iterative_aaprob_of_mut_and_sub(parent_codon, mut_probs, sub_probs): + """ + Original version of codon_to_aa_probabilities, used for testing. + """ + aa_probs = {} + for aa in AA_STR_SORTED: + aa_probs[aa] = 0.0 + + # iterate through all possible child codons + for child_codon in CODONS: + try: + aa = translate_sequence(child_codon) + except ValueError: # check for STOP codon + continue + + # iterate through codon sites and compute total probability of potential child codon + child_prob = 1.0 + for isite in range(3): + if parent_codon[isite] == child_codon[isite]: + child_prob *= 1.0 - mut_probs[isite] + else: + child_prob *= mut_probs[isite] + child_prob *= sub_probs[isite][NT_STR_SORTED.index(child_codon[isite])] + + aa_probs[aa] += child_prob + + # need renormalization factor so that amino acid probabilities sum to 1, + # since probabilities to STOP codon are dropped + psum = sum(aa_probs.values()) + + return torch.tensor([aa_probs[aa] / psum for aa in AA_STR_SORTED]) + + +def test_aaprob_of_mut_and_sub(): + crepe_path = "data/cnn_joi_sml-shmoof_small" + crepe = framework.load_crepe(crepe_path) + [rates], [subs] = crepe([parent_nt_seq]) + mut_probs = 1.0 - torch.exp(-torch.tensor(rates.squeeze())) + parent_codon = parent_nt_seq[0:3] + parent_codon_idxs = nt_idx_tensor_of_str(parent_codon) + codon_mut_probs = mut_probs[0:3] + codon_subs = torch.tensor(subs[0:3]) + + iterative_result = iterative_aaprob_of_mut_and_sub( + parent_codon, codon_mut_probs, codon_subs + ) + + parent_codon_idxs = parent_codon_idxs.unsqueeze(0) + codon_mut_probs = codon_mut_probs.unsqueeze(0) + codon_subs = codon_subs.unsqueeze(0) + + assert torch.allclose( + iterative_result, + molevol.aaprob_of_mut_and_sub( + parent_codon_idxs, + codon_mut_probs, + codon_subs, + ).squeeze(), + )