From c689d52c48c30a363e6923d5c32f940c15f01144 Mon Sep 17 00:00:00 2001 From: Shing Zhan Date: Wed, 30 Aug 2023 21:19:32 +0100 Subject: [PATCH] WIP --- python/tests/test_beagle.py | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/python/tests/test_beagle.py b/python/tests/test_beagle.py index 38cc78db98..04cd920e81 100644 --- a/python/tests/test_beagle.py +++ b/python/tests/test_beagle.py @@ -398,29 +398,49 @@ def compute_state_probability_matrix_equation1(fm, bm, ref_h, query_h, rho, mu): return sm -def interpolate_allele_probabilities_equation1(sm, ref_h, genotyped_pos, imputed_pos): +def interpolate_allele_probabilities_equation1( + sm, ref_h, query_h, genotyped_pos, imputed_pos +): """ Compute the interpolated allele probabilities following Equation 1 of BB2016. + This function takes the output of `compute_state_probability_matrix_equation1`. + Assume all biallelic sites. :param numpy.ndarray sm: HMM state probability matrix. :param numpy.ndarray ref_h: Reference haplotypes. + :param numpy.ndarray query_h: One query haplotype. :param numpy.ndarray genotyped_pos: Site positions at genotyped markers. :param numpy.ndarray imputed_pos: Site positions at imputed markers. :return: Interpolated allele probabilities. :rtype: numpy.ndarray """ h = ref_h.shape[1] - # m = len(genotyped_pos) + m = len(genotyped_pos) x = len(imputed_pos) - p = np.array((x, 2), dtype=np.float64) + assert sm.shape == (m, h) + assert ref_h.shape == (m + x, h) + assert len(query_h) == m + x weights = get_weights(genotyped_pos, imputed_pos) - # Compute probabilities of allele a at imputed markers - a = 0 - for i in np.arange(x): - for j in np.arange(h): - p[i, a] = weights[i] * sm[i, j] + (1 - weights[i]) * sm[i + 1, j] + assert len(weights) == x + p = np.zeros((x, 2), dtype=np.float64) + + def _compute_allele_probabilities(a): + """Helper function to compute probability of allele a at imputed markers.""" + k = 0 # Keep track of imputed marker index + # l = 0 # Keep track of genotyped marker index + for i in np.arange(m + x): + if query_h[i] != -1: + continue + for j in np.arange(h): + if ref_h[i, j] == a: + p[k, a] += weights[i] * sm[i, j] + p[k, a] += (1 - weights[i]) * sm[i + 1, j] + k += 1 + + _compute_allele_probabilities(a=0) + _compute_allele_probabilities(a=1) return p