From c689d52c48c30a363e6923d5c32f940c15f01144 Mon Sep 17 00:00:00 2001
From: Shing Zhan <shing.zhan@gmail.com>
Date: Wed, 30 Aug 2023 21:19:32 +0100
Subject: [PATCH] WIP

---
 python/tests/test_beagle.py | 36 ++++++++++++++++++++++++++++--------
 1 file changed, 28 insertions(+), 8 deletions(-)

diff --git a/python/tests/test_beagle.py b/python/tests/test_beagle.py
index 38cc78db98..04cd920e81 100644
--- a/python/tests/test_beagle.py
+++ b/python/tests/test_beagle.py
@@ -398,29 +398,49 @@ def compute_state_probability_matrix_equation1(fm, bm, ref_h, query_h, rho, mu):
     return sm
 
 
-def interpolate_allele_probabilities_equation1(sm, ref_h, genotyped_pos, imputed_pos):
+def interpolate_allele_probabilities_equation1(
+    sm, ref_h, query_h, genotyped_pos, imputed_pos
+):
     """
     Compute the interpolated allele probabilities following Equation 1 of BB2016.
 
+    This function takes the output of `compute_state_probability_matrix_equation1`.
+
     Assume all biallelic sites.
 
     :param numpy.ndarray sm: HMM state probability matrix.
     :param numpy.ndarray ref_h: Reference haplotypes.
+    :param numpy.ndarray query_h: One query haplotype.
     :param numpy.ndarray genotyped_pos: Site positions at genotyped markers.
     :param numpy.ndarray imputed_pos: Site positions at imputed markers.
     :return: Interpolated allele probabilities.
     :rtype: numpy.ndarray
     """
     h = ref_h.shape[1]
-    # m = len(genotyped_pos)
+    m = len(genotyped_pos)
     x = len(imputed_pos)
-    p = np.array((x, 2), dtype=np.float64)
+    assert sm.shape == (m, h)
+    assert ref_h.shape == (m + x, h)
+    assert len(query_h) == m + x
     weights = get_weights(genotyped_pos, imputed_pos)
-    # Compute probabilities of allele a at imputed markers
-    a = 0
-    for i in np.arange(x):
-        for j in np.arange(h):
-            p[i, a] = weights[i] * sm[i, j] + (1 - weights[i]) * sm[i + 1, j]
+    assert len(weights) == x
+    p = np.zeros((x, 2), dtype=np.float64)
+
+    def _compute_allele_probabilities(a):
+        """Helper function to compute probability of allele a at imputed markers."""
+        k = 0  # Keep track of imputed marker index
+        # l = 0  # Keep track of genotyped marker index
+        for i in np.arange(m + x):
+            if query_h[i] != -1:
+                continue
+            for j in np.arange(h):
+                if ref_h[i, j] == a:
+                    p[k, a] += weights[i] * sm[i, j]
+                    p[k, a] += (1 - weights[i]) * sm[i + 1, j]
+            k += 1
+
+    _compute_allele_probabilities(a=0)
+    _compute_allele_probabilities(a=1)
     return p