Skip to content

Commit

Permalink
Add comments
Browse files Browse the repository at this point in the history
  • Loading branch information
szhan committed Aug 27, 2023
1 parent a59841d commit cdf4b45
Showing 1 changed file with 22 additions and 4 deletions.
26 changes: 22 additions & 4 deletions python/tests/test_beagle.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ def compute_state_probability_matrix(fm, bm, ref_h, query_h, rho, mu):
return (sm, fwd_hap_probs, bwd_hap_probs)


def compute_interpolated_haplotype_matrix(
def interpolate_haplotype_probability_matrix(
fwd_hap_probs, bwd_hap_probs, genotyped_pos, imputed_pos
):
"""
Expand Down Expand Up @@ -365,47 +365,65 @@ def run_beagle(ref_h, query_h, pos):
"""
Run the BEAGLE 4.1 imputation algorithm.
`ref_h` and `query_h` span all genotyped and imputed markers.
:param numpy.ndarray ref_h: Reference haplotypes.
:param numpy.ndarray query_h: One query haplotype.
:param numpy.ndarray pos: Site positions.
:param numpy.ndarray pos: Site positions of all the markers.
:return: MAP alleles at imputed markers in the query haplotype.
:rtype: numpy.ndarray
"""
assert ref_h.shape[0] == len(pos)
assert query_h.shape[0] == len(pos)
# Index of genotyped markers in the query haplotype
genotyped_pos_idx = np.where(query_h != -1)[0]
# Index of imputed markers in the query haplotype
imputed_pos_idx = np.where(query_h == -1)[0]
assert len(genotyped_pos_idx) > 0
assert len(imputed_pos_idx) > 0
# Site positions of genotyped markers
genotyped_pos = pos[genotyped_pos_idx]
# Site positions of imputed markers
imputed_pos = pos[imputed_pos_idx]
m = len(genotyped_pos)
x = len(imputed_pos)
assert m + x == len(pos)
h = ref_h.shape[1]
# Subset the reference haplotypes to genotyped markers
ref_h_genotyped = ref_h[genotyped_pos_idx, :]
assert ref_h_genotyped.shape == (m, h)
# Subset the query haplotype to genotyped markers
query_h_genotyped = query_h[genotyped_pos_idx]
assert len(query_h_genotyped) == m
# Set mismatch probabilities at genotyped markers
mu = get_mismatch_prob(genotyped_pos)
assert len(mu) == m
# Set switch probabilities at genotyped markers
rho = get_switch_prob(genotyped_pos, h, ne=10) # Small ref. panel
assert len(rho) == m
# Compute forward probability matrix over genotyped markers
fm = compute_forward_probability_matrix(ref_h_genotyped, query_h_genotyped, rho, mu)
assert fm.shape == (m, h)
# Compute backward probability matrix over genotyped markers
bm = compute_backward_probability_matrix(
ref_h_genotyped, query_h_genotyped, rho, mu
)
assert bm.shape == (m, h)
_, fwd_hap_probs, bwd_hap_probs = compute_state_probability_matrix(
# Compute HMM state probability matrix over genotyped markers
# and forward and backward haplotype probability matrices
sm, fwd_hap_probs, bwd_hap_probs = compute_state_probability_matrix(
fm, bm, ref_h_genotyped, query_h_genotyped, rho, mu
)
assert sm.shape == (m, h) # sm not used further
assert fwd_hap_probs.shape == (m, 2)
assert bwd_hap_probs.shape == (m, 2)
i_hap_probs = compute_interpolated_haplotype_matrix(
# Interpolate haplotype probabilities
# from genotype markers to imputed markers
i_hap_probs = interpolate_haplotype_probability_matrix(
fwd_hap_probs, bwd_hap_probs, genotyped_pos, imputed_pos
)
assert i_hap_probs.shape == (x, 2)
# Get MAP alleles at imputed markers
imputed_alleles = get_map_alleles(i_hap_probs)
assert len(imputed_alleles) == x
return imputed_alleles

0 comments on commit cdf4b45

Please sign in to comment.