Skip to content

Commit

Permalink
Dividing by zero should be NaN
Browse files Browse the repository at this point in the history
Allow C and Python portions of the code to divide by zero natively,
relying on their respective runtime implementations. Remove code that
forces the two-locus statistics to return zero when we encounter 0/0
situations. In testing, it was ensured that some test cases return NaN
values, which we diff appropriately. The mingw implementation seems to
want to cast my NAN values as floats, so we preemptively cast them as
floats in the tests.
  • Loading branch information
lkirk authored and mergify[bot] committed Feb 21, 2024
1 parent 7a0b863 commit 813e361
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 29 deletions.
11 changes: 6 additions & 5 deletions c/tests/test_stats.c
Original file line number Diff line number Diff line change
Expand Up @@ -2045,10 +2045,10 @@ test_paper_ex_two_site(void)
double truth_two_sets[18] = { 1, 1, 0.1111111111111111, 0.1111111111111111,
0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111,
1, 1, 1, 1, 0.1111111111111111, 0.1111111111111111, 1, 1, 1, 1 };
double truth_three_sets[27]
= { 1, 1, 0, 0.1111111111111111, 0.1111111111111111, 0, 0.1111111111111111,
0.1111111111111111, 0, 0.1111111111111111, 0.1111111111111111, 0, 1, 1, 1,
1, 1, 1, 0.1111111111111111, 0.1111111111111111, 0, 1, 1, 1, 1, 1, 1 };
double truth_three_sets[27] = { 1, 1, NAN, 0.1111111111111111, 0.1111111111111111,
NAN, 0.1111111111111111, 0.1111111111111111, NAN, 0.1111111111111111,
0.1111111111111111, NAN, 1, 1, 1, 1, 1, 1, 0.1111111111111111,
0.1111111111111111, NAN, 1, 1, 1, 1, 1, 1 };

tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
paper_ex_mutations, paper_ex_individuals, NULL, 0);
Expand Down Expand Up @@ -2104,7 +2104,8 @@ test_paper_ex_two_site(void)
row_sites, num_sites, col_sites, 0, result);

CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(result_size * num_sample_sets, result, truth_three_sets);
assert_arrays_almost_equal_nan(
result_size * num_sample_sets, result, truth_three_sets);

tsk_treeseq_free(&ts);
tsk_safe_free(row_sites);
Expand Down
16 changes: 16 additions & 0 deletions c/tests/testlib.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,22 @@ void unsort_edges(tsk_edge_table_t *edges, size_t start);
} while (0); \
}

/* Array equality if the arrays contain NaN values
NB: the float cast for NaNs is for mingw, which complains without */
#define assert_arrays_almost_equal_nan(len, a, b) \
{ \
do { \
tsk_size_t _j; \
for (_j = 0; _j < len; _j++) { \
if (isnan((float) a[_j]) || isnan((float) b[_j])) { \
CU_ASSERT_EQUAL_FATAL(isnan((float) a[_j]), isnan((float) b[_j])); \
} else { \
CU_ASSERT_DOUBLE_EQUAL(a[_j], b[_j], 1e-9); \
} \
} \
} while (0); \
}

extern const char *single_tree_ex_nodes;
extern const char *single_tree_ex_edges;
extern const char *single_tree_ex_sites;
Expand Down
16 changes: 4 additions & 12 deletions c/tskit/trees.c
Original file line number Diff line number Diff line change
Expand Up @@ -3597,11 +3597,7 @@ r2_summary_func(tsk_size_t state_dim, const double *state,
double D = p_AB - (p_A * p_B);
double denom = p_A * p_B * (1 - p_A) * (1 - p_B);

if (denom == 0 && D == 0) {
result[j] = 0;
} else {
result[j] = (D * D) / denom;
}
result[j] = (D * D) / denom;
}
return 0;
}
Expand Down Expand Up @@ -3637,8 +3633,8 @@ D_prime_summary_func(tsk_size_t state_dim, const double *state,
double p_B = p_AB + p_aB;

double D = p_AB - (p_A * p_B);
result[j] = 0;
if (D > 0) {

if (D >= 0) {
result[j] = D / TSK_MIN(p_A * (1 - p_B), (1 - p_A) * p_B);
} else if (D < 0) {
result[j] = D / TSK_MIN(p_A * p_B, (1 - p_A) * (1 - p_B));
Expand Down Expand Up @@ -3681,11 +3677,7 @@ r_summary_func(tsk_size_t state_dim, const double *state,
double D = p_AB - (p_A * p_B);
double denom = p_A * p_B * (1 - p_A) * (1 - p_B);

if (denom == 0 && D == 0) {
result[j] = 0;
} else {
result[j] = D / sqrt(denom);
}
result[j] = D / sqrt(denom);
}
return 0;
}
Expand Down
26 changes: 14 additions & 12 deletions python/tests/test_ld_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
"""
Test cases for two-locus statistics
"""
import contextlib
import io
from itertools import combinations_with_replacement
from itertools import permutations
Expand All @@ -40,6 +41,12 @@
from tests.test_highlevel import get_example_tree_sequences


@contextlib.contextmanager
def suppress_division_by_zero_warning():
with np.errstate(invalid="ignore", divide="ignore"):
yield


class BitSet:
"""BitSet object, which stores values in arrays of unsigned integers.
The rows represent all possible values a bit can take, and the rows
Expand Down Expand Up @@ -729,9 +736,7 @@ def r2_summary_func(
D = p_AB - (p_A * p_B)
denom = p_A * p_B * (1 - p_A) * (1 - p_B)

if denom == 0 and D == 0:
result[k] = 0
else:
with suppress_division_by_zero_warning():
result[k] = (D * D) / denom


Expand Down Expand Up @@ -782,12 +787,11 @@ def D_prime_summary_func(
p_B = p_AB + p_aB

D = p_AB - (p_A * p_B)
if D == 0:
result[k] = 0
elif D > 0:
result[k] = D / min(p_A * (1 - p_B), (1 - p_A) * p_B)
else:
result[k] = D / min(p_A * p_B, (1 - p_A) * (1 - p_B))
with suppress_division_by_zero_warning():
if D >= 0:
result[k] = D / min(p_A * (1 - p_B), (1 - p_A) * p_B)
else:
result[k] = D / min(p_A * p_B, (1 - p_A) * (1 - p_B))


def r_summary_func(
Expand All @@ -806,9 +810,7 @@ def r_summary_func(
D = p_AB - (p_A * p_B)
denom = p_A * p_B * (1 - p_A) * (1 - p_B)

if denom == 0 and D == 0:
result[k] = 0
else:
with suppress_division_by_zero_warning():
result[k] = D / np.sqrt(denom)


Expand Down

0 comments on commit 813e361

Please sign in to comment.