Skip to content

Commit

Permalink
Create two-site tests
Browse files Browse the repository at this point in the history
These tests exercise r2 on multiple sample sets (paper example tree)
and correlated/uncorrelated multiallelic sites for all statistics.
  • Loading branch information
lkirk committed Jul 18, 2023
1 parent f4ae923 commit be0a18d
Showing 1 changed file with 324 additions and 0 deletions.
324 changes: 324 additions & 0 deletions c/tests/test_stats.c
Original file line number Diff line number Diff line change
Expand Up @@ -1980,6 +1980,324 @@ test_ld_silent_mutations(void)
free(base_ts);
}

static void
test_paper_ex_two_site(void)
{
tsk_treeseq_t ts;
double *result;
tsk_size_t result_size;
int ret;

tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
paper_ex_mutations, paper_ex_individuals, NULL, 0);

tsk_size_t sample_set_sizes[3];
tsk_size_t num_sample_sets;
tsk_id_t sample_sets[ts.num_samples * 3];

// First sample set contains all of the samples
sample_set_sizes[0] = ts.num_samples;
num_sample_sets = 1;
double truth_one_set[6] = { 1, 0.1111111111111111, 0.1111111111111111, 1, 1, 1 };

for (tsk_size_t i = 0; i < ts.num_samples; i++) {
sample_sets[i] = (tsk_id_t) i;
}

ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 0, NULL, 0,
NULL, 0, &result_size, &result);

CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(result_size, 6);
assert_arrays_almost_equal(result_size * num_sample_sets, result, truth_one_set);
tsk_safe_free(result);

// Second sample set contains all of the samples
sample_set_sizes[1] = ts.num_samples;
num_sample_sets = 2;
for (tsk_size_t i = ts.num_samples; i < ts.num_samples * 2; i++) {
sample_sets[i] = (tsk_id_t) i - (tsk_id_t) ts.num_samples;
}

double truth_two_sets[12] = { 1, 1, 0.1111111111111111, 0.1111111111111111,
0.1111111111111111, 0.1111111111111111, 1, 1, 1, 1, 1, 1 };

ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 0, NULL, 0,
NULL, 0, &result_size, &result);

CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(result_size, 6);
assert_arrays_almost_equal(result_size * num_sample_sets, result, truth_two_sets);
tsk_safe_free(result);

// Third sample set contains the first two samples
sample_set_sizes[2] = 2;
num_sample_sets = 3;
for (tsk_size_t i = ts.num_samples * 2; i < (ts.num_samples * 3) - 2; i++) {
sample_sets[i] = (tsk_id_t) i - (tsk_id_t) ts.num_samples * 2;
}

double truth_three_sets[18] = { 1, 1, 0, 0.1111111111111111, 0.1111111111111111, 0,
0.1111111111111111, 0.1111111111111111, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1 };

ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 0, NULL, 0,
NULL, 0, &result_size, &result);

CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(result_size, 6);
assert_arrays_almost_equal(result_size * num_sample_sets, result, truth_three_sets);
tsk_safe_free(result);

tsk_treeseq_free(&ts);
}

static void
test_two_site_correlated_multiallelic(void)
{
const char *nodes = "1 0 -1\n"
"1 0 -1\n"
"1 0 -1\n"
"1 0 -1\n"
"1 0 -1\n"
"1 0 -1\n"
"1 0 -1\n"
"1 0 -1\n"
"1 0 -1\n"
"0 2 -1\n"
"0 4 -1\n"
"0 6 -1\n"
"0 8 -1\n"
"0 10 -1\n"
"0 12 -1\n"
"0 14 -1\n"
"0 16 -1\n";
const char *edges = "0 20 9 0,1\n"
"0 20 10 2,9\n"
"0 20 11 4,5\n"
"0 20 12 6,11\n"
"0 20 13 7,8\n"
"0 20 14 3,10\n"
"0 10 15 12\n"
"10 20 15 13\n"
"0 10 15 14\n"
"10 20 15 14\n"
"10 20 16 12\n"
"0 10 16 13\n"
"0 10 16 15\n"
"10 20 16 15\n";
const char *sites = "7 A\n"
"13 G\n";
const char *mutations = "0 15 T -1\n"
"0 14 G 0\n"
"1 15 T -1\n"
"1 13 C 2\n";

tsk_treeseq_t ts;
tsk_treeseq_from_text(&ts, 20, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);

tsk_size_t sample_set_sizes[1] = { ts.num_samples };
tsk_size_t num_sample_sets = 1;
tsk_id_t sample_sets[ts.num_samples];
for (tsk_size_t i = 0; i < ts.num_samples; i++) {
sample_sets[i] = (tsk_id_t) i;
}

int ret;
double *result;
tsk_size_t result_size;

double truth_D[3]
= { 0.043209876543209874, -0.018518518518518517, 0.05555555555555555 };
double truth_D2[3]
= { 0.023844603634269844, 0.02384460363426984, 0.02384460363426984 };
double truth_r2[3] = { 1, 1, 1 };
double truth_D_prime[3]
= { 0.7777777777777777, 0.4444444444444444, 0.6666666666666666 };
double truth_r[3]
= { 0.18377223398316206, -0.12212786219416509, 0.2609542781331212 };
double truth_Dz[3]
= { 0.0033870175616860566, 0.003387017561686057, 0.003387017561686057 };
double truth_pi2[3]
= { 0.04579247743399549, 0.04579247743399549, 0.0457924774339955 };

ret = tsk_treeseq_D(&ts, num_sample_sets, sample_set_sizes, sample_sets, 0, NULL, 0,
NULL, 0, &result_size, &result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(result_size, 3);
assert_arrays_almost_equal(result_size, result, truth_D);
tsk_safe_free(result);

ret = tsk_treeseq_D2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 0, NULL, 0,
NULL, 0, &result_size, &result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(result_size, 3);
assert_arrays_almost_equal(result_size, result, truth_D2);
tsk_safe_free(result);

ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 0, NULL, 0,
NULL, 0, &result_size, &result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(result_size, 3);
assert_arrays_almost_equal(result_size, result, truth_r2);
tsk_safe_free(result);

ret = tsk_treeseq_D_prime(&ts, num_sample_sets, sample_set_sizes, sample_sets, 0,
NULL, 0, NULL, 0, &result_size, &result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(result_size, 3);
assert_arrays_almost_equal(result_size, result, truth_D_prime);
tsk_safe_free(result);

ret = tsk_treeseq_r(&ts, num_sample_sets, sample_set_sizes, sample_sets, 0, NULL, 0,
NULL, 0, &result_size, &result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(result_size, 3);
assert_arrays_almost_equal(result_size, result, truth_r);
tsk_safe_free(result);

ret = tsk_treeseq_Dz(&ts, num_sample_sets, sample_set_sizes, sample_sets, 0, NULL, 0,
NULL, 0, &result_size, &result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(result_size, 3);
assert_arrays_almost_equal(result_size, result, truth_Dz);
tsk_safe_free(result);

ret = tsk_treeseq_pi2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 0, NULL,
0, NULL, 0, &result_size, &result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(result_size, 3);
assert_arrays_almost_equal(result_size, result, truth_pi2);
tsk_safe_free(result);

tsk_treeseq_free(&ts);
}

static void
test_two_site_uncorrelated_multiallelic(void)
{
const char *nodes = "1 0 -1\n"
"1 0 -1\n"
"1 0 -1\n"
"1 0 -1\n"
"1 0 -1\n"
"1 0 -1\n"
"1 0 -1\n"
"1 0 -1\n"
"1 0 -1\n"
"0 2 -1\n"
"0 4 -1\n"
"0 6 -1\n"
"0 8 -1\n"
"0 10 -1\n"
"0 12 -1\n"
"0 14 -1\n"
"0 16 -1\n"
"0 2 -1\n"
"0 4 -1\n"
"0 6 -1\n"
"0 8 -1\n"
"0 10 -1\n"
"0 12 -1\n"
"0 14 -1\n"
"0 16 -1\n";
const char *edges = "0 10 9 0,1\n"
"10 20 17 0,3\n"
"0 10 10 2,9\n"
"10 20 18 6,17\n"
"0 10 11 3,4\n"
"10 20 19 1,4\n"
"0 10 12 5,11\n"
"10 20 20 7,19\n"
"0 10 13 6,7\n"
"10 20 21 2,5\n"
"0 10 14 8,13\n"
"10 20 22 8,21\n"
"0 10 15 10,12\n"
"10 20 23 18,20\n"
"0 10 16 14,15\n"
"10 20 24 22,23\n";
const char *sites = "7 A\n"
"13 G\n";
const char *mutations = "0 15 T -1\n"
"0 12 G 0\n"
"1 23 T -1\n"
"1 20 A 2\n";

tsk_treeseq_t ts;
tsk_treeseq_from_text(&ts, 20, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);

tsk_size_t sample_set_sizes[1] = { ts.num_samples };
tsk_size_t num_sample_sets = 1;
tsk_id_t sample_sets[ts.num_samples];
for (tsk_size_t i = 0; i < ts.num_samples; i++) {
sample_sets[i] = (tsk_id_t) i;
}

int ret;
double *result;
tsk_size_t result_size;

double truth_D[3] = { 0.05555555555555555, 0.0, 0.05555555555555555 };
double truth_D2[3] = { 0.024691358024691357, 0.0, 0.024691358024691357 };
double truth_r2[3] = { 1, 0, 1 };
double truth_D_prime[3] = { 0.6666666666666665, 0.0, 0.6666666666666665 };
double truth_r[3] = { 0.24999999999999997, 0.0, 0.24999999999999997 };
double truth_Dz[3] = { 0.0, 0.0, 0.0 };
double truth_pi2[3]
= { 0.04938271604938272, 0.04938271604938272, 0.04938271604938272 };

ret = tsk_treeseq_D(&ts, num_sample_sets, sample_set_sizes, sample_sets, 0, NULL, 0,
NULL, 0, &result_size, &result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(result_size, 3);
assert_arrays_almost_equal(result_size, result, truth_D);
tsk_safe_free(result);

ret = tsk_treeseq_D2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 0, NULL, 0,
NULL, 0, &result_size, &result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(result_size, 3);
assert_arrays_almost_equal(result_size, result, truth_D2);
tsk_safe_free(result);

ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 0, NULL, 0,
NULL, 0, &result_size, &result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(result_size, 3);
assert_arrays_almost_equal(result_size, result, truth_r2);
tsk_safe_free(result);

ret = tsk_treeseq_D_prime(&ts, num_sample_sets, sample_set_sizes, sample_sets, 0,
NULL, 0, NULL, 0, &result_size, &result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(result_size, 3);
assert_arrays_almost_equal(result_size, result, truth_D_prime);
tsk_safe_free(result);

ret = tsk_treeseq_r(&ts, num_sample_sets, sample_set_sizes, sample_sets, 0, NULL, 0,
NULL, 0, &result_size, &result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(result_size, 3);
assert_arrays_almost_equal(result_size, result, truth_r);
tsk_safe_free(result);

ret = tsk_treeseq_Dz(&ts, num_sample_sets, sample_set_sizes, sample_sets, 0, NULL, 0,
NULL, 0, &result_size, &result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(result_size, 3);
assert_arrays_almost_equal(result_size, result, truth_Dz);
tsk_safe_free(result);

ret = tsk_treeseq_pi2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 0, NULL,
0, NULL, 0, &result_size, &result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(result_size, 3);
assert_arrays_almost_equal(result_size, result, truth_pi2);
tsk_safe_free(result);

tsk_treeseq_free(&ts);
}

static void
test_simplest_divergence_matrix(void)
{
Expand Down Expand Up @@ -2214,6 +2532,12 @@ main(int argc, char **argv)
{ "test_ld_multi_mutations", test_ld_multi_mutations },
{ "test_ld_silent_mutations", test_ld_silent_mutations },

{ "test_paper_ex_two_site", test_paper_ex_two_site },
{ "test_two_site_correlated_multiallelic",
test_two_site_correlated_multiallelic },
{ "test_two_site_uncorrelated_multiallelic",
test_two_site_uncorrelated_multiallelic },

{ "test_simplest_divergence_matrix", test_simplest_divergence_matrix },
{ "test_simplest_divergence_matrix_windows",
test_simplest_divergence_matrix_windows },
Expand Down

0 comments on commit be0a18d

Please sign in to comment.