From 365de8897f8af6d255c6704f7f5141e3a2401805 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Mon, 8 May 2023 11:02:41 -0700 Subject: [PATCH 01/88] add prototypes --- c/tskit/tables.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/c/tskit/tables.h b/c/tskit/tables.h index 38f3096c9d..6d265cd86e 100644 --- a/c/tskit/tables.h +++ b/c/tskit/tables.h @@ -4784,6 +4784,24 @@ int tsk_diff_iter_next(tsk_diff_iter_t *self, double *left, double *right, tsk_edge_list_t *edges_out, tsk_edge_list_t *edges_in); void tsk_diff_iter_print_state(const tsk_diff_iter_t *self, FILE *out); +typedef struct { + /* don't leak private types into public API */ + struct __tsk_modular_simplifier_impl_t * pimpl; +} tsk_modular_simplifier_t; + +int tsk_modular_simplifier_init(tsk_modular_simplifier_t * self, + tsk_table_collection_t *tables, const tsk_id_t *samples, + tsk_size_t num_samples, tsk_flags_t options); +int tsk_modular_simplifier_free(tsk_modular_simplifier_t * self); +// metadata... +int tsk_modular_simplifier_add_edge(tsk_modular_simplifier_t * self, + double left, double right, tsk_id_t parent, tsk_id_t child); +int tsk_modular_simplifier_merge_ancestors(tsk_modular_simplifier_t * self, tsk_id_t parent); + +// runs the simplifier, thus processing ancient edges +// present in the input edge table. +int tsk_modular_simplifier_finalise(tsk_modular_simplifier_t * self, tsk_id_t *node_map); + #ifdef __cplusplus } #endif From 4ba5513984f2b8ef741e847643215272ca89b35d Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Mon, 8 May 2023 11:03:42 -0700 Subject: [PATCH 02/88] clang-format --- c/tskit/tables.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/c/tskit/tables.h b/c/tskit/tables.h index 6d265cd86e..24d06c5e64 100644 --- a/c/tskit/tables.h +++ b/c/tskit/tables.h @@ -4786,21 +4786,22 @@ void tsk_diff_iter_print_state(const tsk_diff_iter_t *self, FILE *out); typedef struct { /* don't leak private types into public API */ - struct __tsk_modular_simplifier_impl_t * pimpl; + struct __tsk_modular_simplifier_impl_t *pimpl; } tsk_modular_simplifier_t; -int tsk_modular_simplifier_init(tsk_modular_simplifier_t * self, - tsk_table_collection_t *tables, const tsk_id_t *samples, - tsk_size_t num_samples, tsk_flags_t options); -int tsk_modular_simplifier_free(tsk_modular_simplifier_t * self); +int tsk_modular_simplifier_init(tsk_modular_simplifier_t *self, + tsk_table_collection_t *tables, const tsk_id_t *samples, tsk_size_t num_samples, + tsk_flags_t options); +int tsk_modular_simplifier_free(tsk_modular_simplifier_t *self); // metadata... -int tsk_modular_simplifier_add_edge(tsk_modular_simplifier_t * self, - double left, double right, tsk_id_t parent, tsk_id_t child); -int tsk_modular_simplifier_merge_ancestors(tsk_modular_simplifier_t * self, tsk_id_t parent); +int tsk_modular_simplifier_add_edge(tsk_modular_simplifier_t *self, double left, + double right, tsk_id_t parent, tsk_id_t child); +int tsk_modular_simplifier_merge_ancestors( + tsk_modular_simplifier_t *self, tsk_id_t parent); // runs the simplifier, thus processing ancient edges // present in the input edge table. -int tsk_modular_simplifier_finalise(tsk_modular_simplifier_t * self, tsk_id_t *node_map); +int tsk_modular_simplifier_finalise(tsk_modular_simplifier_t *self, tsk_id_t *node_map); #ifdef __cplusplus } From 26262926a0291b8a3347c2c9987fe573e1f8fb37 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Mon, 8 May 2023 11:06:49 -0700 Subject: [PATCH 03/88] add implementations --- c/tskit/tables.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/c/tskit/tables.c b/c/tskit/tables.c index 8eea85f5ad..a5ed65c012 100644 --- a/c/tskit/tables.c +++ b/c/tskit/tables.c @@ -13607,3 +13607,54 @@ tsk_diff_iter_next(tsk_diff_iter_t *self, double *ret_left, double *ret_right, self->tree_left = right; return ret; } + +typedef struct __tsk_modular_simplifier_impl_t { + simplifier_t simplifier; +} tsk_modular_simplifier_impl_t; + +int +tsk_modular_simplifier_init(tsk_modular_simplifier_t *self, + tsk_table_collection_t *tables, const tsk_id_t *samples, tsk_size_t num_samples, + tsk_flags_t options) +{ + int ret = 0; + self->pimpl = tsk_malloc(sizeof(tsk_modular_simplifier_impl_t)); + if (self->pimpl == NULL) { + ret = TSK_ERR_NO_MEMORY; + goto out; + } + ret = simplifier_init( + &self->pimpl->simplifier, samples, num_samples, tables, options); + if (ret != 0) { + goto out; + } + +out: + return ret; +} + +// FIXME: parent not used +int +tsk_modular_simplifier_add_edge(tsk_modular_simplifier_t *self, double left, + double right, tsk_id_t parent, tsk_id_t child) +{ + int ret = simplifier_extract_ancestry(&self->pimpl->simplifier, left, right, child); + return ret; +} + +int +tsk_modular_simplifier_merge_ancestors(tsk_modular_simplifier_t *self, tsk_id_t parent) +{ + int ret = simplifier_merge_ancestors(&self->pimpl->simplifier, parent); + self->pimpl->simplifier.segment_queue_size = 0; + return ret; +} + +int +tsk_modular_simplifier_finalise(tsk_modular_simplifier_t *self, tsk_id_t *node_map) +{ + int ret = 0; + simplifier_t *simplifier = &self->pimpl->simplifier; + ret = simplifier_run(simplifier, node_map); + return ret; +} From 12a12457da691866acedbbed3bda84cb62a84273 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Mon, 8 May 2023 11:15:22 -0700 Subject: [PATCH 04/88] alloc array for tracking parent contiguity --- c/tskit/tables.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/c/tskit/tables.c b/c/tskit/tables.c index a5ed65c012..c292322dcd 100644 --- a/c/tskit/tables.c +++ b/c/tskit/tables.c @@ -13610,6 +13610,8 @@ tsk_diff_iter_next(tsk_diff_iter_t *self, double *ret_left, double *ret_right, typedef struct __tsk_modular_simplifier_impl_t { simplifier_t simplifier; + tsk_id_t last_parent_processed; + tsk_id_t *input_nodes; } tsk_modular_simplifier_impl_t; int @@ -13629,6 +13631,12 @@ tsk_modular_simplifier_init(tsk_modular_simplifier_t *self, goto out; } + self->pimpl->last_parent_processed = -1; + self->pimpl->input_nodes = tsk_malloc(tables->nodes.num_rows * sizeof(tsk_id_t)); + if (self->pimpl->input_nodes == NULL) { + ret = TSK_ERR_NO_MEMORY; + goto out; + } out: return ret; } @@ -13658,3 +13666,17 @@ tsk_modular_simplifier_finalise(tsk_modular_simplifier_t *self, tsk_id_t *node_m ret = simplifier_run(simplifier, node_map); return ret; } + +int +tsk_modular_simplifier_free(tsk_modular_simplifier_t *self) +{ + int ret = 0; + ret = simplifier_free(&self->pimpl->simplifier); + if (ret != 0) { + goto out; + } + tsk_safe_free(self->pimpl->input_nodes); + tsk_safe_free(self->pimpl); +out: + return ret; +} From 0656a784403b3d38c6a8575c210f13991723b09b Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Mon, 8 May 2023 11:35:04 -0700 Subject: [PATCH 05/88] check for contiguous parent processing --- c/tskit/tables.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/c/tskit/tables.c b/c/tskit/tables.c index c292322dcd..b3054af260 100644 --- a/c/tskit/tables.c +++ b/c/tskit/tables.c @@ -13641,12 +13641,21 @@ tsk_modular_simplifier_init(tsk_modular_simplifier_t *self, return ret; } -// FIXME: parent not used int tsk_modular_simplifier_add_edge(tsk_modular_simplifier_t *self, double left, double right, tsk_id_t parent, tsk_id_t child) { - int ret = simplifier_extract_ancestry(&self->pimpl->simplifier, left, right, child); + int ret = 0; + + if (parent != self->pimpl->last_parent_processed + && self->pimpl->last_parent_processed != TSK_NULL) { + if (self->pimpl->input_nodes[parent] != TSK_NULL) { + ret = TSK_ERR_EDGES_NONCONTIGUOUS_PARENTS; + goto out; + } + } + ret = simplifier_extract_ancestry(&self->pimpl->simplifier, left, right, child); +out: return ret; } @@ -13654,7 +13663,13 @@ int tsk_modular_simplifier_merge_ancestors(tsk_modular_simplifier_t *self, tsk_id_t parent) { int ret = simplifier_merge_ancestors(&self->pimpl->simplifier, parent); + if (ret != 0) { + goto out; + } + /* mark this input parent as "seen" */ + self->pimpl->input_nodes[parent] = parent; self->pimpl->simplifier.segment_queue_size = 0; +out: return ret; } From c3d791bf0531a658ac2e6fe13eea25cac7c9234c Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Mon, 8 May 2023 12:13:05 -0700 Subject: [PATCH 06/88] test skeleton --- c/tests/test_tables.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 6de6675ff6..dfcd488cc5 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8629,6 +8629,14 @@ test_simplify_metadata(void) tsk_table_collection_free(&tables); } +static void +test_modular_simplifier(void) +{ + int ret; + tsk_table_collection_t tables; + CU_ASSERT_EQUAL_FATAL(1, 0); +} + static void test_edge_update_invalidates_index(void) { From 293a5f5dc7f418d67953baafd1c87bd4156df8f2 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Mon, 8 May 2023 12:52:51 -0700 Subject: [PATCH 07/88] building up tests --- c/tests/test_tables.c | 48 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index dfcd488cc5..7e72a60bde 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -24,6 +24,7 @@ #include "testlib.h" #include "tskit/core.h" +#include #include #include @@ -8630,10 +8631,53 @@ test_simplify_metadata(void) } static void -test_modular_simplifier(void) +test_table_collection_modular_simplify_simple_tree(void) { int ret; tsk_table_collection_t tables; + tsk_edge_table_t new_edges; + tsk_id_t new_parent1, new_parent2, new_child1, new_child2; + tsk_modular_simplifier_t simplifier; + tsk_id_t samples[2]; + ret = tsk_table_collection_init(&tables, 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); + ret = tsk_edge_table_init(&new_edges, 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); + + parse_nodes(single_tree_ex_nodes, &tables.nodes); + parse_edges(single_tree_ex_edges, &tables.edges); + + tsk_table_collection_free(&tables); + tsk_edge_table_free(&new_edges); + + // "record new births" into node table + new_parent1 = tsk_node_table_add_row(&tables.nodes, 0, -1.0, -1, -1, NULL, 0); + CU_ASSERT_TRUE(new_parent1 >= 0); + new_parent2 = tsk_node_table_add_row(&tables.nodes, 0, -1.0, -1, -1, NULL, 0); + CU_ASSERT_TRUE(new_parent2 >= 0); + new_child1 = tsk_node_table_add_row(&tables.nodes, 0, -2.0, -1, -1, NULL, 0); + CU_ASSERT_TRUE(new_child1 >= 0); + new_child2 = tsk_node_table_add_row(&tables.nodes, 0, -2.0, -1, -1, NULL, 0); + CU_ASSERT_TRUE(new_child2 >= 0); + + // record edges + ret = tsk_edge_table_add_row( + &new_edges, 0, tables.sequence_length, 0, new_parent1, NULL, 0); + CU_ASSERT_TRUE(ret >= 0); + ret = tsk_edge_table_add_row( + &new_edges, 0, tables.sequence_length, 2, new_parent2, NULL, 0); + CU_ASSERT_TRUE(ret >= 0); + ret = tsk_edge_table_add_row( + &new_edges, 0, tables.sequence_length, new_parent1, new_child1, NULL, 0); + CU_ASSERT_TRUE(ret >= 0); + ret = tsk_edge_table_add_row( + &new_edges, 0, tables.sequence_length, new_parent2, new_child2, NULL, 0); + CU_ASSERT_TRUE(ret >= 0); + + samples[0] = new_child1; + samples[1] = new_child2; + ret = tsk_modular_simplifier_init(&simplifier, &tables, samples, 2, 0); + CU_ASSERT_TRUE(ret >= 0); CU_ASSERT_EQUAL_FATAL(1, 0); } @@ -11600,6 +11644,8 @@ main(int argc, char **argv) { "test_table_collection_equals_options", test_table_collection_equals_options }, { "test_table_collection_simplify_errors", test_table_collection_simplify_errors }, + { "test_table_collection_modular_simplify_simple_tree", + test_table_collection_modular_simplify_simple_tree }, { "test_table_collection_time_units", test_table_collection_time_units }, { "test_table_collection_reference_sequence", test_table_collection_reference_sequence }, From e61c48ccae83ec39065ca30ad8565057219744e9 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Mon, 8 May 2023 13:23:42 -0700 Subject: [PATCH 08/88] whew --- c/tests/test_tables.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 7e72a60bde..f1cc6ae1de 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8638,18 +8638,17 @@ test_table_collection_modular_simplify_simple_tree(void) tsk_edge_table_t new_edges; tsk_id_t new_parent1, new_parent2, new_child1, new_child2; tsk_modular_simplifier_t simplifier; - tsk_id_t samples[2]; + tsk_id_t *samples; ret = tsk_table_collection_init(&tables, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); ret = tsk_edge_table_init(&new_edges, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); + samples = tsk_malloc(2 * sizeof(tsk_id_t)); + CU_ASSERT_TRUE(samples != NULL) parse_nodes(single_tree_ex_nodes, &tables.nodes); parse_edges(single_tree_ex_edges, &tables.edges); - tsk_table_collection_free(&tables); - tsk_edge_table_free(&new_edges); - // "record new births" into node table new_parent1 = tsk_node_table_add_row(&tables.nodes, 0, -1.0, -1, -1, NULL, 0); CU_ASSERT_TRUE(new_parent1 >= 0); @@ -8673,12 +8672,14 @@ test_table_collection_modular_simplify_simple_tree(void) ret = tsk_edge_table_add_row( &new_edges, 0, tables.sequence_length, new_parent2, new_child2, NULL, 0); CU_ASSERT_TRUE(ret >= 0); - samples[0] = new_child1; samples[1] = new_child2; ret = tsk_modular_simplifier_init(&simplifier, &tables, samples, 2, 0); - CU_ASSERT_TRUE(ret >= 0); + CU_ASSERT_TRUE(ret > 0); CU_ASSERT_EQUAL_FATAL(1, 0); + + tsk_table_collection_free(&tables); + tsk_edge_table_free(&new_edges); } static void From 03673a9971e6fb04840857496d44a142f369aa4c Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Mon, 8 May 2023 13:24:15 -0700 Subject: [PATCH 09/88] free --- c/tests/test_tables.c | 1 + 1 file changed, 1 insertion(+) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index f1cc6ae1de..4a7fecede3 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8680,6 +8680,7 @@ test_table_collection_modular_simplify_simple_tree(void) tsk_table_collection_free(&tables); tsk_edge_table_free(&new_edges); + tsk_modular_simplifier_free(&simplifier); } static void From 5ff044236852ef08885c841efc000d29c4846f66 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Mon, 8 May 2023 13:30:46 -0700 Subject: [PATCH 10/88] working on test --- c/tests/test_tables.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 4a7fecede3..a96c251428 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8639,6 +8639,7 @@ test_table_collection_modular_simplify_simple_tree(void) tsk_id_t new_parent1, new_parent2, new_child1, new_child2; tsk_modular_simplifier_t simplifier; tsk_id_t *samples; + tsk_size_t row; ret = tsk_table_collection_init(&tables, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); ret = tsk_edge_table_init(&new_edges, 0); @@ -8676,11 +8677,25 @@ test_table_collection_modular_simplify_simple_tree(void) samples[1] = new_child2; ret = tsk_modular_simplifier_init(&simplifier, &tables, samples, 2, 0); CU_ASSERT_TRUE(ret > 0); - CU_ASSERT_EQUAL_FATAL(1, 0); + CU_ASSERT_EQUAL(new_edges.num_rows, 4); + + for (row = 0; row < new_edges.num_rows; ++row) { + CU_ASSERT(new_edges.parent[new_edges.num_rows - row - 1] >= 0); + CU_ASSERT(new_edges.parent[new_edges.num_rows - row - 1] + < (tsk_id_t) tables.nodes.num_rows); + ret = tsk_modular_simplifier_add_edge(&simplifier, + new_edges.left[new_edges.num_rows - row - 1], + new_edges.right[new_edges.num_rows - row - 1], + new_edges.parent[new_edges.num_rows - row - 1], + new_edges.child[new_edges.num_rows - row - 1]); + CU_ASSERT_TRUE(ret == 0); + } tsk_table_collection_free(&tables); tsk_edge_table_free(&new_edges); tsk_modular_simplifier_free(&simplifier); + tsk_safe_free(samples); + CU_ASSERT_EQUAL_FATAL(1, 0); } static void From b8b7253e812e04b1e9b5a1d56ba5a06555944c93 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Mon, 8 May 2023 13:47:50 -0700 Subject: [PATCH 11/88] that was kinda brutal --- c/tests/test_tables.c | 46 +++++++++++++++++++++++++++++-------------- c/tskit/tables.c | 13 +++++++----- 2 files changed, 39 insertions(+), 20 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index a96c251428..5c5cf94ff2 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8639,8 +8639,10 @@ test_table_collection_modular_simplify_simple_tree(void) tsk_id_t new_parent1, new_parent2, new_child1, new_child2; tsk_modular_simplifier_t simplifier; tsk_id_t *samples; - tsk_size_t row; + tsk_id_t last_parent; + tsk_size_t row, num_input_nodes; ret = tsk_table_collection_init(&tables, 0); + tables.sequence_length = 1.0; CU_ASSERT_EQUAL_FATAL(ret, 0); ret = tsk_edge_table_init(&new_edges, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); @@ -8652,50 +8654,64 @@ test_table_collection_modular_simplify_simple_tree(void) // "record new births" into node table new_parent1 = tsk_node_table_add_row(&tables.nodes, 0, -1.0, -1, -1, NULL, 0); - CU_ASSERT_TRUE(new_parent1 >= 0); + CU_ASSERT_TRUE_FATAL(new_parent1 >= 0); new_parent2 = tsk_node_table_add_row(&tables.nodes, 0, -1.0, -1, -1, NULL, 0); - CU_ASSERT_TRUE(new_parent2 >= 0); + CU_ASSERT_TRUE_FATAL(new_parent2 >= 0); new_child1 = tsk_node_table_add_row(&tables.nodes, 0, -2.0, -1, -1, NULL, 0); - CU_ASSERT_TRUE(new_child1 >= 0); + CU_ASSERT_TRUE_FATAL(new_child1 >= 0); new_child2 = tsk_node_table_add_row(&tables.nodes, 0, -2.0, -1, -1, NULL, 0); - CU_ASSERT_TRUE(new_child2 >= 0); + CU_ASSERT_TRUE_FATAL(new_child2 >= 0); // record edges ret = tsk_edge_table_add_row( &new_edges, 0, tables.sequence_length, 0, new_parent1, NULL, 0); - CU_ASSERT_TRUE(ret >= 0); + CU_ASSERT_TRUE_FATAL(ret >= 0); ret = tsk_edge_table_add_row( &new_edges, 0, tables.sequence_length, 2, new_parent2, NULL, 0); - CU_ASSERT_TRUE(ret >= 0); + CU_ASSERT_TRUE_FATAL(ret >= 0); ret = tsk_edge_table_add_row( &new_edges, 0, tables.sequence_length, new_parent1, new_child1, NULL, 0); - CU_ASSERT_TRUE(ret >= 0); + CU_ASSERT_TRUE_FATAL(ret >= 0); ret = tsk_edge_table_add_row( &new_edges, 0, tables.sequence_length, new_parent2, new_child2, NULL, 0); - CU_ASSERT_TRUE(ret >= 0); + CU_ASSERT_TRUE_FATAL(ret >= 0); samples[0] = new_child1; samples[1] = new_child2; + num_input_nodes = tables.nodes.num_rows; ret = tsk_modular_simplifier_init(&simplifier, &tables, samples, 2, 0); - CU_ASSERT_TRUE(ret > 0); + CU_ASSERT_TRUE_FATAL(ret == 0); CU_ASSERT_EQUAL(new_edges.num_rows, 4); + // FIXME: using last_parent means we have knowledge about the + // internal details of how simplification works, which is a no-no. + last_parent = TSK_NULL; for (row = 0; row < new_edges.num_rows; ++row) { - CU_ASSERT(new_edges.parent[new_edges.num_rows - row - 1] >= 0); - CU_ASSERT(new_edges.parent[new_edges.num_rows - row - 1] - < (tsk_id_t) tables.nodes.num_rows); + if (last_parent == TSK_NULL) { + last_parent = new_edges.parent[new_edges.num_rows - row - 1]; + } + if (new_edges.parent[new_edges.num_rows - row - 1] != last_parent) { + ret = tsk_modular_simplifier_merge_ancestors(&simplifier, last_parent); + CU_ASSERT_EQUAL_FATAL(ret, 0); + new_edges.parent[new_edges.num_rows - row - 1] = last_parent; + } + CU_ASSERT_FATAL(new_edges.parent[new_edges.num_rows - row - 1] >= 0); + CU_ASSERT_FATAL( + new_edges.parent[new_edges.num_rows - row - 1] < (tsk_id_t) num_input_nodes); ret = tsk_modular_simplifier_add_edge(&simplifier, new_edges.left[new_edges.num_rows - row - 1], new_edges.right[new_edges.num_rows - row - 1], new_edges.parent[new_edges.num_rows - row - 1], new_edges.child[new_edges.num_rows - row - 1]); - CU_ASSERT_TRUE(ret == 0); + CU_ASSERT_TRUE_FATAL(ret >= 0); } + ret = tsk_modular_simplifier_finalise(&simplifier, NULL); + CU_ASSERT_TRUE_FATAL(ret >= 0); + tsk_table_collection_free(&tables); tsk_edge_table_free(&new_edges); tsk_modular_simplifier_free(&simplifier); tsk_safe_free(samples); - CU_ASSERT_EQUAL_FATAL(1, 0); } static void diff --git a/c/tskit/tables.c b/c/tskit/tables.c index b3054af260..1f24a71db6 100644 --- a/c/tskit/tables.c +++ b/c/tskit/tables.c @@ -13625,6 +13625,14 @@ tsk_modular_simplifier_init(tsk_modular_simplifier_t *self, ret = TSK_ERR_NO_MEMORY; goto out; } + /* Have to init this array here b/c the internal simplifier + * "steals" input tables" + */ + self->pimpl->input_nodes = tsk_malloc(tables->nodes.num_rows * sizeof(tsk_id_t)); + if (self->pimpl->input_nodes == NULL) { + ret = TSK_ERR_NO_MEMORY; + goto out; + } ret = simplifier_init( &self->pimpl->simplifier, samples, num_samples, tables, options); if (ret != 0) { @@ -13632,11 +13640,6 @@ tsk_modular_simplifier_init(tsk_modular_simplifier_t *self, } self->pimpl->last_parent_processed = -1; - self->pimpl->input_nodes = tsk_malloc(tables->nodes.num_rows * sizeof(tsk_id_t)); - if (self->pimpl->input_nodes == NULL) { - ret = TSK_ERR_NO_MEMORY; - goto out; - } out: return ret; } From d37cb5d35854f2acb327b0cf6b1eb43b32ef6228 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Mon, 8 May 2023 14:06:16 -0700 Subject: [PATCH 12/88] more stress test --- c/tests/test_tables.c | 45 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 5c5cf94ff2..a2df2d5d63 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8630,6 +8630,34 @@ test_simplify_metadata(void) tsk_table_collection_free(&tables); } +/* + * Start with this tree: + * 6 + * / \ + * / \ + * / \ + * / 5 + * 4 / \ + * / \ / \ + * 0 1 2 3 + * + * Add data like a fake forward sim to give: + * + * 6 + * / \ + * / \ + * / \ + * / 5 + * 4 / \ + * / \ / \ + * 0 1 2 3 + * | | + * 7 8 <- new_parent 1 and 2, resp. + * | | + * 9 10 <- new child 1 and 2, resp. + * + * Then, we simplify w.r.to 9 and 10. + */ static void test_table_collection_modular_simplify_simple_tree(void) { @@ -8652,7 +8680,7 @@ test_table_collection_modular_simplify_simple_tree(void) parse_nodes(single_tree_ex_nodes, &tables.nodes); parse_edges(single_tree_ex_edges, &tables.edges); - // "record new births" into node table + /* "record new births" into node table */ new_parent1 = tsk_node_table_add_row(&tables.nodes, 0, -1.0, -1, -1, NULL, 0); CU_ASSERT_TRUE_FATAL(new_parent1 >= 0); new_parent2 = tsk_node_table_add_row(&tables.nodes, 0, -1.0, -1, -1, NULL, 0); @@ -8662,7 +8690,7 @@ test_table_collection_modular_simplify_simple_tree(void) new_child2 = tsk_node_table_add_row(&tables.nodes, 0, -2.0, -1, -1, NULL, 0); CU_ASSERT_TRUE_FATAL(new_child2 >= 0); - // record edges + /* record edges */ ret = tsk_edge_table_add_row( &new_edges, 0, tables.sequence_length, 0, new_parent1, NULL, 0); CU_ASSERT_TRUE_FATAL(ret >= 0); @@ -8682,8 +8710,11 @@ test_table_collection_modular_simplify_simple_tree(void) CU_ASSERT_TRUE_FATAL(ret == 0); CU_ASSERT_EQUAL(new_edges.num_rows, 4); - // FIXME: using last_parent means we have knowledge about the - // internal details of how simplification works, which is a no-no. + /*FIXME: using last_parent means we have knowledge about the + * internal details of how simplification works, which is a no-no. + * We need the internal state of this thing to know the number of + * input nodes and check against that internally. + */ last_parent = TSK_NULL; for (row = 0; row < new_edges.num_rows; ++row) { if (last_parent == TSK_NULL) { @@ -8707,6 +8738,12 @@ test_table_collection_modular_simplify_simple_tree(void) ret = tsk_modular_simplifier_finalise(&simplifier, NULL); CU_ASSERT_TRUE_FATAL(ret >= 0); + CU_ASSERT_TRUE_FATAL(!tsk_table_collection_has_index(&tables, 0)); + ret = tsk_table_collection_build_index(&tables, 0); + CU_ASSERT_TRUE_FATAL(ret == 0); + ret = tsk_table_collection_check_integrity( + &tables, TSK_CHECK_EDGE_ORDERING | TSK_CHECK_TREES | TSK_CHECK_INDEXES); + CU_ASSERT_TRUE_FATAL(ret == 1); /* this is no. trees, not an error code */ tsk_table_collection_free(&tables); tsk_edge_table_free(&new_edges); From 4b49a7a90ed304ddf576c4b290f044e96b2736c9 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Mon, 8 May 2023 14:16:33 -0700 Subject: [PATCH 13/88] internal checks so we don't have to test internal details --- c/tests/test_tables.c | 10 +--------- c/tskit/tables.c | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index a2df2d5d63..0b040f0ed9 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8668,7 +8668,7 @@ test_table_collection_modular_simplify_simple_tree(void) tsk_modular_simplifier_t simplifier; tsk_id_t *samples; tsk_id_t last_parent; - tsk_size_t row, num_input_nodes; + tsk_size_t row; ret = tsk_table_collection_init(&tables, 0); tables.sequence_length = 1.0; CU_ASSERT_EQUAL_FATAL(ret, 0); @@ -8705,16 +8705,10 @@ test_table_collection_modular_simplify_simple_tree(void) CU_ASSERT_TRUE_FATAL(ret >= 0); samples[0] = new_child1; samples[1] = new_child2; - num_input_nodes = tables.nodes.num_rows; ret = tsk_modular_simplifier_init(&simplifier, &tables, samples, 2, 0); CU_ASSERT_TRUE_FATAL(ret == 0); CU_ASSERT_EQUAL(new_edges.num_rows, 4); - /*FIXME: using last_parent means we have knowledge about the - * internal details of how simplification works, which is a no-no. - * We need the internal state of this thing to know the number of - * input nodes and check against that internally. - */ last_parent = TSK_NULL; for (row = 0; row < new_edges.num_rows; ++row) { if (last_parent == TSK_NULL) { @@ -8726,8 +8720,6 @@ test_table_collection_modular_simplify_simple_tree(void) new_edges.parent[new_edges.num_rows - row - 1] = last_parent; } CU_ASSERT_FATAL(new_edges.parent[new_edges.num_rows - row - 1] >= 0); - CU_ASSERT_FATAL( - new_edges.parent[new_edges.num_rows - row - 1] < (tsk_id_t) num_input_nodes); ret = tsk_modular_simplifier_add_edge(&simplifier, new_edges.left[new_edges.num_rows - row - 1], new_edges.right[new_edges.num_rows - row - 1], diff --git a/c/tskit/tables.c b/c/tskit/tables.c index 1f24a71db6..731d629e45 100644 --- a/c/tskit/tables.c +++ b/c/tskit/tables.c @@ -13612,6 +13612,7 @@ typedef struct __tsk_modular_simplifier_impl_t { simplifier_t simplifier; tsk_id_t last_parent_processed; tsk_id_t *input_nodes; + tsk_size_t num_input_nodes; } tsk_modular_simplifier_impl_t; int @@ -13633,13 +13634,18 @@ tsk_modular_simplifier_init(tsk_modular_simplifier_t *self, ret = TSK_ERR_NO_MEMORY; goto out; } + self->pimpl->num_input_nodes = tables->nodes.num_rows; + self->pimpl->last_parent_processed = -1; + + /* Now that we have set up the pimpl state, + * we can let the unsual init happen + */ ret = simplifier_init( &self->pimpl->simplifier, samples, num_samples, tables, options); if (ret != 0) { goto out; } - self->pimpl->last_parent_processed = -1; out: return ret; } @@ -13650,6 +13656,15 @@ tsk_modular_simplifier_add_edge(tsk_modular_simplifier_t *self, double left, { int ret = 0; + if (parent == TSK_NULL || parent >= (tsk_id_t) self->pimpl->num_input_nodes) { + ret = TSK_ERR_NULL_PARENT; + goto out; + } + if (child == TSK_NULL || child >= (tsk_id_t) self->pimpl->num_input_nodes) { + ret = TSK_ERR_NULL_CHILD; + goto out; + } + if (parent != self->pimpl->last_parent_processed && self->pimpl->last_parent_processed != TSK_NULL) { if (self->pimpl->input_nodes[parent] != TSK_NULL) { From 57c1b01095001c88a4787d1a09bd3fd551b33c89 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Mon, 8 May 2023 15:17:36 -0700 Subject: [PATCH 14/88] modularize, test that we can catch birth order violations --- c/tests/test_tables.c | 140 +++++++++++++++++++++++++++++++----------- c/tskit/tables.c | 12 ++++ 2 files changed, 115 insertions(+), 37 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 0b040f0ed9..21cf63f168 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -25,6 +25,7 @@ #include "testlib.h" #include "tskit/core.h" #include +#include #include #include @@ -8630,6 +8631,56 @@ test_simplify_metadata(void) tsk_table_collection_free(&tables); } +static void +make_single_tree_for_testing_modular_simplify( + tsk_table_collection_t *tables, tsk_edge_table_t *new_edges, tsk_id_t **samples) +{ + int ret; + tsk_id_t new_parent1, new_parent2, new_child1, new_child2; + tsk_size_t row; + ret = tsk_table_collection_init(tables, 0); + tables->sequence_length = 1.0; + CU_ASSERT_EQUAL_FATAL(ret, 0); + ret = tsk_edge_table_init(new_edges, 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); + *samples = tsk_malloc(2 * sizeof(tsk_id_t)); + CU_ASSERT_TRUE(samples != NULL) + + parse_nodes(single_tree_ex_nodes, &tables->nodes); + parse_edges(single_tree_ex_edges, &tables->edges); + + /* "record new births" into node table */ + new_parent1 = tsk_node_table_add_row(&tables->nodes, 0, -1.0, -1, -1, NULL, 0); + CU_ASSERT_TRUE_FATAL(new_parent1 >= 0); + new_parent2 = tsk_node_table_add_row(&tables->nodes, 0, -1.0, -1, -1, NULL, 0); + CU_ASSERT_TRUE_FATAL(new_parent2 >= 0); + new_child1 = tsk_node_table_add_row(&tables->nodes, 0, -2.0, -1, -1, NULL, 0); + CU_ASSERT_TRUE_FATAL(new_child1 >= 0); + new_child2 = tsk_node_table_add_row(&tables->nodes, 0, -2.0, -1, -1, NULL, 0); + CU_ASSERT_TRUE_FATAL(new_child2 >= 0); + + for (row = 0; row < tables->nodes.num_rows; ++row) { + // make all times >= 0.0. + tables->nodes.time[row] += 2.0; + } + + /* record edges */ + ret = tsk_edge_table_add_row( + new_edges, 0, tables->sequence_length, 0, new_parent1, NULL, 0); + CU_ASSERT_TRUE_FATAL(ret >= 0); + ret = tsk_edge_table_add_row( + new_edges, 0, tables->sequence_length, 2, new_parent2, NULL, 0); + CU_ASSERT_TRUE_FATAL(ret >= 0); + ret = tsk_edge_table_add_row( + new_edges, 0, tables->sequence_length, new_parent1, new_child1, NULL, 0); + CU_ASSERT_TRUE_FATAL(ret >= 0); + ret = tsk_edge_table_add_row( + new_edges, 0, tables->sequence_length, new_parent2, new_child2, NULL, 0); + CU_ASSERT_TRUE_FATAL(ret >= 0); + (*samples)[0] = new_child1; + (*samples)[1] = new_child2; +} + /* * Start with this tree: * 6 @@ -8664,47 +8715,12 @@ test_table_collection_modular_simplify_simple_tree(void) int ret; tsk_table_collection_t tables; tsk_edge_table_t new_edges; - tsk_id_t new_parent1, new_parent2, new_child1, new_child2; tsk_modular_simplifier_t simplifier; tsk_id_t *samples; tsk_id_t last_parent; tsk_size_t row; - ret = tsk_table_collection_init(&tables, 0); - tables.sequence_length = 1.0; - CU_ASSERT_EQUAL_FATAL(ret, 0); - ret = tsk_edge_table_init(&new_edges, 0); - CU_ASSERT_EQUAL_FATAL(ret, 0); - samples = tsk_malloc(2 * sizeof(tsk_id_t)); - CU_ASSERT_TRUE(samples != NULL) - - parse_nodes(single_tree_ex_nodes, &tables.nodes); - parse_edges(single_tree_ex_edges, &tables.edges); - /* "record new births" into node table */ - new_parent1 = tsk_node_table_add_row(&tables.nodes, 0, -1.0, -1, -1, NULL, 0); - CU_ASSERT_TRUE_FATAL(new_parent1 >= 0); - new_parent2 = tsk_node_table_add_row(&tables.nodes, 0, -1.0, -1, -1, NULL, 0); - CU_ASSERT_TRUE_FATAL(new_parent2 >= 0); - new_child1 = tsk_node_table_add_row(&tables.nodes, 0, -2.0, -1, -1, NULL, 0); - CU_ASSERT_TRUE_FATAL(new_child1 >= 0); - new_child2 = tsk_node_table_add_row(&tables.nodes, 0, -2.0, -1, -1, NULL, 0); - CU_ASSERT_TRUE_FATAL(new_child2 >= 0); - - /* record edges */ - ret = tsk_edge_table_add_row( - &new_edges, 0, tables.sequence_length, 0, new_parent1, NULL, 0); - CU_ASSERT_TRUE_FATAL(ret >= 0); - ret = tsk_edge_table_add_row( - &new_edges, 0, tables.sequence_length, 2, new_parent2, NULL, 0); - CU_ASSERT_TRUE_FATAL(ret >= 0); - ret = tsk_edge_table_add_row( - &new_edges, 0, tables.sequence_length, new_parent1, new_child1, NULL, 0); - CU_ASSERT_TRUE_FATAL(ret >= 0); - ret = tsk_edge_table_add_row( - &new_edges, 0, tables.sequence_length, new_parent2, new_child2, NULL, 0); - CU_ASSERT_TRUE_FATAL(ret >= 0); - samples[0] = new_child1; - samples[1] = new_child2; + make_single_tree_for_testing_modular_simplify(&tables, &new_edges, &samples); ret = tsk_modular_simplifier_init(&simplifier, &tables, samples, 2, 0); CU_ASSERT_TRUE_FATAL(ret == 0); CU_ASSERT_EQUAL(new_edges.num_rows, 4); @@ -8717,7 +8733,7 @@ test_table_collection_modular_simplify_simple_tree(void) if (new_edges.parent[new_edges.num_rows - row - 1] != last_parent) { ret = tsk_modular_simplifier_merge_ancestors(&simplifier, last_parent); CU_ASSERT_EQUAL_FATAL(ret, 0); - new_edges.parent[new_edges.num_rows - row - 1] = last_parent; + last_parent = new_edges.parent[row]; } CU_ASSERT_FATAL(new_edges.parent[new_edges.num_rows - row - 1] >= 0); ret = tsk_modular_simplifier_add_edge(&simplifier, @@ -8743,6 +8759,53 @@ test_table_collection_modular_simplify_simple_tree(void) tsk_safe_free(samples); } +static void +test_table_collection_modular_simplify_simple_tree_add_edges_wrong_birth_order(void) +{ + int ret; + tsk_table_collection_t tables; + tsk_edge_table_t new_edges; + tsk_modular_simplifier_t simplifier; + tsk_id_t *samples; + tsk_id_t last_parent; + tsk_size_t row; + int failed = 0; + + make_single_tree_for_testing_modular_simplify(&tables, &new_edges, &samples); + ret = tsk_modular_simplifier_init(&simplifier, &tables, samples, 2, 0); + CU_ASSERT_TRUE_FATAL(ret == 0); + CU_ASSERT_EQUAL(new_edges.num_rows, 4); + + last_parent = TSK_NULL; + for (row = 0; row < new_edges.num_rows; ++row) { + if (last_parent == TSK_NULL) { + last_parent = new_edges.parent[row]; + } + if (new_edges.parent[row] != last_parent) { + ret = tsk_modular_simplifier_merge_ancestors(&simplifier, last_parent); + CU_ASSERT_EQUAL_FATAL(ret, 0); + last_parent = new_edges.parent[row]; + } + CU_ASSERT_FATAL(new_edges.parent[row] >= 0); + ret = tsk_modular_simplifier_add_edge(&simplifier, new_edges.left[row], + new_edges.right[row], new_edges.parent[row], new_edges.child[row]); + // First two rows are okay, + // then the next row has a birth time + // more recent than the previous. + if (row < 2) { + CU_ASSERT_TRUE_FATAL(ret >= 0); + ++failed; + } else { + CU_ASSERT_TRUE_FATAL(ret < 0); + } + } + CU_ASSERT_TRUE_FATAL(failed); + + tsk_table_collection_free(&tables); + tsk_edge_table_free(&new_edges); + tsk_modular_simplifier_free(&simplifier); + tsk_safe_free(samples); +} static void test_edge_update_invalidates_index(void) { @@ -11708,6 +11771,9 @@ main(int argc, char **argv) test_table_collection_simplify_errors }, { "test_table_collection_modular_simplify_simple_tree", test_table_collection_modular_simplify_simple_tree }, + { "test_table_collection_modular_simplify_simple_tree_add_edges_wrong_birth_" + "order", + test_table_collection_modular_simplify_simple_tree_add_edges_wrong_birth_order }, { "test_table_collection_time_units", test_table_collection_time_units }, { "test_table_collection_reference_sequence", test_table_collection_reference_sequence }, diff --git a/c/tskit/tables.c b/c/tskit/tables.c index 731d629e45..6f55131222 100644 --- a/c/tskit/tables.c +++ b/c/tskit/tables.c @@ -13613,6 +13613,7 @@ typedef struct __tsk_modular_simplifier_impl_t { tsk_id_t last_parent_processed; tsk_id_t *input_nodes; tsk_size_t num_input_nodes; + double last_parent_time; } tsk_modular_simplifier_impl_t; int @@ -13636,6 +13637,7 @@ tsk_modular_simplifier_init(tsk_modular_simplifier_t *self, } self->pimpl->num_input_nodes = tables->nodes.num_rows; self->pimpl->last_parent_processed = -1; + self->pimpl->last_parent_time = DBL_MIN; /* Now that we have set up the pimpl state, * we can let the unsual init happen @@ -13665,6 +13667,16 @@ tsk_modular_simplifier_add_edge(tsk_modular_simplifier_t *self, double left, goto out; } + fprintf(stdout, "%lf %lf\n", self->pimpl->last_parent_time, + self->pimpl->simplifier.input_tables.nodes.time[parent]); + if (self->pimpl->simplifier.input_tables.nodes.time[parent] + < self->pimpl->last_parent_time) { + ret = -999999999; + goto out; + } + self->pimpl->last_parent_time + = self->pimpl->simplifier.input_tables.nodes.time[parent]; + if (parent != self->pimpl->last_parent_processed && self->pimpl->last_parent_processed != TSK_NULL) { if (self->pimpl->input_nodes[parent] != TSK_NULL) { From bcc9a15c0f9e4c6bba9f39ffed57336291e75f42 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Mon, 8 May 2023 15:18:45 -0700 Subject: [PATCH 15/88] move comments --- c/tests/test_tables.c | 56 +++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 21cf63f168..59142739e4 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8631,6 +8631,34 @@ test_simplify_metadata(void) tsk_table_collection_free(&tables); } +/* + * Start with this tree: + * 6 + * / \ + * / \ + * / \ + * / 5 + * 4 / \ + * / \ / \ + * 0 1 2 3 + * + * Add data like a fake forward sim to give: + * + * 6 + * / \ + * / \ + * / \ + * / 5 + * 4 / \ + * / \ / \ + * 0 1 2 3 + * | | + * 7 8 <- new_parent 1 and 2, resp. + * | | + * 9 10 <- new child 1 and 2, resp. + * + * Then, we simplify w.r.to 9 and 10. + */ static void make_single_tree_for_testing_modular_simplify( tsk_table_collection_t *tables, tsk_edge_table_t *new_edges, tsk_id_t **samples) @@ -8681,34 +8709,6 @@ make_single_tree_for_testing_modular_simplify( (*samples)[1] = new_child2; } -/* - * Start with this tree: - * 6 - * / \ - * / \ - * / \ - * / 5 - * 4 / \ - * / \ / \ - * 0 1 2 3 - * - * Add data like a fake forward sim to give: - * - * 6 - * / \ - * / \ - * / \ - * / 5 - * 4 / \ - * / \ / \ - * 0 1 2 3 - * | | - * 7 8 <- new_parent 1 and 2, resp. - * | | - * 9 10 <- new child 1 and 2, resp. - * - * Then, we simplify w.r.to 9 and 10. - */ static void test_table_collection_modular_simplify_simple_tree(void) { From 7a96dcda78f5a3ab22f2f52114b34a646da02037 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Mon, 8 May 2023 15:23:58 -0700 Subject: [PATCH 16/88] FIXME --- c/tskit/tables.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/c/tskit/tables.c b/c/tskit/tables.c index 6f55131222..a291de4454 100644 --- a/c/tskit/tables.c +++ b/c/tskit/tables.c @@ -13659,19 +13659,19 @@ tsk_modular_simplifier_add_edge(tsk_modular_simplifier_t *self, double left, int ret = 0; if (parent == TSK_NULL || parent >= (tsk_id_t) self->pimpl->num_input_nodes) { + // FIXME: we don't have good error code for this ret = TSK_ERR_NULL_PARENT; goto out; } if (child == TSK_NULL || child >= (tsk_id_t) self->pimpl->num_input_nodes) { + // FIXME: we don't have good error code for this ret = TSK_ERR_NULL_CHILD; goto out; } - fprintf(stdout, "%lf %lf\n", self->pimpl->last_parent_time, - self->pimpl->simplifier.input_tables.nodes.time[parent]); if (self->pimpl->simplifier.input_tables.nodes.time[parent] < self->pimpl->last_parent_time) { - ret = -999999999; + ret = TSK_ERR_EDGES_NOT_SORTED_PARENT_TIME; goto out; } self->pimpl->last_parent_time From 145abcede5c90d7c50956e0bda4a1d9b06a168db Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Mon, 8 May 2023 16:40:26 -0700 Subject: [PATCH 17/88] show some of the side effects that one can get --- c/tests/test_tables.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 59142739e4..35b5a48ba2 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8794,13 +8794,26 @@ test_table_collection_modular_simplify_simple_tree_add_edges_wrong_birth_order(v // more recent than the previous. if (row < 2) { CU_ASSERT_TRUE_FATAL(ret >= 0); - ++failed; } else { CU_ASSERT_TRUE_FATAL(ret < 0); + CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EDGES_NOT_SORTED_PARENT_TIME); + ++failed; } } CU_ASSERT_TRUE_FATAL(failed); + ret = tsk_table_collection_build_index(&tables, 0); + CU_ASSERT_TRUE_FATAL(ret == 0); + /* NOTE: by not handling the errors that increment failed, + * we can get tables that appear valid. + * Production code MUST exit the "edge adding loop" upon error. + */ + ret = tsk_table_collection_check_integrity(&tables, TSK_CHECK_EDGE_ORDERING); + CU_ASSERT_TRUE_FATAL(ret == 0); + ret = tsk_table_collection_check_integrity( + &tables, TSK_CHECK_EDGE_ORDERING | TSK_CHECK_TREES | TSK_CHECK_INDEXES); + CU_ASSERT_TRUE_FATAL(ret == 1); + tsk_table_collection_free(&tables); tsk_edge_table_free(&new_edges); tsk_modular_simplifier_free(&simplifier); From c71e4ed8d35520619ea343f085f101065b53d29c Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Mon, 8 May 2023 17:02:23 -0700 Subject: [PATCH 18/88] not detecting discontiguous parents --- c/tests/test_tables.c | 72 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 64 insertions(+), 8 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 35b5a48ba2..e903587476 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8653,9 +8653,9 @@ test_simplify_metadata(void) * / \ / \ * 0 1 2 3 * | | - * 7 8 <- new_parent 1 and 2, resp. - * | | - * 9 10 <- new child 1 and 2, resp. + * 7 8--- <- new_parent 1 and 2, resp. + * | | | + * 9 10 11 <- new child 1, 2, and 3, resp. * * Then, we simplify w.r.to 9 and 10. */ @@ -8664,14 +8664,14 @@ make_single_tree_for_testing_modular_simplify( tsk_table_collection_t *tables, tsk_edge_table_t *new_edges, tsk_id_t **samples) { int ret; - tsk_id_t new_parent1, new_parent2, new_child1, new_child2; + tsk_id_t new_parent1, new_parent2, new_child1, new_child2, new_child3; tsk_size_t row; ret = tsk_table_collection_init(tables, 0); tables->sequence_length = 1.0; CU_ASSERT_EQUAL_FATAL(ret, 0); ret = tsk_edge_table_init(new_edges, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); - *samples = tsk_malloc(2 * sizeof(tsk_id_t)); + *samples = tsk_malloc(3 * sizeof(tsk_id_t)); CU_ASSERT_TRUE(samples != NULL) parse_nodes(single_tree_ex_nodes, &tables->nodes); @@ -8686,6 +8686,8 @@ make_single_tree_for_testing_modular_simplify( CU_ASSERT_TRUE_FATAL(new_child1 >= 0); new_child2 = tsk_node_table_add_row(&tables->nodes, 0, -2.0, -1, -1, NULL, 0); CU_ASSERT_TRUE_FATAL(new_child2 >= 0); + new_child3 = tsk_node_table_add_row(&tables->nodes, 0, -2.0, -1, -1, NULL, 0); + CU_ASSERT_TRUE_FATAL(new_child3 >= 0); for (row = 0; row < tables->nodes.num_rows; ++row) { // make all times >= 0.0. @@ -8705,8 +8707,12 @@ make_single_tree_for_testing_modular_simplify( ret = tsk_edge_table_add_row( new_edges, 0, tables->sequence_length, new_parent2, new_child2, NULL, 0); CU_ASSERT_TRUE_FATAL(ret >= 0); + ret = tsk_edge_table_add_row( + new_edges, 0, tables->sequence_length, new_parent2, new_child3, NULL, 0); + CU_ASSERT_TRUE_FATAL(ret >= 0); (*samples)[0] = new_child1; (*samples)[1] = new_child2; + (*samples)[2] = new_child3; } static void @@ -8723,7 +8729,6 @@ test_table_collection_modular_simplify_simple_tree(void) make_single_tree_for_testing_modular_simplify(&tables, &new_edges, &samples); ret = tsk_modular_simplifier_init(&simplifier, &tables, samples, 2, 0); CU_ASSERT_TRUE_FATAL(ret == 0); - CU_ASSERT_EQUAL(new_edges.num_rows, 4); last_parent = TSK_NULL; for (row = 0; row < new_edges.num_rows; ++row) { @@ -8733,7 +8738,7 @@ test_table_collection_modular_simplify_simple_tree(void) if (new_edges.parent[new_edges.num_rows - row - 1] != last_parent) { ret = tsk_modular_simplifier_merge_ancestors(&simplifier, last_parent); CU_ASSERT_EQUAL_FATAL(ret, 0); - last_parent = new_edges.parent[row]; + last_parent = new_edges.parent[new_edges.num_rows - row - 1]; } CU_ASSERT_FATAL(new_edges.parent[new_edges.num_rows - row - 1] >= 0); ret = tsk_modular_simplifier_add_edge(&simplifier, @@ -8759,6 +8764,55 @@ test_table_collection_modular_simplify_simple_tree(void) tsk_safe_free(samples); } +static void +test_table_collection_modular_simplify_simple_tree_discontiguous_parents(void) +{ + int ret; + tsk_table_collection_t tables; + tsk_edge_table_t new_edges; + tsk_modular_simplifier_t simplifier; + tsk_id_t *samples; + tsk_id_t last_parent; + tsk_size_t row; + tsk_id_t row_order[5] = { 3, 2, 4, 1, 0 }; + int failure_code = 0; + + make_single_tree_for_testing_modular_simplify(&tables, &new_edges, &samples); + CU_ASSERT_EQUAL_FATAL(new_edges.num_rows, 5); + for (row = 0; row < new_edges.num_rows; ++row) { + fprintf(stdout, "parent column: %ld %d -> %d\n", row, row_order[row], + new_edges.parent[row_order[row]]); + } + ret = tsk_modular_simplifier_init(&simplifier, &tables, samples, 2, 0); + CU_ASSERT_TRUE_FATAL(ret == 0); + + last_parent = TSK_NULL; + + // The last 2 new edges have the same parent, + // Let's only process one of them in this loop. + for (row = 0; row < new_edges.num_rows; ++row) { + if (last_parent == TSK_NULL) { + last_parent = new_edges.parent[row_order[row]]; + } + if (new_edges.parent[row_order[row]] != last_parent) { + ret = tsk_modular_simplifier_merge_ancestors(&simplifier, last_parent); + CU_ASSERT_EQUAL_FATAL(ret, 0); + last_parent = new_edges.parent[row_order[row]]; + } + CU_ASSERT_FATAL(new_edges.parent[row_order[row]] >= 0); + ret = tsk_modular_simplifier_add_edge(&simplifier, + new_edges.left[row_order[row]], new_edges.right[row_order[row]], + new_edges.parent[row_order[row]], new_edges.child[row_order[row]]); + CU_ASSERT_TRUE_FATAL(ret >= 0); + if (ret < 0) { + failure_code = ret; + break; + } + } + CU_ASSERT_TRUE_FATAL(failure_code < 0); + CU_ASSERT_EQUAL_FATAL(failure_code, TSK_ERR_EDGES_NONCONTIGUOUS_PARENTS); +} + static void test_table_collection_modular_simplify_simple_tree_add_edges_wrong_birth_order(void) { @@ -8774,7 +8828,6 @@ test_table_collection_modular_simplify_simple_tree_add_edges_wrong_birth_order(v make_single_tree_for_testing_modular_simplify(&tables, &new_edges, &samples); ret = tsk_modular_simplifier_init(&simplifier, &tables, samples, 2, 0); CU_ASSERT_TRUE_FATAL(ret == 0); - CU_ASSERT_EQUAL(new_edges.num_rows, 4); last_parent = TSK_NULL; for (row = 0; row < new_edges.num_rows; ++row) { @@ -8819,6 +8872,7 @@ test_table_collection_modular_simplify_simple_tree_add_edges_wrong_birth_order(v tsk_modular_simplifier_free(&simplifier); tsk_safe_free(samples); } + static void test_edge_update_invalidates_index(void) { @@ -11784,6 +11838,8 @@ main(int argc, char **argv) test_table_collection_simplify_errors }, { "test_table_collection_modular_simplify_simple_tree", test_table_collection_modular_simplify_simple_tree }, + { "test_table_collection_modular_simplify_simple_tree_discontiguous_parents", + test_table_collection_modular_simplify_simple_tree_discontiguous_parents }, { "test_table_collection_modular_simplify_simple_tree_add_edges_wrong_birth_" "order", test_table_collection_modular_simplify_simple_tree_add_edges_wrong_birth_order }, From 59ce926c71173898571a4a7d81aa71b2762a4eef Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Mon, 8 May 2023 17:12:18 -0700 Subject: [PATCH 19/88] all tests fail when we try to check for non-contiguous parents --- c/tests/test_tables.c | 1 + c/tskit/tables.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index e903587476..b1f9356820 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8795,6 +8795,7 @@ test_table_collection_modular_simplify_simple_tree_discontiguous_parents(void) last_parent = new_edges.parent[row_order[row]]; } if (new_edges.parent[row_order[row]] != last_parent) { + fprintf(stdout, "merging %d\n", last_parent); ret = tsk_modular_simplifier_merge_ancestors(&simplifier, last_parent); CU_ASSERT_EQUAL_FATAL(ret, 0); last_parent = new_edges.parent[row_order[row]]; diff --git a/c/tskit/tables.c b/c/tskit/tables.c index a291de4454..2ba61fb63d 100644 --- a/c/tskit/tables.c +++ b/c/tskit/tables.c @@ -13679,7 +13679,11 @@ tsk_modular_simplifier_add_edge(tsk_modular_simplifier_t *self, double left, if (parent != self->pimpl->last_parent_processed && self->pimpl->last_parent_processed != TSK_NULL) { + fprintf( + stdout, "parent stuff: %d %d\n", parent, self->pimpl->last_parent_processed); if (self->pimpl->input_nodes[parent] != TSK_NULL) { + fprintf(stdout, "parent stuff fail: %d %d\n", parent, + self->pimpl->last_parent_processed); ret = TSK_ERR_EDGES_NONCONTIGUOUS_PARENTS; goto out; } @@ -13699,6 +13703,7 @@ tsk_modular_simplifier_merge_ancestors(tsk_modular_simplifier_t *self, tsk_id_t /* mark this input parent as "seen" */ self->pimpl->input_nodes[parent] = parent; self->pimpl->simplifier.segment_queue_size = 0; + self->pimpl->last_parent_processed = parent; out: return ret; } From 946a735559ef381aeaf71835446adb9569495695 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Mon, 8 May 2023 17:15:49 -0700 Subject: [PATCH 20/88] fix array init to TSK_NULL --- c/tests/test_tables.c | 6 ------ c/tskit/tables.c | 8 ++++---- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index b1f9356820..719dfae8df 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8779,10 +8779,6 @@ test_table_collection_modular_simplify_simple_tree_discontiguous_parents(void) make_single_tree_for_testing_modular_simplify(&tables, &new_edges, &samples); CU_ASSERT_EQUAL_FATAL(new_edges.num_rows, 5); - for (row = 0; row < new_edges.num_rows; ++row) { - fprintf(stdout, "parent column: %ld %d -> %d\n", row, row_order[row], - new_edges.parent[row_order[row]]); - } ret = tsk_modular_simplifier_init(&simplifier, &tables, samples, 2, 0); CU_ASSERT_TRUE_FATAL(ret == 0); @@ -8795,7 +8791,6 @@ test_table_collection_modular_simplify_simple_tree_discontiguous_parents(void) last_parent = new_edges.parent[row_order[row]]; } if (new_edges.parent[row_order[row]] != last_parent) { - fprintf(stdout, "merging %d\n", last_parent); ret = tsk_modular_simplifier_merge_ancestors(&simplifier, last_parent); CU_ASSERT_EQUAL_FATAL(ret, 0); last_parent = new_edges.parent[row_order[row]]; @@ -8804,7 +8799,6 @@ test_table_collection_modular_simplify_simple_tree_discontiguous_parents(void) ret = tsk_modular_simplifier_add_edge(&simplifier, new_edges.left[row_order[row]], new_edges.right[row_order[row]], new_edges.parent[row_order[row]], new_edges.child[row_order[row]]); - CU_ASSERT_TRUE_FATAL(ret >= 0); if (ret < 0) { failure_code = ret; break; diff --git a/c/tskit/tables.c b/c/tskit/tables.c index 2ba61fb63d..aab6eec898 100644 --- a/c/tskit/tables.c +++ b/c/tskit/tables.c @@ -13622,6 +13622,7 @@ tsk_modular_simplifier_init(tsk_modular_simplifier_t *self, tsk_flags_t options) { int ret = 0; + tsk_size_t i; self->pimpl = tsk_malloc(sizeof(tsk_modular_simplifier_impl_t)); if (self->pimpl == NULL) { ret = TSK_ERR_NO_MEMORY; @@ -13635,6 +13636,9 @@ tsk_modular_simplifier_init(tsk_modular_simplifier_t *self, ret = TSK_ERR_NO_MEMORY; goto out; } + for (i = 0; i < tables->nodes.num_rows; ++i) { + self->pimpl->input_nodes[i] = TSK_NULL; + } self->pimpl->num_input_nodes = tables->nodes.num_rows; self->pimpl->last_parent_processed = -1; self->pimpl->last_parent_time = DBL_MIN; @@ -13679,11 +13683,7 @@ tsk_modular_simplifier_add_edge(tsk_modular_simplifier_t *self, double left, if (parent != self->pimpl->last_parent_processed && self->pimpl->last_parent_processed != TSK_NULL) { - fprintf( - stdout, "parent stuff: %d %d\n", parent, self->pimpl->last_parent_processed); if (self->pimpl->input_nodes[parent] != TSK_NULL) { - fprintf(stdout, "parent stuff fail: %d %d\n", parent, - self->pimpl->last_parent_processed); ret = TSK_ERR_EDGES_NONCONTIGUOUS_PARENTS; goto out; } From f3bfd499182e5a5509ffe590a92b749e165e24c9 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Tue, 9 May 2023 08:59:13 -0700 Subject: [PATCH 21/88] name field for what we use it for... --- c/tskit/tables.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/c/tskit/tables.c b/c/tskit/tables.c index aab6eec898..8499ea6fb6 100644 --- a/c/tskit/tables.c +++ b/c/tskit/tables.c @@ -13611,7 +13611,7 @@ tsk_diff_iter_next(tsk_diff_iter_t *self, double *ret_left, double *ret_right, typedef struct __tsk_modular_simplifier_impl_t { simplifier_t simplifier; tsk_id_t last_parent_processed; - tsk_id_t *input_nodes; + tsk_id_t *input_node_visited; tsk_size_t num_input_nodes; double last_parent_time; } tsk_modular_simplifier_impl_t; @@ -13631,13 +13631,14 @@ tsk_modular_simplifier_init(tsk_modular_simplifier_t *self, /* Have to init this array here b/c the internal simplifier * "steals" input tables" */ - self->pimpl->input_nodes = tsk_malloc(tables->nodes.num_rows * sizeof(tsk_id_t)); - if (self->pimpl->input_nodes == NULL) { + self->pimpl->input_node_visited + = tsk_malloc(tables->nodes.num_rows * sizeof(tsk_id_t)); + if (self->pimpl->input_node_visited == NULL) { ret = TSK_ERR_NO_MEMORY; goto out; } for (i = 0; i < tables->nodes.num_rows; ++i) { - self->pimpl->input_nodes[i] = TSK_NULL; + self->pimpl->input_node_visited[i] = 0; } self->pimpl->num_input_nodes = tables->nodes.num_rows; self->pimpl->last_parent_processed = -1; @@ -13683,7 +13684,7 @@ tsk_modular_simplifier_add_edge(tsk_modular_simplifier_t *self, double left, if (parent != self->pimpl->last_parent_processed && self->pimpl->last_parent_processed != TSK_NULL) { - if (self->pimpl->input_nodes[parent] != TSK_NULL) { + if (self->pimpl->input_node_visited[parent] != 0) { ret = TSK_ERR_EDGES_NONCONTIGUOUS_PARENTS; goto out; } @@ -13701,7 +13702,7 @@ tsk_modular_simplifier_merge_ancestors(tsk_modular_simplifier_t *self, tsk_id_t goto out; } /* mark this input parent as "seen" */ - self->pimpl->input_nodes[parent] = parent; + self->pimpl->input_node_visited[parent] = 1; self->pimpl->simplifier.segment_queue_size = 0; self->pimpl->last_parent_processed = parent; out: @@ -13725,7 +13726,7 @@ tsk_modular_simplifier_free(tsk_modular_simplifier_t *self) if (ret != 0) { goto out; } - tsk_safe_free(self->pimpl->input_nodes); + tsk_safe_free(self->pimpl->input_node_visited); tsk_safe_free(self->pimpl); out: return ret; From eeef36ad73dcf841ee60501fb1c1e2d98826ae3d Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Tue, 9 May 2023 10:37:52 -0700 Subject: [PATCH 22/88] valgrind --- c/tests/test_tables.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 719dfae8df..7e0cc3a61c 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8806,6 +8806,11 @@ test_table_collection_modular_simplify_simple_tree_discontiguous_parents(void) } CU_ASSERT_TRUE_FATAL(failure_code < 0); CU_ASSERT_EQUAL_FATAL(failure_code, TSK_ERR_EDGES_NONCONTIGUOUS_PARENTS); + + tsk_table_collection_free(&tables); + tsk_edge_table_free(&new_edges); + tsk_modular_simplifier_free(&simplifier); + tsk_safe_free(samples); } static void From 7e799f00b1bc26e966638bf28d2cf28b80d18c13 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Tue, 9 May 2023 11:23:08 -0700 Subject: [PATCH 23/88] testing pattern w/less duplication --- c/tests/test_tables.c | 161 ++++++++++++------------------------------ 1 file changed, 45 insertions(+), 116 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 7e0cc3a61c..c0284c5733 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8716,161 +8716,90 @@ make_single_tree_for_testing_modular_simplify( } static void -test_table_collection_modular_simplify_simple_tree(void) +run_test_modular_simplify_single_tree(tsk_id_t row_order[5], int expected_result) { int ret; - tsk_table_collection_t tables; + tsk_table_collection_t tables, standard_tables; tsk_edge_table_t new_edges; tsk_modular_simplifier_t simplifier; tsk_id_t *samples; tsk_id_t last_parent; tsk_size_t row; - make_single_tree_for_testing_modular_simplify(&tables, &new_edges, &samples); - ret = tsk_modular_simplifier_init(&simplifier, &tables, samples, 2, 0); - CU_ASSERT_TRUE_FATAL(ret == 0); - last_parent = TSK_NULL; - for (row = 0; row < new_edges.num_rows; ++row) { - if (last_parent == TSK_NULL) { - last_parent = new_edges.parent[new_edges.num_rows - row - 1]; - } - if (new_edges.parent[new_edges.num_rows - row - 1] != last_parent) { - ret = tsk_modular_simplifier_merge_ancestors(&simplifier, last_parent); - CU_ASSERT_EQUAL_FATAL(ret, 0); - last_parent = new_edges.parent[new_edges.num_rows - row - 1]; - } - CU_ASSERT_FATAL(new_edges.parent[new_edges.num_rows - row - 1] >= 0); - ret = tsk_modular_simplifier_add_edge(&simplifier, - new_edges.left[new_edges.num_rows - row - 1], - new_edges.right[new_edges.num_rows - row - 1], - new_edges.parent[new_edges.num_rows - row - 1], - new_edges.child[new_edges.num_rows - row - 1]); - CU_ASSERT_TRUE_FATAL(ret >= 0); + ret = tsk_table_collection_copy(&tables, &standard_tables, 0); + if (ret < 0) { + goto out; + } + ret = tsk_edge_table_append_columns(&standard_tables.edges, new_edges.num_rows, + new_edges.left, new_edges.right, new_edges.parent, new_edges.child, NULL, NULL); + if (ret < 0) { + goto out; + } + ret = tsk_table_collection_sort(&standard_tables, NULL, 0); + if (ret < 0) { + goto out; } - ret = tsk_modular_simplifier_finalise(&simplifier, NULL); - CU_ASSERT_TRUE_FATAL(ret >= 0); - CU_ASSERT_TRUE_FATAL(!tsk_table_collection_has_index(&tables, 0)); - ret = tsk_table_collection_build_index(&tables, 0); - CU_ASSERT_TRUE_FATAL(ret == 0); - ret = tsk_table_collection_check_integrity( - &tables, TSK_CHECK_EDGE_ORDERING | TSK_CHECK_TREES | TSK_CHECK_INDEXES); - CU_ASSERT_TRUE_FATAL(ret == 1); /* this is no. trees, not an error code */ - - tsk_table_collection_free(&tables); - tsk_edge_table_free(&new_edges); - tsk_modular_simplifier_free(&simplifier); - tsk_safe_free(samples); -} - -static void -test_table_collection_modular_simplify_simple_tree_discontiguous_parents(void) -{ - int ret; - tsk_table_collection_t tables; - tsk_edge_table_t new_edges; - tsk_modular_simplifier_t simplifier; - tsk_id_t *samples; - tsk_id_t last_parent; - tsk_size_t row; - tsk_id_t row_order[5] = { 3, 2, 4, 1, 0 }; - int failure_code = 0; + ret = tsk_table_collection_simplify(&standard_tables, samples, 3, 0, NULL); + if (ret < 0) { + goto out; + } - make_single_tree_for_testing_modular_simplify(&tables, &new_edges, &samples); - CU_ASSERT_EQUAL_FATAL(new_edges.num_rows, 5); ret = tsk_modular_simplifier_init(&simplifier, &tables, samples, 2, 0); - CU_ASSERT_TRUE_FATAL(ret == 0); - + if (ret < 0) { + goto out; + } last_parent = TSK_NULL; - - // The last 2 new edges have the same parent, - // Let's only process one of them in this loop. for (row = 0; row < new_edges.num_rows; ++row) { if (last_parent == TSK_NULL) { last_parent = new_edges.parent[row_order[row]]; } if (new_edges.parent[row_order[row]] != last_parent) { ret = tsk_modular_simplifier_merge_ancestors(&simplifier, last_parent); - CU_ASSERT_EQUAL_FATAL(ret, 0); + if (ret < 0) { + goto out; + } last_parent = new_edges.parent[row_order[row]]; } - CU_ASSERT_FATAL(new_edges.parent[row_order[row]] >= 0); ret = tsk_modular_simplifier_add_edge(&simplifier, new_edges.left[row_order[row]], new_edges.right[row_order[row]], new_edges.parent[row_order[row]], new_edges.child[row_order[row]]); if (ret < 0) { - failure_code = ret; - break; + goto out; } } - CU_ASSERT_TRUE_FATAL(failure_code < 0); - CU_ASSERT_EQUAL_FATAL(failure_code, TSK_ERR_EDGES_NONCONTIGUOUS_PARENTS); +out: tsk_table_collection_free(&tables); + tsk_table_collection_free(&standard_tables); tsk_edge_table_free(&new_edges); tsk_modular_simplifier_free(&simplifier); tsk_safe_free(samples); + CU_ASSERT_EQUAL_FATAL(ret, expected_result); } static void -test_table_collection_modular_simplify_simple_tree_add_edges_wrong_birth_order(void) +test_table_collection_modular_simplify_simple_tree(void) { - int ret; - tsk_table_collection_t tables; - tsk_edge_table_t new_edges; - tsk_modular_simplifier_t simplifier; - tsk_id_t *samples; - tsk_id_t last_parent; - tsk_size_t row; - int failed = 0; - - make_single_tree_for_testing_modular_simplify(&tables, &new_edges, &samples); - ret = tsk_modular_simplifier_init(&simplifier, &tables, samples, 2, 0); - CU_ASSERT_TRUE_FATAL(ret == 0); - - last_parent = TSK_NULL; - for (row = 0; row < new_edges.num_rows; ++row) { - if (last_parent == TSK_NULL) { - last_parent = new_edges.parent[row]; - } - if (new_edges.parent[row] != last_parent) { - ret = tsk_modular_simplifier_merge_ancestors(&simplifier, last_parent); - CU_ASSERT_EQUAL_FATAL(ret, 0); - last_parent = new_edges.parent[row]; - } - CU_ASSERT_FATAL(new_edges.parent[row] >= 0); - ret = tsk_modular_simplifier_add_edge(&simplifier, new_edges.left[row], - new_edges.right[row], new_edges.parent[row], new_edges.child[row]); - // First two rows are okay, - // then the next row has a birth time - // more recent than the previous. - if (row < 2) { - CU_ASSERT_TRUE_FATAL(ret >= 0); - } else { - CU_ASSERT_TRUE_FATAL(ret < 0); - CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EDGES_NOT_SORTED_PARENT_TIME); - ++failed; - } - } - CU_ASSERT_TRUE_FATAL(failed); + tsk_id_t row_order[5] = { 4, 3, 2, 1, 0 }; + run_test_modular_simplify_single_tree(row_order, 0); +} - ret = tsk_table_collection_build_index(&tables, 0); - CU_ASSERT_TRUE_FATAL(ret == 0); - /* NOTE: by not handling the errors that increment failed, - * we can get tables that appear valid. - * Production code MUST exit the "edge adding loop" upon error. - */ - ret = tsk_table_collection_check_integrity(&tables, TSK_CHECK_EDGE_ORDERING); - CU_ASSERT_TRUE_FATAL(ret == 0); - ret = tsk_table_collection_check_integrity( - &tables, TSK_CHECK_EDGE_ORDERING | TSK_CHECK_TREES | TSK_CHECK_INDEXES); - CU_ASSERT_TRUE_FATAL(ret == 1); +static void +test_table_collection_modular_simplify_simple_tree_discontiguous_parents(void) +{ + tsk_id_t row_order[5] = { 3, 2, 4, 1, 0 }; + run_test_modular_simplify_single_tree( + row_order, TSK_ERR_EDGES_NONCONTIGUOUS_PARENTS); +} - tsk_table_collection_free(&tables); - tsk_edge_table_free(&new_edges); - tsk_modular_simplifier_free(&simplifier); - tsk_safe_free(samples); +static void +test_table_collection_modular_simplify_simple_tree_add_edges_wrong_birth_order(void) +{ + tsk_id_t row_order[5] = { 0, 1, 2, 3, 4 }; + run_test_modular_simplify_single_tree( + row_order, TSK_ERR_EDGES_NOT_SORTED_PARENT_TIME); } static void From 127106e71588a4d77117b069293433e9a3b7654b Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Tue, 9 May 2023 11:33:17 -0700 Subject: [PATCH 24/88] found a goof. yay for testing --- c/tests/test_tables.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index c0284c5733..bc636362f8 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8740,13 +8740,16 @@ run_test_modular_simplify_single_tree(tsk_id_t row_order[5], int expected_result if (ret < 0) { goto out; } + CU_ASSERT_EQUAL_FATAL( + standard_tables.edges.num_rows, tables.edges.num_rows + new_edges.num_rows); + CU_ASSERT_EQUAL_FATAL(standard_tables.nodes.num_rows, tables.nodes.num_rows); ret = tsk_table_collection_simplify(&standard_tables, samples, 3, 0, NULL); if (ret < 0) { goto out; } - ret = tsk_modular_simplifier_init(&simplifier, &tables, samples, 2, 0); + ret = tsk_modular_simplifier_init(&simplifier, &tables, samples, 3, 0); if (ret < 0) { goto out; } @@ -8755,13 +8758,17 @@ run_test_modular_simplify_single_tree(tsk_id_t row_order[5], int expected_result if (last_parent == TSK_NULL) { last_parent = new_edges.parent[row_order[row]]; } - if (new_edges.parent[row_order[row]] != last_parent) { + /* NOTE: this is a goof, as we are not correctly diagnosing + * when the final input row is a sole new parent id + */ + if (new_edges.parent[row_order[row]] != last_parent && last_parent != TSK_NULL) { + fprintf(stdout, "merging %d\n", last_parent); ret = tsk_modular_simplifier_merge_ancestors(&simplifier, last_parent); if (ret < 0) { goto out; } - last_parent = new_edges.parent[row_order[row]]; } + last_parent = new_edges.parent[row_order[row]]; ret = tsk_modular_simplifier_add_edge(&simplifier, new_edges.left[row_order[row]], new_edges.right[row_order[row]], new_edges.parent[row_order[row]], new_edges.child[row_order[row]]); @@ -8769,6 +8776,16 @@ run_test_modular_simplify_single_tree(tsk_id_t row_order[5], int expected_result goto out; } } + ret = tsk_modular_simplifier_finalise(&simplifier, NULL); + if (ret < 0) { + goto out; + } + + // Now, we can compare various properties of the two table collections + fprintf(stdout, "%ld %ld\n", standard_tables.nodes.num_rows, tables.nodes.num_rows); + fprintf(stdout, "%ld %ld\n", standard_tables.edges.num_rows, tables.edges.num_rows); + CU_ASSERT_EQUAL_FATAL(standard_tables.edges.num_rows, tables.edges.num_rows); + CU_ASSERT_EQUAL_FATAL(standard_tables.nodes.num_rows, tables.nodes.num_rows); out: tsk_table_collection_free(&tables); From ac8236f8ecea41884dcd084eb135293052d847af Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Tue, 9 May 2023 11:48:42 -0700 Subject: [PATCH 25/88] fix logic error --- c/tests/test_tables.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index bc636362f8..3037027d73 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8753,25 +8753,20 @@ run_test_modular_simplify_single_tree(tsk_id_t row_order[5], int expected_result if (ret < 0) { goto out; } - last_parent = TSK_NULL; - for (row = 0; row < new_edges.num_rows; ++row) { - if (last_parent == TSK_NULL) { - last_parent = new_edges.parent[row_order[row]]; - } - /* NOTE: this is a goof, as we are not correctly diagnosing - * when the final input row is a sole new parent id - */ - if (new_edges.parent[row_order[row]] != last_parent && last_parent != TSK_NULL) { - fprintf(stdout, "merging %d\n", last_parent); - ret = tsk_modular_simplifier_merge_ancestors(&simplifier, last_parent); + row = 0; + while (row < new_edges.num_rows) { + last_parent = new_edges.parent[row_order[row]]; + while (row < new_edges.num_rows + && new_edges.parent[row_order[row]] == last_parent) { + ret = tsk_modular_simplifier_add_edge(&simplifier, + new_edges.left[row_order[row]], new_edges.right[row_order[row]], + new_edges.parent[row_order[row]], new_edges.child[row_order[row]]); if (ret < 0) { goto out; } + ++row; } - last_parent = new_edges.parent[row_order[row]]; - ret = tsk_modular_simplifier_add_edge(&simplifier, - new_edges.left[row_order[row]], new_edges.right[row_order[row]], - new_edges.parent[row_order[row]], new_edges.child[row_order[row]]); + ret = tsk_modular_simplifier_merge_ancestors(&simplifier, last_parent); if (ret < 0) { goto out; } From 0a149ce5ae312e02536d130e39e1d53a90449ee7 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Tue, 9 May 2023 11:54:39 -0700 Subject: [PATCH 26/88] start treeseq comparison --- c/tests/test_tables.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 3037027d73..c8dfb3735b 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -24,6 +24,7 @@ #include "testlib.h" #include "tskit/core.h" +#include "tskit/trees.h" #include #include #include @@ -8722,6 +8723,7 @@ run_test_modular_simplify_single_tree(tsk_id_t row_order[5], int expected_result tsk_table_collection_t tables, standard_tables; tsk_edge_table_t new_edges; tsk_modular_simplifier_t simplifier; + tsk_treeseq_t treeseq, standard_treeseq; tsk_id_t *samples; tsk_id_t last_parent; tsk_size_t row; @@ -8777,11 +8779,31 @@ run_test_modular_simplify_single_tree(tsk_id_t row_order[5], int expected_result } // Now, we can compare various properties of the two table collections - fprintf(stdout, "%ld %ld\n", standard_tables.nodes.num_rows, tables.nodes.num_rows); - fprintf(stdout, "%ld %ld\n", standard_tables.edges.num_rows, tables.edges.num_rows); CU_ASSERT_EQUAL_FATAL(standard_tables.edges.num_rows, tables.edges.num_rows); CU_ASSERT_EQUAL_FATAL(standard_tables.nodes.num_rows, tables.nodes.num_rows); + ret = tsk_table_collection_build_index(&standard_tables, 0); + if (ret < 0) { + goto out; + } + ret = tsk_table_collection_build_index(&tables, 0); + if (ret < 0) { + goto out; + } + + ret = tsk_treeseq_init(&standard_treeseq, &standard_tables, 0); + if (ret < 0) { + goto out; + } + ret = tsk_treeseq_init(&treeseq, &tables, 0); + if (ret < 0) { + goto out; + } + CU_ASSERT_EQUAL_FATAL(tsk_treeseq_get_num_trees(&standard_treeseq), + tsk_treeseq_get_num_trees(&treeseq)); + tsk_treeseq_free(&standard_treeseq); + tsk_treeseq_free(&treeseq); + out: tsk_table_collection_free(&tables); tsk_table_collection_free(&standard_tables); From f19f3a0c9aca466048b5314909b967e52f44781b Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Tue, 9 May 2023 12:05:54 -0700 Subject: [PATCH 27/88] tree time comparisons... --- c/tests/test_tables.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index c8dfb3735b..718841b644 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8724,9 +8724,11 @@ run_test_modular_simplify_single_tree(tsk_id_t row_order[5], int expected_result tsk_edge_table_t new_edges; tsk_modular_simplifier_t simplifier; tsk_treeseq_t treeseq, standard_treeseq; + tsk_tree_t standard_tree, tree; tsk_id_t *samples; tsk_id_t last_parent; tsk_size_t row; + double ttl_time, standard_ttl_time; make_single_tree_for_testing_modular_simplify(&tables, &new_edges, &samples); ret = tsk_table_collection_copy(&tables, &standard_tables, 0); @@ -8778,6 +8780,8 @@ run_test_modular_simplify_single_tree(tsk_id_t row_order[5], int expected_result goto out; } + CU_ASSERT_EQUAL_FATAL(ret, 0); + // Now, we can compare various properties of the two table collections CU_ASSERT_EQUAL_FATAL(standard_tables.edges.num_rows, tables.edges.num_rows); CU_ASSERT_EQUAL_FATAL(standard_tables.nodes.num_rows, tables.nodes.num_rows); @@ -8801,9 +8805,27 @@ run_test_modular_simplify_single_tree(tsk_id_t row_order[5], int expected_result } CU_ASSERT_EQUAL_FATAL(tsk_treeseq_get_num_trees(&standard_treeseq), tsk_treeseq_get_num_trees(&treeseq)); + ret = tsk_tree_init(&standard_tree, &standard_treeseq, 0); + if (ret < 0) { + goto out; + } + ret = tsk_tree_init(&tree, &treeseq, 0); + if (ret < 0) { + goto out; + } + ret = tsk_tree_first(&standard_tree); + CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK); + for (ret = tsk_tree_first(&tree); ret == TSK_TREE_OK; ret = tsk_tree_next(&tree)) { + tsk_tree_get_total_branch_length(&tree, -1, &ttl_time); + tsk_tree_get_total_branch_length(&standard_tree, -1, &standard_ttl_time); + CU_ASSERT_TRUE(ttl_time - standard_ttl_time <= 1e-9); + tsk_tree_next(&standard_tree); + } tsk_treeseq_free(&standard_treeseq); tsk_treeseq_free(&treeseq); - + tsk_tree_free(&standard_tree); + tsk_tree_free(&tree); + CU_ASSERT_EQUAL_FATAL(ret, 0); out: tsk_table_collection_free(&tables); tsk_table_collection_free(&standard_tables); From c94677fe911a8dea033657f15cdf9f8158421f0d Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Tue, 9 May 2023 12:14:03 -0700 Subject: [PATCH 28/88] notes --- c/tests/test_tables.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 718841b644..5ee060c6a3 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8758,6 +8758,23 @@ run_test_modular_simplify_single_tree(tsk_id_t row_order[5], int expected_result goto out; } row = 0; + /* Pseudocode that we are mocking: + * For each parent of a new edge: + * - add that edge to the segment queue. + * - When done, finalise the queue and merge ancestors. + * + * If our buffer is wrong, we will have parents unsorted by time + * and/or the same parent processed in different loop iterations. + * Each case is an error that MUST be handled. + * It is trivial to show that not handling the errors can give rise + * to invalid table collections / tree sequences. + * The requirement for error handling must be documented + * + * Production code should use an input other than + * an edge table. + * (How edges are sorted is an internal detail + * and cannot be used for testing.) + */ while (row < new_edges.num_rows) { last_parent = new_edges.parent[row_order[row]]; while (row < new_edges.num_rows @@ -8775,6 +8792,18 @@ run_test_modular_simplify_single_tree(tsk_id_t row_order[5], int expected_result goto out; } } + /* Simplification's internal cleanup. + * Shoud NOT be called if above loop errors. + * We know that not calling it and calling + * "modular simplifier free" does not leak + * because valgrind is happy. + * + * Now, we have processed all (child) nodes whose births are + * MORE RECENT than those in the input tables. + * + * TODO: the init method should check the preconditon + * stated above. + */ ret = tsk_modular_simplifier_finalise(&simplifier, NULL); if (ret < 0) { goto out; From ae005e6c07a35a00e8f65183549e4bf0de1975e9 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Tue, 9 May 2023 12:16:46 -0700 Subject: [PATCH 29/88] TODO --- c/tskit/tables.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/c/tskit/tables.h b/c/tskit/tables.h index 24d06c5e64..0fb6b64d67 100644 --- a/c/tskit/tables.h +++ b/c/tskit/tables.h @@ -4784,21 +4784,26 @@ int tsk_diff_iter_next(tsk_diff_iter_t *self, double *left, double *right, tsk_edge_list_t *edges_out, tsk_edge_list_t *edges_in); void tsk_diff_iter_print_state(const tsk_diff_iter_t *self, FILE *out); +/* TODO: document */ typedef struct { /* don't leak private types into public API */ struct __tsk_modular_simplifier_impl_t *pimpl; } tsk_modular_simplifier_t; +/* TODO: document */ int tsk_modular_simplifier_init(tsk_modular_simplifier_t *self, tsk_table_collection_t *tables, const tsk_id_t *samples, tsk_size_t num_samples, tsk_flags_t options); +/* TODO: document */ int tsk_modular_simplifier_free(tsk_modular_simplifier_t *self); -// metadata... +/* TODO: document */ int tsk_modular_simplifier_add_edge(tsk_modular_simplifier_t *self, double left, double right, tsk_id_t parent, tsk_id_t child); +/* TODO: document */ int tsk_modular_simplifier_merge_ancestors( tsk_modular_simplifier_t *self, tsk_id_t parent); +/* TODO: document */ // runs the simplifier, thus processing ancient edges // present in the input edge table. int tsk_modular_simplifier_finalise(tsk_modular_simplifier_t *self, tsk_id_t *node_map); From bddce2756efca93611be6366a6ebb03dae2b3fdd Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Tue, 9 May 2023 12:19:00 -0700 Subject: [PATCH 30/88] fix typo --- c/tests/test_tables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 5ee060c6a3..a61473ea9e 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8793,7 +8793,7 @@ run_test_modular_simplify_single_tree(tsk_id_t row_order[5], int expected_result } } /* Simplification's internal cleanup. - * Shoud NOT be called if above loop errors. + * Should NOT be called if above loop errors. * We know that not calling it and calling * "modular simplifier free" does not leak * because valgrind is happy. From afe426f1446b691fdd520b952c3aa10f6cc9b213 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Tue, 9 May 2023 12:22:23 -0700 Subject: [PATCH 31/88] update note --- c/tests/test_tables.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index a61473ea9e..90b10b506a 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8801,8 +8801,10 @@ run_test_modular_simplify_single_tree(tsk_id_t row_order[5], int expected_result * Now, we have processed all (child) nodes whose births are * MORE RECENT than those in the input tables. * - * TODO: the init method should check the preconditon - * stated above. + * TODO: the init method should calculate the minimum + * birth time of any child in the input table. + * The "edge adder" function should ensure that all + * new edges are < (or <= ??) that value. */ ret = tsk_modular_simplifier_finalise(&simplifier, NULL); if (ret < 0) { From bfa2484c5e0d1f3388646b425bf9b9a11b9437fe Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Tue, 9 May 2023 12:23:50 -0700 Subject: [PATCH 32/88] fix setup comment --- c/tests/test_tables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 90b10b506a..2c6ac84dec 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8658,7 +8658,7 @@ test_simplify_metadata(void) * | | | * 9 10 11 <- new child 1, 2, and 3, resp. * - * Then, we simplify w.r.to 9 and 10. + * Then, we simplify w.r.to [9, 10, 11]. */ static void make_single_tree_for_testing_modular_simplify( From dd2e20e9233a58cc36c6f76023f6e2c675cae7d1 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Tue, 9 May 2023 12:59:17 -0700 Subject: [PATCH 33/88] fix implicit integer cast flagged by circle CI --- c/tests/test_tables.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 2c6ac84dec..19c430f48b 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8696,19 +8696,19 @@ make_single_tree_for_testing_modular_simplify( } /* record edges */ - ret = tsk_edge_table_add_row( + ret = (tsk_id_t) tsk_edge_table_add_row( new_edges, 0, tables->sequence_length, 0, new_parent1, NULL, 0); CU_ASSERT_TRUE_FATAL(ret >= 0); - ret = tsk_edge_table_add_row( + ret = (tsk_id_t) tsk_edge_table_add_row( new_edges, 0, tables->sequence_length, 2, new_parent2, NULL, 0); CU_ASSERT_TRUE_FATAL(ret >= 0); - ret = tsk_edge_table_add_row( + ret = (tsk_id_t) tsk_edge_table_add_row( new_edges, 0, tables->sequence_length, new_parent1, new_child1, NULL, 0); CU_ASSERT_TRUE_FATAL(ret >= 0); - ret = tsk_edge_table_add_row( + ret = (tsk_id_t) tsk_edge_table_add_row( new_edges, 0, tables->sequence_length, new_parent2, new_child2, NULL, 0); CU_ASSERT_TRUE_FATAL(ret >= 0); - ret = tsk_edge_table_add_row( + ret = (tsk_id_t) tsk_edge_table_add_row( new_edges, 0, tables->sequence_length, new_parent2, new_child3, NULL, 0); CU_ASSERT_TRUE_FATAL(ret >= 0); (*samples)[0] = new_child1; From 0a8f5ab916bc92c4ae74f2765ca049b5d71d0153 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Tue, 9 May 2023 13:04:30 -0700 Subject: [PATCH 34/88] truly fix the cast --- c/tests/test_tables.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 19c430f48b..9355e7a8fd 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8696,19 +8696,19 @@ make_single_tree_for_testing_modular_simplify( } /* record edges */ - ret = (tsk_id_t) tsk_edge_table_add_row( + ret = (int) tsk_edge_table_add_row( new_edges, 0, tables->sequence_length, 0, new_parent1, NULL, 0); CU_ASSERT_TRUE_FATAL(ret >= 0); - ret = (tsk_id_t) tsk_edge_table_add_row( + ret = (int) tsk_edge_table_add_row( new_edges, 0, tables->sequence_length, 2, new_parent2, NULL, 0); CU_ASSERT_TRUE_FATAL(ret >= 0); - ret = (tsk_id_t) tsk_edge_table_add_row( + ret = (int) tsk_edge_table_add_row( new_edges, 0, tables->sequence_length, new_parent1, new_child1, NULL, 0); CU_ASSERT_TRUE_FATAL(ret >= 0); - ret = (tsk_id_t) tsk_edge_table_add_row( + ret = (int) tsk_edge_table_add_row( new_edges, 0, tables->sequence_length, new_parent2, new_child2, NULL, 0); CU_ASSERT_TRUE_FATAL(ret >= 0); - ret = (tsk_id_t) tsk_edge_table_add_row( + ret = (int) tsk_edge_table_add_row( new_edges, 0, tables->sequence_length, new_parent2, new_child3, NULL, 0); CU_ASSERT_TRUE_FATAL(ret >= 0); (*samples)[0] = new_child1; From c0112d1c71090357347cf8f1810a060e011d1306 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Tue, 9 May 2023 13:15:51 -0700 Subject: [PATCH 35/88] check that no new child is born before any child in the input tables. --- c/tskit/tables.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/c/tskit/tables.c b/c/tskit/tables.c index 8499ea6fb6..440092d9ac 100644 --- a/c/tskit/tables.c +++ b/c/tskit/tables.c @@ -13614,6 +13614,7 @@ typedef struct __tsk_modular_simplifier_impl_t { tsk_id_t *input_node_visited; tsk_size_t num_input_nodes; double last_parent_time; + double minimum_input_node_time; } tsk_modular_simplifier_impl_t; int @@ -13643,6 +13644,12 @@ tsk_modular_simplifier_init(tsk_modular_simplifier_t *self, self->pimpl->num_input_nodes = tables->nodes.num_rows; self->pimpl->last_parent_processed = -1; self->pimpl->last_parent_time = DBL_MIN; + self->pimpl->minimum_input_node_time = DBL_MAX; + for (i = 0; i < tables->edges.num_rows; ++i) { + self->pimpl->minimum_input_node_time + = TSK_MIN(self->pimpl->minimum_input_node_time, + tables->nodes.time[tables->edges.child[i]]); + } /* Now that we have set up the pimpl state, * we can let the unsual init happen @@ -13674,6 +13681,13 @@ tsk_modular_simplifier_add_edge(tsk_modular_simplifier_t *self, double left, goto out; } + if (self->pimpl->simplifier.input_tables.nodes.time[child] + > self->pimpl->minimum_input_node_time) { + // TODO: does this mean what I think it means :) + ret = TSK_ERR_EDGES_NOT_SORTED_CHILD; + goto out; + } + if (self->pimpl->simplifier.input_tables.nodes.time[parent] < self->pimpl->last_parent_time) { ret = TSK_ERR_EDGES_NOT_SORTED_PARENT_TIME; From 91734e3d4a446ea27295c6de3d4b68a1ccc9001e Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Tue, 9 May 2023 13:21:03 -0700 Subject: [PATCH 36/88] use correct error codes --- c/tskit/tables.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/c/tskit/tables.c b/c/tskit/tables.c index 440092d9ac..2e52619972 100644 --- a/c/tskit/tables.c +++ b/c/tskit/tables.c @@ -13670,20 +13670,23 @@ tsk_modular_simplifier_add_edge(tsk_modular_simplifier_t *self, double left, { int ret = 0; - if (parent == TSK_NULL || parent >= (tsk_id_t) self->pimpl->num_input_nodes) { - // FIXME: we don't have good error code for this + if (parent == TSK_NULL) { ret = TSK_ERR_NULL_PARENT; goto out; } - if (child == TSK_NULL || child >= (tsk_id_t) self->pimpl->num_input_nodes) { - // FIXME: we don't have good error code for this + if (child == TSK_NULL) { ret = TSK_ERR_NULL_CHILD; goto out; } + if (parent >= (tsk_id_t) self->pimpl->num_input_nodes + || child >= (tsk_id_t) self->pimpl->num_input_nodes) { + ret = TSK_ERR_NODE_OUT_OF_BOUNDS; + goto out; + } if (self->pimpl->simplifier.input_tables.nodes.time[child] > self->pimpl->minimum_input_node_time) { - // TODO: does this mean what I think it means :) + // TODO: does this mean what I think it means? ret = TSK_ERR_EDGES_NOT_SORTED_CHILD; goto out; } From ebcbd5e8978a76973592529c113de359ba854843 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Tue, 9 May 2023 13:36:27 -0700 Subject: [PATCH 37/88] add tests of null parent/child in edge --- c/tests/test_tables.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 9355e7a8fd..3ff30540ad 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8889,6 +8889,30 @@ test_table_collection_modular_simplify_simple_tree_add_edges_wrong_birth_order(v row_order, TSK_ERR_EDGES_NOT_SORTED_PARENT_TIME); } +static void +test_table_collection_modular_simplify_add_null_parent_or_child(void) +{ + int ret; + tsk_table_collection_t tables; + tsk_edge_table_t new_edges; + tsk_modular_simplifier_t simplifier; + tsk_id_t *samples; + make_single_tree_for_testing_modular_simplify(&tables, &new_edges, &samples); + ret = tsk_modular_simplifier_init(&simplifier, &tables, samples, 3, 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); + ret = tsk_modular_simplifier_add_edge( + &simplifier, 0., 1, TSK_NULL, new_edges.child[4]); + CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NULL_PARENT); + ret = tsk_modular_simplifier_add_edge( + &simplifier, 0., 1, new_edges.parent[4], TSK_NULL); + CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NULL_CHILD); + + tsk_safe_free(samples); + tsk_table_collection_free(&tables); + tsk_edge_table_free(&new_edges); + tsk_modular_simplifier_free(&simplifier); +} + static void test_edge_update_invalidates_index(void) { @@ -11859,6 +11883,8 @@ main(int argc, char **argv) { "test_table_collection_modular_simplify_simple_tree_add_edges_wrong_birth_" "order", test_table_collection_modular_simplify_simple_tree_add_edges_wrong_birth_order }, + { "test_table_collection_modular_simplify_add_null_parent_or_child", + test_table_collection_modular_simplify_add_null_parent_or_child }, { "test_table_collection_time_units", test_table_collection_time_units }, { "test_table_collection_reference_sequence", test_table_collection_reference_sequence }, From 6fb5985869713f1a8dcac16b48f82e4201a7e284 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Tue, 9 May 2023 13:58:22 -0700 Subject: [PATCH 38/88] test for child time > that of any in the input tables --- c/tests/test_tables.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 3ff30540ad..bdefcbf4ed 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8913,6 +8913,29 @@ test_table_collection_modular_simplify_add_null_parent_or_child(void) tsk_modular_simplifier_free(&simplifier); } +static void +test_table_collection_modular_simplify_add_child_with_invalid_time(void) +{ + int ret; + tsk_table_collection_t tables; + tsk_edge_table_t new_edges; + tsk_modular_simplifier_t simplifier; + tsk_id_t *samples; + make_single_tree_for_testing_modular_simplify(&tables, &new_edges, &samples); + /* edit the first child's birth time to be "very wrong" */ + tables.nodes.time[new_edges.child[4]] = DBL_MAX; + ret = tsk_modular_simplifier_init(&simplifier, &tables, samples, 3, 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); + ret = tsk_modular_simplifier_add_edge( + &simplifier, 0., 1, new_edges.parent[4], new_edges.child[4]); + CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EDGES_NOT_SORTED_CHILD); + + tsk_safe_free(samples); + tsk_table_collection_free(&tables); + tsk_edge_table_free(&new_edges); + tsk_modular_simplifier_free(&simplifier); +} + static void test_edge_update_invalidates_index(void) { @@ -11885,6 +11908,8 @@ main(int argc, char **argv) test_table_collection_modular_simplify_simple_tree_add_edges_wrong_birth_order }, { "test_table_collection_modular_simplify_add_null_parent_or_child", test_table_collection_modular_simplify_add_null_parent_or_child }, + { "test_table_collection_modular_simplify_add_child_with_invalid_time", + test_table_collection_modular_simplify_add_child_with_invalid_time }, { "test_table_collection_time_units", test_table_collection_time_units }, { "test_table_collection_reference_sequence", test_table_collection_reference_sequence }, From 6803b4d2e999a6cd8a6acecf194e840afed11431 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Tue, 9 May 2023 14:07:50 -0700 Subject: [PATCH 39/88] remove useless if --- c/tskit/tables.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/c/tskit/tables.c b/c/tskit/tables.c index 2e52619972..b13a7d246e 100644 --- a/c/tskit/tables.c +++ b/c/tskit/tables.c @@ -13656,10 +13656,6 @@ tsk_modular_simplifier_init(tsk_modular_simplifier_t *self, */ ret = simplifier_init( &self->pimpl->simplifier, samples, num_samples, tables, options); - if (ret != 0) { - goto out; - } - out: return ret; } From b2830ff3c3f735d0e617aa46b7c5ed1ea01eb085 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Tue, 9 May 2023 14:12:54 -0700 Subject: [PATCH 40/88] test node out of range --- c/tests/test_tables.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index bdefcbf4ed..09d8ce663d 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8890,7 +8890,7 @@ test_table_collection_modular_simplify_simple_tree_add_edges_wrong_birth_order(v } static void -test_table_collection_modular_simplify_add_null_parent_or_child(void) +test_table_collection_modular_simplify_add_invalid_parent_or_child(void) { int ret; tsk_table_collection_t tables; @@ -8906,6 +8906,11 @@ test_table_collection_modular_simplify_add_null_parent_or_child(void) ret = tsk_modular_simplifier_add_edge( &simplifier, 0., 1, new_edges.parent[4], TSK_NULL); CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NULL_CHILD); + ret = tsk_modular_simplifier_add_edge(&simplifier, 0., 1, 10000, new_edges.child[4]); + CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS); + ret = tsk_modular_simplifier_add_edge( + &simplifier, 0., 1, new_edges.parent[4], 10000); + CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS); tsk_safe_free(samples); tsk_table_collection_free(&tables); @@ -11906,8 +11911,8 @@ main(int argc, char **argv) { "test_table_collection_modular_simplify_simple_tree_add_edges_wrong_birth_" "order", test_table_collection_modular_simplify_simple_tree_add_edges_wrong_birth_order }, - { "test_table_collection_modular_simplify_add_null_parent_or_child", - test_table_collection_modular_simplify_add_null_parent_or_child }, + { "test_table_collection_modular_simplify_add_invalid_parent_or_child", + test_table_collection_modular_simplify_add_invalid_parent_or_child }, { "test_table_collection_modular_simplify_add_child_with_invalid_time", test_table_collection_modular_simplify_add_child_with_invalid_time }, { "test_table_collection_time_units", test_table_collection_time_units }, From 77bb9c866c5a00408d544c2a44c936e927fa69af Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Tue, 9 May 2023 14:38:39 -0700 Subject: [PATCH 41/88] bad samples --- c/tests/test_tables.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 09d8ce663d..7dc0b2d896 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8941,6 +8941,24 @@ test_table_collection_modular_simplify_add_child_with_invalid_time(void) tsk_modular_simplifier_free(&simplifier); } +static void +test_table_collection_modular_simplify_bad_samples(void) +{ + int ret; + tsk_table_collection_t tables; + tsk_edge_table_t new_edges; + tsk_modular_simplifier_t simplifier; + tsk_id_t *samples; + make_single_tree_for_testing_modular_simplify(&tables, &new_edges, &samples); + samples[0] = -1; + ret = tsk_modular_simplifier_init(&simplifier, &tables, samples, 3, 0); + CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS); + tsk_safe_free(samples); + tsk_table_collection_free(&tables); + tsk_edge_table_free(&new_edges); + tsk_modular_simplifier_free(&simplifier); +} + static void test_edge_update_invalidates_index(void) { @@ -11915,6 +11933,8 @@ main(int argc, char **argv) test_table_collection_modular_simplify_add_invalid_parent_or_child }, { "test_table_collection_modular_simplify_add_child_with_invalid_time", test_table_collection_modular_simplify_add_child_with_invalid_time }, + { "test_table_collection_modular_simplify_bad_samples", + test_table_collection_modular_simplify_bad_samples }, { "test_table_collection_time_units", test_table_collection_time_units }, { "test_table_collection_reference_sequence", test_table_collection_reference_sequence }, From e30c155e749dfc0d659e8aba245c8fda8710dfef Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Tue, 9 May 2023 14:42:46 -0700 Subject: [PATCH 42/88] note --- c/tests/test_tables.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 7dc0b2d896..6882bd0ce7 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8941,6 +8941,9 @@ test_table_collection_modular_simplify_add_child_with_invalid_time(void) tsk_modular_simplifier_free(&simplifier); } +/* This hits part of simplifier intialisation that is + * NOT part of table integrity checks + */ static void test_table_collection_modular_simplify_bad_samples(void) { From 09bd543bcdc48872364cb32951bebc70a7c5fff3 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Tue, 9 May 2023 14:46:59 -0700 Subject: [PATCH 43/88] input table integrity check fail --- c/tests/test_tables.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 6882bd0ce7..f278190333 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8962,6 +8962,28 @@ test_table_collection_modular_simplify_bad_samples(void) tsk_modular_simplifier_free(&simplifier); } +static void +test_table_collection_modular_simplify_table_integrity_check_fail(void) +{ + int ret; + tsk_table_collection_t tables; + tsk_edge_table_t new_edges; + tsk_modular_simplifier_t simplifier; + tsk_id_t *samples; + double temp; + make_single_tree_for_testing_modular_simplify(&tables, &new_edges, &samples); + temp = tables.nodes.time[4]; + // now we have a parent/child time violation + tables.nodes.time[4] = tables.nodes.time[3]; + tables.nodes.time[3] = temp; + ret = tsk_modular_simplifier_init(&simplifier, &tables, samples, 3, 0); + CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NODE_TIME_ORDERING); + tsk_safe_free(samples); + tsk_table_collection_free(&tables); + tsk_edge_table_free(&new_edges); + tsk_modular_simplifier_free(&simplifier); +} + static void test_edge_update_invalidates_index(void) { @@ -11938,6 +11960,8 @@ main(int argc, char **argv) test_table_collection_modular_simplify_add_child_with_invalid_time }, { "test_table_collection_modular_simplify_bad_samples", test_table_collection_modular_simplify_bad_samples }, + { "test_table_collection_modular_simplify_table_integrity_check_fail", + test_table_collection_modular_simplify_table_integrity_check_fail }, { "test_table_collection_time_units", test_table_collection_time_units }, { "test_table_collection_reference_sequence", test_table_collection_reference_sequence }, From 820eed6a025d8a8861cc45561b86ab93d54eb7f0 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Tue, 9 May 2023 15:14:25 -0700 Subject: [PATCH 44/88] start on overlapping generation tests --- c/tests/test_tables.c | 74 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index f278190333..e6f84adc17 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8716,6 +8716,66 @@ make_single_tree_for_testing_modular_simplify( (*samples)[2] = new_child3; } +/* This is our starting tree. + * We will add additional births + * to 0/1/3 and then use that as + * the basis for testing. + * + * 1.20┊ ┊ 8 ┊ ┊ + * ┊ ┊ ┏━┻━┓ ┊ ┊ + * 1.00┊ 7 ┊ ┃ ┃ ┊ ┊ + * ┊ ┏━┻━┓ ┊ ┃ ┃ ┊ ┊ + * 0.70┊ ┃ ┃ ┊ ┃ ┃ ┊ 6 ┊ + * ┊ ┃ ┃ ┊ ┃ ┃ ┊ ┏━┻━┓ ┊ + * 0.50┊ ┃ 5 ┊ 5 ┃ ┊ ┃ 5 ┊ + * ┊ ┃ ┏━┻┓ ┊ ┏┻━┓ ┃ ┊ ┃ ┏━┻┓ ┊ + * 0.40┊ ┃ ┃ 4 ┊ 4 ┃ ┃ ┊ ┃ ┃ 4 ┊ + * ┊ ┃ ┃ ┏┻┓ ┊ ┏┻┓ ┃ ┃ ┊ ┃ ┃ ┏┻┓ ┊ + * 0.20┊ ┃ ┃ ┃ 3 ┊ ┃ ┃ ┃ 3 ┊ ┃ ┃ ┃ 3 ┊ + * ┊ ┃ ┃ ┃ ┊ ┃ ┃ ┃ ┊ ┃ ┃ ┃ ┊ + * 0.10┊ ┃ 1 2 ┊ ┃ 2 1 ┊ ┃ 1 2 ┊ + * ┊ ┃ ┊ ┃ ┊ ┃ ┊ + * 0.00┊ 0 ┊ 0 ┊ 0 ┊ + * 0.00 2.00 8.00 10.00 + */ +static void +make_overlapping_generations_trees_for_testing_modular_simplify( + tsk_table_collection_t *tables, tsk_edge_table_t *new_edges, tsk_id_t **samples) +{ + int ret; + tsk_id_t new_child0, new_child1, new_child2; + ret = tsk_table_collection_init(tables, 0); + parse_edges(internal_sample_ex_edges, &tables->edges); + parse_nodes(internal_sample_ex_nodes, &tables->nodes); + tables->sequence_length = 10.0; + CU_ASSERT_EQUAL_FATAL(ret, 0); + ret = tsk_edge_table_init(new_edges, 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); + *samples = tsk_malloc(3 * sizeof(tsk_id_t)); + CU_ASSERT_TRUE_FATAL(*samples != NULL); + + new_child0 = tsk_node_table_add_row(&tables->nodes, 0, -1.0, -1, -1, NULL, 0); + CU_ASSERT_TRUE_FATAL(new_child0 > 0); + new_child1 = tsk_node_table_add_row(&tables->nodes, 0, -1.0, -1, -1, NULL, 0); + CU_ASSERT_TRUE_FATAL(new_child1 > 0); + new_child2 = tsk_node_table_add_row(&tables->nodes, 0, -1.0, -1, -1, NULL, 0); + CU_ASSERT_TRUE_FATAL(new_child2 > 0); + + /* To maintain sanity, transmit non-recombinant genomes */ + ret = tsk_edge_table_add_row( + &tables->edges, 0., tables->sequence_length, 0, new_child0, NULL, 0); + CU_ASSERT_TRUE_FATAL(ret >= 0); + ret = tsk_edge_table_add_row( + &tables->edges, 0., tables->sequence_length, 1, new_child1, NULL, 0); + CU_ASSERT_TRUE_FATAL(ret >= 0); + ret = tsk_edge_table_add_row( + &tables->edges, 0., tables->sequence_length, 3, new_child2, NULL, 0); + CU_ASSERT_TRUE_FATAL(ret >= 0); + (*samples)[0] = new_child0; + (*samples)[1] = new_child1; + (*samples)[2] = new_child2; +} + static void run_test_modular_simplify_single_tree(tsk_id_t row_order[5], int expected_result) { @@ -8941,6 +9001,18 @@ test_table_collection_modular_simplify_add_child_with_invalid_time(void) tsk_modular_simplifier_free(&simplifier); } +static void +test_table_collection_modular_simplify_overlapping_generations(void) +{ + // int ret; + tsk_table_collection_t tables; + tsk_edge_table_t new_edges; + // tsk_modular_simplifier_t simplifier; + tsk_id_t *samples; + make_overlapping_generations_trees_for_testing_modular_simplify( + &tables, &new_edges, &samples); +} + /* This hits part of simplifier intialisation that is * NOT part of table integrity checks */ @@ -11962,6 +12034,8 @@ main(int argc, char **argv) test_table_collection_modular_simplify_bad_samples }, { "test_table_collection_modular_simplify_table_integrity_check_fail", test_table_collection_modular_simplify_table_integrity_check_fail }, + { "test_table_collection_modular_simplify_overlapping_generations", + test_table_collection_modular_simplify_overlapping_generations }, { "test_table_collection_time_units", test_table_collection_time_units }, { "test_table_collection_reference_sequence", test_table_collection_reference_sequence }, From fdbfac13156a8214ae02b4ca5e94897e662795b3 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Tue, 9 May 2023 15:19:54 -0700 Subject: [PATCH 45/88] big code duplication. ugh --- c/tests/test_tables.c | 159 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 155 insertions(+), 4 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index e6f84adc17..df4e24ebbe 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -9002,15 +9002,166 @@ test_table_collection_modular_simplify_add_child_with_invalid_time(void) } static void -test_table_collection_modular_simplify_overlapping_generations(void) +run_test_modular_simplify_overlapping_generations( + tsk_id_t row_order[5], int expected_result) { - // int ret; - tsk_table_collection_t tables; + int ret; + tsk_table_collection_t tables, standard_tables; tsk_edge_table_t new_edges; - // tsk_modular_simplifier_t simplifier; + tsk_modular_simplifier_t simplifier; + tsk_treeseq_t treeseq, standard_treeseq; + tsk_tree_t standard_tree, tree; tsk_id_t *samples; + tsk_id_t last_parent; + tsk_size_t row; + double ttl_time, standard_ttl_time; make_overlapping_generations_trees_for_testing_modular_simplify( &tables, &new_edges, &samples); + + for (row = 0; row < 3; ++row) { + fprintf(stdout, "%d\n", samples[row]); + } + + ret = tsk_table_collection_copy(&tables, &standard_tables, 0); + if (ret < 0) { + goto out; + } + ret = tsk_edge_table_append_columns(&standard_tables.edges, new_edges.num_rows, + new_edges.left, new_edges.right, new_edges.parent, new_edges.child, NULL, NULL); + if (ret < 0) { + goto out; + } + ret = tsk_table_collection_sort(&standard_tables, NULL, 0); + if (ret < 0) { + goto out; + } + CU_ASSERT_EQUAL_FATAL( + standard_tables.edges.num_rows, tables.edges.num_rows + new_edges.num_rows); + CU_ASSERT_EQUAL_FATAL(standard_tables.nodes.num_rows, tables.nodes.num_rows); + + ret = tsk_table_collection_simplify(&standard_tables, samples, 3, 0, NULL); + if (ret < 0) { + goto out; + } + + ret = tsk_modular_simplifier_init(&simplifier, &tables, samples, 3, 0); + if (ret < 0) { + goto out; + } + row = 0; + /* Pseudocode that we are mocking: + * For each parent of a new edge: + * - add that edge to the segment queue. + * - When done, finalise the queue and merge ancestors. + * + * If our buffer is wrong, we will have parents unsorted by time + * and/or the same parent processed in different loop iterations. + * Each case is an error that MUST be handled. + * It is trivial to show that not handling the errors can give rise + * to invalid table collections / tree sequences. + * The requirement for error handling must be documented + * + * Production code should use an input other than + * an edge table. + * (How edges are sorted is an internal detail + * and cannot be used for testing.) + */ + while (row < new_edges.num_rows) { + last_parent = new_edges.parent[row_order[row]]; + while (row < new_edges.num_rows + && new_edges.parent[row_order[row]] == last_parent) { + ret = tsk_modular_simplifier_add_edge(&simplifier, + new_edges.left[row_order[row]], new_edges.right[row_order[row]], + new_edges.parent[row_order[row]], new_edges.child[row_order[row]]); + if (ret < 0) { + goto out; + } + ++row; + } + ret = tsk_modular_simplifier_merge_ancestors(&simplifier, last_parent); + if (ret < 0) { + goto out; + } + } + /* Simplification's internal cleanup. + * Should NOT be called if above loop errors. + * We know that not calling it and calling + * "modular simplifier free" does not leak + * because valgrind is happy. + * + * Now, we have processed all (child) nodes whose births are + * MORE RECENT than those in the input tables. + * + * TODO: the init method should calculate the minimum + * birth time of any child in the input table. + * The "edge adder" function should ensure that all + * new edges are < (or <= ??) that value. + */ + ret = tsk_modular_simplifier_finalise(&simplifier, NULL); + if (ret < 0) { + goto out; + } + + CU_ASSERT_EQUAL_FATAL(ret, 0); + + // Now, we can compare various properties of the two table collections + CU_ASSERT_EQUAL_FATAL(standard_tables.edges.num_rows, tables.edges.num_rows); + CU_ASSERT_EQUAL_FATAL(standard_tables.nodes.num_rows, tables.nodes.num_rows); + + ret = tsk_table_collection_build_index(&standard_tables, 0); + if (ret < 0) { + goto out; + } + ret = tsk_table_collection_build_index(&tables, 0); + if (ret < 0) { + goto out; + } + + ret = tsk_treeseq_init(&standard_treeseq, &standard_tables, 0); + if (ret < 0) { + goto out; + } + ret = tsk_treeseq_init(&treeseq, &tables, 0); + if (ret < 0) { + goto out; + } + CU_ASSERT_EQUAL_FATAL(tsk_treeseq_get_num_trees(&standard_treeseq), + tsk_treeseq_get_num_trees(&treeseq)); + ret = tsk_tree_init(&standard_tree, &standard_treeseq, 0); + if (ret < 0) { + goto out; + } + ret = tsk_tree_init(&tree, &treeseq, 0); + if (ret < 0) { + goto out; + } + ret = tsk_tree_first(&standard_tree); + CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK); + for (ret = tsk_tree_first(&tree); ret == TSK_TREE_OK; ret = tsk_tree_next(&tree)) { + tsk_tree_get_total_branch_length(&tree, -1, &ttl_time); + tsk_tree_get_total_branch_length(&standard_tree, -1, &standard_ttl_time); + CU_ASSERT_TRUE(ttl_time - standard_ttl_time <= 1e-9); + tsk_tree_next(&standard_tree); + } + tsk_treeseq_free(&standard_treeseq); + tsk_treeseq_free(&treeseq); + tsk_tree_free(&standard_tree); + tsk_tree_free(&tree); + CU_ASSERT_EQUAL_FATAL(ret, 0); +out: + tsk_table_collection_free(&tables); + tsk_table_collection_free(&standard_tables); + tsk_edge_table_free(&new_edges); + tsk_modular_simplifier_free(&simplifier); + tsk_safe_free(samples); + CU_ASSERT_EQUAL_FATAL(ret, expected_result); +} + +static void +test_table_collection_modular_simplify_overlapping_generations(void) +{ + tsk_id_t row_order[5] = { 2, 1, 0 }; + run_test_modular_simplify_overlapping_generations(row_order, 0); } /* This hits part of simplifier intialisation that is From a38e8a0c0a0dd8baa824b1caccb22632c3a6484f Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Tue, 9 May 2023 15:27:32 -0700 Subject: [PATCH 46/88] need to think now about how to handle overlapping gens w/o the actual edge buffer machinery in place. --- c/tests/test_tables.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index df4e24ebbe..d00ad8369e 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8744,6 +8744,7 @@ make_overlapping_generations_trees_for_testing_modular_simplify( { int ret; tsk_id_t new_child0, new_child1, new_child2; + tsk_size_t row; ret = tsk_table_collection_init(tables, 0); parse_edges(internal_sample_ex_edges, &tables->edges); parse_nodes(internal_sample_ex_nodes, &tables->nodes); @@ -8761,6 +8762,13 @@ make_overlapping_generations_trees_for_testing_modular_simplify( new_child2 = tsk_node_table_add_row(&tables->nodes, 0, -1.0, -1, -1, NULL, 0); CU_ASSERT_TRUE_FATAL(new_child2 > 0); + for (row = 0; row < tables->nodes.num_rows; ++row) { + // for some reason, the fixture sets pop to 0... + tables->nodes.population[row] = -1; + // make all times >= 0.0. + tables->nodes.time[row] += 1.0; + } + /* To maintain sanity, transmit non-recombinant genomes */ ret = tsk_edge_table_add_row( &tables->edges, 0., tables->sequence_length, 0, new_child0, NULL, 0); @@ -9152,8 +9160,9 @@ run_test_modular_simplify_overlapping_generations( tsk_table_collection_free(&tables); tsk_table_collection_free(&standard_tables); tsk_edge_table_free(&new_edges); - tsk_modular_simplifier_free(&simplifier); + // tsk_modular_simplifier_free(&simplifier); tsk_safe_free(samples); + fprintf(stdout, "ret = %d\n", ret); CU_ASSERT_EQUAL_FATAL(ret, expected_result); } From b5eb4467641dbb2337a063807565630cfcdec39c Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Tue, 9 May 2023 15:31:33 -0700 Subject: [PATCH 47/88] note for tomorrow --- c/tests/test_tables.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index d00ad8369e..8506bc7831 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8755,6 +8755,11 @@ make_overlapping_generations_trees_for_testing_modular_simplify( *samples = tsk_malloc(3 * sizeof(tsk_id_t)); CU_ASSERT_TRUE_FATAL(*samples != NULL); + // We need to cheat, "stealing" edges from the input + // table and putting them into the new_edges so that + // we can mimic what a "real" implementation needs + // to do. + new_child0 = tsk_node_table_add_row(&tables->nodes, 0, -1.0, -1, -1, NULL, 0); CU_ASSERT_TRUE_FATAL(new_child0 > 0); new_child1 = tsk_node_table_add_row(&tables->nodes, 0, -1.0, -1, -1, NULL, 0); From f5c23cbd28109cc5f0c1ef313560298b44898b6a Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Tue, 9 May 2023 15:49:19 -0700 Subject: [PATCH 48/88] more detailed notes --- c/tests/test_tables.c | 42 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 8506bc7831..1e83f958e9 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8746,20 +8746,15 @@ make_overlapping_generations_trees_for_testing_modular_simplify( tsk_id_t new_child0, new_child1, new_child2; tsk_size_t row; ret = tsk_table_collection_init(tables, 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); parse_edges(internal_sample_ex_edges, &tables->edges); parse_nodes(internal_sample_ex_nodes, &tables->nodes); tables->sequence_length = 10.0; - CU_ASSERT_EQUAL_FATAL(ret, 0); ret = tsk_edge_table_init(new_edges, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); *samples = tsk_malloc(3 * sizeof(tsk_id_t)); CU_ASSERT_TRUE_FATAL(*samples != NULL); - // We need to cheat, "stealing" edges from the input - // table and putting them into the new_edges so that - // we can mimic what a "real" implementation needs - // to do. - new_child0 = tsk_node_table_add_row(&tables->nodes, 0, -1.0, -1, -1, NULL, 0); CU_ASSERT_TRUE_FATAL(new_child0 > 0); new_child1 = tsk_node_table_add_row(&tables->nodes, 0, -1.0, -1, -1, NULL, 0); @@ -8773,8 +8768,43 @@ make_overlapping_generations_trees_for_testing_modular_simplify( // make all times >= 0.0. tables->nodes.time[row] += 1.0; } + ret = tsk_table_collection_check_integrity(tables, 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); + /* We need to cheat, "stealing" edges from the input + * table and putting them into the new_edges so that + * we can mimic what a "real" implementation needs + * to do. + * + * Initial edge fixture: + * const char *internal_sample_ex_edges = "2 8 4 0\n" + * "0 10 4 2\n" + * "0 2 4 3\n" + * "8 10 4 3\n" + * "0 10 5 1,4\n" + * "8 10 6 0,5\n" + * "0 2 7 0,5\n" + * "2 8 8 3,5\n"; + * We can rewrite it to make the edge_id easier to read: + * "2 8 4 0\n" + * "0 10 4 2\n" + * "0 2 4 3\n" + * "8 10 4 3\n" + * "0 10 5 1\n" + * "0 10 5 4\n" + * "8 10 6 0\n" + * "8 10 6 5\n" + * "0 2 7 0\n" + * "0 2 7 5\n" + * "2 8 8 3\n"; + * "2 8 8 5\n"; + */ /* To maintain sanity, transmit non-recombinant genomes */ + for (row = 0; row < tables->edges.num_rows; ++row) { + fprintf(stdout, "%lf %lf %d %d\n", tables->edges.left[row], + tables->edges.right[row], tables->edges.parent[row], + tables->edges.child[row]); + } ret = tsk_edge_table_add_row( &tables->edges, 0., tables->sequence_length, 0, new_child0, NULL, 0); CU_ASSERT_TRUE_FATAL(ret >= 0); From a890997ab82e5f3814d8140f27d98a8510f91291 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Wed, 10 May 2023 13:02:56 -0700 Subject: [PATCH 49/88] update what to do --- c/tests/test_tables.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 1e83f958e9..186c175849 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -9139,6 +9139,9 @@ run_test_modular_simplify_overlapping_generations( * birth time of any child in the input table. * The "edge adder" function should ensure that all * new edges are < (or <= ??) that value. + * + * UPDATE: we need to do <= and it must be with respect + * to ANY node that is "alive" the last time we simplified. */ ret = tsk_modular_simplifier_finalise(&simplifier, NULL); if (ret < 0) { From 4060d13c99222ca58be9747ef63013ac3483df0d Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Wed, 10 May 2023 13:08:11 -0700 Subject: [PATCH 50/88] fix cast errors that aren't in printf --- c/tests/test_tables.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 186c175849..3a69e2848d 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8805,13 +8805,13 @@ make_overlapping_generations_trees_for_testing_modular_simplify( tables->edges.right[row], tables->edges.parent[row], tables->edges.child[row]); } - ret = tsk_edge_table_add_row( + ret = (int) tsk_edge_table_add_row( &tables->edges, 0., tables->sequence_length, 0, new_child0, NULL, 0); CU_ASSERT_TRUE_FATAL(ret >= 0); - ret = tsk_edge_table_add_row( + ret = (int) tsk_edge_table_add_row( &tables->edges, 0., tables->sequence_length, 1, new_child1, NULL, 0); CU_ASSERT_TRUE_FATAL(ret >= 0); - ret = tsk_edge_table_add_row( + ret = (int) tsk_edge_table_add_row( &tables->edges, 0., tables->sequence_length, 3, new_child2, NULL, 0); CU_ASSERT_TRUE_FATAL(ret >= 0); (*samples)[0] = new_child0; From e95b671485a7ad7fd674c71bae0e17a8967c4a05 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Tue, 16 May 2023 13:41:29 -0700 Subject: [PATCH 51/88] start mocking a buffer for internal testing. overlapping generations too hard otherwise --- c/tests/test_tables.c | 132 ++++++++++++++++++++++++++++-------------- 1 file changed, 90 insertions(+), 42 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 3a69e2848d..b05392038f 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8632,6 +8632,61 @@ test_simplify_metadata(void) tsk_table_collection_free(&tables); } +typedef struct { + tsk_id_t **parent; + tsk_id_t **child; + double **left; + double **right; + tsk_size_t max_nodes; + tsk_size_t max_edges; + tsk_size_t *buffered_edges; +} fauxbuffer; + +static void +fauxbuffer_init(tsk_size_t max_nodes, tsk_size_t max_edges, fauxbuffer *buffer) +{ + tsk_size_t i; + buffer->parent = tsk_malloc(max_nodes * sizeof(tsk_id_t *)); + CU_ASSERT_FATAL(buffer->parent != NULL); + buffer->child = tsk_malloc(max_nodes * sizeof(tsk_id_t *)); + CU_ASSERT_FATAL(buffer->child != NULL); + buffer->left = tsk_malloc(max_nodes * sizeof(double *)); + CU_ASSERT_FATAL(buffer->parent != NULL); + buffer->right = tsk_malloc(max_nodes * sizeof(double *)); + CU_ASSERT_FATAL(buffer->right != NULL); + + buffer->buffered_edges = tsk_malloc(max_nodes * sizeof(tsk_size_t)); + CU_ASSERT_FATAL(buffer->buffered_edges != NULL); + + for (i = 0; i < max_nodes; ++i) { + buffer->parent[i] = tsk_malloc(max_edges * sizeof(tsk_id_t)); + CU_ASSERT_FATAL(buffer->parent[i] != NULL); + buffer->child[i] = tsk_malloc(max_edges * sizeof(tsk_id_t)); + CU_ASSERT_FATAL(buffer->child[i] != NULL); + buffer->left[i] = tsk_malloc(max_edges * sizeof(double)); + CU_ASSERT_FATAL(buffer->parent[i] != NULL); + buffer->right[i] = tsk_malloc(max_edges * sizeof(double)); + CU_ASSERT_FATAL(buffer->right[i] != NULL); + buffer->buffered_edges[i] = 0; + } + buffer->max_nodes = max_nodes; + buffer->max_edges = max_edges; +} + +static void +fauxbuffer_buffer( + tsk_id_t parent, tsk_id_t child, double left, double right, fauxbuffer *buffer) +{ + CU_ASSERT_FATAL(parent < (tsk_id_t) buffer->max_nodes); + CU_ASSERT_FATAL(buffer->buffered_edges[parent] < buffer->max_edges); + buffer->parent[parent][buffer->buffered_edges[parent]] = parent; + buffer->child[parent][buffer->buffered_edges[parent]] = child; + buffer->left[parent][buffer->buffered_edges[parent]] = left; + buffer->right[parent][buffer->buffered_edges[parent]] = right; + buffer->buffered_edges[parent] += 1; + CU_ASSERT_FATAL(buffer->buffered_edges[parent] < buffer->max_edges); +} + /* * Start with this tree: * 6 @@ -8721,6 +8776,10 @@ make_single_tree_for_testing_modular_simplify( * to 0/1/3 and then use that as * the basis for testing. * + * Alive nodes will be 0, 1, 2, 3, 4 + * in order to generated the complexity + * we need + * * 1.20┊ ┊ 8 ┊ ┊ * ┊ ┊ ┏━┻━┓ ┊ ┊ * 1.00┊ 7 ┊ ┃ ┃ ┊ ┊ @@ -8740,18 +8799,18 @@ make_single_tree_for_testing_modular_simplify( */ static void make_overlapping_generations_trees_for_testing_modular_simplify( - tsk_table_collection_t *tables, tsk_edge_table_t *new_edges, tsk_id_t **samples) + tsk_table_collection_t *tables, fauxbuffer *buffer, tsk_id_t **samples) { int ret; tsk_id_t new_child0, new_child1, new_child2; - tsk_size_t row; + tsk_size_t row, num_rows_buffered; ret = tsk_table_collection_init(tables, 0); + double tmax; CU_ASSERT_EQUAL_FATAL(ret, 0); parse_edges(internal_sample_ex_edges, &tables->edges); parse_nodes(internal_sample_ex_nodes, &tables->nodes); tables->sequence_length = 10.0; - ret = tsk_edge_table_init(new_edges, 0); - CU_ASSERT_EQUAL_FATAL(ret, 0); + fauxbuffer_init(12, 100, buffer); *samples = tsk_malloc(3 * sizeof(tsk_id_t)); CU_ASSERT_TRUE_FATAL(*samples != NULL); @@ -8770,6 +8829,7 @@ make_overlapping_generations_trees_for_testing_modular_simplify( } ret = tsk_table_collection_check_integrity(tables, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); + tmax = tables->nodes.time[4]; /* We need to cheat, "stealing" edges from the input * table and putting them into the new_edges so that @@ -8799,21 +8859,21 @@ make_overlapping_generations_trees_for_testing_modular_simplify( * "2 8 8 3\n"; * "2 8 8 5\n"; */ - /* To maintain sanity, transmit non-recombinant genomes */ + num_rows_buffered = 0; for (row = 0; row < tables->edges.num_rows; ++row) { + if (tables->nodes.time[tables->edges.parent[row]] <= tmax) { + fauxbuffer_buffer(tables->edges.parent[row], tables->edges.child[row], + tables->edges.left[row], tables->edges.right[row], buffer); + ++num_rows_buffered; + } fprintf(stdout, "%lf %lf %d %d\n", tables->edges.left[row], tables->edges.right[row], tables->edges.parent[row], tables->edges.child[row]); } - ret = (int) tsk_edge_table_add_row( - &tables->edges, 0., tables->sequence_length, 0, new_child0, NULL, 0); - CU_ASSERT_TRUE_FATAL(ret >= 0); - ret = (int) tsk_edge_table_add_row( - &tables->edges, 0., tables->sequence_length, 1, new_child1, NULL, 0); - CU_ASSERT_TRUE_FATAL(ret >= 0); - ret = (int) tsk_edge_table_add_row( - &tables->edges, 0., tables->sequence_length, 3, new_child2, NULL, 0); - CU_ASSERT_TRUE_FATAL(ret >= 0); + /* To maintain sanity, transmit non-recombinant genomes */ + fauxbuffer_buffer(0, new_child0, 0., tables->sequence_length, buffer); + fauxbuffer_buffer(1, new_child1, 0., tables->sequence_length, buffer); + fauxbuffer_buffer(3, new_child2, 0., tables->sequence_length, buffer); (*samples)[0] = new_child0; (*samples)[1] = new_child1; (*samples)[2] = new_child2; @@ -9050,16 +9110,17 @@ run_test_modular_simplify_overlapping_generations( { int ret; tsk_table_collection_t tables, standard_tables; - tsk_edge_table_t new_edges; + fauxbuffer buffer; tsk_modular_simplifier_t simplifier; tsk_treeseq_t treeseq, standard_treeseq; tsk_tree_t standard_tree, tree; tsk_id_t *samples; tsk_id_t last_parent; - tsk_size_t row; + tsk_size_t row, node, edge; double ttl_time, standard_ttl_time; + make_overlapping_generations_trees_for_testing_modular_simplify( - &tables, &new_edges, &samples); + &tables, &buffer, &samples); for (row = 0; row < 3; ++row) { fprintf(stdout, "%d\n", samples[row]); @@ -9069,17 +9130,22 @@ run_test_modular_simplify_overlapping_generations( if (ret < 0) { goto out; } - ret = tsk_edge_table_append_columns(&standard_tables.edges, new_edges.num_rows, - new_edges.left, new_edges.right, new_edges.parent, new_edges.child, NULL, NULL); - if (ret < 0) { - goto out; + + for (node = 0; node < buffer.max_nodes; ++node) { + for (edge = 0; edge < buffer.buffered_edges[node]; ++edge) { + + ret = tsk_edge_table_add_row(&standard_tables.edges, buffer.left[node][edge], + buffer.right[node][edge], buffer.parent[node][edge], + buffer.child[node][edge], NULL, 0); + if (ret < 0) { + goto out; + } + } } ret = tsk_table_collection_sort(&standard_tables, NULL, 0); if (ret < 0) { goto out; } - CU_ASSERT_EQUAL_FATAL( - standard_tables.edges.num_rows, tables.edges.num_rows + new_edges.num_rows); CU_ASSERT_EQUAL_FATAL(standard_tables.nodes.num_rows, tables.nodes.num_rows); ret = tsk_table_collection_simplify(&standard_tables, samples, 3, 0, NULL); @@ -9109,23 +9175,7 @@ run_test_modular_simplify_overlapping_generations( * (How edges are sorted is an internal detail * and cannot be used for testing.) */ - while (row < new_edges.num_rows) { - last_parent = new_edges.parent[row_order[row]]; - while (row < new_edges.num_rows - && new_edges.parent[row_order[row]] == last_parent) { - ret = tsk_modular_simplifier_add_edge(&simplifier, - new_edges.left[row_order[row]], new_edges.right[row_order[row]], - new_edges.parent[row_order[row]], new_edges.child[row_order[row]]); - if (ret < 0) { - goto out; - } - ++row; - } - ret = tsk_modular_simplifier_merge_ancestors(&simplifier, last_parent); - if (ret < 0) { - goto out; - } - } + /* Simplification's internal cleanup. * Should NOT be called if above loop errors. * We know that not calling it and calling @@ -9197,8 +9247,6 @@ run_test_modular_simplify_overlapping_generations( out: tsk_table_collection_free(&tables); tsk_table_collection_free(&standard_tables); - tsk_edge_table_free(&new_edges); - // tsk_modular_simplifier_free(&simplifier); tsk_safe_free(samples); fprintf(stdout, "ret = %d\n", ret); CU_ASSERT_EQUAL_FATAL(ret, expected_result); From f0cb28051cffda1b7445340cb98cc84e40887ce7 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Tue, 16 May 2023 14:43:46 -0700 Subject: [PATCH 52/88] not quite --- c/tests/test_tables.c | 57 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 50 insertions(+), 7 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index b05392038f..168fc59237 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8803,7 +8803,8 @@ make_overlapping_generations_trees_for_testing_modular_simplify( { int ret; tsk_id_t new_child0, new_child1, new_child2; - tsk_size_t row, num_rows_buffered; + tsk_size_t row, moved; + int last_row_to_lift_over; ret = tsk_table_collection_init(tables, 0); double tmax; CU_ASSERT_EQUAL_FATAL(ret, 0); @@ -8859,17 +8860,41 @@ make_overlapping_generations_trees_for_testing_modular_simplify( * "2 8 8 3\n"; * "2 8 8 5\n"; */ - num_rows_buffered = 0; + last_row_to_lift_over = -1; for (row = 0; row < tables->edges.num_rows; ++row) { if (tables->nodes.time[tables->edges.parent[row]] <= tmax) { - fauxbuffer_buffer(tables->edges.parent[row], tables->edges.child[row], - tables->edges.left[row], tables->edges.right[row], buffer); - ++num_rows_buffered; + last_row_to_lift_over = (int) row; } fprintf(stdout, "%lf %lf %d %d\n", tables->edges.left[row], tables->edges.right[row], tables->edges.parent[row], tables->edges.child[row]); } + moved = 0; + if (last_row_to_lift_over > -1) { + for (row = 0; row <= (tsk_size_t) last_row_to_lift_over; ++row) { + fauxbuffer_buffer( + tables->edges.parent[(tsk_size_t) last_row_to_lift_over - row], + tables->edges.child[(tsk_size_t) last_row_to_lift_over - row], + tables->edges.left[(tsk_size_t) last_row_to_lift_over - row], + tables->edges.right[(tsk_size_t) last_row_to_lift_over - row], buffer); + } + for (row = (tsk_size_t) last_row_to_lift_over + 1; row < tables->edges.num_rows; + ++row) { + tables->edges.left[moved] = tables->edges.left[row]; + tables->edges.right[moved] = tables->edges.right[row]; + tables->edges.parent[moved] = tables->edges.parent[row]; + tables->edges.child[moved] = tables->edges.child[row]; + moved += 1; + } + } + fprintf(stdout, "lrb=%d, nr=%ld, moved=%ld\n", last_row_to_lift_over, + tables->edges.num_rows, moved); + tsk_edge_table_truncate(&tables->edges, moved); + for (row = 0; row < tables->edges.num_rows; ++row) { + fprintf(stdout, "after move: %lf %lf %d %d\n", tables->edges.left[row], + tables->edges.right[row], tables->edges.parent[row], + tables->edges.child[row]); + } /* To maintain sanity, transmit non-recombinant genomes */ fauxbuffer_buffer(0, new_child0, 0., tables->sequence_length, buffer); fauxbuffer_buffer(1, new_child1, 0., tables->sequence_length, buffer); @@ -9115,7 +9140,6 @@ run_test_modular_simplify_overlapping_generations( tsk_treeseq_t treeseq, standard_treeseq; tsk_tree_t standard_tree, tree; tsk_id_t *samples; - tsk_id_t last_parent; tsk_size_t row, node, edge; double ttl_time, standard_ttl_time; @@ -9193,6 +9217,25 @@ run_test_modular_simplify_overlapping_generations( * UPDATE: we need to do <= and it must be with respect * to ANY node that is "alive" the last time we simplified. */ + for (row = 0; row < 5; ++row) { + node = (tsk_size_t) row_order[row]; + for (edge = 0; edge < buffer.buffered_edges[node]; ++edge) { + ret = tsk_modular_simplifier_add_edge(&simplifier, + buffer.left[node][buffer.buffered_edges[node] - edge - 1], + buffer.right[node][buffer.buffered_edges[node] - edge - 1], + buffer.parent[node][buffer.buffered_edges[node] - edge - 1], + buffer.child[node][buffer.buffered_edges[node] - edge - 1]); + if (ret < 0) { + fprintf(stdout, "bad\n"); + goto out; + } + ret = tsk_modular_simplifier_merge_ancestors(&simplifier, row_order[row]); + if (ret < 0) { + fprintf(stdout, "bad2\n"); + goto out; + } + } + } ret = tsk_modular_simplifier_finalise(&simplifier, NULL); if (ret < 0) { goto out; @@ -9255,7 +9298,7 @@ run_test_modular_simplify_overlapping_generations( static void test_table_collection_modular_simplify_overlapping_generations(void) { - tsk_id_t row_order[5] = { 2, 1, 0 }; + tsk_id_t row_order[5] = { 2, 1, 0, 5, 4 }; run_test_modular_simplify_overlapping_generations(row_order, 0); } From 8bba18a9cf3808fb784cb4e82d8c6a4ee92a3444 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Tue, 16 May 2023 14:48:29 -0700 Subject: [PATCH 53/88] remove memory leak --- c/tests/test_tables.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 168fc59237..00ba0a7e42 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8687,6 +8687,24 @@ fauxbuffer_buffer( CU_ASSERT_FATAL(buffer->buffered_edges[parent] < buffer->max_edges); } +static void +fauxbuffer_free(fauxbuffer *buffer) +{ + tsk_size_t i; + + for (i = 0; i < buffer->max_nodes; ++i) { + tsk_safe_free(buffer->parent[i]); + tsk_safe_free(buffer->child[i]); + tsk_safe_free(buffer->left[i]); + tsk_safe_free(buffer->right[i]); + } + tsk_safe_free(buffer->parent); + tsk_safe_free(buffer->child); + tsk_safe_free(buffer->left); + tsk_safe_free(buffer->right); + tsk_safe_free(buffer->buffered_edges); +} + /* * Start with this tree: * 6 @@ -9291,6 +9309,8 @@ run_test_modular_simplify_overlapping_generations( tsk_table_collection_free(&tables); tsk_table_collection_free(&standard_tables); tsk_safe_free(samples); + fauxbuffer_free(&buffer); + tsk_modular_simplifier_free(&simplifier); fprintf(stdout, "ret = %d\n", ret); CU_ASSERT_EQUAL_FATAL(ret, expected_result); } From c04efbbadf927196df5f5d629808d648148bd4bc Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Tue, 16 May 2023 15:00:40 -0700 Subject: [PATCH 54/88] closer --- c/tests/test_tables.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 00ba0a7e42..2e749899c3 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8683,6 +8683,12 @@ fauxbuffer_buffer( buffer->child[parent][buffer->buffered_edges[parent]] = child; buffer->left[parent][buffer->buffered_edges[parent]] = left; buffer->right[parent][buffer->buffered_edges[parent]] = right; + fprintf(stdout, "buffered %lf %lf %d %d\n", + buffer->left[parent][buffer->buffered_edges[parent]], + buffer->right[parent][buffer->buffered_edges[parent]], + buffer->parent[parent][buffer->buffered_edges[parent]], + buffer->child[parent][buffer->buffered_edges[parent]]); + buffer->buffered_edges[parent] += 1; CU_ASSERT_FATAL(buffer->buffered_edges[parent] < buffer->max_edges); } @@ -9199,7 +9205,6 @@ run_test_modular_simplify_overlapping_generations( if (ret < 0) { goto out; } - row = 0; /* Pseudocode that we are mocking: * For each parent of a new edge: * - add that edge to the segment queue. @@ -9237,7 +9242,14 @@ run_test_modular_simplify_overlapping_generations( */ for (row = 0; row < 5; ++row) { node = (tsk_size_t) row_order[row]; + fprintf(stdout, "processing node %ld\n", node); for (edge = 0; edge < buffer.buffered_edges[node]; ++edge) { + fprintf(stdout, "copying from buffer %lf %lf %d %d, %ld\n", + buffer.left[node][buffer.buffered_edges[node] - edge - 1], + buffer.right[node][buffer.buffered_edges[node] - edge - 1], + buffer.parent[node][buffer.buffered_edges[node] - edge - 1], + buffer.child[node][buffer.buffered_edges[node] - edge - 1], node); + ret = tsk_modular_simplifier_add_edge(&simplifier, buffer.left[node][buffer.buffered_edges[node] - edge - 1], buffer.right[node][buffer.buffered_edges[node] - edge - 1], @@ -9318,8 +9330,8 @@ run_test_modular_simplify_overlapping_generations( static void test_table_collection_modular_simplify_overlapping_generations(void) { - tsk_id_t row_order[5] = { 2, 1, 0, 5, 4 }; - run_test_modular_simplify_overlapping_generations(row_order, 0); + tsk_id_t input_node_order[5] = { 0, 1, 2, 3, 4 }; + run_test_modular_simplify_overlapping_generations(input_node_order, 0); } /* This hits part of simplifier intialisation that is From 07164aaed9f4a89dbf08a5f57d8fd8d28f5e4e19 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Wed, 17 May 2023 09:02:07 -0700 Subject: [PATCH 55/88] tests pass --- c/tests/test_tables.c | 32 +------------------------------- c/tskit/tables.c | 6 +++--- 2 files changed, 4 insertions(+), 34 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 2e749899c3..cda5ac9516 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8683,12 +8683,6 @@ fauxbuffer_buffer( buffer->child[parent][buffer->buffered_edges[parent]] = child; buffer->left[parent][buffer->buffered_edges[parent]] = left; buffer->right[parent][buffer->buffered_edges[parent]] = right; - fprintf(stdout, "buffered %lf %lf %d %d\n", - buffer->left[parent][buffer->buffered_edges[parent]], - buffer->right[parent][buffer->buffered_edges[parent]], - buffer->parent[parent][buffer->buffered_edges[parent]], - buffer->child[parent][buffer->buffered_edges[parent]]); - buffer->buffered_edges[parent] += 1; CU_ASSERT_FATAL(buffer->buffered_edges[parent] < buffer->max_edges); } @@ -8889,9 +8883,6 @@ make_overlapping_generations_trees_for_testing_modular_simplify( if (tables->nodes.time[tables->edges.parent[row]] <= tmax) { last_row_to_lift_over = (int) row; } - fprintf(stdout, "%lf %lf %d %d\n", tables->edges.left[row], - tables->edges.right[row], tables->edges.parent[row], - tables->edges.child[row]); } moved = 0; if (last_row_to_lift_over > -1) { @@ -8911,14 +8902,7 @@ make_overlapping_generations_trees_for_testing_modular_simplify( moved += 1; } } - fprintf(stdout, "lrb=%d, nr=%ld, moved=%ld\n", last_row_to_lift_over, - tables->edges.num_rows, moved); tsk_edge_table_truncate(&tables->edges, moved); - for (row = 0; row < tables->edges.num_rows; ++row) { - fprintf(stdout, "after move: %lf %lf %d %d\n", tables->edges.left[row], - tables->edges.right[row], tables->edges.parent[row], - tables->edges.child[row]); - } /* To maintain sanity, transmit non-recombinant genomes */ fauxbuffer_buffer(0, new_child0, 0., tables->sequence_length, buffer); fauxbuffer_buffer(1, new_child1, 0., tables->sequence_length, buffer); @@ -9145,7 +9129,7 @@ test_table_collection_modular_simplify_add_child_with_invalid_time(void) CU_ASSERT_EQUAL_FATAL(ret, 0); ret = tsk_modular_simplifier_add_edge( &simplifier, 0., 1, new_edges.parent[4], new_edges.child[4]); - CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EDGES_NOT_SORTED_CHILD); + CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NODE_TIME_ORDERING); tsk_safe_free(samples); tsk_table_collection_free(&tables); @@ -9170,10 +9154,6 @@ run_test_modular_simplify_overlapping_generations( make_overlapping_generations_trees_for_testing_modular_simplify( &tables, &buffer, &samples); - for (row = 0; row < 3; ++row) { - fprintf(stdout, "%d\n", samples[row]); - } - ret = tsk_table_collection_copy(&tables, &standard_tables, 0); if (ret < 0) { goto out; @@ -9242,26 +9222,17 @@ run_test_modular_simplify_overlapping_generations( */ for (row = 0; row < 5; ++row) { node = (tsk_size_t) row_order[row]; - fprintf(stdout, "processing node %ld\n", node); for (edge = 0; edge < buffer.buffered_edges[node]; ++edge) { - fprintf(stdout, "copying from buffer %lf %lf %d %d, %ld\n", - buffer.left[node][buffer.buffered_edges[node] - edge - 1], - buffer.right[node][buffer.buffered_edges[node] - edge - 1], - buffer.parent[node][buffer.buffered_edges[node] - edge - 1], - buffer.child[node][buffer.buffered_edges[node] - edge - 1], node); - ret = tsk_modular_simplifier_add_edge(&simplifier, buffer.left[node][buffer.buffered_edges[node] - edge - 1], buffer.right[node][buffer.buffered_edges[node] - edge - 1], buffer.parent[node][buffer.buffered_edges[node] - edge - 1], buffer.child[node][buffer.buffered_edges[node] - edge - 1]); if (ret < 0) { - fprintf(stdout, "bad\n"); goto out; } ret = tsk_modular_simplifier_merge_ancestors(&simplifier, row_order[row]); if (ret < 0) { - fprintf(stdout, "bad2\n"); goto out; } } @@ -9323,7 +9294,6 @@ run_test_modular_simplify_overlapping_generations( tsk_safe_free(samples); fauxbuffer_free(&buffer); tsk_modular_simplifier_free(&simplifier); - fprintf(stdout, "ret = %d\n", ret); CU_ASSERT_EQUAL_FATAL(ret, expected_result); } diff --git a/c/tskit/tables.c b/c/tskit/tables.c index b13a7d246e..bc793fe48d 100644 --- a/c/tskit/tables.c +++ b/c/tskit/tables.c @@ -23,6 +23,7 @@ * SOFTWARE. */ +#include "tskit/core.h" #include #include #include @@ -13681,9 +13682,8 @@ tsk_modular_simplifier_add_edge(tsk_modular_simplifier_t *self, double left, } if (self->pimpl->simplifier.input_tables.nodes.time[child] - > self->pimpl->minimum_input_node_time) { - // TODO: does this mean what I think it means? - ret = TSK_ERR_EDGES_NOT_SORTED_CHILD; + >= self->pimpl->simplifier.input_tables.nodes.time[parent]) { + ret = TSK_ERR_BAD_NODE_TIME_ORDERING; goto out; } From aeea43b2115b49f5327035ae4f60366a668e6379 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Wed, 17 May 2023 09:05:05 -0700 Subject: [PATCH 56/88] comment out unused code --- c/tskit/tables.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/c/tskit/tables.c b/c/tskit/tables.c index bc793fe48d..c9f237e781 100644 --- a/c/tskit/tables.c +++ b/c/tskit/tables.c @@ -13615,7 +13615,7 @@ typedef struct __tsk_modular_simplifier_impl_t { tsk_id_t *input_node_visited; tsk_size_t num_input_nodes; double last_parent_time; - double minimum_input_node_time; + /*double minimum_input_node_time;*/ } tsk_modular_simplifier_impl_t; int @@ -13645,12 +13645,22 @@ tsk_modular_simplifier_init(tsk_modular_simplifier_t *self, self->pimpl->num_input_nodes = tables->nodes.num_rows; self->pimpl->last_parent_processed = -1; self->pimpl->last_parent_time = DBL_MIN; + + /* NOTE: the original intent here was to catch + * issues where children aren't sorted properly, + * but it is clear I didn't get that right. + * + * Need to write more tests to see if I can + * trigger any problems + */ + /* self->pimpl->minimum_input_node_time = DBL_MAX; for (i = 0; i < tables->edges.num_rows; ++i) { self->pimpl->minimum_input_node_time = TSK_MIN(self->pimpl->minimum_input_node_time, tables->nodes.time[tables->edges.child[i]]); } + */ /* Now that we have set up the pimpl state, * we can let the unsual init happen From c1cb31506c714f71003912cb4314944132673f83 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Wed, 17 May 2023 09:19:18 -0700 Subject: [PATCH 57/88] fix compile fails on CI --- c/tests/test_tables.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index cda5ac9516..6718e184b0 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8846,7 +8846,7 @@ make_overlapping_generations_trees_for_testing_modular_simplify( // make all times >= 0.0. tables->nodes.time[row] += 1.0; } - ret = tsk_table_collection_check_integrity(tables, 0); + ret = (int) tsk_table_collection_check_integrity(tables, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); tmax = tables->nodes.time[4]; @@ -9162,9 +9162,9 @@ run_test_modular_simplify_overlapping_generations( for (node = 0; node < buffer.max_nodes; ++node) { for (edge = 0; edge < buffer.buffered_edges[node]; ++edge) { - ret = tsk_edge_table_add_row(&standard_tables.edges, buffer.left[node][edge], - buffer.right[node][edge], buffer.parent[node][edge], - buffer.child[node][edge], NULL, 0); + ret = (int) tsk_edge_table_add_row(&standard_tables.edges, + buffer.left[node][edge], buffer.right[node][edge], + buffer.parent[node][edge], buffer.child[node][edge], NULL, 0); if (ret < 0) { goto out; } From fcff748a503519f50fc0a8b7f2047f04a304e85c Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Wed, 17 May 2023 12:06:22 -0700 Subject: [PATCH 58/88] allocate new edges as needed --- c/tests/test_tables.c | 44 +++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 6718e184b0..e4f8cd2b3c 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8638,8 +8638,7 @@ typedef struct { double **left; double **right; tsk_size_t max_nodes; - tsk_size_t max_edges; - tsk_size_t *buffered_edges; + tsk_size_t *num_buffered_edges; } fauxbuffer; static void @@ -8655,8 +8654,8 @@ fauxbuffer_init(tsk_size_t max_nodes, tsk_size_t max_edges, fauxbuffer *buffer) buffer->right = tsk_malloc(max_nodes * sizeof(double *)); CU_ASSERT_FATAL(buffer->right != NULL); - buffer->buffered_edges = tsk_malloc(max_nodes * sizeof(tsk_size_t)); - CU_ASSERT_FATAL(buffer->buffered_edges != NULL); + buffer->num_buffered_edges = tsk_malloc(max_nodes * sizeof(tsk_size_t)); + CU_ASSERT_FATAL(buffer->num_buffered_edges != NULL); for (i = 0; i < max_nodes; ++i) { buffer->parent[i] = tsk_malloc(max_edges * sizeof(tsk_id_t)); @@ -8667,10 +8666,9 @@ fauxbuffer_init(tsk_size_t max_nodes, tsk_size_t max_edges, fauxbuffer *buffer) CU_ASSERT_FATAL(buffer->parent[i] != NULL); buffer->right[i] = tsk_malloc(max_edges * sizeof(double)); CU_ASSERT_FATAL(buffer->right[i] != NULL); - buffer->buffered_edges[i] = 0; + buffer->num_buffered_edges[i] = 0; } buffer->max_nodes = max_nodes; - buffer->max_edges = max_edges; } static void @@ -8678,13 +8676,19 @@ fauxbuffer_buffer( tsk_id_t parent, tsk_id_t child, double left, double right, fauxbuffer *buffer) { CU_ASSERT_FATAL(parent < (tsk_id_t) buffer->max_nodes); - CU_ASSERT_FATAL(buffer->buffered_edges[parent] < buffer->max_edges); - buffer->parent[parent][buffer->buffered_edges[parent]] = parent; - buffer->child[parent][buffer->buffered_edges[parent]] = child; - buffer->left[parent][buffer->buffered_edges[parent]] = left; - buffer->right[parent][buffer->buffered_edges[parent]] = right; - buffer->buffered_edges[parent] += 1; - CU_ASSERT_FATAL(buffer->buffered_edges[parent] < buffer->max_edges); + buffer->num_buffered_edges[parent] += 1; + buffer->parent[parent] = tsk_realloc( + buffer->parent[parent], buffer->num_buffered_edges[parent] * sizeof(tsk_id_t)); + buffer->child[parent] = tsk_realloc( + buffer->child[parent], buffer->num_buffered_edges[parent] * sizeof(tsk_id_t)); + buffer->left[parent] = tsk_realloc( + buffer->left[parent], buffer->num_buffered_edges[parent] * sizeof(double)); + buffer->right[parent] = tsk_realloc( + buffer->right[parent], buffer->num_buffered_edges[parent] * sizeof(double)); + buffer->parent[parent][buffer->num_buffered_edges[parent] - 1] = parent; + buffer->child[parent][buffer->num_buffered_edges[parent] - 1] = child; + buffer->left[parent][buffer->num_buffered_edges[parent] - 1] = left; + buffer->right[parent][buffer->num_buffered_edges[parent] - 1] = right; } static void @@ -8702,7 +8706,7 @@ fauxbuffer_free(fauxbuffer *buffer) tsk_safe_free(buffer->child); tsk_safe_free(buffer->left); tsk_safe_free(buffer->right); - tsk_safe_free(buffer->buffered_edges); + tsk_safe_free(buffer->num_buffered_edges); } /* @@ -9160,7 +9164,7 @@ run_test_modular_simplify_overlapping_generations( } for (node = 0; node < buffer.max_nodes; ++node) { - for (edge = 0; edge < buffer.buffered_edges[node]; ++edge) { + for (edge = 0; edge < buffer.num_buffered_edges[node]; ++edge) { ret = (int) tsk_edge_table_add_row(&standard_tables.edges, buffer.left[node][edge], buffer.right[node][edge], @@ -9222,12 +9226,12 @@ run_test_modular_simplify_overlapping_generations( */ for (row = 0; row < 5; ++row) { node = (tsk_size_t) row_order[row]; - for (edge = 0; edge < buffer.buffered_edges[node]; ++edge) { + for (edge = 0; edge < buffer.num_buffered_edges[node]; ++edge) { ret = tsk_modular_simplifier_add_edge(&simplifier, - buffer.left[node][buffer.buffered_edges[node] - edge - 1], - buffer.right[node][buffer.buffered_edges[node] - edge - 1], - buffer.parent[node][buffer.buffered_edges[node] - edge - 1], - buffer.child[node][buffer.buffered_edges[node] - edge - 1]); + buffer.left[node][buffer.num_buffered_edges[node] - edge - 1], + buffer.right[node][buffer.num_buffered_edges[node] - edge - 1], + buffer.parent[node][buffer.num_buffered_edges[node] - edge - 1], + buffer.child[node][buffer.num_buffered_edges[node] - edge - 1]); if (ret < 0) { goto out; } From 23544cd5b51f73271ad755a1116dabe66cc758f5 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Wed, 17 May 2023 12:32:50 -0700 Subject: [PATCH 59/88] be careful about setup --- c/tests/test_tables.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index e4f8cd2b3c..e0957faebc 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8642,7 +8642,7 @@ typedef struct { } fauxbuffer; static void -fauxbuffer_init(tsk_size_t max_nodes, tsk_size_t max_edges, fauxbuffer *buffer) +fauxbuffer_init(tsk_size_t max_nodes, fauxbuffer *buffer) { tsk_size_t i; buffer->parent = tsk_malloc(max_nodes * sizeof(tsk_id_t *)); @@ -8658,14 +8658,10 @@ fauxbuffer_init(tsk_size_t max_nodes, tsk_size_t max_edges, fauxbuffer *buffer) CU_ASSERT_FATAL(buffer->num_buffered_edges != NULL); for (i = 0; i < max_nodes; ++i) { - buffer->parent[i] = tsk_malloc(max_edges * sizeof(tsk_id_t)); - CU_ASSERT_FATAL(buffer->parent[i] != NULL); - buffer->child[i] = tsk_malloc(max_edges * sizeof(tsk_id_t)); - CU_ASSERT_FATAL(buffer->child[i] != NULL); - buffer->left[i] = tsk_malloc(max_edges * sizeof(double)); - CU_ASSERT_FATAL(buffer->parent[i] != NULL); - buffer->right[i] = tsk_malloc(max_edges * sizeof(double)); - CU_ASSERT_FATAL(buffer->right[i] != NULL); + buffer->parent[i] = NULL; + buffer->child[i] = NULL; + buffer->left[i] = NULL; + buffer->right[i] = NULL; buffer->num_buffered_edges[i] = 0; } buffer->max_nodes = max_nodes; @@ -8833,7 +8829,7 @@ make_overlapping_generations_trees_for_testing_modular_simplify( parse_edges(internal_sample_ex_edges, &tables->edges); parse_nodes(internal_sample_ex_nodes, &tables->nodes); tables->sequence_length = 10.0; - fauxbuffer_init(12, 100, buffer); + fauxbuffer_init(12, buffer); *samples = tsk_malloc(3 * sizeof(tsk_id_t)); CU_ASSERT_TRUE_FATAL(*samples != NULL); From 0732c81443feb9f7e291491fa35333be3107694f Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Wed, 17 May 2023 15:46:39 -0700 Subject: [PATCH 60/88] force a failing test --- c/tests/test_tables.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index e0957faebc..0520b390aa 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -9141,6 +9141,9 @@ static void run_test_modular_simplify_overlapping_generations( tsk_id_t row_order[5], int expected_result) { + // This testing pattern is pretty ugly for overlapping + // generations. + CU_ASSERT_FATAL(false); int ret; tsk_table_collection_t tables, standard_tables; fauxbuffer buffer; From c81da32549d14be8a4568e6babce9c5b123cb3c1 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Wed, 17 May 2023 15:56:17 -0700 Subject: [PATCH 61/88] we don't need a particular order for the child nodes... --- c/tests/test_tables.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 0520b390aa..e167c46e70 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -9226,11 +9226,9 @@ run_test_modular_simplify_overlapping_generations( for (row = 0; row < 5; ++row) { node = (tsk_size_t) row_order[row]; for (edge = 0; edge < buffer.num_buffered_edges[node]; ++edge) { - ret = tsk_modular_simplifier_add_edge(&simplifier, - buffer.left[node][buffer.num_buffered_edges[node] - edge - 1], - buffer.right[node][buffer.num_buffered_edges[node] - edge - 1], - buffer.parent[node][buffer.num_buffered_edges[node] - edge - 1], - buffer.child[node][buffer.num_buffered_edges[node] - edge - 1]); + ret = tsk_modular_simplifier_add_edge(&simplifier, buffer.left[node][edge], + buffer.right[node][edge], buffer.parent[node][edge], + buffer.child[node][edge]); if (ret < 0) { goto out; } From 8d6f8b45dabfeb74dbb1bab34eacf43f1a0be1f1 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Wed, 17 May 2023 16:17:14 -0700 Subject: [PATCH 62/88] add test of parent time error --- c/tests/test_tables.c | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index e167c46e70..f984ec3dd9 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -9139,11 +9139,11 @@ test_table_collection_modular_simplify_add_child_with_invalid_time(void) static void run_test_modular_simplify_overlapping_generations( - tsk_id_t row_order[5], int expected_result) + tsk_id_t node_order_in_buffer[4], int expected_result) { // This testing pattern is pretty ugly for overlapping // generations. - CU_ASSERT_FATAL(false); + // CU_ASSERT_FATAL(false); int ret; tsk_table_collection_t tables, standard_tables; fauxbuffer buffer; @@ -9152,11 +9152,21 @@ run_test_modular_simplify_overlapping_generations( tsk_tree_t standard_tree, tree; tsk_id_t *samples; tsk_size_t row, node, edge; + int num_nodes_in_buffer = 0; double ttl_time, standard_ttl_time; make_overlapping_generations_trees_for_testing_modular_simplify( &tables, &buffer, &samples); + for (row = 0; row < buffer.max_nodes; ++row) { + if (buffer.num_buffered_edges[row] > 0) { + fprintf(stdout, "%ld\n", row); + ++num_nodes_in_buffer; + } + } + fprintf(stdout, "%d\n", num_nodes_in_buffer); + CU_ASSERT_EQUAL_FATAL(num_nodes_in_buffer, 4); + ret = tsk_table_collection_copy(&tables, &standard_tables, 0); if (ret < 0) { goto out; @@ -9223,8 +9233,9 @@ run_test_modular_simplify_overlapping_generations( * UPDATE: we need to do <= and it must be with respect * to ANY node that is "alive" the last time we simplified. */ - for (row = 0; row < 5; ++row) { - node = (tsk_size_t) row_order[row]; + for (row = 0; row < 4; ++row) { + node = (tsk_size_t) node_order_in_buffer[row]; + fprintf(stdout, "node=%ld\n", node); for (edge = 0; edge < buffer.num_buffered_edges[node]; ++edge) { ret = tsk_modular_simplifier_add_edge(&simplifier, buffer.left[node][edge], buffer.right[node][edge], buffer.parent[node][edge], @@ -9232,7 +9243,8 @@ run_test_modular_simplify_overlapping_generations( if (ret < 0) { goto out; } - ret = tsk_modular_simplifier_merge_ancestors(&simplifier, row_order[row]); + ret = tsk_modular_simplifier_merge_ancestors( + &simplifier, node_order_in_buffer[row]); if (ret < 0) { goto out; } @@ -9295,16 +9307,25 @@ run_test_modular_simplify_overlapping_generations( tsk_safe_free(samples); fauxbuffer_free(&buffer); tsk_modular_simplifier_free(&simplifier); + fprintf(stdout, "code: %d, expected_result: %d", ret, expected_result); CU_ASSERT_EQUAL_FATAL(ret, expected_result); } static void test_table_collection_modular_simplify_overlapping_generations(void) { - tsk_id_t input_node_order[5] = { 0, 1, 2, 3, 4 }; + tsk_id_t input_node_order[4] = { 0, 1, 3, 4 }; run_test_modular_simplify_overlapping_generations(input_node_order, 0); } +static void +test_table_collection_modular_simplify_overlapping_generations_parent_time_error(void) +{ + tsk_id_t input_node_order[4] = { 0, 1, 0, 4 }; + run_test_modular_simplify_overlapping_generations( + input_node_order, TSK_ERR_EDGES_NOT_SORTED_PARENT_TIME); +} + /* This hits part of simplifier intialisation that is * NOT part of table integrity checks */ @@ -12328,6 +12349,9 @@ main(int argc, char **argv) test_table_collection_modular_simplify_table_integrity_check_fail }, { "test_table_collection_modular_simplify_overlapping_generations", test_table_collection_modular_simplify_overlapping_generations }, + { "test_table_collection_modular_simplify_overlapping_generations_parent_time_" + "error", + test_table_collection_modular_simplify_overlapping_generations_parent_time_error }, { "test_table_collection_time_units", test_table_collection_time_units }, { "test_table_collection_reference_sequence", test_table_collection_reference_sequence }, From 3b39431f11c22100a19664c6dba68feebffa181b Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Wed, 17 May 2023 16:18:21 -0700 Subject: [PATCH 63/88] reinstate test failure --- c/tests/test_tables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index f984ec3dd9..369c80d776 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -9143,7 +9143,7 @@ run_test_modular_simplify_overlapping_generations( { // This testing pattern is pretty ugly for overlapping // generations. - // CU_ASSERT_FATAL(false); + CU_ASSERT_FATAL(false); int ret; tsk_table_collection_t tables, standard_tables; fauxbuffer buffer; From cb0bd0306c00384772d93d4d2a502893ca12f2a3 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Fri, 26 May 2023 12:17:22 -0700 Subject: [PATCH 64/88] start refactor from temp buffer back into temp edge table. idea is to let us use a common runner routine later --- c/tests/test_tables.c | 121 ++++++++++++++++++++++-------------------- 1 file changed, 62 insertions(+), 59 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 369c80d776..62ed5ec4f0 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8817,28 +8817,33 @@ make_single_tree_for_testing_modular_simplify( */ static void make_overlapping_generations_trees_for_testing_modular_simplify( - tsk_table_collection_t *tables, fauxbuffer *buffer, tsk_id_t **samples) + tsk_table_collection_t *tables, tsk_edge_table_t *new_edges, tsk_id_t **samples) { int ret; tsk_id_t new_child0, new_child1, new_child2; - tsk_size_t row, moved; + tsk_size_t row, moved, edge; int last_row_to_lift_over; + fauxbuffer buffer; ret = tsk_table_collection_init(tables, 0); double tmax; CU_ASSERT_EQUAL_FATAL(ret, 0); parse_edges(internal_sample_ex_edges, &tables->edges); parse_nodes(internal_sample_ex_nodes, &tables->nodes); tables->sequence_length = 10.0; - fauxbuffer_init(12, buffer); + fauxbuffer_init(12, &buffer); + ret = tsk_edge_table_init(new_edges, 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); *samples = tsk_malloc(3 * sizeof(tsk_id_t)); CU_ASSERT_TRUE_FATAL(*samples != NULL); + fprintf(stdout, "we start with %ld nodes\n", tables->nodes.num_rows); new_child0 = tsk_node_table_add_row(&tables->nodes, 0, -1.0, -1, -1, NULL, 0); CU_ASSERT_TRUE_FATAL(new_child0 > 0); new_child1 = tsk_node_table_add_row(&tables->nodes, 0, -1.0, -1, -1, NULL, 0); CU_ASSERT_TRUE_FATAL(new_child1 > 0); new_child2 = tsk_node_table_add_row(&tables->nodes, 0, -1.0, -1, -1, NULL, 0); CU_ASSERT_TRUE_FATAL(new_child2 > 0); + fprintf(stdout, "we finish with %ld nodes\n", tables->nodes.num_rows); for (row = 0; row < tables->nodes.num_rows; ++row) { // for some reason, the fixture sets pop to 0... @@ -8891,7 +8896,7 @@ make_overlapping_generations_trees_for_testing_modular_simplify( tables->edges.parent[(tsk_size_t) last_row_to_lift_over - row], tables->edges.child[(tsk_size_t) last_row_to_lift_over - row], tables->edges.left[(tsk_size_t) last_row_to_lift_over - row], - tables->edges.right[(tsk_size_t) last_row_to_lift_over - row], buffer); + tables->edges.right[(tsk_size_t) last_row_to_lift_over - row], &buffer); } for (row = (tsk_size_t) last_row_to_lift_over + 1; row < tables->edges.num_rows; ++row) { @@ -8904,12 +8909,25 @@ make_overlapping_generations_trees_for_testing_modular_simplify( } tsk_edge_table_truncate(&tables->edges, moved); /* To maintain sanity, transmit non-recombinant genomes */ - fauxbuffer_buffer(0, new_child0, 0., tables->sequence_length, buffer); - fauxbuffer_buffer(1, new_child1, 0., tables->sequence_length, buffer); - fauxbuffer_buffer(3, new_child2, 0., tables->sequence_length, buffer); + fauxbuffer_buffer(0, new_child0, 0., tables->sequence_length, &buffer); + fauxbuffer_buffer(1, new_child1, 0., tables->sequence_length, &buffer); + fauxbuffer_buffer(3, new_child2, 0., tables->sequence_length, &buffer); + + for (row = 0; row < buffer.max_nodes; ++row) { + for (edge = 0; edge < buffer.num_buffered_edges[row]; ++edge) { + fprintf(stdout, "parent %d child %d\n", buffer.parent[row][edge], + buffer.child[row][edge]); + tsk_edge_table_add_row(new_edges, buffer.left[row][edge], + buffer.right[row][edge], buffer.parent[row][edge], + buffer.child[row][edge], NULL, 0); + } + } + (*samples)[0] = new_child0; (*samples)[1] = new_child1; (*samples)[2] = new_child2; + + fauxbuffer_free(&buffer); } static void @@ -9139,54 +9157,40 @@ test_table_collection_modular_simplify_add_child_with_invalid_time(void) static void run_test_modular_simplify_overlapping_generations( - tsk_id_t node_order_in_buffer[4], int expected_result) + tsk_id_t row_order[3], int expected_result) { // This testing pattern is pretty ugly for overlapping // generations. - CU_ASSERT_FATAL(false); int ret; tsk_table_collection_t tables, standard_tables; - fauxbuffer buffer; + tsk_edge_table_t new_edges; tsk_modular_simplifier_t simplifier; tsk_treeseq_t treeseq, standard_treeseq; tsk_tree_t standard_tree, tree; tsk_id_t *samples; - tsk_size_t row, node, edge; - int num_nodes_in_buffer = 0; + tsk_id_t last_parent; + tsk_size_t row; double ttl_time, standard_ttl_time; make_overlapping_generations_trees_for_testing_modular_simplify( - &tables, &buffer, &samples); - - for (row = 0; row < buffer.max_nodes; ++row) { - if (buffer.num_buffered_edges[row] > 0) { - fprintf(stdout, "%ld\n", row); - ++num_nodes_in_buffer; - } - } - fprintf(stdout, "%d\n", num_nodes_in_buffer); - CU_ASSERT_EQUAL_FATAL(num_nodes_in_buffer, 4); + &tables, &new_edges, &samples); + fprintf(stdout, "we find ourselves with %ld nodes\n", tables.nodes.num_rows); ret = tsk_table_collection_copy(&tables, &standard_tables, 0); if (ret < 0) { goto out; } - - for (node = 0; node < buffer.max_nodes; ++node) { - for (edge = 0; edge < buffer.num_buffered_edges[node]; ++edge) { - - ret = (int) tsk_edge_table_add_row(&standard_tables.edges, - buffer.left[node][edge], buffer.right[node][edge], - buffer.parent[node][edge], buffer.child[node][edge], NULL, 0); - if (ret < 0) { - goto out; - } - } + ret = tsk_edge_table_append_columns(&standard_tables.edges, new_edges.num_rows, + new_edges.left, new_edges.right, new_edges.parent, new_edges.child, NULL, NULL); + if (ret < 0) { + goto out; } ret = tsk_table_collection_sort(&standard_tables, NULL, 0); if (ret < 0) { goto out; } + CU_ASSERT_EQUAL_FATAL( + standard_tables.edges.num_rows, tables.edges.num_rows + new_edges.num_rows); CU_ASSERT_EQUAL_FATAL(standard_tables.nodes.num_rows, tables.nodes.num_rows); ret = tsk_table_collection_simplify(&standard_tables, samples, 3, 0, NULL); @@ -9198,6 +9202,7 @@ run_test_modular_simplify_overlapping_generations( if (ret < 0) { goto out; } + row = 0; /* Pseudocode that we are mocking: * For each parent of a new edge: * - add that edge to the segment queue. @@ -9215,7 +9220,25 @@ run_test_modular_simplify_overlapping_generations( * (How edges are sorted is an internal detail * and cannot be used for testing.) */ - + while (row < 3) { + last_parent = new_edges.parent[row_order[row]]; + fprintf(stdout, "%d %ld", last_parent, tables.nodes.num_rows); + // CU_ASSERT_FATAL(last_parent < (tsk_id_t) tables.nodes.num_rows); + while (row < new_edges.num_rows + && new_edges.parent[row_order[row]] == last_parent) { + ret = tsk_modular_simplifier_add_edge(&simplifier, + new_edges.left[row_order[row]], new_edges.right[row_order[row]], + new_edges.parent[row_order[row]], new_edges.child[row_order[row]]); + if (ret < 0) { + goto out; + } + ++row; + } + ret = tsk_modular_simplifier_merge_ancestors(&simplifier, last_parent); + if (ret < 0) { + goto out; + } + } /* Simplification's internal cleanup. * Should NOT be called if above loop errors. * We know that not calling it and calling @@ -9229,27 +9252,7 @@ run_test_modular_simplify_overlapping_generations( * birth time of any child in the input table. * The "edge adder" function should ensure that all * new edges are < (or <= ??) that value. - * - * UPDATE: we need to do <= and it must be with respect - * to ANY node that is "alive" the last time we simplified. */ - for (row = 0; row < 4; ++row) { - node = (tsk_size_t) node_order_in_buffer[row]; - fprintf(stdout, "node=%ld\n", node); - for (edge = 0; edge < buffer.num_buffered_edges[node]; ++edge) { - ret = tsk_modular_simplifier_add_edge(&simplifier, buffer.left[node][edge], - buffer.right[node][edge], buffer.parent[node][edge], - buffer.child[node][edge]); - if (ret < 0) { - goto out; - } - ret = tsk_modular_simplifier_merge_ancestors( - &simplifier, node_order_in_buffer[row]); - if (ret < 0) { - goto out; - } - } - } ret = tsk_modular_simplifier_finalise(&simplifier, NULL); if (ret < 0) { goto out; @@ -9305,7 +9308,7 @@ run_test_modular_simplify_overlapping_generations( tsk_table_collection_free(&tables); tsk_table_collection_free(&standard_tables); tsk_safe_free(samples); - fauxbuffer_free(&buffer); + tsk_edge_table_free(&new_edges); tsk_modular_simplifier_free(&simplifier); fprintf(stdout, "code: %d, expected_result: %d", ret, expected_result); CU_ASSERT_EQUAL_FATAL(ret, expected_result); @@ -9314,16 +9317,16 @@ run_test_modular_simplify_overlapping_generations( static void test_table_collection_modular_simplify_overlapping_generations(void) { - tsk_id_t input_node_order[4] = { 0, 1, 3, 4 }; - run_test_modular_simplify_overlapping_generations(input_node_order, 0); + tsk_id_t row_order[3] = { 3, 2, 1 }; + run_test_modular_simplify_overlapping_generations(row_order, 0); } static void test_table_collection_modular_simplify_overlapping_generations_parent_time_error(void) { - tsk_id_t input_node_order[4] = { 0, 1, 0, 4 }; + tsk_id_t row_order[4] = { 2, 1, 3 }; run_test_modular_simplify_overlapping_generations( - input_node_order, TSK_ERR_EDGES_NOT_SORTED_PARENT_TIME); + row_order, TSK_ERR_EDGES_NOT_SORTED_PARENT_TIME); } /* This hits part of simplifier intialisation that is From 8af74c1e32457335b36834402d57085816cd9998 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Fri, 26 May 2023 13:01:22 -0700 Subject: [PATCH 65/88] at least no segfault any more... --- c/tests/test_tables.c | 54 ++++++++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 62ed5ec4f0..dd373949b3 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8891,7 +8891,12 @@ make_overlapping_generations_trees_for_testing_modular_simplify( } moved = 0; if (last_row_to_lift_over > -1) { - for (row = 0; row <= (tsk_size_t) last_row_to_lift_over; ++row) { + for (row = 0; row < (tsk_size_t) last_row_to_lift_over; ++row) { + fprintf(stdout, "copying %lf %lf %d %d into temp buffer\n", + tables->edges.left[(tsk_size_t) last_row_to_lift_over - row], + tables->edges.right[(tsk_size_t) last_row_to_lift_over - row], + tables->edges.parent[(tsk_size_t) last_row_to_lift_over - row], + tables->edges.child[(tsk_size_t) last_row_to_lift_over - row]); fauxbuffer_buffer( tables->edges.parent[(tsk_size_t) last_row_to_lift_over - row], tables->edges.child[(tsk_size_t) last_row_to_lift_over - row], @@ -8915,8 +8920,9 @@ make_overlapping_generations_trees_for_testing_modular_simplify( for (row = 0; row < buffer.max_nodes; ++row) { for (edge = 0; edge < buffer.num_buffered_edges[row]; ++edge) { - fprintf(stdout, "parent %d child %d\n", buffer.parent[row][edge], - buffer.child[row][edge]); + fprintf(stdout, "parent %d child %d left %lf right %lf\n", + buffer.parent[row][edge], buffer.child[row][edge], + buffer.left[row][edge], buffer.right[row][edge]); tsk_edge_table_add_row(new_edges, buffer.left[row][edge], buffer.right[row][edge], buffer.parent[row][edge], buffer.child[row][edge], NULL, 0); @@ -9157,19 +9163,19 @@ test_table_collection_modular_simplify_add_child_with_invalid_time(void) static void run_test_modular_simplify_overlapping_generations( - tsk_id_t row_order[3], int expected_result) + tsk_id_t row_order[4], int expected_result) { // This testing pattern is pretty ugly for overlapping // generations. int ret; - tsk_table_collection_t tables, standard_tables; + tsk_table_collection_t tables, tables_copy, standard_tables; tsk_edge_table_t new_edges; tsk_modular_simplifier_t simplifier; tsk_treeseq_t treeseq, standard_treeseq; tsk_tree_t standard_tree, tree; tsk_id_t *samples; tsk_id_t last_parent; - tsk_size_t row; + tsk_size_t row, row_for_parent; double ttl_time, standard_ttl_time; make_overlapping_generations_trees_for_testing_modular_simplify( @@ -9198,11 +9204,15 @@ run_test_modular_simplify_overlapping_generations( goto out; } + ret = tsk_table_collection_copy(&tables, &tables_copy, 0); + if (ret < 0) { + goto out; + } + ret = tsk_modular_simplifier_init(&simplifier, &tables, samples, 3, 0); if (ret < 0) { goto out; } - row = 0; /* Pseudocode that we are mocking: * For each parent of a new edge: * - add that edge to the segment queue. @@ -9220,21 +9230,28 @@ run_test_modular_simplify_overlapping_generations( * (How edges are sorted is an internal detail * and cannot be used for testing.) */ - while (row < 3) { + for (row = 0; row < 4; ++row) { last_parent = new_edges.parent[row_order[row]]; - fprintf(stdout, "%d %ld", last_parent, tables.nodes.num_rows); - // CU_ASSERT_FATAL(last_parent < (tsk_id_t) tables.nodes.num_rows); - while (row < new_edges.num_rows - && new_edges.parent[row_order[row]] == last_parent) { + row_for_parent = 0; + fprintf(stdout, "parent %d num nodes %ld\n", last_parent, + tables_copy.nodes.num_rows); + CU_ASSERT_FATAL(last_parent < (tsk_id_t) tables_copy.nodes.num_rows); + for (row_for_parent = 0; + row + row_for_parent < new_edges.num_rows + && new_edges.parent[row + row_for_parent] == last_parent; + ++row_for_parent) { + CU_ASSERT_FATAL(row + row_for_parent < new_edges.num_rows); ret = tsk_modular_simplifier_add_edge(&simplifier, - new_edges.left[row_order[row]], new_edges.right[row_order[row]], - new_edges.parent[row_order[row]], new_edges.child[row_order[row]]); + new_edges.left[row + row_for_parent], + new_edges.right[row + row_for_parent], + new_edges.parent[row + row_for_parent], + new_edges.child[row + row_for_parent]); if (ret < 0) { goto out; } - ++row; } ret = tsk_modular_simplifier_merge_ancestors(&simplifier, last_parent); + fprintf(stdout, "procesed parent %d\n", last_parent); if (ret < 0) { goto out; } @@ -9306,25 +9323,26 @@ run_test_modular_simplify_overlapping_generations( CU_ASSERT_EQUAL_FATAL(ret, 0); out: tsk_table_collection_free(&tables); + tsk_table_collection_free(&tables_copy); tsk_table_collection_free(&standard_tables); tsk_safe_free(samples); tsk_edge_table_free(&new_edges); tsk_modular_simplifier_free(&simplifier); - fprintf(stdout, "code: %d, expected_result: %d", ret, expected_result); + fprintf(stdout, "code: %d, expected_result: %d\n", ret, expected_result); CU_ASSERT_EQUAL_FATAL(ret, expected_result); } static void test_table_collection_modular_simplify_overlapping_generations(void) { - tsk_id_t row_order[3] = { 3, 2, 1 }; + tsk_id_t row_order[4] = { 0, 1, 2, 3 }; run_test_modular_simplify_overlapping_generations(row_order, 0); } static void test_table_collection_modular_simplify_overlapping_generations_parent_time_error(void) { - tsk_id_t row_order[4] = { 2, 1, 3 }; + tsk_id_t row_order[4] = { 2, 1, 0, 3 }; run_test_modular_simplify_overlapping_generations( row_order, TSK_ERR_EDGES_NOT_SORTED_PARENT_TIME); } From 9254283f4e14a4f11aa5f2da04a069458de3aeaf Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Fri, 26 May 2023 13:49:18 -0700 Subject: [PATCH 66/88] some pring --- c/tests/test_tables.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index dd373949b3..65eb300bf9 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8928,6 +8928,16 @@ make_overlapping_generations_trees_for_testing_modular_simplify( buffer.child[row][edge], NULL, 0); } } + fprintf(stdout, "after all this mucking around:"); + for (row = 0; row < tables->edges.num_rows; ++row) { + fprintf(stdout, "in tables: %lf %lf %d %d\n", tables->edges.left[row], + tables->edges.right[row], tables->edges.parent[row], + tables->edges.child[row]); + } + for (row = 0; row < new_edges->num_rows; ++row) { + fprintf(stdout, "in new_edges: %lf %lf %d %d\n", new_edges->left[row], + new_edges->right[row], new_edges->parent[row], new_edges->child[row]); + } (*samples)[0] = new_child0; (*samples)[1] = new_child1; From 154e5a36059917a5ab2374f565c72f6d7dc1d3af Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Fri, 26 May 2023 14:16:15 -0700 Subject: [PATCH 67/88] we are not detecting parent time violations properly --- c/tests/test_tables.c | 34 ++++++++-------------------------- 1 file changed, 8 insertions(+), 26 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 65eb300bf9..9791cfd2f8 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8836,14 +8836,12 @@ make_overlapping_generations_trees_for_testing_modular_simplify( *samples = tsk_malloc(3 * sizeof(tsk_id_t)); CU_ASSERT_TRUE_FATAL(*samples != NULL); - fprintf(stdout, "we start with %ld nodes\n", tables->nodes.num_rows); new_child0 = tsk_node_table_add_row(&tables->nodes, 0, -1.0, -1, -1, NULL, 0); CU_ASSERT_TRUE_FATAL(new_child0 > 0); new_child1 = tsk_node_table_add_row(&tables->nodes, 0, -1.0, -1, -1, NULL, 0); CU_ASSERT_TRUE_FATAL(new_child1 > 0); new_child2 = tsk_node_table_add_row(&tables->nodes, 0, -1.0, -1, -1, NULL, 0); CU_ASSERT_TRUE_FATAL(new_child2 > 0); - fprintf(stdout, "we finish with %ld nodes\n", tables->nodes.num_rows); for (row = 0; row < tables->nodes.num_rows; ++row) { // for some reason, the fixture sets pop to 0... @@ -8892,11 +8890,6 @@ make_overlapping_generations_trees_for_testing_modular_simplify( moved = 0; if (last_row_to_lift_over > -1) { for (row = 0; row < (tsk_size_t) last_row_to_lift_over; ++row) { - fprintf(stdout, "copying %lf %lf %d %d into temp buffer\n", - tables->edges.left[(tsk_size_t) last_row_to_lift_over - row], - tables->edges.right[(tsk_size_t) last_row_to_lift_over - row], - tables->edges.parent[(tsk_size_t) last_row_to_lift_over - row], - tables->edges.child[(tsk_size_t) last_row_to_lift_over - row]); fauxbuffer_buffer( tables->edges.parent[(tsk_size_t) last_row_to_lift_over - row], tables->edges.child[(tsk_size_t) last_row_to_lift_over - row], @@ -8920,24 +8913,11 @@ make_overlapping_generations_trees_for_testing_modular_simplify( for (row = 0; row < buffer.max_nodes; ++row) { for (edge = 0; edge < buffer.num_buffered_edges[row]; ++edge) { - fprintf(stdout, "parent %d child %d left %lf right %lf\n", - buffer.parent[row][edge], buffer.child[row][edge], - buffer.left[row][edge], buffer.right[row][edge]); tsk_edge_table_add_row(new_edges, buffer.left[row][edge], buffer.right[row][edge], buffer.parent[row][edge], buffer.child[row][edge], NULL, 0); } } - fprintf(stdout, "after all this mucking around:"); - for (row = 0; row < tables->edges.num_rows; ++row) { - fprintf(stdout, "in tables: %lf %lf %d %d\n", tables->edges.left[row], - tables->edges.right[row], tables->edges.parent[row], - tables->edges.child[row]); - } - for (row = 0; row < new_edges->num_rows; ++row) { - fprintf(stdout, "in new_edges: %lf %lf %d %d\n", new_edges->left[row], - new_edges->right[row], new_edges->parent[row], new_edges->child[row]); - } (*samples)[0] = new_child0; (*samples)[1] = new_child1; @@ -9190,7 +9170,6 @@ run_test_modular_simplify_overlapping_generations( make_overlapping_generations_trees_for_testing_modular_simplify( &tables, &new_edges, &samples); - fprintf(stdout, "we find ourselves with %ld nodes\n", tables.nodes.num_rows); ret = tsk_table_collection_copy(&tables, &standard_tables, 0); if (ret < 0) { @@ -9243,8 +9222,6 @@ run_test_modular_simplify_overlapping_generations( for (row = 0; row < 4; ++row) { last_parent = new_edges.parent[row_order[row]]; row_for_parent = 0; - fprintf(stdout, "parent %d num nodes %ld\n", last_parent, - tables_copy.nodes.num_rows); CU_ASSERT_FATAL(last_parent < (tsk_id_t) tables_copy.nodes.num_rows); for (row_for_parent = 0; row + row_for_parent < new_edges.num_rows @@ -9261,7 +9238,8 @@ run_test_modular_simplify_overlapping_generations( } } ret = tsk_modular_simplifier_merge_ancestors(&simplifier, last_parent); - fprintf(stdout, "procesed parent %d\n", last_parent); + fprintf(stdout, "procesed parent %d, time: %lf\n", last_parent, + tables_copy.nodes.time[last_parent]); if (ret < 0) { goto out; } @@ -9285,7 +9263,11 @@ run_test_modular_simplify_overlapping_generations( goto out; } - CU_ASSERT_EQUAL_FATAL(ret, 0); + // fprintf(stdout, "standard\n"); + // tsk_table_collection_print_state(&standard_tables, stdout); + // fprintf(stdout, "buffered\n"); + // tsk_table_collection_print_state(&tables, stdout); + // CU_ASSERT_EQUAL_FATAL(ret, 0); // Now, we can compare various properties of the two table collections CU_ASSERT_EQUAL_FATAL(standard_tables.edges.num_rows, tables.edges.num_rows); @@ -9352,7 +9334,7 @@ test_table_collection_modular_simplify_overlapping_generations(void) static void test_table_collection_modular_simplify_overlapping_generations_parent_time_error(void) { - tsk_id_t row_order[4] = { 2, 1, 0, 3 }; + tsk_id_t row_order[4] = { 2, 3, 0, 1 }; run_test_modular_simplify_overlapping_generations( row_order, TSK_ERR_EDGES_NOT_SORTED_PARENT_TIME); } From ead9f5c5a70e8d4971f94165221cb98753066625 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Fri, 26 May 2023 14:21:08 -0700 Subject: [PATCH 68/88] now tests pass --- c/tests/test_tables.c | 16 +++++++++------- c/tskit/tables.c | 3 +++ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 9791cfd2f8..53e5f38e27 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -9224,15 +9224,17 @@ run_test_modular_simplify_overlapping_generations( row_for_parent = 0; CU_ASSERT_FATAL(last_parent < (tsk_id_t) tables_copy.nodes.num_rows); for (row_for_parent = 0; - row + row_for_parent < new_edges.num_rows - && new_edges.parent[row + row_for_parent] == last_parent; + (tsk_size_t) row_order[row] + row_for_parent < new_edges.num_rows + && new_edges.parent[(tsk_size_t) row_order[row] + row_for_parent] + == last_parent; ++row_for_parent) { - CU_ASSERT_FATAL(row + row_for_parent < new_edges.num_rows); + CU_ASSERT_FATAL( + (tsk_size_t) row_order[row] + row_for_parent < new_edges.num_rows); ret = tsk_modular_simplifier_add_edge(&simplifier, - new_edges.left[row + row_for_parent], - new_edges.right[row + row_for_parent], - new_edges.parent[row + row_for_parent], - new_edges.child[row + row_for_parent]); + new_edges.left[(tsk_size_t) row_order[row] + row_for_parent], + new_edges.right[(tsk_size_t) row_order[row] + row_for_parent], + new_edges.parent[(tsk_size_t) row_order[row] + row_for_parent], + new_edges.child[(tsk_size_t) row_order[row] + row_for_parent]); if (ret < 0) { goto out; } diff --git a/c/tskit/tables.c b/c/tskit/tables.c index c9f237e781..5ac9f0c2ab 100644 --- a/c/tskit/tables.c +++ b/c/tskit/tables.c @@ -13697,6 +13697,9 @@ tsk_modular_simplifier_add_edge(tsk_modular_simplifier_t *self, double left, goto out; } + fprintf(stdout, "parent %d birth time %lf < last parent birth time %lf\n", parent, + self->pimpl->simplifier.input_tables.nodes.time[parent], + self->pimpl->last_parent_time); if (self->pimpl->simplifier.input_tables.nodes.time[parent] < self->pimpl->last_parent_time) { ret = TSK_ERR_EDGES_NOT_SORTED_PARENT_TIME; From 5114645551916afcd6c5cce277ce622b6e5791ae Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Fri, 26 May 2023 14:21:22 -0700 Subject: [PATCH 69/88] clean --- c/tskit/tables.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/c/tskit/tables.c b/c/tskit/tables.c index 5ac9f0c2ab..c9f237e781 100644 --- a/c/tskit/tables.c +++ b/c/tskit/tables.c @@ -13697,9 +13697,6 @@ tsk_modular_simplifier_add_edge(tsk_modular_simplifier_t *self, double left, goto out; } - fprintf(stdout, "parent %d birth time %lf < last parent birth time %lf\n", parent, - self->pimpl->simplifier.input_tables.nodes.time[parent], - self->pimpl->last_parent_time); if (self->pimpl->simplifier.input_tables.nodes.time[parent] < self->pimpl->last_parent_time) { ret = TSK_ERR_EDGES_NOT_SORTED_PARENT_TIME; From 60ef2e4e48fd7ed1e2fc925931509ca144443064 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Fri, 26 May 2023 14:31:28 -0700 Subject: [PATCH 70/88] delete printf --- c/tests/test_tables.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 53e5f38e27..5d7c1565f4 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -9240,8 +9240,6 @@ run_test_modular_simplify_overlapping_generations( } } ret = tsk_modular_simplifier_merge_ancestors(&simplifier, last_parent); - fprintf(stdout, "procesed parent %d, time: %lf\n", last_parent, - tables_copy.nodes.time[last_parent]); if (ret < 0) { goto out; } From c25d6f91b63be21821b79e8d1dda54ecafdea375 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Fri, 26 May 2023 14:59:42 -0700 Subject: [PATCH 71/88] start to abstract out the test runner step --- c/tests/test_tables.c | 241 +++++++++++++++++++++++++++++++++++------- 1 file changed, 205 insertions(+), 36 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 5d7c1565f4..550dddaf01 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8926,27 +8926,27 @@ make_overlapping_generations_trees_for_testing_modular_simplify( fauxbuffer_free(&buffer); } -static void -run_test_modular_simplify_single_tree(tsk_id_t row_order[5], int expected_result) +static int +run_test_modular_simplifier(tsk_table_collection_t *tables, tsk_edge_table_t *new_edges, + tsk_id_t *row_order, tsk_size_t len_row_order, tsk_id_t *samples, + tsk_size_t len_samples) { int ret; - tsk_table_collection_t tables, standard_tables; - tsk_edge_table_t new_edges; + tsk_table_collection_t standard_tables; tsk_modular_simplifier_t simplifier; tsk_treeseq_t treeseq, standard_treeseq; tsk_tree_t standard_tree, tree; - tsk_id_t *samples; tsk_id_t last_parent; - tsk_size_t row; + tsk_size_t row, row_for_parent; double ttl_time, standard_ttl_time; - make_single_tree_for_testing_modular_simplify(&tables, &new_edges, &samples); - ret = tsk_table_collection_copy(&tables, &standard_tables, 0); + ret = tsk_table_collection_copy(tables, &standard_tables, 0); if (ret < 0) { goto out; } - ret = tsk_edge_table_append_columns(&standard_tables.edges, new_edges.num_rows, - new_edges.left, new_edges.right, new_edges.parent, new_edges.child, NULL, NULL); + ret = tsk_edge_table_append_columns(&standard_tables.edges, new_edges->num_rows, + new_edges->left, new_edges->right, new_edges->parent, new_edges->child, NULL, + NULL); if (ret < 0) { goto out; } @@ -8955,19 +8955,13 @@ run_test_modular_simplify_single_tree(tsk_id_t row_order[5], int expected_result goto out; } CU_ASSERT_EQUAL_FATAL( - standard_tables.edges.num_rows, tables.edges.num_rows + new_edges.num_rows); - CU_ASSERT_EQUAL_FATAL(standard_tables.nodes.num_rows, tables.nodes.num_rows); - - ret = tsk_table_collection_simplify(&standard_tables, samples, 3, 0, NULL); - if (ret < 0) { - goto out; - } + standard_tables.edges.num_rows, tables->edges.num_rows + new_edges->num_rows); + CU_ASSERT_EQUAL_FATAL(standard_tables.nodes.num_rows, tables->nodes.num_rows); - ret = tsk_modular_simplifier_init(&simplifier, &tables, samples, 3, 0); + ret = tsk_modular_simplifier_init(&simplifier, tables, samples, len_samples, 0); if (ret < 0) { goto out; } - row = 0; /* Pseudocode that we are mocking: * For each parent of a new edge: * - add that edge to the segment queue. @@ -8985,17 +8979,24 @@ run_test_modular_simplify_single_tree(tsk_id_t row_order[5], int expected_result * (How edges are sorted is an internal detail * and cannot be used for testing.) */ - while (row < new_edges.num_rows) { - last_parent = new_edges.parent[row_order[row]]; - while (row < new_edges.num_rows - && new_edges.parent[row_order[row]] == last_parent) { + for (row = 0; row < len_row_order; ++row) { + last_parent = new_edges->parent[row_order[row]]; + row_for_parent = 0; + for (row_for_parent = 0; + (tsk_size_t) row_order[row] + row_for_parent < new_edges->num_rows + && new_edges->parent[(tsk_size_t) row_order[row] + row_for_parent] + == last_parent; + ++row_for_parent) { + CU_ASSERT_FATAL( + (tsk_size_t) row_order[row] + row_for_parent < new_edges->num_rows); ret = tsk_modular_simplifier_add_edge(&simplifier, - new_edges.left[row_order[row]], new_edges.right[row_order[row]], - new_edges.parent[row_order[row]], new_edges.child[row_order[row]]); + new_edges->left[(tsk_size_t) row_order[row] + row_for_parent], + new_edges->right[(tsk_size_t) row_order[row] + row_for_parent], + new_edges->parent[(tsk_size_t) row_order[row] + row_for_parent], + new_edges->child[(tsk_size_t) row_order[row] + row_for_parent]); if (ret < 0) { goto out; } - ++row; } ret = tsk_modular_simplifier_merge_ancestors(&simplifier, last_parent); if (ret < 0) { @@ -9021,17 +9022,21 @@ run_test_modular_simplify_single_tree(tsk_id_t row_order[5], int expected_result goto out; } - CU_ASSERT_EQUAL_FATAL(ret, 0); + // fprintf(stdout, "standard\n"); + // tsk_table_collection_print_state(&standard_tables, stdout); + // fprintf(stdout, "buffered\n"); + // tsk_table_collection_print_state(&tables, stdout); + // CU_ASSERT_EQUAL_FATAL(ret, 0); // Now, we can compare various properties of the two table collections - CU_ASSERT_EQUAL_FATAL(standard_tables.edges.num_rows, tables.edges.num_rows); - CU_ASSERT_EQUAL_FATAL(standard_tables.nodes.num_rows, tables.nodes.num_rows); + CU_ASSERT_EQUAL_FATAL(standard_tables.edges.num_rows, tables->edges.num_rows); + CU_ASSERT_EQUAL_FATAL(standard_tables.nodes.num_rows, tables->nodes.num_rows); ret = tsk_table_collection_build_index(&standard_tables, 0); if (ret < 0) { goto out; } - ret = tsk_table_collection_build_index(&tables, 0); + ret = tsk_table_collection_build_index(tables, 0); if (ret < 0) { goto out; } @@ -9040,7 +9045,7 @@ run_test_modular_simplify_single_tree(tsk_id_t row_order[5], int expected_result if (ret < 0) { goto out; } - ret = tsk_treeseq_init(&treeseq, &tables, 0); + ret = tsk_treeseq_init(&treeseq, tables, 0); if (ret < 0) { goto out; } @@ -9068,12 +9073,174 @@ run_test_modular_simplify_single_tree(tsk_id_t row_order[5], int expected_result tsk_tree_free(&tree); CU_ASSERT_EQUAL_FATAL(ret, 0); out: - tsk_table_collection_free(&tables); tsk_table_collection_free(&standard_tables); - tsk_edge_table_free(&new_edges); - tsk_modular_simplifier_free(&simplifier); tsk_safe_free(samples); + tsk_modular_simplifier_free(&simplifier); + return ret; +} + +static void +run_test_modular_simplify_single_tree(tsk_id_t row_order[5], int expected_result) +{ + int ret; + tsk_table_collection_t tables; + tsk_edge_table_t new_edges; + // tsk_modular_simplifier_t simplifier; + // tsk_treeseq_t treeseq, standard_treeseq; + // tsk_tree_t standard_tree, tree; + tsk_id_t *samples; + // tsk_id_t last_parent; + // tsk_size_t row; + // double ttl_time, standard_ttl_time; + make_single_tree_for_testing_modular_simplify(&tables, &new_edges, &samples); + + ret = run_test_modular_simplifier(&tables, &new_edges, &row_order[0], 5, samples, 3); + tsk_table_collection_free(&tables); + tsk_edge_table_free(&new_edges); CU_ASSERT_EQUAL_FATAL(ret, expected_result); + + // ret = tsk_table_collection_copy(&tables, &standard_tables, 0); + // if (ret < 0) { + // goto out; + // } + // ret = tsk_edge_table_append_columns(&standard_tables.edges, + // new_edges.num_rows, + // new_edges.left, new_edges.right, new_edges.parent, new_edges.child, + // NULL, NULL); + // if (ret < 0) { + // goto out; + // } + // ret = tsk_table_collection_sort(&standard_tables, NULL, 0); + // if (ret < 0) { + // goto out; + // } + // CU_ASSERT_EQUAL_FATAL( + // standard_tables.edges.num_rows, tables.edges.num_rows + + // new_edges.num_rows); + // CU_ASSERT_EQUAL_FATAL(standard_tables.nodes.num_rows, + // tables.nodes.num_rows); + // + // ret = tsk_table_collection_simplify(&standard_tables, samples, 3, 0, NULL); + // if (ret < 0) { + // goto out; + // } + // + // ret = tsk_modular_simplifier_init(&simplifier, &tables, samples, 3, 0); + // if (ret < 0) { + // goto out; + // } + // row = 0; + // /* Pseudocode that we are mocking: + // * For each parent of a new edge: + // * - add that edge to the segment queue. + // * - When done, finalise the queue and merge ancestors. + // * + // * If our buffer is wrong, we will have parents unsorted by time + // * and/or the same parent processed in different loop iterations. + // * Each case is an error that MUST be handled. + // * It is trivial to show that not handling the errors can give rise + // * to invalid table collections / tree sequences. + // * The requirement for error handling must be documented + // * + // * Production code should use an input other than + // * an edge table. + // * (How edges are sorted is an internal detail + // * and cannot be used for testing.) + // */ + // while (row < new_edges.num_rows) { + // last_parent = new_edges.parent[row_order[row]]; + // while (row < new_edges.num_rows + // && new_edges.parent[row_order[row]] == last_parent) { + // ret = tsk_modular_simplifier_add_edge(&simplifier, + // new_edges.left[row_order[row]], + // new_edges.right[row_order[row]], + // new_edges.parent[row_order[row]], + // new_edges.child[row_order[row]]); + // if (ret < 0) { + // goto out; + // } + // ++row; + // } + // ret = tsk_modular_simplifier_merge_ancestors(&simplifier, last_parent); + // if (ret < 0) { + // goto out; + // } + // } + // /* Simplification's internal cleanup. + // * Should NOT be called if above loop errors. + // * We know that not calling it and calling + // * "modular simplifier free" does not leak + // * because valgrind is happy. + // * + // * Now, we have processed all (child) nodes whose births are + // * MORE RECENT than those in the input tables. + // * + // * TODO: the init method should calculate the minimum + // * birth time of any child in the input table. + // * The "edge adder" function should ensure that all + // * new edges are < (or <= ??) that value. + // */ + // ret = tsk_modular_simplifier_finalise(&simplifier, NULL); + // if (ret < 0) { + // goto out; + // } + // + // CU_ASSERT_EQUAL_FATAL(ret, 0); + // + // // Now, we can compare various properties of the two table collections + // CU_ASSERT_EQUAL_FATAL(standard_tables.edges.num_rows, + // tables.edges.num_rows); + // CU_ASSERT_EQUAL_FATAL(standard_tables.nodes.num_rows, + // tables.nodes.num_rows); + // + // ret = tsk_table_collection_build_index(&standard_tables, 0); + // if (ret < 0) { + // goto out; + // } + // ret = tsk_table_collection_build_index(&tables, 0); + // if (ret < 0) { + // goto out; + // } + // + // ret = tsk_treeseq_init(&standard_treeseq, &standard_tables, 0); + // if (ret < 0) { + // goto out; + // } + // ret = tsk_treeseq_init(&treeseq, &tables, 0); + // if (ret < 0) { + // goto out; + // } + // CU_ASSERT_EQUAL_FATAL(tsk_treeseq_get_num_trees(&standard_treeseq), + // tsk_treeseq_get_num_trees(&treeseq)); + // ret = tsk_tree_init(&standard_tree, &standard_treeseq, 0); + // if (ret < 0) { + // goto out; + // } + // ret = tsk_tree_init(&tree, &treeseq, 0); + // if (ret < 0) { + // goto out; + // } + // ret = tsk_tree_first(&standard_tree); + // CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK); + // for (ret = tsk_tree_first(&tree); ret == TSK_TREE_OK; ret = + // tsk_tree_next(&tree)) { + // tsk_tree_get_total_branch_length(&tree, -1, &ttl_time); + // tsk_tree_get_total_branch_length(&standard_tree, -1, + // &standard_ttl_time); CU_ASSERT_TRUE(ttl_time - standard_ttl_time <= + // 1e-9); tsk_tree_next(&standard_tree); + // } + // tsk_treeseq_free(&standard_treeseq); + // tsk_treeseq_free(&treeseq); + // tsk_tree_free(&standard_tree); + // tsk_tree_free(&tree); + // CU_ASSERT_EQUAL_FATAL(ret, 0); + // out: + // tsk_table_collection_free(&tables); + // tsk_table_collection_free(&standard_tables); + // tsk_edge_table_free(&new_edges); + // tsk_modular_simplifier_free(&simplifier); + // tsk_safe_free(samples); + // CU_ASSERT_EQUAL_FATAL(ret, expected_result); } static void @@ -9544,7 +9711,8 @@ test_sort_tables_offsets(void) CU_ASSERT_EQUAL_FATAL(ret, 0); CU_ASSERT_TRUE(tsk_table_collection_equals(&tables, ©, 0)); - /* Check that sorting would have had no effect as individuals not in default sort*/ + /* Check that sorting would have had no effect as individuals not in default + * sort*/ ret = tsk_table_collection_sort(&tables, NULL, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); CU_ASSERT_TRUE(tsk_table_collection_equals(&tables, ©, 0)); @@ -12362,7 +12530,8 @@ main(int argc, char **argv) test_table_collection_modular_simplify_table_integrity_check_fail }, { "test_table_collection_modular_simplify_overlapping_generations", test_table_collection_modular_simplify_overlapping_generations }, - { "test_table_collection_modular_simplify_overlapping_generations_parent_time_" + { "test_table_collection_modular_simplify_overlapping_generations_parent_" + "time_" "error", test_table_collection_modular_simplify_overlapping_generations_parent_time_error }, { "test_table_collection_time_units", test_table_collection_time_units }, From 7819804f3de58df75811369ea15f528aa535e1f2 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Fri, 26 May 2023 15:19:55 -0700 Subject: [PATCH 72/88] streamline for new test runner --- c/tests/test_tables.c | 174 ++++-------------------------------------- 1 file changed, 15 insertions(+), 159 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 550dddaf01..1297770d32 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8958,6 +8958,10 @@ run_test_modular_simplifier(tsk_table_collection_t *tables, tsk_edge_table_t *ne standard_tables.edges.num_rows, tables->edges.num_rows + new_edges->num_rows); CU_ASSERT_EQUAL_FATAL(standard_tables.nodes.num_rows, tables->nodes.num_rows); + ret = tsk_table_collection_simplify(&standard_tables, samples, 3, 0, NULL); + if (ret < 0) { + goto out; + } ret = tsk_modular_simplifier_init(&simplifier, tables, samples, len_samples, 0); if (ret < 0) { goto out; @@ -8981,6 +8985,7 @@ run_test_modular_simplifier(tsk_table_collection_t *tables, tsk_edge_table_t *ne */ for (row = 0; row < len_row_order; ++row) { last_parent = new_edges->parent[row_order[row]]; + fprintf(stdout, "last parent = %d\n", last_parent); row_for_parent = 0; for (row_for_parent = 0; (tsk_size_t) row_order[row] + row_for_parent < new_edges->num_rows @@ -9021,7 +9026,6 @@ run_test_modular_simplifier(tsk_table_collection_t *tables, tsk_edge_table_t *ne if (ret < 0) { goto out; } - // fprintf(stdout, "standard\n"); // tsk_table_collection_print_state(&standard_tables, stdout); // fprintf(stdout, "buffered\n"); @@ -9080,190 +9084,42 @@ run_test_modular_simplifier(tsk_table_collection_t *tables, tsk_edge_table_t *ne } static void -run_test_modular_simplify_single_tree(tsk_id_t row_order[5], int expected_result) +run_test_modular_simplify_single_tree( + tsk_id_t *row_order, tsk_size_t len_row_order, int expected_result) { int ret; tsk_table_collection_t tables; tsk_edge_table_t new_edges; - // tsk_modular_simplifier_t simplifier; - // tsk_treeseq_t treeseq, standard_treeseq; - // tsk_tree_t standard_tree, tree; tsk_id_t *samples; - // tsk_id_t last_parent; - // tsk_size_t row; - // double ttl_time, standard_ttl_time; make_single_tree_for_testing_modular_simplify(&tables, &new_edges, &samples); - - ret = run_test_modular_simplifier(&tables, &new_edges, &row_order[0], 5, samples, 3); + ret = run_test_modular_simplifier( + &tables, &new_edges, row_order, len_row_order, samples, 3); tsk_table_collection_free(&tables); tsk_edge_table_free(&new_edges); CU_ASSERT_EQUAL_FATAL(ret, expected_result); - - // ret = tsk_table_collection_copy(&tables, &standard_tables, 0); - // if (ret < 0) { - // goto out; - // } - // ret = tsk_edge_table_append_columns(&standard_tables.edges, - // new_edges.num_rows, - // new_edges.left, new_edges.right, new_edges.parent, new_edges.child, - // NULL, NULL); - // if (ret < 0) { - // goto out; - // } - // ret = tsk_table_collection_sort(&standard_tables, NULL, 0); - // if (ret < 0) { - // goto out; - // } - // CU_ASSERT_EQUAL_FATAL( - // standard_tables.edges.num_rows, tables.edges.num_rows + - // new_edges.num_rows); - // CU_ASSERT_EQUAL_FATAL(standard_tables.nodes.num_rows, - // tables.nodes.num_rows); - // - // ret = tsk_table_collection_simplify(&standard_tables, samples, 3, 0, NULL); - // if (ret < 0) { - // goto out; - // } - // - // ret = tsk_modular_simplifier_init(&simplifier, &tables, samples, 3, 0); - // if (ret < 0) { - // goto out; - // } - // row = 0; - // /* Pseudocode that we are mocking: - // * For each parent of a new edge: - // * - add that edge to the segment queue. - // * - When done, finalise the queue and merge ancestors. - // * - // * If our buffer is wrong, we will have parents unsorted by time - // * and/or the same parent processed in different loop iterations. - // * Each case is an error that MUST be handled. - // * It is trivial to show that not handling the errors can give rise - // * to invalid table collections / tree sequences. - // * The requirement for error handling must be documented - // * - // * Production code should use an input other than - // * an edge table. - // * (How edges are sorted is an internal detail - // * and cannot be used for testing.) - // */ - // while (row < new_edges.num_rows) { - // last_parent = new_edges.parent[row_order[row]]; - // while (row < new_edges.num_rows - // && new_edges.parent[row_order[row]] == last_parent) { - // ret = tsk_modular_simplifier_add_edge(&simplifier, - // new_edges.left[row_order[row]], - // new_edges.right[row_order[row]], - // new_edges.parent[row_order[row]], - // new_edges.child[row_order[row]]); - // if (ret < 0) { - // goto out; - // } - // ++row; - // } - // ret = tsk_modular_simplifier_merge_ancestors(&simplifier, last_parent); - // if (ret < 0) { - // goto out; - // } - // } - // /* Simplification's internal cleanup. - // * Should NOT be called if above loop errors. - // * We know that not calling it and calling - // * "modular simplifier free" does not leak - // * because valgrind is happy. - // * - // * Now, we have processed all (child) nodes whose births are - // * MORE RECENT than those in the input tables. - // * - // * TODO: the init method should calculate the minimum - // * birth time of any child in the input table. - // * The "edge adder" function should ensure that all - // * new edges are < (or <= ??) that value. - // */ - // ret = tsk_modular_simplifier_finalise(&simplifier, NULL); - // if (ret < 0) { - // goto out; - // } - // - // CU_ASSERT_EQUAL_FATAL(ret, 0); - // - // // Now, we can compare various properties of the two table collections - // CU_ASSERT_EQUAL_FATAL(standard_tables.edges.num_rows, - // tables.edges.num_rows); - // CU_ASSERT_EQUAL_FATAL(standard_tables.nodes.num_rows, - // tables.nodes.num_rows); - // - // ret = tsk_table_collection_build_index(&standard_tables, 0); - // if (ret < 0) { - // goto out; - // } - // ret = tsk_table_collection_build_index(&tables, 0); - // if (ret < 0) { - // goto out; - // } - // - // ret = tsk_treeseq_init(&standard_treeseq, &standard_tables, 0); - // if (ret < 0) { - // goto out; - // } - // ret = tsk_treeseq_init(&treeseq, &tables, 0); - // if (ret < 0) { - // goto out; - // } - // CU_ASSERT_EQUAL_FATAL(tsk_treeseq_get_num_trees(&standard_treeseq), - // tsk_treeseq_get_num_trees(&treeseq)); - // ret = tsk_tree_init(&standard_tree, &standard_treeseq, 0); - // if (ret < 0) { - // goto out; - // } - // ret = tsk_tree_init(&tree, &treeseq, 0); - // if (ret < 0) { - // goto out; - // } - // ret = tsk_tree_first(&standard_tree); - // CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK); - // for (ret = tsk_tree_first(&tree); ret == TSK_TREE_OK; ret = - // tsk_tree_next(&tree)) { - // tsk_tree_get_total_branch_length(&tree, -1, &ttl_time); - // tsk_tree_get_total_branch_length(&standard_tree, -1, - // &standard_ttl_time); CU_ASSERT_TRUE(ttl_time - standard_ttl_time <= - // 1e-9); tsk_tree_next(&standard_tree); - // } - // tsk_treeseq_free(&standard_treeseq); - // tsk_treeseq_free(&treeseq); - // tsk_tree_free(&standard_tree); - // tsk_tree_free(&tree); - // CU_ASSERT_EQUAL_FATAL(ret, 0); - // out: - // tsk_table_collection_free(&tables); - // tsk_table_collection_free(&standard_tables); - // tsk_edge_table_free(&new_edges); - // tsk_modular_simplifier_free(&simplifier); - // tsk_safe_free(samples); - // CU_ASSERT_EQUAL_FATAL(ret, expected_result); } static void test_table_collection_modular_simplify_simple_tree(void) { - tsk_id_t row_order[5] = { 4, 3, 2, 1, 0 }; - run_test_modular_simplify_single_tree(row_order, 0); + tsk_id_t row_order[4] = { 3, 2, 1, 0 }; + run_test_modular_simplify_single_tree(&row_order[0], 4, 0); } static void test_table_collection_modular_simplify_simple_tree_discontiguous_parents(void) { - tsk_id_t row_order[5] = { 3, 2, 4, 1, 0 }; + tsk_id_t row_order[4] = { 3, 2, 3, 1 }; run_test_modular_simplify_single_tree( - row_order, TSK_ERR_EDGES_NONCONTIGUOUS_PARENTS); + &row_order[0], 4, TSK_ERR_EDGES_NONCONTIGUOUS_PARENTS); } static void test_table_collection_modular_simplify_simple_tree_add_edges_wrong_birth_order(void) { - tsk_id_t row_order[5] = { 0, 1, 2, 3, 4 }; + tsk_id_t row_order[4] = { 0, 1, 2, 3 }; run_test_modular_simplify_single_tree( - row_order, TSK_ERR_EDGES_NOT_SORTED_PARENT_TIME); + &row_order[0], 4, TSK_ERR_EDGES_NOT_SORTED_PARENT_TIME); } static void From 8d25cc6ce3736f92723202bb81308b52e65da4b3 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Fri, 26 May 2023 15:23:35 -0700 Subject: [PATCH 73/88] use new runner for most/all tests --- c/tests/test_tables.c | 168 ++---------------------------------------- 1 file changed, 6 insertions(+), 162 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 1297770d32..33f606bd33 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -9176,174 +9176,18 @@ test_table_collection_modular_simplify_add_child_with_invalid_time(void) static void run_test_modular_simplify_overlapping_generations( - tsk_id_t row_order[4], int expected_result) + tsk_id_t *row_order, tsk_size_t len_row_order, int expected_result) { - // This testing pattern is pretty ugly for overlapping - // generations. int ret; - tsk_table_collection_t tables, tables_copy, standard_tables; + tsk_table_collection_t tables; tsk_edge_table_t new_edges; - tsk_modular_simplifier_t simplifier; - tsk_treeseq_t treeseq, standard_treeseq; - tsk_tree_t standard_tree, tree; tsk_id_t *samples; - tsk_id_t last_parent; - tsk_size_t row, row_for_parent; - double ttl_time, standard_ttl_time; - make_overlapping_generations_trees_for_testing_modular_simplify( &tables, &new_edges, &samples); - - ret = tsk_table_collection_copy(&tables, &standard_tables, 0); - if (ret < 0) { - goto out; - } - ret = tsk_edge_table_append_columns(&standard_tables.edges, new_edges.num_rows, - new_edges.left, new_edges.right, new_edges.parent, new_edges.child, NULL, NULL); - if (ret < 0) { - goto out; - } - ret = tsk_table_collection_sort(&standard_tables, NULL, 0); - if (ret < 0) { - goto out; - } - CU_ASSERT_EQUAL_FATAL( - standard_tables.edges.num_rows, tables.edges.num_rows + new_edges.num_rows); - CU_ASSERT_EQUAL_FATAL(standard_tables.nodes.num_rows, tables.nodes.num_rows); - - ret = tsk_table_collection_simplify(&standard_tables, samples, 3, 0, NULL); - if (ret < 0) { - goto out; - } - - ret = tsk_table_collection_copy(&tables, &tables_copy, 0); - if (ret < 0) { - goto out; - } - - ret = tsk_modular_simplifier_init(&simplifier, &tables, samples, 3, 0); - if (ret < 0) { - goto out; - } - /* Pseudocode that we are mocking: - * For each parent of a new edge: - * - add that edge to the segment queue. - * - When done, finalise the queue and merge ancestors. - * - * If our buffer is wrong, we will have parents unsorted by time - * and/or the same parent processed in different loop iterations. - * Each case is an error that MUST be handled. - * It is trivial to show that not handling the errors can give rise - * to invalid table collections / tree sequences. - * The requirement for error handling must be documented - * - * Production code should use an input other than - * an edge table. - * (How edges are sorted is an internal detail - * and cannot be used for testing.) - */ - for (row = 0; row < 4; ++row) { - last_parent = new_edges.parent[row_order[row]]; - row_for_parent = 0; - CU_ASSERT_FATAL(last_parent < (tsk_id_t) tables_copy.nodes.num_rows); - for (row_for_parent = 0; - (tsk_size_t) row_order[row] + row_for_parent < new_edges.num_rows - && new_edges.parent[(tsk_size_t) row_order[row] + row_for_parent] - == last_parent; - ++row_for_parent) { - CU_ASSERT_FATAL( - (tsk_size_t) row_order[row] + row_for_parent < new_edges.num_rows); - ret = tsk_modular_simplifier_add_edge(&simplifier, - new_edges.left[(tsk_size_t) row_order[row] + row_for_parent], - new_edges.right[(tsk_size_t) row_order[row] + row_for_parent], - new_edges.parent[(tsk_size_t) row_order[row] + row_for_parent], - new_edges.child[(tsk_size_t) row_order[row] + row_for_parent]); - if (ret < 0) { - goto out; - } - } - ret = tsk_modular_simplifier_merge_ancestors(&simplifier, last_parent); - if (ret < 0) { - goto out; - } - } - /* Simplification's internal cleanup. - * Should NOT be called if above loop errors. - * We know that not calling it and calling - * "modular simplifier free" does not leak - * because valgrind is happy. - * - * Now, we have processed all (child) nodes whose births are - * MORE RECENT than those in the input tables. - * - * TODO: the init method should calculate the minimum - * birth time of any child in the input table. - * The "edge adder" function should ensure that all - * new edges are < (or <= ??) that value. - */ - ret = tsk_modular_simplifier_finalise(&simplifier, NULL); - if (ret < 0) { - goto out; - } - - // fprintf(stdout, "standard\n"); - // tsk_table_collection_print_state(&standard_tables, stdout); - // fprintf(stdout, "buffered\n"); - // tsk_table_collection_print_state(&tables, stdout); - // CU_ASSERT_EQUAL_FATAL(ret, 0); - - // Now, we can compare various properties of the two table collections - CU_ASSERT_EQUAL_FATAL(standard_tables.edges.num_rows, tables.edges.num_rows); - CU_ASSERT_EQUAL_FATAL(standard_tables.nodes.num_rows, tables.nodes.num_rows); - - ret = tsk_table_collection_build_index(&standard_tables, 0); - if (ret < 0) { - goto out; - } - ret = tsk_table_collection_build_index(&tables, 0); - if (ret < 0) { - goto out; - } - - ret = tsk_treeseq_init(&standard_treeseq, &standard_tables, 0); - if (ret < 0) { - goto out; - } - ret = tsk_treeseq_init(&treeseq, &tables, 0); - if (ret < 0) { - goto out; - } - CU_ASSERT_EQUAL_FATAL(tsk_treeseq_get_num_trees(&standard_treeseq), - tsk_treeseq_get_num_trees(&treeseq)); - ret = tsk_tree_init(&standard_tree, &standard_treeseq, 0); - if (ret < 0) { - goto out; - } - ret = tsk_tree_init(&tree, &treeseq, 0); - if (ret < 0) { - goto out; - } - ret = tsk_tree_first(&standard_tree); - CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK); - for (ret = tsk_tree_first(&tree); ret == TSK_TREE_OK; ret = tsk_tree_next(&tree)) { - tsk_tree_get_total_branch_length(&tree, -1, &ttl_time); - tsk_tree_get_total_branch_length(&standard_tree, -1, &standard_ttl_time); - CU_ASSERT_TRUE(ttl_time - standard_ttl_time <= 1e-9); - tsk_tree_next(&standard_tree); - } - tsk_treeseq_free(&standard_treeseq); - tsk_treeseq_free(&treeseq); - tsk_tree_free(&standard_tree); - tsk_tree_free(&tree); - CU_ASSERT_EQUAL_FATAL(ret, 0); -out: + ret = run_test_modular_simplifier( + &tables, &new_edges, row_order, len_row_order, samples, 3); tsk_table_collection_free(&tables); - tsk_table_collection_free(&tables_copy); - tsk_table_collection_free(&standard_tables); - tsk_safe_free(samples); tsk_edge_table_free(&new_edges); - tsk_modular_simplifier_free(&simplifier); - fprintf(stdout, "code: %d, expected_result: %d\n", ret, expected_result); CU_ASSERT_EQUAL_FATAL(ret, expected_result); } @@ -9351,7 +9195,7 @@ static void test_table_collection_modular_simplify_overlapping_generations(void) { tsk_id_t row_order[4] = { 0, 1, 2, 3 }; - run_test_modular_simplify_overlapping_generations(row_order, 0); + run_test_modular_simplify_overlapping_generations(&row_order[0], 4, 0); } static void @@ -9359,7 +9203,7 @@ test_table_collection_modular_simplify_overlapping_generations_parent_time_error { tsk_id_t row_order[4] = { 2, 3, 0, 1 }; run_test_modular_simplify_overlapping_generations( - row_order, TSK_ERR_EDGES_NOT_SORTED_PARENT_TIME); + &row_order[0], 4, TSK_ERR_EDGES_NOT_SORTED_PARENT_TIME); } /* This hits part of simplifier intialisation that is From dc51f7df9600d25369d0e87e8d6e0588f6843c8f Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Fri, 26 May 2023 15:23:50 -0700 Subject: [PATCH 74/88] cleanup --- c/tests/test_tables.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 33f606bd33..c87e7a1d86 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8985,7 +8985,6 @@ run_test_modular_simplifier(tsk_table_collection_t *tables, tsk_edge_table_t *ne */ for (row = 0; row < len_row_order; ++row) { last_parent = new_edges->parent[row_order[row]]; - fprintf(stdout, "last parent = %d\n", last_parent); row_for_parent = 0; for (row_for_parent = 0; (tsk_size_t) row_order[row] + row_for_parent < new_edges->num_rows @@ -9026,11 +9025,6 @@ run_test_modular_simplifier(tsk_table_collection_t *tables, tsk_edge_table_t *ne if (ret < 0) { goto out; } - // fprintf(stdout, "standard\n"); - // tsk_table_collection_print_state(&standard_tables, stdout); - // fprintf(stdout, "buffered\n"); - // tsk_table_collection_print_state(&tables, stdout); - // CU_ASSERT_EQUAL_FATAL(ret, 0); // Now, we can compare various properties of the two table collections CU_ASSERT_EQUAL_FATAL(standard_tables.edges.num_rows, tables->edges.num_rows); From 2e1be0f9c8d9f355233012282ffe01fccd46c10e Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Wed, 31 May 2023 08:43:20 -0700 Subject: [PATCH 75/88] add new .c --- c/examples/efficient_forward_simulation.c | 100 ++++++++++++++++++++++ c/tests/test_tables.c | 8 +- 2 files changed, 102 insertions(+), 6 deletions(-) create mode 100644 c/examples/efficient_forward_simulation.c diff --git a/c/examples/efficient_forward_simulation.c b/c/examples/efficient_forward_simulation.c new file mode 100644 index 0000000000..285e939d7f --- /dev/null +++ b/c/examples/efficient_forward_simulation.c @@ -0,0 +1,100 @@ +#include +#include +#include +#include + +#include + +#define check_tsk_error(val) \ + if (val < 0) { \ + errx(EXIT_FAILURE, "line %d: %s", __LINE__, tsk_strerror(val)); \ + } + +void +simulate( + tsk_table_collection_t *tables, int N, int T, int simplify_interval) +{ + tsk_id_t *buffer, *parents, *children, child, left_parent, right_parent; + double breakpoint; + int ret, j, t, b; + + assert(simplify_interval != 0); // leads to division by zero + buffer = malloc(2 * N * sizeof(tsk_id_t)); + if (buffer == NULL) { + errx(EXIT_FAILURE, "Out of memory"); + } + tables->sequence_length = 1.0; + parents = buffer; + for (j = 0; j < N; j++) { + parents[j] + = tsk_node_table_add_row(&tables->nodes, 0, T, TSK_NULL, TSK_NULL, NULL, 0); + check_tsk_error(parents[j]); + } + b = 0; + for (t = T - 1; t >= 0; t--) { + /* Alternate between using the first and last N values in the buffer */ + parents = buffer + (b * N); + b = (b + 1) % 2; + children = buffer + (b * N); + for (j = 0; j < N; j++) { + child = tsk_node_table_add_row( + &tables->nodes, 0, t, TSK_NULL, TSK_NULL, NULL, 0); + check_tsk_error(child); + /* NOTE: the use of rand() is discouraged for + * research code and proper random number generator + * libraries should be preferred. + */ + left_parent = parents[(size_t)((rand()/(1.+RAND_MAX))*N)]; + right_parent = parents[(size_t)((rand()/(1.+RAND_MAX))*N)]; + do { + breakpoint = rand()/(1.+RAND_MAX); + } while (breakpoint == 0); /* tiny proba of breakpoint being 0 */ + ret = tsk_edge_table_add_row( + &tables->edges, 0, breakpoint, left_parent, child, NULL, 0); + check_tsk_error(ret); + ret = tsk_edge_table_add_row( + &tables->edges, breakpoint, 1, right_parent, child, NULL, 0); + check_tsk_error(ret); + children[j] = child; + } + if (t % simplify_interval == 0) { + printf("Simplify at generation %lld: (%lld nodes %lld edges)", + (long long) t, + (long long) tables->nodes.num_rows, + (long long) tables->edges.num_rows); + /* Note: Edges must be sorted for simplify to work, and we use a brute force + * approach of sorting each time here for simplicity. This is inefficient. */ + ret = tsk_table_collection_sort(tables, NULL, 0); + check_tsk_error(ret); + ret = tsk_table_collection_simplify(tables, children, N, 0, NULL); + check_tsk_error(ret); + printf(" -> (%lld nodes %lld edges)\n", + (long long) tables->nodes.num_rows, + (long long) tables->edges.num_rows); + for (j = 0; j < N; j++) { + children[j] = j; + } + } + } + free(buffer); +} + +int +main(int argc, char **argv) +{ + int ret; + tsk_table_collection_t tables; + + if (argc != 6) { + errx(EXIT_FAILURE, "usage: N T simplify-interval output-file seed"); + } + ret = tsk_table_collection_init(&tables, 0); + check_tsk_error(ret); + srand((unsigned)atoi(argv[5])); + simulate(&tables, atoi(argv[1]), atoi(argv[2]), atoi(argv[3])); + ret = tsk_table_collection_dump(&tables, argv[4], 0); + check_tsk_error(ret); + + tsk_table_collection_free(&tables); + return 0; +} diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index c87e7a1d86..b81cbd8032 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -8976,7 +8976,8 @@ run_test_modular_simplifier(tsk_table_collection_t *tables, tsk_edge_table_t *ne * Each case is an error that MUST be handled. * It is trivial to show that not handling the errors can give rise * to invalid table collections / tree sequences. - * The requirement for error handling must be documented + * (TODO: The requirement for error handling must be documented + * in tables.h.) * * Production code should use an input other than * an edge table. @@ -9015,11 +9016,6 @@ run_test_modular_simplifier(tsk_table_collection_t *tables, tsk_edge_table_t *ne * * Now, we have processed all (child) nodes whose births are * MORE RECENT than those in the input tables. - * - * TODO: the init method should calculate the minimum - * birth time of any child in the input table. - * The "edge adder" function should ensure that all - * new edges are < (or <= ??) that value. */ ret = tsk_modular_simplifier_finalise(&simplifier, NULL); if (ret < 0) { From b11525d231f121494b98c973e2bc94c4e9cf38d8 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Wed, 31 May 2023 08:44:32 -0700 Subject: [PATCH 76/88] update the meson stuff --- c/meson.build | 3 +++ 1 file changed, 3 insertions(+) diff --git a/c/meson.build b/c/meson.build index c6150db2e7..39ed8d70c6 100644 --- a/c/meson.build +++ b/c/meson.build @@ -117,5 +117,8 @@ if not meson.is_subproject() executable('haploid_wright_fisher', sources: ['examples/haploid_wright_fisher.c'], link_with: [tskit_lib], dependencies: lib_deps) + executable('efficient_forward_simulation', + sources: ['examples/efficient_forward_simulation.c'], + link_with: [tskit_lib], dependencies: lib_deps) endif endif From 9f2d49636bfe023de56620f9c26e5902531101cf Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Wed, 31 May 2023 09:03:19 -0700 Subject: [PATCH 77/88] add some infrastructure --- c/examples/efficient_forward_simulation.c | 90 ++++++++++++++++++++--- 1 file changed, 78 insertions(+), 12 deletions(-) diff --git a/c/examples/efficient_forward_simulation.c b/c/examples/efficient_forward_simulation.c index 285e939d7f..667b99fcb3 100644 --- a/c/examples/efficient_forward_simulation.c +++ b/c/examples/efficient_forward_simulation.c @@ -10,9 +10,78 @@ errx(EXIT_FAILURE, "line %d: %s", __LINE__, tsk_strerror(val)); \ } +typedef struct { + double left; + double right; + double parent_birth_time; + tsk_id_t parent; + tsk_id_t child; +} birth; + +int +cmp_birth(const void *lhs, const void *rhs) +{ + const birth *clhs = (const birth *) lhs; + const birth *crhs = (const birth *) rhs; + int ret = (clhs->parent_birth_time > crhs->parent_birth_time) + - (crhs->parent_birth_time > clhs->parent_birth_time); + if (ret == 0) { + ret = (clhs->parent > crhs->parent) - (crhs->parent > clhs->parent); + } + return ret; +} + +typedef struct { + birth *births; + tsk_size_t capacity; + tsk_size_t size; +} new_edges; + +int +new_edges_init(new_edges *buffer, tsk_size_t initial_capacity) +{ + int ret = 0; + if (initial_capacity == 0) { + ret = -1; + goto out; + } + buffer->births = (birth *) malloc(initial_capacity); + buffer->capacity = initial_capacity; + buffer->size = 0; +out: + return ret; +} + +void +new_edges_realloc(new_edges *buffer) +{ + if (buffer->size + 1 >= buffer->capacity) { + buffer->capacity *= 2; + buffer->births = (birth *) realloc(buffer->births, buffer->capacity); + } +} + void -simulate( - tsk_table_collection_t *tables, int N, int T, int simplify_interval) +new_edges_buffer_birth(double left, double right, double parent_birth_time, + tsk_id_t parent, tsk_id_t child, new_edges *buffer) +{ + new_edges_realloc(buffer); + buffer->births[buffer->size].left = left; + buffer->births[buffer->size].right = right; + buffer->births[buffer->size].parent_birth_time = parent_birth_time; + buffer->births[buffer->size].parent = parent; + buffer->births[buffer->size].child = child; + buffer->size += 1; +} + +void +new_edges_prep_for_simplification(new_edges *buffer) +{ + qsort(buffer->births, (size_t) buffer->size, sizeof(birth), cmp_birth); +} + +void +simulate(tsk_table_collection_t *tables, int N, int T, int simplify_interval) { tsk_id_t *buffer, *parents, *children, child, left_parent, right_parent; double breakpoint; @@ -44,10 +113,10 @@ simulate( * research code and proper random number generator * libraries should be preferred. */ - left_parent = parents[(size_t)((rand()/(1.+RAND_MAX))*N)]; - right_parent = parents[(size_t)((rand()/(1.+RAND_MAX))*N)]; + left_parent = parents[(size_t)((rand() / (1. + RAND_MAX)) * N)]; + right_parent = parents[(size_t)((rand() / (1. + RAND_MAX)) * N)]; do { - breakpoint = rand()/(1.+RAND_MAX); + breakpoint = rand() / (1. + RAND_MAX); } while (breakpoint == 0); /* tiny proba of breakpoint being 0 */ ret = tsk_edge_table_add_row( &tables->edges, 0, breakpoint, left_parent, child, NULL, 0); @@ -58,18 +127,15 @@ simulate( children[j] = child; } if (t % simplify_interval == 0) { - printf("Simplify at generation %lld: (%lld nodes %lld edges)", - (long long) t, - (long long) tables->nodes.num_rows, - (long long) tables->edges.num_rows); + printf("Simplify at generation %lld: (%lld nodes %lld edges)", (long long) t, + (long long) tables->nodes.num_rows, (long long) tables->edges.num_rows); /* Note: Edges must be sorted for simplify to work, and we use a brute force * approach of sorting each time here for simplicity. This is inefficient. */ ret = tsk_table_collection_sort(tables, NULL, 0); check_tsk_error(ret); ret = tsk_table_collection_simplify(tables, children, N, 0, NULL); check_tsk_error(ret); - printf(" -> (%lld nodes %lld edges)\n", - (long long) tables->nodes.num_rows, + printf(" -> (%lld nodes %lld edges)\n", (long long) tables->nodes.num_rows, (long long) tables->edges.num_rows); for (j = 0; j < N; j++) { children[j] = j; @@ -90,7 +156,7 @@ main(int argc, char **argv) } ret = tsk_table_collection_init(&tables, 0); check_tsk_error(ret); - srand((unsigned)atoi(argv[5])); + srand((unsigned) atoi(argv[5])); simulate(&tables, atoi(argv[1]), atoi(argv[2]), atoi(argv[3])); ret = tsk_table_collection_dump(&tables, argv[4], 0); check_tsk_error(ret); From add7347f701d105a62d2313650b23b116ee5ab3e Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Wed, 31 May 2023 09:03:27 -0700 Subject: [PATCH 78/88] add some infrastructure --- c/examples/efficient_forward_simulation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c/examples/efficient_forward_simulation.c b/c/examples/efficient_forward_simulation.c index 667b99fcb3..7ed81ff044 100644 --- a/c/examples/efficient_forward_simulation.c +++ b/c/examples/efficient_forward_simulation.c @@ -77,7 +77,7 @@ new_edges_buffer_birth(double left, double right, double parent_birth_time, void new_edges_prep_for_simplification(new_edges *buffer) { - qsort(buffer->births, (size_t) buffer->size, sizeof(birth), cmp_birth); + qsort(buffer->births, (size_t) buffer->size, sizeof(birth), cmp_birth); } void From 4e07d2215a21ce10c269f27daf9fa7d2fe934ccc Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Wed, 31 May 2023 09:03:43 -0700 Subject: [PATCH 79/88] fmt --- c/examples/efficient_forward_simulation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c/examples/efficient_forward_simulation.c b/c/examples/efficient_forward_simulation.c index 7ed81ff044..667b99fcb3 100644 --- a/c/examples/efficient_forward_simulation.c +++ b/c/examples/efficient_forward_simulation.c @@ -77,7 +77,7 @@ new_edges_buffer_birth(double left, double right, double parent_birth_time, void new_edges_prep_for_simplification(new_edges *buffer) { - qsort(buffer->births, (size_t) buffer->size, sizeof(birth), cmp_birth); + qsort(buffer->births, (size_t) buffer->size, sizeof(birth), cmp_birth); } void From 3dfc9fce14becb1ddd6c63e06b7bef8879454d47 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Wed, 31 May 2023 09:22:14 -0700 Subject: [PATCH 80/88] skeleton in place. we have a segfault --- c/examples/efficient_forward_simulation.c | 71 +++++++++++++++++------ 1 file changed, 54 insertions(+), 17 deletions(-) diff --git a/c/examples/efficient_forward_simulation.c b/c/examples/efficient_forward_simulation.c index 667b99fcb3..31f898c8a4 100644 --- a/c/examples/efficient_forward_simulation.c +++ b/c/examples/efficient_forward_simulation.c @@ -35,10 +35,10 @@ typedef struct { birth *births; tsk_size_t capacity; tsk_size_t size; -} new_edges; +} edge_buffer; int -new_edges_init(new_edges *buffer, tsk_size_t initial_capacity) +edge_buffer_init(edge_buffer *buffer, tsk_size_t initial_capacity) { int ret = 0; if (initial_capacity == 0) { @@ -53,7 +53,7 @@ new_edges_init(new_edges *buffer, tsk_size_t initial_capacity) } void -new_edges_realloc(new_edges *buffer) +edge_buffer_realloc(edge_buffer *buffer) { if (buffer->size + 1 >= buffer->capacity) { buffer->capacity *= 2; @@ -62,10 +62,10 @@ new_edges_realloc(new_edges *buffer) } void -new_edges_buffer_birth(double left, double right, double parent_birth_time, - tsk_id_t parent, tsk_id_t child, new_edges *buffer) +edge_buffer_buffer_birth(double left, double right, double parent_birth_time, + tsk_id_t parent, tsk_id_t child, edge_buffer *buffer) { - new_edges_realloc(buffer); + edge_buffer_realloc(buffer); buffer->births[buffer->size].left = left; buffer->births[buffer->size].right = right; buffer->births[buffer->size].parent_birth_time = parent_birth_time; @@ -75,19 +75,29 @@ new_edges_buffer_birth(double left, double right, double parent_birth_time, } void -new_edges_prep_for_simplification(new_edges *buffer) +edge_buffer_prep_for_simplification(edge_buffer *buffer) { qsort(buffer->births, (size_t) buffer->size, sizeof(birth), cmp_birth); } +void +edge_buffer_clear(edge_buffer *buffer) +{ + buffer->size = 0; +} + void simulate(tsk_table_collection_t *tables, int N, int T, int simplify_interval) { tsk_id_t *buffer, *parents, *children, child, left_parent, right_parent; double breakpoint; int ret, j, t, b; + edge_buffer new_births; + tsk_modular_simplifier_t simplifier; assert(simplify_interval != 0); // leads to division by zero + ret = edge_buffer_init(&new_births, 1000); + assert(ret == 0); buffer = malloc(2 * N * sizeof(tsk_id_t)); if (buffer == NULL) { errx(EXIT_FAILURE, "Out of memory"); @@ -118,28 +128,55 @@ simulate(tsk_table_collection_t *tables, int N, int T, int simplify_interval) do { breakpoint = rand() / (1. + RAND_MAX); } while (breakpoint == 0); /* tiny proba of breakpoint being 0 */ - ret = tsk_edge_table_add_row( - &tables->edges, 0, breakpoint, left_parent, child, NULL, 0); - check_tsk_error(ret); - ret = tsk_edge_table_add_row( - &tables->edges, breakpoint, 1, right_parent, child, NULL, 0); - check_tsk_error(ret); + edge_buffer_buffer_birth(0., breakpoint, tables->nodes.time[left_parent], + left_parent, child, &new_births); + edge_buffer_buffer_birth(breakpoint, 1.0, tables->nodes.time[right_parent], + right_parent, child, &new_births); + // ret = tsk_edge_table_add_row( + // &tables->edges, 0, breakpoint, left_parent, child, NULL, 0); + // check_tsk_error(ret); + // ret = tsk_edge_table_add_row( + // &tables->edges, breakpoint, 1, right_parent, child, NULL, 0); + // check_tsk_error(ret); children[j] = child; } if (t % simplify_interval == 0) { printf("Simplify at generation %lld: (%lld nodes %lld edges)", (long long) t, (long long) tables->nodes.num_rows, (long long) tables->edges.num_rows); - /* Note: Edges must be sorted for simplify to work, and we use a brute force - * approach of sorting each time here for simplicity. This is inefficient. */ - ret = tsk_table_collection_sort(tables, NULL, 0); + edge_buffer_prep_for_simplification(&new_births); + ret = tsk_modular_simplifier_init(&simplifier, tables, children, N, 0); check_tsk_error(ret); - ret = tsk_table_collection_simplify(tables, children, N, 0, NULL); + j = 0; + while (j < new_births.size) { + left_parent = new_births.births[j].parent; + while ( + j < new_births.size && new_births.births[j].parent == left_parent) { + ret = tsk_modular_simplifier_add_edge(&simplifier, + new_births.births[j].left, new_births.births[j].right, + new_births.births[j].parent, new_births.births[j].child); + + check_tsk_error(ret); + j++; + } + ret = tsk_modular_simplifier_merge_ancestors(&simplifier, left_parent); + check_tsk_error(ret); + } + ret = tsk_modular_simplifier_finalise(&simplifier, NULL); check_tsk_error(ret); + ret = tsk_modular_simplifier_free(&simplifier); + check_tsk_error(ret); + /* Note: Edges must be sorted for simplify to work, and we use a brute force + * approach of sorting each time here for simplicity. This is inefficient. */ + // ret = tsk_table_collection_sort(tables, NULL, 0); + // check_tsk_error(ret); + // ret = tsk_table_collection_simplify(tables, children, N, 0, NULL); + // check_tsk_error(ret); printf(" -> (%lld nodes %lld edges)\n", (long long) tables->nodes.num_rows, (long long) tables->edges.num_rows); for (j = 0; j < N; j++) { children[j] = j; } + edge_buffer_clear(&new_births); } } free(buffer); From 24a76bf187d7c26a1ba449d9afd9ec1b4b4accaf Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Wed, 31 May 2023 09:32:06 -0700 Subject: [PATCH 81/88] make valgrind happy --- c/examples/efficient_forward_simulation.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/c/examples/efficient_forward_simulation.c b/c/examples/efficient_forward_simulation.c index 31f898c8a4..1c513f717f 100644 --- a/c/examples/efficient_forward_simulation.c +++ b/c/examples/efficient_forward_simulation.c @@ -45,7 +45,7 @@ edge_buffer_init(edge_buffer *buffer, tsk_size_t initial_capacity) ret = -1; goto out; } - buffer->births = (birth *) malloc(initial_capacity); + buffer->births = (birth *) malloc(initial_capacity * sizeof(birth)); buffer->capacity = initial_capacity; buffer->size = 0; out: @@ -57,7 +57,17 @@ edge_buffer_realloc(edge_buffer *buffer) { if (buffer->size + 1 >= buffer->capacity) { buffer->capacity *= 2; - buffer->births = (birth *) realloc(buffer->births, buffer->capacity); + buffer->births + = (birth *) realloc(buffer->births, buffer->capacity * sizeof(birth)); + } +} + +void +edge_buffer_free(edge_buffer *buffer) +{ + if (buffer->births != NULL) { + free(buffer->births); + buffer->births = NULL; } } @@ -180,6 +190,7 @@ simulate(tsk_table_collection_t *tables, int N, int T, int simplify_interval) } } free(buffer); + edge_buffer_free(&new_births); } int From a5ac4c4981b864540a0bafeee901fd89c84204ce Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Wed, 31 May 2023 09:33:04 -0700 Subject: [PATCH 82/88] cleanup --- c/examples/efficient_forward_simulation.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/c/examples/efficient_forward_simulation.c b/c/examples/efficient_forward_simulation.c index 1c513f717f..30832d1c49 100644 --- a/c/examples/efficient_forward_simulation.c +++ b/c/examples/efficient_forward_simulation.c @@ -142,12 +142,6 @@ simulate(tsk_table_collection_t *tables, int N, int T, int simplify_interval) left_parent, child, &new_births); edge_buffer_buffer_birth(breakpoint, 1.0, tables->nodes.time[right_parent], right_parent, child, &new_births); - // ret = tsk_edge_table_add_row( - // &tables->edges, 0, breakpoint, left_parent, child, NULL, 0); - // check_tsk_error(ret); - // ret = tsk_edge_table_add_row( - // &tables->edges, breakpoint, 1, right_parent, child, NULL, 0); - // check_tsk_error(ret); children[j] = child; } if (t % simplify_interval == 0) { @@ -164,7 +158,6 @@ simulate(tsk_table_collection_t *tables, int N, int T, int simplify_interval) ret = tsk_modular_simplifier_add_edge(&simplifier, new_births.births[j].left, new_births.births[j].right, new_births.births[j].parent, new_births.births[j].child); - check_tsk_error(ret); j++; } @@ -175,12 +168,6 @@ simulate(tsk_table_collection_t *tables, int N, int T, int simplify_interval) check_tsk_error(ret); ret = tsk_modular_simplifier_free(&simplifier); check_tsk_error(ret); - /* Note: Edges must be sorted for simplify to work, and we use a brute force - * approach of sorting each time here for simplicity. This is inefficient. */ - // ret = tsk_table_collection_sort(tables, NULL, 0); - // check_tsk_error(ret); - // ret = tsk_table_collection_simplify(tables, children, N, 0, NULL); - // check_tsk_error(ret); printf(" -> (%lld nodes %lld edges)\n", (long long) tables->nodes.num_rows, (long long) tables->edges.num_rows); for (j = 0; j < N; j++) { From cc89815753b6a42f178f2e0f348776e846658d56 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Wed, 31 May 2023 09:37:21 -0700 Subject: [PATCH 83/88] cleanup --- c/examples/efficient_forward_simulation.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/c/examples/efficient_forward_simulation.c b/c/examples/efficient_forward_simulation.c index 30832d1c49..b7c58b1356 100644 --- a/c/examples/efficient_forward_simulation.c +++ b/c/examples/efficient_forward_simulation.c @@ -168,6 +168,9 @@ simulate(tsk_table_collection_t *tables, int N, int T, int simplify_interval) check_tsk_error(ret); ret = tsk_modular_simplifier_free(&simplifier); check_tsk_error(ret); + /* For fun/safety/paranoia */ + ret = tsk_table_collection_check_integrity(tables, TSK_CHECK_EDGE_ORDERING); + check_tsk_error(ret); printf(" -> (%lld nodes %lld edges)\n", (long long) tables->nodes.num_rows, (long long) tables->edges.num_rows); for (j = 0; j < N; j++) { From f095a761c841346f37188e269873b49b8da799ee Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Wed, 31 May 2023 10:36:11 -0700 Subject: [PATCH 84/88] add death rate, update lots of the new example internals --- c/examples/efficient_forward_simulation.c | 68 ++++++++++++++++------- 1 file changed, 48 insertions(+), 20 deletions(-) diff --git a/c/examples/efficient_forward_simulation.c b/c/examples/efficient_forward_simulation.c index b7c58b1356..6ece2d493a 100644 --- a/c/examples/efficient_forward_simulation.c +++ b/c/examples/efficient_forward_simulation.c @@ -97,35 +97,54 @@ edge_buffer_clear(edge_buffer *buffer) } void -simulate(tsk_table_collection_t *tables, int N, int T, int simplify_interval) +simulate( + tsk_table_collection_t *tables, int N, int T, int simplify_interval, double pdeath) { - tsk_id_t *buffer, *parents, *children, child, left_parent, right_parent; + tsk_id_t *buffer, *alive, *deaths, *replacements, *idmap, child, left_parent, + right_parent; double breakpoint; - int ret, j, t, b; + int ret, j, t; edge_buffer new_births; + size_t ndeaths; tsk_modular_simplifier_t simplifier; assert(simplify_interval != 0); // leads to division by zero + assert(pdeath > 0.0 && pdeath <= 1.0); ret = edge_buffer_init(&new_births, 1000); assert(ret == 0); buffer = malloc(2 * N * sizeof(tsk_id_t)); if (buffer == NULL) { errx(EXIT_FAILURE, "Out of memory"); } + idmap = malloc(N * sizeof(tsk_id_t)); + if (idmap == NULL) { + errx(EXIT_FAILURE, "Out of memory"); + } + deaths = malloc(N * sizeof(tsk_id_t)); + if (deaths == NULL) { + errx(EXIT_FAILURE, "Out of memory"); + } tables->sequence_length = 1.0; - parents = buffer; + alive = buffer; + replacements = buffer + N; for (j = 0; j < N; j++) { - parents[j] + alive[j] = tsk_node_table_add_row(&tables->nodes, 0, T, TSK_NULL, TSK_NULL, NULL, 0); - check_tsk_error(parents[j]); + check_tsk_error(alive[j]); } - b = 0; for (t = T - 1; t >= 0; t--) { - /* Alternate between using the first and last N values in the buffer */ - parents = buffer + (b * N); - b = (b + 1) % 2; - children = buffer + (b * N); + ndeaths = 0; for (j = 0; j < N; j++) { + /* NOTE: the use of rand() is discouraged for + * research code and proper random number generator + * libraries should be preferred. + */ + if (rand() / (1. + RAND_MAX) <= pdeath) { + deaths[ndeaths] = j; + ++ndeaths; + } + } + for (j = 0; j < ndeaths; j++) { child = tsk_node_table_add_row( &tables->nodes, 0, t, TSK_NULL, TSK_NULL, NULL, 0); check_tsk_error(child); @@ -133,8 +152,8 @@ simulate(tsk_table_collection_t *tables, int N, int T, int simplify_interval) * research code and proper random number generator * libraries should be preferred. */ - left_parent = parents[(size_t)((rand() / (1. + RAND_MAX)) * N)]; - right_parent = parents[(size_t)((rand() / (1. + RAND_MAX)) * N)]; + left_parent = alive[(size_t)((rand() / (1. + RAND_MAX)) * N)]; + right_parent = alive[(size_t)((rand() / (1. + RAND_MAX)) * N)]; do { breakpoint = rand() / (1. + RAND_MAX); } while (breakpoint == 0); /* tiny proba of breakpoint being 0 */ @@ -142,13 +161,19 @@ simulate(tsk_table_collection_t *tables, int N, int T, int simplify_interval) left_parent, child, &new_births); edge_buffer_buffer_birth(breakpoint, 1.0, tables->nodes.time[right_parent], right_parent, child, &new_births); - children[j] = child; + replacements[j] = child; + } + /* replace deaths with births */ + for (j = 0; j < ndeaths; j++) { + alive[deaths[j]] = replacements[j]; } if (t % simplify_interval == 0) { printf("Simplify at generation %lld: (%lld nodes %lld edges)", (long long) t, (long long) tables->nodes.num_rows, (long long) tables->edges.num_rows); edge_buffer_prep_for_simplification(&new_births); - ret = tsk_modular_simplifier_init(&simplifier, tables, children, N, 0); + idmap + = (tsk_id_t *) realloc(idmap, tables->nodes.num_rows * sizeof(tsk_id_t)); + ret = tsk_modular_simplifier_init(&simplifier, tables, alive, N, 0); check_tsk_error(ret); j = 0; while (j < new_births.size) { @@ -164,7 +189,7 @@ simulate(tsk_table_collection_t *tables, int N, int T, int simplify_interval) ret = tsk_modular_simplifier_merge_ancestors(&simplifier, left_parent); check_tsk_error(ret); } - ret = tsk_modular_simplifier_finalise(&simplifier, NULL); + ret = tsk_modular_simplifier_finalise(&simplifier, idmap); check_tsk_error(ret); ret = tsk_modular_simplifier_free(&simplifier); check_tsk_error(ret); @@ -174,12 +199,15 @@ simulate(tsk_table_collection_t *tables, int N, int T, int simplify_interval) printf(" -> (%lld nodes %lld edges)\n", (long long) tables->nodes.num_rows, (long long) tables->edges.num_rows); for (j = 0; j < N; j++) { - children[j] = j; + alive[j] = idmap[alive[j]]; + assert(alive[j] != TSK_NULL); } edge_buffer_clear(&new_births); } } free(buffer); + free(idmap); + free(deaths); edge_buffer_free(&new_births); } @@ -189,13 +217,13 @@ main(int argc, char **argv) int ret; tsk_table_collection_t tables; - if (argc != 6) { - errx(EXIT_FAILURE, "usage: N T simplify-interval output-file seed"); + if (argc != 7) { + errx(EXIT_FAILURE, "usage: N T simplify-interval output-file seed pdeath"); } ret = tsk_table_collection_init(&tables, 0); check_tsk_error(ret); srand((unsigned) atoi(argv[5])); - simulate(&tables, atoi(argv[1]), atoi(argv[2]), atoi(argv[3])); + simulate(&tables, atoi(argv[1]), atoi(argv[2]), atoi(argv[3]), atof(argv[6])); ret = tsk_table_collection_dump(&tables, argv[4], 0); check_tsk_error(ret); From 0c498c02cc0d009ba7a50c79ddfb363241064504 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Wed, 31 May 2023 10:53:07 -0700 Subject: [PATCH 85/88] overlapping gens seems to work --- c/examples/efficient_forward_simulation.c | 49 +++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/c/examples/efficient_forward_simulation.c b/c/examples/efficient_forward_simulation.c index 6ece2d493a..e4fb71819d 100644 --- a/c/examples/efficient_forward_simulation.c +++ b/c/examples/efficient_forward_simulation.c @@ -90,6 +90,53 @@ edge_buffer_prep_for_simplification(edge_buffer *buffer) qsort(buffer->births, (size_t) buffer->size, sizeof(birth), cmp_birth); } +/* Overlapping generations are a pain point. + * Nodes that are currently "alive" can span an + * arbitrary range of ages >= 0.0. + * These nodes can have ancestry (edges). + * The children of these edges may not be "alive" (e.g., + * cannot be parents). + * + * We identify the range of such edges in a (simplified) + * table collection and lift them over into our buffer. + */ +void +edge_buffer_post_simplification(tsk_id_t *output_samples, tsk_size_t num_output_samples, + tsk_table_collection_t *tables, edge_buffer *buffer) +{ + /* node times cannot be negative, so this + * is a reasonable floor + */ + double max_alive_time = 0.0; + int64_t i, last_row_to_lift_over = -1; + tsk_size_t moved = 0; + + for (i = 0; i < num_output_samples; ++i) { + max_alive_time = TSK_MAX(tables->nodes.time[output_samples[i]], max_alive_time); + } + for (i = 0; i < tables->edges.num_rows; ++i) { + if (tables->nodes.time[tables->edges.parent[i]] <= max_alive_time) { + last_row_to_lift_over = (int) i; + } + } + if (last_row_to_lift_over > -1) { + for (i = 0; i < (tsk_size_t) last_row_to_lift_over; ++i) { + edge_buffer_buffer_birth(tables->edges.left[i], tables->edges.right[i], + tables->nodes.time[tables->edges.parent[i]], tables->edges.parent[i], + tables->edges.child[i], buffer); + } + for (i = (tsk_size_t) last_row_to_lift_over + 1; i < tables->edges.num_rows; + ++i) { + tables->edges.left[moved] = tables->edges.left[i]; + tables->edges.right[moved] = tables->edges.right[i]; + tables->edges.parent[moved] = tables->edges.parent[i]; + tables->edges.child[moved] = tables->edges.child[i]; + moved += 1; + } + tsk_edge_table_truncate(&tables->edges, moved); + } +} + void edge_buffer_clear(edge_buffer *buffer) { @@ -202,7 +249,9 @@ simulate( alive[j] = idmap[alive[j]]; assert(alive[j] != TSK_NULL); } + /* The order of these next two steps MATTERS */ edge_buffer_clear(&new_births); + edge_buffer_post_simplification(alive, N, tables, &new_births); } } free(buffer); From fcd17f3f91e8c508abd5f0a2bfc7c7ce6460a8cc Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Wed, 31 May 2023 11:24:23 -0700 Subject: [PATCH 86/88] add an important note --- c/examples/efficient_forward_simulation.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/c/examples/efficient_forward_simulation.c b/c/examples/efficient_forward_simulation.c index e4fb71819d..3e93a1bf4f 100644 --- a/c/examples/efficient_forward_simulation.c +++ b/c/examples/efficient_forward_simulation.c @@ -204,6 +204,14 @@ simulate( do { breakpoint = rand() / (1. + RAND_MAX); } while (breakpoint == 0); /* tiny proba of breakpoint being 0 */ + /* NOTE: invalid left/right values here CANNOT be caught + * by tsk_table_collection_check_integrity! + * (They are not present in the edge table when the + * simplifier is initialized, and the simplified tables + * will naively process the overlaps, resulting in invalid output.) + * + * It is therefore a precondition that input edges are okay. + */ edge_buffer_buffer_birth(0., breakpoint, tables->nodes.time[left_parent], left_parent, child, &new_births); edge_buffer_buffer_birth(breakpoint, 1.0, tables->nodes.time[right_parent], From e19a5a6c00a510d1c05f4e0bfe0b8bcabae62183 Mon Sep 17 00:00:00 2001 From: Jerome Kelleher Date: Fri, 23 Jun 2023 09:39:16 +0100 Subject: [PATCH 87/88] Add basic simulation for development --- python/tests/test_forward_sims.py | 117 ++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 python/tests/test_forward_sims.py diff --git a/python/tests/test_forward_sims.py b/python/tests/test_forward_sims.py new file mode 100644 index 0000000000..17d91fd7ab --- /dev/null +++ b/python/tests/test_forward_sims.py @@ -0,0 +1,117 @@ +# MIT License +# +# Copyright (c) 2023 Tskit Developers +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +""" +Python implementation of the low-level supporting code for forward simulations. +""" +import collections +import random + +import numpy as np +import pytest + +import tskit + + +def simplify_with_buffer(tables, parent_buffer, samples, verbose): + # Pretend this was done efficiently internally without any sorting + # by creating a simplifier object and adding the ancstry for the + # new parents appropriately before flushing through the rest of the + # edges. + for parent, edges in parent_buffer.items(): + for left, right, child in edges: + tables.edges.add_row(left, right, parent, child) + tables.sort() + tables.simplify(samples) + # We've exhausted the parent buffer, so clear it out. In reality we'd + # do this more carefully, like KT does in the post_simplify step. + parent_buffer.clear() + + +def wright_fisher( + N, *, death_proba=1, L=1, T=10, simplify_interval=1, seed=42, verbose=0 +): + rng = random.Random(seed) + tables = tskit.TableCollection(L) + alive = [tables.nodes.add_row(time=T) for _ in range(N)] + parent_buffer = collections.defaultdict(list) + + t = T + while t > 0: + t -= 1 + next_alive = list(alive) + for j in range(N): + if rng.random() < death_proba: + # alive[j] is dead - replace it. + u = tables.nodes.add_row(time=t) + next_alive[j] = u + a = rng.randint(0, N - 1) + b = rng.randint(0, N - 1) + x = rng.uniform(0, L) + parent_buffer[alive[a]].append((0, x, u)) + parent_buffer[alive[b]].append((x, L, u)) + alive = next_alive + if t % simplify_interval == 0 or t == 0: + simplify_with_buffer(tables, parent_buffer, alive, verbose=verbose) + alive = list(range(N)) + return tables.tree_sequence() + + +class TestSimulationBasics: + """ + Check that the basic simulation algorithm roughly works, so we're not building + on sand. + """ + + @pytest.mark.parametrize("N", [1, 10, 100]) + def test_pop_size(self, N): + ts = wright_fisher(N, simplify_interval=100) + assert ts.num_samples == N + + @pytest.mark.parametrize("T", [1, 10, 100]) + def test_time(self, T): + N = 10 + ts = wright_fisher(N=N, T=T, simplify_interval=1000) + assert np.all(ts.nodes_time[ts.samples()] == 0) + # Can't really assert anything much stronger, not really trying to + # do anything particularly rigorous here + assert np.max(ts.nodes_time) > 0 + + def test_death_proba_0(self): + N = 10 + T = 5 + ts = wright_fisher(N=N, T=T, death_proba=0, simplify_interval=1000) + assert ts.num_nodes == N + + @pytest.mark.parametrize("seed", [1, 5, 1234]) + def test_seed_identical(self, seed): + N = 10 + T = 5 + ts1 = wright_fisher(N=N, T=T, simplify_interval=1000, seed=seed) + ts2 = wright_fisher(N=N, T=T, simplify_interval=1000, seed=seed) + ts1.tables.assert_equals(ts2.tables, ignore_provenance=True) + ts3 = wright_fisher(N=N, T=T, simplify_interval=1000, seed=seed - 1) + assert not ts3.tables.equals(ts2.tables, ignore_provenance=True) + + def test_full_simulation(self): + ts = wright_fisher(N=5, T=500, death_proba=0.9, simplify_interval=1000) + for tree in ts.trees(): + assert tree.num_roots == 1 From c559f64713ffaec36289f0c276e2a39aede3e71f Mon Sep 17 00:00:00 2001 From: Jerome Kelleher Date: Fri, 23 Jun 2023 16:41:09 +0100 Subject: [PATCH 88/88] Add almost-working version with Python simplifier --- python/tests/simplify.py | 20 ++-- python/tests/test_forward_sims.py | 174 +++++++++++++++++++++++++++--- 2 files changed, 171 insertions(+), 23 deletions(-) diff --git a/python/tests/simplify.py b/python/tests/simplify.py index 02e0482cca..c009ee8fa4 100644 --- a/python/tests/simplify.py +++ b/python/tests/simplify.py @@ -1,6 +1,6 @@ # MIT License # -# Copyright (c) 2019-2022 Tskit Developers +# Copyright (c) 2019-2023 Tskit Developers # Copyright (c) 2015-2018 University of Oxford # # Permission is hereby granted, free of charge, to any person obtaining a copy @@ -114,6 +114,8 @@ def __init__( filter_nodes=True, update_sample_flags=True, ): + # DELETE ME + self.parent_edges_processed = 0 self.ts = ts self.n = len(sample) self.reduce_to_site_topology = reduce_to_site_topology @@ -397,6 +399,7 @@ def process_parent_edges(self, edges): """ Process all of the edges for a given parent. """ + self.parent_edges_processed += len(edges) assert len({e.parent for e in edges}) == 1 parent = edges[0].parent S = [] @@ -535,6 +538,14 @@ def insert_input_roots(self): offset += 1 self.sort_offset = offset + def finalise(self): + if self.keep_input_roots: + self.insert_input_roots() + self.finalise_sites() + self.finalise_references() + if self.sort_offset != -1: + self.tables.sort(edge_start=self.sort_offset) + def simplify(self): if self.ts.num_edges > 0: all_edges = list(self.ts.edges()) @@ -545,12 +556,7 @@ def simplify(self): edges = [] edges.append(e) self.process_parent_edges(edges) - if self.keep_input_roots: - self.insert_input_roots() - self.finalise_sites() - self.finalise_references() - if self.sort_offset != -1: - self.tables.sort(edge_start=self.sort_offset) + self.finalise() ts = self.tables.tree_sequence() return ts, self.node_id_map diff --git a/python/tests/test_forward_sims.py b/python/tests/test_forward_sims.py index 17d91fd7ab..54ea9039fd 100644 --- a/python/tests/test_forward_sims.py +++ b/python/tests/test_forward_sims.py @@ -22,28 +22,147 @@ """ Python implementation of the low-level supporting code for forward simulations. """ -import collections +import itertools import random import numpy as np import pytest import tskit +from tests import simplify -def simplify_with_buffer(tables, parent_buffer, samples, verbose): - # Pretend this was done efficiently internally without any sorting - # by creating a simplifier object and adding the ancstry for the - # new parents appropriately before flushing through the rest of the - # edges. - for parent, edges in parent_buffer.items(): - for left, right, child in edges: +class BirthBuffer: + def __init__(self): + self.edges = {} + self.parents = [] + + def add_edge(self, left, right, parent, child): + if parent not in self.edges: + self.parents.append(parent) + self.edges[parent] = [] + self.edges[parent].append((child, left, right)) + + def clear(self): + self.edges = {} + self.parents = [] + + def __str__(self): + s = "" + for parent in self.parents: + for child, left, right in self.edges[parent]: + s += f"{parent}\t{child}\t{left:0.3f}\t{right:0.3f}\n" + return s + + +def add_younger_edges_to_simplifier(simplifier, t, tables, edge_offset): + parent_edges = [] + while ( + edge_offset < len(tables.edges) + and tables.nodes.time[tables.edges.parent[edge_offset]] <= t + ): + print("edge offset = ", edge_offset) + if len(parent_edges) == 0: + last_parent = tables.edges.parent[edge_offset] + else: + last_parent = parent_edges[-1].parent + if last_parent == tables.edges.parent[edge_offset]: + parent_edges.append(tables.edges[edge_offset]) + else: + print( + "Flush ", tables.nodes.time[parent_edges[-1].parent], len(parent_edges) + ) + simplifier.process_parent_edges(parent_edges) + parent_edges = [] + edge_offset += 1 + if len(parent_edges) > 0: + print("Flush ", tables.nodes.time[parent_edges[-1].parent], len(parent_edges)) + simplifier.process_parent_edges(parent_edges) + return edge_offset + + +def simplify_with_births(tables, births, alive, verbose): + total_edges = len(tables.edges) + for edges in births.edges.values(): + total_edges += len(edges) + if verbose > 0: + print("Simplify with births") + # print(births) + print("total_input edges = ", total_edges) + print("alive = ", alive) + print("\ttable edges:", len(tables.edges)) + print("\ttable nodes:", len(tables.nodes)) + + simplifier = simplify.Simplifier(tables.tree_sequence(), alive) + nodes_time = tables.nodes.time + # This should be almost sorted, because + parent_time = nodes_time[births.parents] + index = np.argsort(parent_time) + print(index) + offset = 0 + for parent in np.array(births.parents)[index]: + offset = add_younger_edges_to_simplifier( + simplifier, nodes_time[parent], tables, offset + ) + edges = [ + tskit.Edge(left, right, parent, child) + for child, left, right in sorted(births.edges[parent]) + ] + # print("Adding parent from time", nodes_time[parent], len(edges)) + # print("edges = ", edges) + simplifier.process_parent_edges(edges) + # simplifier.print_state() + + # FIXME should probably reuse the add_younger_edges_to_simplifier function + # for this - doesn't quite seem to work though + for _, edges in itertools.groupby(tables.edges[offset:], lambda e: e.parent): + edges = list(edges) + simplifier.process_parent_edges(edges) + + simplifier.check_state() + assert simplifier.parent_edges_processed == total_edges + # if simplifier.parent_edges_processed != total_edges: + # print("HERE!!!!", total_edges) + simplifier.finalise() + + tables.nodes.replace_with(simplifier.tables.nodes) + tables.edges.replace_with(simplifier.tables.edges) + + # This is needed because we call .tree_sequence here and later. + # Can be removed is we change the Simplifier to take a set of + # tables which it modifies, like the C version. + tables.drop_index() + # Just to check + tables.tree_sequence() + + births.clear() + # Add back all the edges with an alive parent to the buffer, so that + # we store them contiguously + keep = np.ones(len(tables.edges), dtype=bool) + for u in alive: + u = simplifier.node_id_map[u] + for e in np.where(tables.edges.parent == u)[0]: + keep[e] = False + edge = tables.edges[e] + # print(edge) + births.add_edge(edge.left, edge.right, edge.parent, edge.child) + + if verbose > 0: + print("Done") + print(births) + print("\ttable edges:", len(tables.edges)) + print("\ttable nodes:", len(tables.nodes)) + + +def simplify_with_births_easy(tables, births, alive, verbose): + for parent, edges in births.edges.items(): + for child, left, right in edges: tables.edges.add_row(left, right, parent, child) tables.sort() - tables.simplify(samples) - # We've exhausted the parent buffer, so clear it out. In reality we'd - # do this more carefully, like KT does in the post_simplify step. - parent_buffer.clear() + tables.simplify(alive) + births.clear() + + # print(tables.nodes.time[tables.edges.parent]) def wright_fisher( @@ -52,7 +171,7 @@ def wright_fisher( rng = random.Random(seed) tables = tskit.TableCollection(L) alive = [tables.nodes.add_row(time=T) for _ in range(N)] - parent_buffer = collections.defaultdict(list) + births = BirthBuffer() t = T while t > 0: @@ -66,12 +185,16 @@ def wright_fisher( a = rng.randint(0, N - 1) b = rng.randint(0, N - 1) x = rng.uniform(0, L) - parent_buffer[alive[a]].append((0, x, u)) - parent_buffer[alive[b]].append((x, L, u)) + # TODO Possibly more natural do this like + # births.add(u, parents=[a, b], breaks=[0, x, L]) + births.add_edge(0, x, alive[a], u) + births.add_edge(x, L, alive[b], u) alive = next_alive if t % simplify_interval == 0 or t == 0: - simplify_with_buffer(tables, parent_buffer, alive, verbose=verbose) + simplify_with_births(tables, births, alive, verbose=verbose) + # simplify_with_births_easy(tables, births, alive, verbose=verbose) alive = list(range(N)) + # print(tables.tree_sequence()) return tables.tree_sequence() @@ -115,3 +238,22 @@ def test_full_simulation(self): ts = wright_fisher(N=5, T=500, death_proba=0.9, simplify_interval=1000) for tree in ts.trees(): assert tree.num_roots == 1 + + +class TestSimplifyIntervals: + @pytest.mark.parametrize("interval", [1, 10, 33, 100]) + def test_non_overlapping_generations(self, interval): + N = 10 + ts = wright_fisher(N, T=100, death_proba=1, simplify_interval=interval) + assert ts.num_samples == N + + @pytest.mark.parametrize("interval", [1, 10, 33, 100]) + @pytest.mark.parametrize("death_proba", [0.33, 0.5, 0.9]) + def test_overlapping_generations(self, interval, death_proba): + N = 4 + ts = wright_fisher( + N, T=20, death_proba=death_proba, simplify_interval=interval, verbose=1 + ) + assert ts.num_samples == N + print() + print(ts.draw_text())