diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c
index 1b99ac6fe3..7e50631036 100644
--- a/c/tests/test_tables.c
+++ b/c/tests/test_tables.c
@@ -1,7 +1,7 @@
 /*
  * MIT License
  *
- * Copyright (c) 2019-2022 Tskit Developers
+ * Copyright (c) 2019-2023 Tskit Developers
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -1490,6 +1490,98 @@ test_node_table_update_row(void)
     tsk_node_table_free(&table);
 }
 
+static void
+test_node_table_keep_rows(void)
+{
+    int ret;
+    tsk_id_t ret_id;
+    tsk_size_t j;
+    tsk_node_table_t source, t1, t2;
+    tsk_node_t row;
+    bool keep[3] = { 1, 1, 1 };
+    tsk_id_t id_map[3];
+    const char *metadata = "ABC";
+
+    ret = tsk_node_table_init(&source, 0);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+
+    ret_id = tsk_node_table_add_row(&source, 0, 1.0, 2, 3, metadata, 1);
+    CU_ASSERT_FATAL(ret_id >= 0);
+    ret_id = tsk_node_table_add_row(&source, 1, 2.0, 3, 4, metadata, 2);
+    CU_ASSERT_FATAL(ret_id >= 0);
+    ret_id = tsk_node_table_add_row(&source, 2, 3.0, 4, 5, metadata, 3);
+    CU_ASSERT_FATAL(ret_id >= 0);
+
+    ret = tsk_node_table_copy(&source, &t1, 0);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+
+    ret = tsk_node_table_keep_rows(&t1, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_TRUE(tsk_node_table_equals(&t1, &source, 0));
+
+    ret = tsk_node_table_keep_rows(&t1, keep, 0, NULL);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_TRUE(tsk_node_table_equals(&t1, &source, 0));
+    CU_ASSERT_EQUAL_FATAL(id_map[0], 0);
+    CU_ASSERT_EQUAL_FATAL(id_map[1], 1);
+    CU_ASSERT_EQUAL_FATAL(id_map[2], 2);
+
+    keep[0] = 0;
+    keep[1] = 0;
+    keep[2] = 0;
+    ret = tsk_node_table_keep_rows(&t1, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 0);
+    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);
+    CU_ASSERT_EQUAL_FATAL(id_map[1], -1);
+    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);
+
+    ret = tsk_node_table_copy(&source, &t1, TSK_NO_INIT);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    keep[0] = 0;
+    keep[1] = 1;
+    keep[2] = 0;
+    ret = tsk_node_table_keep_rows(&t1, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 1);
+    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);
+    CU_ASSERT_EQUAL_FATAL(id_map[1], 0);
+    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);
+
+    CU_ASSERT_EQUAL_FATAL(tsk_node_table_get_row(&t1, 0, &row), 0);
+    CU_ASSERT_EQUAL_FATAL(row.flags, 1);
+    CU_ASSERT_EQUAL_FATAL(row.time, 2.0);
+    CU_ASSERT_EQUAL_FATAL(row.population, 3);
+    CU_ASSERT_EQUAL_FATAL(row.individual, 4);
+    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 2);
+    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');
+    CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');
+
+    tsk_node_table_free(&t1);
+
+    keep[0] = 0;
+    keep[1] = 0;
+    keep[2] = 0;
+    /* Keeping the first j + 1 rows must match truncating to j + 1 rows */
+    for (j = 0; j < source.num_rows; j++) {
+        ret = tsk_node_table_copy(&source, &t2, 0);
+        CU_ASSERT_EQUAL_FATAL(ret, 0);
+        ret = tsk_node_table_copy(&source, &t1, 0);
+        CU_ASSERT_EQUAL_FATAL(ret, 0);
+        ret = tsk_node_table_truncate(&t1, j + 1);
+        CU_ASSERT_EQUAL_FATAL(ret, 0);
+        keep[j] = 1;
+        ret = tsk_node_table_keep_rows(&t2, keep, 0, NULL);
+        CU_ASSERT_EQUAL_FATAL(ret, 0);
+        CU_ASSERT_TRUE(tsk_node_table_equals(&t1, &t2, 0));
+
+        tsk_node_table_free(&t1);
+        tsk_node_table_free(&t2);
+    }
+
+    tsk_node_table_free(&source);
+}
+
 static void
 test_edge_table_with_options(tsk_flags_t options)
 {
@@ -2033,6 +2125,185 @@ test_edge_table_update_row_no_metadata(void)
     tsk_edge_table_free(&table);
 }
 
+static void
+test_edge_table_keep_rows(void)
+{
+    int ret;
+    tsk_id_t ret_id;
+    tsk_size_t j;
+    tsk_edge_table_t source, t1, t2;
+    tsk_edge_t row;
+    bool keep[3] = { 1, 1, 1 };
+    tsk_id_t id_map[3];
+    const char *metadata = "ABC";
+
+    ret = tsk_edge_table_init(&source, 0);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+
+    ret_id = tsk_edge_table_add_row(&source, 0, 1.0, 2, 3, metadata, 1);
+    CU_ASSERT_FATAL(ret_id >= 0);
+    ret_id = tsk_edge_table_add_row(&source, 1, 2.0, 3, 4, metadata, 2);
+    CU_ASSERT_FATAL(ret_id >= 0);
+    ret_id = tsk_edge_table_add_row(&source, 2, 3.0, 4, 5, metadata, 3);
+    CU_ASSERT_FATAL(ret_id >= 0);
+
+    ret = tsk_edge_table_copy(&source, &t1, 0);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+
+    ret = tsk_edge_table_keep_rows(&t1, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_TRUE(tsk_edge_table_equals(&t1, &source, 0));
+
+    ret = tsk_edge_table_keep_rows(&t1, keep, 0, NULL);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_TRUE(tsk_edge_table_equals(&t1, &source, 0));
+    CU_ASSERT_EQUAL_FATAL(id_map[0], 0);
+    CU_ASSERT_EQUAL_FATAL(id_map[1], 1);
+    CU_ASSERT_EQUAL_FATAL(id_map[2], 2);
+
+    keep[0] = 0;
+    keep[1] = 0;
+    keep[2] = 0;
+    ret = tsk_edge_table_keep_rows(&t1, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 0);
+    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);
+    CU_ASSERT_EQUAL_FATAL(id_map[1], -1);
+    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);
+
+    ret = tsk_edge_table_copy(&source, &t1, TSK_NO_INIT);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    keep[0] = 0;
+    keep[1] = 1;
+    keep[2] = 0;
+    ret = tsk_edge_table_keep_rows(&t1, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 1);
+    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);
+    CU_ASSERT_EQUAL_FATAL(id_map[1], 0);
+    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);
+
+    CU_ASSERT_EQUAL_FATAL(tsk_edge_table_get_row(&t1, 0, &row), 0);
+    CU_ASSERT_EQUAL_FATAL(row.left, 1);
+    CU_ASSERT_EQUAL_FATAL(row.right, 2.0);
+    CU_ASSERT_EQUAL_FATAL(row.parent, 3);
+    CU_ASSERT_EQUAL_FATAL(row.child, 4);
+    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 2);
+    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');
+    CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');
+
+    tsk_edge_table_free(&t1);
+
+    keep[0] = 0;
+    keep[1] = 0;
+    keep[2] = 0;
+    /* Keeping the first j + 1 rows must match truncating to j + 1 rows */
+    for (j = 0; j < source.num_rows; j++) {
+        ret = tsk_edge_table_copy(&source, &t2, 0);
+        CU_ASSERT_EQUAL_FATAL(ret, 0);
+        ret = tsk_edge_table_copy(&source, &t1, 0);
+        CU_ASSERT_EQUAL_FATAL(ret, 0);
+        ret = tsk_edge_table_truncate(&t1, j + 1);
+        CU_ASSERT_EQUAL_FATAL(ret, 0);
+        keep[j] = 1;
+        ret = tsk_edge_table_keep_rows(&t2, keep, 0, NULL);
+        CU_ASSERT_EQUAL_FATAL(ret, 0);
+        CU_ASSERT_TRUE(tsk_edge_table_equals(&t1, &t2, 0));
+        tsk_edge_table_free(&t1);
+        tsk_edge_table_free(&t2);
+    }
+
+    tsk_edge_table_free(&source);
+}
+
+static void
+test_edge_table_keep_rows_no_metadata(void)
+{
+    int ret;
+    tsk_id_t ret_id;
+    tsk_size_t j;
+    tsk_edge_table_t source, t1, t2;
+    tsk_edge_t row;
+    bool keep[3] = { 1, 1, 1 };
+    tsk_id_t id_map[3];
+
+    ret = tsk_edge_table_init(&source, 0);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+
+    ret_id = tsk_edge_table_add_row(&source, 0, 1.0, 2, 3, NULL, 0);
+    CU_ASSERT_FATAL(ret_id >= 0);
+    ret_id = tsk_edge_table_add_row(&source, 1, 2.0, 3, 4, NULL, 0);
+    CU_ASSERT_FATAL(ret_id >= 0);
+    ret_id = tsk_edge_table_add_row(&source, 2, 3.0, 4, 5, NULL, 0);
+    CU_ASSERT_FATAL(ret_id >= 0);
+
+    ret = tsk_edge_table_copy(&source, &t1, TSK_TABLE_NO_METADATA);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+
+    ret = tsk_edge_table_keep_rows(&t1, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_TRUE(tsk_edge_table_equals(&t1, &source, 0));
+
+    ret = tsk_edge_table_keep_rows(&t1, keep, 0, NULL);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_TRUE(tsk_edge_table_equals(&t1, &source, 0));
+    CU_ASSERT_EQUAL_FATAL(id_map[0], 0);
+    CU_ASSERT_EQUAL_FATAL(id_map[1], 1);
+    CU_ASSERT_EQUAL_FATAL(id_map[2], 2);
+
+    keep[0] = 0;
+    keep[1] = 0;
+    keep[2] = 0;
+    ret = tsk_edge_table_keep_rows(&t1, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 0);
+    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);
+    CU_ASSERT_EQUAL_FATAL(id_map[1], -1);
+    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);
+
+    ret = tsk_edge_table_copy(&source, &t1, TSK_NO_INIT);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    keep[0] = 0;
+    keep[1] = 1;
+    keep[2] = 0;
+    ret = tsk_edge_table_keep_rows(&t1, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 1);
+    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);
+    CU_ASSERT_EQUAL_FATAL(id_map[1], 0);
+    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);
+
+    CU_ASSERT_EQUAL_FATAL(tsk_edge_table_get_row(&t1, 0, &row), 0);
+    CU_ASSERT_EQUAL_FATAL(row.left, 1);
+    CU_ASSERT_EQUAL_FATAL(row.right, 2.0);
+    CU_ASSERT_EQUAL_FATAL(row.parent, 3);
+    CU_ASSERT_EQUAL_FATAL(row.child, 4);
+    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 0);
+
+    tsk_edge_table_free(&t1);
+
+    keep[0] = 0;
+    keep[1] = 0;
+    keep[2] = 0;
+    /* Keeping the first j + 1 rows must match truncating to j + 1 rows */
+    for (j = 0; j < source.num_rows; j++) {
+        ret = tsk_edge_table_copy(&source, &t2, TSK_TABLE_NO_METADATA);
+        CU_ASSERT_EQUAL_FATAL(ret, 0);
+        ret = tsk_edge_table_copy(&source, &t1, TSK_TABLE_NO_METADATA);
+        CU_ASSERT_EQUAL_FATAL(ret, 0);
+        ret = tsk_edge_table_truncate(&t1, j + 1);
+        CU_ASSERT_EQUAL_FATAL(ret, 0);
+        keep[j] = 1;
+        ret = tsk_edge_table_keep_rows(&t2, keep, 0, NULL);
+        CU_ASSERT_EQUAL_FATAL(ret, 0);
+        CU_ASSERT_TRUE(tsk_edge_table_equals(&t1, &t2, 0));
+        tsk_edge_table_free(&t1);
+        tsk_edge_table_free(&t2);
+    }
+
+    tsk_edge_table_free(&source);
+}
+
 static void
 test_edge_table_takeset_with_options(tsk_flags_t table_options)
 {
@@ -2957,6 +3228,98 @@ test_site_table_update_row(void)
     tsk_site_table_free(&table);
 }
 
+static void
+test_site_table_keep_rows(void)
+{
+    int ret;
+    tsk_id_t ret_id;
+    tsk_size_t j;
+    tsk_site_table_t source, t1, t2;
+    tsk_site_t row;
+    const char *ancestral_state = "XYZ";
+    const char *metadata = "ABC";
+    bool keep[3] = { 1, 1, 1 };
+    tsk_id_t id_map[3];
+
+    ret = tsk_site_table_init(&source, 0);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+
+    ret_id = tsk_site_table_add_row(&source, 0, ancestral_state, 1, metadata, 1);
+    CU_ASSERT_FATAL(ret_id >= 0);
+    ret_id = tsk_site_table_add_row(&source, 1, ancestral_state, 2, metadata, 2);
+    CU_ASSERT_FATAL(ret_id >= 0);
+    ret_id = tsk_site_table_add_row(&source, 2, ancestral_state, 3, metadata, 3);
+    CU_ASSERT_FATAL(ret_id >= 0);
+
+    ret = tsk_site_table_copy(&source, &t1, 0);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+
+    ret = tsk_site_table_keep_rows(&t1, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_TRUE(tsk_site_table_equals(&t1, &source, 0));
+
+    ret = tsk_site_table_keep_rows(&t1, keep, 0, NULL);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_TRUE(tsk_site_table_equals(&t1, &source, 0));
+    CU_ASSERT_EQUAL_FATAL(id_map[0], 0);
+    CU_ASSERT_EQUAL_FATAL(id_map[1], 1);
+    CU_ASSERT_EQUAL_FATAL(id_map[2], 2);
+
+    keep[0] = 0;
+    keep[1] = 0;
+    keep[2] = 0;
+    ret = tsk_site_table_keep_rows(&t1, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 0);
+    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);
+    CU_ASSERT_EQUAL_FATAL(id_map[1], -1);
+    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);
+
+    ret = tsk_site_table_copy(&source, &t1, TSK_NO_INIT);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    keep[0] = 0;
+    keep[1] = 1;
+    keep[2] = 0;
+    ret = tsk_site_table_keep_rows(&t1, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 1);
+    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);
+    CU_ASSERT_EQUAL_FATAL(id_map[1], 0);
+    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);
+
+    CU_ASSERT_EQUAL_FATAL(tsk_site_table_get_row(&t1, 0, &row), 0);
+    CU_ASSERT_EQUAL_FATAL(row.position, 1);
+    CU_ASSERT_EQUAL_FATAL(row.ancestral_state_length, 2);
+    CU_ASSERT_EQUAL_FATAL(row.ancestral_state[0], 'X');
+    CU_ASSERT_EQUAL_FATAL(row.ancestral_state[1], 'Y');
+    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 2);
+    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');
+    CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');
+
+    tsk_site_table_free(&t1);
+
+    keep[0] = 0;
+    keep[1] = 0;
+    keep[2] = 0;
+    /* Keeping the first j + 1 rows must match truncating to j + 1 rows */
+    for (j = 0; j < source.num_rows; j++) {
+        ret = tsk_site_table_copy(&source, &t2, 0);
+        CU_ASSERT_EQUAL_FATAL(ret, 0);
+        ret = tsk_site_table_copy(&source, &t1, 0);
+        CU_ASSERT_EQUAL_FATAL(ret, 0);
+        ret = tsk_site_table_truncate(&t1, j + 1);
+        CU_ASSERT_EQUAL_FATAL(ret, 0);
+        keep[j] = 1;
+        ret = tsk_site_table_keep_rows(&t2, keep, 0, NULL);
+        CU_ASSERT_EQUAL_FATAL(ret, 0);
+        CU_ASSERT_TRUE(tsk_site_table_equals(&t1, &t2, 0));
+        tsk_site_table_free(&t1);
+        tsk_site_table_free(&t2);
+    }
+
+    tsk_site_table_free(&source);
+}
+
 static void
 test_mutation_table(void)
 {
@@ -3642,6 +4005,190 @@ test_mutation_table_update_row(void)
     tsk_mutation_table_free(&table);
 }
 
+static void
+test_mutation_table_keep_rows(void)
+{
+    int ret;
+    tsk_id_t ret_id;
+    tsk_size_t j;
+    tsk_mutation_table_t source, t1, t2;
+    tsk_mutation_t row;
+    const char *derived_state = "XYZ";
+    const char *metadata = "ABC";
+    bool keep[3] = { 1, 1, 1 };
+    tsk_id_t id_map[3];
+
+    ret = tsk_mutation_table_init(&source, 0);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+
+    ret_id = tsk_mutation_table_add_row(
+        &source, 0, 1, -1, 3.0, derived_state, 1, metadata, 1);
+    CU_ASSERT_FATAL(ret_id >= 0);
+    ret_id = tsk_mutation_table_add_row(
+        &source, 1, 2, -1, 4.0, derived_state, 2, metadata, 2);
+    CU_ASSERT_FATAL(ret_id >= 0);
+    ret_id = tsk_mutation_table_add_row(
+        &source, 2, 3, 0, 5.0, derived_state, 3, metadata, 3);
+    CU_ASSERT_FATAL(ret_id >= 0);
+
+    ret = tsk_mutation_table_copy(&source, &t1, 0);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+
+    ret = tsk_mutation_table_keep_rows(&t1, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_TRUE(tsk_mutation_table_equals(&t1, &source, 0));
+
+    ret = tsk_mutation_table_keep_rows(&t1, keep, 0, NULL);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_TRUE(tsk_mutation_table_equals(&t1, &source, 0));
+    CU_ASSERT_EQUAL_FATAL(id_map[0], 0);
+    CU_ASSERT_EQUAL_FATAL(id_map[1], 1);
+    CU_ASSERT_EQUAL_FATAL(id_map[2], 2);
+
+    keep[0] = 0;
+    keep[1] = 0;
+    keep[2] = 0;
+    ret = tsk_mutation_table_keep_rows(&t1, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 0);
+    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);
+    CU_ASSERT_EQUAL_FATAL(id_map[1], -1);
+    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);
+
+    ret = tsk_mutation_table_copy(&source, &t1, TSK_NO_INIT);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    keep[0] = 0;
+    keep[1] = 1;
+    keep[2] = 0;
+    ret = tsk_mutation_table_keep_rows(&t1, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 1);
+    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);
+    CU_ASSERT_EQUAL_FATAL(id_map[1], 0);
+    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);
+
+    CU_ASSERT_EQUAL_FATAL(tsk_mutation_table_get_row(&t1, 0, &row), 0);
+    CU_ASSERT_EQUAL_FATAL(row.site, 1);
+    CU_ASSERT_EQUAL_FATAL(row.node, 2);
+    CU_ASSERT_EQUAL_FATAL(row.parent, -1);
+    CU_ASSERT_EQUAL_FATAL(row.time, 4);
+    CU_ASSERT_EQUAL_FATAL(row.derived_state_length, 2);
+    CU_ASSERT_EQUAL_FATAL(row.derived_state[0], 'X');
+    CU_ASSERT_EQUAL_FATAL(row.derived_state[1], 'Y');
+    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 2);
+    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');
+    CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');
+
+    tsk_mutation_table_free(&t1);
+
+    keep[0] = 0;
+    keep[1] = 0;
+    keep[2] = 0;
+    /* Keeping the first j + 1 rows must match truncating to j + 1 rows */
+    for (j = 0; j < source.num_rows; j++) {
+        ret = tsk_mutation_table_copy(&source, &t2, 0);
+        CU_ASSERT_EQUAL_FATAL(ret, 0);
+        ret = tsk_mutation_table_copy(&source, &t1, 0);
+        CU_ASSERT_EQUAL_FATAL(ret, 0);
+        ret = tsk_mutation_table_truncate(&t1, j + 1);
+        CU_ASSERT_EQUAL_FATAL(ret, 0);
+        keep[j] = 1;
+        ret = tsk_mutation_table_keep_rows(&t2, keep, 0, NULL);
+        CU_ASSERT_EQUAL_FATAL(ret, 0);
+        CU_ASSERT_TRUE(tsk_mutation_table_equals(&t1, &t2, 0));
+        tsk_mutation_table_free(&t1);
+        tsk_mutation_table_free(&t2);
+    }
+
+    tsk_mutation_table_free(&source);
+}
+
+static void
+test_mutation_table_keep_rows_parent_references(void)
+{
+    int ret;
+    tsk_id_t ret_id;
+    tsk_mutation_table_t source, t;
+    bool keep[4] = { 1, 1, 1, 1 };
+    tsk_id_t id_map[4];
+
+    ret = tsk_mutation_table_init(&source, 0);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+
+    ret_id = tsk_mutation_table_add_row(&source, 0, 1, -1, 3.0, "A", 1, NULL, 0);
+    CU_ASSERT_FATAL(ret_id >= 0);
+    ret_id = tsk_mutation_table_add_row(&source, 1, 2, -1, 4.0, "A", 1, NULL, 0);
+    CU_ASSERT_FATAL(ret_id >= 0);
+    ret_id = tsk_mutation_table_add_row(&source, 2, 3, 1, 5.0, "A", 1, NULL, 0);
+    CU_ASSERT_FATAL(ret_id >= 0);
+    ret_id = tsk_mutation_table_add_row(&source, 3, 4, 1, 6.0, "A", 1, NULL, 0);
+    CU_ASSERT_FATAL(ret_id >= 0);
+
+    ret = tsk_mutation_table_copy(&source, &t, 0);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+
+    /* OOB errors */
+    t.parent[0] = -2;
+    ret = tsk_mutation_table_keep_rows(&t, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_OUT_OF_BOUNDS);
+    CU_ASSERT_EQUAL_FATAL(t.num_rows, 4);
+
+    t.parent[0] = 4;
+    ret = tsk_mutation_table_keep_rows(&t, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_OUT_OF_BOUNDS);
+    CU_ASSERT_EQUAL_FATAL(t.num_rows, 4);
+    /* But ignored if row is not kept */
+    keep[0] = false;
+    ret = tsk_mutation_table_keep_rows(&t, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    tsk_mutation_table_free(&t);
+
+    ret = tsk_mutation_table_copy(&source, &t, 0);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    /* Try to remove referenced row 1 */
+    keep[0] = true;
+    keep[1] = false;
+    ret = tsk_mutation_table_keep_rows(&t, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_KEEP_ROWS_MAP_TO_DELETED);
+    CU_ASSERT_TRUE(tsk_mutation_table_equals(&source, &t, 0));
+    tsk_mutation_table_free(&t);
+
+    ret = tsk_mutation_table_copy(&source, &t, 0);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    /* remove unreferenced row 0 */
+    keep[0] = false;
+    keep[1] = true;
+    ret = tsk_mutation_table_keep_rows(&t, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_EQUAL_FATAL(t.num_rows, 3);
+    CU_ASSERT_EQUAL_FATAL(t.parent[0], TSK_NULL);
+    CU_ASSERT_EQUAL_FATAL(t.parent[1], 0);
+    CU_ASSERT_EQUAL_FATAL(t.parent[2], 0);
+    tsk_mutation_table_free(&t);
+
+    /* An out-of-bounds parent in a kept row must leave the table unchanged. */
+    source.parent[3] = -2;
+    ret = tsk_mutation_table_copy(&source, &t, 0);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    keep[0] = true;
+    ret = tsk_mutation_table_keep_rows(&t, keep, 0, NULL);
+    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_OUT_OF_BOUNDS);
+    CU_ASSERT_TRUE(tsk_mutation_table_equals(&source, &t, 0));
+    tsk_mutation_table_free(&t);
+
+    /* A kept row referring to a deleted row must leave the table unchanged. */
+    source.parent[3] = 0;
+    ret = tsk_mutation_table_copy(&source, &t, 0);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    keep[0] = false;
+    ret = tsk_mutation_table_keep_rows(&t, keep, 0, NULL);
+    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_KEEP_ROWS_MAP_TO_DELETED);
+    CU_ASSERT_TRUE(tsk_mutation_table_equals(&source, &t, 0));
+    tsk_mutation_table_free(&t);
+
+    tsk_mutation_table_free(&source);
+}
+
 static void
 test_migration_table(void)
 {
@@ -4243,6 +4790,99 @@ test_migration_table_update_row(void)
     tsk_migration_table_free(&table);
 }
 
+static void
+test_migration_table_keep_rows(void)
+{
+    int ret;
+    tsk_id_t ret_id;
+    tsk_size_t j;
+    tsk_migration_table_t source, t1, t2;
+    tsk_migration_t row;
+    const char *metadata = "ABC";
+    bool keep[3] = { 1, 1, 1 };
+    tsk_id_t id_map[3];
+
+    ret = tsk_migration_table_init(&source, 0);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+
+    ret_id = tsk_migration_table_add_row(&source, 0, 1.0, 2, 3, 4, 5, metadata, 1);
+    CU_ASSERT_FATAL(ret_id >= 0);
+    ret_id = tsk_migration_table_add_row(&source, 1, 2.0, 3, 4, 5, 6, metadata, 2);
+    CU_ASSERT_FATAL(ret_id >= 0);
+    ret_id = tsk_migration_table_add_row(&source, 2, 3.0, 4, 5, 6, 7, metadata, 3);
+    CU_ASSERT_FATAL(ret_id >= 0);
+
+    ret = tsk_migration_table_copy(&source, &t1, 0);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+
+    ret = tsk_migration_table_keep_rows(&t1, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_TRUE(tsk_migration_table_equals(&t1, &source, 0));
+
+    ret = tsk_migration_table_keep_rows(&t1, keep, 0, NULL);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_TRUE(tsk_migration_table_equals(&t1, &source, 0));
+    CU_ASSERT_EQUAL_FATAL(id_map[0], 0);
+    CU_ASSERT_EQUAL_FATAL(id_map[1], 1);
+    CU_ASSERT_EQUAL_FATAL(id_map[2], 2);
+
+    keep[0] = 0;
+    keep[1] = 0;
+    keep[2] = 0;
+    ret = tsk_migration_table_keep_rows(&t1, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 0);
+    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);
+    CU_ASSERT_EQUAL_FATAL(id_map[1], -1);
+    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);
+
+    ret = tsk_migration_table_copy(&source, &t1, TSK_NO_INIT);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    keep[0] = 0;
+    keep[1] = 1;
+    keep[2] = 0;
+    ret = tsk_migration_table_keep_rows(&t1, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 1);
+    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);
+    CU_ASSERT_EQUAL_FATAL(id_map[1], 0);
+    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);
+
+    CU_ASSERT_EQUAL_FATAL(tsk_migration_table_get_row(&t1, 0, &row), 0);
+    CU_ASSERT_EQUAL_FATAL(row.left, 1);
+    CU_ASSERT_EQUAL_FATAL(row.right, 2);
+    CU_ASSERT_EQUAL_FATAL(row.node, 3);
+    CU_ASSERT_EQUAL_FATAL(row.source, 4);
+    CU_ASSERT_EQUAL_FATAL(row.dest, 5);
+    CU_ASSERT_EQUAL_FATAL(row.time, 6);
+    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 2);
+    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');
+    CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');
+
+    tsk_migration_table_free(&t1);
+
+    keep[0] = 0;
+    keep[1] = 0;
+    keep[2] = 0;
+    /* Keeping the first j + 1 rows must match truncating to j + 1 rows */
+    for (j = 0; j < source.num_rows; j++) {
+        ret = tsk_migration_table_copy(&source, &t2, 0);
+        CU_ASSERT_EQUAL_FATAL(ret, 0);
+        ret = tsk_migration_table_copy(&source, &t1, 0);
+        CU_ASSERT_EQUAL_FATAL(ret, 0);
+        ret = tsk_migration_table_truncate(&t1, j + 1);
+        CU_ASSERT_EQUAL_FATAL(ret, 0);
+        keep[j] = 1;
+        ret = tsk_migration_table_keep_rows(&t2, keep, 0, NULL);
+        CU_ASSERT_EQUAL_FATAL(ret, 0);
+        CU_ASSERT_TRUE(tsk_migration_table_equals(&t1, &t2, 0));
+        tsk_migration_table_free(&t1);
+        tsk_migration_table_free(&t2);
+    }
+
+    tsk_migration_table_free(&source);
+}
+
 static void
 test_individual_table(void)
 {
@@ -4968,6 +5608,192 @@ test_individual_table_update_row(void)
     tsk_individual_table_free(&table);
 }
 
+static void
+test_individual_table_keep_rows(void)
+{
+    int ret;
+    tsk_id_t ret_id;
+    tsk_individual_t row;
+    double location[] = { 0, 1, 2 };
+    tsk_id_t parents[] = { -1, 1, -1 };
+    const char *metadata = "ABC";
+    bool keep[3] = { 1, 1, 1 };
+    tsk_id_t id_map[3];
+    tsk_individual_table_t source, t1, t2;
+    tsk_size_t j;
+
+    ret = tsk_individual_table_init(&source, 0);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+
+    ret_id
+        = tsk_individual_table_add_row(&source, 0, location, 1, parents, 1, metadata, 1);
+    CU_ASSERT_FATAL(ret_id >= 0);
+    ret_id
+        = tsk_individual_table_add_row(&source, 1, location, 2, parents, 2, metadata, 2);
+    CU_ASSERT_FATAL(ret_id >= 0);
+    ret_id
+        = tsk_individual_table_add_row(&source, 2, location, 3, parents, 3, metadata, 3);
+    CU_ASSERT_FATAL(ret_id >= 0);
+
+    ret = tsk_individual_table_copy(&source, &t1, 0);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+
+    ret = tsk_individual_table_keep_rows(&t1, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_TRUE(tsk_individual_table_equals(&t1, &source, 0));
+
+    ret = tsk_individual_table_keep_rows(&t1, keep, 0, NULL);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_TRUE(tsk_individual_table_equals(&t1, &source, 0));
+    CU_ASSERT_EQUAL_FATAL(id_map[0], 0);
+    CU_ASSERT_EQUAL_FATAL(id_map[1], 1);
+    CU_ASSERT_EQUAL_FATAL(id_map[2], 2);
+
+    keep[0] = 0;
+    keep[1] = 0;
+    keep[2] = 0;
+    ret = tsk_individual_table_keep_rows(&t1, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 0);
+    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);
+    CU_ASSERT_EQUAL_FATAL(id_map[1], -1);
+    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);
+
+    ret = tsk_individual_table_copy(&source, &t1, TSK_NO_INIT);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    keep[0] = 0;
+    keep[1] = 1;
+    keep[2] = 0;
+    ret = tsk_individual_table_keep_rows(&t1, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 1);
+    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);
+    CU_ASSERT_EQUAL_FATAL(id_map[1], 0);
+    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);
+
+    CU_ASSERT_EQUAL_FATAL(tsk_individual_table_get_row(&t1, 0, &row), 0);
+    CU_ASSERT_EQUAL_FATAL(row.flags, 1);
+    CU_ASSERT_EQUAL_FATAL(row.parents_length, 2);
+    CU_ASSERT_EQUAL_FATAL(row.parents[0], -1);
+    CU_ASSERT_EQUAL_FATAL(row.parents[1], 0);
+    CU_ASSERT_EQUAL_FATAL(row.location_length, 2);
+    CU_ASSERT_EQUAL_FATAL(row.location[0], 0);
+    CU_ASSERT_EQUAL_FATAL(row.location[1], 1);
+    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 2);
+    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');
+    CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');
+
+    tsk_individual_table_free(&t1);
+
+    keep[0] = 0;
+    keep[1] = 0;
+    keep[2] = 0;
+    /* Keeping the first j + 1 rows must match truncating to j + 1 rows */
+    for (j = 0; j < source.num_rows; j++) {
+        ret = tsk_individual_table_copy(&source, &t2, 0);
+        CU_ASSERT_EQUAL_FATAL(ret, 0);
+        ret = tsk_individual_table_copy(&source, &t1, 0);
+        CU_ASSERT_EQUAL_FATAL(ret, 0);
+        ret = tsk_individual_table_truncate(&t1, j + 1);
+        CU_ASSERT_EQUAL_FATAL(ret, 0);
+        keep[j] = 1;
+        ret = tsk_individual_table_keep_rows(&t2, keep, 0, NULL);
+        CU_ASSERT_EQUAL_FATAL(ret, 0);
+        CU_ASSERT_TRUE(tsk_individual_table_equals(&t1, &t2, 0));
+        tsk_individual_table_free(&t1);
+        tsk_individual_table_free(&t2);
+    }
+
+    tsk_individual_table_free(&source);
+}
+
+static void
+test_individual_table_keep_rows_parent_references(void)
+{
+    int ret;
+    tsk_id_t ret_id;
+    tsk_individual_table_t source, t;
+    bool keep[] = { 1, 1, 1, 1 };
+    tsk_id_t parents[] = { -1, 1, 2 };
+    tsk_id_t id_map[4];
+
+    ret = tsk_individual_table_init(&source, 0);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+
+    ret_id = tsk_individual_table_add_row(&source, 0, NULL, 0, parents, 1, NULL, 0);
+    CU_ASSERT_FATAL(ret_id >= 0);
+    ret_id = tsk_individual_table_add_row(&source, 0, NULL, 0, parents, 3, NULL, 0);
+    CU_ASSERT_FATAL(ret_id >= 0);
+    ret_id = tsk_individual_table_add_row(&source, 0, NULL, 0, parents, 1, NULL, 0);
+    CU_ASSERT_FATAL(ret_id >= 0);
+    ret_id = tsk_individual_table_add_row(&source, 0, NULL, 0, parents, 1, NULL, 0);
+    CU_ASSERT_FATAL(ret_id >= 0);
+
+    ret = tsk_individual_table_copy(&source, &t, 0);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+
+    /* OOB errors */
+    t.parents[0] = -2;
+    ret = tsk_individual_table_keep_rows(&t, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);
+    CU_ASSERT_EQUAL_FATAL(t.num_rows, 4);
+
+    t.parents[0] = 4;
+    ret = tsk_individual_table_keep_rows(&t, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);
+    CU_ASSERT_EQUAL_FATAL(t.num_rows, 4);
+    /* But ignored if row is not kept */
+    keep[0] = false;
+    ret = tsk_individual_table_keep_rows(&t, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    tsk_individual_table_free(&t);
+
+    ret = tsk_individual_table_copy(&source, &t, 0);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    /* Try to remove referenced row 2 */
+    keep[0] = true;
+    keep[2] = false;
+    ret = tsk_individual_table_keep_rows(&t, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_KEEP_ROWS_MAP_TO_DELETED);
+    CU_ASSERT_TRUE(tsk_individual_table_equals(&source, &t, 0));
+    tsk_individual_table_free(&t);
+
+    ret = tsk_individual_table_copy(&source, &t, 0);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    /* remove unreferenced row 0 */
+    keep[0] = false;
+    keep[2] = true;
+    ret = tsk_individual_table_keep_rows(&t, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_EQUAL_FATAL(t.num_rows, 3);
+    CU_ASSERT_EQUAL_FATAL(t.parents[0], TSK_NULL);
+    CU_ASSERT_EQUAL_FATAL(t.parents[1], 0);
+    CU_ASSERT_EQUAL_FATAL(t.parents[2], 1);
+    tsk_individual_table_free(&t);
+
+    /* An out-of-bounds parent in a kept row must leave the table unchanged. */
+    source.parents[1] = -2;
+    ret = tsk_individual_table_copy(&source, &t, 0);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    keep[0] = true;
+    ret = tsk_individual_table_keep_rows(&t, keep, 0, NULL);
+    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);
+    CU_ASSERT_TRUE(tsk_individual_table_equals(&source, &t, 0));
+    tsk_individual_table_free(&t);
+
+    /* A kept row referring to a deleted row must leave the table unchanged. */
+    source.parents[1] = 0;
+    ret = tsk_individual_table_copy(&source, &t, 0);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    keep[0] = false;
+    ret = tsk_individual_table_keep_rows(&t, keep, 0, NULL);
+    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_KEEP_ROWS_MAP_TO_DELETED);
+    CU_ASSERT_TRUE(tsk_individual_table_equals(&source, &t, 0));
+    tsk_individual_table_free(&t);
+
+    tsk_individual_table_free(&source);
+}
+
 static void
 test_population_table(void)
 {
@@ -5345,6 +6171,93 @@ test_population_table_update_row(void)
     tsk_population_table_free(&table);
 }
 
+static void
+test_population_table_keep_rows(void)
+{
+    int ret;
+    tsk_id_t ret_id;
+    tsk_size_t j;
+    tsk_population_table_t source, t1, t2;
+    tsk_population_t row;
+    const char *metadata = "ABC";
+    bool keep[3] = { 1, 1, 1 };
+    tsk_id_t id_map[3];
+
+    ret = tsk_population_table_init(&source, 0);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+
+    ret_id = tsk_population_table_add_row(&source, metadata, 1);
+    CU_ASSERT_FATAL(ret_id >= 0);
+    ret_id = tsk_population_table_add_row(&source, metadata, 2);
+    CU_ASSERT_FATAL(ret_id >= 0);
+    ret_id = tsk_population_table_add_row(&source, metadata, 3);
+    CU_ASSERT_FATAL(ret_id >= 0);
+
+    ret = tsk_population_table_copy(&source, &t1, 0);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+
+    ret = tsk_population_table_keep_rows(&t1, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_TRUE(tsk_population_table_equals(&t1, &source, 0));
+
+    ret = tsk_population_table_keep_rows(&t1, keep, 0, NULL);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_TRUE(tsk_population_table_equals(&t1, &source, 0));
+    CU_ASSERT_EQUAL_FATAL(id_map[0], 0);
+    CU_ASSERT_EQUAL_FATAL(id_map[1], 1);
+    CU_ASSERT_EQUAL_FATAL(id_map[2], 2);
+
+    keep[0] = 0;
+    keep[1] = 0;
+    keep[2] = 0;
+    ret = tsk_population_table_keep_rows(&t1, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 0);
+    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);
+    CU_ASSERT_EQUAL_FATAL(id_map[1], -1);
+    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);
+
+    ret = tsk_population_table_copy(&source, &t1, TSK_NO_INIT);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    keep[0] = 0;
+    keep[1] = 1;
+    keep[2] = 0;
+    ret = tsk_population_table_keep_rows(&t1, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 1);
+    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);
+    CU_ASSERT_EQUAL_FATAL(id_map[1], 0);
+    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);
+
+    CU_ASSERT_EQUAL_FATAL(tsk_population_table_get_row(&t1, 0, &row), 0);
+    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 2);
+    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');
+    CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');
+
+    tsk_population_table_free(&t1);
+
+    keep[0] = 0;
+    keep[1] = 0;
+    keep[2] = 0;
+    /* Keeping the first j + 1 rows must match truncating to j + 1 rows */
+    for (j = 0; j < source.num_rows; j++) {
+        ret = tsk_population_table_copy(&source, &t2, 0);
+        CU_ASSERT_EQUAL_FATAL(ret, 0);
+        ret = tsk_population_table_copy(&source, &t1, 0);
+        CU_ASSERT_EQUAL_FATAL(ret, 0);
+        ret = tsk_population_table_truncate(&t1, j + 1);
+        CU_ASSERT_EQUAL_FATAL(ret, 0);
+        keep[j] = 1;
+        ret = tsk_population_table_keep_rows(&t2, keep, 0, NULL);
+        CU_ASSERT_EQUAL_FATAL(ret, 0);
+        CU_ASSERT_TRUE(tsk_population_table_equals(&t1, &t2, 0));
+        tsk_population_table_free(&t1);
+        tsk_population_table_free(&t2);
+    }
+
+    tsk_population_table_free(&source);
+}
+
 static void
 test_provenance_table(void)
 {
@@ -5784,6 +6697,97 @@ test_provenance_table_update_row(void)
     tsk_provenance_table_free(&table);
 }
 
+static void
+test_provenance_table_keep_rows(void)
+{
+    int ret;
+    tsk_id_t ret_id;
+    tsk_size_t j;
+    tsk_provenance_table_t source, t1, t2;
+    tsk_provenance_t row;
+    const char *timestamp = "XYZ";
+    const char *record = "ABC";
+    bool keep[3] = { 1, 1, 1 };
+    tsk_id_t id_map[3];
+
+    ret = tsk_provenance_table_init(&source, 0);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+
+    ret_id = tsk_provenance_table_add_row(&source, timestamp, 1, record, 1);
+    CU_ASSERT_FATAL(ret_id >= 0);
+    ret_id = tsk_provenance_table_add_row(&source, timestamp, 2, record, 2);
+    CU_ASSERT_FATAL(ret_id >= 0);
+    ret_id = tsk_provenance_table_add_row(&source, timestamp, 3, record, 3);
+    CU_ASSERT_FATAL(ret_id >= 0);
+
+    ret = tsk_provenance_table_copy(&source, &t1, 0);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+
+    ret = tsk_provenance_table_keep_rows(&t1, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_TRUE(tsk_provenance_table_equals(&t1, &source, 0));
+
+    ret = tsk_provenance_table_keep_rows(&t1, keep, 0, NULL);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_TRUE(tsk_provenance_table_equals(&t1, &source, 0));
+    CU_ASSERT_EQUAL_FATAL(id_map[0], 0);
+    CU_ASSERT_EQUAL_FATAL(id_map[1], 1);
+    CU_ASSERT_EQUAL_FATAL(id_map[2], 2);
+
+    keep[0] = 0;
+    keep[1] = 0;
+    keep[2] = 0;
+    ret = tsk_provenance_table_keep_rows(&t1, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 0);
+    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);
+    CU_ASSERT_EQUAL_FATAL(id_map[1], -1);
+    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);
+
+    ret = tsk_provenance_table_copy(&source, &t1, TSK_NO_INIT);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    keep[0] = 0;
+    keep[1] = 1;
+    keep[2] = 0;
+    ret = tsk_provenance_table_keep_rows(&t1, keep, 0, id_map);
+    CU_ASSERT_EQUAL_FATAL(ret, 0);
+    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 1);
+    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);
+    CU_ASSERT_EQUAL_FATAL(id_map[1], 0);
+    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);
+
+    CU_ASSERT_EQUAL_FATAL(tsk_provenance_table_get_row(&t1, 0, &row), 0);
+    CU_ASSERT_EQUAL_FATAL(row.timestamp_length, 2);
+    CU_ASSERT_EQUAL_FATAL(row.timestamp[0], 'X');
+    CU_ASSERT_EQUAL_FATAL(row.timestamp[1], 'Y');
+    CU_ASSERT_EQUAL_FATAL(row.record_length, 2);
+    CU_ASSERT_EQUAL_FATAL(row.record[0], 'A');
+    CU_ASSERT_EQUAL_FATAL(row.record[1], 'B');
+
+    tsk_provenance_table_free(&t1);
+
+    keep[0] = 0;
+    keep[1] = 0;
+    keep[2] = 0;
+    /* Keeping the first j + 1 rows must match truncating to j + 1 rows */
+    for (j = 0; j < source.num_rows; j++) {
+        ret = tsk_provenance_table_copy(&source, &t2, 0);
+        CU_ASSERT_EQUAL_FATAL(ret, 0);
+        ret = tsk_provenance_table_copy(&source, &t1, 0);
+        CU_ASSERT_EQUAL_FATAL(ret, 0);
+        ret = tsk_provenance_table_truncate(&t1, j + 1);
+        CU_ASSERT_EQUAL_FATAL(ret, 0);
+        keep[j] = 1;
+        ret = tsk_provenance_table_keep_rows(&t2, keep, 0, NULL);
+        CU_ASSERT_EQUAL_FATAL(ret, 0);
+        CU_ASSERT_TRUE(tsk_provenance_table_equals(&t1, &t2, 0));
+        tsk_provenance_table_free(&t1);
+        tsk_provenance_table_free(&t2);
+    }
+
+    tsk_provenance_table_free(&source);
+}
+
 static void
 test_table_size_increments(void)
 {
@@ -10426,11 +11430,15 @@ main(int argc, char **argv)
     CU_TestInfo tests[] = {
         { "test_node_table", test_node_table },
         { "test_node_table_update_row", test_node_table_update_row },
+        { "test_node_table_keep_rows", test_node_table_keep_rows },
         { "test_node_table_takeset", test_node_table_takeset },
         { "test_edge_table", test_edge_table },
         { "test_edge_table_update_row", test_edge_table_update_row },
         { "test_edge_table_update_row_no_metadata",
             test_edge_table_update_row_no_metadata },
+        { "test_edge_table_keep_rows", test_edge_table_keep_rows },
+        { "test_edge_table_keep_rows_no_metadata",
+            test_edge_table_keep_rows_no_metadata },
         { "test_edge_table_takeset", test_edge_table_takeset },
         { "test_edge_table_copy_semantics", test_edge_table_copy_semantics },
         { "test_edge_table_squash", test_edge_table_squash },
@@ -10442,21 +11450,31 @@ main(int argc, char **argv)
         { "test_edge_table_squash_metadata", test_edge_table_squash_metadata },
         { "test_site_table", test_site_table },
         { "test_site_table_update_row", test_site_table_update_row },
+        { "test_site_table_keep_rows", test_site_table_keep_rows },
{ "test_site_table_takeset", test_site_table_takeset }, { "test_mutation_table", test_mutation_table }, { "test_mutation_table_update_row", test_mutation_table_update_row }, { "test_mutation_table_takeset", test_mutation_table_takeset }, + { "test_mutation_table_keep_rows", test_mutation_table_keep_rows }, + { "test_mutation_table_keep_rows_parent_references", + test_mutation_table_keep_rows_parent_references }, { "test_migration_table", test_migration_table }, { "test_migration_table_update_row", test_migration_table_update_row }, + { "test_migration_table_keep_rows", test_migration_table_keep_rows }, { "test_migration_table_takeset", test_migration_table_takeset }, { "test_individual_table", test_individual_table }, { "test_individual_table_takeset", test_individual_table_takeset }, { "test_individual_table_update_row", test_individual_table_update_row }, + { "test_individual_table_keep_rows", test_individual_table_keep_rows }, + { "test_individual_table_keep_rows_parent_references", + test_individual_table_keep_rows_parent_references }, { "test_population_table", test_population_table }, { "test_population_table_update_row", test_population_table_update_row }, + { "test_population_table_keep_rows", test_population_table_keep_rows }, { "test_population_table_takeset", test_population_table_takeset }, { "test_provenance_table", test_provenance_table }, { "test_provenance_table_update_row", test_provenance_table_update_row }, + { "test_provenance_table_keep_rows", test_provenance_table_keep_rows }, { "test_provenance_table_takeset", test_provenance_table_takeset }, { "test_table_size_increments", test_table_size_increments }, { "test_table_expansion", test_table_expansion }, diff --git a/c/tskit/core.c b/c/tskit/core.c index bc50a21a5f..b1ea25badd 100644 --- a/c/tskit/core.c +++ b/c/tskit/core.c @@ -1,7 +1,7 @@ /* * MIT License * - * Copyright (c) 2019-2022 Tskit Developers + * Copyright (c) 2019-2023 Tskit Developers * Copyright (c) 2015-2018 University of Oxford 
* * Permission is hereby granted, free of charge, to any person obtaining a copy @@ -222,6 +222,10 @@ tsk_strerror_internal(int err) case TSK_ERR_SEEK_OUT_OF_BOUNDS: ret = "Tree seek position out of bounds. (TSK_ERR_SEEK_OUT_OF_BOUNDS)"; break; + case TSK_ERR_KEEP_ROWS_MAP_TO_DELETED: + ret = "One of the kept rows in the table refers to a deleted row. " + "(TSK_ERR_KEEP_ROWS_MAP_TO_DELETED)"; + break; /* Edge errors */ case TSK_ERR_NULL_PARENT: diff --git a/c/tskit/core.h b/c/tskit/core.h index 0e7d528b0c..7810a8d048 100644 --- a/c/tskit/core.h +++ b/c/tskit/core.h @@ -1,7 +1,7 @@ /* * MIT License * - * Copyright (c) 2019-2022 Tskit Developers + * Copyright (c) 2019-2023 Tskit Developers * Copyright (c) 2015-2018 University of Oxford * * Permission is hereby granted, free of charge, to any person obtaining a copy @@ -356,6 +356,12 @@ A time value was non-finite (NaN counts as finite) A genomic position was non-finite */ #define TSK_ERR_GENOME_COORDS_NONFINITE -211 +/** +One of the rows in the retained table refers to a row that has been +deleted. 
+*/ +#define TSK_ERR_KEEP_ROWS_MAP_TO_DELETED -212 + /** @} */ /** diff --git a/c/tskit/tables.c b/c/tskit/tables.c index 3039dbbb38..da5f4b6558 100644 --- a/c/tskit/tables.c +++ b/c/tskit/tables.c @@ -1,7 +1,7 @@ /* * MIT License * - * Copyright (c) 2019-2022 Tskit Developers + * Copyright (c) 2019-2023 Tskit Developers * Copyright (c) 2017-2018 University of Oxford * * Permission is hereby granted, free of charge, to any person obtaining a copy @@ -732,6 +732,187 @@ write_metadata_schema_header( return fprintf(out, fmt, (int) metadata_schema_length, metadata_schema); } +/* Utilities for in-place subsetting columns */ + +static tsk_size_t +count_true(tsk_size_t num_rows, const bool *restrict keep) +{ + tsk_size_t j; + tsk_size_t count = 0; + + for (j = 0; j < num_rows; j++) { + if (keep[j]) { + count++; + } + } + return count; +} + +static void +keep_mask_to_id_map( + tsk_size_t num_rows, const bool *restrict keep, tsk_id_t *restrict id_map) +{ + tsk_size_t j; + tsk_id_t next_id = 0; + + for (j = 0; j < num_rows; j++) { + id_map[j] = TSK_NULL; + if (keep[j]) { + id_map[j] = next_id; + next_id++; + } + } +} + +static tsk_size_t +subset_remap_id_column(tsk_id_t *restrict column, tsk_size_t num_rows, + const bool *restrict keep, const tsk_id_t *restrict id_map) +{ + tsk_size_t j, k; + tsk_id_t value; + + k = 0; + for (j = 0; j < num_rows; j++) { + if (keep[j]) { + value = column[j]; + if (value != TSK_NULL) { + value = id_map[value]; + } + column[k] = value; + k++; + } + } + return k; +} + +/* Trigger warning: C++ programmers should look away... This may be one of the + * few cases where some macro funkiness is warranted, as these are exact + * duplicates of the same function with just the type of the column + * parameter changed. 
*/ + +static tsk_size_t +subset_id_column( + tsk_id_t *restrict column, tsk_size_t num_rows, const bool *restrict keep) +{ + tsk_size_t j, k; + + k = 0; + for (j = 0; j < num_rows; j++) { + if (keep[j]) { + column[k] = column[j]; + k++; + } + } + return k; +} + +static tsk_size_t +subset_flags_column( + tsk_flags_t *restrict column, tsk_size_t num_rows, const bool *restrict keep) +{ + tsk_size_t j, k; + + k = 0; + for (j = 0; j < num_rows; j++) { + if (keep[j]) { + column[k] = column[j]; + k++; + } + } + return k; +} + +static tsk_size_t +subset_double_column( + double *restrict column, tsk_size_t num_rows, const bool *restrict keep) +{ + tsk_size_t j, k; + + k = 0; + for (j = 0; j < num_rows; j++) { + if (keep[j]) { + column[k] = column[j]; + k++; + } + } + return k; +} + +static tsk_size_t +subset_ragged_char_column(char *restrict data, tsk_size_t *restrict offset_col, + tsk_size_t num_rows, const bool *restrict keep) +{ + tsk_size_t j, k, i, offset; + + k = 0; + offset = 0; + for (j = 0; j < num_rows; j++) { + if (keep[j]) { + offset_col[k] = offset; + /* Note: Unclear whether it's worth calling memcpy instead here? 
+ * Need to be careful since the regions are overlapping */ + for (i = offset_col[j]; i < offset_col[j + 1]; i++) { + data[offset] = data[i]; + offset++; + } + k++; + } + } + offset_col[k] = offset; + return offset; +} + +static tsk_size_t +subset_remap_ragged_id_column(tsk_id_t *restrict data, tsk_size_t *restrict offset_col, + tsk_size_t num_rows, const bool *restrict keep, const tsk_id_t *restrict id_map) +{ + tsk_size_t j, k, i, offset; + tsk_id_t di; + + k = 0; + offset = 0; + for (j = 0; j < num_rows; j++) { + if (keep[j]) { + offset_col[k] = offset; + for (i = offset_col[j]; i < offset_col[j + 1]; i++) { + di = data[i]; + if (di != TSK_NULL) { + di = id_map[di]; + } + data[offset] = di; + offset++; + } + k++; + } + } + offset_col[k] = offset; + return offset; +} + +static tsk_size_t +subset_ragged_double_column(double *restrict data, tsk_size_t *restrict offset_col, + tsk_size_t num_rows, const bool *restrict keep) +{ + tsk_size_t j, k, i, offset; + + k = 0; + offset = 0; + for (j = 0; j < num_rows; j++) { + if (keep[j]) { + offset_col[k] = offset; + /* Note: Unclear whether it's worth calling memcpy instead here? 
+ * Need to be careful since the regions are overlapping */ + for (i = offset_col[j]; i < offset_col[j + 1]; i++) { + data[offset] = data[i]; + offset++; + } + k++; + } + } + offset_col[k] = offset; + return offset; +} + /************************* * reference sequence *************************/ @@ -1622,6 +1803,71 @@ tsk_individual_table_equals(const tsk_individual_table_t *self, return ret; } +int +tsk_individual_table_keep_rows(tsk_individual_table_t *self, const bool *keep, + tsk_flags_t TSK_UNUSED(options), tsk_id_t *ret_id_map) +{ + int ret = 0; + const tsk_size_t current_num_rows = self->num_rows; + tsk_size_t j, k, remaining_rows; + tsk_id_t pk; + tsk_id_t *id_map = ret_id_map; + tsk_id_t *restrict parents = self->parents; + tsk_size_t *restrict parents_offset = self->parents_offset; + + if (ret_id_map == NULL) { + id_map = tsk_malloc(current_num_rows * sizeof(*id_map)); + if (id_map == NULL) { + ret = TSK_ERR_NO_MEMORY; + goto out; + } + } + + keep_mask_to_id_map(current_num_rows, keep, id_map); + + /* See notes in tsk_mutation_table_keep_rows for possibilities + * on making this more flexible */ + for (j = 0; j < current_num_rows; j++) { + if (keep[j]) { + for (k = parents_offset[j]; k < parents_offset[j + 1]; k++) { + pk = parents[k]; + if (pk != TSK_NULL) { + if (pk < 0 || pk >= (tsk_id_t) current_num_rows) { + ret = TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS; + ; + goto out; + } + if (id_map[pk] == TSK_NULL) { + ret = TSK_ERR_KEEP_ROWS_MAP_TO_DELETED; + goto out; + } + } + } + } + } + + remaining_rows = subset_flags_column(self->flags, current_num_rows, keep); + self->parents_length = subset_remap_ragged_id_column( + self->parents, self->parents_offset, current_num_rows, keep, id_map); + self->location_length = subset_ragged_double_column( + self->location, self->location_offset, current_num_rows, keep); + if (self->metadata_length > 0) { + /* Implementation note: we special case metadata here because + * it'll make the common-case of no metadata a bit faster, 
and + * to also potentially support more general use of the + * TSK_TABLE_NO_METADATA option. This is done for all the tables + * but only commented on here. */ + self->metadata_length = subset_ragged_char_column( + self->metadata, self->metadata_offset, current_num_rows, keep); + } + self->num_rows = remaining_rows; +out: + if (ret_id_map == NULL) { + tsk_safe_free(id_map); + } + return ret; +} + static int tsk_individual_table_dump( const tsk_individual_table_t *self, kastore_t *store, tsk_flags_t options) @@ -2271,6 +2517,29 @@ tsk_node_table_get_row(const tsk_node_table_t *self, tsk_id_t index, tsk_node_t return ret; } +int +tsk_node_table_keep_rows(tsk_node_table_t *self, const bool *keep, + tsk_flags_t TSK_UNUSED(options), tsk_id_t *id_map) +{ + int ret = 0; + tsk_size_t remaining_rows; + + if (id_map != NULL) { + keep_mask_to_id_map(self->num_rows, keep, id_map); + } + + remaining_rows = subset_flags_column(self->flags, self->num_rows, keep); + subset_double_column(self->time, self->num_rows, keep); + subset_id_column(self->population, self->num_rows, keep); + subset_id_column(self->individual, self->num_rows, keep); + if (self->metadata_length > 0) { + self->metadata_length = subset_ragged_char_column( + self->metadata, self->metadata_offset, self->num_rows, keep); + } + self->num_rows = remaining_rows; + return ret; +} + static int tsk_node_table_dump(const tsk_node_table_t *self, kastore_t *store, tsk_flags_t options) { @@ -2940,6 +3209,29 @@ tsk_edge_table_equals( return ret; } +int +tsk_edge_table_keep_rows(tsk_edge_table_t *self, const bool *keep, + tsk_flags_t TSK_UNUSED(options), tsk_id_t *id_map) +{ + int ret = 0; + tsk_size_t remaining_rows; + + if (id_map != NULL) { + keep_mask_to_id_map(self->num_rows, keep, id_map); + } + remaining_rows = subset_double_column(self->left, self->num_rows, keep); + subset_double_column(self->right, self->num_rows, keep); + subset_id_column(self->parent, self->num_rows, keep); + subset_id_column(self->child, 
self->num_rows, keep); + if (self->metadata_length > 0) { + tsk_bug_assert(!(self->options & TSK_TABLE_NO_METADATA)); + self->metadata_length = subset_ragged_char_column( + self->metadata, self->metadata_offset, self->num_rows, keep); + } + self->num_rows = remaining_rows; + return ret; +} + static int tsk_edge_table_dump(const tsk_edge_table_t *self, kastore_t *store, tsk_flags_t options) { @@ -3675,6 +3967,28 @@ tsk_site_table_dump_text(const tsk_site_table_t *self, FILE *out) return ret; } +int +tsk_site_table_keep_rows(tsk_site_table_t *self, const bool *keep, + tsk_flags_t TSK_UNUSED(options), tsk_id_t *id_map) +{ + int ret = 0; + tsk_size_t remaining_rows; + + if (id_map != NULL) { + keep_mask_to_id_map(self->num_rows, keep, id_map); + } + + remaining_rows = subset_double_column(self->position, self->num_rows, keep); + self->ancestral_state_length = subset_ragged_char_column( + self->ancestral_state, self->ancestral_state_offset, self->num_rows, keep); + if (self->metadata_length > 0) { + self->metadata_length = subset_ragged_char_column( + self->metadata, self->metadata_offset, self->num_rows, keep); + } + self->num_rows = remaining_rows; + return ret; +} + static int tsk_site_table_dump(const tsk_site_table_t *self, kastore_t *store, tsk_flags_t options) { @@ -4418,6 +4732,65 @@ tsk_mutation_table_dump_text(const tsk_mutation_table_t *self, FILE *out) return ret; } +int +tsk_mutation_table_keep_rows(tsk_mutation_table_t *self, const bool *keep, + tsk_flags_t TSK_UNUSED(options), tsk_id_t *ret_id_map) +{ + int ret = 0; + const tsk_size_t current_num_rows = self->num_rows; + tsk_size_t j, remaining_rows; + tsk_id_t pj; + tsk_id_t *id_map = ret_id_map; + tsk_id_t *restrict parent = self->parent; + + if (ret_id_map == NULL) { + id_map = tsk_malloc(current_num_rows * sizeof(*id_map)); + if (id_map == NULL) { + ret = TSK_ERR_NO_MEMORY; + goto out; + } + } + + keep_mask_to_id_map(current_num_rows, keep, id_map); + + /* Note: we could add some options to avoid 
these checks if we wanted. + * MAP_DELETED_TO_NULL is an obvious one, and I guess it might be + * helpful to also provide NO_REMAP to prevent reference remapping + * entirely. */ + for (j = 0; j < current_num_rows; j++) { + if (keep[j]) { + pj = parent[j]; + if (pj != TSK_NULL) { + if (pj < 0 || pj >= (tsk_id_t) current_num_rows) { + ret = TSK_ERR_MUTATION_OUT_OF_BOUNDS; + goto out; + } + if (id_map[pj] == TSK_NULL) { + ret = TSK_ERR_KEEP_ROWS_MAP_TO_DELETED; + goto out; + } + } + } + } + + remaining_rows = subset_id_column(self->site, current_num_rows, keep); + subset_id_column(self->node, current_num_rows, keep); + subset_remap_id_column(parent, current_num_rows, keep, id_map); + subset_double_column(self->time, current_num_rows, keep); + self->derived_state_length = subset_ragged_char_column( + self->derived_state, self->derived_state_offset, current_num_rows, keep); + if (self->metadata_length > 0) { + self->metadata_length = subset_ragged_char_column( + self->metadata, self->metadata_offset, current_num_rows, keep); + } + self->num_rows = remaining_rows; +out: + if (ret_id_map == NULL) { + tsk_safe_free(id_map); + } + return ret; +} + static int tsk_mutation_table_dump( const tsk_mutation_table_t *self, kastore_t *store, tsk_flags_t options) @@ -5063,6 +5436,31 @@ tsk_migration_table_equals(const tsk_migration_table_t *self, return ret; } +int +tsk_migration_table_keep_rows(tsk_migration_table_t *self, const bool *keep, + tsk_flags_t TSK_UNUSED(options), tsk_id_t *id_map) +{ + int ret = 0; + tsk_size_t remaining_rows; + + if (id_map != NULL) { + keep_mask_to_id_map(self->num_rows, keep, id_map); + } + + remaining_rows = subset_double_column(self->left, self->num_rows, keep); + subset_double_column(self->right, self->num_rows, keep); + subset_id_column(self->node, self->num_rows, keep); + subset_id_column(self->source, self->num_rows, keep); + subset_id_column(self->dest, self->num_rows, keep); + subset_double_column(self->time, self->num_rows, keep); + if 
(self->metadata_length > 0) { + self->metadata_length = subset_ragged_char_column( + self->metadata, self->metadata_offset, self->num_rows, keep); + } + self->num_rows = remaining_rows; + return ret; +} + static int tsk_migration_table_dump( const tsk_migration_table_t *self, kastore_t *store, tsk_flags_t options) @@ -5632,6 +6030,24 @@ tsk_population_table_equals(const tsk_population_table_t *self, return ret; } +int +tsk_population_table_keep_rows(tsk_population_table_t *self, const bool *keep, + tsk_flags_t TSK_UNUSED(options), tsk_id_t *id_map) +{ + int ret = 0; + + if (id_map != NULL) { + keep_mask_to_id_map(self->num_rows, keep, id_map); + } + + if (self->metadata_length > 0) { + self->metadata_length = subset_ragged_char_column( + self->metadata, self->metadata_offset, self->num_rows, keep); + } + self->num_rows = count_true(self->num_rows, keep); + return ret; +} + static int tsk_population_table_dump( const tsk_population_table_t *self, kastore_t *store, tsk_flags_t options) @@ -6244,6 +6660,24 @@ tsk_provenance_table_equals(const tsk_provenance_table_t *self, return ret; } +int +tsk_provenance_table_keep_rows(tsk_provenance_table_t *self, const bool *keep, + tsk_flags_t TSK_UNUSED(options), tsk_id_t *id_map) +{ + int ret = 0; + + if (id_map != NULL) { + keep_mask_to_id_map(self->num_rows, keep, id_map); + } + self->timestamp_length = subset_ragged_char_column( + self->timestamp, self->timestamp_offset, self->num_rows, keep); + self->record_length = subset_ragged_char_column( + self->record, self->record_offset, self->num_rows, keep); + self->num_rows = count_true(self->num_rows, keep); + + return ret; +} + static int tsk_provenance_table_dump( const tsk_provenance_table_t *self, kastore_t *store, tsk_flags_t options) diff --git a/c/tskit/tables.h b/c/tskit/tables.h index a3496bb9ef..9af1f85009 100644 --- a/c/tskit/tables.h +++ b/c/tskit/tables.h @@ -1043,6 +1043,36 @@ int tsk_individual_table_extend(tsk_individual_table_t *self, const 
tsk_individual_table_t *other, tsk_size_t num_rows, const tsk_id_t *row_indexes, tsk_flags_t options); +/** +@brief Subset this table by keeping rows according to a boolean mask. + +@rst +Deletes rows from this table and optionally return the mapping from IDs in +the current table to the updated table. Rows are kept or deleted according to +the specified boolean array ``keep`` such that for each row ``j`` if +``keep[j]`` is false (zero) the row is deleted, and otherwise the row is +retained. Thus, ``keep`` must be an array of at least ``num_rows`` +:c:type:`bool` values. + +If the ``id_map`` argument is non-null, this array will be updated to represent +the mapping between IDs before and after row deletion. For row ``j``, +``id_map[j]`` will contain the new ID for row ``j`` if it is retained, or +:c:macro:`TSK_NULL` if the row has been removed. Thus, ``id_map`` must be an +array of at least ``num_rows`` :c:type:`tsk_id_t` values. +@endrst + +@param self A pointer to a tsk_individual_table_t object. +@param keep Array of boolean flags describing whether a particular + row should be kept or not. Must be at least ``num_rows`` long. +@param options Bitwise option flags. Currently unused; should be + set to zero to ensure compatibility with later versions of tskit. +@param id_map An array in which to store the mapping between new + and old IDs. If NULL, this will be ignored. +@return Return 0 on success or a negative value on failure. +*/ +int tsk_individual_table_keep_rows(tsk_individual_table_t *self, const bool *keep, + tsk_flags_t options, tsk_id_t *id_map); + /** @brief Returns true if the data in the specified table is identical to the data in this table. @@ -1391,6 +1421,36 @@ and is not checked for compatibility with any existing schema on this table. 
int tsk_node_table_extend(tsk_node_table_t *self, const tsk_node_table_t *other, tsk_size_t num_rows, const tsk_id_t *row_indexes, tsk_flags_t options); +/** +@brief Subset this table by keeping rows according to a boolean mask. + +@rst +Deletes rows from this table and optionally return the mapping from IDs in +the current table to the updated table. Rows are kept or deleted according to +the specified boolean array ``keep`` such that for each row ``j`` if +``keep[j]`` is false (zero) the row is deleted, and otherwise the row is +retained. Thus, ``keep`` must be an array of at least ``num_rows`` +:c:type:`bool` values. + +If the ``id_map`` argument is non-null, this array will be updated to represent +the mapping between IDs before and after row deletion. For row ``j``, +``id_map[j]`` will contain the new ID for row ``j`` if it is retained, or +:c:macro:`TSK_NULL` if the row has been removed. Thus, ``id_map`` must be an +array of at least ``num_rows`` :c:type:`tsk_id_t` values. +@endrst + +@param self A pointer to a tsk_node_table_t object. +@param keep Array of boolean flags describing whether a particular + row should be kept or not. Must be at least ``num_rows`` long. +@param options Bitwise option flags. Currently unused; should be + set to zero to ensure compatibility with later versions of tskit. +@param id_map An array in which to store the mapping between new + and old IDs. If NULL, this will be ignored. +@return Return 0 on success or a negative value on failure. +*/ +int tsk_node_table_keep_rows( + tsk_node_table_t *self, const bool *keep, tsk_flags_t options, tsk_id_t *id_map); + /** @brief Returns true if the data in the specified table is identical to the data in this table. 
@@ -1701,6 +1761,36 @@ as-is and is not checked for compatibility with any existing schema on this tabl int tsk_edge_table_extend(tsk_edge_table_t *self, const tsk_edge_table_t *other, tsk_size_t num_rows, const tsk_id_t *row_indexes, tsk_flags_t options); +/** +@brief Subset this table by keeping rows according to a boolean mask. + +@rst +Deletes rows from this table and optionally return the mapping from IDs in +the current table to the updated table. Rows are kept or deleted according to +the specified boolean array ``keep`` such that for each row ``j`` if +``keep[j]`` is false (zero) the row is deleted, and otherwise the row is +retained. Thus, ``keep`` must be an array of at least ``num_rows`` +:c:type:`bool` values. + +If the ``id_map`` argument is non-null, this array will be updated to represent +the mapping between IDs before and after row deletion. For row ``j``, +``id_map[j]`` will contain the new ID for row ``j`` if it is retained, or +:c:macro:`TSK_NULL` if the row has been removed. Thus, ``id_map`` must be an +array of at least ``num_rows`` :c:type:`tsk_id_t` values. +@endrst + +@param self A pointer to a tsk_edge_table_t object. +@param keep Array of boolean flags describing whether a particular + row should be kept or not. Must be at least ``num_rows`` long. +@param options Bitwise option flags. Currently unused; should be + set to zero to ensure compatibility with later versions of tskit. +@param id_map An array in which to store the mapping between new + and old IDs. If NULL, this will be ignored. +@return Return 0 on success or a negative value on failure. +*/ +int tsk_edge_table_keep_rows( + tsk_edge_table_t *self, const bool *keep, tsk_flags_t options, tsk_id_t *id_map); + /** @brief Returns true if the data in the specified table is identical to the data in this table. 
@@ -2035,6 +2125,36 @@ int tsk_migration_table_extend(tsk_migration_table_t *self, const tsk_migration_table_t *other, tsk_size_t num_rows, const tsk_id_t *row_indexes, tsk_flags_t options); +/** +@brief Subset this table by keeping rows according to a boolean mask. + +@rst +Deletes rows from this table and optionally return the mapping from IDs in +the current table to the updated table. Rows are kept or deleted according to +the specified boolean array ``keep`` such that for each row ``j`` if +``keep[j]`` is false (zero) the row is deleted, and otherwise the row is +retained. Thus, ``keep`` must be an array of at least ``num_rows`` +:c:type:`bool` values. + +If the ``id_map`` argument is non-null, this array will be updated to represent +the mapping between IDs before and after row deletion. For row ``j``, +``id_map[j]`` will contain the new ID for row ``j`` if it is retained, or +:c:macro:`TSK_NULL` if the row has been removed. Thus, ``id_map`` must be an +array of at least ``num_rows`` :c:type:`tsk_id_t` values. +@endrst + +@param self A pointer to a tsk_migration_table_t object. +@param keep Array of boolean flags describing whether a particular + row should be kept or not. Must be at least ``num_rows`` long. +@param options Bitwise option flags. Currently unused; should be + set to zero to ensure compatibility with later versions of tskit. +@param id_map An array in which to store the mapping between new + and old IDs. If NULL, this will be ignored. +@return Return 0 on success or a negative value on failure. +*/ +int tsk_migration_table_keep_rows(tsk_migration_table_t *self, const bool *keep, + tsk_flags_t options, tsk_id_t *id_map); + /** @brief Returns true if the data in the specified table is identical to the data in this table. @@ -2343,6 +2463,36 @@ and is not checked for compatibility with any existing schema on this table. 
int tsk_site_table_extend(tsk_site_table_t *self, const tsk_site_table_t *other, tsk_size_t num_rows, const tsk_id_t *row_indexes, tsk_flags_t options); +/** +@brief Subset this table by keeping rows according to a boolean mask. + +@rst +Deletes rows from this table and optionally returns the mapping from IDs in +the current table to the updated table. Rows are kept or deleted according to +the specified boolean array ``keep`` such that for each row ``j`` if +``keep[j]`` is false (zero) the row is deleted, and otherwise the row is +retained. Thus, ``keep`` must be an array of at least ``num_rows`` +:c:type:`bool` values. + +If the ``id_map`` argument is non-null, this array will be updated to represent +the mapping between IDs before and after row deletion. For row ``j``, +``id_map[j]`` will contain the new ID for row ``j`` if it is retained, or +:c:macro:`TSK_NULL` if the row has been removed. Thus, ``id_map`` must be an +array of at least ``num_rows`` :c:type:`tsk_id_t` values. +@endrst + +@param self A pointer to a tsk_site_table_t object. +@param keep Array of boolean flags describing whether a particular + row should be kept or not. Must be at least ``num_rows`` long. +@param options Bitwise option flags. Currently unused; should be + set to zero to ensure compatibility with later versions of tskit. +@param id_map An array in which to store the mapping between new + and old IDs. If NULL, this will be ignored. +@return Return 0 on success or a negative value on failure. +*/ +int tsk_site_table_keep_rows( + tsk_site_table_t *self, const bool *keep, tsk_flags_t options, tsk_id_t *id_map); + /** @brief Returns true if the data in the specified table is identical to the data in this table. 
@@ -2679,6 +2829,36 @@ int tsk_mutation_table_extend(tsk_mutation_table_t *self, const tsk_mutation_table_t *other, tsk_size_t num_rows, const tsk_id_t *row_indexes, tsk_flags_t options); +/** +@brief Subset this table by keeping rows according to a boolean mask.
+ +@rst +Deletes rows from this table and optionally returns the mapping from IDs in +the current table to the updated table. Rows are kept or deleted according to +the specified boolean array ``keep`` such that for each row ``j`` if +``keep[j]`` is false (zero) the row is deleted, and otherwise the row is +retained. Thus, ``keep`` must be an array of at least ``num_rows`` +:c:type:`bool` values. + +If the ``id_map`` argument is non-null, this array will be updated to represent +the mapping between IDs before and after row deletion. For row ``j``, +``id_map[j]`` will contain the new ID for row ``j`` if it is retained, or +:c:macro:`TSK_NULL` if the row has been removed. Thus, ``id_map`` must be an +array of at least ``num_rows`` :c:type:`tsk_id_t` values. +@endrst + +@param self A pointer to a tsk_mutation_table_t object. +@param keep Array of boolean flags describing whether a particular + row should be kept or not. Must be at least ``num_rows`` long. +@param options Bitwise option flags. Currently unused; should be + set to zero to ensure compatibility with later versions of tskit. +@param id_map An array in which to store the mapping between new + and old IDs. If NULL, this will be ignored. +@return Return 0 on success or a negative value on failure. +*/ +int tsk_mutation_table_keep_rows( + tsk_mutation_table_t *self, const bool *keep, tsk_flags_t options, tsk_id_t *id_map); + /** @brief Returns true if the data in the specified table is identical to the data in this table. @@ -3006,6 +3186,36 @@ int tsk_population_table_extend(tsk_population_table_t *self, const tsk_population_table_t *other, tsk_size_t num_rows, const tsk_id_t *row_indexes, tsk_flags_t options); +/** +@brief Subset this table by keeping rows according to a boolean mask. + +@rst +Deletes rows from this table and optionally returns the mapping from IDs in +the current table to the updated table.
Rows are kept or deleted according to +the specified boolean array ``keep`` such that for each row ``j`` if +``keep[j]`` is false (zero) the row is deleted, and otherwise the row is +retained. Thus, ``keep`` must be an array of at least ``num_rows`` +:c:type:`bool` values. + +If the ``id_map`` argument is non-null, this array will be updated to represent +the mapping between IDs before and after row deletion. For row ``j``, +``id_map[j]`` will contain the new ID for row ``j`` if it is retained, or +:c:macro:`TSK_NULL` if the row has been removed. Thus, ``id_map`` must be an +array of at least ``num_rows`` :c:type:`tsk_id_t` values. +@endrst + +@param self A pointer to a tsk_population_table_t object. +@param keep Array of boolean flags describing whether a particular + row should be kept or not. Must be at least ``num_rows`` long. +@param options Bitwise option flags. Currently unused; should be + set to zero to ensure compatibility with later versions of tskit. +@param id_map An array in which to store the mapping between new + and old IDs. If NULL, this will be ignored. +@return Return 0 on success or a negative value on failure. +*/ +int tsk_population_table_keep_rows(tsk_population_table_t *self, const bool *keep, + tsk_flags_t options, tsk_id_t *id_map); + /** @brief Returns true if the data in the specified table is identical to the data in this table. @@ -3300,6 +3510,36 @@ int tsk_provenance_table_extend(tsk_provenance_table_t *self, const tsk_provenance_table_t *other, tsk_size_t num_rows, const tsk_id_t *row_indexes, tsk_flags_t options); +/** +@brief Subset this table by keeping rows according to a boolean mask. + +@rst +Deletes rows from this table and optionally returns the mapping from IDs in +the current table to the updated table. Rows are kept or deleted according to +the specified boolean array ``keep`` such that for each row ``j`` if +``keep[j]`` is false (zero) the row is deleted, and otherwise the row is +retained.
Thus, ``keep`` must be an array of at least ``num_rows`` +:c:type:`bool` values. + +If the ``id_map`` argument is non-null, this array will be updated to represent +the mapping between IDs before and after row deletion. For row ``j``, +``id_map[j]`` will contain the new ID for row ``j`` if it is retained, or +:c:macro:`TSK_NULL` if the row has been removed. Thus, ``id_map`` must be an +array of at least ``num_rows`` :c:type:`tsk_id_t` values. +@endrst + +@param self A pointer to a tsk_provenance_table_t object. +@param keep Array of boolean flags describing whether a particular + row should be kept or not. Must be at least ``num_rows`` long. +@param options Bitwise option flags. Currently unused; should be + set to zero to ensure compatibility with later versions of tskit. +@param id_map An array in which to store the mapping between new + and old IDs. If NULL, this will be ignored. +@return Return 0 on success or a negative value on failure. +*/ +int tsk_provenance_table_keep_rows(tsk_provenance_table_t *self, const bool *keep, + tsk_flags_t options, tsk_id_t *id_map); + /** @brief Returns true if the data in the specified table is identical to the data in this table.