From 1503cf4b7cc7fd781003f81d090c99cb16754da6 Mon Sep 17 00:00:00 2001 From: Jerome Kelleher Date: Wed, 25 Jan 2023 20:37:38 +0000 Subject: [PATCH] Start on node table subset Closes #2666 --- c/tests/test_tables.c | 30 +++++++++++++++++------------- c/tskit/tables.c | 10 +++++++--- c/tskit/tables.h | 31 ++++++++++++++++--------------- 3 files changed, 40 insertions(+), 31 deletions(-) diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 53b4e0c081..f9695802e1 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -1,7 +1,7 @@ /* * MIT License * - * Copyright (c) 2019-2022 Tskit Developers + * Copyright (c) 2019-2023 Tskit Developers * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -1491,14 +1491,14 @@ test_node_table_update_row(void) } static void -test_node_table_delete_rows(void) +test_node_table_subset(void) { int ret; tsk_id_t ret_id; tsk_node_table_t table; tsk_node_table_t copy; tsk_node_t row; - bool delete[3] = { 0, 0, 0 }; + bool keep[3] = { 1, 1, 1 }; tsk_id_t id_map[3]; const char *metadata = "ABC"; @@ -1515,23 +1515,27 @@ test_node_table_delete_rows(void) ret = tsk_node_table_copy(&table, &copy, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); - ret = tsk_node_table_delete_rows(&copy, delete, 0, id_map); + ret = tsk_node_table_subset(&copy, keep, 0, id_map); CU_ASSERT_EQUAL_FATAL(ret, 0); CU_ASSERT_TRUE(tsk_node_table_equals(&copy, &table, 0)); - delete[0] = 1; - delete[1] = 1; - delete[2] = 1; - ret = tsk_node_table_delete_rows(&copy, delete, 0, id_map); + ret = tsk_node_table_subset(&copy, keep, 0, NULL); + CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_TRUE(tsk_node_table_equals(&copy, &table, 0)); + + keep[0] = 0; + keep[1] = 0; + keep[2] = 0; + ret = tsk_node_table_subset(&copy, keep, 0, id_map); CU_ASSERT_EQUAL_FATAL(ret, 0); CU_ASSERT_EQUAL_FATAL(copy.num_rows, 0); ret = tsk_node_table_copy(&table, &copy, TSK_NO_INIT); CU_ASSERT_EQUAL_FATAL(ret, 0); - delete[0] = 
1; - delete[1] = 0; - delete[2] = 1; - ret = tsk_node_table_delete_rows(&copy, delete, 0, id_map); + keep[0] = 0; + keep[1] = 1; + keep[2] = 0; + ret = tsk_node_table_subset(&copy, keep, 0, id_map); CU_ASSERT_EQUAL_FATAL(ret, 0); CU_ASSERT_EQUAL_FATAL(copy.num_rows, 1); @@ -10484,7 +10488,7 @@ main(int argc, char **argv) CU_TestInfo tests[] = { { "test_node_table", test_node_table }, { "test_node_table_update_row", test_node_table_update_row }, - { "test_node_table_delete_rows", test_node_table_delete_rows }, + { "test_node_table_subset", test_node_table_subset }, { "test_node_table_takeset", test_node_table_takeset }, { "test_edge_table", test_edge_table }, { "test_edge_table_update_row", test_edge_table_update_row }, diff --git a/c/tskit/tables.c b/c/tskit/tables.c index 251e8a801d..43fabab5d8 100644 --- a/c/tskit/tables.c +++ b/c/tskit/tables.c @@ -1,7 +1,7 @@ /* * MIT License * - * Copyright (c) 2019-2022 Tskit Developers + * Copyright (c) 2019-2023 Tskit Developers * Copyright (c) 2017-2018 University of Oxford * * Permission is hereby granted, free of charge, to any person obtaining a copy @@ -2272,7 +2272,7 @@ tsk_node_table_get_row(const tsk_node_table_t *self, tsk_id_t index, tsk_node_t } int -tsk_node_table_delete_rows(tsk_node_table_t *self, bool *delete_rows, +tsk_node_table_subset(tsk_node_table_t *self, bool *keep_rows, tsk_flags_t TSK_UNUSED(options), tsk_id_t *id_map) { int ret; @@ -2281,6 +2281,10 @@ tsk_node_table_delete_rows(tsk_node_table_t *self, bool *delete_rows, tsk_size_t j; tsk_id_t ret_id; + /* NOTE: this could be done more efficiently here if we went + * column-by-column. Ragged arrays could be packed in-place + * with general code so there wouldn't be too much duplication. 
+ */ ret = tsk_node_table_copy(self, &copy, 0); if (ret != 0) { goto out; @@ -2293,7 +2297,7 @@ tsk_node_table_delete_rows(tsk_node_table_t *self, bool *delete_rows, if (id_map != NULL) { id_map[j] = TSK_NULL; } - if (!delete_rows[j]) { + if (keep_rows[j]) { tsk_node_table_get_row_unsafe(&copy, (tsk_id_t) j, &node); ret_id = tsk_node_table_add_row(self, node.flags, node.time, node.population, node.individual, node.metadata, node.metadata_length); diff --git a/c/tskit/tables.h b/c/tskit/tables.h index c623904ab6..cc9464f3d4 100644 --- a/c/tskit/tables.h +++ b/c/tskit/tables.h @@ -1392,33 +1392,34 @@ int tsk_node_table_extend(tsk_node_table_t *self, const tsk_node_table_t *other, tsk_size_t num_rows, const tsk_id_t *row_indexes, tsk_flags_t options); /** -@brief Deletes rows specified by a boolean array. +@brief Subset this table according to a boolean mask. @rst -Deletes rows from this table and (optionally) return the mapping from IDs -in the current table to the updated table. The rows to delete are specified -by a boolean array, such that for each row ``j`` if ``delete_rows[j]`` is -true (or more generally non-zero), then row ``j`` will be removed. +Deletes rows from this table and (optionally) return the mapping from IDs in +the current table to the updated table. Rows are kept or deleted according to +the specified boolean array ``keep_rows`` such that for each row ``j`` if +``keep_rows[j]`` is false (zero) the row is deleted, and otherwise the row is +retained. Thus, ``keep_rows`` must be an array of at least ``num_rows`` +:c:type:`bool` values. -If the ``id_map`` argument is non-null, this array will be updated -to represent the mapping between IDs before and after row deletion. -For row ``j``, ``id_map[j]`` will contain the new ID for row ``j`` -if it is retained, or :c:macro:`TSK_NULL` if the row has been removed. -Thus, ``id_map`` must be an array of at least ``num_rows`` :c:type:`tsk_id_t` -values. 
+If the ``id_map`` argument is non-null, this array will be updated to represent +the mapping between IDs before and after row deletion. For row ``j``, +``id_map[j]`` will contain the new ID for row ``j`` if it is retained, or +:c:macro:`TSK_NULL` if the row has been removed. Thus, ``id_map`` must be an +array of at least ``num_rows`` :c:type:`tsk_id_t` values. @endrst @param self A pointer to a tsk_node_table_t object. -@param delete_rows Array of boolean flags describing whether a particular - row should be deleted or not. Must be at least ``num_rows`` long. +@param keep_rows Array of boolean flags describing whether a particular + row should be kept or not. Must be at least ``num_rows`` long. @param options Bitwise option flags. Currently unused; should be set to zero to ensure compatibility with later versions of tskit. @param id_map An array in which to store the mapping between new and old IDs. If NULL, this will be ignored. @return Return 0 on success or a negative value on failure. */ -int tsk_node_table_delete_rows( - tsk_node_table_t *self, bool *delete_rows, tsk_flags_t options, tsk_id_t *id_map); +int tsk_node_table_subset( + tsk_node_table_t *self, bool *keep_rows, tsk_flags_t options, tsk_id_t *id_map); /** @brief Returns true if the data in the specified table is identical to the data