Skip to content

Commit

Permalink
Start on node table subset
Browse files Browse the repository at this point in the history
  • Loading branch information
jeromekelleher committed Feb 3, 2023
1 parent 88fcae2 commit 8989cae
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 31 deletions.
30 changes: 17 additions & 13 deletions c/tests/test_tables.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* MIT License
*
* Copyright (c) 2019-2022 Tskit Developers
* Copyright (c) 2019-2023 Tskit Developers
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -1491,14 +1491,14 @@ test_node_table_update_row(void)
}

static void
test_node_table_delete_rows(void)
test_node_table_subset(void)
{
int ret;
tsk_id_t ret_id;
tsk_node_table_t table;
tsk_node_table_t copy;
tsk_node_t row;
bool delete[3] = { 0, 0, 0 };
bool keep[3] = { 1, 1, 1 };
tsk_id_t id_map[3];
const char *metadata = "ABC";

Expand All @@ -1515,23 +1515,27 @@ test_node_table_delete_rows(void)
ret = tsk_node_table_copy(&table, &copy, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);

ret = tsk_node_table_delete_rows(&copy, delete, 0, id_map);
ret = tsk_node_table_subset(&copy, keep, 0, id_map);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_node_table_equals(&copy, &table, 0));

delete[0] = 1;
delete[1] = 1;
delete[2] = 1;
ret = tsk_node_table_delete_rows(&copy, delete, 0, id_map);
ret = tsk_node_table_subset(&copy, keep, 0, NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_node_table_equals(&copy, &table, 0));

keep[0] = 0;
keep[1] = 0;
keep[2] = 0;
ret = tsk_node_table_subset(&copy, keep, 0, id_map);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(copy.num_rows, 0);

ret = tsk_node_table_copy(&table, &copy, TSK_NO_INIT);
CU_ASSERT_EQUAL_FATAL(ret, 0);
delete[0] = 1;
delete[1] = 0;
delete[2] = 1;
ret = tsk_node_table_delete_rows(&copy, delete, 0, id_map);
keep[0] = 0;
keep[1] = 1;
keep[2] = 0;
ret = tsk_node_table_subset(&copy, keep, 0, id_map);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(copy.num_rows, 1);

Expand Down Expand Up @@ -10484,7 +10488,7 @@ main(int argc, char **argv)
CU_TestInfo tests[] = {
{ "test_node_table", test_node_table },
{ "test_node_table_update_row", test_node_table_update_row },
{ "test_node_table_delete_rows", test_node_table_delete_rows },
{ "test_node_table_subset", test_node_table_subset },
{ "test_node_table_takeset", test_node_table_takeset },
{ "test_edge_table", test_edge_table },
{ "test_edge_table_update_row", test_edge_table_update_row },
Expand Down
10 changes: 7 additions & 3 deletions c/tskit/tables.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* MIT License
*
* Copyright (c) 2019-2022 Tskit Developers
* Copyright (c) 2019-2023 Tskit Developers
* Copyright (c) 2017-2018 University of Oxford
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
Expand Down Expand Up @@ -2272,7 +2272,7 @@ tsk_node_table_get_row(const tsk_node_table_t *self, tsk_id_t index, tsk_node_t
}

int
tsk_node_table_delete_rows(tsk_node_table_t *self, bool *delete_rows,
tsk_node_table_subset(tsk_node_table_t *self, bool *keep_rows,
tsk_flags_t TSK_UNUSED(options), tsk_id_t *id_map)
{
int ret;
Expand All @@ -2281,6 +2281,10 @@ tsk_node_table_delete_rows(tsk_node_table_t *self, bool *delete_rows,
tsk_size_t j;
tsk_id_t ret_id;

/* NOTE: this could be done more efficiently here if we went
* column-by-column. Ragged arrays could be packed in-place
* with general code so there wouldn't be too much duplication.
*/
ret = tsk_node_table_copy(self, &copy, 0);
if (ret != 0) {
goto out;
Expand All @@ -2293,7 +2297,7 @@ tsk_node_table_delete_rows(tsk_node_table_t *self, bool *delete_rows,
if (id_map != NULL) {
id_map[j] = TSK_NULL;
}
if (!delete_rows[j]) {
if (keep_rows[j]) {
tsk_node_table_get_row_unsafe(&copy, (tsk_id_t) j, &node);
ret_id = tsk_node_table_add_row(self, node.flags, node.time, node.population,
node.individual, node.metadata, node.metadata_length);
Expand Down
31 changes: 16 additions & 15 deletions c/tskit/tables.h
Original file line number Diff line number Diff line change
Expand Up @@ -1392,33 +1392,34 @@ int tsk_node_table_extend(tsk_node_table_t *self, const tsk_node_table_t *other,
tsk_size_t num_rows, const tsk_id_t *row_indexes, tsk_flags_t options);

/**
@brief Deletes rows specified by a boolean array.
@brief Subset this table according to a boolean mask.
@rst
Deletes rows from this table and (optionally) return the mapping from IDs
in the current table to the updated table. The rows to delete are specified
by a boolean array, such that for each row ``j`` if ``delete_rows[j]`` is
true (or more generally non-zero), then row ``j`` will be removed.
Deletes rows from this table and (optionally) returns the mapping from IDs in
the current table to the updated table. Rows are kept or deleted according to
the specified boolean array ``keep_rows`` such that for each row ``j`` if
``keep_rows[j]`` is false (zero) the row is deleted, and otherwise the row is
retained. Thus, ``keep_rows`` must be an array of at least ``num_rows``
:c:type:`bool` values.
If the ``id_map`` argument is non-null, this array will be updated
to represent the mapping between IDs before and after row deletion.
For row ``j``, ``id_map[j]`` will contain the new ID for row ``j``
if it is retained, or :c:macro:`TSK_NULL` if the row has been removed.
Thus, ``id_map`` must be an array of at least ``num_rows`` :c:type:`tsk_id_t`
values.
If the ``id_map`` argument is non-null, this array will be updated to represent
the mapping between IDs before and after row deletion. For row ``j``,
``id_map[j]`` will contain the new ID for row ``j`` if it is retained, or
:c:macro:`TSK_NULL` if the row has been removed. Thus, ``id_map`` must be an
array of at least ``num_rows`` :c:type:`tsk_id_t` values.
@endrst
@param self A pointer to a tsk_node_table_t object.
@param delete_rows Array of boolean flags describing whether a particular
row should be deleted or not. Must be at least ``num_rows`` long.
@param keep_rows Array of boolean flags describing whether a particular
row should be kept or not. Must be at least ``num_rows`` long.
@param options Bitwise option flags. Currently unused; should be
set to zero to ensure compatibility with later versions of tskit.
@param id_map An array in which to store the mapping from old IDs to
new IDs. If NULL, this will be ignored.
@return Return 0 on success or a negative value on failure.
*/
int tsk_node_table_delete_rows(
tsk_node_table_t *self, bool *delete_rows, tsk_flags_t options, tsk_id_t *id_map);
int tsk_node_table_subset(
tsk_node_table_t *self, bool *keep_rows, tsk_flags_t options, tsk_id_t *id_map);

/**
@brief Returns true if the data in the specified table is identical to the data
Expand Down

0 comments on commit 8989cae

Please sign in to comment.