Skip to content

Commit

Permalink
Refactor edge difference iterator to work from table collections as well
Browse files Browse the repository at this point in the history
as from tree sequences.
  • Loading branch information
molpopgen authored and mergify[bot] committed Feb 8, 2023
1 parent 9025f57 commit 88fe00d
Show file tree
Hide file tree
Showing 7 changed files with 258 additions and 199 deletions.
34 changes: 33 additions & 1 deletion c/tests/test_tables.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* MIT License
*
* Copyright (c) 2019-2022 Tskit Developers
* Copyright (c) 2019-2023 Tskit Developers
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand All @@ -23,6 +23,7 @@
*/

#include "testlib.h"
#include "tskit/core.h"
#include <tskit/tables.h>

#include <float.h>
Expand Down Expand Up @@ -10420,6 +10421,35 @@ test_table_collection_delete_older(void)
tsk_treeseq_free(&ts);
}

static void
test_table_collection_edge_diffs_errors(void)
{
int ret;
tsk_id_t ret_id;
tsk_table_collection_t tables;
tsk_diff_iter_t iter;

ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL(ret, 0);
tables.sequence_length = 1;
ret_id
= tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0, -1, -1, NULL, 0);
CU_ASSERT_EQUAL(ret_id, 0);
ret_id = tsk_node_table_add_row(&tables.nodes, 0, 1, -1, -1, NULL, 0);
CU_ASSERT_EQUAL(ret_id, 1);
ret = (int) tsk_edge_table_add_row(&tables.edges, 0., 1., 1, 0, NULL, 0);
CU_ASSERT_EQUAL(ret, 0);

ret = tsk_diff_iter_init(&iter, &tables, -1, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_TABLES_NOT_INDEXED);

tables.nodes.time[0] = 1;
ret = tsk_diff_iter_init(&iter, &tables, -1, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_NODE_TIME_ORDERING);

tsk_table_collection_free(&tables);
}

int
main(int argc, char **argv)
{
Expand Down Expand Up @@ -10542,6 +10572,8 @@ main(int argc, char **argv)
{ "test_table_collection_takeset_indexes",
test_table_collection_takeset_indexes },
{ "test_table_collection_delete_older", test_table_collection_delete_older },
{ "test_table_collection_edge_diffs_errors",
test_table_collection_edge_diffs_errors },
{ NULL, NULL },
};

Expand Down
4 changes: 2 additions & 2 deletions c/tests/test_trees.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* MIT License
*
* Copyright (c) 2019-2022 Tskit Developers
* Copyright (c) 2019-2023 Tskit Developers
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -407,7 +407,7 @@ verify_tree_diffs(tsk_treeseq_t *ts, tsk_flags_t options)
child[j] = TSK_NULL;
sib[j] = TSK_NULL;
}
ret = tsk_diff_iter_init(&iter, ts, options);
ret = tsk_diff_iter_init_from_ts(&iter, ts, options);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_tree_init(&tree, ts, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
Expand Down
4 changes: 2 additions & 2 deletions c/tskit/haplotype_matching.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* MIT License
*
* Copyright (c) 2019-2022 Tskit Developers
* Copyright (c) 2019-2023 Tskit Developers
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -250,7 +250,7 @@ tsk_ls_hmm_reset(tsk_ls_hmm_t *self)

/* This is safe because we've already zero'd out the memory. */
tsk_diff_iter_free(&self->diffs);
ret = tsk_diff_iter_init(&self->diffs, self->tree_sequence, false);
ret = tsk_diff_iter_init_from_ts(&self->diffs, self->tree_sequence, false);
if (ret != 0) {
goto out;
}
Expand Down
164 changes: 163 additions & 1 deletion c/tskit/tables.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* MIT License
*
* Copyright (c) 2019-2022 Tskit Developers
* Copyright (c) 2019-2023 Tskit Developers
* Copyright (c) 2017-2018 University of Oxford
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
Expand Down Expand Up @@ -13010,3 +13010,165 @@ tsk_squash_edges(tsk_edge_t *edges, tsk_size_t num_edges, tsk_size_t *num_output
out:
return ret;
}

/* ======================================================== *
* Tree diff iterator.
* ======================================================== */

int TSK_WARN_UNUSED
tsk_diff_iter_init(tsk_diff_iter_t *self, const tsk_table_collection_t *tables,
tsk_id_t num_trees, tsk_flags_t options)
{
int ret = 0;

tsk_bug_assert(tables != NULL);
tsk_memset(self, 0, sizeof(tsk_diff_iter_t));
self->num_nodes = tables->nodes.num_rows;
self->num_edges = tables->edges.num_rows;
self->tables = tables;
self->insertion_index = 0;
self->removal_index = 0;
self->tree_left = 0;
self->tree_index = -1;
if (num_trees < 0) {
num_trees = tsk_table_collection_check_integrity(self->tables, TSK_CHECK_TREES);
if (num_trees < 0) {
ret = (int) num_trees;
goto out;
}
}
self->last_index = num_trees;

if (options & TSK_INCLUDE_TERMINAL) {
self->last_index = self->last_index + 1;
}
self->edge_list_nodes = tsk_malloc(self->num_edges * sizeof(*self->edge_list_nodes));
if (self->edge_list_nodes == NULL) {
ret = TSK_ERR_NO_MEMORY;
goto out;
}
out:
return ret;
}

int
tsk_diff_iter_free(tsk_diff_iter_t *self)
{
tsk_safe_free(self->edge_list_nodes);
return 0;
}

void
tsk_diff_iter_print_state(const tsk_diff_iter_t *self, FILE *out)
{
fprintf(out, "tree_diff_iterator state\n");
fprintf(out, "num_edges = %lld\n", (long long) self->num_edges);
fprintf(out, "insertion_index = %lld\n", (long long) self->insertion_index);
fprintf(out, "removal_index = %lld\n", (long long) self->removal_index);
fprintf(out, "tree_left = %f\n", self->tree_left);
fprintf(out, "tree_index = %lld\n", (long long) self->tree_index);
}

int TSK_WARN_UNUSED
tsk_diff_iter_next(tsk_diff_iter_t *self, double *ret_left, double *ret_right,
tsk_edge_list_t *edges_out_ret, tsk_edge_list_t *edges_in_ret)
{
int ret = 0;
tsk_id_t k;
const double sequence_length = self->tables->sequence_length;
double left = self->tree_left;
double right = sequence_length;
tsk_size_t next_edge_list_node = 0;
tsk_edge_list_node_t *out_head = NULL;
tsk_edge_list_node_t *out_tail = NULL;
tsk_edge_list_node_t *in_head = NULL;
tsk_edge_list_node_t *in_tail = NULL;
tsk_edge_list_node_t *w = NULL;
tsk_edge_list_t edges_out;
tsk_edge_list_t edges_in;
const tsk_edge_table_t *edges = &self->tables->edges;
const tsk_id_t *insertion_order = self->tables->indexes.edge_insertion_order;
const tsk_id_t *removal_order = self->tables->indexes.edge_removal_order;

tsk_memset(&edges_out, 0, sizeof(edges_out));
tsk_memset(&edges_in, 0, sizeof(edges_in));

if (self->tree_index + 1 < self->last_index) {
/* First we remove the stale records */
while (self->removal_index < (tsk_id_t) self->num_edges
&& left == edges->right[removal_order[self->removal_index]]) {
k = removal_order[self->removal_index];
tsk_bug_assert(next_edge_list_node < self->num_edges);
w = &self->edge_list_nodes[next_edge_list_node];
next_edge_list_node++;
w->edge.id = k;
w->edge.left = edges->left[k];
w->edge.right = edges->right[k];
w->edge.parent = edges->parent[k];
w->edge.child = edges->child[k];
w->edge.metadata = edges->metadata + edges->metadata_offset[k];
w->edge.metadata_length
= edges->metadata_offset[k + 1] - edges->metadata_offset[k];
w->next = NULL;
w->prev = NULL;
if (out_head == NULL) {
out_head = w;
out_tail = w;
} else {
out_tail->next = w;
w->prev = out_tail;
out_tail = w;
}
self->removal_index++;
}
edges_out.head = out_head;
edges_out.tail = out_tail;

/* Now insert the new records */
while (self->insertion_index < (tsk_id_t) self->num_edges
&& left == edges->left[insertion_order[self->insertion_index]]) {
k = insertion_order[self->insertion_index];
tsk_bug_assert(next_edge_list_node < self->num_edges);
w = &self->edge_list_nodes[next_edge_list_node];
next_edge_list_node++;
w->edge.id = k;
w->edge.left = edges->left[k];
w->edge.right = edges->right[k];
w->edge.parent = edges->parent[k];
w->edge.child = edges->child[k];
w->edge.metadata = edges->metadata + edges->metadata_offset[k];
w->edge.metadata_length
= edges->metadata_offset[k + 1] - edges->metadata_offset[k];
w->next = NULL;
w->prev = NULL;
if (in_head == NULL) {
in_head = w;
in_tail = w;
} else {
in_tail->next = w;
w->prev = in_tail;
in_tail = w;
}
self->insertion_index++;
}
edges_in.head = in_head;
edges_in.tail = in_tail;

right = sequence_length;
if (self->insertion_index < (tsk_id_t) self->num_edges) {
right = TSK_MIN(right, edges->left[insertion_order[self->insertion_index]]);
}
if (self->removal_index < (tsk_id_t) self->num_edges) {
right = TSK_MIN(right, edges->right[removal_order[self->removal_index]]);
}
self->tree_index++;
ret = TSK_TREE_OK;
}
*edges_out_ret = edges_out;
*edges_in_ret = edges_in;
*ret_left = left;
*ret_right = right;
/* Set the left coordinate for the next tree */
self->tree_left = right;
return ret;
}
47 changes: 47 additions & 0 deletions c/tskit/tables.h
Original file line number Diff line number Diff line change
Expand Up @@ -670,6 +670,30 @@ typedef struct {
bool store_pairs;
} tsk_identity_segments_t;

/* Diff iterator. */
typedef struct _tsk_edge_list_node_t {
tsk_edge_t edge;
struct _tsk_edge_list_node_t *next;
struct _tsk_edge_list_node_t *prev;
} tsk_edge_list_node_t;

typedef struct {
tsk_edge_list_node_t *head;
tsk_edge_list_node_t *tail;
} tsk_edge_list_t;

typedef struct {
tsk_size_t num_nodes;
tsk_size_t num_edges;
double tree_left;
const tsk_table_collection_t *tables;
tsk_id_t insertion_index;
tsk_id_t removal_index;
tsk_id_t tree_index;
tsk_id_t last_index;
tsk_edge_list_node_t *edge_list_nodes;
} tsk_diff_iter_t;

/****************************************************************************/
/* Common function options */
/****************************************************************************/
Expand Down Expand Up @@ -892,6 +916,16 @@ top-level information of the table collections being compared.
#define TSK_CLEAR_PROVENANCE (1 << 2)
/** @} */

/* For the edge diff iterator */
#define TSK_INCLUDE_TERMINAL (1 << 0)

/** @brief Value returned by seeking methods when they have successfully
seeked to a non-null tree.
@ingroup TREE_API_SEEKING_GROUP
*/
#define TSK_TREE_OK 1

/****************************************************************************/
/* Function signatures */
/****************************************************************************/
Expand Down Expand Up @@ -4417,6 +4451,19 @@ int tsk_identity_segments_get(const tsk_identity_segments_t *self, tsk_id_t a,
void tsk_identity_segments_print_state(tsk_identity_segments_t *self, FILE *out);
int tsk_identity_segments_free(tsk_identity_segments_t *self);

/* Edge differences */

/* Internal API - currently used in a few places, but a better API is envisaged
* at some point.
* IMPORTANT: tskit-rust uses this API, so don't break without discussing!
*/
int tsk_diff_iter_init(tsk_diff_iter_t *self, const tsk_table_collection_t *tables,
tsk_id_t num_trees, tsk_flags_t options);
int tsk_diff_iter_free(tsk_diff_iter_t *self);
int tsk_diff_iter_next(tsk_diff_iter_t *self, double *left, double *right,
tsk_edge_list_t *edges_out, tsk_edge_list_t *edges_in);
void tsk_diff_iter_print_state(const tsk_diff_iter_t *self, FILE *out);

#ifdef __cplusplus
}
#endif
Expand Down
Loading

0 comments on commit 88fe00d

Please sign in to comment.