Skip to content

Commit

Permalink
Divergence matrix tree-by-tree algorithms
Browse files Browse the repository at this point in the history
Implement the basic version of the divergence matrix operation using
tree-by-tree algorithms, and provide interface for parallelising along
the genome.
  • Loading branch information
jeromekelleher committed Jul 7, 2023
1 parent 3d4fc51 commit 741164e
Show file tree
Hide file tree
Showing 9 changed files with 2,189 additions and 25 deletions.
353 changes: 352 additions & 1 deletion c/tests/test_trees.c

Large diffs are not rendered by default.

569 changes: 557 additions & 12 deletions c/tskit/trees.c

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions c/tskit/trees.h
Original file line number Diff line number Diff line change
Expand Up @@ -1003,6 +1003,10 @@ int tsk_treeseq_f4(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_windows,
const double *windows, tsk_flags_t options, double *result);

int tsk_treeseq_divergence_matrix(const tsk_treeseq_t *self, tsk_size_t num_samples,
const tsk_id_t *samples, tsk_size_t num_windows, const double *windows,
tsk_flags_t options, double *result);

/****************************************************************************/
/* Tree */
/****************************************************************************/
Expand Down
76 changes: 76 additions & 0 deletions python/_tskitmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -9638,6 +9638,78 @@ TreeSequence_f4(TreeSequence *self, PyObject *args, PyObject *kwds)
return TreeSequence_k_way_stat_method(self, args, kwds, 4, tsk_treeseq_f4);
}

static PyObject *
TreeSequence_divergence_matrix(TreeSequence *self, PyObject *args, PyObject *kwds)
{
PyObject *ret = NULL;
static char *kwlist[] = { "windows", "samples", "mode", NULL };
PyArrayObject *result_array = NULL;
PyObject *windows = NULL;
PyObject *py_samples = Py_None;
char *mode = NULL;
PyArrayObject *windows_array = NULL;
PyArrayObject *samples_array = NULL;
tsk_flags_t options = 0;
npy_intp *shape, dims[3];
tsk_size_t num_samples, num_windows;
tsk_id_t *samples = NULL;
int err;

if (TreeSequence_check_state(self) != 0) {
goto out;
}
if (!PyArg_ParseTupleAndKeywords(
args, kwds, "O|Os", kwlist, &windows, &py_samples, &mode)) {
goto out;
}
num_samples = tsk_treeseq_get_num_samples(self->tree_sequence);
if (py_samples != Py_None) {
samples_array = (PyArrayObject *) PyArray_FROMANY(
py_samples, NPY_INT32, 1, 1, NPY_ARRAY_IN_ARRAY);
if (samples_array == NULL) {
goto out;
}
shape = PyArray_DIMS(samples_array);
samples = PyArray_DATA(samples_array);
num_samples = (tsk_size_t) shape[0];
}
if (parse_windows(windows, &windows_array, &num_windows) != 0) {
goto out;
}
dims[0] = num_windows;
dims[1] = num_samples;
dims[2] = num_samples;
result_array = (PyArrayObject *) PyArray_SimpleNew(3, dims, NPY_FLOAT64);
if (result_array == NULL) {
goto out;
}
if (parse_stats_mode(mode, &options) != 0) {
goto out;
}
// clang-format off
Py_BEGIN_ALLOW_THREADS
err = tsk_treeseq_divergence_matrix(
self->tree_sequence,
num_samples, samples,
num_windows, PyArray_DATA(windows_array),
options, PyArray_DATA(result_array));
Py_END_ALLOW_THREADS
// clang-format on
/* Clang-format insists on doing this in spite of the "off" instruction above */
if (err != 0)
{
handle_library_error(err);
goto out;
}
ret = (PyObject *) result_array;
result_array = NULL;
out:
Py_XDECREF(result_array);
Py_XDECREF(windows_array);
Py_XDECREF(samples_array);
return ret;
}

static PyObject *
TreeSequence_get_num_mutations(TreeSequence *self)
{
Expand Down Expand Up @@ -10346,6 +10418,10 @@ static PyMethodDef TreeSequence_methods[] = {
.ml_meth = (PyCFunction) TreeSequence_f4,
.ml_flags = METH_VARARGS | METH_KEYWORDS,
.ml_doc = "Computes the f4 statistic." },
{ .ml_name = "divergence_matrix",
.ml_meth = (PyCFunction) TreeSequence_divergence_matrix,
.ml_flags = METH_VARARGS | METH_KEYWORDS,
.ml_doc = "Computes the pairwise divergence matrix." },
{ .ml_name = "split_edges",
.ml_meth = (PyCFunction) TreeSequence_split_edges,
.ml_flags = METH_VARARGS | METH_KEYWORDS,
Expand Down
Loading

0 comments on commit 741164e

Please sign in to comment.