-
Notifications
You must be signed in to change notification settings - Fork 74
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
time windows in statistics #2948
base: main
Are you sure you want to change the base?
Changes from 13 commits
aeda4dc
0a30696
1a988c0
18ffda0
c6f9562
7a3149f
71da7ad
2a44909
4460db9
bbec6a9
da5f205
6b3ab4f
f2d857b
b8f4ba5
c9f6c06
59ea266
96ac0ce
26a7f09
8a8c05b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -1232,6 +1232,35 @@ tsk_treeseq_check_windows(const tsk_treeseq_t *self, tsk_size_t num_windows, | |||||
return ret; | ||||||
} | ||||||
|
||||||
static int | ||||||
tsk_treeseq_check_time_windows(const tsk_treeseq_t *self, tsk_size_t num_windows, | ||||||
const double *windows, tsk_flags_t options) | ||||||
{ | ||||||
int ret = TSK_ERR_BAD_WINDOWS; | ||||||
tsk_size_t j; | ||||||
|
||||||
if (num_windows < 1) { | ||||||
ret = TSK_ERR_BAD_NUM_WINDOWS; | ||||||
goto out; | ||||||
} | ||||||
|
||||||
if (windows[0] < 0) { | ||||||
goto out; | ||||||
} | ||||||
if (windows[num_windows] > INFINITY) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm pretty sure this can't happen; I think we should either check if this is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Correct, you're right, it would be difficult to initialize something larger than INFINITY! I'm removing this. |
||||||
goto out; | ||||||
} | ||||||
|
||||||
for (j = 0; j < num_windows; j++) { | ||||||
if (windows[j] >= windows[j + 1]) { | ||||||
goto out; | ||||||
} | ||||||
} | ||||||
ret = 0; | ||||||
out: | ||||||
return ret; | ||||||
} | ||||||
|
||||||
/* TODO make these functions more consistent in how the arguments are ordered */ | ||||||
|
||||||
static inline void | ||||||
|
@@ -3484,10 +3513,25 @@ tsk_treeseq_site_allele_frequency_spectrum(const tsk_treeseq_t *self, | |||||
return ret; | ||||||
} | ||||||
|
||||||
#define MAX(a,b) ((a) > (b) ? (a) : (b)) | ||||||
#define MIN(a,b) ((a) < (b) ? (a) : (b)) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you can use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. YES! Actually. Thanks! |
||||||
|
||||||
/* int getValue_nDimensions( int * baseAddress, int * indexes, int nDimensions ) { */ | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is this needed? remove if not? |
||||||
/* int i; */ | ||||||
/* int offset = 0; */ | ||||||
/* for( i = 0; i < nDimensions; i++ ) { */ | ||||||
/* offset += pow(LEN,i) * indexes[nDimensions - (i + 1)]; */ | ||||||
/* } */ | ||||||
|
||||||
/* return *(baseAddress + offset); */ | ||||||
/* } */ | ||||||
|
||||||
static int TSK_WARN_UNUSED | ||||||
tsk_treeseq_update_branch_afs(const tsk_treeseq_t *self, tsk_id_t u, double right, | ||||||
const double *restrict branch_length, double *restrict last_update, | ||||||
const double *counts, tsk_size_t num_sample_sets, tsk_size_t window_index, | ||||||
const double *restrict time, tsk_id_t *restrict parent, const double *time_windows, | ||||||
const double *counts, tsk_size_t num_sample_sets, tsk_size_t num_windows, | ||||||
tsk_size_t num_time_windows, tsk_size_t window_index, tsk_size_t time_window_index, | ||||||
const tsk_size_t *result_dims, tsk_flags_t options, double *result) | ||||||
{ | ||||||
int ret = 0; | ||||||
|
@@ -3497,24 +3541,31 @@ tsk_treeseq_update_branch_afs(const tsk_treeseq_t *self, tsk_id_t u, double righ | |||||
tsk_size_t *coordinate = tsk_malloc(num_sample_sets * sizeof(*coordinate)); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Gee, wouldn't it be better to malloc this outside this function and pass it in? (I honestly don't know...) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ha! I don't know; I'll leave it there for now, but yeah, maybe! |
||||||
bool polarised = !!(options & TSK_STAT_POLARISED); | ||||||
const double *count_row = GET_2D_ROW(counts, num_sample_sets + 1, u); | ||||||
double x = (right - last_update[u]) * branch_length[u]; | ||||||
/* double x = (right - last_update[u]) * branch_length[u]; */ | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
double x = 0; | ||||||
double t_v = time[parent[u]]; | ||||||
double tw_branch_length = 0; | ||||||
const tsk_size_t all_samples = (tsk_size_t) count_row[num_sample_sets]; | ||||||
|
||||||
if (coordinate == NULL) { | ||||||
ret = TSK_ERR_NO_MEMORY; | ||||||
goto out; | ||||||
} | ||||||
|
||||||
if (0 < all_samples && all_samples < self->num_samples) { | ||||||
afs_size = result_dims[num_sample_sets]; | ||||||
afs = result + afs_size * window_index; | ||||||
for (k = 0; k < num_sample_sets; k++) { | ||||||
coordinate[k] = (tsk_size_t) count_row[k]; | ||||||
} | ||||||
if (!polarised) { | ||||||
fold(coordinate, result_dims, num_sample_sets); | ||||||
} | ||||||
increment_nd_array_value(afs, num_sample_sets, result_dims, coordinate, x); | ||||||
if (parent[u] != -1){ | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hah - it took me a while to see why we needed this, but now I see it |
||||||
if (0 < all_samples && all_samples < self->num_samples) { | ||||||
for (time_window_index = 0; time_window_index < num_time_windows; time_window_index++){ | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A lot of edges are recent, so we might avoid substantial work if we do like
|
||||||
afs_size = result_dims[num_sample_sets]; | ||||||
afs = result + afs_size * (window_index * num_time_windows + time_window_index); | ||||||
for (k = 0; k < num_sample_sets; k++) { | ||||||
coordinate[k] = (tsk_size_t) count_row[k]; | ||||||
} | ||||||
if (!polarised){ | ||||||
fold(coordinate, result_dims, num_sample_sets); | ||||||
} | ||||||
tw_branch_length = MIN(time_windows[time_window_index + 1], t_v) - MAX(time_windows[0], time[u]); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. shouldn't this be
Suggested change
? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hm - the tests below should be catching this if it is indeed wrong, but it sure looks wrong to me - I'm not sure what's going on? |
||||||
x = (right - last_update[u]) * tw_branch_length; | ||||||
increment_nd_array_value(afs, num_sample_sets, result_dims, coordinate, x); | ||||||
} | ||||||
} | ||||||
} | ||||||
last_update[u] = right; | ||||||
out: | ||||||
|
@@ -3525,12 +3576,12 @@ tsk_treeseq_update_branch_afs(const tsk_treeseq_t *self, tsk_id_t u, double righ | |||||
static int | ||||||
tsk_treeseq_branch_allele_frequency_spectrum(const tsk_treeseq_t *self, | ||||||
tsk_size_t num_sample_sets, double *counts, tsk_size_t num_windows, | ||||||
const double *windows, const tsk_size_t *result_dims, tsk_flags_t options, | ||||||
double *result) | ||||||
tsk_size_t num_time_windows, const double *windows, const double *time_windows, | ||||||
const tsk_size_t *result_dims, tsk_flags_t options, double *result) | ||||||
{ | ||||||
int ret = 0; | ||||||
tsk_id_t u, v; | ||||||
tsk_size_t window_index; | ||||||
tsk_size_t window_index, time_window_index; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Correct. So the allocation should be done locally too (in the update function), I presume. |
||||||
tsk_size_t num_nodes = self->tables->nodes.num_rows; | ||||||
const tsk_id_t num_edges = (tsk_id_t) self->tables->edges.num_rows; | ||||||
const tsk_id_t *restrict I = self->tables->indexes.edge_insertion_order; | ||||||
|
@@ -3564,6 +3615,7 @@ tsk_treeseq_branch_allele_frequency_spectrum(const tsk_treeseq_t *self, | |||||
tk = 0; | ||||||
t_left = 0; | ||||||
window_index = 0; | ||||||
time_window_index = 0; | ||||||
while (tj < num_edges || t_left < sequence_length) { | ||||||
tsk_bug_assert(window_index < num_windows); | ||||||
while (tk < num_edges && edge_right[O[tk]] == t_left) { | ||||||
|
@@ -3572,15 +3624,17 @@ tsk_treeseq_branch_allele_frequency_spectrum(const tsk_treeseq_t *self, | |||||
u = edge_child[h]; | ||||||
v = edge_parent[h]; | ||||||
ret = tsk_treeseq_update_branch_afs(self, u, t_left, branch_length, | ||||||
last_update, counts, num_sample_sets, window_index, result_dims, options, | ||||||
result); | ||||||
last_update, node_time, parent, time_windows, counts, num_sample_sets, | ||||||
num_windows, num_time_windows, window_index, time_window_index, | ||||||
result_dims, options, result); | ||||||
if (ret != 0) { | ||||||
goto out; | ||||||
} | ||||||
while (v != TSK_NULL) { | ||||||
ret = tsk_treeseq_update_branch_afs(self, v, t_left, branch_length, | ||||||
last_update, counts, num_sample_sets, window_index, result_dims, | ||||||
options, result); | ||||||
last_update, node_time, parent, time_windows, counts, | ||||||
num_sample_sets, num_windows, num_time_windows, window_index, | ||||||
time_window_index, result_dims, options, result); | ||||||
if (ret != 0) { | ||||||
goto out; | ||||||
} | ||||||
|
@@ -3600,8 +3654,9 @@ tsk_treeseq_branch_allele_frequency_spectrum(const tsk_treeseq_t *self, | |||||
branch_length[u] = node_time[v] - node_time[u]; | ||||||
while (v != TSK_NULL) { | ||||||
ret = tsk_treeseq_update_branch_afs(self, v, t_left, branch_length, | ||||||
last_update, counts, num_sample_sets, window_index, result_dims, | ||||||
options, result); | ||||||
last_update, node_time, parent, time_windows, counts, | ||||||
num_sample_sets, num_windows, num_time_windows, window_index, | ||||||
time_window_index, result_dims, options, result); | ||||||
if (ret != 0) { | ||||||
goto out; | ||||||
} | ||||||
|
@@ -3624,8 +3679,9 @@ tsk_treeseq_branch_allele_frequency_spectrum(const tsk_treeseq_t *self, | |||||
for (u = 0; u < (tsk_id_t) num_nodes; u++) { | ||||||
tsk_bug_assert(last_update[u] < w_right); | ||||||
ret = tsk_treeseq_update_branch_afs(self, u, w_right, branch_length, | ||||||
last_update, counts, num_sample_sets, window_index, result_dims, | ||||||
options, result); | ||||||
last_update, node_time, parent, time_windows, counts, | ||||||
num_sample_sets, num_windows, num_time_windows, window_index, | ||||||
time_window_index, result_dims, options, result); | ||||||
if (ret != 0) { | ||||||
goto out; | ||||||
} | ||||||
|
@@ -3653,13 +3709,15 @@ int | |||||
tsk_treeseq_allele_frequency_spectrum(const tsk_treeseq_t *self, | ||||||
tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes, | ||||||
const tsk_id_t *sample_sets, tsk_size_t num_windows, const double *windows, | ||||||
tsk_flags_t options, double *result) | ||||||
tsk_size_t num_time_windows, const double *time_windows, tsk_flags_t options, | ||||||
double *result) | ||||||
{ | ||||||
int ret = 0; | ||||||
bool stat_site = !!(options & TSK_STAT_SITE); | ||||||
bool stat_branch = !!(options & TSK_STAT_BRANCH); | ||||||
bool stat_node = !!(options & TSK_STAT_NODE); | ||||||
const double default_windows[] = { 0, self->tables->sequence_length }; | ||||||
const double default_time_windows[] = { 0, INFINITY }; | ||||||
const tsk_size_t num_nodes = self->tables->nodes.num_rows; | ||||||
const tsk_size_t K = num_sample_sets + 1; | ||||||
tsk_size_t j, k, l, afs_size; | ||||||
|
@@ -3669,7 +3727,6 @@ tsk_treeseq_allele_frequency_spectrum(const tsk_treeseq_t *self, | |||||
* reuse code from the general_stats code paths. */ | ||||||
double *counts = NULL; | ||||||
double *count_row; | ||||||
|
||||||
if (stat_node) { | ||||||
ret = TSK_ERR_UNSUPPORTED_STAT_MODE; | ||||||
goto out; | ||||||
|
@@ -3693,6 +3750,16 @@ tsk_treeseq_allele_frequency_spectrum(const tsk_treeseq_t *self, | |||||
goto out; | ||||||
} | ||||||
} | ||||||
if (time_windows == NULL) { | ||||||
num_time_windows = 1; | ||||||
time_windows = default_time_windows; | ||||||
} else { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. After this line is probably the right place to check if it's |
||||||
ret = tsk_treeseq_check_time_windows( | ||||||
self, num_time_windows, time_windows, TSK_REQUIRE_FULL_SPAN); | ||||||
if (ret != 0) { | ||||||
goto out; | ||||||
} | ||||||
} | ||||||
ret = tsk_treeseq_check_sample_sets( | ||||||
self, num_sample_sets, sample_set_sizes, sample_sets); | ||||||
if (ret != 0) { | ||||||
|
@@ -3728,15 +3795,17 @@ tsk_treeseq_allele_frequency_spectrum(const tsk_treeseq_t *self, | |||||
count_row[num_sample_sets] = 1; | ||||||
} | ||||||
result_dims[num_sample_sets] = (tsk_size_t) afs_size; | ||||||
// Initiate memory for result array | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
tsk_memset(result, 0, num_windows * num_time_windows * afs_size * sizeof(*result)); | ||||||
|
||||||
tsk_memset(result, 0, num_windows * afs_size * sizeof(*result)); | ||||||
if (stat_site) { | ||||||
ret = tsk_treeseq_site_allele_frequency_spectrum(self, num_sample_sets, | ||||||
sample_set_sizes, counts, num_windows, windows, result_dims, options, | ||||||
result); | ||||||
} else { | ||||||
ret = tsk_treeseq_branch_allele_frequency_spectrum(self, num_sample_sets, counts, | ||||||
num_windows, windows, result_dims, options, result); | ||||||
num_windows, num_time_windows, windows, time_windows, result_dims, options, | ||||||
result); | ||||||
} | ||||||
|
||||||
if (options & TSK_STAT_SPAN_NORMALISE) { | ||||||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -9077,7 +9077,7 @@ parse_windows( | |||||
npy_intp *shape; | ||||||
|
||||||
windows_array = (PyArrayObject *) PyArray_FROMANY( | ||||||
windows, NPY_FLOAT64, 1, 1, NPY_ARRAY_IN_ARRAY); | ||||||
windows, NPY_FLOAT64, 1, 1, NPY_ARRAY_IN_ARRAY); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Was this change (and others like it) done by linting? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, or probably an indentation that I made by mistake. I put it back how it was before. |
||||||
if (windows_array == NULL) { | ||||||
goto out; | ||||||
} | ||||||
|
@@ -9095,6 +9095,7 @@ parse_windows( | |||||
return ret; | ||||||
} | ||||||
|
||||||
|
||||||
static PyArrayObject * | ||||||
TreeSequence_allocate_results_array( | ||||||
TreeSequence *self, tsk_flags_t mode, tsk_size_t num_windows, tsk_size_t output_dim) | ||||||
|
@@ -9439,29 +9440,30 @@ TreeSequence_allele_frequency_spectrum( | |||||
TreeSequence *self, PyObject *args, PyObject *kwds) | ||||||
{ | ||||||
PyObject *ret = NULL; | ||||||
static char *kwlist[] = { "sample_set_sizes", "sample_sets", "windows", "mode", | ||||||
static char *kwlist[] = { "sample_set_sizes", "sample_sets", "windows", "time_windows", "mode", | ||||||
"span_normalise", "polarised", NULL }; | ||||||
PyObject *sample_set_sizes = NULL; | ||||||
PyObject *sample_sets = NULL; | ||||||
PyObject *windows = NULL; | ||||||
char *mode = NULL; | ||||||
PyObject *time_windows = NULL; | ||||||
char *mode = "NULL"; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
PyArrayObject *sample_set_sizes_array = NULL; | ||||||
PyArrayObject *sample_sets_array = NULL; | ||||||
PyArrayObject *windows_array = NULL; | ||||||
PyArrayObject *time_windows_array = NULL; | ||||||
PyArrayObject *result_array = NULL; | ||||||
tsk_size_t *sizes; | ||||||
npy_intp *shape = NULL; | ||||||
tsk_size_t k, num_windows, num_sample_sets; | ||||||
tsk_size_t k, num_windows, num_time_windows, num_sample_sets; | ||||||
tsk_flags_t options = 0; | ||||||
int polarised = 0; | ||||||
int span_normalise = 1; | ||||||
int err; | ||||||
|
||||||
if (TreeSequence_check_state(self) != 0) { | ||||||
goto out; | ||||||
} | ||||||
if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOO|sii", kwlist, &sample_set_sizes, | ||||||
&sample_sets, &windows, &mode, &span_normalise, &polarised)) { | ||||||
if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOO|sii", kwlist, &sample_set_sizes, | ||||||
&sample_sets, &windows, &time_windows, &mode, &span_normalise, &polarised)) { | ||||||
goto out; | ||||||
} | ||||||
if (parse_stats_mode(mode, &options) != 0) { | ||||||
|
@@ -9481,24 +9483,28 @@ TreeSequence_allele_frequency_spectrum( | |||||
if (parse_windows(windows, &windows_array, &num_windows) != 0) { | ||||||
goto out; | ||||||
} | ||||||
|
||||||
shape = PyMem_Malloc((num_sample_sets + 1) * sizeof(*shape)); | ||||||
if (parse_windows(time_windows, &time_windows_array, &num_time_windows) != 0) { | ||||||
goto out; | ||||||
} | ||||||
shape = PyMem_Malloc((num_sample_sets + 1 + 1) * sizeof(*shape)); | ||||||
if (shape == NULL) { | ||||||
goto out; | ||||||
} | ||||||
sizes = PyArray_DATA(sample_set_sizes_array); | ||||||
shape[0] = num_windows; | ||||||
shape[1] = num_time_windows; | ||||||
for (k = 0; k < num_sample_sets; k++) { | ||||||
shape[k + 1] = 1 + sizes[k]; | ||||||
shape[k + 1 + 1] = 1 + sizes[k]; | ||||||
} | ||||||
result_array | ||||||
= (PyArrayObject *) PyArray_SimpleNew(1 + num_sample_sets, shape, NPY_FLOAT64); | ||||||
= (PyArrayObject *) PyArray_SimpleNew(1 + 1 + num_sample_sets, shape, NPY_FLOAT64); | ||||||
if (result_array == NULL) { | ||||||
goto out; | ||||||
} | ||||||
err = tsk_treeseq_allele_frequency_spectrum(self->tree_sequence, num_sample_sets, | ||||||
PyArray_DATA(sample_set_sizes_array), PyArray_DATA(sample_sets_array), | ||||||
num_windows, PyArray_DATA(windows_array), options, PyArray_DATA(result_array)); | ||||||
num_windows, PyArray_DATA(windows_array), num_time_windows, | ||||||
PyArray_DATA(time_windows_array), options, PyArray_DATA(result_array)); | ||||||
if (err != 0) { | ||||||
handle_library_error(err); | ||||||
goto out; | ||||||
|
@@ -9510,6 +9516,7 @@ TreeSequence_allele_frequency_spectrum( | |||||
Py_XDECREF(sample_set_sizes_array); | ||||||
Py_XDECREF(sample_sets_array); | ||||||
Py_XDECREF(windows_array); | ||||||
Py_XDECREF(time_windows_array); | ||||||
Py_XDECREF(result_array); | ||||||
return ret; | ||||||
} | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If the code currently assumes this is 0, it should check for equality (`==`) here.