Skip to content

Commit

Permalink
Merge pull request #2313 from jeromekelleher/v1.3.3-hotfix
Browse files Browse the repository at this point in the history
V1.3.3 hotfix
  • Loading branch information
jeromekelleher authored Aug 7, 2024
2 parents f6198b1 + dcc3d2c commit 0da0150
Show file tree
Hide file tree
Showing 7 changed files with 147 additions and 102 deletions.
13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,18 @@
# Changelog

## [1.3.3] - 2024-08-07

Bugfix release for issues with Dirac and Beta coalescent models.

**Bug fixes**:

- Fix segfault for in Dirac and Beta coalescent models for ploidy > 2 ({issue}`2307`, {pr}`{2308}`)

- Correct the Dirac coalescent time scaling with polyploidy and population growth. ({pr}`{2310},
{user}`JereKoskela`)

- Allow psi = 1 in the Dirac coalescent ({pr}`{2310}, {user}`JereKoskela`).

## [1.3.2] - 2024-07-08

- Add `record_provenance` argument to `sim_mutations` ({issue}`2272`, {pr}`2273`, {user}`peterlharp`)
Expand Down
158 changes: 72 additions & 86 deletions lib/msprime.c
Original file line number Diff line number Diff line change
Expand Up @@ -6763,33 +6763,18 @@ msp_dirac_get_common_ancestor_waiting_time_from_rate(
msp_t *self, population_t *pop, double lambda)
{
double ret = DBL_MAX;
double alpha = pop->growth_rate;
double alpha = 2.0 * pop->growth_rate;
double t = self->time;
double u, dt, z;
double pop_size = pop->initial_size;

if (lambda > 0.0) {
u = gsl_ran_exponential(self->rng, 1.0 / lambda);
if (alpha == 0.0) {
if (self->ploidy == 1) {
ret = pop->initial_size * pop->initial_size * u;
} else {
/* For ploidy > 1 we assume N/2 two-parent families, so that the rate
* with which 2 lineages belong to a common family is (2/N)^2 */
ret = pop->initial_size * pop->initial_size * u / 4.0;
}
ret = pop_size * pop_size * u;
} else {
dt = t - pop->start_time;
if (self->ploidy == 1) {
z = 1
+ alpha * pop->initial_size * pop->initial_size * exp(-alpha * dt)
* u;
} else {
/* For ploidy > 1 we assume N/2 two-parent families, so that the rate
* with which 2 lineages belong to a common family is (2/N)^2 */
z = 1
+ alpha * pop->initial_size * pop->initial_size * exp(-alpha * dt)
* u / 4.0;
}
z = 1 + alpha * pop_size * pop_size * exp(-alpha * dt) * u;
/* if z is <= 0 no coancestry can occur */
if (z > 0) {
ret = log(z) / alpha;
Expand All @@ -6808,13 +6793,7 @@ msp_dirac_get_common_ancestor_waiting_time(
{
population_t *pop = &self->populations[pop_id];
unsigned int n = (unsigned int) avl_count(&pop->ancestors[label]);
double c = self->model.params.dirac_coalescent.c;
double lambda = n * (n - 1.0) / 2.0;
if (self->ploidy == 1) {
lambda += c;
} else {
lambda += c / (2.0 * self->ploidy);
}
double lambda = gsl_sf_choose(n, 2) + self->model.params.dirac_coalescent.c;

return msp_dirac_get_common_ancestor_waiting_time_from_rate(self, pop, lambda);
}
Expand All @@ -6824,71 +6803,71 @@ msp_dirac_common_ancestor_event(msp_t *self, population_id_t pop_id, label_id_t
{
int ret = 0;
uint32_t j, n, num_participants, num_parental_copies;
avl_tree_t *ancestors, Q[4]; /* MSVC won't let us use num_pots here */
avl_tree_t *ancestors;
avl_tree_t *Q = NULL;
avl_node_t *x_node, *y_node;
segment_t *x, *y;
double nC2, p;
double psi = self->model.params.dirac_coalescent.psi;

/* We assume haploid reproduction is single-parent, while all other ploidies
* are two-parent */
if (self->ploidy == 1) {
num_parental_copies = 1;
} else {
num_parental_copies = 2 * self->ploidy;
}

ancestors = &self->populations[pop_id].ancestors[label];
n = avl_count(ancestors);
nC2 = gsl_sf_choose(n, 2);
if (self->ploidy == 1) {
p = (nC2 / (nC2 + self->model.params.dirac_coalescent.c));
} else {
p = (nC2 / (nC2 + self->model.params.dirac_coalescent.c / (2.0 * self->ploidy)));
}
p = nC2 / (nC2 + self->model.params.dirac_coalescent.c);

if (gsl_rng_uniform(self->rng) < p) {
/* When 2 * ploidy parental chromosomes are available, Mendelian segregation
* results in a merger only 1 / (2 * ploidy) of the time. */
if (self->ploidy == 1
|| gsl_rng_uniform(self->rng) < 1.0 / (2.0 * self->ploidy)) {
/* Choose x and y */
n = avl_count(ancestors);
j = (uint32_t) gsl_rng_uniform_int(self->rng, n);
x_node = avl_at(ancestors, j);
tsk_bug_assert(x_node != NULL);
x = (segment_t *) x_node->item;
avl_unlink_node(ancestors, x_node);
j = (uint32_t) gsl_rng_uniform_int(self->rng, n - 1);
y_node = avl_at(ancestors, j);
tsk_bug_assert(y_node != NULL);
y = (segment_t *) y_node->item;
avl_unlink_node(ancestors, y_node);
self->num_ca_events++;
msp_free_avl_node(self, x_node);
msp_free_avl_node(self, y_node);
ret = msp_merge_two_ancestors(self, pop_id, label, x, y, TSK_NULL, NULL);
}
/* Choose x and y */
n = avl_count(ancestors);
j = (uint32_t) gsl_rng_uniform_int(self->rng, n);
x_node = avl_at(ancestors, j);
tsk_bug_assert(x_node != NULL);
x = (segment_t *) x_node->item;
avl_unlink_node(ancestors, x_node);
j = (uint32_t) gsl_rng_uniform_int(self->rng, n - 1);
y_node = avl_at(ancestors, j);
tsk_bug_assert(y_node != NULL);
y = (segment_t *) y_node->item;
avl_unlink_node(ancestors, y_node);
self->num_ca_events++;
msp_free_avl_node(self, x_node);
msp_free_avl_node(self, y_node);
ret = msp_merge_two_ancestors(self, pop_id, label, x, y, TSK_NULL, NULL);
} else {
for (j = 0; j < num_parental_copies; j++) {
avl_init_tree(&Q[j], cmp_segment_queue, NULL);
}
num_participants = gsl_ran_binomial(self->rng, psi, n);
ret = msp_multi_merger_common_ancestor_event(
self, ancestors, Q, num_participants, num_parental_copies);
if (ret < 0) {
goto out;
}
/* All the lineages that have been assigned to the particular pots can now be
* merged.
*/
for (j = 0; j < num_parental_copies; j++) {
ret = msp_merge_ancestors(self, &Q[j], pop_id, label, TSK_NULL, NULL);
if (num_participants > 1) {
/* We assume haploid reproduction is single-parent, while all other ploidies
* are two-parent */
if (self->ploidy == 1) {
num_parental_copies = 1;
} else {
num_parental_copies = 2 * self->ploidy;
}
Q = tsk_malloc(num_parental_copies * sizeof(*Q));
if (Q == NULL) {
ret = MSP_ERR_NO_MEMORY;
goto out;
}
for (j = 0; j < num_parental_copies; j++) {
avl_init_tree(&Q[j], cmp_segment_queue, NULL);
}
ret = msp_multi_merger_common_ancestor_event(
self, ancestors, Q, num_participants, num_parental_copies);
if (ret < 0) {
goto out;
}
/* All the lineages that have been assigned to the particular pots can now be
* merged.
*/
for (j = 0; j < num_parental_copies; j++) {
ret = msp_merge_ancestors(self, &Q[j], pop_id, label, TSK_NULL, NULL);
if (ret < 0) {
goto out;
}
}
}
}
out:
tsk_safe_free(Q);
return ret;
}

Expand Down Expand Up @@ -7035,22 +7014,12 @@ msp_beta_common_ancestor_event(msp_t *self, population_id_t pop_id, label_id_t l
{
int ret = 0;
uint32_t j, n, num_participants, num_parental_copies;
avl_tree_t *ancestors, Q[4]; /* MSVC won't let us use num_pots here */
avl_tree_t *ancestors;
avl_tree_t *Q = NULL;
double alpha = self->model.params.beta_coalescent.alpha;
double truncation_point = beta_compute_truncation(self);
double beta_x, u, increment;

/* We assume haploid reproduction is single-parent, while all other ploidies
* are two-parent */
if (self->ploidy == 1) {
num_parental_copies = 1;
} else {
num_parental_copies = 2 * self->ploidy;
}

for (j = 0; j < num_parental_copies; j++) {
avl_init_tree(&Q[j], cmp_segment_queue, NULL);
}
ancestors = &self->populations[pop_id].ancestors[label];
n = avl_count(ancestors);
beta_x = ran_inc_beta(self->rng, 2.0 - alpha, alpha, truncation_point);
Expand Down Expand Up @@ -7088,6 +7057,22 @@ msp_beta_common_ancestor_event(msp_t *self, population_id_t pop_id, label_id_t l
num_participants = 2 + gsl_ran_binomial(self->rng, beta_x, n - 2);
} while (gsl_rng_uniform(self->rng) > 1 / gsl_sf_choose(num_participants, 2));

/* We assume haploid reproduction is single-parent, while all other ploidies
* are two-parent */
if (self->ploidy == 1) {
num_parental_copies = 1;
} else {
num_parental_copies = 2 * self->ploidy;
}
Q = tsk_malloc(num_parental_copies * sizeof(*Q));
if (Q == NULL) {
ret = MSP_ERR_NO_MEMORY;
goto out;
}

for (j = 0; j < num_parental_copies; j++) {
avl_init_tree(&Q[j], cmp_segment_queue, NULL);
}
ret = msp_multi_merger_common_ancestor_event(
self, ancestors, Q, num_participants, num_parental_copies);
if (ret < 0) {
Expand All @@ -7106,6 +7091,7 @@ msp_beta_common_ancestor_event(msp_t *self, population_id_t pop_id, label_id_t l
}

out:
tsk_safe_free(Q);
return ret;
}

Expand Down
42 changes: 42 additions & 0 deletions lib/tests/test_ancestry.c
Original file line number Diff line number Diff line change
Expand Up @@ -1515,6 +1515,47 @@ test_multiple_mergers_simulation(void)
gsl_rng_free(rng);
}

static void
test_multiple_mergers_ploidy(void)
{
int ret;
size_t j, ploidy;
uint32_t n = 10;
long seed = 10;
msp_t msp;
gsl_rng *rng = safe_rng_alloc();
tsk_table_collection_t tables;

for (j = 0; j < 2; j++) {
gsl_rng_set(rng, seed);
for (ploidy = 1; ploidy < 12; ploidy++) {
ret = build_sim(&msp, &tables, rng, 10, 1, NULL, n);
CU_ASSERT_EQUAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(msp_set_recombination_rate(&msp, 0.1), 0);
if (j == 0) {
ret = msp_set_simulation_model_dirac(&msp, 0.9, 10);
} else {
ret = msp_set_simulation_model_beta(&msp, 1.8, 1);
}
CU_ASSERT_EQUAL(ret, 0);

msp_set_ploidy(&msp, ploidy);
ret = msp_initialise(&msp);

ret = msp_run(&msp, DBL_MAX, ULONG_MAX);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(msp_is_completed(&msp));
CU_ASSERT_TRUE(msp.time > 0);
msp_verify(&msp, 0);

ret = msp_free(&msp);
CU_ASSERT_EQUAL(ret, 0);
tsk_table_collection_free(&tables);
}
}
gsl_rng_free(rng);
}

static void
test_multiple_mergers_growth_rate(void)
{
Expand Down Expand Up @@ -3875,6 +3916,7 @@ main(int argc, char **argv)
{ "test_gc_rates", test_gc_rates },

{ "test_multiple_mergers_simulation", test_multiple_mergers_simulation },
{ "test_multiple_mergers_ploidy", test_multiple_mergers_ploidy },
{ "test_multiple_mergers_growth_rate", test_multiple_mergers_growth_rate },
{ "test_dirac_coalescent_bad_parameters", test_dirac_coalescent_bad_parameters },
{ "test_beta_coalescent_bad_parameters", test_beta_coalescent_bad_parameters },
Expand Down
4 changes: 2 additions & 2 deletions msprime/_msprimemodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -1177,8 +1177,8 @@ Simulator_parse_simulation_model(Simulator *self, PyObject *py_model)
goto out;
}
c = PyFloat_AsDouble(value);
if (psi <= 0 || psi >= 1.0) {
PyErr_SetString(PyExc_ValueError, "Must have 0 < psi < 1");
if (psi <= 0 || psi > 1.0) {
PyErr_SetString(PyExc_ValueError, "Must have 0 < psi <= 1");
goto out;
}
if (c < 0) {
Expand Down
2 changes: 1 addition & 1 deletion tests/test_ancestry.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ def test_multimerger_dirac(self):
recombination_rate=0.1,
record_full_arg=True,
sequence_length=10,
model=msprime.DiracCoalescent(psi=0.5, c=5),
model=msprime.DiracCoalescent(psi=0.5, c=100),
)
self.verify(sim, multiple_mergers=True)

Expand Down
4 changes: 2 additions & 2 deletions tests/test_lowlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1327,7 +1327,7 @@ def test_dirac_simulation_model(self):
make_sim(model=model)
with pytest.raises(ValueError):
make_sim(model=get_simulation_model("dirac"))
for bad_psi in [-1, 0, -1e-6, 1, 1e6]:
for bad_psi in [-1, 0, -1e-6, 1 + 1e-6, 1e6]:
with pytest.raises(ValueError):
make_sim(
model=get_simulation_model("dirac", c=1, psi=bad_psi),
Expand All @@ -1337,7 +1337,7 @@ def test_dirac_simulation_model(self):
make_sim(
model=get_simulation_model("dirac", psi=0.5, c=bad_c),
)
for psi in [0.99, 0.2, 1e-4]:
for psi in [1, 0.2, 1e-4]:
for c in [5.0, 1e2, 1e-4]:
model = get_simulation_model("dirac", psi=psi, c=c)
sim = make_sim(model=model)
Expand Down
26 changes: 15 additions & 11 deletions verification.py
Original file line number Diff line number Diff line change
Expand Up @@ -3431,12 +3431,14 @@ def _run(self, pop_size, alpha, growth_rate, num_replicates=10000):
logging.debug(f"running Beta growth for {pop_size} {alpha} {growth_rate}")
b = growth_rate * (alpha - 1)
model = (msprime.BetaCoalescent(alpha=alpha),)
ploidy = 2
a = 1 / (2 * ploidy * self.compute_beta_timescale(pop_size, alpha, ploidy))
name = f"N={pop_size}_alpha={alpha}_growth_rate={growth_rate}_ploidy={ploidy}"
self.compare_tmrca(
pop_size, growth_rate, model, num_replicates, a, b, ploidy, name
)
for ploidy in range(2, 7):
a = 1 / (2 * ploidy * self.compute_beta_timescale(pop_size, alpha, ploidy))
name = (
f"N={pop_size}_alpha={alpha}_growth_rate={growth_rate}_ploidy={ploidy}"
)
self.compare_tmrca(
pop_size, growth_rate, model, num_replicates, a, b, ploidy, name
)
ploidy = 1
a = 1 / self.compute_beta_timescale(pop_size, alpha, ploidy)
name = f"N={pop_size}_alpha={alpha}_growth_rate={growth_rate}_ploidy={ploidy}"
Expand Down Expand Up @@ -3474,12 +3476,14 @@ def test_100000_11_001(self):
class DiracGrowth(XiGrowth):
def _run(self, pop_size, c, psi, growth_rate, num_replicates=10000):
logging.debug(f"running Dirac growth for {pop_size} {c} {psi} {growth_rate}")
b = growth_rate
b = 2 * growth_rate
model = (msprime.DiracCoalescent(psi=psi, c=c),)
p = 2
a = (1 + c * psi * psi / (2 * p)) / (pop_size * pop_size)
name = f"N={pop_size}_c={c}_psi={psi}_growth_rate={growth_rate}_ploidy={p}"
self.compare_tmrca(pop_size, growth_rate, model, num_replicates, a, b, p, name)
for p in range(2, 7):
a = (1 + c * psi * psi / (2 * p)) / (pop_size * pop_size)
name = f"N={pop_size}_c={c}_psi={psi}_growth_rate={growth_rate}_ploidy={p}"
self.compare_tmrca(
pop_size, growth_rate, model, num_replicates, a, b, p, name
)
p = 1
a = (1 + c * psi * psi) / (pop_size * pop_size)
name = f"N={pop_size}_c={c}_psi={psi}_growth_rate={growth_rate}_ploidy={p}"
Expand Down

0 comments on commit 0da0150

Please sign in to comment.