You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
@nspope sent me this, which is a good core for a new tutorial on cross coalescence rates. Probably worth waiting until some of the core functionality (e.g. rates_from_ecdf) is implemented in tskit.
importstdpopsimimportnumpyasnpimportmatplotlib.pyplotaspltdefrates_from_ecdf(weights, atoms, quantiles):
""" Estimate rates from weighted time-to-event data """assertweights.size==atoms.sizeassertquantiles.size>2assertquantiles[0] ==0.0andquantiles[-1] ==1.0# sort and filter inputstime_order=np.argsort(atoms)
weights=weights[time_order]
atoms=atoms[time_order]
nonzero=weights>0atoms=atoms[nonzero]
weights=weights[nonzero]
# find interior quantilesweights=np.append(0, weights)
atoms=np.append(0, atoms)
ecdf=np.cumsum(weights)
indices=np.searchsorted(ecdf, ecdf[-1] *quantiles[1:-1], side='left')
lower, upper=atoms[indices-1], atoms[indices]
ecdfl, ecdfu=ecdf[indices-1] /ecdf[-1], ecdf[indices] /ecdf[-1]
# interpolate ECDFassertnp.all(ecdfu-ecdfl>0)
slope= (upper-lower) / (ecdfu-ecdfl)
breaks=np.append(0, lower+slope* (quantiles[1:-1] -ecdfl))
# calculate coalescence rates within intervals# this uses a Kaplan-Meier-type censored estimator: https://github.com/tskit-dev/tskit/pull/2119coalrate=np.full(quantiles.size-1, np.nan)
propcoal=np.diff(quantiles[:-1]) / (1-quantiles[:-2])
coalrate[:-1] =-np.log(1-propcoal) /np.diff(breaks)
last=indices[-1]
coalrate[-1] =np.sum(weights[last:]) /np.dot(atoms[last:] -breaks[-1], weights[last:])
returncoalrate, breaks# --- on dataimportstdpopsimengine=stdpopsim.get_engine("msprime")
homsap=stdpopsim.get_species("HomSap")
demogr=homsap.get_demographic_model("OutOfAfrica_4J17")
contig=homsap.get_contig("chr17", genetic_map='HapMapII_GRCh38', right=1e7)
sample= {"CEU" : 100, "YRI" : 100, "JPT" : 100, "CHB" : 100}
ts=engine.simulate(demogr, contig, sample, seed=1024).trim()
pop_idx= {p.metadata['name'] : ifori, pinenumerate(ts.populations())}
sample_sets= [
np.flatnonzero(ts.nodes_population[:ts.num_samples] ==i)
foriinrange(ts.num_populations)
]
quantiles=np.linspace(0.0, 1.0, 25)
indexes= [(pop_idx["CEU"], pop_idx["CHB"])] # CEU vs CHBnode_coal_weight=ts.pair_coalescence_counts(sample_sets, indexes=indexes).flatten()
rates, breaks=rates_from_ecdf(node_coal_weight, ts.nodes_time, quantiles)
# sanity check: mean of cross-coalescence time distribution should equal branch-mode divergenceprint(
"Mean of cross-coalescence time distr: ",
2*node_coal_weight @ ts.nodes_time/np.sum(node_coal_weight),
"Branch-mode divergence: ",
ts.divergence(sample_sets, indexes=indexes, mode='branch'),
)
# sanity check: compare estimated rates to expectation under modeltheory_rates=demogr.model.debug().coalescence_rate_trajectory(breaks, {"CEU":1, "CHB":1})[0]
plt.clf()
plt.step(breaks, rates, color='firebrick', label='estimated')
plt.step(breaks, theory_rates, color='black', label='theoretical')
plt.xscale('log')
plt.yscale('log')
plt.xlabel('Generations in past')
plt.ylabel('Cross-coalescence rate')
plt.legend()
plt.savefig('rate-check.png')
# as an aside: rates are a 1-to-1 transformation of the pair coalescence time distribution. We can plot this distribution using e.g. a weighted density estimate or histogram.hist_density, hist_breaks=np.histogram(
np.log10(ts.nodes_time[ts.num_samples:]),
weights=node_coal_weight[ts.num_samples:],
bins=25,
density=True,
)
plt.clf()
plt.fill_between(hist_breaks[:-1], hist_density, step='pre', alpha=0.5)
plt.xlabel('log10 generations in past')
plt.ylabel('Density')
plt.savefig('rate-hist.png')
# this also gives a very easy way to get coalescence rates if we have a fixed timegrid# (but, rates will be noisier in bins with few nodes, and have to watch for nan)hist_density, hist_breaks=np.histogram(
ts.nodes_time, # note natural time scaleweights=node_coal_weight,
bins=np.logspace(2, 5, 25),
density=False,
)
hist_density/=hist_density.sum()
hist_rates=-np.log(1-hist_density/ (1-np.cumsum(hist_density))) /np.diff(hist_breaks)
hist_theory_rates=demogr.model.debug().coalescence_rate_trajectory(hist_breaks, {"CEU":1, "CHB":1})[0]
hist_theory_rates=hist_theory_rates[1:] # remove [0, first break]plt.clf()
plt.step(hist_breaks[:-1], hist_rates, color='firebrick', label='estimated')
plt.step(hist_breaks[:-1], hist_theory_rates, color='black', label='theoretical')
plt.xscale('log')
plt.yscale('log')
plt.xlabel('Generations in past')
plt.ylabel('Cross-coalescence rate')
plt.legend()
plt.savefig('rate-check-2.png')
The text was updated successfully, but these errors were encountered:
@nspope sent me this, which is a good core for a new tutorial on cross coalescence rates. Probably worth waiting until some of the core functionality (e.g.
rates_from_ecdf
) is implemented in tskit.The text was updated successfully, but these errors were encountered: