This repository has been archived by the owner on Oct 12, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathbenchmarking.py
114 lines (101 loc) · 4.98 KB
/
benchmarking.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import argparse
import sys
import numpy as np
from fwdpy11_arg_example.evolve_arg import evolve_track
import fwdpy11 as fp11
import fwdpy11.fitness
import fwdpy11.model_params
import fwdpy11.wright_fisher as wf
import fwdpy11.sampling
import msprime
import gzip
import pandas as pd
def parse_args():
dstring = "Prototype implementation of ARG tracking and regular garbage collection."
parser = argparse.ArgumentParser(description=dstring,
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--popsize', '-N', type=int,
help="Diploid population size")
parser.add_argument('--theta', '-T', type=float, help="4Nu")
parser.add_argument('--rho', '-R', type=float, help="4Nr")
parser.add_argument('--pdel', default=0.0, type=float,
help="Ratio of deleterious mutations to neutral mutations.")
parser.add_argument('--nsam', '-n', type=int,
help="Sample size (in chromosomes).")
parser.add_argument('--seed', '-S', type=int, help="RNG seed")
parser.add_argument('--gc', '-G', type=int,
help="GC interval")
parser.add_argument('--neutral', action='store_true',
help="Simulate no selection")
parser.add_argument('--neutral_mutations',
action='store_true',
help="Simulate neutral mutations. If False, ARG is tracked instead and neutral mutations dropped down on the sample afterwards.")
parser.add_argument('--simlen', type=int, default=10,
help="Simulation length, in multiples of N generations")
parser.add_argument('--outfile1', type=str, help="Main output file")
parser.add_argument('--async',action='store_true',help="Execute msprime step in separate process during simulation.")
parser.add_argument('--queue',action='store_true',help="Use queue.Queue to handle msprime steps")
parser.add_argument('--qsize',type=int,default=2,help="Size of queue.Queue.")
parser.add_argument('--wthreads',type=int,default=2,help="Number of threads to use for fitness calculations. Only works with --queue.")
return parser
if __name__ == "__main__":
parser = parse_args()
args = parser.parse_args(sys.argv[1:])
pop = fp11.SlocusPop(args.popsize)
# Set up parameters with defaults
recrate = args.rho / (4.0 * float(args.popsize))
mutrate_n = args.theta / (4.0 * float(args.popsize))
mutrate_s = args.pdel * mutrate_n
pdict = {'rates': (mutrate_n, mutrate_s, recrate),
'nregions': [fp11.Region(0, 1, 1)],
# The below is equivelent to R's -1*rgamma(1,shape=1.0,scale=5.0)
# The scaling of 2N means that the DFE is with respect to 2Ns
'sregions': [fp11.GammaS(0, 1, 1, h=0.5, mean=-5.0, shape=1.0, scaling=2 * args.popsize)],
'recregions': [fp11.Region(0, 1, 1)],
'gvalue': fwdpy11.fitness.SlocusMult(1.0),
'demography': np.array([args.popsize] * args.simlen * args.popsize, dtype=np.uint32)
}
params = fwdpy11.model_params.SlocusParams(**pdict)
# Adjust params based on user input
if args.neutral is True:
params.mutrate_s = 0.0
params.sregions = []
if args.neutral_mutations is False:
params.mutrate_n = 0.0
params.nregions = []
# Run the sim.
# If tracking the ARG, we use this current module.
# If simulating the neutral mutations, we use
# the regular fwdpy11 machinery.
rng = fp11.GSLrng(args.seed)
if args.neutral_mutations is True:
# Use fwdpy11
wf.evolve(rng, pop, params)
# Get a sample
s = fwdpy11.sampling.sample_separate(rng, pop, args.nsam)
else:
# Use this module
simplifier, atracker, tsim = evolve_track(
rng, pop, params, args.gc, True, args.seed, args.async, args.queue, args.qsize, args.wthreads)
# Take times from simplifier before they change.
times = simplifier.times
times['fwd_sim_runtime'] = [tsim]
times['N'] = [args.popsize]
times['theta'] = [args.theta]
times['rho'] = [args.rho]
times['simplify_interval'] = [args.gc]
d = pd.DataFrame(times)
d.to_csv(args.outfile1, sep='\t', index=False, compression='gzip')
# Simplify the genealogy down to a sample,
# And throw mutations onto that sample
msprime.simplify_tables(np.random.choice(2 * args.popsize, args.nsam,
replace=False).tolist(),
nodes=simplifier.nodes,
edges=simplifier.edges)
msp_rng = msprime.RandomGenerator(args.seed)
sites = msprime.SiteTable()
mutations = msprime.MutationTable()
mutgen = msprime.MutationGenerator(
msp_rng, args.theta / float(4 * args.popsize))
mutgen.generate(simplifier.nodes,
simplifier.edges, sites, mutations)