Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

updates to get PhoSin and HomSap working #112

Merged
merged 1 commit into from
May 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ by editing the following line in the `Snakefile` file
configfile: "workflows/config/snakemake/tiny_config.yaml"
```

The file can also be overridden by passing a different
yaml file via snakemake's `--configfile` flag.

Having set the config file, and perhaps edited to your wishes,
you are now ready to try a dry run.

Expand Down
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ dependencies:
- pip
- slim==4.0
- snakemake==7.32.4
- bedtools
- ldc
- seaborn
- pysam
Expand Down
34 changes: 34 additions & 0 deletions notebooks/bin_regions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# for analysis2 pi plot, binning genomic regions


import sys
import numpy as np


num_average = 5 # number of lines to average


with open(sys.argv[1]) as infile:
group = []
counter = 0
header = infile.readline().strip().split()
header = header[:-1]
print("\t".join(header))
for line in infile:
newline = line.strip().split()
newline = newline[:-1]
#print(newline) # get rid of seed number
newline = list(map(float,newline))
group.append(np.array(newline))

if counter % num_average != 0:
pass
elif counter == 0:
pass
else:
# average rows
avgs = np.mean(group, axis=0)
print("\t".join(map(str,avgs)))
group = [] # reset

counter+=1
924 changes: 924 additions & 0 deletions notebooks/manuscript_figures.ipynb
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Migrated Chris Smith's poster figs to here. Still some work needed, but will push figures from here to manuscript soon.

Large diffs are not rendered by default.

49 changes: 49 additions & 0 deletions workflows/config/snakemake/production_config_PhoSin.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# snakemake settings
"seed": 12345
"replicates": 3
"output_dir": "results"

# species-specific settings
"species": "PhoSin"
"demo_models": [
{"id":"Constant",
"num_samples_per_population": [100],
},
{"id":"Vaquita2Epoch_1R22",
"num_samples_per_population": 3*[100],
}
]
"genetic_map": null
"chrm_list": "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21"
"dfe_list": ["none", "Gamma_R22"]
"annotation_list": ["all_sites", "Phocoena_sinus.mPhoSin1.pri.110_exons"]
"mask_file": "workflows/masks/PhoSin_fake.mask.bed"
"stairway_annot_mask" : "" # set this or any of the below to 'none' to skip annot masking
"msmc_annot_mask" : ""
"gone_annot_mask" : ""
"smcpp_annot_mask" : ""

# slim settings
"slim_scaling_factor": 1
"slim_burn_in": 10

# n(t) specific configs
"methods" : ["stairwayplot", "gone", "smcpp", "msmc"]
"num_sampled_genomes_msmc" : [6]
"num_msmc_iterations" : 20
"gone_phase" : 1 # 0 for pseudohaploid, 1 for phased, 2 for unknown phase
"gone_max_snps" : 50000 # default=50000
"gone_threads" : 8
"gone_num_gens" : 2000 # default=2000
"gone_num_bins" : 400 # default=400


# exe paths
"poly_dfe_exec": "ext/polyDFE/polyDFE-2.0-linux-64-bit"
"dfe_alpha_exec": "ext/dfe-alpha-release-2.16/est_dfe"
"dfe_alpha_data_path_1": "ext/dfe-alpha-release-2.16/data"
"dfe_alpha_data_path_2": "three-epoch"
"grapes_exec": "ext/grapes/multi_grapes"
"msmc_exec" : "ext/msmc2/build/release/msmc2"
"stairwayplot_code" : "ext/stairwayplot/swarmops.jar"
"gone_code" : "ext/GONE/Linux"
49 changes: 49 additions & 0 deletions workflows/config/snakemake/tiny_config_PhoSin.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"seed": 12345
"replicates": 2

"species": "PhoSin"
"demo_models":
[
{"id":"Vaquita2Epoch_1R22",
"num_samples_per_population": [20],
}
]

"genetic_map": null
"chrm_list": "2,9,20"
"dfe_list": ["none", "Gamma_R22"]
"annotation_list": ["none", "Phocoena_sinus.mPhoSin1.pri.110_exons"]
"output_dir": "results"
"mask_file": "workflows/masks/PhoSin_fake.mask.bed"

# set any of the below to 'none' to skip annot masking
"stairway_annot_mask" : ""
"msmc_annot_mask" : ""
"gone_annot_mask" : ""
"smcpp_annot_mask" : ""

# slim settings
"slim_scaling_factor": 15
"slim_burn_in": 10

# n(t) specific configs
"methods" : ["stairwayplot", "gone", "smcpp", "msmc"]

"num_sampled_genomes_msmc" : [6]
"num_msmc_iterations" : 20

"gone_phase" : 1 # 0 for pseudohaploid, 1 for phased, 2 for unknown phase
"gone_max_snps" : 50000 # default=50000
"gone_threads" : 8
"gone_num_gens" : 2000 # default=2000
"gone_num_bins" : 400 # default=400

# exe paths
"poly_dfe_exec": "ext/polyDFE/polyDFE-2.0-linux-64-bit"
"dfe_alpha_exec": "ext/dfe-alpha-release-2.16/est_dfe"
"dfe_alpha_data_path_1": "ext/dfe-alpha-release-2.16/data"
"dfe_alpha_data_path_2": "three-epoch"
"grapes_exec": "ext/grapes/multi_grapes"
"msmc_exec" : "ext/msmc2/build/release/msmc2"
"stairwayplot_code" : "ext/stairwayplot/swarmops.jar"
"gone_code" : "ext/GONE/Linux"
8 changes: 6 additions & 2 deletions workflows/gone.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import tskit
import os
import numpy as np
import msprime
import pandas as pd

def params(gone_code, params):
Expand Down Expand Up @@ -47,14 +48,17 @@ def copy(gone_code, outpath, seed, threads):
subprocess.run(cmd, shell=True, check=True)


def ts2plink(ts_path, ped_file, map_file, pop_name, genetic_map, chromID, mask_intervals):
def ts2plink(ts_path, ped_file, map_file, species, pop_name, genetic_map, chromID, mask_intervals):
"""
converts ts to plink format
masks are the intervals to exclude
"""
if type(mask_intervals) is not list:
mask_intervals = [mask_intervals]
gm_chr = [genetic_map.get_chromosome_map(chrms) for chrms in chromID]
if genetic_map is not None:
gm_chr = [genetic_map.get_chromosome_map(chrms) for chrms in chromID]
else:
gm_chr = [species.get_contig(chrms).recombination_map for chrms in chromID]
snp_counter = 1
genomat_list = []
# add to map file for chroms
Expand Down
22 changes: 22 additions & 0 deletions workflows/masks/PhoSin_fake.mask.bed
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is another spot where I hit a snag that also relates to not having a genetic map. Since a lot of places expected a non-empty mask file to exist, I figured this would be simpler and be would unlikely impact the results. I admit it's a little hacky though.

Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
1 0 1
2 0 1
3 0 1
4 0 1
5 0 1
6 0 1
7 0 1
8 0 1
9 0 1
10 0 1
11 0 1
12 0 1
13 0 1
14 0 1
15 0 1
16 0 1
17 0 1
18 0 1
19 0 1
20 0 1
21 0 1
X 0 1
13 changes: 6 additions & 7 deletions workflows/n_t.snake
Original file line number Diff line number Diff line change
Expand Up @@ -484,12 +484,13 @@ rule gone_prep_inputs:
chromIDs,
chrom_annotation=wildcards.annots,
)
genetic_map = None
if genetic_map_id is not None:
genetic_map = species.get_genetic_map(genetic_map_id)
if not genetic_map.is_cached():
genetic_map.download()

genetic_map = species.get_genetic_map(genetic_map_id)
if not genetic_map.is_cached():
genetic_map.download()

gone.ts2plink(inputs, output[0], output[1], wildcards.pops, genetic_map, chromIDs, mask_intervals=mask_intervals)
gone.ts2plink(inputs, output[0], output[1], species, wildcards.pops, genetic_map, chromIDs, mask_intervals=mask_intervals)


rule gone_run:
Expand Down Expand Up @@ -557,7 +558,6 @@ rule clone_smcpp:
rule ts_to_smc:
input:
output_dir + "/simulated_data/{demog}/{dfes}/{annots}/{seeds}/sim_{chrms}.trees",
rules.clone_smcpp.output
output:
output_dir + "/inference/{demog}/smcpp/{dfes}/{annots}/{seeds}/{pops}/sim_{chrms}.trees.smc.gz"
run:
Expand All @@ -582,7 +582,6 @@ rule run_smcpp:
input:
expand(output_dir+ "/inference/{{demog}}/smcpp/{{dfes}}/{{annots}}/{{seeds}}/{{pops}}/sim_{chrms}.trees.smc.gz",
chrms=chrm_list),
rules.clone_smcpp.output,
output:
output_dir + "/inference/{demog}/smcpp/{dfes}/{annots}/{seeds}/{pops}/model.final.json"
threads: 20
Expand Down
Loading