popsim-consortium · mufernando · May 13, 2024 · May 10, 2024 · silastittes · May 10, 2024
diff --git a/README.md b/README.md
@@ -36,6 +36,9 @@ by editing the following line in the `Snakefile` file
 configfile: "workflows/config/snakemake/tiny_config.yaml"
 ```
 
+The file can also be overridden by passing a different 
+yaml file via snakemake's `--configfile` flag.
+
 Having set the config file, and perhaps edited to your wishes,
 you are now ready to try a dry run.
 

diff --git a/environment.yml b/environment.yml
@@ -11,6 +11,7 @@ dependencies:
   - pip
   - slim==4.0
   - snakemake==7.32.4
+  - bedtools
   - ldc
   - seaborn
   - pysam

diff --git a/notebooks/bin_regions.py b/notebooks/bin_regions.py
@@ -0,0 +1,34 @@
+# for analysis2 pi plot, binning genomic regions
+
+
+import sys
+import numpy as np
+
+
+num_average = 5 # number of lines to average    
+
+
+with open(sys.argv[1]) as infile:
+    group = []
+    counter = 0
+    header = infile.readline().strip().split()
+    header = header[:-1]
+    print("\t".join(header))
+    for line in infile:
+        newline = line.strip().split()
+        newline = newline[:-1]
+        #print(newline) # get rid of seed number
+        newline = list(map(float,newline))
+        group.append(np.array(newline))
+
+        if counter % num_average != 0:
+            pass
+        elif counter == 0:
+            pass
+        else:
+            # average rows               
+            avgs = np.mean(group, axis=0)
+            print("\t".join(map(str,avgs)))
+            group = [] # reset                  
+
+        counter+=1
diff --git a/notebooks/manuscript_figures.ipynb b/notebooks/manuscript_figures.ipynb
diff --git a/workflows/config/snakemake/production_config_PhoSin.yml b/workflows/config/snakemake/production_config_PhoSin.yml
@@ -0,0 +1,49 @@
+# snakemake settings
+"seed": 12345
+"replicates": 3
+"output_dir": "results"
+
+# species-specific settings
+"species": "PhoSin"
+"demo_models": [
+    {"id":"Constant", 
+    "num_samples_per_population": [100],
+    },
+    {"id":"Vaquita2Epoch_1R22",
+    "num_samples_per_population": 3*[100],
+    }
+]
+"genetic_map": null
+"chrm_list": "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21"
+"dfe_list": ["none", "Gamma_R22"]
+"annotation_list": ["all_sites", "Phocoena_sinus.mPhoSin1.pri.110_exons"]
+"mask_file": "workflows/masks/PhoSin_fake.mask.bed"
+"stairway_annot_mask" : "" # set this or any of the below to 'none' to skip annot masking
+"msmc_annot_mask" : ""
+"gone_annot_mask" : ""
+"smcpp_annot_mask" : ""
+
+# slim settings
+"slim_scaling_factor": 1
+"slim_burn_in": 10
+
+# n(t) specific configs
+"methods" : ["stairwayplot", "gone", "smcpp", "msmc"]
+"num_sampled_genomes_msmc" : [6]
+"num_msmc_iterations" : 20
+"gone_phase" : 1  # 0 for pseudohaploid, 1 for phased, 2 for unknown phase
+"gone_max_snps" : 50000  # default=50000
+"gone_threads" : 8
+"gone_num_gens" : 2000  # default=2000
+"gone_num_bins" : 400  # default=400
+
+
+# exe paths
+"poly_dfe_exec": "ext/polyDFE/polyDFE-2.0-linux-64-bit"
+"dfe_alpha_exec": "ext/dfe-alpha-release-2.16/est_dfe"
+"dfe_alpha_data_path_1": "ext/dfe-alpha-release-2.16/data"
+"dfe_alpha_data_path_2": "three-epoch"
+"grapes_exec": "ext/grapes/multi_grapes"
+"msmc_exec" : "ext/msmc2/build/release/msmc2"
+"stairwayplot_code" : "ext/stairwayplot/swarmops.jar"
+"gone_code" : "ext/GONE/Linux"
diff --git a/workflows/config/snakemake/tiny_config_PhoSin.yaml b/workflows/config/snakemake/tiny_config_PhoSin.yaml
@@ -0,0 +1,49 @@
+"seed": 12345
+"replicates": 2
+
+"species": "PhoSin"
+"demo_models":
+    [
+    {"id":"Vaquita2Epoch_1R22",
+    "num_samples_per_population": [20],
+    }
+    ]
+
+"genetic_map": null
+"chrm_list": "2,9,20"
+"dfe_list": ["none", "Gamma_R22"]
+"annotation_list": ["none", "Phocoena_sinus.mPhoSin1.pri.110_exons"]
+"output_dir": "results"
+"mask_file": "workflows/masks/PhoSin_fake.mask.bed"
+
+# set any of the below to 'none' to skip annot masking
+"stairway_annot_mask" : ""
+"msmc_annot_mask" : ""
+"gone_annot_mask" : ""
+"smcpp_annot_mask" : ""
+
+# slim settings
+"slim_scaling_factor": 15
+"slim_burn_in": 10
+
+# n(t) specific configs
+"methods" : ["stairwayplot", "gone", "smcpp", "msmc"]
+
+"num_sampled_genomes_msmc" : [6]
+"num_msmc_iterations" : 20
+
+"gone_phase" : 1  # 0 for pseudohaploid, 1 for phased, 2 for unknown phase
+"gone_max_snps" : 50000  # default=50000
+"gone_threads" : 8
+"gone_num_gens" : 2000  # default=2000
+"gone_num_bins" : 400  # default=400
+
+# exe paths
+"poly_dfe_exec": "ext/polyDFE/polyDFE-2.0-linux-64-bit"
+"dfe_alpha_exec": "ext/dfe-alpha-release-2.16/est_dfe"
+"dfe_alpha_data_path_1": "ext/dfe-alpha-release-2.16/data"
+"dfe_alpha_data_path_2": "three-epoch"
+"grapes_exec": "ext/grapes/multi_grapes"
+"msmc_exec" : "ext/msmc2/build/release/msmc2"
+"stairwayplot_code" : "ext/stairwayplot/swarmops.jar"
+"gone_code" : "ext/GONE/Linux"
diff --git a/workflows/gone.py b/workflows/gone.py
@@ -5,6 +5,7 @@
 import tskit
 import os
 import numpy as np
+import msprime
 import pandas as pd
 
 def params(gone_code, params):
@@ -47,14 +48,17 @@ def copy(gone_code, outpath, seed, threads):
     subprocess.run(cmd, shell=True, check=True)
 
 
-def ts2plink(ts_path, ped_file, map_file, pop_name, genetic_map, chromID, mask_intervals):
+def ts2plink(ts_path, ped_file, map_file, species, pop_name, genetic_map, chromID, mask_intervals):
     """
     converts ts to plink format
     masks are the intervals to exclude
     """
     if type(mask_intervals) is not list:
         mask_intervals = [mask_intervals]
-    gm_chr = [genetic_map.get_chromosome_map(chrms) for chrms in chromID]
+    if genetic_map is not None:
+        gm_chr = [genetic_map.get_chromosome_map(chrms) for chrms in chromID]
+    else:
+        gm_chr = [species.get_contig(chrms).recombination_map for chrms in chromID]
     snp_counter = 1
     genomat_list = []
     # add to map file for chroms

diff --git a/workflows/masks/PhoSin_fake.mask.bed b/workflows/masks/PhoSin_fake.mask.bed
@@ -0,0 +1,22 @@
+1	0	1
+2	0	1
+3	0	1
+4	0	1
+5	0	1
+6	0	1
+7	0	1
+8	0	1
+9	0	1
+10	0	1
+11	0	1
+12	0	1
+13	0	1
+14	0	1
+15	0	1
+16	0	1
+17	0	1
+18	0	1
+19	0	1
+20	0	1
+21	0	1
+X	0	1
diff --git a/workflows/n_t.snake b/workflows/n_t.snake
@@ -484,12 +484,13 @@ rule gone_prep_inputs:
                                             chromIDs,
                                             chrom_annotation=wildcards.annots,
                                             )
+        genetic_map = None
+        if genetic_map_id is not None:
+            genetic_map = species.get_genetic_map(genetic_map_id)
+            if not genetic_map.is_cached():
+                genetic_map.download()
 
-        genetic_map = species.get_genetic_map(genetic_map_id)
-        if not genetic_map.is_cached():
-            genetic_map.download()
-
-        gone.ts2plink(inputs, output[0], output[1], wildcards.pops, genetic_map, chromIDs, mask_intervals=mask_intervals)
+        gone.ts2plink(inputs, output[0], output[1], species, wildcards.pops, genetic_map, chromIDs, mask_intervals=mask_intervals)
 
 
 rule gone_run:
@@ -557,7 +558,6 @@ rule clone_smcpp:
 rule ts_to_smc:
     input:
         output_dir + "/simulated_data/{demog}/{dfes}/{annots}/{seeds}/sim_{chrms}.trees",
-        rules.clone_smcpp.output
     output:
         output_dir + "/inference/{demog}/smcpp/{dfes}/{annots}/{seeds}/{pops}/sim_{chrms}.trees.smc.gz"
     run:
@@ -582,7 +582,6 @@ rule run_smcpp:
     input:
         expand(output_dir+ "/inference/{{demog}}/smcpp/{{dfes}}/{{annots}}/{{seeds}}/{{pops}}/sim_{chrms}.trees.smc.gz",
             chrms=chrm_list),
-        rules.clone_smcpp.output,
     output:
         output_dir + "/inference/{demog}/smcpp/{dfes}/{annots}/{seeds}/{pops}/model.final.json"
     threads: 20