From ae85199ff11eb2d052dd55eae1544675642bedad Mon Sep 17 00:00:00 2001
From: Silas Tittes <silas.tittes@gmail.com>
Date: Tue, 16 Apr 2024 11:00:42 -0600
Subject: [PATCH] edits to get tiny config working on talapas (#110)

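* edits to get the tiny config working on Talapas: pass an explicit value to
  dadi-cli --force-convergence, download est_dfe directly instead of building
  dfe-alpha from source, install smcpp via pip, and run bgzip with -f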

* pinned snakemake version to 7.32.4
---
 environment.yml     |  3 ++-
 workflows/dfe.snake | 15 +++++++--------
 workflows/plots.py  |  2 +-
 workflows/smc.py    |  4 ++--
 4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/environment.yml b/environment.yml
index a1e08a3..2b6a4ba 100644
--- a/environment.yml
+++ b/environment.yml
@@ -10,7 +10,7 @@ dependencies:
   - mpfr
   - pip
   - slim==4.0
-  - snakemake
+  - snakemake==7.32.4
   - ldc
   - seaborn
   - pysam
@@ -41,6 +41,7 @@ dependencies:
   - libprotobuf=3.21.12
   - pip:
     - git+https://github.com/popsim-consortium/stdpopsim.git
+    - git+https://github.com/popgenmethods/smcpp
     - scikit-allel
     - git+https://github.com/xin-huang/dadi-cli
     - diploSHIC
diff --git a/workflows/dfe.snake b/workflows/dfe.snake
index 75f6956..ead89a9 100644
--- a/workflows/dfe.snake
+++ b/workflows/dfe.snake
@@ -121,7 +121,7 @@ rule dadi_infer_dm:
     threads: 8
     shell:
         """
-        dadi-cli InferDM --fs {input} --model {params.demog} --p0 {params.demog_p0} --ubounds {params.demog_ubounds} --lbounds {params.demog_lbounds} --output-prefix {params.prefix} --optimizations {params.opts} --grids {params.grid_size} --cpus {threads} --nomisid --force-convergence
+        dadi-cli InferDM --fs {input} --model {params.demog} --p0 {params.demog_p0} --ubounds {params.demog_ubounds} --lbounds {params.demog_lbounds} --output-prefix {params.prefix} --optimizations {params.opts} --grids {params.grid_size} --cpus {threads} --nomisid --force-convergence 1
 
         """
 
@@ -161,7 +161,7 @@ rule dadi_infer_dfe:
     threads: 8
     shell:
         """
-        dadi-cli InferDFE --fs {input[0]} --cache1d {input[1]} --demo-popt {input[2]} --output-prefix {params.prefix} --pdf1d {params.dfe} --p0 {params.dfe_p0} --ubounds {params.dfe_ubounds} --lbounds {params.dfe_lbounds} --ratio {params.ratio} --optimizations {params.opts} --cpus {threads} --nomisid --force-convergence
+        dadi-cli InferDFE --fs {input[0]} --cache1d {input[1]} --demo-popt {input[2]} --output-prefix {params.prefix} --pdf1d {params.dfe} --p0 {params.dfe_p0} --ubounds {params.dfe_ubounds} --lbounds {params.dfe_lbounds} --ratio {params.ratio} --optimizations {params.opts} --cpus {threads} --nomisid --force-convergence 1
 
         """
 
@@ -249,19 +249,18 @@ rule download_dfe_alpha:
     output:
         "ext/dfe-alpha-release-2.16/est_dfe",
         "ext/dfe-alpha-release-2.16/data/n1_100/s_evaluated.dat"
-
     shell:
         """
         cd ext
-        wget -c https://sourceforge.net/projects/dfe-alpha-k-e-w/files/dfe-alpha-release-2.16.tar.gz/download
-        mv download dfe-alpha-release-2.16.tar.gz
-        tar -xvf dfe-alpha-release-2.16.tar.gz
-        cat dfe_alpha_makefile_stdpopsim_patch > dfe-alpha-release-2.16/Makefile && cd dfe-alpha-release-2.16 && make
+        mkdir -p dfe-alpha-release-2.16/
+        cd dfe-alpha-release-2.16/
+        wget -O est_dfe http://sesame.uoregon.edu/~stittes/stdpopsim/est_dfe
+        chmod u+x est_dfe
         wget http://sesame.uoregon.edu/~adkern/stdpopsim/data.tar.gz && tar -xvf data.tar.gz && rm -f data.tar.gz
         # old location of above
         #  https://datashare.ed.ac.uk/bitstream/handle/10283/2730/data.tar.gz?sequence=1&isAllowed=y
         cd ../
-        rm dfe-alpha-release-2.16.tar.gz
+        rm -f dfe-alpha-release-2.16.tar.gz
         cd ../
         """
         
diff --git a/workflows/plots.py b/workflows/plots.py
index 265b90b..5b2672c 100644
--- a/workflows/plots.py
+++ b/workflows/plots.py
@@ -81,7 +81,7 @@ def gather_inference_results(output_dir, demog, output, method, chrm_mask,
                 annot_mask_i = annot
             if method == "stairwayplot":
                 nt = pd.read_csv(infile, sep="\t", skiprows=5)
-                nt.columns = nt.columns.str.replace('[%,.]','')
+                nt.columns = nt.columns.str.replace('%','').str.replace('.', '').str.replace(',', '')
                 for row in nt.itertuples():
                     f.write(f'{method},{pop},{size},{dfe},{annot},{row.year},{row.Ne_median},{seed},{chrm_mask_i},{annot_mask_i},{slim_scaling_factor},{getattr(row, "Ne_25")},{getattr(row, "Ne_975")}\n')
             elif method == "msmc":
diff --git a/workflows/smc.py b/workflows/smc.py
index 5007a67..4f23825 100644
--- a/workflows/smc.py
+++ b/workflows/smc.py
@@ -40,7 +40,7 @@ def write_smcpp_file(path, output, pop_name, num_sampled_genomes=2, mask_interva
     with open(vcf_file, "w") as vcf:
         ts.write_vcf(vcf, contig_id=chr_name)  # site_mask=np.array(bool)
     # index/compress the vcf
-    cmd = f"bgzip {vcf_file}"
+    cmd = f"bgzip -f {vcf_file}"
     logging.info("Running:" + cmd)
     subprocess.run(cmd, shell=True, check=True)
     vz_file = f"{vcf_file}.gz"
@@ -50,7 +50,7 @@ def write_smcpp_file(path, output, pop_name, num_sampled_genomes=2, mask_interva
     # write mask file
     if mask_intervals is not None:
         intervals2BedFile(mask_intervals, mask_outfile, chr_name)
-        cmd = f"bgzip {mask_outfile}"
+        cmd = f"bgzip -f {mask_outfile}"
         logging.info("Running:" + cmd)
         subprocess.run(cmd, shell=True, check=True)
         cmd = f"tabix -p bed {mask_outfile}.gz"