From ae85199ff11eb2d052dd55eae1544675642bedad Mon Sep 17 00:00:00 2001 From: Silas Tittes Date: Tue, 16 Apr 2024 11:00:42 -0600 Subject: [PATCH] edits to get tiny config working on talapas (#110) * edits to get tiny config working on talapas * pinned snakemake version --- environment.yml | 3 ++- workflows/dfe.snake | 15 +++++++-------- workflows/plots.py | 2 +- workflows/smc.py | 4 ++-- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/environment.yml b/environment.yml index a1e08a3..2b6a4ba 100644 --- a/environment.yml +++ b/environment.yml @@ -10,7 +10,7 @@ dependencies: - mpfr - pip - slim==4.0 - - snakemake + - snakemake==7.32.4 - ldc - seaborn - pysam @@ -41,6 +41,7 @@ dependencies: - libprotobuf=3.21.12 - pip: - git+https://github.com/popsim-consortium/stdpopsim.git + - git+https://github.com/popgenmethods/smcpp - scikit-allel - git+https://github.com/xin-huang/dadi-cli - diploSHIC diff --git a/workflows/dfe.snake b/workflows/dfe.snake index 75f6956..ead89a9 100644 --- a/workflows/dfe.snake +++ b/workflows/dfe.snake @@ -121,7 +121,7 @@ rule dadi_infer_dm: threads: 8 shell: """ - dadi-cli InferDM --fs {input} --model {params.demog} --p0 {params.demog_p0} --ubounds {params.demog_ubounds} --lbounds {params.demog_lbounds} --output-prefix {params.prefix} --optimizations {params.opts} --grids {params.grid_size} --cpus {threads} --nomisid --force-convergence + dadi-cli InferDM --fs {input} --model {params.demog} --p0 {params.demog_p0} --ubounds {params.demog_ubounds} --lbounds {params.demog_lbounds} --output-prefix {params.prefix} --optimizations {params.opts} --grids {params.grid_size} --cpus {threads} --nomisid --force-convergence 1 """ @@ -161,7 +161,7 @@ rule dadi_infer_dfe: threads: 8 shell: """ - dadi-cli InferDFE --fs {input[0]} --cache1d {input[1]} --demo-popt {input[2]} --output-prefix {params.prefix} --pdf1d {params.dfe} --p0 {params.dfe_p0} --ubounds {params.dfe_ubounds} --lbounds {params.dfe_lbounds} --ratio {params.ratio} --optimizations {params.opts} --cpus {threads} --nomisid --force-convergence + dadi-cli InferDFE --fs {input[0]} --cache1d {input[1]} --demo-popt {input[2]} --output-prefix {params.prefix} --pdf1d {params.dfe} --p0 {params.dfe_p0} --ubounds {params.dfe_ubounds} --lbounds {params.dfe_lbounds} --ratio {params.ratio} --optimizations {params.opts} --cpus {threads} --nomisid --force-convergence 1 """ @@ -249,19 +249,18 @@ rule download_dfe_alpha: output: "ext/dfe-alpha-release-2.16/est_dfe", "ext/dfe-alpha-release-2.16/data/n1_100/s_evaluated.dat" - shell: """ cd ext - wget -c https://sourceforge.net/projects/dfe-alpha-k-e-w/files/dfe-alpha-release-2.16.tar.gz/download - mv download dfe-alpha-release-2.16.tar.gz - tar -xvf dfe-alpha-release-2.16.tar.gz - cat dfe_alpha_makefile_stdpopsim_patch > dfe-alpha-release-2.16/Makefile && cd dfe-alpha-release-2.16 && make + mkdir -p dfe-alpha-release-2.16/ + cd dfe-alpha-release-2.16/ + wget -O est_dfe http://sesame.uoregon.edu/~stittes/stdpopsim/est_dfe + chmod u+x est_dfe wget http://sesame.uoregon.edu/~adkern/stdpopsim/data.tar.gz && tar -xvf data.tar.gz && rm -f data.tar.gz # old location of above # https://datashare.ed.ac.uk/bitstream/handle/10283/2730/data.tar.gz?sequence=1&isAllowed=y cd ../ - rm dfe-alpha-release-2.16.tar.gz + rm -f dfe-alpha-release-2.16.tar.gz cd ../ """ diff --git a/workflows/plots.py b/workflows/plots.py index 265b90b..5b2672c 100644 --- a/workflows/plots.py +++ b/workflows/plots.py @@ -81,7 +81,7 @@ def gather_inference_results(output_dir, demog, output, method, chrm_mask, annot_mask_i = annot if method == "stairwayplot": nt = pd.read_csv(infile, sep="\t", skiprows=5) - nt.columns = nt.columns.str.replace('[%,.]','') + nt.columns = nt.columns.str.replace('%','').str.replace('.', '').str.replace(',', '') for row in nt.itertuples(): f.write(f'{method},{pop},{size},{dfe},{annot},{row.year},{row.Ne_median},{seed},{chrm_mask_i},{annot_mask_i},{slim_scaling_factor},{getattr(row, "Ne_25")},{getattr(row, "Ne_975")}\n') elif method == "msmc": diff --git a/workflows/smc.py b/workflows/smc.py index 5007a67..4f23825 100644 --- a/workflows/smc.py +++ b/workflows/smc.py @@ -40,7 +40,7 @@ def write_smcpp_file(path, output, pop_name, num_sampled_genomes=2, mask_interva with open(vcf_file, "w") as vcf: ts.write_vcf(vcf, contig_id=chr_name) # site_mask=np.array(bool) # index/compress the vcf - cmd = f"bgzip {vcf_file}" + cmd = f"bgzip -f {vcf_file}" logging.info("Running:" + cmd) subprocess.run(cmd, shell=True, check=True) vz_file = f"{vcf_file}.gz" @@ -50,7 +50,7 @@ def write_smcpp_file(path, output, pop_name, num_sampled_genomes=2, mask_interva # write mask file if mask_intervals is not None: intervals2BedFile(mask_intervals, mask_outfile, chr_name) - cmd = f"bgzip {mask_outfile}" + cmd = f"bgzip -f {mask_outfile}" logging.info("Running:" + cmd) subprocess.run(cmd, shell=True, check=True) cmd = f"tabix -p bed {mask_outfile}.gz"