From 0602a2a97c749562bf408cd138a846c49653b7df Mon Sep 17 00:00:00 2001
From: mufernando <murillofer.rodrigues@gmail.com>
Date: Thu, 16 May 2024 10:39:11 -0700
Subject: [PATCH] Tune Slurm profile and rule resources; fix diploSHIC/VCF position bug

---
 environment.yml                               |  1 +
 .../oregon_profile_simple/config.yaml         | 11 ++++----
 .../oregon_profile_simple/status-sacct.sh     | 24 +++++++++++++++++
 workflows/config/snakemake/sweep_config.yaml  |  2 +-
 workflows/sweep_simulate.snake                | 26 +++++++++----------
 5 files changed, 44 insertions(+), 20 deletions(-)
 create mode 100755 workflows/config/snakemake/oregon_profile_simple/status-sacct.sh

diff --git a/environment.yml b/environment.yml
index 3118ad7..3bd97fc 100644
--- a/environment.yml
+++ b/environment.yml
@@ -46,4 +46,5 @@ dependencies:
     - git+https://github.com/popgenmethods/smcpp
     - scikit-allel
     - git+https://github.com/xin-huang/dadi-cli
+    - git+https://github.com/kr-colab/diploSHIC.git@refs/pull/56/merge
     - diploSHIC
diff --git a/workflows/config/snakemake/oregon_profile_simple/config.yaml b/workflows/config/snakemake/oregon_profile_simple/config.yaml
index fe5d000..cb66c5c 100644
--- a/workflows/config/snakemake/oregon_profile_simple/config.yaml
+++ b/workflows/config/snakemake/oregon_profile_simple/config.yaml
@@ -8,17 +8,16 @@ cluster:
                 --time={resources.time}
                 --job-name=smk-{rule}%j
                 --output=logs/{rule}/{rule}%j.out
+                --parsable
 default-resources:
         - time=60
         - mem_mb=12000
         - threads=1
+cluster-status: "status-sacct.sh"
 restart-times: 3
-max-jobs-per-second: 10
-max-status-checks-per-second: 1
-local-cores: 1
-latency-wait: 60
-jobs: 500
-keep-going: True
+max-jobs-per-second: 1000
+max-status-checks-per-second: 1000
+jobs: 2000
 rerun-incomplete: True
 printshellcmds: True
 scheduler: greedy
diff --git a/workflows/config/snakemake/oregon_profile_simple/status-sacct.sh b/workflows/config/snakemake/oregon_profile_simple/status-sacct.sh
new file mode 100755
index 0000000..53752f5
--- /dev/null
+++ b/workflows/config/snakemake/oregon_profile_simple/status-sacct.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+
+# Snakemake cluster-status script: prints success, running or failed for one Slurm job.
+# Usage: status-sacct.sh JOBID   (JOBID is the bare ID printed by `sbatch --parsable`)
+jobid="$1"
+
+if [[ "$jobid" == Submitted ]]
+then
+  echo smk-simple-slurm: Invalid job ID: "$jobid" >&2
+  echo smk-simple-slurm: Did you remember to add the flag --parsable to your sbatch call? >&2
+  exit 1
+fi
+# First field of the first sacct row; may be empty if sacct has no row for the job yet.
+output=$(sacct -j "$jobid" --format State --noheader | head -n 1 | awk '{print $1}')
+
+if [[ $output =~ ^(COMPLETED).* ]]
+then
+  echo success
+elif [[ -z $output || $output =~ ^(RUNNING|PENDING|COMPLETING|CONFIGURING|SUSPENDED).* ]]  # empty: keep polling, do not fail
+then
+  echo running
+else
+  echo failed
+fi
diff --git a/workflows/config/snakemake/sweep_config.yaml b/workflows/config/snakemake/sweep_config.yaml
index 287038b..fd444d4 100644
--- a/workflows/config/snakemake/sweep_config.yaml
+++ b/workflows/config/snakemake/sweep_config.yaml
@@ -1,6 +1,6 @@
 # General configs
 seed: 12345
-replicates: 200
+replicates: 1000
 output_dir: results
 
 # Contig configs
diff --git a/workflows/sweep_simulate.snake b/workflows/sweep_simulate.snake
index fff0adb..961a214 100644
--- a/workflows/sweep_simulate.snake
+++ b/workflows/sweep_simulate.snake
@@ -413,7 +413,7 @@ def dump_results(input, output, params_dict, target_pops, num_subwins=1):
     if len(del_intervals) > 0:
         tss = tss.delete_intervals(del_intervals)
         tss = tss.trim()
-    tss.write_vcf(fh_vcf, position_transform = lambda x: np.fmax(1, np.round(x)))
+    tss.write_vcf(fh_vcf, position_transform = lambda x: 1 + np.round(x))
     fh_vcf.close()
     # write seqlen of shortened ts
     with open(output[2], 'w') as f:
@@ -559,7 +559,7 @@ rule boundary_sims:
     input:
     output:
         output_dir + "/simulated_data/sweeps/boundary_sims/sim_{seed}_{region_size}.trees"
-    resources: time=6000, mem_mb=6000
+    resources: time=60, mem_mb=6000
     run:
         model = species.get_demographic_model(demo_model["id"])
         mut_rate = model.mutation_rate 
@@ -588,7 +588,7 @@ rule neutral:
     input:
     output:
         output_dir + f"/simulated_data/sweeps/neutral/{demo_model['id']}/{{seed}}/sim_{chrom}_{{left}}_{{right}}.trees", 
-    resources: time=3000, mem_mb=8000
+    resources: time=30, mem_mb=7000
     run:
         model = species.get_demographic_model(demo_model["id"])
         mutation_rate = model.mutation_rate
@@ -620,7 +620,7 @@ rule bgs:
     input:
     output:
         output_dir + f"/simulated_data/sweeps/bgs/{demo_model['id']}/{{annot}}/{{dfe}}/{{seed}}/sim_{chrom}_{{left}}_{{right}}.trees", 
-    resources: time=3000, mem_mb=3000
+    resources: time=30, mem_mb=8000
     run:
         model = species.get_demographic_model(demo_model["id"])
         mutation_rate = model.mutation_rate
@@ -659,7 +659,7 @@ rule sweep:
     input:
     output:
         output_dir + f"/simulated_data/sweeps/sweep/{demo_model['id']}/{{popu}}/{{annot}}/{{dfe}}/{{coeff}}/{{tmult}}/{{seed}}/sim_{{chrom}}_{{left}}_{{right}}.trees", 
-    resources: time=3000, mem_mb=16000
+    resources: time=30, mem_mb=12000
     run:
         model = species.get_demographic_model(demo_model["id"])
         mutation_rate = model.mutation_rate
@@ -754,7 +754,7 @@ rule get_stats:
         output_dir + "/simulated_data/sweeps/{middle}/sim_{chrom}_{left}_{right}.diploshic.ancFile",
         output_dir + "/simulated_data/sweeps/{middle}/sim_{chrom}_{left}_{right}.diploshic.samples"
 
-    resources: time=3000, mem_mb=2000
+    resources: time=30, mem_mb=4000
     run:
         params_dict, target_pops = _get_params_dict_from_wildcards(wildcards)
         dump_results(input, output, params_dict, target_pops, config["num_subwins"])
@@ -769,7 +769,7 @@ rule diploshic_fvs:
 
     output:
         output_dir + '/simulated_data/sweeps/{middle}/sim_{chrom}_{left}_{right}_{popu}.diploshic.fv'
-    resources: time=30, mem_mb=1200
+    resources: time=40, mem_mb=5000
     run:
         with open(input[0],'r') as f:
             seq_len = f.read().strip()
@@ -783,7 +783,7 @@ rule diploshic_pred:
         rules.diploshic_train_classifier.output
     output:
         output_dir + '/simulated_data/sweeps/{middle}/sim_{chrom}_{left}_{right}_{popu}.diploshic.preds'
-    resources: time=30, mem_mb=1200
+    resources: time=30, mem_mb=3000
     run:
         cmd = f"export CUDA_VISIBLE_DEVICES=\"\" && diploSHIC predict trained_model.json trained_model.weights.hdf5 {input[0]} {output[0]}" 
         shell(cmd)   
@@ -820,7 +820,7 @@ rule merge_stats:
     input: stats_outs
     output:
         output_dir + f'/simulated_data/sweeps/all_sims.tmp.stats.tsv'
-    resources: time=3000, mem_mb=350000, disk_mb=350000
+    resources: time=1500, mem_mb=350000, disk_mb=350000
     run:
         #print(input, flush=True)
         #import pdb; pdb.set_trace()
@@ -830,7 +830,7 @@ rule merge_stats_shic1:
     input: shic_outs1
     output:
         output_dir + f'/simulated_data/sweeps/all_sims1.shic.stats.tsv'
-    resources: time=3000, mem_mb=150000, disk_mb=150000
+    resources: time=3000, mem_mb=350000, disk_mb=350000
     run:
         #print(input, flush=True)
         #import pdb; pdb.set_trace()
@@ -840,7 +840,7 @@ rule merge_stats_shic2:
     input: shic_outs2
     output:
         output_dir + f'/simulated_data/sweeps/all_sims2.shic.stats.tsv'
-    resources: time=3000, mem_mb=150000, disk_mb=150000
+    resources: time=3000, mem_mb=350000, disk_mb=350000
     run:
         #print(input, flush=True)
         #import pdb; pdb.set_trace()
@@ -850,7 +850,7 @@ rule merge_stats_shic3:
     input: shic_outs3
     output:
         output_dir + f'/simulated_data/sweeps/all_sims3.shic.stats.tsv'
-    resources: time=3000, mem_mb=150000, disk_mb=150000
+    resources: time=3000, mem_mb=350000, disk_mb=350000
     run:
         #print(input, flush=True)
         #import pdb; pdb.set_trace()
@@ -860,7 +860,7 @@ rule merge_stats_shic3:
 rule merge_all_stats:
     input: [output_dir + f'/simulated_data/sweeps/all_sims.tmp.stats.tsv', output_dir + f'/simulated_data/sweeps/all_sims3.shic.stats.tsv', output_dir + f'/simulated_data/sweeps/all_sims2.shic.stats.tsv', output_dir + f'/simulated_data/sweeps/all_sims1.shic.stats.tsv']
     output: output_dir + f'/simulated_data/sweeps/all_sims.stats.tsv'
-    resources: time=3000, mem_mb=150000, disk_mb=150000
+    resources: time=3000, mem_mb=350000, disk_mb=350000
     shell:
        "cat {input} > {output}"