From 0602a2a97c749562bf408cd138a846c49653b7df Mon Sep 17 00:00:00 2001 From: mufernando Date: Thu, 16 May 2024 10:39:11 -0700 Subject: [PATCH] stuff & diploshic/vcf bug fix --- environment.yml | 1 + .../oregon_profile_simple/config.yaml | 11 ++++---- .../oregon_profile_simple/status-sacct.sh | 24 +++++++++++++++++ workflows/config/snakemake/sweep_config.yaml | 2 +- workflows/sweep_simulate.snake | 26 +++++++++---------- 5 files changed, 44 insertions(+), 20 deletions(-) create mode 100755 workflows/config/snakemake/oregon_profile_simple/status-sacct.sh diff --git a/environment.yml b/environment.yml index 3118ad7..3bd97fc 100644 --- a/environment.yml +++ b/environment.yml @@ -46,4 +46,5 @@ dependencies: - git+https://github.com/popgenmethods/smcpp - scikit-allel - git+https://github.com/xin-huang/dadi-cli + - git+https://github.com/kr-colab/diploSHIC.git@refs/pull/56/merge - diploSHIC diff --git a/workflows/config/snakemake/oregon_profile_simple/config.yaml b/workflows/config/snakemake/oregon_profile_simple/config.yaml index fe5d000..cb66c5c 100644 --- a/workflows/config/snakemake/oregon_profile_simple/config.yaml +++ b/workflows/config/snakemake/oregon_profile_simple/config.yaml @@ -8,17 +8,16 @@ cluster: --time={resources.time} --job-name=smk-{rule}%j --output=logs/{rule}/{rule}%j.out + --parsable default-resources: - time=60 - mem_mb=12000 - threads=1 +cluster-status: "status-sacct.sh" restart-times: 3 -max-jobs-per-second: 10 -max-status-checks-per-second: 1 -local-cores: 1 -latency-wait: 60 -jobs: 500 -keep-going: True +max-jobs-per-second: 1000 +max-status-checks-per-second: 1000 +jobs: 2000 rerun-incomplete: True printshellcmds: True scheduler: greedy diff --git a/workflows/config/snakemake/oregon_profile_simple/status-sacct.sh b/workflows/config/snakemake/oregon_profile_simple/status-sacct.sh new file mode 100755 index 0000000..53752f5 --- /dev/null +++ b/workflows/config/snakemake/oregon_profile_simple/status-sacct.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +# Check status of Slurm job + +jobid="$1" + +if [[ "$jobid" == Submitted ]] +then + echo smk-simple-slurm: Invalid job ID: "$jobid" >&2 + echo smk-simple-slurm: Did you remember to add the flag --parsable to your sbatch call? >&2 + exit 1 +fi + +output=`sacct -j "$jobid" --format State --noheader | head -n 1 | awk '{print $1}'` + +if [[ $output =~ ^(COMPLETED).* ]] +then + echo success +elif [[ $output =~ ^(RUNNING|PENDING|COMPLETING|CONFIGURING|SUSPENDED).* ]] +then + echo running +else + echo failed +fi diff --git a/workflows/config/snakemake/sweep_config.yaml b/workflows/config/snakemake/sweep_config.yaml index 287038b..fd444d4 100644 --- a/workflows/config/snakemake/sweep_config.yaml +++ b/workflows/config/snakemake/sweep_config.yaml @@ -1,6 +1,6 @@ # General configs seed: 12345 -replicates: 200 +replicates: 1_000 output_dir: results # Contig configs diff --git a/workflows/sweep_simulate.snake b/workflows/sweep_simulate.snake index fff0adb..961a214 100644 --- a/workflows/sweep_simulate.snake +++ b/workflows/sweep_simulate.snake @@ -413,7 +413,7 @@ def dump_results(input, output, params_dict, target_pops, num_subwins=1): if len(del_intervals) > 0: tss = tss.delete_intervals(del_intervals) tss = tss.trim() - tss.write_vcf(fh_vcf, position_transform = lambda x: np.fmax(1, np.round(x))) + tss.write_vcf(fh_vcf, position_transform = lambda x: 1 + np.round(x)) fh_vcf.close() # write seqlen of shortened ts with open(output[2], 'w') as f: @@ -559,7 +559,7 @@ rule boundary_sims: input: output: output_dir + "/simulated_data/sweeps/boundary_sims/sim_{seed}_{region_size}.trees" - resources: time=6000, mem_mb=6000 + resources: time=60, mem_mb=6000 run: model = species.get_demographic_model(demo_model["id"]) mut_rate = model.mutation_rate @@ -588,7 +588,7 @@ rule neutral: input: output: output_dir + f"/simulated_data/sweeps/neutral/{demo_model['id']}/{{seed}}/sim_{chrom}_{{left}}_{{right}}.trees", - resources: time=3000, mem_mb=8000 + resources: time=30, mem_mb=7000 run: model = species.get_demographic_model(demo_model["id"]) mutation_rate = model.mutation_rate @@ -620,7 +620,7 @@ rule bgs: input: output: output_dir + f"/simulated_data/sweeps/bgs/{demo_model['id']}/{{annot}}/{{dfe}}/{{seed}}/sim_{chrom}_{{left}}_{{right}}.trees", - resources: time=3000, mem_mb=3000 + resources: time=30, mem_mb=8000 run: model = species.get_demographic_model(demo_model["id"]) mutation_rate = model.mutation_rate @@ -659,7 +659,7 @@ rule sweep: input: output: output_dir + f"/simulated_data/sweeps/sweep/{demo_model['id']}/{{popu}}/{{annot}}/{{dfe}}/{{coeff}}/{{tmult}}/{{seed}}/sim_{{chrom}}_{{left}}_{{right}}.trees", - resources: time=3000, mem_mb=16000 + resources: time=30, mem_mb=12000 run: model = species.get_demographic_model(demo_model["id"]) mutation_rate = model.mutation_rate @@ -754,7 +754,7 @@ rule get_stats: output_dir + "/simulated_data/sweeps/{middle}/sim_{chrom}_{left}_{right}.diploshic.ancFile", output_dir + "/simulated_data/sweeps/{middle}/sim_{chrom}_{left}_{right}.diploshic.samples" - resources: time=3000, mem_mb=2000 + resources: time=30, mem_mb=4000 run: params_dict, target_pops = _get_params_dict_from_wildcards(wildcards) dump_results(input, output, params_dict, target_pops, config["num_subwins"]) @@ -769,7 +769,7 @@ rule diploshic_fvs: output: output_dir + '/simulated_data/sweeps/{middle}/sim_{chrom}_{left}_{right}_{popu}.diploshic.fv' - resources: time=30, mem_mb=1200 + resources: time=40, mem_mb=5000 run: with open(input[0],'r') as f: seq_len = f.read().strip() @@ -783,7 +783,7 @@ rule diploshic_pred: rules.diploshic_train_classifier.output output: output_dir + '/simulated_data/sweeps/{middle}/sim_{chrom}_{left}_{right}_{popu}.diploshic.preds' - resources: time=30, mem_mb=1200 + resources: time=30, mem_mb=3000 run: cmd = f"export CUDA_VISIBLE_DEVICES=\"\" && diploSHIC predict trained_model.json trained_model.weights.hdf5 {input[0]} {output[0]}" shell(cmd) @@ -820,7 +820,7 @@ rule merge_stats: input: stats_outs output: output_dir + f'/simulated_data/sweeps/all_sims.tmp.stats.tsv' - resources: time=3000, mem_mb=350000, disk_mb=350000 + resources: time=1500, mem_mb=350000, disk_mb=350000 run: #print(input, flush=True) #import pdb; pdb.set_trace() @@ -830,7 +830,7 @@ rule merge_stats_shic1: input: shic_outs1 output: output_dir + f'/simulated_data/sweeps/all_sims1.shic.stats.tsv' - resources: time=3000, mem_mb=150000, disk_mb=150000 + resources: time=3000, mem_mb=350000, disk_mb=350000 run: #print(input, flush=True) #import pdb; pdb.set_trace() @@ -840,7 +840,7 @@ rule merge_stats_shic2: input: shic_outs2 output: output_dir + f'/simulated_data/sweeps/all_sims2.shic.stats.tsv' - resources: time=3000, mem_mb=150000, disk_mb=150000 + resources: time=3000, mem_mb=350000, disk_mb=350000 run: #print(input, flush=True) #import pdb; pdb.set_trace() @@ -850,7 +850,7 @@ rule merge_stats_shic3: input: shic_outs3 output: output_dir + f'/simulated_data/sweeps/all_sims3.shic.stats.tsv' - resources: time=3000, mem_mb=150000, disk_mb=150000 + resources: time=3000, mem_mb=350000, disk_mb=350000 run: #print(input, flush=True) #import pdb; pdb.set_trace() @@ -860,7 +860,7 @@ rule merge_stats_shic3: rule merge_all_stats: input: [output_dir + f'/simulated_data/sweeps/all_sims.tmp.stats.tsv', output_dir + f'/simulated_data/sweeps/all_sims3.shic.stats.tsv', output_dir + f'/simulated_data/sweeps/all_sims2.shic.stats.tsv', output_dir + f'/simulated_data/sweeps/all_sims1.shic.stats.tsv'] output: output_dir + f'/simulated_data/sweeps/all_sims.stats.tsv' - resources: time=3000, mem_mb=150000, disk_mb=150000 + resources: time=3000, mem_mb=350000, disk_mb=350000 shell: "cat {input} > {output}"