Skip to content

Commit

Permalink
stuff & diploshic/vcf bug fix
Browse files Browse the repository at this point in the history
  • Loading branch information
mufernando committed Jun 3, 2024
1 parent 8da0db2 commit 0602a2a
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 20 deletions.
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,5 @@ dependencies:
- git+https://github.com/popgenmethods/smcpp
- scikit-allel
- git+https://github.com/xin-huang/dadi-cli
- git+https://github.com/kr-colab/diploSHIC.git@refs/pull/56/merge
- diploSHIC
11 changes: 5 additions & 6 deletions workflows/config/snakemake/oregon_profile_simple/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,16 @@ cluster:
--time={resources.time}
--job-name=smk-{rule}%j
--output=logs/{rule}/{rule}%j.out
--parsable
default-resources:
- time=60
- mem_mb=12000
- threads=1
cluster-status: "status-sacct.sh"
restart-times: 3
max-jobs-per-second: 10
max-status-checks-per-second: 1
local-cores: 1
latency-wait: 60
jobs: 500
keep-going: True
max-jobs-per-second: 1000
max-status-checks-per-second: 1000
jobs: 2000
rerun-incomplete: True
printshellcmds: True
scheduler: greedy
Expand Down
24 changes: 24 additions & 0 deletions workflows/config/snakemake/oregon_profile_simple/status-sacct.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/usr/bin/env bash

# Check status of Slurm job
#
# Usage: status-sacct.sh JOBID
#
# Prints exactly one of "success", "running" or "failed" on stdout, derived
# from the State column that sacct reports for JOBID.
# Exits 1 (printing only diagnostics on stderr) when JOBID is missing, empty,
# or the literal word "Submitted".

jobid="$1"

# Without a job ID there is nothing to query; fail loudly instead of
# reporting a status for it.
if [[ -z "$jobid" ]]
then
  echo smk-simple-slurm: Missing job ID >&2
  echo smk-simple-slurm: Usage: "$0" JOBID >&2
  exit 1
fi

# The word "Submitted" in place of an ID means the submit command's full
# message was passed along instead of a bare job ID (see the hint below).
if [[ "$jobid" == Submitted ]]
then
  echo smk-simple-slurm: Invalid job ID: "$jobid" >&2
  echo smk-simple-slurm: Did you remember to add the flag --parsable to your sbatch call? >&2
  exit 1
fi

# Keep only the first word of the first line that sacct prints for the job.
output=$(sacct -j "$jobid" --format State --noheader | awk 'NR == 1 { print $1 }')

# Map the state onto the three status words by prefix match.
case "$output" in
  COMPLETED*)
    echo success
    ;;
  RUNNING*|PENDING*|COMPLETING*|CONFIGURING*|SUSPENDED*)
    echo running
    ;;
  *)
    # Every other state, as well as empty output from sacct, is reported
    # as a failure.
    echo failed
    ;;
esac
2 changes: 1 addition & 1 deletion workflows/config/snakemake/sweep_config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# General configs
seed: 12345
replicates: 200
replicates: 1_000
output_dir: results

# Contig configs
Expand Down
26 changes: 13 additions & 13 deletions workflows/sweep_simulate.snake
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ def dump_results(input, output, params_dict, target_pops, num_subwins=1):
if len(del_intervals) > 0:
tss = tss.delete_intervals(del_intervals)
tss = tss.trim()
tss.write_vcf(fh_vcf, position_transform = lambda x: np.fmax(1, np.round(x)))
tss.write_vcf(fh_vcf, position_transform = lambda x: 1 + np.round(x))
fh_vcf.close()
# write seqlen of shortened ts
with open(output[2], 'w') as f:
Expand Down Expand Up @@ -559,7 +559,7 @@ rule boundary_sims:
input:
output:
output_dir + "/simulated_data/sweeps/boundary_sims/sim_{seed}_{region_size}.trees"
resources: time=6000, mem_mb=6000
resources: time=60, mem_mb=6000
run:
model = species.get_demographic_model(demo_model["id"])
mut_rate = model.mutation_rate
Expand Down Expand Up @@ -588,7 +588,7 @@ rule neutral:
input:
output:
output_dir + f"/simulated_data/sweeps/neutral/{demo_model['id']}/{{seed}}/sim_{chrom}_{{left}}_{{right}}.trees",
resources: time=3000, mem_mb=8000
resources: time=30, mem_mb=7000
run:
model = species.get_demographic_model(demo_model["id"])
mutation_rate = model.mutation_rate
Expand Down Expand Up @@ -620,7 +620,7 @@ rule bgs:
input:
output:
output_dir + f"/simulated_data/sweeps/bgs/{demo_model['id']}/{{annot}}/{{dfe}}/{{seed}}/sim_{chrom}_{{left}}_{{right}}.trees",
resources: time=3000, mem_mb=3000
resources: time=30, mem_mb=8000
run:
model = species.get_demographic_model(demo_model["id"])
mutation_rate = model.mutation_rate
Expand Down Expand Up @@ -659,7 +659,7 @@ rule sweep:
input:
output:
output_dir + f"/simulated_data/sweeps/sweep/{demo_model['id']}/{{popu}}/{{annot}}/{{dfe}}/{{coeff}}/{{tmult}}/{{seed}}/sim_{{chrom}}_{{left}}_{{right}}.trees",
resources: time=3000, mem_mb=16000
resources: time=30, mem_mb=12000
run:
model = species.get_demographic_model(demo_model["id"])
mutation_rate = model.mutation_rate
Expand Down Expand Up @@ -754,7 +754,7 @@ rule get_stats:
output_dir + "/simulated_data/sweeps/{middle}/sim_{chrom}_{left}_{right}.diploshic.ancFile",
output_dir + "/simulated_data/sweeps/{middle}/sim_{chrom}_{left}_{right}.diploshic.samples"

resources: time=3000, mem_mb=2000
resources: time=30, mem_mb=4000
run:
params_dict, target_pops = _get_params_dict_from_wildcards(wildcards)
dump_results(input, output, params_dict, target_pops, config["num_subwins"])
Expand All @@ -769,7 +769,7 @@ rule diploshic_fvs:

output:
output_dir + '/simulated_data/sweeps/{middle}/sim_{chrom}_{left}_{right}_{popu}.diploshic.fv'
resources: time=30, mem_mb=1200
resources: time=40, mem_mb=5000
run:
with open(input[0],'r') as f:
seq_len = f.read().strip()
Expand All @@ -783,7 +783,7 @@ rule diploshic_pred:
rules.diploshic_train_classifier.output
output:
output_dir + '/simulated_data/sweeps/{middle}/sim_{chrom}_{left}_{right}_{popu}.diploshic.preds'
resources: time=30, mem_mb=1200
resources: time=30, mem_mb=3000
run:
cmd = f"export CUDA_VISIBLE_DEVICES=\"\" && diploSHIC predict trained_model.json trained_model.weights.hdf5 {input[0]} {output[0]}"
shell(cmd)
Expand Down Expand Up @@ -820,7 +820,7 @@ rule merge_stats:
input: stats_outs
output:
output_dir + f'/simulated_data/sweeps/all_sims.tmp.stats.tsv'
resources: time=3000, mem_mb=350000, disk_mb=350000
resources: time=1500, mem_mb=350000, disk_mb=350000
run:
#print(input, flush=True)
#import pdb; pdb.set_trace()
Expand All @@ -830,7 +830,7 @@ rule merge_stats_shic1:
input: shic_outs1
output:
output_dir + f'/simulated_data/sweeps/all_sims1.shic.stats.tsv'
resources: time=3000, mem_mb=150000, disk_mb=150000
resources: time=3000, mem_mb=350000, disk_mb=350000
run:
#print(input, flush=True)
#import pdb; pdb.set_trace()
Expand All @@ -840,7 +840,7 @@ rule merge_stats_shic2:
input: shic_outs2
output:
output_dir + f'/simulated_data/sweeps/all_sims2.shic.stats.tsv'
resources: time=3000, mem_mb=150000, disk_mb=150000
resources: time=3000, mem_mb=350000, disk_mb=350000
run:
#print(input, flush=True)
#import pdb; pdb.set_trace()
Expand All @@ -850,7 +850,7 @@ rule merge_stats_shic3:
input: shic_outs3
output:
output_dir + f'/simulated_data/sweeps/all_sims3.shic.stats.tsv'
resources: time=3000, mem_mb=150000, disk_mb=150000
resources: time=3000, mem_mb=350000, disk_mb=350000
run:
#print(input, flush=True)
#import pdb; pdb.set_trace()
Expand All @@ -860,7 +860,7 @@ rule merge_stats_shic3:
rule merge_all_stats:
input: [output_dir + f'/simulated_data/sweeps/all_sims.tmp.stats.tsv', output_dir + f'/simulated_data/sweeps/all_sims3.shic.stats.tsv', output_dir + f'/simulated_data/sweeps/all_sims2.shic.stats.tsv', output_dir + f'/simulated_data/sweeps/all_sims1.shic.stats.tsv']
output: output_dir + f'/simulated_data/sweeps/all_sims.stats.tsv'
resources: time=3000, mem_mb=150000, disk_mb=150000
resources: time=3000, mem_mb=350000, disk_mb=350000
shell:
"cat {input} > {output}"

Expand Down

0 comments on commit 0602a2a

Please sign in to comment.