Skip to content

Commit

Permalink
update diploshic weights file extension from hdf5 to h5; remove any variants at the last site
Browse files Browse the repository at this point in the history
  • Loading branch information
mufernando committed Jun 3, 2024
1 parent a3edfba commit 6137c2e
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 5 deletions.
3 changes: 2 additions & 1 deletion workflows/config/snakemake/oregon_profile_simple/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@ cluster-status: "status-sacct.sh"
restart-times: 3
max-jobs-per-second: 1000
max-status-checks-per-second: 1000
jobs: 2000
jobs: 3500
rerun-incomplete: True
printshellcmds: True
latency-wait: 30
scheduler: greedy
use-conda: True
jobscript: "jobscript-wo-properties.sh"
4 changes: 2 additions & 2 deletions workflows/diploshic.snake
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ rule all:
expand("{tDir}/neut_0.fvec", tDir = ["train", "test"]),
expand("trainingSets/{cl}.fvec", cl = ["hard", "linkedHard", "soft", "linkedSoft", "neut"]),
"trained_model.json",
"trained_model.weights.hdf5"
"trained_model.weights.h5"

rule clone_discoal:
output:
Expand Down Expand Up @@ -156,7 +156,7 @@ rule train_classifier:
rules.make_training_sets.output
output:
"trained_model.json",
"trained_model.weights.hdf5"
"trained_model.weights.h5"
run:
# cpu training below
cmd = f"export CUDA_VISIBLE_DEVICES=\"\" && {diploSHIC_exec} train trainingSets/ trainingSets/ trained_model --epochs=100"
Expand Down
8 changes: 6 additions & 2 deletions workflows/sweep_simulate.snake
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,10 @@ def dump_results(input, output, params_dict, target_pops, num_subwins=1):
if len(del_intervals) > 0:
tss = tss.delete_intervals(del_intervals)
tss = tss.trim()
# VCF positions are written 1-based (1 + round(x)), so a site whose rounded 0-based position equals focal_size would be shifted to focal_size + 1; remove any such sites before writing
sites_at_last = np.where(np.round(tss.sites_position)==config["focal_size"])[0]
assert sites_at_last.shape[0] < 4 # realistically we shouldn't get more than two or three hits there
tss = tss.delete_sites(sites_at_last)
tss.write_vcf(fh_vcf, position_transform = lambda x: 1 + np.round(x))
fh_vcf.close()
# write seqlen of shortened ts
Expand Down Expand Up @@ -538,7 +542,7 @@ shic_outs3 = [file_prefix+".stats.tsv.shic" for file_prefix in sw_outs_prefix_po

rule all:
input:
rules.diploshic_all.input,
rules.diploshic_all.input,
boundary_outs + trees_outs + stats_outs + vcf_outs + [output_dir + f'/simulated_data/sweeps/all_sims.stats.tsv', output_dir+f'/simulated_data/sweeps/rec_map_{chrom}_{config["num_windows"]}.tsv'] + annot_outs +anc_outs + fv_outs + pred_outs + shic_outs1 + shic_outs2 + shic_outs3
default_target: True

Expand Down Expand Up @@ -785,7 +789,7 @@ rule diploshic_pred:
output_dir + '/simulated_data/sweeps/{middle}/sim_{chrom}_{left}_{right}_{popu}.diploshic.preds'
resources: time=30, mem_mb=3000
run:
cmd = f"export CUDA_VISIBLE_DEVICES=\"\" && diploSHIC predict trained_model.json trained_model.weights.hdf5 {input[0]} {output[0]}"
cmd = f"export CUDA_VISIBLE_DEVICES=\"\" && diploSHIC predict trained_model.json trained_model.weights.h5 {input[0]} {output[0]}"
shell(cmd)


Expand Down

0 comments on commit 6137c2e

Please sign in to comment.