From f08b9bc6d2e56a0c3c3444d84336b672bcd62fd1 Mon Sep 17 00:00:00 2001 From: mufernando Date: Mon, 3 Jun 2024 12:52:43 -0700 Subject: [PATCH] update diploshic from hdf5 to h5; removing any variants at last site --- .../config/snakemake/oregon_profile_simple/config.yaml | 3 ++- workflows/diploshic.snake | 4 ++-- workflows/sweep_simulate.snake | 8 ++++++-- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/workflows/config/snakemake/oregon_profile_simple/config.yaml b/workflows/config/snakemake/oregon_profile_simple/config.yaml index cb66c5c..d979a1b 100644 --- a/workflows/config/snakemake/oregon_profile_simple/config.yaml +++ b/workflows/config/snakemake/oregon_profile_simple/config.yaml @@ -17,9 +17,10 @@ cluster-status: "status-sacct.sh" restart-times: 3 max-jobs-per-second: 1000 max-status-checks-per-second: 1000 -jobs: 2000 +jobs: 3500 rerun-incomplete: True printshellcmds: True +latency-wait: 30 scheduler: greedy use-conda: True jobscript: "jobscript-wo-properties.sh" diff --git a/workflows/diploshic.snake b/workflows/diploshic.snake index 320cac0..5fadcc5 100644 --- a/workflows/diploshic.snake +++ b/workflows/diploshic.snake @@ -62,7 +62,7 @@ rule all: expand("{tDir}/neut_0.fvec", tDir = ["train", "test"]), expand("trainingSets/{cl}.fvec", cl = ["hard", "linkedHard", "soft", "linkedSoft", "neut"]), "trained_model.json", - "trained_model.weights.hdf5" + "trained_model.weights.h5" rule clone_discoal: output: @@ -156,7 +156,7 @@ rule train_classifier: rules.make_training_sets.output output: "trained_model.json", - "trained_model.weights.hdf5" + "trained_model.weights.h5" run: # cpu training below cmd = f"export CUDA_VISIBLE_DEVICES=\"\" && {diploSHIC_exec} train trainingSets/ trainingSets/ trained_model --epochs=100" diff --git a/workflows/sweep_simulate.snake b/workflows/sweep_simulate.snake index 961a214..03352b7 100644 --- a/workflows/sweep_simulate.snake +++ b/workflows/sweep_simulate.snake @@ -413,6 +413,10 @@ def dump_results(input, output, 
params_dict, target_pops, num_subwins=1): if len(del_intervals) > 0: tss = tss.delete_intervals(del_intervals) tss = tss.trim() + # because we are shifting from 0-based to 1-based, I need to remove any sites that may have happened at the last position + sites_at_last = np.where(np.round(tss.sites_position)==config["focal_size"])[0] + assert sites_at_last.shape[0] < 4 # realistically we shouldn't get more than two or three hits there + tss = tss.delete_sites(sites_at_last) tss.write_vcf(fh_vcf, position_transform = lambda x: 1 + np.round(x)) fh_vcf.close() # write seqlen of shortened ts @@ -538,7 +542,7 @@ shic_outs3 = [file_prefix+".stats.tsv.shic" for file_prefix in sw_outs_prefix_po rule all: input: - rules.diploshic_all.input, + rules.diploshic_all.input, boundary_outs + trees_outs + stats_outs + vcf_outs + [output_dir + f'/simulated_data/sweeps/all_sims.stats.tsv', output_dir+f'/simulated_data/sweeps/rec_map_{chrom}_{config["num_windows"]}.tsv'] + annot_outs +anc_outs + fv_outs + pred_outs + shic_outs1 + shic_outs2 + shic_outs3 default_target: True @@ -785,7 +789,7 @@ rule diploshic_pred: output_dir + '/simulated_data/sweeps/{middle}/sim_{chrom}_{left}_{right}_{popu}.diploshic.preds' resources: time=30, mem_mb=3000 run: - cmd = f"export CUDA_VISIBLE_DEVICES=\"\" && diploSHIC predict trained_model.json trained_model.weights.hdf5 {input[0]} {output[0]}" + cmd = f"export CUDA_VISIBLE_DEVICES=\"\" && diploSHIC predict trained_model.json trained_model.weights.h5 {input[0]} {output[0]}" shell(cmd)