-
Notifications
You must be signed in to change notification settings - Fork 4
/
train_variant_caller.sh
executable file
·151 lines (142 loc) · 4.02 KB
/
train_variant_caller.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#!/bin/bash
# Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved.
# Wrapper script to train DL4VC model.
# Stop at the first command that exits with a non-zero status.
set -e

# Directory containing this script, with symbolic links resolved.
# The training entry point (main.py) is looked up relative to it.
SCRIPTDIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"

# --- Defaults; each can be overridden from the command line. ---

# Training schedule and resources.
EPOCHS=5
NUM_GPUS=1
TEST_BATCH_SIZE=200
TRAIN_BATCH_SIZE=80

# Input files (empty until supplied by the caller).
SAMPLE_VCF=""
TEST_HDF=""
TRAIN_HDF=""

# Output files.
OUT_MODEL="vc_model.pth"
OUT_VCF="vc_model_eval.vcf"

# Calling the script without any argument is treated as a request for help.
case "$#" in
  0) RUN_HELP=true ;;
  *) RUN_HELP=false ;;
esac
# Abort with a diagnostic when an option that needs a value is the last
# argument on the command line.
# Arguments: $1 - option name as typed by the user
#            $2 - number of arguments left, including the option itself
# Outputs:   error message on stderr when the value is missing
# Returns:   exits the script with status 1 when the value is missing
_require_option_value() {
  if [[ "$2" -lt 2 ]]; then
    printf 'Error: option %s requires a value.\n' "$1" >&2
    exit 1
  fi
}

# Parse command-line options into the script's configuration variables.
# Globals written: NUM_GPUS, EPOCHS, TRAIN_BATCH_SIZE, TEST_BATCH_SIZE,
#                  TRAIN_HDF, TEST_HDF, OUT_VCF, SAMPLE_VCF, OUT_MODEL,
#                  RUN_HELP
# Arguments: the script's positional parameters ("$@")
# Outputs:   warnings and errors on stderr
# Returns:   0 on success; exits with status 1 if an option lacks its value
#
# -h/--help only sets RUN_HELP=true; printing the help text is left to the
# code that runs after parsing.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -g)
        _require_option_value "$1" "$#"
        NUM_GPUS="$2"
        shift 2
        ;;
      -e)
        _require_option_value "$1" "$#"
        EPOCHS="$2"
        shift 2
        ;;
      --train-batch-size)
        _require_option_value "$1" "$#"
        TRAIN_BATCH_SIZE="$2"
        shift 2
        ;;
      --test-batch-size)
        _require_option_value "$1" "$#"
        TEST_BATCH_SIZE="$2"
        shift 2
        ;;
      --train-hdf)
        _require_option_value "$1" "$#"
        TRAIN_HDF="$2"
        shift 2
        ;;
      --test-hdf)
        _require_option_value "$1" "$#"
        TEST_HDF="$2"
        shift 2
        ;;
      --out-vcf)
        _require_option_value "$1" "$#"
        OUT_VCF="$2"
        shift 2
        ;;
      --sample-vcf)
        _require_option_value "$1" "$#"
        SAMPLE_VCF="$2"
        shift 2
        ;;
      --out-model)
        _require_option_value "$1" "$#"
        OUT_MODEL="$2"
        shift 2
        ;;
      -h | --help)
        RUN_HELP=true
        shift
        ;;
      *)
        # Unknown arguments are still skipped, as before, but no longer
        # silently: a mistyped option name would otherwise go unnoticed.
        printf 'Warning: ignoring unrecognized argument: %s\n' "$1" >&2
        shift
        ;;
    esac
  done
}

parse_args "$@"
# Print the usage summary to stdout.
# Globals read: NUM_GPUS, EPOCHS, TRAIN_BATCH_SIZE, TEST_BATCH_SIZE,
#               OUT_VCF, OUT_MODEL
# The "(default ...)" figures are the values these variables hold when the
# function is called, so options given alongside -h show up in them.
print_help() {
  printf '%s\n' \
    "Wrapper script for training variant caller." \
    "Options:" \
    "-g : Number of GPUs to use for training (default ${NUM_GPUS})" \
    "-e : Number of epochs to train for (default ${EPOCHS})" \
    "--train-batch-size : Batch size for training (default ${TRAIN_BATCH_SIZE})" \
    "--test-batch-size : Batch size for evaluation while training (default ${TEST_BATCH_SIZE})" \
    "--train-hdf : Path to training HDF file" \
    "--test-hdf : Path to evaluation HDF file" \
    "--out-vcf : Output path for VCF generated during evaluation (default ${OUT_VCF})" \
    "--sample-vcf : Path to sample VCF to pull VCF headers from" \
    "--out-model : Output path and name for trained model (default ${OUT_MODEL})" \
    "-h : Print help message" \
    ""
}

# Show the help text and stop successfully when help was requested.
if [[ "${RUN_HELP}" == true ]]; then
  print_help
  exit 0
fi
printf "Run training...\n"

# Point out path inputs that were left empty. The run is still attempted
# exactly as before; the notice only makes the cause of a later failure
# easier to spot.
[[ -n "${TRAIN_HDF}" ]] \
  || printf 'Warning: --train-hdf is empty; --train_file will be passed without a value.\n' >&2
[[ -n "${TEST_HDF}" ]] \
  || printf 'Warning: --test-hdf is empty; --test_file will be passed without a value.\n' >&2
[[ -n "${SAMPLE_VCF}" ]] \
  || printf 'Warning: --sample-vcf is empty; --sample_vcf will be passed without a value.\n' >&2

# Assemble the argument list for main.py into the TRAIN_ARGS array.
# Globals read:    EPOCHS, NUM_GPUS, TRAIN_BATCH_SIZE, TEST_BATCH_SIZE,
#                  TRAIN_HDF, TEST_HDF, OUT_VCF, SAMPLE_VCF, OUT_MODEL
# Globals written: TRAIN_ARGS
#
# Caller-supplied values use ${VAR:+"${VAR}"}: a non-empty value becomes
# exactly one argument (spaces and glob characters are preserved), while an
# empty value contributes no argument at all, which is what the previous
# unquoted expansion did. All other options are hard-coded.
build_train_args() {
  TRAIN_ARGS=(
    --lr 0.0002
    --grad-clip 1.0
    --epochs ${EPOCHS:+"${EPOCHS}"}
    --log-interval 1
    --gpus ${NUM_GPUS:+"${NUM_GPUS}"}
    --label-smoothing 0.001
    --batch-size ${TRAIN_BATCH_SIZE:+"${TRAIN_BATCH_SIZE}"}
    --test-batch-size ${TEST_BATCH_SIZE:+"${TEST_BATCH_SIZE}"}
    --model-hidden-dropout 0.1
    --model-batchnorm
    --train_file ${TRAIN_HDF:+"${TRAIN_HDF}"}
    --test_file ${TEST_HDF:+"${TEST_HDF}"}
    --num-data-workers 5
    --trust-snp-only
    --non-snp-train-weight 2.0
    --fp-train-weight 0.2
    --model-use-q-scores
    --model-use-strands
    --auxillary-loss-weight 1.0
    --auxillary-loss-bases-weight 0.01
    --auxillary-loss-allele-weight 0.001
    --loss-debug-freq 10000
    --save_vcf_records
    --save_vcf_records_file ${OUT_VCF:+"${OUT_VCF}"}
    --aux-keep-candidate-af
    --model-use-reads-ref-var-mask
    --close_match_window 2.0
    --focal_loss_alpha 1.
    --focal_loss_gamma 0.2
    --model-conv-layers 7
    --model-residual-layer-start 5
    --model-ave-pool-layers 2
    --early_loss_weight 0.1
    --model-init-conv-channels 128
    --rm_var_reads_rate 0.0
    --rm_non_var_reads_rate 0.0
    --close_examples_sample_rate 0.15
    --delay_augmentation_epochs 1
    --save_hard_example_records
    --learn_early_loss_weight
    --model_pool_combine_dimension 0
    --model-final-conv-channels 128
    --model-bottleneck-size 32
    --model_final_layer_dilation 2
    --model_middle_layer_dilation 2
    --model_concat_hw_reads
    --model-highway-single-reads
    --sample_vcf ${SAMPLE_VCF:+"${SAMPLE_VCF}"}
    --modelsave ${OUT_MODEL:+"${OUT_MODEL}"}
  )
}

build_train_args

# main.py is taken from the directory this script lives in. It is the last
# command, so its exit status becomes the script's exit status.
python "${SCRIPTDIR}/main.py" "${TRAIN_ARGS[@]}"