From 38ade62994996eebdae3c0ba893e7f963931311e Mon Sep 17 00:00:00 2001 From: Jacob Pfeil Date: Fri, 4 Nov 2016 16:09:55 +0000 Subject: [PATCH] Pass job to germline docker call (resolves #478) --- src/toil_scripts/gatk_germline/common.py | 3 ++- src/toil_scripts/gatk_germline/germline.py | 7 +++++-- src/toil_scripts/gatk_germline/test/test_germline.py | 3 ++- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/toil_scripts/gatk_germline/common.py b/src/toil_scripts/gatk_germline/common.py index 67e48017..46b6ba86 100644 --- a/src/toil_scripts/gatk_germline/common.py +++ b/src/toil_scripts/gatk_germline/common.py @@ -22,7 +22,8 @@ def output_file_job(job, filename, file_id, output_dir, s3_key_path=None): work_dir = job.fileStore.getLocalTempDir() filepath = job.fileStore.readGlobalFile(file_id, os.path.join(work_dir, filename)) if urlparse(output_dir).scheme == 's3': - s3am_upload(job=job, fpath=os.path.join(work_dir, filepath), + s3am_upload(job, + fpath=os.path.join(work_dir, filepath), s3_dir=output_dir, s3_key_path=s3_key_path) elif os.path.exists(os.path.join(output_dir, filename)): diff --git a/src/toil_scripts/gatk_germline/germline.py b/src/toil_scripts/gatk_germline/germline.py index de7c0fae..337b342d 100755 --- a/src/toil_scripts/gatk_germline/germline.py +++ b/src/toil_scripts/gatk_germline/germline.py @@ -566,6 +566,7 @@ def prepare_bam(job, uuid, url, config, paired_url=None, rg_line=None): config.g1k_indel, config.mills, config.dbsnp, + realign=False, # Do not realign INDELs memory=config.xmx, cores=config.cores).encapsulate() sorted_bam.addChild(preprocess) @@ -674,8 +675,9 @@ def setup_and_run_bwakit(job, uuid, url, rg_line, config, paired_url=None): return job.addFollowOnJobFn(run_bwakit, bwa_config, - sort=False, # BAM files are sorted later in the pipeline + sort=False, # BAM files are sorted later in the pipeline trim=config.trim, + mark_secondary=True, # Mark split alignments as secondary cores=config.cores, disk=bwakit_disk).rv() @@ -739,7 +741,8 @@ def gatk_haplotype_caller(job, # Uses docker_call mock mode to replace output with hc_output file outputs = {'output.g.vcf': hc_output} - docker_call(job=job, work_dir=work_dir, + docker_call(job=job, + work_dir=work_dir, env={'JAVA_OPTS': '-Djava.io.tmpdir=/data/ -Xmx{}'.format(job.memory)}, parameters=command, tool='quay.io/ucsc_cgl/gatk:3.5--dba6dae49156168a909c43330350c6161dc7ecc2', diff --git a/src/toil_scripts/gatk_germline/test/test_germline.py b/src/toil_scripts/gatk_germline/test/test_germline.py index 6cf7ca30..927f96f2 100644 --- a/src/toil_scripts/gatk_germline/test/test_germline.py +++ b/src/toil_scripts/gatk_germline/test/test_germline.py @@ -214,7 +214,8 @@ def _get_default_inputs(self): inputs.ssec = None inputs.sorted = False inputs.cores = 4 - inputs.xmx = '8G' + inputs.file_size = '1G' + inputs.xmx = '4G' inputs.output_dir = self.workdir inputs.suffix = '' inputs.unsafe_mode = False