diff --git a/src/toil_scripts/gatk_germline/common.py b/src/toil_scripts/gatk_germline/common.py index 67e48017..46b6ba86 100644 --- a/src/toil_scripts/gatk_germline/common.py +++ b/src/toil_scripts/gatk_germline/common.py @@ -22,7 +22,8 @@ def output_file_job(job, filename, file_id, output_dir, s3_key_path=None): work_dir = job.fileStore.getLocalTempDir() filepath = job.fileStore.readGlobalFile(file_id, os.path.join(work_dir, filename)) if urlparse(output_dir).scheme == 's3': - s3am_upload(job=job, fpath=os.path.join(work_dir, filepath), + s3am_upload(job, + fpath=os.path.join(work_dir, filepath), s3_dir=output_dir, s3_key_path=s3_key_path) elif os.path.exists(os.path.join(output_dir, filename)): diff --git a/src/toil_scripts/gatk_germline/germline.py b/src/toil_scripts/gatk_germline/germline.py index de7c0fae..2dcd24ae 100755 --- a/src/toil_scripts/gatk_germline/germline.py +++ b/src/toil_scripts/gatk_germline/germline.py @@ -447,9 +447,7 @@ def download_shared_files(job, config): setattr(config, name, job.addChildJobFn(download_url_job, url, name=name, - s3_key_path=config.ssec, - disk='15G' # Estimated reference file size - ).rv()) + s3_key_path=config.ssec).rv()) finally: if getattr(config, name, None) is None and name not in nonessential_files: raise ValueError("Necessary configuration parameter is missing:\n{}".format(name)) @@ -566,6 +564,7 @@ def prepare_bam(job, uuid, url, config, paired_url=None, rg_line=None): config.g1k_indel, config.mills, config.dbsnp, + realign=False, # Do not realign INDELs memory=config.xmx, cores=config.cores).encapsulate() sorted_bam.addChild(preprocess) @@ -674,8 +673,9 @@ def setup_and_run_bwakit(job, uuid, url, rg_line, config, paired_url=None): return job.addFollowOnJobFn(run_bwakit, bwa_config, - sort=False, # BAM files are sorted later in the pipeline + sort=False, # BAM files are sorted later in the pipeline trim=config.trim, + mark_secondary=True, # Mark split alignments as secondary cores=config.cores, disk=bwakit_disk).rv() @@ -739,7 +739,8 @@ def gatk_haplotype_caller(job, # Uses docker_call mock mode to replace output with hc_output file outputs = {'output.g.vcf': hc_output} - docker_call(job=job, work_dir=work_dir, + docker_call(job=job, + work_dir=work_dir, env={'JAVA_OPTS': '-Djava.io.tmpdir=/data/ -Xmx{}'.format(job.memory)}, parameters=command, tool='quay.io/ucsc_cgl/gatk:3.5--dba6dae49156168a909c43330350c6161dc7ecc2', diff --git a/src/toil_scripts/gatk_germline/test/test_germline.py b/src/toil_scripts/gatk_germline/test/test_germline.py index 6cf7ca30..f978306e 100644 --- a/src/toil_scripts/gatk_germline/test/test_germline.py +++ b/src/toil_scripts/gatk_germline/test/test_germline.py @@ -215,6 +215,7 @@ def _get_default_inputs(self): inputs.sorted = False inputs.cores = 4 inputs.xmx = '8G' + inputs.file_size = '1G' inputs.output_dir = self.workdir inputs.suffix = '' inputs.unsafe_mode = False