Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pass job to germline docker call (resolves #478) #479

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/toil_scripts/gatk_germline/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ def output_file_job(job, filename, file_id, output_dir, s3_key_path=None):
work_dir = job.fileStore.getLocalTempDir()
filepath = job.fileStore.readGlobalFile(file_id, os.path.join(work_dir, filename))
if urlparse(output_dir).scheme == 's3':
- s3am_upload(job=job, fpath=os.path.join(work_dir, filepath),
+ s3am_upload(job,
+ fpath=os.path.join(work_dir, filepath),
s3_dir=output_dir,
s3_key_path=s3_key_path)
elif os.path.exists(os.path.join(output_dir, filename)):
Expand Down
11 changes: 6 additions & 5 deletions src/toil_scripts/gatk_germline/germline.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,9 +447,7 @@ def download_shared_files(job, config):
setattr(config, name, job.addChildJobFn(download_url_job,
url,
name=name,
- s3_key_path=config.ssec,
- disk='15G' # Estimated reference file size
- ).rv())
+ s3_key_path=config.ssec).rv())
finally:
if getattr(config, name, None) is None and name not in nonessential_files:
raise ValueError("Necessary configuration parameter is missing:\n{}".format(name))
Expand Down Expand Up @@ -566,6 +564,7 @@ def prepare_bam(job, uuid, url, config, paired_url=None, rg_line=None):
config.g1k_indel,
config.mills,
config.dbsnp,
realign=False, # Do not realign INDELs
memory=config.xmx,
cores=config.cores).encapsulate()
sorted_bam.addChild(preprocess)
Expand Down Expand Up @@ -674,8 +673,9 @@ def setup_and_run_bwakit(job, uuid, url, rg_line, config, paired_url=None):

return job.addFollowOnJobFn(run_bwakit,
bwa_config,
- sort=False, # BAM files are sorted later in the pipeline
+ sort=False, # BAM files are sorted later in the pipeline
trim=config.trim,
mark_secondary=True, # Mark split alignments as secondary
cores=config.cores,
disk=bwakit_disk).rv()

Expand Down Expand Up @@ -739,7 +739,8 @@ def gatk_haplotype_caller(job,

# Uses docker_call mock mode to replace output with hc_output file
outputs = {'output.g.vcf': hc_output}
- docker_call(job=job, work_dir=work_dir,
+ docker_call(job=job,
+ work_dir=work_dir,
env={'JAVA_OPTS': '-Djava.io.tmpdir=/data/ -Xmx{}'.format(job.memory)},
parameters=command,
tool='quay.io/ucsc_cgl/gatk:3.5--dba6dae49156168a909c43330350c6161dc7ecc2',
Expand Down
3 changes: 3 additions & 0 deletions src/toil_scripts/gatk_germline/germline_config_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ def generate_config():
# Required: S3 URL or local path to output directory
output-dir:

# Genomic intervals to restrict analysis
intervals:

# Required: Input BAM file is sorted (Default: False)
sorted:

Expand Down
1 change: 1 addition & 0 deletions src/toil_scripts/gatk_germline/test/test_germline.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ def _get_default_inputs(self):
inputs.sorted = False
inputs.cores = 4
inputs.xmx = '8G'
inputs.file_size = '1G'
inputs.output_dir = self.workdir
inputs.suffix = ''
inputs.unsafe_mode = False
Expand Down