Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor gatk germline (resolves #280) #335

Merged
merged 29 commits into from
Sep 24, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
e0960dc
Refactor gatk germline (resolves #280)
jpfeil Jun 7, 2016
95809f9
Add oncotator and synapse support (resolves #401)
jpfeil Jun 7, 2016
9797429
Upload intermediate VCF
jpfeil Aug 22, 2016
e235c71
Use job to pass resource requirements
jpfeil Aug 23, 2016
067302a
Addressed PR comments
jpfeil Aug 25, 2016
01387fd
Fixed typos and add preprocess test
jpfeil Aug 25, 2016
e1be438
Fix memory passing bug in preprocessing
jpfeil Aug 25, 2016
f6de579
Added defaults to config and manifest
jpfeil Aug 25, 2016
a3f0f11
Fix bug when realigning sorted BAM
jpfeil Aug 25, 2016
3074706
Run hard filter independently
jpfeil Aug 26, 2016
3b5c7ec
Refactored joint genotyping and filtering
jpfeil Aug 26, 2016
1dc9457
Removed synapse and unnecessary common lib functions
jpfeil Aug 27, 2016
33409be
Removed functions in common lib
jpfeil Aug 29, 2016
dc1958d
Removed split joint vcf by name
jpfeil Aug 30, 2016
946442e
Add VQSR CLI and joint batching
jpfeil Aug 31, 2016
d858860
Improved batching method
jpfeil Sep 1, 2016
7344019
Add CombineGVCFs
jpfeil Sep 1, 2016
6936371
Add VQSR and joint genotype test
jpfeil Sep 2, 2016
14b8ba1
Modified logging statements
jpfeil Sep 3, 2016
6db248a
Add pipeline with VQSR test
jpfeil Sep 7, 2016
bcc27b8
Check number of samples earlier in the pipeline
jpfeil Sep 7, 2016
1496221
Remove default resource values
jpfeil Sep 7, 2016
12daa94
Update PromisedRequirements
jpfeil Sep 9, 2016
84048e0
Sync germline lib modules with toil-lib
jpfeil Sep 9, 2016
3f0877d
Add parameters for SNP and INDEL 1000G data
jpfeil Sep 15, 2016
4c319e6
Added config requirements for each function
jpfeil Sep 15, 2016
6dd338e
Use common lib generate file function
jpfeil Sep 22, 2016
be3f011
SQUASH: Add more documentation to the bwakit configuration function
jpfeil Sep 22, 2016
a51c70a
SQUASH: Addressed PR comments
jpfeil Sep 23, 2016
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@ def check_provided(distribution, min_version, max_version=None, optional=False):
entry_points={
'console_scripts': [
'toil-bwa = toil_scripts.bwa_alignment.bwa_alignment:main',
'toil-exome = toil_scripts.exome_variant_pipeline.exome_variant_pipeline:main']})
'toil-exome = toil_scripts.exome_variant_pipeline.exome_variant_pipeline:main',
'toil-germline = toil_scripts.gatk_germline.germline:main']})


class PyTest(TestCommand):
Expand Down
62 changes: 41 additions & 21 deletions src/toil_scripts/adam_gatk_pipeline/align_and_call.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,7 @@
# import job steps from other toil pipelines
from toil_scripts.adam_pipeline.adam_preprocessing import * #static_adam_preprocessing_dag
from toil_scripts.bwa_alignment.bwa_alignment import * #download_shared_files
from toil_scripts.gatk_germline.germline import * #batch_start
from toil_scripts.gatk_processing.gatk_preprocessing import * #download_gatk_files
from toil_scripts.gatk_germline.germline import * #run_gatk_germline_pipeline
from toil_lib.files import generate_file
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to confirm here, we removed this import because toil_scripts.gatk_processing.gatk_preprocessing moved into toil_scripts.gatk_germline.germline, right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's correct.



Expand Down Expand Up @@ -187,31 +186,52 @@ def static_dag(job, uuid, rg_line, inputs):
's3://{s3_bucket}/analysis{dir_suffix}/{uuid}'.format(**args),
suffix='.adam').encapsulate()

# get head GATK preprocessing job function and encapsulate it
gatk_preprocess = job.wrapJobFn(download_gatk_files,
inputs,
[uuid,'s3://{s3_bucket}/alignment{dir_suffix}/{uuid}.bam'.format(**args)],
's3://{s3_bucket}/analysis{dir_suffix}/{uuid}'.format(**args),
suffix='.gatk').encapsulate()
# Configure options for Toil Germline pipeline. This function call only runs the preprocessing steps.
gatk_preprocessing_inputs = copy.deepcopy(inputs)
gatk_preprocessing_inputs.suffix = '.gatk'
gatk_preprocessing_inputs.preprocess = True
gatk_preprocessing_inputs.preprocess_only = True
gatk_preprocessing_inputs.output_dir = 's3://{s3_bucket}/analysis{dir_suffix}'.format(**args)

# get head GATK preprocessing job function and encapsulate it
gatk_preprocess = job.wrapJobFn(run_gatk_germline_pipeline,
GermlineSample(uuid,
's3://{s3_bucket}/alignment{dir_suffix}/{uuid}.bam'.format(**args),
None, # Does not require second URL or RG_Line
None),
gatk_preprocessing_inputs).encapsulate()

# Configure options for Toil Germline pipeline for preprocessed ADAM BAM file.
adam_call_inputs = inputs
gatk_call_inputs = copy.deepcopy(inputs)
adam_call_inputs.indexed = False
gatk_call_inputs.indexed = True
adam_call_inputs.suffix = '.adam'
adam_call_inputs.sorted = True
adam_call_inputs.preprocess = False
adam_call_inputs.run_vqsr = False
adam_call_inputs.joint_genotype = False
adam_call_inputs.output_dir = 's3://{s3_bucket}/analysis{dir_suffix}'.format(**args)

# get head GATK haplotype caller job function for the result of ADAM preprocessing and encapsulate it
gatk_adam_call = job.wrapJobFn(batch_start,
adam_call_inputs,
[uuid,'s3://{s3_bucket}/analysis{dir_suffix}/{uuid}/{uuid}.adam.bam'.format(**args)],
's3://{s3_bucket}/analysis{dir_suffix}/{uuid}'.format(**args),
suffix='.adam').encapsulate()
gatk_adam_call = job.wrapJobFn(run_gatk_germline_pipeline,
GermlineSample(uuid,
's3://{s3_bucket}/analysis{dir_suffix}/{uuid}/{uuid}.adam.bam'.format(**args),
None,
None),
adam_call_inputs).encapsulate()

# Configure options for Toil Germline pipeline for preprocessed GATK BAM file.
gatk_call_inputs = copy.deepcopy(inputs)
gatk_call_inputs.sorted = True
gatk_call_inputs.preprocess = False
gatk_call_inputs.run_vqsr = False
gatk_call_inputs.joint_genotype = False
gatk_call_inputs.output_dir = 's3://{s3_bucket}/analysis{dir_suffix}'.format(**args)

# get head GATK haplotype caller job function for the result of GATK preprocessing and encapsulate it
gatk_gatk_call = job.wrapJobFn(batch_start,
gatk_call_inputs,
[uuid,'s3://{s3_bucket}/analysis{dir_suffix}/{uuid}/{uuid}.gatk.bam'.format(**args)],
's3://{s3_bucket}/analysis{dir_suffix}/{uuid}'.format(**args),
suffix='.gatk').encapsulate()
gatk_gatk_call = job.wrapJobFn(run_gatk_germline_pipeline,
GermlineSample(uuid,
'S3://{s3_bucket}/analysis{dir_suffix}/{uuid}/{uuid}.gatk.bam'.format(**args),
None, None),
gatk_call_inputs).encapsulate()

# wire up dag
if not inputs.skip_alignment:
Expand Down
Loading