common-workflow-library · anton-khodak · May 31, 2016 · Jun 10, 2016 · Jun 10, 2016 · Jun 10, 2016
diff --git a/test/cnvkit-batch-job.json b/test/cnvkit-batch-job.json
@@ -0,0 +1,16 @@
+{
+  "bam_files": [
+    "*Tumor.bam"
+  ],
+  "normal":[
+    "*Normal.bam"
+  ],
+  "targets": "my_baits.bed",
+  "split": true,
+  "annotate": "refFlat.txt",
+  "fasta": "hg19.fasta",
+  "access": "data/access-5kb-mappable.hg19.bed",
+  "output_dir": "results/",
+  "diagram": true,
+  "scatter": true
+}
diff --git a/test/cnvkit-batch-test.yaml b/test/cnvkit-batch-test.yaml
@@ -0,0 +1,19 @@
+# Expected command line for ../tools/cnvkit-batch.cwl run with
+# cnvkit-batch-job.json; `--processes 1` is not in the job (tool default).
+- doc: General test of command line generation
+  tool: ../tools/cnvkit-batch.cwl
+  job: cnvkit-batch-job.json
+  args: [
+      "cnvkit.py", "batch",
+      "--access", "data/access-5kb-mappable.hg19.bed",
+      "--annotate", "refFlat.txt",
+      "--diagram",
+      "--fasta", "hg19.fasta",
+      "--normal", "*Normal.bam",
+      "--output-dir", "results/",
+      "--processes", "1",
+      "--scatter",
+      "--split",
+      "--targets", "my_baits.bed",
+      "*Tumor.bam"
+    ]
diff --git a/test/cnvkit-scatter-job.json b/test/cnvkit-scatter-job.json
@@ -0,0 +1,13 @@
+{
+  "segment": "segment.cns",
+  "chromosome": "chr1",
+  "split": true,
+  "gene": "gen1, gen2",
+  "range_list": "chr -start-end",
+  "sample_id": "data/access-5kb-mappable.hg19.bed",
+  "vcf": "data.vcf",
+  "y_min": 3.04,
+  "y_max": 4.04,
+  "trend": true,
+  "output": "result.txt"
+}
diff --git a/test/cnvkit-scatter-test.yaml b/test/cnvkit-scatter-test.yaml
@@ -0,0 +1,22 @@
+# Expected command line for ../tools/cnvkit-scatter.cwl run with
+# cnvkit-scatter-job.json. `--min-variant-depth` and `--width` are not in the
+# job file, so they presumably come from tool defaults - verify against the tool.
+# NOTE(review): the job file also sets `split`, which has no counterpart here.
+- doc: General test of command line generation
+  tool: ../tools/cnvkit-scatter.cwl
+  job: cnvkit-scatter-job.json
+  args: [
+      "cnvkit.py", "scatter",
+      "--chromosome", "chr1",
+      "--gene", "gen1, gen2",
+      "--min-variant-depth", "20",
+      "--output", "result.txt",
+      "--range-list", "chr -start-end",
+      "--sample-id", "data/access-5kb-mappable.hg19.bed",
+      "--segment", "segment.cns",
+      "--trend",
+      "--vcf", "data.vcf",
+      "--width", "1000000.0",
+      "--y-max", "4.04",
+      "--y-min", "3.04"
+    ]
diff --git a/test/cnvkit-segmetrics-job.json b/test/cnvkit-segmetrics-job.json
@@ -0,0 +1,9 @@
+{
+  "cnarray": "*Tumor.bam",
+  "segments": "*Normal.cns",
+  "drop_low_coverage": true,
+  "output": "results/result.txt",
+  "stdev": true,
+  "mad": true,
+  "pi": true
+}
diff --git a/test/cnvkit-segmetrics-test.yaml b/test/cnvkit-segmetrics-test.yaml
@@ -0,0 +1,18 @@
+# Expected command line for ../tools/cnvkit-segmetrics.cwl run with
+# cnvkit-segmetrics-job.json. `--alpha` and `--bootstrap` are not in the job
+# file, so they presumably come from tool defaults - verify against the tool.
+- doc: General test of command line generation
+  tool: ../tools/cnvkit-segmetrics.cwl
+  job: cnvkit-segmetrics-job.json
+  args: [
+      "cnvkit.py", "segmetrics",
+      "--alpha", "0.05",
+      "--bootstrap", "100",
+      "--drop-low-coverage",
+      "--mad",
+      "--output", "results/result.txt",
+      "--pi",
+      "--segments", "*Normal.cns",
+      "--stdev",
+      "*Tumor.bam"
+    ]
diff --git a/test/cnvkit-target-job.json b/test/cnvkit-target-job.json
@@ -0,0 +1,8 @@
+{
+  "interval": "*Tumor.bam",
+  "annotate": "refFlat.txt",
+  "avg_size": 33,
+  "output": "results.json",
+  "short_names": true,
+  "split": true
+}
diff --git a/test/cnvkit-target-test.yaml b/test/cnvkit-target-test.yaml
@@ -0,0 +1,14 @@
+# Expected command line for ../tools/cnvkit-target.cwl run with
+# cnvkit-target-job.json.
+- doc: General test of command line generation
+  tool: ../tools/cnvkit-target.cwl
+  job: cnvkit-target-job.json
+  args: [
+      "cnvkit.py", "target",
+      "--annotate", "refFlat.txt",
+      "--avg-size", "33",
+      "--output", "results.json",
+      "--short-names",
+      "--split",
+      "*Tumor.bam"
+    ]
diff --git a/test/test-files/cnvkit-batch/draft.txt b/test/test-files/cnvkit-batch/draft.txt
@@ -0,0 +1,8 @@
+Command from the CNVkit batch tutorial (https://cnvkit.readthedocs.io/en/v0.7.11/pipeline.html#batch) that I am trying to run:
+
+
+cnvkit.py batch *Tumor.bam --normal *Normal.bam \
+    --targets my_baits.bed --split --annotate refFlat.txt \
+    --fasta hg19.fasta --access data/access-5kb-mappable.hg19.bed \
+    --output-reference my_reference.cnn --output-dir results/ \
+    --diagram --scatter
diff --git a/test/test-files/cnvkit-batch/refFlat.txt b/test/test-files/cnvkit-batch/refFlat.txt
diff --git a/tools/cnvkit-batch.cwl b/tools/cnvkit-batch.cwl
@@ -0,0 +1,177 @@
+#!/usr/bin/env cwl-runner
+
+# CWL draft-3 description of `cnvkit.py batch`.
+#
+# Every input is optional. Inputs with a `prefix` are emitted as command-line
+# options; `bam_files` is the only input with an explicit position (1), so the
+# BAM files are placed after the options. No outputs are declared.
+
+cwlVersion: "cwl:draft-3"
+
+class: CommandLineTool
+baseCommand: ['cnvkit.py', 'batch']
+
+requirements:
+  - class: InlineJavascriptRequirement
+
+description: |
+  Run the complete CNVkit pipeline on one or more BAM files.
+
+inputs:
+
+  - id: bam_files
+    type:
+      - "null"
+      - type: array
+        items: string
+    description: Mapped sequence reads (.bam)
+    inputBinding:
+      position: 1
+
+  - id: male_reference
+    type: ["null", boolean]
+    description: >-
+      Use or assume a male reference (i.e. female samples will have +1
+      log-CNR of chrX; otherwise male samples would have -1 chrX).
+    inputBinding:
+      prefix: --male-reference
+
+  - id: count_reads
+    type: ["null", boolean]
+    description: >-
+      Get read depths by counting read midpoints within each bin.
+      (An alternative algorithm).
+    inputBinding:
+      prefix: --count-reads
+
+  # Defaults to 1, so `--processes 1` is emitted when the job omits it.
+  - id: processes
+    type: ["null", int]
+    default: 1
+    description: >-
+      Number of subprocesses used to running each of the BAM files in
+      parallel. Give 0 or a negative value to use the maximum number
+      of available CPUs. [Default - process each BAM in serial]
+    inputBinding:
+      prefix: --processes
+
+  - id: rlibpath
+    type: ["null", string]
+    description: Path to an alternative site-library to use for R packages.
+    inputBinding:
+      prefix: --rlibpath
+
+  - id: normal
+    type:
+      - "null"
+      - type: array
+        items: string
+    description: >-
+      Normal samples (.bam) to construct the pooled reference.
+      If this option is used but no files are given, a "flat"
+      reference will be built.
+    inputBinding:
+      prefix: --normal
+
+  - id: fasta
+    type: ["null", string]
+    description: Reference genome, FASTA format (e.g. UCSC hg19.fa)
+    inputBinding:
+      prefix: --fasta
+
+  - id: targets
+    type: ["null", string]
+    description: Target intervals (.bed or .list)
+    inputBinding:
+      prefix: --targets
+
+  - id: antitargets
+    type: ["null", string]
+    description: Antitarget intervals (.bed or .list)
+    inputBinding:
+      prefix: --antitargets
+
+  - id: annotate
+    type: ["null", string]
+    description: >-
+      UCSC refFlat.txt or ensFlat.txt file for the reference genome.
+      Pull gene names from this file and assign them to the target
+      regions.
+    inputBinding:
+      prefix: --annotate
+
+  - id: short_names
+    type: ["null", boolean]
+    description: Reduce multi-accession bait labels to be short and consistent.
+    inputBinding:
+      prefix: --short-names
+
+  - id: split
+    type: ["null", boolean]
+    description: Split large tiled intervals into smaller, consecutive targets.
+    inputBinding:
+      prefix: --split
+
+  - id: target_avg_size
+    type: ["null", int]
+    description: Average size of split target bins (results are approximate).
+    inputBinding:
+      prefix: --target-avg-size
+
+  - id: access
+    type: ["null", string]
+    description: >-
+      Regions of accessible sequence on chromosomes (.bed), as
+      output by the 'access' command.
+    inputBinding:
+      prefix: --access
+
+  - id: antitarget_avg_size
+    type: ["null", int]
+    description: Average size of antitarget bins (results are approximate).
+    inputBinding:
+      prefix: --antitarget-avg-size
+
+  - id: antitarget_min_size
+    type: ["null", int]
+    description: Minimum size of antitarget bins (smaller regions are dropped).
+    inputBinding:
+      prefix: --antitarget-min-size
+
+  - id: output_reference
+    type: ["null", string]
+    description: >-
+      Output filename/path for the new reference file being created.
+      (If given, ignores the -o/--output-dir option and will write the
+      file to the given path. Otherwise, "reference.cnn" will be
+      created in the current directory or specified output directory.)
+    inputBinding:
+      prefix: --output-reference
+
+  - id: reference
+    type: ["null", string]
+    description: Copy number reference file (.cnn).
+    inputBinding:
+      prefix: --reference
+
+  # Defaults to the current directory.
+  - id: output_dir
+    type: ["null", string]
+    default: "."
+    description: Output directory.
+    inputBinding:
+      prefix: --output-dir
+
+  - id: scatter
+    type: ["null", boolean]
+    description: Create a whole-genome copy ratio profile as a PDF scatter plot.
+    inputBinding:
+      prefix: --scatter
+
+  - id: diagram
+    type: ["null", boolean]
+    description: Create a diagram of copy ratios on chromosomes as a PDF.
+    inputBinding:
+      prefix: --diagram
+
+outputs: []
diff --git a/tools/cnvkit-docker.cwl b/tools/cnvkit-docker.cwl
@@ -0,0 +1,24 @@
+class: DockerRequirement  # Docker requirement fragment for cnvkit - presumably imported by the cnvkit tool descriptions; TODO confirm
+dockerPull:  # NOTE(review): empty value parses as null - set a published image name or drop this key
+dockerFile: |  # Dockerfile text, kept verbatim as a literal block scalar
+  #################################################################
+  # Dockerfile
+  #
+  # Software:         cnvkit
+  # Software Version: 0.7.11
+  # Description:      cnvkit docker image
+  # Website:          https://github.com/etal/cnvkit
+  # Provides:
+  # Base Image:
+  # Build Cmd:
+  # Pull Cmd:
+  # Run Cmd:
+  #################################################################
+
+  FROM python:2.7
+  MAINTAINER Anton Khodak <[email protected]>
+
+  # Install cnvkit from pip
+  RUN pip install cnvkit
+
+  # Default command to execute at startup of the container