diff --git a/DESCRIPTION b/DESCRIPTION index 4ad5f66..683253b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -11,12 +11,15 @@ Description: The ggbio package extends and specializes the grammar of and grand linear views. High-level plots include sequence fragment length, edge-linked interval to data view, mismatch pileup, and several splicing summaries. -Authors@R: c(person("Tengfei", "Yin", role=c("aut"), - email="yintengfei@gmail.com"), - person("Michael", "Lawrence", role=c("aut", "ths", "cre"), - email="michafla@gene.com"), - person("Dianne", "Cook", role=c("aut", "ths")), - person("Johannes", "Rainer", role="ctb")) +Authors@R: c( + person("Tengfei", "Yin", role=c("aut"), + email="yintengfei@gmail.com"), + person("Michael", "Lawrence", role=c("aut", "ths", "cre"), + email="michafla@gene.com"), + person("Rohit","Satyam", role="ctb", + comment = "'ggbio' vignette translation from Sweave to Rmarkdown / HTML"), + person("Dianne", "Cook", role=c("aut", "ths")), + person("Johannes", "Rainer", role="ctb")) Depends: methods, BiocGenerics, ggplot2 (>= 1.0.0) Imports: grid, grDevices, graphics, stats, utils, gridExtra, scales, reshape2, gtable, Hmisc, biovizBase (>= 1.29.2), Biobase, S4Vectors (>= 0.13.13), diff --git a/vignettes/Makefile b/vignettes/Makefile deleted file mode 100644 index 8bcbfe8..0000000 --- a/vignettes/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -# Makefile to use knitr for package vignettes - -# put all PDF targets here, separated by spaces -PDFS= ggbio.pdf - -all: $(PDFS) - -clean: - rm -f *.tex *.bbl *.blg *.aux *.out *.log *.spl *tikzDictionary *.toc - rm -f figures/ggbio-* ggbio-* - cp fakevignettes/*.Rnw . - -%.pdf: %.Rnw - cp realvignettes/*.Rnw . - $(R_HOME)/bin/Rscript -e "library(knitr); knit2pdf('$*.Rnw')" - diff --git a/vignettes/fakevignettes/ggbio.Rnw b/vignettes/fakevignettes/ggbio.Rnw deleted file mode 100644 index 5d7a240..0000000 --- a/vignettes/fakevignettes/ggbio.Rnw +++ /dev/null @@ -1,9 +0,0 @@ - -%% This vignette is a stub. Please look at -%% .[/inst]/doc/realvignettes or run 'make real' -%% to copy the real vignettes in the current directory. - -\documentclass[10pt]{article} -%\VignetteIndexEntry{Part 0: Introduction and quick start} -\begin{document} -\end{document} diff --git a/vignettes/figures/ggbio-edit-space-1.png b/vignettes/figures/ggbio-edit-space-1.png new file mode 100644 index 0000000..6bbf212 Binary files /dev/null and b/vignettes/figures/ggbio-edit-space-1.png differ diff --git a/vignettes/figures/ggbio-line-1.png b/vignettes/figures/ggbio-line-1.png new file mode 100644 index 0000000..d008d3a Binary files /dev/null and b/vignettes/figures/ggbio-line-1.png differ diff --git a/vignettes/figures/ggbio-load-RNAediting-1.png b/vignettes/figures/ggbio-load-RNAediting-1.png new file mode 100644 index 0000000..4d6e408 Binary files /dev/null and b/vignettes/figures/ggbio-load-RNAediting-1.png differ diff --git a/vignettes/figures/ggbio-load-RNAediting-color-1.png b/vignettes/figures/ggbio-load-RNAediting-color-1.png new file mode 100644 index 0000000..ba4789c Binary files /dev/null and b/vignettes/figures/ggbio-load-RNAediting-color-1.png differ diff --git a/vignettes/figures/ggbio-lower-link-track-1.png b/vignettes/figures/ggbio-lower-link-track-1.png new file mode 100644 index 0000000..bfc5281 Binary files /dev/null and b/vignettes/figures/ggbio-lower-link-track-1.png differ diff --git a/vignettes/figures/ggbio-lower-mut-track-1.png b/vignettes/figures/ggbio-lower-mut-track-1.png new file mode 100644 index 0000000..bdfd78f Binary files /dev/null and b/vignettes/figures/ggbio-lower-mut-track-1.png differ diff --git a/vignettes/figures/ggbio-lower-point-track-1.png b/vignettes/figures/ggbio-lower-point-track-1.png new file mode 100644 index 0000000..ace3a45 Binary files /dev/null and b/vignettes/figures/ggbio-lower-point-track-1.png differ diff --git a/vignettes/figures/ggbio-morecolor2-1.png b/vignettes/figures/ggbio-morecolor2-1.png new file mode 100644 index 0000000..d09af93 Binary files /dev/null and b/vignettes/figures/ggbio-morecolor2-1.png differ diff --git a/vignettes/figures/ggbio-simple-wrapper-1.png b/vignettes/figures/ggbio-simple-wrapper-1.png new file mode 100644 index 0000000..38944a0 Binary files /dev/null and b/vignettes/figures/ggbio-simple-wrapper-1.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-1-1.png b/vignettes/figures/ggbio-unnamed-chunk-1-1.png new file mode 100644 index 0000000..4980c4f Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-1-1.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-1-2.png b/vignettes/figures/ggbio-unnamed-chunk-1-2.png new file mode 100644 index 0000000..2d56ccf Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-1-2.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-10-1.jpeg b/vignettes/figures/ggbio-unnamed-chunk-10-1.jpeg new file mode 100644 index 0000000..44695f5 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-10-1.jpeg differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-10-1.png b/vignettes/figures/ggbio-unnamed-chunk-10-1.png new file mode 100644 index 0000000..1305be5 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-10-1.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-12-1.png b/vignettes/figures/ggbio-unnamed-chunk-12-1.png new file mode 100644 index 0000000..e7d109a Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-12-1.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-12-2.png b/vignettes/figures/ggbio-unnamed-chunk-12-2.png new file mode 100644 index 0000000..d05c50c Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-12-2.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-13-1.png b/vignettes/figures/ggbio-unnamed-chunk-13-1.png new file mode 100644 index 0000000..3c161af Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-13-1.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-13-2.png b/vignettes/figures/ggbio-unnamed-chunk-13-2.png new file mode 100644 index 0000000..18f47e7 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-13-2.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-13-3.png b/vignettes/figures/ggbio-unnamed-chunk-13-3.png new file mode 100644 index 0000000..cb9889c Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-13-3.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-13-4.png b/vignettes/figures/ggbio-unnamed-chunk-13-4.png new file mode 100644 index 0000000..c8a26b0 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-13-4.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-15-1.png b/vignettes/figures/ggbio-unnamed-chunk-15-1.png new file mode 100644 index 0000000..bef75c3 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-15-1.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-16-1.png b/vignettes/figures/ggbio-unnamed-chunk-16-1.png new file mode 100644 index 0000000..a40e62c Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-16-1.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-17-1.png b/vignettes/figures/ggbio-unnamed-chunk-17-1.png new file mode 100644 index 0000000..4b369ea Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-17-1.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-18-1.png b/vignettes/figures/ggbio-unnamed-chunk-18-1.png new file mode 100644 index 0000000..7a864fc Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-18-1.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-18-2.png b/vignettes/figures/ggbio-unnamed-chunk-18-2.png new file mode 100644 index 0000000..c8bf634 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-18-2.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-19-1.png b/vignettes/figures/ggbio-unnamed-chunk-19-1.png new file mode 100644 index 0000000..058c60f Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-19-1.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-19-2.png b/vignettes/figures/ggbio-unnamed-chunk-19-2.png new file mode 100644 index 0000000..f5a8f68 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-19-2.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-19-3.png b/vignettes/figures/ggbio-unnamed-chunk-19-3.png new file mode 100644 index 0000000..0ee41d6 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-19-3.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-2-1.png b/vignettes/figures/ggbio-unnamed-chunk-2-1.png new file mode 100644 index 0000000..f657827 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-2-1.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-2-2.png b/vignettes/figures/ggbio-unnamed-chunk-2-2.png new file mode 100644 index 0000000..fbc0ad8 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-2-2.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-21-1.png b/vignettes/figures/ggbio-unnamed-chunk-21-1.png new file mode 100644 index 0000000..27fbc8e Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-21-1.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-22-1.png b/vignettes/figures/ggbio-unnamed-chunk-22-1.png new file mode 100644 index 0000000..0ee8182 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-22-1.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-24-1.png b/vignettes/figures/ggbio-unnamed-chunk-24-1.png new file mode 100644 index 0000000..506f496 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-24-1.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-25-1.png b/vignettes/figures/ggbio-unnamed-chunk-25-1.png new file mode 100644 index 0000000..c210e79 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-25-1.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-26-1.png b/vignettes/figures/ggbio-unnamed-chunk-26-1.png new file mode 100644 index 0000000..18ba109 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-26-1.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-28-1.png b/vignettes/figures/ggbio-unnamed-chunk-28-1.png new file mode 100644 index 0000000..4162a42 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-28-1.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-29-1.png b/vignettes/figures/ggbio-unnamed-chunk-29-1.png new file mode 100644 index 0000000..0470ef1 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-29-1.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-3-1.png b/vignettes/figures/ggbio-unnamed-chunk-3-1.png new file mode 100644 index 0000000..07a2c65 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-3-1.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-30-1.png b/vignettes/figures/ggbio-unnamed-chunk-30-1.png new file mode 100644 index 0000000..b0f1154 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-30-1.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-31-1.png b/vignettes/figures/ggbio-unnamed-chunk-31-1.png new file mode 100644 index 0000000..b879225 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-31-1.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-32-1.png b/vignettes/figures/ggbio-unnamed-chunk-32-1.png new file mode 100644 index 0000000..77de67b Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-32-1.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-33-1.png b/vignettes/figures/ggbio-unnamed-chunk-33-1.png new file mode 100644 index 0000000..6bdf116 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-33-1.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-34-1.png b/vignettes/figures/ggbio-unnamed-chunk-34-1.png new file mode 100644 index 0000000..796fffc Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-34-1.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-36-1.png b/vignettes/figures/ggbio-unnamed-chunk-36-1.png new file mode 100644 index 0000000..8af9cc1 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-36-1.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-37-1.png b/vignettes/figures/ggbio-unnamed-chunk-37-1.png new file mode 100644 index 0000000..5184069 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-37-1.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-39-1.png b/vignettes/figures/ggbio-unnamed-chunk-39-1.png new file mode 100644 index 0000000..367c638 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-39-1.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-4-1.png b/vignettes/figures/ggbio-unnamed-chunk-4-1.png new file mode 100644 index 0000000..0dd0c7d Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-4-1.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-40-1.png b/vignettes/figures/ggbio-unnamed-chunk-40-1.png new file mode 100644 index 0000000..7e3b78e Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-40-1.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-40-2.png b/vignettes/figures/ggbio-unnamed-chunk-40-2.png new file mode 100644 index 0000000..21f533f Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-40-2.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-40-3.png b/vignettes/figures/ggbio-unnamed-chunk-40-3.png new file mode 100644 index 0000000..17b7ea7 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-40-3.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-40-4.png b/vignettes/figures/ggbio-unnamed-chunk-40-4.png new file mode 100644 index 0000000..aee8174 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-40-4.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-41-1.png b/vignettes/figures/ggbio-unnamed-chunk-41-1.png new file mode 100644 index 0000000..4c820d0 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-41-1.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-41-2.png b/vignettes/figures/ggbio-unnamed-chunk-41-2.png new file mode 100644 index 0000000..eb517f3 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-41-2.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-5-1.jpeg b/vignettes/figures/ggbio-unnamed-chunk-5-1.jpeg new file mode 100644 index 0000000..e154309 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-5-1.jpeg differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-5-1.png b/vignettes/figures/ggbio-unnamed-chunk-5-1.png new file mode 100644 index 0000000..686eda3 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-5-1.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-5-2.jpeg b/vignettes/figures/ggbio-unnamed-chunk-5-2.jpeg new file mode 100644 index 0000000..c9ba624 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-5-2.jpeg differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-6-1.jpeg b/vignettes/figures/ggbio-unnamed-chunk-6-1.jpeg new file mode 100644 index 0000000..0405942 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-6-1.jpeg differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-6-1.png b/vignettes/figures/ggbio-unnamed-chunk-6-1.png new file mode 100644 index 0000000..011017a Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-6-1.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-6-2.jpeg b/vignettes/figures/ggbio-unnamed-chunk-6-2.jpeg new file mode 100644 index 0000000..e3af348 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-6-2.jpeg differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-7-1.jpeg b/vignettes/figures/ggbio-unnamed-chunk-7-1.jpeg new file mode 100644 index 0000000..6faf42b Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-7-1.jpeg differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-7-1.png b/vignettes/figures/ggbio-unnamed-chunk-7-1.png new file mode 100644 index 0000000..efe6f27 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-7-1.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-8-1.jpeg b/vignettes/figures/ggbio-unnamed-chunk-8-1.jpeg new file mode 100644 index 0000000..64acd03 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-8-1.jpeg differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-8-1.png b/vignettes/figures/ggbio-unnamed-chunk-8-1.png new file mode 100644 index 0000000..f92c104 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-8-1.png differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-9-1.jpeg b/vignettes/figures/ggbio-unnamed-chunk-9-1.jpeg new file mode 100644 index 0000000..afc70f0 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-9-1.jpeg differ diff --git a/vignettes/figures/ggbio-unnamed-chunk-9-1.png b/vignettes/figures/ggbio-unnamed-chunk-9-1.png new file mode 100644 index 0000000..f6f8c30 Binary files /dev/null and b/vignettes/figures/ggbio-unnamed-chunk-9-1.png differ diff --git a/vignettes/ggbio.Rmd b/vignettes/ggbio.Rmd new file mode 100644 index 0000000..d394a40 --- /dev/null +++ b/vignettes/ggbio.Rmd @@ -0,0 +1,929 @@ +--- +title: "ggbio: visualization toolkits for genomic data" +author: +- name: Tengfei Yin + affiliation: Seven Bridges Genomics + email: tengfei.yin@sbgenomics.com +- name: Rohit Satyam + affiliation: "Vignette translation from Sweave to Rmarkdown / HTML" + +clean: false +date: "`r format(Sys.Date(), '%d %B, %Y')`" +package: ggbio +output: + BiocStyle::html_document: + toc_float: true + +vignette: > + %\VignetteIndexEntry{ggbio: visualization toolkits for genomic data} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteDepends{EnsDb.Hsapiens.v75} + %\VignetteKeywords{visualization utilities} + %\VignettePackage{ggbio} + %\VignetteDepends{BiocStyle} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include=FALSE, cache=FALSE, eval = TRUE, echo = FALSE} +library(knitr) +opts_chunk$set(fig.path='./figures/ggbio-', + fig.align='center', fig.show='asis', + eval = TRUE, + fig.width = 4.5, + fig.height = 4.5, + tidy = FALSE, + message = FALSE, + warning = FALSE) +options(replace.assign=TRUE,width=80) +``` + +# Citation{-} + +```{r citation} +citation("ggbio") +``` + +# Introduction + +`r Biocpkg("ggbio")` is a *Bioconductor* package building on top of `r Biocpkg("ggplot2")`, leveraging +the rich objects defined by *Bioconductor* and its statistical and +computational power, it provides a flexible genomic visualization +framework, extends the grammar of graphics into genomic data, and tries to +delivers high-quality, highly customizable graphics to the users. +What it features + +- `autoplot()` function provides ready-to-use template for *Bioconductor* + objects and different types of data. +- flexible low-level components to use the grammar of graphics to + build your graphics layer by layer. +- layout transformation, so you could generate a circular plot, + grandlinear plot, stacked overview more easily. +- flexible tracks function to bind any `r Biocpkg("ggplot2")`, `r Biocpkg("ggbio")` based + plots. + +# Case study: building your first tracks +In this section, you will learn + +- how to add an ideogram track. +- How to add the gene model track. +- how to add a track for BAM files to visualize coverage and mismatch summary. +- how to add a track for a VCF file to visualize the variants. + + +## Add an ideogram track + +`Ideogram` provides functionality to construct ideogram, +check the manual for more flexible methods. We build genome +*hg19, hg18, mm10, mm9* inside, so you don't have to download it +on the fly. When embedded with tracks, the ideogram show zoomed region +highlights automatically. `xlim` has a special function here, +is too changed highlighted zoomed region on the ideogram. + +```{r fig.width = 5.5, fig.height = 1.5} +library(ggbio) +p.ideo <- Ideogram(genome = "hg19") +p.ideo +library(GenomicRanges) +## special highlights instead of zoomin! +p.ideo + xlim(GRanges("chr2", IRanges(1e8, 1e8+10000000))) +``` + +## Add a gene model track + +### Introduction +The gene model track is one of the most frequently used tracks in genome +browser, it is composed of genetic features CDS, UTR, introns, exons +and non-genetic regions. In `r Biocpkg("ggbio")` we support three methods to make +gene model track: + +- `OrganismDb` object: recommended, support gene symbols and other combinations of columns as labels. +- `TxDb` object: doesn't support gene symbol labeling. +- `GRangesList` object: flexible, if you don't have + annotation package available for the first two methods, you could + prepare a data set parsed from gtf file, you can simply use it and + plot it as a gene model track. +- `EnsDb` object: supports gene symbol labeling, filtering, etc. + +### Make gene model from *OrganismDb* object + +*OrganismDb* object has a simpler API to retrieve data from +different annotation resources, so we could label our transcripts in +different ways + +```{r} +library(ggbio) +library(Homo.sapiens) +class(Homo.sapiens) +## +data(genesymbol, package = "biovizBase") +wh <- genesymbol[c("BRCA1", "NBR1")] +wh <- range(wh, ignore.strand = TRUE) +p.txdb <- autoplot(Homo.sapiens, which = wh) +p.txdb +autoplot(Homo.sapiens, which = wh, label.color = "black", color = "brown", + fill = "brown") +``` + +To change the intron geometry, use `gap.geom` to control it, +check out `geom_alignment` for more control parameters. + +```{r} +autoplot(Homo.sapiens, which = wh, gap.geom = "chevron") +``` + +To collapse all features, use `stat` 'reduce' + +```{r fig.width = 5.5, fig.height = 1.5} +autoplot(Homo.sapiens, which = wh, stat = "reduce") +``` + +Label could be turned off by setting it to `FALSE`, or you could +also use an expression to make a flexible label combination from column names. + +```{r} +columns(Homo.sapiens) +autoplot(Homo.sapiens, which = wh, columns = c("TXNAME", "GO"), names.expr = "TXNAME::GO") +``` + +### Make gene model from *TxDb* object + +*TxDb* doesn't contain any gene symbol information, so +we use tx_id as the default for the label. + +```{r} +library(TxDb.Hsapiens.UCSC.hg19.knownGene) +txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene +autoplot(txdb, which = wh) +``` + +### Make gene model from *EnsDb* object + +An alternative source for gene models are the `EnsDb` objects from the +`r Biocpkg("ensembldb")` package that provides gene annotations provided from +Ensembl. The `r Biocpkg("ensembldb")` package provides a rich filtering system that +allows one to easily fetch specific information (genes/transcripts) from an +*EnsDb*. The *EnsDb* objects provide gene symbol annotations in +the column gene_name. Alternatively, we could use tx_id to label transcripts. + +In the example below, we plot the gene model of the gene **PHKG2**. We use a +*GenenameFilter* to specify which gene we want to plot. + +```{r} +library(EnsDb.Hsapiens.v75) +ensdb <- EnsDb.Hsapiens.v75 +autoplot(ensdb, GeneNameFilter("PHKG2")) +``` + +We can pass any filter class defined in the `r Biocpkg("AnnotationFilter")` package +with the argument *which*. Alternatively, we can combine filter classes using +an *AnnotationFilterList* or we can pass a filter expression in the form of a +*formula*. Below we pass such a filter expression to the function. + +```{r} +autoplot(ensdb, ~ symbol == "PHKG2", names.expr="gene_name") +``` + +We could also specify a genomic region and fetch all transcripts that overlap. +that region (also partially, i.e., with a part of an intron or an exon). + +```{r} +## We specify "*" as a strand, thus we query for genes encoded on both strands +gr <- GRanges(seqnames = 16, IRanges(30768000, 30770000), strand = "*") +autoplot(ensdb, GRangesFilter(gr), names.expr = "gene_name") +``` + +Also, we can specify the gene ids directly and plot all transcripts of these +genes (not only those overlapping with the region) + +```{r} +autoplot(ensdb, GeneIdFilter(c("ENSG00000196118", "ENSG00000156873"))) +``` + +### Make gene model from *GRangesList* object + +Sometimes your gene model is not available as none of +*OrganismDb* or *TxDb* object, it may be stored +in a table, you could simply parse it into a *GRangeList* +object. + +- each group indicates one transcript. +- names of groups are shown as labels +- this object must have a column containing following keyword: + cds, exon, intron, and it's not case sensitive. use `type` + to map this column. By default, we will try to parse the 'type' column. + +Let's make a sample *GRangesList* object which contains all +information, and some fake labels. + +```{r} +library(biovizBase) +gr.txdb <- crunch(txdb, which = wh) +## change column to 'model' +colnames(values(gr.txdb))[4] <- "model" +grl <- split(gr.txdb, gr.txdb$tx_id) +## fake some random names +names(grl) <- sample(LETTERS, size = length(grl), replace = TRUE) +grl +``` + +We get our example data ready, it meets all requirements, to make it a +gene model track it's pretty simple to use autoplot, but don't forget +mapping because we changed our column names, assume you store you +model keywords in column 'model'. + +```{r} +autoplot(grl, aes(type = model)) +ggplot() + geom_alignment(grl, type = "model") +``` + +## Add a reference track + +To add a reference track, we need to load a *BSgenome* object +from the annotation package. You can choose to plot the sequence as +*text, rect, segment*. + +### Semantic zoom + +Here we introduce semantic zoom in `r Biocpkg("ggbio")`, for some plots like +reference sequence, we use a pre-defined zoom level threshold to +automatically assign geom to the track unless the geom is explicitly +specified. In the example below, when your region is too wide, we show +the text 'zoom in to see text', when you zoom into different levels, it +shows you different details. `zoom` is a function we will +introduce more in section \@ref(sec:nav) when we introduce more +about navigation. + +You can pass a zoom-in factor into the `zoom` function; if it's +over 1 it's zooming out, if it's smaller than 1 it's zooming in. + +```{r fig.width = 5.5, fig.height = 1.5} +library(BSgenome.Hsapiens.UCSC.hg19) +bg <- BSgenome.Hsapiens.UCSC.hg19 +p.bg <- autoplot(bg, which = wh) +## no geom +p.bg +## segment +p.bg + zoom(1/100) +## rectangle +p.bg + zoom(1/1000) +## text +p.bg + zoom(1/2500) +``` + +To override a semantic zoom threshold, you simply provide a geom explicitly. + +```{r eval = FALSE} +library(BSgenome.Hsapiens.UCSC.hg19) +bg <- BSgenome.Hsapiens.UCSC.hg19 +## force to use geom 'segment' at this level +autoplot(bg, which = resize(wh, width = width(wh)/2000), geom = "segment") +``` + +## Add an alignment track + +`r Biocpkg("ggbio")` supports visualization of alignments file stored in bam, +`autoplot()` method accepts + +- bam file path (indexed) +- *BamFile* object +- *GappedAlignemnt* object + +It's simple to just pass a file path to `autoplot()` function, you can +stream a chunk of the region by providing the 'which' parameter. Otherwise +please use the method 'estimate' to show the overall estimated coverage. + +```{r} +fl.bam <- system.file("extdata", "wg-brca1.sorted.bam", package = "biovizBase") +wh <- keepSeqlevels(wh, "chr17") +autoplot(fl.bam, which = wh) +``` + +geom 'gapped pair' will show you alignments. + +```{r} +fl.bam <- system.file("extdata", "wg-brca1.sorted.bam", package = "biovizBase") +wh <- keepSeqlevels(wh, "chr17") +autoplot(fl.bam, which = resize(wh, width = width(wh)/10), geom = "gapped.pair") +``` + +To show mismatch proportion, you have to provide a reference sequence, +the mismatched proportion is color-coded in the bar chart. + +```{r} +library(BSgenome.Hsapiens.UCSC.hg19) +bg <- BSgenome.Hsapiens.UCSC.hg19 +p.mis <- autoplot(fl.bam, bsgenome = bg, which = wh, stat = "mismatch") +p.mis +``` + +To view the overall estimated coverage distribution, please use method +'estimate'. 'which' parameter also accepts characters. And there is a +hidden value called '..coverage..' to let you do simple transformations +in aes(). + +```{r} +autoplot(fl.bam, method = "estimate") +autoplot(fl.bam, method = "estimate", which = paste0("chr", 17:18), aes(y = log(..coverage..))) +``` + +## Add a variants track + +This track is supported by semantic zoom. +To view your variants file, you could + +- Import it using package `r Biocpkg("VariantAnntoation")` as + `VCF` object, then use `autoplot()` +- Convert it into `VRanges` object and use `autoplot()`. +- Simply provide the vcf file path in `autoplot()`. + +```{r} +library(VariantAnnotation) +fl.vcf <- system.file("extdata", "17-1409-CEU-brca1.vcf.bgz", package="biovizBase") +vcf <- readVcf(fl.vcf, "hg19") +vr <- as(vcf[, 1:3], "VRanges") +vr <- renameSeqlevels(vr, value = c("17" = "chr17")) +## small region contains data +gr17 <- GRanges("chr17", IRanges(41234400, 41234530)) +p.vr <- autoplot(vr, which = wh) +## none geom +p.vr +## rect geom +p.vr + xlim(gr17) +## text geom +p.vr + xlim(gr17) + zoom() +``` + +You can simply override geom + +```{r eval = FALSE} +autoplot(vr, which = wh, geom = "rect", arrow = FALSE) +``` + +## Building your tracks + +```{r fig.width = 8, fig.height = 5.5} +## tks <- tracks(p.ideo, mismatch = p.mis, dbSNP = p.vr, ref = p.bs, gene = p.txdb) +## tks <- tracks(fl.bam, fl.vcf, bs, Homo.sapiens) ## default ideo = FALSE, turned on +## tks <- tracks(fl.bam, fl.vcf, bs, Homo.sapiens, ideo = TRUE) +## tks + xlim(gr17) +gr17 <- GRanges("chr17", IRanges(41234415, 41234569)) +tks <- tracks(p.ideo, mismatch = p.mis, dbSNP = p.vr, ref = p.bg, gene = p.txdb, + heights = c(2, 3, 3, 1, 4)) + xlim(gr17) + theme_tracks_sunset() +tks +``` + +# Simple navigation {#sec:nav} + +We try to provide a simple navigation API for your plot, so you could +zoom in and zoom out, or go through view chunks one by one. + +- `zoom`: put a factor inside and you can zoom in or zoom out +- `nextView`: switch to the next view +- `prevView`: switch to the previous view + +Navigation function also works for track plots too. + +```{r fig.width = 8, fig.height = 5.5} +## zoom in +tks + zoom() +``` + +Try following command yourself. + +```{r eval = FALSE} +## zoom in with scale +p.txdb + zoom(1/8) +## zoom out +p.txdb + zoom(2) +## next view page +p.txdb + nextView() +## previous view page +p.txdb + prevView() +``` + +Don't forget `xlim` accept *GRanges* object (single row), so you could simply prepare a *GRanges* to store the region of interests and go through them one by one. + +# Overview plots {#sec:overview} + +An overview is a good way to show all events at the same time, and give overall summary statistics for the whole genome. + +In this chapter, we will introduce three different layouts that are used a lot in genomic data visualization. + +## How to make circular plots + +### Introduction + +The circular view is a special layout in `r Biocpkg("ggbio")`, this idea has been implemented +in many different software projects, for example, the `Circos` project. However, we keep the grammar of graphics for users, so mapping variables to aesthetics is very easy, `r Biocpkg("ggbio")` leverages the data structure defined in *Bioconductor* to make this process as simple as possible. + +### Building circular plot layer by layer + +OK, let's start to process some raw data in the format we want. The data used in this study is from this paper^[http://www.nature.com/ng/journal/v43/n10/full/ng.936.html]. In +this tutorial, We are going to +- Visualize somatic mutations as segments. +- Visualize inter- and intra-chromosome rearrangements as links. +- Visualize mutation scores as point tracks with a grid background. +- Add scales, ticks, and labels. +- To arrange multiple plots and legends. create multiple sample comparisons. + +All the raw data processed and stored in `GRanges` ready for use, you can simply load the sample data from `r Biocpkg("biovizBase")`. + +```{r processing} +data("CRC", package = "biovizBase") +``` + +`layout_circle` is depreciated, because you have to set up radius and trackWidth manually with this function for creating circular plot. + +We now present the new `circle` function; it +accepts `Granges` objects, and users don't have to specify the radius or track width, +you just add them one by one, it will be automatically created from the inner circle to the outside, unless you specify `trackWidth` and `radius` manually. To change the default radius and trackWidth for all tracks, you simply put them in the `ggbio` function. +- rule of thumb: `seqlengths`, `seqlevels`, and chromosomes + names should be exactly the same. +- to use `circle`, you have to use `r Biocpkg("ggbio")` constructor at the beginning instead of `r Biocpkg("ggplot")`. + +You can use `autoplot()` to create a single track easily like + +```{r} +head(hg19sub) +autoplot(hg19sub, layout = "circle", fill = "gray70") +``` + +However, the low-level `circle` function leaves you more +flexibility to build circular plots one by one. Let's start adding tracks one by one. +Let's use the same data to create an ideogram, label, and scale track, it layouts the circle in the order you created, from inside to outside. + +```{r} +p <- ggbio() + circle(hg19sub, geom = "ideo", fill = "gray70") + + circle(hg19sub, geom = "scale", size = 2) + + circle(hg19sub, geom = "text", aes(label = seqnames), vjust = 0, size = 3) +p +``` + +To simply override the setting, you can do it globally in `r Biocpkg("ggbio")` function or individually `circle` function by specifying parameters `trackWidth` and `radius`, you can also specify the global setting for buffer in between in `r Biocpkg("ggbio")` like in the example below. + +```{r} +p <- ggbio(trackWidth = 10, buffer = 0, radius = 10) + circle(hg19sub, geom = "ideo", fill = "gray70") + + circle(hg19sub, geom = "scale", size = 2) + + circle(hg19sub, geom = "text", aes(label = seqnames), vjust = 0, size = 3) +p +``` + +Then we add a "rectangle" track to show somatic mutation, this will look like vertical segments. + +```{r lower-mut-track} +head(mut.gr) +p <- ggbio() + circle(mut.gr, geom = "rect", color = "steelblue") + + circle(hg19sub, geom = "ideo", fill = "gray70") + + circle(hg19sub, geom = "scale", size = 2) + + circle(hg19sub, geom = "text", aes(label = seqnames), vjust = 0, size = 3) +p +``` + +Next, we need to add some "links" to show the rearrangement, of course, links can be used to map any kind of association between two or more different locations to indicate relationships like copies or fusions. To create a suitable structure to plot, please use another `GRanges` to +represent the end of the links, and store it as `elementMetadata` for the "start +point" `GRanges`. Here we named it "to.gr," which will be used later. + +```{r} +head(crc.gr) +``` + +In this example, we use "intrachromosomal" to label +rearrangement within the same chromosomes and use "interchromosomal" to label +rearrangement in different chromosomes. +Get a subset of link data for only one sample, "CRC1". + +```{r subset-crc-1} +gr.crc1 <- crc.gr[values(crc.gr)$individual == "CRC-1"] +``` + +Ok, add a "point" track with a grid background for rearrangement data and map `y` +to variable "score", map `size` to the variable "tumreads", and re-scale the size to a proper size range. + +```{r lower-point-track} +## Manually specify radius +p <- p + circle(gr.crc1, geom = "point", aes(y = score, size = tumreads), + color = "red", grid = TRUE, radius = 30) + scale_size(range = c(1, 2.5)) +p +``` + +Finally, let's add links and map color to rearrangement types. Remember, you need +to specify the `linked.to` parameter to the column that contains the endpoint of the data. + +```{r lower-link-track} +## specify radius manually +p <- p + circle(gr.crc1, geom = "link", linked.to = "to.gr", aes(color = rearrangements), radius = 23) +p +``` + +All those codes could be simply constructed by following the code + +```{r} +p <- ggbio() + + circle(gr.crc1, geom = "link", linked.to = "to.gr", aes(color = rearrangements)) + + circle(gr.crc1, geom = "point", aes(y = score, size = tumreads), + color = "red", grid = TRUE) + scale_size(range = c(1, 2.5)) + + circle(mut.gr, geom = "rect", color = "steelblue") + + circle(hg19sub, geom = "ideo", fill = "gray70") + + circle(hg19sub, geom = "scale", size = 2) + + circle(hg19sub, geom = "text", aes(label = seqnames), vjust = 0, size = 3) +p +``` + +### Complex arrangement of plots + +In this step, we are going to make multiple sample comparisons, this may require +some knowledge about packages `r CRANpkg("grid")` and `r CRANpkg("gridExtra")`. We will +introduce a more easy way to combine your graphics later after this. +We just want 9 single circular plots put together on one page, since we cannot +keep too many tracks, we only keep ideograms and links. Here is one sample. + +```{r arrangement} +grl <- split(crc.gr, values(crc.gr)$individual) +## need "unit", load grid + +library(grid) +crc.lst <- lapply(grl, function(gr.cur){ + print(unique(as.character(values(gr.cur)$individual))) + cols <- RColorBrewer::brewer.pal(3, "Set2")[2:1] + names(cols) <- c("interchromosomal", "intrachromosomal") + +p <- ggbio() + circle(gr.cur, geom = "link", linked.to = "to.gr", + aes(color = rearrangements)) + + circle(hg19sub, geom = "ideo", + color = "gray70", fill = "gray70") + + scale_color_manual(values = cols) + + labs(title = (unique(values(gr.cur)$individual))) + + theme(plot.margin = unit(rep(0, 4), "lines")) +}) +``` + +We wrap the function in grid level into a more user-friendly high-level function, +called `arrangeGrobByParsingLegend`. You can pass your ggplot2 +graphics to this function, specify the legend you want to keep on the right, +you can also specify the column/row numbers. Here, we assume all plots we have +passed follow the same color scale and have the same legend, so we only have to +keep one legend on the right. + +```{r simple-wrapper, fig.width = 7, fig.height = 5} +arrangeGrobByParsingLegend(crc.lst, widths = c(4, 1), legend.idx = 1, ncol = 3) +``` + +## How to make grandlinear plots + +### Introduction + +Let's use a subset of (PLINK)[https://github.com/stephenturner/qqman/blob/master/plink.assoc.txt.gz] output as our example test data. + +```{r simul_snp} +snp <- read.table(system.file("extdata", "plink.assoc.sub.txt", package = "biovizBase"), + header = TRUE) +require(biovizBase) +gr.snp <- transformDfToGr(snp, seqnames = "CHR", start = "BP", width = 1) +head(gr.snp) +## change the seqname order +require(GenomicRanges) +gr.snp <- keepSeqlevels(gr.snp, as.character(1:22)) +seqlengths(gr.snp) +## need to assign seqlengths +data(ideoCyto, package = "biovizBase") +seqlengths(gr.snp) <- as.numeric(seqlengths(ideoCyto$hg18)[1:22]) +## remove missing +gr.snp <- gr.snp[!is.na(gr.snp$P)] +## transform pvalue +values(gr.snp)$pvalue <- -log10(values(gr.snp)$P) +head(gr.snp) +## done +``` + +The data is ready, we need to pay attention + +- if seqlengths is missing, we use the data range, so the chromosome length is not accurate. +- use seqlevel to control the order of chromosomes. + + +### Coordinate genome + +In `autoplot`, the argument `coord` is just used to transform the data, +after that, you can use it as a common `GRanges`, all other geom/stat works for it. + +```{r line, fig.height = 4, fig.width = 6} +autoplot(gr.snp, geom = "point", coord = "genome", aes(y = pvalue)) +``` + +However, we recommend you use the more powerful function `plotGrandLinear` to generate the Manhattan plot introduced in the next section. + +### Convenient `plotGrandLinear` function + +For *Manhattan plot*, we have a function called +`plotGrandLinear`. aes(y = ) is required to indicate the y +value, e.g. p-value. + +Color mapping is automatically figured out by `r Biocpkg("ggbio")` following the rules + +- if `color` is present in `aes()`, like `aes(color = seqnames)`, it will assume it's mapping to a data column called 'seqnames'. +- if `color` is not wrapped in `aes()`, then this function + will - **recycle** them to all chromosomes. +- if `color` is a single character representing color, then just use one arbitrary color. + +Let's test some examples of controlling colors. + +```{r morecolor2, fig.height = 4, fig.width = 6} +plotGrandLinear(gr.snp, aes(y = pvalue), color = c("#7fc97f", "#fdc086")) +``` + +Let's add a cutoff line + +```{r fig.height = 4, fig.width = 6} +plotGrandLinear(gr.snp, aes(y = pvalue), color = c("#7fc97f", "#fdc086"), + cutoff = 3, cutoff.color = "blue", cutoff.size = 0.2) +``` + +Sometimes you use color to map other variables, so you may need a different color to separate chromosomes. + +```{r fig.height = 4, fig.width = 6} +plotGrandLinear(gr.snp, aes(y = pvalue, color = OR), spaceline = TRUE, legend = TRUE) +``` + +### How to highlight some points? + +You can provide a highlight `GRanges`, and each row highlights a set of overlapped SNPs, and is labeled by rownames or certain columns, there is more control in the +function as parameters, with prefix highlight.*, so you could control color, label size and color, etc. + +```{r fig.height = 4, fig.width = 6} +gro <- GRanges(c("1", "11"), IRanges(c(100, 2e6), width = 5e7)) +names(gro) <- c("group1", "group2") +plotGrandLinear(gr.snp, aes(y = pvalue), highlight.gr = gro) +``` + +## How to make stacked karyogram overview plots + + +### Introduction + +A karyotype is the number and appearance of chromosomes in the nucleus of a +eukaryotic cell^[http://en.wikipedia.org/wiki/Karyotype]. It's one kind +of overview when we want to show the distribution of certain events on the genome, +for example, binding sites for certain protein, and even compare them across samples +as an example shown in this section. +`GRanges` and `Seqinfo` objects are ideal containers for +storing the data needed for a karyogram plot. Here is the strategy we used for +generating ideogram templates. +- Although `seqlengths` is not required, it's highly recommended for plotting karyograms. If a `GRanges` object contains `seqlengths`, we know exactly how long each chromosome is, and will use this information to plot genome space, particularly we plot all levels included in it, - **NOT JUST** data space. +- If a `GRanges` has no `seqlengths`, we will issue a + warning and try to estimate the chromosome lengths from the data included. This is **NOT** accurate most of the time, so please pay attention to what you are going to visualize and make sure to set `seqlengths` beforehand. + + +### Create a karyogram template + +Let's first introduce how to use `autoplot` to generate karyogram graphics. +The easiest one is to just plot Seqinfo by using `autoplot` if your `GRanges` +object has seqinfo with seqlengths information. Then you add the data layer later. + +```{r} +data(ideoCyto, package = "biovizBase") +autoplot(seqinfo(ideoCyto$hg19), layout = "karyogram") +``` + +To show cytobands, your data need to have cytoband information, we stored some data for you, including *hg19, hg18, mm10, mm9*. + +```{r fig.width = 5, fig.height = 4} +## turn on cytobands if present +biovizBase::isIdeogram(ideoCyto$hg19) +autoplot(ideoCyto$hg19, layout = "karyogram", cytobands = TRUE) +``` + +To change order or only show a subset of the karyogram, you have to manipulate `seqlevels`, please check out the manual for `keepSeqlevels, seqlevels` in `r Biocpkg("GenomicRanges")` package for more information Or you could read the example below. + +### Add data on the karyogram layout + +If you have a single data set stored as `GRanges` to show on a karyogram layout, `autoplot` function is enough for you to plot the data on it. +We use default data in package `r Biocpkg("biovizBase")`, which is a subset of RNA +editing set in humans. The data involved in this `GRanges` is sparse, so +we cannot simply use it to make a karyogram template, otherwise, the estimated chromosome +lengths will be very rough and inaccurate. So what we need to do first is to *add seglength information to this object.* + +```{r load-RNAediting} +data(darned_hg19_subset500, package = "biovizBase") +dn <- darned_hg19_subset500 +library(GenomicRanges) +seqlengths(dn) +## add seqlengths +## we have seqlegnths information in another data set +seqlengths(dn) <- seqlengths(ideoCyto$hg19)[names(seqlengths(dn))] +## then we change order +dn <- keepSeqlevels(dn, paste0("chr", c(1:22, "X"))) +seqlengths(dn) +autoplot(dn, layout = "karyogram") +``` + +Then we take one step further, the power of `r Biocpkg("ggplot")` or `r Biocpkg("ggbio")` is the +flexible multivariate data mapping ability in graphics, making data exploration +much more convenient. In the following example, we are trying to map a +categorical variable 'exReg' to color, this variable is included in the data, +and have three levels, '3' indicates 3' utr, '5' means 5' utr, and 'C' means +coding region. We have some missing values indicated as `NA`, in default, +it's going to be shown in gray color, and keep in mind, since the basic +geom(geometric object) is a rectangle, and genome space is very large, so change +both color/fill color of the rectangle to specify both border and filled color +is necessary to get the data shown in different colors, otherwise if the region +is too small, the border color is going to override the fill color. + +```{r load-RNAediting-color} +## Since the default is a geom rectangle, even though it looks like a segment +## We still use both fill/color to map colors +autoplot(dn, layout = "karyogram", aes(color = exReg, fill = exReg)) +``` + +Or you can set the missing value to the particular color you want (NA values is not shown on the legend). + +```{r} +## Since the default is geom rectangle, even though it looks like a segment, +## We still use both fill/color to map colors +autoplot(dn, layout = "karyogram", aes(color = exReg, fill = exReg), alpha = 0.5) + + scale_color_discrete(na.value = "brown") +``` + +Well, sometimes we have too many values, so we want to separate them by groups and show them at different heights, below is a hack for that purpose, and in the next section, we will introduce a more flexible and general way to add data layer by layer. + +*The template chromosome y limits is [0, 10], which is why this hack works* + +```{r fig.width = 5, fig.height = 4} +## Let's remove the NA value +dn.nona <- dn[!is.na(dn$exReg)] +## Compute levels based on categories +dn.nona$levels <- as.numeric(factor(dn.nona$exReg)) +## Do a trick and show them at different heights +p.ylim <- autoplot(dn.nona, layout = "karyogram", aes(color = exReg, fill = exReg, + ymin = (levels - 1) * 10/3, + ymax = levels * 10 /3)) +``` + +### Add more data using the layout_karyogram function + +In this section, a lower-level function `layout_karyogram` is going +to be introduced. This is a convenient API for constructing a karyogram plot and +adding more data layer by layer. The function `ggplot` is just to create +a blank object to add a layer on. + +You need to pay attention to +- when you add plots layer by layer, seqnames of different data must be the + same to make sure the data are mapped to the same chromosome. For example, if + you name a chromosome following schema like *chr1* and use just the number + *1* to name other data, they will be treated as different chromosomes. +- cannot use the same aesthetic mapping multiple times for different + data. For example, if you have used aes(color = ), for one data, you cannot + use aes(color = ) anymore for mapping variables from other add-on data, this + is currently not allowed in `r Biocpkg("ggplot")`, even though you expect multiple color + legend shows up; this is going to confuse people about which is which. However, + `color` or `fill` without `aes()` wrap-around, is + allowed for any track, it's set to a single arbitrary color. +- Default rectangle y range is [0, 10], so when you add on more data layer + by layer on existing graphics, you can use `ylim` to control how to + normalize your data and plot it relative to chromosome space. For example, + with default, chromosome space is plotted between y [0, 10], if you use + `ylim = c(10 , 20)`, you will stack data right above each chromosome + and with equal width. For geom like 'point', which you need to specify 'y' + value in `aes()`, we will add a 5% margin on top and at the bottom of that + track. + +Many times we overlay different data sets, so let's break down the previous samples into 4 groups, treat them as different data, and build them layer by layer, assigning the color by hand. You could use `ylim` to control where they are plotted. + +```{r} +## Prepare the data +dn3 <- dn.nona[dn.nona$exReg == '3'] +dn5 <- dn.nona[dn.nona$exReg == '5'] +dnC <- dn.nona[dn.nona$exReg == 'C'] +dn.na <- dn[is.na(dn$exReg)] +## Now we have 4 different data sets +autoplot(seqinfo(dn3), layout = "karyogram") + + layout_karyogram(data = dn3, geom = "rect", ylim = c(0, 10/3), color = "#7fc97f") + + layout_karyogram(data = dn5, geom = "rect", ylim = c(10/3, 10/3*2), color = "#beaed4") + + layout_karyogram(data = dnC, geom = "rect", ylim = c(10/3*2, 10), color = "#fdc086") + + layout_karyogram(data = dn.na, geom = "rect", ylim = c(10, 10/3*4), color = "brown") +``` + +What's more, you could even change the geom for those data + +```{r edit-space, fig.width = 5, fig.height = 4} +dn$pvalue <- runif(length(dn)) * 10 +p <- autoplot(seqinfo(dn)) + layout_karyogram(dn, aes(x = start, y = pvalue), + geom = "point", color = "#fdc086") +p +``` + +### More flexible layout of a karyogram + +```{r fig.width = 5, fig.height = 4} +p.ylim + facet_wrap(~seqnames) +``` + +# Link ranges to your data {#sec:linkranges} +Plot `GRanges` object structure and link to an even-spaced parallel coordinates plot which represents the data in elementeMetadata. + +```{r fig.width = 8} +library(TxDb.Hsapiens.UCSC.hg19.knownGene) +library(ggbio) +data(genesymbol, package = "biovizBase") +txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene +model <- exonsBy(txdb, by = "tx") +model17 <- subsetByOverlaps(model, genesymbol["RBM17"]) +exons <- exons(txdb) +exon17 <- subsetByOverlaps(exons, genesymbol["RBM17"]) +## Reduce to make sure there is no overlap +## just for example +exon.new <- reduce(exon17) +## suppose +values(exon.new)$sample1 <- rnorm(length(exon.new), 10, 3) +values(exon.new)$sample2 <- rnorm(length(exon.new), 10, 10) +values(exon.new)$score <- rnorm(length(exon.new)) +values(exon.new)$significant <- sample(c(TRUE,FALSE), size = length(exon.new),replace = TRUE) +## data ready +exon.new +``` + +Make the plots, you can pass a list of annotation tracks too. + +```{r} +p17 <- autoplot(txdb, genesymbol["RBM17"]) +plotRangesLinkedToData(exon.new, stat.y = c("sample1", "sample2"), annotation = list(p17)) +``` + +For more information, check the manual. + +# Miscellaneous {#sec:misc} + +Every plot object produced by `r Biocpkg("ggplot2")` is essentially an `r Biocpkg("ggplot2")` object, so you could use all the tricks you know with `r Biocpkg("ggplot2")` on +`r Biocpkg("ggbio")` plots too, including scales, colors, themes, etc. + +## Themes + +In `r Biocpkg("ggbio")`, we developed some more themes to make things easier. + +## Plot theme + +Plot-level themes are like any other themes defined in `r Biocpkg("ggplot2")`, simply apply it to a plot. + +```{r} +p.txdb +p.txdb + theme_alignment() +p.txdb + theme_clear() +p.txdb + theme_null() +``` + +When you have multiple chromosomes encoded in seqnames, you could use +theme_genome to make a 'fake' linear view of genome coordinates +quickly by applying this theme, because it's not equal to chromosome +lengths, it's simply + +```{r} +library(GenomicRanges) +set.seed(1) +N <- 100 +gr <- GRanges(seqnames = sample(c("chr1", "chr2", "chr3"), + size = N, replace = TRUE), + IRanges(start = sample(1:300, size = N, replace = TRUE), + width = sample(70:75, size = N,replace = TRUE)), + strand = sample(c("+", "-"), size = N, replace = TRUE), + value = rnorm(N, 10, 3), score = rnorm(N, 100, 30), + sample = sample(c("Normal", "Tumor"), + size = N, replace = TRUE), + pair = sample(letters, size = N, + replace = TRUE)) +seqlengths(gr) <- c(400, 1000, 500) +autoplot(gr) +autoplot(gr) + theme_genome() +``` + +### Track theme + +Track-level themes are more complex; they control the whole look of the +tracks, it's essentially a theme object with some attributes +controlling the track's appearance. +See how we make a template, which you could customize in the same way + +```{r} +theme_tracks_sunset +``` + +The attributes you could control are basically passed to the `tracks()` constructor, including + +```{r, echo=FALSE, warning=FALSE} + +tbl <- data.table::data.table( + Col = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L), + Track.attributes = c("label.bg.color", + "label.bg.fill","label.text.color","label.text.cex", + "label.text.angle","track.plot.color","track.bg.color","label.width"), + Type = c("character","character", + "character","numeric","numeric","character_OR_NULL", + "character_OR_NULL","unit") +) +kable(tbl, row.names = T, align = "c",caption = NULL) +``` + +# Session Information + +```{r session-info} +sessionInfo() +``` + diff --git a/vignettes/ggbio.Rnw b/vignettes/ggbio.Rnw deleted file mode 100644 index 5d7a240..0000000 --- a/vignettes/ggbio.Rnw +++ /dev/null @@ -1,9 +0,0 @@ - -%% This vignette is a stub. Please look at -%% .[/inst]/doc/realvignettes or run 'make real' -%% to copy the real vignettes in the current directory. - -\documentclass[10pt]{article} -%\VignetteIndexEntry{Part 0: Introduction and quick start} -\begin{document} -\end{document} diff --git a/vignettes/knit.sh b/vignettes/knit.sh deleted file mode 100755 index b873fbd..0000000 --- a/vignettes/knit.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -Rdevscript -e "library(knitr);knit('ggbio.Rnw')" -pdflatex ggbio.tex -pdflatex ggbio.tex -evince ggbio.pdf diff --git a/vignettes/realvignettes/ggbio.Rnw b/vignettes/realvignettes/ggbio.Rnw deleted file mode 100644 index ecdbac2..0000000 --- a/vignettes/realvignettes/ggbio.Rnw +++ /dev/null @@ -1,1080 +0,0 @@ -% \VignetteEngine{knitr::knitr} -% \VignetteIndexEntry{ggbio: visualize genomic data with grammar of graphics.} -% \VignetteDepends{EnsDb.Hsapiens.v75} -% \VignetteKeywords{visualization utilities} -% \VignettePackage{ggbio} -\documentclass{report} - -<>= -BiocStyle::latex() -@ - -<>= -library(knitr) -opts_chunk$set(fig.path='./figures/ggbio-', - fig.align='center', fig.show='asis', - eval = TRUE, - fig.width = 4.5, - fig.height = 4.5, - tidy = FALSE, - message = FALSE, - warning = FALSE) -options(replace.assign=TRUE,width=80) -@ - -\newcommand{\Rfunarg}[1]{{\texttt{#1}}} -\newcommand{\Bioc}{\software{Bioconductor}} -\newcommand{\IRanges}{\Biocpkg{IRanges}} -\newcommand{\biovizBase}{\Biocpkg{biovizBase}} -\newcommand{\ggbio}{\Biocpkg{ggbio}} -\newcommand{\visnab}{\Biocpkg{visnab}} -\newcommand{\ggplot}{\Biocpkg{ggplot2}} -\newcommand{\grid}{\CRANpkg{grid}} -\newcommand{\gridExtra}{\CRANpkg{gridExtra}} -\newcommand{\qplot}{\Rfunction{qplot}} -\newcommand{\autoplot}{\Rfunction{autoplot}} -\newcommand{\knitr}{\CRANpackage{knitr}} -\newcommand{\tracks}{\Rfunction{tracks}} -\newcommand{\gr}{\Rclass{GRanges}} - - -\bioctitle[ggbio:visualization toolkits for genomic data]{\ggbio{}: visualization toolkits for genomic data} -\author{Tengfei Yin\footnote{tengfei.yin@sbgenomics.com}} -\date{\today} - -\begin{document} - -\maketitle - -\tableofcontents -\newpage - - -\chapter{Getting started}\label{chapter:start} -\section{Citation} -<>= -citation("ggbio") -@ -\section{Introduction} -\ggbio{} is a \Bioc{} package building on top of \ggplot(), leveraging -the rich objects defined by \Bioc{} and its statistical and -computational power, it provides a flexible genomic visualization -framework, extends the grammar of graphics into genomic data, try to -delivers high quality, highly customizable graphics to the users. - -What it features -\begin{itemize} -\item \autoplot{} function provides ready-to-use template for \Bioc{} - objects and different types of data. -\item flexible low level components to use grammar of graphics to - build you graphics layer by layer. -\item layout transformation, so you could generate circular plot, - grandlinear plot, stacked overview more easily. -\item flexible tracks function to bind any \ggplot(), \ggbio{} based - plots. -\end{itemize} - - -\chapter{Case study: building your first tracks} -In this chapter, you will learn -\begin{itemize} -\item how to add ideogram track. -\item How to add gene model track. -\item how to add track for bam files to visualize coverage and mismatch summary. -\item how to add track for vcf file to visualize the variants. -\end{itemize} - - -\section{Add an ideogram track}\label{section:ideo} -\Rfunction{Ideogram} provides functionality to construct ideogram, -check the manual for more flexible methods. We build genome -\textit{hg19, hg18, mm10, mm9} inside, so you don't have download it -on the fly. When embed with tracks, ideogram show zoomed region -highlights automatically. \Rfunction{xlim} has special function here, -is too changed highlighted zoomed region on the ideogram. - -%% bug fixing -<<>>= -@ - -<<>>= -@ - -<<>>= -@ - - -<>= -library(ggbio) -p.ideo <- Ideogram(genome = "hg19") -p.ideo -library(GenomicRanges) -## special highlights instead of zoomin! -p.ideo + xlim(GRanges("chr2", IRanges(1e8, 1e8+10000000))) -@ - -\section{Add a gene model track}\label{section:gene} -\subsection{Introduction} - -Gene model track is one of the most frequently used track in genome -browser, it is composed of genetic features CDS, UTR, introns, exons -and non-genetic region. In \ggbio{} we support three methods to make -gene model track: - -\begin{itemize} - \item \Rclass{OrganismDb} object: recommended, support gene symbols and other - combination of columns as label. - \item \Rclass{TxDb} object: don't support gene symbol labeling. - \item \Rclass{GRangesList} object: flexible, if you don't have - annotation package available for the first two methods, you could - prepare a data set parsed from gtf file, you can simply use it and - plot it as gene model track. - \item \Rclass{EnsDb} object: supports gene symbol labeling, filtering etc. -\end{itemize} - -\subsection{Make gene model from \Rclass{OrganismDb} object} -\Rclass{OrganismDb} object has a simpler API to retrieve data from -different annotation resources, so we could label our transcripts in -different ways - - -<<>>= -library(ggbio) -library(Homo.sapiens) -class(Homo.sapiens) -## -data(genesymbol, package = "biovizBase") -wh <- genesymbol[c("BRCA1", "NBR1")] -wh <- range(wh, ignore.strand = TRUE) - - -p.txdb <- autoplot(Homo.sapiens, which = wh) -p.txdb - -autoplot(Homo.sapiens, which = wh, label.color = "black", color = "brown", - fill = "brown") -@ - -To change the intron geometry, use \Rcode{gap.geom} to control it, -check out \Rfunction{geom\_alignment} for more control parameters. - -<<>>= -autoplot(Homo.sapiens, which = wh, gap.geom = "chevron") -@ - -To collapse all features, use \Rcode{stat} 'reduce' - -<>= -autoplot(Homo.sapiens, which = wh, stat = "reduce") -@ - -Label could be turned off by setting it to \Rcode{FALSE}, you could -also use expression to make a flexible label combination from column names. - -<<>>= -columns(Homo.sapiens) -autoplot(Homo.sapiens, which = wh, columns = c("TXNAME", "GO"), names.expr = "TXNAME::GO") -@ - - -\subsection{Make gene model from \Rclass{TxDb} object} -\Rclass{TxDb} doesn't contain any gene symbol information, so -we use tx\_id as default for label. -<<>>= -library(TxDb.Hsapiens.UCSC.hg19.knownGene) -txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene -autoplot(txdb, which = wh) -@ - -\subsection{Make gene model from \Rclass{EnsDb} object} - -An alternative source for gene models are the \Rclass{EnsDb} objects from the -\Biocpkg{ensembldb} package that provide gene annotations provided from -Ensembl. The \Biocpkg{ensembldb} package provides a rich filtering system that -allows to easily fetch specific information (genes/transcripts) from an -\Rclass{EnsDb}. The \Rclass{EnsDb} objects provide gene symbol annotations in -the column gene\_name. Alternatively, we could use tx\_id to label transcripts. - -In the example below we plot the gene model of the gene PHKG2. We use a -\Rclass{GenenameFilter} to specify which gene we want to plot. - -<<>>= -library(EnsDb.Hsapiens.v75) -ensdb <- EnsDb.Hsapiens.v75 -autoplot(ensdb, GeneNameFilter("PHKG2")) -@ - -We can pass any filter class defined in the \Biocpkg{AnnotationFilter} package -with argument \Rclass{which}. Alternatively we can combine filter classes using -an \Rclass{AnnotationFilterList} or we can pass a filter expression in form of a -\Rclass{formula}. Below we pass such a filter expression to the function. - -<<>>= -autoplot(ensdb, ~ symbol == "PHKG2", names.expr="gene_name") -@ - -We could also specify a genomic region and fetch all transcripts overlapping -that region (also partially, i.e. with a part of an intron or an exon). - -<<>>= -## We specify "*" as strand, thus we query for genes encoded on both strands -gr <- GRanges(seqnames = 16, IRanges(30768000, 30770000), strand = "*") -autoplot(ensdb, GRangesFilter(gr), names.expr = "gene_name") -@ - -Also, we can spefify directly the gene ids and plot all transcripts of these -genes (not only those overlapping with the region) - -<<>>= -autoplot(ensdb, GeneIdFilter(c("ENSG00000196118", "ENSG00000156873"))) -@ - -\subsection{Make gene model from \Rclass{GRangesList} object} -Sometimes your gene model is not available as none of -\Rclass{OrganismDb} or \Rclass{TxDb} object, it's may be stored -in a table, you could simple parse it into a \Rclass{GRangeList} -object. -\begin{itemize} - \item each group indicate one transcripts - \item names of group are shown as labels - \item this object must has a column contains following key word: - cds, exon, intron, and it's not case senstitive. use \Rcode{type} - to map this column. By default, we will try to parse 'type' column. -\end{itemize} - -Let's make a sample \Rclass{GRangesList} object which contains all -information, and fake some labels. -<<>>= -library(biovizBase) -gr.txdb <- crunch(txdb, which = wh) -## change column to 'model' -colnames(values(gr.txdb))[4] <- "model" -grl <- split(gr.txdb, gr.txdb$tx_id) -## fake some randome names -names(grl) <- sample(LETTERS, size = length(grl), replace = TRUE) -grl -@ - -We get our example data ready, it meets all requirements, to make it a -gene model track it's pretty simple to use autoplot, but don't forget -mapping because we changed our column names, asssume you store you -model key words in column 'model'. - -<<>>= -autoplot(grl, aes(type = model)) -ggplot() + geom_alignment(grl, type = "model") -@ - - -\section{Add a reference track}\label{section:reference} -To add a reference track, we need to load a \Rclass{BSgenome} object -from the annotation package. You can choose to plot the sequence as -\textit{text, rect, segment}. - -\subsection{Semantic zoom} -Here we introduce semantic zoom in \ggbio{}, for some plots like -reference sequence, we use pre-defined zoom level threshold to -automatically assign geom to the track, unless the geom is explicitly -specified. In the example below, when your region is too wide we show -text 'zoom in to see text', when you zoom into different level, it -shows you different details. \Rfunction{zoom} is a function we will -introduce more in chapter \ref{chapter:nav} when we introduce more -about navigation. - -You can pass a zoom in factor into \Rfunction{zoom} function, if it's -over 1 it's zooming out, if it's smaller than 1 it's zooming in. - -<>= -library(BSgenome.Hsapiens.UCSC.hg19) -bg <- BSgenome.Hsapiens.UCSC.hg19 -p.bg <- autoplot(bg, which = wh) -## no geom -p.bg -## segment -p.bg + zoom(1/100) -## rectangle -p.bg + zoom(1/1000) -## text -p.bg + zoom(1/2500) -@ - - - -To override a zemantic zoom threshold, you simply provide a geom explicitly. -<>= -library(BSgenome.Hsapiens.UCSC.hg19) -bg <- BSgenome.Hsapiens.UCSC.hg19 -## force to use geom 'segment' at this level -autoplot(bg, which = resize(wh, width = width(wh)/2000), geom = "segment") -@ - - -\section{Add an alignment track}\label{section:bam} -\ggbio{} supports visuaization of alignemnts file stored in bam, -\autoplot{} method accepts -\begin{itemize} -\item bam file path (indexed) -\item \Rclass{BamFile} object -\item \Rclass{GappedAlignemnt} object -\end{itemize} - -It's simple to just pass a file path to \autoplot{} function, you can -stream a chunk of region by providing 'which' parameter. Otherwise -please use method 'estiamte' to show overall estiamted coverage. - - - -<<>>= -fl.bam <- system.file("extdata", "wg-brca1.sorted.bam", package = "biovizBase") -wh <- keepSeqlevels(wh, "chr17") -autoplot(fl.bam, which = wh) -@ - - -geom 'gapped pair' will show you alignments. - -<<>>= -fl.bam <- system.file("extdata", "wg-brca1.sorted.bam", package = "biovizBase") -wh <- keepSeqlevels(wh, "chr17") -autoplot(fl.bam, which = resize(wh, width = width(wh)/10), geom = "gapped.pair") -@ - - -To show mismatch proportion, you have to provide reference sequence, -the mismatched proportion is color coded in the bar chart. -<<>>= -library(BSgenome.Hsapiens.UCSC.hg19) -bg <- BSgenome.Hsapiens.UCSC.hg19 -p.mis <- autoplot(fl.bam, bsgenome = bg, which = wh, stat = "mismatch") -p.mis -@ - -To view overall estimated coverage distribution, please use method -'estiamte'. 'which' parameter also accept characters. And there is a -hidden value called '..coverage..' to let you do simple transformation -in aes(). -<<>>= -autoplot(fl.bam, method = "estimate") -autoplot(fl.bam, method = "estimate", which = paste0("chr", 17:18), aes(y = log(..coverage..))) -@ - -\section{Add a variants track}\label{section:vcf} -This track is supported by semantic zoom. - -To view your variants file, you could -\begin{itemize} -\item Import it using package \Biocpkg{VariantAnntoation} as - \Rclass{VCF} object, then use \autoplot{} -\item Convert it into \Rclass{VRanges} object and use \autoplot{}. -\item Simply provide vcf file path in \autoplot(). -\end{itemize} - -<<>>= -library(VariantAnnotation) -fl.vcf <- system.file("extdata", "17-1409-CEU-brca1.vcf.bgz", package="biovizBase") -vcf <- readVcf(fl.vcf, "hg19") -vr <- as(vcf[, 1:3], "VRanges") -vr <- renameSeqlevels(vr, value = c("17" = "chr17")) -## small region contains data -gr17 <- GRanges("chr17", IRanges(41234400, 41234530)) -p.vr <- autoplot(vr, which = wh) -## none geom -p.vr -## rect geom -p.vr + xlim(gr17) -## text geom -p.vr + xlim(gr17) + zoom() -@ - - -You can simply overide geom -<>= -autoplot(vr, which = wh, geom = "rect", arrow = FALSE) -@ - -%% If 'summary' is turned on, it will show you a summary of genotype -%% barchar for all samples on top and detailed one for specified samples -%% below it. - -%% We have 116 samples in this VCF data, -%% <<>>= -%% str(vcf) -%% autoplot(vcf, which = wh, sample = 1:3, summary = TRUE) -%% nms <- c("NA06984" "NA06985" "NA06986") -%% autoplot(vcf, which = wh, sample = 1:3, summary = TRUE) -%% @ - - -\section{Building your tracks} -<>= -## tks <- tracks(p.ideo, mismatch = p.mis, dbSNP = p.vr, ref = p.bs, gene = p.txdb) -## tks <- tracks(fl.bam, fl.vcf, bs, Homo.sapiens) ## default ideo = FALSE, turned on -## tks <- tracks(fl.bam, fl.vcf, bs, Homo.sapiens, ideo = TRUE) -## tks + xlim(gr17) -gr17 <- GRanges("chr17", IRanges(41234415, 41234569)) -tks <- tracks(p.ideo, mismatch = p.mis, dbSNP = p.vr, ref = p.bg, gene = p.txdb, - heights = c(2, 3, 3, 1, 4)) + xlim(gr17) + theme_tracks_sunset() -tks -@ - - - -\chapter{Simple navigation}\label{chapter:nav} -We try to provide a simple navigation API for your plot, so you could -zoom in and zoom out, or go through view chunks one by one. - -\begin{itemize} -\item \Rfunction{zoom}: put a factor inside and you can zoom in or zoom out -\item \Rfunction{nextView}: switch to next view -\item \Rfunction{prevView}: switch to previous view -\end{itemize} - -Navigation function also works for tracks plot too. - -<>= -## zoom in -tks + zoom() -@ - -Try following command yourself. -<>= -## zoom in with scale -p.txdb + zoom(1/8) -## zoom out -p.txdb + zoom(2) -## next view page -p.txdb + nextView() -## previous view page -p.txdb + prevView() -@ - -Don't forget \Rfunction{xlim} accept \Rclass{GRanges} object (single -row), so you could simply prepare a \Rclass{GRanges} to store the -region of interests and go through them one by one. - - - -\chapter{Overview plots}\label{chapter:overview} -Overview is a good way to show all events at the same time, give overall summary statiics for the whole genome. - -In this chapter, we will introcue three different layouts that are used a lots in genomic data visualization. -\section{how to make circular plots}\label{section:circular} -\subsection{Introduction} -Circular view is a special layout in \ggbio{} , this idea has been implemented -in many different software, for example, the \software{Circos} project. However, we keep the grammar of graphics for users, so mapping varialbes to aesthetics is very easy, \ggbio{} leverage the data structure defiend in \Bioc{} to make this process as simple as possible. - - - -\subsection{Buidling circular plot layer by layer} - -Ok, let's start to process some raw data to the format we want. The -data used in this study is from this a -paper\footnote{http://www.nature.com/ng/journal/v43/n10/full/ng.936.html}. In -this tutorial, We are going to -\begin{enumerate} -\item Visualize somatic mutation as segment. -\item Visualize inter,intro-chromosome rearrangement as links. -\item Visualize mutation score as point tracks with grid-background. -\item Add scale and ticks and labels. -\item To arrange multiple plots and legend. create multiple sample comparison. -\end{enumerate} - - -All the raw data processed and stored in \Rclass{GRanges} ready for use, you can simply load the sample data from \biovizBase{} - - -<>= -data("CRC", package = "biovizBase") -@ - - - -\Rfunction{layout\_circle} is depreicated, because you have to set up radius and trackWidth manually with this function for creating circular plot. - -We now present the new \Rfunction{circle} function, it -accepts \Robject{Granges} object, and users don't have to specify radius, track width, -you just add them one by one, it will be automatically created from innter circle to outside, unless you specify \Rcode{trackWidth} and \Rcode{radius} manually. To change default radius and trackWidth for all tracks, you simply put them in \Rfunction{ggbio} function. - -\begin{itemize} -\item rule of thumb \Rfunction{seqlengths}, \Rfunction{seqlevels} and chromosomes - names should be exactly the same. -\item to use \Rfunction{circle}, you have to use \Rfunction{ggbio} constructor at the beginning instead of \Rfunction{ggplot}. -\end{itemize} - -You can use \autoplot{} to create single track easily like -<<>>= -head(hg19sub) -autoplot(hg19sub, layout = "circle", fill = "gray70") -@ - - -Hoever, the low level \Rfunction{circle} function leave you more -flexibility to build circular plot one by one. Let's start to add -tracks one by one. - - - -Let's use the same data to create ideogram, label and scale track, it layouts the circle by the order you created from inside to outside. -<<>>= -p <- ggbio() + circle(hg19sub, geom = "ideo", fill = "gray70") + - circle(hg19sub, geom = "scale", size = 2) + - circle(hg19sub, geom = "text", aes(label = seqnames), vjust = 0, size = 3) -p -@ - -To simply override the setting, you can do it globally in \Rfunction{ggbio} function or individually \Rfunction{circle} function by specifying parametters \Rcode{trackWidth} and \Rcode{radius}, you can also specify the global settin for buffer in between in \Rfunction{ggbio} like example below. - -<<>>= -p <- ggbio(trackWidth = 10, buffer = 0, radius = 10) + circle(hg19sub, geom = "ideo", fill = "gray70") + - circle(hg19sub, geom = "scale", size = 2) + - circle(hg19sub, geom = "text", aes(label = seqnames), vjust = 0, size = 3) -p -@ - - -Then we add a "rectangle" track to show somatic mutation, this will looks like vertical -segments. -<>= -head(mut.gr) -p <- ggbio() + circle(mut.gr, geom = "rect", color = "steelblue") + - circle(hg19sub, geom = "ideo", fill = "gray70") + - circle(hg19sub, geom = "scale", size = 2) + - circle(hg19sub, geom = "text", aes(label = seqnames), vjust = 0, size = 3) -p -@ - - -Next, we need to add some "links" to show the rearrangement, of course, links -can be used to map any kind of association between two or more different -locations to indicate relationships like copies or fusions. To create a suitable structure to plot, please use another \Rclass{GRanges} to -represent the end of the links, and stored as elementMetadata for the "start -point" \Rclass{GRanges}. Here we named it as "to.gr" and will be used later. -<<>>= -head(crc.gr) -@ - - -Here in this example, we use "intrachromosomal" to label -rearrangement within the same chromosomes and use "interchromosomal" to label -rearrangement in different chromosomes. - - - -Get subset of links data for only one sample "CRC1" - -<>= -gr.crc1 <- crc.gr[values(crc.gr)$individual == "CRC-1"] -@ - -Ok, add a "point" track with grid background for rearrangement data and map `y` -to variable "score", map `size` to variable "tumreads", rescale the size to a -proper size range. - -<>= -## manually specify radius -p <- p + circle(gr.crc1, geom = "point", aes(y = score, size = tumreads), - color = "red", grid = TRUE, radius = 30) + scale_size(range = c(1, 2.5)) -p -@ - -% \clearpage -Finally, let's add links and map color to rearrangement types. Remember you need -to specify `linked.to` parameter to the column that contain end point of the data. -<>= -## specify radius manually -p <- p + circle(gr.crc1, geom = "link", linked.to = "to.gr", aes(color = rearrangements), - radius = 23) -p -@ - -All those code could be simply constructed by following code -<<>>= -p <- ggbio() + - circle(gr.crc1, geom = "link", linked.to = "to.gr", aes(color = rearrangements)) + - circle(gr.crc1, geom = "point", aes(y = score, size = tumreads), - color = "red", grid = TRUE) + scale_size(range = c(1, 2.5)) + - circle(mut.gr, geom = "rect", color = "steelblue") + - circle(hg19sub, geom = "ideo", fill = "gray70") + - circle(hg19sub, geom = "scale", size = 2) + - circle(hg19sub, geom = "text", aes(label = seqnames), vjust = 0, size = 3) -p -@ - - -\subsection{Complex arragnment of plots} -In this step, we are going to make multiple sample comparison, this may require -some knowledge about package \Rpackage{grid} and \Rpackage{gridExtra}. We will -introduce a more easy way to combine your graphics later after this. - -We just want 9 single circular plots put together in one page, since we cannot -keep too many tracks, we only keep ideogram and links. Here is one sample. - - - -<>= -grl <- split(crc.gr, values(crc.gr)$individual) -## need "unit", load grid -library(grid) -crc.lst <- lapply(grl, function(gr.cur){ - print(unique(as.character(values(gr.cur)$individual))) - cols <- RColorBrewer::brewer.pal(3, "Set2")[2:1] - names(cols) <- c("interchromosomal", "intrachromosomal") - p <- ggbio() + circle(gr.cur, geom = "link", linked.to = "to.gr", - aes(color = rearrangements)) + - circle(hg19sub, geom = "ideo", - color = "gray70", fill = "gray70") + - scale_color_manual(values = cols) + - labs(title = (unique(values(gr.cur)$individual))) + - theme(plot.margin = unit(rep(0, 4), "lines")) -}) -@ - -We wrap the function in grid level to a more user-friendly high level function, -called \Rfunction{arrangeGrobByParsingLegend}. You can pass your ggplot2 -graphics to this function , specify the legend you want to keep on the right, -you can also specify the column/row numbers. Here we assume all plots we have -passed follows the same color scale and have the same legend, so we only have to -keep one legend on the right. - - -<>= - -arrangeGrobByParsingLegend(crc.lst, widths = c(4, 1), legend.idx = 1, ncol = 3) -@ - -\section{How to make grandlinear plots}\label{section:grandlinear} -\subsection{Introduction} - -Let's use a subset of \software{PLINK} output (\url{https://github.com/stephenturner/qqman/blob/master/plink.assoc.txt.gz}) as our example test data. - -<>= -snp <- read.table(system.file("extdata", "plink.assoc.sub.txt", package = "biovizBase"), - header = TRUE) -require(biovizBase) -gr.snp <- transformDfToGr(snp, seqnames = "CHR", start = "BP", width = 1) -head(gr.snp) -## change the seqname order -require(GenomicRanges) -gr.snp <- keepSeqlevels(gr.snp, as.character(1:22)) -seqlengths(gr.snp) -## need to assign seqlengths -data(ideoCyto, package = "biovizBase") -seqlengths(gr.snp) <- as.numeric(seqlengths(ideoCyto$hg18)[1:22]) -## remove missing -gr.snp <- gr.snp[!is.na(gr.snp$P)] -## transform pvalue -values(gr.snp)$pvalue <- -log10(values(gr.snp)$P) -head(gr.snp) -## done -@ -The data is ready, we need to pay attention -\begin{itemize} -\item if seqlengths is missing, we use data range, so the chromosome length is not accurate -\item use seqlevel to control order of chromosome -\end{itemize} - -\subsection{Corrdinate genome} -In \autoplot{}, argument \Rfunarg{coord} is just used to transform the data, -after that, you can use it as common \Robject{GRanges}, all other geom/stat -works for it. - -<>= -autoplot(gr.snp, geom = "point", coord = "genome", aes(y = pvalue)) -@ - -However, we recommend you to use more powerful function \Rfunction{plotGrandLinear} to generate manhattan plot introduced in next section. - - -\subsection{Convenient \Rfunction{plotGrandLinear} function} -For \textit{Manhattan plot}, we have a function called -\Rfunction{plotGrandLinear}. aes(y = ) is required to indicate the y -value, e.g. p-value. - -Color mapping is automatically figured out by \ggbio{} following the rules -\begin{itemize} -\item if \Rfunarg{color} present in \Rcode{aes()}, like \Rcode{aes(color = - seqnames)}, it will assume it's mapping to data column called 'seqnames'. -\item if \Rfunarg{color} is not wrapped in \Rcode{aes()}, then this function - will \textbf{recylcle} them to all chromosomes. -\item if \Rfunarg{color} is single character representing color, then just use - one arbitrary color. -\end{itemize} - -Let's test some examples for controling colors. - -<>= -plotGrandLinear(gr.snp, aes(y = pvalue), color = c("#7fc97f", "#fdc086")) -@ - -Let's add a cutoff line -<>= -plotGrandLinear(gr.snp, aes(y = pvalue), color = c("#7fc97f", "#fdc086"), - cutoff = 3, cutoff.color = "blue", cutoff.size = 0.2) - -@ - -Sometimes you use color to mapping other varibles so you may need a different to separate chromosomes. - -<>= - -plotGrandLinear(gr.snp, aes(y = pvalue, color = OR), spaceline = TRUE, legend = TRUE) -@ - - -\subsection{How to highlight some points?} -You can provide a highlight \gr{}, and each row highlights a set of overlaped -snps, and labeled by rownames or certain columns, there is more control in the -function as parameters, with prefix highlight.*, so you could control color, -label size and color, etc. - -<>= -gro <- GRanges(c("1", "11"), IRanges(c(100, 2e6), width = 5e7)) -names(gro) <- c("group1", "group2") -plotGrandLinear(gr.snp, aes(y = pvalue), highlight.gr = gro) -@ - -\section{How to make stacked karyogram overview plots}\label{section:stacked} -\subsection{Introduction} -A karyotype is the number and appearance of chromosomes in the nucleus of a -eukaryotic cell\footnote{http://en.wikipedia.org/wiki/Karyotype}. It's one kind -of overview when we want to show distribution of certain events on the genome, -for example, binding sites for certain protein, even compare them across samples -as example shows in this section. - -\Robject{GRanges} and \Robject{Seqinfo} objects are an ideal container for -storing data needed for karyogram plot. Here is the strategy we used for -generating ideogram templates. -\begin{itemize} -\item Althouth \Robject{seqlengths} is not required, it's highly recommended for - plotting karyogram. If a \Robject{GRanges} object contains - \Robject{seqlengths}, we know exactly how long each chromosome is, and will - use this information to plot genome space, particularly we plot all levels - included in it, \textbf{NOT JUST} data space. -\item If a \Robject{GRanges} has no \Robject{seqlengths}, we will issue a - warning and try to estimate the chromosome lengths from data included. This is - \textbf{NOT} accurate most time, so please pay attention to what you are going - to visualize and make sure set \Robject{seqlengths} before hand. -\end{itemize} - -\subsection{Create karyogram temlate} -Let's first introduce how to use \autoplot{} to generate karyogram graphic. - -The most easy one is to just plot Seqinfo by using \autoplot{}, if your \gr{} -object has seqinfo with seqlengths information. Then you add data layer later. - -<<>>= -data(ideoCyto, package = "biovizBase") -autoplot(seqinfo(ideoCyto$hg19), layout = "karyogram") -@ - -To show cytobands, your data need to have cytoband information, we stored some data for you, including \textit{hg19, hg18, mm10, mm9}. - -<>= -## turn on cytobands if present -biovizBase::isIdeogram(ideoCyto$hg19) -autoplot(ideoCyto$hg19, layout = "karyogram", cytobands = TRUE) -@ - -To change order or only show a subset of the karyogram, you have to manipulate \Rfunction{seqlevels}, please check out manual for \Rfunction{keepSeqlevels, seqlevels} in \Biocpkg{GenomicRanges} package for more information. Or you could read the example below. - -\subsection{Add data on karyogram layout} -If you have single data set stored as \Rclass{GRanges} to show on a karyogram layout, \autoplot{} function is enough for you to plot the data on it. - -We use a default data in package \Rpackage{biovizBase}, which is a subset of RNA -editing set in human. The data involved in this \Robject{GRanges} is sparse, so -we cannot simply use it to make karyogram template, otherwise, the estimated chromosome -lengths will be very rough and inaccurate. So what we need to do first is to \emph{add seglength information to this object.} - - -<>= -data(darned_hg19_subset500, package = "biovizBase") -dn <- darned_hg19_subset500 -library(GenomicRanges) -seqlengths(dn) -## add seqlengths -## we have seqlegnths information in another data set -seqlengths(dn) <- seqlengths(ideoCyto$hg19)[names(seqlengths(dn))] -## then we change order -dn <- keepSeqlevels(dn, paste0("chr", c(1:22, "X"))) -seqlengths(dn) -autoplot(dn, layout = "karyogram") -@ - - -Then we take one step further, the power of \ggplot{} or \ggbio{} is the -flexible multivariate data mapping ability in graphics, make data exploration -much more convenient. In the following example, we are trying to map a -categorical variable 'exReg' to color, this variable is included in the data, -and have three levels, '3' indicate 3' utr, '5' means 5' utr and 'C' means -coding region. We have some missing values indicated as \Rcode{NA}, in default, -it's going to be shown in gray color, and keep in mind, since the basic -geom(geometric object) is rectangle, and genome space is very large, so change -both color/fill color of the rectangle to specify both border and filled color -is necessary to get the data shown as different color, otherwise if the region -is too small, border color is going to override the fill color. - -<>= -## since default is geom rectangle, even though it's looks like segment -## we still use both fill/color to map colors -autoplot(dn, layout = "karyogram", aes(color = exReg, fill = exReg)) -@ - -Or you can set the missing value to particular color yo -u want (NA values is not shown on the legend). - -<<>>= -## since default is geom rectangle, even though it's looks like segment -## we still use both fill/color to map colors -autoplot(dn, layout = "karyogram", aes(color = exReg, fill = exReg), alpha = 0.5) + - scale_color_discrete(na.value = "brown") -@ - -Well, sometimes we have too many values, we want to separate them by groups and show them at diffent height, below is a hack for that purpose and in next section, we will introduce a more flexible and general way to add data layer by layer. - -\emph{Template chromosome y limits is [0, 10], that's why this hack works} - -<>= -## let's remove the NA value -dn.nona <- dn[!is.na(dn$exReg)] -## compute levels based on categories -dn.nona$levels <- as.numeric(factor(dn.nona$exReg)) -## do a trcik show them at different height -p.ylim <- autoplot(dn.nona, layout = "karyogram", aes(color = exReg, fill = exReg, - ymin = (levels - 1) * 10/3, - ymax = levels * 10 /3)) -@ - -\subsection{Add more data using layout\_karyogram function} -In this section, a lower level function \Rfunction{layout\_karyogram} is going -to be introduced. This is convenient API for constructing karyogram plot and -adding more data layer by layer. Function \Rfunction{ggplot} is just to create -blank object to add layer on. - -You need to pay attention to -\begin{itemize} -\item when you add plots layer by layer, seqnames of different data must be the - same to make sure the data are mapped to the same chromosome. For example, if - you name chromosome following schema like \textit{chr1} and use just number - \textit{1} to name other data, they will be treated as different chromosomes. -\item cannot use the same aesthetics mapping multiple time for different - data. For example, if you have used aes(color = ), for one data, you cannot - use aes(color = ) anymore for mapping variables from other add-on data, this - is currently not allowed in \ggplot{}, even though you expect multiple color - legend shows up, this is going to confuse people which is which. HOWEVER, - \Rfunarg{color} or \Rfunarg{fill} without \Rcode{aes()} wrap around, is - allowed for any track, it's set single arbitrary color. -\item Default rectangle y range is [0, 10], so when you add on more data layer - by layer on existing graphics, you can use \Rfunarg{ylim} to control how to - normalize your data and plot it relative to chromosome space. For example, - with default, chromosome space is plotted between y [0, 10], if you use - \Rcode{ylim = c(10 , 20)}, you will stack data right above each chromosomes - and with equal width. For geom like 'point', which you need to specify 'y' - value in \Rcode{aes()}, we will add 5\% margin on top and at bottom of that - track. -\end{itemize} - -Many times we overlay different datas sets, so let's break down the previous samples into 4 groups and treat them as different data and build them layer by layer, assign the color by hand. You could use ylim to control where they are ploted. -<<>>= -## prepare the data -dn3 <- dn.nona[dn.nona$exReg == '3'] -dn5 <- dn.nona[dn.nona$exReg == '5'] -dnC <- dn.nona[dn.nona$exReg == 'C'] -dn.na <- dn[is.na(dn$exReg)] -## now we have 4 different data sets -autoplot(seqinfo(dn3), layout = "karyogram") + - layout_karyogram(data = dn3, geom = "rect", ylim = c(0, 10/3), color = "#7fc97f") + - layout_karyogram(data = dn5, geom = "rect", ylim = c(10/3, 10/3*2), color = "#beaed4") + - layout_karyogram(data = dnC, geom = "rect", ylim = c(10/3*2, 10), color = "#fdc086") + - layout_karyogram(data = dn.na, geom = "rect", ylim = c(10, 10/3*4), color = "brown") -@ - -What's more, you could even chagne the geom for those data -<>= -dn$pvalue <- runif(length(dn)) * 10 -p <- autoplot(seqinfo(dn)) + layout_karyogram(dn, aes(x = start, y = pvalue), - geom = "point", color = "#fdc086") -p -@ - -\subsection{More flexible layout of karyogram} -<>= -p.ylim + facet_wrap(~seqnames) -@ - -%% \chapter{Add a sashimi plot for splicing}\label{section:splicing} -\chapter{Link ranges to your data}\label{section:link} -Plot GRanges object structure and linked to a even spaced paralell -coordinates plot which represting the data in elementeMetadata. - -<>= -library(TxDb.Hsapiens.UCSC.hg19.knownGene) -library(ggbio) -data(genesymbol, package = "biovizBase") -txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene -model <- exonsBy(txdb, by = "tx") -model17 <- subsetByOverlaps(model, genesymbol["RBM17"]) -exons <- exons(txdb) -exon17 <- subsetByOverlaps(exons, genesymbol["RBM17"]) -## reduce to make sure there is no overlap -## just for example -exon.new <- reduce(exon17) -## suppose -values(exon.new)$sample1 <- rnorm(length(exon.new), 10, 3) -values(exon.new)$sample2 <- rnorm(length(exon.new), 10, 10) -values(exon.new)$score <- rnorm(length(exon.new)) -values(exon.new)$significant <- sample(c(TRUE,FALSE), size = length(exon.new),replace = TRUE) -## data ready -exon.new -@ - -Make the plots, you can pass a list of annotation tracks too. -<<>>= -p17 <- autoplot(txdb, genesymbol["RBM17"]) -plotRangesLinkedToData(exon.new, stat.y = c("sample1", "sample2"), annotation = list(p17)) -@ - -For more information, check the manual. -%% \chapter{Low level API}\label{chapter:low} -%% \begin{table}[h!t!b!p] -%% \begin{center} -%% \small{ -%% \begin{tabular}{|p{1.4cm}|p{3cm}|p{8cm}|p{0.6cm}|} -%% \hline -%% Comp & name & usage & icon\\\hline -%% \textbf{geom} &geom\_rect & rectangle& \includegraphics[height = 0.25cm, width = 0.6cm]{figures/geom_rect.pdf}\\ -%% &geom\_segment & segment& \includegraphics[height = 0.25cm, width = 0.6cm]{figures/geom_segment.pdf}\\ -%% &geom\_chevron & chevron&\includegraphics[height = 0.25cm, width = 0.6cm]{figures/geom_chevron.pdf}\\ -%% &geom\_arrow & arrow&\includegraphics[height = 0.25cm, width = 0.6cm]{figures/geom_arrow.pdf}\\ -%% &geom\_arch & arches &\includegraphics[height = 0.25cm, width = 0.6cm]{figures/geom_arch.pdf}\\ -%% &geom\_bar & bar &\includegraphics[height = 0.25cm, width = 0.6cm]{figures/geom_bar.pdf}\\ -%% &geom\_alignment & alignment (gene) & -%% \includegraphics[height = 0.25cm, width = 0.6cm]{figures/geom_alignment.pdf}\\\hline -%% \textbf{stat} -%% &stat\_coverage & coverage (of reads) & -%% \includegraphics[height = 0.25cm, width = 0.6cm]{figures/stat_coverage_icon.pdf}\\ -%% &stat\_mismatch & mismatch pileup for alignments & -%% \includegraphics[height = 0.25cm,width = 0.6cm]{figures/stat_mismatch.pdf}\\ -%% &stat\_aggregate & aggregate in sliding window & -%% \includegraphics[height = 0.25cm, width = 0.6cm]{figures/stat_aggregate.pdf}\\ -%% &stat\_stepping & avoid overplotting & -%% \includegraphics[height = 0.25cm, width = 0.6cm]{figures/stat_stepping.pdf}\\ -%% &stat\_gene & consider gene structure & -%% \includegraphics[height = 0.25cm, width = 0.6cm]{figures/stat_gene.pdf}\\ -%% &stat\_table & tabulate ranges & -%% \includegraphics[height = 0.25cm, width = 0.6cm]{figures/stat_table.pdf}\\ -%% &stat\_identity & no change & -%% \includegraphics[height = 0.25cm, width = 0.6cm]{figures/stat_identity.pdf}\\\hline -%% \textbf{coord} &linear& ggplot2 linear but facet by chromosome & -%% \includegraphics[height = 0.25cm, width = 0.6cm]{figures/coord_linear.pdf}\\ -%% &genome& put everything on genomic coordinates& -%% \includegraphics[height = 0.25cm, width = 0.6cm]{figures/coord_genome.pdf}\\ -%% &truncate gaps & compact view by shrinking gaps& -%% \includegraphics[height = 0.25cm, width = 0.6cm]{figures/coord_truncate_gaps.pdf}\\\hline -%% \textbf{layout}& track & stacked tracks &\includegraphics[height = 0.25cm, width = 0.6cm]{figures/coord_linear.pdf}\\ -%% &karyogram & karyogram display & -%% \includegraphics[height = 0.25cm, width = 0.6cm]{figures/layout_karyogram.pdf}\\ -%% &circle & circular & -%% \includegraphics[height = 0.25cm, width = 0.6cm]{figures/layout_circle.pdf}\\\hline -%% \textbf{faceting}&formula & facet by formula & -%% \includegraphics[height = 0.25cm, width = 0.6cm]{figures/facet.pdf}\\ -%% &ranges & facet by ranges & -%% \includegraphics[height = 0.25cm, width = 0.6cm]{figures/facet_gr.pdf}\\\hline -%% \textbf{scale} &scale\_x\_sequnit&change x unit:Mb, kb, bp& \\ -%% &scale\_fill\_giemsa&ideogram color&\\ -%% &scale\_fill\_fold\_change&around 0 scaling, for heatmap.&\\\hline -%% \end{tabular} -%% } -%% \end{center} -%% \caption{Components of the basic grammar of graphics, with the extensions available in -%% \ggbio{}.} -%% \label{tab:components} -%% \end{table} - -\chapter{Miscellaneous}\label{chapter:misc} -Every plot object produced by \ggplot{} is essentially a \ggplot{} -object, so you could use all the tricks you know with \ggplot{} on -\ggbio{} plots too, including scales, colors, themes, etc. - -\section{Themes} -In \ggbio{}, we developed some more themes to make things easier. - -\subsection{Plot theme} -Plot level themes are like any other themes defined in \ggplot{}, -simply apply it to a plot. - -<<>>= -p.txdb -p.txdb + theme_alignment() -p.txdb + theme_clear() -p.txdb + theme_null() -@ %def - -When you have multiple chromosomes encoded in seqnames, you could use -theme\_genome to make a 'fake' linear view of genome coordinates -quickly by applying this theme, because it's not equal to chromosome -lengths, it's simply - -<<>>= -library(GenomicRanges) -set.seed(1) -N <- 100 -gr <- GRanges(seqnames = sample(c("chr1", "chr2", "chr3"), - size = N, replace = TRUE), - IRanges(start = sample(1:300, size = N, replace = TRUE), - width = sample(70:75, size = N,replace = TRUE)), - strand = sample(c("+", "-"), size = N, replace = TRUE), - value = rnorm(N, 10, 3), score = rnorm(N, 100, 30), - sample = sample(c("Normal", "Tumor"), - size = N, replace = TRUE), - pair = sample(letters, size = N, - replace = TRUE)) -seqlengths(gr) <- c(400, 1000, 500) -autoplot(gr) -autoplot(gr) + theme_genome() -@ %def - - -\subsection{Track theme} -Track level themes are more complex, it controls whole looking of the -tracks, it's essentially a theme object with some attributes -controlling the tracks appearance. - -See how we make a template, you could customize in the same way -<<>>= -theme_tracks_sunset -@ - -The attributes you could control is basically passed to tracks() -constructor, including - -\begin{table}[h!t!b!p] - \centering - \begin{tabular}{|c|c|} - \hline - label.bg.color & character \\\hline - label.bg.fill & character\\\hline - label.text.color & character\\\hline - label.text.cex & numeric\\\hline - label.text.angle & numeric\\\hline - track.plot.color & character\_OR\_NULL\\\hline - track.bg.color & character\_OR\_NULL\\\hline - label.width & unit\\\hline - \end{tabular} - \caption{tracks attributes} -\end{table} - - - -%% \section{Scales} - -\chapter{Session Information} - - - -<>= -sessionInfo() -@ - - -\end{document} -