Skip to content

Commit b8f770f

Browse files
committed
working on new chapter about ranges
1 parent a04149c commit b8f770f

4 files changed

+172
-35
lines changed

_bookdown.yml

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ rmd_files:
1111
"rna-seq-eda.Rmd",
1212
"many-genomic-models.Rmd",
1313
"isoform-analysis.Rmd",
14+
"tximeta-gene-range.Rmd",
1415
"references.Rmd"]
1516
bibliography: references.bib
1617
language:

isoform-assignment.Rmd

-35
This file was deleted.

references.bib

+28
Original file line numberDiff line numberDiff line change
@@ -250,3 +250,31 @@ @Article{Alasoo2018
250250
url={https://doi.org/10.1038/s41588-018-0046-7}
251251
}
252252

253+
@article{Patro2017,
254+
author = {Patro, Rob and Duggal, Geet and Love, Michael I. and Irizarry, Rafael A. and Kingsford, Carl},
255+
journal = {Nature Methods},
256+
title = {Salmon provides fast and bias-aware quantification of transcript expression},
257+
url = {https://doi.org/10.1038/nmeth.4197},
258+
year = 2017
259+
}
260+
261+
@article{Soneson2015,
262+
url = {https://doi.org/10.12688/f1000research.7563.1},
263+
author = {Soneson, Charlotte and Love, Michael I. and Robinson, Mark},
264+
title = {{Differential analyses for RNA-seq: transcript-level estimates improve gene-level inferences}},
265+
journal = {F1000Research},
266+
year = 2015,
267+
Volume = 4,
268+
Issue = 1521
269+
}
270+
271+
@article{Love2020,
272+
url = {https://doi.org/10.1371/journal.pcbi.1007664},
273+
author = {Love, Michael I. and Soneson, Charlotte and Hickey, Peter F. and Johnson, Lisa K. and Pierce, N. Tessa and Shepherd, Lori and Morgan, Martin and Patro, Rob},
274+
title = {{Tximeta: Reference sequence checksums for provenance identification in RNA-seq}},
275+
journal = {PLOS Computational Biology},
276+
year = 2020,
277+
volume = 16,
278+
issue = 2,
279+
pages = {e1007664}
280+
}

tximeta-gene-range.Rmd

+143
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
# Gene range assignment in tximeta
2+
3+
Objective: explore consequences of how features are located when data
4+
is summarized at gene level.
5+
6+
The *macrophage* package provides Salmon quantification files for a
7+
set of 24 RNA-seq samples from @Alasoo2018, "Shared genetic effects
8+
on chromatin and gene expression indicate a role for enhancer priming in
9+
immune response". Data for six female human donors is available in the
10+
package, with gene expression at baseline, after IFN-gamma stimulation,
11+
after exposure to *Salmonella*, and after a combination of the two
12+
treatments.
13+
14+
```{r message=FALSE}
15+
library(macrophage)
16+
library(dplyr)
17+
library(tximeta)
18+
dir <- system.file("extdata", package="macrophage")
19+
coldata <- read.csv(file.path(dir, "coldata.csv")) %>%
20+
mutate(files = file.path(dir, "quants", names, "quant.sf.gz"),
21+
condition = factor(condition_name),
22+
condition = relevel(condition, "naive")) %>%
23+
select(files, names=sample_id, condition, line_id)
24+
se <- tximeta(coldata, dropInfReps=TRUE, skipSeqinfo=TRUE)
25+
```
26+
27+
```{r message=FALSE}
28+
library(SummarizedExperiment)
29+
rowRanges(se)
30+
seqinfo(se)
31+
```
32+
33+
```{r}
34+
gse_default <- summarizeToGene(se)
35+
```
36+
37+
```{r}
38+
gse <- summarizeToGene(se, assignRanges="abundant")
39+
```
40+
41+
```{r message=FALSE}
42+
library(plyranges)
43+
default_5p <- rowRanges(gse_default) %>%
44+
anchor_5p() %>%
45+
mutate(width=1) %>%
46+
start()
47+
gene_dat <- rowRanges(gse) %>%
48+
anchor_5p() %>%
49+
mutate(width=1) %>%
50+
mutate(
51+
ave_tpm = rowMeans(assay(gse, "abundance")),
52+
dist = abs(start - default_5p))
53+
table(gene_dat$dist == 0)
54+
```
55+
56+
```{r}
57+
library(ggplot2)
58+
gene_dat %>%
59+
as_tibble() %>%
60+
filter(dist > 0) %>%
61+
mutate(
62+
log10_dist = cut(
63+
log10(dist),
64+
breaks=c(0,3:6,Inf),
65+
include.lowest=TRUE),
66+
tpm = cut(
67+
ave_tpm,
68+
breaks=c(0,1,10,100,Inf),
69+
include.lowest=TRUE)
70+
) %>%
71+
group_by(log10_dist, tpm) %>%
72+
tally() %>%
73+
ggplot(aes(log10_dist, n)) +
74+
geom_col() +
75+
ggtitle("change in 5' position of gene") +
76+
ylab("number of genes") +
77+
facet_wrap(~tpm, labeller=label_both)
78+
```
79+
80+
```{r message=FALSE}
81+
library(plotgardener)
82+
library(org.Hs.eg.db)
83+
txdb <- retrieveDb(gse)
84+
new_assembly <- assembly(
85+
Genome = "hg38",
86+
TxDb = txdb,
87+
OrgDb = org.Hs.eg.db,
88+
gene.id.column = "GENEID",
89+
display.column = "GENEID",
90+
BSgenome = NULL
91+
)
92+
```
93+
94+
```{r}
95+
set.seed(5)
96+
gene <- gene_dat %>%
97+
filter(strand == "-", dist > 1e5, ave_tpm > 100) %>%
98+
slice(sample(n(),1))
99+
```
100+
101+
```{r}
102+
par <- pgParams(
103+
chrom = seqnames(gene) %>% as.character(),
104+
chromstart = round((start(gene) - 5e5) / 1e5) * 1e5,
105+
chromend = round((end(gene) + 5e5) / 1e5) * 1e5,
106+
assembly = new_assembly,
107+
just = c("left", "bottom")
108+
)
109+
```
110+
111+
```{r}
112+
props <- gene$iso_prop[[1]] %>%
113+
sort(decreasing=TRUE) %>%
114+
head(5)
115+
props
116+
```
117+
118+
```{r}
119+
gene_hilite <- data.frame(
120+
gene=gene$gene_id,
121+
color="magenta"
122+
)
123+
txp_hilite <- data.frame(
124+
transcript=names(props),
125+
color=rep(c("dodgerblue","darkgoldenrod"),c(1,length(props)-1))
126+
)
127+
```
128+
129+
```{r}
130+
pageCreate(width = 5, height = 4, showGuides = TRUE)
131+
plotGenes(
132+
params = par, x = 0.5, y = 3.5, width = 4, height = 1,
133+
geneHighlights = gene_hilite
134+
)
135+
plotTranscripts(
136+
params = par, x = 0.5, y = 2.5, width = 4, height = 2.5,
137+
transcriptHighlights = txp_hilite, fill="grey90"
138+
)
139+
plotGenomeLabel(
140+
params = par, x = 0.5, y = 3.5, length = 4,
141+
just = c("left", "top")
142+
)
143+
```

0 commit comments

Comments
 (0)