Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PR_MW_23Mar2023 #122

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ Imports:
htmltools,
fontawesome,
emojifont,
geneBasisR,
waiter,
cicerone,
shinyanimate
Expand All @@ -71,7 +70,8 @@ Suggests:
printr,
ggplot2,
heatmaply,
plotly
plotly,
geneBasisR
Remotes:
github::MarioniLab/geneBasisR,
github::camlab-bioml/inferorg,
Expand Down
41 changes: 41 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
FROM rocker/verse
ARG DEBIAN_FRONTEND=noninteractive

RUN apt-get update && apt-get install -y --no-install-recommends \
sudo \
libcurl4-openssl-dev \
libharfbuzz-dev \
libfribidi-dev \
libgsl-dev \
libgeos-dev \
&& rm -rf /var/lib/apt/lists/*

RUN R -e "install.packages(c('devtools','BiocManager', 'DT', 'shinyalert', 'reactable', 'bsplus', 'shinyjs', 'plyr', 'shinytest2', 'shinydashboard', 'shinyBS', 'rdrop2', 'shinyanimate', 'stringr', 'yardstick', 'htmltools', 'methods', 'stats', 'gridExtra', 'testthat', 'tools', 'printr', 'ggplot2', 'heatmaply', 'plotly', 'waiter', 'cicerone'), dependencies=T)"
RUN R -e "BiocManager::install(c('scran', 'Seurat', 'caret', 'scuttle', 'scater', 'SummarizedExperiment', 'SingleCellExperiment', 'clustifyr', 'zellkonverter'))"


RUN R -e "devtools::install_github('stephenturner/annotables', dependencies = F); devtools::install_github('camlab-bioml/inferorg', dependencies = F)"

RUn R -e "install.packages(c('sortable', 'randomcoloR', 'emojifont'))"

RUN R -e "devtools::install_github('MarioniLab/geneBasisR', dependencies = T)"

RUN R -e "devtools::install_github('react-R/reactR')"

RUN mkdir /cytosel

COPY . /cytosel

COPY R/ /cytosel/R

WORKDIR /cytosel

RUN ls

# RUN cd cytosel

# RUN yes | R -e "setwd('cytosel'); options(needs.promptUser = FALSE); library(devtools); devtools::load_all()"

EXPOSE 3888

CMD R -e "options(shiny.port = 3888,shiny.host='0.0.0.0'); devtools::load_all(); cytosel()"
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
export(cytosel)
import(emojifont)
import(fontawesome)
import(geneBasisR)
import(htmltools)
import(reactable)
import(shiny)
Expand Down
87 changes: 49 additions & 38 deletions R/analysis.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,32 +7,39 @@
#' @param columns A vector storing columns
#' @param pref_assay Assay loaded
#' @param p_val_type The type of p-value to use in marker group calculation. Can be either all or any.
#' @param use_rank Whether or not to use the logFC rank as the sorting criteria. Default is TRUE
#'
compute_fm <- function(sce, columns, pref_assay, allowed_genes,
p_val_type = "all") {
p_val_type = "all", use_rank = T) {

library(scran)

fms <- lapply(columns,
function(col) {

test_type <- ifelse(pref_assay == "counts", "binom", "t")
ag <- intersect(rownames(sce), allowed_genes)
fm <- scran::findMarkers(sce[ag,], SummarizedExperiment::colData(sce)[[col]],
test.type = test_type,
# BPPARAM = MulticoreParam(),
pval.type = p_val_type,
assay.type = pref_assay)
# fm <- scran::findMarkers(sce[ag,], SummarizedExperiment::colData(sce)[[col]],
# test.type = test_type,
# # BPPARAM = MulticoreParam(),
# pval.type = p_val_type,
# assay.type = pref_assay)

sort_by_stat <- function(frame, sort_by_rank = use_rank) {
if (isTRUE(sort_by_rank)) {
return (frame[order(frame$rank.logFC.detected, decreasing=F),])
} else {
return (frame[order(frame$median.logFC.cohen, decreasing=T),])
}
}

score <- scran::scoreMarkers(assay(sce[ag,], pref_assay),
colData(sce)[[columns]])

fm <- lapply(score, FUN = function(X) sort_by_stat(X))
# fm <- SimpleList(fm)

for(n in names(fm)) {
fm[[n]] <- fm[[n]][rownames(fm[[n]]) %in% allowed_genes,]
}
fm
})

names(fms) <- columns

fms

}

Expand All @@ -47,28 +54,30 @@ compute_fm <- function(sce, columns, pref_assay, allowed_genes,
#' @param allowed_genes Set of allowed genes
#' @param in_session whether the function is being called in a shiny session or not
#' @importFrom dplyr mutate tally group_by filter pull slice_head arrange summarize ungroup
#' @import geneBasisR
get_markers <- function(fms, panel_size, marker_strategy, sce, allowed_genes,
in_session = T) {

library(geneBasisR)

cell_types_wout_markers <- c()
columns <- names(fms)
original_multimarkers <- c()

marker <- list(recommended_markers = c(), scratch_markers = c(), top_markers = c())

if(marker_strategy == "geneBasis") {
sce2 <- retain_informative_genes(sce[allowed_genes,], n = 10*panel_size)
genes <- gene_search(sce2, n_genes=panel_size)
sce2 <- geneBasisR::retain_informative_genes(sce[allowed_genes,], n = 10*panel_size)
genes <- geneBasisR::gene_search(sce2, n_genes=panel_size)
marker <- list(
recommended_markers = genes$gene[!is.na(genes$gene)],
scratch_markers = c(),
top_markers = genes$gene[!is.na(genes$gene)]
)
} else {
for(col in columns) {
fm <- fms[[col]]
n <- length(fm)

n <- length(fms)

fm <- fms

top_select <- ceiling((panel_size + 10) / n)

Expand All @@ -84,24 +93,27 @@ get_markers <- function(fms, panel_size, marker_strategy, sce, allowed_genes,
original_multimarkers <- c()
for(i in seq_len(n)) {
f <- fm[[i]]
f <- f[!(rownames(f) %in% recommended),] |> as.data.frame()

f <- f[!(rownames(f) %in% recommended),]

## Only keep markers that are over-expressed
f[is.na(f)] <- 0
f <- f[f$summary.logFC > 0,]
f <- f[f$median.logFC.cohen > 0,]

if(nrow(f) > 0){
selected_markers <- rownames(f)[seq_len(top_select)]
recommended_df <- bind_rows(recommended_df,
tibble(marker = selected_markers,
cell_type = names(fm)[i],
summary.logFC = f[selected_markers,]$summary.logFC))
median.logFC.cohen = f[selected_markers,]$median.logFC.cohen))

# print(recommended_df)

expressed_markers <- rownames(f)[seq_len(25)]
highly_expressed <- bind_rows(highly_expressed,
tibble(marker = expressed_markers,
cell_type = names(fm)[i],
summary.logFC = f[expressed_markers,]$summary.logFC))
median.logFC.cohen = f[expressed_markers,]$median.logFC.cohen))


}else{
Expand Down Expand Up @@ -155,7 +167,7 @@ get_markers <- function(fms, panel_size, marker_strategy, sce, allowed_genes,
remove <- recommended_df %>%
filter(cell_type %in% markers_per_cell_type$cell_type) %>%
group_by(marker) %>%
summarize(mean_lfc = mean(summary.logFC)) %>%
summarize(mean_lfc = mean(median.logFC.cohen)) %>%
arrange(mean_lfc) %>%
slice_head(n = 1) %>%
pull(marker)
Expand Down Expand Up @@ -195,7 +207,7 @@ get_markers <- function(fms, panel_size, marker_strategy, sce, allowed_genes,

## Only keep markers that are over-expressed
f[is.na(f)] <- 0
f <- f[f$summary.logFC > 0,]
f <- f[f$median.logFC.cohen > 0,]
new_markers_add <- rownames(f)[seq_len(subset(multimarkers, cell_type == i)$num_markers)]
recommended <- c(recommended, new_markers_add)
}
Expand All @@ -205,7 +217,7 @@ get_markers <- function(fms, panel_size, marker_strategy, sce, allowed_genes,

scratch <- unique(scratch)
top <- recommended #unique(top)
}
marker <- list(recommended_markers = recommended[!is.na(recommended)],
scratch_markers = scratch[!is.na(scratch)],
top_markers = top[!is.na(top)])
Expand All @@ -229,7 +241,7 @@ create_multimarker_frame <- function(frame, recommended_markers) {
mutate(num_markers = length(unique(marker)),
highest = max(cell_types_expressed_in),
lowest = min(cell_types_expressed_in)) |> ungroup() |>
arrange(summary.logFC) |> filter(lowest > 1))
arrange(median.logFC.cohen) |> filter(lowest > 1))
}


Expand All @@ -240,7 +252,7 @@ create_multimarker_frame <- function(frame, recommended_markers) {
#' recommended_markers, scratch_markers, and top_markers
#' @param fms Stored findMarkers outputs
get_associated_cell_types <- function(markers, fms) {
fm <- fms[[1]] # For now, we're only doing this for the first
fm <- fms # For now, we're only doing this for the first

all_markers <- unique(unlist(markers[c('recommended_markers',
'scratch_markers', 'top_markers')]))
Expand All @@ -250,9 +262,9 @@ get_associated_cell_types <- function(markers, fms) {
tmp <- f[tm,] # get summary statistics for this gene

## return p value if it's a marker, or 1 otherwise
# ifelse(tmp$summary.logFC < 0, 1, tmp$FDR)
# ifelse(tmp$median.logFC.cohen < 0, 1, tmp$FDR)
## New: return summary logFC
-tmp$summary.logFC
-tmp$median.logFC.cohen
})
names(pvals)[which.min(pvals)]
})
Expand Down Expand Up @@ -593,16 +605,15 @@ get_allowed_genes <- function(selected_applications, applications_parsed, sce) {
#' @importFrom dplyr mutate_if mutate_at
get_cell_type_add_markers_reactable <- function(fm, current_markers) {
fm <- fm[!rownames(fm) %in% current_markers,]
fm <- fm[fm$summary.logFC > 0,]
fm <- fm[fm$median.logFC.cohen > 0,]
fm <- as.data.frame(head(fm, 50))
fm <- rownames_to_column(fm, 'Gene')
fm <- fm[,c("Gene", "FDR", "summary.logFC")]
fm$FDR <- signif(fm$FDR, 3)
fm$summary.logFC <- round(fm$summary.logFC, digits = 3)
list(fm = fm,
table <- data.frame(Gene = rownames(fm),
logFC = round(fm$median.logFC.cohen, digits = 3))

list(fm = table,
reactable =
reactable(
fm,
table,
selection = "multiple",
onClick = "select"
)
Expand Down
Loading