From 092d0adedd1fe5fdbcfb49310525d3183eeefd55 Mon Sep 17 00:00:00 2001
From: Michael Geuenich
Date: Wed, 7 Dec 2022 21:54:07 -0500
Subject: [PATCH] viz doublet entropies

---
 .gitignore                                    |  2 +
 .../doublet-finder-entropies.R                | 45 +++++++++++++++++++
 2 files changed, 47 insertions(+)
 create mode 100644 pipeline/rem-cell-type-from-training/doublet-finder-entropies.R

diff --git a/.gitignore b/.gitignore
index a8c0258..66813eb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,5 @@ output/*
 .snakemake/*
 slurm*
 data/*
+.Rhistory
+
diff --git a/pipeline/rem-cell-type-from-training/doublet-finder-entropies.R b/pipeline/rem-cell-type-from-training/doublet-finder-entropies.R
new file mode 100644
index 0000000..fb0778c
--- /dev/null
+++ b/pipeline/rem-cell-type-from-training/doublet-finder-entropies.R
@@ -0,0 +1,45 @@
+suppressPackageStartupMessages({
+  library(tidyverse)
+})
+source("pipeline/whatsthatcell-helpers.R")
+
+# Doublet calls for the 10x Chromium (v2) A and B files; columns 2:3 are given
+# the same names ("score", "classification") in both tables before stacking.
+doublets1 <- read_tsv("output/v6/results/doublet-id/scRNASeq-doublet-id-10x Chromium (v2) A.tsv")
+colnames(doublets1)[2:3] <- c("score", "classification")
+
+doublets2 <- read_tsv("output/v6/results/doublet-id/scRNASeq-doublet-id-10x Chromium (v2) B.tsv")
+colnames(doublets2)[2:3] <- c("score", "classification")
+
+doublets <- bind_rows(doublets1, doublets2)
+
+# Load all result tables and split the settings encoded in `params` into columns.
+files <- list.files("output/v6/results/rem_cell_type/", full.names = TRUE)
+entropies <- lapply(files, read_tsv) |>
+  bind_rows() |>
+  mutate(al = case_when(grepl('AL_alg-multinom', params) ~ "multinom",
+                        grepl('AL_alg-rf', params) ~ 'rf'),
+         strat = case_when(grepl('strat-highest_entropy', params) ~ 'highest_entropy',
+                           grepl('strat-lowest_maxp', params) ~ 'lowest_maxp'),
+         init = case_when(grepl('init-random', params) ~ "random",
+                          grepl('init-ranking', params) ~ "ranking"),
+         ct = str_extract(params, 'rem_celltype.*'),
+         s = str_extract(params, '-seed-.*')) |>
+  mutate(ct = gsub("rem_celltype-", "", ct),
+         ct = gsub("-seed-[0-9]+", "", ct),  # drop the full seed, not just one digit
+         s = gsub("-seed-", "", s)) |>
+  select(-params)
+
+# Boxplots of `criterion_val` (labelled "Entropy") per ground-truth cell type,
+# filled by doublet classification, faceted by num_missing_cells (rows) ~ ct.
+pdf(snakemake@output$pdf, height = 8, width = 20)
+  entropies |>
+    left_join(select(doublets, -params), by = "cell_id") |>
+    ggplot(aes(x = gt_cell_type, y = criterion_val, fill = classification)) +
+    geom_boxplot() +
+    # The legend title must be named `fill`; an unnamed string labels nothing.
+    labs(x = "Ground truth", y = "Entropy", fill = "Doublet classification") +
+    facet_grid(num_missing_cells ~ ct) +
+    whatsthatcell_theme() +
+    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
+dev.off()