diff --git a/pipeline/figures/AR-params.R b/pipeline/figures/AR-params.R index 0492b13..debe3d7 100644 --- a/pipeline/figures/AR-params.R +++ b/pipeline/figures/AR-params.R @@ -1,14 +1,15 @@ -suppressPackageStartupMessages( +suppressPackageStartupMessages({ library(tidyverse) -) + library(patchwork) +}) source("pipeline/whatsthatcell-helpers.R") -cytof_acc <- read_tsv("output/v7/results/overall-CyTOF-benchmarking-accuracies.tsv") |> +cytof_acc <- read_tsv(snakemake@input$cytof_acc) |> mutate(cohort = "CyTOF") -scrna_acc <- read_tsv("output/v7/results/overall-scRNASeq-benchmarking-accuracies.tsv") |> +scrna_acc <- read_tsv(snakemake@input$scrna_acc) |> mutate(cohort = "scRNASeq") -snrna_acc <- read_tsv("output/v7/results/overall-snRNASeq-benchmarking-accuracies.tsv") |> +snrna_acc <- read_tsv(snakemake@input$snrna_acc) |> mutate(cohort = "snRNASeq") acc <- bind_rows(cytof_acc, scrna_acc, snrna_acc) @@ -42,7 +43,7 @@ plot_knn_res <- function(acc, fill, cohort){ } -pdf("output/v7/paper-figures/supp-AR-res.pdf", height = 14, width = 9) +pdf(snakemake@output$res, height = 14, width = 9) (plot_knn_res(cytof_acc, "res", "CyTOF") / plot_knn_res(scrna_acc, "res", "scRNASeq") / plot_knn_res(snrna_acc, "res", "snRNASeq")) + @@ -50,7 +51,7 @@ pdf("output/v7/paper-figures/supp-AR-res.pdf", height = 14, width = 9) dev.off() -pdf("output/v7/paper-figures/supp-AR-knn.pdf", height = 14, width = 9) +pdf(snakemake@output$knn, height = 14, width = 9) (plot_knn_res(cytof_acc, "knn", "CyTOF") / plot_knn_res(scrna_acc, "knn", "scRNASeq") / plot_knn_res(snrna_acc, "knn", "snRNASeq")) + diff --git a/pipeline/figures/cell-type-similarity.R b/pipeline/figures/cell-type-similarity.R index 9e27514..93d39b7 100644 --- a/pipeline/figures/cell-type-similarity.R +++ b/pipeline/figures/cell-type-similarity.R @@ -1,9 +1,9 @@ library(tidyverse) library(ComplexHeatmap) -cytof_sim <- read_tsv("output/v7/results/cell-type-similarity/similarity-CyTOF-seed-0.tsv") -scrna_sim <- read_tsv("output/v7/results/cell-type-similarity/similarity-scRNASeq-seed-0.tsv") -snrna_sim <- read_tsv("output/v7/results/cell-type-similarity/similarity-snRNASeq-seed-0.tsv") +cytof_sim <- read_tsv(snakemake@input$cytof_sim) +scrna_sim <- read_tsv(snakemake@input$scrna_sim) +snrna_sim <- read_tsv(snakemake@input$snrna_sim) plot_dist_mat <- function(df){ pivot_wider(df, names_from = "cell_type2", values_from = "cosine_similarity") |> @@ -14,14 +14,14 @@ plot_dist_mat <- function(df){ Heatmap(name = "Cosine\nsimilarity") } -pdf("output/v7/paper-figures/Supp-cell-type-sim-scRNASeq.pdf", height = 5, width = 5.5) +pdf(snakemake@input$scrna, height = 5, width = 5.5) plot_dist_mat(scrna_sim) dev.off() -pdf("output/v7/paper-figures/Supp-cell-type-sim-snRNASeq.pdf", height = 5, width = 5.5) +pdf(snakemake@input$snrna, height = 5, width = 5.5) plot_dist_mat(snrna_sim) dev.off() -pdf("output/v7/paper-figures/Supp-cell-type-sim-CyTOF.pdf", height = 5, width = 5.5) +pdf(snakemake@input$cytof, height = 5, width = 5.5) plot_dist_mat(cytof_sim) dev.off() diff --git a/pipeline/figures/compare-lr-rf-random-and-ranking.R b/pipeline/figures/compare-lr-rf-random-and-ranking.R index d13e010..85bd38d 100644 --- a/pipeline/figures/compare-lr-rf-random-and-ranking.R +++ b/pipeline/figures/compare-lr-rf-random-and-ranking.R @@ -6,10 +6,11 @@ suppressPackageStartupMessages({ library(magick) library(ggpubr) }) +devtools::load_all("/ggplot2") source("pipeline/whatsthatcell-helpers.R") ## Part A: Proportion of cells initially selected -get_proportion_rep <- function(sce_path, selected_cells_path, cohort){ +get_proportion_rep <- function(sce_path, rand_files, rank_files, cohort){ sce <- readRDS(sce_path) cell_types <- unique(sce$cell_type) @@ -18,13 +19,6 @@ get_proportion_rep <- function(sce_path, selected_cells_path, cohort){ cell_types <- unique(sce$CellType) } - rand_files <- list.files(selected_cells_path, - pattern = "random", - full.names = TRUE) - rank_files <- list.files(selected_cells_path, - pattern = "ranking", - full.names = TRUE) - random <- lapply(rand_files, function(x){ sel_cells <- read_tsv(x) |> filter(iteration == 0) |> @@ -48,14 +42,17 @@ get_proportion_rep <- function(sce_path, selected_cells_path, cohort){ } props <- bind_rows( - get_proportion_rep("data/CyTOF/CyTOF-train-seed-0.rds", - "output/v8/data/CyTOF/Active-Learning_entropy/AL-batches-subset", + get_proportion_rep(snakemake@input$cytof, + snakemake@input$cytof_AL_rand, + snakemake@input$cytof_AL_rank, "CyTOF"), - get_proportion_rep("data/scRNASeq/scRNASeq-train-seed-0.rds", - "output/v8/data/scRNASeq/Active-Learning_entropy/AL-batches-subset", + get_proportion_rep(snakemake@input$scrna, + snakemake@input$scrna_AL_rand, + snakemake@input$scrna_AL_rank, "scRNASeq"), - get_proportion_rep("data/snRNASeq/snRNASeq-train-seed-0.rds", - "output/v8/data/snRNASeq/Active-Learning_entropy/AL-batches-subset", + get_proportion_rep(snakemake@input$snrna, + snakemake@input$snrna_AL_rand, + snakemake@input$snrna_AL_rank, "snRNASeq") ) @@ -72,11 +69,7 @@ prop <- props |> ## Part B: heatmaps -f <- c("output/v8/results/overall-CyTOF-benchmarking-accuracies.tsv", - "output/v8/results/overall-scRNASeq-benchmarking-accuracies.tsv", - "output/v8/results/overall-snRNASeq-benchmarking-accuracies.tsv") - -acc <- lapply(f, function(x){ +acc <- lapply(snakemake@input$accs, function(x){ df <- read_tsv(x) |> mutate(cohort = case_when(grepl("CyTOF", basename(x)) ~ "CyTOF", grepl("snRNASeq", basename(x)) ~ "snRNASeq", @@ -149,7 +142,7 @@ ranking_vs_random <- create_heatmap(acc, "scRNASeq", "random_vs_ranked") + ranking <- draw(ranking_vs_random, column_title = "Selecting initial cells based on marker expression") -pdf("output/v8/paper-figures/ranking-random-heatmap.pdf", height = 3.5, width = 7) +pdf(snakemake@output$rank_rand_hm, height = 3.5, width = 7) ranking dev.off() @@ -159,20 +152,20 @@ lr_vs_rf <- create_heatmap(acc, "scRNASeq", "lr_vs_rf") + lr_vs_rf <- draw(lr_vs_rf, column_title = "F1-score improvement by random forest compared to logistic regression") -pdf("output/v8/paper-figures/lr-rf-heatmap.pdf", height = 3.5, width = 7) +pdf(snakemake@output$lr_rf_hm, height = 3.5, width = 7) lr_vs_rf dev.off() ## Full main figure -ranking <- image_read_pdf("output/v8/paper-figures/ranking-random-heatmap.pdf") +ranking <- image_read_pdf(snakemake@output$rank_rand_hm) ranking <- ggplot() + background_image(ranking) -lr_vs_rf <- image_read_pdf("output/v8/paper-figures/lr-rf-heatmap.pdf") +lr_vs_rf <- image_read_pdf(snakemake@output$lr_rf_hm) lr_vs_rf <- ggplot() + background_image(lr_vs_rf) -pdf("output/v8/paper-figures/lr-and-rf-heatmaps.pdf", height = 8.8, width = 7) +pdf(snakemake@output$main_fig, height = 8.8, width = 7) (lr_vs_rf / wrap_elements(full = prop) / ranking) + plot_layout(heights = c(3, 1.2, 3)) + plot_annotation(tag_levels = "A") dev.off() @@ -212,7 +205,7 @@ rf_vs_rf_ranking <- filter(acc, corrupted == 0) |> whatsthatcell_theme() + theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust =1)) -pdf("output/v8/paper-figures/rf-vs-lr-supplementary.pdf", height = 13, width = 12) +pdf(snakemake@output$rf_lr_supp, height = 13, width = 12) rf_vs_rf_random / rf_vs_rf_ranking + plot_layout(guides = "collect") + plot_annotation(tag_levels = "A") dev.off() @@ -254,7 +247,7 @@ rand_vs_rank_rf <- filter(acc, corrupted == 0) |> whatsthatcell_theme() + theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust =1)) -pdf("output/v8/paper-figures/random-vs-ranking-supplementary.pdf", height = 13, width = 12) +pdf(snakemake@output$rand_rank_supp, height = 13, width = 12) rand_vs_rank_lr / rand_vs_rank_rf + plot_layout(guides = "collect") + plot_annotation(tag_levels = "A") dev.off() diff --git a/pipeline/figures/figure1.R b/pipeline/figures/figure1.R index 1a9d665..a605374 100644 --- a/pipeline/figures/figure1.R +++ b/pipeline/figures/figure1.R @@ -10,17 +10,18 @@ source("pipeline/whatsthatcell-helpers.R") ### [ FIGURE 1A - BENCHMARKING OVERVIEW ] ##### -schematic <- image_read("illustrator-figures/benchmarking-schematic.ai") |> - image_ggplot() +# had to remove because magick causes issues with docker +# schematic <- image_read(snakemake@input$schematic) |> +# image_ggplot() ### [ FIGURE 1B - DATASET COMPOSOTION ] ##### set.seed(42) -CyTOF <- readRDS("data/CyTOF/CyTOF-full.rds") +CyTOF <- readRDS(snakemake@input$cytof) CyTOF <- runTSNE(CyTOF) -scRNA <- readRDS("data/scRNASeq/scRNASeq-full.rds") +scRNA <- readRDS(snakemake@input$scrna) scRNA <- runTSNE(scRNA) -snRNA <- readRDS("data/snRNASeq/snRNASeq-full.rds") +snRNA <- readRDS(snakemake@input$snrna) snRNA <- runTSNE(snRNA) @@ -130,10 +131,10 @@ snrna_plot <- ((wrap_elements(full = snrna_tsne, ignore_tag = TRUE) & labs(title snrna_bar) + plot_layout(heights = c(3,1)) -pdf("output/v8/paper-figures/figure1.pdf", height = 12.5, width = 17.5) +pdf(snakemake@output$fig1, height = 9.25, width = 17.5) wrap_elements((scrna_plot | snrna_plot | cytof_plot) + plot_layout(widths = c(1, 1.2, 1))) / - wrap_elements(schematic) + - plot_layout(heights = c(2, 0.7)) + + #wrap_elements(schematic) + + #plot_layout(heights = c(2, 0.7)) + plot_annotation(tag_levels = 'A') & theme(plot.tag = element_text(size = 22)) dev.off() diff --git a/pipeline/figures/figure3.R b/pipeline/figures/figure3.R index 8f9fa8c..63b5f2d 100644 --- a/pipeline/figures/figure3.R +++ b/pipeline/figures/figure3.R @@ -6,12 +6,11 @@ suppressPackageStartupMessages({ library(scales) library(magick) }) +devtools::load_all("/ggplot2") source("pipeline/whatsthatcell-helpers.R") ### [ ACCURACIES ] #### -f <- list.files("output/v8/results/", pattern = "overall", full.names = TRUE) - -acc <- lapply(f, function(x){ +acc <- lapply(snakemake@input$accs, function(x){ df <- read_tsv(x) |> mutate(cohort = case_when(grepl("CyTOF", basename(x)) ~ "CyTOF", grepl("snRNASeq", basename(x)) ~ "snRNASeq", @@ -34,13 +33,10 @@ acc <- lapply(f, function(x){ sel_meth_cols <- sel_met_cols -schematic <- image_read("illustrator-figures/AR-schematic.ai") |> - image_ggplot() - eval <- full_acc_plot_wrapper(acc, "rf", "ranking", "") & labs(fill = "Selection method") -pdf("output/v8/paper-figures/figure3-overall-benchmark.pdf", height = 8.4, width = 9) +pdf(snakemake@output$overall_fig, height = 8.4, width = 9) eval dev.off() diff --git a/pipeline/figures/imbalance-plots.R b/pipeline/figures/imbalance-plots.R index 33ac1b3..9e36731 100644 --- a/pipeline/figures/imbalance-plots.R +++ b/pipeline/figures/imbalance-plots.R @@ -1,9 +1,10 @@ library(tidyverse) library(scales) +library(patchwork) -sc_acc <- read_tsv("output/v8/imbalance/acc/imbalance-acc-scRNASeq.tsv") -sn_acc <- read_tsv("output/v8/imbalance/acc/imbalance-acc-snRNASeq.tsv") -cy_acc <- read_tsv("output/v8/imbalance/acc/imbalance-acc-CyTOF.tsv") +sc_acc <- read_tsv(snakemake@input$scrna_acc) +sn_acc <- read_tsv(snakemake@input$snrna_acc) +cy_acc <- read_tsv(snakemake@input$cytof_acc) source("pipeline/whatsthatcell-helpers.R") @@ -79,7 +80,7 @@ plotting_order <- filter(comb_imb_score, modality == "snRNASeq" & al == "RF" & i arrange(mean_estimate) |> pull(strat) -pdf("output/v8/paper-figures/imbalance-main-figure.pdf", height = 3, width = 7) +pdf(snakemake@output$main_fig, height = 3, width = 7) filter(comb_imb_score, modality == "snRNASeq" & al == "RF" & init == "ranking") |> mutate(strat = factor(strat, levels = plotting_order)) |> ggplot(aes(x = comp, y = diff, fill = strat)) + @@ -95,7 +96,7 @@ pdf("output/v8/paper-figures/imbalance-main-figure.pdf", height = 3, width = 7) dev.off() -pdf("output/v8/paper-figures/imbalance-supplementary.pdf", height = 18, width = 9) +pdf(snakemake@output$sup_fig, height = 18, width = 9) (scrnaseq / snrnaseq / cytof) + plot_annotation(tag_levels = "A") + plot_layout(guides = "collect", heights = c(2, 2, 1)) diff --git a/pipeline/figures/pred-labeling.R b/pipeline/figures/pred-labeling.R index 1ba64bd..67709fd 100644 --- a/pipeline/figures/pred-labeling.R +++ b/pipeline/figures/pred-labeling.R @@ -2,14 +2,15 @@ suppressPackageStartupMessages({ library(tidyverse) library(ggalluvial) library(patchwork) + library(scales) }) source("pipeline/whatsthatcell-helpers.R") ### PREDICTIVE LABELLING ACCURACY -pred_lab_acc <- read_tsv("output/v8/new/pred-labeling-accuracy.tsv") +pred_lab_acc <- read_tsv(snakemake@input$acc) # SUPPLEMENTAL FILE -pdf("output/v8/paper-figures/Supp-pred-labelling-acc.pdf", height = 10, width = 12) +pdf(snakemake@output$supp, height = 10, width = 12) pred_lab_acc |> filter(.metric == "f_meas") |> mutate(pred_sel = gsub("top", "", pred_sel), @@ -66,16 +67,16 @@ pred_lab_acc_plot <- plot_pred_lab_acc(pred_lab_acc_pub, "CyTOF") | ## Benchmarking with predictive labelling data -scrna <- read_tsv("output/v8/new/pred2/benchmark-predictive-labeling-scRNASeq.tsv") |> +scrna <- read_tsv(snakemake@input$scrna) |> mutate(AL_alg = sub(".*-ALAlg-", "", selection_procedure), selection_procedure = sub("-ALAlg-.*", "", selection_procedure), cohort = "scRNASeq") -snrna <- read_tsv("output/v8/new/pred2/benchmark-predictive-labeling-snRNASeq.tsv") |> +snrna <- read_tsv(snakemake@input$snrna) |> mutate(AL_alg = sub(".*-ALAlg-", "", selection_procedure), selection_procedure = sub("-ALAlg-.*", "", selection_procedure), cohort = "snRNASeq") -cytof <- read_tsv("output/v8/new/pred2/benchmark-predictive-labeling-CyTOF.tsv") |> +cytof <- read_tsv(snakemake@input$cytof) |> mutate(AL_alg = sub(".*-ALAlg-", "", selection_procedure), selection_procedure = sub("-ALAlg-.*", "", selection_procedure), cohort = "CyTOF") @@ -135,12 +136,12 @@ comp_dataset <- acc_gap |> mutate(selection_procedure = case_when(selection_procedure == "random" ~ "Random", selection_procedure == "MarkerSeurat-clustering" ~ "AR Marker", selection_procedure == "NoMarkerSeurat-clustering" ~ "AR No Marker", - selection_procedure == "0.05-maxp-AL" ~ "AL 0.05 maxp", - selection_procedure == "0.25-maxp-AL" ~ "AL 0.25 maxp", - selection_procedure == "0.75-entropy-AL" ~ "AL 0.75 entropy", - selection_procedure == "0.95-entropy-AL" ~ "AL 0.95 entropy", - selection_procedure == "highest-entropy-AL" ~ "AL highest entropy", - selection_procedure == "lowest-maxp-AL" ~ "AL lowest maxp")) + selection_procedure == "0.05-maxp-AL" ~ "AL 0.05-maxp", + selection_procedure == "0.25-maxp-AL" ~ "AL 0.25-maxp", + selection_procedure == "0.75-entropy-AL" ~ "AL 0.75-entropy", + selection_procedure == "0.95-entropy-AL" ~ "AL 0.95-entropy", + selection_procedure == "highest-entropy-AL" ~ "AL Highest entropy", + selection_procedure == "lowest-maxp-AL" ~ "AL Lowest maxp")) plot_comp <- function(df, ncol, ylab = "", xlab = ""){ if(ylab != ""){ @@ -152,6 +153,7 @@ plot_comp <- function(df, ncol, ylab = "", xlab = ""){ df |> ggplot(aes(x = gap, y = .estimate, colour = selection_procedure)) + geom_point() + + scale_color_manual(values = sel_met_cols) + labs(x = xlab, y = ylab, colour = "Selection procedure") + @@ -178,9 +180,9 @@ gap_comb <- (cytof_gap | scrnaseq_gap | snrnaseq_gap) + plot_layout(guides = "collect", widths = c(0.65, 1, 1)) # Detecting mislabelled cells -cytof <- list.files("output/v8/identify_mislabelled/CyTOF/", full.names = TRUE) -scrna <- list.files("output/v8/identify_mislabelled/scRNASeq/", full.names = TRUE) -snrna <- list.files("output/v8/identify_mislabelled/snRNASeq/", full.names = TRUE) +cytof <- snakemake@input$mislabelled_cytof +scrna <- snakemake@input$mislabelled_scrna +snrna <- snakemake@input$mislabelled_snrna mislabelled_pred <- lapply(c(cytof, scrna, snrna), function(x){ df <- read_tsv(x) @@ -206,7 +208,7 @@ mislabelled_pred_plot <- mislabelled_pred |> whatsthatcell_theme() + theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) -pdf("output/v8/paper-figures/pred-labelling.pdf", height = 14, width = 12) +pdf(snakemake@output$main, height = 14, width = 12) (wrap_elements(full = pred_lab_acc_plot + plot_layout(guides = "collect"))) / wrap_elements(full = lr_vs_rf & labs(title = "")) / wrap_elements(full = gap_comb) / @@ -244,71 +246,71 @@ plot_sup_gap <- function(df, sel_cohort){ theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) } -pdf("output/v8/paper-figures/Supp-CyTOF-f1-improvement.pdf", width = 12, height = 8) +pdf(snakemake@output$sup_cytof, width = 12, height = 8) plot_sup_gap(sup_acc_gap, "CyTOF") dev.off() -pdf("output/v8/paper-figures/Supp-scRNASeq-f1-improvement.pdf", width = 12, height = 8) +pdf(snakemake@output$sup_scrna, width = 12, height = 8) plot_sup_gap(sup_acc_gap, "scRNASeq") dev.off() -pdf("output/v8/paper-figures/Supp-snRNASeq-f1-improvement.pdf", width = 12, height = 8) +pdf(snakemake@output$sup_snrna, width = 12, height = 8) plot_sup_gap(sup_acc_gap, "snRNASeq") dev.off() -### As function of number of cells predictively labelled -test <- bind_rows( - select(pred_lab_acc, method, cell_selection, multinom) |> - dplyr::rename(".estimate" = "multinom"), - baseline_acc -) |> - filter(cell_selection != "top200") |> - mutate(cell_selection = factor(cell_selection, c("baseline", "top10", "top50", "top100"))) |> - #filter(method == "Random-Forest,10,0.4,100,NA,0,MarkerSeurat-clustering,kap,NA,scRNASeq") |> - separate(method, c("alg", "knn", "res", "cell_num", "initial", "seed", - "selection_procedure", ".metric", "AL_alg", "cohort"), sep = ",", - remove = FALSE) |> - filter(.metric == "sensitivity") |> - filter(initial == "NA" | is.na(initial) | initial == "random") |> - filter(AL_alg == "NA" | is.na(AL_alg) | AL_alg == "multinom") +# ### As function of number of cells predictively labelled +# test <- bind_rows( +# select(pred_lab_acc, method, cell_selection, multinom) |> +# dplyr::rename(".estimate" = "multinom"), +# baseline_acc +# ) |> +# filter(cell_selection != "top200") |> +# mutate(cell_selection = factor(cell_selection, c("baseline", "top10", "top50", "top100"))) |> +# #filter(method == "Random-Forest,10,0.4,100,NA,0,MarkerSeurat-clustering,kap,NA,scRNASeq") |> +# separate(method, c("alg", "knn", "res", "cell_num", "initial", "seed", +# "selection_procedure", ".metric", "AL_alg", "cohort"), sep = ",", +# remove = FALSE) |> +# filter(.metric == "sensitivity") |> +# filter(initial == "NA" | is.na(initial) | initial == "random") |> +# filter(AL_alg == "NA" | is.na(AL_alg) | AL_alg == "multinom") -test |> - filter(alg == "Random-Forest", selection_procedure == "random") |> - ggplot(aes(x = cell_selection, y = .estimate, group = method, colour = cell_num)) + - geom_point() + - geom_line() + - facet_grid(selection_procedure ~ cohort + alg) + - whatsthatcell_theme() - -left_join(pred_lab_acc, - select(baseline_acc, -cell_selection), - by = "method") |> - #pivot_wider(names_from = "cell_selection", values_from = "multinom") - pivot_longer() - -med_gap <- acc_gap |> - filter(.metric == "f_meas") |> - filter(initial == "NA" | is.na(initial) | initial == "random") |> - filter(AL_alg == "NA" | is.na(AL_alg) | AL_alg == "multinom") |> - group_by(method, cell_num, selection_procedure, cohort, cell_selection, pred_labeller) |> - summarize(median_gap = median(na.omit(gap))) - -med_gap |> - filter(pred_labeller == "multinom" & method ) |> - ggplot(aes(x = cell_selection, y = median_gap, colour = cell_num)) + - geom_line() + - facet_grid(selection_procedure ~ cohort + method) + - whatsthatcell_theme() - - -med_gap |> - filter(cell_selection != "top200") |> - mutate(cell_selection = factor(cell_selection, levels = c("top10", "top50", "top100"))) |> - filter(pred_labeller == "multinom" & method == "CyTOF-LDA" & cell_num == 100, cohort == "CyTOF" & - selection_procedure == "0.05-maxp-AL") |> - ggplot(aes(x = cell_selection, y = median_gap)) + - geom_point() - #geom_line()# + - # facet_grid(selection_procedure ~ cohort + method) + - # whatsthatcell_theme() +# test |> +# filter(alg == "Random-Forest", selection_procedure == "random") |> +# ggplot(aes(x = cell_selection, y = .estimate, group = method, colour = cell_num)) + +# geom_point() + +# geom_line() + +# facet_grid(selection_procedure ~ cohort + alg) + +# whatsthatcell_theme() + +# left_join(pred_lab_acc, +# select(baseline_acc, -cell_selection), +# by = "method") |> +# #pivot_wider(names_from = "cell_selection", values_from = "multinom") +# pivot_longer() + +# med_gap <- acc_gap |> +# filter(.metric == "f_meas") |> +# filter(initial == "NA" | is.na(initial) | initial == "random") |> +# filter(AL_alg == "NA" | is.na(AL_alg) | AL_alg == "multinom") |> +# group_by(method, cell_num, selection_procedure, cohort, cell_selection, pred_labeller) |> +# summarize(median_gap = median(na.omit(gap))) + +# med_gap |> +# filter(pred_labeller == "multinom" & method ) |> +# ggplot(aes(x = cell_selection, y = median_gap, colour = cell_num)) + +# geom_line() + +# facet_grid(selection_procedure ~ cohort + method) + +# whatsthatcell_theme() + + +# med_gap |> +# filter(cell_selection != "top200") |> +# mutate(cell_selection = factor(cell_selection, levels = c("top10", "top50", "top100"))) |> +# filter(pred_labeller == "multinom" & method == "CyTOF-LDA" & cell_num == 100, cohort == "CyTOF" & +# selection_procedure == "0.05-maxp-AL") |> +# ggplot(aes(x = cell_selection, y = median_gap)) + +# geom_point() +# #geom_line()# + +# # facet_grid(selection_procedure ~ cohort + method) + +# # whatsthatcell_theme() diff --git a/pipeline/figures/rem-cell-type.R b/pipeline/figures/rem-cell-type.R index cbf7380..183f37a 100644 --- a/pipeline/figures/rem-cell-type.R +++ b/pipeline/figures/rem-cell-type.R @@ -1,27 +1,13 @@ suppressPackageStartupMessages({ library(tidyverse) library(patchwork) + library(scales) }) source("pipeline/whatsthatcell-helpers.R") # Read in data ### scRNASeq -scrna <- c( - list.files("output/v7/results/rem_cell_type/", - pattern = glob2rx("Init-sel-random-rem-JIMT1*highest*multi*20*"), - full.names = TRUE), - list.files("output/v7/results/rem_cell_type/", - pattern = glob2rx("Init-sel-random-rem-AU565*highest*multi*20*"), - full.names = TRUE), - list.files("output/v7/results/rem_cell_type/", - pattern = glob2rx("Init-sel-random-rem-JIMT1*highest*rf*20*"), - full.names = TRUE), - list.files("output/v7/results/rem_cell_type/", - pattern = glob2rx("Init-sel-random-rem-AU565*highest*rf*20*"), - full.names = TRUE) -) - -scrna <- lapply(scrna, read_tsv) |> +scrna <- lapply(snakemake@input$scrna, read_tsv) |> bind_rows() |> mutate(al = case_when(grepl('rf', params) ~ "rf", grepl('multinom', params) ~ "multinom"), @@ -44,18 +30,8 @@ scrna_box <- scrna |> ### snRNASeq snrna <- c( - list.files("output/v7/results/rem_cell_type/", - pattern = glob2rx("Init-sel-random-rem-Ductal*highest*multi*20*"), - full.names = TRUE), - list.files("output/v7/results/rem_cell_type/", - pattern = glob2rx("Init-sel-random-rem-Endothelial*highest*multi*20*"), - full.names = TRUE), - list.files("output/v7/results/rem_cell_type/", - pattern = glob2rx("Init-sel-random-rem-Schwann*highest*multi*20*"), - full.names = TRUE), - list.files("output/v7/results/rem_cell_type_group/", - pattern = glob2rx("*random*l1*sn*high*multi*num-20*"), - full.names = TRUE) + snakemake@input$snrna, + snakemake@input$snrna_group ) snrna <- lapply(snrna, read_tsv) |> @@ -78,17 +54,7 @@ snrna_box <- snrna |> theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) ### Cytof -cytof <- c( - list.files("output/v7/results/rem_cell_type", - pattern = glob2rx("*random*rem-Classical*CyTOF*highest*multi*20*"), - full.names = TRUE), - list.files("output/v7/results/rem_cell_type", - pattern = glob2rx("*random*CD8*CyTOF*highest*multi*20*"), - full.names = TRUE) -) - - -cytof <- lapply(cytof, read_tsv) |> +cytof <- lapply(snakemake@input$cytof, read_tsv) |> bind_rows() |> mutate(al = case_when(grepl('rf', params) ~ "rf", grepl('multinom', params) ~ "multinom"), @@ -107,7 +73,7 @@ cytof_box <- cytof |> ## Combine -pdf("output/v8/paper-figures/rem-cell-type.pdf", height = 6, width = 14) +pdf(snakemake@output$main, height = 6, width = 14) (scrna_box | snrna_box | cytof_box) + plot_layout(guides = "collect", widths = c(2, 2, 1)) + plot_annotation(tag_levels = "A") & @@ -118,18 +84,8 @@ dev.off() ## Supplemental for RF for snRNASeq and CyTOF snrna_sup <- c( - list.files("output/v8/results/rem_cell_type/", - pattern = glob2rx("Init-sel-random-rem-Ductal*highest*rf*20*"), - full.names = TRUE), - list.files("output/v8/results/rem_cell_type/", - pattern = glob2rx("Init-sel-random-rem-Endothelial*highest*rf*20*"), - full.names = TRUE), - list.files("output/v8/results/rem_cell_type/", - pattern = glob2rx("Init-sel-random-rem-Schwann*highest*rf*20*"), - full.names = TRUE), - list.files("output/v8/results/rem_cell_type_group/", - pattern = glob2rx("*random*l1*sn*high*rf*num-20*"), - full.names = TRUE) + snakemake@input$snrna_supp, + snakemake@input$snrna_supp_group ) snrna_sup <- lapply(snrna_sup, read_tsv) |> @@ -139,17 +95,8 @@ snrna_sup <- lapply(snrna_sup, read_tsv) |> params = gsub(".*rem_celltype-", "", params), params = gsub("-seed-[0-9]", "", params)) -cytof_sup <- c( - list.files("output/v8/results/rem_cell_type", - pattern = glob2rx("*random*rem-Classical*CyTOF*highest*rf*20*"), - full.names = TRUE), - list.files("output/v8/results/rem_cell_type", - pattern = glob2rx("*random*CD8*CyTOF*highest*rf*20*"), - full.names = TRUE) -) - -cytof_sup <- lapply(cytof_sup, read_tsv) |> +cytof_sup <- lapply(snakemake@input$cytof_supp, read_tsv) |> bind_rows() |> mutate(al = case_when(grepl('rf', params) ~ "rf", grepl('multinom', params) ~ "multinom"), @@ -170,7 +117,6 @@ snrna_sup_box <- snrna_sup |> theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) - cytof_sup_box <- cytof_sup |> ggplot(aes(x = gt_cell_type, y = criterion_val, fill = as.character(num_missing_cells))) + @@ -181,7 +127,7 @@ cytof_sup_box <- cytof_sup |> whatsthatcell_theme() + theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) -pdf("output/v8/paper-figures/supp-rem-cell-type.pdf", height = 6, width = 10) +pdf(snakemake@output$supp, height = 6, width = 10) (snrna_sup_box | cytof_sup_box) + plot_layout(guides = "collect", widths = c(2, 1)) + plot_annotation(tag_levels = "A") & theme(legend.position = 'bottom') diff --git a/pipeline/paper-figures.smk b/pipeline/paper-figures.smk index 8a8bd2b..50ebd72 100644 --- a/pipeline/paper-figures.smk +++ b/pipeline/paper-figures.smk @@ -6,6 +6,9 @@ final_figures = { 'fig3_acc': output + "paper-figures/overall-accuracies.pdf", 'fig4': output + "paper-figures/rem-cell-type.pdf", 'fig5': output + "paper-figures/pred-labelling.pdf", + + # supplemental figures + 'ar_params': output + "paper-figures/Supp-AR-res.pdf" } # Figure 1 @@ -64,11 +67,11 @@ rule overall_acc: # Figure 4 -rule rem_cell_types: +rule paper_rem_cell_types: input: scrna = expand(output + "results/rem_cell_type/Init-sel-random-rem-{cell_line}-scRNASeq-highest_entropy-ALAlg-{al}-cell_num-20-seed-{s}.tsv", cell_line = ['JIMT1', 'AU565'], al = ['multinom', 'rf'], s = train_test_seeds), snrna = expand(output + "results/rem_cell_type/Init-sel-random-rem-{cell_type}-snRNASeq-highest_entropy-ALAlg-multinom-cell_num-20-seed-{s}.tsv", cell_type = ['Ductal', 'Endothelial', 'Schwann'], s = train_test_seeds), - snrna_group = expand(output + "results/rem_cell_type/Init-sel-random-rem-schwann_l1-snRNASeq-highest_entropy-ALAlg-multinom-cell_num-20-seed-{s}.tsv", s = train_test_seeds), + snrna_group = expand(output + "results/rem_cell_type_group/Init-sel-random-rem-schwann_l1-snRNASeq-highest_entropy-ALAlg-multinom-cell_num-20-seed-{s}.tsv", s = train_test_seeds), cytof = expand(output + "results/rem_cell_type/Init-sel-random-rem-{cell_type}-CyTOF-highest_entropy-ALAlg-multinom-cell_num-20-seed-{s}.tsv", cell_type = ['Classical Monocytes', 'CD8 T cells'], s = train_test_seeds), snrna_supp = expand(output + "results/rem_cell_type/Init-sel-random-rem-{cell_type}-snRNASeq-highest_entropy-ALAlg-rf-cell_num-20-seed-{s}.tsv", cell_type = ['Ductal', 'Endothelial', 'Schwann'], s = train_test_seeds), snrna_supp_group = expand(output + "results/rem_cell_type_group/Init-sel-random-rem-schwann_l1-snRNASeq-highest_entropy-ALAlg-rf-cell_num-20-seed-{s}.tsv", s = train_test_seeds), @@ -96,23 +99,10 @@ rule self_training: sup_scrna = output + "paper-figures/Supp-scRNASeq-f1-improvement.pdf", sup_snrna = output + "paper-figures/Supp-snRNASeq-f1-improvement.pdf" script: - "figures/pred-labelling.R" + "figures/pred-labeling.R" #### Supplemental figs -# similarity fig -rule cell_type_sim: - input: - cytof_sim = output + "results/cell-type-similarity/similarity-CyTOF-seed-0.tsv", - scrna_sim = output + "results/cell-type-similarity/similarity-scRNASeq-seed-0.tsv", - snrna_sim = output + "results/cell-type-similarity/similarity-snRNASeq-seed-0.tsv" - output: - scrna = output + "paper-figures/Supp-cell-type-sim-scRNASeq.pdf", - snrna = output + "paper-figures/Supp-cell-type-sim-snRNASeq.pdf", - cytof = output + "paper-figures/Supp-cell-type-sim-CyTOF.pdf" - script: - "figures/cell-type-similarity.R" - # ar params rule AR_params: input: diff --git a/pipeline/whatsthatcell-helpers.R b/pipeline/whatsthatcell-helpers.R index 7ffccc0..3396990 100644 --- a/pipeline/whatsthatcell-helpers.R +++ b/pipeline/whatsthatcell-helpers.R @@ -687,7 +687,7 @@ al_colours <- function(){ c("LR" = "#DA94D4", "RF" = "#7EA3CC") } -sel_met_cols <- hue_pal()(9) +sel_met_cols <- c("#F8766D", "#D39200", "#93AA00", "#00BA38", "#00C19F", "#00B9E3", "#619CFF", "#DB72FB", "#FF61C3") #hue_pal()(9) names(sel_met_cols) <- c("AR Marker", "AR No Marker", "Random", "AL 0.05-maxp", "AL 0.25-maxp", "AL 0.75-entropy", "AL 0.95-entropy", "AL Highest entropy", "AL Lowest maxp")