diff --git a/pipeline/figures/AR-params.R b/pipeline/figures/AR-params.R
index 0492b13..debe3d7 100644
--- a/pipeline/figures/AR-params.R
+++ b/pipeline/figures/AR-params.R
@@ -1,14 +1,15 @@
-suppressPackageStartupMessages(
+suppressPackageStartupMessages({
   library(tidyverse)
-)
+  library(patchwork)
+})
 source("pipeline/whatsthatcell-helpers.R")
 
 
-cytof_acc <- read_tsv("output/v7/results/overall-CyTOF-benchmarking-accuracies.tsv") |> 
+cytof_acc <- read_tsv(snakemake@input$cytof_acc) |> 
   mutate(cohort = "CyTOF")
-scrna_acc <- read_tsv("output/v7/results/overall-scRNASeq-benchmarking-accuracies.tsv") |> 
+scrna_acc <- read_tsv(snakemake@input$scrna_acc) |> 
   mutate(cohort = "scRNASeq")
-snrna_acc <- read_tsv("output/v7/results/overall-snRNASeq-benchmarking-accuracies.tsv") |> 
+snrna_acc <- read_tsv(snakemake@input$snrna_acc) |> 
   mutate(cohort = "snRNASeq")
 
 acc <- bind_rows(cytof_acc, scrna_acc, snrna_acc)
@@ -42,7 +43,7 @@ plot_knn_res <- function(acc, fill, cohort){
 }
 
 
-pdf("output/v7/paper-figures/supp-AR-res.pdf", height = 14, width = 9)
+pdf(snakemake@output$res, height = 14, width = 9)
   (plot_knn_res(cytof_acc, "res", "CyTOF") /
     plot_knn_res(scrna_acc, "res", "scRNASeq") /
     plot_knn_res(snrna_acc, "res", "snRNASeq")) +
@@ -50,7 +51,7 @@ pdf("output/v7/paper-figures/supp-AR-res.pdf", height = 14, width = 9)
 dev.off()
 
 
-pdf("output/v7/paper-figures/supp-AR-knn.pdf", height = 14, width = 9)
+pdf(snakemake@output$knn, height = 14, width = 9)
 (plot_knn_res(cytof_acc, "knn", "CyTOF") /
     plot_knn_res(scrna_acc, "knn", "scRNASeq") /
     plot_knn_res(snrna_acc, "knn", "snRNASeq")) +
diff --git a/pipeline/figures/cell-type-similarity.R b/pipeline/figures/cell-type-similarity.R
index 9e27514..93d39b7 100644
--- a/pipeline/figures/cell-type-similarity.R
+++ b/pipeline/figures/cell-type-similarity.R
@@ -1,9 +1,9 @@
 library(tidyverse)
 library(ComplexHeatmap)
 
-cytof_sim <- read_tsv("output/v7/results/cell-type-similarity/similarity-CyTOF-seed-0.tsv")
-scrna_sim <- read_tsv("output/v7/results/cell-type-similarity/similarity-scRNASeq-seed-0.tsv")
-snrna_sim <- read_tsv("output/v7/results/cell-type-similarity/similarity-snRNASeq-seed-0.tsv")
+cytof_sim <- read_tsv(snakemake@input$cytof_sim)
+scrna_sim <- read_tsv(snakemake@input$scrna_sim)
+snrna_sim <- read_tsv(snakemake@input$snrna_sim)
 
 plot_dist_mat <- function(df){
   pivot_wider(df, names_from = "cell_type2", values_from = "cosine_similarity") |> 
@@ -14,14 +14,14 @@ plot_dist_mat <- function(df){
     Heatmap(name = "Cosine\nsimilarity")
 }
 
-pdf("output/v7/paper-figures/Supp-cell-type-sim-scRNASeq.pdf", height = 5, width = 5.5)
+pdf(snakemake@input$scrna, height = 5, width = 5.5)
   plot_dist_mat(scrna_sim)
 dev.off()
 
-pdf("output/v7/paper-figures/Supp-cell-type-sim-snRNASeq.pdf", height = 5, width = 5.5)
+pdf(snakemake@input$snrna, height = 5, width = 5.5)
   plot_dist_mat(snrna_sim)
 dev.off()
 
-pdf("output/v7/paper-figures/Supp-cell-type-sim-CyTOF.pdf", height = 5, width = 5.5)
+pdf(snakemake@input$cytof, height = 5, width = 5.5)
   plot_dist_mat(cytof_sim)
 dev.off()
diff --git a/pipeline/figures/compare-lr-rf-random-and-ranking.R b/pipeline/figures/compare-lr-rf-random-and-ranking.R
index d13e010..85bd38d 100644
--- a/pipeline/figures/compare-lr-rf-random-and-ranking.R
+++ b/pipeline/figures/compare-lr-rf-random-and-ranking.R
@@ -6,10 +6,11 @@ suppressPackageStartupMessages({
   library(magick)
   library(ggpubr)
 })
+devtools::load_all("/ggplot2")
 source("pipeline/whatsthatcell-helpers.R")
 
 ## Part A: Proportion of cells initially selected
-get_proportion_rep <- function(sce_path, selected_cells_path, cohort){
+get_proportion_rep <- function(sce_path, rand_files, rank_files, cohort){
   sce <- readRDS(sce_path)
   
   cell_types <- unique(sce$cell_type)
@@ -18,13 +19,6 @@ get_proportion_rep <- function(sce_path, selected_cells_path, cohort){
     cell_types <- unique(sce$CellType)
   }
   
-  rand_files <- list.files(selected_cells_path, 
-                           pattern = "random",
-                           full.names = TRUE)
-  rank_files <- list.files(selected_cells_path, 
-                           pattern = "ranking",
-                           full.names = TRUE)
-  
   random <- lapply(rand_files, function(x){
     sel_cells <- read_tsv(x) |>
       filter(iteration == 0) |> 
@@ -48,14 +42,17 @@ get_proportion_rep <- function(sce_path, selected_cells_path, cohort){
 }
 
 props <- bind_rows(
-  get_proportion_rep("data/CyTOF/CyTOF-train-seed-0.rds", 
-                     "output/v8/data/CyTOF/Active-Learning_entropy/AL-batches-subset", 
+  get_proportion_rep(snakemake@input$cytof,
+                     snakemake@input$cytof_AL_rand,
+                     snakemake@input$cytof_AL_rank,
                      "CyTOF"),
-  get_proportion_rep("data/scRNASeq/scRNASeq-train-seed-0.rds", 
-                     "output/v8/data/scRNASeq/Active-Learning_entropy/AL-batches-subset", 
+  get_proportion_rep(snakemake@input$scrna,
+                     snakemake@input$scrna_AL_rand,
+                     snakemake@input$scrna_AL_rank,
                      "scRNASeq"),
-  get_proportion_rep("data/snRNASeq/snRNASeq-train-seed-0.rds", 
-                     "output/v8/data/snRNASeq/Active-Learning_entropy/AL-batches-subset", 
+  get_proportion_rep(snakemake@input$snrna,
+                     snakemake@input$snrna_AL_rand,
+                     snakemake@input$snrna_AL_rank,
                      "snRNASeq")
 )
 
@@ -72,11 +69,7 @@ prop <- props |>
 
 
 ## Part B: heatmaps
-f <- c("output/v8/results/overall-CyTOF-benchmarking-accuracies.tsv", 
-       "output/v8/results/overall-scRNASeq-benchmarking-accuracies.tsv", 
-       "output/v8/results/overall-snRNASeq-benchmarking-accuracies.tsv")
-
-acc <- lapply(f, function(x){
+acc <- lapply(snakemake@input$accs, function(x){
   df <- read_tsv(x) |> 
     mutate(cohort = case_when(grepl("CyTOF", basename(x)) ~ "CyTOF",
                               grepl("snRNASeq", basename(x)) ~ "snRNASeq",
@@ -149,7 +142,7 @@ ranking_vs_random <- create_heatmap(acc, "scRNASeq", "random_vs_ranked") +
 
 ranking <- draw(ranking_vs_random, column_title = "Selecting initial cells based on marker expression")
 
-pdf("output/v8/paper-figures/ranking-random-heatmap.pdf", height = 3.5, width = 7)
+pdf(snakemake@output$rank_rand_hm, height = 3.5, width = 7)
   ranking
 dev.off()
 
@@ -159,20 +152,20 @@ lr_vs_rf <- create_heatmap(acc, "scRNASeq", "lr_vs_rf") +
 
 lr_vs_rf <- draw(lr_vs_rf, column_title = "F1-score improvement by random forest compared to logistic regression")
 
-pdf("output/v8/paper-figures/lr-rf-heatmap.pdf", height = 3.5, width = 7)
+pdf(snakemake@output$lr_rf_hm, height = 3.5, width = 7)
   lr_vs_rf
 dev.off()
 
 ## Full main figure
-ranking <- image_read_pdf("output/v8/paper-figures/ranking-random-heatmap.pdf")
+ranking <- image_read_pdf(snakemake@output$rank_rand_hm)
 ranking <- ggplot() +
   background_image(ranking)
 
-lr_vs_rf <- image_read_pdf("output/v8/paper-figures/lr-rf-heatmap.pdf")
+lr_vs_rf <- image_read_pdf(snakemake@output$lr_rf_hm)
 lr_vs_rf <- ggplot() +
   background_image(lr_vs_rf)
 
-pdf("output/v8/paper-figures/lr-and-rf-heatmaps.pdf", height = 8.8, width = 7)
+pdf(snakemake@output$main_fig, height = 8.8, width = 7)
    (lr_vs_rf / wrap_elements(full = prop) / ranking) + plot_layout(heights = c(3, 1.2, 3)) + plot_annotation(tag_levels = "A")
 dev.off()
 
@@ -212,7 +205,7 @@ rf_vs_rf_ranking <- filter(acc, corrupted == 0) |>
   whatsthatcell_theme() +
   theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust =1))
 
-pdf("output/v8/paper-figures/rf-vs-lr-supplementary.pdf", height = 13, width = 12)
+pdf(snakemake@output$rf_lr_supp, height = 13, width = 12)
   rf_vs_rf_random / rf_vs_rf_ranking + plot_layout(guides = "collect") + 
     plot_annotation(tag_levels = "A")
 dev.off()
@@ -254,7 +247,7 @@ rand_vs_rank_rf <- filter(acc, corrupted == 0) |>
   whatsthatcell_theme() +
   theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust =1))
 
-pdf("output/v8/paper-figures/random-vs-ranking-supplementary.pdf", height = 13, width = 12)
+pdf(snakemake@output$rand_rank_supp, height = 13, width = 12)
   rand_vs_rank_lr / rand_vs_rank_rf + plot_layout(guides = "collect") + 
     plot_annotation(tag_levels = "A")
 dev.off()
diff --git a/pipeline/figures/figure1.R b/pipeline/figures/figure1.R
index 1a9d665..a605374 100644
--- a/pipeline/figures/figure1.R
+++ b/pipeline/figures/figure1.R
@@ -10,17 +10,18 @@ source("pipeline/whatsthatcell-helpers.R")
 
 
 ### [ FIGURE 1A - BENCHMARKING OVERVIEW ] #####
-schematic <- image_read("illustrator-figures/benchmarking-schematic.ai") |> 
-  image_ggplot()
+# had to remove because magick causes issues with docker
+# schematic <- image_read(snakemake@input$schematic) |> 
+#   image_ggplot()
 
 ### [ FIGURE 1B - DATASET COMPOSOTION ] #####
 set.seed(42)
 
-CyTOF <- readRDS("data/CyTOF/CyTOF-full.rds")
+CyTOF <- readRDS(snakemake@input$cytof)
 CyTOF <- runTSNE(CyTOF)
-scRNA <- readRDS("data/scRNASeq/scRNASeq-full.rds")
+scRNA <- readRDS(snakemake@input$scrna)
 scRNA <- runTSNE(scRNA)
-snRNA <- readRDS("data/snRNASeq/snRNASeq-full.rds")
+snRNA <- readRDS(snakemake@input$snrna)
 snRNA <- runTSNE(snRNA)
 
 
@@ -130,10 +131,10 @@ snrna_plot <- ((wrap_elements(full = snrna_tsne, ignore_tag = TRUE) & labs(title
                  snrna_bar) + 
   plot_layout(heights = c(3,1))
 
-pdf("output/v8/paper-figures/figure1.pdf", height = 12.5, width = 17.5)
+pdf(snakemake@output$fig1, height = 9.25, width = 17.5)
   wrap_elements((scrna_plot | snrna_plot | cytof_plot) + plot_layout(widths = c(1, 1.2, 1))) /
-    wrap_elements(schematic) +
-    plot_layout(heights = c(2, 0.7)) +
+    #wrap_elements(schematic) +
+    #plot_layout(heights = c(2, 0.7)) +
     plot_annotation(tag_levels = 'A') &
     theme(plot.tag = element_text(size = 22))
 dev.off()
diff --git a/pipeline/figures/figure3.R b/pipeline/figures/figure3.R
index 8f9fa8c..63b5f2d 100644
--- a/pipeline/figures/figure3.R
+++ b/pipeline/figures/figure3.R
@@ -6,12 +6,11 @@ suppressPackageStartupMessages({
   library(scales)
   library(magick)
 })
+devtools::load_all("/ggplot2")
 source("pipeline/whatsthatcell-helpers.R")
 
 ### [ ACCURACIES ] ####
-f <- list.files("output/v8/results/", pattern = "overall", full.names = TRUE)
-
-acc <- lapply(f, function(x){
+acc <- lapply(snakemake@input$accs, function(x){
   df <- read_tsv(x) |> 
     mutate(cohort = case_when(grepl("CyTOF", basename(x)) ~ "CyTOF",
                               grepl("snRNASeq", basename(x)) ~ "snRNASeq",
@@ -34,13 +33,10 @@ acc <- lapply(f, function(x){
 
 sel_meth_cols <- sel_met_cols
 
-schematic <- image_read("illustrator-figures/AR-schematic.ai") |> 
-  image_ggplot()
-
 eval <- full_acc_plot_wrapper(acc, "rf", "ranking", "") &
   labs(fill = "Selection method")
 
-pdf("output/v8/paper-figures/figure3-overall-benchmark.pdf", height = 8.4, width = 9)
+pdf(snakemake@output$overall_fig, height = 8.4, width = 9)
   eval
 dev.off()
 
diff --git a/pipeline/figures/imbalance-plots.R b/pipeline/figures/imbalance-plots.R
index 33ac1b3..9e36731 100644
--- a/pipeline/figures/imbalance-plots.R
+++ b/pipeline/figures/imbalance-plots.R
@@ -1,9 +1,10 @@
 library(tidyverse)
 library(scales)
+library(patchwork)
 
-sc_acc <- read_tsv("output/v8/imbalance/acc/imbalance-acc-scRNASeq.tsv")
-sn_acc <- read_tsv("output/v8/imbalance/acc/imbalance-acc-snRNASeq.tsv")
-cy_acc <- read_tsv("output/v8/imbalance/acc/imbalance-acc-CyTOF.tsv")
+sc_acc <- read_tsv(snakemake@input$scrna_acc)
+sn_acc <- read_tsv(snakemake@input$snrna_acc)
+cy_acc <- read_tsv(snakemake@input$cytof_acc)
 
 source("pipeline/whatsthatcell-helpers.R")
 
@@ -79,7 +80,7 @@ plotting_order <- filter(comb_imb_score, modality == "snRNASeq" & al == "RF" & i
   arrange(mean_estimate) |> 
   pull(strat)
 
-pdf("output/v8/paper-figures/imbalance-main-figure.pdf", height = 3, width = 7)
+pdf(snakemake@output$main_fig, height = 3, width = 7)
   filter(comb_imb_score, modality == "snRNASeq" & al == "RF" & init == "ranking") |> 
     mutate(strat = factor(strat, levels = plotting_order)) |> 
     ggplot(aes(x = comp, y = diff, fill = strat)) +
@@ -95,7 +96,7 @@ pdf("output/v8/paper-figures/imbalance-main-figure.pdf", height = 3, width = 7)
 dev.off()
 
 
-pdf("output/v8/paper-figures/imbalance-supplementary.pdf", height = 18, width = 9)
+pdf(snakemake@output$sup_fig, height = 18, width = 9)
   (scrnaseq / snrnaseq / cytof) + 
     plot_annotation(tag_levels = "A") + 
   plot_layout(guides = "collect", heights = c(2, 2, 1))
diff --git a/pipeline/figures/pred-labeling.R b/pipeline/figures/pred-labeling.R
index 1ba64bd..67709fd 100644
--- a/pipeline/figures/pred-labeling.R
+++ b/pipeline/figures/pred-labeling.R
@@ -2,14 +2,15 @@ suppressPackageStartupMessages({
   library(tidyverse)
   library(ggalluvial)
   library(patchwork)
+  library(scales)
 })
 source("pipeline/whatsthatcell-helpers.R")
 
 ### PREDICTIVE LABELLING ACCURACY
-pred_lab_acc <- read_tsv("output/v8/new/pred-labeling-accuracy.tsv")
+pred_lab_acc <- read_tsv(snakemake@input$acc)
 
 # SUPPLEMENTAL FILE
-pdf("output/v8/paper-figures/Supp-pred-labelling-acc.pdf", height = 10, width = 12)
+pdf(snakemake@output$supp, height = 10, width = 12)
   pred_lab_acc |> 
     filter(.metric == "f_meas") |> 
     mutate(pred_sel = gsub("top", "", pred_sel),
@@ -66,16 +67,16 @@ pred_lab_acc_plot <- plot_pred_lab_acc(pred_lab_acc_pub, "CyTOF") |
 
 
 ## Benchmarking with predictive labelling data
-scrna <- read_tsv("output/v8/new/pred2/benchmark-predictive-labeling-scRNASeq.tsv") |> 
+scrna <- read_tsv(snakemake@input$scrna) |> 
   mutate(AL_alg = sub(".*-ALAlg-", "",  selection_procedure),
          selection_procedure = sub("-ALAlg-.*", "", selection_procedure),
          cohort = "scRNASeq")
-snrna <- read_tsv("output/v8/new/pred2/benchmark-predictive-labeling-snRNASeq.tsv") |> 
+snrna <- read_tsv(snakemake@input$snrna) |> 
   mutate(AL_alg = sub(".*-ALAlg-", "",  selection_procedure),
          selection_procedure = sub("-ALAlg-.*", "", selection_procedure),
          cohort = "snRNASeq")
 
-cytof <- read_tsv("output/v8/new/pred2/benchmark-predictive-labeling-CyTOF.tsv") |> 
+cytof <- read_tsv(snakemake@input$cytof) |> 
   mutate(AL_alg = sub(".*-ALAlg-", "",  selection_procedure),
          selection_procedure = sub("-ALAlg-.*", "", selection_procedure),
          cohort = "CyTOF")
@@ -135,12 +136,12 @@ comp_dataset <- acc_gap |>
   mutate(selection_procedure = case_when(selection_procedure == "random" ~ "Random",
                                          selection_procedure == "MarkerSeurat-clustering" ~ "AR Marker",
                                          selection_procedure == "NoMarkerSeurat-clustering" ~ "AR No Marker",
-                                         selection_procedure == "0.05-maxp-AL" ~ "AL 0.05 maxp",
-                                         selection_procedure == "0.25-maxp-AL" ~ "AL 0.25 maxp",
-                                         selection_procedure == "0.75-entropy-AL" ~ "AL 0.75 entropy",
-                                         selection_procedure == "0.95-entropy-AL" ~ "AL 0.95 entropy",
-                                         selection_procedure == "highest-entropy-AL" ~ "AL highest entropy",
-                                         selection_procedure == "lowest-maxp-AL" ~ "AL lowest maxp"))
+                                         selection_procedure == "0.05-maxp-AL" ~ "AL 0.05-maxp",
+                                         selection_procedure == "0.25-maxp-AL" ~ "AL 0.25-maxp",
+                                         selection_procedure == "0.75-entropy-AL" ~ "AL 0.75-entropy",
+                                         selection_procedure == "0.95-entropy-AL" ~ "AL 0.95-entropy",
+                                         selection_procedure == "highest-entropy-AL" ~ "AL Highest entropy",
+                                         selection_procedure == "lowest-maxp-AL" ~ "AL Lowest maxp"))
 
 plot_comp <- function(df, ncol, ylab = "", xlab = ""){
   if(ylab != ""){
@@ -152,6 +153,7 @@ plot_comp <- function(df, ncol, ylab = "", xlab = ""){
   df |> 
     ggplot(aes(x = gap, y = .estimate, colour = selection_procedure)) +
     geom_point() +
+    scale_color_manual(values = sel_met_cols) +
     labs(x = xlab,
          y = ylab,
          colour = "Selection procedure") +
@@ -178,9 +180,9 @@ gap_comb <- (cytof_gap | scrnaseq_gap | snrnaseq_gap) +
   plot_layout(guides = "collect", widths = c(0.65, 1, 1))
 
 # Detecting mislabelled cells
-cytof <- list.files("output/v8/identify_mislabelled/CyTOF/", full.names = TRUE)
-scrna <- list.files("output/v8/identify_mislabelled/scRNASeq/", full.names = TRUE)
-snrna <- list.files("output/v8/identify_mislabelled/snRNASeq/", full.names = TRUE)
+cytof <- snakemake@input$mislabelled_cytof
+scrna <- snakemake@input$mislabelled_scrna
+snrna <- snakemake@input$mislabelled_snrna
 
 mislabelled_pred <- lapply(c(cytof, scrna, snrna), function(x){
   df <- read_tsv(x)
@@ -206,7 +208,7 @@ mislabelled_pred_plot <- mislabelled_pred |>
   whatsthatcell_theme() +
   theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))
 
-pdf("output/v8/paper-figures/pred-labelling.pdf", height = 14, width = 12)
+pdf(snakemake@output$main, height = 14, width = 12)
   (wrap_elements(full = pred_lab_acc_plot + plot_layout(guides = "collect"))) /
     wrap_elements(full = lr_vs_rf & labs(title = "")) /
     wrap_elements(full = gap_comb) /
@@ -244,71 +246,71 @@ plot_sup_gap <- function(df, sel_cohort){
     theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))
 }
 
-pdf("output/v8/paper-figures/Supp-CyTOF-f1-improvement.pdf", width = 12, height = 8)
+pdf(snakemake@output$sup_cytof, width = 12, height = 8)
   plot_sup_gap(sup_acc_gap, "CyTOF")
 dev.off()
 
-pdf("output/v8/paper-figures/Supp-scRNASeq-f1-improvement.pdf", width = 12, height = 8)
+pdf(snakemake@output$sup_scrna, width = 12, height = 8)
   plot_sup_gap(sup_acc_gap, "scRNASeq")
 dev.off()
 
-pdf("output/v8/paper-figures/Supp-snRNASeq-f1-improvement.pdf", width = 12, height = 8)
+pdf(snakemake@output$sup_snrna, width = 12, height = 8)
   plot_sup_gap(sup_acc_gap, "snRNASeq")
 dev.off()
 
-### As function of number of cells predictively labelled
-test <- bind_rows(
-  select(pred_lab_acc, method, cell_selection, multinom) |> 
-    dplyr::rename(".estimate" = "multinom"),
-  baseline_acc
-) |> 
-  filter(cell_selection != "top200") |> 
-  mutate(cell_selection = factor(cell_selection, c("baseline", "top10", "top50", "top100"))) |> 
-  #filter(method == "Random-Forest,10,0.4,100,NA,0,MarkerSeurat-clustering,kap,NA,scRNASeq") |>
-  separate(method, c("alg", "knn", "res", "cell_num", "initial", "seed", 
-                     "selection_procedure", ".metric", "AL_alg", "cohort"), sep = ",",
-           remove = FALSE) |> 
-  filter(.metric == "sensitivity") |> 
-  filter(initial == "NA" | is.na(initial) | initial == "random") |> 
-  filter(AL_alg == "NA" | is.na(AL_alg) | AL_alg == "multinom")
+# ### As function of number of cells predictively labelled
+# test <- bind_rows(
+#   select(pred_lab_acc, method, cell_selection, multinom) |> 
+#     dplyr::rename(".estimate" = "multinom"),
+#   baseline_acc
+# ) |> 
+#   filter(cell_selection != "top200") |> 
+#   mutate(cell_selection = factor(cell_selection, c("baseline", "top10", "top50", "top100"))) |> 
+#   #filter(method == "Random-Forest,10,0.4,100,NA,0,MarkerSeurat-clustering,kap,NA,scRNASeq") |>
+#   separate(method, c("alg", "knn", "res", "cell_num", "initial", "seed", 
+#                      "selection_procedure", ".metric", "AL_alg", "cohort"), sep = ",",
+#            remove = FALSE) |> 
+#   filter(.metric == "sensitivity") |> 
+#   filter(initial == "NA" | is.na(initial) | initial == "random") |> 
+#   filter(AL_alg == "NA" | is.na(AL_alg) | AL_alg == "multinom")
   
-test |> 
-  filter(alg == "Random-Forest", selection_procedure == "random") |> 
-  ggplot(aes(x = cell_selection, y = .estimate, group = method, colour = cell_num)) +
-  geom_point() +
-  geom_line() +
-  facet_grid(selection_procedure ~ cohort + alg) +
-  whatsthatcell_theme()
-
-left_join(pred_lab_acc, 
-          select(baseline_acc, -cell_selection), 
-          by = "method") |> 
-  #pivot_wider(names_from = "cell_selection", values_from = "multinom")
-  pivot_longer()
-
-med_gap <- acc_gap |> 
-  filter(.metric == "f_meas") |> 
-  filter(initial == "NA" | is.na(initial) | initial == "random") |> 
-  filter(AL_alg == "NA" | is.na(AL_alg) | AL_alg == "multinom") |> 
-  group_by(method, cell_num, selection_procedure, cohort, cell_selection, pred_labeller) |> 
-  summarize(median_gap = median(na.omit(gap)))
-
-med_gap |> 
-  filter(pred_labeller == "multinom" & method ) |> 
-  ggplot(aes(x = cell_selection, y = median_gap, colour = cell_num)) +
-  geom_line() +
-  facet_grid(selection_procedure ~ cohort + method) +
-  whatsthatcell_theme()
-
-
-med_gap |> 
-  filter(cell_selection != "top200") |> 
-  mutate(cell_selection = factor(cell_selection, levels = c("top10", "top50", "top100"))) |> 
-  filter(pred_labeller == "multinom" & method == "CyTOF-LDA" & cell_num == 100, cohort == "CyTOF" &
-         selection_procedure == "0.05-maxp-AL") |> 
-  ggplot(aes(x = cell_selection, y = median_gap)) +
-  geom_point() 
-  #geom_line()# +
-  # facet_grid(selection_procedure ~ cohort + method) +
-  # whatsthatcell_theme()
+# test |> 
+#   filter(alg == "Random-Forest", selection_procedure == "random") |> 
+#   ggplot(aes(x = cell_selection, y = .estimate, group = method, colour = cell_num)) +
+#   geom_point() +
+#   geom_line() +
+#   facet_grid(selection_procedure ~ cohort + alg) +
+#   whatsthatcell_theme()
+
+# left_join(pred_lab_acc, 
+#           select(baseline_acc, -cell_selection), 
+#           by = "method") |> 
+#   #pivot_wider(names_from = "cell_selection", values_from = "multinom")
+#   pivot_longer()
+
+# med_gap <- acc_gap |> 
+#   filter(.metric == "f_meas") |> 
+#   filter(initial == "NA" | is.na(initial) | initial == "random") |> 
+#   filter(AL_alg == "NA" | is.na(AL_alg) | AL_alg == "multinom") |> 
+#   group_by(method, cell_num, selection_procedure, cohort, cell_selection, pred_labeller) |> 
+#   summarize(median_gap = median(na.omit(gap)))
+
+# med_gap |> 
+#   filter(pred_labeller == "multinom" & method ) |> 
+#   ggplot(aes(x = cell_selection, y = median_gap, colour = cell_num)) +
+#   geom_line() +
+#   facet_grid(selection_procedure ~ cohort + method) +
+#   whatsthatcell_theme()
+
+
+# med_gap |> 
+#   filter(cell_selection != "top200") |> 
+#   mutate(cell_selection = factor(cell_selection, levels = c("top10", "top50", "top100"))) |> 
+#   filter(pred_labeller == "multinom" & method == "CyTOF-LDA" & cell_num == 100, cohort == "CyTOF" &
+#          selection_procedure == "0.05-maxp-AL") |> 
+#   ggplot(aes(x = cell_selection, y = median_gap)) +
+#   geom_point() 
+#   #geom_line()# +
+#   # facet_grid(selection_procedure ~ cohort + method) +
+#   # whatsthatcell_theme()
 
diff --git a/pipeline/figures/rem-cell-type.R b/pipeline/figures/rem-cell-type.R
index cbf7380..183f37a 100644
--- a/pipeline/figures/rem-cell-type.R
+++ b/pipeline/figures/rem-cell-type.R
@@ -1,27 +1,13 @@
 suppressPackageStartupMessages({
   library(tidyverse)
   library(patchwork)
+  library(scales)
 })
 source("pipeline/whatsthatcell-helpers.R")
 
 # Read in data
 ### scRNASeq
-scrna <- c(
-  list.files("output/v7/results/rem_cell_type/", 
-             pattern = glob2rx("Init-sel-random-rem-JIMT1*highest*multi*20*"),
-             full.names = TRUE),
-  list.files("output/v7/results/rem_cell_type/", 
-             pattern = glob2rx("Init-sel-random-rem-AU565*highest*multi*20*"),
-             full.names = TRUE),
-  list.files("output/v7/results/rem_cell_type/", 
-             pattern = glob2rx("Init-sel-random-rem-JIMT1*highest*rf*20*"),
-             full.names = TRUE),
-  list.files("output/v7/results/rem_cell_type/", 
-             pattern = glob2rx("Init-sel-random-rem-AU565*highest*rf*20*"),
-             full.names = TRUE)
-)
-
-scrna <- lapply(scrna, read_tsv) |> 
+scrna <- lapply(snakemake@input$scrna, read_tsv) |> 
   bind_rows() |> 
   mutate(al = case_when(grepl('rf', params) ~ "rf",
                         grepl('multinom', params) ~ "multinom"),
@@ -44,18 +30,8 @@ scrna_box <- scrna |>
 
 ### snRNASeq
 snrna <- c(
-  list.files("output/v7/results/rem_cell_type/", 
-             pattern = glob2rx("Init-sel-random-rem-Ductal*highest*multi*20*"),
-             full.names = TRUE),
-  list.files("output/v7/results/rem_cell_type/", 
-             pattern = glob2rx("Init-sel-random-rem-Endothelial*highest*multi*20*"),
-             full.names = TRUE),
-  list.files("output/v7/results/rem_cell_type/", 
-             pattern = glob2rx("Init-sel-random-rem-Schwann*highest*multi*20*"),
-             full.names = TRUE),
-  list.files("output/v7/results/rem_cell_type_group/",
-             pattern = glob2rx("*random*l1*sn*high*multi*num-20*"),
-             full.names = TRUE)
+  snakemake@input$snrna,
+  snakemake@input$snrna_group
 )
 
 snrna <- lapply(snrna, read_tsv) |> 
@@ -78,17 +54,7 @@ snrna_box <- snrna |>
   theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))
 
 ### Cytof
-cytof <- c(
-  list.files("output/v7/results/rem_cell_type",
-             pattern = glob2rx("*random*rem-Classical*CyTOF*highest*multi*20*"),
-             full.names = TRUE),
-  list.files("output/v7/results/rem_cell_type",
-             pattern = glob2rx("*random*CD8*CyTOF*highest*multi*20*"),
-             full.names = TRUE)
-)
-
-
-cytof <- lapply(cytof, read_tsv) |> 
+cytof <- lapply(snakemake@input$cytof, read_tsv) |> 
   bind_rows() |> 
   mutate(al = case_when(grepl('rf', params) ~ "rf",
                         grepl('multinom', params) ~ "multinom"),
@@ -107,7 +73,7 @@ cytof_box <- cytof |>
 
 
 ## Combine
-pdf("output/v8/paper-figures/rem-cell-type.pdf", height = 6, width = 14)
+pdf(snakemake@output$main, height = 6, width = 14)
 (scrna_box | snrna_box | cytof_box) +
   plot_layout(guides = "collect", widths = c(2, 2, 1)) +
   plot_annotation(tag_levels = "A") & 
@@ -118,18 +84,8 @@ dev.off()
 
 ## Supplemental for RF for snRNASeq and CyTOF
 snrna_sup <- c(
-  list.files("output/v8/results/rem_cell_type/", 
-             pattern = glob2rx("Init-sel-random-rem-Ductal*highest*rf*20*"),
-             full.names = TRUE),
-  list.files("output/v8/results/rem_cell_type/", 
-             pattern = glob2rx("Init-sel-random-rem-Endothelial*highest*rf*20*"),
-             full.names = TRUE),
-  list.files("output/v8/results/rem_cell_type/", 
-             pattern = glob2rx("Init-sel-random-rem-Schwann*highest*rf*20*"),
-             full.names = TRUE),
-  list.files("output/v8/results/rem_cell_type_group/",
-             pattern = glob2rx("*random*l1*sn*high*rf*num-20*"),
-             full.names = TRUE)
+  snakemake@input$snrna_supp,
+  snakemake@input$snrna_supp_group
 )
 
 snrna_sup <- lapply(snrna_sup, read_tsv) |> 
@@ -139,17 +95,8 @@ snrna_sup <- lapply(snrna_sup, read_tsv) |>
          params = gsub(".*rem_celltype-", "", params),
          params = gsub("-seed-[0-9]", "", params))
 
-cytof_sup <- c(
-  list.files("output/v8/results/rem_cell_type",
-             pattern = glob2rx("*random*rem-Classical*CyTOF*highest*rf*20*"),
-             full.names = TRUE),
-  list.files("output/v8/results/rem_cell_type",
-             pattern = glob2rx("*random*CD8*CyTOF*highest*rf*20*"),
-             full.names = TRUE)
-)
-
 
-cytof_sup <- lapply(cytof_sup, read_tsv) |> 
+cytof_sup <- lapply(snakemake@input$cytof_supp, read_tsv) |> 
   bind_rows() |> 
   mutate(al = case_when(grepl('rf', params) ~ "rf",
                         grepl('multinom', params) ~ "multinom"),
@@ -170,7 +117,6 @@ snrna_sup_box <- snrna_sup |>
   theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))
 
 
-
 cytof_sup_box <- cytof_sup |> 
   ggplot(aes(x = gt_cell_type, y = criterion_val, 
              fill = as.character(num_missing_cells))) +
@@ -181,7 +127,7 @@ cytof_sup_box <- cytof_sup |>
   whatsthatcell_theme() +
   theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))
 
-pdf("output/v8/paper-figures/supp-rem-cell-type.pdf", height = 6, width = 10)
+pdf(snakemake@output$supp, height = 6, width = 10)
   (snrna_sup_box | cytof_sup_box) +
     plot_layout(guides = "collect", widths = c(2, 1)) +
     plot_annotation(tag_levels = "A") & theme(legend.position = 'bottom')
diff --git a/pipeline/paper-figures.smk b/pipeline/paper-figures.smk
index 8a8bd2b..50ebd72 100644
--- a/pipeline/paper-figures.smk
+++ b/pipeline/paper-figures.smk
@@ -6,6 +6,9 @@ final_figures = {
     'fig3_acc': output + "paper-figures/overall-accuracies.pdf",
     'fig4': output + "paper-figures/rem-cell-type.pdf",
     'fig5': output + "paper-figures/pred-labelling.pdf",
+
+    # supplemental figures
+    'ar_params': output + "paper-figures/Supp-AR-res.pdf"
 }
 
 # Figure 1
@@ -64,11 +67,11 @@ rule overall_acc:
 
 
 # Figure 4
-rule rem_cell_types:
+rule paper_rem_cell_types:
     input:
         scrna = expand(output + "results/rem_cell_type/Init-sel-random-rem-{cell_line}-scRNASeq-highest_entropy-ALAlg-{al}-cell_num-20-seed-{s}.tsv", cell_line = ['JIMT1', 'AU565'], al = ['multinom', 'rf'], s = train_test_seeds),
         snrna = expand(output + "results/rem_cell_type/Init-sel-random-rem-{cell_type}-snRNASeq-highest_entropy-ALAlg-multinom-cell_num-20-seed-{s}.tsv", cell_type = ['Ductal', 'Endothelial', 'Schwann'], s = train_test_seeds),
-        snrna_group = expand(output + "results/rem_cell_type/Init-sel-random-rem-schwann_l1-snRNASeq-highest_entropy-ALAlg-multinom-cell_num-20-seed-{s}.tsv", s = train_test_seeds),
+        snrna_group = expand(output + "results/rem_cell_type_group/Init-sel-random-rem-schwann_l1-snRNASeq-highest_entropy-ALAlg-multinom-cell_num-20-seed-{s}.tsv", s = train_test_seeds),
         cytof = expand(output + "results/rem_cell_type/Init-sel-random-rem-{cell_type}-CyTOF-highest_entropy-ALAlg-multinom-cell_num-20-seed-{s}.tsv", cell_type = ['Classical Monocytes', 'CD8 T cells'], s = train_test_seeds),
         snrna_supp = expand(output + "results/rem_cell_type/Init-sel-random-rem-{cell_type}-snRNASeq-highest_entropy-ALAlg-rf-cell_num-20-seed-{s}.tsv", cell_type = ['Ductal', 'Endothelial', 'Schwann'], s = train_test_seeds),
         snrna_supp_group = expand(output + "results/rem_cell_type_group/Init-sel-random-rem-schwann_l1-snRNASeq-highest_entropy-ALAlg-rf-cell_num-20-seed-{s}.tsv", s = train_test_seeds),
@@ -96,23 +99,10 @@ rule self_training:
         sup_scrna = output + "paper-figures/Supp-scRNASeq-f1-improvement.pdf",
         sup_snrna = output + "paper-figures/Supp-snRNASeq-f1-improvement.pdf"
     script:
-        "figures/pred-labelling.R"
+        "figures/pred-labeling.R"
 
 
 #### Supplemental figs
-# similarity fig
-rule cell_type_sim:
-    input:
-        cytof_sim = output + "results/cell-type-similarity/similarity-CyTOF-seed-0.tsv",
-        scrna_sim = output + "results/cell-type-similarity/similarity-scRNASeq-seed-0.tsv",
-        snrna_sim = output + "results/cell-type-similarity/similarity-snRNASeq-seed-0.tsv"
-    output:
-        scrna = output + "paper-figures/Supp-cell-type-sim-scRNASeq.pdf",
-        snrna = output + "paper-figures/Supp-cell-type-sim-snRNASeq.pdf",
-        cytof = output + "paper-figures/Supp-cell-type-sim-CyTOF.pdf"
-    script:
-        "figures/cell-type-similarity.R"
-
 # ar params
 rule AR_params:
     input:
diff --git a/pipeline/whatsthatcell-helpers.R b/pipeline/whatsthatcell-helpers.R
index 7ffccc0..3396990 100644
--- a/pipeline/whatsthatcell-helpers.R
+++ b/pipeline/whatsthatcell-helpers.R
@@ -687,7 +687,7 @@ al_colours <- function(){
   c("LR" = "#DA94D4", "RF" = "#7EA3CC")
 }
 
-sel_met_cols <- hue_pal()(9)
+sel_met_cols <- c("#F8766D", "#D39200", "#93AA00", "#00BA38", "#00C19F", "#00B9E3", "#619CFF", "#DB72FB", "#FF61C3") #hue_pal()(9)
 names(sel_met_cols) <- c("AR Marker", "AR No Marker", "Random", "AL 0.05-maxp", "AL 0.25-maxp",
                          "AL 0.75-entropy", "AL 0.95-entropy", "AL Highest entropy", "AL Lowest maxp")