SpatialHackathon · vucinick · Dec 12, 2023 · Dec 12, 2023 · Dec 12, 2023 · Dec 12, 2023
diff --git a/preprocessing/shuffling/shuffle_coordinates.r b/preprocessing/shuffling/shuffle_coordinates.r
@@ -0,0 +1,75 @@
+#!/usr/bin/env Rscript
+
+# Author_and_contribution: Niklas Mueller-Boetticher; created template
+# Author_and_contribution: Kim Vucinic; modified template and created script
+
+# Shuffle coordinates: every ID (row name) of the input table is kept, but the
+# rows of values (x, y and any further columns) are randomly reassigned among
+# the IDs. The result is written as a tab-separated file with the IDs in their
+# original order.
+
+suppressPackageStartupMessages(library(optparse))
+
+# Arguments
+option_list <- list(
+  make_option(
+    c("-c", "--coordinates"),
+    type = "character", default = NULL,
+    help = "Path to coordinates (as tsv)."
+  ),
+  make_option(
+    c("--seed"),
+    type = "integer", default = NULL,
+    help = "Seed to use for random operations."
+  ),
+  make_option(
+    c("-o", "--out_file"),
+    type = "character", default = NULL,
+    help = "Output file."
+  )
+)
+
+# Description
+description <- "Shuffling coordinates in coordinates.tsv"
+
+opt_parser <- OptionParser(
+  usage = description,
+  option_list = option_list
+)
+opt <- parse_args(opt_parser)
+
+# Both paths default to NULL; fail early with a readable message instead of
+# the low-level error the file functions raise when handed NULL.
+if (is.null(opt$coordinates) || is.null(opt$out_file)) {
+  stop("Both --coordinates and --out_file must be provided.", call. = FALSE)
+}
+
+# Use these filepaths as input
+coord_file <- opt$coordinates
+
+# Seed (NULL when --seed is omitted; set.seed(NULL) re-initialises the RNG,
+# so such runs are not reproducible)
+seed <- opt$seed
+set.seed(seed)
+
+## Your code goes here
+# The first column of the file becomes the row names (the IDs).
+df <- read.delim(coord_file, sep = "\t", row.names = 1)
+if (!all(c("x", "y") %in% colnames(df))) {
+  stop("X and y coordinates are not present in the file. Check your file.")
+}
+
+# Randomize which ID owns which row of values while keeping the IDs in their
+# original order: draw a permutation, reorder the rows by its inverse
+# (order(perm)) and put the original IDs back on the reordered rows.
+ids <- rownames(df)
+perm <- sample.int(length(ids))
+df_final <- df[order(perm), , drop = FALSE]
+rownames(df_final) <- ids
+
+## Write output
+# col.names = NA writes an empty header cell above the row-name column.
+write.table(
+  df_final, opt$out_file,
+  sep = "\t", col.names = NA, quote = FALSE
+)
diff --git a/preprocessing/shuffling/shuffle_coordinates.yml b/preprocessing/shuffling/shuffle_coordinates.yml
@@ -0,0 +1,6 @@
+channels:  # package sources for this environment
+  - conda-forge
+  - defaults
+dependencies:  # presumably the environment for shuffle_coordinates.r (same basename)
+  - r-base==4.3.1  # R itself; the script is launched through Rscript
+  - r-optparse=1.7.3  # optparse, loaded by the script via library(optparse)
diff --git a/preprocessing/shuffling/shuffle_labels.r b/preprocessing/shuffling/shuffle_labels.r
@@ -0,0 +1,72 @@
+#!/usr/bin/env Rscript
+
+# Author_and_contribution: Niklas Mueller-Boetticher; created template
+# Author_and_contribution: Kim Vucinic; modified template and created script
+
+# Shuffle labels: the values of the "label" column are randomly permuted
+# across the IDs (row names). The output is a tab-separated file holding only
+# the IDs, in their original order, and the shuffled label column.
+
+suppressPackageStartupMessages(library(optparse))
+
+# Arguments
+option_list <- list(
+  make_option(
+    c("-l", "--labels"),
+    type = "character", default = NULL,
+    help = "Labels from domain clustering. Path to labels (as tsv)."
+  ),
+  make_option(
+    c("--seed"),
+    type = "integer", default = NULL,
+    help = "Seed to use for random operations."
+  ),
+  make_option(
+    c("-o", "--out_file"),
+    type = "character", default = NULL,
+    help = "Output file."
+  )
+)
+
+# Description
+description <- "Shuffling labels..."
+
+opt_parser <- OptionParser(
+  usage = description,
+  option_list = option_list
+)
+opt <- parse_args(opt_parser)
+
+# Both paths default to NULL; fail early with a readable message instead of
+# the low-level error the file functions raise when handed NULL.
+if (is.null(opt$labels) || is.null(opt$out_file)) {
+  stop("Both --labels and --out_file must be provided.", call. = FALSE)
+}
+
+# Use these filepaths as input
+label_file <- opt$labels
+
+# Seed (NULL when --seed is omitted; set.seed(NULL) re-initialises the RNG,
+# so such runs are not reproducible)
+seed <- opt$seed
+set.seed(seed)
+
+## Your code goes here
+# The first column of the file becomes the row names (the IDs).
+df <- read.delim(label_file, sep = "\t", row.names = 1)
+if (!("label" %in% colnames(df))) {
+  stop("Label column not present in the file. Check your file.")
+}
+
+# Randomize labels
+# Permute by index instead of calling sample(df$label): with a single numeric
+# label k, sample(k) would return a permutation of 1:k, not the label itself.
+shuffled <- df[["label"]][sample.int(nrow(df))]
+df_randomized <- data.frame(label = shuffled, row.names = rownames(df))
+
+## Write output
+# col.names = NA writes an empty header cell above the row-name column.
+write.table(
+  df_randomized, opt$out_file,
+  sep = "\t", col.names = NA, quote = FALSE
+)
diff --git a/preprocessing/shuffling/shuffle_labels.yml b/preprocessing/shuffling/shuffle_labels.yml
@@ -0,0 +1,6 @@
+channels:  # package sources for this environment
+  - conda-forge
+  - defaults
+dependencies:  # presumably the environment for shuffle_labels.r (same basename)
+  - r-base==4.3.1  # R itself; the script is launched through Rscript
+  - r-optparse=1.7.3  # optparse, loaded by the script via library(optparse)