diff --git a/preprocessing/shuffling_coordinates/shuffle_coordinates.r b/preprocessing/shuffling_coordinates/shuffle_coordinates.r
new file mode 100755
index 00000000..18c0922d
--- /dev/null
+++ b/preprocessing/shuffling_coordinates/shuffle_coordinates.r
@@ -0,0 +1,73 @@
+#!/usr/bin/env Rscript
+
+# Author_and_contribution: Niklas Mueller-Boetticher; created template
+# Author_and_contribution: Kim Vucinic; modified template and created script
+
+# Shuffle the assignment between observation IDs and their coordinates.
+# Reads a tab-separated table whose first column is used as row names (IDs)
+# and which has `x` and `y` columns, permutes the rows relative to the IDs,
+# and writes the result (IDs in their original order) as a tab-separated file.
+
+suppressPackageStartupMessages(library(optparse))
+
+# Arguments
+option_list <- list(
+  make_option(
+    c("-c", "--coordinates"),
+    type = "character", default = NULL,
+    help = "Path to coordinates (as tsv)."
+  ),
+  make_option(
+    c("--seed"),
+    type = "integer", default = NULL,
+    help = "Seed to use for random operations."
+  ),
+  make_option(
+    c("-o", "--out_file"),
+    type = "character", default = NULL,
+    help = "Output file."
+  )
+)
+
+# Description
+description <- "Shuffling coordinates in coordinates.tsv"
+
+# Use `description` so that --help keeps the generated usage line.
+opt_parser <- OptionParser(
+  description = description,
+  option_list = option_list
+)
+opt <- parse_args(opt_parser)
+
+# Fail early with a clear message instead of an obscure read/write error.
+if (is.null(opt$coordinates) || is.null(opt$out_file)) {
+  print_help(opt_parser)
+  stop("Both --coordinates and --out_file must be provided.")
+}
+
+# Use these filepaths as input
+coord_file <- opt$coordinates
+
+# Seed (without --seed, set.seed(NULL) re-initialises the generator)
+seed <- opt$seed
+set.seed(seed)
+
+## Your code goes here
+df <- read.delim(coord_file, sep = "\t", row.names = 1)
+if (!all(c("x", "y") %in% colnames(df))) {
+  stop("X and y coordinates are not present in the file. Check your file.")
+}
+
+# Randomize which ID owns which coordinate row while keeping the IDs in their
+# original order. Ordering by the permutation gives the same table as
+# relabelling the rows with shuffled IDs and sorting back to the ID order.
+ids <- rownames(df)
+perm <- sample.int(nrow(df))
+df_final <- df[order(perm), , drop = FALSE]
+rownames(df_final) <- ids
+
+## Write output
+write.table(
+  df_final,
+  file = opt$out_file, sep = "\t", col.names = NA, quote = FALSE
+)
\ No newline at end of file
diff --git a/preprocessing/shuffling_coordinates/shuffle_coordinates.yml b/preprocessing/shuffling_coordinates/shuffle_coordinates.yml
new file mode 100644
index 00000000..4e2066e6
--- /dev/null
+++ b/preprocessing/shuffling_coordinates/shuffle_coordinates.yml
@@ -0,0 +1,5 @@
+channels:
+  - conda-forge
+dependencies:
+  - r-base==4.3.1
+  - r-optparse=1.7.3
\ No newline at end of file
diff --git a/preprocessing/shuffling_labels/shuffle_labels.r b/preprocessing/shuffling_labels/shuffle_labels.r
new file mode 100755
index 00000000..7269be6d
--- /dev/null
+++ b/preprocessing/shuffling_labels/shuffle_labels.r
@@ -0,0 +1,67 @@
+#!/usr/bin/env Rscript
+
+# Author_and_contribution: Niklas Mueller-Boetticher; created template
+# Author_and_contribution: Kim Vucinic; modified template and created script
+
+# Shuffle labels across observations.
+# Reads a tab-separated table whose first column is used as row names (IDs)
+# and which has a `label` column, permutes that column, and writes the table
+# back out as a tab-separated file.
+
+suppressPackageStartupMessages(library(optparse))
+
+# Arguments
+option_list <- list(
+  make_option(
+    c("-l", "--labels"),
+    type = "character", default = NULL,
+    help = "Labels from domain clustering. Path to labels (as tsv)."
+  ),
+  make_option(
+    c("--seed"),
+    type = "integer", default = NULL,
+    help = "Seed to use for random operations."
+  ),
+  make_option(
+    c("-o", "--out_file"),
+    type = "character", default = NULL,
+    help = "Output file."
+  )
+)
+
+# Description
+description <- "Shuffling labels..."
+
+# Use `description` so that --help keeps the generated usage line.
+opt_parser <- OptionParser(
+  description = description,
+  option_list = option_list
+)
+opt <- parse_args(opt_parser)
+
+# Fail early with a clear message instead of an obscure read/write error.
+if (is.null(opt$labels) || is.null(opt$out_file)) {
+  print_help(opt_parser)
+  stop("Both --labels and --out_file must be provided.")
+}
+
+# Use these filepaths as input
+label_file <- opt$labels
+
+# Seed (without --seed, set.seed(NULL) re-initialises the generator)
+seed <- opt$seed
+set.seed(seed)
+
+## Your code goes here
+df <- read.delim(label_file, sep = "\t", row.names = 1)
+if (!("label" %in% colnames(df))) {
+  stop("Label column not present in the file. Check your file.")
+}
+
+# Randomize labels. Index with a permutation instead of calling sample() on
+# the column: sample(x) treats a single number x as 1:x, which breaks a
+# one-row table with a numeric label.
+df$label <- df$label[sample.int(nrow(df))]
+
+## Write output
+write.table(df, file = opt$out_file, sep = "\t", col.names = NA, quote = FALSE)
\ No newline at end of file
diff --git a/preprocessing/shuffling_labels/shuffle_labels.yml b/preprocessing/shuffling_labels/shuffle_labels.yml
new file mode 100644
index 00000000..4e2066e6
--- /dev/null
+++ b/preprocessing/shuffling_labels/shuffle_labels.yml
@@ -0,0 +1,5 @@
+channels:
+  - conda-forge
+dependencies:
+  - r-base==4.3.1
+  - r-optparse=1.7.3
\ No newline at end of file