Skip to content
Open
56 changes: 56 additions & 0 deletions preprocessing/shuffling_coordinates/shuffle_coordinates.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/usr/bin/env Rscript

# Author_and_contribution: Niklas Mueller-Boetticher; created template
# Author_and_contribution: Kim Vucinic; modified template and created script

suppressPackageStartupMessages(library(optparse))

# Command-line interface ----
# Each option is built on its own and then collected into `option_list`.
coordinates_option <- make_option(
  c("-c", "--coordinates"),
  type = "character",
  default = NULL,
  help = "Path to coordinates (as tsv)."
)
seed_option <- make_option(
  "--seed",
  type = "integer",
  default = NULL,
  help = "Seed to use for random operations."
)
out_file_option <- make_option(
  c("-o", "--out_file"),
  type = "character",
  default = NULL,
  help = "Output file."
)
option_list <- list(coordinates_option, seed_option, out_file_option)

# Text handed to OptionParser() as its usage string
description <- "Shuffling coordinates in coordinates.tsv"

# Build the parser and read the command-line arguments into `opt`
opt_parser <- OptionParser(usage = description, option_list = option_list)
opt <- parse_args(opt_parser)

# Input and output paths supplied on the command line
coord_file <- opt$coordinates
out_file <- opt$out_file

# Both options default to NULL. Fail early with a clear message instead of
# letting read.delim() / write.table() raise a cryptic error later on.
if (is.null(coord_file)) {
  stop("No coordinates file given. Use -c/--coordinates.", call. = FALSE)
}
if (is.null(out_file)) {
  stop("No output file given. Use -o/--out_file.", call. = FALSE)
}

# Seed (when --seed is not given this is NULL and set.seed(NULL) is called)
seed <- opt$seed
set.seed(seed)

# Read the table; the first column is used as row names (the IDs).
# check.names = FALSE keeps the column headers exactly as they are in the
# input so that they are written back unchanged.
df <- read.delim(coord_file, sep = "\t", row.names = 1, check.names = FALSE)
if (!all(c("x", "y") %in% colnames(df))) {
  stop(
    "X and y coordinates are not present in the file. Check your file.",
    call. = FALSE
  )
}

# Randomly reassign the IDs to the rows, then arrange the rows so that the
# IDs appear in their original order in the output.
ids <- rownames(df)
shuffled_ids <- sample(ids)
# Row i received ID shuffled_ids[i]; for every original ID, look up the row
# that now carries it. drop = FALSE guarantees a data frame is returned.
df_final <- df[match(ids, shuffled_ids), , drop = FALSE]
rownames(df_final) <- ids

## Write output
# col.names = NA writes an empty header field above the row-name column.
# Passing the path directly lets write.table() open and close the file itself.
write.table(
  df_final,
  file = out_file,
  sep = "\t",
  col.names = NA,
  quote = FALSE
)
5 changes: 5 additions & 0 deletions preprocessing/shuffling_coordinates/shuffle_coordinates.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Conda environment for shuffle_coordinates.r
channels:
- conda-forge
dependencies:
# R interpreter, pinned to a specific version
- r-base==4.3.1
# Command-line option parsing (the script loads optparse)
- r-optparse=1.7.3
54 changes: 54 additions & 0 deletions preprocessing/shuffling_labels/shuffle_labels.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/usr/bin/env Rscript

# Author_and_contribution: Niklas Mueller-Boetticher; created template
# Author_and_contribution: Kim Vucinic; modified template and created script

suppressPackageStartupMessages(library(optparse))

# Command-line interface ----
# Each option is built on its own and then collected into `option_list`.
labels_option <- make_option(
  c("-l", "--labels"),
  type = "character",
  default = NULL,
  help = "Labels from domain clustering. Path to labels (as tsv)."
)
seed_option <- make_option(
  "--seed",
  type = "integer",
  default = NULL,
  help = "Seed to use for random operations."
)
out_file_option <- make_option(
  c("-o", "--out_file"),
  type = "character",
  default = NULL,
  help = "Output file."
)
option_list <- list(labels_option, seed_option, out_file_option)

# Text handed to OptionParser() as its usage string
description <- "Shuffling labels..."

# Build the parser and read the command-line arguments into `opt`
opt_parser <- OptionParser(usage = description, option_list = option_list)
opt <- parse_args(opt_parser)

# Input and output paths supplied on the command line
label_file <- opt$labels
out_file <- opt$out_file

# Both options default to NULL. Fail early with a clear message instead of
# letting read.delim() / write.table() raise a cryptic error later on.
if (is.null(label_file)) {
  stop("No labels file given. Use -l/--labels.", call. = FALSE)
}
if (is.null(out_file)) {
  stop("No output file given. Use -o/--out_file.", call. = FALSE)
}

# Seed (when --seed is not given this is NULL and set.seed(NULL) is called)
seed <- opt$seed
set.seed(seed)

# Read the table; the first column is used as row names (the IDs).
# check.names = FALSE keeps the column headers exactly as they are in the
# input so that they are written back unchanged.
df <- read.delim(label_file, sep = "\t", row.names = 1, check.names = FALSE)
if (!("label" %in% colnames(df))) {
  stop("Label column not present in the file. Check your file.", call. = FALSE)
}

# Randomize labels
# Permute by row index rather than calling sample() on the values: for a
# single numeric value x >= 1, sample(x) draws from 1:x instead of returning
# x, which breaks files that contain only one row.
df$label <- df$label[sample.int(nrow(df))]

## Write output
# col.names = NA writes an empty header field above the row-name column.
# Passing the path directly lets write.table() open and close the file itself.
write.table(
  df,
  file = out_file,
  sep = "\t",
  col.names = NA,
  quote = FALSE
)
5 changes: 5 additions & 0 deletions preprocessing/shuffling_labels/shuffle_labels.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Conda environment for shuffle_labels.r
channels:
- conda-forge
dependencies:
# R interpreter, pinned to a specific version
- r-base==4.3.1
# Command-line option parsing (the script loads optparse)
- r-optparse=1.7.3