Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Shuffling labels and coordinates #136

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
56 changes: 56 additions & 0 deletions preprocessing/shuffling_coordinates/shuffle_coordinates.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/usr/bin/env Rscript

# Author_and_contribution: Niklas Mueller-Boetticher; created template
# Author_and_contribution: Kim Vucinic; modified template and created script

suppressPackageStartupMessages(library(optparse))

# Command-line options ----
# Each option is built on its own and then collected, in the order
# coordinates / seed / out_file, into the list handed to the option parser.
# All three default to NULL, i.e. "not supplied".
option_list <- local({
  # Input table with the coordinates
  coordinates_opt <- make_option(
    c("-c", "--coordinates"),
    type = "character",
    default = NULL,
    help = "Path to coordinates (as tsv)."
  )

  # Seed for the random number generator (long flag only)
  seed_opt <- make_option(
    "--seed",
    type = "integer",
    default = NULL,
    help = "Seed to use for random operations."
  )

  # Destination of the shuffled table
  out_file_opt <- make_option(
    c("-o", "--out_file"),
    type = "character",
    default = NULL,
    help = "Output file."
  )

  list(coordinates_opt, seed_opt, out_file_opt)
})

# Parser ----
# One-line summary of what the script does; shown in the `--help` output.
description <- "Shuffling coordinates in coordinates.tsv"

# `usage` is the invocation synopsis (optparse replaces `%prog` with the
# script name), so the free-text summary is passed as `description` instead
# of being printed as the "Usage:" line.
opt_parser <- OptionParser(
  usage = "%prog [options]",
  option_list = option_list,
  description = description
)

# Parsed command-line values; read below as opt$coordinates, opt$seed and
# opt$out_file.
opt <- parse_args(opt_parser)

# Inputs ----
# Both paths default to NULL when the flag is omitted. Fail early with a clear
# message instead of letting read.delim() / write.table() raise an opaque
# error (the latter only after all the work has been done).
coord_file <- opt$coordinates
outfile <- opt$out_file
if (is.null(coord_file)) {
  stop("No coordinates file given. Use -c/--coordinates.", call. = FALSE)
}
if (is.null(outfile)) {
  stop("No output file given. Use -o/--out_file.", call. = FALSE)
}

# Seed ----
# With no --seed the value is NULL and set.seed(NULL) re-initialises the RNG,
# so the shuffle is only reproducible when a seed is supplied.
seed <- opt$seed
set.seed(seed)

# Shuffle ----
# The first column holds the IDs and becomes the row names.
# check.names = FALSE keeps the header exactly as written in the file, so the
# remaining columns are passed through with their original names.
df <- read.delim(coord_file, sep = "\t", row.names = 1, check.names = FALSE)
if (!all(c("x", "y") %in% colnames(df))) {
  stop(
    "X and y coordinates are not present in the file. Check your file.",
    call. = FALSE
  )
}

# Randomly reassign the IDs to the rows, then bring the rows back into the
# original ID order: the output lists the IDs in the same order as the input,
# but each ID now carries the values of a randomly chosen row.
df_order <- rownames(df)
rownames(df) <- sample(df_order)
df_final <- df[match(df_order, rownames(df)), , drop = FALSE]

# Write output ----
# col.names = NA writes an empty header cell above the row-name column.
write.table(df_final, outfile, sep = "\t", col.names = NA, quote = FALSE)
5 changes: 5 additions & 0 deletions preprocessing/shuffling_coordinates/shuffle_coordinates.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Environment specification (conda-style channels/dependencies) for
# shuffle_coordinates.r.
# Packages are resolved from the conda-forge channel only.
channels:
- conda-forge
dependencies:
# R interpreter, pinned with '=='.
- r-base==4.3.1
# Command-line parsing package loaded by the script via library(optparse).
# NOTE(review): pinned with '=' while r-base uses '==' - confirm the
# differing pin operators are intentional.
- r-optparse=1.7.3
54 changes: 54 additions & 0 deletions preprocessing/shuffling_labels/shuffle_labels.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/usr/bin/env Rscript

# Author_and_contribution: Niklas Mueller-Boetticher; created template
# Author_and_contribution: Kim Vucinic; modified template and created script

suppressPackageStartupMessages(library(optparse))

# Command-line options ----
# Each option is built on its own and then collected, in the order
# labels / seed / out_file, into the list handed to the option parser.
# All three default to NULL, i.e. "not supplied".
option_list <- local({
  # Input table with the labels
  labels_opt <- make_option(
    c("-l", "--labels"),
    type = "character",
    default = NULL,
    help = "Labels from domain clustering. Path to labels (as tsv)."
  )

  # Seed for the random number generator (long flag only)
  seed_opt <- make_option(
    "--seed",
    type = "integer",
    default = NULL,
    help = "Seed to use for random operations."
  )

  # Destination of the shuffled table
  out_file_opt <- make_option(
    c("-o", "--out_file"),
    type = "character",
    default = NULL,
    help = "Output file."
  )

  list(labels_opt, seed_opt, out_file_opt)
})

# Parser ----
# One-line summary of what the script does; shown in the `--help` output.
description <- "Shuffling labels..."

# `usage` is the invocation synopsis (optparse replaces `%prog` with the
# script name), so the free-text summary is passed as `description` instead
# of being printed as the "Usage:" line.
opt_parser <- OptionParser(
  usage = "%prog [options]",
  option_list = option_list,
  description = description
)

# Parsed command-line values; read below as opt$labels, opt$seed and
# opt$out_file.
opt <- parse_args(opt_parser)

# Inputs ----
# Both paths default to NULL when the flag is omitted. Fail early with a clear
# message instead of letting read.delim() / write.table() raise an opaque
# error (the latter only after all the work has been done).
label_file <- opt$labels
outfile <- opt$out_file
if (is.null(label_file)) {
  stop("No labels file given. Use -l/--labels.", call. = FALSE)
}
if (is.null(outfile)) {
  stop("No output file given. Use -o/--out_file.", call. = FALSE)
}

# Seed ----
# With no --seed the value is NULL and set.seed(NULL) re-initialises the RNG,
# so the shuffle is only reproducible when a seed is supplied.
seed <- opt$seed
set.seed(seed)

# Shuffle ----
# The first column holds the IDs and becomes the row names.
# check.names = FALSE keeps the header exactly as written in the file, so the
# remaining columns are passed through with their original names.
df <- read.delim(label_file, sep = "\t", row.names = 1, check.names = FALSE)
if (!("label" %in% colnames(df))) {
  stop("Label column not present in the file. Check your file.", call. = FALSE)
}

# Permute the labels across rows. Indexing with sample.int() instead of
# calling sample() on the column avoids sample()'s single-value special case:
# for one numeric label k >= 1, sample(k) would draw from 1:k rather than
# return k. For longer columns the random draws are the same as sample()'s.
df$label <- df$label[sample.int(nrow(df))]

# Write output ----
# col.names = NA writes an empty header cell above the row-name column.
write.table(df, outfile, sep = "\t", col.names = NA, quote = FALSE)
5 changes: 5 additions & 0 deletions preprocessing/shuffling_labels/shuffle_labels.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Environment specification (conda-style channels/dependencies) for
# shuffle_labels.r.
# Packages are resolved from the conda-forge channel only.
channels:
- conda-forge
dependencies:
# R interpreter, pinned with '=='.
- r-base==4.3.1
# Command-line parsing package loaded by the script via library(optparse).
# NOTE(review): pinned with '=' while r-base uses '==' - confirm the
# differing pin operators are intentional.
- r-optparse=1.7.3