Skip to content

Commit

Permalink
updated reshape_chroms, added progress_bar
Browse files Browse the repository at this point in the history
  • Loading branch information
ethanbass committed Apr 6, 2023
1 parent f39e80b commit 4b8e303
Show file tree
Hide file tree
Showing 12 changed files with 288 additions and 66 deletions.
2 changes: 2 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ URL: https://ethanbass.github.io/chromConverter, https://github.com/ethanbass/ch
BugReports: https://github.com/ethanbass/chromConverter/issues
Imports:
bitops,
fs,
purrr,
readxl,
reticulate,
Expand All @@ -29,6 +30,7 @@ Imports:
Suggests:
entab,
mzR,
pbapply,
testthat (>= 3.0.0)
Config/reticulate: list( packages = list( list(package = "scipy"),
list(package="numpy"),
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
the `read_chemstation_uv` function.
* Added `extract_metadata` function for extracting metadata from a list of chromatograms
and returning it as a `data.frame` or `tibble`.
* Added `progress_bar` option in `read_chroms`.
* Updated `reshape_chroms` and `reshape_chrom` to allow switching between "wide" and "long" formats.
* Added wide format option in `read_mzml`.
* Added automatic detection of file formats by `read_chroms`.
* Minor changes to storage of metadata in attributes for the purpose of simplification.
* Minor updates to documentation.

Expand Down
7 changes: 5 additions & 2 deletions R/attach_metadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,9 @@ extract_metadata <- function(chrom_list,
"data_format", "parser","format_out"),
format_out = c("data.frame", "tibble")
){
if (is.matrix(chrom_list) | is.data.frame(chrom_list)){
chrom_list <- list(chrom_list)
}
what <- match.arg(what, several.ok = TRUE)
format_out <- match.arg(format_out, c("data.frame", "tibble"))
metadata <- purrr::map_df(chrom_list, function(chrom){
Expand All @@ -264,10 +267,10 @@ extract_metadata <- function(chrom_list,
})
if (format_out == "tibble"){
metadata <- tibble::add_column(.data = metadata,
data.frame(name=names(chrom_list)),
data.frame(name = names(chrom_list)),
.before=TRUE)
} else if (format_out == "data.frame"){
metadata <- data.frame(metadata, row.names = names(chrom_list))
metadata <- data.frame(name = names(chrom_list), metadata, row.names = names(chrom_list))
}
metadata
}
54 changes: 29 additions & 25 deletions R/parsers.R
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,7 @@ read_chemstation_csv <- function(file, format_out = c("matrix","data.frame")){
#' @param what What types of data to return (argument to \code{\link[RaMS]{grabMSdata}}.
#' Options include \code{MS1}, \code{MS2}, \code{BPC}, \code{TIC}, \code{DAD},
#' \code{chroms}, \code{metadata}, or \code{everything}).
#' @param verbose Argument to \code{\link[RaMS]{grabMSdata}} controlling \code{verbosity}.
#' @param ... Additional arguments to \code{\link[RaMS]{grabMSdata}}.
#' @return If \code{RaMS} is selected, the function will return a list of "tidy"
#' \code{data.table} objects. If \code{mzR} is selected, the function will return a
Expand All @@ -268,43 +269,46 @@ read_chemstation_csv <- function(file, format_out = c("matrix","data.frame")){
#' @export read_mzml

read_mzml <- function(path, format_out = c("matrix", "data.frame"),
data_format = c("wide","long"),
data_format = c("long","wide"),
parser=c("RaMS","mzR"),
what=c("MS1","MS2", "BPC", "TIC", "DAD",
"chroms", "metadata", "everything"), ...){
"chroms", "metadata", "everything"), verbose = FALSE,
...){
parser <- match.arg(parser, c("RaMS", "mzR"))
format_out <- match.arg(format_out, c("matrix", "data.frame"))
data_format <- match.arg(data_format, c("wide","long"))
data_format <- match.arg(data_format, c("long","wide"))
what <- match.arg(what, c("MS1","MS2", "BPC", "TIC", "DAD",
"chroms", "metadata", "everything"), several.ok = TRUE)
if (all(c("MS1","MS2", "BPC", "TIC", "DAD",
"chroms", "metadata", "everything") %in% what)){
what <- grep("everything",what, invert = TRUE,value = TRUE)
}
if (parser == "RaMS"){
data <- RaMS::grabMSdata(path, grab_what = what, ...)
}
if (parser == "mzR"){
if (!requireNamespace("mzR", quietly = TRUE)) {
stop(
"The `mzR` package must be installed from Bioconductor to read `mzML` files:
BiocManager::install('mzR')",
call. = FALSE)
}
x <- mzR::openMSfile(path)
info <- mzR::header(x)
UV_scans <- which(info$msLevel==0)
rts <- info[UV_scans,"retentionTime"]
lambdas <- seq(info$scanWindowLowerLimit[UV_scans[1]], info$scanWindowUpperLimit[UV_scans[1]])
pks <- mzR::peaks(x)
data <- t(sapply(UV_scans, function(j) pks[[j]][,2]))
rownames(data) <- rts
colnames(data) <- lambdas
if (data_format == "long"){
data <- reshape_chrom(data)
data <- RaMS::grabMSdata(path, grab_what = what, verbosity = verbose, ...)
if (data_format == "wide"){
data <- reshape_chroms(data, data_format = "wide")
}
if (format_out == "data.frame"){
data <- as.data.frame(data)
} else if (parser == "mzR"){
if (!requireNamespace("mzR", quietly = TRUE)) {
stop(
"The `mzR` package is not installed. Please install it from Bioconductor:
BiocManager::install('mzR')",
call. = FALSE)
}
x <- mzR::openMSfile(path)
info <- mzR::header(x)
UV_scans <- which(info$msLevel==0)
rts <- info[UV_scans,"retentionTime"]
lambdas <- seq(info$scanWindowLowerLimit[UV_scans[1]], info$scanWindowUpperLimit[UV_scans[1]])
pks <- mzR::peaks(x)
data <- t(sapply(UV_scans, function(j) pks[[j]][,2]))
rownames(data) <- rts
colnames(data) <- lambdas
if (data_format == "long"){
data <- reshape_chrom(data)
}
if (format_out == "data.frame"){
data <- as.data.frame(data)
}
}
data
Expand Down
50 changes: 32 additions & 18 deletions R/read_chroms.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@
#' \code{cdf}, \code{mzml}, or \code{animl}.
#' @param read_metadata Logical, whether to attach metadata (if it's available).
#' Defaults to TRUE.
#' @param progress_bar Logical. Whether to show progress bar. Defaults to
#' \code{TRUE} if \code{\link[pbapply]{pbapply}} is installed.
#' @param dat Existing list of chromatograms to append results.
#' (Defaults to NULL).
#' @return A list of chromatograms in \code{matrix} or \code{data.frame} format,
Expand Down Expand Up @@ -73,31 +75,42 @@ read_chroms <- function(paths, find_files,
data_format = c("wide","long"),
export = FALSE, path_out = NULL,
export_format = c("csv", "cdf", "mzml", "animl"),
read_metadata = TRUE, dat = NULL){
if (length(format_in) > 1){
stop("Please specify the file format of your chromatograms by setting the `format_in` argument.")
}
format_in <- match.arg(format_in, c("agilent_d", "chemstation", "chemstation_uv",
"chemstation_ch", "chemstation_fid",
"chemstation_csv", "masshunter_dad",
"shimadzu_fid", "shimadzu_dad", "chromeleon_uv",
"thermoraw", "mzml", "waters_arw",
"waters_raw", "msd", "csd", "wsd", "other"))
read_metadata = TRUE, progress_bar, dat = NULL){
data_format <- match.arg(data_format, c("wide","long"))
format_out <- match.arg(format_out, c("matrix", "data.frame"))
parser <- match.arg(parser, c("", "chromconverter", "aston","entab",
"thermoraw", "openchrom", "rainbow"))
if (missing(progress_bar)){
progress_bar <- check_for_pkg("pbapply", return_boolean = TRUE)
}
if (missing(find_files)){
if (!(format_in %in% c("agilent_d", "waters_raw"))){
ft <- all(file_test("-f", paths))
if (length(format_in) == 1){
if (!(format_in %in% c("agilent_d", "waters_raw"))){
ft <- all(file_test("-f", paths))
} else {
ext <- switch(format_in,
agilent_d = "\\.d",
waters_raw = "\\.raw")
ft <- all(grepl(ext, paths, ignore.case = TRUE))
}
find_files <- !ft
} else{
ext <- switch(format_in,
agilent_d = "\\.d",
waters_raw = "\\.raw")
ft <- all(grepl(ext, paths, ignore.case = TRUE))
find_files <- FALSE
}
find_files <- !ft
}
if (length(format_in) > 1){
if (!find_files){
format_in <- get_filetype(ifelse(length(paths)>1, paths[[1]], paths))
} else{
stop("Please specify the file format of your chromatograms by setting the `format_in` argument.")
}
}
format_in <- match.arg(format_in, c("agilent_d", "chemstation", "chemstation_uv",
"chemstation_ch", "chemstation_fid",
"chemstation_csv", "masshunter_dad",
"shimadzu_fid", "shimadzu_dad", "chromeleon_uv",
"thermoraw", "mzml", "waters_arw",
"waters_raw", "msd", "csd", "wsd", "other"))
if (parser == ""){
parser <- check_parser(format_in, find = TRUE)
}
Expand Down Expand Up @@ -244,7 +257,8 @@ read_chroms <- function(paths, find_files,
} ))
} else {file_names <- sapply(strsplit(basename(files),"\\."), function(x) x[1])}
if (parser != "openchrom"){
data <- lapply(X = files, function(file){
laplee <- choose_apply_fnc(progress_bar)
data <- laplee(X = files, function(file){
df <- try(converter(file), silent = TRUE)
})
errors <- which(sapply(data, function(x) inherits(x,"try-error")))
Expand Down
102 changes: 89 additions & 13 deletions R/reshape_chroms.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,42 +5,118 @@
#' @param idx Indices of chromatograms to convert
#' @param sample_var String with name of new column containing sample IDs.
#' @param lambdas Wavelength(s) to include.
#' @param data_format Whether to return data in \code{wide} or \code{long} format.
#' @param combine Whether to combine chromatograms into a single \code{data.frame}
#' (applicable only if \code{data_format} is TRUE).
#' @param ... Additional arguments to \code{reshape_chrom}.
#' @return A list of chromatographic matrices in long format.
#' @author Ethan Bass

reshape_chroms <- function(x, idx, sample_var = "sample", lambdas){
reshape_chroms <- function(x, idx, sample_var = "sample", lambdas=NULL,
data_format, combine = TRUE, ...){
if (missing(data_format)){
data_format <- switch(attr(x[[1]],"data_format"),
long="wide",wide="long")
}
if (missing(idx)){
idx <- seq_along(x)
}
dat <- lapply(idx, function(i){
xx <- reshape_chrom(x[[i]], lambdas)
xx[,sample_var] <- names(x)[[i]]
if (is.null(lambdas)){
if (data_format == "wide"){
lambda.idx <- grep("lambda", colnames(x[[i]]))
lambdas <- unique(as.data.frame(x[[i]])[,lambda.idx])
} else if (data_format == "long"){
lambdas <- colnames(x[[i]])
}
}
xx <- reshape_chrom(x[[i]], lambdas = lambdas, data_format = data_format, ...)
if (data_format == "long"){
xx[,sample_var] <- names(x)[[i]]
}
xx
})
do.call(rbind,dat)
if (combine & data_format == "long"){
dat <- do.call(rbind,dat)
} else {
names(dat) <- names(x)
}
dat
}

#' @noRd
reshape_chrom <- function(x, data_format, ...){
# if (missing(data_format)){
# data_format <- switch(attr(x[[1]],"data_format"),
# long="wide", wide="long")
# }
fn <- switch(data_format,
long = reshape_chrom_long,
wide = reshape_chrom_wide)

fn(x, ...)
}

#' Reshapes a single chromatogram from wide to long format
#' Reshapes a single chromatogram from wide to long format
#' @name reshape_chrom
#' @importFrom stats reshape
#' @param x A chromatographic matrix in wide format.
#' @param lambdas Wavelength(s) to include.
#' @return A chromatographic matrix in long format.
#' @author Ethan Bass
#' @noRd
reshape_chrom <- function(x, lambdas){
reshape_chrom_long <- function(x, lambdas, format_out=c("data.frame","matrix")){
if (!is.null(attr(x, "data_format")) && attr(x, "data_format") == "long"){
warning("The data already appear to be in long format!", immediate. = TRUE)
}
if (ncol(x) == 1)
stop("The provided data is already in long format!")
x <- as.data.frame(x)
format_out <- match.arg(format_out,c("data.frame","matrix"))
xx <- as.data.frame(x)
if (!missing(lambdas)){
x <- x[,lambdas, drop=FALSE]
xx <- xx[,lambdas, drop = FALSE]
}
data <- reshape(as.data.frame(rt=rownames(x),x), direction = "long",
varying = list(1:ncol(x)), v.names="absorbance",
times = colnames(x), timevar = "lambda",
idvar="rt", ids=rownames(x))
data <- reshape(as.data.frame(rt=rownames(xx), xx), direction = "long",
varying = list(1:ncol(xx)), v.names="absorbance",
times = colnames(xx), timevar = "lambda",
idvar = "rt", ids = rownames(xx))
rownames(data) <- NULL
data$rt <- as.numeric(data$rt)
data$lambda <- as.numeric(data$lambda)
data[,c(3,2,1)]
data <- data[,c(3,2,1)]
if (format_out == "matrix"){
data <- as.matrix(data)
}
data <- transfer_metadata(data, x)
attr(data, "data_format") <- "long"
data
}


#' @noRd
reshape_chrom_wide <- function(x, lambdas, lambda_var = "lambda", time_var="rt",
value_var = "int", drop){
if (!is.null(attr(x, "data_format")) && attr(x, "data_format") == "wide"){
warning("The data already appear to be in wide format!",immediate. = TRUE)
}
x <- as.data.frame(x)
if (missing(drop)){
drop <- colnames(x)[which(sapply(x,is.character))]
}
if (missing(value_var)){
value_var <- colnames(x)[grep("int|abs", colnames(x),ignore.case = TRUE)]
}
if (!missing(lambdas)){
x <- x[which(x[,lambda_var] %in% lambdas),]
}
data <- reshape(x, idvar=time_var, timevar=lambda_var, v.names = value_var,
new.row.names = unique(x$rt), direction="wide", drop=drop)
colnames(data) <- gsub(paste0(value_var,"."),"", colnames(data))
data <- as.matrix(data)
rownames(data) <- data[,1]
data <- data[,-1]
data <- transfer_metadata(data, x)
attr(data, "data_format") <- "wide"
data
}

Loading

0 comments on commit 4b8e303

Please sign in to comment.