From c6b1f973e81ab8be52812d82b0bc470e443f0fb7 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Tue, 28 Nov 2023 16:54:01 -0500 Subject: [PATCH 01/51] v0.5.1 fix "mustang chemstation" v179 files (8-byte encoding) --- DESCRIPTION | 2 +- NEWS.md | 4 ++++ R/read_chemstation_ch.R | 25 +++++++++++++++++-------- inst/CITATION | 2 +- 4 files changed, 23 insertions(+), 10 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c98d9d3..cfd0006 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: chromConverter Title: Chromatographic File Converter -Version: 0.5.0 +Version: 0.5.1 Authors@R: c( person(given = "Ethan", family = "Bass", email = "ethanbass@gmail.com", role = c("aut", "cre"), diff --git a/NEWS.md b/NEWS.md index a11e7d7..ebee9ca 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +## chromConverter 0.5.1 + +* Fixed `read_chemstation_ch` parser to correctly read "Mustang Chemstation" 179 files with 8-byte encoding. + ## chromConverter 0.5.0 ### New features diff --git a/R/read_chemstation_ch.R b/R/read_chemstation_ch.R index 64ee883..e3022e3 100644 --- a/R/read_chemstation_ch.R +++ b/R/read_chemstation_ch.R @@ -35,7 +35,16 @@ read_chemstation_ch <- function(path, format_out = c("matrix", "data.frame"), if (version == "179"){ seek(f, 348) filetype <- paste(readBin(f, "character", n = 2), collapse = "") - version <- paste(version, filetype, sep = "_") + if (filetype == "OL"){ + bytes = "8b" + } else if (filetype == "GC"){ + seek(f, offsets$software) + n <- get_nchar(f) + soft <- cc_collapse(readBin(f, "character", n = n)) + chemstation_version <- strsplit(soft, " ")[[1]][1] + bytes <- ifelse(chemstation_version == "Mustang", "8b", "4b") + } + version <- paste(version, bytes, sep = "_") } decoder <- switch(version, "8" = decode_delta, @@ -43,8 +52,8 @@ read_chemstation_ch <- function(path, format_out = c("matrix", "data.frame"), "30" = decode_delta, "130" = decode_delta, "181" = decode_double_delta, - "179_GC" = decode_double_array_gc, - "179_OL" = decode_double_array_ol) + "179_4b" = decode_double_array_4byte, + "179_8b" = decode_double_array_8byte) # Sample Info # offsets <- list(sample = 858, description = 1369, method = 2574, @@ -94,8 +103,8 @@ read_chemstation_ch <- function(path, format_out = c("matrix", "data.frame"), "81" = 10, "30" = 13, "130" = 14, - "179_GC" = 10, - "179_OL" = 10, + "179_4b" = 10, + "179_8b" = 10, "181" = 10) meta <- lapply(offsets[seq_len(meta_slots)], function(offset){ @@ -188,7 +197,7 @@ decode_double_delta <- function(file, offset) { #' @note This function was adapted from the #' \href{https://github.com/chemplexity/chromatography}{Chromatography Toolbox} #' ((c) James Dillon 2014). -decode_double_array_gc <- function(file, offset) { +decode_double_array_4byte <- function(file, offset) { seek(file, 0, 'end') fsize <- seek(file, NA, "current") offset <- 6144 @@ -202,7 +211,7 @@ decode_double_array_gc <- function(file, offset) { #' Decode double array #' @noRd -decode_double_array_ol <- function(file, offset) { +decode_double_array_8byte <- function(file, offset) { seek(file, 0, 'end') fsize <- seek(file, NA, "current") offset <- 6144 @@ -306,7 +315,7 @@ get_agilent_offsets <- function(version){ units = 326, data_start = 512 ) - } else if (version %in% c("179","179_GC", "179_OL", "181")){ + } else if (version %in% c("179","179_4b", "179_8b", "181")){ offsets <- list( version = 326, file_type = 347, #0x15B diff --git a/inst/CITATION b/inst/CITATION index 445a7b9..da7678c 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -5,7 +5,7 @@ citEntry( title = "chromConverter: chromatographic file converter", author = "Ethan Bass", year = "2023", - version = "version 0.5.0", + version = "version 0.5.1", doi = "10.5281/zenodo.6792521", url = "https://ethanbass.github.io/chromConverter/", textVersion = paste("Bass, E. (2023).", From 1e1dc3a4f75aa6adc8f92c9a7de418415f86f5d7 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Tue, 12 Dec 2023 12:09:37 -0500 Subject: [PATCH 02/51] refactor: read_shimadzu, add new formats now allows parsing multiple types of chromatograms from the same file --- R/read_peaklist.R | 24 ++- R/read_shimadzu_ascii.R | 341 +++++++++++++++++++++++++++------------- man/read_peaklist.Rd | 3 +- man/read_shimadzu.Rd | 21 ++- 4 files changed, 260 insertions(+), 129 deletions(-) diff --git a/R/read_peaklist.R b/R/read_peaklist.R index 25a88aa..3bc6325 100644 --- a/R/read_peaklist.R +++ b/R/read_peaklist.R @@ -10,8 +10,7 @@ #' include: \code{chemstation} or \code{shimadzu}. #' @param pattern pattern (e.g. a file extension). Defaults to NULL, in which #' case file extension will be deduced from \code{format_in}. -#' @param data_format Whether to output data in wide or long format. Either -#' \code{wide} or \code{long}. +#' @param data_format Either \code{chromatographr} or \code{original}. #' @param read_metadata Logical, whether to attach metadata (if it's available). #' Defaults to TRUE. #' @param metadata_format Format to output metadata. Either \code{chromconverter} or @@ -63,19 +62,12 @@ read_peaklist <- function(paths, find_files, parser <- purrr::partial(read_chemstation_reports, data_format = data_format, metadata_format = metadata_format) - } else if (format_in == "shimadzu_fid"){ + } else if (grepl("shimadzu", format_in)){ pattern <- ifelse(is.null(pattern), ".txt", pattern) - parser <- partial(read_shimadzu, format_in = "fid", what = "peak_table", - # format_out = format_out, + parser <- partial(read_shimadzu, what = "peak_table", data_format = "wide", read_metadata = read_metadata, peaktable_format = data_format) - } else if (format_in == "shimadzu_dad"){ - pattern <- ifelse(is.null(pattern), ".txt", pattern) - parser <- partial(read_shimadzu, format_in = "dad", what = "peak_table", - # format_out = format_out, - data_format = "wide", - read_metadata = read_metadata) } if (find_files){ @@ -103,9 +95,13 @@ read_peaklist <- function(paths, find_files, try(parser(file), silent = TRUE) }) data <- lapply(seq_along(data), function(i){ - lapply(data[[i]], function(xx){ - cbind(sample = file_names[i], xx) - }) + if (inherits(data[[i]], "list")){ + lapply(data[[i]], function(xx){ + cbind(sample = file_names[i], xx) + }) + } else { + cbind(sample = file_names[i], data[[i]]) + } }) class(data) <- "peak_list" names(data) <- file_names diff --git a/R/read_shimadzu_ascii.R b/R/read_shimadzu_ascii.R index d79ec6b..cc86267 100644 --- a/R/read_shimadzu_ascii.R +++ b/R/read_shimadzu_ascii.R @@ -7,14 +7,16 @@ #' @name read_shimadzu #' @importFrom utils tail read.csv #' @importFrom stringr str_split_fixed -#' @param file path to file -#' @param format_in Format of files. \code{fid} or \code{dad}. +#' @param file Path to file. +#' @param what Whether to extract \code{chromatogram}, \code{peak_table}, and/or +#' \code{ms_spectra}. Accepts multiple arguments. +#' @param include Which chromatograms to include. Options are \code{fid}, +#' \code{dad}, \code{uv}, \code{tic}, and \code{status}. +#' @param format_in This argument is deprecated and is no longer required. #' @param format_out R format. Either \code{matrix} or \code{data.frame}. #' @param data_format Whether to return data in \code{wide} or \code{long} format. #' @param peaktable_format Whether to return peak tables in \code{chromatographr} or #' \code{original} format. -#' @param what Whether to extract \code{chromatogram} and/or \code{peak_table}. -#' Accepts multiple arguments. #' @param read_metadata Whether to read metadata from file. #' @param metadata_format Format to output metadata. Either \code{chromconverter} or #' \code{raw}. @@ -23,140 +25,269 @@ #' @author Ethan Bass #' @export -read_shimadzu <- function(file, format_in, +read_shimadzu <- function(file, what = "chromatogram", + format_in = NULL, + include = c("fid", "dad", "uv", "tic", "status"), format_out = c("matrix", "data.frame"), data_format = c("wide", "long"), peaktable_format = c("chromatographr", "original"), - what = "chromatogram", read_metadata = TRUE, - metadata_format = c("chromconverter", "raw")){ - if (missing(format_in)) - stop("`format_in` must be specified. The options are `fid` or `dad`.") + metadata_format = c("chromconverter", "raw"), + collapse = TRUE){ + if (!is.null(format_in)){ + warning("The `format_in` argument is deprecated, since the `read_shimadzu` + function no longer requires you to specify the file format. Please use the + `include` argument instead to specify which chromatograms you'd like to read.") + } + what <- match.arg(what, c("chromatogram", "peak_table", "ms_spectra"), + several.ok = TRUE) + include <- match.arg(include, c("fid", "dad", "uv", "tic", "status"), + several.ok = TRUE) format_out <- match.arg(format_out, c("matrix", "data.frame")) data_format <- match.arg(data_format, c("wide", "long")) peaktable_format <- match.arg(peaktable_format, c("chromatographr","original")) metadata_format <- match.arg(metadata_format, c("chromconverter", "raw")) - what <- match.arg(what, c("chromatogram", "peak_table"), several.ok = TRUE) + x <- readLines(file) - sep <- substr(x[2], 17, 17) - headings <- grep("\\[*\\]", x) - peaktab.idx <- grep("\\[Peak Table", x) - chrom_heading <- switch(format_in, - "fid" = "\\[Chromatogram .*]", - "dad" = "\\[PDA 3D]") - chrom.idx <- grep(chrom_heading, x) + sep <- substr(x[grep("Type", x)[1]], 5, 5) + ### extract chromatograms ### if (any(what == "chromatogram")){ - if (length(chrom.idx) != 0){ - header <- try(extract_shimadzu_header(x = x, chrom.idx = chrom.idx, sep = sep)) - met <- header[[1]] - decimal_separator <- ifelse(grepl(",", met[2, 2]), ",", ".") - if (decimal_separator == ","){ - met[c(2:3), 2] <- gsub(",", ".", met[c(2:3), 2]) - } - if (format_in == "fid"){ - xx <- read.csv(file, skip = header[[2]], sep = sep, colClasses = "numeric", - na.strings = c("[FractionCollectionReport]","#ofFractions"), - dec = decimal_separator) - xx <- as.matrix(xx[!is.na(xx[,1]),]) - rownames(xx) <- xx[, 1] - xx <- xx[, 2, drop = FALSE] - colnames(xx) <- "Intensity" - if (data_format == "long"){ - xx <- cbind(RT = as.numeric(rownames(xx)), Intensity = as.numeric(xx[,1])) - } - } else if (format_in == "dad"){ - nrows <- as.numeric(met[grep("# of Time Axis Points", met[,1]), 2]) - ncols <- as.numeric(met[grep("# of Wavelength Axis Points", met[,1]), 2]) - xx <- read.csv(file, skip = header[[2]], sep = sep, colClasses = "numeric", - na.strings = c("[FractionCollectionReport]", "#ofFractions"), - row.names = 1, nrows = nrows, dec = decimal_separator) - xx <- as.matrix(xx[!is.na(xx[,1]),]) - colnames(xx) <- as.numeric(gsub("X", "", colnames(xx)))*0.01 - if (data_format == "long"){ - xx <- reshape_chrom(xx, data_format = "long") - } - } - if (format_out == "data.frame"){ - xx <- as.data.frame(xx) - } - } else{ + regex <- c("fid" = "\\[Chromatogram .*]", + "dad" = "\\[PDA 3D]", + "uv" = "\\[PDA Multi Chromatogram\\(Ch\\d+\\)]", + "status" = "\\[LC Status Trace\\(.+\\)]", + "tic" = "\\[MS Chromatogram\\]") + regex <- regex[include] + chrom.idx <- lapply(regex, function(reg) grep(reg,x)) + chrom.idx <- chrom.idx[lengths(chrom.idx) > 0] + + if (length(chrom.idx) == 0){ if (length(what) == 1){ - stop("Chromatogram not found.") - } else{ - warning("Chromatogram not found.") - what = "peak_table" + stop("Chromatograms not found!") + } else { + warning("Chromatograms not found.") + what <- what[grep("chroms", what, invert = TRUE)] } } + chromatogram <- lapply(seq_along(chrom.idx), function(i){ + read_shimadzu_chrom <- switch(names(chrom.idx)[i], "dad" = read_shimadzu_dad, + read_shimadzu_chromatogram) + xx <- lapply(chrom.idx[[i]], function(idx){ + read_shimadzu_chrom(file = file, x = x, chrom.idx = idx, + sep = sep, data_format = data_format, + read_metadata = TRUE, format_out = format_out) + }) + names(xx) <- x[chrom.idx[[i]]] + if (collapse) xx <- collapse_list(xx) + xx + }) + names(chromatogram) <- names(chrom.idx) } - ### extract peak_table + ### extract peak tables ### if (any(what == "peak_table")){ + peaktab.idx <- grep("\\[Peak Table|\\[MC Peak Table", x) + pktab_type <- substr(x[peaktab.idx], 2, 3) + if (length(peaktab.idx) == 0){ if (length(what) == 1){ stop("Peak table not found!") } else{ warning("Peak table not found!") - what <- "chromatogram" + what <- what[grep("peak_table", what, invert = TRUE)] + } + } + peak_table <- lapply(seq_along(peaktab.idx), function(i){ + read_shimadzu_peaktable(file = file, x, idx = peaktab.idx[i], sep = sep, + format_in = pktab_type[i], + format_out = peaktable_format) + }) + names(peak_table) <- gsub("\\[|\\]","", x[peaktab.idx]) + } + + ### extract MS spectra ### + if (any(what == "ms_spectra")){ + spectra.idx <- grep("\\[MS Spectrum\\]", x) + if (length(spectra.idx) == 0){ + if (length(what) == 1){ + stop("MS spectra were not found!") + } else{ + warning("MS spectra were not found!") + what <- what[grep("spectra", what, invert = TRUE)] } } - peak_tab <- lapply(peaktab.idx, function(idx){ - nrows <- as.numeric(strsplit(x = x[idx + 1], split = sep)[[1]][2]) - if (!is.na(nrows) && nrows > 0){ - time_column <- grep("R.Time", strsplit(x = x[[idx + 2]], split = sep)[[1]]) - t1 <- strsplit(x = x[[idx + 3]], split = sep)[[1]][time_column] - decimal_separator <- ifelse(grepl(".", t1), ".", ",") - - peak_tab <- read.csv(file, skip = (idx + 1), sep = sep, nrows = nrows, - dec = decimal_separator) - if (peaktable_format == "chromatographr"){ - peak_tab <- peak_tab[, c("R.Time", "I.Time", "F.Time", "Area", "Height")] - colnames(peak_tab) <- c("rt", "start", "end", "area", "height") - # cbind(sample = gsub("\\[|\\]","", x[idx]), peak_tab) - } - peak_tab - } else{NA} + ms_spectra <- lapply(spectra.idx, function(idx){ + read_shimadzu_spectrum(x, idx = idx, sep = sep) }) - names(peak_tab) <- gsub("\\[|\\]","", x[peaktab.idx]) } - if ("peak_table" %in% what & "chromatogram" %in% what){ - what <- "both" + + xx <- mget(what) + if (collapse) xx <- collapse_list(xx) + xx +} + +#' @noRd +collapse_list <- function(x){ + while(is.list(x) && length(x) == 1){ + x <- x[[1]] } + x +} + +#' Read Shimadzu Metadata +#' @noRd +read_shimadzu_metadata <- function(x, met = NULL, sep){ + + headings <- grep("\\[*\\]", x) + names(headings) <- x[headings] + + idx <- which(x[headings] %in% c("[Header]", "[File Information]", + "[Sample Information]", "[Original Files]", + "[File Description]", "[Configuration]") ) + meta_start <- headings[min(idx)] + meta_end <- headings[max(idx) + 1] + meta <- x[(meta_start + 1):(meta_end - 1)] + meta <- meta[meta != ""] + meta <- meta[-grep("\\[", meta)] + meta <- stringr::str_split_fixed(meta, pattern = sep, n = 2) + if (!is.null("met")){ + meta <- rbind(meta, met) + } + rownames(meta) <- meta[, 1] + meta <- as.list(meta[,2]) + meta +} + +#' Read Shimadzu Chromatogram +#' @noRd +read_shimadzu_chromatogram <- function(file, x, chrom.idx, sep, data_format, + read_metadata, format_out){ + header <- try(extract_shimadzu_header(x = x, chrom.idx = chrom.idx, sep = sep)) + met <- header[[1]] + first_time <- strsplit(x[header[[2]]+2], "\t")[[1]][1] + decimal_separator <- ifelse(grepl(",", first_time), ",", ".") + + if (decimal_separator == ","){ + times.idx <- grep("Time|Intensity Multiplier", met[,1]) + met[times.idx, 2] <- gsub(",", ".", met[times.idx, 2]) + } + + xx <- read.csv(file, skip = header[[2]], sep = sep, colClasses = "numeric", + # na.strings = c("[FractionCollectionReport]", "#ofFractions", "\\["), + dec = decimal_separator, + nrows = as.numeric(met[grep("# of Points", met),2])) + xx <- as.matrix(xx[!is.na(xx[,1]),]) + if (data_format == "wide"){ + rownames(xx) <- xx[, 1] + xx <- xx[, 2, drop = FALSE] + colnames(xx) <- "Intensity" + } + # if (data_format == "long"){ + # xx <- cbind(RT = as.numeric(rownames(xx)), Intensity = as.numeric(xx[,1])) + # } if (format_out == "data.frame"){ xx <- as.data.frame(xx) } - xx <- switch(what, "chromatogram" = xx, - "peak_table" = peak_tab, - "both" = list(chromatogram = xx, peak_table = peak_tab)) if (read_metadata){ - idx <- which(x[headings] %in% c("[Header]", "[File Information]", - "[Sample Information]", "[Original Files]", - "[File Description]", "[Configuration]") ) - meta_start <- headings[min(idx)] - meta_end <- headings[max(idx) + 1] - meta <- x[(meta_start + 1):(meta_end - 1)] - meta <- meta[meta!=""] - meta <- meta[-grep("\\[", meta)] - meta <- stringr::str_split_fixed(meta, pattern = sep, n = 2) - if (exists("met")){ - meta <- rbind(meta, met) - } - rownames(meta) <- meta[, 1] - meta <- as.list(meta[,2]) - if (inherits(xx, "list")){ - xx <- lapply(xx, function(xxx){ - attach_metadata(xxx, meta, format_in = "shimadzu", - source_file = file, format_out = format_out, - data_format = data_format, parser = "chromConverter") - }) - } else{ - xx <- attach_metadata(xx, meta, format_in = "shimadzu", - source_file = file, format_out = format_out, - data_format = data_format, - parser = "chromConverter") - } + meta <- read_shimadzu_metadata(x, met = met, sep = sep) + xx <- attach_metadata(xx, meta, format_in = "shimadzu_chrom", + source_file = file, format_out = format_out, + data_format = data_format, + parser = "chromConverter") } xx } +#' Read Shimadzu DAD Array +#' @noRd +read_shimadzu_dad <- function(file, x, chrom.idx, sep, data_format, + read_metadata, format_out){ + header <- try(extract_shimadzu_header(x = x, chrom.idx = chrom.idx, sep = sep)) + met <- header[[1]] + first_time <- strsplit(x[header[[2]]+3], "\t")[[1]][1] + decimal_separator <- ifelse(grepl(",", first_time), ",", ".") + + if (decimal_separator == ","){ + times.idx <- grep("Time|Intensity Multiplier", met[,1]) + met[times.idx, 2] <- gsub(",", ".", met[times.idx, 2]) + } + + nrows <- as.numeric(met[grep("# of Time Axis Points", met[,1]), 2]) + ncols <- as.numeric(met[grep("# of Wavelength Axis Points", met[,1]), 2]) + xx <- read.csv(file, skip = header[[2]]+1, sep = sep, colClasses = "numeric", + na.strings = c("[FractionCollectionReport]", "#ofFractions"), + row.names = 1, nrows = nrows, dec = decimal_separator) + xx <- as.matrix(xx[!is.na(xx[,1]),]) + colnames(xx) <- as.numeric(gsub("X", "", colnames(xx)))*0.01 + if (data_format == "long"){ + xx <- reshape_chrom(xx, data_format = "long") + } + if (format_out == "data.frame"){ + xx <- as.data.frame(xx) + } + if (read_metadata){ + meta <- read_shimadzu_metadata(x, met = met, sep = sep) + xx <- attach_metadata(xx, meta, format_in = "shimadzu_chrom", + source_file = file, format_out = format_out, + data_format = data_format, + parser = "chromConverter") + } + xx +} + +#' Read Shimadzu Peak Table +#' @noRd +read_shimadzu_peaktable <- function(file, x, idx, sep, format_in, format_out){ + nrows <- as.numeric(strsplit(x = x[idx + 1], split = sep)[[1]][2]) + table_start <- grep("Peak#", x[idx:(idx + nrows)]) + idx - 1 + if (!is.na(nrows) && nrows > 0){ + time_column <- grep("R.Time|Ret.Time", strsplit(x = x[[table_start]], split = sep)[[1]]) + t1 <- strsplit(x = x[[table_start + 3]], split = sep)[[1]][time_column] + decimal_separator <- ifelse(grepl(",", t1), ",", ".") + + peak_tab <- read.csv(file, skip = table_start-1, sep = sep, nrows = nrows, + dec = decimal_separator) + if (format_out == "chromatographr"){ + column_names <- switch(format_in, "MC" = c("Ret.Time", "Proc.From", "Proc.To", "Area", "Height"), + c("R.Time", "I.Time", "F.Time", "Area", "Height")) + peak_tab <- peak_tab[, column_names] + colnames(peak_tab) <- c("rt", "start", "end", "area", "height") + # cbind(sample = gsub("\\[|\\]","", x[idx]), peak_tab) + } + peak_tab + } else { + NA + } +} + +#' Read Shimadzu MS Spectrum +#' @noRd +read_shimadzu_spectrum <- function(x, idx, sep){ + nrows <- as.numeric(strsplit(x = x[idx + 1], split = sep)[[1]][2]) + table_start <- grep("Intensity", x[idx:(idx + nrows)]) + idx + decimal_separator <- ifelse(grepl(".", strsplit(x[table_start + 4], split = sep)[[1]][1]), ".", ",") + + spectrum <- read.csv(file, skip = table_start-1, sep = sep, nrows = nrows, + dec = decimal_separator) + spectrum +} + +#' Extract Header from Shimadzu ASCII Files +#' @noRd +extract_shimadzu_header <- function(x, chrom.idx, sep){ + index <- chrom.idx + 1 + line <- x[index] + l <- length(strsplit(x = line, split = sep)[[1]]) + header <- strsplit(x = line, split = sep)[[1]] + while (l > 1) { + index <- index + 1 + line <- strsplit(x = x[index], split = sep)[[1]] + l <- length(line) + if (l == 1 | suppressWarnings(!is.na(as.numeric(line[1]))) | grepl("R.Time|Ret.Time", line[1])) + break + header <- rbind(header, line) + } + list(header, index-1) +} diff --git a/man/read_peaklist.Rd b/man/read_peaklist.Rd index e2b6f2a..52e9b8f 100644 --- a/man/read_peaklist.Rd +++ b/man/read_peaklist.Rd @@ -29,8 +29,7 @@ include: \code{chemstation} or \code{shimadzu}.} \item{pattern}{pattern (e.g. a file extension). Defaults to NULL, in which case file extension will be deduced from \code{format_in}.} -\item{data_format}{Whether to output data in wide or long format. Either -\code{wide} or \code{long}.} +\item{data_format}{Either \code{chromatographr} or \code{original}.} \item{metadata_format}{Format to output metadata. Either \code{chromconverter} or \code{raw}.} diff --git a/man/read_shimadzu.Rd b/man/read_shimadzu.Rd index e814f83..a1afb20 100644 --- a/man/read_shimadzu.Rd +++ b/man/read_shimadzu.Rd @@ -6,19 +6,27 @@ \usage{ read_shimadzu( file, - format_in, + what = "chromatogram", + format_in = NULL, + include = c("fid", "dad", "uv", "tic", "status"), format_out = c("matrix", "data.frame"), data_format = c("wide", "long"), peaktable_format = c("chromatographr", "original"), - what = "chromatogram", read_metadata = TRUE, - metadata_format = c("chromconverter", "raw") + metadata_format = c("chromconverter", "raw"), + collapse = TRUE ) } \arguments{ -\item{file}{path to file} +\item{file}{Path to file.} + +\item{what}{Whether to extract \code{chromatogram}, \code{peak_table}, and/or +\code{ms_spectra}. Accepts multiple arguments.} -\item{format_in}{Format of files. \code{fid} or \code{dad}.} +\item{format_in}{This argument is deprecated and is no longer required.} + +\item{include}{Which chromatograms to include. Options are \code{fid}, +\code{dad}, \code{uv}, \code{tic}, and \code{status}.} \item{format_out}{R format. Either \code{matrix} or \code{data.frame}.} @@ -27,9 +35,6 @@ read_shimadzu( \item{peaktable_format}{Whether to return peak tables in \code{chromatographr} or \code{original} format.} -\item{what}{Whether to extract \code{chromatogram} and/or \code{peak_table}. -Accepts multiple arguments.} - \item{read_metadata}{Whether to read metadata from file.} \item{metadata_format}{Format to output metadata. Either \code{chromconverter} or From 47e80109a69413933e24ba1613d765c888e65be1 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Tue, 12 Dec 2023 12:09:51 -0500 Subject: [PATCH 03/51] feat: add mzxml alias to read_chroms --- R/read_chroms.R | 25 +++++++++++++------------ R/utils.R | 39 +++++++++++++-------------------------- man/read_chroms.Rd | 19 ++++++++++--------- 3 files changed, 36 insertions(+), 47 deletions(-) diff --git a/R/read_chroms.R b/R/read_chroms.R index 5687842..4281433 100644 --- a/R/read_chroms.R +++ b/R/read_chroms.R @@ -8,12 +8,13 @@ #' [rainbow](https://rainbow-api.readthedocs.io/), or internal parsers. #' #' Provides a general interface to chromConverter parsers. Currently recognizes -#' 'Agilent ChemStation' (\code{.uv}, \code{.ch}, \code{.dx}), 'MassHunter' (\code{.dad}) -#' files, 'Thermo RAW' (\code{.raw}), 'Waters ARW' (\code{.arw}), 'Waters RAW' -#' (\code{.raw}), 'Chromeleon ASCII' (\code{.txt}), 'Shimadzu ASCII' -#' (\code{.txt}). Also, wraps Openchrom parsers, which include many additional -#' formats. To use 'Entab', 'ThermoRawFileParser', or 'Openchrom' parsers, -#' they must be manually installed. Please see the instructions in the +#' 'Agilent ChemStation' (\code{.uv}, \code{.ch}, \code{.dx}), 'Agilent +#' MassHunter' (\code{.dad}), 'Thermo RAW' (\code{.raw}), 'Waters ARW' (\code{.arw}), +#' 'Waters RAW' (\code{.raw}), 'Chromeleon ASCII' (\code{.txt}), 'Shimadzu ASCII' +#' (\code{.txt}), and 'Shimadzu LCD' files (preliminary support). Also, wraps +#' Openchrom parsers, which include many additional formats. To use 'Entab', +#' 'ThermoRawFileParser', or 'Openchrom' parsers, they must be manually installed. +#' Please see the instructions in the #' [README](https://ethanbass.github.io/chromConverter/) for further details. #' #' @name read_chroms @@ -26,8 +27,8 @@ #' \code{chemstation_uv}, \code{chemstation_ch}, \code{chemstation_csv}, #' \code{masshunter}, \code{masshunter_dad}, \code{chromeleon_uv}, #' \code{shimadzu_fid}, \code{shimadzu_dad}, \code{thermoraw}, -#' \code{waters_arw}, \code{waters_raw}, \code{mzml}, \code{cdf}, \code{mdf}, -#' \code{msd}, \code{csd}, \code{wsd}, or \code{other}. +#' \code{waters_arw}, \code{waters_raw}, \code{mzml}, \code{mzxml}, +#' \code{cdf}, \code{mdf}, \code{msd}, \code{csd}, \code{wsd}, or \code{other}. #' @param pattern pattern (e.g. a file extension). Defaults to NULL, in which #' case file extension will be deduced from \code{format_in}. #' @param parser What parser to use. Current option are \code{chromconverter}, @@ -81,7 +82,7 @@ read_chroms <- function(paths, find_files, "masshunter_dad", "chromeleon_uv", "shimadzu_fid", "shimadzu_dad", "shimadzu_lcd", "thermoraw", "mzml", - "waters_arw", "waters_raw", + "mzxml", "waters_arw", "waters_raw", "msd", "csd", "wsd", "mdf", "other"), pattern = NULL, parser = c("", "chromconverter", "aston", "entab", @@ -134,7 +135,7 @@ read_chroms <- function(paths, find_files, "chemstation_81", "chemstation_181", "chemstation_fid", "chemstation_csv", "masshunter_dad", "shimadzu_fid", "shimadzu_dad", "shimadzu_lcd", - "chromeleon_uv", "thermoraw", "mzml", + "chromeleon_uv", "thermoraw", "mzml", "mzxml", "waters_arw", "waters_raw", "msd", "csd", "wsd", "mdf", "cdf", "other")) if (parser == ""){ @@ -221,13 +222,13 @@ read_chroms <- function(paths, find_files, metadata_format = metadata_format) } else if (format_in == "shimadzu_fid"){ pattern <- ifelse(is.null(pattern), ".txt", pattern) - converter <- partial(read_shimadzu, format_in = "fid", + converter <- partial(read_shimadzu, include = "fid", format_out = format_out, data_format = data_format, read_metadata = read_metadata, metadata_format = metadata_format) } else if (format_in == "shimadzu_dad"){ pattern <- ifelse(is.null(pattern), ".txt", pattern) - converter <- partial(read_shimadzu, format_in = "dad", + converter <- partial(read_shimadzu, include = "dad", format_out = format_out, data_format = data_format, read_metadata = read_metadata, metadata_format = metadata_format) diff --git a/R/utils.R b/R/utils.R index 352bc09..bbbd2d9 100644 --- a/R/utils.R +++ b/R/utils.R @@ -67,8 +67,8 @@ check_parser <- function(format_in, parser=NULL, find = FALSE){ "chemstation_130", "chemstation_131", "openlab_131", "chemstation_179", "chemstation_81", - "chemstation_181", "mzml", "mdf", - "shimadzu_fid", "shimadzu_dad", + "chemstation_181", "mzml", "mzxml", + "mdf", "shimadzu_fid", "shimadzu_dad", "shimadzu_lcd", "waters_arw"), aston = c("chemstation", "chemstation_uv", "chemstation_131", @@ -128,7 +128,8 @@ extract_filenames <- function(files){ file_names <- strsplit(files, "/") file_names <- gsub("\\.[Dd]", "", sapply(file_names, function(n){ - ifelse(any(grepl("\\.[Dd]", n)), grep("\\.[Dd]", n, value = TRUE), tail(n,1)) + ifelse(any(grepl("\\.[Dd]", n)), + grep("\\.[Dd]", n, value = TRUE), tail(n,1)) })) } else { file_names <- sapply(strsplit(basename(files),"\\."), function(x) x[1]) @@ -142,16 +143,20 @@ format_to_extension <- function(format_in){ switch(format_in, "agilent_d" = ".d|.D", "chemstation_uv" = ".uv|.UV", - "chemstation_ch" = ".ch|.CH", + "chemstation_ch" = ".ch|.CH", "chemstation_fid" = ".ch|.CH", - "chemstation_csv" = ".csv|.CSV", + "chemstation_csv" = ".csv|.CSV", "masshunter_dad" = ".sp|.SP", "shimadzu_fid" = ".txt", "shimadzu_dad" = ".txt", "chromeleon_uv" = ".txt", - "thermoraw" = ".raw", "mzml" = ".mzml", "waters_arw" = ".arw", - "waters_raw" = ".raw", "msd" = ".", "csd" =".", "wsd" =".", - "mdf" = ".mdf|.MDF", "other"=".") + "thermoraw" = ".raw", + "mzml" = ".mzml", "mzxml" = ".mzxml", + "waters_arw" = ".arw", + "waters_raw" = ".raw", + "msd" = ".", "csd" =".", + "wsd" =".", "mdf" = ".mdf|.MDF", + "other"=".") } #' @noRd @@ -183,24 +188,6 @@ set_temp_directory <- function(){ } } -#' Extract header from Shimadzu ascii files -#' @noRd -extract_shimadzu_header <- function(x, chrom.idx, sep){ - index <- chrom.idx + 1 - line <- x[index] - l <- length(strsplit(x = line, split = sep)[[1]]) - header <- strsplit(x = line, split = sep)[[1]] - while (l > 1) { - index <- index + 1 - line <- strsplit(x = x[index], split = sep)[[1]] - l <- length(line) - if (l == 1 | suppressWarnings(!is.na(as.numeric(line[1])))) - break - header <- rbind(header, line) - } - list(header,index) -} - #' Check for suggested package #' @noRd #' @keywords internal diff --git a/man/read_chroms.Rd b/man/read_chroms.Rd index a0261a4..f0e653b 100644 --- a/man/read_chroms.Rd +++ b/man/read_chroms.Rd @@ -10,7 +10,7 @@ read_chroms( format_in = c("agilent_d", "agilent_dx", "chemstation", "chemstation_fid", "chemstation_ch", "chemstation_csv", "chemstation_uv", "masshunter_dad", "chromeleon_uv", "shimadzu_fid", "shimadzu_dad", "shimadzu_lcd", "thermoraw", "mzml", - "waters_arw", "waters_raw", "msd", "csd", "wsd", "mdf", "other"), + "mzxml", "waters_arw", "waters_raw", "msd", "csd", "wsd", "mdf", "other"), pattern = NULL, parser = c("", "chromconverter", "aston", "entab", "thermoraw", "openchrom", "rainbow"), format_out = c("matrix", "data.frame"), @@ -39,8 +39,8 @@ include: \code{agilent_d}, \code{agilent_dx}, \code{chemstation}, \code{chemstation_uv}, \code{chemstation_ch}, \code{chemstation_csv}, \code{masshunter}, \code{masshunter_dad}, \code{chromeleon_uv}, \code{shimadzu_fid}, \code{shimadzu_dad}, \code{thermoraw}, -\code{waters_arw}, \code{waters_raw}, \code{mzml}, \code{cdf}, \code{mdf}, -\code{msd}, \code{csd}, \code{wsd}, or \code{other}.} +\code{waters_arw}, \code{waters_raw}, \code{mzml}, \code{mzxml}, +\code{cdf}, \code{mdf}, \code{msd}, \code{csd}, \code{wsd}, or \code{other}.} \item{pattern}{pattern (e.g. a file extension). Defaults to NULL, in which case file extension will be deduced from \code{format_in}.} @@ -101,12 +101,13 @@ parsers from \href{https://github.com/bovee/aston}{Aston}, } \details{ Provides a general interface to chromConverter parsers. Currently recognizes -'Agilent ChemStation' (\code{.uv}, \code{.ch}, \code{.dx}), 'MassHunter' (\code{.dad}) -files, 'Thermo RAW' (\code{.raw}), 'Waters ARW' (\code{.arw}), 'Waters RAW' -(\code{.raw}), 'Chromeleon ASCII' (\code{.txt}), 'Shimadzu ASCII' -(\code{.txt}). Also, wraps Openchrom parsers, which include many additional -formats. To use 'Entab', 'ThermoRawFileParser', or 'Openchrom' parsers, -they must be manually installed. Please see the instructions in the +'Agilent ChemStation' (\code{.uv}, \code{.ch}, \code{.dx}), 'Agilent +MassHunter' (\code{.dad}), 'Thermo RAW' (\code{.raw}), 'Waters ARW' (\code{.arw}), +'Waters RAW' (\code{.raw}), 'Chromeleon ASCII' (\code{.txt}), 'Shimadzu ASCII' +(\code{.txt}), and 'Shimadzu LCD' files (preliminary support). Also, wraps +Openchrom parsers, which include many additional formats. To use 'Entab', +'ThermoRawFileParser', or 'Openchrom' parsers, they must be manually installed. +Please see the instructions in the \href{https://ethanbass.github.io/chromConverter/}{README} for further details. } \section{Side effects}{ From c9f6c8017d00944d95d87c73b6dd77785c42084e Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Tue, 12 Dec 2023 12:10:33 -0500 Subject: [PATCH 04/51] refactor: attach_metadata (shimadzu) --- R/attach_metadata.R | 54 +++++++++++++++++++++++++++++++++------------ 1 file changed, 40 insertions(+), 14 deletions(-) diff --git a/R/attach_metadata.R b/R/attach_metadata.R index 650a896..db8b2e7 100644 --- a/R/attach_metadata.R +++ b/R/attach_metadata.R @@ -47,23 +47,21 @@ attach_metadata <- function(x, meta, format_in, format_out, data_format, parser data_format = data_format, parser = "chromconverter", format_out = format_out) - }, "shimadzu" = { + }, "shimadzu_dad" = { structure(x, - instrument = meta$`Instrument Name`, - detector = meta$`Detector Name`, - software_name = meta$`Application Name`, - software_version = meta$Version, - method = meta$`Method File`, - batch = meta$`Batch File`, - operator = meta$`Operator Name`, + instrument = get_metadata_field(meta, "Instrument Name"), + detector = get_metadata_field(meta, "Detector Name"), + software_name = get_metadata_field(meta, "Application Name"), + software_version = get_metadata_field(meta, "Version"), + method = get_metadata_field(meta, "Method File"), + batch = get_metadata_field(meta, "Batch File"), + operator = get_metadata_field(meta, "Operator Name"), run_datetime = as.POSIXct(meta$Acquired, format = "%m/%d/%Y %I:%M:%S %p"), - sample_name = meta$`Sample Name`, - sample_id = meta$`Sample ID`, - sample_injection_volume = meta$`Injection Volume`, - sample_amount = meta$`Injection Volume`, + sample_name = get_metadata_field(meta, "Sample Name"), + sample_id = get_metadata_field(meta, "Sample ID"), + sample_injection_volume = get_metadata_field(meta, "Injection Volume"), + sample_amount = get_metadata_field(meta, "Injection Volume"), time_range = c(meta$`Start Time(min)`, meta$`End Time(min)`), - # start_time = meta$`Start Time(min)`, - # end_time = meta$`End Time(min)`, time_interval = meta$`Interval(msec)`, time_interval_unit = get_time_unit( grep("Interval", names(meta), value = TRUE)[1], format_in = "shimadzu"), @@ -76,6 +74,34 @@ attach_metadata <- function(x, meta, format_in, format_out, data_format, parser data_format = data_format, parser = "chromconverter", format_out = format_out) + }, "shimadzu_chrom" = { + structure(x, + instrument = get_metadata_field(meta, "Instrument Name"), + detector = get_metadata_field(meta, "Detector Name"), + software_name = get_metadata_field(meta, "Application Name"), + software_version = get_metadata_field(meta, "Version"), + method = get_metadata_field(meta, "Method File"), + batch = get_metadata_field(meta, "Batch File"), + operator = get_metadata_field(meta, "Operator Name"), + run_datetime = as.POSIXct(meta$Acquired, format = "%m/%d/%Y %I:%M:%S %p"), + sample_name = get_metadata_field(meta, "Sample Name"), + sample_id = get_metadata_field(meta, "Sample ID"), + sample_injection_volume = get_metadata_field(meta, "Injection Volume"), + sample_amount = get_metadata_field(meta, "Injection Volume"), + time_range = c(meta$`Start Time(min)`, meta$`End Time(min)`), + time_interval = meta$`Interval(msec)`, + time_interval_unit = get_time_unit( + grep("Interval", names(meta), value = TRUE)[1], format_in = "shimadzu"), + time_unit = get_time_unit( + grep("Start Time", names(meta), value=TRUE)[1], format_in = "shimadzu"), + wavelength = get_metadata_field(meta, "Wavelength(nm)"), + bandwidth = get_metadata_field(meta, "Bandwidth(nm)"), + # detector_end = meta$`End Wavelength(nm)`, + detector_unit = get_metadata_field(meta, "Intensity Units"), + source_file = source_file, + data_format = data_format, + parser = "chromconverter", + format_out = format_out) }, "chromeleon" = { datetime.idx <- unlist(sapply(c("Date$", "Time$"), function(str) grep(str, names(meta)))) datetime <- unlist(meta[datetime.idx]) From 991168f211fa3cad257e9e62d7594fdb94937afb Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Tue, 12 Dec 2023 12:11:00 -0500 Subject: [PATCH 05/51] test: update shimadzu peaklist test --- tests/testthat/test-read_chroms.R | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/tests/testthat/test-read_chroms.R b/tests/testthat/test-read_chroms.R index f3798bf..97b9193 100644 --- a/tests/testthat/test-read_chroms.R +++ b/tests/testthat/test-read_chroms.R @@ -166,17 +166,16 @@ test_that("read_peaklist can read chemstation reports", { test_that("read_peaklist can read 'Shimadzu' fid files", { path <- "testdata/ladder.txt" x <- read_peaklist(path, format_in = "shimadzu_fid", progress_bar = FALSE) - expect_equal(class(x[[1]]), "list") - expect_equal(class(x[[1]][[1]]), "data.frame") - expect_equal(x[[1]][[1]][[1,"sample"]], "ladder") - expect_equal(colnames(x[[1]][[1]]), + expect_equal(class(x[[1]]), "data.frame") + expect_equal(x[[1]][[1,"sample"]], "ladder") + expect_equal(colnames(x[[1]]), c("sample", "rt", "start", "end", "area", "height")) + x <- read_peaklist(path, format_in = "shimadzu_fid", data_format = "original") - expect_equal(class(x[[1]]), "list") - expect_equal(class(x[[1]][[1]]), "data.frame") - expect_equal(x[[1]][[1]][[1,"sample"]], "ladder") - expect_equal(x[[1]][[1]][[1,"sample"]], "ladder") - expect_equal(colnames(x[[1]][[1]]), + expect_equal(class(x[[1]]), "data.frame") + expect_equal(x[[1]][[1,"sample"]], "ladder") + expect_equal(x[[1]][[1,"sample"]], "ladder") + expect_equal(colnames(x[[1]]), c("sample","Peak.","R.Time","I.Time","F.Time","Area","Height", "A.H","Conc.","Mark","ID.","Name", "k.", "Plate..", "Plate.Ht.", "Tailing", "Resolution", "Sep.Factor", "Area.Ratio", "Height.Ratio", From 593b678564b45cdc4a252630243fe63636c6f1c8 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Tue, 12 Dec 2023 12:11:14 -0500 Subject: [PATCH 06/51] docs: update readme --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 703d14e..09d17ca 100644 --- a/README.md +++ b/README.md @@ -144,14 +144,14 @@ For downstream analyses of chromatographic data, you can also check out my packa ## Contributing -Contributions of source code, ideas, or documentation are very welcome. Please get in touch (preferable by opening a GitHub [issue](https://github.com/ethanbass/chromatographR/issues)) to discuss any suggestions or to file a bug report. Some good reasons to file an issue: +Contributions of source code, ideas, or documentation are always welcome. Please get in touch (preferable by opening a GitHub [issue](https://github.com/ethanbass/chromatographR/issues)) to discuss any suggestions or to file a bug report. Some good reasons to file an issue: - You've found an actual bug. - You're getting a cryptic error message that you don't understand. - You have a file format you'd like to read that isn't currently supported by chromatographR. (If you do this, please make sure to include a link to an example file!) - You have a new feature you'd like to see implemented. -**Note:** Before filing a bug report, please make sure to install the latest development version of chromConverter from GitHub, in case your bug has already been patched. After installing the latest version, you may also need to refresh your R session to remove the older version from the cache. +**Note: Before filing a bug report, please make sure to install the latest development version of chromConverter from GitHub**, in case your bug has already been patched. After installing the latest version, you may also need to refresh your R session to remove the older version from the cache. ### Other related packages @@ -163,4 +163,4 @@ You can cite chromConverter as follows: Bass, E. (2023). chromConverter: chromatographic file converter. http://doi.org/10.5281/zenodo.6792521. -If you use external libraries to convert your files, please cite them as well in published work. +If you use external libraries to convert your files, it is suggested to also cite these libraries in published work. From efd8f37176865dd629d6b1adef7cf50988197fb5 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Tue, 12 Dec 2023 12:12:50 -0500 Subject: [PATCH 07/51] docs: update NEWS --- NEWS.md | 3 +++ R/read_shimadzu_lcd.R | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index ebee9ca..7f021dc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,9 @@ ## chromConverter 0.5.1 * Fixed `read_chemstation_ch` parser to correctly read "Mustang Chemstation" 179 files with 8-byte encoding. +* Added alias to `read_chroms` for reading `mzxml` files with `RaMS`. +* Refactored `read_shimadzu` function and added support for new types of chromatograms (e.g. status, uv and total ion chromatograms). Added support for reading multiple types of chromatograms at once. +* Added support for reading MS spectra from 'Shimadzu' ascii files using `read_shimadzu`. ## chromConverter 0.5.0 diff --git a/R/read_shimadzu_lcd.R b/R/read_shimadzu_lcd.R index 24afab6..60dafce 100644 --- a/R/read_shimadzu_lcd.R +++ b/R/read_shimadzu_lcd.R @@ -205,7 +205,7 @@ decode_shimadzu_block <- function(file) { count <- 1 buffer <- list(0,0,0,0) - for (i in c(1:2)){ + for (i in seq_len(2)){ n_bytes <- readBin(file, "integer", n = 1, size = 2) start <- seek(file, NA, "current") From 9ed3eec9b1e7ff661778604b6626bbe8b70065eb Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Tue, 12 Dec 2023 12:14:32 -0500 Subject: [PATCH 08/51] style: title case and alphabetized list of formats --- README.md | 15 ++++++++------- inst/CITATION | 4 ++-- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 09d17ca..4b02af1 100644 --- a/README.md +++ b/README.md @@ -15,13 +15,14 @@ chromConverter aims to facilitate the conversion of chromatography data from var ### Formats ##### ChromConverter -- Chromeleon UV ascii (`.txt`) -- mzML (`.mzml`) -- Shimadzu LabSolutions ascii (`.txt`) -- Waters ascii (`.arw`) -- 'Agilent Chemstation' & 'OpenLab' `.ch` files (versions 8, 81, 130, 179, 181) - 'Agilent Chemstation' & 'OpenLab' `.uv` files (versions 131, 31) -- 'Shimadzu' `.lcd` (*provisional support* for PDA stream) +- 'Agilent Chemstation' & 'OpenLab' `.ch` files (versions 8, 81, 130, 179, 181) +- ÅNDI (Analytical Data Interchange) chromatography format (`.cdf`) +- mzML (`.mzml`) +- 'Shimadzu LabSolutions' ascii (`.txt`) +- 'Shimadzu LabSolutions'`.lcd` (*provisional support* for PDA stream) +- 'Thermo Scientific Chromeleon' UV ascii (`.txt`) +- 'Waters' ascii (`.arw`) ##### External Libraries ###### Aston/Entab (*Entab requires separate installation, see [instructions below](README.md#Installation)*) @@ -161,6 +162,6 @@ Contributions of source code, ideas, or documentation are always welcome. Please You can cite chromConverter as follows: -Bass, E. (2023). chromConverter: chromatographic file converter. http://doi.org/10.5281/zenodo.6792521. +Bass, E. (2023). chromConverter: Chromatographic File Converter. http://doi.org/10.5281/zenodo.6792521. If you use external libraries to convert your files, it is suggested to also cite these libraries in published work. diff --git a/inst/CITATION b/inst/CITATION index da7678c..184f6bb 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -2,13 +2,13 @@ citHeader("To cite chromConverter in publications, please use:") citEntry( entry = "manual", - title = "chromConverter: chromatographic file converter", + title = "chromConverter: Chromatographic File Converter", author = "Ethan Bass", year = "2023", version = "version 0.5.1", doi = "10.5281/zenodo.6792521", url = "https://ethanbass.github.io/chromConverter/", textVersion = paste("Bass, E. (2023).", - "chromConverter: chromatographic file converter.", + "chromConverter: Chromatographic File Converter.", "http://doi.org/10.5281/zenodo.6792521.") ) From 9a196b5fd2200356c2334eec1d7bcd0159669abd Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Tue, 12 Dec 2023 12:52:24 -0500 Subject: [PATCH 09/51] docs: (read_shimadzu) add collapse argument --- R/read_shimadzu_ascii.R | 7 +++++-- man/read_shimadzu.Rd | 8 ++++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/R/read_shimadzu_ascii.R b/R/read_shimadzu_ascii.R index cc86267..c3180cf 100644 --- a/R/read_shimadzu_ascii.R +++ b/R/read_shimadzu_ascii.R @@ -20,8 +20,11 @@ #' @param read_metadata Whether to read metadata from file. #' @param metadata_format Format to output metadata. Either \code{chromconverter} or #' \code{raw}. -#' @return A chromatogram in the format specified by \code{format_out} -#' (retention time x wavelength). +#' @param collapse Whether to collapse lists that only contain a single element. +#' @return A nested list of elements from the specified \code{file}, where the +#' top levels are chromatograms, peak tables, and/or mass spectra according to +#' the value of \code{what}. Chromatograms are returned in the format specified +#' by \code{format_out} (retention time x wavelength). #' @author Ethan Bass #' @export diff --git a/man/read_shimadzu.Rd b/man/read_shimadzu.Rd index a1afb20..f685e12 100644 --- a/man/read_shimadzu.Rd +++ b/man/read_shimadzu.Rd @@ -39,10 +39,14 @@ read_shimadzu( \item{metadata_format}{Format to output metadata. Either \code{chromconverter} or \code{raw}.} + +\item{collapse}{Whether to collapse lists that only contain a single element.} } \value{ -A chromatogram in the format specified by \code{format_out} -(retention time x wavelength). +A nested list of elements from the specified \code{file}, where the +top levels are chromatograms, peak tables, and/or mass spectra according to +the value of \code{what}. Chromatograms are returned in the format specified +by \code{format_out} (retention time x wavelength). } \description{ Reads 'Shimadzu' ascii files into R. These files are exported from From 658f89522828954332eb2e9a3668e34503ddf1df Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Tue, 12 Dec 2023 20:52:40 -0500 Subject: [PATCH 10/51] feat: add collapse argument to call_rainbow --- R/call_rainbow.R | 5 ++++- R/read_shimadzu_ascii.R | 3 ++- man/call_rainbow.Rd | 6 +++++- man/read_shimadzu.Rd | 3 ++- 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/R/call_rainbow.R b/R/call_rainbow.R index d86562f..0b29dad 100644 --- a/R/call_rainbow.R +++ b/R/call_rainbow.R @@ -17,6 +17,8 @@ #' @param by How to order the list that is returned. Either \code{detector} #' (default) or \code{name}. #' @param read_metadata Logical. Whether to attach metadata. Defaults to TRUE. +#' @param collapse Logical. Whether to collapse lists that only contain a single +#' element. #' @author Ethan Bass #' @return Returns a (nested) list of \code{matrices} or \code{data.frames} according to #' the value of \code{format_out}. Data is ordered according to the value of @@ -28,7 +30,7 @@ call_rainbow <- function(file, format_in = c("agilent_d", "waters_raw", "masshun format_out = c("matrix", "data.frame"), data_format = c("wide", "long"), by = c("detector","name"), what = NULL, - read_metadata = TRUE){ + read_metadata = TRUE, collapse = TRUE){ check_rb_configuration() by <- match.arg(by, c("detector","name")) format_out <- match.arg(format_out, c("matrix","data.frame")) @@ -63,6 +65,7 @@ call_rainbow <- function(file, format_in = c("agilent_d", "waters_raw", "masshun read_metadata = read_metadata) }) names(dtr_dat) <- extract_rb_names(dtr) + if (collapse) dtr_dat <- collapse_list(dtr_dat) dtr_dat }) } else if (by == "name"){ diff --git a/R/read_shimadzu_ascii.R b/R/read_shimadzu_ascii.R index c3180cf..7c869ff 100644 --- a/R/read_shimadzu_ascii.R +++ b/R/read_shimadzu_ascii.R @@ -20,7 +20,8 @@ #' @param read_metadata Whether to read metadata from file. #' @param metadata_format Format to output metadata. Either \code{chromconverter} or #' \code{raw}. -#' @param collapse Whether to collapse lists that only contain a single element. +#' @param collapse Logical. Whether to collapse lists that only contain a single +#' element. #' @return A nested list of elements from the specified \code{file}, where the #' top levels are chromatograms, peak tables, and/or mass spectra according to #' the value of \code{what}. Chromatograms are returned in the format specified diff --git a/man/call_rainbow.Rd b/man/call_rainbow.Rd index ca4fddb..2076c6c 100644 --- a/man/call_rainbow.Rd +++ b/man/call_rainbow.Rd @@ -12,7 +12,8 @@ call_rainbow( data_format = c("wide", "long"), by = c("detector", "name"), what = NULL, - read_metadata = TRUE + read_metadata = TRUE, + collapse = TRUE ) } \arguments{ @@ -32,6 +33,9 @@ call_rainbow( \code{ELSD}). This argument only applies if \code{by == "detector"}.} \item{read_metadata}{Logical. Whether to attach metadata. Defaults to TRUE.} + +\item{collapse}{Logical. Whether to collapse lists that only contain a single +element.} } \value{ Returns a (nested) list of \code{matrices} or \code{data.frames} according to diff --git a/man/read_shimadzu.Rd b/man/read_shimadzu.Rd index f685e12..2378c1f 100644 --- a/man/read_shimadzu.Rd +++ b/man/read_shimadzu.Rd @@ -40,7 +40,8 @@ read_shimadzu( \item{metadata_format}{Format to output metadata. Either \code{chromconverter} or \code{raw}.} -\item{collapse}{Whether to collapse lists that only contain a single element.} +\item{collapse}{Logical. Whether to collapse lists that only contain a single +element.} } \value{ A nested list of elements from the specified \code{file}, where the From b2648bcac1fdfc280e33bef748b365c3f89c286d Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Tue, 12 Dec 2023 22:21:30 -0500 Subject: [PATCH 11/51] add dots for parser to read_chroms --- R/read_chroms.R | 70 +++++++++++++++++++++++----------------------- man/read_chroms.Rd | 5 +++- 2 files changed, 39 insertions(+), 36 deletions(-) diff --git a/R/read_chroms.R b/R/read_chroms.R index 4281433..9076618 100644 --- a/R/read_chroms.R +++ b/R/read_chroms.R @@ -60,6 +60,7 @@ #' sample names default to the basename of the specified files. #' @param dat Existing list of chromatograms to append results. #' (Defaults to NULL). +#' @param ... Additional arguments to parser. #' @return A list of chromatograms in \code{matrix} or \code{data.frame} format, #' according to the value of \code{format_out}. #' @section Side effects: If \code{export} is TRUE, chromatograms will be @@ -80,6 +81,7 @@ read_chroms <- function(paths, find_files, "chemstation_fid", "chemstation_ch", "chemstation_csv", "chemstation_uv", "masshunter_dad", "chromeleon_uv", + "shimadzu_txt", "shimadzu_fid", "shimadzu_dad", "shimadzu_lcd", "thermoraw", "mzml", "mzxml", "waters_arw", "waters_raw", @@ -96,7 +98,7 @@ read_chroms <- function(paths, find_files, metadata_format = c("chromconverter", "raw"), progress_bar, cl = 1, verbose = getOption("verbose"), - sample_names = NULL, dat = NULL){ + sample_names = NULL, dat = NULL, ...){ data_format <- match.arg(data_format, c("wide","long")) format_out <- match.arg(format_out, c("matrix", "data.frame")) parser <- match.arg(tolower(parser), c("", "chromconverter", "aston","entab", @@ -127,14 +129,15 @@ read_chroms <- function(paths, find_files, stop("Please specify the file format of your chromatograms by setting the `format_in` argument.") } } - format_in <- match.arg(format_in, c("agilent_d", "agilent_dx", "chemstation", + format_in <- match.arg(tolower(format_in), c("agilent_d", "agilent_dx", "chemstation", "chemstation_uv", "chemstation_ch", "chemstation_30", "chemstation_31", "chemstation_130", "chemstation_131", "openlab_131", "chemstation_179", "chemstation_81", "chemstation_181", "chemstation_fid", "chemstation_csv", "masshunter_dad", - "shimadzu_fid", "shimadzu_dad", "shimadzu_lcd", + "shimadzu_fid", "shimadzu_dad", + "shimadzu_txt", "shimadzu_lcd", "chromeleon_uv", "thermoraw", "mzml", "mzxml", "waters_arw", "waters_raw", "msd", "csd", "wsd", "mdf", "cdf", "other")) @@ -181,19 +184,16 @@ read_chroms <- function(paths, find_files, rainbow_parser <- partial(call_rainbow, format_in = format_in, format_out = format_out, data_format = data_format, - read_metadata = read_metadata) + read_metadata = read_metadata, ...) if (format_in == "agilent_d"){ - pattern <- ifelse(is.null(pattern), ".D", pattern) converter <- rainbow_parser } else if (format_in == "agilent_dx"){ - pattern <- ifelse(is.null(pattern), ".dx", pattern) converter <- partial(read_agilent_dx, path_out = path_out, format_out = format_out, data_format = data_format, read_metadata = read_metadata) } else if (format_in == "masshunter_dad"){ - pattern <- ifelse(is.null(pattern), ".sp", pattern) converter <- switch(parser, "aston" = partial(sp_converter, format_out = format_out, data_format = data_format, @@ -201,7 +201,6 @@ read_chroms <- function(paths, find_files, metadata_format = metadata_format), "entab" = entab_parser) } else if (format_in == "chemstation_uv" | grepl("31", format_in)){ - pattern <- ifelse(is.null(pattern), ".uv", pattern) converter <- switch(parser, "chromconverter" = partial(read_chemstation_uv, format_out = format_out, @@ -215,30 +214,30 @@ read_chroms <- function(paths, find_files, "entab" = entab_parser, "rainbow" = rainbow_parser) } else if (format_in == "chromeleon_uv"){ - pattern <- ifelse(is.null(pattern), ".txt", pattern) converter <- partial(read_chromeleon, format_out = format_out, data_format = data_format, read_metadata = read_metadata, metadata_format = metadata_format) } else if (format_in == "shimadzu_fid"){ - pattern <- ifelse(is.null(pattern), ".txt", pattern) converter <- partial(read_shimadzu, include = "fid", format_out = format_out, data_format = data_format, read_metadata = read_metadata, - metadata_format = metadata_format) + metadata_format = metadata_format, ...) } else if (format_in == "shimadzu_dad"){ - pattern <- ifelse(is.null(pattern), ".txt", pattern) converter <- partial(read_shimadzu, include = "dad", format_out = format_out, data_format = data_format, read_metadata = read_metadata, metadata_format = metadata_format) - } else if (format_in == "shimadzu_lcd"){ - pattern <- ifelse(is.null(pattern), ".lcd", pattern) + } else if (format_in == "shimadzu_ascii"){ + converter <- partial(read_shimadzu, include = "dad", + format_out = format_out, data_format = data_format, + read_metadata = read_metadata, + metadata_format = metadata_format) + } else if (format_in == "shimadzu_lcd"){ converter <- partial(read_shimadzu_lcd, format_out = format_out, data_format = data_format, read_metadata = read_metadata) } else if (format_in == "thermoraw"){ - pattern <- ifelse(is.null(pattern), ".raw", pattern) converter <- switch(parser, "thermoraw" = partial(read_thermoraw, path_out = path_out, format_out = format_out, @@ -246,27 +245,28 @@ read_chroms <- function(paths, find_files, metadata_format = metadata_format, verbose = verbose), "entab" = entab_parser) - } else if (format_in == "mzml"){ - pattern <- ifelse(is.null(pattern), ".mzML", pattern) - converter <- partial(read_mzml, format_out = format_out) + } else if (format_in %in% c("mzml","mzxml")){ + converter <- partial(read_mzml, format_out = format_out, ...) } else if (format_in == "waters_arw"){ - pattern <- ifelse(is.null(pattern), ".arw", pattern) converter <- partial(read_waters_arw, format_out = format_out, data_format = data_format, read_metadata = read_metadata) } else if (format_in == "waters_raw"){ - pattern <- ifelse(is.null(pattern), ".raw", pattern) - converter <- rainbow_parser + converter <- switch(parser, "rainbow" = rainbow_parser, + "chromconverter" = partial(read_waters_raw, + format_out = format_out, + data_format = data_format, + read_metadata = read_metadata, + metadata_format = metadata_format)) } else if (format_in == "chemstation_csv"){ - pattern <- ifelse(is.null(pattern), ".csv|.CSV", pattern) converter <- partial(read_chemstation_csv, format_out = format_out) } else if (grepl("chemstation", format_in)){ - pattern <- ifelse(is.null(pattern), ".ch", pattern) converter <- switch(parser, "chromconverter" = partial(read_chemstation_ch, format_out = format_out, data_format = data_format, - read_metadata = read_metadata), + read_metadata = read_metadata, + metadata_format = metadata_format), "rainbow" = rainbow_parser, "entab" = entab_parser) } else if (format_in %in% c("msd", "csd", "wsd")){ @@ -279,12 +279,12 @@ read_chroms <- function(paths, find_files, format_in = format_in, export_format = export_format, return_paths = return_paths, verbose = verbose) } else if (format_in == "mdf"){ - pattern <- ifelse(is.null(pattern), ".mdf|.MDF", pattern) + # pattern <- ifelse(is.null(pattern), ".mdf|.MDF", pattern) converter <- partial(read_mdf, format_out = format_out, data_format = data_format, read_metadata = read_metadata) } else if (format_in == "cdf"){ - pattern <- ifelse(is.null(pattern), ".cdf|.CDF", pattern) + # pattern <- ifelse(is.null(pattern), ".cdf|.CDF", pattern) converter <- partial(read_cdf, format_out = format_out, data_format = data_format, read_metadata = read_metadata) @@ -297,21 +297,21 @@ read_chroms <- function(paths, find_files, "entab" = entab_parser ) } - + pattern <- ifelse(is.null(pattern), format_to_extension(format_in), pattern) if (find_files){ files <- find_files(paths, pattern) } else { files <- paths if (!is.null(pattern)){ match <- grep(pattern, files, ignore.case = TRUE) - if (length(match) == 0){ - warning("The provided files do not match the expected file extension. - Please confirm that the specified format ('format_in') is correct.", - immediate. = TRUE) - } else if (length(match) < length(files)){ - warning(paste("Some of the files do not have the expected file extension:", - files[match]), immediate. = TRUE) - } + if (length(match) == 0){ + warning("The provided files do not match the expected file extension. + Please confirm that the specified format ('format_in') is correct.", + immediate. = TRUE) + } else if (length(match) < length(files)){ + warning(paste("Some of the files do not have the expected file extension:", + files[match]), immediate. = TRUE) + } } } if (all(grepl("\\.[Dd]$|\\.[Dd]?[/\\\\]",files))){ diff --git a/man/read_chroms.Rd b/man/read_chroms.Rd index f0e653b..fed1582 100644 --- a/man/read_chroms.Rd +++ b/man/read_chroms.Rd @@ -24,7 +24,8 @@ read_chroms( cl = 1, verbose = getOption("verbose"), sample_names = NULL, - dat = NULL + dat = NULL, + ... ) } \arguments{ @@ -86,6 +87,8 @@ sample names default to the basename of the specified files.} \item{dat}{Existing list of chromatograms to append results. (Defaults to NULL).} + +\item{...}{Additional arguments to parser.} } \value{ A list of chromatograms in \code{matrix} or \code{data.frame} format, From a275ed462f166711ea3e4cefd68863b0edfaf256 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Tue, 12 Dec 2023 22:22:43 -0500 Subject: [PATCH 12/51] add `read_waters_raw` internal parser --- NAMESPACE | 1 + R/read_waters_raw.R | 94 ++++++++++++++++++++++++++++++++++++++++++ man/read_waters_raw.Rd | 40 ++++++++++++++++++ 3 files changed, 135 insertions(+) create mode 100644 R/read_waters_raw.R create mode 100644 man/read_waters_raw.Rd diff --git a/NAMESPACE b/NAMESPACE index d4c2cab..72b6288 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -24,6 +24,7 @@ export(read_shimadzu_lcd) export(read_thermoraw) export(read_varian_peaklist) export(read_waters_arw) +export(read_waters_raw) export(sp_converter) export(uv_converter) import(magrittr) diff --git a/R/read_waters_raw.R b/R/read_waters_raw.R new file mode 100644 index 0000000..9f7906d --- /dev/null +++ b/R/read_waters_raw.R @@ -0,0 +1,94 @@ +#' Read 'Waters' raw +#' +#' Parser for reading Waters (.raw) files into R. +#' +#' @param path Path to \code{.raw} file. +#' @param format_out Matrix or data.frame. +#' @param data_format Either \code{wide} (default) or \code{long}. +#' @param read_metadata Logical. Whether to attach metadata. +#' @param metadata_format Format to output metadata. Either \code{chromconverter} +#' or \code{raw}. +#' @return A chromatogram in the format specified by \code{format_out} +#' (retention time x wavelength). +#' @note For now this parser only reads 1D chromatograms (not mass spectra or +#' DAD data). +#' @author Ethan Bass +#' @export + +read_waters_raw <- function(path, format_out = c("matrix", "data.frame"), + data_format = c("wide", "long"), + read_metadata = TRUE, + metadata_format = c("chromconverter", "raw")){ + + format_out <- match.arg(format_out, c("matrix", "data.frame")) + data_format <- match.arg(data_format, c("wide", "long")) + metadata_format <- match.arg(metadata_format, c("chromconverter", "raw")) + + uv_paths <- list.files(path, pattern="_CHRO", full.names = TRUE) + meta_path <- grep("\\.INF", uv_paths, value=TRUE) + uv_paths <- grep("\\.INF", uv_paths, invert = TRUE, value = TRUE) + + dat <- lapply(uv_paths, read_waters_chro, format_out = format_out, + data_format = data_format, read_metadata = read_metadata, + metadata_format = metadata_format) + + meta <- readLines(meta_path, skipNul = TRUE, warn = FALSE, encoding = "Latin-1") + meta <- iconv(meta, sub="") + meta <- strsplit(meta,"\\([0-9]\\)")[[1]][-1] + meta <- gsub("^ |\\$CC\\$","", sapply(strsplit(meta, ","), function(x) x[1])) + + names(dat) <- meta + dat +} + +#' Read 'Waters' chromatograms +#' +#' Parser for reading Waters CHRO (.dat) files into R. +#' +#' @importFrom utils head tail +#' @param path Path to \code{.dat} file. +#' @param format_out Matrix or data.frame. +#' @param data_format Either \code{wide} (default) or \code{long}. +#' @param read_metadata Logical. Whether to attach metadata. +#' @param metadata_format Format to output metadata. Either \code{chromconverter} +#' or \code{raw}. +#' @return A chromatogram in the format specified by \code{format_out} +#' (retention time x wavelength). +#' @author Ethan Bass +#' @noRd + +#magic 80000100 08000200 + +read_waters_chro <- function(path, format_out = c("matrix", "data.frame"), + data_format = c("wide", "long"), + read_metadata = TRUE, + metadata_format = c("chromconverter", "raw")){ + + format_out <- match.arg(format_out, c("matrix", "data.frame")) + data_format <- match.arg(data_format, c("wide", "long")) + metadata_format <- match.arg(metadata_format, c("chromconverter", "raw")) + # metadata_format <- switch(metadata_format, + # chromconverter = "waters_uv", raw = "raw") + + f <- file(path, "rb") + on.exit(close(f)) + + seek(f, 0, "end") + end <- seek(f, 0, "end") + + seek(f, 128, "start") + start <- seek(f, 128, "start") + + x<-readBin(f, "numeric", size = 4, n = (end-start)/4) + times <- x[seq(1,length(x), by = 2)] + int <- x[seq(2,length(x), by = 2)] + if (data_format == "long"){ + dat <- data.frame(times = times, int = int) + } else { + dat <- data.frame(row.names = times, int = int) + } + if (format_out == "matrix"){ + dat <- as.matrix(dat) + } + dat +} diff --git a/man/read_waters_raw.Rd b/man/read_waters_raw.Rd new file mode 100644 index 0000000..c966ea8 --- /dev/null +++ b/man/read_waters_raw.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/read_waters_raw.R +\name{read_waters_raw} +\alias{read_waters_raw} +\title{Read 'Waters' raw} +\usage{ +read_waters_raw( + path, + format_out = c("matrix", "data.frame"), + data_format = c("wide", "long"), + read_metadata = TRUE, + metadata_format = c("chromconverter", "raw") +) +} +\arguments{ +\item{path}{Path to \code{.raw} file.} + +\item{format_out}{Matrix or data.frame.} + +\item{data_format}{Either \code{wide} (default) or \code{long}.} + +\item{read_metadata}{Logical. Whether to attach metadata.} + +\item{metadata_format}{Format to output metadata. Either \code{chromconverter} +or \code{raw}.} +} +\value{ +A chromatogram in the format specified by \code{format_out} +(retention time x wavelength). +} +\description{ +Parser for reading Waters (.raw) files into R. +} +\note{ +For now this parser only reads 1D chromatograms (not mass spectra or +DAD data). +} +\author{ +Ethan Bass +} From a271d15227915451a8aef211b1c5970c8fbc17d4 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Tue, 12 Dec 2023 22:23:36 -0500 Subject: [PATCH 13/51] refactor: pull inference of extensions into function --- R/utils.R | 65 +++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 23 deletions(-) diff --git a/R/utils.R b/R/utils.R index bbbd2d9..8b1e6a2 100644 --- a/R/utils.R +++ b/R/utils.R @@ -58,7 +58,7 @@ get_filetype <- function(path, out = c("format_in", "filetype")){ #' Check parser #' @noRd -check_parser <- function(format_in, parser=NULL, find = FALSE){ +check_parser <- function(format_in, parser = NULL, find = FALSE){ allowed_formats <- list(openchrom = c("msd","csd","wsd"), chromconverter = c("agilent_dx", "cdf", "chemstation_csv", "chemstation_ch", "chemstation_fid", @@ -69,7 +69,8 @@ check_parser <- function(format_in, parser=NULL, find = FALSE){ "chemstation_179", "chemstation_81", "chemstation_181", "mzml", "mzxml", "mdf", "shimadzu_fid", "shimadzu_dad", - "shimadzu_lcd", "waters_arw"), + "shimadzu_lcd", "waters_arw", + "waters_raw", "waters_chro"), aston = c("chemstation", "chemstation_uv", "chemstation_131", "masshunter_dad", "other"), @@ -97,15 +98,19 @@ check_parser <- function(format_in, parser=NULL, find = FALSE){ } possible_parsers <- names(allowed_formats)[grep(format_in, allowed_formats)] if (length(possible_parsers) > 1){ - possible_parsers <- possible_parsers[match( - c("thermoraw", "entab", "chromconverter", "rainbow", "aston"), possible_parsers)] - if (any(is.na(possible_parsers))){ - possible_parsers <- possible_parsers[-which(is.na(possible_parsers))] + if (format_in == "waters_raw"){ + possible_parsers <- c("rainbow") + } else{ + possible_parsers <- possible_parsers[match( + c("thermoraw", "entab", "chromconverter", "rainbow", "aston"), possible_parsers)] + if (any(is.na(possible_parsers))){ + possible_parsers <- possible_parsers[-which(is.na(possible_parsers))] + } } } possible_parsers[1] } else{ - if (!(format_in %in% allowed_formats[[parser]])){ + if (!(format_in %in% allowed_formats[[tolower(parser)]])){ stop("Mismatched arguments!", "\n\n", "The ", paste0(sQuote(format_in), " format can be converted using the following parsers: ", paste(sQuote(names(allowed_formats)[grep(format_in, allowed_formats)]), collapse = ", "), ". \n \n", "The ", sQuote(parser), " parser can take the following formats as inputs: \n", @@ -141,22 +146,36 @@ extract_filenames <- function(files){ #' @noRd format_to_extension <- function(format_in){ switch(format_in, - "agilent_d" = ".d|.D", - "chemstation_uv" = ".uv|.UV", - "chemstation_ch" = ".ch|.CH", - "chemstation_fid" = ".ch|.CH", - "chemstation_csv" = ".csv|.CSV", - "masshunter_dad" = ".sp|.SP", - "shimadzu_fid" = ".txt", - "shimadzu_dad" = ".txt", - "chromeleon_uv" = ".txt", - "thermoraw" = ".raw", - "mzml" = ".mzml", "mzxml" = ".mzxml", - "waters_arw" = ".arw", - "waters_raw" = ".raw", - "msd" = ".", "csd" =".", - "wsd" =".", "mdf" = ".mdf|.MDF", - "other"=".") + "agilent_d" = "\\.d$", + "agilent_dx" = "\\.dx$", + "chemstation_uv" = "\\.uv$", + "chemstation_31" = "\\.uv$", + "chemstation_131" = "\\.uv$", + "chemstation_ch" = "\\.ch$", + "chemstation_fid" = "\\.ch$", + "chemstation_179" = "\\.ch$", + "chemstation_181" = "\\.ch$", + "chemstation_81" = "\\.ch$", + "chemstation_30" = "\\.ch$", + "chemstation_130" = "\\.ch$", + "chemstation_csv" = "\\.csv$", + "masshunter_dad" = "\\.sp$", + "shimadzu_txt" = "\\.txt$", + "shimadzu_fid" = "\\.txt$", + "shimadzu_dad" = "\\.txt$", + "shimadzu_lcd" = "\\.lcd$", + "chromeleon_uv" = "\\.txt$", + "thermoraw" = "\\.raw$", + "cdf" = "\\.cdf$", + "mzml" = "\\.mzml$", + "mzxml" = "\\.mzxml$", + "waters_arw" = "\\.arw$", + "waters_raw" = "\\.raw$", + "msd" = "\\.", + "csd" ="\\.", + "wsd" ="\\.", + "mdf" = "\\.mdf$", + "other" = "\\.") } #' @noRd From 1ecbbec11d368948fc08f9c090ab89f9f12293ec Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Tue, 12 Dec 2023 22:23:46 -0500 Subject: [PATCH 14/51] update news, v 0.6.0 --- DESCRIPTION | 2 +- NEWS.md | 7 +++++-- inst/CITATION | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index cfd0006..19cfed3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: chromConverter Title: Chromatographic File Converter -Version: 0.5.1 +Version: 0.6.0 Authors@R: c( person(given = "Ethan", family = "Bass", email = "ethanbass@gmail.com", role = c("aut", "cre"), diff --git a/NEWS.md b/NEWS.md index 7f021dc..b25145d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,9 +1,12 @@ -## chromConverter 0.5.1 +## chromConverter 0.6.0 * Fixed `read_chemstation_ch` parser to correctly read "Mustang Chemstation" 179 files with 8-byte encoding. * Added alias to `read_chroms` for reading `mzxml` files with `RaMS`. -* Refactored `read_shimadzu` function and added support for new types of chromatograms (e.g. status, uv and total ion chromatograms). Added support for reading multiple types of chromatograms at once. +* Added internal parser for 1D 'Waters RAW' chromatograms. +* Added `collapse` argument to call_rainbow to collapse superfluous lists. +* Re-factored `read_shimadzu` function and added support for new types of chromatograms (e.g. status, uv and total ion chromatograms). Added support for reading multiple types of chromatograms at once. * Added support for reading MS spectra from 'Shimadzu' ascii files using `read_shimadzu`. +* Added `...` argument to `read_chroms` for supplying additional arguments to parsers. ## chromConverter 0.5.0 diff --git a/inst/CITATION b/inst/CITATION index 184f6bb..c890776 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -5,7 +5,7 @@ citEntry( title = "chromConverter: Chromatographic File Converter", author = "Ethan Bass", year = "2023", - version = "version 0.5.1", + version = "version 0.6.0", doi = "10.5281/zenodo.6792521", url = "https://ethanbass.github.io/chromConverter/", textVersion = paste("Bass, E. (2023).", From 9731c15f11b26f6f28ba5627180916358c4dc70b Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Tue, 12 Dec 2023 22:52:22 -0500 Subject: [PATCH 15/51] fix: codoc mismatch error --- man/read_chroms.Rd | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/man/read_chroms.Rd b/man/read_chroms.Rd index fed1582..8cf5e63 100644 --- a/man/read_chroms.Rd +++ b/man/read_chroms.Rd @@ -9,8 +9,9 @@ read_chroms( find_files, format_in = c("agilent_d", "agilent_dx", "chemstation", "chemstation_fid", "chemstation_ch", "chemstation_csv", "chemstation_uv", "masshunter_dad", - "chromeleon_uv", "shimadzu_fid", "shimadzu_dad", "shimadzu_lcd", "thermoraw", "mzml", - "mzxml", "waters_arw", "waters_raw", "msd", "csd", "wsd", "mdf", "other"), + "chromeleon_uv", "shimadzu_txt", "shimadzu_fid", "shimadzu_dad", "shimadzu_lcd", + "thermoraw", "mzml", "mzxml", "waters_arw", "waters_raw", "msd", "csd", "wsd", "mdf", + "other"), pattern = NULL, parser = c("", "chromconverter", "aston", "entab", "thermoraw", "openchrom", "rainbow"), format_out = c("matrix", "data.frame"), From ae41b3c5446b0d22f6fe25904d4ff252649dd345 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Wed, 13 Dec 2023 10:58:19 -0500 Subject: [PATCH 16/51] docs: update read_chemstation, etc --- R/read_chemstation_ch.R | 20 +++++++++++++++++--- R/read_chemstation_uv.R | 18 +++++++++++++----- R/read_shimadzu_lcd.R | 2 +- R/read_waters_raw.R | 2 +- man/read_chemstation_ch.Rd | 19 +++++++++++++++---- man/read_chemstation_uv.Rd | 18 +++++++++++++----- man/read_shimadzu_lcd.Rd | 2 +- man/read_waters_raw.Rd | 2 +- 8 files changed, 62 insertions(+), 21 deletions(-) diff --git a/R/read_chemstation_ch.R b/R/read_chemstation_ch.R index e3022e3..3fe120f 100644 --- a/R/read_chemstation_ch.R +++ b/R/read_chemstation_ch.R @@ -1,4 +1,12 @@ -#' Parser for reading Agilent ('.ch') files into R +#' Read 'ChemStation' CH files +#' +#' Agilent \code{.ch} files come in several different varieties. This parser +#' can automatically detect and read several versions of these files from +#' 'Agilent ChemStation' and 'OpenLab', including versions \code{30} and \code{130}, +#' which are generally produced by ultraviolet detectors, as well as \code{81}, +#' \code{179}, and \code{181} which are generally produced by flame ionization +#' detectors. +#' #' @importFrom bitops bitAnd bitShiftL #' @param path Path to \code{.ch} file #' @param format_out Matrix or data.frame. @@ -7,8 +15,14 @@ #' @param metadata_format Format to output metadata. Either \code{chromconverter} #' or \code{raw}. #' @author Ethan Bass -#' @return A chromatogram in the format specified by \code{format_out} -#' (retention time x wavelength). +#' @return A 2D chromatogram in the format specified by \code{data_format} and +#' \code{format_out}. If \code{data_format} is \code{wide}, the chromatogram will +#' be returned with retention times as rows and wavelengths as columns. If +#' \code{long} format is requested, three columns will be returned: one for the +#' retention time, one for the wavelength and one for the intensity. The +#' \code{format_out} argument determines whether the chromatogram is returned as +#' a \code{matrix} or \code{data.frame}. Metadata can be attached to the +#' chromatogram as \code{\link{attributes}} if \code{read_metadata} is {TRUE}. #' @note This function was adapted from the #' \href{https://github.com/chemplexity/chromatography}{Chromatography Toolbox} #' ((c) James Dillon 2014). diff --git a/R/read_chemstation_uv.R b/R/read_chemstation_uv.R index 340967c..0e73566 100644 --- a/R/read_chemstation_uv.R +++ b/R/read_chemstation_uv.R @@ -1,6 +1,8 @@ -#' Read 'Chemstation' DAD files +#' Read 'ChemStation' DAD files #' -#' Parser for reading Agilent UV (.uv) files into R. +#' Agilent \code{.uv} files come in several different varieties. This parser can +#' automatically detect and read several versions of these files from +#' 'Agilent ChemStation' and 'OpenLab', including versions \code{31} and \code{131}. #' #' @importFrom utils head tail #' @param path Path to \code{.uv} file. @@ -9,12 +11,18 @@ #' @param read_metadata Logical. Whether to attach metadata. #' @param metadata_format Format to output metadata. Either \code{chromconverter} #' or \code{raw}. -#' @return A chromatogram in the format specified by \code{format_out} -#' (retention time x wavelength). +#' @return A 3D chromatogram in the format specified by \code{data_format} and +#' \code{format_out}. If \code{data_format} is \code{wide}, the chromatogram will +#' be returned with retention times as rows and wavelengths as columns. If +#' \code{long} format is requested, three columns will be returned: one for the +#' retention time, one for the wavelength and one for the intensity. The +#' \code{format_out} argument determines whether the chromatogram is returned as +#' a \code{matrix} or \code{data.frame}. Metadata can be attached to the +#' chromatogram as \code{\link{attributes}} if \code{read_metadata} is {TRUE}. #' @author Ethan Bass #' @note This function was adapted from the parser in the rainbow project #' licensed under GPL 3 by Evan Shi -#' (https://rainbow-api.readthedocs.io/en/latest/agilent/uv.html). +#' \url{https://rainbow-api.readthedocs.io/en/latest/agilent/uv.html}. #' @export read_chemstation_uv <- function(path, format_out = c("matrix", "data.frame"), diff --git a/R/read_shimadzu_lcd.R b/R/read_shimadzu_lcd.R index 60dafce..c45bd85 100644 --- a/R/read_shimadzu_lcd.R +++ b/R/read_shimadzu_lcd.R @@ -1,6 +1,6 @@ #' Shimadzu LCD parser #' -#' Read 3D PDA data stream from 'Shimadzu' LCD files. +#' Reads 3D PDA data stream from 'Shimadzu' LCD files. #' #' A parser to read PDA data from 'Shimadzu' \code{.lcd} files. LCD files are #' encoded as 'Microsoft' OLE documents. The parser relies on the diff --git a/R/read_waters_raw.R b/R/read_waters_raw.R index 9f7906d..ff60280 100644 --- a/R/read_waters_raw.R +++ b/R/read_waters_raw.R @@ -1,4 +1,4 @@ -#' Read 'Waters' raw +#' Read 'Waters' RAW #' #' Parser for reading Waters (.raw) files into R. #' diff --git a/man/read_chemstation_ch.Rd b/man/read_chemstation_ch.Rd index 2cf355d..5c52314 100644 --- a/man/read_chemstation_ch.Rd +++ b/man/read_chemstation_ch.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/read_chemstation_ch.R \name{read_chemstation_ch} \alias{read_chemstation_ch} -\title{Parser for reading Agilent ('.ch') files into R} +\title{Read 'ChemStation' CH files} \usage{ read_chemstation_ch( path, @@ -25,11 +25,22 @@ read_chemstation_ch( or \code{raw}.} } \value{ -A chromatogram in the format specified by \code{format_out} -(retention time x wavelength). +A 2D chromatogram in the format specified by \code{data_format} and +\code{format_out}. If \code{data_format} is \code{wide}, the chromatogram will +be returned with retention times as rows and wavelengths as columns. If +\code{long} format is requested, three columns will be returned: one for the +retention time, one for the wavelength and one for the intensity. The +\code{format_out} argument determines whether the chromatogram is returned as +a \code{matrix} or \code{data.frame}. Metadata can be attached to the +chromatogram as \code{\link{attributes}} if \code{read_metadata} is {TRUE}. } \description{ -Parser for reading Agilent ('.ch') files into R +Agilent \code{.ch} files come in several different varieties. This parser +can automatically detect and read several versions of these files from +'Agilent ChemStation' and 'OpenLab', including versions \code{30} and \code{130}, +which are generally produced by ultraviolet detectors, as well as \code{81}, +\code{179}, and \code{181} which are generally produced by flame ionization +detectors. } \note{ This function was adapted from the diff --git a/man/read_chemstation_uv.Rd b/man/read_chemstation_uv.Rd index ac278ab..b8e121b 100644 --- a/man/read_chemstation_uv.Rd +++ b/man/read_chemstation_uv.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/read_chemstation_uv.R \name{read_chemstation_uv} \alias{read_chemstation_uv} -\title{Read 'Chemstation' DAD files} +\title{Read 'ChemStation' DAD files} \usage{ read_chemstation_uv( path, @@ -25,16 +25,24 @@ read_chemstation_uv( or \code{raw}.} } \value{ -A chromatogram in the format specified by \code{format_out} -(retention time x wavelength). +A 3D chromatogram in the format specified by \code{data_format} and +\code{format_out}. If \code{data_format} is \code{wide}, the chromatogram will +be returned with retention times as rows and wavelengths as columns. If +\code{long} format is requested, three columns will be returned: one for the +retention time, one for the wavelength and one for the intensity. The +\code{format_out} argument determines whether the chromatogram is returned as +a \code{matrix} or \code{data.frame}. Metadata can be attached to the +chromatogram as \code{\link{attributes}} if \code{read_metadata} is {TRUE}. } \description{ -Parser for reading Agilent UV (.uv) files into R. +Agilent \code{.uv} files come in several different varieties. This parser can +automatically detect and read several versions of these files from +'Agilent ChemStation' and 'OpenLab', including versions \code{31} and \code{131}. } \note{ This function was adapted from the parser in the rainbow project licensed under GPL 3 by Evan Shi -(https://rainbow-api.readthedocs.io/en/latest/agilent/uv.html). +\url{https://rainbow-api.readthedocs.io/en/latest/agilent/uv.html}. } \author{ Ethan Bass diff --git a/man/read_shimadzu_lcd.Rd b/man/read_shimadzu_lcd.Rd index b929954..1af0ea7 100644 --- a/man/read_shimadzu_lcd.Rd +++ b/man/read_shimadzu_lcd.Rd @@ -21,7 +21,7 @@ read_shimadzu_lcd( \item{read_metadata}{Logical. Whether to attach metadata.} } \description{ -Read 3D PDA data stream from 'Shimadzu' LCD files. +Reads 3D PDA data stream from 'Shimadzu' LCD files. } \details{ A parser to read PDA data from 'Shimadzu' \code{.lcd} files. LCD files are diff --git a/man/read_waters_raw.Rd b/man/read_waters_raw.Rd index c966ea8..74e615c 100644 --- a/man/read_waters_raw.Rd +++ b/man/read_waters_raw.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/read_waters_raw.R \name{read_waters_raw} \alias{read_waters_raw} -\title{Read 'Waters' raw} +\title{Read 'Waters' RAW} \usage{ read_waters_raw( path, From 6f6fa799703d285c1cb4cc7abbe0a5725f72c2ab Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Sun, 17 Dec 2023 13:23:31 -0500 Subject: [PATCH 17/51] fix/feat: read_shimadzu - fixed bug affecting attachment of metadata - fixed bug in read_shimadzu_spectrum (starting one row too late) - added options for ms_format (either data.frame or list) - added retention times as names to ms_format list output --- R/read_shimadzu_ascii.R | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/R/read_shimadzu_ascii.R b/R/read_shimadzu_ascii.R index 7c869ff..fc7c77f 100644 --- a/R/read_shimadzu_ascii.R +++ b/R/read_shimadzu_ascii.R @@ -37,6 +37,7 @@ read_shimadzu <- function(file, what = "chromatogram", peaktable_format = c("chromatographr", "original"), read_metadata = TRUE, metadata_format = c("chromconverter", "raw"), + ms_format = c("data.frame", "list"), collapse = TRUE){ if (!is.null(format_in)){ warning("The `format_in` argument is deprecated, since the `read_shimadzu` @@ -51,6 +52,7 @@ read_shimadzu <- function(file, what = "chromatogram", data_format <- match.arg(data_format, c("wide", "long")) peaktable_format <- match.arg(peaktable_format, c("chromatographr","original")) metadata_format <- match.arg(metadata_format, c("chromconverter", "raw")) + ms_format <- match.arg(ms_format, c("data.frame", "list")) x <- readLines(file) sep <- substr(x[grep("Type", x)[1]], 5, 5) @@ -123,8 +125,15 @@ read_shimadzu <- function(file, what = "chromatogram", } } ms_spectra <- lapply(spectra.idx, function(idx){ - read_shimadzu_spectrum(x, idx = idx, sep = sep) + read_shimadzu_spectrum(file, x, idx = idx, sep = sep) }) + if (exists("peak_table") && "MC Peak Table" %in% names(peak_table)){ + rt.idx <- grep("^Ret.Time$|^rt$", colnames(peak_table$`MC Peak Table`)) + names(ms_spectra) <- peak_table$`MC Peak Table`[, rt.idx] + } + if (ms_format == "data.frame"){ + ms_spectra <- ms_list_to_dataframe(ms_spectra) + } } xx <- mget(what) @@ -133,13 +142,18 @@ read_shimadzu <- function(file, what = "chromatogram", } #' @noRd -collapse_list <- function(x){ - while(is.list(x) && length(x) == 1){ - x <- x[[1]] +ms_list_to_dataframe <- function(x){ + if (!is.null(names(x))){ + ms <- lapply(seq_along(x), function(i){ + cbind(rt = as.numeric(names(x)[i]), x[[i]]) + }) + } else { + ms <- lapply(seq_along(x), function(i){ + cbind(idx = as.numeric(i), x[[i]]) + }) } - x + as.data.frame(do.call(rbind, ms)) } - #' Read Shimadzu Metadata #' @noRd read_shimadzu_metadata <- function(x, met = NULL, sep){ @@ -268,9 +282,9 @@ read_shimadzu_peaktable <- function(file, x, idx, sep, format_in, format_out){ #' Read Shimadzu MS Spectrum #' @noRd -read_shimadzu_spectrum <- function(x, idx, sep){ +read_shimadzu_spectrum <- function(file, x, idx, sep){ nrows <- as.numeric(strsplit(x = x[idx + 1], split = sep)[[1]][2]) - table_start <- grep("Intensity", x[idx:(idx + nrows)]) + idx + table_start <- grep("Intensity", x[idx:(idx + nrows)]) + idx - 1 decimal_separator <- ifelse(grepl(".", strsplit(x[table_start + 4], split = sep)[[1]][1]), ".", ",") spectrum <- read.csv(file, skip = table_start-1, sep = sep, nrows = nrows, From 0268ab997ccdf66afbddafd04ba9cb60467526b4 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Sun, 17 Dec 2023 13:24:36 -0500 Subject: [PATCH 18/51] feat: added `collapse_list` fnc to utilities --- R/utils.R | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/R/utils.R b/R/utils.R index 8b1e6a2..44d4918 100644 --- a/R/utils.R +++ b/R/utils.R @@ -10,7 +10,7 @@ get_filetype <- function(path, out = c("format_in", "filetype")){ on.exit(close(f)) magic <- readBin(f, what = "raw", n = 4) - magic <- paste(paste0("x",as.character(magic)),collapse="/") + magic <- paste(paste0("x",as.character(magic)),collapse = "/") # magic filetype <- switch(magic, "x01/x32/x00/x00" = "AgilentChemstationMS", @@ -36,7 +36,7 @@ get_filetype <- function(path, out = c("format_in", "filetype")){ if (filetype == "chemstation_131"){ seek(f, 348) magic2 <- readBin(f, what="character", n = 2) - magic2 <- paste(magic2, collapse="") + magic2 <- paste(magic2, collapse = "") filetype <- switch(magic2, "OL" = "openlab_131", "LC" = "chemstation_131") } @@ -254,3 +254,11 @@ rename_list <- function(x, new_names){ names(x) <- new_names x } + +#' @noRd +collapse_list <- function(x){ + while(is.list(x) && length(x) == 1){ + x <- x[[1]] + } + x +} From df189b4719b93c898ea6f75e1d1a4010484850a4 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Sun, 17 Dec 2023 14:17:49 -0500 Subject: [PATCH 19/51] feat: add andi MS parser --- NEWS.md | 1 + R/read_cdf.R | 144 +++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 124 insertions(+), 21 deletions(-) diff --git a/NEWS.md b/NEWS.md index b25145d..379b364 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ ## chromConverter 0.6.0 +* Added parser for reading ANDI MS (`.cdf`) files. * Fixed `read_chemstation_ch` parser to correctly read "Mustang Chemstation" 179 files with 8-byte encoding. * Added alias to `read_chroms` for reading `mzxml` files with `RaMS`. * Added internal parser for 1D 'Waters RAW' chromatograms. diff --git a/R/read_cdf.R b/R/read_cdf.R index bb85d0f..e689e50 100644 --- a/R/read_cdf.R +++ b/R/read_cdf.R @@ -1,12 +1,14 @@ #' Read CDF file -#' @param file path to file +#' @param file Path to file. #' @param format_out R format. Either \code{matrix} or \code{data.frame}. #' @param data_format Whether to return data in \code{wide} or \code{long} format. #' For 2D files, "long" format returns the retention time as the first column of #' the data.frame or matrix while "wide" format returns the retention time as the #' rownames of the object. -#' @param read_metadata Whether to read metadata from file. #' @param what Whether to extract \code{chromatogram} and/or \code{peak_table}. +#' @param read_metadata Whether to read metadata from file. +#' @param metadata_format Format to output metadata. Either \code{chromconverter} +#' or \code{raw}. #' @return A chromatogram in the format specified by the \code{format_out} and #' \code{data_format} arguments (retention time x wavelength). #' @author Ethan Bass @@ -14,32 +16,46 @@ read_cdf <- function(file, format_out = c("matrix", "data.frame"), data_format = c("wide","long"), - what = "chromatogram", read_metadata = TRUE){ + what = "chromatogram", read_metadata = TRUE, + metadata_format = c("chromconverter", "raw")){ check_for_pkg("ncdf4") nc <- ncdf4::nc_open(file) if ("ordinate_values" %in% names(nc$var)){ - format = "chrom" + format <- "chrom" + } else if (all(c("intensity_values", "mass_values", + "scan_index", "scan_acquisition_time") %in% names(nc$var))){ + format <- "ms" } else { - format = "ms" + format <- "unknown" } ncdf4::nc_close(nc) - fn <- switch(format, chrom = read_andi_chrom, ms = andi_ms_error) + fn <- switch(format, chrom = read_andi_chrom, ms = read_andi_ms, + unknown = function(...){ + stop("The format of the provided cdf file could not be recognized.") + }) fn(file = file, data_format = data_format, format_out = format_out, - what = what, read_metadata = read_metadata) -} - -#' @noRd -andi_ms_error <- function(...){ - stop("The `cdf` MS format is not yet supported by an internal `chromConverter` parser. - Please try the OpenChrom `msd` parser instead.") + what = what, read_metadata = read_metadata, metadata_format = metadata_format) } #' Read ANDI chrom file +#' @param file Path to file. +#' @param format_out R format. Either \code{matrix} or \code{data.frame}. +#' @param data_format Whether to return data in \code{wide} or \code{long} format. +#' For 2D files, "long" format returns the retention time as the first column of +#' the data.frame or matrix while "wide" format returns the retention time as the +#' rownames of the object. +#' @param what Whether to extract \code{chromatogram} and/or \code{peak_table}. +#' @param read_metadata Whether to read metadata from file. +#' @param metadata_format Format to output metadata. Either \code{chromconverter} +#' or \code{raw}. +#' @return A chromatogram in the format specified by the \code{format_out} and +#' \code{data_format} arguments (retention time x wavelength). +#' @author Ethan Bass #' @noRd read_andi_chrom <- function(file, format_out = c("matrix", "data.frame"), data_format = c("wide", "long"), - what = "chromatogram", read_metadata = TRUE, - metadata_format = c("chromconverter", "raw")){ + what = "chromatogram", read_metadata = TRUE, + metadata_format = c("chromconverter", "raw")){ data_format <- match.arg(data_format, c("wide","long")) format_out <- match.arg(format_out, c("matrix","data.frame")) metadata_format <- match.arg(metadata_format, c("chromconverter", "raw")) @@ -61,21 +77,23 @@ read_andi_chrom <- function(file, format_out = c("matrix", "data.frame"), if (format_out == "matrix"){ data <- as.matrix(data) } + chromatogram <- data } if (any(what == "peak_table")){ peak_table_vars <- names(which(sapply(nc$var, function(x){ x$dim[[1]]$name }) == "peak_number")) if (length(peak_table_vars) > 0){ - peak_tab <- sapply(peak_table_vars, function(var){ + peak_table <- sapply(peak_table_vars, function(var){ ncdf4::ncvar_get(nc, varid = var) }) - peak_tab <- as.data.frame(peak_tab) + peak_table <- as.data.frame(peak_table) } } - if ("peak_table" %in% what & "chromatogram" %in% what){ - what <- "both" - } + # if ("peak_table" %in% what & "chromatogram" %in% what){ + # what <- "both" + # } + data <- mget(what) data <- switch(what, "chromatogram" = data, "peak_table" = peak_tab, "both" = list(chromatogram=data, peak_table = peak_tab)) @@ -86,7 +104,7 @@ read_andi_chrom <- function(file, format_out = c("matrix", "data.frame"), # parser = "chromconverter", source_file = file) if (inherits(data, "list")){ data <- lapply(data, function(xx){ - attach_metadata(data, meta = meta, format_in = "cdf", + attach_metadata(xx, meta = meta, format_in = "cdf", format_out = format_out, data_format = data_format, parser = "chromconverter", source_file = file) }) @@ -99,3 +117,87 @@ read_andi_chrom <- function(file, format_out = c("matrix", "data.frame"), ncdf4::nc_close(nc) data } + +#' Read ANDI MS file +#' @param file Path to file. +#' @param format_out R format. Either \code{matrix} or \code{data.frame}. +#' @param data_format Whether to return the total ion chromatogram in \code{wide} +#' or \code{long} format. The "long" format returns the retention time as the +#' first column of the data.frame or matrix while "wide" format returns the +#' retention time as the rownames of the object. +#' @param what Whether to extract \code{chromatogram} and/or \code{ms_spectra}. +#' @param read_metadata Whether to read metadata from file. +#' @param metadata_format Format to output metadata. Either \code{chromconverter} +#' or \code{raw}. +#' @param ms_format Whether to return mass spectral data as a (long) +#' \code{data.frame} or a list. +#' @return A chromatogram in the format specified by the \code{format_out} and +#' \code{data_format} arguments (retention time x wavelength). +#' @author Ethan Bass +#' @noRd + +read_andi_ms <- function(file, format_out = c("matrix", "data.frame"), + data_format = c("wide", "long"), + what = "chromatogram", ms_format = c("data.frame", "list"), + read_metadata = TRUE, + metadata_format = c("chromconverter", "raw"), + collapse = TRUE){ + data_format <- match.arg(data_format, c("wide","long")) + format_out <- match.arg(format_out, c("matrix","data.frame")) + ms_format <- match.arg(ms_format, c("data.frame","list")) + metadata_format <- match.arg(metadata_format, c("chromconverter", "raw")) + metadata_format <- switch(metadata_format, + chromconverter = "cdf", raw = "raw") + what <- match.arg(what, c("chromatogram", "ms_spectra"), several.ok = TRUE) + nc <- ncdf4::nc_open(file) + if (any(what == "chromatogram")){ + y <- ncdf4::ncvar_get(nc, "total_intensity") + x <- ncdf4::ncvar_get(nc, "scan_acquisition_time") + data = data.frame(RT = x, Intensity = y) + if (data_format == "wide"){ + rownames(data) <- data[, 1] + data <- data[,-1, drop = FALSE] + } + if (format_out == "matrix"){ + data <- as.matrix(data) + } + chromatogram <- data + } + if (any(what == "ms_spectra")){ + int <- ncdf4::ncvar_get(nc, "intensity_values") + mz <- ncdf4::ncvar_get(nc, "mass_values") + scan_idx <- ncdf4::ncvar_get(nc, "scan_index") + rt <- ncdf4::ncvar_get(nc, "scan_acquisition_time") + zeros <- as.list(rep(NA, length(which(scan_idx==0)) - 1)) + if (ms_format == "data.frame"){ + n_scans <- diff(c(scan_idx, length(mz))) + rts <- unlist(sapply(seq_along(rt), function(i){rep(rt[i], n_scans[i])})) + ms_spectra <- data.frame(rt = rts, mz = mz, intensity = int) + } else if (ms_format == "list"){ + scans <- mapply(function(x,y){ + cbind(mz = x, intensity = y) + }, split_at(mz, scan_idx+1), split_at(int, scan_idx+1)) + ms_spectra <- c(zeros, scans) + names(ms_spectra) <- rt + } + } + + data <- mget(what) + if (collapse) data <- collapse_list(data) + if (read_metadata){ + meta <- ncdf4::ncatt_get(nc, varid = 0) + if (inherits(data, "list")){ + data <- lapply(data, function(xx){ + attach_metadata(xx, meta = meta, format_in = "cdf", + format_out = format_out, data_format = data_format, + parser = "chromconverter", source_file = file) + }) + } else{ + data <- attach_metadata(data, meta = meta, format_in = metadata_format, + format_out = format_out, data_format = data_format, + parser = "chromconverter", source_file = file) + } + } + ncdf4::nc_close(nc) + data +} From 1163c7095dd2409bcb20b325fa005e6c613bdc55 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Sun, 17 Dec 2023 16:13:36 -0500 Subject: [PATCH 20/51] fix: read chemstation .ms files with entab --- NEWS.md | 1 + R/call_entab.R | 23 +++++++++++++++-------- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/NEWS.md b/NEWS.md index 379b364..15ae67a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,7 @@ ## chromConverter 0.6.0 * Added parser for reading ANDI MS (`.cdf`) files. +* Fixed parsing of Agilent MS files with 'entab' reader. * Fixed `read_chemstation_ch` parser to correctly read "Mustang Chemstation" 179 files with 8-byte encoding. * Added alias to `read_chroms` for reading `mzxml` files with `RaMS`. * Added internal parser for 1D 'Waters RAW' chromatograms. diff --git a/R/call_entab.R b/R/call_entab.R index 4074f0a..1649f80 100644 --- a/R/call_entab.R +++ b/R/call_entab.R @@ -28,17 +28,24 @@ call_entab <- function(file, data_format = c("wide", "long"), metadata_format <- switch(metadata_format, chromconverter = format_in, raw = "raw") r <- entab::Reader(file) - x <- entab::as.data.frame(r) - signal.idx <- grep("signal", colnames(x)) - if (length(signal.idx) == 1){ - colnames(x)[signal.idx] <- "wavelength" + if (is.null(format_in)){ + format_in <- r$parser() } - if (data_format == "wide"){ + x <- entab::as.data.frame(r) + if (format_in == "chemstation_uv"){ + signal.idx <- grep("signal", colnames(x)) + if (length(signal.idx) == 1){ + colnames(x)[signal.idx] <- "wavelength" + } + if (data_format == "wide"){ x <- reshape_chrom_wide(x, time_var = "time", lambda_var = "wavelength", value_var = "intensity") - } - if (format_out == "matrix"){ - x <- as.matrix(x) + if (format_out == "matrix"){ + x <- as.matrix(x) + } + } + } else if (format_in == "chemstation_ms"){ + colnames(x)[1] <- "rt" } if (read_metadata){ meta <- r$metadata() From 63f5bc4527286b63ce6f863961b9f241e969b278 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Sun, 17 Dec 2023 23:47:08 -0500 Subject: [PATCH 21/51] fix: read_cdf, added collapse argument --- R/read_cdf.R | 18 +++++++++--------- man/read_cdf.Rd | 12 ++++++++++-- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/R/read_cdf.R b/R/read_cdf.R index e689e50..34920a6 100644 --- a/R/read_cdf.R +++ b/R/read_cdf.R @@ -9,6 +9,8 @@ #' @param read_metadata Whether to read metadata from file. #' @param metadata_format Format to output metadata. Either \code{chromconverter} #' or \code{raw}. +#' @param collapse Logical. Whether to collapse lists that only contain a single +#' element. #' @return A chromatogram in the format specified by the \code{format_out} and #' \code{data_format} arguments (retention time x wavelength). #' @author Ethan Bass @@ -17,7 +19,8 @@ read_cdf <- function(file, format_out = c("matrix", "data.frame"), data_format = c("wide","long"), what = "chromatogram", read_metadata = TRUE, - metadata_format = c("chromconverter", "raw")){ + metadata_format = c("chromconverter", "raw"), + collapse = TRUE){ check_for_pkg("ncdf4") nc <- ncdf4::nc_open(file) if ("ordinate_values" %in% names(nc$var)){ @@ -34,7 +37,8 @@ read_cdf <- function(file, format_out = c("matrix", "data.frame"), stop("The format of the provided cdf file could not be recognized.") }) fn(file = file, data_format = data_format, format_out = format_out, - what = what, read_metadata = read_metadata, metadata_format = metadata_format) + what = what, read_metadata = read_metadata, + metadata_format = metadata_format, collapse = collapse) } #' Read ANDI chrom file @@ -55,7 +59,8 @@ read_cdf <- function(file, format_out = c("matrix", "data.frame"), read_andi_chrom <- function(file, format_out = c("matrix", "data.frame"), data_format = c("wide", "long"), what = "chromatogram", read_metadata = TRUE, - metadata_format = c("chromconverter", "raw")){ + metadata_format = c("chromconverter", "raw"), + collapse = TRUE){ data_format <- match.arg(data_format, c("wide","long")) format_out <- match.arg(format_out, c("matrix","data.frame")) metadata_format <- match.arg(metadata_format, c("chromconverter", "raw")) @@ -94,14 +99,9 @@ read_andi_chrom <- function(file, format_out = c("matrix", "data.frame"), # what <- "both" # } data <- mget(what) - data <- switch(what, "chromatogram" = data, - "peak_table" = peak_tab, - "both" = list(chromatogram=data, peak_table = peak_tab)) + if (collapse) data <- collapse_list(data) if (read_metadata){ meta <- ncdf4::ncatt_get(nc, varid = 0) - # data <- attach_metadata(data, meta = meta, format_in = format_in, - # format_out = format_out, data_format = data_format, - # parser = "chromconverter", source_file = file) if (inherits(data, "list")){ data <- lapply(data, function(xx){ attach_metadata(xx, meta = meta, format_in = "cdf", diff --git a/man/read_cdf.Rd b/man/read_cdf.Rd index b666739..1dae443 100644 --- a/man/read_cdf.Rd +++ b/man/read_cdf.Rd @@ -9,11 +9,13 @@ read_cdf( format_out = c("matrix", "data.frame"), data_format = c("wide", "long"), what = "chromatogram", - read_metadata = TRUE + read_metadata = TRUE, + metadata_format = c("chromconverter", "raw"), + collapse = TRUE ) } \arguments{ -\item{file}{path to file} +\item{file}{Path to file.} \item{format_out}{R format. Either \code{matrix} or \code{data.frame}.} @@ -25,6 +27,12 @@ rownames of the object.} \item{what}{Whether to extract \code{chromatogram} and/or \code{peak_table}.} \item{read_metadata}{Whether to read metadata from file.} + +\item{metadata_format}{Format to output metadata. Either \code{chromconverter} +or \code{raw}.} + +\item{collapse}{Logical. Whether to collapse lists that only contain a single +element.} } \value{ A chromatogram in the format specified by the \code{format_out} and From 1f626a5c8a014cf79bc388948358c7bbb6fa7395 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Sun, 17 Dec 2023 23:47:36 -0500 Subject: [PATCH 22/51] fix: entab parser so it works again to reformat masshunter etc --- R/call_entab.R | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/R/call_entab.R b/R/call_entab.R index 1649f80..30ab593 100644 --- a/R/call_entab.R +++ b/R/call_entab.R @@ -28,11 +28,9 @@ call_entab <- function(file, data_format = c("wide", "long"), metadata_format <- switch(metadata_format, chromconverter = format_in, raw = "raw") r <- entab::Reader(file) - if (is.null(format_in)){ - format_in <- r$parser() - } + file_format <- r$parser() x <- entab::as.data.frame(r) - if (format_in == "chemstation_uv"){ + if (grepl("dad$|uv$", file_format)){ signal.idx <- grep("signal", colnames(x)) if (length(signal.idx) == 1){ colnames(x)[signal.idx] <- "wavelength" @@ -44,7 +42,14 @@ call_entab <- function(file, data_format = c("wide", "long"), x <- as.matrix(x) } } - } else if (format_in == "chemstation_ms"){ + } else if (grepl("fid$", file_format)){ + if (data_format == "wide"){ + x <- data.frame(row.names = x$time, intensity = x$intensity) + } + if (format_out == "matrix"){ + x <- as.matrix(x) + } + } else if (grepl("ms$", file_format)){ colnames(x)[1] <- "rt" } if (read_metadata){ From f2457377a42cdf7f8f3996825b8e2383924702fb Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Sun, 17 Dec 2023 23:48:03 -0500 Subject: [PATCH 23/51] ui: shimadzu_txt back to shimadzu_ascii --- R/read_chroms.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/read_chroms.R b/R/read_chroms.R index 9076618..8aa2d4c 100644 --- a/R/read_chroms.R +++ b/R/read_chroms.R @@ -34,7 +34,7 @@ #' @param parser What parser to use. Current option are \code{chromconverter}, #' \code{aston}, \code{entab}, \code{thermoraw}, \code{openchrom}, or #' \code{rainbow}. -#' @param format_out R object format (i.e. data.frame or matrix). +#' @param format_out Class of output (i.e. data.frame or matrix). #' @param data_format Whether to output data in wide or long format. Either #' \code{wide} or \code{long}. #' @param export Logical. If TRUE, the program will export files in the format @@ -81,7 +81,7 @@ read_chroms <- function(paths, find_files, "chemstation_fid", "chemstation_ch", "chemstation_csv", "chemstation_uv", "masshunter_dad", "chromeleon_uv", - "shimadzu_txt", + "shimadzu_ascii", "shimadzu_fid", "shimadzu_dad", "shimadzu_lcd", "thermoraw", "mzml", "mzxml", "waters_arw", "waters_raw", @@ -137,7 +137,7 @@ read_chroms <- function(paths, find_files, "chemstation_81", "chemstation_181", "chemstation_fid", "chemstation_csv", "masshunter_dad", "shimadzu_fid", "shimadzu_dad", - "shimadzu_txt", "shimadzu_lcd", + "shimadzu_ascii", "shimadzu_lcd", "chromeleon_uv", "thermoraw", "mzml", "mzxml", "waters_arw", "waters_raw", "msd", "csd", "wsd", "mdf", "cdf", "other")) @@ -287,7 +287,7 @@ read_chroms <- function(paths, find_files, # pattern <- ifelse(is.null(pattern), ".cdf|.CDF", pattern) converter <- partial(read_cdf, format_out = format_out, data_format = data_format, - read_metadata = read_metadata) + read_metadata = read_metadata, ...) } else { converter <- switch(parser, "aston" = partial(trace_converter, format_out = format_out, From cf2b20856b17ce995186e9e28d5a29fa1363d6cf Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Sun, 17 Dec 2023 23:48:31 -0500 Subject: [PATCH 24/51] style: seq_len --- R/read_shimadzu_lcd.R | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/R/read_shimadzu_lcd.R b/R/read_shimadzu_lcd.R index c45bd85..da26d85 100644 --- a/R/read_shimadzu_lcd.R +++ b/R/read_shimadzu_lcd.R @@ -1,6 +1,6 @@ #' Shimadzu LCD parser #' -#' Reads 3D PDA data stream from 'Shimadzu' LCD files. +#' Read 3D PDA data stream from 'Shimadzu' LCD files. #' #' A parser to read PDA data from 'Shimadzu' \code{.lcd} files. LCD files are #' encoded as 'Microsoft' OLE documents. The parser relies on the @@ -174,9 +174,6 @@ export_stream <- function(path_in, stream, path_out, remove_null_bytes = FALSE, if (missing(path_out)){ path_out <- tempfile() } - if (.Platform$OS.type == "windows"){ - path_out <- gsub("\\\\", "/", path_out) - } if (remove_null_bytes){ reticulate::py_run_string("data = data.replace(b'\\x00', b'')") } @@ -205,7 +202,7 @@ decode_shimadzu_block <- function(file) { count <- 1 buffer <- list(0,0,0,0) - for (i in seq_len(2)){ + for (i in c(1:2)){ n_bytes <- readBin(file, "integer", n = 1, size = 2) start <- seek(file, NA, "current") From 27f87e51bc2389f10d599b4ce20c7e560bdba054 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Sun, 17 Dec 2023 23:48:47 -0500 Subject: [PATCH 25/51] docs: specify masslynx --- R/read_waters_raw.R | 4 ++-- man/read_waters_raw.Rd | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/read_waters_raw.R b/R/read_waters_raw.R index ff60280..d6b64f2 100644 --- a/R/read_waters_raw.R +++ b/R/read_waters_raw.R @@ -1,6 +1,6 @@ #' Read 'Waters' RAW #' -#' Parser for reading Waters (.raw) files into R. +#' Parser for reading 'Waters MassLynx (.raw) files into R. #' #' @param path Path to \code{.raw} file. #' @param format_out Matrix or data.frame. @@ -43,7 +43,7 @@ read_waters_raw <- function(path, format_out = c("matrix", "data.frame"), #' Read 'Waters' chromatograms #' -#' Parser for reading Waters CHRO (.dat) files into R. +#' Parser for reading 'Waters MassLynx' CHRO (.dat) files into R. #' #' @importFrom utils head tail #' @param path Path to \code{.dat} file. diff --git a/man/read_waters_raw.Rd b/man/read_waters_raw.Rd index 74e615c..14efc61 100644 --- a/man/read_waters_raw.Rd +++ b/man/read_waters_raw.Rd @@ -29,7 +29,7 @@ A chromatogram in the format specified by \code{format_out} (retention time x wavelength). } \description{ -Parser for reading Waters (.raw) files into R. +Parser for reading 'Waters MassLynx (.raw) files into R. } \note{ For now this parser only reads 1D chromatograms (not mass spectra or From 0480c39bf00a56df41f0e7f4d970770906d012ba Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Sun, 17 Dec 2023 23:49:10 -0500 Subject: [PATCH 26/51] docs: catch-up --- man/read_chroms.Rd | 4 ++-- man/read_shimadzu.Rd | 1 + man/read_shimadzu_lcd.Rd | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/man/read_chroms.Rd b/man/read_chroms.Rd index 8cf5e63..6d4f298 100644 --- a/man/read_chroms.Rd +++ b/man/read_chroms.Rd @@ -9,7 +9,7 @@ read_chroms( find_files, format_in = c("agilent_d", "agilent_dx", "chemstation", "chemstation_fid", "chemstation_ch", "chemstation_csv", "chemstation_uv", "masshunter_dad", - "chromeleon_uv", "shimadzu_txt", "shimadzu_fid", "shimadzu_dad", "shimadzu_lcd", + "chromeleon_uv", "shimadzu_ascii", "shimadzu_fid", "shimadzu_dad", "shimadzu_lcd", "thermoraw", "mzml", "mzxml", "waters_arw", "waters_raw", "msd", "csd", "wsd", "mdf", "other"), pattern = NULL, @@ -51,7 +51,7 @@ case file extension will be deduced from \code{format_in}.} \code{aston}, \code{entab}, \code{thermoraw}, \code{openchrom}, or \code{rainbow}.} -\item{format_out}{R object format (i.e. data.frame or matrix).} +\item{format_out}{Class of output (i.e. data.frame or matrix).} \item{data_format}{Whether to output data in wide or long format. Either \code{wide} or \code{long}.} diff --git a/man/read_shimadzu.Rd b/man/read_shimadzu.Rd index 2378c1f..b315734 100644 --- a/man/read_shimadzu.Rd +++ b/man/read_shimadzu.Rd @@ -14,6 +14,7 @@ read_shimadzu( peaktable_format = c("chromatographr", "original"), read_metadata = TRUE, metadata_format = c("chromconverter", "raw"), + ms_format = c("data.frame", "list"), collapse = TRUE ) } diff --git a/man/read_shimadzu_lcd.Rd b/man/read_shimadzu_lcd.Rd index 1af0ea7..b929954 100644 --- a/man/read_shimadzu_lcd.Rd +++ b/man/read_shimadzu_lcd.Rd @@ -21,7 +21,7 @@ read_shimadzu_lcd( \item{read_metadata}{Logical. Whether to attach metadata.} } \description{ -Reads 3D PDA data stream from 'Shimadzu' LCD files. +Read 3D PDA data stream from 'Shimadzu' LCD files. } \details{ A parser to read PDA data from 'Shimadzu' \code{.lcd} files. LCD files are From ee90c1f9f38c45d167d9544459c6ba4f42da5cec Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Sun, 17 Dec 2023 23:49:40 -0500 Subject: [PATCH 27/51] fix: shimadzu_ascii in utils --- R/utils.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R/utils.R b/R/utils.R index 44d4918..2923f14 100644 --- a/R/utils.R +++ b/R/utils.R @@ -16,6 +16,10 @@ get_filetype <- function(path, out = c("format_in", "filetype")){ "x01/x32/x00/x00" = "AgilentChemstationMS", "x02/x02/x00/x00" = "AgilentMasshunterDADHeader", # "x02/x33/x30/x00" = "AgilentChemstationMWD", + "x02/x32/x00/x00" = "AgilentChemstationMWD", + # "x02/x33/x00/x00" = "AgilentChemstationMWD", + # "x03/x31/x00/x00" = "AgilentChemstationMWD2" + # "x01/x32/x00/x00" = "AgilentChemstationMS" "x03/x02/x00/x00" = "AgilentMasshunterDAD", "x02/x33/x30/x00" = "chemstation_30", "x02/x33/x31/x00" = "chemstation_31", @@ -68,7 +72,8 @@ check_parser <- function(format_in, parser = NULL, find = FALSE){ "openlab_131", "chemstation_179", "chemstation_81", "chemstation_181", "mzml", "mzxml", - "mdf", "shimadzu_fid", "shimadzu_dad", + "mdf", "shimadzu_ascii", + "shimadzu_fid", "shimadzu_dad", "shimadzu_lcd", "waters_arw", "waters_raw", "waters_chro"), aston = c("chemstation", "chemstation_uv", From b165dfc4949105b35ac79685a9ac0a67bcf91569 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Sun, 17 Dec 2023 23:52:26 -0500 Subject: [PATCH 28/51] docs: ms_format argument in read_shimadzu --- R/read_cdf.R | 2 +- R/read_shimadzu_ascii.R | 2 ++ man/read_shimadzu.Rd | 3 +++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/R/read_cdf.R b/R/read_cdf.R index 34920a6..dbb4579 100644 --- a/R/read_cdf.R +++ b/R/read_cdf.R @@ -130,7 +130,7 @@ read_andi_chrom <- function(file, format_out = c("matrix", "data.frame"), #' @param metadata_format Format to output metadata. Either \code{chromconverter} #' or \code{raw}. #' @param ms_format Whether to return mass spectral data as a (long) -#' \code{data.frame} or a list. +#' \code{data.frame} or a \code{list}. #' @return A chromatogram in the format specified by the \code{format_out} and #' \code{data_format} arguments (retention time x wavelength). #' @author Ethan Bass diff --git a/R/read_shimadzu_ascii.R b/R/read_shimadzu_ascii.R index fc7c77f..7430c16 100644 --- a/R/read_shimadzu_ascii.R +++ b/R/read_shimadzu_ascii.R @@ -20,6 +20,8 @@ #' @param read_metadata Whether to read metadata from file. #' @param metadata_format Format to output metadata. Either \code{chromconverter} or #' \code{raw}. +#' @param ms_format Whether to return mass spectral data as a (long) +#' \code{data.frame} or a \code{list}. #' @param collapse Logical. Whether to collapse lists that only contain a single #' element. #' @return A nested list of elements from the specified \code{file}, where the diff --git a/man/read_shimadzu.Rd b/man/read_shimadzu.Rd index b315734..f821cad 100644 --- a/man/read_shimadzu.Rd +++ b/man/read_shimadzu.Rd @@ -41,6 +41,9 @@ read_shimadzu( \item{metadata_format}{Format to output metadata. Either \code{chromconverter} or \code{raw}.} +\item{ms_format}{Whether to return mass spectral data as a (long) +\code{data.frame} or a \code{list}.} + \item{collapse}{Logical. Whether to collapse lists that only contain a single element.} } From a347da3792b19d25f3b473cfc659ddcc6b73d503 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Sun, 17 Dec 2023 23:55:46 -0500 Subject: [PATCH 29/51] fix: add split_at function --- R/utils.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/R/utils.R b/R/utils.R index 2923f14..f10bfae 100644 --- a/R/utils.R +++ b/R/utils.R @@ -267,3 +267,8 @@ collapse_list <- function(x){ } x } + +#' Split vector by position +#' @note From https://stackoverflow.com/questions/16357962/r-split-numeric-vector-at-position +#' @noRd +split_at <- function(x, pos) unname(split(x, cumsum(seq_along(x) %in% pos))) From 054d490d799648668b79c04a32eb5cb6f193141d Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Mon, 18 Dec 2023 16:43:55 -0500 Subject: [PATCH 30/51] fix: `shimadzu_ascii` format in read_chroms --- R/read_chroms.R | 8 ++++---- R/utils.R | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/R/read_chroms.R b/R/read_chroms.R index 8aa2d4c..de01175 100644 --- a/R/read_chroms.R +++ b/R/read_chroms.R @@ -227,12 +227,12 @@ read_chroms <- function(paths, find_files, converter <- partial(read_shimadzu, include = "dad", format_out = format_out, data_format = data_format, read_metadata = read_metadata, - metadata_format = metadata_format) + metadata_format = metadata_format, ...) } else if (format_in == "shimadzu_ascii"){ - converter <- partial(read_shimadzu, include = "dad", - format_out = format_out, data_format = data_format, + converter <- partial(read_shimadzu, format_out = format_out, + data_format = data_format, read_metadata = read_metadata, - metadata_format = metadata_format) + metadata_format = metadata_format, ...) } else if (format_in == "shimadzu_lcd"){ converter <- partial(read_shimadzu_lcd, format_out = format_out, data_format = data_format, diff --git a/R/utils.R b/R/utils.R index f10bfae..b9fe9e8 100644 --- a/R/utils.R +++ b/R/utils.R @@ -180,7 +180,8 @@ format_to_extension <- function(format_in){ "csd" ="\\.", "wsd" ="\\.", "mdf" = "\\.mdf$", - "other" = "\\.") + "other" = "\\.", + "\\.") } #' @noRd From 03681bbae0ff1e475277153fc9e3ce92c572fa32 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Mon, 18 Dec 2023 16:44:10 -0500 Subject: [PATCH 31/51] style: zombie-code --- R/attach_metadata.R | 17 ----------------- R/read_cdf.R | 3 --- 2 files changed, 20 deletions(-) diff --git a/R/attach_metadata.R b/R/attach_metadata.R index db8b2e7..d5e374b 100644 --- a/R/attach_metadata.R +++ b/R/attach_metadata.R @@ -144,23 +144,6 @@ attach_metadata <- function(x, meta, format_in, format_out, data_format, parser data_format = data_format, parser = "chromconverter" ) - # } else if (format_in == "entab"){ - # structure(x, instrument = meta$instrument, - # detector = NA, - # software = meta$Version, - # method = meta$method, - # batch = meta$SeqPathAndFile, - # operator = meta$operator, - # run_datetime = meta$run_date, - # sample_name = meta$sample, - # sample_id = NA, - # injection_volume = meta$InjVolume, - # time_range = NA, - # time_interval = NA, - # detector_range = NA, - # format = data_format, - # parser = "entab", - # format_out = format_out) }, "chemstation" = { datetime_formats <- c("%d-%b-%y, %H:%M:%S", "%m/%d/%Y %I:%M:%S %p", "%d/%m/%Y %I:%M:%S %p") meta$date <- as.POSIXct(meta$date, tz = "UTC", tryFormats = datetime_formats) diff --git a/R/read_cdf.R b/R/read_cdf.R index dbb4579..5ef75cb 100644 --- a/R/read_cdf.R +++ b/R/read_cdf.R @@ -95,9 +95,6 @@ read_andi_chrom <- function(file, format_out = c("matrix", "data.frame"), peak_table <- as.data.frame(peak_table) } } - # if ("peak_table" %in% what & "chromatogram" %in% what){ - # what <- "both" - # } data <- mget(what) if (collapse) data <- collapse_list(data) if (read_metadata){ From 6cf6d57abe21b9ccda42f80a90000f57ea19cf9b Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Mon, 18 Dec 2023 16:44:40 -0500 Subject: [PATCH 32/51] fix: call_entab NULL format bug --- R/call_entab.R | 2 +- man/call_entab.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/call_entab.R b/R/call_entab.R index 30ab593..141b999 100644 --- a/R/call_entab.R +++ b/R/call_entab.R @@ -12,7 +12,7 @@ #' @export call_entab <- function(file, data_format = c("wide", "long"), - format_in = NULL, + format_in = "", format_out = c("matrix", "data.frame"), read_metadata = TRUE, metadata_format = c("chromconverter", "raw")){ diff --git a/man/call_entab.Rd b/man/call_entab.Rd index 342dc3f..601261d 100644 --- a/man/call_entab.Rd +++ b/man/call_entab.Rd @@ -7,7 +7,7 @@ call_entab( file, data_format = c("wide", "long"), - format_in = NULL, + format_in = "", format_out = c("matrix", "data.frame"), read_metadata = TRUE, metadata_format = c("chromconverter", "raw") From 5ff5fbec3e9899a76496c3321ca37d01ec4310d5 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Mon, 18 Dec 2023 16:45:43 -0500 Subject: [PATCH 33/51] style: long line --- R/read_chemstation_ch.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/read_chemstation_ch.R b/R/read_chemstation_ch.R index 3fe120f..ca31c72 100644 --- a/R/read_chemstation_ch.R +++ b/R/read_chemstation_ch.R @@ -35,7 +35,8 @@ read_chemstation_ch <- function(path, format_out = c("matrix", "data.frame"), format_out <- match.arg(format_out, c("matrix", "data.frame")) data_format <- match.arg(data_format, c("wide", "long")) metadata_format <- match.arg(metadata_format, c("chromconverter", "raw")) - metadata_format <- switch(metadata_format, chromconverter = "chemstation", raw = "raw") + metadata_format <- switch(metadata_format, chromconverter = "chemstation", + raw = "raw") f <- file(path, "rb") on.exit(close(f)) From ec66cd9fabea83786fe73c27e301d47d3033ad44 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Mon, 18 Dec 2023 17:15:47 -0500 Subject: [PATCH 34/51] refactor: condensed three "configure" functions into one --- NAMESPACE | 4 +- R/{aston_parsers.R => call_aston.R} | 37 +-------------- R/call_rainbow.R | 37 +-------------- R/read_shimadzu_lcd.R | 38 +-------------- R/utils.R | 47 +++++++++++++++++++ man/configure_olefile.Rd | 24 ---------- ...ton.Rd => configure_python_environment.Rd} | 15 +++--- man/configure_rainbow.Rd | 23 --------- man/sp_converter.Rd | 2 +- man/uv_converter.Rd | 2 +- 10 files changed, 62 insertions(+), 167 deletions(-) rename R/{aston_parsers.R => call_aston.R} (82%) delete mode 100644 man/configure_olefile.Rd rename man/{configure_aston.Rd => configure_python_environment.Rd} (54%) delete mode 100644 man/configure_rainbow.Rd diff --git a/NAMESPACE b/NAMESPACE index 72b6288..9acf4fa 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,10 +3,8 @@ export(call_entab) export(call_openchrom) export(call_rainbow) -export(configure_aston) -export(configure_olefile) export(configure_openchrom) -export(configure_rainbow) +export(configure_python_environment) export(extract_metadata) export(read_agilent_dx) export(read_cdf) diff --git a/R/aston_parsers.R b/R/call_aston.R similarity index 82% rename from R/aston_parsers.R rename to R/call_aston.R index e76e98e..38c0efd 100644 --- a/R/aston_parsers.R +++ b/R/call_aston.R @@ -131,48 +131,13 @@ trace_converter <- function(file, format_out = c("matrix", "data.frame"), x } -#' Configure Aston -#' -#' Configures reticulate to use Aston file parsers. -#' @name configure_aston -#' @param return_boolean Logical. Whether to return a Boolean value indicating -#' if the chromConverter environment is correctly configured. -#' @return If \code{return_boolean} is \code{TRUE}, returns a Boolean value -#' indicating whether the chromConverter environment is configured correctly. -#' Otherwise, there is no return value. -#' @author Ethan Bass -#' @import reticulate -#' @export -configure_aston <- function(return_boolean = FALSE){ - install <- FALSE - if (!dir.exists(miniconda_path())){ - install <- readline("It is recommended to install miniconda in your R library to use Aston parsers. Install miniconda now? (y/n)") - if (install %in% c('y', "Y", "YES", "yes", "Yes")){ - install_miniconda() - } - } - env <- reticulate::configure_environment("chromConverter") - if (!env){ - reqs <- c("pandas","scipy","numpy","aston") - reqs_available <- sapply(reqs, reticulate::py_module_available) - if (!all(reqs_available)){ - conda_install(envname = "chromConverter", reqs[which(!reqs_available)], - pip = TRUE) - } - } - assign_trace_file() - if (return_boolean){ - return(env) - } -} - #' @noRd check_aston_configuration <- function(){ assign_trace_file() if (length(trace_file) == 0){ ans <- readline("Aston not found. Configure Aston? (y/n)?") if (ans %in% c('y', "Y", "YES", "yes", "Yes")){ - configure_aston() + configure_python_environment(parser = "aston") } } } diff --git a/R/call_rainbow.R b/R/call_rainbow.R index 0b29dad..d792c67 100644 --- a/R/call_rainbow.R +++ b/R/call_rainbow.R @@ -111,41 +111,6 @@ extract_rb_names <- function(xx){ }) } - -#' Configure rainbow -#' -#' Configures reticulate to use rainbow file parsers. -#' @name configure_rainbow -#' @param return_boolean Logical. Whether to return a Boolean value indicating -#' if the chromConverter environment is correctly configured. -#' @return If \code{return_boolean} is \code{TRUE}, returns a Boolean value -#' indicating whether the chromConverter environment is configured correctly. -#' Otherwise, there is no return value. -#' @author Ethan Bass -#' @import reticulate -#' @export -configure_rainbow <- function(return_boolean = FALSE){ - install <- FALSE - if (!dir.exists(miniconda_path())){ - install <- readline("It is recommended to install miniconda in your R library to use rainbow parsers. Install miniconda now? (y/n)") - if (install %in% c('y', "Y", "YES", "yes", "Yes")){ - install_miniconda() - } - } - env <- reticulate::configure_environment("chromConverter") - if (!env){ - reqs <- c("numpy","rainbow-api") - reqs_available <- sapply(reqs, reticulate::py_module_available) - if (!all(reqs_available)){ - conda_install(envname = "chromConverter", reqs[which(!reqs_available)], pip = TRUE) - } - } - assign_rb_read() - if (return_boolean){ - env - } -} - #' @noRd assign_rb_read <- function(){ pos <- 1 @@ -160,7 +125,7 @@ check_rb_configuration <- function(){ if (length(rb_read) == 0){ ans <- readline("rainbow not found. Configure rainbow? (y/n)?") if (ans %in% c('y', "Y", "YES", "yes", "Yes")){ - configure_rainbow() + configure_python_environment(parser = "rainbow") } } } diff --git a/R/read_shimadzu_lcd.R b/R/read_shimadzu_lcd.R index da26d85..b00239d 100644 --- a/R/read_shimadzu_lcd.R +++ b/R/read_shimadzu_lcd.R @@ -46,7 +46,7 @@ read_shimadzu_lcd <- function(path, format_out = c("matrix", "data.frame"), olefile_installed <- reticulate::py_module_available("olefile") if (!olefile_installed){ - configure_olefile() + configure_python_environment(parser = "olefile") } # read wavelengths from "Wavelength Table" stream @@ -294,39 +294,3 @@ read_shimadzu_wavelengths <- function(path){ lambdas } - -#' Configure olefile -#' -#' Configures reticulate to use olefile. Olefile is required to use the 'Shimadzu' -#' LCD parser. -#' @name configure_olefile -#' @param return_boolean Logical. Whether to return a Boolean value indicating -#' if the chromConverter environment is correctly configured. -#' @return If \code{return_boolean} is \code{TRUE}, returns a Boolean value -#' indicating whether the chromConverter environment is configured correctly. -#' Otherwise, there is no return value. -#' @author Ethan Bass -#' @import reticulate -#' @export -configure_olefile <- function(return_boolean = FALSE){ - install <- FALSE - if (!dir.exists(miniconda_path())){ - install <- readline("It is recommended to install miniconda in your R library to use the Shimadzu LCD parser. Install miniconda now? (y/n)") - if (install %in% c('y', "Y", "YES", "yes", "Yes")){ - install_miniconda() - } - } - env <- reticulate::configure_environment("chromConverter") - if (!env){ - reqs <- c("olefile") - reqs_available <- sapply(reqs, reticulate::py_module_available) - if (!all(reqs_available)){ - conda_install(envname = "chromConverter", reqs[which(!reqs_available)], - pip = TRUE) - } - } - if (return_boolean){ - return(env) - } -} - diff --git a/R/utils.R b/R/utils.R index b9fe9e8..d6b8411 100644 --- a/R/utils.R +++ b/R/utils.R @@ -273,3 +273,50 @@ collapse_list <- function(x){ #' @note From https://stackoverflow.com/questions/16357962/r-split-numeric-vector-at-position #' @noRd split_at <- function(x, pos) unname(split(x, cumsum(seq_along(x) %in% pos))) + +#' Configure python environment +#' +#' Configures reticulate environment for parsers. +#' @name configure_python_environment +#' @param parser Either \code{aston}, \code{rainbow}, or \code{olefile} (for +#' \code{read_shimadzu_lcd}). +#' @param return_boolean Logical. Whether to return a Boolean value indicating +#' if the chromConverter environment is correctly configured. +#' @return If \code{return_boolean} is \code{TRUE}, returns a Boolean value +#' indicating whether the chromConverter environment is configured correctly. +#' Otherwise, there is no return value. +#' @author Ethan Bass +#' @import reticulate +#' @export +configure_python_environment <- function(parser, return_boolean = FALSE){ + install <- FALSE + if (!dir.exists(miniconda_path())){ + install <- readline("It is recommended to install miniconda in your R library to use rainbow parsers. Install miniconda now? (y/n)") + if (install %in% c('y', "Y", "YES", "yes", "Yes")){ + install_miniconda() + } + } + env <- reticulate::configure_environment("chromConverter") + if (!env){ + reqs <- get_parser_reqs(parser) + reqs_available <- sapply(reqs, reticulate::py_module_available) + if (!all(reqs_available)){ + conda_install(envname = "chromConverter", reqs[which(!reqs_available)], + pip = TRUE) + } + } + assign_fn <- switch(parser, aston = assign_trace_file, + rainbow = assign_rb_read, + olefile = function(...){}) + assign_fn() + if (return_boolean){ + env + } +} + +#' @noRd +get_parser_reqs <- function(parser){ + switch(parser, "aston" = c("pandas","scipy","numpy","aston"), + "olefile" = c("olefile"), + "rainbow" = c("numpy", "rainbow-api")) +} diff --git a/man/configure_olefile.Rd b/man/configure_olefile.Rd deleted file mode 100644 index 4eef869..0000000 --- a/man/configure_olefile.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/read_shimadzu_lcd.R -\name{configure_olefile} -\alias{configure_olefile} -\title{Configure olefile} -\usage{ -configure_olefile(return_boolean = FALSE) -} -\arguments{ -\item{return_boolean}{Logical. Whether to return a Boolean value indicating -if the chromConverter environment is correctly configured.} -} -\value{ -If \code{return_boolean} is \code{TRUE}, returns a Boolean value -indicating whether the chromConverter environment is configured correctly. -Otherwise, there is no return value. -} -\description{ -Configures reticulate to use olefile. Olefile is required to use the 'Shimadzu' -LCD parser. -} -\author{ -Ethan Bass -} diff --git a/man/configure_aston.Rd b/man/configure_python_environment.Rd similarity index 54% rename from man/configure_aston.Rd rename to man/configure_python_environment.Rd index 518e76e..348ae2e 100644 --- a/man/configure_aston.Rd +++ b/man/configure_python_environment.Rd @@ -1,12 +1,15 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aston_parsers.R -\name{configure_aston} -\alias{configure_aston} -\title{Configure Aston} +% Please edit documentation in R/utils.R +\name{configure_python_environment} +\alias{configure_python_environment} +\title{Configure python environment} \usage{ -configure_aston(return_boolean = FALSE) +configure_python_environment(parser, return_boolean = FALSE) } \arguments{ +\item{parser}{Either \code{aston}, \code{rainbow}, or \code{olefile} (for +\code{read_shimadzu_lcd}).} + \item{return_boolean}{Logical. Whether to return a Boolean value indicating if the chromConverter environment is correctly configured.} } @@ -16,7 +19,7 @@ indicating whether the chromConverter environment is configured correctly. Otherwise, there is no return value. } \description{ -Configures reticulate to use Aston file parsers. +Configures reticulate environment for parsers. } \author{ Ethan Bass diff --git a/man/configure_rainbow.Rd b/man/configure_rainbow.Rd deleted file mode 100644 index 9b4ffe6..0000000 --- a/man/configure_rainbow.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/call_rainbow.R -\name{configure_rainbow} -\alias{configure_rainbow} -\title{Configure rainbow} -\usage{ -configure_rainbow(return_boolean = FALSE) -} -\arguments{ -\item{return_boolean}{Logical. Whether to return a Boolean value indicating -if the chromConverter environment is correctly configured.} -} -\value{ -If \code{return_boolean} is \code{TRUE}, returns a Boolean value -indicating whether the chromConverter environment is configured correctly. -Otherwise, there is no return value. -} -\description{ -Configures reticulate to use rainbow file parsers. -} -\author{ -Ethan Bass -} diff --git a/man/sp_converter.Rd b/man/sp_converter.Rd index 7863de9..d0c2c38 100644 --- a/man/sp_converter.Rd +++ b/man/sp_converter.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aston_parsers.R +% Please edit documentation in R/call_aston.R \name{sp_converter} \alias{sp_converter} \title{Converter for Agilent MassHunter UV files} diff --git a/man/uv_converter.Rd b/man/uv_converter.Rd index 679a3a0..4e53f5c 100644 --- a/man/uv_converter.Rd +++ b/man/uv_converter.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aston_parsers.R +% Please edit documentation in R/call_aston.R \name{uv_converter} \alias{uv_converter} \title{Converter for Agilent ChemStation UV files} From 94e6f01cc561df2523fabc5f274aa0c1a0e70dd7 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Wed, 20 Dec 2023 21:45:06 -0500 Subject: [PATCH 35/51] feat: export write_cdf, added additional arguments --- NAMESPACE | 1 + R/write_chroms.R | 65 ++++++++++++++++++++++++++++++++++++------------ man/write_cdf.Rd | 43 ++++++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 16 deletions(-) create mode 100644 man/write_cdf.Rd diff --git a/NAMESPACE b/NAMESPACE index 9acf4fa..168b73c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -25,6 +25,7 @@ export(read_waters_arw) export(read_waters_raw) export(sp_converter) export(uv_converter) +export(write_cdf) import(magrittr) import(reticulate) import(xml2) diff --git a/R/write_chroms.R b/R/write_chroms.R index cefbc09..821d1c3 100644 --- a/R/write_chroms.R +++ b/R/write_chroms.R @@ -13,33 +13,61 @@ export_csvs <- function(data, path_out, fileEncoding = "utf8", row.names = TRUE) #' @noRd export_cdfs <- function(data, path_out){ sapply(seq_along(data), function(i){ - write_cdf(data[[i]], sample_name = names(data)[i], - path_out = path_out) + write_cdf(data[[i]], path_out = path_out, sample_name = names(data)[i]) }) } #' Write CDF file from chromatogram +#' +#' Exports a chromatogram in ANDI (Analytical Data Interchange) chromatography +#' format (ASTM E1947-98). This format can only accomodate unidimensional data. +#' For two-dimensional chromatograms, the column to export can be specified +#' using the \code{lambda} argument. Othewise, a warning will be generated and +#' the first column of the chromatogram will be exported. +#' #' @author Ethan Bass -#' @noRd -write_cdf <- function(x, sample_name, path_out){ +#' @param x A chromatogram in (wide) format. +#' @param path_out The path to write the file. +#' @param sample_name The name of the file. +#' @param lambda The wavelength to export (for 2-dimensional chromatograms). +#' Must be a string matching one the columns in \code{x} or the index of the +#' column to export. +#' @param force Whether to overwrite existing files at the specified path. +#' Defaults to \code{FALSE}. +#' @return No return value. The function is called for its side effects. +#' @section Side effects: +#' Exports a chromatogram in ANDI chromatography format (netCDF) in the directory +#' specified by \code{path_out}. The file will be named according to the value +#' of \code{sample_name}. If no \code{sample_name} is provided, the +#' \code{sample_name} attribute will be used if it exists. +#' @export +write_cdf <- function(x, path_out, sample_name, lambda = NULL, force = FALSE){ check_for_pkg("ncdf4") - if (ncol(x) + as.numeric(attr(x, "data_format") == "wide") > 2){ - warning("The supplies chromatogram contains more than two dimensions. Only + if (missing(sample_name)){ + sample_name <- attr(x,"sample_name") + if (is.null(sample_name)){ + stop("Sample name must be provided.") + } + } + if (is.null(lambda) && ncol(x) + as.numeric(attr(x, "data_format") == "wide") > 2){ + warning("The supplied chromatogram contains more than two dimensions. Only the first two dimensions will be written to the ANDI chrom file.", immediate. = TRUE) } + lambda <- ifelse(is.null(lambda), 1, lambda) if (attr(x, "data_format") == "wide"){ - x1 <- data.frame(RT = as.numeric(rownames(x)), Intensity = x[,1]) + x1 <- data.frame(RT = as.numeric(rownames(x)), Intensity = x[, lambda]) x <- transfer_metadata(x1, x) } - # if (!missing(column)){ - # x1 <- x[,column,drop = FALSE] - # x <- transfer_metadata(x1,x) - # } filename <- fs::path_ext_remove(fs::path_file(sample_name)) file_out <- fs::path(path_out, filename, ext = "cdf") if (fs::file_exists(file_out)){ - warning(paste("File", sQuote(basename(file_out)), "already exists and will not be overwritten."), immediate. = TRUE) + if (!force){ + stop(paste("File", sQuote(basename(file_out)), + "already exists and will not be overwritten.")) + } else{ + fs::file_delete(file_out) + } } # define dimensions point_number <- ncdf4::ncdim_def("point_number", "", @@ -49,9 +77,11 @@ write_cdf <- function(x, sample_name, path_out){ # define variables nc_time <- ncdf4::ncvar_def("raw_data_retention", "", dim = point_number) nc_intensity <- ncdf4::ncvar_def("ordinate_values", "", dim = point_number) - other_vars <- c("actual_delay_time", "actual_run_time_length", "actual_sampling_interval", - "detector_maximum_value", "detector_minimum_value") - other_vars <- lapply(other_vars, function(x) ncdf4::ncvar_def(x, "",list())) + other_vars <- c("actual_delay_time", "actual_run_time_length", + "actual_sampling_interval", "detector_maximum_value", + "detector_minimum_value") + + other_vars <- lapply(other_vars, function(x) ncdf4::ncvar_def(x, "", list())) # write netcdf file ncdf4::nc_create(file_out, c(list(nc_time, nc_intensity), other_vars)) @@ -73,6 +103,7 @@ write_cdf <- function(x, sample_name, path_out){ # write metadata as global attributes meta <- format_metadata_for_cdf(x) nc_add_global_attributes(nc = nc, meta = meta, sample_name = sample_name) + # finish writing file ncdf4::nc_close(nc) } @@ -83,7 +114,9 @@ nc_add_global_attributes <- function(nc, meta, sample_name){ sapply(seq_along(meta), function(i){ ncdf4::ncatt_put(nc = nc, varid = 0, attname = names(meta)[i], attval = meta[[i]], - prec = ifelse(names(meta)[i] %in% c("sample_amount", "sample_injection_volume"), + prec = ifelse(names(meta)[i] %in% + c("sample_amount", + "sample_injection_volume"), "float","text")) }) if (!is.null(sample_name)){ diff --git a/man/write_cdf.Rd b/man/write_cdf.Rd new file mode 100644 index 0000000..ce363e6 --- /dev/null +++ b/man/write_cdf.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/write_chroms.R +\name{write_cdf} +\alias{write_cdf} +\title{Write CDF file from chromatogram} +\usage{ +write_cdf(x, path_out, sample_name, lambda = NULL, force = FALSE) +} +\arguments{ +\item{x}{A chromatogram in (wide) format.} + +\item{path_out}{The path to write the file.} + +\item{sample_name}{The name of the file.} + +\item{lambda}{The wavelength to export (for 2-dimensional chromatograms). +Must be a string matching one the columns in \code{x} or the index of the +column to export.} + +\item{force}{Whether to overwrite existing files at the specified path. +Defaults to \code{FALSE}.} +} +\value{ +No return value. The function is called for its side effects. +} +\description{ +Exports a chromatogram in ANDI (Analytical Data Interchange) chromatography +format (ASTM E1947-98). This format can only accomodate unidimensional data. +For two-dimensional chromatograms, the column to export can be specified +using the \code{lambda} argument. Othewise, a warning will be generated and +the first column of the chromatogram will be exported. +} +\section{Side effects}{ + +Exports a chromatogram in ANDI chromatography format (netCDF) in the directory +specified by \code{path_out}. The file will be named according to the value +of \code{sample_name}. If no \code{sample_name} is provided, the +\code{sample_name} attribute will be used if it exists. +} + +\author{ +Ethan Bass +} From 9201a7010f84e545988ccb2f3854ac7d8f9ed05e Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Wed, 20 Dec 2023 21:47:17 -0500 Subject: [PATCH 36/51] feat: added dots to read_cdf to accomodate `ms_format` argument --- R/read_cdf.R | 16 +++++++++++----- man/read_cdf.Rd | 9 +++++++-- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/R/read_cdf.R b/R/read_cdf.R index 5ef75cb..b1d15e4 100644 --- a/R/read_cdf.R +++ b/R/read_cdf.R @@ -11,8 +11,11 @@ #' or \code{raw}. #' @param collapse Logical. Whether to collapse lists that only contain a single #' element. +#' @param ... Additional arguments to parser. The \code{ms_format} argument +#' can be used here to specify whether to return mass spectra in \code{list} +#' format or as a \code{data.frame}. #' @return A chromatogram in the format specified by the \code{format_out} and -#' \code{data_format} arguments (retention time x wavelength). +#' \code{data_format} arguments. #' @author Ethan Bass #' @export @@ -20,7 +23,7 @@ read_cdf <- function(file, format_out = c("matrix", "data.frame"), data_format = c("wide","long"), what = "chromatogram", read_metadata = TRUE, metadata_format = c("chromconverter", "raw"), - collapse = TRUE){ + collapse = TRUE, ...){ check_for_pkg("ncdf4") nc <- ncdf4::nc_open(file) if ("ordinate_values" %in% names(nc$var)){ @@ -38,7 +41,7 @@ read_cdf <- function(file, format_out = c("matrix", "data.frame"), }) fn(file = file, data_format = data_format, format_out = format_out, what = what, read_metadata = read_metadata, - metadata_format = metadata_format, collapse = collapse) + metadata_format = metadata_format, collapse = collapse, ...) } #' Read ANDI chrom file @@ -129,13 +132,16 @@ read_andi_chrom <- function(file, format_out = c("matrix", "data.frame"), #' @param ms_format Whether to return mass spectral data as a (long) #' \code{data.frame} or a \code{list}. #' @return A chromatogram in the format specified by the \code{format_out} and -#' \code{data_format} arguments (retention time x wavelength). +#' \code{data_format} arguments and MS spectra as either a long-format +#' \code{data.frame} or a \code{list} of spectra, according to the value of +#' \code{ms_format}. #' @author Ethan Bass #' @noRd read_andi_ms <- function(file, format_out = c("matrix", "data.frame"), data_format = c("wide", "long"), - what = "chromatogram", ms_format = c("data.frame", "list"), + what = "chromatogram", + ms_format = c("data.frame", "list"), read_metadata = TRUE, metadata_format = c("chromconverter", "raw"), collapse = TRUE){ diff --git a/man/read_cdf.Rd b/man/read_cdf.Rd index 1dae443..abe89f8 100644 --- a/man/read_cdf.Rd +++ b/man/read_cdf.Rd @@ -11,7 +11,8 @@ read_cdf( what = "chromatogram", read_metadata = TRUE, metadata_format = c("chromconverter", "raw"), - collapse = TRUE + collapse = TRUE, + ... ) } \arguments{ @@ -33,10 +34,14 @@ or \code{raw}.} \item{collapse}{Logical. Whether to collapse lists that only contain a single element.} + +\item{...}{Additional arguments to parser. The \code{ms_format} argument +can be used here to specify whether to return mass spectra in \code{list} +format or as a \code{data.frame}.} } \value{ A chromatogram in the format specified by the \code{format_out} and -\code{data_format} arguments (retention time x wavelength). +\code{data_format} arguments. } \description{ Read CDF file From c747453841c59d10632e6218b9aa96903e14f083 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Wed, 20 Dec 2023 21:47:38 -0500 Subject: [PATCH 37/51] docs: updates to documentation --- R/read_chemstation_ch.R | 12 ++++++------ R/read_chemstation_report.R | 6 ++++-- R/read_mdf.R | 1 + R/read_varian_peaklist.R | 5 +++-- man/read_chemstation_ch.Rd | 12 ++++++------ man/read_chemstation_reports.Rd | 10 +++++++--- man/read_varian_peaklist.Rd | 3 +++ 7 files changed, 30 insertions(+), 19 deletions(-) diff --git a/R/read_chemstation_ch.R b/R/read_chemstation_ch.R index ca31c72..32a8123 100644 --- a/R/read_chemstation_ch.R +++ b/R/read_chemstation_ch.R @@ -17,12 +17,12 @@ #' @author Ethan Bass #' @return A 2D chromatogram in the format specified by \code{data_format} and #' \code{format_out}. If \code{data_format} is \code{wide}, the chromatogram will -#' be returned with retention times as rows and wavelengths as columns. If -#' \code{long} format is requested, three columns will be returned: one for the -#' retention time, one for the wavelength and one for the intensity. The -#' \code{format_out} argument determines whether the chromatogram is returned as -#' a \code{matrix} or \code{data.frame}. Metadata can be attached to the -#' chromatogram as \code{\link{attributes}} if \code{read_metadata} is {TRUE}. +#' be returned with retention times as rows and a single column for the intensity. +#' If \code{long} format is requested, two columns will be returned: one for the +#' retention time and one for the intensity. The \code{format_out} argument +#' determines whether the chromatogram is returned as a \code{matrix} or +#' \code{data.frame}. Metadata can be attached to the chromatogram as +#' \code{\link{attributes}} if \code{read_metadata} is {TRUE}. #' @note This function was adapted from the #' \href{https://github.com/chemplexity/chromatography}{Chromatography Toolbox} #' ((c) James Dillon 2014). diff --git a/R/read_chemstation_report.R b/R/read_chemstation_report.R index 3d6f986..ca04e29 100644 --- a/R/read_chemstation_report.R +++ b/R/read_chemstation_report.R @@ -1,9 +1,11 @@ -#' Read Agilent Chemstation Reports -#' @param files Paths to Chemstation report files. +#' Read 'Agilent ChemStation' Reports +#' @param files Paths to 'ChemStation' report files. #' @param data_format Format to output data. Either \code{chromatographr} or #' \code{chemstation}. #' @param metadata_format Format to output metadata. Either \code{chromconverter} or #' \code{raw}. +#' @return A data.frame containing the information from the specified +#' 'ChemStation' report. #' @author Ethan Bass #' @export diff --git a/R/read_mdf.R b/R/read_mdf.R index fb42c0b..37e0468 100644 --- a/R/read_mdf.R +++ b/R/read_mdf.R @@ -65,6 +65,7 @@ read_mdf <- function(file, format_out = c("matrix","data.frame"), } #' Extract MDF metadata +#' @author Ethan Bass #' @noRd extract_mdf_metadata <- function(x){ x <- stringr::str_replace_all(x, "\xb5", "micro") diff --git a/R/read_varian_peaklist.R b/R/read_varian_peaklist.R index f3b4de5..ed87ca9 100644 --- a/R/read_varian_peaklist.R +++ b/R/read_varian_peaklist.R @@ -3,6 +3,7 @@ #' @param file Path to Varian peak list file. #' @importFrom utils read.csv #' @author Ethan Bass +#' @return A data.frame containing the information from the specified report. #' @export read_varian_peaklist <- function(file){ @@ -14,11 +15,11 @@ read_varian_peaklist <- function(file){ column_names[1] <- "compound" colnames(x) <- column_names - x <- x[-which(x$`Line#` == ""), ] + x <- x[-which(x$`Line#` == ""),] x <- x[-which(x$`Line#` == "Line#"),] x$Area <- as.numeric(x$Area) x$Height <- as.numeric(x$Height) - x <- x[,-16] + x <- x[, -16] x } diff --git a/man/read_chemstation_ch.Rd b/man/read_chemstation_ch.Rd index 5c52314..adfe6a2 100644 --- a/man/read_chemstation_ch.Rd +++ b/man/read_chemstation_ch.Rd @@ -27,12 +27,12 @@ or \code{raw}.} \value{ A 2D chromatogram in the format specified by \code{data_format} and \code{format_out}. If \code{data_format} is \code{wide}, the chromatogram will -be returned with retention times as rows and wavelengths as columns. If -\code{long} format is requested, three columns will be returned: one for the -retention time, one for the wavelength and one for the intensity. The -\code{format_out} argument determines whether the chromatogram is returned as -a \code{matrix} or \code{data.frame}. Metadata can be attached to the -chromatogram as \code{\link{attributes}} if \code{read_metadata} is {TRUE}. +be returned with retention times as rows and a single column for the intensity. +If \code{long} format is requested, two columns will be returned: one for the +retention time and one for the intensity. The \code{format_out} argument +determines whether the chromatogram is returned as a \code{matrix} or +\code{data.frame}. Metadata can be attached to the chromatogram as +\code{\link{attributes}} if \code{read_metadata} is {TRUE}. } \description{ Agilent \code{.ch} files come in several different varieties. This parser diff --git a/man/read_chemstation_reports.Rd b/man/read_chemstation_reports.Rd index 4c6010b..d957244 100644 --- a/man/read_chemstation_reports.Rd +++ b/man/read_chemstation_reports.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/read_chemstation_report.R \name{read_chemstation_reports} \alias{read_chemstation_reports} -\title{Read Agilent Chemstation Reports} +\title{Read 'Agilent ChemStation' Reports} \usage{ read_chemstation_reports( files, @@ -11,7 +11,7 @@ read_chemstation_reports( ) } \arguments{ -\item{files}{Paths to Chemstation report files.} +\item{files}{Paths to 'ChemStation' report files.} \item{data_format}{Format to output data. Either \code{chromatographr} or \code{chemstation}.} @@ -19,8 +19,12 @@ read_chemstation_reports( \item{metadata_format}{Format to output metadata. Either \code{chromconverter} or \code{raw}.} } +\value{ +A data.frame containing the information from the specified +'ChemStation' report. +} \description{ -Read Agilent Chemstation Reports +Read 'Agilent ChemStation' Reports } \author{ Ethan Bass diff --git a/man/read_varian_peaklist.Rd b/man/read_varian_peaklist.Rd index 410d0f6..db1f4de 100644 --- a/man/read_varian_peaklist.Rd +++ b/man/read_varian_peaklist.Rd @@ -10,6 +10,9 @@ read_varian_peaklist(file) \arguments{ \item{file}{Path to Varian peak list file.} } +\value{ +A data.frame containing the information from the specified report. +} \description{ Read 'Varian' peaklist. Read peak list(s) from 'Varian MS Workstation'. From e3bc2f57edd62b28624a93bd6a379867332571fb Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Thu, 21 Dec 2023 11:40:32 -0500 Subject: [PATCH 38/51] docs: read_chroms --- R/read_chroms.R | 44 +++++++++++++++++++++++++++----------------- man/read_chroms.Rd | 42 +++++++++++++++++++++++++++--------------- 2 files changed, 54 insertions(+), 32 deletions(-) diff --git a/R/read_chroms.R b/R/read_chroms.R index de01175..ca10262 100644 --- a/R/read_chroms.R +++ b/R/read_chroms.R @@ -1,13 +1,14 @@ #' Read Chromatograms #' -#' Reads chromatograms from specified folders or vector of paths using file -#' parsers from [Aston](https://github.com/bovee/aston), +#' Reads chromatograms from specified folders or vector of paths using either an +#' internal parser or bindings to an external library, such as +#' [Aston](https://github.com/bovee/aston), #' [Entab](https://github.com/bovee/entab), #' [ThermoRawFileParser](https://github.com/compomics/ThermoRawFileParser), #' [OpenChrom](https://lablicate.com/platform/openchrom), -#' [rainbow](https://rainbow-api.readthedocs.io/), or internal parsers. +#' [rainbow](https://rainbow-api.readthedocs.io/). #' -#' Provides a general interface to chromConverter parsers. Currently recognizes +#' Provides a unified interface to all chromConverter parsers. Currently recognizes #' 'Agilent ChemStation' (\code{.uv}, \code{.ch}, \code{.dx}), 'Agilent #' MassHunter' (\code{.dad}), 'Thermo RAW' (\code{.raw}), 'Waters ARW' (\code{.arw}), #' 'Waters RAW' (\code{.raw}), 'Chromeleon ASCII' (\code{.txt}), 'Shimadzu ASCII' @@ -17,8 +18,13 @@ #' Please see the instructions in the #' [README](https://ethanbass.github.io/chromConverter/) for further details. #' +#' If paths to individual files are provided, \code{read_chroms} will try to +#' infer the file format and select an appropriate parser. However, when +#' providing paths to directories, the file format must be specified using the +#' \code{format_in} argument. +#' #' @name read_chroms -#' @param paths paths to files or folders containing files +#' @param paths Paths to data files or directories containing the files. #' @param find_files Logical. Set to \code{TRUE} (default) if you are providing #' the function with a folder or vector of folders containing the files. #' Otherwise, set to\code{FALSE}. @@ -26,14 +32,15 @@ #' include: \code{agilent_d}, \code{agilent_dx}, \code{chemstation}, #' \code{chemstation_uv}, \code{chemstation_ch}, \code{chemstation_csv}, #' \code{masshunter}, \code{masshunter_dad}, \code{chromeleon_uv}, -#' \code{shimadzu_fid}, \code{shimadzu_dad}, \code{thermoraw}, -#' \code{waters_arw}, \code{waters_raw}, \code{mzml}, \code{mzxml}, -#' \code{cdf}, \code{mdf}, \code{msd}, \code{csd}, \code{wsd}, or \code{other}. +#' \code{shimadzu_ascii}, \code{shimadzu_fid}, \code{shimadzu_dad}, +#' \code{thermoraw}, \code{waters_arw}, \code{waters_raw}, \code{mzml}, +#' \code{mzxml}, \code{cdf}, \code{mdf}, \code{msd}, \code{csd}, \code{wsd}, +#' or \code{other}. #' @param pattern pattern (e.g. a file extension). Defaults to NULL, in which #' case file extension will be deduced from \code{format_in}. -#' @param parser What parser to use. Current option are \code{chromconverter}, -#' \code{aston}, \code{entab}, \code{thermoraw}, \code{openchrom}, or -#' \code{rainbow}. +#' @param parser What parser to use (optional). Current option are +#' \code{chromconverter}, \code{aston}, \code{entab}, \code{thermoraw}, +#' \code{openchrom}, or \code{rainbow}. #' @param format_out Class of output (i.e. data.frame or matrix). #' @param data_format Whether to output data in wide or long format. Either #' \code{wide} or \code{long}. @@ -57,16 +64,21 @@ #' @param verbose Logical. Whether to print output from external parsers to the #' R console. #' @param sample_names An optional character vector of sample names. Otherwise -#' sample names default to the basename of the specified files. +#' sample names default to the \code{\link{basename}} of the specified files. #' @param dat Existing list of chromatograms to append results. #' (Defaults to NULL). #' @param ... Additional arguments to parser. #' @return A list of chromatograms in \code{matrix} or \code{data.frame} format, -#' according to the value of \code{format_out}. +#' according to the value of \code{format_out}. Chromatograms may be returned +#' in either \code{wide} or \code{long} format according to the value of +#' \code{data_format}. #' @section Side effects: If \code{export} is TRUE, chromatograms will be #' exported in the format specified by \code{export_format} in the folder -#' specified by \code{path_out}. Currently, the only option for export is -#' \code{csv} unless the \code{parser} is \code{openchrom}. +#' specified by \code{path_out}. Currently, the most versatile option for +#' exporting files is code{csv}. However, one-dimensional chromatograms can +#' also be exported in ANDI Chromatography (netCDF) format by selecting +#' \code{cdf}. If an \code{openchrom} parser is selected, ANIML and mzML are +#' available as additional options. #' @import reticulate #' @importFrom utils write.csv file_test #' @importFrom purrr partial @@ -279,12 +291,10 @@ read_chroms <- function(paths, find_files, format_in = format_in, export_format = export_format, return_paths = return_paths, verbose = verbose) } else if (format_in == "mdf"){ - # pattern <- ifelse(is.null(pattern), ".mdf|.MDF", pattern) converter <- partial(read_mdf, format_out = format_out, data_format = data_format, read_metadata = read_metadata) } else if (format_in == "cdf"){ - # pattern <- ifelse(is.null(pattern), ".cdf|.CDF", pattern) converter <- partial(read_cdf, format_out = format_out, data_format = data_format, read_metadata = read_metadata, ...) diff --git a/man/read_chroms.Rd b/man/read_chroms.Rd index 6d4f298..9283ba6 100644 --- a/man/read_chroms.Rd +++ b/man/read_chroms.Rd @@ -30,7 +30,7 @@ read_chroms( ) } \arguments{ -\item{paths}{paths to files or folders containing files} +\item{paths}{Paths to data files or directories containing the files.} \item{find_files}{Logical. Set to \code{TRUE} (default) if you are providing the function with a folder or vector of folders containing the files. @@ -40,16 +40,17 @@ Otherwise, set to\code{FALSE}.} include: \code{agilent_d}, \code{agilent_dx}, \code{chemstation}, \code{chemstation_uv}, \code{chemstation_ch}, \code{chemstation_csv}, \code{masshunter}, \code{masshunter_dad}, \code{chromeleon_uv}, -\code{shimadzu_fid}, \code{shimadzu_dad}, \code{thermoraw}, -\code{waters_arw}, \code{waters_raw}, \code{mzml}, \code{mzxml}, -\code{cdf}, \code{mdf}, \code{msd}, \code{csd}, \code{wsd}, or \code{other}.} +\code{shimadzu_ascii}, \code{shimadzu_fid}, \code{shimadzu_dad}, +\code{thermoraw}, \code{waters_arw}, \code{waters_raw}, \code{mzml}, +\code{mzxml}, \code{cdf}, \code{mdf}, \code{msd}, \code{csd}, \code{wsd}, +or \code{other}.} \item{pattern}{pattern (e.g. a file extension). Defaults to NULL, in which case file extension will be deduced from \code{format_in}.} -\item{parser}{What parser to use. Current option are \code{chromconverter}, -\code{aston}, \code{entab}, \code{thermoraw}, \code{openchrom}, or -\code{rainbow}.} +\item{parser}{What parser to use (optional). Current option are +\code{chromconverter}, \code{aston}, \code{entab}, \code{thermoraw}, +\code{openchrom}, or \code{rainbow}.} \item{format_out}{Class of output (i.e. data.frame or matrix).} @@ -84,7 +85,7 @@ of clusters to use or a cluster object created by R console.} \item{sample_names}{An optional character vector of sample names. Otherwise -sample names default to the basename of the specified files.} +sample names default to the \code{\link{basename}} of the specified files.} \item{dat}{Existing list of chromatograms to append results. (Defaults to NULL).} @@ -93,18 +94,21 @@ sample names default to the basename of the specified files.} } \value{ A list of chromatograms in \code{matrix} or \code{data.frame} format, -according to the value of \code{format_out}. +according to the value of \code{format_out}. Chromatograms may be returned +in either \code{wide} or \code{long} format according to the value of +\code{data_format}. } \description{ -Reads chromatograms from specified folders or vector of paths using file -parsers from \href{https://github.com/bovee/aston}{Aston}, +Reads chromatograms from specified folders or vector of paths using either an +internal parser or bindings to an external library, such as +\href{https://github.com/bovee/aston}{Aston}, \href{https://github.com/bovee/entab}{Entab}, \href{https://github.com/compomics/ThermoRawFileParser}{ThermoRawFileParser}, \href{https://lablicate.com/platform/openchrom}{OpenChrom}, -\href{https://rainbow-api.readthedocs.io/}{rainbow}, or internal parsers. +\href{https://rainbow-api.readthedocs.io/}{rainbow}. } \details{ -Provides a general interface to chromConverter parsers. Currently recognizes +Provides a unified interface to all chromConverter parsers. Currently recognizes 'Agilent ChemStation' (\code{.uv}, \code{.ch}, \code{.dx}), 'Agilent MassHunter' (\code{.dad}), 'Thermo RAW' (\code{.raw}), 'Waters ARW' (\code{.arw}), 'Waters RAW' (\code{.raw}), 'Chromeleon ASCII' (\code{.txt}), 'Shimadzu ASCII' @@ -113,12 +117,20 @@ Openchrom parsers, which include many additional formats. To use 'Entab', 'ThermoRawFileParser', or 'Openchrom' parsers, they must be manually installed. Please see the instructions in the \href{https://ethanbass.github.io/chromConverter/}{README} for further details. + +If paths to individual files are provided, \code{read_chroms} will try to +infer the file format and select an appropriate parser. However, when +providing paths to directories, the file format must be specified using the +\code{format_in} argument. } \section{Side effects}{ If \code{export} is TRUE, chromatograms will be exported in the format specified by \code{export_format} in the folder -specified by \code{path_out}. Currently, the only option for export is -\code{csv} unless the \code{parser} is \code{openchrom}. +specified by \code{path_out}. Currently, the most versatile option for +exporting files is code{csv}. However, one-dimensional chromatograms can +also be exported in ANDI Chromatography (netCDF) format by selecting +\code{cdf}. If an \code{openchrom} parser is selected, ANIML and mzML are +available as additional options. } \examples{ From 97aafc254b4a122e869d100df9bc1c38fd9d006b Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Thu, 21 Dec 2023 11:40:49 -0500 Subject: [PATCH 39/51] docs: readme and news --- NEWS.md | 8 ++++--- README.md | 64 +++++++++++++++++++++++++++---------------------------- 2 files changed, 37 insertions(+), 35 deletions(-) diff --git a/NEWS.md b/NEWS.md index 15ae67a..84f52b9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,12 +3,14 @@ * Added parser for reading ANDI MS (`.cdf`) files. * Fixed parsing of Agilent MS files with 'entab' reader. * Fixed `read_chemstation_ch` parser to correctly read "Mustang Chemstation" 179 files with 8-byte encoding. -* Added alias to `read_chroms` for reading `mzxml` files with `RaMS`. -* Added internal parser for 1D 'Waters RAW' chromatograms. -* Added `collapse` argument to call_rainbow to collapse superfluous lists. * Re-factored `read_shimadzu` function and added support for new types of chromatograms (e.g. status, uv and total ion chromatograms). Added support for reading multiple types of chromatograms at once. * Added support for reading MS spectra from 'Shimadzu' ascii files using `read_shimadzu`. +* Exported `write_cdf` and added additional arguments (`lambda` and `force`) for greater control by users. +* Added internal parser for 1D 'Waters RAW' chromatograms (`read_waters_raw`). +* Added `collapse` argument to `call_rainbow` and to collapse superfluous lists. * Added `...` argument to `read_chroms` for supplying additional arguments to parsers. +* Added alias to `read_chroms` for reading `mzxml` files with `RaMS`. +* Updated documentation of various functions. ## chromConverter 0.5.0 diff --git a/README.md b/README.md index 4b02af1..71199bb 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ ### Overview -chromConverter aims to facilitate the conversion of chromatography data from various proprietary formats so it can be easily read into R for further analysis. It currently consists of wrappers around file parsers from various external libraries including [Aston](https://github.com/bovee/aston), [Entab](https://github.com/bovee/entab), the [ThermoRawFileParser](https://github.com/compomics/ThermoRawFileParser), [rainbow](https://rainbow-api.readthedocs.io/), and [OpenChrom](https://lablicate.com/platform/openchrom) as well as some parsers written directly in R for (mostly) text-based formats. +chromConverter aims to facilitate the conversion of chromatography data from various proprietary formats so it can be easily read into R for further analysis. It includes a number of parsers written directly in R as well as bindings to various external libraries including [Aston](https://github.com/bovee/aston), [Entab](https://github.com/bovee/entab), [rainbow](https://rainbow-api.readthedocs.io/), the [ThermoRawFileParser](https://github.com/compomics/ThermoRawFileParser), and [OpenChrom](https://lablicate.com/platform/openchrom). ### Formats @@ -68,35 +68,6 @@ install.packages("chromConverter", repos="https://ethanbass.r-universe.dev/", ty **Note:** There are some changes in recent versions of RStudio that messed up the accessibility of the python bindings through `reticulate`. If you wish to access python-based parsers (e.g. aston or rainbow) through a recent version of RStudio, it is suggested to first change the default settings in RStudio. To do this, open `Tools:Global Options...:Python` and uncheck the box that says `Automatically activate project-local Python environments`. Then restart RStudio. Alternatively, this issue can be resolved by selecting the desired python interpreter in the Python settings pane. It is recommended to use a local installation of miniconda, which can be installed by running `reticulate::install_miniconda()`. -#### Optional additional dependencies - -Some of the parsers rely on external software libraries that must be manually installed. - -##### **Aston** - -To install Aston, call the `configure_aston()` function to install miniconda along with the necessary python dependencies. Running `read_chroms` with the Aston parser selected should also trigger a prompt to install Aston. If you're running Windows, you may need to install the latest version of ['Microsoft Visual C++'](https://docs.microsoft.com/en-US/cpp/windows/latest-supported-vc-redist?view=msvc-170) if you don't already have it. - -##### **Entab** - -[Entab](https://github.com/bovee/entab) is a Rust-based parsing framework for converting a variety of scientific file formats into tabular data. To use parsers from Entab, you must first install [Rust](https://www.rust-lang.org/tools/install) and Entab-R. After following the [instructions](https://www.rust-lang.org/tools/install) to install Rust, you can install Entab from GitHub as follows: - -``` -remotes::install_github("https://github.com/bovee/entab/", subdir = "entab-r") -``` - -##### **ThermoRawFileParser** - -Thermo RAW files can be converted by calling the [ThermoRawFileParser](https://github.com/compomics/ThermoRawFileParser) on the command-line. To install the ThermoRawFileParser, follow the instructions [here](https://github.com/compomics/ThermoRawFileParser). If you are running Linux or Mac OS X, you will also need to install [mono](https://www.mono-project.com/download/stable/#download-lin), following the instructions provided at the link. In addition, when you use chromConverter to convert Thermo RAW files for the first time you will be asked to enter the path to the program. - -##### **OpenChrom** -###### (**Note:** Support for the commmand line interface has been removed from OpenChrom (as of `version 1.5.0`). Older versions (e.g. `1.4.x`) should still work for now). - -[OpenChrom](https://lablicate.com/platform/openchrom) is opensource chromatography software, containing a large number of file parsers, which can now be conveniently accessed directly from R. Strangely, configuring OpenChrom for use on the command-line deactivates the graphical user interface (GUI). Thus, it is recommended to make a separate copy of OpenChrom if you'd still like to access the GUI. To use the OpenChrom parsers, follow the steps detailed below: - -1) Download [OpenChrom](https://lablicate.com/platform/openchrom/download) (**version 1.4.x only**) and place it into a directory of your choice. -2) If you intend to use the GUI in the future, it is recommended to make a separate copy of OpenChrom for command-line use. -3) Call `read_chroms` with `parser = "openchrom"`. The first time you call the parser, you may be asked to provide the path to your local installation of OpenChrom. The path will then be saved for future use. If the command-line interface is disabled, you will be given the option to automatically activate the command-line. Alternatively, the command-line option can be activated from R by calling `configure_openchrom(cli = "true")` or following the [instructions](https://github.com/OpenChrom/openchrom/wiki/CLI) to manually activate the CLI. This process can be reversed using the same function: e.g. `configure_openchrom(cli = "false"). To specify an OpenChrom executable in a non-standard location, call `configure_openchrom` with the `path` argument, e.g. `configure_openchrom(cli = "true", path = "path_to_openchrom_executable"). - ### Usage ##### Importing chromatograms @@ -112,7 +83,7 @@ The `read_chroms` function will attempt to determine an appropriate parser to us ###### Exporting files -If you'd like to automatically export the files, include the argument `export = TRUE` along with the path where you'd like to export the files (`path_out`). Some parsers (e.g. `OpenChrom` and `ThermoRawFileParser`) need to export files for their basic operations. Thus, if these parsers are selected, you will need to specify an argument to `path_out`. +If you'd like to automatically export the files, include the argument `export = TRUE` along with the path where you'd like to export the files (`path_out`) and the desired file format (`export_format`). Some parsers (e.g. `OpenChrom` and `ThermoRawFileParser`) need to export files for their basic operations. Thus, if these parsers are selected, you will need to specify an argument to `path_out`. ``` library(chromConverter) @@ -133,12 +104,41 @@ chromConverter includes some options to extract metadata from the provided files ##### Importing peak lists -The `read_peak` list function can be used to import peak lists from 'Chemstation' or 'Shimadzu' ascii files. The syntax is similar to `read_chroms`. In the simplest case, you can just provide paths to the files or directory you want to read in along with the format (`format_in`), e.g. +The `read_peak` list function can be used to import peak lists from 'Chemstation' REPORT files or 'Shimadzu' ascii files. The syntax is similar to `read_chroms`. In the simplest case, you can just provide paths to the files or directory you want to read in along with the format (`format_in`), e.g. ``` pks <- read_chroms(, format_in = "chemstation") ``` +#### Optional additional dependencies + +Some of the parsers rely on external software libraries that must be manually installed. + +##### **Aston** + +To install Aston, call the `configure_aston()` function to install miniconda along with the necessary python dependencies. Running `read_chroms` with the Aston parser selected should also trigger a prompt to install Aston. If you're running Windows, you may need to install the latest version of ['Microsoft Visual C++'](https://docs.microsoft.com/en-US/cpp/windows/latest-supported-vc-redist?view=msvc-170) if you don't already have it. + +##### **Entab** + +[Entab](https://github.com/bovee/entab) is a Rust-based parsing framework for converting a variety of scientific file formats into tabular data. To use parsers from Entab, you must first install [Rust](https://www.rust-lang.org/tools/install) and Entab-R. After following the [instructions](https://www.rust-lang.org/tools/install) to install Rust, you can install Entab from GitHub as follows: + +``` +remotes::install_github("https://github.com/bovee/entab/", subdir = "entab-r") +``` + +##### **ThermoRawFileParser** + +Thermo RAW files can be converted by calling the [ThermoRawFileParser](https://github.com/compomics/ThermoRawFileParser) on the command-line. To install the ThermoRawFileParser, follow the instructions [here](https://github.com/compomics/ThermoRawFileParser). If you are running Linux or Mac OS X, you will also need to install [mono](https://www.mono-project.com/download/stable/#download-lin), following the instructions provided at the link. In addition, when you use chromConverter to convert Thermo RAW files for the first time you will be asked to enter the path to the program. + +##### **OpenChrom** +###### (**Note:** Support for the commmand line interface has been removed from OpenChrom (as of `version 1.5.0`). Older versions (e.g. `1.4.x`) should still work for now). + +[OpenChrom](https://lablicate.com/platform/openchrom) is opensource chromatography software, containing a large number of file parsers, which can now be conveniently accessed directly from R. Strangely, configuring OpenChrom for use on the command-line deactivates the graphical user interface (GUI). Thus, it is recommended to make a separate copy of OpenChrom if you'd still like to access the GUI. To use the OpenChrom parsers, follow the steps detailed below: + +1) Download [OpenChrom](https://lablicate.com/platform/openchrom/download) (**version 1.4.x only**) and place it into a directory of your choice. +2) If you intend to use the GUI in the future, it is recommended to make a separate copy of OpenChrom for command-line use. +3) Call `read_chroms` with `parser = "openchrom"`. The first time you call the parser, you may be asked to provide the path to your local installation of OpenChrom. The path will then be saved for future use. If the command-line interface is disabled, you will be given the option to automatically activate the command-line. Alternatively, the command-line option can be activated from R by calling `configure_openchrom(cli = "true")` or following the [instructions](https://github.com/OpenChrom/openchrom/wiki/CLI) to manually activate the CLI. This process can be reversed using the same function: e.g. `configure_openchrom(cli = "false"). To specify an OpenChrom executable in a non-standard location, call `configure_openchrom` with the `path` argument, e.g. `configure_openchrom(cli = "true", path = "path_to_openchrom_executable"). + ### Further analysis For downstream analyses of chromatographic data, you can also check out my package [chromatographR](https://ethanbass.github.io/chromatographR/). For interactive visualization of chromatograms, you can check out my new package [ShinyChromViewer](https://github.com/ethanbass/ShinyChromViewer) (alpha release). From 24c79bc21daa23fb711296175c93d6f34dfd7fae Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Thu, 21 Dec 2023 11:41:26 -0500 Subject: [PATCH 40/51] test: skip read_mzml on CRAN --- tests/testthat/test-read_chroms.R | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tests/testthat/test-read_chroms.R b/tests/testthat/test-read_chroms.R index 97b9193..96ac5f5 100644 --- a/tests/testthat/test-read_chroms.R +++ b/tests/testthat/test-read_chroms.R @@ -62,12 +62,14 @@ test_that("Shimadzu ascii parser works", { }) test_that("read_mzml works", { + skip_on_cran() ext_filepath <- system.file("extdata", package = "RaMS") DAD_filepath <- list.files(ext_filepath, full.names = TRUE, - pattern = "uv_test_mini.mzML") + pattern = "uv_test_mini.mzML.gz") dad_long <- read_mzml(DAD_filepath, what = "DAD", verbose = FALSE) expect_equal(dad_long, - RaMS::grabMSdata(files = DAD_filepath, grab_what = "DAD", verbosity = FALSE) + RaMS::grabMSdata(files = DAD_filepath, grab_what = "DAD", + verbosity = FALSE) ) dad_wide <- read_mzml(DAD_filepath, what = "DAD", verbose = FALSE, data_format = "wide") @@ -132,14 +134,15 @@ test_that("read_chroms exports cdf files correctly", { path_out <- tempdir(check = TRUE) on.exit(unlink(c(fs::path(path_out, "ladder", ext = "cdf"), path_out))) file <- "testdata/ladder.txt" - x1 <- read_chroms(paths = file, format_in = "shimadzu_fid", export = TRUE, path_out = path_out, - export_format = "cdf", progress_bar = FALSE) + x1 <- read_chroms(paths = file, format_in = "shimadzu_fid", export = TRUE, + path_out = path_out, export_format = "cdf", + progress_bar = FALSE) x1_out <- read_cdf(fs::path(path_out, "ladder", ext = "cdf")) expect_equal(x1[[1]], x1_out, ignore_attr = TRUE) }) test_that("read_peaklist can read chemstation reports", { - path <- "testdata/RUTIN2.D/Report.TXT" + path <- "testdata/RUTIN2.D/" x <- read_peaklist(path, format_in = "chemstation") expect_equal(class(x[[1]]), "list") expect_equal(class(x[[1]][[1]]), "data.frame") From 4111163d58500c25c7af8fcd88992a53c767dd23 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Thu, 21 Dec 2023 11:55:54 -0500 Subject: [PATCH 41/51] docs: minor changes, fixed typo and capitalization --- NEWS.md | 24 ++++++++++++------------ R/call_openchrom.R | 2 +- R/read_chroms.R | 6 +++--- R/write_chroms.R | 4 ++-- README.md | 8 ++++---- man/call_openchrom.Rd | 2 +- man/read_chroms.Rd | 6 +++--- man/write_cdf.Rd | 4 ++-- 8 files changed, 28 insertions(+), 28 deletions(-) diff --git a/NEWS.md b/NEWS.md index 84f52b9..3d115a5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -17,12 +17,12 @@ ### New features * Added support for parallel processing through `pbapply` package. (**Note**: The `pbapply` package must be manually installed to enable parallel processing). -* Added internal parser for 'Agilent Chemstation' version 31 files (through `read_chemstation_uv` function). +* Added internal parser for 'Agilent ChemStation' version 31 files (through `read_chemstation_uv` function). * Added support for 'Agilent OpenLab' version 131 files through internal parser. * Added preliminary support for reading 'Agilent' (`.dx`) files (through `read_agilentdx` function). -* Added support for reading 'Chemstation' REPORT files. +* Added support for reading 'ChemStation' REPORT files. * Added parser for Shimadzu `.lcd` files through the `read_shimadzu_lcd` function. Only the PDA stream (not MS) is currently supported. -* Added `read_peaklist` function for reading peak lists. Currently 'Agilent Chemstation' and 'Shimadzu ASCII' formats are supported. +* Added `read_peaklist` function for reading peak lists. Currently 'Agilent ChemStation' and 'Shimadzu ASCII' formats are supported. * Added `verbose` argument to control console output for external parsers ('OpenChrom' and 'ThermoRawFileParser'). ### Other Improvements @@ -54,7 +54,7 @@ ### New features -* Added support for "Chemstation" UV (`.ch`) files (version 30). +* Added support for "ChemStation" UV (`.ch`) files (version 30). ### Minor improvements @@ -66,7 +66,7 @@ ### Bug fixes * Fixed bug preventing compilation of PDF manual. -* Fixed new bug causing failure to correctly read names of chemstation files from .D directory. +* Fixed new bug causing failure to correctly read names of 'ChemStation' files from .D directory. ## chromConverter 0.4.0 @@ -87,13 +87,13 @@ data origin. * Standardized datetime stamps so they are always converted to POSIXct format. * Now use `fs` package for parsing paths, eliminating buggy `check_paths` function. -* Fixed bug causing sloppy 'Chemstation' FID metadata. -* Fixed bug that caused padding of 'Chemstation 130' files with extra zeros. +* Fixed bug causing sloppy 'ChemStation' FID metadata. +* Fixed bug that caused padding of 'ChemStation 130' files with extra zeros. * Added additional tests. ## chromConverter 0.3.3 -* Added R-based parser for "Chemstation" UV (`.uv`) files (version 131) through +* Added R-based parser for "ChemStation" UV (`.uv`) files (version 131) through the `read_chemstation_uv` function. * Added `extract_metadata` function for extracting metadata from a list of chromatograms and returning it as a `data.frame` or `tibble`. @@ -112,8 +112,8 @@ and returning it as a `data.frame` or `tibble`. ## chromConverter 0.3.1 -* Added support for "Chemstation" UV (`.ch`) files (version 130). -* Added provisional support for "Chemstation" FID (version 8). +* Added support for "ChemStation" UV (`.ch`) files (version 130). +* Added provisional support for "ChemStation" FID (version 8). * Changed name of `read_chemstation_fid` function to `read_chemstation_ch`. * Ignore case when matching file extensions in `read_chroms`. * Added note to README about configuring RStudio correctly for accessing python parsers. @@ -121,12 +121,12 @@ and returning it as a `data.frame` or `tibble`. ## chromConverter 0.3.0 * Fixed bug causing "Chromeleon" metadata parser to fail. -* Fixed bug in "Chemstation" metadata parser. +* Fixed bug in "ChemStation" metadata parser. * Changed `format_data` argument to `data_format` to select wide or long format. * Added support for parsing `mzML` files with `RaMS`. * Added support for parsing "Agilent" (`.D`) and "Waters" (`.raw`) files with [rainbow](https://rainbow-api.readthedocs.io/). * Made `data_format` option available consistently for choosing `wide` or `long` format. -* Added parser in R for "Chemstation" FID (`.ch`) data (versions 81, 179 & 181). +* Added parser in R for "ChemStation" FID (`.ch`) data (versions 81, 179 & 181). * Improved error handling when loading python modules. * Improved error-handling for parsing metadata so small problems no longer error out the whole program. diff --git a/R/call_openchrom.R b/R/call_openchrom.R index 557ad82..05adc88 100644 --- a/R/call_openchrom.R +++ b/R/call_openchrom.R @@ -5,7 +5,7 @@ #' [OpenChrom](https://lablicate.com/platform/openchrom) (version 0.4.0) must be #' manually installed. The command line interface is no longer supported in the #' latest versions of OpenChrom (starting with version 0.5.0), so the function -#' will not work with these new versions. +#' will not work with these newer versions. #' #' The \code{call_openchrom} works by creating an \code{xml} batchfile and #' feeding it to the OpenChrom command-line interface. OpenChrom batchfiles diff --git a/R/read_chroms.R b/R/read_chroms.R index ca10262..f79fcbd 100644 --- a/R/read_chroms.R +++ b/R/read_chroms.R @@ -13,8 +13,8 @@ #' MassHunter' (\code{.dad}), 'Thermo RAW' (\code{.raw}), 'Waters ARW' (\code{.arw}), #' 'Waters RAW' (\code{.raw}), 'Chromeleon ASCII' (\code{.txt}), 'Shimadzu ASCII' #' (\code{.txt}), and 'Shimadzu LCD' files (preliminary support). Also, wraps -#' Openchrom parsers, which include many additional formats. To use 'Entab', -#' 'ThermoRawFileParser', or 'Openchrom' parsers, they must be manually installed. +#' 'OpenChrom' parsers, which include many additional formats. To use 'Entab', +#' 'ThermoRawFileParser', or 'OpenChrom' parsers, they must be manually installed. #' Please see the instructions in the #' [README](https://ethanbass.github.io/chromConverter/) for further details. #' @@ -75,7 +75,7 @@ #' @section Side effects: If \code{export} is TRUE, chromatograms will be #' exported in the format specified by \code{export_format} in the folder #' specified by \code{path_out}. Currently, the most versatile option for -#' exporting files is code{csv}. However, one-dimensional chromatograms can +#' exporting files is \code{csv}. However, unidimensional chromatograms can #' also be exported in ANDI Chromatography (netCDF) format by selecting #' \code{cdf}. If an \code{openchrom} parser is selected, ANIML and mzML are #' available as additional options. diff --git a/R/write_chroms.R b/R/write_chroms.R index 821d1c3..8b60bf1 100644 --- a/R/write_chroms.R +++ b/R/write_chroms.R @@ -20,9 +20,9 @@ export_cdfs <- function(data, path_out){ #' Write CDF file from chromatogram #' #' Exports a chromatogram in ANDI (Analytical Data Interchange) chromatography -#' format (ASTM E1947-98). This format can only accomodate unidimensional data. +#' format (ASTM E1947-98). This format can only accommodate unidimensional data. #' For two-dimensional chromatograms, the column to export can be specified -#' using the \code{lambda} argument. Othewise, a warning will be generated and +#' using the \code{lambda} argument. Otherwise, a warning will be generated and #' the first column of the chromatogram will be exported. #' #' @author Ethan Bass diff --git a/README.md b/README.md index 71199bb..ac1f71e 100644 --- a/README.md +++ b/README.md @@ -15,8 +15,8 @@ chromConverter aims to facilitate the conversion of chromatography data from var ### Formats ##### ChromConverter -- 'Agilent Chemstation' & 'OpenLab' `.uv` files (versions 131, 31) -- 'Agilent Chemstation' & 'OpenLab' `.ch` files (versions 8, 81, 130, 179, 181) +- 'Agilent ChemStation' & 'OpenLab' `.uv` files (versions 131, 31) +- 'Agilent ChemStation' & 'OpenLab' `.ch` files (versions 8, 81, 130, 179, 181) - ÅNDI (Analytical Data Interchange) chromatography format (`.cdf`) - mzML (`.mzml`) - 'Shimadzu LabSolutions' ascii (`.txt`) @@ -104,7 +104,7 @@ chromConverter includes some options to extract metadata from the provided files ##### Importing peak lists -The `read_peak` list function can be used to import peak lists from 'Chemstation' REPORT files or 'Shimadzu' ascii files. The syntax is similar to `read_chroms`. In the simplest case, you can just provide paths to the files or directory you want to read in along with the format (`format_in`), e.g. +The `read_peak` list function can be used to import peak lists from 'ChemStation' REPORT files or 'Shimadzu' ascii files. The syntax is similar to `read_chroms`. In the simplest case, you can just provide paths to the files or directory you want to read in along with the format (`format_in`), e.g. ``` pks <- read_chroms(, format_in = "chemstation") @@ -133,7 +133,7 @@ Thermo RAW files can be converted by calling the [ThermoRawFileParser](https://g ##### **OpenChrom** ###### (**Note:** Support for the commmand line interface has been removed from OpenChrom (as of `version 1.5.0`). Older versions (e.g. `1.4.x`) should still work for now). -[OpenChrom](https://lablicate.com/platform/openchrom) is opensource chromatography software, containing a large number of file parsers, which can now be conveniently accessed directly from R. Strangely, configuring OpenChrom for use on the command-line deactivates the graphical user interface (GUI). Thus, it is recommended to make a separate copy of OpenChrom if you'd still like to access the GUI. To use the OpenChrom parsers, follow the steps detailed below: +[OpenChrom](https://lablicate.com/platform/openchrom) is open source chromatography software, containing a large number of file parsers, which can now be conveniently accessed directly from R. Strangely, configuring OpenChrom for use on the command-line deactivates the graphical user interface (GUI). Thus, it is recommended to make a separate copy of OpenChrom if you'd still like to access the GUI. To use the OpenChrom parsers, follow the steps detailed below: 1) Download [OpenChrom](https://lablicate.com/platform/openchrom/download) (**version 1.4.x only**) and place it into a directory of your choice. 2) If you intend to use the GUI in the future, it is recommended to make a separate copy of OpenChrom for command-line use. diff --git a/man/call_openchrom.Rd b/man/call_openchrom.Rd index 26e1d17..63f8212 100644 --- a/man/call_openchrom.Rd +++ b/man/call_openchrom.Rd @@ -42,7 +42,7 @@ system call to the command-line interface. To use this function \href{https://lablicate.com/platform/openchrom}{OpenChrom} (version 0.4.0) must be manually installed. The command line interface is no longer supported in the latest versions of OpenChrom (starting with version 0.5.0), so the function -will not work with these new versions. +will not work with these newer versions. } \details{ The \code{call_openchrom} works by creating an \code{xml} batchfile and diff --git a/man/read_chroms.Rd b/man/read_chroms.Rd index 9283ba6..a0c309b 100644 --- a/man/read_chroms.Rd +++ b/man/read_chroms.Rd @@ -113,8 +113,8 @@ Provides a unified interface to all chromConverter parsers. Currently recognizes MassHunter' (\code{.dad}), 'Thermo RAW' (\code{.raw}), 'Waters ARW' (\code{.arw}), 'Waters RAW' (\code{.raw}), 'Chromeleon ASCII' (\code{.txt}), 'Shimadzu ASCII' (\code{.txt}), and 'Shimadzu LCD' files (preliminary support). Also, wraps -Openchrom parsers, which include many additional formats. To use 'Entab', -'ThermoRawFileParser', or 'Openchrom' parsers, they must be manually installed. +'OpenChrom' parsers, which include many additional formats. To use 'Entab', +'ThermoRawFileParser', or 'OpenChrom' parsers, they must be manually installed. Please see the instructions in the \href{https://ethanbass.github.io/chromConverter/}{README} for further details. @@ -127,7 +127,7 @@ providing paths to directories, the file format must be specified using the If \code{export} is TRUE, chromatograms will be exported in the format specified by \code{export_format} in the folder specified by \code{path_out}. Currently, the most versatile option for -exporting files is code{csv}. However, one-dimensional chromatograms can +exporting files is \code{csv}. However, unidimensional chromatograms can also be exported in ANDI Chromatography (netCDF) format by selecting \code{cdf}. If an \code{openchrom} parser is selected, ANIML and mzML are available as additional options. diff --git a/man/write_cdf.Rd b/man/write_cdf.Rd index ce363e6..f901e0e 100644 --- a/man/write_cdf.Rd +++ b/man/write_cdf.Rd @@ -25,9 +25,9 @@ No return value. The function is called for its side effects. } \description{ Exports a chromatogram in ANDI (Analytical Data Interchange) chromatography -format (ASTM E1947-98). This format can only accomodate unidimensional data. +format (ASTM E1947-98). This format can only accommodate unidimensional data. For two-dimensional chromatograms, the column to export can be specified -using the \code{lambda} argument. Othewise, a warning will be generated and +using the \code{lambda} argument. Otherwise, a warning will be generated and the first column of the chromatogram will be exported. } \section{Side effects}{ From 3dfc213b8a45e04f09f9abf6d7b7cd466f75fa23 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Sat, 23 Dec 2023 18:50:47 -0500 Subject: [PATCH 42/51] feat: added precision argument to call rainbow fixed names of mz values in long format data --- R/call_rainbow.R | 21 ++++++++++++++------- R/reshape_chroms.R | 8 ++++---- R/write_chroms.R | 2 +- man/call_rainbow.Rd | 5 ++++- 4 files changed, 23 insertions(+), 13 deletions(-) diff --git a/R/call_rainbow.R b/R/call_rainbow.R index d792c67..796a5ef 100644 --- a/R/call_rainbow.R +++ b/R/call_rainbow.R @@ -19,18 +19,22 @@ #' @param read_metadata Logical. Whether to attach metadata. Defaults to TRUE. #' @param collapse Logical. Whether to collapse lists that only contain a single #' element. +#' @param precision Number of decimals to round mz values. Defaults to 1. #' @author Ethan Bass #' @return Returns a (nested) list of \code{matrices} or \code{data.frames} according to #' the value of \code{format_out}. Data is ordered according to the value of #' \code{by}. #' @export -call_rainbow <- function(file, format_in = c("agilent_d", "waters_raw", "masshunter", - "chemstation", "chemstation_uv", "chemstation_fid"), +call_rainbow <- function(file, + format_in = c("agilent_d", "waters_raw", "masshunter", + "chemstation", "chemstation_uv", + "chemstation_fid"), format_out = c("matrix", "data.frame"), data_format = c("wide", "long"), by = c("detector","name"), what = NULL, - read_metadata = TRUE, collapse = TRUE){ + read_metadata = TRUE, collapse = TRUE, + precision = 1){ check_rb_configuration() by <- match.arg(by, c("detector","name")) format_out <- match.arg(format_out, c("matrix","data.frame")) @@ -48,7 +52,7 @@ call_rainbow <- function(file, format_in = c("agilent_d", "waters_raw", "masshun if (format_in %in% c("chemstation")){ by <- "single" } - x <- converter(file) + x <- converter(file, prec = as.integer(precision)) if (by == "detector"){ if (!is.null(what)){ what_not_present <- which(!(what %in% names(x$by_detector))) @@ -83,14 +87,17 @@ call_rainbow <- function(file, format_in = c("agilent_d", "waters_raw", "masshun #' @noRd extract_rb_data <- function(xx, format_out = "matrix", - data_format = c("wide","long"), + data_format = c("wide", "long"), read_metadata = TRUE){ - data_format <- match.arg(data_format, c("wide","long")) + data_format <- match.arg(data_format, c("wide", "long")) data <- xx$data try(rownames(data) <- xx$xlabels) colnames(data) <- xx$ylabels if (data_format == "long"){ - data <- reshape_chrom(data, data_format = "long") + names_to <- switch(xx$detector, "MS" = "mz", + "UV" = "lambda", + "lambda") + data <- reshape_chrom(data, data_format = "long", names_to = names_to) } if (format_out == "data.frame"){ data <- as.data.frame(data) diff --git a/R/reshape_chroms.R b/R/reshape_chroms.R index 5341024..bd1615b 100644 --- a/R/reshape_chroms.R +++ b/R/reshape_chroms.R @@ -58,10 +58,11 @@ reshape_chrom <- function(x, data_format, ...){ #' @importFrom stats reshape #' @param x A chromatographic matrix in wide format. #' @param lambdas Wavelength(s) to include. +#' @param names_to Argument to \code{\link[tidyr]{pivot_longer}} #' @return A chromatographic matrix in long format. #' @author Ethan Bass #' @noRd -reshape_chrom_long <- function(x, lambdas, format_out = NULL){ +reshape_chrom_long <- function(x, lambdas, format_out = NULL, names_to = "lambda"){ if (!is.null(attr(x, "data_format")) && attr(x, "data_format") == "long"){ warning("The data already appear to be in long format!", immediate. = TRUE) } @@ -80,9 +81,8 @@ reshape_chrom_long <- function(x, lambdas, format_out = NULL){ xx <- xx[,lambdas, drop = FALSE] } data <- data.frame(tidyr::pivot_longer(data.frame(rt = rownames(xx), xx, check.names = FALSE), - cols = -c("rt"), names_to = "lambda", values_to = "intensity")) - data$rt <- as.numeric(data$rt) - data$lambda <- as.numeric(data$lambda) + cols = -c("rt"), names_to = names_to, values_to = "intensity")) + data <- apply(data, 2, as.numeric) } if (format_out == "matrix"){ data <- as.matrix(data) diff --git a/R/write_chroms.R b/R/write_chroms.R index 8b60bf1..410b13b 100644 --- a/R/write_chroms.R +++ b/R/write_chroms.R @@ -1,4 +1,4 @@ -#' Export chromatograms as csvs +#' Export chromatograms as CSVs #' @author Ethan Bass #' @noRd export_csvs <- function(data, path_out, fileEncoding = "utf8", row.names = TRUE){ diff --git a/man/call_rainbow.Rd b/man/call_rainbow.Rd index 2076c6c..6f9a782 100644 --- a/man/call_rainbow.Rd +++ b/man/call_rainbow.Rd @@ -13,7 +13,8 @@ call_rainbow( by = c("detector", "name"), what = NULL, read_metadata = TRUE, - collapse = TRUE + collapse = TRUE, + precision = 1 ) } \arguments{ @@ -36,6 +37,8 @@ call_rainbow( \item{collapse}{Logical. Whether to collapse lists that only contain a single element.} + +\item{precision}{Number of decimals to round mz values. Defaults to 1.} } \value{ Returns a (nested) list of \code{matrices} or \code{data.frames} according to From c07dad414a72527533e01e492a6b1d4d551b97f8 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Sat, 23 Dec 2023 18:51:27 -0500 Subject: [PATCH 43/51] fix: improve path parsing for mac openchrom updated call_openchrom docs --- R/call_openchrom.R | 12 +++++++++--- man/call_openchrom.Rd | 9 ++++++--- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/R/call_openchrom.R b/R/call_openchrom.R index 05adc88..21da731 100644 --- a/R/call_openchrom.R +++ b/R/call_openchrom.R @@ -34,9 +34,12 @@ #' @param return_paths Logical. If TRUE, the function will return a character #' vector of paths to the newly created files. #' @param verbose Logical. Whether to print output from OpenChrom to the console. -#' @return If \code{return_paths} is TRUE, the function will return a vector of paths to the newly created files. -#' If \code{return_paths} is FALSE and \code{export_format} is \code{csv}, the function will return a list -#' of chromatograms in \code{data.frame} format. Otherwise, it will not return anything. +#' @return If \code{return_paths} is \code{FALSE}, the function will return a list of +#' chromatograms (if an appropriate parser is available in chromConverter). The +#' chromatograms will be returned in \code{matrix} or \code{data.frame} format +#' according to the value of {data_class}. If \code{return_paths} is \code{TRUE}, +#' the function will return a character vector of paths to the newly created +#' files. #' @section Side effects: Chromatograms will be exported in the format specified #' by \code{export_format} in the folder specified by \code{path_out}. #' @author Ethan Bass @@ -177,6 +180,9 @@ configure_openchrom <- function(cli = c("null", "true", "false", "status"), path } else{ path_parser <- path } + if (grepl("app/?$", path_parser)){ + path_parser <- fs::path(path_parser, "Contents/MacOS/openchrom") + } writeLines(path_parser, con = system.file('shell/path_to_openchrom_commandline.txt', package='chromConverter')) diff --git a/man/call_openchrom.Rd b/man/call_openchrom.Rd index 63f8212..2e4b3dd 100644 --- a/man/call_openchrom.Rd +++ b/man/call_openchrom.Rd @@ -32,9 +32,12 @@ vector of paths to the newly created files.} \item{verbose}{Logical. Whether to print output from OpenChrom to the console.} } \value{ -If \code{return_paths} is TRUE, the function will return a vector of paths to the newly created files. -If \code{return_paths} is FALSE and \code{export_format} is \code{csv}, the function will return a list -of chromatograms in \code{data.frame} format. Otherwise, it will not return anything. +If \code{return_paths} is \code{FALSE}, the function will return a list of +chromatograms (if an appropriate parser is available in chromConverter). The +chromatograms will be returned in \code{matrix} or \code{data.frame} format +according to the value of {data_class}. If \code{return_paths} is \code{TRUE}, +the function will return a character vector of paths to the newly created +files. } \description{ Writes \code{xml} batch-files and calls OpenChrom file parsers using a From c10d27fa04cbfa2b1b1b97a1c601400864f2751b Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Sat, 23 Dec 2023 18:51:32 -0500 Subject: [PATCH 44/51] Update NEWS.md --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 3d115a5..31d85fe 100644 --- a/NEWS.md +++ b/NEWS.md @@ -10,6 +10,7 @@ * Added `collapse` argument to `call_rainbow` and to collapse superfluous lists. * Added `...` argument to `read_chroms` for supplying additional arguments to parsers. * Added alias to `read_chroms` for reading `mzxml` files with `RaMS`. +* Added `precision` argument to `call_rainbow` to control number of digits "mz" values are rounded to. (Also changed default behavior so values are rounded to one decimal by default). * Updated documentation of various functions. ## chromConverter 0.5.0 From c25e0d0664543a7e60f1239c2538b600b3e61c0c Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Sat, 23 Dec 2023 19:03:27 -0500 Subject: [PATCH 45/51] test: added additional tests, expanded existing tests --- tests/testthat/test-extra.R | 134 +++++++++++++++++++++++++++--- tests/testthat/test-read_chroms.R | 17 ++++ 2 files changed, 138 insertions(+), 13 deletions(-) diff --git a/tests/testthat/test-extra.R b/tests/testthat/test-extra.R index 6f152b0..7062ff5 100644 --- a/tests/testthat/test-extra.R +++ b/tests/testthat/test-extra.R @@ -1,4 +1,31 @@ +test_that("read_chroms can read 'Agilent MS' files", { + skip_on_cran() + skip_if_not_installed("chromConverterExtraTests") + skip_if_not_installed("entab") + + path <- system.file("chemstation_MSD.MS", + package = "chromConverterExtraTests") + + x <- read_chroms(path, parser = "entab", progress_bar = FALSE)[[1]] + expect_equal(class(x), "data.frame") + expect_equal(dim(x), c(95471, 3)) + expect_equal(attr(x, "parser"), "entab") + expect_equal(colnames(x), c("rt", "mz", "intensity")) + + x1 <- read_chroms(path, parser = "rainbow", + progress_bar = FALSE, precision = 0)[[1]] + expect_equal(class(x1)[1], "matrix") + expect_equal(dim(x1), c(2534, 841)) + + x2 <- read_chroms(path, parser = "rainbow", + progress_bar = FALSE, data_format = "long", + precision = 0)[[1]] + expect_equal(class(x2)[1], "matrix") + expect_equal(dim(x2), c(2131094, 3)) + expect_equal(colnames(x2), c("rt", "mz", "intensity")) +}) + test_that("read_chroms can read 'Agilent Chemstation' version 30 files", { skip_on_cran() skip_if_not_installed("chromConverterExtraTests") @@ -6,13 +33,13 @@ test_that("read_chroms can read 'Agilent Chemstation' version 30 files", { path <- system.file("chemstation_30.ch", package = "chromConverterExtraTests") - x <- read_chroms(path, progress_bar = FALSE, parser="chromconverter") + x <- read_chroms(path, parser="chromconverter", progress_bar = FALSE) expect_equal(class(x[[1]])[1], "matrix") expect_equal(dim(x[[1]]), c(38405, 1)) expect_equal(attr(x[[1]], "parser"), "chromconverter") - x1 <- read_chroms(path, progress_bar = FALSE, format_out = "data.frame", - data_format = "long", parser = "chromconverter") + x1 <- read_chroms(path, parser = "chromconverter", format_out = "data.frame", + data_format = "long", progress_bar = FALSE) expect_equal(class(x1[[1]])[1], "data.frame") expect_equal(as.numeric(rownames(x[[1]])), x1[[1]][,1]) expect_equal(x[[1]][,1], x1[[1]][,2], ignore_attr = TRUE) @@ -36,7 +63,6 @@ test_that("read_chroms can read 'Agilent Chemstation' 31 files", { expect_equal(attr(x, "parser"), "chromconverter") }) - test_that("read_chroms can read 'Agilent Chemstation' version 81 files", { skip_on_cran() skip_if_not_installed("chromConverterExtraTests") @@ -73,11 +99,11 @@ test_that("read_chroms can read 'Agilent Chemstation' version 130 files", { }) -test_that("read_chroms can read 'Agilent Chemstation' 179 files", { +test_that("read_chroms can read 'Agilent OpenLab' 179 files", { skip_on_cran() skip_if_not_installed("chromConverterExtraTests") - path <- system.file("chemstation_179.ch", + path <- system.file("openlab_179.ch", package = "chromConverterExtraTests") x <- read_chroms(path, progress_bar = FALSE) @@ -92,6 +118,32 @@ test_that("read_chroms can read 'Agilent Chemstation' 179 files", { expect_equal(x[[1]][,1], x1[[1]][,2], ignore_attr = TRUE) }) +test_that("read_chroms can read 'Agilent ChemStation' 179 files (8-byte format)", { + skip_on_cran() + skip_if_not_installed("chromConverterExtraTests") + + path <- system.file("chemstation_179_mustang.ch", + package = "chromConverterExtraTests") + + x <- read_chroms(path, progress_bar = FALSE) + expect_equal(class(x[[1]])[1], "matrix") + expect_equal(dim(x[[1]]), c(54704, 1)) + expect_equal(attr(x[[1]], "parser"), "chromconverter") +}) + +test_that("read_chroms can read 'Agilent ChemStation' 179 (4-byte format)", { + skip_on_cran() + skip_if_not_installed("chromConverterExtraTests") + + path <- system.file("chemstation_179_asterix.ch", + package = "chromConverterExtraTests") + + x <- read_chroms(path, progress_bar = FALSE) + expect_equal(class(x[[1]])[1], "matrix") + expect_equal(dim(x[[1]]), c(22800, 1)) + expect_equal(attr(x[[1]], "parser"), "chromconverter") +}) + test_that("read_chroms can read 'Agilent Masshunter' dad files", { skip_on_cran() skip_if_not_installed("chromConverterExtraTests") @@ -99,9 +151,12 @@ test_that("read_chroms can read 'Agilent Masshunter' dad files", { path <- system.file("masshunter.d/AcqData/DAD1.sp", package = "chromConverterExtraTests") - x <- read_chroms(path, format_in = "masshunter_dad", progress_bar = FALSE) + + x <- read_chroms(path, format_in = "masshunter_dad", parser = "entab", + progress_bar = FALSE) x1 <- read_chroms(path, format_in = "masshunter_dad", parser = "aston", progress_bar = FALSE) + expect_equal(dim(x[[1]]), c(240, 276)) expect_equal(class(x[[1]])[1], "matrix") expect_equal(x[[1]], x1[[1]], ignore_attr = TRUE) @@ -140,6 +195,33 @@ test_that("read_chroms can read 'Waters ARW' PDA files", { # expect_equal(dim(x1[[1]])) }) +test_that("read_chroms can read 'Waters RAW' files", { + skip_on_cran() + skip_if_not_installed("chromConverterExtraTests") + + path <- system.file("waters_blue.raw", package = "chromConverterExtraTests") + + x <- read_chroms(path, format_in = "waters_raw", progress_bar = FALSE, + precision = 0)[[1]] + expect_equal(names(x), c("MS","UV","CAD")) + expect_equal(dim(x$MS), c(725, 740)) + expect_equal(attr(x$MS, "parser"), "rainbow") + expect_equal(attr(x$MS, "data_format"), "wide") + expect_equal(attr(x$MS, "metadata")$polarity,"+") + + x1 <- read_chroms(path, format_in = "waters_raw", progress_bar = FALSE, + parser = "chromconverter")[[1]] + expect_equal(class(x1$CAD)[1], "matrix") + expect_equal(x$CAD, x1$CAD, ignore_attr = TRUE) + + x2 <- read_chroms(path, format_in = "waters_raw", progress_bar = FALSE, + what = "MS", data_format = "long", precision = 0)[[1]] + expect_equal(nrow(x2$MS), nrow(x$MS)*ncol(x$MS)) + expect_equal(colnames(x2$MS), c("rt", "mz", "intensity")) + # no metadata + # expect_equal(attr(x1$CAD, "parser"), "chromconverter") +}) + test_that("read_chroms can read 'Chromeleon' comma-separated files", { skip_on_cran() skip_if_not_installed("chromConverterExtraTests") @@ -180,7 +262,7 @@ test_that("read_chroms can read 'Chromeleon' period-separated files", { expect_equal(x[[1]][,1], x1[[1]][,2], ignore_attr = TRUE) }) -test_that("read_peaklist can read `Shimadzu` PDA files", { +test_that("read_peaklist can read `Shimadzu` ascii (PDA) files", { skip_on_cran() skip_if_missing_dependecies() skip_if_not_installed("chromConverterExtraTests") @@ -195,7 +277,7 @@ test_that("read_peaklist can read `Shimadzu` PDA files", { expect_equal(colnames(x[[1]]), c("sample", "rt", "start", "end", "area", "height")) }) -test_that("read_chroms can read 'Shimadzu' PDA files", { +test_that("read_chroms can read 'Shimadzu' ascii (PDA) files", { skip_on_cran() skip_if_not_installed("chromConverterExtraTests") @@ -250,11 +332,10 @@ test_that("read_chroms can read 'Thermo' RAW files", { expect_equal(names(x), c("MS1", "MS2", "DAD", "BPC", "TIC", "chroms", "metadata")) }) - # test_that("thermoraw parser works",{ # skip_if_not(configure_thermo_parser(check = TRUE)) -# file <- "/Users/ethanbass/Library/CloudStorage/Box-Box/chromatography_test_files/thermo_files/small.RAW" -# x <- read_chroms(file, format_in = "thermoraw", find_files = FALSE) + file <- "/Users/ethanbass/Library/CloudStorage/Box-Box/chromatography_test_files/thermo_files/small.RAW" + x <- read_chroms(file, format_in = "thermoraw", find_files = FALSE) # expect_equal(class(x[[1]])[1], "matrix") # expect_equal(attributes(x[[1]])$instrument, "GC-2014") # }) @@ -266,10 +347,13 @@ test_that("read_chroms can use 'OpenChrom' parsers", { skip_if_missing_openchrom() path <- system.file("DCM1.SMS", package = "chromConverterExtraTests") + x <- read_chroms(path, format_in = "msd", progress_bar = FALSE, verbose = FALSE, export_format = "csv")[[1]] + expect_equal(class(x)[1], "matrix") expect_equal(dim(x), c(3032, 297)) + x <- read_chroms(path, format_in = "msd", progress_bar = FALSE, verbose = FALSE)[[1]] expect_equal(class(x), "list") @@ -279,7 +363,6 @@ test_that("read_chroms can use 'OpenChrom' parsers", { test_that("read_varian_peaklist function works", { skip_on_cran() skip_if_not_installed("chromConverterExtraTests") - path <- system.file("varian_peaklist.csv", package = "chromConverterExtraTests") x <- read_varian_peaklist(path) @@ -293,6 +376,31 @@ test_that("read_cdf function can read peak tables", { skip_if_not_installed("chromConverterExtraTests") path <- system.file("VARIAN1.CDF", package = "chromConverterExtraTests") x <- read_cdf(path, what = "peak_table") + # what about chromatograms expect_s3_class(x, "data.frame") expect_equal(dim(x), c(8,5)) }) + +test_that("read_chroms can read ANDI MS files", { + skip_on_cran() + skip_if_not_installed("ncdf4") + skip_if_not_installed("chromConverterExtraTests") + + path <- system.file("HP_MS.CDF", package = "chromConverterExtraTests") + + x <- read_chroms(path, what=c("chromatogram","ms_spectra"), progress_bar = FALSE)[[1]] + expect_equal(names(x), c("chromatogram", "ms_spectra")) + expect_s3_class(x$ms_spectra, "data.frame") + expect_equal(class(x$chromatogram)[1], "matrix") + expect_true(all(dim(x$chromatogram) == c(621, 1))) + expect_true(all(dim(x$ms_spectra) == c(7638, 3))) + + x1 <- read_chroms(path, what=c("chromatogram"), data_format="long", + progress_bar = FALSE)[[1]] + expect_equal(ncol(x1), 2) + + x2 <- read_chroms(path, what=c("ms_spectra"), ms_format="list", + progress_bar = FALSE)[[1]] + expect_type(x2, "list") + expect_equal(length(x2), length(unique(x$ms_spectra$rt))) +}) diff --git a/tests/testthat/test-read_chroms.R b/tests/testthat/test-read_chroms.R index 96ac5f5..95a1294 100644 --- a/tests/testthat/test-read_chroms.R +++ b/tests/testthat/test-read_chroms.R @@ -43,9 +43,11 @@ test_that("entab parser can read `Agilent Chemstation` 131 files", { skip_on_cran() skip_if_not_installed("entab") file <- "testdata/dad1.uv" + x1 <- read_chroms(file, format_in = "chemstation_uv", parser = "entab", find_files = FALSE, read_metadata = TRUE, progress_bar = FALSE) + expect_equal(as.numeric(x[[1]][,1]), as.numeric(x1[[1]][,"220"])) expect_equal(as.numeric(rownames(x[[1]])), as.numeric(rownames(x1[[1]]))) expect_equal(class(x1[[1]])[1], "matrix") @@ -55,8 +57,10 @@ test_that("entab parser can read `Agilent Chemstation` 131 files", { test_that("Shimadzu ascii parser works", { file <- "testdata/ladder.txt" + x <- read_chroms(file, format_in = "shimadzu_fid", find_files = FALSE, progress_bar = FALSE) + expect_equal(class(x[[1]])[1], "matrix") expect_equal(attributes(x[[1]])$instrument, "GC-2014") }) @@ -87,15 +91,28 @@ test_that("Rainbow parser can read chemstation 131 files", { skip_if_missing_dependecies() skip_on_cran() skip_on_ci() + x1 <- read_chroms(path_uv, format_in = "chemstation_uv", parser = "rainbow", find_files = FALSE, read_metadata = TRUE, progress_bar = FALSE) + expect_equal(as.numeric(x[[1]][,1]), as.numeric(x1[[1]][,"220"])) expect_equal(as.numeric(rownames(x[[1]])), as.numeric(rownames(x1[[1]]))) expect_equal(class(x1[[1]])[1], "matrix") expect_equal(attr(x1[[1]], "parser"), "rainbow") expect_equal(attr(x1[[1]], "data_format"), "wide") + expect_equal(attr(x1$dad1, "metadata")$unit, "mAU") + + x2 <- read_chroms(path_uv, format_in = "chemstation_uv", parser = "rainbow", + find_files = FALSE, data_format = "long", + read_metadata = TRUE, + progress_bar = FALSE) + expect_equal(nrow(x2$dad1), nrow(x1$dad1)*ncol(x1$dad1)) + expect_equal(colnames(x2$dad1), c("rt", "lambda", "intensity")) + expect_equal(attr(x2$dad1, "metadata")$unit, "mAU") + expect_equal(attr(x2[[1]], "data_format"), "long") + expect_equal(attr(x2[[1]], "parser"), "rainbow") }) test_that("chromconverter parser can read chemstation 130 files", { From 81b2119c31863bfd0a5b0183cf43d2a193bd8fc7 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Sat, 23 Dec 2023 19:03:40 -0500 Subject: [PATCH 46/51] docs: update readme --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index ac1f71e..baa7ed5 100644 --- a/README.md +++ b/README.md @@ -10,18 +10,18 @@ ### Overview -chromConverter aims to facilitate the conversion of chromatography data from various proprietary formats so it can be easily read into R for further analysis. It includes a number of parsers written directly in R as well as bindings to various external libraries including [Aston](https://github.com/bovee/aston), [Entab](https://github.com/bovee/entab), [rainbow](https://rainbow-api.readthedocs.io/), the [ThermoRawFileParser](https://github.com/compomics/ThermoRawFileParser), and [OpenChrom](https://lablicate.com/platform/openchrom). +chromConverter aims to facilitate the conversion of chromatography data from various proprietary formats so it can be easily read into R for further analysis. It includes a number of parsers written directly in R as well as bindings to various external libraries including [Aston](https://github.com/bovee/aston), [Entab](https://github.com/bovee/entab), [rainbow](https://rainbow-api.readthedocs.io/), the [ThermoRawFileParser](https://github.com/compomics/ThermoRawFileParser), [OpenChrom](https://lablicate.com/platform/openchrom) and [RaMS](https://github.com/wkumler/RaMS/). ### Formats ##### ChromConverter - 'Agilent ChemStation' & 'OpenLab' `.uv` files (versions 131, 31) -- 'Agilent ChemStation' & 'OpenLab' `.ch` files (versions 8, 81, 130, 179, 181) -- ÅNDI (Analytical Data Interchange) chromatography format (`.cdf`) -- mzML (`.mzml`) +- 'Agilent ChemStation' & 'OpenLab' `.ch` files (versions 30, 130, 8, 81, 179, 181) +- ÅNDI (Analytical Data Interchange) Chromatography & MS formats (`.cdf`) +- mzML (`.mzml`) & mzXML (.`mzxml`) (via RaMS). - 'Shimadzu LabSolutions' ascii (`.txt`) - 'Shimadzu LabSolutions'`.lcd` (*provisional support* for PDA stream) -- 'Thermo Scientific Chromeleon' UV ascii (`.txt`) +- 'Thermo Scientific Chromeleon' ascii (`.txt`) - 'Waters' ascii (`.arw`) ##### External Libraries From 7a553656a9dc7448667e214ab80090ec18108bed Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Sat, 23 Dec 2023 21:53:57 -0500 Subject: [PATCH 47/51] test: skip test if file isn't found --- tests/testthat/test-extra.R | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-extra.R b/tests/testthat/test-extra.R index 7062ff5..ee3f04b 100644 --- a/tests/testthat/test-extra.R +++ b/tests/testthat/test-extra.R @@ -6,6 +6,7 @@ test_that("read_chroms can read 'Agilent MS' files", { path <- system.file("chemstation_MSD.MS", package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) x <- read_chroms(path, parser = "entab", progress_bar = FALSE)[[1]] expect_equal(class(x), "data.frame") @@ -32,6 +33,7 @@ test_that("read_chroms can read 'Agilent Chemstation' version 30 files", { path <- system.file("chemstation_30.ch", package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) x <- read_chroms(path, parser="chromconverter", progress_bar = FALSE) expect_equal(class(x[[1]])[1], "matrix") @@ -49,7 +51,10 @@ test_that("read_chroms can read 'Agilent Chemstation' 31 files", { skip_on_cran() skip_if_not_installed("chromConverterExtraTests") skip_if_not_installed("entab") + path <- system.file("chemstation_31.uv", package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) + x <- read_chroms(path, progress_bar = FALSE, parser = "chromconverter")[[1]] x1 <- read_chroms(path, progress_bar = FALSE, parser = "entab")[[1]] @@ -69,6 +74,7 @@ test_that("read_chroms can read 'Agilent Chemstation' version 81 files", { path <- system.file("chemstation_81.ch", package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) x <- read_chroms(path, progress_bar = FALSE) expect_equal(class(x[[1]])[1], "matrix") @@ -89,6 +95,8 @@ test_that("read_chroms can read 'Agilent Chemstation' version 130 files", { path <- system.file("chemstation_130.ch", package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) + x <- read_chroms(path, progress_bar = FALSE) expect_equal(class(x[[1]])[1], "matrix") expect_equal(dim(x[[1]]), c(12750, 1)) @@ -105,6 +113,7 @@ test_that("read_chroms can read 'Agilent OpenLab' 179 files", { path <- system.file("openlab_179.ch", package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) x <- read_chroms(path, progress_bar = FALSE) expect_equal(class(x[[1]])[1], "matrix") @@ -124,6 +133,7 @@ test_that("read_chroms can read 'Agilent ChemStation' 179 files (8-byte format)" path <- system.file("chemstation_179_mustang.ch", package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) x <- read_chroms(path, progress_bar = FALSE) expect_equal(class(x[[1]])[1], "matrix") @@ -137,6 +147,7 @@ test_that("read_chroms can read 'Agilent ChemStation' 179 (4-byte format)", { path <- system.file("chemstation_179_asterix.ch", package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) x <- read_chroms(path, progress_bar = FALSE) expect_equal(class(x[[1]])[1], "matrix") @@ -151,6 +162,7 @@ test_that("read_chroms can read 'Agilent Masshunter' dad files", { path <- system.file("masshunter.d/AcqData/DAD1.sp", package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) x <- read_chroms(path, format_in = "masshunter_dad", parser = "entab", progress_bar = FALSE) @@ -181,6 +193,7 @@ test_that("read_chroms can read 'Waters ARW' PDA files", { skip_if_not_installed("chromConverterExtraTests") path <- system.file("waters_pda.arw", package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) x <- read_chroms(path, format_in = "waters_arw", progress_bar = FALSE) expect_equal(class(x[[1]])[1], "matrix") @@ -200,6 +213,7 @@ test_that("read_chroms can read 'Waters RAW' files", { skip_if_not_installed("chromConverterExtraTests") path <- system.file("waters_blue.raw", package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) x <- read_chroms(path, format_in = "waters_raw", progress_bar = FALSE, precision = 0)[[1]] @@ -228,6 +242,7 @@ test_that("read_chroms can read 'Chromeleon' comma-separated files", { path <- system.file("chromeleon_comma.txt", package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) x <- read_chroms(path, format_in = "chromeleon_uv", progress_bar = FALSE) expect_equal(class(x[[1]])[1], "matrix") @@ -249,6 +264,7 @@ test_that("read_chroms can read 'Chromeleon' period-separated files", { path <- system.file("chromeleon_period.txt", package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) x <- read_chroms(path, format_in = "chromeleon", progress_bar = FALSE) expect_equal(class(x[[1]])[1], "matrix") @@ -266,8 +282,10 @@ test_that("read_peaklist can read `Shimadzu` ascii (PDA) files", { skip_on_cran() skip_if_missing_dependecies() skip_if_not_installed("chromConverterExtraTests") + path <- system.file("shimadzuDAD_Anthocyanin.txt", package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) x <- read_peaklist(path, format_in="shimadzu_dad", progress_bar = FALSE)[[1]] expect_equal(class(x), "list") @@ -283,6 +301,7 @@ test_that("read_chroms can read 'Shimadzu' ascii (PDA) files", { path <- system.file("shimadzuDAD_Anthocyanin.txt", package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) x <- read_chroms(path, format_in = "shimadzu_dad", progress_bar = FALSE)[[1]] expect_equal(class(x)[1], "matrix") @@ -306,6 +325,7 @@ test_that("read_chroms can read 'Agilent' dx files", { skip_if_not_installed("chromConverterExtraTests") path <- system.file("agilent.dx", package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) x <- read_chroms(path, format_in = "agilent_dx", progress_bar = FALSE)[[1]] expect_equal(class(x)[1], "matrix") @@ -334,8 +354,8 @@ test_that("read_chroms can read 'Thermo' RAW files", { # test_that("thermoraw parser works",{ # skip_if_not(configure_thermo_parser(check = TRUE)) - file <- "/Users/ethanbass/Library/CloudStorage/Box-Box/chromatography_test_files/thermo_files/small.RAW" - x <- read_chroms(file, format_in = "thermoraw", find_files = FALSE) + # file <- "/Users/ethanbass/Library/CloudStorage/Box-Box/chromatography_test_files/thermo_files/small.RAW" + # x <- read_chroms(file, format_in = "thermoraw", find_files = FALSE) # expect_equal(class(x[[1]])[1], "matrix") # expect_equal(attributes(x[[1]])$instrument, "GC-2014") # }) @@ -347,6 +367,7 @@ test_that("read_chroms can use 'OpenChrom' parsers", { skip_if_missing_openchrom() path <- system.file("DCM1.SMS", package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) x <- read_chroms(path, format_in = "msd", progress_bar = FALSE, verbose = FALSE, export_format = "csv")[[1]] @@ -363,8 +384,11 @@ test_that("read_chroms can use 'OpenChrom' parsers", { test_that("read_varian_peaklist function works", { skip_on_cran() skip_if_not_installed("chromConverterExtraTests") + path <- system.file("varian_peaklist.csv", package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) + x <- read_varian_peaklist(path) expect_s3_class(x, "data.frame") expect_equal(dim(x), c(46476, 15)) @@ -374,7 +398,10 @@ test_that("read_cdf function can read peak tables", { skip_on_cran() skip_if_not_installed("ncdf4") skip_if_not_installed("chromConverterExtraTests") + path <- system.file("VARIAN1.CDF", package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) + x <- read_cdf(path, what = "peak_table") # what about chromatograms expect_s3_class(x, "data.frame") @@ -387,6 +414,7 @@ test_that("read_chroms can read ANDI MS files", { skip_if_not_installed("chromConverterExtraTests") path <- system.file("HP_MS.CDF", package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) x <- read_chroms(path, what=c("chromatogram","ms_spectra"), progress_bar = FALSE)[[1]] expect_equal(names(x), c("chromatogram", "ms_spectra")) From 2911e02843ae4f613a5b4d35cdb749834eeefb4b Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Sun, 24 Dec 2023 00:39:21 -0500 Subject: [PATCH 48/51] Update r.yml install rainbow --- .github/workflows/r.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index d8407d7..41626b3 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -52,7 +52,7 @@ jobs: install.packages("chromConverterExtraTests", repos = "https://ethanbass.github.io/drat/") reticulate::install_miniconda() reticulate::conda_create('r-reticulate', packages = c('python==3.9', 'numpy', 'scipy', 'pandas')) - reticulate::conda_install('r-reticulate', packages = c('aston', "olefile"), pip=TRUE) + reticulate::conda_install('r-reticulate', packages = c('aston', "olefile", "rainbow-api"), pip=TRUE) shell: Rscript {0} - if: runner.os == 'macOS' From 1c18870557716f2099401421095f8d8447626334 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Sun, 24 Dec 2023 18:02:34 -0500 Subject: [PATCH 49/51] fix: windows path bug in lcd parser --- NEWS.md | 1 + R/read_shimadzu_lcd.R | 2 +- tests/testthat/test-extra.R | 17 ++++++++++------- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/NEWS.md b/NEWS.md index 31d85fe..ce521b5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -11,6 +11,7 @@ * Added `...` argument to `read_chroms` for supplying additional arguments to parsers. * Added alias to `read_chroms` for reading `mzxml` files with `RaMS`. * Added `precision` argument to `call_rainbow` to control number of digits "mz" values are rounded to. (Also changed default behavior so values are rounded to one decimal by default). +* Fixed bug in `read_shimadzu_lcd` on Windows due to issue with passing escaped paths to Python. * Updated documentation of various functions. ## chromConverter 0.5.0 diff --git a/R/read_shimadzu_lcd.R b/R/read_shimadzu_lcd.R index b00239d..b7b4fb2 100644 --- a/R/read_shimadzu_lcd.R +++ b/R/read_shimadzu_lcd.R @@ -172,7 +172,7 @@ export_stream <- function(path_in, stream, path_out, remove_null_bytes = FALSE, reticulate::py_run_string('data = st.read()') if (missing(path_out)){ - path_out <- tempfile() + path_out <- fs::file_temp() } if (remove_null_bytes){ reticulate::py_run_string("data = data.replace(b'\\x00', b'')") diff --git a/tests/testthat/test-extra.R b/tests/testthat/test-extra.R index ee3f04b..9ecdd9d 100644 --- a/tests/testthat/test-extra.R +++ b/tests/testthat/test-extra.R @@ -295,27 +295,30 @@ test_that("read_peaklist can read `Shimadzu` ascii (PDA) files", { expect_equal(colnames(x[[1]]), c("sample", "rt", "start", "end", "area", "height")) }) -test_that("read_chroms can read 'Shimadzu' ascii (PDA) files", { +test_that("read_chroms can read 'Shimadzu' PDA files (ascii and LCD)", { skip_on_cran() skip_if_not_installed("chromConverterExtraTests") - path <- system.file("shimadzuDAD_Anthocyanin.txt", + path_ascii <- system.file("shimadzuDAD_Anthocyanin.txt", package = "chromConverterExtraTests") - skip_if_not(file.exists(path)) + skip_if_not(file.exists(path_ascii)) - x <- read_chroms(path, format_in = "shimadzu_dad", progress_bar = FALSE)[[1]] + path_lcd <- system.file("Anthocyanin.lcd", package = "chromConverterExtraTests") + skip_if_not(file.exists(path_lcd)) + + x <- read_chroms(path_ascii, format_in = "shimadzu_dad", progress_bar = FALSE)[[1]] expect_equal(class(x)[1], "matrix") expect_equal(dim(x), c(4689, 328)) expect_equal(attr(x, "parser"), "chromconverter") expect_equal(attr(x, "data_format"), "wide") - x1 <- read_chroms(path, format_in="shimadzu_dad", progress_bar = FALSE, + x1 <- read_chroms(path_ascii, format_in="shimadzu_dad", progress_bar = FALSE, data_format = "long", format_out = "data.frame")[[1]] expect_equal(class(x1)[1], "data.frame") expect_equal(dim(x1), c(4689*328, 3)) - path <- system.file("Anthocyanin.lcd", package = "chromConverterExtraTests") - x2 <- read_chroms(path, progress_bar = FALSE)[[1]] + + x2 <- read_chroms(path_lcd, progress_bar = FALSE)[[1]] expect_equal(dim(x2),c(4689,328)) expect_equal(x, x2, ignore_attr = TRUE) }) From 7885ccf6332eda449a3104f1feb56e60563360d5 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Sun, 24 Dec 2023 18:31:27 -0500 Subject: [PATCH 50/51] docs: update docs, added explanatory comments to internal fncs --- R/call_openchrom.R | 13 +++++----- R/call_rainbow.R | 9 +++++++ R/read_chemstation_ch.R | 10 +++++--- R/read_chemstation_report.R | 1 + R/read_mdf.R | 1 + R/read_shimadzu_ascii.R | 18 ++++++++++++- R/read_shimadzu_lcd.R | 43 +++++++++++++++++++------------- R/read_thermoraw.R | 1 + R/reshape_chroms.R | 3 +-- man/call_openchrom.Rd | 12 ++++----- man/read_shimadzu_lcd.Rd | 6 +++++ man/write_openchrom_batchfile.Rd | 4 ++- 12 files changed, 85 insertions(+), 36 deletions(-) diff --git a/R/call_openchrom.R b/R/call_openchrom.R index 21da731..029c3d2 100644 --- a/R/call_openchrom.R +++ b/R/call_openchrom.R @@ -34,12 +34,12 @@ #' @param return_paths Logical. If TRUE, the function will return a character #' vector of paths to the newly created files. #' @param verbose Logical. Whether to print output from OpenChrom to the console. -#' @return If \code{return_paths} is \code{FALSE}, the function will return a list of -#' chromatograms (if an appropriate parser is available in chromConverter). The -#' chromatograms will be returned in \code{matrix} or \code{data.frame} format -#' according to the value of {data_class}. If \code{return_paths} is \code{TRUE}, -#' the function will return a character vector of paths to the newly created -#' files. +#' @return If \code{return_paths} is \code{FALSE}, the function will return a +#' list of chromatograms (if an appropriate parser is available to import the +#' files into R). The chromatograms will be returned in \code{matrix} or +#' \code{data.frame} format according to the value of {format_out}. If +#' \code{return_paths} is \code{TRUE}, the function will return a character +#' vector of paths to the newly created files. #' @section Side effects: Chromatograms will be exported in the format specified #' by \code{export_format} in the folder specified by \code{path_out}. #' @author Ethan Bass @@ -98,6 +98,7 @@ call_openchrom <- function(files, path_out = NULL, format_in, } #' Writes OpenChrom XML batch file +#' This function is called internally by \code{call_openchrom}. #' @import xml2 #' @import magrittr #' @param files Paths to files for conversion diff --git a/R/call_rainbow.R b/R/call_rainbow.R index 796a5ef..5ef075b 100644 --- a/R/call_rainbow.R +++ b/R/call_rainbow.R @@ -85,6 +85,9 @@ call_rainbow <- function(file, xx } +#' Extract data with rainbow +#' This function is called internally by \code{call_rainbow}. +#' @author Ethan Bass #' @noRd extract_rb_data <- function(xx, format_out = "matrix", data_format = c("wide", "long"), @@ -111,6 +114,8 @@ extract_rb_data <- function(xx, format_out = "matrix", data } +#' Extract 'rainbow' element names. +#' This function is called internally by \code{call_rainbow}. #' @noRd extract_rb_names <- function(xx){ sapply(xx, function(xxx){ @@ -118,6 +123,8 @@ extract_rb_names <- function(xx){ }) } +#' Assign 'rainbow' read +#' This function is called internally by \code{call_rainbow}. #' @noRd assign_rb_read <- function(){ pos <- 1 @@ -126,6 +133,8 @@ assign_rb_read <- function(){ assign("rb_parse_agilent", reticulate::import("rainbow.agilent"), envir = envir) } +#' Check 'rainbow' configuration +#' This function is called internally by \code{call_rainbow}. #' @noRd check_rb_configuration <- function(){ assign_rb_read() diff --git a/R/read_chemstation_ch.R b/R/read_chemstation_ch.R index 32a8123..1a8ccce 100644 --- a/R/read_chemstation_ch.R +++ b/R/read_chemstation_ch.R @@ -163,16 +163,18 @@ get_chemstation_dir_name <- function(path){ grep("\\.D|\\.d$", sp, ignore.case = TRUE, value = TRUE) } +#' Get number of characters for Agilent segment #' @noRd get_nchar <- function(f){ as.numeric(readBin(f, what = "raw", n = 1)) } #' Decode double delta array -#' @noRd #' @note This function was adapted from the #' \href{https://github.com/chemplexity/chromatography}{Chromatography Toolbox} #' ((c) James Dillon 2014). +#' @noRd + decode_double_delta <- function(file, offset) { seek(file, 0, 'end') @@ -208,10 +210,11 @@ decode_double_delta <- function(file, offset) { } #' Decode double array -#' @noRd #' @note This function was adapted from the #' \href{https://github.com/chemplexity/chromatography}{Chromatography Toolbox} #' ((c) James Dillon 2014). +#' @noRd + decode_double_array_4byte <- function(file, offset) { seek(file, 0, 'end') fsize <- seek(file, NA, "current") @@ -238,10 +241,11 @@ decode_double_array_8byte <- function(file, offset) { } #' Decode delta array -#' @noRd #' @note This function was adapted from the #' \href{https://github.com/chemplexity/chromatography}{Chromatography Toolbox} #' ((c) James Dillon 2014). +#' @noRd + decode_delta <- function(file, offset) { seek(file, 0, 'end') fsize <- seek(file, NA, "current") diff --git a/R/read_chemstation_report.R b/R/read_chemstation_report.R index ca04e29..92e9904 100644 --- a/R/read_chemstation_report.R +++ b/R/read_chemstation_report.R @@ -136,6 +136,7 @@ convert_chemstation_peaklist <- function(table, data_format = } #' Remove blank lines +#' This function is called internally by \code{read_chemstation_reports}. #' @noRd remove_blank_lines <- function(x){ x[which(x != "")] diff --git a/R/read_mdf.R b/R/read_mdf.R index 37e0468..a729833 100644 --- a/R/read_mdf.R +++ b/R/read_mdf.R @@ -65,6 +65,7 @@ read_mdf <- function(file, format_out = c("matrix","data.frame"), } #' Extract MDF metadata +#' This function is called internally by \code{read_mdf}. #' @author Ethan Bass #' @noRd extract_mdf_metadata <- function(x){ diff --git a/R/read_shimadzu_ascii.R b/R/read_shimadzu_ascii.R index 7430c16..13cfae8 100644 --- a/R/read_shimadzu_ascii.R +++ b/R/read_shimadzu_ascii.R @@ -143,6 +143,9 @@ read_shimadzu <- function(file, what = "chromatogram", xx } +#' Convert list of mass spectra to data.frame +#' This function is called internally by \code{read_shimadzu}. +#' @author Ethan Bass #' @noRd ms_list_to_dataframe <- function(x){ if (!is.null(names(x))){ @@ -156,7 +159,10 @@ ms_list_to_dataframe <- function(x){ } as.data.frame(do.call(rbind, ms)) } -#' Read Shimadzu Metadata + +#' Read 'Shimadzu' Metadata +#' This function is called internally by \code{read_shimadzu}. +#' @author Ethan Bass #' @noRd read_shimadzu_metadata <- function(x, met = NULL, sep){ @@ -181,6 +187,8 @@ read_shimadzu_metadata <- function(x, met = NULL, sep){ } #' Read Shimadzu Chromatogram +#' This function is called internally by \code{read_shimadzu}. +#' @author Ethan Bass #' @noRd read_shimadzu_chromatogram <- function(file, x, chrom.idx, sep, data_format, read_metadata, format_out){ @@ -221,6 +229,8 @@ read_shimadzu_chromatogram <- function(file, x, chrom.idx, sep, data_format, } #' Read Shimadzu DAD Array +#' This function is called internally by \code{read_shimadzu}. +#' @author Ethan Bass #' @noRd read_shimadzu_dad <- function(file, x, chrom.idx, sep, data_format, read_metadata, format_out){ @@ -258,6 +268,8 @@ read_shimadzu_dad <- function(file, x, chrom.idx, sep, data_format, } #' Read Shimadzu Peak Table +#' This function is called internally by \code{read_shimadzu}. +#' @author Ethan Bass #' @noRd read_shimadzu_peaktable <- function(file, x, idx, sep, format_in, format_out){ nrows <- as.numeric(strsplit(x = x[idx + 1], split = sep)[[1]][2]) @@ -283,6 +295,8 @@ read_shimadzu_peaktable <- function(file, x, idx, sep, format_in, format_out){ } #' Read Shimadzu MS Spectrum +#' This function is called internally by \code{read_shimadzu}. +#' @author Ethan Bass #' @noRd read_shimadzu_spectrum <- function(file, x, idx, sep){ nrows <- as.numeric(strsplit(x = x[idx + 1], split = sep)[[1]][2]) @@ -295,6 +309,8 @@ read_shimadzu_spectrum <- function(file, x, idx, sep){ } #' Extract Header from Shimadzu ASCII Files +#' This function is called internally by \code{read_shimadzu}. +#' @author Ethan Bass #' @noRd extract_shimadzu_header <- function(x, chrom.idx, sep){ index <- chrom.idx + 1 diff --git a/R/read_shimadzu_lcd.R b/R/read_shimadzu_lcd.R index b7b4fb2..de4a4da 100644 --- a/R/read_shimadzu_lcd.R +++ b/R/read_shimadzu_lcd.R @@ -34,6 +34,10 @@ #' @param data_format Either \code{wide} (default) or \code{long}. #' @param read_metadata Logical. Whether to attach metadata. #' @author Ethan Bass +#' @return A 3D chromatogram from the PDA stream in \code{matrix} or +#' \code{data.frame} format, according to the value of \code{format_out}. +#' The chromatograms will be returned in \code{wide} or \code{long} format +#' according to the value of \code{data_format}. #' @note This parser is experimental and may still #' need some work. It is not yet able to interpret much metadata from the files. #' @export @@ -75,6 +79,7 @@ read_shimadzu_lcd <- function(path, format_out = c("matrix", "data.frame"), } #' Read Shimadzu "Method" stream +#' This function is called internally by \code{read_shimadzu_lcd}. #' @author Ethan Bass #' @noRd read_sz_method <- function(path){ @@ -110,6 +115,7 @@ read_sz_method <- function(path){ } #' Infer times from 'Shimadzu' Method stream +#' This function is called internally by \code{read_shimadzu_lcd}. #' @author Ethan Bass #' @noRd get_sz_times <- function(sz_method, nval){ @@ -151,6 +157,7 @@ read_shimadzu_raw <- function(path, n_lambdas = NULL){ } #' Export OLE stream +#' This function is called internally by \code{read_shimadzu_lcd}. #' Use olefile to export te specified stream. #' @param file Path to ole file. #' @author Ethan Bass @@ -183,7 +190,25 @@ export_stream <- function(path_in, stream, path_out, remove_null_bytes = FALSE, } } +#' Extract wavelengths from Shimadzu LCD +#' This function is called internally by \code{read_shimadzu_lcd}. +#' @author Ethan Bass +#' @noRd +read_shimadzu_wavelengths <- function(path){ + path_wavtab <- export_stream(path, stream = c("PDA 3D Raw Data", "Wavelength Table")) + f <- file(path_wavtab, "rb") + on.exit(close(f)) + n_lambda <- readBin(f, what="integer", size = 4) + count <- 1 + # lambdas <- numeric(n_lambda) + lambdas <- sapply(seq_len(n_lambda), function(i){ + readBin(f, what="integer", size = 4)/100 + }) + lambdas +} + #' Read 'Shimadzu' LCD data block +#' This function is called internally by \code{read_shimadzu_lcd}. #' @author Ethan Bass #' @noRd decode_shimadzu_block <- function(file) { @@ -236,6 +261,7 @@ decode_shimadzu_block <- function(file) { } #' Return twos complement from binary string +#' This function is called internally by \code{read_shimadzu_lcd}. #' @noRd twos_complement <- function(bin, exp){ if (missing(exp)){ @@ -277,20 +303,3 @@ integer_to_binary <- function(x, n) { # Return x } - -#' Extract wavelengths from Shimadzu LCD -#' @author Ethan Bass -#' @noRd -read_shimadzu_wavelengths <- function(path){ - path_wavtab <- export_stream(path, stream = c("PDA 3D Raw Data", "Wavelength Table")) - f <- file(path_wavtab, "rb") - on.exit(close(f)) - n_lambda <- readBin(f, what="integer", size = 4) - count <- 1 - # lambdas <- numeric(n_lambda) - lambdas <- sapply(seq_len(n_lambda), function(i){ - readBin(f, what="integer", size = 4)/100 - }) - lambdas -} - diff --git a/R/read_thermoraw.R b/R/read_thermoraw.R index 7e26167..4112dc9 100644 --- a/R/read_thermoraw.R +++ b/R/read_thermoraw.R @@ -96,6 +96,7 @@ read_thermoraw <- function(path_in, path_out = NULL, #' @return No return value. #' @author Ethan Bass #' @noRd + configure_thermo_parser <- function(reconfigure = FALSE, check = FALSE){ if (.Platform$OS.type == "windows"){ path_parser <- readLines(system.file("shell/path_parser.txt", package = 'chromConverter')) diff --git a/R/reshape_chroms.R b/R/reshape_chroms.R index bd1615b..29f58a9 100644 --- a/R/reshape_chroms.R +++ b/R/reshape_chroms.R @@ -1,4 +1,3 @@ - #' Reshapes list of chromatograms from wide to long format #' @name reshape_chroms #' @param x A list of chromatographic matrices in wide format. @@ -92,7 +91,7 @@ reshape_chrom_long <- function(x, lambdas, format_out = NULL, names_to = "lambda data } - +#' Reshapes a single chromatogram from long to wide format #' @noRd reshape_chrom_wide <- function(x, lambdas, lambda_var = "lambda", time_var="rt", value_var = "int", drop){ diff --git a/man/call_openchrom.Rd b/man/call_openchrom.Rd index 2e4b3dd..091a3b4 100644 --- a/man/call_openchrom.Rd +++ b/man/call_openchrom.Rd @@ -32,12 +32,12 @@ vector of paths to the newly created files.} \item{verbose}{Logical. Whether to print output from OpenChrom to the console.} } \value{ -If \code{return_paths} is \code{FALSE}, the function will return a list of -chromatograms (if an appropriate parser is available in chromConverter). The -chromatograms will be returned in \code{matrix} or \code{data.frame} format -according to the value of {data_class}. If \code{return_paths} is \code{TRUE}, -the function will return a character vector of paths to the newly created -files. +If \code{return_paths} is \code{FALSE}, the function will return a +list of chromatograms (if an appropriate parser is available to import the +files into R). The chromatograms will be returned in \code{matrix} or +\code{data.frame} format according to the value of {format_out}. If +\code{return_paths} is \code{TRUE}, the function will return a character +vector of paths to the newly created files. } \description{ Writes \code{xml} batch-files and calls OpenChrom file parsers using a diff --git a/man/read_shimadzu_lcd.Rd b/man/read_shimadzu_lcd.Rd index b929954..b480987 100644 --- a/man/read_shimadzu_lcd.Rd +++ b/man/read_shimadzu_lcd.Rd @@ -20,6 +20,12 @@ read_shimadzu_lcd( \item{read_metadata}{Logical. Whether to attach metadata.} } +\value{ +A 3D chromatogram from the PDA stream in \code{matrix} or +\code{data.frame} format, according to the value of \code{format_out}. +The chromatograms will be returned in \code{wide} or \code{long} format +according to the value of \code{data_format}. +} \description{ Read 3D PDA data stream from 'Shimadzu' LCD files. } diff --git a/man/write_openchrom_batchfile.Rd b/man/write_openchrom_batchfile.Rd index 75a5ab9..597fea6 100644 --- a/man/write_openchrom_batchfile.Rd +++ b/man/write_openchrom_batchfile.Rd @@ -2,7 +2,8 @@ % Please edit documentation in R/call_openchrom.R \name{write_openchrom_batchfile} \alias{write_openchrom_batchfile} -\title{Writes OpenChrom XML batch file} +\title{Writes OpenChrom XML batch file +This function is called internally by \code{call_openchrom}.} \usage{ write_openchrom_batchfile( files, @@ -26,6 +27,7 @@ Returns path to newly created xml batch file. } \description{ Writes OpenChrom XML batch file +This function is called internally by \code{call_openchrom}. } \author{ Ethan Bass From 474effe4b0164aec75a4c1b3f00c422e36dfe741 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Mon, 25 Dec 2023 12:30:58 -0500 Subject: [PATCH 51/51] style: whitespace --- R/read_mzml.R | 9 +++++---- R/read_thermoraw.R | 13 ++++++++----- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/R/read_mzml.R b/R/read_mzml.R index 004df4e..934c8d5 100644 --- a/R/read_mzml.R +++ b/R/read_mzml.R @@ -26,8 +26,8 @@ read_mzml <- function(path, format_out = c("matrix", "data.frame"), data_format = c("long","wide"), - parser=c("RaMS","mzR"), - what=c("MS1","MS2", "BPC", "TIC", "DAD", + parser = c("RaMS","mzR"), + what = c("MS1","MS2", "BPC", "TIC", "DAD", "chroms", "metadata", "everything"), verbose = FALSE, ...){ @@ -35,10 +35,11 @@ read_mzml <- function(path, format_out = c("matrix", "data.frame"), format_out <- match.arg(format_out, c("matrix", "data.frame")) data_format <- match.arg(data_format, c("long","wide")) what <- match.arg(what, c("MS1","MS2", "BPC", "TIC", "DAD", - "chroms", "metadata", "everything"), several.ok = TRUE) + "chroms", "metadata", "everything"), + several.ok = TRUE) if (all(c("MS1","MS2", "BPC", "TIC", "DAD", "chroms", "metadata", "everything") %in% what)){ - what <- grep("everything",what, invert = TRUE,value = TRUE) + what <- grep("everything", what, invert = TRUE, value = TRUE) } if (parser == "RaMS"){ data <- RaMS::grabMSdata(path, grab_what = what, verbosity = verbose, ...) diff --git a/R/read_thermoraw.R b/R/read_thermoraw.R index 4112dc9..5d87d3f 100644 --- a/R/read_thermoraw.R +++ b/R/read_thermoraw.R @@ -103,20 +103,23 @@ configure_thermo_parser <- function(reconfigure = FALSE, check = FALSE){ exists <- file.exists(path_parser) if (!exists & !check){ warning("ThermoRawFileParser not found!", immediate. = TRUE) - path_parser <- readline(prompt="Please provide path to `ThermoRawFileParser.exe`):") + path_parser <- readline(prompt = "Please provide path to `ThermoRawFileParser.exe`):") path_parser <- gsub("/", "\\\\", path_parser) - writeLines(path_parser, con=system.file('shell/path_parser.txt', package='chromConverter')) + writeLines(path_parser, con = system.file('shell/path_parser.txt', + package = 'chromConverter')) } } else { - shell_script <- readLines(system.file('shell/thermofileparser.sh', package='chromConverter')) + shell_script <- readLines(system.file('shell/thermofileparser.sh', + package = 'chromConverter')) path_parser <- strsplit(shell_script[2]," ")[[1]] path_parser <- path_parser[grep(".exe", path_parser)] exists <- file.exists(path_parser) if (!exists & !check){ warning("ThermoRawFileParser not found!", immediate. = TRUE) - path_parser <- readline(prompt="Please provide path to `ThermoRawFileParser.exe`):") + path_parser <- readline(prompt = "Please provide path to `ThermoRawFileParser.exe`):") shell_script[2] <- paste0("mono ", path_parser, ' "$@"') - writeLines(shell_script, con = system.file('shell/thermofileparser.sh', package='chromConverter')) + writeLines(shell_script, con = system.file('shell/thermofileparser.sh', + package = 'chromConverter')) } } if (check){