From 54a1bac72154658b25f5bc8d7930bd303dda7d21 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Sat, 22 Apr 2023 11:55:14 -0400 Subject: [PATCH 01/28] v0.3.4, added additional parsers, bug fixes, cdf exports --- DESCRIPTION | 3 +- NAMESPACE | 2 + NEWS.md | 18 ++ R/aston_parsers.R | 4 +- R/attach_metadata.R | 202 +++++++++++---- R/call_entab.R | 3 +- R/call_openchrom.R | 20 +- R/parsers.R | 73 ++++-- R/rainbow_parser.R | 3 +- R/read_cdf.R | 93 +++++++ R/read_chemstation_ch.R | 249 ++++++++++--------- R/read_chemstation_uv.R | 7 +- R/read_chroms.R | 88 ++++--- R/read_mdf.R | 86 +++++++ R/utils.R | 31 ++- R/write_chroms.R | 136 ++++++++++ inst/shell/path_to_openchrom_commandline.txt | 2 +- man/attach_metadata.Rd | 14 +- man/read_cdf.Rd | 32 +++ man/read_chemstation_ch.Rd | 11 +- man/read_chemstation_uv.Rd | 8 +- man/read_chromeleon.Rd | 3 + man/read_chroms.Rd | 18 +- man/read_mdf.Rd | 29 +++ man/read_waters_arw.Rd | 11 +- tests/testthat/test-read_chroms.R | 96 ++++--- 26 files changed, 942 insertions(+), 300 deletions(-) create mode 100644 R/read_cdf.R create mode 100644 R/read_mdf.R create mode 100644 R/write_chroms.R create mode 100644 man/read_cdf.Rd create mode 100644 man/read_mdf.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 88e9d74..298d001 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: chromConverter Title: Chromatographic File Converter -Version: 0.3.3 +Version: 0.3.4 Authors@R: c( person(given = "Ethan", family = "Bass", email = "ethanbass@gmail.com", role = c("aut", "cre"), @@ -30,6 +30,7 @@ Imports: Suggests: entab, mzR, + ncdf4, pbapply, testthat (>= 3.0.0) Config/reticulate: list( packages = list( list(package = "scipy"), diff --git a/NAMESPACE b/NAMESPACE index bb1c652..c110538 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,11 +6,13 @@ export(call_rainbow) export(configure_aston) export(configure_rainbow) export(extract_metadata) +export(read_cdf) export(read_chemstation_ch) export(read_chemstation_csv) export(read_chemstation_uv) export(read_chromeleon) export(read_chroms) +export(read_mdf) export(read_mzml) export(read_shimadzu) export(read_thermoraw) diff --git a/NEWS.md b/NEWS.md index fcf052c..e017105 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,19 @@ +## chromConverter 0.3.4 + +* Added parser for ANDI chrom `cdf` files through the `read_cdf` function. +* Added parser for 'Lumex' `.mdf` files through the `read_mdf` function. +* Updated `configure_openchrom` for better discovery of 'OpenChrom' path. +* Added additional options for file exports. New options for writing +`chemstation_csv` (utf-16) and `cdf` files through `read_chroms`. +* Added `wide` and `long` `data_format` options for 2D data, such that the `wide` +format option writes retention times as rownames of the matrix or data.frame +while the `long` format writes retention times as the first column of the object. +* Slightly restructured metadata fields. Added `source_file` field to track +data origin. +* Fixed bug causing sloppy 'Chemstation' FID metadata. +* Fixed bug that caused padding of 'Chemstation 130' files with extra zeros. +* Added additional tests. + ## chromConverter 0.3.3 * Added R-based parser for "Chemstation" UV (`.uv`) files (version 131) through @@ -9,6 +25,8 @@ and returning it as a `data.frame` or `tibble`. * Added wide format option in `read_mzml`. * Added automatic detection of file formats by `read_chroms`. * Minor changes to storage of metadata in attributes for the purpose of simplification. +* Fixed bug preventing removal of file extensions for 'Agilent' data when using `read_chroms`. +* Standardized run date/time in metadata to `POSIXct` format. * Minor updates to documentation. ## chromConverter 0.3.2 diff --git a/R/aston_parsers.R b/R/aston_parsers.R index 3ff99ba..57f497c 100644 --- a/R/aston_parsers.R +++ b/R/aston_parsers.R @@ -33,7 +33,7 @@ sp_converter <- function(file, format_out = c("matrix", "data.frame"), meta <- read_masshunter_metadata(file) x <- attach_metadata(x, meta, format_in = "masshunter_dad", format_out = format_out, data_format = "wide", - parser = "aston") + parser = "aston", source_file = file) } x } @@ -82,7 +82,7 @@ uv_converter <- function(file, format_out = c("matrix","data.frame"), meta <- read_chemstation_metadata(file) x <- attach_metadata(x, meta, format_in = "chemstation_uv", format_out = format_out, data_format = "wide", - parser = "Aston") + parser = "Aston", source_file = file) } x } diff --git a/R/attach_metadata.R b/R/attach_metadata.R index 8781570..272a93c 100644 --- a/R/attach_metadata.R +++ b/R/attach_metadata.R @@ -6,29 +6,36 @@ #' @param format_in Chromatogram format #' @param format_out R format. Either \code{matrix} or \code{data.frame}. #' @param data_format Whether data is in wide or long format. -#' @param parser What parser was used to decode the data +#' @param parser What parser was used to decode the data. +#' @param source_file The path to the source file. #' @return A chromatogram with attached metadata. #' @author Ethan Bass -attach_metadata <- function(x, meta, format_in, format_out, data_format, parser = NULL){ - if (format_in == "waters_arw"){ +attach_metadata <- function(x, meta, format_in, format_out, data_format, parser = NULL, + source_file){ + switch(format_in, + "waters_arw" = { structure(x, instrument = NA, detector = NA, software = NA, method = meta$`Instrument Method Name`, batch = meta$`Sample Set Name`, operator = NA, - run_date = NA, + run_datetime = NA, sample_name = meta$SampleName, sample_id = NA, - injection_volume = NA, + sample_injection_volume = NA, + sample_amount = NA, time_range = NA, time_interval = NA, + time_unit = NA, detector_range = meta$Channel, + detector_unit = NA, + source_file = source_file, data_format = "long", parser = "chromConverter", format_out = format_out) - } else if (format_in == "shimadzu"){ + }, "shimadzu" = { structure(x, instrument = meta$`Instrument Name`, detector = meta$`Detector Name`, @@ -37,37 +44,59 @@ attach_metadata <- function(x, meta, format_in, format_out, data_format, parser method = meta$`Method File`, batch = meta$`Batch File`, operator = meta$`Operator Name`, - run_date = meta$Acquired, + run_datetime = as.POSIXct(meta$Acquired, format = "%m/%d/%Y %I:%M:%S %p"), sample_name = meta$`Sample Name`, sample_id = meta$`Sample ID`, - injection_volume = meta$`Injection Volume`, - start_time = meta$`Start Time(min)`, - end_time = meta$`End Time(min)`, + sample_injection_volume = meta$`Injection Volume`, + sample_amount = meta$`Injection Volume`, + time_range = c(meta$`Start Time(min)`, meta$`End Time(min)`), + # start_time = meta$`Start Time(min)`, + # end_time = meta$`End Time(min)`, time_interval = meta$`Interval(msec)`, - start_wavelength = meta$`Start Wavelength(nm)`, - end_wavelength = meta$`End Wavelength(nm)`, + time_interval_unit = get_time_unit( + grep("Interval", names(meta), value=TRUE)[1], format_in = "shimadzu"), + time_unit = get_time_unit( + grep("Start Time", names(meta), value=TRUE)[1], format_in = "shimadzu"), + detector_range = c(meta$`Start Wavelength(nm)`,meta$`End Wavelength(nm)`), + # detector_end = meta$`End Wavelength(nm)`, + detector_unit = NA, + source_file = source_file, data_format = data_format, parser = "chromConverter", format_out = format_out) - } else if (format_in == "chromeleon"){ + }, "chromeleon" = { structure(x, instrument = NA, detector = meta$Detector, software = meta$`Generating Data System`, method = meta$`Instrument Method`, batch = NA, operator = meta$`Operator`, - run_date = meta$`Injection Date`, - run_time = meta$`Injection Time`, + run_datetime = as.POSIXct(paste(meta$`Injection Date`, meta$`Injection Time`), + format = "%m/%d/%Y %H:%M:%S"), + # run_date = meta$`Injection Date`, + # run_time = meta$`Injection Time`, sample_name = meta$Injection, sample_id = NA, - injection_volume = meta$`Injection Volume`, - start_time = meta$`Time Min. (min)`, - end_time = meta$`Time Max. (min)`, + sample_injection_volume = meta$`Injection Volume`, + sample_amount = meta$`Injection Volume`, + time_range = c(meta$`Time Min. (min)`, meta$`Time Max. (min)`), + # start_time = meta$`Time Min. (min)`, + # end_time = meta$`Time Max. (min)`, time_interval = meta$`Average Step (s)`, + time_interval_unit <- get_time_unit( + grep("Average Step", names(meta), value = TRUE)[1], + format_in = "chromeleon"), + time_unit = get_time_unit( + grep("Time Min.", names(meta), value = TRUE)[1], + format_in="chromeleon"), + # uniform_sampling = meta$`Min. Step (s)` == meta$`Max. Step (s)`, detector_range = NA, + detector_unit = meta$`Signal Unit`, + source_file = source_file, + format_out = format_out, data_format = "long", - parser = "chromConverter", - format_out = format_out) + parser = "chromConverter" + ) # } else if (format_in == "entab"){ # structure(x, instrument = meta$instrument, # detector = NA, @@ -75,7 +104,7 @@ attach_metadata <- function(x, meta, format_in, format_out, data_format, parser # method = meta$method, # batch = meta$SeqPathAndFile, # operator = meta$operator, - # run_date = meta$run_date, + # run_datetime = meta$run_date, # sample_name = meta$sample, # sample_id = NA, # injection_volume = meta$InjVolume, @@ -85,58 +114,137 @@ attach_metadata <- function(x, meta, format_in, format_out, data_format, parser # format = data_format, # parser = "entab", # format_out = format_out) - } else if (format_in == "chemstation_uv"){ + }, "chemstation_uv" = { structure(x, instrument = meta$AcqInstName, detector = NA, software = meta$Version, method = meta$AcqMeth, batch = meta$SeqPathAndFile, operator = meta$AcqOp, - run_date = meta$InjDateTime, + run_datetime = meta$InjDateTime, sample_name = meta$SampleName, sample_id = NA, - injection_volume = meta$InjVolume, + sample_injection_volume = meta$InjVolume, + sample_amount = meta$InjVolume, time_range = NA, time_interval = NA, + time_unit = NA, detector_range = NA, + detector_unit = NA, + source_file = source_file, data_format = data_format, parser = parser, format_out = format_out) - } else if (format_in == "masshunter_dad"){ - structure(x, instrument = meta$Instrument, - detector = NA, - software = NA, - method = meta$Method, - batch = NA, - operator = meta$OperatorName, - run_date = meta$AcqTime, - sample_name = meta$`Sample Name`, - sample_id = meta$`Sample ID`, - injection_volume = meta$`Inj Vol`, - time_range = NA, - time_interval = NA, - detector_range = NA, - data_format = data_format, - parser = parser, - format_out = format_out) - } else { + }, "masshunter_dad" = { + structure(x, instrument = meta$Instrument, + detector = NA, + software = NA, + method = meta$Method, + batch = NA, + operator = meta$OperatorName, + run_datetime = meta$AcqTime, + sample_name = meta$`Sample Name`, + sample_id = meta$`Sample ID`, + sample_injection_volume = meta$`Inj Vol`, + sample_amount = meta$`Inj Vol`, + time_range = NA, + time_interval = NA, + time_unit = NA, + detector_range = NA, + detector_unit = NA, + source_file = source_file, + data_format = data_format, + parser = parser, + format_out = format_out) + }, "cdf" = { + structure(x, instrument = NA, + detector = get_metadata_field(meta, "detector_name"), + software = NA, + method = NA, + batch = get_metadata_field(meta, "experiment_title"), + operator = get_metadata_field(meta, "operator_name"), + run_datetime = as.POSIXct( + get_metadata_field(meta, "injection_date_time_stamp"), + format = "%Y%m%d%H%M%S%z"), + sample_name = get_metadata_field(meta, "sample_name"), + sample_id = get_metadata_field(meta, "sample_id"), + sample_type = get_metadata_field(meta, "sample_type"), + sample_injection_volume = get_metadata_field(meta, "sample_injection_volume"), + sample_amount = get_metadata_field(meta, "sample_amount"), + time_start = NA, + time_end = NA, + time_interval = NA, + time_unit = get_metadata_field(meta, "retention_unit"), + detector_range = NA, + # detector_end = NA, + detector_unit = get_metadata_field(meta, "detector_unit"), + source_file = ifelse(missing(source_file), NA, source_file), + format_out = ifelse(missing(format_out), NA, format_out), + data_format = ifelse(missing(data_format), NA, data_format), + parser = "chromConverter") + }, "mdf" = { + structure(x, instrument = meta[meta$Property == "Instrument","Value"], + detector = "Variable Wavelength Detector", + software = NA, + method = NA, + batch = get_metadata_field(meta, "experiment_title"), + operator = meta[meta$Property == "Operator", "Value"], + run_datetime = as.POSIXct( + meta[meta$Property == "Time", "Value"], + format = "%d.%m.%Y %H:%M:%S"), + sample_name = get_metadata_field(meta, "sample_name"), + sample_id = get_metadata_field(meta, "sample_id"), + sample_type = "unknown", + sample_injection_volume = 1, + sample_amount = 1, + time_start = meta[meta$Group=="Interval Time" & meta$Property == "From", "Value"], + time_end = meta[meta$Group=="Interval Time" & meta$Property == "To", "Value"], + time_interval = meta[meta$Group=="Interval Time" & meta$Property == "Step", "Value"], + time_unit = meta[meta$Group=="Interval Time" & meta$Property == "Units", "Value"], + detector_range = meta[meta$Property == "Wave", "Value"], + # detector_end = meta[meta$Property == "Wave", "Value"], + detector_unit = meta[meta$Group=="Array photometric" & meta$Property == "Units", "Value"], + source_file = ifelse(missing(source_file), NA, source_file), + format_out = ifelse(missing(format_out), NA, format_out), + data_format = ifelse(missing(data_format), NA, data_format), + parser = "chromConverter") + }, "default" = { structure(x, instrument = meta$Instrument, detector = NA, software = NA, method = meta$Method, batch = NA, operator = meta$OperatorName, - run_date = meta$AcqTime, + run_datetime = meta$AcqTime, sample_name = meta$`Sample Name`, sample_id = meta$`Sample ID`, - injection_volume = meta$`Inj Vol`, + sample_injection_volume = meta$`Inj Vol`, + sample_amount = meta$`Inj Vol`, time_range = NA, time_interval = NA, + time_unit = NA, detector_range = NA, + detector_unit = NA, + source_file = source_file, + format_out = ifelse(missing(format_out), NA, format_out), data_format = ifelse(missing(data_format), NA, data_format), - parser = ifelse(missing(parser), NA, parser), - format_out = ifelse(missing(format_out), NA, format_out) - ) + parser = ifelse(missing(parser), NA, parser) + ) + } + ) +} + +#' @noRd +get_metadata_field <- function(x, field){ + ifelse(!is.null(x[[field]]), x[[field]], NA) +} + +#' @noRd +get_time_unit <- function(string, format_in){ + if (format_in %in% c("chromeleon","shimadzu")){ + pattern <- "\\((.*?)\\)" + unit <- gsub("\\(|\\)", "", regmatches(string, regexpr(pattern, string))[[1]]) + switch(unit, "min" = "Minutes", "sec" = "Seconds") } } diff --git a/R/call_entab.R b/R/call_entab.R index 0ef6112..fc5b6fb 100644 --- a/R/call_entab.R +++ b/R/call_entab.R @@ -47,7 +47,8 @@ call_entab <- function(file, data_format = c("wide","long"), meta <- c(meta, metadata_from_file) } x <- attach_metadata(x, meta, format_in = format_in, format_out = format_out, - data_format = data_format, parser = "entab") + data_format = data_format, parser = "entab", + source_file = file) } x } diff --git a/R/call_openchrom.R b/R/call_openchrom.R index 47b2d72..52d5f07 100644 --- a/R/call_openchrom.R +++ b/R/call_openchrom.R @@ -50,7 +50,7 @@ call_openchrom <- function(files, path_out, format_in, if(!file.exists(path_out)){ stop("'path_out' not found. Make sure directory exists.") } - openchrom_path <- configure_call_openchrom() + openchrom_path <- configure_openchrom() path_xml <- write_openchrom_batchfile(files = files, path_out=path_out, format_in = format_in, export_format = export_format) system(paste0(openchrom_path, " -nosplash -cli -batchfile ", path_xml)) @@ -120,22 +120,34 @@ write_openchrom_batchfile <- function(files, path_out, #' Configure OpenChrom parser #' -#' @name configure_call_openchrom +#' @name configure_openchrom #' @param cli Defaults to NULL. If "true", R will rewrite openchrom ini file to enable CLI. #' If "false", R will disable CLI. If NULL, R will not modify the ini file. +#' @importFrom utils read.table write.table #' @return Returns path to OpenChrom command-line application #' @author Ethan Bass #' @noRd -configure_call_openchrom <- function(cli = c(NULL, "true", "false")){ + +configure_openchrom <- function(cli = c(NULL, "true", "false")){ cli <- match.arg(cli, c(NULL, "true", "false")) path_parser <- readLines(system.file("shell/path_to_openchrom_commandline.txt", package = 'chromConverter')) + if (path_parser == "NULL"){ + path_parser <- switch(.Platform$OS.type, + unix = "/Applications/Eclipse.app/Contents/MacOS/openchrom", + windows = fs::path(fs::path_home(), "AppData/Local/Programs/OpenChrom"), + linux = "/snap/bin/openchrom" + ) + writeLines(path_parser, + con = system.file('shell/path_to_openchrom_commandline.txt', package='chromConverter')) + } if (!file.exists(path_parser)){ warning("OpenChrom not found!", immediate. = TRUE) path_parser <- readline(prompt="Please provide path to `OpenChrom` command line):") if (.Platform$OS.type == "windows"){ path_parser <- gsub("/","\\\\", path_parser) } - writeLines(path_parser, con = system.file('shell/path_to_openchrom_commandline.txt', package='chromConverter')) + writeLines(path_parser, + con = system.file('shell/path_to_openchrom_commandline.txt', package='chromConverter')) } path_ini <- switch(.Platform$OS.type, "unix" = paste0(gsub("MacOS/openchrom", "", path_parser), "Eclipse/openchrom.ini"), diff --git a/R/parsers.R b/R/parsers.R index e0bae96..53aa4d1 100644 --- a/R/parsers.R +++ b/R/parsers.R @@ -3,14 +3,18 @@ #' @importFrom utils tail read.csv #' @param file path to file #' @param format_out R format. Either \code{matrix} or \code{data.frame}. +#' @param data_format Whether to return data in \code{wide} or \code{long} format. #' @param read_metadata Whether to read metadata from file. #' @return A chromatogram in the format specified by \code{format_out}. #' (retention time x wavelength). #' @author Ethan Bass #' @export + read_chromeleon <- function(file, format_out = c("matrix","data.frame"), + data_format = c("wide","long"), read_metadata = TRUE){ format_out <- match.arg(format_out, c("matrix","data.frame")) + data_format <- match.arg(data_format, c("wide","long")) xx <- readLines(file) start <- tail(grep("Data:", xx), 1) x <- read.csv(file, skip = start, sep="\t") @@ -20,14 +24,20 @@ read_chromeleon <- function(file, format_out = c("matrix","data.frame"), x <- apply(x, 2, function(x) gsub(",", ".", x)) } x <- apply(x, 2, as.numeric) - x <- as.matrix(x) - rownames(x) <- x[,1] - x <- x[,2, drop = FALSE] + colnames(x) <- c("RT","Intensity") + if (data_format == "wide"){ + rownames(x) <- x[,1] + x <- x[, 2, drop = FALSE] + } + if (format_out == "data.frame"){ + x <- as.data.frame(x) + } if (read_metadata){ meta <- try(read_chromeleon_metadata(xx)) if (!inherits(meta, "try-error")){ x <- attach_metadata(x, meta, format_in = "chromeleon", format_out = format_out, - data_format = "wide", parser = "chromConverter") + data_format = "wide", parser = "chromConverter", + source_file = file) } } x @@ -61,8 +71,8 @@ read_shimadzu <- function(file, format_in, read_metadata = TRUE){ if (missing(format_in)) stop("`format_in` must be specified. The options are `fid` or `dad`.") - format_out <- match.arg(format_out, c("matrix","data.frame")) - data_format <- match.arg(data_format, c("wide","long")) + format_out <- match.arg(format_out, c("matrix", "data.frame")) + data_format <- match.arg(data_format, c("wide", "long")) what <- match.arg(what, c("chromatogram", "peak_table"), several.ok = TRUE) x <- readLines(file) sep <- substr(x[2], 17, 17) @@ -77,7 +87,7 @@ read_shimadzu <- function(file, format_in, if (length(chrom.idx) != 0){ header <- try(extract_header(x = x, chrom.idx = chrom.idx, sep = sep)) met <- header[[1]] - decimal_separator <- ifelse(grepl(",", met[2,2]),",",".") + decimal_separator <- ifelse(grepl(",", met[2, 2]),",",".") if (decimal_separator == ","){ met[c(2:3), 2] <- gsub(",", ".", met[c(2:3), 2]) } @@ -90,7 +100,9 @@ read_shimadzu <- function(file, format_in, rownames(xx) <- xx[,1] xx <- xx[, 2, drop = FALSE] colnames(xx) <- "Intensity" - data_format <- "long" + if (data_format == "long"){ + xx <- cbind(RT = as.numeric(rownames(xx)), Intensity = as.numeric(xx[,1])) + } } else if (format_in == "dad"){ nrows <- as.numeric(met[grep("# of Time Axis Points", met[,1]),2]) ncols <- as.numeric(met[grep("# of Wavelength Axis Points", met[,1]),2]) @@ -153,7 +165,7 @@ read_shimadzu <- function(file, format_in, } xx <- switch(what, "chromatogram" = xx, "peak_table" = peak_tab, - "both" = list(xx, peak_tab)) + "both" = list(chromatogram = xx, peak_table = peak_tab)) if (read_metadata){ idx <- which(x[headings] %in% c("[Header]", "[File Information]", "[Sample Information]", @@ -170,17 +182,18 @@ read_shimadzu <- function(file, format_in, } rownames(meta) <- meta[, 1] meta <- as.list(meta[,2]) - data_format <- switch(format_in, - "fid" = "long", - "dad" = "wide") + # data_format <- switch(format_in, + # "fid" = "long", + # "dad" = "wide") if (inherits(xx, "list")){ xx <- lapply(xx, function(xxx){ - attach_metadata(xxx, meta, format_in = "shimadzu", format_out = format_out, - data_format = data_format, - parser = "chromConverter") + attach_metadata(xxx, meta, format_in = "shimadzu", + source_file = file, format_out = format_out, + data_format = data_format, parser = "chromConverter") }) } else{ - xx <- attach_metadata(xx, meta, format_in = "shimadzu", format_out = format_out, + xx <- attach_metadata(xx, meta, format_in = "shimadzu", + source_file = file, format_out = format_out, data_format = data_format, parser = "chromConverter") } @@ -199,24 +212,36 @@ read_shimadzu <- function(file, format_in, #' @importFrom utils tail read.csv #' @param file path to file #' @param format_out R format. Either \code{matrix} or \code{data.frame}. +#' @param data_format Whether to return data in \code{wide} or \code{long} format. #' @param read_metadata Whether to read metadata from file. #' @return A chromatogram in the format specified by \code{format_out} #' (retention time x wavelength). #' @author Ethan Bass #' @export -read_waters_arw <- function(file, read_metadata = TRUE, - format_out = c("matrix","data.frame")){ + +read_waters_arw <- function(file, format_out = c("matrix","data.frame"), + data_format = c("wide","long"), + read_metadata = TRUE){ format_out <- match.arg(format_out, c("matrix","data.frame")) - x <- read.csv(file, sep="\t", skip = 2, header=FALSE, row.names=1) - if (format_out == "matrix") + data_format <- match.arg(data_format, c("wide","long")) + x <- read.csv(file, sep="\t", skip = 2, header = FALSE, row.names = 1) + if (ncol(x) == 1){ + colnames(x) <- "Intensity" + if (data_format == "long"){ + x <- data.frame(RT = rownames(x), Intensity=x[,1]) + } + } + if (format_out == "matrix"){ x <- as.matrix(x) + } if (read_metadata){ meta <- try(read_waters_metadata(file)) if (!inherits(meta, "try-error")){ x <- attach_metadata(x, meta, format_in = "waters_arw", format_out = format_out, - data_format = "wide", - parser = "chromConverter") + data_format = data_format, + parser = "chromConverter", + source_file = file) } } x @@ -232,6 +257,7 @@ read_waters_arw <- function(file, read_metadata = TRUE, #' (retention time x wavelength). #' @author Ethan Bass #' @export + read_chemstation_csv <- function(file, format_out = c("matrix","data.frame")){ format_out <- match.arg(format_out, c("matrix", "data.frame")) x <- read.csv(file, row.names = 1, header = TRUE, @@ -272,7 +298,8 @@ read_mzml <- function(path, format_out = c("matrix", "data.frame"), data_format = c("long","wide"), parser=c("RaMS","mzR"), what=c("MS1","MS2", "BPC", "TIC", "DAD", - "chroms", "metadata", "everything"), verbose = FALSE, + "chroms", "metadata", "everything"), + verbose = FALSE, ...){ parser <- match.arg(parser, c("RaMS", "mzR")) format_out <- match.arg(format_out, c("matrix", "data.frame")) diff --git a/R/rainbow_parser.R b/R/rainbow_parser.R index a129473..4117b75 100644 --- a/R/rainbow_parser.R +++ b/R/rainbow_parser.R @@ -40,7 +40,8 @@ call_rainbow <- function(file, format_in = c("agilent_d", "waters_raw", "masshun "masshunter" = rb_read$read, "chemstation" = rb_parse_agilent$chemstation$parse_file, "chemstation_uv" = rb_parse_agilent$chemstation$parse_file, - "chemstation_fid" = rb_parse_agilent$chemstation$parse_file) + "chemstation_fid" = rb_parse_agilent$chemstation$parse_file, + "default" = rb_read$read) if (format_in %in% c("chemstation", "chemstation_uv", "chemstation_fid")){ by <- "single" } diff --git a/R/read_cdf.R b/R/read_cdf.R new file mode 100644 index 0000000..461b2ce --- /dev/null +++ b/R/read_cdf.R @@ -0,0 +1,93 @@ +#' Read CDF file +#' @param file path to file +#' @param format_out R format. Either \code{matrix} or \code{data.frame}. +#' @param data_format Whether to return data in \code{wide} or \code{long} format. +#' @param read_metadata Whether to read metadata from file. +#' @param what Whether to extract \code{chromatogram} and/or \code{peak_table}. +#' @return A chromatogram in the format specified by \code{format_out}. +#' (retention time x wavelength). +#' @export + +read_cdf <- function(file, format_out = c("matrix","data.frame"), + data_format = c("wide","long"), + what = "chromatogram", read_metadata = TRUE){ + check_for_pkg("ncdf4") + nc <- ncdf4::nc_open(file) + if ("ordinate_values" %in% names(nc$var)){ + format = "chrom" + } else { + format = "ms" + } + ncdf4::nc_close(nc) + fn <- switch(format, chrom = read_andi_chrom, ms = andi_ms_error) + fn(file = file, data_format = data_format, format_out = format_out, + what = what, read_metadata = read_metadata) +} + +#' @noRd +andi_ms_error <- function(...){ + stop("The `cdf` MS format is not yet supported by an internal `chromConverter` parser. + Please try the OpenChrom `msd` parser instead.") +} + +#' Read ANDI chrom file +#' @noRd +read_andi_chrom <- function(file, format_out = c("matrix","data.frame"), + data_format = c("wide","long"), + what = "chromatogram", read_metadata = TRUE){ + data_format <- match.arg(data_format, c("wide","long")) + format_out <- match.arg(format_out, c("matrix","data.frame")) + what <- match.arg(what, c("chromatogram", "peak_table"), several.ok = TRUE) + nc <- ncdf4::nc_open(file) + if (any(what == "chromatogram")){ + y <- ncdf4::ncvar_get(nc, "ordinate_values") + nvals <- ncdf4::ncvar_get(nc, "actual_run_time_length") + n_interval <- ncdf4::ncvar_get(nc, "actual_sampling_interval") + n_start <- ncdf4::ncvar_get(nc, "actual_delay_time") + x <- seq(from = n_start, to = nvals, length.out=length(y)) + data = data.frame(RT = x, Intensity = y) + if (data_format == "wide"){ + rownames(data) <- data[,1] + data <- data[,-1, drop = FALSE] + } + if (format_out == "matrix"){ + data <- as.matrix(data) + } + } + if (any(what == "peak_table")){ + peak_table_vars <- names(which(sapply(nc$var, function(x){ + x$dim[[1]]$name + }) == "peak_number")) + if (length(peak_table_vars) > 0){ + peak_tab <- sapply(peak_table_vars, function(var){ + ncdf4::ncvar_get(nc, varid = var) + }) + peak_tab <- as.data.frame(peak_tab) + } + } + if ("peak_table" %in% what & "chromatogram" %in% what){ + what <- "both" + } + data <- switch(what, "chromatogram" = data, + "peak_table" = peak_tab, + "both" = list(chromatogram=data, peak_table = peak_tab)) + if (read_metadata){ + meta <- ncdf4::ncatt_get(nc, varid = 0) + # data <- attach_metadata(data, meta = meta, format_in = format_in, + # format_out = format_out, data_format = data_format, + # parser = "chromconverter", source_file = file) + if (inherits(data, "list")){ + data <- lapply(data, function(xx){ + attach_metadata(data, meta = meta, format_in = "cdf", + format_out = format_out, data_format = data_format, + parser = "chromconverter", source_file = file) + }) + } else{ + data <- attach_metadata(data, meta = meta, format_in = "cdf", + format_out = format_out, data_format = data_format, + parser = "chromconverter", source_file = file) + } + } + ncdf4::nc_close(nc) + data +} diff --git a/R/read_chemstation_ch.R b/R/read_chemstation_ch.R index bdc0670..8493fc2 100644 --- a/R/read_chemstation_ch.R +++ b/R/read_chemstation_ch.R @@ -1,8 +1,9 @@ #' Parser for reading Agilent ('.ch') files into R #' @importFrom bitops bitAnd bitShiftL #' @param path Path to \code{.ch} file +#' @param format_out Matrix or data.frame. +#' @param data_format Whether to return data in \code{wide} or \code{long} format. #' @param read_metadata Logical. Whether to attach metadata. -#' @param format_out Matrix or data.frame #' @author Ethan Bass #' @return A chromatogram in the format specified by \code{format_out} #' (retention time x wavelength). @@ -10,9 +11,11 @@ #' ((c) James Dillon 2014). #' @export -read_chemstation_ch <- function(path, read_metadata = TRUE, - format_out = c("matrix","data.frame")){ +read_chemstation_ch <- function(path, format_out = c("matrix","data.frame"), + data_format = c("wide","long"), + read_metadata = TRUE){ format_out <- match.arg(format_out, c("matrix","data.frame")) + data_format <- match.arg(data_format, c("wide","long")) f <- file(path, "rb") on.exit(close(f)) @@ -20,83 +23,8 @@ read_chemstation_ch <- function(path, read_metadata = TRUE, # HEADER seek(f, 1, "start") version <- readBin(f, "character", n = 1) - version <- match.arg(version, choices = c("8", "81", "130", "179","181")) - if (version %in% c("179","181")){ - offsets <- list( - file_type = 347, #0x15B - sample_name = 858, #0x35A - operator = 1880, #0x758 - date = 2391, # 0x957 - instrument = 2492, # 0x9BC - method = 2574, # 0xA0E - software = 3089, # 0xC11 - unit = 4172, # 0x104C - signal = 4213, # 0x1075 - num_times = 278, # 0x116 - rt_first = 282, # 0x11A - rt_last = 286, # 0x11E - scaling_factor = 4732, # 0x127C - intercept = 4724, - data_start = 4096 # 0x1000 - ) - } else if (version == "130"){ - offsets <- list( - # sequence_line_or_injection = 252, #UINT16 - # injection_or_sequence_line = 256, #UINT16 - # data_offset = 264, # UINT32 - # start_time = 282, - # end_time = 286, - # version_string = 326, # utf16 - file_type = 347, # utf16 - sample_name = 858, # utf16 - operator = 1880, # utf16 - date = 2391, # utf16 - inlet = 2492, # utf16 - instrument = 2533, # utf16' - method = 2574, # utf16 - software_version = 3601, #utf16' - software = 3089, # 'utf16' - software_revision = 3802, #'utf16' - units = 4172, # 'utf16' - signal = 4213, # 'utf16' - zero = 4110, # INT32 - scaling_factor = 4732) #ENDIAN + 'd' - } else if (version == 30){ - offsets <- list( - file_type = 4, # utf16 - # sample_name = 858, # utf16 - operator = 148, # utf16 - date = 178, # utf16 - # inlet = 2492, # utf16 - instrument = 208, # utf16' - method = 228, # utf16 - software_version = 355, #utf16' - software = 322, # 'utf16' - software_revision = 405, #'utf16' - units = 580, # 'utf16' - signal = 596, # 'utf16' - zero = 4110, # INT32 - scaling_factor = 4732, - data_start = 1024 #ENDIAN + 'd' - ) - } else if (version %in% c("8","81")){ - offsets <- list(sample_name = 24, - description = 86, - operator = 148, - date = 178, - instrument = 218, - inlet = 208, - method = 228, - software = 0xC11, - unit = 580, - num_times = 0x116, - rt_first = 0x11A, - rt_last = 0x11E, - scaling_factor = 644, - intercept = 636, - data_start = 4096) - } - + version <- match.arg(version, choices = c("8", "81", "130", "179", "181")) + offsets <- get_agilent_offsets(version) decoder <- switch(version, "8" = decode_delta, "81" = decode_double_delta, @@ -114,8 +42,8 @@ read_chemstation_ch <- function(path, read_metadata = TRUE, data <- decoder(f, offset) - seek(f, 282, "start") - seek(f, 282, "start") + seek(f, where = 282, origin = "start") + seek(f, where = 282, origin = "start") if (version %in% c("8","130")){ xmin <- as.double(readBin(f, "integer", n = 1, size = 4, signed = TRUE, endian = "big")) / 60000 @@ -134,7 +62,11 @@ read_chemstation_ch <- function(path, read_metadata = TRUE, data <- data * scaling_factor + intercept - data <- data.frame(Intensity = data, row.names = times) + if (data_format == "wide"){ + data <- data.frame(Intensity = data, row.names = times) + } else if (data_format == "long"){ + data <- data.frame(RT = times, Intensity = data) + } if (format_out == "matrix"){ data <- as.matrix(data) } @@ -150,14 +82,21 @@ read_chemstation_ch <- function(path, read_metadata = TRUE, n <- get_nchar(f) cc_collapse(readBin(f, "character", n = n)) }) - + if (read_metadata){ + datetime_regex <- "(\\d{2}-[A-Za-z]{3}-\\d{2}, \\d{2}:\\d{2}:\\d{2})|(\\d{2}/\\d{2}/\\d{4} \\d{1,2}:\\d{2}:\\d{2} (?:AM|PM)?)" + datetime <- regmatches(meta$date, gregexpr(datetime_regex, meta$date))[[1]] + datetime_formats <- c("%d-%b-%y, %H:%M:%S", "%m/%d/%Y %I:%M:%S %p", "%d/%m/%Y %I:%M:%S %p") + datetime <- as.POSIXct(datetime, tz = "UTC", tryFormats = datetime_formats) data <- structure(data, file_version = version, sample_name = meta$sample_name, - run_date = meta$date, instrument = meta$instrument, + run_date = datetime, + instrument = meta$instrument, method = meta$method, software_version = meta$software_version, software = meta$software, software_rev = meta$software_revision, - signal = meta$signal, unit = meta$unit, - time_range = c(xmin, xmax), - data_format = "long", parser = "chromConverter") + signal = meta$signal, detector_unit = meta$unit, + time_range = c(xmin, xmax), time_interval = mean(diff(times)), + time_unit = "Minutes", source_file = path, + data_format = data_format, parser = "chromConverter") + } } data } @@ -182,7 +121,7 @@ get_chemstation_dir_name <- function(path){ #' @noRd get_nchar <- function(f){ - as.numeric(readBin(f, what = "raw", n = 1))*2 + as.numeric(readBin(f, what = "raw", n = 1)) } #' @noRd @@ -239,49 +178,119 @@ decode_double_array <- function(file, offset) { #' @noRd #' @note This function was adapted from the \href{https://github.com/chemplexity/chromatography}{Chromatography Toolbox} ((c) James Dillon 2014). - -# file <- f decode_delta <- function(file, offset) { - # if (ftell(file) == -1) { - # signal <- c() - # return(signal) - # } else { seek(file, 0, 'end') fsize <- seek(file, NA, "current") seek(file, offset, "start") start <- seek(file, NA, "current") - # } - signal <- rep(0, round((fsize - start)/2)) + signal <- rep(NA, round((fsize - start)/2)) buffer <- rep(0, 4) index <- 1 - while (seek(file, NA, "current") < fsize) { - buffer[1] <- readBin(file, "integer", n=1, endian = "big", size = 2) - buffer[2] <- buffer[4] - - if (bitShiftL(buffer[1], 12) == 0) { - # signal <- signal[1:index-1] - break - } - - for (i in (1:bitAnd(buffer[1], 4095))) { - buffer[3] <- readBin(file, "integer", n = 1, endian = "big", size = 2) - - if (buffer[3] != -32768) { - buffer[2] <- buffer[2] + buffer[3] - } else { - buffer[2] <- readBin(file, "integer", n = 1, endian = "big", size = 4) + while (TRUE) { + head <- readBin(file, "integer", n = 1, size = 1, endian = "big") + if (head != 0x10) { + break } - - signal[index] <- buffer[2] - index <- index + 1 - } - - buffer[4] <- buffer[2] + buffer[2] <- buffer[4] + + segment_length <- readBin(file, "integer", n = 1, size = 1, endian = "big") + for (i in seq_len(segment_length)){ + # for (i in (1:bitwAnd(buffer[1], 4095L))) { + buffer[3] <- readBin(file, "integer", n = 1, size = 2, endian = "big") + if (buffer[3] != -32768L) { + buffer[2] <- buffer[2] + buffer[3] + } else { + buffer[2] <- readBin(file, "integer", n = 1, size =4 ,endian = "big") + } + + signal[index] <- buffer[2] + index <- index + 1 + } + buffer[4] <- buffer[2] } - + signal <- signal[!is.na(signal)] return(signal) } +#' @noRd +get_agilent_offsets <- function(version){ + if (version %in% c("179","181")){ + offsets <- list( + file_type = 347, #0x15B + sample_name = 858, #0x35A + operator = 1880, #0x758 + date = 2391, # 0x957 + instrument = 2492, # 0x9BC + method = 2574, # 0xA0E + software = 3089, # 0xC11 + unit = 4172, # 0x104C + signal = 4213, # 0x1075 + num_times = 278, # 0x116 + rt_first = 282, # 0x11A + rt_last = 286, # 0x11E + scaling_factor = 4732, # 0x127C + intercept = 4724, + data_start = 4096 # 0x1000 + ) + } else if (version == "130"){ + offsets <- list( + # sequence_line_or_injection = 252, #UINT16 + # injection_or_sequence_line = 256, #UINT16 + # data_offset = 264, # UINT32 + start_time = 282, + # end_time = 286, + # version_string = 326, # utf16 + file_type = 347, # utf16 + sample_name = 858, # utf16 + operator = 1880, # utf16 + date = 2391, # utf16 + inlet = 2492, # utf16 + instrument = 2533, # utf16' + method = 2574, # utf16 + software_version = 3601, #utf16' + software = 3089, # 'utf16' + software_revision = 3802, #'utf16' + units = 4172, # 'utf16' + signal = 4213, # 'utf16' + zero = 4110, # INT32 + scaling_factor = 4732) #ENDIAN + 'd' + } else if (version == 30){ + offsets <- list( + file_type = 4, # utf16 + # sample_name = 858, # utf16 + operator = 148, # utf16 + date = 178, # utf16 + # inlet = 2492, # utf16 + instrument = 208, # utf16' + method = 228, # utf16 + software_version = 355, #utf16' + software = 322, # 'utf16' + software_revision = 405, #'utf16' + units = 580, # 'utf16' + signal = 596, # 'utf16' + zero = 4110, # INT32 + scaling_factor = 4732, + data_start = 1024 #ENDIAN + 'd' + ) + } else if (version %in% c("8","81")){ + offsets <- list(sample_name = 24, + description = 86, + operator = 148, + date = 178, + instrument = 218, + inlet = 208, + method = 228, + # software = 0xC11, + unit = 580, + num_times = 0x116, + rt_first = 0x11A, + rt_last = 0x11E, + scaling_factor = 644, + intercept = 636, + data_start = 4096) + } + offsets +} diff --git a/R/read_chemstation_uv.R b/R/read_chemstation_uv.R index 1e7bb14..4bc0baf 100644 --- a/R/read_chemstation_uv.R +++ b/R/read_chemstation_uv.R @@ -12,9 +12,9 @@ #' (https://rainbow-api.readthedocs.io/en/latest/agilent/uv.html). #' @export -read_chemstation_uv <- function(path, read_metadata = TRUE, - format_out = c("matrix","data.frame"), - data_format = c("wide","long")){ +read_chemstation_uv <- function(path, format_out = c("matrix","data.frame"), + data_format = c("wide","long"), + read_metadata = TRUE){ format_out <- match.arg(format_out, c("matrix","data.frame")) data_format <- match.arg(data_format, c("wide","long")) @@ -127,6 +127,7 @@ read_chemstation_uv <- function(path, read_metadata = TRUE, if (read_metadata){ data <- structure(data, file_version = meta$file_type, sample_name = meta$sample_name, + file_source = path, operator = meta$operator, run_date = meta$date, instrument = meta$detector, method = meta$method, software_version = NA, diff --git a/R/read_chroms.R b/R/read_chroms.R index 37cb455..ff78487 100644 --- a/R/read_chroms.R +++ b/R/read_chroms.R @@ -34,16 +34,20 @@ #' @param format_out R object format (i.e. data.frame or matrix). #' @param data_format Whether to output data in wide or long format. Either #' \code{wide} or \code{long}. -#' @param export Logical. If TRUE, will export files as csvs. +#' @param export Logical. If TRUE, the program will export files in the format +#' specified by \code{export_format} in the directory specified by \code{path_out}. #' @param path_out Path for exporting files. If path not specified, files will #' export to current working directory. -#' @param export_format Export format. Currently the only option is \code{.csv}, -#' unless you are using OpenChrom parsers, where you could have \code{csv}, -#' \code{cdf}, \code{mzml}, or \code{animl}. +#' @param export_format Export format. Currently the options include \code{.csv}, +#' \code{chemstation_csv} (utf-16 encoding), and \code{cdf}, unless you are using +#' OpenChrom parsers, where there are two additional options: \code{mzml}, and +#' \code{animl}. #' @param read_metadata Logical, whether to attach metadata (if it's available). #' Defaults to TRUE. #' @param progress_bar Logical. Whether to show progress bar. Defaults to #' \code{TRUE} if \code{\link[pbapply]{pbapply}} is installed. +#' @param sample_names An optional character vector of sample names. Otherwise +#' sample names default to the basename of the specified files. #' @param dat Existing list of chromatograms to append results. #' (Defaults to NULL). #' @return A list of chromatograms in \code{matrix} or \code{data.frame} format, @@ -67,15 +71,16 @@ read_chroms <- function(paths, find_files, "chemstation_fid", "masshunter_dad", "shimadzu_fid", "shimadzu_dad", "chromeleon_uv", "thermoraw", "mzml", "waters_arw", "waters_raw", - "msd", "csd", "wsd", "other"), + "msd", "csd", "wsd", "mdf", "other"), pattern = NULL, parser = c("", "chromconverter", "aston", "entab", "thermoraw", "openchrom", "rainbow"), format_out = c("matrix", "data.frame"), data_format = c("wide","long"), export = FALSE, path_out = NULL, - export_format = c("csv", "cdf", "mzml", "animl"), - read_metadata = TRUE, progress_bar, dat = NULL){ + export_format = c("csv", "chemstation_csv", "cdf", "mzml", "animl"), + read_metadata = TRUE, progress_bar, sample_names = NULL, + dat = NULL){ data_format <- match.arg(data_format, c("wide","long")) format_out <- match.arg(format_out, c("matrix", "data.frame")) parser <- match.arg(parser, c("", "chromconverter", "aston","entab", @@ -110,14 +115,17 @@ read_chroms <- function(paths, find_files, "chemstation_csv", "masshunter_dad", "shimadzu_fid", "shimadzu_dad", "chromeleon_uv", "thermoraw", "mzml", "waters_arw", - "waters_raw", "msd", "csd", "wsd", "other")) + "waters_raw", "msd", "csd", "wsd", "mdf", + "cdf", "other")) if (parser == ""){ parser <- check_parser(format_in, find = TRUE) } - export_format <- match.arg(export_format, c("csv", "cdf", "mzml", "animl")) + export_format <- match.arg(export_format, c("csv", "chemstation_csv", "cdf", "mzml", "animl")) check_parser(format_in, parser) - if (parser != "openchrom" & export_format != "csv") - stop("Only `csv` format is currently supported for exporting files unless the parser is `openchrom`.") + if (parser != "openchrom" && !(export_format %in% c("csv", "chemstation_csv", "cdf"))) + stop("The selected export format is currently only supported by `openchrom` parsers.") + # if (export_format == "cdf" && format_in != "mdf" && parser != "openchrom") + # stop("Currently CDF exports are only available for MDF files.") if (parser == "entab" & !requireNamespace("entab", quietly = TRUE)) { stop("The entab R package must be installed to use entab parsers: install.packages('entab', repos='https://ethanbass.github.io/drat/')", @@ -127,19 +135,16 @@ read_chroms <- function(paths, find_files, if (all(!exists)){ stop("Cannot locate files. None of the supplied paths exist.") } - if (!is.null(path_out)){ - path_out <- check_path(path_out) - } if (export | format_in == "thermoraw" | parser == "openchrom"){ if (is.null(path_out)){ path_out <- set_temp_directory() } if (!dir.exists(path_out)){ - stop(paste0("The export directory '", path_out, "' does not exist.")) + stop(paste0("The export directory '", path_out, "' could not be found.")) } } if (is.null(dat)){ - dat<-list()} + dat <- list()} # choose converter entab_parser <- partial(call_entab, format_in = format_in, @@ -178,21 +183,24 @@ read_chroms <- function(paths, find_files, converter <- rainbow_parser } else if (format_in == "chromeleon_uv"){ pattern <- ifelse(is.null(pattern), ".txt", pattern) - converter <- partial(read_chromeleon, read_metadata = read_metadata, format_out = format_out) + converter <- partial(read_chromeleon, format_out = format_out, + data_format = data_format, read_metadata = read_metadata) } else if (format_in == "shimadzu_fid"){ pattern <- ifelse(is.null(pattern), ".txt", pattern) converter <- partial(read_shimadzu, format_in = "fid", - read_metadata = read_metadata, format_out = format_out) + format_out = format_out, data_format = data_format, + read_metadata = read_metadata) } else if (format_in == "shimadzu_dad"){ pattern <- ifelse(is.null(pattern), ".txt", pattern) converter <- partial(read_shimadzu, format_in = "dad", - read_metadata = read_metadata, format_out = format_out) + format_out = format_out, data_format = data_format, + read_metadata = read_metadata) } else if (format_in == "thermoraw"){ pattern <- ifelse(is.null(pattern), ".raw", pattern) converter <- switch(parser, "thermoraw" = partial(read_thermoraw, path_out = path_out, - read_metadata = read_metadata, - format_out = format_out), + format_out = format_out, + read_metadata = read_metadata), "entab" = entab_parser) } else if (format_in == "mzml"){ pattern <- ifelse(is.null(pattern), ".mzML", pattern) @@ -207,11 +215,12 @@ read_chroms <- function(paths, find_files, pattern <- ifelse(is.null(pattern), ".csv|.CSV", pattern) converter <- partial(read_chemstation_csv, format_out = format_out) } else if (format_in %in% c("chemstation_fid", "chemstation_ch")){ - data_format <- "long" pattern <- ifelse(is.null(pattern), ".ch", pattern) converter <- switch(parser, - "chromconverter" = partial(read_chemstation_ch, read_metadata = read_metadata, - format_out = format_out), + "chromconverter" = partial(read_chemstation_ch, + format_out = format_out, + data_format = data_format, + read_metadata = read_metadata), "rainbow" = rainbow_parser, "entab" = entab_parser) } else if (format_in %in% c("msd", "csd", "wsd")){ @@ -222,7 +231,17 @@ read_chroms <- function(paths, find_files, converter <- partial(call_openchrom, path_out = path_out, format_in = format_in, export_format = export_format, return_paths = return_paths) - } else{ + } else if (format_in == "mdf"){ + pattern <- ifelse(is.null(pattern), ".mdf|.MDF", pattern) + converter <- partial(read_mdf, format_out = format_out, + data_format = data_format, + read_metadata = read_metadata) + } else if (format_in == "cdf"){ + pattern <- ifelse(is.null(pattern), ".cdf|.CDF", pattern) + converter <- partial(read_cdf, format_out = format_out, + data_format = data_format, + read_metadata = read_metadata) + } else { converter <- switch(parser, "aston" = partial(trace_converter, format_out = format_out, data_format = data_format, @@ -230,11 +249,10 @@ read_chroms <- function(paths, find_files, "entab" = entab_parser ) } - writer <- switch(export_format, "csv" = export_csvs) if (find_files){ files <- find_files(paths, pattern) - } else{ + } else { files <- paths if (!is.null(pattern)){ match <- grep(pattern, files, ignore.case = TRUE) @@ -249,12 +267,12 @@ read_chroms <- function(paths, find_files, } } - if (format_in %in% c("chemstation_uv", "masshunter_dad", "chemstation", "chemstation_fid")){ + if (all(grepl(".d$", files, ignore.case = TRUE))){ file_names <- strsplit(files, "/") file_names <- gsub("\\.[Dd]", "", sapply(file_names, function(n){ - ifelse(any(grepl("\\.[Dd]", n)), grep("\\.[Dd]", n, value=TRUE), tail(n,1)) - } )) + ifelse(any(grepl("\\.[Dd]", n)), grep("\\.[Dd]", n, value = TRUE), tail(n,1)) + })) } else {file_names <- sapply(strsplit(basename(files),"\\."), function(x) x[1])} if (parser != "openchrom"){ laplee <- choose_apply_fnc(progress_bar) @@ -272,8 +290,16 @@ read_chroms <- function(paths, find_files, } else{ data <- converter(files) } - names(data) <- file_names + if (!is.null(sample_names)){ + names(data) <- sample_names + } else{ + names(data) <- file_names + } if (export & !(parser %in% c("thermoraw", "openchrom"))){ + writer <- switch(export_format, csv = export_csvs, + chemstation_csv = purrr::partial(export_csvs, fileEncoding = "utf16"), + cdf = export_cdfs) + writer(data, path_out) } dat <- append(dat, data) diff --git a/R/read_mdf.R b/R/read_mdf.R new file mode 100644 index 0000000..0d5408e --- /dev/null +++ b/R/read_mdf.R @@ -0,0 +1,86 @@ +#' Read MDF files into R +#' @param file Path to a 'Lumex' \code{.mdf} file +#' @param format_out R format. Either \code{matrix} or \code{data.frame}. +#' @param data_format Whether to return data in \code{wide} or \code{long} format. +#' @param read_metadata Whether to read metadata from file. +#' @return A chromatogram in the format specified by \code{format_out}. +#' (retention time x wavelength). +#' @export + +read_mdf <- function(file, format_out = c("matrix","data.frame"), + data_format = c("wide","long"), read_metadata = TRUE){ + data_format <- match.arg(data_format, c("wide","long")) + format_out <- match.arg(format_out, c("matrix","data.frame")) + + f <- file(file, "rb") + + # extract metadata + metadata <- readBin(f, "character", n = 1) + meta <- extract_mdf_metadata(metadata) + # meta <- rbind(meta, c(Property = "source_file", file, Group = "Conversion")) + + array1_len <- as.numeric(meta[which(meta$Group == "Array photometric" & meta$Property == "Size"),"Value"]) + array2_len <- as.numeric(meta[which(meta$Group == "Array current" & meta$Property == "Size"),"Value"]) + + # read array 1 + seek(f, 1018, "start") + seek(f, 1018, "start") + photo_array <- readBin(f, "double", size=8, n=array1_len) + + # read array 2 + current_array <-readBin(f, "integer", size=4, n=array2_len) + + # close file + close(f) + + t1 <- as.numeric(meta[which(meta$Group == "Interval Time" & meta$Property == "From"), "Value"]) + t2 <- as.numeric(meta[which(meta$Group == "Interval Time" & meta$Property == "To"), "Value"]) + t_step <- as.numeric(meta[which(meta$Group == "Interval Time" & meta$Property == "Step"), "Value"]) + + # create time array + time_array <- seq(t1, t2, by = t_step) + + # construct data.frame + if (data_format == "wide"){ + data <- data.frame(Intensity = photo_array, Current = current_array, row.names=time_array) + } else if (data_format == "long"){ + data <- data.frame(RT = time_array, Intensity = photo_array, Current = current_array) + } + + if (data_format == "long"){ + data <- reshape_chrom(data) + } + if (format_out == "matrix"){ + as.matrix(format_out) + } + if (read_metadata){ + data <- attach_metadata(x = data, meta = meta, format_in = "mdf", + format_out = format_out, data_format = data_format, + parser = "chromconverter", source_file = file) + } + data +} + +#' Extract MDF metadata +#' @noRd +extract_mdf_metadata <- function(x){ + ma <- strsplit(x, "\n")[[1]] + splitAt <- function(x, pos) unname(split(x, cumsum(seq_along(x) %in% pos))) + x2 <- splitAt(ma, grep("\\[*\\]", ma)) + x2 <- x2[-length(x2)] + names(x2) <- gsub("\\[|\\]","",sapply(x2, function(xx) xx[[1]])) + x3 <- lapply(seq_along(x2), function(i){ + xx <- x2[[i]][-1] + xx <- as.data.frame(do.call(rbind,lapply(xx, function(xxx){ + stringr::str_split_fixed(xxx, "=", 2) + }))) + xx[,3] <- names(x2)[i] + colnames(xx) <- c("Property","Value","Group") + xx + }) + names(x3) <- names(x2) + meta <- do.call(rbind, x3) + meta <- meta[-which(meta$Property == ""),] + meta +} + diff --git a/R/utils.R b/R/utils.R index c0aa7a1..f27f8c3 100644 --- a/R/utils.R +++ b/R/utils.R @@ -8,7 +8,7 @@ check_parser <- function(format_in, parser=NULL, find = FALSE){ "chemstation_fid", "chemstation_uv", "chromeleon_uv", "mzml", "shimadzu_fid", "shimadzu_dad", - "waters_arw"), + "waters_arw", "mdf", "cdf"), aston = c("chemstation_uv", "masshunter_dad", "other"), entab = c("chemstation_uv", "chemstation_fid", "masshunter_dad", "thermoraw", "other"), rainbow = c("chemstation_uv", "waters_raw", @@ -40,6 +40,22 @@ check_parser <- function(format_in, parser=NULL, find = FALSE){ } } +#' @noRd +format_to_extension <- function(format_in){ + switch(format_in, + "agilent_d" = ".d|.D", + "chemstation_uv" = ".uv|.UV", + "chemstation_ch" = ".ch|.CH", + "chemstation_fid" = ".ch|.CH", + "chemstation_csv" = ".csv|.CSV", + "masshunter_dad" = ".sp|.SP", + "shimadzu_fid" = ".txt", + "shimadzu_dad" = ".txt", + "chromeleon_uv" = ".txt", + "thermoraw" = ".raw", "mzml" = ".mzml", "waters_arw" = ".arw", + "waters_raw" = ".raw", "msd" = ".", "csd" =".", "wsd" =".", "mdf" = ".mdf|.MDF", "other"=".") +} + #' @noRd find_files <- function(paths, pattern){ files <- unlist(lapply(paths, function(path){ @@ -57,20 +73,12 @@ find_files <- function(paths, pattern){ })) } -#' @noRd -export_csvs <- function(data, path.out){ - sapply(seq_along(data), function(i){ - write.csv(data[[i]], file = paste0(paste0(path.out, names(data)[i]),".CSV")) - }) -} - #' @noRd set_temp_directory <- function(){ ans <- readline("Export directory not specified! Export files to `temp` directory (y/n)?") if (ans %in% c("y","Y")){ - if (!dir.exists("temp")) - dir.create("temp") - path_out <- paste0(getwd(),'/temp/') + fs::dir_create("temp") + path_out <- fs::path(getwd(),"temp") path_out } else{ stop("Must specify directory to export files.") @@ -120,6 +128,7 @@ extract_header <- function(x, chrom.idx, sep){ #' Check for suggested package #' @noRd +#' @keywords internal check_for_pkg <- function(pkg, return_boolean = FALSE){ pkg_exists <- requireNamespace(pkg, quietly = TRUE) if (!pkg_exists) { diff --git a/R/write_chroms.R b/R/write_chroms.R new file mode 100644 index 0000000..0981a4d --- /dev/null +++ b/R/write_chroms.R @@ -0,0 +1,136 @@ +#' Export chromatograms as csvs +#' @noRd +export_csvs <- function(data, path_out, fileEncoding = "utf8", row.names = TRUE){ + sapply(seq_along(data), function(i){ + write.csv(data[[i]], file = fs::path(path_out, names(data)[i], ext = "csv"), + fileEncoding = fileEncoding, row.names = row.names) + }) +} + +#' Export chromatograms as CDFs +#' @noRd +export_cdfs <- function(data, path_out){ + sapply(seq_along(data), function(i){ + write_cdf(data[[i]], sample_name = names(data)[i], + path_out = path_out) + }) +} + +#' Write CDF file from chromatogram +#' @noRd +write_cdf <- function(x, sample_name, path_out){ + check_for_pkg("ncdf4") + if (ncol(x) + as.numeric(attr(x, "data_format") == "wide") > 2){ + warning("The supplies chromatogram contains more than two dimensions. Only + the first two dimensions will be written to the ANDI chrom file.", + immediate. = TRUE) + } + if (attr(x, "data_format") == "wide"){ + x1 <- data.frame(RT = as.numeric(rownames(x)), Intensity = x[,1]) + x <- transfer_metadata(x1, x) + } + # if (!missing(column)){ + # x1 <- x[,column,drop = FALSE] + # x <- transfer_metadata(x1,x) + # } + filename <- fs::path_ext_remove(fs::path_file(sample_name)) + file_out <- fs::path(path_out, filename, ext = "cdf") + if (fs::file_exists(file_out)){ + warning("File already exists and will not be overwritten.", immediate. = TRUE) + } + # define dimensions + point_number <- ncdf4::ncdim_def("point_number", "", + vals = seq_along(x[,1]), + create_dimvar = FALSE) + + # define variables + nc_time <- ncdf4::ncvar_def("raw_data_retention", "", dim = point_number) + nc_intensity <- ncdf4::ncvar_def("ordinate_values", "", dim = point_number) + other_vars <- c("actual_delay_time", "actual_run_time_length", "actual_sampling_interval", + "detector_maximum_value", "detector_minimum_value") + other_vars <- lapply(other_vars, function(x) ncdf4::ncvar_def(x, "",list())) + + # write netcdf file + ncdf4::nc_create(file_out, c(list(nc_time, nc_intensity), other_vars)) + + # open netcdf file + nc <- ncdf4::nc_open(file_out, write = TRUE) + + # write data to file + ncdf4::ncvar_put(nc = nc, varid = "raw_data_retention", vals = x[,1]) + ncdf4::ncvar_put(nc = nc, varid = "ordinate_values", vals = x[,2]) + ncdf4::ncatt_put(nc, varid="ordinate_values", + attname = "uniform_sampling_flag", attval = "Y") + ncdf4::ncvar_put(nc = nc, varid = "actual_run_time_length", vals = tail(x[,1],1)) + ncdf4::ncvar_put(nc = nc, varid = "actual_delay_time", vals = head(x[,1],1)) + ncdf4::ncvar_put(nc = nc, varid = "actual_sampling_interval", vals = mean(diff(x[,1]))) + ncdf4::ncvar_put(nc = nc, varid = "detector_maximum_value", vals = 1000) + ncdf4::ncvar_put(nc = nc, varid = "detector_minimum_value", vals = -1000) + + # write metadata as global attributes + meta <- format_metadata_for_cdf(x) + nc_add_global_attributes(nc = nc, meta = meta, sample_name = sample_name) + # finish writing file + ncdf4::nc_close(nc) +} + +#' Add global attributes to CDF file +#' @noRd +nc_add_global_attributes <- function(nc, meta, sample_name){ + sapply(seq_along(meta), function(i){ + ncdf4::ncatt_put(nc = nc, varid = 0, + attname = names(meta)[i], attval = meta[[i]], + prec = ifelse(names(meta)[i] %in% c("sample_amount", "sample_injection_volume"), + "float","text")) + }) + if (!is.null(sample_name)){ + ncdf4::ncatt_put(nc = nc, varid = 0, + attname = "sample_name", attval = sample_name) + ncdf4::ncatt_put(nc = nc, varid = 0, + attname = "experiment_title", attval = sample_name) + } +} + +#' @noRd +format_metadata_for_cdf <- function(x){ + # datetime_str <- x[which(x$Property=="Time"),"Value"] + # datetime_standard <- as.POSIXct(datetime_str, format = "%d.%m.%Y %H:%M:%S") + datetime <- format(attr(x, "run_datetime"), "%Y%m%d%H%M%S%z") + # rt_units <- x[which(x$Group=="Interval Time" & x$Property == "Units"), "Value"] + rt_units <- switch(tolower(attr(x, "time_unit")), + "sec" = "Seconds", "seconds" = "Seconds", + "min" = "Minutes", "minutes" = "Minutes", + "default" = "Minutes") + + meta <- list(dataset_completeness = "C1", + aia_template_revision = "1.0", + protocol_template_revision = "1.0", + netcdf_revision = system("nc-config --version", intern = TRUE), + administrative_comments = paste("Collected on", attr(x, "instrument")), + languages = "English only", + converter_name = "chromconverter", + converter_description = "AIA/ANDI netCDF Chromatography", + converter_input_source = attr(x,"source_file"), + date_time_stamp = datetime, + dataset_date_time_stamp = datetime, + injection_date_time_stamp = datetime, + detector_units = attr(x,"detector_unit"), + detector_unit = attr(x,"detector_unit"), + retention_units = rt_units, + retention_unit = rt_units, + sample_id_comments = "", + detector_name = attr(x,"detector"), + # experiment_title = "", + sample_amount = as.numeric(attr(x, "sample_amount")), + sample_injection_volume = as.numeric(attr(x, "sample_injection_volume")), + sample_type = attr(x, "sample_type") + ) + meta$sample_type <- ifelse(!is.null(meta$sample_type), meta$sample_type, "unknown") + meta$sample_amount <- + ifelse(length(meta$sample_amount) != 0, meta$sample_amount, 1) + meta$sample_injection_volume <- + ifelse(length(meta$sample_injection_volume) != 0, meta$sample_injection_volume, 1) + meta[sapply(meta, is.null)] <- "" + meta[sapply(meta, is.na)] <- "" + meta +} diff --git a/inst/shell/path_to_openchrom_commandline.txt b/inst/shell/path_to_openchrom_commandline.txt index 1ccd88a..7951def 100644 --- a/inst/shell/path_to_openchrom_commandline.txt +++ b/inst/shell/path_to_openchrom_commandline.txt @@ -1 +1 @@ -/Applications/Eclipse.app/Contents/MacOS/openchrom \ No newline at end of file +NULL diff --git a/man/attach_metadata.Rd b/man/attach_metadata.Rd index 11b8d42..cc2fed9 100644 --- a/man/attach_metadata.Rd +++ b/man/attach_metadata.Rd @@ -4,7 +4,15 @@ \alias{attach_metadata} \title{Attaches metadata to chromatogram} \usage{ -attach_metadata(x, meta, format_in, format_out, data_format, parser = NULL) +attach_metadata( + x, + meta, + format_in, + format_out, + data_format, + parser = NULL, + source_file +) } \arguments{ \item{x}{chromatogram} @@ -17,7 +25,9 @@ attach_metadata(x, meta, format_in, format_out, data_format, parser = NULL) \item{data_format}{Whether data is in wide or long format.} -\item{parser}{What parser was used to decode the data} +\item{parser}{What parser was used to decode the data.} + +\item{source_file}{The path to the source file.} } \value{ A chromatogram with attached metadata. diff --git a/man/read_cdf.Rd b/man/read_cdf.Rd new file mode 100644 index 0000000..a2a7649 --- /dev/null +++ b/man/read_cdf.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/read_cdf.R +\name{read_cdf} +\alias{read_cdf} +\title{Read CDF file} +\usage{ +read_cdf( + file, + format_out = c("matrix", "data.frame"), + data_format = c("wide", "long"), + what = "chromatogram", + read_metadata = TRUE +) +} +\arguments{ +\item{file}{path to file} + +\item{format_out}{R format. Either \code{matrix} or \code{data.frame}.} + +\item{data_format}{Whether to return data in \code{wide} or \code{long} format.} + +\item{what}{Whether to extract \code{chromatogram} and/or \code{peak_table}.} + +\item{read_metadata}{Whether to read metadata from file.} +} +\value{ +A chromatogram in the format specified by \code{format_out}. +(retention time x wavelength). +} +\description{ +Read CDF file +} diff --git a/man/read_chemstation_ch.Rd b/man/read_chemstation_ch.Rd index 6dea593..0459c8d 100644 --- a/man/read_chemstation_ch.Rd +++ b/man/read_chemstation_ch.Rd @@ -6,16 +6,19 @@ \usage{ read_chemstation_ch( path, - read_metadata = TRUE, - format_out = c("matrix", "data.frame") + format_out = c("matrix", "data.frame"), + data_format = c("wide", "long"), + read_metadata = TRUE ) } \arguments{ \item{path}{Path to \code{.ch} file} -\item{read_metadata}{Logical. Whether to attach metadata.} +\item{format_out}{Matrix or data.frame.} + +\item{data_format}{Whether to return data in \code{wide} or \code{long} format.} -\item{format_out}{Matrix or data.frame} +\item{read_metadata}{Logical. Whether to attach metadata.} } \value{ A chromatogram in the format specified by \code{format_out} diff --git a/man/read_chemstation_uv.Rd b/man/read_chemstation_uv.Rd index c37ab7d..8a4683d 100644 --- a/man/read_chemstation_uv.Rd +++ b/man/read_chemstation_uv.Rd @@ -6,19 +6,19 @@ \usage{ read_chemstation_uv( path, - read_metadata = TRUE, format_out = c("matrix", "data.frame"), - data_format = c("wide", "long") + data_format = c("wide", "long"), + read_metadata = TRUE ) } \arguments{ \item{path}{Path to \code{.uv} file} -\item{read_metadata}{Logical. Whether to attach metadata.} - \item{format_out}{Matrix or data.frame} \item{data_format}{Either \code{wide} (default) or \code{long}.} + +\item{read_metadata}{Logical. Whether to attach metadata.} } \value{ A chromatogram in the format specified by \code{format_out} diff --git a/man/read_chromeleon.Rd b/man/read_chromeleon.Rd index aa67387..d5a8dc4 100644 --- a/man/read_chromeleon.Rd +++ b/man/read_chromeleon.Rd @@ -7,6 +7,7 @@ read_chromeleon( file, format_out = c("matrix", "data.frame"), + data_format = c("wide", "long"), read_metadata = TRUE ) } @@ -15,6 +16,8 @@ read_chromeleon( \item{format_out}{R format. Either \code{matrix} or \code{data.frame}.} +\item{data_format}{Whether to return data in \code{wide} or \code{long} format.} + \item{read_metadata}{Whether to read metadata from file.} } \value{ diff --git a/man/read_chroms.Rd b/man/read_chroms.Rd index 21b731a..d9f410c 100644 --- a/man/read_chroms.Rd +++ b/man/read_chroms.Rd @@ -10,16 +10,17 @@ read_chroms( format_in = c("agilent_d", "chemstation", "chemstation_uv", "chemstation_csv", "chemstation_ch", "chemstation_fid", "masshunter_dad", "shimadzu_fid", "shimadzu_dad", "chromeleon_uv", "thermoraw", "mzml", "waters_arw", "waters_raw", - "msd", "csd", "wsd", "other"), + "msd", "csd", "wsd", "mdf", "other"), pattern = NULL, parser = c("", "chromconverter", "aston", "entab", "thermoraw", "openchrom", "rainbow"), format_out = c("matrix", "data.frame"), data_format = c("wide", "long"), export = FALSE, path_out = NULL, - export_format = c("csv", "cdf", "mzml", "animl"), + export_format = c("csv", "chemstation_csv", "cdf", "mzml", "animl"), read_metadata = TRUE, progress_bar, + sample_names = NULL, dat = NULL ) } @@ -49,14 +50,16 @@ case file extension will be deduced from \code{format_in}.} \item{data_format}{Whether to output data in wide or long format. Either \code{wide} or \code{long}.} -\item{export}{Logical. If TRUE, will export files as csvs.} +\item{export}{Logical. If TRUE, the program will export files in the format +specified by \code{export_format} in the directory specified by \code{path_out}.} \item{path_out}{Path for exporting files. If path not specified, files will export to current working directory.} -\item{export_format}{Export format. Currently the only option is \code{.csv}, -unless you are using OpenChrom parsers, where you could have \code{csv}, -\code{cdf}, \code{mzml}, or \code{animl}.} +\item{export_format}{Export format. Currently the options include \code{.csv}, +\code{chemstation_csv} (utf-16 encoding), and \code{cdf}, unless you are using +OpenChrom parsers, where there are two additional options: \code{mzml}, and +\code{animl}.} \item{read_metadata}{Logical, whether to attach metadata (if it's available). Defaults to TRUE.} @@ -64,6 +67,9 @@ Defaults to TRUE.} \item{progress_bar}{Logical. Whether to show progress bar. Defaults to \code{TRUE} if \code{\link[pbapply]{pbapply}} is installed.} +\item{sample_names}{An optional character vector of sample names. Otherwise +sample names default to the basename of the specified files.} + \item{dat}{Existing list of chromatograms to append results. (Defaults to NULL).} } diff --git a/man/read_mdf.Rd b/man/read_mdf.Rd new file mode 100644 index 0000000..97d6f5a --- /dev/null +++ b/man/read_mdf.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/read_mdf.R +\name{read_mdf} +\alias{read_mdf} +\title{Read MDF files into R} +\usage{ +read_mdf( + file, + format_out = c("matrix", "data.frame"), + data_format = c("wide", "long"), + read_metadata = TRUE +) +} +\arguments{ +\item{file}{Path to a 'Lumex' \code{.mdf} file} + +\item{format_out}{R format. Either \code{matrix} or \code{data.frame}.} + +\item{data_format}{Whether to return data in \code{wide} or \code{long} format.} + +\item{read_metadata}{Whether to read metadata from file.} +} +\value{ +A chromatogram in the format specified by \code{format_out}. +(retention time x wavelength). +} +\description{ +Read MDF files into R +} diff --git a/man/read_waters_arw.Rd b/man/read_waters_arw.Rd index 7738972..24c25b8 100644 --- a/man/read_waters_arw.Rd +++ b/man/read_waters_arw.Rd @@ -6,16 +6,19 @@ \usage{ read_waters_arw( file, - read_metadata = TRUE, - format_out = c("matrix", "data.frame") + format_out = c("matrix", "data.frame"), + data_format = c("wide", "long"), + read_metadata = TRUE ) } \arguments{ \item{file}{path to file} -\item{read_metadata}{Whether to read metadata from file.} - \item{format_out}{R format. Either \code{matrix} or \code{data.frame}.} + +\item{data_format}{Whether to return data in \code{wide} or \code{long} format.} + +\item{read_metadata}{Whether to read metadata from file.} } \value{ A chromatogram in the format specified by \code{format_out} diff --git a/tests/testthat/test-read_chroms.R b/tests/testthat/test-read_chroms.R index 73afaf1..620f835 100644 --- a/tests/testthat/test-read_chroms.R +++ b/tests/testthat/test-read_chroms.R @@ -7,7 +7,7 @@ x <- read_chroms(path_csv, format_in = "chemstation_csv", progress_bar = FALSE) test_that("aston parser works", { skip_if_missing_dependecies() - paths <- rep(path_uv,2) + paths <- rep(path_uv, 2) x1 <- read_chroms(paths, format_in = "chemstation_uv", parser = "aston", find_files = FALSE, read_metadata = TRUE, progress_bar = FALSE) @@ -16,7 +16,8 @@ test_that("aston parser works", { expect_equal(length(x1), length(paths)) expect_equal(class(x1[[1]])[1], "matrix") expect_equal(attr(x1[[1]], "data_format"), "wide") - }) + expect_equal(names(x1), c("dad1", "dad1")) +}) x1 <- read_chroms(path_uv, format_in = "chemstation_uv", parser = "chromconverter", find_files = FALSE, @@ -51,20 +52,6 @@ test_that("entab parser works", { expect_equal(attr(x1[[1]], "data_format"), "wide") }) - -# test_that("rainbow parser works", { -# skip_if_missing_dependecies() -# file <- "testdata/DAD1.uv" -# x1 <- read_chroms(file, format_in = "chemstation_uv", parser = "rainbow", -# find_files = FALSE, -# read_metadata = TRUE) -# expect_equal(as.numeric(x[[1]][,1]), as.numeric(x1[[1]][,"220"])) -# expect_equal(as.numeric(rownames(x[[1]])), as.numeric(rownames(x1[[1]]))) -# expect_equal(class(x1[[1]])[1], "matrix") -# expect_equal(attr(x1[[1]], "parser"), "rainbow") -# expect_equal(attr(x1[[1]], "data_format"), "wide") -# }) - test_that("shimadzu parser works", { file <- "testdata/ladder.txt" x <- read_chroms(file, format_in = "shimadzu_fid", find_files = FALSE, progress_bar = FALSE) @@ -72,25 +59,6 @@ test_that("shimadzu parser works", { expect_equal(attributes(x[[1]])$instrument, "GC-2014") }) -test_that("check_path works on unix/linux", { - skip_on_os("windows") - expect_equal(check_path("~/Downloads"), "~/Downloads/") - expect_equal(check_path("Downloads"), "/Downloads/") - expect_equal(check_path("~/Downloads/"), "~/Downloads/") - expect_equal(check_path("/Users/foo/"), "/Users/foo/") - expect_equal(check_path("Users/foo/"), "/Users/foo/") - expect_equal(check_path("/Users/foo"), "/Users/foo/") - expect_equal(check_path("Users/foo"), "/Users/foo/") -}) - -# test_that("thermoraw parser works",{ -# skip_if_not(configure_thermo_parser(check = TRUE)) -# file <- "/Users/ethanbass/Downloads/chrom_files/small.RAW" -# x <- read_chroms(file, format_in = "thermoraw", find_files = FALSE) -# expect_equal(class(x[[1]])[1], "matrix") -# expect_equal(attributes(x[[1]])$instrument, "GC-2014") -# }) - test_that("read_mzml works", { ext_filepath <- system.file("extdata", package = "RaMS") DAD_filepath <- list.files(ext_filepath, full.names = TRUE, @@ -109,3 +77,61 @@ test_that("read_mzml works", { test_that("get_filetype works as expected", { expect_equal(get_filetype(path_uv), "chemstation_uv") }) + +# test_that("thermoraw parser works",{ +# skip_if_not(configure_thermo_parser(check = TRUE)) +# file <- "/Users/ethanbass/Downloads/chrom_files/small.RAW" +# x <- read_chroms(file, format_in = "thermoraw", find_files = FALSE) +# expect_equal(class(x[[1]])[1], "matrix") +# expect_equal(attributes(x[[1]])$instrument, "GC-2014") +# }) + +# test_that("rainbow parser works", { +# skip_if_missing_dependecies() +# file <- "testdata/DAD1.uv" +# x1 <- read_chroms(file, format_in = "chemstation_uv", parser = "rainbow", +# find_files = FALSE, +# read_metadata = TRUE) +# expect_equal(as.numeric(x[[1]][,1]), as.numeric(x1[[1]][,"220"])) +# expect_equal(as.numeric(rownames(x[[1]])), as.numeric(rownames(x1[[1]]))) +# expect_equal(class(x1[[1]])[1], "matrix") +# expect_equal(attr(x1[[1]], "parser"), "rainbow") +# expect_equal(attr(x1[[1]], "data_format"), "wide") +# }) + +# test_that("check_path works on unix/linux", { +# skip_on_os("windows") +# expect_equal(check_path("~/Downloads"), "~/Downloads/") +# expect_equal(check_path("Downloads"), "/Downloads/") +# expect_equal(check_path("~/Downloads/"), "~/Downloads/") +# expect_equal(check_path("/Users/foo/"), "/Users/foo/") +# expect_equal(check_path("Users/foo/"), "/Users/foo/") +# expect_equal(check_path("/Users/foo"), "/Users/foo/") +# expect_equal(check_path("Users/foo"), "/Users/foo/") +# }) + +test_that("read_chroms exports csvs correctly", { + skip_on_cran() + path_out <- tempdir(check = TRUE) + on.exit(unlink(c(fs::path(path_out, "dad1", ext = "csv"), path_out))) + x1 <- read_chroms(paths = path_uv, export=TRUE, path_out = path_out, + export_format="csv", format_out = "data.frame", + progress_bar = FALSE) + x1_out <- read.csv(fs::path(path_out, "dad1", ext="csv"), row.names=1) + expect_equal(x1[[1]], x1_out, ignore_attr = TRUE) + # unlink(fs::path(path_out, "dad1", ext = "csv")) + # unlink(path_out) +}) + +test_that("read_chroms exports cdf files correctly", { + skip_on_cran() + skip_if_not_installed("ncdf4") + path_out <- tempdir(check = TRUE) + on.exit(unlink(c(fs::path(path_out, "ladder", ext = "cdf"), path_out))) + file <- "testdata/ladder.txt" + x1 <- read_chroms(paths = file, format_in = "shimadzu_fid", export = TRUE, path_out = path_out, + export_format = "cdf", progress_bar = FALSE) + x1_out <- read_cdf(fs::path(path_out, "ladder", ext = "cdf")) + expect_equal(x1[[1]], x1_out, ignore_attr = TRUE) +}) + From 560ca0f5059201bc130a752ab661a3d4ed5bb099 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Sat, 22 Apr 2023 12:34:07 -0400 Subject: [PATCH 02/28] updated configure_openchrom and readme --- NAMESPACE | 1 + NEWS.md | 3 +- R/call_openchrom.R | 69 ++++++++++++++++++++++---------------- R/read_cdf.R | 1 + R/read_chroms.R | 3 +- R/read_mdf.R | 1 + R/read_thermoraw.R | 2 +- R/utils.R | 23 ------------- R/write_chroms.R | 5 +++ README.md | 3 +- man/call_openchrom.Rd | 6 ++++ man/configure_openchrom.Rd | 24 +++++++++++++ man/read_cdf.Rd | 3 ++ man/read_mdf.Rd | 3 ++ man/read_thermoraw.Rd | 2 +- 15 files changed, 92 insertions(+), 57 deletions(-) create mode 100644 man/configure_openchrom.Rd diff --git a/NAMESPACE b/NAMESPACE index c110538..c35b441 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,7 @@ export(call_entab) export(call_openchrom) export(call_rainbow) export(configure_aston) +export(configure_openchrom) export(configure_rainbow) export(extract_metadata) export(read_cdf) diff --git a/NEWS.md b/NEWS.md index e017105..3672e19 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,7 +2,7 @@ * Added parser for ANDI chrom `cdf` files through the `read_cdf` function. * Added parser for 'Lumex' `.mdf` files through the `read_mdf` function. -* Updated `configure_openchrom` for better discovery of 'OpenChrom' path. +* Updated `configure_openchrom` for better discovery of 'OpenChrom' path and added `path` argument for directly specifying the path to 'OpenChrom'. * Added additional options for file exports. New options for writing `chemstation_csv` (utf-16) and `cdf` files through `read_chroms`. * Added `wide` and `long` `data_format` options for 2D data, such that the `wide` @@ -12,6 +12,7 @@ while the `long` format writes retention times as the first column of the object data origin. * Fixed bug causing sloppy 'Chemstation' FID metadata. * Fixed bug that caused padding of 'Chemstation 130' files with extra zeros. +* Use `fs` package for parsing paths, eliminating buggy `check_paths` function. * Added additional tests. ## chromConverter 0.3.3 diff --git a/R/call_openchrom.R b/R/call_openchrom.R index 52d5f07..fc40a8a 100644 --- a/R/call_openchrom.R +++ b/R/call_openchrom.R @@ -32,7 +32,12 @@ #' @section Side effects: Chromatograms will be exported in the format specified #' by \code{export_format} in the folder specified by \code{path_out}. #' @author Ethan Bass -#' @export call_openchrom +#' @references +#' Wenig, Philip and Odermatt, Juergen. OpenChrom: A Cross-Platform Open Source +#' Software for the Mass Spectrometric Analysis of Chromatographic Data. \emph{ +#' BMC Bioinformatics} \bold{11}, no. 1 (July 30, 2010): 405. \doi{ +#' 10.1186/1471-2105-11-405}. +#' @export call_openchrom <- function(files, path_out, format_in, export_format = c("csv", "cdf", "mzml", "animl"), @@ -123,23 +128,30 @@ write_openchrom_batchfile <- function(files, path_out, #' @name configure_openchrom #' @param cli Defaults to NULL. If "true", R will rewrite openchrom ini file to enable CLI. #' If "false", R will disable CLI. If NULL, R will not modify the ini file. +#' @param path Path to 'OpenChrom' executable (Optional). The supplied path will +#' overwrite the current path. #' @importFrom utils read.table write.table #' @return Returns path to OpenChrom command-line application #' @author Ethan Bass -#' @noRd +#' @export -configure_openchrom <- function(cli = c(NULL, "true", "false")){ +configure_openchrom <- function(cli = c(NULL, "true", "false"), path = NULL){ cli <- match.arg(cli, c(NULL, "true", "false")) - path_parser <- readLines(system.file("shell/path_to_openchrom_commandline.txt", package = 'chromConverter')) - if (path_parser == "NULL"){ - path_parser <- switch(.Platform$OS.type, - unix = "/Applications/Eclipse.app/Contents/MacOS/openchrom", - windows = fs::path(fs::path_home(), "AppData/Local/Programs/OpenChrom"), - linux = "/snap/bin/openchrom" - ) - writeLines(path_parser, - con = system.file('shell/path_to_openchrom_commandline.txt', package='chromConverter')) + if (is.null(path)){ + path_parser <- readLines(system.file("shell/path_to_openchrom_commandline.txt", package = 'chromConverter')) + if (path_parser == "NULL"){ + path_parser <- switch(.Platform$OS.type, + unix = "/Applications/Eclipse.app/Contents/MacOS/openchrom", + windows = fs::path(fs::path_home(), "AppData/Local/Programs/OpenChrom"), + linux = "/snap/bin/openchrom" + ) + } + } else{ + path_parser <- path } + writeLines(path_parser, + con = system.file('shell/path_to_openchrom_commandline.txt', package='chromConverter')) + if (!file.exists(path_parser)){ warning("OpenChrom not found!", immediate. = TRUE) path_parser <- readline(prompt="Please provide path to `OpenChrom` command line):") @@ -153,24 +165,25 @@ configure_openchrom <- function(cli = c(NULL, "true", "false")){ "unix" = paste0(gsub("MacOS/openchrom", "", path_parser), "Eclipse/openchrom.ini"), "linux" = paste0(path_parser, ".uni"), "windows" = paste0(gsub(".exe", "", path_parser), ".ini")) - ini <- readLines(path_ini) - cli_index <- grep("-Denable.cli.support",ini) - ini_split <- strsplit(ini[cli_index],"=")[[1]] - cli_tf <- ini_split[2] - if(cli_tf == "false"){ - message(" The OpenChrom command-line interface is turned off! - Update `openchrom.ini` to activate the command-line interface (y/n)? - (Warning: This will deactivate the GUI on your OpenChrom installation!)") - ans <- readline() - if (ans %in% c("y","Y", "yes", "Yes", "YES")){ - cli <- "true" - } else{ - stop("-Denable.cli.support must be enabled to use the OpenChrom parsers from R.") + if (is.null(cli)){ + ini <- readLines(path_ini) + cli_index <- grep("-Denable.cli.support", ini) + ini_split <- strsplit(ini[cli_index], "=")[[1]] + cli_tf <- ini_split[2] + if (cli_tf == "false"){ + message(" The OpenChrom command-line interface is turned off! + Update `openchrom.ini` to activate the command-line interface (y/n)? + (Warning: This will deactivate the GUI on your OpenChrom installation!)") + ans <- readline() + if (ans %in% c("y","Y", "yes", "Yes", "YES")){ + cli <- "true" + } else{ + stop("-Denable.cli.support must be enabled to use the OpenChrom parsers from R.") + } } - } - if (cli %in% c("true", "false")){ + } else if (cli %in% c("true", "false")){ ini_split[2] <- cli - ini[cli_index] <- paste(ini_split, collapse="=") + ini[cli_index] <- paste(ini_split, collapse = "=") writeLines(ini, path_ini) } path_parser[1] diff --git a/R/read_cdf.R b/R/read_cdf.R index 461b2ce..322a792 100644 --- a/R/read_cdf.R +++ b/R/read_cdf.R @@ -6,6 +6,7 @@ #' @param what Whether to extract \code{chromatogram} and/or \code{peak_table}. #' @return A chromatogram in the format specified by \code{format_out}. #' (retention time x wavelength). +#' @author Ethan Bass #' @export read_cdf <- function(file, format_out = c("matrix","data.frame"), diff --git a/R/read_chroms.R b/R/read_chroms.R index ff78487..1eca1c7 100644 --- a/R/read_chroms.R +++ b/R/read_chroms.R @@ -120,7 +120,8 @@ read_chroms <- function(paths, find_files, if (parser == ""){ parser <- check_parser(format_in, find = TRUE) } - export_format <- match.arg(export_format, c("csv", "chemstation_csv", "cdf", "mzml", "animl")) + export_format <- match.arg(export_format, choices = + c("csv", "chemstation_csv", "cdf", "mzml", "animl")) check_parser(format_in, parser) if (parser != "openchrom" && !(export_format %in% c("csv", "chemstation_csv", "cdf"))) stop("The selected export format is currently only supported by `openchrom` parsers.") diff --git a/R/read_mdf.R b/R/read_mdf.R index 0d5408e..cc46cec 100644 --- a/R/read_mdf.R +++ b/R/read_mdf.R @@ -5,6 +5,7 @@ #' @param read_metadata Whether to read metadata from file. #' @return A chromatogram in the format specified by \code{format_out}. #' (retention time x wavelength). +#' @author Ethan Bass #' @export read_mdf <- function(file, format_out = c("matrix","data.frame"), diff --git a/R/read_thermoraw.R b/R/read_thermoraw.R index 78eb2b9..a50decc 100644 --- a/R/read_thermoraw.R +++ b/R/read_thermoraw.R @@ -21,7 +21,7 @@ #' @references #' Hulstaert Niels, Jim Shofstahl, Timo Sachsenberg, Mathias Walzer, #' Harald Barsnes, Lennart Martens, and Yasset Perez-Riverol. -#' “=ThermoRawFileParser: Modular, Scalable, and Cross-Platform RAW File Conversion.” +#' ThermoRawFileParser: Modular, Scalable, and Cross-Platform RAW File Conversion. #' \emph{Journal of Proteome Research} \bold{19}, no. 1 (January 3, 2020): 537–42. #' \doi{10.1021/acs.jproteome.9b00328}. #' @export read_thermoraw diff --git a/R/utils.R b/R/utils.R index f27f8c3..6392e6d 100644 --- a/R/utils.R +++ b/R/utils.R @@ -85,29 +85,6 @@ set_temp_directory <- function(){ } } -#' Check path -#' Check that path is properly formatted. -#' @param path path as character string -#' @noRd -check_path <- function(path){ - # check for leading slash - if (.Platform$OS.type %in% c("unix","linux")){ - if (!(substr(path,1,1) %in% c("/", "~"))){ - path <- paste0("/", path) - } - } - - # check for trailing slash - n <- nchar(path) - if (substr(path, n, n) != "/"){ - path <- paste0(path, "/") - } - if (.Platform$OS.type == "windows"){ - path <- gsub("/", "\\\\", path) - } - path -} - #' Extract header from Shimadzu ascii files #' @noRd extract_header <- function(x, chrom.idx, sep){ diff --git a/R/write_chroms.R b/R/write_chroms.R index 0981a4d..2167018 100644 --- a/R/write_chroms.R +++ b/R/write_chroms.R @@ -1,4 +1,5 @@ #' Export chromatograms as csvs +#' @author Ethan Bass #' @noRd export_csvs <- function(data, path_out, fileEncoding = "utf8", row.names = TRUE){ sapply(seq_along(data), function(i){ @@ -8,6 +9,7 @@ export_csvs <- function(data, path_out, fileEncoding = "utf8", row.names = TRUE) } #' Export chromatograms as CDFs +#' @author Ethan Bass #' @noRd export_cdfs <- function(data, path_out){ sapply(seq_along(data), function(i){ @@ -17,6 +19,7 @@ export_cdfs <- function(data, path_out){ } #' Write CDF file from chromatogram +#' @author Ethan Bass #' @noRd write_cdf <- function(x, sample_name, path_out){ check_for_pkg("ncdf4") @@ -91,6 +94,8 @@ nc_add_global_attributes <- function(nc, meta, sample_name){ } } +#' Format metadata for CDF +#' @author Ethan Bass #' @noRd format_metadata_for_cdf <- function(x){ # datetime_str <- x[which(x$Property=="Time"),"Value"] diff --git a/README.md b/README.md index 029880d..c41d383 100644 --- a/README.md +++ b/README.md @@ -90,8 +90,7 @@ Thermo RAW files can be converted by calling the [ThermoRawFileParser](https://g 1) Download OpenChrom from the [website](https://lablicate.com/platform/openchrom/download) and place it into a directory of your choice. 2) If you intend to use the GUI in the future, it is recommended to make a separate copy of OpenChrom for command-line use. -3) Follow the [instructions](https://github.com/OpenChrom/openchrom/wiki/CLI) to activate OpenChrom's command-line interface. Alternatively, the command-line option can be activated from R by calling `configure_openchrom_parser(cli = "true")` or by calling the OpenChrom parser and following the prompts. -4) Call `read_chroms` with `parser = "openchrom"`. The first time you call the parser, it will ask you to provide the path to your local installation of OpenChrom. The path will then be saved for future use. If the command-line interface is disabled, you will be given the option to automatically activate the command-line. +3) Call `read_chroms` with `parser = "openchrom"`. The first time you call the parser, you may be asked to provide the path to your local installation of OpenChrom. The path will then be saved for future use. If the command-line interface is disabled, you will be given the option to automatically activate the command-line. Alternatively, the command-line option can be activated from R by calling `configure_openchrom(cli = "true")` or following the [instructions](https://github.com/OpenChrom/openchrom/wiki/CLI) to manually activate the CLI. This process can be reversed using the same function: e.g. `configure_openchrom(cli = "false"). To specify an OpenChrom executable in a non-standard location, call `configure_openchrom` with the `path` argument, e.g. `configure_openchrom(cli = "true", path="path_to_openchrom_executable"). ### Usage diff --git a/man/call_openchrom.Rd b/man/call_openchrom.Rd index ccd3a2a..2e7a9fb 100644 --- a/man/call_openchrom.Rd +++ b/man/call_openchrom.Rd @@ -54,6 +54,12 @@ call from R. by \code{export_format} in the folder specified by \code{path_out}. } +\references{ +Wenig, Philip and Odermatt, Juergen. OpenChrom: A Cross-Platform Open Source +Software for the Mass Spectrometric Analysis of Chromatographic Data. \emph{ +BMC Bioinformatics} \bold{11}, no. 1 (July 30, 2010): 405. \doi{ +10.1186/1471-2105-11-405}. +} \author{ Ethan Bass } diff --git a/man/configure_openchrom.Rd b/man/configure_openchrom.Rd new file mode 100644 index 0000000..6485d12 --- /dev/null +++ b/man/configure_openchrom.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/call_openchrom.R +\name{configure_openchrom} +\alias{configure_openchrom} +\title{Configure OpenChrom parser} +\usage{ +configure_openchrom(cli = c(NULL, "true", "false"), path = NULL) +} +\arguments{ +\item{cli}{Defaults to NULL. If "true", R will rewrite openchrom ini file to enable CLI. +If "false", R will disable CLI. If NULL, R will not modify the ini file.} + +\item{path}{Path to 'OpenChrom' executable (Optional). The supplied path will +overwrite the current path.} +} +\value{ +Returns path to OpenChrom command-line application +} +\description{ +Configure OpenChrom parser +} +\author{ +Ethan Bass +} diff --git a/man/read_cdf.Rd b/man/read_cdf.Rd index a2a7649..84ed008 100644 --- a/man/read_cdf.Rd +++ b/man/read_cdf.Rd @@ -30,3 +30,6 @@ A chromatogram in the format specified by \code{format_out}. \description{ Read CDF file } +\author{ +Ethan Bass +} diff --git a/man/read_mdf.Rd b/man/read_mdf.Rd index 97d6f5a..c783252 100644 --- a/man/read_mdf.Rd +++ b/man/read_mdf.Rd @@ -27,3 +27,6 @@ A chromatogram in the format specified by \code{format_out}. \description{ Read MDF files into R } +\author{ +Ethan Bass +} diff --git a/man/read_thermoraw.Rd b/man/read_thermoraw.Rd index 8a9da64..7a08f73 100644 --- a/man/read_thermoraw.Rd +++ b/man/read_thermoraw.Rd @@ -44,7 +44,7 @@ read_thermoraw(path) \references{ Hulstaert Niels, Jim Shofstahl, Timo Sachsenberg, Mathias Walzer, Harald Barsnes, Lennart Martens, and Yasset Perez-Riverol. -“=ThermoRawFileParser: Modular, Scalable, and Cross-Platform RAW File Conversion.” +ThermoRawFileParser: Modular, Scalable, and Cross-Platform RAW File Conversion. \emph{Journal of Proteome Research} \bold{19}, no. 1 (January 3, 2020): 537–42. \doi{10.1021/acs.jproteome.9b00328}. } From b7b647131d2a3b43cf1d1773b523458b15fdd889 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Sat, 22 Apr 2023 12:40:14 -0400 Subject: [PATCH 03/28] Update NEWS.md --- NEWS.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 3672e19..b6e470a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,14 +2,17 @@ * Added parser for ANDI chrom `cdf` files through the `read_cdf` function. * Added parser for 'Lumex' `.mdf` files through the `read_mdf` function. -* Updated `configure_openchrom` for better discovery of 'OpenChrom' path and added `path` argument for directly specifying the path to 'OpenChrom'. * Added additional options for file exports. New options for writing `chemstation_csv` (utf-16) and `cdf` files through `read_chroms`. * Added `wide` and `long` `data_format` options for 2D data, such that the `wide` format option writes retention times as rownames of the matrix or data.frame while the `long` format writes retention times as the first column of the object. +* Added preliminary support for automatic filetype detection by `read_chroms` when +providing direct paths to files (i.e. when `find_files == FALSE`). +* Updated `configure_openchrom` for better discovery of 'OpenChrom' path and added `path` argument for directly specifying the path to 'OpenChrom'. * Slightly restructured metadata fields. Added `source_file` field to track data origin. +* Standardized datetime stamps so they are always converted to POSIXct format. * Fixed bug causing sloppy 'Chemstation' FID metadata. * Fixed bug that caused padding of 'Chemstation 130' files with extra zeros. * Use `fs` package for parsing paths, eliminating buggy `check_paths` function. From f5d2792e51e2549e2338060da0d4f9cde5b924f0 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Sat, 22 Apr 2023 21:11:56 -0400 Subject: [PATCH 04/28] Update call_openchrom.R --- R/call_openchrom.R | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/R/call_openchrom.R b/R/call_openchrom.R index fc40a8a..8fa4829 100644 --- a/R/call_openchrom.R +++ b/R/call_openchrom.R @@ -56,16 +56,24 @@ call_openchrom <- function(files, path_out, format_in, stop("'path_out' not found. Make sure directory exists.") } openchrom_path <- configure_openchrom() - path_xml <- write_openchrom_batchfile(files = files, path_out=path_out, format_in = format_in, - export_format = export_format) + path_xml <- write_openchrom_batchfile(files = files, path_out = path_out, + format_in = format_in, + export_format = export_format) system(paste0(openchrom_path, " -nosplash -cli -batchfile ", path_xml)) - new_files <- paste0(path_out, sapply(strsplit(basename(files), "\\."), function(x) x[1]), ".", export_format) + new_files <- fs::path(path_out, + fs::path_ext_remove(fs::path_file(files)), + ext = switch(export_format, "animl" = "animl", + "csv" = "csv", "cdf" = "CDF", + "mzml" = "mzML")) if (return_paths){ new_files } else{ - if (export_format == "csv"){ - lapply(new_files, read.csv) - } + file_reader <- switch(export_format, + "csv" = read.csv, + "cdf" = read_cdf, + "animl" = warning("An animl parser is not currently available in chromConverter"), + "mzml" = read_mzml) + lapply(new_files, file_reader) } } @@ -118,7 +126,7 @@ write_openchrom_batchfile <- function(files, path_out, "animl" = wsd_animl_converter) } x %>% xml_children %>% .[[4]] %>% xml_add_child(.value=gsub("path_out", path_out, parser)) - path_xml <- paste0(path_out, "batchfile_", strftime(Sys.time(),format = "%Y-%m-%d_%H-%M-%S"), ".xml") + path_xml <- fs::path(path_out, paste0("batchfile_", strftime(Sys.time(),format = "%Y-%m-%d_%H-%M-%S")), ext = "xml") write_xml(x, file = path_xml) path_xml } @@ -135,8 +143,8 @@ write_openchrom_batchfile <- function(files, path_out, #' @author Ethan Bass #' @export -configure_openchrom <- function(cli = c(NULL, "true", "false"), path = NULL){ - cli <- match.arg(cli, c(NULL, "true", "false")) +configure_openchrom <- function(cli = c("null", "true", "false"), path = NULL){ + cli <- match.arg(cli, c("null", "true", "false")) if (is.null(path)){ path_parser <- readLines(system.file("shell/path_to_openchrom_commandline.txt", package = 'chromConverter')) if (path_parser == "NULL"){ @@ -165,11 +173,11 @@ configure_openchrom <- function(cli = c(NULL, "true", "false"), path = NULL){ "unix" = paste0(gsub("MacOS/openchrom", "", path_parser), "Eclipse/openchrom.ini"), "linux" = paste0(path_parser, ".uni"), "windows" = paste0(gsub(".exe", "", path_parser), ".ini")) + ini <- readLines(path_ini) + cli_index <- grep("-Denable.cli.support", ini) + ini_split <- strsplit(ini[cli_index], "=")[[1]] + cli_tf <- ini_split[2] if (is.null(cli)){ - ini <- readLines(path_ini) - cli_index <- grep("-Denable.cli.support", ini) - ini_split <- strsplit(ini[cli_index], "=")[[1]] - cli_tf <- ini_split[2] if (cli_tf == "false"){ message(" The OpenChrom command-line interface is turned off! Update `openchrom.ini` to activate the command-line interface (y/n)? From 3b50db541c270b8ee6f456d1d5de4f2acb66b102 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Sat, 22 Apr 2023 22:38:54 -0400 Subject: [PATCH 05/28] windows bugs --- R/call_openchrom.R | 18 +++++++++++------- R/write_chroms.R | 5 +++-- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/R/call_openchrom.R b/R/call_openchrom.R index 8fa4829..717effd 100644 --- a/R/call_openchrom.R +++ b/R/call_openchrom.R @@ -143,14 +143,14 @@ write_openchrom_batchfile <- function(files, path_out, #' @author Ethan Bass #' @export -configure_openchrom <- function(cli = c("null", "true", "false"), path = NULL){ - cli <- match.arg(cli, c("null", "true", "false")) +configure_openchrom <- function(cli = c("null", "true", "false", "status"), path = NULL){ + cli <- match.arg(cli, c("null", "true", "false", "status")) if (is.null(path)){ path_parser <- readLines(system.file("shell/path_to_openchrom_commandline.txt", package = 'chromConverter')) if (path_parser == "NULL"){ path_parser <- switch(.Platform$OS.type, unix = "/Applications/Eclipse.app/Contents/MacOS/openchrom", - windows = fs::path(fs::path_home(), "AppData/Local/Programs/OpenChrom"), + windows = fs::path(fs::path_home(), "AppData/Local/Programs/OpenChrom/openchrom.exe"), linux = "/snap/bin/openchrom" ) } @@ -176,9 +176,10 @@ configure_openchrom <- function(cli = c("null", "true", "false"), path = NULL){ ini <- readLines(path_ini) cli_index <- grep("-Denable.cli.support", ini) ini_split <- strsplit(ini[cli_index], "=")[[1]] - cli_tf <- ini_split[2] - if (is.null(cli)){ - if (cli_tf == "false"){ + cli_bool <- ini_split[2] + + if (cli == "null"){ + if (cli_bool == "false"){ message(" The OpenChrom command-line interface is turned off! Update `openchrom.ini` to activate the command-line interface (y/n)? (Warning: This will deactivate the GUI on your OpenChrom installation!)") @@ -189,7 +190,10 @@ configure_openchrom <- function(cli = c("null", "true", "false"), path = NULL){ stop("-Denable.cli.support must be enabled to use the OpenChrom parsers from R.") } } - } else if (cli %in% c("true", "false")){ + } else if (cli == "status"){ + return(cli_bool) + } + if (cli %in% c("true", "false")){ ini_split[2] <- cli ini[cli_index] <- paste(ini_split, collapse = "=") writeLines(ini, path_ini) diff --git a/R/write_chroms.R b/R/write_chroms.R index 2167018..54d4bb1 100644 --- a/R/write_chroms.R +++ b/R/write_chroms.R @@ -106,11 +106,12 @@ format_metadata_for_cdf <- function(x){ "sec" = "Seconds", "seconds" = "Seconds", "min" = "Minutes", "minutes" = "Minutes", "default" = "Minutes") - + get_nc_version <- switch(.Platform$OS.type, "windows" = ) meta <- list(dataset_completeness = "C1", aia_template_revision = "1.0", protocol_template_revision = "1.0", - netcdf_revision = system("nc-config --version", intern = TRUE), + netcdf_revision = stringr::str_extract(ncdf4::nc_version(), + "(?<=library version\\s)\\d+\\.\\d+\\.\\d+") administrative_comments = paste("Collected on", attr(x, "instrument")), languages = "English only", converter_name = "chromconverter", From 48004bd113d4a3e0fa6e0854a77babfd0c5ee973 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Sat, 22 Apr 2023 22:38:54 -0400 Subject: [PATCH 06/28] fix windows bugs Update write_chroms.R --- R/call_openchrom.R | 18 +++++++++++------- R/write_chroms.R | 5 +++-- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/R/call_openchrom.R b/R/call_openchrom.R index 8fa4829..717effd 100644 --- a/R/call_openchrom.R +++ b/R/call_openchrom.R @@ -143,14 +143,14 @@ write_openchrom_batchfile <- function(files, path_out, #' @author Ethan Bass #' @export -configure_openchrom <- function(cli = c("null", "true", "false"), path = NULL){ - cli <- match.arg(cli, c("null", "true", "false")) +configure_openchrom <- function(cli = c("null", "true", "false", "status"), path = NULL){ + cli <- match.arg(cli, c("null", "true", "false", "status")) if (is.null(path)){ path_parser <- readLines(system.file("shell/path_to_openchrom_commandline.txt", package = 'chromConverter')) if (path_parser == "NULL"){ path_parser <- switch(.Platform$OS.type, unix = "/Applications/Eclipse.app/Contents/MacOS/openchrom", - windows = fs::path(fs::path_home(), "AppData/Local/Programs/OpenChrom"), + windows = fs::path(fs::path_home(), "AppData/Local/Programs/OpenChrom/openchrom.exe"), linux = "/snap/bin/openchrom" ) } @@ -176,9 +176,10 @@ configure_openchrom <- function(cli = c("null", "true", "false"), path = NULL){ ini <- readLines(path_ini) cli_index <- grep("-Denable.cli.support", ini) ini_split <- strsplit(ini[cli_index], "=")[[1]] - cli_tf <- ini_split[2] - if (is.null(cli)){ - if (cli_tf == "false"){ + cli_bool <- ini_split[2] + + if (cli == "null"){ + if (cli_bool == "false"){ message(" The OpenChrom command-line interface is turned off! Update `openchrom.ini` to activate the command-line interface (y/n)? (Warning: This will deactivate the GUI on your OpenChrom installation!)") @@ -189,7 +190,10 @@ configure_openchrom <- function(cli = c("null", "true", "false"), path = NULL){ stop("-Denable.cli.support must be enabled to use the OpenChrom parsers from R.") } } - } else if (cli %in% c("true", "false")){ + } else if (cli == "status"){ + return(cli_bool) + } + if (cli %in% c("true", "false")){ ini_split[2] <- cli ini[cli_index] <- paste(ini_split, collapse = "=") writeLines(ini, path_ini) diff --git a/R/write_chroms.R b/R/write_chroms.R index 2167018..0ed4549 100644 --- a/R/write_chroms.R +++ b/R/write_chroms.R @@ -106,11 +106,12 @@ format_metadata_for_cdf <- function(x){ "sec" = "Seconds", "seconds" = "Seconds", "min" = "Minutes", "minutes" = "Minutes", "default" = "Minutes") - + get_nc_version <- switch(.Platform$OS.type, "windows" = ) meta <- list(dataset_completeness = "C1", aia_template_revision = "1.0", protocol_template_revision = "1.0", - netcdf_revision = system("nc-config --version", intern = TRUE), + netcdf_revision = paste("netCDF", stringr::str_extract(ncdf4::nc_version(), + "(?<=library version\\s)\\d+\\.\\d+\\.\\d+")), administrative_comments = paste("Collected on", attr(x, "instrument")), languages = "English only", converter_name = "chromconverter", From 516ce1f32b3e17228c084a2d0a7ffcf0cbdfd4c5 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Sat, 22 Apr 2023 22:40:00 -0400 Subject: [PATCH 07/28] Update write_chroms.R Update configure_openchrom.Rd --- R/write_chroms.R | 4 ++-- man/configure_openchrom.Rd | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/write_chroms.R b/R/write_chroms.R index 54d4bb1..0ed4549 100644 --- a/R/write_chroms.R +++ b/R/write_chroms.R @@ -110,8 +110,8 @@ format_metadata_for_cdf <- function(x){ meta <- list(dataset_completeness = "C1", aia_template_revision = "1.0", protocol_template_revision = "1.0", - netcdf_revision = stringr::str_extract(ncdf4::nc_version(), - "(?<=library version\\s)\\d+\\.\\d+\\.\\d+") + netcdf_revision = paste("netCDF", stringr::str_extract(ncdf4::nc_version(), + "(?<=library version\\s)\\d+\\.\\d+\\.\\d+")), administrative_comments = paste("Collected on", attr(x, "instrument")), languages = "English only", converter_name = "chromconverter", diff --git a/man/configure_openchrom.Rd b/man/configure_openchrom.Rd index 6485d12..671b830 100644 --- a/man/configure_openchrom.Rd +++ b/man/configure_openchrom.Rd @@ -4,7 +4,7 @@ \alias{configure_openchrom} \title{Configure OpenChrom parser} \usage{ -configure_openchrom(cli = c(NULL, "true", "false"), path = NULL) +configure_openchrom(cli = c("null", "true", "false", "status"), path = NULL) } \arguments{ \item{cli}{Defaults to NULL. If "true", R will rewrite openchrom ini file to enable CLI. From b9056bdf32e86a2fcdcc237934934a1804d80a31 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Mon, 24 Apr 2023 21:47:45 -0400 Subject: [PATCH 08/28] fix check_for_pkg return_boolean --- R/utils.R | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/R/utils.R b/R/utils.R index 6392e6d..6df2f7a 100644 --- a/R/utils.R +++ b/R/utils.R @@ -108,16 +108,15 @@ extract_header <- function(x, chrom.idx, sep){ #' @keywords internal check_for_pkg <- function(pkg, return_boolean = FALSE){ pkg_exists <- requireNamespace(pkg, quietly = TRUE) - if (!pkg_exists) { + if (return_boolean){ + return(pkg_exists) + } else if (!pkg_exists) { stop(paste( "Package", sQuote(pkg), "must be installed to perform this action: try", paste0("`install.packages('", pkg, "')`.")), call. = FALSE ) } - if (return_boolean){ - pkg_exists - } } #' Choose apply function From cf0bc2cde2d723a3b7450e7585ecc03534cffaf8 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Mon, 1 May 2023 18:10:04 -0400 Subject: [PATCH 09/28] fix: read_mdf failure to find start of data array --- R/read_mdf.R | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/R/read_mdf.R b/R/read_mdf.R index cc46cec..f30ca43 100644 --- a/R/read_mdf.R +++ b/R/read_mdf.R @@ -3,8 +3,8 @@ #' @param format_out R format. Either \code{matrix} or \code{data.frame}. #' @param data_format Whether to return data in \code{wide} or \code{long} format. #' @param read_metadata Whether to read metadata from file. -#' @return A chromatogram in the format specified by \code{format_out}. -#' (retention time x wavelength). +#' @return A chromatogram in the format specified by the \code{format_out} and +#' \code{data_format} arguments (retention time x wavelength). #' @author Ethan Bass #' @export @@ -24,8 +24,9 @@ read_mdf <- function(file, format_out = c("matrix","data.frame"), array2_len <- as.numeric(meta[which(meta$Group == "Array current" & meta$Property == "Size"),"Value"]) # read array 1 - seek(f, 1018, "start") - seek(f, 1018, "start") + end_metadata <- seek(f,NA,"current") - 1 + seek(f, end_metadata, "start") + seek(f, end_metadata, "start") photo_array <- readBin(f, "double", size=8, n=array1_len) # read array 2 From abe694f77e1033ece0895796f2096a34deff6906 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Mon, 1 May 2023 22:35:12 -0400 Subject: [PATCH 10/28] fix: problem reading unicode metadata chars --- R/read_mdf.R | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/R/read_mdf.R b/R/read_mdf.R index f30ca43..896750a 100644 --- a/R/read_mdf.R +++ b/R/read_mdf.R @@ -24,13 +24,13 @@ read_mdf <- function(file, format_out = c("matrix","data.frame"), array2_len <- as.numeric(meta[which(meta$Group == "Array current" & meta$Property == "Size"),"Value"]) # read array 1 - end_metadata <- seek(f,NA,"current") - 1 + end_metadata <- seek(f, NA, "current") - 1 seek(f, end_metadata, "start") seek(f, end_metadata, "start") - photo_array <- readBin(f, "double", size=8, n=array1_len) + photo_array <- readBin(f, "double", size = 8, n = array1_len) # read array 2 - current_array <-readBin(f, "integer", size=4, n=array2_len) + current_array <- readBin(f, "integer", size = 4, n = array2_len) # close file close(f) @@ -44,9 +44,11 @@ read_mdf <- function(file, format_out = c("matrix","data.frame"), # construct data.frame if (data_format == "wide"){ - data <- data.frame(Intensity = photo_array, Current = current_array, row.names=time_array) + data <- data.frame(Intensity = photo_array, Current = current_array, + row.names = time_array) } else if (data_format == "long"){ - data <- data.frame(RT = time_array, Intensity = photo_array, Current = current_array) + data <- data.frame(RT = time_array, Intensity = photo_array, + Current = current_array) } if (data_format == "long"){ @@ -66,6 +68,7 @@ read_mdf <- function(file, format_out = c("matrix","data.frame"), #' Extract MDF metadata #' @noRd extract_mdf_metadata <- function(x){ + x <- stringr::str_replace_all(x, "\xb5", "μ") ma <- strsplit(x, "\n")[[1]] splitAt <- function(x, pos) unname(split(x, cumsum(seq_along(x) %in% pos))) x2 <- splitAt(ma, grep("\\[*\\]", ma)) From 15be11a26a4e43130b19477b36ff68b5467fb964 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Mon, 1 May 2023 22:35:12 -0400 Subject: [PATCH 11/28] fix: problem reading unicode metadata chars Update read_mdf.R --- R/read_mdf.R | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/R/read_mdf.R b/R/read_mdf.R index f30ca43..4abdebb 100644 --- a/R/read_mdf.R +++ b/R/read_mdf.R @@ -24,13 +24,13 @@ read_mdf <- function(file, format_out = c("matrix","data.frame"), array2_len <- as.numeric(meta[which(meta$Group == "Array current" & meta$Property == "Size"),"Value"]) # read array 1 - end_metadata <- seek(f,NA,"current") - 1 + end_metadata <- seek(f, NA, "current") - 1 seek(f, end_metadata, "start") seek(f, end_metadata, "start") - photo_array <- readBin(f, "double", size=8, n=array1_len) + photo_array <- readBin(f, "double", size = 8, n = array1_len) # read array 2 - current_array <-readBin(f, "integer", size=4, n=array2_len) + current_array <- readBin(f, "integer", size = 4, n = array2_len) # close file close(f) @@ -44,9 +44,11 @@ read_mdf <- function(file, format_out = c("matrix","data.frame"), # construct data.frame if (data_format == "wide"){ - data <- data.frame(Intensity = photo_array, Current = current_array, row.names=time_array) + data <- data.frame(Intensity = photo_array, Current = current_array, + row.names = time_array) } else if (data_format == "long"){ - data <- data.frame(RT = time_array, Intensity = photo_array, Current = current_array) + data <- data.frame(RT = time_array, Intensity = photo_array, + Current = current_array) } if (data_format == "long"){ @@ -66,6 +68,7 @@ read_mdf <- function(file, format_out = c("matrix","data.frame"), #' Extract MDF metadata #' @noRd extract_mdf_metadata <- function(x){ + x <- stringr::str_replace_all(x, "\xb5", "micro") ma <- strsplit(x, "\n")[[1]] splitAt <- function(x, pos) unname(split(x, cumsum(seq_along(x) %in% pos))) x2 <- splitAt(ma, grep("\\[*\\]", ma)) From ed4d0dff6d49abfa7b6b55e642a73f5fd9b86053 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Tue, 2 May 2023 15:25:35 -0400 Subject: [PATCH 12/28] docs: fix description --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 298d001..293db42 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -6,8 +6,8 @@ Authors@R: c( role = c("aut", "cre"), comment = c(ORCID = "0000-0002-6175-6739")), person(given = "James", family = "Dillon", role = c("ctb", "cph"), - comment = "Author and copyright holder of source code adapted from the - 'Chromatography Toolbox' for parsing Agilent FID files.") + comment = c("Author and copyright holder of source code adapted from the + 'Chromatography Toolbox' for parsing Agilent FID files.")) ) Maintainer: Ethan Bass Description: Reads chromatograms from binary formats into R objects. Currently supports conversion of 'Agilent ChemStation', 'Agilent MassHunter', and 'ThermoRaw' files as well as various text-based formats. Utilizes file parsers from external libraries, such as 'Aston' , 'Entab' , 'rainbow' , and 'ThermoRawFileParser' . From 1ffd8a5a120997e4b47609979a01fb84aaae5e4d Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Tue, 2 May 2023 15:26:16 -0400 Subject: [PATCH 13/28] docs: change name of rainbow_parsers file --- R/{rainbow_parser.R => call_rainbow.R} | 1 + man/call_rainbow.Rd | 2 +- man/configure_rainbow.Rd | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) rename R/{rainbow_parser.R => call_rainbow.R} (98%) diff --git a/R/rainbow_parser.R b/R/call_rainbow.R similarity index 98% rename from R/rainbow_parser.R rename to R/call_rainbow.R index 4117b75..b634598 100644 --- a/R/rainbow_parser.R +++ b/R/call_rainbow.R @@ -41,6 +41,7 @@ call_rainbow <- function(file, format_in = c("agilent_d", "waters_raw", "masshun "chemstation" = rb_parse_agilent$chemstation$parse_file, "chemstation_uv" = rb_parse_agilent$chemstation$parse_file, "chemstation_fid" = rb_parse_agilent$chemstation$parse_file, + "chemstation_ch" = rb_parse_agilent$chemstation$parse_file, "default" = rb_read$read) if (format_in %in% c("chemstation", "chemstation_uv", "chemstation_fid")){ by <- "single" diff --git a/man/call_rainbow.Rd b/man/call_rainbow.Rd index 1445d88..ca4fddb 100644 --- a/man/call_rainbow.Rd +++ b/man/call_rainbow.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/rainbow_parser.R +% Please edit documentation in R/call_rainbow.R \name{call_rainbow} \alias{call_rainbow} \title{Parse Agilent or Waters files with rainbow parser} diff --git a/man/configure_rainbow.Rd b/man/configure_rainbow.Rd index c8e6af3..570c190 100644 --- a/man/configure_rainbow.Rd +++ b/man/configure_rainbow.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/rainbow_parser.R +% Please edit documentation in R/call_rainbow.R \name{configure_rainbow} \alias{configure_rainbow} \title{Configure rainbow} From 3ed1aad1fa141089fb78e42e185e0bad5f1649d7 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Tue, 2 May 2023 15:28:02 -0400 Subject: [PATCH 14/28] docs: update read_cdf, read_mdf --- R/read_cdf.R | 7 +++++-- man/read_cdf.Rd | 9 ++++++--- man/read_mdf.Rd | 4 ++-- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/R/read_cdf.R b/R/read_cdf.R index 322a792..276084e 100644 --- a/R/read_cdf.R +++ b/R/read_cdf.R @@ -2,10 +2,13 @@ #' @param file path to file #' @param format_out R format. Either \code{matrix} or \code{data.frame}. #' @param data_format Whether to return data in \code{wide} or \code{long} format. +#' For 2D files, "long" format returns the retention time as the first column of +#' the data.frame or matrix while "wide" format returns the retention time as the +#' rownames of the object. #' @param read_metadata Whether to read metadata from file. #' @param what Whether to extract \code{chromatogram} and/or \code{peak_table}. -#' @return A chromatogram in the format specified by \code{format_out}. -#' (retention time x wavelength). +#' @return A chromatogram in the format specified by the \code{format_out} and +#' \code{data_format} arguments (retention time x wavelength). #' @author Ethan Bass #' @export diff --git a/man/read_cdf.Rd b/man/read_cdf.Rd index 84ed008..b666739 100644 --- a/man/read_cdf.Rd +++ b/man/read_cdf.Rd @@ -17,15 +17,18 @@ read_cdf( \item{format_out}{R format. Either \code{matrix} or \code{data.frame}.} -\item{data_format}{Whether to return data in \code{wide} or \code{long} format.} +\item{data_format}{Whether to return data in \code{wide} or \code{long} format. +For 2D files, "long" format returns the retention time as the first column of +the data.frame or matrix while "wide" format returns the retention time as the +rownames of the object.} \item{what}{Whether to extract \code{chromatogram} and/or \code{peak_table}.} \item{read_metadata}{Whether to read metadata from file.} } \value{ -A chromatogram in the format specified by \code{format_out}. -(retention time x wavelength). +A chromatogram in the format specified by the \code{format_out} and +\code{data_format} arguments (retention time x wavelength). } \description{ Read CDF file diff --git a/man/read_mdf.Rd b/man/read_mdf.Rd index c783252..b687d2b 100644 --- a/man/read_mdf.Rd +++ b/man/read_mdf.Rd @@ -21,8 +21,8 @@ read_mdf( \item{read_metadata}{Whether to read metadata from file.} } \value{ -A chromatogram in the format specified by \code{format_out}. -(retention time x wavelength). +A chromatogram in the format specified by the \code{format_out} and +\code{data_format} arguments (retention time x wavelength). } \description{ Read MDF files into R From 40bec537f0e9725c370d0b7bfab4ef5259605a48 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Tue, 2 May 2023 15:28:37 -0400 Subject: [PATCH 15/28] update write_chroms warning --- R/write_chroms.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/write_chroms.R b/R/write_chroms.R index 0ed4549..cefbc09 100644 --- a/R/write_chroms.R +++ b/R/write_chroms.R @@ -39,7 +39,7 @@ write_cdf <- function(x, sample_name, path_out){ filename <- fs::path_ext_remove(fs::path_file(sample_name)) file_out <- fs::path(path_out, filename, ext = "cdf") if (fs::file_exists(file_out)){ - warning("File already exists and will not be overwritten.", immediate. = TRUE) + warning(paste("File", sQuote(basename(file_out)), "already exists and will not be overwritten."), immediate. = TRUE) } # define dimensions point_number <- ncdf4::ncdim_def("point_number", "", From 93f199e09e46469f908de9e79ded649aa58f75ba Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Wed, 3 May 2023 09:52:56 -0400 Subject: [PATCH 16/28] style: add whitespace --- R/read_chemstation_uv.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/read_chemstation_uv.R b/R/read_chemstation_uv.R index 4bc0baf..6712bdc 100644 --- a/R/read_chemstation_uv.R +++ b/R/read_chemstation_uv.R @@ -78,7 +78,7 @@ read_chemstation_uv <- function(path, format_out = c("matrix","data.frame"), seek(f, offsets$data_start + 0x8) # Read and unpack wavelength information - wave_info <- readBin(f, integer(), n=3, size=2, endian="little") + wave_info <- readBin(f, integer(), n = 3, size = 2, endian = "little") lambda_start <- wave_info[1] %/% 20 lambda_end <- wave_info[2] %/% 20 delta_lambda <- wave_info[3] %/% 20 From bff9eae3c58695fb7523f2fb9a56472324f7ca6f Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Wed, 3 May 2023 09:57:44 -0400 Subject: [PATCH 17/28] docs: add Evan Shi as ctb cph --- DESCRIPTION | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 293db42..438b2ee 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -7,7 +7,10 @@ Authors@R: c( comment = c(ORCID = "0000-0002-6175-6739")), person(given = "James", family = "Dillon", role = c("ctb", "cph"), comment = c("Author and copyright holder of source code adapted from the - 'Chromatography Toolbox' for parsing Agilent FID files.")) + 'Chromatography Toolbox' for parsing 'Agilent' FID files.")), + person(given = "Evan", family = "Shi", role = c("ctb", "cph"), + comment = c("Author and copyright holder of source code adapted from 'rainbow' + for parsing 'Agilent' UV files.")) ) Maintainer: Ethan Bass Description: Reads chromatograms from binary formats into R objects. Currently supports conversion of 'Agilent ChemStation', 'Agilent MassHunter', and 'ThermoRaw' files as well as various text-based formats. Utilizes file parsers from external libraries, such as 'Aston' , 'Entab' , 'rainbow' , and 'ThermoRawFileParser' . From 6439c1773acfe5487008f7dc489267d439cb1e04 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Wed, 3 May 2023 09:58:34 -0400 Subject: [PATCH 18/28] feat: add "chemstation" as accepted format for entab --- R/utils.R | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/R/utils.R b/R/utils.R index 6df2f7a..9c9b7b9 100644 --- a/R/utils.R +++ b/R/utils.R @@ -9,10 +9,12 @@ check_parser <- function(format_in, parser=NULL, find = FALSE){ "chromeleon_uv", "mzml", "shimadzu_fid", "shimadzu_dad", "waters_arw", "mdf", "cdf"), - aston = c("chemstation_uv", "masshunter_dad", "other"), - entab = c("chemstation_uv", "chemstation_fid", "masshunter_dad", "thermoraw", "other"), - rainbow = c("chemstation_uv", "waters_raw", - "agilent_d", "chemstation", "chemstation_fid"), + aston = c("chemstation", "chemstation_uv", "masshunter_dad", "other"), + entab = c("chemstation", "chemstation_ch", "chemstation_fid", + "chemstation_uv", "masshunter_dad", "thermoraw", "other"), + rainbow = c("chemstation", "chemstation_ch", "chemstation_fid", + "chemstation_uv", "waters_raw", + "agilent_d"), thermoraw = c("thermoraw") ) if (find){ From 347e5231061246ababd058342fc325e827d1921d Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Thu, 4 May 2023 10:19:17 -0400 Subject: [PATCH 19/28] feat: read_varian_peaklist, v0.4.0 Update NEWS.md --- DESCRIPTION | 2 +- NAMESPACE | 1 + NEWS.md | 7 ++++--- R/read_peaklist.R | 24 ++++++++++++++++++++++++ inst/CITATION | 2 +- man/read_varian_peaklist.Rd | 19 +++++++++++++++++++ 6 files changed, 50 insertions(+), 5 deletions(-) create mode 100644 R/read_peaklist.R create mode 100644 man/read_varian_peaklist.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 438b2ee..0f19fa6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: chromConverter Title: Chromatographic File Converter -Version: 0.3.4 +Version: 0.4.0 Authors@R: c( person(given = "Ethan", family = "Bass", email = "ethanbass@gmail.com", role = c("aut", "cre"), diff --git a/NAMESPACE b/NAMESPACE index c35b441..4e30146 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,6 +17,7 @@ export(read_mdf) export(read_mzml) export(read_shimadzu) export(read_thermoraw) +export(read_varian_peaklist) export(read_waters_arw) export(sp_converter) export(uv_converter) diff --git a/NEWS.md b/NEWS.md index b6e470a..9ddc08b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,14 +1,15 @@ -## chromConverter 0.3.4 +## chromConverter 0.4.0 * Added parser for ANDI chrom `cdf` files through the `read_cdf` function. * Added parser for 'Lumex' `.mdf` files through the `read_mdf` function. * Added additional options for file exports. New options for writing -`chemstation_csv` (utf-16) and `cdf` files through `read_chroms`. +`chemstation_csv` (utf-16) and ANDI chrom `cdf` files through `read_chroms`. * Added `wide` and `long` `data_format` options for 2D data, such that the `wide` -format option writes retention times as rownames of the matrix or data.frame +format option writes retention times as rownames of the matrix or data.frame. while the `long` format writes retention times as the first column of the object. * Added preliminary support for automatic filetype detection by `read_chroms` when providing direct paths to files (i.e. when `find_files == FALSE`). +* Added `read_varian_peaklist` function for reading peak lists from 'Varian MS Workstation'. * Updated `configure_openchrom` for better discovery of 'OpenChrom' path and added `path` argument for directly specifying the path to 'OpenChrom'. * Slightly restructured metadata fields. Added `source_file` field to track data origin. diff --git a/R/read_peaklist.R b/R/read_peaklist.R new file mode 100644 index 0000000..9ce8676 --- /dev/null +++ b/R/read_peaklist.R @@ -0,0 +1,24 @@ +#' Read varian peaklist. +#' Read peak list from 'Varian MS Workstation'. +#' @param file Path to varian peaklist file. +#' @importFrom utils read.csv +#' @author Ethan Bass +#' @export + +read_varian_peaklist <- function(file){ + x <- read.csv(file, skip = 5, header = FALSE) + x$V1[x$V1 == ""] <- NA + x <- tidyr::fill(data = x, "V1", .direction = "down") + + column_names <- x[2,] + column_names[1] <- "compound" + colnames(x) <- column_names + + x <- x[-which(x$`Line#`=="Line#"),] + x <- x[-which(x$`Line#` == ""), ] + + x$Area <- as.numeric(x$Area) + x$Height <- as.numeric(x$Height) + x <- x[,-16] + x +} diff --git a/inst/CITATION b/inst/CITATION index aa3b367..6490cb6 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -5,7 +5,7 @@ citEntry( title = "chromConverter: chromatographic file converter", author = "Ethan Bass", year = "2022", - version = "version 0.3.3", + version = "version 0.4.0", doi = "10.5281/zenodo.6792521", url = "https://ethanbass.github.io/chromConverter/", textVersion = paste("Bass, E. (2022).", diff --git a/man/read_varian_peaklist.Rd b/man/read_varian_peaklist.Rd new file mode 100644 index 0000000..4e5a0f7 --- /dev/null +++ b/man/read_varian_peaklist.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/read_peaklist.R +\name{read_varian_peaklist} +\alias{read_varian_peaklist} +\title{Read varian peaklist. +Read peak list from 'Varian MS Workstation'.} +\usage{ +read_varian_peaklist(file) +} +\arguments{ +\item{file}{Path to varian peaklist file.} +} +\description{ +Read varian peaklist. +Read peak list from 'Varian MS Workstation'. +} +\author{ +Ethan Bass +} From c3014c46506186bf947ce1096d5cade3d9500d1a Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Thu, 4 May 2023 11:18:13 -0400 Subject: [PATCH 20/28] docs: update date --- README.md | 2 +- inst/CITATION | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c41d383..384a180 100644 --- a/README.md +++ b/README.md @@ -149,6 +149,6 @@ Contributions of source code, ideas, or documentation are very welcome. Please g You can cite chromConverter as follows: -Bass, E. (2022). chromConverter: chromatographic file converter. http://doi.org/10.5281/zenodo.6792521. +Bass, E. (2023). chromConverter: chromatographic file converter. http://doi.org/10.5281/zenodo.6792521. If you use external libraries to convert your files, please cite them as well in published work. diff --git a/inst/CITATION b/inst/CITATION index 6490cb6..3343464 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -4,7 +4,7 @@ citEntry( entry = "manual", title = "chromConverter: chromatographic file converter", author = "Ethan Bass", - year = "2022", + year = "2023", version = "version 0.4.0", doi = "10.5281/zenodo.6792521", url = "https://ethanbass.github.io/chromConverter/", From ae08310e61e7b2420a8f08bc2414bf4898cb8689 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Thu, 4 May 2023 13:53:53 -0400 Subject: [PATCH 21/28] Update r.yml with setup-r@v2 --- .github/workflows/r.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index 0e1cdd3..0f8451f 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -29,7 +29,7 @@ jobs: steps: - uses: actions/checkout@v3 - name: Set up R ${{ matrix.config.r }} - uses: r-lib/actions/setup-r@f57f1301a053485946083d7a45022b278929a78a + uses: r-lib/actions/setup-r@v2 with: r-version: ${{ matrix.r-version }} - name: Set up python ${{ matrix.config.python }} From d85a7e8a94d02c65f951e73610dbde7e38c36096 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Thu, 4 May 2023 18:04:24 -0400 Subject: [PATCH 22/28] Update r.yml --- .github/workflows/r.yml | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index 0f8451f..b7181f9 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -15,9 +15,13 @@ on: branches: [ master ] jobs: - build: - runs-on: macos-latest + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + strategy: + fail-fast: false matrix: config: - {os: macos-latest, r: 'release', python: '3.9'} @@ -26,6 +30,10 @@ jobs: - {os: ubuntu-latest, r: 'release', python: '3.9'} - {os: ubuntu-latest, r: 'oldrel-1', python: '3.9'} + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + steps: - uses: actions/checkout@v3 - name: Set up R ${{ matrix.config.r }} From a2c0f194606d0e20fa252a22bdf6003e680318ae Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Thu, 4 May 2023 18:08:26 -0400 Subject: [PATCH 23/28] Update r.yml --- .github/workflows/r.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index b7181f9..0a901d8 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -40,6 +40,8 @@ jobs: uses: r-lib/actions/setup-r@v2 with: r-version: ${{ matrix.r-version }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true - name: Set up python ${{ matrix.config.python }} uses: actions/setup-python@v3 with: From 0ddd388d623d320e940767daac754cc13a933e1f Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Thu, 4 May 2023 19:50:59 -0400 Subject: [PATCH 24/28] Update r.yml --- .github/workflows/r.yml | 49 ++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index fed11e5..297fef9 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -1,18 +1,12 @@ -# This workflow uses actions that are not certified by GitHub. -# They are provided by a third-party and are governed by -# separate terms of service, privacy policy, and support -# documentation. -# -# See https://github.com/r-lib/actions/tree/master/examples#readme for -# additional example workflows available for the R community. - -name: R - +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: [ master ] + branches: [main, master] pull_request: - branches: [ master ] + branches: [main, master] + +name: R-CMD-check jobs: R-CMD-check: @@ -24,11 +18,11 @@ jobs: fail-fast: false matrix: config: - - {os: macos-latest, r: 'release', python: '3.9'} - - {os: windows-latest, r: 'release', python: '3.9'} - - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release', python: '3.9'} - - {os: ubuntu-latest, r: 'release', python: '3.9'} - - {os: ubuntu-latest, r: 'oldrel-1', python: '3.9'} + - {os: macos-latest, r: 'release'} + - {os: windows-latest, r: 'release'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'oldrel-1'} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} @@ -36,16 +30,19 @@ jobs: steps: - uses: actions/checkout@v3 - - name: Set up R ${{ matrix.config.r }} - uses: r-lib/actions/setup-r@v2 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 with: - r-version: ${{ matrix.r-version }} + r-version: ${{ matrix.config.r }} http-user-agent: ${{ matrix.config.http-user-agent }} use-public-rspm: true - - name: Set up python ${{ matrix.config.python }} + + - name: Set up Python 3.9 uses: actions/setup-python@v3 with: - python-version: ${{ matrix.config.python }} + python-version: 3.9 - name: Install dependencies run: | @@ -57,8 +54,10 @@ jobs: reticulate::conda_create('r-reticulate', packages = c('python==3.9', 'numpy', 'scipy', 'pandas')) reticulate::conda_install('r-reticulate', packages = c('aston'), pip=TRUE) shell: Rscript {0} + - if: runner.os == 'macOS' run: echo "options(reticulate.conda_binary = reticulate:::miniconda_conda())" >> .Rprofile - - name: Check - run: rcmdcheck::rcmdcheck(args = "--no-manual", error_on = "error") - shell: Rscript {0} + + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true From 6ab2a5956c2fb2d128f98a9f96b165d8ed9dc780 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Mon, 8 May 2023 17:32:46 -0400 Subject: [PATCH 25/28] fix: test failure on ubuntu due to wrong letter case in filepath --- tests/testthat/test-read_chroms.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-read_chroms.R b/tests/testthat/test-read_chroms.R index 620f835..956c06f 100644 --- a/tests/testthat/test-read_chroms.R +++ b/tests/testthat/test-read_chroms.R @@ -1,6 +1,6 @@ library(testthat) -path_csv <- "testdata/DAD1.CSV" +path_csv <- "testdata/dad1.csv" path_uv <- "testdata/dad1.uv" x <- read_chroms(path_csv, format_in = "chemstation_csv", progress_bar = FALSE) @@ -41,7 +41,7 @@ test_that ("extract_metadata function works", { test_that("entab parser works", { skip_if_not_installed("entab") - file <- "testdata/DAD1.uv" + file <- "testdata/dad1.uv" x1 <- read_chroms(file, format_in = "chemstation_uv", parser = "entab", find_files = FALSE, read_metadata = TRUE, progress_bar = FALSE) From 14d1f3c00b4a0878a848728479e544fb1a3b8b6b Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Mon, 8 May 2023 18:13:54 -0400 Subject: [PATCH 26/28] rename dad1.csv --- tests/testthat/testdata/{DAD1.CSV => dad1.csv} | Bin 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/testthat/testdata/{DAD1.CSV => dad1.csv} (100%) diff --git a/tests/testthat/testdata/DAD1.CSV b/tests/testthat/testdata/dad1.csv similarity index 100% rename from tests/testthat/testdata/DAD1.CSV rename to tests/testthat/testdata/dad1.csv From 1419c397cbeb1443e1ec152117dcacaa14b532dc Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Mon, 8 May 2023 18:51:11 -0400 Subject: [PATCH 27/28] update NEWS --- NEWS.md | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/NEWS.md b/NEWS.md index 9ddc08b..fa2bf9c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,22 +1,24 @@ ## chromConverter 0.4.0 +### New features + * Added parser for ANDI chrom `cdf` files through the `read_cdf` function. * Added parser for 'Lumex' `.mdf` files through the `read_mdf` function. * Added additional options for file exports. New options for writing `chemstation_csv` (utf-16) and ANDI chrom `cdf` files through `read_chroms`. -* Added `wide` and `long` `data_format` options for 2D data, such that the `wide` -format option writes retention times as rownames of the matrix or data.frame. -while the `long` format writes retention times as the first column of the object. -* Added preliminary support for automatic filetype detection by `read_chroms` when -providing direct paths to files (i.e. when `find_files == FALSE`). +* Added preliminary support for automatic filetype detection by `read_chroms` when providing direct paths to files (i.e. when `find_files == FALSE`). * Added `read_varian_peaklist` function for reading peak lists from 'Varian MS Workstation'. + +### Other improvements and bug fixes: + +* Added `wide` and `long` `data_format` options for 2D data, such that the `wide` format option writes retention times as rownames of the matrix or data.frame. while the `long` format writes retention times as the first column of the object. * Updated `configure_openchrom` for better discovery of 'OpenChrom' path and added `path` argument for directly specifying the path to 'OpenChrom'. * Slightly restructured metadata fields. Added `source_file` field to track data origin. * Standardized datetime stamps so they are always converted to POSIXct format. +* Now use `fs` package for parsing paths, eliminating buggy `check_paths` function. * Fixed bug causing sloppy 'Chemstation' FID metadata. * Fixed bug that caused padding of 'Chemstation 130' files with extra zeros. -* Use `fs` package for parsing paths, eliminating buggy `check_paths` function. * Added additional tests. ## chromConverter 0.3.3 From a4c552ae3285d3839dbd32b28c5e56e0eb8454fa Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Mon, 8 May 2023 18:51:21 -0400 Subject: [PATCH 28/28] test ncdf on CI no bioconductor --- .github/workflows/r.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index 297fef9..4a60e4c 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -47,9 +47,8 @@ jobs: - name: Install dependencies run: | install.packages(c("remotes", "rcmdcheck")) + install.packages(c("ncdf4")) remotes::install_deps(dependencies = TRUE) - install.packages("BiocManager") - BiocManager::install("mzR") reticulate::install_miniconda() reticulate::conda_create('r-reticulate', packages = c('python==3.9', 'numpy', 'scipy', 'pandas')) reticulate::conda_install('r-reticulate', packages = c('aston'), pip=TRUE)