diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index d8407d7..41626b3 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -52,7 +52,7 @@ jobs: install.packages("chromConverterExtraTests", repos = "https://ethanbass.github.io/drat/") reticulate::install_miniconda() reticulate::conda_create('r-reticulate', packages = c('python==3.9', 'numpy', 'scipy', 'pandas')) - reticulate::conda_install('r-reticulate', packages = c('aston', "olefile"), pip=TRUE) + reticulate::conda_install('r-reticulate', packages = c('aston', "olefile", "rainbow-api"), pip=TRUE) shell: Rscript {0} - if: runner.os == 'macOS' diff --git a/DESCRIPTION b/DESCRIPTION index c98d9d3..19cfed3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: chromConverter Title: Chromatographic File Converter -Version: 0.5.0 +Version: 0.6.0 Authors@R: c( person(given = "Ethan", family = "Bass", email = "ethanbass@gmail.com", role = c("aut", "cre"), diff --git a/NAMESPACE b/NAMESPACE index d4c2cab..168b73c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,10 +3,8 @@ export(call_entab) export(call_openchrom) export(call_rainbow) -export(configure_aston) -export(configure_olefile) export(configure_openchrom) -export(configure_rainbow) +export(configure_python_environment) export(extract_metadata) export(read_agilent_dx) export(read_cdf) @@ -24,8 +22,10 @@ export(read_shimadzu_lcd) export(read_thermoraw) export(read_varian_peaklist) export(read_waters_arw) +export(read_waters_raw) export(sp_converter) export(uv_converter) +export(write_cdf) import(magrittr) import(reticulate) import(xml2) diff --git a/NEWS.md b/NEWS.md index a11e7d7..ce521b5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,14 +1,30 @@ +## chromConverter 0.6.0 + +* Added parser for reading ANDI MS (`.cdf`) files. +* Fixed parsing of Agilent MS files with 'entab' reader. +* Fixed `read_chemstation_ch` parser to correctly read "Mustang Chemstation" 179 files with 8-byte encoding. +* Re-factored `read_shimadzu` function and added support for new types of chromatograms (e.g. status, uv and total ion chromatograms). Added support for reading multiple types of chromatograms at once. +* Added support for reading MS spectra from 'Shimadzu' ascii files using `read_shimadzu`. +* Exported `write_cdf` and added additional arguments (`lambda` and `force`) for greater control by users. +* Added internal parser for 1D 'Waters RAW' chromatograms (`read_waters_raw`). +* Added `collapse` argument to `call_rainbow` and to collapse superfluous lists. +* Added `...` argument to `read_chroms` for supplying additional arguments to parsers. +* Added alias to `read_chroms` for reading `mzxml` files with `RaMS`. +* Added `precision` argument to `call_rainbow` to control number of digits "mz" values are rounded to. (Also changed default behavior so values are rounded to one decimal by default). +* Fixed bug in `read_shimadzu_lcd` on Windows due to issue with passing escaped paths to Python. +* Updated documentation of various functions. + ## chromConverter 0.5.0 ### New features * Added support for parallel processing through `pbapply` package. (**Note**: The `pbapply` package must be manually installed to enable parallel processing). -* Added internal parser for 'Agilent Chemstation' version 31 files (through `read_chemstation_uv` function). +* Added internal parser for 'Agilent ChemStation' version 31 files (through `read_chemstation_uv` function). * Added support for 'Agilent OpenLab' version 131 files through internal parser. * Added preliminary support for reading 'Agilent' (`.dx`) files (through `read_agilentdx` function). -* Added support for reading 'Chemstation' REPORT files. +* Added support for reading 'ChemStation' REPORT files. * Added parser for Shimadzu `.lcd` files through the `read_shimadzu_lcd` function. Only the PDA stream (not MS) is currently supported. -* Added `read_peaklist` function for reading peak lists. Currently 'Agilent Chemstation' and 'Shimadzu ASCII' formats are supported. +* Added `read_peaklist` function for reading peak lists. Currently 'Agilent ChemStation' and 'Shimadzu ASCII' formats are supported. * Added `verbose` argument to control console output for external parsers ('OpenChrom' and 'ThermoRawFileParser'). ### Other Improvements @@ -40,7 +56,7 @@ ### New features -* Added support for "Chemstation" UV (`.ch`) files (version 30). +* Added support for "ChemStation" UV (`.ch`) files (version 30). ### Minor improvements @@ -52,7 +68,7 @@ ### Bug fixes * Fixed bug preventing compilation of PDF manual. -* Fixed new bug causing failure to correctly read names of chemstation files from .D directory. +* Fixed new bug causing failure to correctly read names of 'ChemStation' files from .D directory. ## chromConverter 0.4.0 @@ -73,13 +89,13 @@ data origin. * Standardized datetime stamps so they are always converted to POSIXct format. * Now use `fs` package for parsing paths, eliminating buggy `check_paths` function. -* Fixed bug causing sloppy 'Chemstation' FID metadata. -* Fixed bug that caused padding of 'Chemstation 130' files with extra zeros. +* Fixed bug causing sloppy 'ChemStation' FID metadata. +* Fixed bug that caused padding of 'ChemStation 130' files with extra zeros. * Added additional tests. ## chromConverter 0.3.3 -* Added R-based parser for "Chemstation" UV (`.uv`) files (version 131) through +* Added R-based parser for "ChemStation" UV (`.uv`) files (version 131) through the `read_chemstation_uv` function. * Added `extract_metadata` function for extracting metadata from a list of chromatograms and returning it as a `data.frame` or `tibble`. @@ -98,8 +114,8 @@ and returning it as a `data.frame` or `tibble`. ## chromConverter 0.3.1 -* Added support for "Chemstation" UV (`.ch`) files (version 130). -* Added provisional support for "Chemstation" FID (version 8). +* Added support for "ChemStation" UV (`.ch`) files (version 130). +* Added provisional support for "ChemStation" FID (version 8). * Changed name of `read_chemstation_fid` function to `read_chemstation_ch`. * Ignore case when matching file extensions in `read_chroms`. * Added note to README about configuring RStudio correctly for accessing python parsers. @@ -107,12 +123,12 @@ and returning it as a `data.frame` or `tibble`. ## chromConverter 0.3.0 * Fixed bug causing "Chromeleon" metadata parser to fail. -* Fixed bug in "Chemstation" metadata parser. +* Fixed bug in "ChemStation" metadata parser. * Changed `format_data` argument to `data_format` to select wide or long format. * Added support for parsing `mzML` files with `RaMS`. * Added support for parsing "Agilent" (`.D`) and "Waters" (`.raw`) files with [rainbow](https://rainbow-api.readthedocs.io/). * Made `data_format` option available consistently for choosing `wide` or `long` format. -* Added parser in R for "Chemstation" FID (`.ch`) data (versions 81, 179 & 181). +* Added parser in R for "ChemStation" FID (`.ch`) data (versions 81, 179 & 181). * Improved error handling when loading python modules. * Improved error-handling for parsing metadata so small problems no longer error out the whole program. diff --git a/R/attach_metadata.R b/R/attach_metadata.R index 650a896..d5e374b 100644 --- a/R/attach_metadata.R +++ b/R/attach_metadata.R @@ -47,23 +47,21 @@ attach_metadata <- function(x, meta, format_in, format_out, data_format, parser data_format = data_format, parser = "chromconverter", format_out = format_out) - }, "shimadzu" = { + }, "shimadzu_dad" = { structure(x, - instrument = meta$`Instrument Name`, - detector = meta$`Detector Name`, - software_name = meta$`Application Name`, - software_version = meta$Version, - method = meta$`Method File`, - batch = meta$`Batch File`, - operator = meta$`Operator Name`, + instrument = get_metadata_field(meta, "Instrument Name"), + detector = get_metadata_field(meta, "Detector Name"), + software_name = get_metadata_field(meta, "Application Name"), + software_version = get_metadata_field(meta, "Version"), + method = get_metadata_field(meta, "Method File"), + batch = get_metadata_field(meta, "Batch File"), + operator = get_metadata_field(meta, "Operator Name"), run_datetime = as.POSIXct(meta$Acquired, format = "%m/%d/%Y %I:%M:%S %p"), - sample_name = meta$`Sample Name`, - sample_id = meta$`Sample ID`, - sample_injection_volume = meta$`Injection Volume`, - sample_amount = meta$`Injection Volume`, + sample_name = get_metadata_field(meta, "Sample Name"), + sample_id = get_metadata_field(meta, "Sample ID"), + sample_injection_volume = get_metadata_field(meta, "Injection Volume"), + sample_amount = get_metadata_field(meta, "Injection Volume"), time_range = c(meta$`Start Time(min)`, meta$`End Time(min)`), - # start_time = meta$`Start Time(min)`, - # end_time = meta$`End Time(min)`, time_interval = meta$`Interval(msec)`, time_interval_unit = get_time_unit( grep("Interval", names(meta), value = TRUE)[1], format_in = "shimadzu"), @@ -76,6 +74,34 @@ attach_metadata <- function(x, meta, format_in, format_out, data_format, parser data_format = data_format, parser = "chromconverter", format_out = format_out) + }, "shimadzu_chrom" = { + structure(x, + instrument = get_metadata_field(meta, "Instrument Name"), + detector = get_metadata_field(meta, "Detector Name"), + software_name = get_metadata_field(meta, "Application Name"), + software_version = get_metadata_field(meta, "Version"), + method = get_metadata_field(meta, "Method File"), + batch = get_metadata_field(meta, "Batch File"), + operator = get_metadata_field(meta, "Operator Name"), + run_datetime = as.POSIXct(meta$Acquired, format = "%m/%d/%Y %I:%M:%S %p"), + sample_name = get_metadata_field(meta, "Sample Name"), + sample_id = get_metadata_field(meta, "Sample ID"), + sample_injection_volume = get_metadata_field(meta, "Injection Volume"), + sample_amount = get_metadata_field(meta, "Injection Volume"), + time_range = c(meta$`Start Time(min)`, meta$`End Time(min)`), + time_interval = meta$`Interval(msec)`, + time_interval_unit = get_time_unit( + grep("Interval", names(meta), value = TRUE)[1], format_in = "shimadzu"), + time_unit = get_time_unit( + grep("Start Time", names(meta), value=TRUE)[1], format_in = "shimadzu"), + wavelength = get_metadata_field(meta, "Wavelength(nm)"), + bandwidth = get_metadata_field(meta, "Bandwidth(nm)"), + # detector_end = meta$`End Wavelength(nm)`, + detector_unit = get_metadata_field(meta, "Intensity Units"), + source_file = source_file, + data_format = data_format, + parser = "chromconverter", + format_out = format_out) }, "chromeleon" = { datetime.idx <- unlist(sapply(c("Date$", "Time$"), function(str) grep(str, names(meta)))) datetime <- unlist(meta[datetime.idx]) @@ -118,23 +144,6 @@ attach_metadata <- function(x, meta, format_in, format_out, data_format, parser data_format = data_format, parser = "chromconverter" ) - # } else if (format_in == "entab"){ - # structure(x, instrument = meta$instrument, - # detector = NA, - # software = meta$Version, - # method = meta$method, - # batch = meta$SeqPathAndFile, - # operator = meta$operator, - # run_datetime = meta$run_date, - # sample_name = meta$sample, - # sample_id = NA, - # injection_volume = meta$InjVolume, - # time_range = NA, - # time_interval = NA, - # detector_range = NA, - # format = data_format, - # parser = "entab", - # format_out = format_out) }, "chemstation" = { datetime_formats <- c("%d-%b-%y, %H:%M:%S", "%m/%d/%Y %I:%M:%S %p", "%d/%m/%Y %I:%M:%S %p") meta$date <- as.POSIXct(meta$date, tz = "UTC", tryFormats = datetime_formats) diff --git a/R/aston_parsers.R b/R/call_aston.R similarity index 82% rename from R/aston_parsers.R rename to R/call_aston.R index e76e98e..38c0efd 100644 --- a/R/aston_parsers.R +++ b/R/call_aston.R @@ -131,48 +131,13 @@ trace_converter <- function(file, format_out = c("matrix", "data.frame"), x } -#' Configure Aston -#' -#' Configures reticulate to use Aston file parsers. -#' @name configure_aston -#' @param return_boolean Logical. Whether to return a Boolean value indicating -#' if the chromConverter environment is correctly configured. -#' @return If \code{return_boolean} is \code{TRUE}, returns a Boolean value -#' indicating whether the chromConverter environment is configured correctly. -#' Otherwise, there is no return value. -#' @author Ethan Bass -#' @import reticulate -#' @export -configure_aston <- function(return_boolean = FALSE){ - install <- FALSE - if (!dir.exists(miniconda_path())){ - install <- readline("It is recommended to install miniconda in your R library to use Aston parsers. Install miniconda now? (y/n)") - if (install %in% c('y', "Y", "YES", "yes", "Yes")){ - install_miniconda() - } - } - env <- reticulate::configure_environment("chromConverter") - if (!env){ - reqs <- c("pandas","scipy","numpy","aston") - reqs_available <- sapply(reqs, reticulate::py_module_available) - if (!all(reqs_available)){ - conda_install(envname = "chromConverter", reqs[which(!reqs_available)], - pip = TRUE) - } - } - assign_trace_file() - if (return_boolean){ - return(env) - } -} - #' @noRd check_aston_configuration <- function(){ assign_trace_file() if (length(trace_file) == 0){ ans <- readline("Aston not found. Configure Aston? (y/n)?") if (ans %in% c('y', "Y", "YES", "yes", "Yes")){ - configure_aston() + configure_python_environment(parser = "aston") } } } diff --git a/R/call_entab.R b/R/call_entab.R index 4074f0a..141b999 100644 --- a/R/call_entab.R +++ b/R/call_entab.R @@ -12,7 +12,7 @@ #' @export call_entab <- function(file, data_format = c("wide", "long"), - format_in = NULL, + format_in = "", format_out = c("matrix", "data.frame"), read_metadata = TRUE, metadata_format = c("chromconverter", "raw")){ @@ -28,17 +28,29 @@ call_entab <- function(file, data_format = c("wide", "long"), metadata_format <- switch(metadata_format, chromconverter = format_in, raw = "raw") r <- entab::Reader(file) + file_format <- r$parser() x <- entab::as.data.frame(r) - signal.idx <- grep("signal", colnames(x)) - if (length(signal.idx) == 1){ - colnames(x)[signal.idx] <- "wavelength" - } - if (data_format == "wide"){ + if (grepl("dad$|uv$", file_format)){ + signal.idx <- grep("signal", colnames(x)) + if (length(signal.idx) == 1){ + colnames(x)[signal.idx] <- "wavelength" + } + if (data_format == "wide"){ x <- reshape_chrom_wide(x, time_var = "time", lambda_var = "wavelength", value_var = "intensity") - } - if (format_out == "matrix"){ - x <- as.matrix(x) + if (format_out == "matrix"){ + x <- as.matrix(x) + } + } + } else if (grepl("fid$", file_format)){ + if (data_format == "wide"){ + x <- data.frame(row.names = x$time, intensity = x$intensity) + } + if (format_out == "matrix"){ + x <- as.matrix(x) + } + } else if (grepl("ms$", file_format)){ + colnames(x)[1] <- "rt" } if (read_metadata){ meta <- r$metadata() diff --git a/R/call_openchrom.R b/R/call_openchrom.R index 557ad82..029c3d2 100644 --- a/R/call_openchrom.R +++ b/R/call_openchrom.R @@ -5,7 +5,7 @@ #' [OpenChrom](https://lablicate.com/platform/openchrom) (version 0.4.0) must be #' manually installed. The command line interface is no longer supported in the #' latest versions of OpenChrom (starting with version 0.5.0), so the function -#' will not work with these new versions. +#' will not work with these newer versions. #' #' The \code{call_openchrom} works by creating an \code{xml} batchfile and #' feeding it to the OpenChrom command-line interface. OpenChrom batchfiles @@ -34,9 +34,12 @@ #' @param return_paths Logical. If TRUE, the function will return a character #' vector of paths to the newly created files. #' @param verbose Logical. Whether to print output from OpenChrom to the console. -#' @return If \code{return_paths} is TRUE, the function will return a vector of paths to the newly created files. -#' If \code{return_paths} is FALSE and \code{export_format} is \code{csv}, the function will return a list -#' of chromatograms in \code{data.frame} format. Otherwise, it will not return anything. +#' @return If \code{return_paths} is \code{FALSE}, the function will return a +#' list of chromatograms (if an appropriate parser is available to import the +#' files into R). The chromatograms will be returned in \code{matrix} or +#' \code{data.frame} format according to the value of {format_out}. If +#' \code{return_paths} is \code{TRUE}, the function will return a character +#' vector of paths to the newly created files. #' @section Side effects: Chromatograms will be exported in the format specified #' by \code{export_format} in the folder specified by \code{path_out}. #' @author Ethan Bass @@ -95,6 +98,7 @@ call_openchrom <- function(files, path_out = NULL, format_in, } #' Writes OpenChrom XML batch file +#' This function is called internally by \code{call_openchrom}. #' @import xml2 #' @import magrittr #' @param files Paths to files for conversion @@ -177,6 +181,9 @@ configure_openchrom <- function(cli = c("null", "true", "false", "status"), path } else{ path_parser <- path } + if (grepl("app/?$", path_parser)){ + path_parser <- fs::path(path_parser, "Contents/MacOS/openchrom") + } writeLines(path_parser, con = system.file('shell/path_to_openchrom_commandline.txt', package='chromConverter')) diff --git a/R/call_rainbow.R b/R/call_rainbow.R index d86562f..5ef075b 100644 --- a/R/call_rainbow.R +++ b/R/call_rainbow.R @@ -17,18 +17,24 @@ #' @param by How to order the list that is returned. Either \code{detector} #' (default) or \code{name}. #' @param read_metadata Logical. Whether to attach metadata. Defaults to TRUE. +#' @param collapse Logical. Whether to collapse lists that only contain a single +#' element. +#' @param precision Number of decimals to round mz values. Defaults to 1. #' @author Ethan Bass #' @return Returns a (nested) list of \code{matrices} or \code{data.frames} according to #' the value of \code{format_out}. Data is ordered according to the value of #' \code{by}. #' @export -call_rainbow <- function(file, format_in = c("agilent_d", "waters_raw", "masshunter", - "chemstation", "chemstation_uv", "chemstation_fid"), +call_rainbow <- function(file, + format_in = c("agilent_d", "waters_raw", "masshunter", + "chemstation", "chemstation_uv", + "chemstation_fid"), format_out = c("matrix", "data.frame"), data_format = c("wide", "long"), by = c("detector","name"), what = NULL, - read_metadata = TRUE){ + read_metadata = TRUE, collapse = TRUE, + precision = 1){ check_rb_configuration() by <- match.arg(by, c("detector","name")) format_out <- match.arg(format_out, c("matrix","data.frame")) @@ -46,7 +52,7 @@ call_rainbow <- function(file, format_in = c("agilent_d", "waters_raw", "masshun if (format_in %in% c("chemstation")){ by <- "single" } - x <- converter(file) + x <- converter(file, prec = as.integer(precision)) if (by == "detector"){ if (!is.null(what)){ what_not_present <- which(!(what %in% names(x$by_detector))) @@ -63,6 +69,7 @@ call_rainbow <- function(file, format_in = c("agilent_d", "waters_raw", "masshun read_metadata = read_metadata) }) names(dtr_dat) <- extract_rb_names(dtr) + if (collapse) dtr_dat <- collapse_list(dtr_dat) dtr_dat }) } else if (by == "name"){ @@ -78,16 +85,22 @@ call_rainbow <- function(file, format_in = c("agilent_d", "waters_raw", "masshun xx } +#' Extract data with rainbow +#' This function is called internally by \code{call_rainbow}. +#' @author Ethan Bass #' @noRd extract_rb_data <- function(xx, format_out = "matrix", - data_format = c("wide","long"), + data_format = c("wide", "long"), read_metadata = TRUE){ - data_format <- match.arg(data_format, c("wide","long")) + data_format <- match.arg(data_format, c("wide", "long")) data <- xx$data try(rownames(data) <- xx$xlabels) colnames(data) <- xx$ylabels if (data_format == "long"){ - data <- reshape_chrom(data, data_format = "long") + names_to <- switch(xx$detector, "MS" = "mz", + "UV" = "lambda", + "lambda") + data <- reshape_chrom(data, data_format = "long", names_to = names_to) } if (format_out == "data.frame"){ data <- as.data.frame(data) @@ -101,6 +114,8 @@ extract_rb_data <- function(xx, format_out = "matrix", data } +#' Extract 'rainbow' element names. +#' This function is called internally by \code{call_rainbow}. #' @noRd extract_rb_names <- function(xx){ sapply(xx, function(xxx){ @@ -108,41 +123,8 @@ extract_rb_names <- function(xx){ }) } - -#' Configure rainbow -#' -#' Configures reticulate to use rainbow file parsers. -#' @name configure_rainbow -#' @param return_boolean Logical. Whether to return a Boolean value indicating -#' if the chromConverter environment is correctly configured. -#' @return If \code{return_boolean} is \code{TRUE}, returns a Boolean value -#' indicating whether the chromConverter environment is configured correctly. -#' Otherwise, there is no return value. -#' @author Ethan Bass -#' @import reticulate -#' @export -configure_rainbow <- function(return_boolean = FALSE){ - install <- FALSE - if (!dir.exists(miniconda_path())){ - install <- readline("It is recommended to install miniconda in your R library to use rainbow parsers. Install miniconda now? (y/n)") - if (install %in% c('y', "Y", "YES", "yes", "Yes")){ - install_miniconda() - } - } - env <- reticulate::configure_environment("chromConverter") - if (!env){ - reqs <- c("numpy","rainbow-api") - reqs_available <- sapply(reqs, reticulate::py_module_available) - if (!all(reqs_available)){ - conda_install(envname = "chromConverter", reqs[which(!reqs_available)], pip = TRUE) - } - } - assign_rb_read() - if (return_boolean){ - env - } -} - +#' Assign 'rainbow' read +#' This function is called internally by \code{call_rainbow}. #' @noRd assign_rb_read <- function(){ pos <- 1 @@ -151,13 +133,15 @@ assign_rb_read <- function(){ assign("rb_parse_agilent", reticulate::import("rainbow.agilent"), envir = envir) } +#' Check 'rainbow' configuration +#' This function is called internally by \code{call_rainbow}. #' @noRd check_rb_configuration <- function(){ assign_rb_read() if (length(rb_read) == 0){ ans <- readline("rainbow not found. Configure rainbow? (y/n)?") if (ans %in% c('y', "Y", "YES", "yes", "Yes")){ - configure_rainbow() + configure_python_environment(parser = "rainbow") } } } diff --git a/R/read_cdf.R b/R/read_cdf.R index bb85d0f..b1d15e4 100644 --- a/R/read_cdf.R +++ b/R/read_cdf.R @@ -1,45 +1,69 @@ #' Read CDF file -#' @param file path to file +#' @param file Path to file. #' @param format_out R format. Either \code{matrix} or \code{data.frame}. #' @param data_format Whether to return data in \code{wide} or \code{long} format. #' For 2D files, "long" format returns the retention time as the first column of #' the data.frame or matrix while "wide" format returns the retention time as the #' rownames of the object. -#' @param read_metadata Whether to read metadata from file. #' @param what Whether to extract \code{chromatogram} and/or \code{peak_table}. +#' @param read_metadata Whether to read metadata from file. +#' @param metadata_format Format to output metadata. Either \code{chromconverter} +#' or \code{raw}. +#' @param collapse Logical. Whether to collapse lists that only contain a single +#' element. +#' @param ... Additional arguments to parser. The \code{ms_format} argument +#' can be used here to specify whether to return mass spectra in \code{list} +#' format or as a \code{data.frame}. #' @return A chromatogram in the format specified by the \code{format_out} and -#' \code{data_format} arguments (retention time x wavelength). +#' \code{data_format} arguments. #' @author Ethan Bass #' @export read_cdf <- function(file, format_out = c("matrix", "data.frame"), data_format = c("wide","long"), - what = "chromatogram", read_metadata = TRUE){ + what = "chromatogram", read_metadata = TRUE, + metadata_format = c("chromconverter", "raw"), + collapse = TRUE, ...){ check_for_pkg("ncdf4") nc <- ncdf4::nc_open(file) if ("ordinate_values" %in% names(nc$var)){ - format = "chrom" + format <- "chrom" + } else if (all(c("intensity_values", "mass_values", + "scan_index", "scan_acquisition_time") %in% names(nc$var))){ + format <- "ms" } else { - format = "ms" + format <- "unknown" } ncdf4::nc_close(nc) - fn <- switch(format, chrom = read_andi_chrom, ms = andi_ms_error) + fn <- switch(format, chrom = read_andi_chrom, ms = read_andi_ms, + unknown = function(...){ + stop("The format of the provided cdf file could not be recognized.") + }) fn(file = file, data_format = data_format, format_out = format_out, - what = what, read_metadata = read_metadata) -} - -#' @noRd -andi_ms_error <- function(...){ - stop("The `cdf` MS format is not yet supported by an internal `chromConverter` parser. - Please try the OpenChrom `msd` parser instead.") + what = what, read_metadata = read_metadata, + metadata_format = metadata_format, collapse = collapse, ...) } #' Read ANDI chrom file +#' @param file Path to file. +#' @param format_out R format. Either \code{matrix} or \code{data.frame}. +#' @param data_format Whether to return data in \code{wide} or \code{long} format. +#' For 2D files, "long" format returns the retention time as the first column of +#' the data.frame or matrix while "wide" format returns the retention time as the +#' rownames of the object. +#' @param what Whether to extract \code{chromatogram} and/or \code{peak_table}. +#' @param read_metadata Whether to read metadata from file. +#' @param metadata_format Format to output metadata. Either \code{chromconverter} +#' or \code{raw}. +#' @return A chromatogram in the format specified by the \code{format_out} and +#' \code{data_format} arguments (retention time x wavelength). +#' @author Ethan Bass #' @noRd read_andi_chrom <- function(file, format_out = c("matrix", "data.frame"), data_format = c("wide", "long"), - what = "chromatogram", read_metadata = TRUE, - metadata_format = c("chromconverter", "raw")){ + what = "chromatogram", read_metadata = TRUE, + metadata_format = c("chromconverter", "raw"), + collapse = TRUE){ data_format <- match.arg(data_format, c("wide","long")) format_out <- match.arg(format_out, c("matrix","data.frame")) metadata_format <- match.arg(metadata_format, c("chromconverter", "raw")) @@ -61,32 +85,26 @@ read_andi_chrom <- function(file, format_out = c("matrix", "data.frame"), if (format_out == "matrix"){ data <- as.matrix(data) } + chromatogram <- data } if (any(what == "peak_table")){ peak_table_vars <- names(which(sapply(nc$var, function(x){ x$dim[[1]]$name }) == "peak_number")) if (length(peak_table_vars) > 0){ - peak_tab <- sapply(peak_table_vars, function(var){ + peak_table <- sapply(peak_table_vars, function(var){ ncdf4::ncvar_get(nc, varid = var) }) - peak_tab <- as.data.frame(peak_tab) + peak_table <- as.data.frame(peak_table) } } - if ("peak_table" %in% what & "chromatogram" %in% what){ - what <- "both" - } - data <- switch(what, "chromatogram" = data, - "peak_table" = peak_tab, - "both" = list(chromatogram=data, peak_table = peak_tab)) + data <- mget(what) + if (collapse) data <- collapse_list(data) if (read_metadata){ meta <- ncdf4::ncatt_get(nc, varid = 0) - # data <- attach_metadata(data, meta = meta, format_in = format_in, - # format_out = format_out, data_format = data_format, - # parser = "chromconverter", source_file = file) if (inherits(data, "list")){ data <- lapply(data, function(xx){ - attach_metadata(data, meta = meta, format_in = "cdf", + attach_metadata(xx, meta = meta, format_in = "cdf", format_out = format_out, data_format = data_format, parser = "chromconverter", source_file = file) }) @@ -99,3 +117,90 @@ read_andi_chrom <- function(file, format_out = c("matrix", "data.frame"), ncdf4::nc_close(nc) data } + +#' Read ANDI MS file +#' @param file Path to file. +#' @param format_out R format. Either \code{matrix} or \code{data.frame}. +#' @param data_format Whether to return the total ion chromatogram in \code{wide} +#' or \code{long} format. The "long" format returns the retention time as the +#' first column of the data.frame or matrix while "wide" format returns the +#' retention time as the rownames of the object. +#' @param what Whether to extract \code{chromatogram} and/or \code{ms_spectra}. +#' @param read_metadata Whether to read metadata from file. +#' @param metadata_format Format to output metadata. Either \code{chromconverter} +#' or \code{raw}. +#' @param ms_format Whether to return mass spectral data as a (long) +#' \code{data.frame} or a \code{list}. +#' @return A chromatogram in the format specified by the \code{format_out} and +#' \code{data_format} arguments and MS spectra as either a long-format +#' \code{data.frame} or a \code{list} of spectra, according to the value of +#' \code{ms_format}. +#' @author Ethan Bass +#' @noRd + +read_andi_ms <- function(file, format_out = c("matrix", "data.frame"), + data_format = c("wide", "long"), + what = "chromatogram", + ms_format = c("data.frame", "list"), + read_metadata = TRUE, + metadata_format = c("chromconverter", "raw"), + collapse = TRUE){ + data_format <- match.arg(data_format, c("wide","long")) + format_out <- match.arg(format_out, c("matrix","data.frame")) + ms_format <- match.arg(ms_format, c("data.frame","list")) + metadata_format <- match.arg(metadata_format, c("chromconverter", "raw")) + metadata_format <- switch(metadata_format, + chromconverter = "cdf", raw = "raw") + what <- match.arg(what, c("chromatogram", "ms_spectra"), several.ok = TRUE) + nc <- ncdf4::nc_open(file) + if (any(what == "chromatogram")){ + y <- ncdf4::ncvar_get(nc, "total_intensity") + x <- ncdf4::ncvar_get(nc, "scan_acquisition_time") + data = data.frame(RT = x, Intensity = y) + if (data_format == "wide"){ + rownames(data) <- data[, 1] + data <- data[,-1, drop = FALSE] + } + if (format_out == "matrix"){ + data <- as.matrix(data) + } + chromatogram <- data + } + if (any(what == "ms_spectra")){ + int <- ncdf4::ncvar_get(nc, "intensity_values") + mz <- ncdf4::ncvar_get(nc, "mass_values") + scan_idx <- ncdf4::ncvar_get(nc, "scan_index") + rt <- ncdf4::ncvar_get(nc, "scan_acquisition_time") + zeros <- as.list(rep(NA, length(which(scan_idx==0)) - 1)) + if (ms_format == "data.frame"){ + n_scans <- diff(c(scan_idx, length(mz))) + rts <- unlist(sapply(seq_along(rt), function(i){rep(rt[i], n_scans[i])})) + ms_spectra <- data.frame(rt = rts, mz = mz, intensity = int) + } else if (ms_format == "list"){ + scans <- mapply(function(x,y){ + cbind(mz = x, intensity = y) + }, split_at(mz, scan_idx+1), split_at(int, scan_idx+1)) + ms_spectra <- c(zeros, scans) + names(ms_spectra) <- rt + } + } + + data <- mget(what) + if (collapse) data <- collapse_list(data) + if (read_metadata){ + meta <- ncdf4::ncatt_get(nc, varid = 0) + if (inherits(data, "list")){ + data <- lapply(data, function(xx){ + attach_metadata(xx, meta = meta, format_in = "cdf", + format_out = format_out, data_format = data_format, + parser = "chromconverter", source_file = file) + }) + } else{ + data <- attach_metadata(data, meta = meta, format_in = metadata_format, + format_out = format_out, data_format = data_format, + parser = "chromconverter", source_file = file) + } + } + ncdf4::nc_close(nc) + data +} diff --git a/R/read_chemstation_ch.R b/R/read_chemstation_ch.R index 64ee883..1a8ccce 100644 --- a/R/read_chemstation_ch.R +++ b/R/read_chemstation_ch.R @@ -1,4 +1,12 @@ -#' Parser for reading Agilent ('.ch') files into R +#' Read 'ChemStation' CH files +#' +#' Agilent \code{.ch} files come in several different varieties. This parser +#' can automatically detect and read several versions of these files from +#' 'Agilent ChemStation' and 'OpenLab', including versions \code{30} and \code{130}, +#' which are generally produced by ultraviolet detectors, as well as \code{81}, +#' \code{179}, and \code{181} which are generally produced by flame ionization +#' detectors. +#' #' @importFrom bitops bitAnd bitShiftL #' @param path Path to \code{.ch} file #' @param format_out Matrix or data.frame. @@ -7,8 +15,14 @@ #' @param metadata_format Format to output metadata. Either \code{chromconverter} #' or \code{raw}. #' @author Ethan Bass -#' @return A chromatogram in the format specified by \code{format_out} -#' (retention time x wavelength). +#' @return A 2D chromatogram in the format specified by \code{data_format} and +#' \code{format_out}. If \code{data_format} is \code{wide}, the chromatogram will +#' be returned with retention times as rows and a single column for the intensity. +#' If \code{long} format is requested, two columns will be returned: one for the +#' retention time and one for the intensity. The \code{format_out} argument +#' determines whether the chromatogram is returned as a \code{matrix} or +#' \code{data.frame}. Metadata can be attached to the chromatogram as +#' \code{\link{attributes}} if \code{read_metadata} is {TRUE}. #' @note This function was adapted from the #' \href{https://github.com/chemplexity/chromatography}{Chromatography Toolbox} #' ((c) James Dillon 2014). @@ -21,7 +35,8 @@ read_chemstation_ch <- function(path, format_out = c("matrix", "data.frame"), format_out <- match.arg(format_out, c("matrix", "data.frame")) data_format <- match.arg(data_format, c("wide", "long")) metadata_format <- match.arg(metadata_format, c("chromconverter", "raw")) - metadata_format <- switch(metadata_format, chromconverter = "chemstation", raw = "raw") + metadata_format <- switch(metadata_format, chromconverter = "chemstation", + raw = "raw") f <- file(path, "rb") on.exit(close(f)) @@ -35,7 +50,16 @@ read_chemstation_ch <- function(path, format_out = c("matrix", "data.frame"), if (version == "179"){ seek(f, 348) filetype <- paste(readBin(f, "character", n = 2), collapse = "") - version <- paste(version, filetype, sep = "_") + if (filetype == "OL"){ + bytes = "8b" + } else if (filetype == "GC"){ + seek(f, offsets$software) + n <- get_nchar(f) + soft <- cc_collapse(readBin(f, "character", n = n)) + chemstation_version <- strsplit(soft, " ")[[1]][1] + bytes <- ifelse(chemstation_version == "Mustang", "8b", "4b") + } + version <- paste(version, bytes, sep = "_") } decoder <- switch(version, "8" = decode_delta, @@ -43,8 +67,8 @@ read_chemstation_ch <- function(path, format_out = c("matrix", "data.frame"), "30" = decode_delta, "130" = decode_delta, "181" = decode_double_delta, - "179_GC" = decode_double_array_gc, - "179_OL" = decode_double_array_ol) + "179_4b" = decode_double_array_4byte, + "179_8b" = decode_double_array_8byte) # Sample Info # offsets <- list(sample = 858, description = 1369, method = 2574, @@ -94,8 +118,8 @@ read_chemstation_ch <- function(path, format_out = c("matrix", "data.frame"), "81" = 10, "30" = 13, "130" = 14, - "179_GC" = 10, - "179_OL" = 10, + "179_4b" = 10, + "179_8b" = 10, "181" = 10) meta <- lapply(offsets[seq_len(meta_slots)], function(offset){ @@ -139,16 +163,18 @@ get_chemstation_dir_name <- function(path){ grep("\\.D|\\.d$", sp, ignore.case = TRUE, value = TRUE) } +#' Get number of characters for Agilent segment #' @noRd get_nchar <- function(f){ as.numeric(readBin(f, what = "raw", n = 1)) } #' Decode double delta array -#' @noRd #' @note This function was adapted from the #' \href{https://github.com/chemplexity/chromatography}{Chromatography Toolbox} #' ((c) James Dillon 2014). +#' @noRd + decode_double_delta <- function(file, offset) { seek(file, 0, 'end') @@ -184,11 +210,12 @@ decode_double_delta <- function(file, offset) { } #' Decode double array -#' @noRd #' @note This function was adapted from the #' \href{https://github.com/chemplexity/chromatography}{Chromatography Toolbox} #' ((c) James Dillon 2014). -decode_double_array_gc <- function(file, offset) { +#' @noRd + +decode_double_array_4byte <- function(file, offset) { seek(file, 0, 'end') fsize <- seek(file, NA, "current") offset <- 6144 @@ -202,7 +229,7 @@ decode_double_array_gc <- function(file, offset) { #' Decode double array #' @noRd -decode_double_array_ol <- function(file, offset) { +decode_double_array_8byte <- function(file, offset) { seek(file, 0, 'end') fsize <- seek(file, NA, "current") offset <- 6144 @@ -214,10 +241,11 @@ decode_double_array_ol <- function(file, offset) { } #' Decode delta array -#' @noRd #' @note This function was adapted from the #' \href{https://github.com/chemplexity/chromatography}{Chromatography Toolbox} #' ((c) James Dillon 2014). +#' @noRd + decode_delta <- function(file, offset) { seek(file, 0, 'end') fsize <- seek(file, NA, "current") @@ -306,7 +334,7 @@ get_agilent_offsets <- function(version){ units = 326, data_start = 512 ) - } else if (version %in% c("179","179_GC", "179_OL", "181")){ + } else if (version %in% c("179","179_4b", "179_8b", "181")){ offsets <- list( version = 326, file_type = 347, #0x15B diff --git a/R/read_chemstation_report.R b/R/read_chemstation_report.R index 3d6f986..92e9904 100644 --- a/R/read_chemstation_report.R +++ b/R/read_chemstation_report.R @@ -1,9 +1,11 @@ -#' Read Agilent Chemstation Reports -#' @param files Paths to Chemstation report files. +#' Read 'Agilent ChemStation' Reports +#' @param files Paths to 'ChemStation' report files. #' @param data_format Format to output data. Either \code{chromatographr} or #' \code{chemstation}. #' @param metadata_format Format to output metadata. Either \code{chromconverter} or #' \code{raw}. +#' @return A data.frame containing the information from the specified +#' 'ChemStation' report. #' @author Ethan Bass #' @export @@ -134,6 +136,7 @@ convert_chemstation_peaklist <- function(table, data_format = } #' Remove blank lines +#' This function is called internally by \code{read_chemstation_reports}. #' @noRd remove_blank_lines <- function(x){ x[which(x != "")] diff --git a/R/read_chemstation_uv.R b/R/read_chemstation_uv.R index 340967c..0e73566 100644 --- a/R/read_chemstation_uv.R +++ b/R/read_chemstation_uv.R @@ -1,6 +1,8 @@ -#' Read 'Chemstation' DAD files +#' Read 'ChemStation' DAD files #' -#' Parser for reading Agilent UV (.uv) files into R. +#' Agilent \code{.uv} files come in several different varieties. This parser can +#' automatically detect and read several versions of these files from +#' 'Agilent ChemStation' and 'OpenLab', including versions \code{31} and \code{131}. #' #' @importFrom utils head tail #' @param path Path to \code{.uv} file. @@ -9,12 +11,18 @@ #' @param read_metadata Logical. Whether to attach metadata. #' @param metadata_format Format to output metadata. Either \code{chromconverter} #' or \code{raw}. -#' @return A chromatogram in the format specified by \code{format_out} -#' (retention time x wavelength). +#' @return A 3D chromatogram in the format specified by \code{data_format} and +#' \code{format_out}. If \code{data_format} is \code{wide}, the chromatogram will +#' be returned with retention times as rows and wavelengths as columns. If +#' \code{long} format is requested, three columns will be returned: one for the +#' retention time, one for the wavelength and one for the intensity. The +#' \code{format_out} argument determines whether the chromatogram is returned as +#' a \code{matrix} or \code{data.frame}. Metadata can be attached to the +#' chromatogram as \code{\link{attributes}} if \code{read_metadata} is {TRUE}. #' @author Ethan Bass #' @note This function was adapted from the parser in the rainbow project #' licensed under GPL 3 by Evan Shi -#' (https://rainbow-api.readthedocs.io/en/latest/agilent/uv.html). +#' \url{https://rainbow-api.readthedocs.io/en/latest/agilent/uv.html}. #' @export read_chemstation_uv <- function(path, format_out = c("matrix", "data.frame"), diff --git a/R/read_chroms.R b/R/read_chroms.R index 5687842..f79fcbd 100644 --- a/R/read_chroms.R +++ b/R/read_chroms.R @@ -1,23 +1,30 @@ #' Read Chromatograms #' -#' Reads chromatograms from specified folders or vector of paths using file -#' parsers from [Aston](https://github.com/bovee/aston), +#' Reads chromatograms from specified folders or vector of paths using either an +#' internal parser or bindings to an external library, such as +#' [Aston](https://github.com/bovee/aston), #' [Entab](https://github.com/bovee/entab), #' [ThermoRawFileParser](https://github.com/compomics/ThermoRawFileParser), #' [OpenChrom](https://lablicate.com/platform/openchrom), -#' [rainbow](https://rainbow-api.readthedocs.io/), or internal parsers. +#' [rainbow](https://rainbow-api.readthedocs.io/). #' -#' Provides a general interface to chromConverter parsers. Currently recognizes -#' 'Agilent ChemStation' (\code{.uv}, \code{.ch}, \code{.dx}), 'MassHunter' (\code{.dad}) -#' files, 'Thermo RAW' (\code{.raw}), 'Waters ARW' (\code{.arw}), 'Waters RAW' -#' (\code{.raw}), 'Chromeleon ASCII' (\code{.txt}), 'Shimadzu ASCII' -#' (\code{.txt}). Also, wraps Openchrom parsers, which include many additional -#' formats. To use 'Entab', 'ThermoRawFileParser', or 'Openchrom' parsers, -#' they must be manually installed. Please see the instructions in the +#' Provides a unified interface to all chromConverter parsers. Currently recognizes +#' 'Agilent ChemStation' (\code{.uv}, \code{.ch}, \code{.dx}), 'Agilent +#' MassHunter' (\code{.dad}), 'Thermo RAW' (\code{.raw}), 'Waters ARW' (\code{.arw}), +#' 'Waters RAW' (\code{.raw}), 'Chromeleon ASCII' (\code{.txt}), 'Shimadzu ASCII' +#' (\code{.txt}), and 'Shimadzu LCD' files (preliminary support). Also, wraps +#' 'OpenChrom' parsers, which include many additional formats. To use 'Entab', +#' 'ThermoRawFileParser', or 'OpenChrom' parsers, they must be manually installed. +#' Please see the instructions in the #' [README](https://ethanbass.github.io/chromConverter/) for further details. #' +#' If paths to individual files are provided, \code{read_chroms} will try to +#' infer the file format and select an appropriate parser. However, when +#' providing paths to directories, the file format must be specified using the +#' \code{format_in} argument. +#' #' @name read_chroms -#' @param paths paths to files or folders containing files +#' @param paths Paths to data files or directories containing the files. #' @param find_files Logical. Set to \code{TRUE} (default) if you are providing #' the function with a folder or vector of folders containing the files. #' Otherwise, set to\code{FALSE}. @@ -25,15 +32,16 @@ #' include: \code{agilent_d}, \code{agilent_dx}, \code{chemstation}, #' \code{chemstation_uv}, \code{chemstation_ch}, \code{chemstation_csv}, #' \code{masshunter}, \code{masshunter_dad}, \code{chromeleon_uv}, -#' \code{shimadzu_fid}, \code{shimadzu_dad}, \code{thermoraw}, -#' \code{waters_arw}, \code{waters_raw}, \code{mzml}, \code{cdf}, \code{mdf}, -#' \code{msd}, \code{csd}, \code{wsd}, or \code{other}. +#' \code{shimadzu_ascii}, \code{shimadzu_fid}, \code{shimadzu_dad}, +#' \code{thermoraw}, \code{waters_arw}, \code{waters_raw}, \code{mzml}, +#' \code{mzxml}, \code{cdf}, \code{mdf}, \code{msd}, \code{csd}, \code{wsd}, +#' or \code{other}. #' @param pattern pattern (e.g. a file extension). Defaults to NULL, in which #' case file extension will be deduced from \code{format_in}. -#' @param parser What parser to use. Current option are \code{chromconverter}, -#' \code{aston}, \code{entab}, \code{thermoraw}, \code{openchrom}, or -#' \code{rainbow}. -#' @param format_out R object format (i.e. data.frame or matrix). +#' @param parser What parser to use (optional). Current option are +#' \code{chromconverter}, \code{aston}, \code{entab}, \code{thermoraw}, +#' \code{openchrom}, or \code{rainbow}. +#' @param format_out Class of output (i.e. data.frame or matrix). #' @param data_format Whether to output data in wide or long format. Either #' \code{wide} or \code{long}. #' @param export Logical. If TRUE, the program will export files in the format @@ -56,15 +64,21 @@ #' @param verbose Logical. Whether to print output from external parsers to the #' R console. #' @param sample_names An optional character vector of sample names. Otherwise -#' sample names default to the basename of the specified files. +#' sample names default to the \code{\link{basename}} of the specified files. #' @param dat Existing list of chromatograms to append results. #' (Defaults to NULL). +#' @param ... Additional arguments to parser. #' @return A list of chromatograms in \code{matrix} or \code{data.frame} format, -#' according to the value of \code{format_out}. +#' according to the value of \code{format_out}. Chromatograms may be returned +#' in either \code{wide} or \code{long} format according to the value of +#' \code{data_format}. #' @section Side effects: If \code{export} is TRUE, chromatograms will be #' exported in the format specified by \code{export_format} in the folder -#' specified by \code{path_out}. Currently, the only option for export is -#' \code{csv} unless the \code{parser} is \code{openchrom}. +#' specified by \code{path_out}. Currently, the most versatile option for +#' exporting files is \code{csv}. However, unidimensional chromatograms can +#' also be exported in ANDI Chromatography (netCDF) format by selecting +#' \code{cdf}. If an \code{openchrom} parser is selected, ANIML and mzML are +#' available as additional options. #' @import reticulate #' @importFrom utils write.csv file_test #' @importFrom purrr partial @@ -79,9 +93,10 @@ read_chroms <- function(paths, find_files, "chemstation_fid", "chemstation_ch", "chemstation_csv", "chemstation_uv", "masshunter_dad", "chromeleon_uv", + "shimadzu_ascii", "shimadzu_fid", "shimadzu_dad", "shimadzu_lcd", "thermoraw", "mzml", - "waters_arw", "waters_raw", + "mzxml", "waters_arw", "waters_raw", "msd", "csd", "wsd", "mdf", "other"), pattern = NULL, parser = c("", "chromconverter", "aston", "entab", @@ -95,7 +110,7 @@ read_chroms <- function(paths, find_files, metadata_format = c("chromconverter", "raw"), progress_bar, cl = 1, verbose = getOption("verbose"), - sample_names = NULL, dat = NULL){ + sample_names = NULL, dat = NULL, ...){ data_format <- match.arg(data_format, c("wide","long")) format_out <- match.arg(format_out, c("matrix", "data.frame")) parser <- match.arg(tolower(parser), c("", "chromconverter", "aston","entab", @@ -126,15 +141,16 @@ read_chroms <- function(paths, find_files, stop("Please specify the file format of your chromatograms by setting the `format_in` argument.") } } - format_in <- match.arg(format_in, c("agilent_d", "agilent_dx", "chemstation", + format_in <- match.arg(tolower(format_in), c("agilent_d", "agilent_dx", "chemstation", "chemstation_uv", "chemstation_ch", "chemstation_30", "chemstation_31", "chemstation_130", "chemstation_131", "openlab_131", "chemstation_179", "chemstation_81", "chemstation_181", "chemstation_fid", "chemstation_csv", "masshunter_dad", - "shimadzu_fid", "shimadzu_dad", "shimadzu_lcd", - "chromeleon_uv", "thermoraw", "mzml", + "shimadzu_fid", "shimadzu_dad", + "shimadzu_ascii", "shimadzu_lcd", + "chromeleon_uv", "thermoraw", "mzml", "mzxml", "waters_arw", "waters_raw", "msd", "csd", "wsd", "mdf", "cdf", "other")) if (parser == ""){ @@ -180,19 +196,16 @@ read_chroms <- function(paths, find_files, rainbow_parser <- partial(call_rainbow, format_in = format_in, format_out = format_out, data_format = data_format, - read_metadata = read_metadata) + read_metadata = read_metadata, ...) if (format_in == "agilent_d"){ - pattern <- ifelse(is.null(pattern), ".D", pattern) converter <- rainbow_parser } else if (format_in == "agilent_dx"){ - pattern <- ifelse(is.null(pattern), ".dx", pattern) converter <- partial(read_agilent_dx, path_out = path_out, format_out = format_out, data_format = data_format, read_metadata = read_metadata) } else if (format_in == "masshunter_dad"){ - pattern <- ifelse(is.null(pattern), ".sp", pattern) converter <- switch(parser, "aston" = partial(sp_converter, format_out = format_out, data_format = data_format, @@ -200,7 +213,6 @@ read_chroms <- function(paths, find_files, metadata_format = metadata_format), "entab" = entab_parser) } else if (format_in == "chemstation_uv" | grepl("31", format_in)){ - pattern <- ifelse(is.null(pattern), ".uv", pattern) converter <- switch(parser, "chromconverter" = partial(read_chemstation_uv, format_out = format_out, @@ -214,30 +226,30 @@ read_chroms <- function(paths, find_files, "entab" = entab_parser, "rainbow" = rainbow_parser) } else if (format_in == "chromeleon_uv"){ - pattern <- ifelse(is.null(pattern), ".txt", pattern) converter <- partial(read_chromeleon, format_out = format_out, data_format = data_format, read_metadata = read_metadata, metadata_format = metadata_format) } else if (format_in == "shimadzu_fid"){ - pattern <- ifelse(is.null(pattern), ".txt", pattern) - converter <- partial(read_shimadzu, format_in = "fid", + converter <- partial(read_shimadzu, include = "fid", format_out = format_out, data_format = data_format, read_metadata = read_metadata, - metadata_format = metadata_format) + metadata_format = metadata_format, ...) } else if (format_in == "shimadzu_dad"){ - pattern <- ifelse(is.null(pattern), ".txt", pattern) - converter <- partial(read_shimadzu, format_in = "dad", + converter <- partial(read_shimadzu, include = "dad", format_out = format_out, data_format = data_format, read_metadata = read_metadata, - metadata_format = metadata_format) - } else if (format_in == "shimadzu_lcd"){ - pattern <- ifelse(is.null(pattern), ".lcd", pattern) + metadata_format = metadata_format, ...) + } else if (format_in == "shimadzu_ascii"){ + converter <- partial(read_shimadzu, format_out = format_out, + data_format = data_format, + read_metadata = read_metadata, + metadata_format = metadata_format, ...) + } else if (format_in == "shimadzu_lcd"){ converter <- partial(read_shimadzu_lcd, format_out = format_out, data_format = data_format, read_metadata = read_metadata) } else if (format_in == "thermoraw"){ - pattern <- ifelse(is.null(pattern), ".raw", pattern) converter <- switch(parser, "thermoraw" = partial(read_thermoraw, path_out = path_out, format_out = format_out, @@ -245,27 +257,28 @@ read_chroms <- function(paths, find_files, metadata_format = metadata_format, verbose = verbose), "entab" = entab_parser) - } else if (format_in == "mzml"){ - pattern <- ifelse(is.null(pattern), ".mzML", pattern) - converter <- partial(read_mzml, format_out = format_out) + } else if (format_in %in% c("mzml","mzxml")){ + converter <- partial(read_mzml, format_out = format_out, ...) } else if (format_in == "waters_arw"){ - pattern <- ifelse(is.null(pattern), ".arw", pattern) converter <- partial(read_waters_arw, format_out = format_out, data_format = data_format, read_metadata = read_metadata) } else if (format_in == "waters_raw"){ - pattern <- ifelse(is.null(pattern), ".raw", pattern) - converter <- rainbow_parser + converter <- switch(parser, "rainbow" = rainbow_parser, + "chromconverter" = partial(read_waters_raw, + format_out = format_out, + data_format = data_format, + read_metadata = read_metadata, + metadata_format = metadata_format)) } else if (format_in == "chemstation_csv"){ - pattern <- ifelse(is.null(pattern), ".csv|.CSV", pattern) converter <- partial(read_chemstation_csv, format_out = format_out) } else if (grepl("chemstation", format_in)){ - pattern <- ifelse(is.null(pattern), ".ch", pattern) converter <- switch(parser, "chromconverter" = partial(read_chemstation_ch, format_out = format_out, data_format = data_format, - read_metadata = read_metadata), + read_metadata = read_metadata, + metadata_format = metadata_format), "rainbow" = rainbow_parser, "entab" = entab_parser) } else if (format_in %in% c("msd", "csd", "wsd")){ @@ -278,15 +291,13 @@ read_chroms <- function(paths, find_files, format_in = format_in, export_format = export_format, return_paths = return_paths, verbose = verbose) } else if (format_in == "mdf"){ - pattern <- ifelse(is.null(pattern), ".mdf|.MDF", pattern) converter <- partial(read_mdf, format_out = format_out, data_format = data_format, read_metadata = read_metadata) } else if (format_in == "cdf"){ - pattern <- ifelse(is.null(pattern), ".cdf|.CDF", pattern) converter <- partial(read_cdf, format_out = format_out, data_format = data_format, - read_metadata = read_metadata) + read_metadata = read_metadata, ...) } else { converter <- switch(parser, "aston" = partial(trace_converter, format_out = format_out, @@ -296,21 +307,21 @@ read_chroms <- function(paths, find_files, "entab" = entab_parser ) } - + pattern <- ifelse(is.null(pattern), format_to_extension(format_in), pattern) if (find_files){ files <- find_files(paths, pattern) } else { files <- paths if (!is.null(pattern)){ match <- grep(pattern, files, ignore.case = TRUE) - if (length(match) == 0){ - warning("The provided files do not match the expected file extension. - Please confirm that the specified format ('format_in') is correct.", - immediate. = TRUE) - } else if (length(match) < length(files)){ - warning(paste("Some of the files do not have the expected file extension:", - files[match]), immediate. = TRUE) - } + if (length(match) == 0){ + warning("The provided files do not match the expected file extension. + Please confirm that the specified format ('format_in') is correct.", + immediate. = TRUE) + } else if (length(match) < length(files)){ + warning(paste("Some of the files do not have the expected file extension:", + files[match]), immediate. = TRUE) + } } } if (all(grepl("\\.[Dd]$|\\.[Dd]?[/\\\\]",files))){ diff --git a/R/read_mdf.R b/R/read_mdf.R index fb42c0b..a729833 100644 --- a/R/read_mdf.R +++ b/R/read_mdf.R @@ -65,6 +65,8 @@ read_mdf <- function(file, format_out = c("matrix","data.frame"), } #' Extract MDF metadata +#' This function is called internally by \code{read_mdf}. +#' @author Ethan Bass #' @noRd extract_mdf_metadata <- function(x){ x <- stringr::str_replace_all(x, "\xb5", "micro") diff --git a/R/read_mzml.R b/R/read_mzml.R index 004df4e..934c8d5 100644 --- a/R/read_mzml.R +++ b/R/read_mzml.R @@ -26,8 +26,8 @@ read_mzml <- function(path, format_out = c("matrix", "data.frame"), data_format = c("long","wide"), - parser=c("RaMS","mzR"), - what=c("MS1","MS2", "BPC", "TIC", "DAD", + parser = c("RaMS","mzR"), + what = c("MS1","MS2", "BPC", "TIC", "DAD", "chroms", "metadata", "everything"), verbose = FALSE, ...){ @@ -35,10 +35,11 @@ read_mzml <- function(path, format_out = c("matrix", "data.frame"), format_out <- match.arg(format_out, c("matrix", "data.frame")) data_format <- match.arg(data_format, c("long","wide")) what <- match.arg(what, c("MS1","MS2", "BPC", "TIC", "DAD", - "chroms", "metadata", "everything"), several.ok = TRUE) + "chroms", "metadata", "everything"), + several.ok = TRUE) if (all(c("MS1","MS2", "BPC", "TIC", "DAD", "chroms", "metadata", "everything") %in% what)){ - what <- grep("everything",what, invert = TRUE,value = TRUE) + what <- grep("everything", what, invert = TRUE, value = TRUE) } if (parser == "RaMS"){ data <- RaMS::grabMSdata(path, grab_what = what, verbosity = verbose, ...) diff --git a/R/read_peaklist.R b/R/read_peaklist.R index 25a88aa..3bc6325 100644 --- a/R/read_peaklist.R +++ b/R/read_peaklist.R @@ -10,8 +10,7 @@ #' include: \code{chemstation} or \code{shimadzu}. #' @param pattern pattern (e.g. a file extension). Defaults to NULL, in which #' case file extension will be deduced from \code{format_in}. -#' @param data_format Whether to output data in wide or long format. Either -#' \code{wide} or \code{long}. +#' @param data_format Either \code{chromatographr} or \code{original}. #' @param read_metadata Logical, whether to attach metadata (if it's available). #' Defaults to TRUE. #' @param metadata_format Format to output metadata. Either \code{chromconverter} or @@ -63,19 +62,12 @@ read_peaklist <- function(paths, find_files, parser <- purrr::partial(read_chemstation_reports, data_format = data_format, metadata_format = metadata_format) - } else if (format_in == "shimadzu_fid"){ + } else if (grepl("shimadzu", format_in)){ pattern <- ifelse(is.null(pattern), ".txt", pattern) - parser <- partial(read_shimadzu, format_in = "fid", what = "peak_table", - # format_out = format_out, + parser <- partial(read_shimadzu, what = "peak_table", data_format = "wide", read_metadata = read_metadata, peaktable_format = data_format) - } else if (format_in == "shimadzu_dad"){ - pattern <- ifelse(is.null(pattern), ".txt", pattern) - parser <- partial(read_shimadzu, format_in = "dad", what = "peak_table", - # format_out = format_out, - data_format = "wide", - read_metadata = read_metadata) } if (find_files){ @@ -103,9 +95,13 @@ read_peaklist <- function(paths, find_files, try(parser(file), silent = TRUE) }) data <- lapply(seq_along(data), function(i){ - lapply(data[[i]], function(xx){ - cbind(sample = file_names[i], xx) - }) + if (inherits(data[[i]], "list")){ + lapply(data[[i]], function(xx){ + cbind(sample = file_names[i], xx) + }) + } else { + cbind(sample = file_names[i], data[[i]]) + } }) class(data) <- "peak_list" names(data) <- file_names diff --git a/R/read_shimadzu_ascii.R b/R/read_shimadzu_ascii.R index d79ec6b..13cfae8 100644 --- a/R/read_shimadzu_ascii.R +++ b/R/read_shimadzu_ascii.R @@ -7,156 +7,323 @@ #' @name read_shimadzu #' @importFrom utils tail read.csv #' @importFrom stringr str_split_fixed -#' @param file path to file -#' @param format_in Format of files. \code{fid} or \code{dad}. +#' @param file Path to file. +#' @param what Whether to extract \code{chromatogram}, \code{peak_table}, and/or +#' \code{ms_spectra}. Accepts multiple arguments. +#' @param include Which chromatograms to include. Options are \code{fid}, +#' \code{dad}, \code{uv}, \code{tic}, and \code{status}. +#' @param format_in This argument is deprecated and is no longer required. #' @param format_out R format. Either \code{matrix} or \code{data.frame}. #' @param data_format Whether to return data in \code{wide} or \code{long} format. #' @param peaktable_format Whether to return peak tables in \code{chromatographr} or #' \code{original} format. -#' @param what Whether to extract \code{chromatogram} and/or \code{peak_table}. -#' Accepts multiple arguments. #' @param read_metadata Whether to read metadata from file. #' @param metadata_format Format to output metadata. Either \code{chromconverter} or #' \code{raw}. -#' @return A chromatogram in the format specified by \code{format_out} -#' (retention time x wavelength). +#' @param ms_format Whether to return mass spectral data as a (long) +#' \code{data.frame} or a \code{list}. +#' @param collapse Logical. Whether to collapse lists that only contain a single +#' element. +#' @return A nested list of elements from the specified \code{file}, where the +#' top levels are chromatograms, peak tables, and/or mass spectra according to +#' the value of \code{what}. Chromatograms are returned in the format specified +#' by \code{format_out} (retention time x wavelength). #' @author Ethan Bass #' @export -read_shimadzu <- function(file, format_in, +read_shimadzu <- function(file, what = "chromatogram", + format_in = NULL, + include = c("fid", "dad", "uv", "tic", "status"), format_out = c("matrix", "data.frame"), data_format = c("wide", "long"), peaktable_format = c("chromatographr", "original"), - what = "chromatogram", read_metadata = TRUE, - metadata_format = c("chromconverter", "raw")){ - if (missing(format_in)) - stop("`format_in` must be specified. The options are `fid` or `dad`.") + metadata_format = c("chromconverter", "raw"), + ms_format = c("data.frame", "list"), + collapse = TRUE){ + if (!is.null(format_in)){ + warning("The `format_in` argument is deprecated, since the `read_shimadzu` + function no longer requires you to specify the file format. Please use the + `include` argument instead to specify which chromatograms you'd like to read.") + } + what <- match.arg(what, c("chromatogram", "peak_table", "ms_spectra"), + several.ok = TRUE) + include <- match.arg(include, c("fid", "dad", "uv", "tic", "status"), + several.ok = TRUE) format_out <- match.arg(format_out, c("matrix", "data.frame")) data_format <- match.arg(data_format, c("wide", "long")) peaktable_format <- match.arg(peaktable_format, c("chromatographr","original")) metadata_format <- match.arg(metadata_format, c("chromconverter", "raw")) - what <- match.arg(what, c("chromatogram", "peak_table"), several.ok = TRUE) + ms_format <- match.arg(ms_format, c("data.frame", "list")) + x <- readLines(file) - sep <- substr(x[2], 17, 17) - headings <- grep("\\[*\\]", x) - peaktab.idx <- grep("\\[Peak Table", x) - chrom_heading <- switch(format_in, - "fid" = "\\[Chromatogram .*]", - "dad" = "\\[PDA 3D]") - chrom.idx <- grep(chrom_heading, x) + sep <- substr(x[grep("Type", x)[1]], 5, 5) + ### extract chromatograms ### if (any(what == "chromatogram")){ - if (length(chrom.idx) != 0){ - header <- try(extract_shimadzu_header(x = x, chrom.idx = chrom.idx, sep = sep)) - met <- header[[1]] - decimal_separator <- ifelse(grepl(",", met[2, 2]), ",", ".") - if (decimal_separator == ","){ - met[c(2:3), 2] <- gsub(",", ".", met[c(2:3), 2]) - } - if (format_in == "fid"){ - xx <- read.csv(file, skip = header[[2]], sep = sep, colClasses = "numeric", - na.strings = c("[FractionCollectionReport]","#ofFractions"), - dec = decimal_separator) - xx <- as.matrix(xx[!is.na(xx[,1]),]) - rownames(xx) <- xx[, 1] - xx <- xx[, 2, drop = FALSE] - colnames(xx) <- "Intensity" - if (data_format == "long"){ - xx <- cbind(RT = as.numeric(rownames(xx)), Intensity = as.numeric(xx[,1])) - } - } else if (format_in == "dad"){ - nrows <- as.numeric(met[grep("# of Time Axis Points", met[,1]), 2]) - ncols <- as.numeric(met[grep("# of Wavelength Axis Points", met[,1]), 2]) - xx <- read.csv(file, skip = header[[2]], sep = sep, colClasses = "numeric", - na.strings = c("[FractionCollectionReport]", "#ofFractions"), - row.names = 1, nrows = nrows, dec = decimal_separator) - xx <- as.matrix(xx[!is.na(xx[,1]),]) - colnames(xx) <- as.numeric(gsub("X", "", colnames(xx)))*0.01 - if (data_format == "long"){ - xx <- reshape_chrom(xx, data_format = "long") - } - } - if (format_out == "data.frame"){ - xx <- as.data.frame(xx) - } - } else{ + regex <- c("fid" = "\\[Chromatogram .*]", + "dad" = "\\[PDA 3D]", + "uv" = "\\[PDA Multi Chromatogram\\(Ch\\d+\\)]", + "status" = "\\[LC Status Trace\\(.+\\)]", + "tic" = "\\[MS Chromatogram\\]") + regex <- regex[include] + chrom.idx <- lapply(regex, function(reg) grep(reg,x)) + chrom.idx <- chrom.idx[lengths(chrom.idx) > 0] + + if (length(chrom.idx) == 0){ if (length(what) == 1){ - stop("Chromatogram not found.") - } else{ - warning("Chromatogram not found.") - what = "peak_table" + stop("Chromatograms not found!") + } else { + warning("Chromatograms not found.") + what <- what[grep("chroms", what, invert = TRUE)] } } + chromatogram <- lapply(seq_along(chrom.idx), function(i){ + read_shimadzu_chrom <- switch(names(chrom.idx)[i], "dad" = read_shimadzu_dad, + read_shimadzu_chromatogram) + xx <- lapply(chrom.idx[[i]], function(idx){ + read_shimadzu_chrom(file = file, x = x, chrom.idx = idx, + sep = sep, data_format = data_format, + read_metadata = TRUE, format_out = format_out) + }) + names(xx) <- x[chrom.idx[[i]]] + if (collapse) xx <- collapse_list(xx) + xx + }) + names(chromatogram) <- names(chrom.idx) } - ### extract peak_table + ### extract peak tables ### if (any(what == "peak_table")){ + peaktab.idx <- grep("\\[Peak Table|\\[MC Peak Table", x) + pktab_type <- substr(x[peaktab.idx], 2, 3) + if (length(peaktab.idx) == 0){ if (length(what) == 1){ stop("Peak table not found!") } else{ warning("Peak table not found!") - what <- "chromatogram" + what <- what[grep("peak_table", what, invert = TRUE)] + } + } + peak_table <- lapply(seq_along(peaktab.idx), function(i){ + read_shimadzu_peaktable(file = file, x, idx = peaktab.idx[i], sep = sep, + format_in = pktab_type[i], + format_out = peaktable_format) + }) + names(peak_table) <- gsub("\\[|\\]","", x[peaktab.idx]) + } + + ### extract MS spectra ### + if (any(what == "ms_spectra")){ + spectra.idx <- grep("\\[MS Spectrum\\]", x) + if (length(spectra.idx) == 0){ + if (length(what) == 1){ + stop("MS spectra were not found!") + } else{ + warning("MS spectra were not found!") + what <- what[grep("spectra", what, invert = TRUE)] } } - peak_tab <- lapply(peaktab.idx, function(idx){ - nrows <- as.numeric(strsplit(x = x[idx + 1], split = sep)[[1]][2]) - if (!is.na(nrows) && nrows > 0){ - time_column <- grep("R.Time", strsplit(x = x[[idx + 2]], split = sep)[[1]]) - t1 <- strsplit(x = x[[idx + 3]], split = sep)[[1]][time_column] - decimal_separator <- ifelse(grepl(".", t1), ".", ",") - - peak_tab <- read.csv(file, skip = (idx + 1), sep = sep, nrows = nrows, - dec = decimal_separator) - if (peaktable_format == "chromatographr"){ - peak_tab <- peak_tab[, c("R.Time", "I.Time", "F.Time", "Area", "Height")] - colnames(peak_tab) <- c("rt", "start", "end", "area", "height") - # cbind(sample = gsub("\\[|\\]","", x[idx]), peak_tab) - } - peak_tab - } else{NA} + ms_spectra <- lapply(spectra.idx, function(idx){ + read_shimadzu_spectrum(file, x, idx = idx, sep = sep) }) - names(peak_tab) <- gsub("\\[|\\]","", x[peaktab.idx]) + if (exists("peak_table") && "MC Peak Table" %in% names(peak_table)){ + rt.idx <- grep("^Ret.Time$|^rt$", colnames(peak_table$`MC Peak Table`)) + names(ms_spectra) <- peak_table$`MC Peak Table`[, rt.idx] + } + if (ms_format == "data.frame"){ + ms_spectra <- ms_list_to_dataframe(ms_spectra) + } + } + + xx <- mget(what) + if (collapse) xx <- collapse_list(xx) + xx +} + +#' Convert list of mass spectra to data.frame +#' This function is called internally by \code{read_shimadzu}. +#' @author Ethan Bass +#' @noRd +ms_list_to_dataframe <- function(x){ + if (!is.null(names(x))){ + ms <- lapply(seq_along(x), function(i){ + cbind(rt = as.numeric(names(x)[i]), x[[i]]) + }) + } else { + ms <- lapply(seq_along(x), function(i){ + cbind(idx = as.numeric(i), x[[i]]) + }) + } + as.data.frame(do.call(rbind, ms)) +} + +#' Read 'Shimadzu' Metadata +#' This function is called internally by \code{read_shimadzu}. +#' @author Ethan Bass +#' @noRd +read_shimadzu_metadata <- function(x, met = NULL, sep){ + + headings <- grep("\\[*\\]", x) + names(headings) <- x[headings] + + idx <- which(x[headings] %in% c("[Header]", "[File Information]", + "[Sample Information]", "[Original Files]", + "[File Description]", "[Configuration]") ) + meta_start <- headings[min(idx)] + meta_end <- headings[max(idx) + 1] + meta <- x[(meta_start + 1):(meta_end - 1)] + meta <- meta[meta != ""] + meta <- meta[-grep("\\[", meta)] + meta <- stringr::str_split_fixed(meta, pattern = sep, n = 2) + if (!is.null("met")){ + meta <- rbind(meta, met) } - if ("peak_table" %in% what & "chromatogram" %in% what){ - what <- "both" + rownames(meta) <- meta[, 1] + meta <- as.list(meta[,2]) + meta +} + +#' Read Shimadzu Chromatogram +#' This function is called internally by \code{read_shimadzu}. +#' @author Ethan Bass +#' @noRd +read_shimadzu_chromatogram <- function(file, x, chrom.idx, sep, data_format, + read_metadata, format_out){ + header <- try(extract_shimadzu_header(x = x, chrom.idx = chrom.idx, sep = sep)) + met <- header[[1]] + first_time <- strsplit(x[header[[2]]+2], "\t")[[1]][1] + decimal_separator <- ifelse(grepl(",", first_time), ",", ".") + + if (decimal_separator == ","){ + times.idx <- grep("Time|Intensity Multiplier", met[,1]) + met[times.idx, 2] <- gsub(",", ".", met[times.idx, 2]) } + + xx <- read.csv(file, skip = header[[2]], sep = sep, colClasses = "numeric", + # na.strings = c("[FractionCollectionReport]", "#ofFractions", "\\["), + dec = decimal_separator, + nrows = as.numeric(met[grep("# of Points", met),2])) + xx <- as.matrix(xx[!is.na(xx[,1]),]) + if (data_format == "wide"){ + rownames(xx) <- xx[, 1] + xx <- xx[, 2, drop = FALSE] + colnames(xx) <- "Intensity" + } + # if (data_format == "long"){ + # xx <- cbind(RT = as.numeric(rownames(xx)), Intensity = as.numeric(xx[,1])) + # } if (format_out == "data.frame"){ xx <- as.data.frame(xx) } - xx <- switch(what, "chromatogram" = xx, - "peak_table" = peak_tab, - "both" = list(chromatogram = xx, peak_table = peak_tab)) if (read_metadata){ - idx <- which(x[headings] %in% c("[Header]", "[File Information]", - "[Sample Information]", "[Original Files]", - "[File Description]", "[Configuration]") ) - meta_start <- headings[min(idx)] - meta_end <- headings[max(idx) + 1] - meta <- x[(meta_start + 1):(meta_end - 1)] - meta <- meta[meta!=""] - meta <- meta[-grep("\\[", meta)] - meta <- stringr::str_split_fixed(meta, pattern = sep, n = 2) - if (exists("met")){ - meta <- rbind(meta, met) - } - rownames(meta) <- meta[, 1] - meta <- as.list(meta[,2]) - if (inherits(xx, "list")){ - xx <- lapply(xx, function(xxx){ - attach_metadata(xxx, meta, format_in = "shimadzu", - source_file = file, format_out = format_out, - data_format = data_format, parser = "chromConverter") - }) - } else{ - xx <- attach_metadata(xx, meta, format_in = "shimadzu", - source_file = file, format_out = format_out, - data_format = data_format, - parser = "chromConverter") - } + meta <- read_shimadzu_metadata(x, met = met, sep = sep) + xx <- attach_metadata(xx, meta, format_in = "shimadzu_chrom", + source_file = file, format_out = format_out, + data_format = data_format, + parser = "chromConverter") } xx } +#' Read Shimadzu DAD Array +#' This function is called internally by \code{read_shimadzu}. +#' @author Ethan Bass +#' @noRd +read_shimadzu_dad <- function(file, x, chrom.idx, sep, data_format, + read_metadata, format_out){ + header <- try(extract_shimadzu_header(x = x, chrom.idx = chrom.idx, sep = sep)) + met <- header[[1]] + first_time <- strsplit(x[header[[2]]+3], "\t")[[1]][1] + decimal_separator <- ifelse(grepl(",", first_time), ",", ".") + + if (decimal_separator == ","){ + times.idx <- grep("Time|Intensity Multiplier", met[,1]) + met[times.idx, 2] <- gsub(",", ".", met[times.idx, 2]) + } + + nrows <- as.numeric(met[grep("# of Time Axis Points", met[,1]), 2]) + ncols <- as.numeric(met[grep("# of Wavelength Axis Points", met[,1]), 2]) + xx <- read.csv(file, skip = header[[2]]+1, sep = sep, colClasses = "numeric", + na.strings = c("[FractionCollectionReport]", "#ofFractions"), + row.names = 1, nrows = nrows, dec = decimal_separator) + xx <- as.matrix(xx[!is.na(xx[,1]),]) + colnames(xx) <- as.numeric(gsub("X", "", colnames(xx)))*0.01 + if (data_format == "long"){ + xx <- reshape_chrom(xx, data_format = "long") + } + if (format_out == "data.frame"){ + xx <- as.data.frame(xx) + } + if (read_metadata){ + meta <- read_shimadzu_metadata(x, met = met, sep = sep) + xx <- attach_metadata(xx, meta, format_in = "shimadzu_chrom", + source_file = file, format_out = format_out, + data_format = data_format, + parser = "chromConverter") + } + xx +} + +#' Read Shimadzu Peak Table +#' This function is called internally by \code{read_shimadzu}. +#' @author Ethan Bass +#' @noRd +read_shimadzu_peaktable <- function(file, x, idx, sep, format_in, format_out){ + nrows <- as.numeric(strsplit(x = x[idx + 1], split = sep)[[1]][2]) + table_start <- grep("Peak#", x[idx:(idx + nrows)]) + idx - 1 + if (!is.na(nrows) && nrows > 0){ + time_column <- grep("R.Time|Ret.Time", strsplit(x = x[[table_start]], split = sep)[[1]]) + t1 <- strsplit(x = x[[table_start + 3]], split = sep)[[1]][time_column] + decimal_separator <- ifelse(grepl(",", t1), ",", ".") + + peak_tab <- read.csv(file, skip = table_start-1, sep = sep, nrows = nrows, + dec = decimal_separator) + if (format_out == "chromatographr"){ + column_names <- switch(format_in, "MC" = c("Ret.Time", "Proc.From", "Proc.To", "Area", "Height"), + c("R.Time", "I.Time", "F.Time", "Area", "Height")) + peak_tab <- peak_tab[, column_names] + colnames(peak_tab) <- c("rt", "start", "end", "area", "height") + # cbind(sample = gsub("\\[|\\]","", x[idx]), peak_tab) + } + peak_tab + } else { + NA + } +} + +#' Read Shimadzu MS Spectrum +#' This function is called internally by \code{read_shimadzu}. +#' @author Ethan Bass +#' @noRd +read_shimadzu_spectrum <- function(file, x, idx, sep){ + nrows <- as.numeric(strsplit(x = x[idx + 1], split = sep)[[1]][2]) + table_start <- grep("Intensity", x[idx:(idx + nrows)]) + idx - 1 + decimal_separator <- ifelse(grepl(".", strsplit(x[table_start + 4], split = sep)[[1]][1]), ".", ",") + + spectrum <- read.csv(file, skip = table_start-1, sep = sep, nrows = nrows, + dec = decimal_separator) + spectrum +} + +#' Extract Header from Shimadzu ASCII Files +#' This function is called internally by \code{read_shimadzu}. +#' @author Ethan Bass +#' @noRd +extract_shimadzu_header <- function(x, chrom.idx, sep){ + index <- chrom.idx + 1 + line <- x[index] + l <- length(strsplit(x = line, split = sep)[[1]]) + header <- strsplit(x = line, split = sep)[[1]] + while (l > 1) { + index <- index + 1 + line <- strsplit(x = x[index], split = sep)[[1]] + l <- length(line) + if (l == 1 | suppressWarnings(!is.na(as.numeric(line[1]))) | grepl("R.Time|Ret.Time", line[1])) + break + header <- rbind(header, line) + } + list(header, index-1) +} diff --git a/R/read_shimadzu_lcd.R b/R/read_shimadzu_lcd.R index 24afab6..de4a4da 100644 --- a/R/read_shimadzu_lcd.R +++ b/R/read_shimadzu_lcd.R @@ -34,6 +34,10 @@ #' @param data_format Either \code{wide} (default) or \code{long}. #' @param read_metadata Logical. Whether to attach metadata. #' @author Ethan Bass +#' @return A 3D chromatogram from the PDA stream in \code{matrix} or +#' \code{data.frame} format, according to the value of \code{format_out}. +#' The chromatograms will be returned in \code{wide} or \code{long} format +#' according to the value of \code{data_format}. #' @note This parser is experimental and may still #' need some work. It is not yet able to interpret much metadata from the files. #' @export @@ -46,7 +50,7 @@ read_shimadzu_lcd <- function(path, format_out = c("matrix", "data.frame"), olefile_installed <- reticulate::py_module_available("olefile") if (!olefile_installed){ - configure_olefile() + configure_python_environment(parser = "olefile") } # read wavelengths from "Wavelength Table" stream @@ -75,6 +79,7 @@ read_shimadzu_lcd <- function(path, format_out = c("matrix", "data.frame"), } #' Read Shimadzu "Method" stream +#' This function is called internally by \code{read_shimadzu_lcd}. #' @author Ethan Bass #' @noRd read_sz_method <- function(path){ @@ -110,6 +115,7 @@ read_sz_method <- function(path){ } #' Infer times from 'Shimadzu' Method stream +#' This function is called internally by \code{read_shimadzu_lcd}. #' @author Ethan Bass #' @noRd get_sz_times <- function(sz_method, nval){ @@ -151,6 +157,7 @@ read_shimadzu_raw <- function(path, n_lambdas = NULL){ } #' Export OLE stream +#' This function is called internally by \code{read_shimadzu_lcd}. #' Use olefile to export te specified stream. #' @param file Path to ole file. #' @author Ethan Bass @@ -172,10 +179,7 @@ export_stream <- function(path_in, stream, path_out, remove_null_bytes = FALSE, reticulate::py_run_string('data = st.read()') if (missing(path_out)){ - path_out <- tempfile() - } - if (.Platform$OS.type == "windows"){ - path_out <- gsub("\\\\", "/", path_out) + path_out <- fs::file_temp() } if (remove_null_bytes){ reticulate::py_run_string("data = data.replace(b'\\x00', b'')") @@ -186,7 +190,25 @@ export_stream <- function(path_in, stream, path_out, remove_null_bytes = FALSE, } } +#' Extract wavelengths from Shimadzu LCD +#' This function is called internally by \code{read_shimadzu_lcd}. +#' @author Ethan Bass +#' @noRd +read_shimadzu_wavelengths <- function(path){ + path_wavtab <- export_stream(path, stream = c("PDA 3D Raw Data", "Wavelength Table")) + f <- file(path_wavtab, "rb") + on.exit(close(f)) + n_lambda <- readBin(f, what="integer", size = 4) + count <- 1 + # lambdas <- numeric(n_lambda) + lambdas <- sapply(seq_len(n_lambda), function(i){ + readBin(f, what="integer", size = 4)/100 + }) + lambdas +} + #' Read 'Shimadzu' LCD data block +#' This function is called internally by \code{read_shimadzu_lcd}. #' @author Ethan Bass #' @noRd decode_shimadzu_block <- function(file) { @@ -239,6 +261,7 @@ decode_shimadzu_block <- function(file) { } #' Return twos complement from binary string +#' This function is called internally by \code{read_shimadzu_lcd}. #' @noRd twos_complement <- function(bin, exp){ if (missing(exp)){ @@ -280,56 +303,3 @@ integer_to_binary <- function(x, n) { # Return x } - -#' Extract wavelengths from Shimadzu LCD -#' @author Ethan Bass -#' @noRd -read_shimadzu_wavelengths <- function(path){ - path_wavtab <- export_stream(path, stream = c("PDA 3D Raw Data", "Wavelength Table")) - f <- file(path_wavtab, "rb") - on.exit(close(f)) - n_lambda <- readBin(f, what="integer", size = 4) - count <- 1 - # lambdas <- numeric(n_lambda) - lambdas <- sapply(seq_len(n_lambda), function(i){ - readBin(f, what="integer", size = 4)/100 - }) - lambdas -} - - -#' Configure olefile -#' -#' Configures reticulate to use olefile. Olefile is required to use the 'Shimadzu' -#' LCD parser. -#' @name configure_olefile -#' @param return_boolean Logical. Whether to return a Boolean value indicating -#' if the chromConverter environment is correctly configured. -#' @return If \code{return_boolean} is \code{TRUE}, returns a Boolean value -#' indicating whether the chromConverter environment is configured correctly. -#' Otherwise, there is no return value. -#' @author Ethan Bass -#' @import reticulate -#' @export -configure_olefile <- function(return_boolean = FALSE){ - install <- FALSE - if (!dir.exists(miniconda_path())){ - install <- readline("It is recommended to install miniconda in your R library to use the Shimadzu LCD parser. Install miniconda now? (y/n)") - if (install %in% c('y', "Y", "YES", "yes", "Yes")){ - install_miniconda() - } - } - env <- reticulate::configure_environment("chromConverter") - if (!env){ - reqs <- c("olefile") - reqs_available <- sapply(reqs, reticulate::py_module_available) - if (!all(reqs_available)){ - conda_install(envname = "chromConverter", reqs[which(!reqs_available)], - pip = TRUE) - } - } - if (return_boolean){ - return(env) - } -} - diff --git a/R/read_thermoraw.R b/R/read_thermoraw.R index 7e26167..5d87d3f 100644 --- a/R/read_thermoraw.R +++ b/R/read_thermoraw.R @@ -96,26 +96,30 @@ read_thermoraw <- function(path_in, path_out = NULL, #' @return No return value. #' @author Ethan Bass #' @noRd + configure_thermo_parser <- function(reconfigure = FALSE, check = FALSE){ if (.Platform$OS.type == "windows"){ path_parser <- readLines(system.file("shell/path_parser.txt", package = 'chromConverter')) exists <- file.exists(path_parser) if (!exists & !check){ warning("ThermoRawFileParser not found!", immediate. = TRUE) - path_parser <- readline(prompt="Please provide path to `ThermoRawFileParser.exe`):") + path_parser <- readline(prompt = "Please provide path to `ThermoRawFileParser.exe`):") path_parser <- gsub("/", "\\\\", path_parser) - writeLines(path_parser, con=system.file('shell/path_parser.txt', package='chromConverter')) + writeLines(path_parser, con = system.file('shell/path_parser.txt', + package = 'chromConverter')) } } else { - shell_script <- readLines(system.file('shell/thermofileparser.sh', package='chromConverter')) + shell_script <- readLines(system.file('shell/thermofileparser.sh', + package = 'chromConverter')) path_parser <- strsplit(shell_script[2]," ")[[1]] path_parser <- path_parser[grep(".exe", path_parser)] exists <- file.exists(path_parser) if (!exists & !check){ warning("ThermoRawFileParser not found!", immediate. = TRUE) - path_parser <- readline(prompt="Please provide path to `ThermoRawFileParser.exe`):") + path_parser <- readline(prompt = "Please provide path to `ThermoRawFileParser.exe`):") shell_script[2] <- paste0("mono ", path_parser, ' "$@"') - writeLines(shell_script, con = system.file('shell/thermofileparser.sh', package='chromConverter')) + writeLines(shell_script, con = system.file('shell/thermofileparser.sh', + package = 'chromConverter')) } } if (check){ diff --git a/R/read_varian_peaklist.R b/R/read_varian_peaklist.R index f3b4de5..ed87ca9 100644 --- a/R/read_varian_peaklist.R +++ b/R/read_varian_peaklist.R @@ -3,6 +3,7 @@ #' @param file Path to Varian peak list file. #' @importFrom utils read.csv #' @author Ethan Bass +#' @return A data.frame containing the information from the specified report. #' @export read_varian_peaklist <- function(file){ @@ -14,11 +15,11 @@ read_varian_peaklist <- function(file){ column_names[1] <- "compound" colnames(x) <- column_names - x <- x[-which(x$`Line#` == ""), ] + x <- x[-which(x$`Line#` == ""),] x <- x[-which(x$`Line#` == "Line#"),] x$Area <- as.numeric(x$Area) x$Height <- as.numeric(x$Height) - x <- x[,-16] + x <- x[, -16] x } diff --git a/R/read_waters_raw.R b/R/read_waters_raw.R new file mode 100644 index 0000000..d6b64f2 --- /dev/null +++ b/R/read_waters_raw.R @@ -0,0 +1,94 @@ +#' Read 'Waters' RAW +#' +#' Parser for reading 'Waters MassLynx (.raw) files into R. +#' +#' @param path Path to \code{.raw} file. +#' @param format_out Matrix or data.frame. +#' @param data_format Either \code{wide} (default) or \code{long}. +#' @param read_metadata Logical. Whether to attach metadata. +#' @param metadata_format Format to output metadata. Either \code{chromconverter} +#' or \code{raw}. +#' @return A chromatogram in the format specified by \code{format_out} +#' (retention time x wavelength). +#' @note For now this parser only reads 1D chromatograms (not mass spectra or +#' DAD data). +#' @author Ethan Bass +#' @export + +read_waters_raw <- function(path, format_out = c("matrix", "data.frame"), + data_format = c("wide", "long"), + read_metadata = TRUE, + metadata_format = c("chromconverter", "raw")){ + + format_out <- match.arg(format_out, c("matrix", "data.frame")) + data_format <- match.arg(data_format, c("wide", "long")) + metadata_format <- match.arg(metadata_format, c("chromconverter", "raw")) + + uv_paths <- list.files(path, pattern="_CHRO", full.names = TRUE) + meta_path <- grep("\\.INF", uv_paths, value=TRUE) + uv_paths <- grep("\\.INF", uv_paths, invert = TRUE, value = TRUE) + + dat <- lapply(uv_paths, read_waters_chro, format_out = format_out, + data_format = data_format, read_metadata = read_metadata, + metadata_format = metadata_format) + + meta <- readLines(meta_path, skipNul = TRUE, warn = FALSE, encoding = "Latin-1") + meta <- iconv(meta, sub="") + meta <- strsplit(meta,"\\([0-9]\\)")[[1]][-1] + meta <- gsub("^ |\\$CC\\$","", sapply(strsplit(meta, ","), function(x) x[1])) + + names(dat) <- meta + dat +} + +#' Read 'Waters' chromatograms +#' +#' Parser for reading 'Waters MassLynx' CHRO (.dat) files into R. +#' +#' @importFrom utils head tail +#' @param path Path to \code{.dat} file. +#' @param format_out Matrix or data.frame. +#' @param data_format Either \code{wide} (default) or \code{long}. +#' @param read_metadata Logical. Whether to attach metadata. +#' @param metadata_format Format to output metadata. Either \code{chromconverter} +#' or \code{raw}. +#' @return A chromatogram in the format specified by \code{format_out} +#' (retention time x wavelength). +#' @author Ethan Bass +#' @noRd + +#magic 80000100 08000200 + +read_waters_chro <- function(path, format_out = c("matrix", "data.frame"), + data_format = c("wide", "long"), + read_metadata = TRUE, + metadata_format = c("chromconverter", "raw")){ + + format_out <- match.arg(format_out, c("matrix", "data.frame")) + data_format <- match.arg(data_format, c("wide", "long")) + metadata_format <- match.arg(metadata_format, c("chromconverter", "raw")) + # metadata_format <- switch(metadata_format, + # chromconverter = "waters_uv", raw = "raw") + + f <- file(path, "rb") + on.exit(close(f)) + + seek(f, 0, "end") + end <- seek(f, 0, "end") + + seek(f, 128, "start") + start <- seek(f, 128, "start") + + x<-readBin(f, "numeric", size = 4, n = (end-start)/4) + times <- x[seq(1,length(x), by = 2)] + int <- x[seq(2,length(x), by = 2)] + if (data_format == "long"){ + dat <- data.frame(times = times, int = int) + } else { + dat <- data.frame(row.names = times, int = int) + } + if (format_out == "matrix"){ + dat <- as.matrix(dat) + } + dat +} diff --git a/R/reshape_chroms.R b/R/reshape_chroms.R index 5341024..29f58a9 100644 --- a/R/reshape_chroms.R +++ b/R/reshape_chroms.R @@ -1,4 +1,3 @@ - #' Reshapes list of chromatograms from wide to long format #' @name reshape_chroms #' @param x A list of chromatographic matrices in wide format. @@ -58,10 +57,11 @@ reshape_chrom <- function(x, data_format, ...){ #' @importFrom stats reshape #' @param x A chromatographic matrix in wide format. #' @param lambdas Wavelength(s) to include. +#' @param names_to Argument to \code{\link[tidyr]{pivot_longer}} #' @return A chromatographic matrix in long format. #' @author Ethan Bass #' @noRd -reshape_chrom_long <- function(x, lambdas, format_out = NULL){ +reshape_chrom_long <- function(x, lambdas, format_out = NULL, names_to = "lambda"){ if (!is.null(attr(x, "data_format")) && attr(x, "data_format") == "long"){ warning("The data already appear to be in long format!", immediate. = TRUE) } @@ -80,9 +80,8 @@ reshape_chrom_long <- function(x, lambdas, format_out = NULL){ xx <- xx[,lambdas, drop = FALSE] } data <- data.frame(tidyr::pivot_longer(data.frame(rt = rownames(xx), xx, check.names = FALSE), - cols = -c("rt"), names_to = "lambda", values_to = "intensity")) - data$rt <- as.numeric(data$rt) - data$lambda <- as.numeric(data$lambda) + cols = -c("rt"), names_to = names_to, values_to = "intensity")) + data <- apply(data, 2, as.numeric) } if (format_out == "matrix"){ data <- as.matrix(data) @@ -92,7 +91,7 @@ reshape_chrom_long <- function(x, lambdas, format_out = NULL){ data } - +#' Reshapes a single chromatogram from long to wide format #' @noRd reshape_chrom_wide <- function(x, lambdas, lambda_var = "lambda", time_var="rt", value_var = "int", drop){ diff --git a/R/utils.R b/R/utils.R index 352bc09..d6b8411 100644 --- a/R/utils.R +++ b/R/utils.R @@ -10,12 +10,16 @@ get_filetype <- function(path, out = c("format_in", "filetype")){ on.exit(close(f)) magic <- readBin(f, what = "raw", n = 4) - magic <- paste(paste0("x",as.character(magic)),collapse="/") + magic <- paste(paste0("x",as.character(magic)),collapse = "/") # magic filetype <- switch(magic, "x01/x32/x00/x00" = "AgilentChemstationMS", "x02/x02/x00/x00" = "AgilentMasshunterDADHeader", # "x02/x33/x30/x00" = "AgilentChemstationMWD", + "x02/x32/x00/x00" = "AgilentChemstationMWD", + # "x02/x33/x00/x00" = "AgilentChemstationMWD", + # "x03/x31/x00/x00" = "AgilentChemstationMWD2" + # "x01/x32/x00/x00" = "AgilentChemstationMS" "x03/x02/x00/x00" = "AgilentMasshunterDAD", "x02/x33/x30/x00" = "chemstation_30", "x02/x33/x31/x00" = "chemstation_31", @@ -36,7 +40,7 @@ get_filetype <- function(path, out = c("format_in", "filetype")){ if (filetype == "chemstation_131"){ seek(f, 348) magic2 <- readBin(f, what="character", n = 2) - magic2 <- paste(magic2, collapse="") + magic2 <- paste(magic2, collapse = "") filetype <- switch(magic2, "OL" = "openlab_131", "LC" = "chemstation_131") } @@ -58,7 +62,7 @@ get_filetype <- function(path, out = c("format_in", "filetype")){ #' Check parser #' @noRd -check_parser <- function(format_in, parser=NULL, find = FALSE){ +check_parser <- function(format_in, parser = NULL, find = FALSE){ allowed_formats <- list(openchrom = c("msd","csd","wsd"), chromconverter = c("agilent_dx", "cdf", "chemstation_csv", "chemstation_ch", "chemstation_fid", @@ -67,9 +71,11 @@ check_parser <- function(format_in, parser=NULL, find = FALSE){ "chemstation_130", "chemstation_131", "openlab_131", "chemstation_179", "chemstation_81", - "chemstation_181", "mzml", "mdf", + "chemstation_181", "mzml", "mzxml", + "mdf", "shimadzu_ascii", "shimadzu_fid", "shimadzu_dad", - "shimadzu_lcd", "waters_arw"), + "shimadzu_lcd", "waters_arw", + "waters_raw", "waters_chro"), aston = c("chemstation", "chemstation_uv", "chemstation_131", "masshunter_dad", "other"), @@ -97,15 +103,19 @@ check_parser <- function(format_in, parser=NULL, find = FALSE){ } possible_parsers <- names(allowed_formats)[grep(format_in, allowed_formats)] if (length(possible_parsers) > 1){ - possible_parsers <- possible_parsers[match( - c("thermoraw", "entab", "chromconverter", "rainbow", "aston"), possible_parsers)] - if (any(is.na(possible_parsers))){ - possible_parsers <- possible_parsers[-which(is.na(possible_parsers))] + if (format_in == "waters_raw"){ + possible_parsers <- c("rainbow") + } else{ + possible_parsers <- possible_parsers[match( + c("thermoraw", "entab", "chromconverter", "rainbow", "aston"), possible_parsers)] + if (any(is.na(possible_parsers))){ + possible_parsers <- possible_parsers[-which(is.na(possible_parsers))] + } } } possible_parsers[1] } else{ - if (!(format_in %in% allowed_formats[[parser]])){ + if (!(format_in %in% allowed_formats[[tolower(parser)]])){ stop("Mismatched arguments!", "\n\n", "The ", paste0(sQuote(format_in), " format can be converted using the following parsers: ", paste(sQuote(names(allowed_formats)[grep(format_in, allowed_formats)]), collapse = ", "), ". \n \n", "The ", sQuote(parser), " parser can take the following formats as inputs: \n", @@ -128,7 +138,8 @@ extract_filenames <- function(files){ file_names <- strsplit(files, "/") file_names <- gsub("\\.[Dd]", "", sapply(file_names, function(n){ - ifelse(any(grepl("\\.[Dd]", n)), grep("\\.[Dd]", n, value = TRUE), tail(n,1)) + ifelse(any(grepl("\\.[Dd]", n)), + grep("\\.[Dd]", n, value = TRUE), tail(n,1)) })) } else { file_names <- sapply(strsplit(basename(files),"\\."), function(x) x[1]) @@ -140,18 +151,37 @@ extract_filenames <- function(files){ #' @noRd format_to_extension <- function(format_in){ switch(format_in, - "agilent_d" = ".d|.D", - "chemstation_uv" = ".uv|.UV", - "chemstation_ch" = ".ch|.CH", - "chemstation_fid" = ".ch|.CH", - "chemstation_csv" = ".csv|.CSV", - "masshunter_dad" = ".sp|.SP", - "shimadzu_fid" = ".txt", - "shimadzu_dad" = ".txt", - "chromeleon_uv" = ".txt", - "thermoraw" = ".raw", "mzml" = ".mzml", "waters_arw" = ".arw", - "waters_raw" = ".raw", "msd" = ".", "csd" =".", "wsd" =".", - "mdf" = ".mdf|.MDF", "other"=".") + "agilent_d" = "\\.d$", + "agilent_dx" = "\\.dx$", + "chemstation_uv" = "\\.uv$", + "chemstation_31" = "\\.uv$", + "chemstation_131" = "\\.uv$", + "chemstation_ch" = "\\.ch$", + "chemstation_fid" = "\\.ch$", + "chemstation_179" = "\\.ch$", + "chemstation_181" = "\\.ch$", + "chemstation_81" = "\\.ch$", + "chemstation_30" = "\\.ch$", + "chemstation_130" = "\\.ch$", + "chemstation_csv" = "\\.csv$", + "masshunter_dad" = "\\.sp$", + "shimadzu_txt" = "\\.txt$", + "shimadzu_fid" = "\\.txt$", + "shimadzu_dad" = "\\.txt$", + "shimadzu_lcd" = "\\.lcd$", + "chromeleon_uv" = "\\.txt$", + "thermoraw" = "\\.raw$", + "cdf" = "\\.cdf$", + "mzml" = "\\.mzml$", + "mzxml" = "\\.mzxml$", + "waters_arw" = "\\.arw$", + "waters_raw" = "\\.raw$", + "msd" = "\\.", + "csd" ="\\.", + "wsd" ="\\.", + "mdf" = "\\.mdf$", + "other" = "\\.", + "\\.") } #' @noRd @@ -183,24 +213,6 @@ set_temp_directory <- function(){ } } -#' Extract header from Shimadzu ascii files -#' @noRd -extract_shimadzu_header <- function(x, chrom.idx, sep){ - index <- chrom.idx + 1 - line <- x[index] - l <- length(strsplit(x = line, split = sep)[[1]]) - header <- strsplit(x = line, split = sep)[[1]] - while (l > 1) { - index <- index + 1 - line <- strsplit(x = x[index], split = sep)[[1]] - l <- length(line) - if (l == 1 | suppressWarnings(!is.na(as.numeric(line[1])))) - break - header <- rbind(header, line) - } - list(header,index) -} - #' Check for suggested package #' @noRd #' @keywords internal @@ -248,3 +260,63 @@ rename_list <- function(x, new_names){ names(x) <- new_names x } + +#' @noRd +collapse_list <- function(x){ + while(is.list(x) && length(x) == 1){ + x <- x[[1]] + } + x +} + +#' Split vector by position +#' @note From https://stackoverflow.com/questions/16357962/r-split-numeric-vector-at-position +#' @noRd +split_at <- function(x, pos) unname(split(x, cumsum(seq_along(x) %in% pos))) + +#' Configure python environment +#' +#' Configures reticulate environment for parsers. +#' @name configure_python_environment +#' @param parser Either \code{aston}, \code{rainbow}, or \code{olefile} (for +#' \code{read_shimadzu_lcd}). +#' @param return_boolean Logical. Whether to return a Boolean value indicating +#' if the chromConverter environment is correctly configured. +#' @return If \code{return_boolean} is \code{TRUE}, returns a Boolean value +#' indicating whether the chromConverter environment is configured correctly. +#' Otherwise, there is no return value. +#' @author Ethan Bass +#' @import reticulate +#' @export +configure_python_environment <- function(parser, return_boolean = FALSE){ + install <- FALSE + if (!dir.exists(miniconda_path())){ + install <- readline("It is recommended to install miniconda in your R library to use rainbow parsers. Install miniconda now? (y/n)") + if (install %in% c('y', "Y", "YES", "yes", "Yes")){ + install_miniconda() + } + } + env <- reticulate::configure_environment("chromConverter") + if (!env){ + reqs <- get_parser_reqs(parser) + reqs_available <- sapply(reqs, reticulate::py_module_available) + if (!all(reqs_available)){ + conda_install(envname = "chromConverter", reqs[which(!reqs_available)], + pip = TRUE) + } + } + assign_fn <- switch(parser, aston = assign_trace_file, + rainbow = assign_rb_read, + olefile = function(...){}) + assign_fn() + if (return_boolean){ + env + } +} + +#' @noRd +get_parser_reqs <- function(parser){ + switch(parser, "aston" = c("pandas","scipy","numpy","aston"), + "olefile" = c("olefile"), + "rainbow" = c("numpy", "rainbow-api")) +} diff --git a/R/write_chroms.R b/R/write_chroms.R index cefbc09..410b13b 100644 --- a/R/write_chroms.R +++ b/R/write_chroms.R @@ -1,4 +1,4 @@ -#' Export chromatograms as csvs +#' Export chromatograms as CSVs #' @author Ethan Bass #' @noRd export_csvs <- function(data, path_out, fileEncoding = "utf8", row.names = TRUE){ @@ -13,33 +13,61 @@ export_csvs <- function(data, path_out, fileEncoding = "utf8", row.names = TRUE) #' @noRd export_cdfs <- function(data, path_out){ sapply(seq_along(data), function(i){ - write_cdf(data[[i]], sample_name = names(data)[i], - path_out = path_out) + write_cdf(data[[i]], path_out = path_out, sample_name = names(data)[i]) }) } #' Write CDF file from chromatogram +#' +#' Exports a chromatogram in ANDI (Analytical Data Interchange) chromatography +#' format (ASTM E1947-98). This format can only accommodate unidimensional data. +#' For two-dimensional chromatograms, the column to export can be specified +#' using the \code{lambda} argument. Otherwise, a warning will be generated and +#' the first column of the chromatogram will be exported. +#' #' @author Ethan Bass -#' @noRd -write_cdf <- function(x, sample_name, path_out){ +#' @param x A chromatogram in (wide) format. +#' @param path_out The path to write the file. +#' @param sample_name The name of the file. +#' @param lambda The wavelength to export (for 2-dimensional chromatograms). +#' Must be a string matching one the columns in \code{x} or the index of the +#' column to export. +#' @param force Whether to overwrite existing files at the specified path. +#' Defaults to \code{FALSE}. +#' @return No return value. The function is called for its side effects. +#' @section Side effects: +#' Exports a chromatogram in ANDI chromatography format (netCDF) in the directory +#' specified by \code{path_out}. The file will be named according to the value +#' of \code{sample_name}. If no \code{sample_name} is provided, the +#' \code{sample_name} attribute will be used if it exists. +#' @export +write_cdf <- function(x, path_out, sample_name, lambda = NULL, force = FALSE){ check_for_pkg("ncdf4") - if (ncol(x) + as.numeric(attr(x, "data_format") == "wide") > 2){ - warning("The supplies chromatogram contains more than two dimensions. Only + if (missing(sample_name)){ + sample_name <- attr(x,"sample_name") + if (is.null(sample_name)){ + stop("Sample name must be provided.") + } + } + if (is.null(lambda) && ncol(x) + as.numeric(attr(x, "data_format") == "wide") > 2){ + warning("The supplied chromatogram contains more than two dimensions. Only the first two dimensions will be written to the ANDI chrom file.", immediate. = TRUE) } + lambda <- ifelse(is.null(lambda), 1, lambda) if (attr(x, "data_format") == "wide"){ - x1 <- data.frame(RT = as.numeric(rownames(x)), Intensity = x[,1]) + x1 <- data.frame(RT = as.numeric(rownames(x)), Intensity = x[, lambda]) x <- transfer_metadata(x1, x) } - # if (!missing(column)){ - # x1 <- x[,column,drop = FALSE] - # x <- transfer_metadata(x1,x) - # } filename <- fs::path_ext_remove(fs::path_file(sample_name)) file_out <- fs::path(path_out, filename, ext = "cdf") if (fs::file_exists(file_out)){ - warning(paste("File", sQuote(basename(file_out)), "already exists and will not be overwritten."), immediate. = TRUE) + if (!force){ + stop(paste("File", sQuote(basename(file_out)), + "already exists and will not be overwritten.")) + } else{ + fs::file_delete(file_out) + } } # define dimensions point_number <- ncdf4::ncdim_def("point_number", "", @@ -49,9 +77,11 @@ write_cdf <- function(x, sample_name, path_out){ # define variables nc_time <- ncdf4::ncvar_def("raw_data_retention", "", dim = point_number) nc_intensity <- ncdf4::ncvar_def("ordinate_values", "", dim = point_number) - other_vars <- c("actual_delay_time", "actual_run_time_length", "actual_sampling_interval", - "detector_maximum_value", "detector_minimum_value") - other_vars <- lapply(other_vars, function(x) ncdf4::ncvar_def(x, "",list())) + other_vars <- c("actual_delay_time", "actual_run_time_length", + "actual_sampling_interval", "detector_maximum_value", + "detector_minimum_value") + + other_vars <- lapply(other_vars, function(x) ncdf4::ncvar_def(x, "", list())) # write netcdf file ncdf4::nc_create(file_out, c(list(nc_time, nc_intensity), other_vars)) @@ -73,6 +103,7 @@ write_cdf <- function(x, sample_name, path_out){ # write metadata as global attributes meta <- format_metadata_for_cdf(x) nc_add_global_attributes(nc = nc, meta = meta, sample_name = sample_name) + # finish writing file ncdf4::nc_close(nc) } @@ -83,7 +114,9 @@ nc_add_global_attributes <- function(nc, meta, sample_name){ sapply(seq_along(meta), function(i){ ncdf4::ncatt_put(nc = nc, varid = 0, attname = names(meta)[i], attval = meta[[i]], - prec = ifelse(names(meta)[i] %in% c("sample_amount", "sample_injection_volume"), + prec = ifelse(names(meta)[i] %in% + c("sample_amount", + "sample_injection_volume"), "float","text")) }) if (!is.null(sample_name)){ diff --git a/README.md b/README.md index 703d14e..baa7ed5 100644 --- a/README.md +++ b/README.md @@ -10,18 +10,19 @@ ### Overview -chromConverter aims to facilitate the conversion of chromatography data from various proprietary formats so it can be easily read into R for further analysis. It currently consists of wrappers around file parsers from various external libraries including [Aston](https://github.com/bovee/aston), [Entab](https://github.com/bovee/entab), the [ThermoRawFileParser](https://github.com/compomics/ThermoRawFileParser), [rainbow](https://rainbow-api.readthedocs.io/), and [OpenChrom](https://lablicate.com/platform/openchrom) as well as some parsers written directly in R for (mostly) text-based formats. +chromConverter aims to facilitate the conversion of chromatography data from various proprietary formats so it can be easily read into R for further analysis. It includes a number of parsers written directly in R as well as bindings to various external libraries including [Aston](https://github.com/bovee/aston), [Entab](https://github.com/bovee/entab), [rainbow](https://rainbow-api.readthedocs.io/), the [ThermoRawFileParser](https://github.com/compomics/ThermoRawFileParser), [OpenChrom](https://lablicate.com/platform/openchrom) and [RaMS](https://github.com/wkumler/RaMS/). ### Formats ##### ChromConverter -- Chromeleon UV ascii (`.txt`) -- mzML (`.mzml`) -- Shimadzu LabSolutions ascii (`.txt`) -- Waters ascii (`.arw`) -- 'Agilent Chemstation' & 'OpenLab' `.ch` files (versions 8, 81, 130, 179, 181) -- 'Agilent Chemstation' & 'OpenLab' `.uv` files (versions 131, 31) -- 'Shimadzu' `.lcd` (*provisional support* for PDA stream) +- 'Agilent ChemStation' & 'OpenLab' `.uv` files (versions 131, 31) +- 'Agilent ChemStation' & 'OpenLab' `.ch` files (versions 30, 130, 8, 81, 179, 181) +- Ă…NDI (Analytical Data Interchange) Chromatography & MS formats (`.cdf`) +- mzML (`.mzml`) & mzXML (.`mzxml`) (via RaMS). +- 'Shimadzu LabSolutions' ascii (`.txt`) +- 'Shimadzu LabSolutions'`.lcd` (*provisional support* for PDA stream) +- 'Thermo Scientific Chromeleon' ascii (`.txt`) +- 'Waters' ascii (`.arw`) ##### External Libraries ###### Aston/Entab (*Entab requires separate installation, see [instructions below](README.md#Installation)*) @@ -67,35 +68,6 @@ install.packages("chromConverter", repos="https://ethanbass.r-universe.dev/", ty **Note:** There are some changes in recent versions of RStudio that messed up the accessibility of the python bindings through `reticulate`. If you wish to access python-based parsers (e.g. aston or rainbow) through a recent version of RStudio, it is suggested to first change the default settings in RStudio. To do this, open `Tools:Global Options...:Python` and uncheck the box that says `Automatically activate project-local Python environments`. Then restart RStudio. Alternatively, this issue can be resolved by selecting the desired python interpreter in the Python settings pane. It is recommended to use a local installation of miniconda, which can be installed by running `reticulate::install_miniconda()`. -#### Optional additional dependencies - -Some of the parsers rely on external software libraries that must be manually installed. - -##### **Aston** - -To install Aston, call the `configure_aston()` function to install miniconda along with the necessary python dependencies. Running `read_chroms` with the Aston parser selected should also trigger a prompt to install Aston. If you're running Windows, you may need to install the latest version of ['Microsoft Visual C++'](https://docs.microsoft.com/en-US/cpp/windows/latest-supported-vc-redist?view=msvc-170) if you don't already have it. - -##### **Entab** - -[Entab](https://github.com/bovee/entab) is a Rust-based parsing framework for converting a variety of scientific file formats into tabular data. To use parsers from Entab, you must first install [Rust](https://www.rust-lang.org/tools/install) and Entab-R. After following the [instructions](https://www.rust-lang.org/tools/install) to install Rust, you can install Entab from GitHub as follows: - -``` -remotes::install_github("https://github.com/bovee/entab/", subdir = "entab-r") -``` - -##### **ThermoRawFileParser** - -Thermo RAW files can be converted by calling the [ThermoRawFileParser](https://github.com/compomics/ThermoRawFileParser) on the command-line. To install the ThermoRawFileParser, follow the instructions [here](https://github.com/compomics/ThermoRawFileParser). If you are running Linux or Mac OS X, you will also need to install [mono](https://www.mono-project.com/download/stable/#download-lin), following the instructions provided at the link. In addition, when you use chromConverter to convert Thermo RAW files for the first time you will be asked to enter the path to the program. - -##### **OpenChrom** -###### (**Note:** Support for the commmand line interface has been removed from OpenChrom (as of `version 1.5.0`). Older versions (e.g. `1.4.x`) should still work for now). - -[OpenChrom](https://lablicate.com/platform/openchrom) is opensource chromatography software, containing a large number of file parsers, which can now be conveniently accessed directly from R. Strangely, configuring OpenChrom for use on the command-line deactivates the graphical user interface (GUI). Thus, it is recommended to make a separate copy of OpenChrom if you'd still like to access the GUI. To use the OpenChrom parsers, follow the steps detailed below: - -1) Download [OpenChrom](https://lablicate.com/platform/openchrom/download) (**version 1.4.x only**) and place it into a directory of your choice. -2) If you intend to use the GUI in the future, it is recommended to make a separate copy of OpenChrom for command-line use. -3) Call `read_chroms` with `parser = "openchrom"`. The first time you call the parser, you may be asked to provide the path to your local installation of OpenChrom. The path will then be saved for future use. If the command-line interface is disabled, you will be given the option to automatically activate the command-line. Alternatively, the command-line option can be activated from R by calling `configure_openchrom(cli = "true")` or following the [instructions](https://github.com/OpenChrom/openchrom/wiki/CLI) to manually activate the CLI. This process can be reversed using the same function: e.g. `configure_openchrom(cli = "false"). To specify an OpenChrom executable in a non-standard location, call `configure_openchrom` with the `path` argument, e.g. `configure_openchrom(cli = "true", path = "path_to_openchrom_executable"). - ### Usage ##### Importing chromatograms @@ -111,7 +83,7 @@ The `read_chroms` function will attempt to determine an appropriate parser to us ###### Exporting files -If you'd like to automatically export the files, include the argument `export = TRUE` along with the path where you'd like to export the files (`path_out`). Some parsers (e.g. `OpenChrom` and `ThermoRawFileParser`) need to export files for their basic operations. Thus, if these parsers are selected, you will need to specify an argument to `path_out`. +If you'd like to automatically export the files, include the argument `export = TRUE` along with the path where you'd like to export the files (`path_out`) and the desired file format (`export_format`). Some parsers (e.g. `OpenChrom` and `ThermoRawFileParser`) need to export files for their basic operations. Thus, if these parsers are selected, you will need to specify an argument to `path_out`. ``` library(chromConverter) @@ -132,26 +104,55 @@ chromConverter includes some options to extract metadata from the provided files ##### Importing peak lists -The `read_peak` list function can be used to import peak lists from 'Chemstation' or 'Shimadzu' ascii files. The syntax is similar to `read_chroms`. In the simplest case, you can just provide paths to the files or directory you want to read in along with the format (`format_in`), e.g. +The `read_peak` list function can be used to import peak lists from 'ChemStation' REPORT files or 'Shimadzu' ascii files. The syntax is similar to `read_chroms`. In the simplest case, you can just provide paths to the files or directory you want to read in along with the format (`format_in`), e.g. ``` pks <- read_chroms(, format_in = "chemstation") ``` +#### Optional additional dependencies + +Some of the parsers rely on external software libraries that must be manually installed. + +##### **Aston** + +To install Aston, call the `configure_aston()` function to install miniconda along with the necessary python dependencies. Running `read_chroms` with the Aston parser selected should also trigger a prompt to install Aston. If you're running Windows, you may need to install the latest version of ['Microsoft Visual C++'](https://docs.microsoft.com/en-US/cpp/windows/latest-supported-vc-redist?view=msvc-170) if you don't already have it. + +##### **Entab** + +[Entab](https://github.com/bovee/entab) is a Rust-based parsing framework for converting a variety of scientific file formats into tabular data. To use parsers from Entab, you must first install [Rust](https://www.rust-lang.org/tools/install) and Entab-R. After following the [instructions](https://www.rust-lang.org/tools/install) to install Rust, you can install Entab from GitHub as follows: + +``` +remotes::install_github("https://github.com/bovee/entab/", subdir = "entab-r") +``` + +##### **ThermoRawFileParser** + +Thermo RAW files can be converted by calling the [ThermoRawFileParser](https://github.com/compomics/ThermoRawFileParser) on the command-line. To install the ThermoRawFileParser, follow the instructions [here](https://github.com/compomics/ThermoRawFileParser). If you are running Linux or Mac OS X, you will also need to install [mono](https://www.mono-project.com/download/stable/#download-lin), following the instructions provided at the link. In addition, when you use chromConverter to convert Thermo RAW files for the first time you will be asked to enter the path to the program. + +##### **OpenChrom** +###### (**Note:** Support for the commmand line interface has been removed from OpenChrom (as of `version 1.5.0`). Older versions (e.g. `1.4.x`) should still work for now). + +[OpenChrom](https://lablicate.com/platform/openchrom) is open source chromatography software, containing a large number of file parsers, which can now be conveniently accessed directly from R. Strangely, configuring OpenChrom for use on the command-line deactivates the graphical user interface (GUI). Thus, it is recommended to make a separate copy of OpenChrom if you'd still like to access the GUI. To use the OpenChrom parsers, follow the steps detailed below: + +1) Download [OpenChrom](https://lablicate.com/platform/openchrom/download) (**version 1.4.x only**) and place it into a directory of your choice. +2) If you intend to use the GUI in the future, it is recommended to make a separate copy of OpenChrom for command-line use. +3) Call `read_chroms` with `parser = "openchrom"`. The first time you call the parser, you may be asked to provide the path to your local installation of OpenChrom. The path will then be saved for future use. If the command-line interface is disabled, you will be given the option to automatically activate the command-line. Alternatively, the command-line option can be activated from R by calling `configure_openchrom(cli = "true")` or following the [instructions](https://github.com/OpenChrom/openchrom/wiki/CLI) to manually activate the CLI. This process can be reversed using the same function: e.g. `configure_openchrom(cli = "false"). To specify an OpenChrom executable in a non-standard location, call `configure_openchrom` with the `path` argument, e.g. `configure_openchrom(cli = "true", path = "path_to_openchrom_executable"). + ### Further analysis For downstream analyses of chromatographic data, you can also check out my package [chromatographR](https://ethanbass.github.io/chromatographR/). For interactive visualization of chromatograms, you can check out my new package [ShinyChromViewer](https://github.com/ethanbass/ShinyChromViewer) (alpha release). ## Contributing -Contributions of source code, ideas, or documentation are very welcome. Please get in touch (preferable by opening a GitHub [issue](https://github.com/ethanbass/chromatographR/issues)) to discuss any suggestions or to file a bug report. Some good reasons to file an issue: +Contributions of source code, ideas, or documentation are always welcome. Please get in touch (preferable by opening a GitHub [issue](https://github.com/ethanbass/chromatographR/issues)) to discuss any suggestions or to file a bug report. Some good reasons to file an issue: - You've found an actual bug. - You're getting a cryptic error message that you don't understand. - You have a file format you'd like to read that isn't currently supported by chromatographR. (If you do this, please make sure to include a link to an example file!) - You have a new feature you'd like to see implemented. -**Note:** Before filing a bug report, please make sure to install the latest development version of chromConverter from GitHub, in case your bug has already been patched. After installing the latest version, you may also need to refresh your R session to remove the older version from the cache. +**Note: Before filing a bug report, please make sure to install the latest development version of chromConverter from GitHub**, in case your bug has already been patched. After installing the latest version, you may also need to refresh your R session to remove the older version from the cache. ### Other related packages @@ -161,6 +162,6 @@ Contributions of source code, ideas, or documentation are very welcome. Please g You can cite chromConverter as follows: -Bass, E. (2023). chromConverter: chromatographic file converter. http://doi.org/10.5281/zenodo.6792521. +Bass, E. (2023). chromConverter: Chromatographic File Converter. http://doi.org/10.5281/zenodo.6792521. -If you use external libraries to convert your files, please cite them as well in published work. +If you use external libraries to convert your files, it is suggested to also cite these libraries in published work. diff --git a/inst/CITATION b/inst/CITATION index 445a7b9..c890776 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -2,13 +2,13 @@ citHeader("To cite chromConverter in publications, please use:") citEntry( entry = "manual", - title = "chromConverter: chromatographic file converter", + title = "chromConverter: Chromatographic File Converter", author = "Ethan Bass", year = "2023", - version = "version 0.5.0", + version = "version 0.6.0", doi = "10.5281/zenodo.6792521", url = "https://ethanbass.github.io/chromConverter/", textVersion = paste("Bass, E. (2023).", - "chromConverter: chromatographic file converter.", + "chromConverter: Chromatographic File Converter.", "http://doi.org/10.5281/zenodo.6792521.") ) diff --git a/man/call_entab.Rd b/man/call_entab.Rd index 342dc3f..601261d 100644 --- a/man/call_entab.Rd +++ b/man/call_entab.Rd @@ -7,7 +7,7 @@ call_entab( file, data_format = c("wide", "long"), - format_in = NULL, + format_in = "", format_out = c("matrix", "data.frame"), read_metadata = TRUE, metadata_format = c("chromconverter", "raw") diff --git a/man/call_openchrom.Rd b/man/call_openchrom.Rd index 26e1d17..091a3b4 100644 --- a/man/call_openchrom.Rd +++ b/man/call_openchrom.Rd @@ -32,9 +32,12 @@ vector of paths to the newly created files.} \item{verbose}{Logical. Whether to print output from OpenChrom to the console.} } \value{ -If \code{return_paths} is TRUE, the function will return a vector of paths to the newly created files. -If \code{return_paths} is FALSE and \code{export_format} is \code{csv}, the function will return a list -of chromatograms in \code{data.frame} format. Otherwise, it will not return anything. +If \code{return_paths} is \code{FALSE}, the function will return a +list of chromatograms (if an appropriate parser is available to import the +files into R). The chromatograms will be returned in \code{matrix} or +\code{data.frame} format according to the value of {format_out}. If +\code{return_paths} is \code{TRUE}, the function will return a character +vector of paths to the newly created files. } \description{ Writes \code{xml} batch-files and calls OpenChrom file parsers using a @@ -42,7 +45,7 @@ system call to the command-line interface. To use this function \href{https://lablicate.com/platform/openchrom}{OpenChrom} (version 0.4.0) must be manually installed. The command line interface is no longer supported in the latest versions of OpenChrom (starting with version 0.5.0), so the function -will not work with these new versions. +will not work with these newer versions. } \details{ The \code{call_openchrom} works by creating an \code{xml} batchfile and diff --git a/man/call_rainbow.Rd b/man/call_rainbow.Rd index ca4fddb..6f9a782 100644 --- a/man/call_rainbow.Rd +++ b/man/call_rainbow.Rd @@ -12,7 +12,9 @@ call_rainbow( data_format = c("wide", "long"), by = c("detector", "name"), what = NULL, - read_metadata = TRUE + read_metadata = TRUE, + collapse = TRUE, + precision = 1 ) } \arguments{ @@ -32,6 +34,11 @@ call_rainbow( \code{ELSD}). This argument only applies if \code{by == "detector"}.} \item{read_metadata}{Logical. Whether to attach metadata. Defaults to TRUE.} + +\item{collapse}{Logical. Whether to collapse lists that only contain a single +element.} + +\item{precision}{Number of decimals to round mz values. Defaults to 1.} } \value{ Returns a (nested) list of \code{matrices} or \code{data.frames} according to diff --git a/man/configure_olefile.Rd b/man/configure_olefile.Rd deleted file mode 100644 index 4eef869..0000000 --- a/man/configure_olefile.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/read_shimadzu_lcd.R -\name{configure_olefile} -\alias{configure_olefile} -\title{Configure olefile} -\usage{ -configure_olefile(return_boolean = FALSE) -} -\arguments{ -\item{return_boolean}{Logical. Whether to return a Boolean value indicating -if the chromConverter environment is correctly configured.} -} -\value{ -If \code{return_boolean} is \code{TRUE}, returns a Boolean value -indicating whether the chromConverter environment is configured correctly. -Otherwise, there is no return value. -} -\description{ -Configures reticulate to use olefile. Olefile is required to use the 'Shimadzu' -LCD parser. -} -\author{ -Ethan Bass -} diff --git a/man/configure_aston.Rd b/man/configure_python_environment.Rd similarity index 54% rename from man/configure_aston.Rd rename to man/configure_python_environment.Rd index 518e76e..348ae2e 100644 --- a/man/configure_aston.Rd +++ b/man/configure_python_environment.Rd @@ -1,12 +1,15 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aston_parsers.R -\name{configure_aston} -\alias{configure_aston} -\title{Configure Aston} +% Please edit documentation in R/utils.R +\name{configure_python_environment} +\alias{configure_python_environment} +\title{Configure python environment} \usage{ -configure_aston(return_boolean = FALSE) +configure_python_environment(parser, return_boolean = FALSE) } \arguments{ +\item{parser}{Either \code{aston}, \code{rainbow}, or \code{olefile} (for +\code{read_shimadzu_lcd}).} + \item{return_boolean}{Logical. Whether to return a Boolean value indicating if the chromConverter environment is correctly configured.} } @@ -16,7 +19,7 @@ indicating whether the chromConverter environment is configured correctly. Otherwise, there is no return value. } \description{ -Configures reticulate to use Aston file parsers. +Configures reticulate environment for parsers. } \author{ Ethan Bass diff --git a/man/configure_rainbow.Rd b/man/configure_rainbow.Rd deleted file mode 100644 index 9b4ffe6..0000000 --- a/man/configure_rainbow.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/call_rainbow.R -\name{configure_rainbow} -\alias{configure_rainbow} -\title{Configure rainbow} -\usage{ -configure_rainbow(return_boolean = FALSE) -} -\arguments{ -\item{return_boolean}{Logical. Whether to return a Boolean value indicating -if the chromConverter environment is correctly configured.} -} -\value{ -If \code{return_boolean} is \code{TRUE}, returns a Boolean value -indicating whether the chromConverter environment is configured correctly. -Otherwise, there is no return value. -} -\description{ -Configures reticulate to use rainbow file parsers. -} -\author{ -Ethan Bass -} diff --git a/man/read_cdf.Rd b/man/read_cdf.Rd index b666739..abe89f8 100644 --- a/man/read_cdf.Rd +++ b/man/read_cdf.Rd @@ -9,11 +9,14 @@ read_cdf( format_out = c("matrix", "data.frame"), data_format = c("wide", "long"), what = "chromatogram", - read_metadata = TRUE + read_metadata = TRUE, + metadata_format = c("chromconverter", "raw"), + collapse = TRUE, + ... ) } \arguments{ -\item{file}{path to file} +\item{file}{Path to file.} \item{format_out}{R format. Either \code{matrix} or \code{data.frame}.} @@ -25,10 +28,20 @@ rownames of the object.} \item{what}{Whether to extract \code{chromatogram} and/or \code{peak_table}.} \item{read_metadata}{Whether to read metadata from file.} + +\item{metadata_format}{Format to output metadata. Either \code{chromconverter} +or \code{raw}.} + +\item{collapse}{Logical. Whether to collapse lists that only contain a single +element.} + +\item{...}{Additional arguments to parser. The \code{ms_format} argument +can be used here to specify whether to return mass spectra in \code{list} +format or as a \code{data.frame}.} } \value{ A chromatogram in the format specified by the \code{format_out} and -\code{data_format} arguments (retention time x wavelength). +\code{data_format} arguments. } \description{ Read CDF file diff --git a/man/read_chemstation_ch.Rd b/man/read_chemstation_ch.Rd index 2cf355d..adfe6a2 100644 --- a/man/read_chemstation_ch.Rd +++ b/man/read_chemstation_ch.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/read_chemstation_ch.R \name{read_chemstation_ch} \alias{read_chemstation_ch} -\title{Parser for reading Agilent ('.ch') files into R} +\title{Read 'ChemStation' CH files} \usage{ read_chemstation_ch( path, @@ -25,11 +25,22 @@ read_chemstation_ch( or \code{raw}.} } \value{ -A chromatogram in the format specified by \code{format_out} -(retention time x wavelength). +A 2D chromatogram in the format specified by \code{data_format} and +\code{format_out}. If \code{data_format} is \code{wide}, the chromatogram will +be returned with retention times as rows and a single column for the intensity. +If \code{long} format is requested, two columns will be returned: one for the +retention time and one for the intensity. The \code{format_out} argument +determines whether the chromatogram is returned as a \code{matrix} or +\code{data.frame}. Metadata can be attached to the chromatogram as +\code{\link{attributes}} if \code{read_metadata} is {TRUE}. } \description{ -Parser for reading Agilent ('.ch') files into R +Agilent \code{.ch} files come in several different varieties. This parser +can automatically detect and read several versions of these files from +'Agilent ChemStation' and 'OpenLab', including versions \code{30} and \code{130}, +which are generally produced by ultraviolet detectors, as well as \code{81}, +\code{179}, and \code{181} which are generally produced by flame ionization +detectors. } \note{ This function was adapted from the diff --git a/man/read_chemstation_reports.Rd b/man/read_chemstation_reports.Rd index 4c6010b..d957244 100644 --- a/man/read_chemstation_reports.Rd +++ b/man/read_chemstation_reports.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/read_chemstation_report.R \name{read_chemstation_reports} \alias{read_chemstation_reports} -\title{Read Agilent Chemstation Reports} +\title{Read 'Agilent ChemStation' Reports} \usage{ read_chemstation_reports( files, @@ -11,7 +11,7 @@ read_chemstation_reports( ) } \arguments{ -\item{files}{Paths to Chemstation report files.} +\item{files}{Paths to 'ChemStation' report files.} \item{data_format}{Format to output data. Either \code{chromatographr} or \code{chemstation}.} @@ -19,8 +19,12 @@ read_chemstation_reports( \item{metadata_format}{Format to output metadata. Either \code{chromconverter} or \code{raw}.} } +\value{ +A data.frame containing the information from the specified +'ChemStation' report. +} \description{ -Read Agilent Chemstation Reports +Read 'Agilent ChemStation' Reports } \author{ Ethan Bass diff --git a/man/read_chemstation_uv.Rd b/man/read_chemstation_uv.Rd index ac278ab..b8e121b 100644 --- a/man/read_chemstation_uv.Rd +++ b/man/read_chemstation_uv.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/read_chemstation_uv.R \name{read_chemstation_uv} \alias{read_chemstation_uv} -\title{Read 'Chemstation' DAD files} +\title{Read 'ChemStation' DAD files} \usage{ read_chemstation_uv( path, @@ -25,16 +25,24 @@ read_chemstation_uv( or \code{raw}.} } \value{ -A chromatogram in the format specified by \code{format_out} -(retention time x wavelength). +A 3D chromatogram in the format specified by \code{data_format} and +\code{format_out}. If \code{data_format} is \code{wide}, the chromatogram will +be returned with retention times as rows and wavelengths as columns. If +\code{long} format is requested, three columns will be returned: one for the +retention time, one for the wavelength and one for the intensity. The +\code{format_out} argument determines whether the chromatogram is returned as +a \code{matrix} or \code{data.frame}. Metadata can be attached to the +chromatogram as \code{\link{attributes}} if \code{read_metadata} is {TRUE}. } \description{ -Parser for reading Agilent UV (.uv) files into R. +Agilent \code{.uv} files come in several different varieties. This parser can +automatically detect and read several versions of these files from +'Agilent ChemStation' and 'OpenLab', including versions \code{31} and \code{131}. } \note{ This function was adapted from the parser in the rainbow project licensed under GPL 3 by Evan Shi -(https://rainbow-api.readthedocs.io/en/latest/agilent/uv.html). +\url{https://rainbow-api.readthedocs.io/en/latest/agilent/uv.html}. } \author{ Ethan Bass diff --git a/man/read_chroms.Rd b/man/read_chroms.Rd index a0261a4..a0c309b 100644 --- a/man/read_chroms.Rd +++ b/man/read_chroms.Rd @@ -9,8 +9,9 @@ read_chroms( find_files, format_in = c("agilent_d", "agilent_dx", "chemstation", "chemstation_fid", "chemstation_ch", "chemstation_csv", "chemstation_uv", "masshunter_dad", - "chromeleon_uv", "shimadzu_fid", "shimadzu_dad", "shimadzu_lcd", "thermoraw", "mzml", - "waters_arw", "waters_raw", "msd", "csd", "wsd", "mdf", "other"), + "chromeleon_uv", "shimadzu_ascii", "shimadzu_fid", "shimadzu_dad", "shimadzu_lcd", + "thermoraw", "mzml", "mzxml", "waters_arw", "waters_raw", "msd", "csd", "wsd", "mdf", + "other"), pattern = NULL, parser = c("", "chromconverter", "aston", "entab", "thermoraw", "openchrom", "rainbow"), format_out = c("matrix", "data.frame"), @@ -24,11 +25,12 @@ read_chroms( cl = 1, verbose = getOption("verbose"), sample_names = NULL, - dat = NULL + dat = NULL, + ... ) } \arguments{ -\item{paths}{paths to files or folders containing files} +\item{paths}{Paths to data files or directories containing the files.} \item{find_files}{Logical. Set to \code{TRUE} (default) if you are providing the function with a folder or vector of folders containing the files. @@ -38,18 +40,19 @@ Otherwise, set to\code{FALSE}.} include: \code{agilent_d}, \code{agilent_dx}, \code{chemstation}, \code{chemstation_uv}, \code{chemstation_ch}, \code{chemstation_csv}, \code{masshunter}, \code{masshunter_dad}, \code{chromeleon_uv}, -\code{shimadzu_fid}, \code{shimadzu_dad}, \code{thermoraw}, -\code{waters_arw}, \code{waters_raw}, \code{mzml}, \code{cdf}, \code{mdf}, -\code{msd}, \code{csd}, \code{wsd}, or \code{other}.} +\code{shimadzu_ascii}, \code{shimadzu_fid}, \code{shimadzu_dad}, +\code{thermoraw}, \code{waters_arw}, \code{waters_raw}, \code{mzml}, +\code{mzxml}, \code{cdf}, \code{mdf}, \code{msd}, \code{csd}, \code{wsd}, +or \code{other}.} \item{pattern}{pattern (e.g. a file extension). Defaults to NULL, in which case file extension will be deduced from \code{format_in}.} -\item{parser}{What parser to use. Current option are \code{chromconverter}, -\code{aston}, \code{entab}, \code{thermoraw}, \code{openchrom}, or -\code{rainbow}.} +\item{parser}{What parser to use (optional). Current option are +\code{chromconverter}, \code{aston}, \code{entab}, \code{thermoraw}, +\code{openchrom}, or \code{rainbow}.} -\item{format_out}{R object format (i.e. data.frame or matrix).} +\item{format_out}{Class of output (i.e. data.frame or matrix).} \item{data_format}{Whether to output data in wide or long format. Either \code{wide} or \code{long}.} @@ -82,38 +85,52 @@ of clusters to use or a cluster object created by R console.} \item{sample_names}{An optional character vector of sample names. Otherwise -sample names default to the basename of the specified files.} +sample names default to the \code{\link{basename}} of the specified files.} \item{dat}{Existing list of chromatograms to append results. (Defaults to NULL).} + +\item{...}{Additional arguments to parser.} } \value{ A list of chromatograms in \code{matrix} or \code{data.frame} format, -according to the value of \code{format_out}. +according to the value of \code{format_out}. Chromatograms may be returned +in either \code{wide} or \code{long} format according to the value of +\code{data_format}. } \description{ -Reads chromatograms from specified folders or vector of paths using file -parsers from \href{https://github.com/bovee/aston}{Aston}, +Reads chromatograms from specified folders or vector of paths using either an +internal parser or bindings to an external library, such as +\href{https://github.com/bovee/aston}{Aston}, \href{https://github.com/bovee/entab}{Entab}, \href{https://github.com/compomics/ThermoRawFileParser}{ThermoRawFileParser}, \href{https://lablicate.com/platform/openchrom}{OpenChrom}, -\href{https://rainbow-api.readthedocs.io/}{rainbow}, or internal parsers. +\href{https://rainbow-api.readthedocs.io/}{rainbow}. } \details{ -Provides a general interface to chromConverter parsers. Currently recognizes -'Agilent ChemStation' (\code{.uv}, \code{.ch}, \code{.dx}), 'MassHunter' (\code{.dad}) -files, 'Thermo RAW' (\code{.raw}), 'Waters ARW' (\code{.arw}), 'Waters RAW' -(\code{.raw}), 'Chromeleon ASCII' (\code{.txt}), 'Shimadzu ASCII' -(\code{.txt}). Also, wraps Openchrom parsers, which include many additional -formats. To use 'Entab', 'ThermoRawFileParser', or 'Openchrom' parsers, -they must be manually installed. Please see the instructions in the +Provides a unified interface to all chromConverter parsers. Currently recognizes +'Agilent ChemStation' (\code{.uv}, \code{.ch}, \code{.dx}), 'Agilent +MassHunter' (\code{.dad}), 'Thermo RAW' (\code{.raw}), 'Waters ARW' (\code{.arw}), +'Waters RAW' (\code{.raw}), 'Chromeleon ASCII' (\code{.txt}), 'Shimadzu ASCII' +(\code{.txt}), and 'Shimadzu LCD' files (preliminary support). Also, wraps +'OpenChrom' parsers, which include many additional formats. To use 'Entab', +'ThermoRawFileParser', or 'OpenChrom' parsers, they must be manually installed. +Please see the instructions in the \href{https://ethanbass.github.io/chromConverter/}{README} for further details. + +If paths to individual files are provided, \code{read_chroms} will try to +infer the file format and select an appropriate parser. However, when +providing paths to directories, the file format must be specified using the +\code{format_in} argument. } \section{Side effects}{ If \code{export} is TRUE, chromatograms will be exported in the format specified by \code{export_format} in the folder -specified by \code{path_out}. Currently, the only option for export is -\code{csv} unless the \code{parser} is \code{openchrom}. +specified by \code{path_out}. Currently, the most versatile option for +exporting files is \code{csv}. However, unidimensional chromatograms can +also be exported in ANDI Chromatography (netCDF) format by selecting +\code{cdf}. If an \code{openchrom} parser is selected, ANIML and mzML are +available as additional options. } \examples{ diff --git a/man/read_peaklist.Rd b/man/read_peaklist.Rd index e2b6f2a..52e9b8f 100644 --- a/man/read_peaklist.Rd +++ b/man/read_peaklist.Rd @@ -29,8 +29,7 @@ include: \code{chemstation} or \code{shimadzu}.} \item{pattern}{pattern (e.g. a file extension). Defaults to NULL, in which case file extension will be deduced from \code{format_in}.} -\item{data_format}{Whether to output data in wide or long format. Either -\code{wide} or \code{long}.} +\item{data_format}{Either \code{chromatographr} or \code{original}.} \item{metadata_format}{Format to output metadata. Either \code{chromconverter} or \code{raw}.} diff --git a/man/read_shimadzu.Rd b/man/read_shimadzu.Rd index e814f83..f821cad 100644 --- a/man/read_shimadzu.Rd +++ b/man/read_shimadzu.Rd @@ -6,19 +6,28 @@ \usage{ read_shimadzu( file, - format_in, + what = "chromatogram", + format_in = NULL, + include = c("fid", "dad", "uv", "tic", "status"), format_out = c("matrix", "data.frame"), data_format = c("wide", "long"), peaktable_format = c("chromatographr", "original"), - what = "chromatogram", read_metadata = TRUE, - metadata_format = c("chromconverter", "raw") + metadata_format = c("chromconverter", "raw"), + ms_format = c("data.frame", "list"), + collapse = TRUE ) } \arguments{ -\item{file}{path to file} +\item{file}{Path to file.} + +\item{what}{Whether to extract \code{chromatogram}, \code{peak_table}, and/or +\code{ms_spectra}. Accepts multiple arguments.} + +\item{format_in}{This argument is deprecated and is no longer required.} -\item{format_in}{Format of files. \code{fid} or \code{dad}.} +\item{include}{Which chromatograms to include. Options are \code{fid}, +\code{dad}, \code{uv}, \code{tic}, and \code{status}.} \item{format_out}{R format. Either \code{matrix} or \code{data.frame}.} @@ -27,17 +36,22 @@ read_shimadzu( \item{peaktable_format}{Whether to return peak tables in \code{chromatographr} or \code{original} format.} -\item{what}{Whether to extract \code{chromatogram} and/or \code{peak_table}. -Accepts multiple arguments.} - \item{read_metadata}{Whether to read metadata from file.} \item{metadata_format}{Format to output metadata. Either \code{chromconverter} or \code{raw}.} + +\item{ms_format}{Whether to return mass spectral data as a (long) +\code{data.frame} or a \code{list}.} + +\item{collapse}{Logical. Whether to collapse lists that only contain a single +element.} } \value{ -A chromatogram in the format specified by \code{format_out} -(retention time x wavelength). +A nested list of elements from the specified \code{file}, where the +top levels are chromatograms, peak tables, and/or mass spectra according to +the value of \code{what}. Chromatograms are returned in the format specified +by \code{format_out} (retention time x wavelength). } \description{ Reads 'Shimadzu' ascii files into R. These files are exported from diff --git a/man/read_shimadzu_lcd.Rd b/man/read_shimadzu_lcd.Rd index b929954..b480987 100644 --- a/man/read_shimadzu_lcd.Rd +++ b/man/read_shimadzu_lcd.Rd @@ -20,6 +20,12 @@ read_shimadzu_lcd( \item{read_metadata}{Logical. Whether to attach metadata.} } +\value{ +A 3D chromatogram from the PDA stream in \code{matrix} or +\code{data.frame} format, according to the value of \code{format_out}. +The chromatograms will be returned in \code{wide} or \code{long} format +according to the value of \code{data_format}. +} \description{ Read 3D PDA data stream from 'Shimadzu' LCD files. } diff --git a/man/read_varian_peaklist.Rd b/man/read_varian_peaklist.Rd index 410d0f6..db1f4de 100644 --- a/man/read_varian_peaklist.Rd +++ b/man/read_varian_peaklist.Rd @@ -10,6 +10,9 @@ read_varian_peaklist(file) \arguments{ \item{file}{Path to Varian peak list file.} } +\value{ +A data.frame containing the information from the specified report. +} \description{ Read 'Varian' peaklist. Read peak list(s) from 'Varian MS Workstation'. diff --git a/man/read_waters_raw.Rd b/man/read_waters_raw.Rd new file mode 100644 index 0000000..14efc61 --- /dev/null +++ b/man/read_waters_raw.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/read_waters_raw.R +\name{read_waters_raw} +\alias{read_waters_raw} +\title{Read 'Waters' RAW} +\usage{ +read_waters_raw( + path, + format_out = c("matrix", "data.frame"), + data_format = c("wide", "long"), + read_metadata = TRUE, + metadata_format = c("chromconverter", "raw") +) +} +\arguments{ +\item{path}{Path to \code{.raw} file.} + +\item{format_out}{Matrix or data.frame.} + +\item{data_format}{Either \code{wide} (default) or \code{long}.} + +\item{read_metadata}{Logical. Whether to attach metadata.} + +\item{metadata_format}{Format to output metadata. Either \code{chromconverter} +or \code{raw}.} +} +\value{ +A chromatogram in the format specified by \code{format_out} +(retention time x wavelength). +} +\description{ +Parser for reading 'Waters MassLynx (.raw) files into R. +} +\note{ +For now this parser only reads 1D chromatograms (not mass spectra or +DAD data). +} +\author{ +Ethan Bass +} diff --git a/man/sp_converter.Rd b/man/sp_converter.Rd index 7863de9..d0c2c38 100644 --- a/man/sp_converter.Rd +++ b/man/sp_converter.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aston_parsers.R +% Please edit documentation in R/call_aston.R \name{sp_converter} \alias{sp_converter} \title{Converter for Agilent MassHunter UV files} diff --git a/man/uv_converter.Rd b/man/uv_converter.Rd index 679a3a0..4e53f5c 100644 --- a/man/uv_converter.Rd +++ b/man/uv_converter.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aston_parsers.R +% Please edit documentation in R/call_aston.R \name{uv_converter} \alias{uv_converter} \title{Converter for Agilent ChemStation UV files} diff --git a/man/write_cdf.Rd b/man/write_cdf.Rd new file mode 100644 index 0000000..f901e0e --- /dev/null +++ b/man/write_cdf.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/write_chroms.R +\name{write_cdf} +\alias{write_cdf} +\title{Write CDF file from chromatogram} +\usage{ +write_cdf(x, path_out, sample_name, lambda = NULL, force = FALSE) +} +\arguments{ +\item{x}{A chromatogram in (wide) format.} + +\item{path_out}{The path to write the file.} + +\item{sample_name}{The name of the file.} + +\item{lambda}{The wavelength to export (for 2-dimensional chromatograms). +Must be a string matching one the columns in \code{x} or the index of the +column to export.} + +\item{force}{Whether to overwrite existing files at the specified path. +Defaults to \code{FALSE}.} +} +\value{ +No return value. The function is called for its side effects. +} +\description{ +Exports a chromatogram in ANDI (Analytical Data Interchange) chromatography +format (ASTM E1947-98). This format can only accommodate unidimensional data. +For two-dimensional chromatograms, the column to export can be specified +using the \code{lambda} argument. Otherwise, a warning will be generated and +the first column of the chromatogram will be exported. +} +\section{Side effects}{ + +Exports a chromatogram in ANDI chromatography format (netCDF) in the directory +specified by \code{path_out}. The file will be named according to the value +of \code{sample_name}. If no \code{sample_name} is provided, the +\code{sample_name} attribute will be used if it exists. +} + +\author{ +Ethan Bass +} diff --git a/man/write_openchrom_batchfile.Rd b/man/write_openchrom_batchfile.Rd index 75a5ab9..597fea6 100644 --- a/man/write_openchrom_batchfile.Rd +++ b/man/write_openchrom_batchfile.Rd @@ -2,7 +2,8 @@ % Please edit documentation in R/call_openchrom.R \name{write_openchrom_batchfile} \alias{write_openchrom_batchfile} -\title{Writes OpenChrom XML batch file} +\title{Writes OpenChrom XML batch file +This function is called internally by \code{call_openchrom}.} \usage{ write_openchrom_batchfile( files, @@ -26,6 +27,7 @@ Returns path to newly created xml batch file. } \description{ Writes OpenChrom XML batch file +This function is called internally by \code{call_openchrom}. } \author{ Ethan Bass diff --git a/tests/testthat/test-extra.R b/tests/testthat/test-extra.R index 6f152b0..9ecdd9d 100644 --- a/tests/testthat/test-extra.R +++ b/tests/testthat/test-extra.R @@ -1,18 +1,47 @@ +test_that("read_chroms can read 'Agilent MS' files", { + skip_on_cran() + skip_if_not_installed("chromConverterExtraTests") + skip_if_not_installed("entab") + + path <- system.file("chemstation_MSD.MS", + package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) + + x <- read_chroms(path, parser = "entab", progress_bar = FALSE)[[1]] + expect_equal(class(x), "data.frame") + expect_equal(dim(x), c(95471, 3)) + expect_equal(attr(x, "parser"), "entab") + expect_equal(colnames(x), c("rt", "mz", "intensity")) + + x1 <- read_chroms(path, parser = "rainbow", + progress_bar = FALSE, precision = 0)[[1]] + expect_equal(class(x1)[1], "matrix") + expect_equal(dim(x1), c(2534, 841)) + + x2 <- read_chroms(path, parser = "rainbow", + progress_bar = FALSE, data_format = "long", + precision = 0)[[1]] + expect_equal(class(x2)[1], "matrix") + expect_equal(dim(x2), c(2131094, 3)) + expect_equal(colnames(x2), c("rt", "mz", "intensity")) +}) + test_that("read_chroms can read 'Agilent Chemstation' version 30 files", { skip_on_cran() skip_if_not_installed("chromConverterExtraTests") path <- system.file("chemstation_30.ch", package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) - x <- read_chroms(path, progress_bar = FALSE, parser="chromconverter") + x <- read_chroms(path, parser="chromconverter", progress_bar = FALSE) expect_equal(class(x[[1]])[1], "matrix") expect_equal(dim(x[[1]]), c(38405, 1)) expect_equal(attr(x[[1]], "parser"), "chromconverter") - x1 <- read_chroms(path, progress_bar = FALSE, format_out = "data.frame", - data_format = "long", parser = "chromconverter") + x1 <- read_chroms(path, parser = "chromconverter", format_out = "data.frame", + data_format = "long", progress_bar = FALSE) expect_equal(class(x1[[1]])[1], "data.frame") expect_equal(as.numeric(rownames(x[[1]])), x1[[1]][,1]) expect_equal(x[[1]][,1], x1[[1]][,2], ignore_attr = TRUE) @@ -22,7 +51,10 @@ test_that("read_chroms can read 'Agilent Chemstation' 31 files", { skip_on_cran() skip_if_not_installed("chromConverterExtraTests") skip_if_not_installed("entab") + path <- system.file("chemstation_31.uv", package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) + x <- read_chroms(path, progress_bar = FALSE, parser = "chromconverter")[[1]] x1 <- read_chroms(path, progress_bar = FALSE, parser = "entab")[[1]] @@ -36,13 +68,13 @@ test_that("read_chroms can read 'Agilent Chemstation' 31 files", { expect_equal(attr(x, "parser"), "chromconverter") }) - test_that("read_chroms can read 'Agilent Chemstation' version 81 files", { skip_on_cran() skip_if_not_installed("chromConverterExtraTests") path <- system.file("chemstation_81.ch", package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) x <- read_chroms(path, progress_bar = FALSE) expect_equal(class(x[[1]])[1], "matrix") @@ -63,6 +95,8 @@ test_that("read_chroms can read 'Agilent Chemstation' version 130 files", { path <- system.file("chemstation_130.ch", package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) + x <- read_chroms(path, progress_bar = FALSE) expect_equal(class(x[[1]])[1], "matrix") expect_equal(dim(x[[1]]), c(12750, 1)) @@ -73,12 +107,13 @@ test_that("read_chroms can read 'Agilent Chemstation' version 130 files", { }) -test_that("read_chroms can read 'Agilent Chemstation' 179 files", { +test_that("read_chroms can read 'Agilent OpenLab' 179 files", { skip_on_cran() skip_if_not_installed("chromConverterExtraTests") - path <- system.file("chemstation_179.ch", + path <- system.file("openlab_179.ch", package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) x <- read_chroms(path, progress_bar = FALSE) expect_equal(class(x[[1]])[1], "matrix") @@ -92,6 +127,34 @@ test_that("read_chroms can read 'Agilent Chemstation' 179 files", { expect_equal(x[[1]][,1], x1[[1]][,2], ignore_attr = TRUE) }) +test_that("read_chroms can read 'Agilent ChemStation' 179 files (8-byte format)", { + skip_on_cran() + skip_if_not_installed("chromConverterExtraTests") + + path <- system.file("chemstation_179_mustang.ch", + package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) + + x <- read_chroms(path, progress_bar = FALSE) + expect_equal(class(x[[1]])[1], "matrix") + expect_equal(dim(x[[1]]), c(54704, 1)) + expect_equal(attr(x[[1]], "parser"), "chromconverter") +}) + +test_that("read_chroms can read 'Agilent ChemStation' 179 (4-byte format)", { + skip_on_cran() + skip_if_not_installed("chromConverterExtraTests") + + path <- system.file("chemstation_179_asterix.ch", + package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) + + x <- read_chroms(path, progress_bar = FALSE) + expect_equal(class(x[[1]])[1], "matrix") + expect_equal(dim(x[[1]]), c(22800, 1)) + expect_equal(attr(x[[1]], "parser"), "chromconverter") +}) + test_that("read_chroms can read 'Agilent Masshunter' dad files", { skip_on_cran() skip_if_not_installed("chromConverterExtraTests") @@ -99,9 +162,13 @@ test_that("read_chroms can read 'Agilent Masshunter' dad files", { path <- system.file("masshunter.d/AcqData/DAD1.sp", package = "chromConverterExtraTests") - x <- read_chroms(path, format_in = "masshunter_dad", progress_bar = FALSE) + skip_if_not(file.exists(path)) + + x <- read_chroms(path, format_in = "masshunter_dad", parser = "entab", + progress_bar = FALSE) x1 <- read_chroms(path, format_in = "masshunter_dad", parser = "aston", progress_bar = FALSE) + expect_equal(dim(x[[1]]), c(240, 276)) expect_equal(class(x[[1]])[1], "matrix") expect_equal(x[[1]], x1[[1]], ignore_attr = TRUE) @@ -126,6 +193,7 @@ test_that("read_chroms can read 'Waters ARW' PDA files", { skip_if_not_installed("chromConverterExtraTests") path <- system.file("waters_pda.arw", package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) x <- read_chroms(path, format_in = "waters_arw", progress_bar = FALSE) expect_equal(class(x[[1]])[1], "matrix") @@ -140,12 +208,41 @@ test_that("read_chroms can read 'Waters ARW' PDA files", { # expect_equal(dim(x1[[1]])) }) +test_that("read_chroms can read 'Waters RAW' files", { + skip_on_cran() + skip_if_not_installed("chromConverterExtraTests") + + path <- system.file("waters_blue.raw", package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) + + x <- read_chroms(path, format_in = "waters_raw", progress_bar = FALSE, + precision = 0)[[1]] + expect_equal(names(x), c("MS","UV","CAD")) + expect_equal(dim(x$MS), c(725, 740)) + expect_equal(attr(x$MS, "parser"), "rainbow") + expect_equal(attr(x$MS, "data_format"), "wide") + expect_equal(attr(x$MS, "metadata")$polarity,"+") + + x1 <- read_chroms(path, format_in = "waters_raw", progress_bar = FALSE, + parser = "chromconverter")[[1]] + expect_equal(class(x1$CAD)[1], "matrix") + expect_equal(x$CAD, x1$CAD, ignore_attr = TRUE) + + x2 <- read_chroms(path, format_in = "waters_raw", progress_bar = FALSE, + what = "MS", data_format = "long", precision = 0)[[1]] + expect_equal(nrow(x2$MS), nrow(x$MS)*ncol(x$MS)) + expect_equal(colnames(x2$MS), c("rt", "mz", "intensity")) + # no metadata + # expect_equal(attr(x1$CAD, "parser"), "chromconverter") +}) + test_that("read_chroms can read 'Chromeleon' comma-separated files", { skip_on_cran() skip_if_not_installed("chromConverterExtraTests") path <- system.file("chromeleon_comma.txt", package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) x <- read_chroms(path, format_in = "chromeleon_uv", progress_bar = FALSE) expect_equal(class(x[[1]])[1], "matrix") @@ -167,6 +264,7 @@ test_that("read_chroms can read 'Chromeleon' period-separated files", { path <- system.file("chromeleon_period.txt", package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) x <- read_chroms(path, format_in = "chromeleon", progress_bar = FALSE) expect_equal(class(x[[1]])[1], "matrix") @@ -180,12 +278,14 @@ test_that("read_chroms can read 'Chromeleon' period-separated files", { expect_equal(x[[1]][,1], x1[[1]][,2], ignore_attr = TRUE) }) -test_that("read_peaklist can read `Shimadzu` PDA files", { +test_that("read_peaklist can read `Shimadzu` ascii (PDA) files", { skip_on_cran() skip_if_missing_dependecies() skip_if_not_installed("chromConverterExtraTests") + path <- system.file("shimadzuDAD_Anthocyanin.txt", package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) x <- read_peaklist(path, format_in="shimadzu_dad", progress_bar = FALSE)[[1]] expect_equal(class(x), "list") @@ -195,26 +295,30 @@ test_that("read_peaklist can read `Shimadzu` PDA files", { expect_equal(colnames(x[[1]]), c("sample", "rt", "start", "end", "area", "height")) }) -test_that("read_chroms can read 'Shimadzu' PDA files", { +test_that("read_chroms can read 'Shimadzu' PDA files (ascii and LCD)", { skip_on_cran() skip_if_not_installed("chromConverterExtraTests") - path <- system.file("shimadzuDAD_Anthocyanin.txt", + path_ascii <- system.file("shimadzuDAD_Anthocyanin.txt", package = "chromConverterExtraTests") + skip_if_not(file.exists(path_ascii)) + + path_lcd <- system.file("Anthocyanin.lcd", package = "chromConverterExtraTests") + skip_if_not(file.exists(path_lcd)) - x <- read_chroms(path, format_in = "shimadzu_dad", progress_bar = FALSE)[[1]] + x <- read_chroms(path_ascii, format_in = "shimadzu_dad", progress_bar = FALSE)[[1]] expect_equal(class(x)[1], "matrix") expect_equal(dim(x), c(4689, 328)) expect_equal(attr(x, "parser"), "chromconverter") expect_equal(attr(x, "data_format"), "wide") - x1 <- read_chroms(path, format_in="shimadzu_dad", progress_bar = FALSE, + x1 <- read_chroms(path_ascii, format_in="shimadzu_dad", progress_bar = FALSE, data_format = "long", format_out = "data.frame")[[1]] expect_equal(class(x1)[1], "data.frame") expect_equal(dim(x1), c(4689*328, 3)) - path <- system.file("Anthocyanin.lcd", package = "chromConverterExtraTests") - x2 <- read_chroms(path, progress_bar = FALSE)[[1]] + + x2 <- read_chroms(path_lcd, progress_bar = FALSE)[[1]] expect_equal(dim(x2),c(4689,328)) expect_equal(x, x2, ignore_attr = TRUE) }) @@ -224,6 +328,7 @@ test_that("read_chroms can read 'Agilent' dx files", { skip_if_not_installed("chromConverterExtraTests") path <- system.file("agilent.dx", package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) x <- read_chroms(path, format_in = "agilent_dx", progress_bar = FALSE)[[1]] expect_equal(class(x)[1], "matrix") @@ -250,11 +355,10 @@ test_that("read_chroms can read 'Thermo' RAW files", { expect_equal(names(x), c("MS1", "MS2", "DAD", "BPC", "TIC", "chroms", "metadata")) }) - # test_that("thermoraw parser works",{ # skip_if_not(configure_thermo_parser(check = TRUE)) -# file <- "/Users/ethanbass/Library/CloudStorage/Box-Box/chromatography_test_files/thermo_files/small.RAW" -# x <- read_chroms(file, format_in = "thermoraw", find_files = FALSE) + # file <- "/Users/ethanbass/Library/CloudStorage/Box-Box/chromatography_test_files/thermo_files/small.RAW" + # x <- read_chroms(file, format_in = "thermoraw", find_files = FALSE) # expect_equal(class(x[[1]])[1], "matrix") # expect_equal(attributes(x[[1]])$instrument, "GC-2014") # }) @@ -266,10 +370,14 @@ test_that("read_chroms can use 'OpenChrom' parsers", { skip_if_missing_openchrom() path <- system.file("DCM1.SMS", package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) + x <- read_chroms(path, format_in = "msd", progress_bar = FALSE, verbose = FALSE, export_format = "csv")[[1]] + expect_equal(class(x)[1], "matrix") expect_equal(dim(x), c(3032, 297)) + x <- read_chroms(path, format_in = "msd", progress_bar = FALSE, verbose = FALSE)[[1]] expect_equal(class(x), "list") @@ -282,6 +390,8 @@ test_that("read_varian_peaklist function works", { path <- system.file("varian_peaklist.csv", package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) + x <- read_varian_peaklist(path) expect_s3_class(x, "data.frame") expect_equal(dim(x), c(46476, 15)) @@ -291,8 +401,37 @@ test_that("read_cdf function can read peak tables", { skip_on_cran() skip_if_not_installed("ncdf4") skip_if_not_installed("chromConverterExtraTests") + path <- system.file("VARIAN1.CDF", package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) + x <- read_cdf(path, what = "peak_table") + # what about chromatograms expect_s3_class(x, "data.frame") expect_equal(dim(x), c(8,5)) }) + +test_that("read_chroms can read ANDI MS files", { + skip_on_cran() + skip_if_not_installed("ncdf4") + skip_if_not_installed("chromConverterExtraTests") + + path <- system.file("HP_MS.CDF", package = "chromConverterExtraTests") + skip_if_not(file.exists(path)) + + x <- read_chroms(path, what=c("chromatogram","ms_spectra"), progress_bar = FALSE)[[1]] + expect_equal(names(x), c("chromatogram", "ms_spectra")) + expect_s3_class(x$ms_spectra, "data.frame") + expect_equal(class(x$chromatogram)[1], "matrix") + expect_true(all(dim(x$chromatogram) == c(621, 1))) + expect_true(all(dim(x$ms_spectra) == c(7638, 3))) + + x1 <- read_chroms(path, what=c("chromatogram"), data_format="long", + progress_bar = FALSE)[[1]] + expect_equal(ncol(x1), 2) + + x2 <- read_chroms(path, what=c("ms_spectra"), ms_format="list", + progress_bar = FALSE)[[1]] + expect_type(x2, "list") + expect_equal(length(x2), length(unique(x$ms_spectra$rt))) +}) diff --git a/tests/testthat/test-read_chroms.R b/tests/testthat/test-read_chroms.R index f3798bf..95a1294 100644 --- a/tests/testthat/test-read_chroms.R +++ b/tests/testthat/test-read_chroms.R @@ -43,9 +43,11 @@ test_that("entab parser can read `Agilent Chemstation` 131 files", { skip_on_cran() skip_if_not_installed("entab") file <- "testdata/dad1.uv" + x1 <- read_chroms(file, format_in = "chemstation_uv", parser = "entab", find_files = FALSE, read_metadata = TRUE, progress_bar = FALSE) + expect_equal(as.numeric(x[[1]][,1]), as.numeric(x1[[1]][,"220"])) expect_equal(as.numeric(rownames(x[[1]])), as.numeric(rownames(x1[[1]]))) expect_equal(class(x1[[1]])[1], "matrix") @@ -55,19 +57,23 @@ test_that("entab parser can read `Agilent Chemstation` 131 files", { test_that("Shimadzu ascii parser works", { file <- "testdata/ladder.txt" + x <- read_chroms(file, format_in = "shimadzu_fid", find_files = FALSE, progress_bar = FALSE) + expect_equal(class(x[[1]])[1], "matrix") expect_equal(attributes(x[[1]])$instrument, "GC-2014") }) test_that("read_mzml works", { + skip_on_cran() ext_filepath <- system.file("extdata", package = "RaMS") DAD_filepath <- list.files(ext_filepath, full.names = TRUE, - pattern = "uv_test_mini.mzML") + pattern = "uv_test_mini.mzML.gz") dad_long <- read_mzml(DAD_filepath, what = "DAD", verbose = FALSE) expect_equal(dad_long, - RaMS::grabMSdata(files = DAD_filepath, grab_what = "DAD", verbosity = FALSE) + RaMS::grabMSdata(files = DAD_filepath, grab_what = "DAD", + verbosity = FALSE) ) dad_wide <- read_mzml(DAD_filepath, what = "DAD", verbose = FALSE, data_format = "wide") @@ -85,15 +91,28 @@ test_that("Rainbow parser can read chemstation 131 files", { skip_if_missing_dependecies() skip_on_cran() skip_on_ci() + x1 <- read_chroms(path_uv, format_in = "chemstation_uv", parser = "rainbow", find_files = FALSE, read_metadata = TRUE, progress_bar = FALSE) + expect_equal(as.numeric(x[[1]][,1]), as.numeric(x1[[1]][,"220"])) expect_equal(as.numeric(rownames(x[[1]])), as.numeric(rownames(x1[[1]]))) expect_equal(class(x1[[1]])[1], "matrix") expect_equal(attr(x1[[1]], "parser"), "rainbow") expect_equal(attr(x1[[1]], "data_format"), "wide") + expect_equal(attr(x1$dad1, "metadata")$unit, "mAU") + + x2 <- read_chroms(path_uv, format_in = "chemstation_uv", parser = "rainbow", + find_files = FALSE, data_format = "long", + read_metadata = TRUE, + progress_bar = FALSE) + expect_equal(nrow(x2$dad1), nrow(x1$dad1)*ncol(x1$dad1)) + expect_equal(colnames(x2$dad1), c("rt", "lambda", "intensity")) + expect_equal(attr(x2$dad1, "metadata")$unit, "mAU") + expect_equal(attr(x2[[1]], "data_format"), "long") + expect_equal(attr(x2[[1]], "parser"), "rainbow") }) test_that("chromconverter parser can read chemstation 130 files", { @@ -132,14 +151,15 @@ test_that("read_chroms exports cdf files correctly", { path_out <- tempdir(check = TRUE) on.exit(unlink(c(fs::path(path_out, "ladder", ext = "cdf"), path_out))) file <- "testdata/ladder.txt" - x1 <- read_chroms(paths = file, format_in = "shimadzu_fid", export = TRUE, path_out = path_out, - export_format = "cdf", progress_bar = FALSE) + x1 <- read_chroms(paths = file, format_in = "shimadzu_fid", export = TRUE, + path_out = path_out, export_format = "cdf", + progress_bar = FALSE) x1_out <- read_cdf(fs::path(path_out, "ladder", ext = "cdf")) expect_equal(x1[[1]], x1_out, ignore_attr = TRUE) }) test_that("read_peaklist can read chemstation reports", { - path <- "testdata/RUTIN2.D/Report.TXT" + path <- "testdata/RUTIN2.D/" x <- read_peaklist(path, format_in = "chemstation") expect_equal(class(x[[1]]), "list") expect_equal(class(x[[1]][[1]]), "data.frame") @@ -166,17 +186,16 @@ test_that("read_peaklist can read chemstation reports", { test_that("read_peaklist can read 'Shimadzu' fid files", { path <- "testdata/ladder.txt" x <- read_peaklist(path, format_in = "shimadzu_fid", progress_bar = FALSE) - expect_equal(class(x[[1]]), "list") - expect_equal(class(x[[1]][[1]]), "data.frame") - expect_equal(x[[1]][[1]][[1,"sample"]], "ladder") - expect_equal(colnames(x[[1]][[1]]), + expect_equal(class(x[[1]]), "data.frame") + expect_equal(x[[1]][[1,"sample"]], "ladder") + expect_equal(colnames(x[[1]]), c("sample", "rt", "start", "end", "area", "height")) + x <- read_peaklist(path, format_in = "shimadzu_fid", data_format = "original") - expect_equal(class(x[[1]]), "list") - expect_equal(class(x[[1]][[1]]), "data.frame") - expect_equal(x[[1]][[1]][[1,"sample"]], "ladder") - expect_equal(x[[1]][[1]][[1,"sample"]], "ladder") - expect_equal(colnames(x[[1]][[1]]), + expect_equal(class(x[[1]]), "data.frame") + expect_equal(x[[1]][[1,"sample"]], "ladder") + expect_equal(x[[1]][[1,"sample"]], "ladder") + expect_equal(colnames(x[[1]]), c("sample","Peak.","R.Time","I.Time","F.Time","Area","Height", "A.H","Conc.","Mark","ID.","Name", "k.", "Plate..", "Plate.Ht.", "Tailing", "Resolution", "Sep.Factor", "Area.Ratio", "Height.Ratio",