diff --git a/DESCRIPTION b/DESCRIPTION index 88e9d74..0f19fa6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,13 +1,16 @@ Package: chromConverter Title: Chromatographic File Converter -Version: 0.3.3 +Version: 0.4.0 Authors@R: c( person(given = "Ethan", family = "Bass", email = "ethanbass@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-6175-6739")), person(given = "James", family = "Dillon", role = c("ctb", "cph"), - comment = "Author and copyright holder of source code adapted from the - 'Chromatography Toolbox' for parsing Agilent FID files.") + comment = c("Author and copyright holder of source code adapted from the + 'Chromatography Toolbox' for parsing 'Agilent' FID files.")), + person(given = "Evan", family = "Shi", role = c("ctb", "cph"), + comment = c("Author and copyright holder of source code adapted from 'rainbow' + for parsing 'Agilent' UV files.")) ) Maintainer: Ethan Bass Description: Reads chromatograms from binary formats into R objects. Currently supports conversion of 'Agilent ChemStation', 'Agilent MassHunter', and 'ThermoRaw' files as well as various text-based formats. Utilizes file parsers from external libraries, such as 'Aston' , 'Entab' , 'rainbow' , and 'ThermoRawFileParser' . @@ -30,6 +33,7 @@ Imports: Suggests: entab, mzR, + ncdf4, pbapply, testthat (>= 3.0.0) Config/reticulate: list( packages = list( list(package = "scipy"), diff --git a/NAMESPACE b/NAMESPACE index bb1c652..4e30146 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,16 +4,20 @@ export(call_entab) export(call_openchrom) export(call_rainbow) export(configure_aston) +export(configure_openchrom) export(configure_rainbow) export(extract_metadata) +export(read_cdf) export(read_chemstation_ch) export(read_chemstation_csv) export(read_chemstation_uv) export(read_chromeleon) export(read_chroms) +export(read_mdf) export(read_mzml) export(read_shimadzu) export(read_thermoraw) +export(read_varian_peaklist) export(read_waters_arw) export(sp_converter) export(uv_converter) diff --git a/NEWS.md b/NEWS.md index fcf052c..fa2bf9c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,26 @@ +## chromConverter 0.4.0 + +### New features + +* Added parser for ANDI chrom `cdf` files through the `read_cdf` function. +* Added parser for 'Lumex' `.mdf` files through the `read_mdf` function. +* Added additional options for file exports. New options for writing +`chemstation_csv` (utf-16) and ANDI chrom `cdf` files through `read_chroms`. +* Added preliminary support for automatic filetype detection by `read_chroms` when providing direct paths to files (i.e. when `find_files == FALSE`). +* Added `read_varian_peaklist` function for reading peak lists from 'Varian MS Workstation'. + +### Other improvements and bug fixes: + +* Added `wide` and `long` `data_format` options for 2D data, such that the `wide` format option writes retention times as rownames of the matrix or data.frame. while the `long` format writes retention times as the first column of the object. +* Updated `configure_openchrom` for better discovery of 'OpenChrom' path and added `path` argument for directly specifying the path to 'OpenChrom'. +* Slightly restructured metadata fields. Added `source_file` field to track +data origin. +* Standardized datetime stamps so they are always converted to POSIXct format. +* Now use `fs` package for parsing paths, eliminating buggy `check_paths` function. +* Fixed bug causing sloppy 'Chemstation' FID metadata. +* Fixed bug that caused padding of 'Chemstation 130' files with extra zeros. +* Added additional tests. + ## chromConverter 0.3.3 * Added R-based parser for "Chemstation" UV (`.uv`) files (version 131) through @@ -9,6 +32,8 @@ and returning it as a `data.frame` or `tibble`. * Added wide format option in `read_mzml`. * Added automatic detection of file formats by `read_chroms`. * Minor changes to storage of metadata in attributes for the purpose of simplification. +* Fixed bug preventing removal of file extensions for 'Agilent' data when using `read_chroms`. +* Standardized run date/time in metadata to `POSIXct` format. * Minor updates to documentation. ## chromConverter 0.3.2 diff --git a/R/aston_parsers.R b/R/aston_parsers.R index 3ff99ba..57f497c 100644 --- a/R/aston_parsers.R +++ b/R/aston_parsers.R @@ -33,7 +33,7 @@ sp_converter <- function(file, format_out = c("matrix", "data.frame"), meta <- read_masshunter_metadata(file) x <- attach_metadata(x, meta, format_in = "masshunter_dad", format_out = format_out, data_format = "wide", - parser = "aston") + parser = "aston", source_file = file) } x } @@ -82,7 +82,7 @@ uv_converter <- function(file, format_out = c("matrix","data.frame"), meta <- read_chemstation_metadata(file) x <- attach_metadata(x, meta, format_in = "chemstation_uv", format_out = format_out, data_format = "wide", - parser = "Aston") + parser = "Aston", source_file = file) } x } diff --git a/R/attach_metadata.R b/R/attach_metadata.R index 8781570..272a93c 100644 --- a/R/attach_metadata.R +++ b/R/attach_metadata.R @@ -6,29 +6,36 @@ #' @param format_in Chromatogram format #' @param format_out R format. Either \code{matrix} or \code{data.frame}. #' @param data_format Whether data is in wide or long format. -#' @param parser What parser was used to decode the data +#' @param parser What parser was used to decode the data. +#' @param source_file The path to the source file. #' @return A chromatogram with attached metadata. #' @author Ethan Bass -attach_metadata <- function(x, meta, format_in, format_out, data_format, parser = NULL){ - if (format_in == "waters_arw"){ +attach_metadata <- function(x, meta, format_in, format_out, data_format, parser = NULL, + source_file){ + switch(format_in, + "waters_arw" = { structure(x, instrument = NA, detector = NA, software = NA, method = meta$`Instrument Method Name`, batch = meta$`Sample Set Name`, operator = NA, - run_date = NA, + run_datetime = NA, sample_name = meta$SampleName, sample_id = NA, - injection_volume = NA, + sample_injection_volume = NA, + sample_amount = NA, time_range = NA, time_interval = NA, + time_unit = NA, detector_range = meta$Channel, + detector_unit = NA, + source_file = source_file, data_format = "long", parser = "chromConverter", format_out = format_out) - } else if (format_in == "shimadzu"){ + }, "shimadzu" = { structure(x, instrument = meta$`Instrument Name`, detector = meta$`Detector Name`, @@ -37,37 +44,59 @@ attach_metadata <- function(x, meta, format_in, format_out, data_format, parser method = meta$`Method File`, batch = meta$`Batch File`, operator = meta$`Operator Name`, - run_date = meta$Acquired, + run_datetime = as.POSIXct(meta$Acquired, format = "%m/%d/%Y %I:%M:%S %p"), sample_name = meta$`Sample Name`, sample_id = meta$`Sample ID`, - injection_volume = meta$`Injection Volume`, - start_time = meta$`Start Time(min)`, - end_time = meta$`End Time(min)`, + sample_injection_volume = meta$`Injection Volume`, + sample_amount = meta$`Injection Volume`, + time_range = c(meta$`Start Time(min)`, meta$`End Time(min)`), + # start_time = meta$`Start Time(min)`, + # end_time = meta$`End Time(min)`, time_interval = meta$`Interval(msec)`, - start_wavelength = meta$`Start Wavelength(nm)`, - end_wavelength = meta$`End Wavelength(nm)`, + time_interval_unit = get_time_unit( + grep("Interval", names(meta), value=TRUE)[1], format_in = "shimadzu"), + time_unit = get_time_unit( + grep("Start Time", names(meta), value=TRUE)[1], format_in = "shimadzu"), + detector_range = c(meta$`Start Wavelength(nm)`,meta$`End Wavelength(nm)`), + # detector_end = meta$`End Wavelength(nm)`, + detector_unit = NA, + source_file = source_file, data_format = data_format, parser = "chromConverter", format_out = format_out) - } else if (format_in == "chromeleon"){ + }, "chromeleon" = { structure(x, instrument = NA, detector = meta$Detector, software = meta$`Generating Data System`, method = meta$`Instrument Method`, batch = NA, operator = meta$`Operator`, - run_date = meta$`Injection Date`, - run_time = meta$`Injection Time`, + run_datetime = as.POSIXct(paste(meta$`Injection Date`, meta$`Injection Time`), + format = "%m/%d/%Y %H:%M:%S"), + # run_date = meta$`Injection Date`, + # run_time = meta$`Injection Time`, sample_name = meta$Injection, sample_id = NA, - injection_volume = meta$`Injection Volume`, - start_time = meta$`Time Min. (min)`, - end_time = meta$`Time Max. (min)`, + sample_injection_volume = meta$`Injection Volume`, + sample_amount = meta$`Injection Volume`, + time_range = c(meta$`Time Min. (min)`, meta$`Time Max. (min)`), + # start_time = meta$`Time Min. (min)`, + # end_time = meta$`Time Max. (min)`, time_interval = meta$`Average Step (s)`, + time_interval_unit <- get_time_unit( + grep("Average Step", names(meta), value = TRUE)[1], + format_in = "chromeleon"), + time_unit = get_time_unit( + grep("Time Min.", names(meta), value = TRUE)[1], + format_in="chromeleon"), + # uniform_sampling = meta$`Min. Step (s)` == meta$`Max. Step (s)`, detector_range = NA, + detector_unit = meta$`Signal Unit`, + source_file = source_file, + format_out = format_out, data_format = "long", - parser = "chromConverter", - format_out = format_out) + parser = "chromConverter" + ) # } else if (format_in == "entab"){ # structure(x, instrument = meta$instrument, # detector = NA, @@ -75,7 +104,7 @@ attach_metadata <- function(x, meta, format_in, format_out, data_format, parser # method = meta$method, # batch = meta$SeqPathAndFile, # operator = meta$operator, - # run_date = meta$run_date, + # run_datetime = meta$run_date, # sample_name = meta$sample, # sample_id = NA, # injection_volume = meta$InjVolume, @@ -85,58 +114,137 @@ attach_metadata <- function(x, meta, format_in, format_out, data_format, parser # format = data_format, # parser = "entab", # format_out = format_out) - } else if (format_in == "chemstation_uv"){ + }, "chemstation_uv" = { structure(x, instrument = meta$AcqInstName, detector = NA, software = meta$Version, method = meta$AcqMeth, batch = meta$SeqPathAndFile, operator = meta$AcqOp, - run_date = meta$InjDateTime, + run_datetime = meta$InjDateTime, sample_name = meta$SampleName, sample_id = NA, - injection_volume = meta$InjVolume, + sample_injection_volume = meta$InjVolume, + sample_amount = meta$InjVolume, time_range = NA, time_interval = NA, + time_unit = NA, detector_range = NA, + detector_unit = NA, + source_file = source_file, data_format = data_format, parser = parser, format_out = format_out) - } else if (format_in == "masshunter_dad"){ - structure(x, instrument = meta$Instrument, - detector = NA, - software = NA, - method = meta$Method, - batch = NA, - operator = meta$OperatorName, - run_date = meta$AcqTime, - sample_name = meta$`Sample Name`, - sample_id = meta$`Sample ID`, - injection_volume = meta$`Inj Vol`, - time_range = NA, - time_interval = NA, - detector_range = NA, - data_format = data_format, - parser = parser, - format_out = format_out) - } else { + }, "masshunter_dad" = { + structure(x, instrument = meta$Instrument, + detector = NA, + software = NA, + method = meta$Method, + batch = NA, + operator = meta$OperatorName, + run_datetime = meta$AcqTime, + sample_name = meta$`Sample Name`, + sample_id = meta$`Sample ID`, + sample_injection_volume = meta$`Inj Vol`, + sample_amount = meta$`Inj Vol`, + time_range = NA, + time_interval = NA, + time_unit = NA, + detector_range = NA, + detector_unit = NA, + source_file = source_file, + data_format = data_format, + parser = parser, + format_out = format_out) + }, "cdf" = { + structure(x, instrument = NA, + detector = get_metadata_field(meta, "detector_name"), + software = NA, + method = NA, + batch = get_metadata_field(meta, "experiment_title"), + operator = get_metadata_field(meta, "operator_name"), + run_datetime = as.POSIXct( + get_metadata_field(meta, "injection_date_time_stamp"), + format = "%Y%m%d%H%M%S%z"), + sample_name = get_metadata_field(meta, "sample_name"), + sample_id = get_metadata_field(meta, "sample_id"), + sample_type = get_metadata_field(meta, "sample_type"), + sample_injection_volume = get_metadata_field(meta, "sample_injection_volume"), + sample_amount = get_metadata_field(meta, "sample_amount"), + time_start = NA, + time_end = NA, + time_interval = NA, + time_unit = get_metadata_field(meta, "retention_unit"), + detector_range = NA, + # detector_end = NA, + detector_unit = get_metadata_field(meta, "detector_unit"), + source_file = ifelse(missing(source_file), NA, source_file), + format_out = ifelse(missing(format_out), NA, format_out), + data_format = ifelse(missing(data_format), NA, data_format), + parser = "chromConverter") + }, "mdf" = { + structure(x, instrument = meta[meta$Property == "Instrument","Value"], + detector = "Variable Wavelength Detector", + software = NA, + method = NA, + batch = get_metadata_field(meta, "experiment_title"), + operator = meta[meta$Property == "Operator", "Value"], + run_datetime = as.POSIXct( + meta[meta$Property == "Time", "Value"], + format = "%d.%m.%Y %H:%M:%S"), + sample_name = get_metadata_field(meta, "sample_name"), + sample_id = get_metadata_field(meta, "sample_id"), + sample_type = "unknown", + sample_injection_volume = 1, + sample_amount = 1, + time_start = meta[meta$Group=="Interval Time" & meta$Property == "From", "Value"], + time_end = meta[meta$Group=="Interval Time" & meta$Property == "To", "Value"], + time_interval = meta[meta$Group=="Interval Time" & meta$Property == "Step", "Value"], + time_unit = meta[meta$Group=="Interval Time" & meta$Property == "Units", "Value"], + detector_range = meta[meta$Property == "Wave", "Value"], + # detector_end = meta[meta$Property == "Wave", "Value"], + detector_unit = meta[meta$Group=="Array photometric" & meta$Property == "Units", "Value"], + source_file = ifelse(missing(source_file), NA, source_file), + format_out = ifelse(missing(format_out), NA, format_out), + data_format = ifelse(missing(data_format), NA, data_format), + parser = "chromConverter") + }, "default" = { structure(x, instrument = meta$Instrument, detector = NA, software = NA, method = meta$Method, batch = NA, operator = meta$OperatorName, - run_date = meta$AcqTime, + run_datetime = meta$AcqTime, sample_name = meta$`Sample Name`, sample_id = meta$`Sample ID`, - injection_volume = meta$`Inj Vol`, + sample_injection_volume = meta$`Inj Vol`, + sample_amount = meta$`Inj Vol`, time_range = NA, time_interval = NA, + time_unit = NA, detector_range = NA, + detector_unit = NA, + source_file = source_file, + format_out = ifelse(missing(format_out), NA, format_out), data_format = ifelse(missing(data_format), NA, data_format), - parser = ifelse(missing(parser), NA, parser), - format_out = ifelse(missing(format_out), NA, format_out) - ) + parser = ifelse(missing(parser), NA, parser) + ) + } + ) +} + +#' @noRd +get_metadata_field <- function(x, field){ + ifelse(!is.null(x[[field]]), x[[field]], NA) +} + +#' @noRd +get_time_unit <- function(string, format_in){ + if (format_in %in% c("chromeleon","shimadzu")){ + pattern <- "\\((.*?)\\)" + unit <- gsub("\\(|\\)", "", regmatches(string, regexpr(pattern, string))[[1]]) + switch(unit, "min" = "Minutes", "sec" = "Seconds") } } diff --git a/R/call_entab.R b/R/call_entab.R index 0ef6112..fc5b6fb 100644 --- a/R/call_entab.R +++ b/R/call_entab.R @@ -47,7 +47,8 @@ call_entab <- function(file, data_format = c("wide","long"), meta <- c(meta, metadata_from_file) } x <- attach_metadata(x, meta, format_in = format_in, format_out = format_out, - data_format = data_format, parser = "entab") + data_format = data_format, parser = "entab", + source_file = file) } x } diff --git a/R/call_openchrom.R b/R/call_openchrom.R index 47b2d72..717effd 100644 --- a/R/call_openchrom.R +++ b/R/call_openchrom.R @@ -32,7 +32,12 @@ #' @section Side effects: Chromatograms will be exported in the format specified #' by \code{export_format} in the folder specified by \code{path_out}. #' @author Ethan Bass -#' @export call_openchrom +#' @references +#' Wenig, Philip and Odermatt, Juergen. OpenChrom: A Cross-Platform Open Source +#' Software for the Mass Spectrometric Analysis of Chromatographic Data. \emph{ +#' BMC Bioinformatics} \bold{11}, no. 1 (July 30, 2010): 405. \doi{ +#' 10.1186/1471-2105-11-405}. +#' @export call_openchrom <- function(files, path_out, format_in, export_format = c("csv", "cdf", "mzml", "animl"), @@ -50,17 +55,25 @@ call_openchrom <- function(files, path_out, format_in, if(!file.exists(path_out)){ stop("'path_out' not found. Make sure directory exists.") } - openchrom_path <- configure_call_openchrom() - path_xml <- write_openchrom_batchfile(files = files, path_out=path_out, format_in = format_in, - export_format = export_format) + openchrom_path <- configure_openchrom() + path_xml <- write_openchrom_batchfile(files = files, path_out = path_out, + format_in = format_in, + export_format = export_format) system(paste0(openchrom_path, " -nosplash -cli -batchfile ", path_xml)) - new_files <- paste0(path_out, sapply(strsplit(basename(files), "\\."), function(x) x[1]), ".", export_format) + new_files <- fs::path(path_out, + fs::path_ext_remove(fs::path_file(files)), + ext = switch(export_format, "animl" = "animl", + "csv" = "csv", "cdf" = "CDF", + "mzml" = "mzML")) if (return_paths){ new_files } else{ - if (export_format == "csv"){ - lapply(new_files, read.csv) - } + file_reader <- switch(export_format, + "csv" = read.csv, + "cdf" = read_cdf, + "animl" = warning("An animl parser is not currently available in chromConverter"), + "mzml" = read_mzml) + lapply(new_files, file_reader) } } @@ -113,52 +126,76 @@ write_openchrom_batchfile <- function(files, path_out, "animl" = wsd_animl_converter) } x %>% xml_children %>% .[[4]] %>% xml_add_child(.value=gsub("path_out", path_out, parser)) - path_xml <- paste0(path_out, "batchfile_", strftime(Sys.time(),format = "%Y-%m-%d_%H-%M-%S"), ".xml") + path_xml <- fs::path(path_out, paste0("batchfile_", strftime(Sys.time(),format = "%Y-%m-%d_%H-%M-%S")), ext = "xml") write_xml(x, file = path_xml) path_xml } #' Configure OpenChrom parser #' -#' @name configure_call_openchrom +#' @name configure_openchrom #' @param cli Defaults to NULL. If "true", R will rewrite openchrom ini file to enable CLI. #' If "false", R will disable CLI. If NULL, R will not modify the ini file. +#' @param path Path to 'OpenChrom' executable (Optional). The supplied path will +#' overwrite the current path. +#' @importFrom utils read.table write.table #' @return Returns path to OpenChrom command-line application #' @author Ethan Bass -#' @noRd -configure_call_openchrom <- function(cli = c(NULL, "true", "false")){ - cli <- match.arg(cli, c(NULL, "true", "false")) - path_parser <- readLines(system.file("shell/path_to_openchrom_commandline.txt", package = 'chromConverter')) +#' @export + +configure_openchrom <- function(cli = c("null", "true", "false", "status"), path = NULL){ + cli <- match.arg(cli, c("null", "true", "false", "status")) + if (is.null(path)){ + path_parser <- readLines(system.file("shell/path_to_openchrom_commandline.txt", package = 'chromConverter')) + if (path_parser == "NULL"){ + path_parser <- switch(.Platform$OS.type, + unix = "/Applications/Eclipse.app/Contents/MacOS/openchrom", + windows = fs::path(fs::path_home(), "AppData/Local/Programs/OpenChrom/openchrom.exe"), + linux = "/snap/bin/openchrom" + ) + } + } else{ + path_parser <- path + } + writeLines(path_parser, + con = system.file('shell/path_to_openchrom_commandline.txt', package='chromConverter')) + if (!file.exists(path_parser)){ warning("OpenChrom not found!", immediate. = TRUE) path_parser <- readline(prompt="Please provide path to `OpenChrom` command line):") if (.Platform$OS.type == "windows"){ path_parser <- gsub("/","\\\\", path_parser) } - writeLines(path_parser, con = system.file('shell/path_to_openchrom_commandline.txt', package='chromConverter')) + writeLines(path_parser, + con = system.file('shell/path_to_openchrom_commandline.txt', package='chromConverter')) } path_ini <- switch(.Platform$OS.type, "unix" = paste0(gsub("MacOS/openchrom", "", path_parser), "Eclipse/openchrom.ini"), "linux" = paste0(path_parser, ".uni"), "windows" = paste0(gsub(".exe", "", path_parser), ".ini")) ini <- readLines(path_ini) - cli_index <- grep("-Denable.cli.support",ini) - ini_split <- strsplit(ini[cli_index],"=")[[1]] - cli_tf <- ini_split[2] - if(cli_tf == "false"){ - message(" The OpenChrom command-line interface is turned off! - Update `openchrom.ini` to activate the command-line interface (y/n)? - (Warning: This will deactivate the GUI on your OpenChrom installation!)") - ans <- readline() - if (ans %in% c("y","Y", "yes", "Yes", "YES")){ - cli <- "true" - } else{ - stop("-Denable.cli.support must be enabled to use the OpenChrom parsers from R.") + cli_index <- grep("-Denable.cli.support", ini) + ini_split <- strsplit(ini[cli_index], "=")[[1]] + cli_bool <- ini_split[2] + + if (cli == "null"){ + if (cli_bool == "false"){ + message(" The OpenChrom command-line interface is turned off! + Update `openchrom.ini` to activate the command-line interface (y/n)? + (Warning: This will deactivate the GUI on your OpenChrom installation!)") + ans <- readline() + if (ans %in% c("y","Y", "yes", "Yes", "YES")){ + cli <- "true" + } else{ + stop("-Denable.cli.support must be enabled to use the OpenChrom parsers from R.") + } } + } else if (cli == "status"){ + return(cli_bool) } if (cli %in% c("true", "false")){ ini_split[2] <- cli - ini[cli_index] <- paste(ini_split, collapse="=") + ini[cli_index] <- paste(ini_split, collapse = "=") writeLines(ini, path_ini) } path_parser[1] diff --git a/R/rainbow_parser.R b/R/call_rainbow.R similarity index 97% rename from R/rainbow_parser.R rename to R/call_rainbow.R index a129473..b634598 100644 --- a/R/rainbow_parser.R +++ b/R/call_rainbow.R @@ -40,7 +40,9 @@ call_rainbow <- function(file, format_in = c("agilent_d", "waters_raw", "masshun "masshunter" = rb_read$read, "chemstation" = rb_parse_agilent$chemstation$parse_file, "chemstation_uv" = rb_parse_agilent$chemstation$parse_file, - "chemstation_fid" = rb_parse_agilent$chemstation$parse_file) + "chemstation_fid" = rb_parse_agilent$chemstation$parse_file, + "chemstation_ch" = rb_parse_agilent$chemstation$parse_file, + "default" = rb_read$read) if (format_in %in% c("chemstation", "chemstation_uv", "chemstation_fid")){ by <- "single" } diff --git a/R/parsers.R b/R/parsers.R index e0bae96..53aa4d1 100644 --- a/R/parsers.R +++ b/R/parsers.R @@ -3,14 +3,18 @@ #' @importFrom utils tail read.csv #' @param file path to file #' @param format_out R format. Either \code{matrix} or \code{data.frame}. +#' @param data_format Whether to return data in \code{wide} or \code{long} format. #' @param read_metadata Whether to read metadata from file. #' @return A chromatogram in the format specified by \code{format_out}. #' (retention time x wavelength). #' @author Ethan Bass #' @export + read_chromeleon <- function(file, format_out = c("matrix","data.frame"), + data_format = c("wide","long"), read_metadata = TRUE){ format_out <- match.arg(format_out, c("matrix","data.frame")) + data_format <- match.arg(data_format, c("wide","long")) xx <- readLines(file) start <- tail(grep("Data:", xx), 1) x <- read.csv(file, skip = start, sep="\t") @@ -20,14 +24,20 @@ read_chromeleon <- function(file, format_out = c("matrix","data.frame"), x <- apply(x, 2, function(x) gsub(",", ".", x)) } x <- apply(x, 2, as.numeric) - x <- as.matrix(x) - rownames(x) <- x[,1] - x <- x[,2, drop = FALSE] + colnames(x) <- c("RT","Intensity") + if (data_format == "wide"){ + rownames(x) <- x[,1] + x <- x[, 2, drop = FALSE] + } + if (format_out == "data.frame"){ + x <- as.data.frame(x) + } if (read_metadata){ meta <- try(read_chromeleon_metadata(xx)) if (!inherits(meta, "try-error")){ x <- attach_metadata(x, meta, format_in = "chromeleon", format_out = format_out, - data_format = "wide", parser = "chromConverter") + data_format = "wide", parser = "chromConverter", + source_file = file) } } x @@ -61,8 +71,8 @@ read_shimadzu <- function(file, format_in, read_metadata = TRUE){ if (missing(format_in)) stop("`format_in` must be specified. The options are `fid` or `dad`.") - format_out <- match.arg(format_out, c("matrix","data.frame")) - data_format <- match.arg(data_format, c("wide","long")) + format_out <- match.arg(format_out, c("matrix", "data.frame")) + data_format <- match.arg(data_format, c("wide", "long")) what <- match.arg(what, c("chromatogram", "peak_table"), several.ok = TRUE) x <- readLines(file) sep <- substr(x[2], 17, 17) @@ -77,7 +87,7 @@ read_shimadzu <- function(file, format_in, if (length(chrom.idx) != 0){ header <- try(extract_header(x = x, chrom.idx = chrom.idx, sep = sep)) met <- header[[1]] - decimal_separator <- ifelse(grepl(",", met[2,2]),",",".") + decimal_separator <- ifelse(grepl(",", met[2, 2]),",",".") if (decimal_separator == ","){ met[c(2:3), 2] <- gsub(",", ".", met[c(2:3), 2]) } @@ -90,7 +100,9 @@ read_shimadzu <- function(file, format_in, rownames(xx) <- xx[,1] xx <- xx[, 2, drop = FALSE] colnames(xx) <- "Intensity" - data_format <- "long" + if (data_format == "long"){ + xx <- cbind(RT = as.numeric(rownames(xx)), Intensity = as.numeric(xx[,1])) + } } else if (format_in == "dad"){ nrows <- as.numeric(met[grep("# of Time Axis Points", met[,1]),2]) ncols <- as.numeric(met[grep("# of Wavelength Axis Points", met[,1]),2]) @@ -153,7 +165,7 @@ read_shimadzu <- function(file, format_in, } xx <- switch(what, "chromatogram" = xx, "peak_table" = peak_tab, - "both" = list(xx, peak_tab)) + "both" = list(chromatogram = xx, peak_table = peak_tab)) if (read_metadata){ idx <- which(x[headings] %in% c("[Header]", "[File Information]", "[Sample Information]", @@ -170,17 +182,18 @@ read_shimadzu <- function(file, format_in, } rownames(meta) <- meta[, 1] meta <- as.list(meta[,2]) - data_format <- switch(format_in, - "fid" = "long", - "dad" = "wide") + # data_format <- switch(format_in, + # "fid" = "long", + # "dad" = "wide") if (inherits(xx, "list")){ xx <- lapply(xx, function(xxx){ - attach_metadata(xxx, meta, format_in = "shimadzu", format_out = format_out, - data_format = data_format, - parser = "chromConverter") + attach_metadata(xxx, meta, format_in = "shimadzu", + source_file = file, format_out = format_out, + data_format = data_format, parser = "chromConverter") }) } else{ - xx <- attach_metadata(xx, meta, format_in = "shimadzu", format_out = format_out, + xx <- attach_metadata(xx, meta, format_in = "shimadzu", + source_file = file, format_out = format_out, data_format = data_format, parser = "chromConverter") } @@ -199,24 +212,36 @@ read_shimadzu <- function(file, format_in, #' @importFrom utils tail read.csv #' @param file path to file #' @param format_out R format. Either \code{matrix} or \code{data.frame}. +#' @param data_format Whether to return data in \code{wide} or \code{long} format. #' @param read_metadata Whether to read metadata from file. #' @return A chromatogram in the format specified by \code{format_out} #' (retention time x wavelength). #' @author Ethan Bass #' @export -read_waters_arw <- function(file, read_metadata = TRUE, - format_out = c("matrix","data.frame")){ + +read_waters_arw <- function(file, format_out = c("matrix","data.frame"), + data_format = c("wide","long"), + read_metadata = TRUE){ format_out <- match.arg(format_out, c("matrix","data.frame")) - x <- read.csv(file, sep="\t", skip = 2, header=FALSE, row.names=1) - if (format_out == "matrix") + data_format <- match.arg(data_format, c("wide","long")) + x <- read.csv(file, sep="\t", skip = 2, header = FALSE, row.names = 1) + if (ncol(x) == 1){ + colnames(x) <- "Intensity" + if (data_format == "long"){ + x <- data.frame(RT = rownames(x), Intensity=x[,1]) + } + } + if (format_out == "matrix"){ x <- as.matrix(x) + } if (read_metadata){ meta <- try(read_waters_metadata(file)) if (!inherits(meta, "try-error")){ x <- attach_metadata(x, meta, format_in = "waters_arw", format_out = format_out, - data_format = "wide", - parser = "chromConverter") + data_format = data_format, + parser = "chromConverter", + source_file = file) } } x @@ -232,6 +257,7 @@ read_waters_arw <- function(file, read_metadata = TRUE, #' (retention time x wavelength). #' @author Ethan Bass #' @export + read_chemstation_csv <- function(file, format_out = c("matrix","data.frame")){ format_out <- match.arg(format_out, c("matrix", "data.frame")) x <- read.csv(file, row.names = 1, header = TRUE, @@ -272,7 +298,8 @@ read_mzml <- function(path, format_out = c("matrix", "data.frame"), data_format = c("long","wide"), parser=c("RaMS","mzR"), what=c("MS1","MS2", "BPC", "TIC", "DAD", - "chroms", "metadata", "everything"), verbose = FALSE, + "chroms", "metadata", "everything"), + verbose = FALSE, ...){ parser <- match.arg(parser, c("RaMS", "mzR")) format_out <- match.arg(format_out, c("matrix", "data.frame")) diff --git a/R/read_cdf.R b/R/read_cdf.R new file mode 100644 index 0000000..276084e --- /dev/null +++ b/R/read_cdf.R @@ -0,0 +1,97 @@ +#' Read CDF file +#' @param file path to file +#' @param format_out R format. Either \code{matrix} or \code{data.frame}. +#' @param data_format Whether to return data in \code{wide} or \code{long} format. +#' For 2D files, "long" format returns the retention time as the first column of +#' the data.frame or matrix while "wide" format returns the retention time as the +#' rownames of the object. +#' @param read_metadata Whether to read metadata from file. +#' @param what Whether to extract \code{chromatogram} and/or \code{peak_table}. +#' @return A chromatogram in the format specified by the \code{format_out} and +#' \code{data_format} arguments (retention time x wavelength). +#' @author Ethan Bass +#' @export + +read_cdf <- function(file, format_out = c("matrix","data.frame"), + data_format = c("wide","long"), + what = "chromatogram", read_metadata = TRUE){ + check_for_pkg("ncdf4") + nc <- ncdf4::nc_open(file) + if ("ordinate_values" %in% names(nc$var)){ + format = "chrom" + } else { + format = "ms" + } + ncdf4::nc_close(nc) + fn <- switch(format, chrom = read_andi_chrom, ms = andi_ms_error) + fn(file = file, data_format = data_format, format_out = format_out, + what = what, read_metadata = read_metadata) +} + +#' @noRd +andi_ms_error <- function(...){ + stop("The `cdf` MS format is not yet supported by an internal `chromConverter` parser. + Please try the OpenChrom `msd` parser instead.") +} + +#' Read ANDI chrom file +#' @noRd +read_andi_chrom <- function(file, format_out = c("matrix","data.frame"), + data_format = c("wide","long"), + what = "chromatogram", read_metadata = TRUE){ + data_format <- match.arg(data_format, c("wide","long")) + format_out <- match.arg(format_out, c("matrix","data.frame")) + what <- match.arg(what, c("chromatogram", "peak_table"), several.ok = TRUE) + nc <- ncdf4::nc_open(file) + if (any(what == "chromatogram")){ + y <- ncdf4::ncvar_get(nc, "ordinate_values") + nvals <- ncdf4::ncvar_get(nc, "actual_run_time_length") + n_interval <- ncdf4::ncvar_get(nc, "actual_sampling_interval") + n_start <- ncdf4::ncvar_get(nc, "actual_delay_time") + x <- seq(from = n_start, to = nvals, length.out=length(y)) + data = data.frame(RT = x, Intensity = y) + if (data_format == "wide"){ + rownames(data) <- data[,1] + data <- data[,-1, drop = FALSE] + } + if (format_out == "matrix"){ + data <- as.matrix(data) + } + } + if (any(what == "peak_table")){ + peak_table_vars <- names(which(sapply(nc$var, function(x){ + x$dim[[1]]$name + }) == "peak_number")) + if (length(peak_table_vars) > 0){ + peak_tab <- sapply(peak_table_vars, function(var){ + ncdf4::ncvar_get(nc, varid = var) + }) + peak_tab <- as.data.frame(peak_tab) + } + } + if ("peak_table" %in% what & "chromatogram" %in% what){ + what <- "both" + } + data <- switch(what, "chromatogram" = data, + "peak_table" = peak_tab, + "both" = list(chromatogram=data, peak_table = peak_tab)) + if (read_metadata){ + meta <- ncdf4::ncatt_get(nc, varid = 0) + # data <- attach_metadata(data, meta = meta, format_in = format_in, + # format_out = format_out, data_format = data_format, + # parser = "chromconverter", source_file = file) + if (inherits(data, "list")){ + data <- lapply(data, function(xx){ + attach_metadata(data, meta = meta, format_in = "cdf", + format_out = format_out, data_format = data_format, + parser = "chromconverter", source_file = file) + }) + } else{ + data <- attach_metadata(data, meta = meta, format_in = "cdf", + format_out = format_out, data_format = data_format, + parser = "chromconverter", source_file = file) + } + } + ncdf4::nc_close(nc) + data +} diff --git a/R/read_chemstation_ch.R b/R/read_chemstation_ch.R index bdc0670..8493fc2 100644 --- a/R/read_chemstation_ch.R +++ b/R/read_chemstation_ch.R @@ -1,8 +1,9 @@ #' Parser for reading Agilent ('.ch') files into R #' @importFrom bitops bitAnd bitShiftL #' @param path Path to \code{.ch} file +#' @param format_out Matrix or data.frame. +#' @param data_format Whether to return data in \code{wide} or \code{long} format. #' @param read_metadata Logical. Whether to attach metadata. -#' @param format_out Matrix or data.frame #' @author Ethan Bass #' @return A chromatogram in the format specified by \code{format_out} #' (retention time x wavelength). @@ -10,9 +11,11 @@ #' ((c) James Dillon 2014). #' @export -read_chemstation_ch <- function(path, read_metadata = TRUE, - format_out = c("matrix","data.frame")){ +read_chemstation_ch <- function(path, format_out = c("matrix","data.frame"), + data_format = c("wide","long"), + read_metadata = TRUE){ format_out <- match.arg(format_out, c("matrix","data.frame")) + data_format <- match.arg(data_format, c("wide","long")) f <- file(path, "rb") on.exit(close(f)) @@ -20,83 +23,8 @@ read_chemstation_ch <- function(path, read_metadata = TRUE, # HEADER seek(f, 1, "start") version <- readBin(f, "character", n = 1) - version <- match.arg(version, choices = c("8", "81", "130", "179","181")) - if (version %in% c("179","181")){ - offsets <- list( - file_type = 347, #0x15B - sample_name = 858, #0x35A - operator = 1880, #0x758 - date = 2391, # 0x957 - instrument = 2492, # 0x9BC - method = 2574, # 0xA0E - software = 3089, # 0xC11 - unit = 4172, # 0x104C - signal = 4213, # 0x1075 - num_times = 278, # 0x116 - rt_first = 282, # 0x11A - rt_last = 286, # 0x11E - scaling_factor = 4732, # 0x127C - intercept = 4724, - data_start = 4096 # 0x1000 - ) - } else if (version == "130"){ - offsets <- list( - # sequence_line_or_injection = 252, #UINT16 - # injection_or_sequence_line = 256, #UINT16 - # data_offset = 264, # UINT32 - # start_time = 282, - # end_time = 286, - # version_string = 326, # utf16 - file_type = 347, # utf16 - sample_name = 858, # utf16 - operator = 1880, # utf16 - date = 2391, # utf16 - inlet = 2492, # utf16 - instrument = 2533, # utf16' - method = 2574, # utf16 - software_version = 3601, #utf16' - software = 3089, # 'utf16' - software_revision = 3802, #'utf16' - units = 4172, # 'utf16' - signal = 4213, # 'utf16' - zero = 4110, # INT32 - scaling_factor = 4732) #ENDIAN + 'd' - } else if (version == 30){ - offsets <- list( - file_type = 4, # utf16 - # sample_name = 858, # utf16 - operator = 148, # utf16 - date = 178, # utf16 - # inlet = 2492, # utf16 - instrument = 208, # utf16' - method = 228, # utf16 - software_version = 355, #utf16' - software = 322, # 'utf16' - software_revision = 405, #'utf16' - units = 580, # 'utf16' - signal = 596, # 'utf16' - zero = 4110, # INT32 - scaling_factor = 4732, - data_start = 1024 #ENDIAN + 'd' - ) - } else if (version %in% c("8","81")){ - offsets <- list(sample_name = 24, - description = 86, - operator = 148, - date = 178, - instrument = 218, - inlet = 208, - method = 228, - software = 0xC11, - unit = 580, - num_times = 0x116, - rt_first = 0x11A, - rt_last = 0x11E, - scaling_factor = 644, - intercept = 636, - data_start = 4096) - } - + version <- match.arg(version, choices = c("8", "81", "130", "179", "181")) + offsets <- get_agilent_offsets(version) decoder <- switch(version, "8" = decode_delta, "81" = decode_double_delta, @@ -114,8 +42,8 @@ read_chemstation_ch <- function(path, read_metadata = TRUE, data <- decoder(f, offset) - seek(f, 282, "start") - seek(f, 282, "start") + seek(f, where = 282, origin = "start") + seek(f, where = 282, origin = "start") if (version %in% c("8","130")){ xmin <- as.double(readBin(f, "integer", n = 1, size = 4, signed = TRUE, endian = "big")) / 60000 @@ -134,7 +62,11 @@ read_chemstation_ch <- function(path, read_metadata = TRUE, data <- data * scaling_factor + intercept - data <- data.frame(Intensity = data, row.names = times) + if (data_format == "wide"){ + data <- data.frame(Intensity = data, row.names = times) + } else if (data_format == "long"){ + data <- data.frame(RT = times, Intensity = data) + } if (format_out == "matrix"){ data <- as.matrix(data) } @@ -150,14 +82,21 @@ read_chemstation_ch <- function(path, read_metadata = TRUE, n <- get_nchar(f) cc_collapse(readBin(f, "character", n = n)) }) - + if (read_metadata){ + datetime_regex <- "(\\d{2}-[A-Za-z]{3}-\\d{2}, \\d{2}:\\d{2}:\\d{2})|(\\d{2}/\\d{2}/\\d{4} \\d{1,2}:\\d{2}:\\d{2} (?:AM|PM)?)" + datetime <- regmatches(meta$date, gregexpr(datetime_regex, meta$date))[[1]] + datetime_formats <- c("%d-%b-%y, %H:%M:%S", "%m/%d/%Y %I:%M:%S %p", "%d/%m/%Y %I:%M:%S %p") + datetime <- as.POSIXct(datetime, tz = "UTC", tryFormats = datetime_formats) data <- structure(data, file_version = version, sample_name = meta$sample_name, - run_date = meta$date, instrument = meta$instrument, + run_date = datetime, + instrument = meta$instrument, method = meta$method, software_version = meta$software_version, software = meta$software, software_rev = meta$software_revision, - signal = meta$signal, unit = meta$unit, - time_range = c(xmin, xmax), - data_format = "long", parser = "chromConverter") + signal = meta$signal, detector_unit = meta$unit, + time_range = c(xmin, xmax), time_interval = mean(diff(times)), + time_unit = "Minutes", source_file = path, + data_format = data_format, parser = "chromConverter") + } } data } @@ -182,7 +121,7 @@ get_chemstation_dir_name <- function(path){ #' @noRd get_nchar <- function(f){ - as.numeric(readBin(f, what = "raw", n = 1))*2 + as.numeric(readBin(f, what = "raw", n = 1)) } #' @noRd @@ -239,49 +178,119 @@ decode_double_array <- function(file, offset) { #' @noRd #' @note This function was adapted from the \href{https://github.com/chemplexity/chromatography}{Chromatography Toolbox} ((c) James Dillon 2014). - -# file <- f decode_delta <- function(file, offset) { - # if (ftell(file) == -1) { - # signal <- c() - # return(signal) - # } else { seek(file, 0, 'end') fsize <- seek(file, NA, "current") seek(file, offset, "start") start <- seek(file, NA, "current") - # } - signal <- rep(0, round((fsize - start)/2)) + signal <- rep(NA, round((fsize - start)/2)) buffer <- rep(0, 4) index <- 1 - while (seek(file, NA, "current") < fsize) { - buffer[1] <- readBin(file, "integer", n=1, endian = "big", size = 2) - buffer[2] <- buffer[4] - - if (bitShiftL(buffer[1], 12) == 0) { - # signal <- signal[1:index-1] - break - } - - for (i in (1:bitAnd(buffer[1], 4095))) { - buffer[3] <- readBin(file, "integer", n = 1, endian = "big", size = 2) - - if (buffer[3] != -32768) { - buffer[2] <- buffer[2] + buffer[3] - } else { - buffer[2] <- readBin(file, "integer", n = 1, endian = "big", size = 4) + while (TRUE) { + head <- readBin(file, "integer", n = 1, size = 1, endian = "big") + if (head != 0x10) { + break } - - signal[index] <- buffer[2] - index <- index + 1 - } - - buffer[4] <- buffer[2] + buffer[2] <- buffer[4] + + segment_length <- readBin(file, "integer", n = 1, size = 1, endian = "big") + for (i in seq_len(segment_length)){ + # for (i in (1:bitwAnd(buffer[1], 4095L))) { + buffer[3] <- readBin(file, "integer", n = 1, size = 2, endian = "big") + if (buffer[3] != -32768L) { + buffer[2] <- buffer[2] + buffer[3] + } else { + buffer[2] <- readBin(file, "integer", n = 1, size =4 ,endian = "big") + } + + signal[index] <- buffer[2] + index <- index + 1 + } + buffer[4] <- buffer[2] } - + signal <- signal[!is.na(signal)] return(signal) } +#' @noRd +get_agilent_offsets <- function(version){ + if (version %in% c("179","181")){ + offsets <- list( + file_type = 347, #0x15B + sample_name = 858, #0x35A + operator = 1880, #0x758 + date = 2391, # 0x957 + instrument = 2492, # 0x9BC + method = 2574, # 0xA0E + software = 3089, # 0xC11 + unit = 4172, # 0x104C + signal = 4213, # 0x1075 + num_times = 278, # 0x116 + rt_first = 282, # 0x11A + rt_last = 286, # 0x11E + scaling_factor = 4732, # 0x127C + intercept = 4724, + data_start = 4096 # 0x1000 + ) + } else if (version == "130"){ + offsets <- list( + # sequence_line_or_injection = 252, #UINT16 + # injection_or_sequence_line = 256, #UINT16 + # data_offset = 264, # UINT32 + start_time = 282, + # end_time = 286, + # version_string = 326, # utf16 + file_type = 347, # utf16 + sample_name = 858, # utf16 + operator = 1880, # utf16 + date = 2391, # utf16 + inlet = 2492, # utf16 + instrument = 2533, # utf16' + method = 2574, # utf16 + software_version = 3601, #utf16' + software = 3089, # 'utf16' + software_revision = 3802, #'utf16' + units = 4172, # 'utf16' + signal = 4213, # 'utf16' + zero = 4110, # INT32 + scaling_factor = 4732) #ENDIAN + 'd' + } else if (version == 30){ + offsets <- list( + file_type = 4, # utf16 + # sample_name = 858, # utf16 + operator = 148, # utf16 + date = 178, # utf16 + # inlet = 2492, # utf16 + instrument = 208, # utf16' + method = 228, # utf16 + software_version = 355, #utf16' + software = 322, # 'utf16' + software_revision = 405, #'utf16' + units = 580, # 'utf16' + signal = 596, # 'utf16' + zero = 4110, # INT32 + scaling_factor = 4732, + data_start = 1024 #ENDIAN + 'd' + ) + } else if (version %in% c("8","81")){ + offsets <- list(sample_name = 24, + description = 86, + operator = 148, + date = 178, + instrument = 218, + inlet = 208, + method = 228, + # software = 0xC11, + unit = 580, + num_times = 0x116, + rt_first = 0x11A, + rt_last = 0x11E, + scaling_factor = 644, + intercept = 636, + data_start = 4096) + } + offsets +} diff --git a/R/read_chemstation_uv.R b/R/read_chemstation_uv.R index 1e7bb14..6712bdc 100644 --- a/R/read_chemstation_uv.R +++ b/R/read_chemstation_uv.R @@ -12,9 +12,9 @@ #' (https://rainbow-api.readthedocs.io/en/latest/agilent/uv.html). #' @export -read_chemstation_uv <- function(path, read_metadata = TRUE, - format_out = c("matrix","data.frame"), - data_format = c("wide","long")){ +read_chemstation_uv <- function(path, format_out = c("matrix","data.frame"), + data_format = c("wide","long"), + read_metadata = TRUE){ format_out <- match.arg(format_out, c("matrix","data.frame")) data_format <- match.arg(data_format, c("wide","long")) @@ -78,7 +78,7 @@ read_chemstation_uv <- function(path, read_metadata = TRUE, seek(f, offsets$data_start + 0x8) # Read and unpack wavelength information - wave_info <- readBin(f, integer(), n=3, size=2, endian="little") + wave_info <- readBin(f, integer(), n = 3, size = 2, endian = "little") lambda_start <- wave_info[1] %/% 20 lambda_end <- wave_info[2] %/% 20 delta_lambda <- wave_info[3] %/% 20 @@ -127,6 +127,7 @@ read_chemstation_uv <- function(path, read_metadata = TRUE, if (read_metadata){ data <- structure(data, file_version = meta$file_type, sample_name = meta$sample_name, + file_source = path, operator = meta$operator, run_date = meta$date, instrument = meta$detector, method = meta$method, software_version = NA, diff --git a/R/read_chroms.R b/R/read_chroms.R index 37cb455..1eca1c7 100644 --- a/R/read_chroms.R +++ b/R/read_chroms.R @@ -34,16 +34,20 @@ #' @param format_out R object format (i.e. data.frame or matrix). #' @param data_format Whether to output data in wide or long format. Either #' \code{wide} or \code{long}. -#' @param export Logical. If TRUE, will export files as csvs. +#' @param export Logical. If TRUE, the program will export files in the format +#' specified by \code{export_format} in the directory specified by \code{path_out}. #' @param path_out Path for exporting files. If path not specified, files will #' export to current working directory. -#' @param export_format Export format. Currently the only option is \code{.csv}, -#' unless you are using OpenChrom parsers, where you could have \code{csv}, -#' \code{cdf}, \code{mzml}, or \code{animl}. +#' @param export_format Export format. Currently the options include \code{.csv}, +#' \code{chemstation_csv} (utf-16 encoding), and \code{cdf}, unless you are using +#' OpenChrom parsers, where there are two additional options: \code{mzml}, and +#' \code{animl}. #' @param read_metadata Logical, whether to attach metadata (if it's available). #' Defaults to TRUE. #' @param progress_bar Logical. Whether to show progress bar. Defaults to #' \code{TRUE} if \code{\link[pbapply]{pbapply}} is installed. +#' @param sample_names An optional character vector of sample names. Otherwise +#' sample names default to the basename of the specified files. #' @param dat Existing list of chromatograms to append results. #' (Defaults to NULL). #' @return A list of chromatograms in \code{matrix} or \code{data.frame} format, @@ -67,15 +71,16 @@ read_chroms <- function(paths, find_files, "chemstation_fid", "masshunter_dad", "shimadzu_fid", "shimadzu_dad", "chromeleon_uv", "thermoraw", "mzml", "waters_arw", "waters_raw", - "msd", "csd", "wsd", "other"), + "msd", "csd", "wsd", "mdf", "other"), pattern = NULL, parser = c("", "chromconverter", "aston", "entab", "thermoraw", "openchrom", "rainbow"), format_out = c("matrix", "data.frame"), data_format = c("wide","long"), export = FALSE, path_out = NULL, - export_format = c("csv", "cdf", "mzml", "animl"), - read_metadata = TRUE, progress_bar, dat = NULL){ + export_format = c("csv", "chemstation_csv", "cdf", "mzml", "animl"), + read_metadata = TRUE, progress_bar, sample_names = NULL, + dat = NULL){ data_format <- match.arg(data_format, c("wide","long")) format_out <- match.arg(format_out, c("matrix", "data.frame")) parser <- match.arg(parser, c("", "chromconverter", "aston","entab", @@ -110,14 +115,18 @@ read_chroms <- function(paths, find_files, "chemstation_csv", "masshunter_dad", "shimadzu_fid", "shimadzu_dad", "chromeleon_uv", "thermoraw", "mzml", "waters_arw", - "waters_raw", "msd", "csd", "wsd", "other")) + "waters_raw", "msd", "csd", "wsd", "mdf", + "cdf", "other")) if (parser == ""){ parser <- check_parser(format_in, find = TRUE) } - export_format <- match.arg(export_format, c("csv", "cdf", "mzml", "animl")) + export_format <- match.arg(export_format, choices = + c("csv", "chemstation_csv", "cdf", "mzml", "animl")) check_parser(format_in, parser) - if (parser != "openchrom" & export_format != "csv") - stop("Only `csv` format is currently supported for exporting files unless the parser is `openchrom`.") + if (parser != "openchrom" && !(export_format %in% c("csv", "chemstation_csv", "cdf"))) + stop("The selected export format is currently only supported by `openchrom` parsers.") + # if (export_format == "cdf" && format_in != "mdf" && parser != "openchrom") + # stop("Currently CDF exports are only available for MDF files.") if (parser == "entab" & !requireNamespace("entab", quietly = TRUE)) { stop("The entab R package must be installed to use entab parsers: install.packages('entab', repos='https://ethanbass.github.io/drat/')", @@ -127,19 +136,16 @@ read_chroms <- function(paths, find_files, if (all(!exists)){ stop("Cannot locate files. None of the supplied paths exist.") } - if (!is.null(path_out)){ - path_out <- check_path(path_out) - } if (export | format_in == "thermoraw" | parser == "openchrom"){ if (is.null(path_out)){ path_out <- set_temp_directory() } if (!dir.exists(path_out)){ - stop(paste0("The export directory '", path_out, "' does not exist.")) + stop(paste0("The export directory '", path_out, "' could not be found.")) } } if (is.null(dat)){ - dat<-list()} + dat <- list()} # choose converter entab_parser <- partial(call_entab, format_in = format_in, @@ -178,21 +184,24 @@ read_chroms <- function(paths, find_files, converter <- rainbow_parser } else if (format_in == "chromeleon_uv"){ pattern <- ifelse(is.null(pattern), ".txt", pattern) - converter <- partial(read_chromeleon, read_metadata = read_metadata, format_out = format_out) + converter <- partial(read_chromeleon, format_out = format_out, + data_format = data_format, read_metadata = read_metadata) } else if (format_in == "shimadzu_fid"){ pattern <- ifelse(is.null(pattern), ".txt", pattern) converter <- partial(read_shimadzu, format_in = "fid", - read_metadata = read_metadata, format_out = format_out) + format_out = format_out, data_format = data_format, + read_metadata = read_metadata) } else if (format_in == "shimadzu_dad"){ pattern <- ifelse(is.null(pattern), ".txt", pattern) converter <- partial(read_shimadzu, format_in = "dad", - read_metadata = read_metadata, format_out = format_out) + format_out = format_out, data_format = data_format, + read_metadata = read_metadata) } else if (format_in == "thermoraw"){ pattern <- ifelse(is.null(pattern), ".raw", pattern) converter <- switch(parser, "thermoraw" = partial(read_thermoraw, path_out = path_out, - read_metadata = read_metadata, - format_out = format_out), + format_out = format_out, + read_metadata = read_metadata), "entab" = entab_parser) } else if (format_in == "mzml"){ pattern <- ifelse(is.null(pattern), ".mzML", pattern) @@ -207,11 +216,12 @@ read_chroms <- function(paths, find_files, pattern <- ifelse(is.null(pattern), ".csv|.CSV", pattern) converter <- partial(read_chemstation_csv, format_out = format_out) } else if (format_in %in% c("chemstation_fid", "chemstation_ch")){ - data_format <- "long" pattern <- ifelse(is.null(pattern), ".ch", pattern) converter <- switch(parser, - "chromconverter" = partial(read_chemstation_ch, read_metadata = read_metadata, - format_out = format_out), + "chromconverter" = partial(read_chemstation_ch, + format_out = format_out, + data_format = data_format, + read_metadata = read_metadata), "rainbow" = rainbow_parser, "entab" = entab_parser) } else if (format_in %in% c("msd", "csd", "wsd")){ @@ -222,7 +232,17 @@ read_chroms <- function(paths, find_files, converter <- partial(call_openchrom, path_out = path_out, format_in = format_in, export_format = export_format, return_paths = return_paths) - } else{ + } else if (format_in == "mdf"){ + pattern <- ifelse(is.null(pattern), ".mdf|.MDF", pattern) + converter <- partial(read_mdf, format_out = format_out, + data_format = data_format, + read_metadata = read_metadata) + } else if (format_in == "cdf"){ + pattern <- ifelse(is.null(pattern), ".cdf|.CDF", pattern) + converter <- partial(read_cdf, format_out = format_out, + data_format = data_format, + read_metadata = read_metadata) + } else { converter <- switch(parser, "aston" = partial(trace_converter, format_out = format_out, data_format = data_format, @@ -230,11 +250,10 @@ read_chroms <- function(paths, find_files, "entab" = entab_parser ) } - writer <- switch(export_format, "csv" = export_csvs) if (find_files){ files <- find_files(paths, pattern) - } else{ + } else { files <- paths if (!is.null(pattern)){ match <- grep(pattern, files, ignore.case = TRUE) @@ -249,12 +268,12 @@ read_chroms <- function(paths, find_files, } } - if (format_in %in% c("chemstation_uv", "masshunter_dad", "chemstation", "chemstation_fid")){ + if (all(grepl(".d$", files, ignore.case = TRUE))){ file_names <- strsplit(files, "/") file_names <- gsub("\\.[Dd]", "", sapply(file_names, function(n){ - ifelse(any(grepl("\\.[Dd]", n)), grep("\\.[Dd]", n, value=TRUE), tail(n,1)) - } )) + ifelse(any(grepl("\\.[Dd]", n)), grep("\\.[Dd]", n, value = TRUE), tail(n,1)) + })) } else {file_names <- sapply(strsplit(basename(files),"\\."), function(x) x[1])} if (parser != "openchrom"){ laplee <- choose_apply_fnc(progress_bar) @@ -272,8 +291,16 @@ read_chroms <- function(paths, find_files, } else{ data <- converter(files) } - names(data) <- file_names + if (!is.null(sample_names)){ + names(data) <- sample_names + } else{ + names(data) <- file_names + } if (export & !(parser %in% c("thermoraw", "openchrom"))){ + writer <- switch(export_format, csv = export_csvs, + chemstation_csv = purrr::partial(export_csvs, fileEncoding = "utf16"), + cdf = export_cdfs) + writer(data, path_out) } dat <- append(dat, data) diff --git a/R/read_mdf.R b/R/read_mdf.R new file mode 100644 index 0000000..494d552 --- /dev/null +++ b/R/read_mdf.R @@ -0,0 +1,90 @@ +#' Read MDF files into R +#' @param file Path to a 'Lumex' \code{.mdf} file +#' @param format_out R format. Either \code{matrix} or \code{data.frame}. +#' @param data_format Whether to return data in \code{wide} or \code{long} format. +#' @param read_metadata Whether to read metadata from file. +#' @return A chromatogram in the format specified by the \code{format_out} and +#' \code{data_format} arguments (retention time x wavelength). +#' @author Ethan Bass +#' @export + +read_mdf <- function(file, format_out = c("matrix","data.frame"), + data_format = c("wide","long"), read_metadata = TRUE){ + data_format <- match.arg(data_format, c("wide","long")) + format_out <- match.arg(format_out, c("matrix","data.frame")) + + f <- file(file, "rb") + + # extract metadata + metadata <- readBin(f, "character", n = 1) + meta <- extract_mdf_metadata(metadata) + + array1_len <- as.numeric(meta[which(meta$Group == "Array photometric" & meta$Property == "Size"),"Value"]) + array2_len <- as.numeric(meta[which(meta$Group == "Array current" & meta$Property == "Size"),"Value"]) + + # read array 1 + end_metadata <- seek(f, NA, "current") - 1 + seek(f, end_metadata, "start") + seek(f, end_metadata, "start") + photo_array <- readBin(f, "double", size = 8, n = array1_len) + + # read array 2 + current_array <- readBin(f, "integer", size = 4, n = array2_len) + + # close file + close(f) + + t1 <- as.numeric(meta[which(meta$Group == "Interval Time" & meta$Property == "From"), "Value"]) + t2 <- as.numeric(meta[which(meta$Group == "Interval Time" & meta$Property == "To"), "Value"]) + t_step <- as.numeric(meta[which(meta$Group == "Interval Time" & meta$Property == "Step"), "Value"]) + + # create time array + time_array <- seq(t1, t2, by = t_step) + + # construct data.frame + if (data_format == "wide"){ + data <- data.frame(Intensity = photo_array, Current = current_array, + row.names = time_array) + } else if (data_format == "long"){ + data <- data.frame(RT = time_array, Intensity = photo_array, + Current = current_array) + } + + if (data_format == "long"){ + data <- reshape_chrom(data) + } + if (format_out == "matrix"){ + as.matrix(format_out) + } + if (read_metadata){ + data <- attach_metadata(x = data, meta = meta, format_in = "mdf", + format_out = format_out, data_format = data_format, + parser = "chromconverter", source_file = file) + } + data +} + +#' Extract MDF metadata +#' @noRd +extract_mdf_metadata <- function(x){ + x <- stringr::str_replace_all(x, "\xb5", "micro") + ma <- strsplit(x, "\n")[[1]] + splitAt <- function(x, pos) unname(split(x, cumsum(seq_along(x) %in% pos))) + x2 <- splitAt(ma, grep("\\[*\\]", ma)) + x2 <- x2[-length(x2)] + names(x2) <- gsub("\\[|\\]","",sapply(x2, function(xx) xx[[1]])) + x3 <- lapply(seq_along(x2), function(i){ + xx <- x2[[i]][-1] + xx <- as.data.frame(do.call(rbind,lapply(xx, function(xxx){ + stringr::str_split_fixed(xxx, "=", 2) + }))) + xx[,3] <- names(x2)[i] + colnames(xx) <- c("Property","Value","Group") + xx + }) + names(x3) <- names(x2) + meta <- do.call(rbind, x3) + meta <- meta[-which(meta$Property == ""),] + meta +} + diff --git a/R/read_peaklist.R b/R/read_peaklist.R new file mode 100644 index 0000000..9ce8676 --- /dev/null +++ b/R/read_peaklist.R @@ -0,0 +1,24 @@ +#' Read varian peaklist. +#' Read peak list from 'Varian MS Workstation'. +#' @param file Path to varian peaklist file. +#' @importFrom utils read.csv +#' @author Ethan Bass +#' @export + +read_varian_peaklist <- function(file){ + x <- read.csv(file, skip = 5, header = FALSE) + x$V1[x$V1 == ""] <- NA + x <- tidyr::fill(data = x, "V1", .direction = "down") + + column_names <- x[2,] + column_names[1] <- "compound" + colnames(x) <- column_names + + x <- x[-which(x$`Line#`=="Line#"),] + x <- x[-which(x$`Line#` == ""), ] + + x$Area <- as.numeric(x$Area) + x$Height <- as.numeric(x$Height) + x <- x[,-16] + x +} diff --git a/R/read_thermoraw.R b/R/read_thermoraw.R index 78eb2b9..a50decc 100644 --- a/R/read_thermoraw.R +++ b/R/read_thermoraw.R @@ -21,7 +21,7 @@ #' @references #' Hulstaert Niels, Jim Shofstahl, Timo Sachsenberg, Mathias Walzer, #' Harald Barsnes, Lennart Martens, and Yasset Perez-Riverol. -#' “=ThermoRawFileParser: Modular, Scalable, and Cross-Platform RAW File Conversion.” +#' ThermoRawFileParser: Modular, Scalable, and Cross-Platform RAW File Conversion. #' \emph{Journal of Proteome Research} \bold{19}, no. 1 (January 3, 2020): 537–42. #' \doi{10.1021/acs.jproteome.9b00328}. #' @export read_thermoraw diff --git a/R/utils.R b/R/utils.R index c0aa7a1..9c9b7b9 100644 --- a/R/utils.R +++ b/R/utils.R @@ -8,11 +8,13 @@ check_parser <- function(format_in, parser=NULL, find = FALSE){ "chemstation_fid", "chemstation_uv", "chromeleon_uv", "mzml", "shimadzu_fid", "shimadzu_dad", - "waters_arw"), - aston = c("chemstation_uv", "masshunter_dad", "other"), - entab = c("chemstation_uv", "chemstation_fid", "masshunter_dad", "thermoraw", "other"), - rainbow = c("chemstation_uv", "waters_raw", - "agilent_d", "chemstation", "chemstation_fid"), + "waters_arw", "mdf", "cdf"), + aston = c("chemstation", "chemstation_uv", "masshunter_dad", "other"), + entab = c("chemstation", "chemstation_ch", "chemstation_fid", + "chemstation_uv", "masshunter_dad", "thermoraw", "other"), + rainbow = c("chemstation", "chemstation_ch", "chemstation_fid", + "chemstation_uv", "waters_raw", + "agilent_d"), thermoraw = c("thermoraw") ) if (find){ @@ -40,6 +42,22 @@ check_parser <- function(format_in, parser=NULL, find = FALSE){ } } +#' @noRd +format_to_extension <- function(format_in){ + switch(format_in, + "agilent_d" = ".d|.D", + "chemstation_uv" = ".uv|.UV", + "chemstation_ch" = ".ch|.CH", + "chemstation_fid" = ".ch|.CH", + "chemstation_csv" = ".csv|.CSV", + "masshunter_dad" = ".sp|.SP", + "shimadzu_fid" = ".txt", + "shimadzu_dad" = ".txt", + "chromeleon_uv" = ".txt", + "thermoraw" = ".raw", "mzml" = ".mzml", "waters_arw" = ".arw", + "waters_raw" = ".raw", "msd" = ".", "csd" =".", "wsd" =".", "mdf" = ".mdf|.MDF", "other"=".") +} + #' @noRd find_files <- function(paths, pattern){ files <- unlist(lapply(paths, function(path){ @@ -57,49 +75,18 @@ find_files <- function(paths, pattern){ })) } -#' @noRd -export_csvs <- function(data, path.out){ - sapply(seq_along(data), function(i){ - write.csv(data[[i]], file = paste0(paste0(path.out, names(data)[i]),".CSV")) - }) -} - #' @noRd set_temp_directory <- function(){ ans <- readline("Export directory not specified! Export files to `temp` directory (y/n)?") if (ans %in% c("y","Y")){ - if (!dir.exists("temp")) - dir.create("temp") - path_out <- paste0(getwd(),'/temp/') + fs::dir_create("temp") + path_out <- fs::path(getwd(),"temp") path_out } else{ stop("Must specify directory to export files.") } } -#' Check path -#' Check that path is properly formatted. -#' @param path path as character string -#' @noRd -check_path <- function(path){ - # check for leading slash - if (.Platform$OS.type %in% c("unix","linux")){ - if (!(substr(path,1,1) %in% c("/", "~"))){ - path <- paste0("/", path) - } - } - - # check for trailing slash - n <- nchar(path) - if (substr(path, n, n) != "/"){ - path <- paste0(path, "/") - } - if (.Platform$OS.type == "windows"){ - path <- gsub("/", "\\\\", path) - } - path -} - #' Extract header from Shimadzu ascii files #' @noRd extract_header <- function(x, chrom.idx, sep){ @@ -120,18 +107,18 @@ extract_header <- function(x, chrom.idx, sep){ #' Check for suggested package #' @noRd +#' @keywords internal check_for_pkg <- function(pkg, return_boolean = FALSE){ pkg_exists <- requireNamespace(pkg, quietly = TRUE) - if (!pkg_exists) { + if (return_boolean){ + return(pkg_exists) + } else if (!pkg_exists) { stop(paste( "Package", sQuote(pkg), "must be installed to perform this action: try", paste0("`install.packages('", pkg, "')`.")), call. = FALSE ) } - if (return_boolean){ - pkg_exists - } } #' Choose apply function diff --git a/R/write_chroms.R b/R/write_chroms.R new file mode 100644 index 0000000..cefbc09 --- /dev/null +++ b/R/write_chroms.R @@ -0,0 +1,142 @@ +#' Export chromatograms as csvs +#' @author Ethan Bass +#' @noRd +export_csvs <- function(data, path_out, fileEncoding = "utf8", row.names = TRUE){ + sapply(seq_along(data), function(i){ + write.csv(data[[i]], file = fs::path(path_out, names(data)[i], ext = "csv"), + fileEncoding = fileEncoding, row.names = row.names) + }) +} + +#' Export chromatograms as CDFs +#' @author Ethan Bass +#' @noRd +export_cdfs <- function(data, path_out){ + sapply(seq_along(data), function(i){ + write_cdf(data[[i]], sample_name = names(data)[i], + path_out = path_out) + }) +} + +#' Write CDF file from chromatogram +#' @author Ethan Bass +#' @noRd +write_cdf <- function(x, sample_name, path_out){ + check_for_pkg("ncdf4") + if (ncol(x) + as.numeric(attr(x, "data_format") == "wide") > 2){ + warning("The supplies chromatogram contains more than two dimensions. Only + the first two dimensions will be written to the ANDI chrom file.", + immediate. = TRUE) + } + if (attr(x, "data_format") == "wide"){ + x1 <- data.frame(RT = as.numeric(rownames(x)), Intensity = x[,1]) + x <- transfer_metadata(x1, x) + } + # if (!missing(column)){ + # x1 <- x[,column,drop = FALSE] + # x <- transfer_metadata(x1,x) + # } + filename <- fs::path_ext_remove(fs::path_file(sample_name)) + file_out <- fs::path(path_out, filename, ext = "cdf") + if (fs::file_exists(file_out)){ + warning(paste("File", sQuote(basename(file_out)), "already exists and will not be overwritten."), immediate. = TRUE) + } + # define dimensions + point_number <- ncdf4::ncdim_def("point_number", "", + vals = seq_along(x[,1]), + create_dimvar = FALSE) + + # define variables + nc_time <- ncdf4::ncvar_def("raw_data_retention", "", dim = point_number) + nc_intensity <- ncdf4::ncvar_def("ordinate_values", "", dim = point_number) + other_vars <- c("actual_delay_time", "actual_run_time_length", "actual_sampling_interval", + "detector_maximum_value", "detector_minimum_value") + other_vars <- lapply(other_vars, function(x) ncdf4::ncvar_def(x, "",list())) + + # write netcdf file + ncdf4::nc_create(file_out, c(list(nc_time, nc_intensity), other_vars)) + + # open netcdf file + nc <- ncdf4::nc_open(file_out, write = TRUE) + + # write data to file + ncdf4::ncvar_put(nc = nc, varid = "raw_data_retention", vals = x[,1]) + ncdf4::ncvar_put(nc = nc, varid = "ordinate_values", vals = x[,2]) + ncdf4::ncatt_put(nc, varid="ordinate_values", + attname = "uniform_sampling_flag", attval = "Y") + ncdf4::ncvar_put(nc = nc, varid = "actual_run_time_length", vals = tail(x[,1],1)) + ncdf4::ncvar_put(nc = nc, varid = "actual_delay_time", vals = head(x[,1],1)) + ncdf4::ncvar_put(nc = nc, varid = "actual_sampling_interval", vals = mean(diff(x[,1]))) + ncdf4::ncvar_put(nc = nc, varid = "detector_maximum_value", vals = 1000) + ncdf4::ncvar_put(nc = nc, varid = "detector_minimum_value", vals = -1000) + + # write metadata as global attributes + meta <- format_metadata_for_cdf(x) + nc_add_global_attributes(nc = nc, meta = meta, sample_name = sample_name) + # finish writing file + ncdf4::nc_close(nc) +} + +#' Add global attributes to CDF file +#' @noRd +nc_add_global_attributes <- function(nc, meta, sample_name){ + sapply(seq_along(meta), function(i){ + ncdf4::ncatt_put(nc = nc, varid = 0, + attname = names(meta)[i], attval = meta[[i]], + prec = ifelse(names(meta)[i] %in% c("sample_amount", "sample_injection_volume"), + "float","text")) + }) + if (!is.null(sample_name)){ + ncdf4::ncatt_put(nc = nc, varid = 0, + attname = "sample_name", attval = sample_name) + ncdf4::ncatt_put(nc = nc, varid = 0, + attname = "experiment_title", attval = sample_name) + } +} + +#' Format metadata for CDF +#' @author Ethan Bass +#' @noRd +format_metadata_for_cdf <- function(x){ + # datetime_str <- x[which(x$Property=="Time"),"Value"] + # datetime_standard <- as.POSIXct(datetime_str, format = "%d.%m.%Y %H:%M:%S") + datetime <- format(attr(x, "run_datetime"), "%Y%m%d%H%M%S%z") + # rt_units <- x[which(x$Group=="Interval Time" & x$Property == "Units"), "Value"] + rt_units <- switch(tolower(attr(x, "time_unit")), + "sec" = "Seconds", "seconds" = "Seconds", + "min" = "Minutes", "minutes" = "Minutes", + "default" = "Minutes") + get_nc_version <- switch(.Platform$OS.type, "windows" = ) + meta <- list(dataset_completeness = "C1", + aia_template_revision = "1.0", + protocol_template_revision = "1.0", + netcdf_revision = paste("netCDF", stringr::str_extract(ncdf4::nc_version(), + "(?<=library version\\s)\\d+\\.\\d+\\.\\d+")), + administrative_comments = paste("Collected on", attr(x, "instrument")), + languages = "English only", + converter_name = "chromconverter", + converter_description = "AIA/ANDI netCDF Chromatography", + converter_input_source = attr(x,"source_file"), + date_time_stamp = datetime, + dataset_date_time_stamp = datetime, + injection_date_time_stamp = datetime, + detector_units = attr(x,"detector_unit"), + detector_unit = attr(x,"detector_unit"), + retention_units = rt_units, + retention_unit = rt_units, + sample_id_comments = "", + detector_name = attr(x,"detector"), + # experiment_title = "", + sample_amount = as.numeric(attr(x, "sample_amount")), + sample_injection_volume = as.numeric(attr(x, "sample_injection_volume")), + sample_type = attr(x, "sample_type") + ) + meta$sample_type <- ifelse(!is.null(meta$sample_type), meta$sample_type, "unknown") + meta$sample_amount <- + ifelse(length(meta$sample_amount) != 0, meta$sample_amount, 1) + meta$sample_injection_volume <- + ifelse(length(meta$sample_injection_volume) != 0, meta$sample_injection_volume, 1) + meta[sapply(meta, is.null)] <- "" + meta[sapply(meta, is.na)] <- "" + meta +} diff --git a/README.md b/README.md index 029880d..384a180 100644 --- a/README.md +++ b/README.md @@ -90,8 +90,7 @@ Thermo RAW files can be converted by calling the [ThermoRawFileParser](https://g 1) Download OpenChrom from the [website](https://lablicate.com/platform/openchrom/download) and place it into a directory of your choice. 2) If you intend to use the GUI in the future, it is recommended to make a separate copy of OpenChrom for command-line use. -3) Follow the [instructions](https://github.com/OpenChrom/openchrom/wiki/CLI) to activate OpenChrom's command-line interface. Alternatively, the command-line option can be activated from R by calling `configure_openchrom_parser(cli = "true")` or by calling the OpenChrom parser and following the prompts. -4) Call `read_chroms` with `parser = "openchrom"`. The first time you call the parser, it will ask you to provide the path to your local installation of OpenChrom. The path will then be saved for future use. If the command-line interface is disabled, you will be given the option to automatically activate the command-line. +3) Call `read_chroms` with `parser = "openchrom"`. The first time you call the parser, you may be asked to provide the path to your local installation of OpenChrom. The path will then be saved for future use. If the command-line interface is disabled, you will be given the option to automatically activate the command-line. Alternatively, the command-line option can be activated from R by calling `configure_openchrom(cli = "true")` or following the [instructions](https://github.com/OpenChrom/openchrom/wiki/CLI) to manually activate the CLI. This process can be reversed using the same function: e.g. `configure_openchrom(cli = "false"). To specify an OpenChrom executable in a non-standard location, call `configure_openchrom` with the `path` argument, e.g. `configure_openchrom(cli = "true", path="path_to_openchrom_executable"). ### Usage @@ -150,6 +149,6 @@ Contributions of source code, ideas, or documentation are very welcome. Please g You can cite chromConverter as follows: -Bass, E. (2022). chromConverter: chromatographic file converter. http://doi.org/10.5281/zenodo.6792521. +Bass, E. (2023). chromConverter: chromatographic file converter. http://doi.org/10.5281/zenodo.6792521. If you use external libraries to convert your files, please cite them as well in published work. diff --git a/inst/CITATION b/inst/CITATION index aa3b367..3343464 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -4,8 +4,8 @@ citEntry( entry = "manual", title = "chromConverter: chromatographic file converter", author = "Ethan Bass", - year = "2022", - version = "version 0.3.3", + year = "2023", + version = "version 0.4.0", doi = "10.5281/zenodo.6792521", url = "https://ethanbass.github.io/chromConverter/", textVersion = paste("Bass, E. (2022).", diff --git a/inst/shell/path_to_openchrom_commandline.txt b/inst/shell/path_to_openchrom_commandline.txt index 1ccd88a..7951def 100644 --- a/inst/shell/path_to_openchrom_commandline.txt +++ b/inst/shell/path_to_openchrom_commandline.txt @@ -1 +1 @@ -/Applications/Eclipse.app/Contents/MacOS/openchrom \ No newline at end of file +NULL diff --git a/man/attach_metadata.Rd b/man/attach_metadata.Rd index 11b8d42..cc2fed9 100644 --- a/man/attach_metadata.Rd +++ b/man/attach_metadata.Rd @@ -4,7 +4,15 @@ \alias{attach_metadata} \title{Attaches metadata to chromatogram} \usage{ -attach_metadata(x, meta, format_in, format_out, data_format, parser = NULL) +attach_metadata( + x, + meta, + format_in, + format_out, + data_format, + parser = NULL, + source_file +) } \arguments{ \item{x}{chromatogram} @@ -17,7 +25,9 @@ attach_metadata(x, meta, format_in, format_out, data_format, parser = NULL) \item{data_format}{Whether data is in wide or long format.} -\item{parser}{What parser was used to decode the data} +\item{parser}{What parser was used to decode the data.} + +\item{source_file}{The path to the source file.} } \value{ A chromatogram with attached metadata. diff --git a/man/call_openchrom.Rd b/man/call_openchrom.Rd index ccd3a2a..2e7a9fb 100644 --- a/man/call_openchrom.Rd +++ b/man/call_openchrom.Rd @@ -54,6 +54,12 @@ call from R. by \code{export_format} in the folder specified by \code{path_out}. } +\references{ +Wenig, Philip and Odermatt, Juergen. OpenChrom: A Cross-Platform Open Source +Software for the Mass Spectrometric Analysis of Chromatographic Data. \emph{ +BMC Bioinformatics} \bold{11}, no. 1 (July 30, 2010): 405. \doi{ +10.1186/1471-2105-11-405}. +} \author{ Ethan Bass } diff --git a/man/call_rainbow.Rd b/man/call_rainbow.Rd index 1445d88..ca4fddb 100644 --- a/man/call_rainbow.Rd +++ b/man/call_rainbow.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/rainbow_parser.R +% Please edit documentation in R/call_rainbow.R \name{call_rainbow} \alias{call_rainbow} \title{Parse Agilent or Waters files with rainbow parser} diff --git a/man/configure_openchrom.Rd b/man/configure_openchrom.Rd new file mode 100644 index 0000000..671b830 --- /dev/null +++ b/man/configure_openchrom.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/call_openchrom.R +\name{configure_openchrom} +\alias{configure_openchrom} +\title{Configure OpenChrom parser} +\usage{ +configure_openchrom(cli = c("null", "true", "false", "status"), path = NULL) +} +\arguments{ +\item{cli}{Defaults to NULL. If "true", R will rewrite openchrom ini file to enable CLI. +If "false", R will disable CLI. If NULL, R will not modify the ini file.} + +\item{path}{Path to 'OpenChrom' executable (Optional). The supplied path will +overwrite the current path.} +} +\value{ +Returns path to OpenChrom command-line application +} +\description{ +Configure OpenChrom parser +} +\author{ +Ethan Bass +} diff --git a/man/configure_rainbow.Rd b/man/configure_rainbow.Rd index c8e6af3..570c190 100644 --- a/man/configure_rainbow.Rd +++ b/man/configure_rainbow.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/rainbow_parser.R +% Please edit documentation in R/call_rainbow.R \name{configure_rainbow} \alias{configure_rainbow} \title{Configure rainbow} diff --git a/man/read_cdf.Rd b/man/read_cdf.Rd new file mode 100644 index 0000000..b666739 --- /dev/null +++ b/man/read_cdf.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/read_cdf.R +\name{read_cdf} +\alias{read_cdf} +\title{Read CDF file} +\usage{ +read_cdf( + file, + format_out = c("matrix", "data.frame"), + data_format = c("wide", "long"), + what = "chromatogram", + read_metadata = TRUE +) +} +\arguments{ +\item{file}{path to file} + +\item{format_out}{R format. Either \code{matrix} or \code{data.frame}.} + +\item{data_format}{Whether to return data in \code{wide} or \code{long} format. +For 2D files, "long" format returns the retention time as the first column of +the data.frame or matrix while "wide" format returns the retention time as the +rownames of the object.} + +\item{what}{Whether to extract \code{chromatogram} and/or \code{peak_table}.} + +\item{read_metadata}{Whether to read metadata from file.} +} +\value{ +A chromatogram in the format specified by the \code{format_out} and +\code{data_format} arguments (retention time x wavelength). +} +\description{ +Read CDF file +} +\author{ +Ethan Bass +} diff --git a/man/read_chemstation_ch.Rd b/man/read_chemstation_ch.Rd index 6dea593..0459c8d 100644 --- a/man/read_chemstation_ch.Rd +++ b/man/read_chemstation_ch.Rd @@ -6,16 +6,19 @@ \usage{ read_chemstation_ch( path, - read_metadata = TRUE, - format_out = c("matrix", "data.frame") + format_out = c("matrix", "data.frame"), + data_format = c("wide", "long"), + read_metadata = TRUE ) } \arguments{ \item{path}{Path to \code{.ch} file} -\item{read_metadata}{Logical. Whether to attach metadata.} +\item{format_out}{Matrix or data.frame.} + +\item{data_format}{Whether to return data in \code{wide} or \code{long} format.} -\item{format_out}{Matrix or data.frame} +\item{read_metadata}{Logical. Whether to attach metadata.} } \value{ A chromatogram in the format specified by \code{format_out} diff --git a/man/read_chemstation_uv.Rd b/man/read_chemstation_uv.Rd index c37ab7d..8a4683d 100644 --- a/man/read_chemstation_uv.Rd +++ b/man/read_chemstation_uv.Rd @@ -6,19 +6,19 @@ \usage{ read_chemstation_uv( path, - read_metadata = TRUE, format_out = c("matrix", "data.frame"), - data_format = c("wide", "long") + data_format = c("wide", "long"), + read_metadata = TRUE ) } \arguments{ \item{path}{Path to \code{.uv} file} -\item{read_metadata}{Logical. Whether to attach metadata.} - \item{format_out}{Matrix or data.frame} \item{data_format}{Either \code{wide} (default) or \code{long}.} + +\item{read_metadata}{Logical. Whether to attach metadata.} } \value{ A chromatogram in the format specified by \code{format_out} diff --git a/man/read_chromeleon.Rd b/man/read_chromeleon.Rd index aa67387..d5a8dc4 100644 --- a/man/read_chromeleon.Rd +++ b/man/read_chromeleon.Rd @@ -7,6 +7,7 @@ read_chromeleon( file, format_out = c("matrix", "data.frame"), + data_format = c("wide", "long"), read_metadata = TRUE ) } @@ -15,6 +16,8 @@ read_chromeleon( \item{format_out}{R format. Either \code{matrix} or \code{data.frame}.} +\item{data_format}{Whether to return data in \code{wide} or \code{long} format.} + \item{read_metadata}{Whether to read metadata from file.} } \value{ diff --git a/man/read_chroms.Rd b/man/read_chroms.Rd index 21b731a..d9f410c 100644 --- a/man/read_chroms.Rd +++ b/man/read_chroms.Rd @@ -10,16 +10,17 @@ read_chroms( format_in = c("agilent_d", "chemstation", "chemstation_uv", "chemstation_csv", "chemstation_ch", "chemstation_fid", "masshunter_dad", "shimadzu_fid", "shimadzu_dad", "chromeleon_uv", "thermoraw", "mzml", "waters_arw", "waters_raw", - "msd", "csd", "wsd", "other"), + "msd", "csd", "wsd", "mdf", "other"), pattern = NULL, parser = c("", "chromconverter", "aston", "entab", "thermoraw", "openchrom", "rainbow"), format_out = c("matrix", "data.frame"), data_format = c("wide", "long"), export = FALSE, path_out = NULL, - export_format = c("csv", "cdf", "mzml", "animl"), + export_format = c("csv", "chemstation_csv", "cdf", "mzml", "animl"), read_metadata = TRUE, progress_bar, + sample_names = NULL, dat = NULL ) } @@ -49,14 +50,16 @@ case file extension will be deduced from \code{format_in}.} \item{data_format}{Whether to output data in wide or long format. Either \code{wide} or \code{long}.} -\item{export}{Logical. If TRUE, will export files as csvs.} +\item{export}{Logical. If TRUE, the program will export files in the format +specified by \code{export_format} in the directory specified by \code{path_out}.} \item{path_out}{Path for exporting files. If path not specified, files will export to current working directory.} -\item{export_format}{Export format. Currently the only option is \code{.csv}, -unless you are using OpenChrom parsers, where you could have \code{csv}, -\code{cdf}, \code{mzml}, or \code{animl}.} +\item{export_format}{Export format. Currently the options include \code{.csv}, +\code{chemstation_csv} (utf-16 encoding), and \code{cdf}, unless you are using +OpenChrom parsers, where there are two additional options: \code{mzml}, and +\code{animl}.} \item{read_metadata}{Logical, whether to attach metadata (if it's available). Defaults to TRUE.} @@ -64,6 +67,9 @@ Defaults to TRUE.} \item{progress_bar}{Logical. Whether to show progress bar. Defaults to \code{TRUE} if \code{\link[pbapply]{pbapply}} is installed.} +\item{sample_names}{An optional character vector of sample names. Otherwise +sample names default to the basename of the specified files.} + \item{dat}{Existing list of chromatograms to append results. (Defaults to NULL).} } diff --git a/man/read_mdf.Rd b/man/read_mdf.Rd new file mode 100644 index 0000000..b687d2b --- /dev/null +++ b/man/read_mdf.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/read_mdf.R +\name{read_mdf} +\alias{read_mdf} +\title{Read MDF files into R} +\usage{ +read_mdf( + file, + format_out = c("matrix", "data.frame"), + data_format = c("wide", "long"), + read_metadata = TRUE +) +} +\arguments{ +\item{file}{Path to a 'Lumex' \code{.mdf} file} + +\item{format_out}{R format. Either \code{matrix} or \code{data.frame}.} + +\item{data_format}{Whether to return data in \code{wide} or \code{long} format.} + +\item{read_metadata}{Whether to read metadata from file.} +} +\value{ +A chromatogram in the format specified by the \code{format_out} and +\code{data_format} arguments (retention time x wavelength). +} +\description{ +Read MDF files into R +} +\author{ +Ethan Bass +} diff --git a/man/read_thermoraw.Rd b/man/read_thermoraw.Rd index 8a9da64..7a08f73 100644 --- a/man/read_thermoraw.Rd +++ b/man/read_thermoraw.Rd @@ -44,7 +44,7 @@ read_thermoraw(path) \references{ Hulstaert Niels, Jim Shofstahl, Timo Sachsenberg, Mathias Walzer, Harald Barsnes, Lennart Martens, and Yasset Perez-Riverol. -“=ThermoRawFileParser: Modular, Scalable, and Cross-Platform RAW File Conversion.” +ThermoRawFileParser: Modular, Scalable, and Cross-Platform RAW File Conversion. \emph{Journal of Proteome Research} \bold{19}, no. 1 (January 3, 2020): 537–42. \doi{10.1021/acs.jproteome.9b00328}. } diff --git a/man/read_varian_peaklist.Rd b/man/read_varian_peaklist.Rd new file mode 100644 index 0000000..4e5a0f7 --- /dev/null +++ b/man/read_varian_peaklist.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/read_peaklist.R +\name{read_varian_peaklist} +\alias{read_varian_peaklist} +\title{Read varian peaklist. +Read peak list from 'Varian MS Workstation'.} +\usage{ +read_varian_peaklist(file) +} +\arguments{ +\item{file}{Path to varian peaklist file.} +} +\description{ +Read varian peaklist. +Read peak list from 'Varian MS Workstation'. +} +\author{ +Ethan Bass +} diff --git a/man/read_waters_arw.Rd b/man/read_waters_arw.Rd index 7738972..24c25b8 100644 --- a/man/read_waters_arw.Rd +++ b/man/read_waters_arw.Rd @@ -6,16 +6,19 @@ \usage{ read_waters_arw( file, - read_metadata = TRUE, - format_out = c("matrix", "data.frame") + format_out = c("matrix", "data.frame"), + data_format = c("wide", "long"), + read_metadata = TRUE ) } \arguments{ \item{file}{path to file} -\item{read_metadata}{Whether to read metadata from file.} - \item{format_out}{R format. Either \code{matrix} or \code{data.frame}.} + +\item{data_format}{Whether to return data in \code{wide} or \code{long} format.} + +\item{read_metadata}{Whether to read metadata from file.} } \value{ A chromatogram in the format specified by \code{format_out} diff --git a/tests/testthat/test-read_chroms.R b/tests/testthat/test-read_chroms.R index 73afaf1..956c06f 100644 --- a/tests/testthat/test-read_chroms.R +++ b/tests/testthat/test-read_chroms.R @@ -1,13 +1,13 @@ library(testthat) -path_csv <- "testdata/DAD1.CSV" +path_csv <- "testdata/dad1.csv" path_uv <- "testdata/dad1.uv" x <- read_chroms(path_csv, format_in = "chemstation_csv", progress_bar = FALSE) test_that("aston parser works", { skip_if_missing_dependecies() - paths <- rep(path_uv,2) + paths <- rep(path_uv, 2) x1 <- read_chroms(paths, format_in = "chemstation_uv", parser = "aston", find_files = FALSE, read_metadata = TRUE, progress_bar = FALSE) @@ -16,7 +16,8 @@ test_that("aston parser works", { expect_equal(length(x1), length(paths)) expect_equal(class(x1[[1]])[1], "matrix") expect_equal(attr(x1[[1]], "data_format"), "wide") - }) + expect_equal(names(x1), c("dad1", "dad1")) +}) x1 <- read_chroms(path_uv, format_in = "chemstation_uv", parser = "chromconverter", find_files = FALSE, @@ -40,7 +41,7 @@ test_that ("extract_metadata function works", { test_that("entab parser works", { skip_if_not_installed("entab") - file <- "testdata/DAD1.uv" + file <- "testdata/dad1.uv" x1 <- read_chroms(file, format_in = "chemstation_uv", parser = "entab", find_files = FALSE, read_metadata = TRUE, progress_bar = FALSE) @@ -51,20 +52,6 @@ test_that("entab parser works", { expect_equal(attr(x1[[1]], "data_format"), "wide") }) - -# test_that("rainbow parser works", { -# skip_if_missing_dependecies() -# file <- "testdata/DAD1.uv" -# x1 <- read_chroms(file, format_in = "chemstation_uv", parser = "rainbow", -# find_files = FALSE, -# read_metadata = TRUE) -# expect_equal(as.numeric(x[[1]][,1]), as.numeric(x1[[1]][,"220"])) -# expect_equal(as.numeric(rownames(x[[1]])), as.numeric(rownames(x1[[1]]))) -# expect_equal(class(x1[[1]])[1], "matrix") -# expect_equal(attr(x1[[1]], "parser"), "rainbow") -# expect_equal(attr(x1[[1]], "data_format"), "wide") -# }) - test_that("shimadzu parser works", { file <- "testdata/ladder.txt" x <- read_chroms(file, format_in = "shimadzu_fid", find_files = FALSE, progress_bar = FALSE) @@ -72,25 +59,6 @@ test_that("shimadzu parser works", { expect_equal(attributes(x[[1]])$instrument, "GC-2014") }) -test_that("check_path works on unix/linux", { - skip_on_os("windows") - expect_equal(check_path("~/Downloads"), "~/Downloads/") - expect_equal(check_path("Downloads"), "/Downloads/") - expect_equal(check_path("~/Downloads/"), "~/Downloads/") - expect_equal(check_path("/Users/foo/"), "/Users/foo/") - expect_equal(check_path("Users/foo/"), "/Users/foo/") - expect_equal(check_path("/Users/foo"), "/Users/foo/") - expect_equal(check_path("Users/foo"), "/Users/foo/") -}) - -# test_that("thermoraw parser works",{ -# skip_if_not(configure_thermo_parser(check = TRUE)) -# file <- "/Users/ethanbass/Downloads/chrom_files/small.RAW" -# x <- read_chroms(file, format_in = "thermoraw", find_files = FALSE) -# expect_equal(class(x[[1]])[1], "matrix") -# expect_equal(attributes(x[[1]])$instrument, "GC-2014") -# }) - test_that("read_mzml works", { ext_filepath <- system.file("extdata", package = "RaMS") DAD_filepath <- list.files(ext_filepath, full.names = TRUE, @@ -109,3 +77,61 @@ test_that("read_mzml works", { test_that("get_filetype works as expected", { expect_equal(get_filetype(path_uv), "chemstation_uv") }) + +# test_that("thermoraw parser works",{ +# skip_if_not(configure_thermo_parser(check = TRUE)) +# file <- "/Users/ethanbass/Downloads/chrom_files/small.RAW" +# x <- read_chroms(file, format_in = "thermoraw", find_files = FALSE) +# expect_equal(class(x[[1]])[1], "matrix") +# expect_equal(attributes(x[[1]])$instrument, "GC-2014") +# }) + +# test_that("rainbow parser works", { +# skip_if_missing_dependecies() +# file <- "testdata/DAD1.uv" +# x1 <- read_chroms(file, format_in = "chemstation_uv", parser = "rainbow", +# find_files = FALSE, +# read_metadata = TRUE) +# expect_equal(as.numeric(x[[1]][,1]), as.numeric(x1[[1]][,"220"])) +# expect_equal(as.numeric(rownames(x[[1]])), as.numeric(rownames(x1[[1]]))) +# expect_equal(class(x1[[1]])[1], "matrix") +# expect_equal(attr(x1[[1]], "parser"), "rainbow") +# expect_equal(attr(x1[[1]], "data_format"), "wide") +# }) + +# test_that("check_path works on unix/linux", { +# skip_on_os("windows") +# expect_equal(check_path("~/Downloads"), "~/Downloads/") +# expect_equal(check_path("Downloads"), "/Downloads/") +# expect_equal(check_path("~/Downloads/"), "~/Downloads/") +# expect_equal(check_path("/Users/foo/"), "/Users/foo/") +# expect_equal(check_path("Users/foo/"), "/Users/foo/") +# expect_equal(check_path("/Users/foo"), "/Users/foo/") +# expect_equal(check_path("Users/foo"), "/Users/foo/") +# }) + +test_that("read_chroms exports csvs correctly", { + skip_on_cran() + path_out <- tempdir(check = TRUE) + on.exit(unlink(c(fs::path(path_out, "dad1", ext = "csv"), path_out))) + x1 <- read_chroms(paths = path_uv, export=TRUE, path_out = path_out, + export_format="csv", format_out = "data.frame", + progress_bar = FALSE) + x1_out <- read.csv(fs::path(path_out, "dad1", ext="csv"), row.names=1) + expect_equal(x1[[1]], x1_out, ignore_attr = TRUE) + # unlink(fs::path(path_out, "dad1", ext = "csv")) + # unlink(path_out) +}) + +test_that("read_chroms exports cdf files correctly", { + skip_on_cran() + skip_if_not_installed("ncdf4") + path_out <- tempdir(check = TRUE) + on.exit(unlink(c(fs::path(path_out, "ladder", ext = "cdf"), path_out))) + file <- "testdata/ladder.txt" + x1 <- read_chroms(paths = file, format_in = "shimadzu_fid", export = TRUE, path_out = path_out, + export_format = "cdf", progress_bar = FALSE) + x1_out <- read_cdf(fs::path(path_out, "ladder", ext = "cdf")) + expect_equal(x1[[1]], x1_out, ignore_attr = TRUE) +}) + diff --git a/tests/testthat/testdata/DAD1.CSV b/tests/testthat/testdata/dad1.csv similarity index 100% rename from tests/testthat/testdata/DAD1.CSV rename to tests/testthat/testdata/dad1.csv