From 3089624cf1696f0037c5d5501a5a8fbecd9976c4 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Sun, 5 Feb 2023 15:05:53 -0500 Subject: [PATCH] v0.3.1 with support for chemstation .ch (versions 130 & v 8) --- DESCRIPTION | 3 +- NAMESPACE | 4 +- NEWS.md | 8 + R/aston_parsers.R | 1 + R/parsers.R | 112 ++++++---- ...hemstation_fid.R => read_chemstation_ch.R} | 208 ++++++++++-------- R/read_chroms.R | 20 +- R/utils.R | 51 +++-- README.md | 8 +- inst/CITATION | 2 +- inst/shell/path_to_openchrom_commandline.txt | 2 +- ...mstation_fid.Rd => read_chemstation_ch.Rd} | 15 +- man/read_chroms.Rd | 12 +- man/read_shimadzu.Rd | 4 +- tests/testthat/test-read_chroms.R | 3 +- 15 files changed, 271 insertions(+), 182 deletions(-) rename R/{read_chemstation_fid.R => read_chemstation_ch.R} (51%) rename man/{read_chemstation_fid.Rd => read_chemstation_ch.Rd} (55%) diff --git a/DESCRIPTION b/DESCRIPTION index 7f5d940..e55650a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: chromConverter Title: Chromatographic File Converter -Version: 0.3.0 +Version: 0.3.1 Authors@R: c( person(given = "Ethan", family = "Bass", email = "ethanbass@gmail.com", role = c("aut", "cre"), @@ -15,6 +15,7 @@ License: GPL (>= 3) URL: https://ethanbass.github.io/chromConverter, https://github.com/ethanbass/chromConverter BugReports: https://github.com/ethanbass/chromConverter/issues Imports: + bitops, purrr, readxl, reticulate, diff --git a/NAMESPACE b/NAMESPACE index da70c19..a95b22e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,8 +5,8 @@ export(call_openchrom) export(call_rainbow) export(configure_aston) export(configure_rainbow) +export(read_chemstation_ch) export(read_chemstation_csv) -export(read_chemstation_fid) export(read_chromeleon) export(read_chroms) export(read_mzml) @@ -19,6 +19,8 @@ import(magrittr) import(reticulate) import(xml2) importFrom(RaMS,grabMSdata) +importFrom(bitops,bitAnd) +importFrom(bitops,bitShiftL) importFrom(purrr,partial) importFrom(readxl,read_xls) importFrom(stats,reshape) diff --git a/NEWS.md b/NEWS.md index a1b2981..c72d5fa 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,11 @@ +## chromConverter 0.3.1 + +* Added support for "Chemstation" UV (`.ch`) files (version 130). +* Added provisional support for "Chemstation" FID (version 8). +* Changed name of `read_chemstation_fid` function to `read_chemstation_ch`. +* Ignore case when matching file extensions in `read_chroms`. +* Added note to README about configuring RStudio correctly for accessing python parsers. + ## chromConverter 0.3.0 * Fixed bug causing "Chromeleon" metadata parser to fail. diff --git a/R/aston_parsers.R b/R/aston_parsers.R index f051bdb..3ff99ba 100644 --- a/R/aston_parsers.R +++ b/R/aston_parsers.R @@ -13,6 +13,7 @@ #' @return A chromatogram in \code{data.frame} format (retention time x wavelength). #' @import reticulate #' @export sp_converter + sp_converter <- function(file, format_out = c("matrix", "data.frame"), data_format = c("wide","long"), read_metadata = TRUE){ diff --git a/R/parsers.R b/R/parsers.R index a4fe90e..5e60ef4 100644 --- a/R/parsers.R +++ b/R/parsers.R @@ -43,61 +43,77 @@ read_chromeleon <- function(file, format_out = c("matrix","data.frame"), #' @param format_out R format. Either \code{matrix} or \code{data.frame}. #' @param data_format Whether to return data in \code{wide} or \code{long} format. #' @param read_metadata Whether to read metadata from file. -#' @param what Whether to extract \code{chromatogram}, \code{peak_table} or -#' \code{both}. +#' @param what Whether to extract \code{chromatogram} and/or \code{peak_table}. +#' Accepts multiple arguments. #' @return A chromatogram in the format specified by \code{format_out} #' (retention time x wavelength). #' @author Ethan Bass #' @export + read_shimadzu <- function(file, format_in, format_out = c("matrix","data.frame"), data_format = c("wide","long"), - what = "chromatogram", read_metadata = TRUE){ + what = "chromatogram", + read_metadata = TRUE){ if (missing(format_in)) stop("`format_in` must be specified. The options are `fid` or `dad`.") format_out <- match.arg(format_out, c("matrix","data.frame")) data_format <- match.arg(data_format, c("wide","long")) what <- match.arg(what, c("chromatogram", "peak_table"), several.ok = TRUE) x <- readLines(file) + sep <- substr(x[2], 17, 17) headings <- grep("\\[*\\]", x) peaktab.idx <- grep("\\[Peak Table", x) chrom_heading <- switch(format_in, "fid" = "\\[Chromatogram .*]", "dad" = "\\[PDA 3D]") chrom.idx <- grep(chrom_heading, x) - header <- extract_header(x, chrom.idx) - met <- header[[1]] - decimal_separator <- ifelse(grepl(",", met[2,2]),",",".") - if (decimal_separator == ","){ - met[2:3,2] <- gsub(",",".",met[2:3,2]) - } + if (any(what == "chromatogram")){ - if (format_in == "fid"){ - xx <- read.csv(file, skip = header[[2]], sep="\t", colClasses="numeric", - na.strings=c("[FractionCollectionReport]","#ofFractions"), - dec = decimal_separator) + if (length(chrom.idx) != 0){ + header <- try(extract_header(x = x, chrom.idx = chrom.idx, sep = sep)) + met <- header[[1]] + decimal_separator <- ifelse(grepl(",", met[2,2]),",",".") + if (decimal_separator == ","){ + met[c(2:3), 2] <- gsub(",", ".", met[c(2:3), 2]) + } - xx <- as.matrix(xx[!is.na(xx[,1]),]) - rownames(xx) <- xx[,1] - xx <- xx[, 2, drop = FALSE] - colnames(xx) <- "Intensity" - data_format <- "long" - } else if (format_in == "dad"){ - xx <- read.csv(file, skip = header[[2]], sep="\t", colClasses="numeric", - na.strings=c("[FractionCollectionReport]","#ofFractions"), row.names = 1, - nrows = as.numeric(met[7,2]), dec = decimal_separator) - xx <- as.matrix(xx[!is.na(xx[,1]),]) - times <- round(seq(met[2,2], met[3,2], length.out = as.numeric(met[7,2])),2) - wavelengths <- round(seq(met[4,2], met[5,2], length.out = as.numeric(met[6,2])),2) - colnames(xx) <- wavelengths - if (data_format == "long"){ - xx <- reshape_chrom(xx) + if (format_in == "fid"){ + xx <- read.csv(file, skip = header[[2]], sep = sep, colClasses="numeric", + na.strings=c("[FractionCollectionReport]","#ofFractions"), + dec = decimal_separator) + + xx <- as.matrix(xx[!is.na(xx[,1]),]) + rownames(xx) <- xx[,1] + xx <- xx[, 2, drop = FALSE] + colnames(xx) <- "Intensity" + data_format <- "long" + } else if (format_in == "dad"){ + xx <- read.csv(file, skip = header[[2]], sep = sep, colClasses="numeric", + na.strings=c("[FractionCollectionReport]","#ofFractions"), row.names = 1, + nrows = as.numeric(met[7,2]), dec = decimal_separator) + xx <- as.matrix(xx[!is.na(xx[,1]),]) + times <- round(seq(met[2,2], met[3,2], length.out = as.numeric(met[7,2])),2) + wavelengths <- round(seq(met[4,2], met[5,2], length.out = as.numeric(met[6,2])),2) + colnames(xx) <- wavelengths + if (data_format == "long"){ + xx <- reshape_chrom(xx) + } + } + if (format_out == "data.frame"){ + xx <- as.data.frame(xx) + } + } else{ + if (length(what) == 1){ + stop("Chromatogram not found.") + } else{ + warning("Chromatogram not found.") + what = "peak_table" } } - if (format_out == "data.frame"){ - xx <- as.data.frame(xx) - } - } + } + + ### extract peak_table if (any(what == "peak_table")){ if (length(peaktab.idx) == 0){ if (length(what) == 1){ @@ -108,9 +124,15 @@ read_shimadzu <- function(file, format_in, } } peak_tab <- lapply(peaktab.idx, function(idx){ - nrows <- as.numeric(strsplit(x[idx+1],"\t")[[1]][2]) - peak_tab <- read.csv(file, skip = (idx+1), sep = "\t", nrows = nrows, - dec=decimal_separator) + nrows <- as.numeric(strsplit(x = x[idx+1], split = sep)[[1]][2]) + if (!is.na(nrows) && nrows > 0){ + time_column <- grep("R.Time", strsplit(x = x[[idx+2]], split = sep)[[1]]) + t1 <- strsplit(x = x[[idx+3]], split = sep)[[1]][time_column] + decimal_separator <- ifelse(grepl(".", t1), ".", ",") + + peak_tab <- read.csv(file, skip = (idx+1), sep = sep, nrows = nrows, + dec = decimal_separator) + } else{NA} }) names(peak_tab) <- gsub("\\[|\\]","", x[peaktab.idx]) } @@ -133,17 +155,27 @@ read_shimadzu <- function(file, format_in, meta <- x[(meta_start+1):(meta_end-1)] meta <- meta[meta!=""] meta <- meta[-grep("\\[", meta)] - meta <- stringr::str_split_fixed(meta, "\t", n = 2) - meta <- rbind(meta, met) + meta <- stringr::str_split_fixed(meta, pattern = sep, n = 2) + if (exists("met")){ + meta <- rbind(meta, met) + } rownames(meta) <- meta[, 1] meta <- as.list(meta[,2]) data_format <- switch(format_in, "fid" = "long", "dad" = "wide") - xx <- attach_metadata(xx, meta, format_in = "shimadzu", format_out = format_out, - data_format = data_format, - parser = "chromConverter") + if (inherits(xx, "list")){ + xx <- lapply(xx, function(xxx){ + attach_metadata(xxx, meta, format_in = "shimadzu", format_out = format_out, + data_format = data_format, + parser = "chromConverter") + }) + } else{ + xx <- attach_metadata(xx, meta, format_in = "shimadzu", format_out = format_out, + data_format = data_format, + parser = "chromConverter") } + } xx } diff --git a/R/read_chemstation_fid.R b/R/read_chemstation_ch.R similarity index 51% rename from R/read_chemstation_fid.R rename to R/read_chemstation_ch.R index d319f1a..be73e28 100644 --- a/R/read_chemstation_fid.R +++ b/R/read_chemstation_ch.R @@ -1,11 +1,14 @@ -#' Parser for reading Agilent FID (.ch) files into R +#' Parser for reading Agilent ('.ch') files into R +#' @importFrom bitops bitAnd bitShiftL #' @param path Path to \code{.ch} file #' @param read_metadata Logical. Whether to attach metadata. #' @param format_out Matrix or data.frame #' @author Ethan Bass -#' @note This function was adapted from the \href{https://github.com/chemplexity/chromatography}{Chromatography Toolbox} ((c) James Dillon 2014). +#' @note This function was adapted from the \href{https://github.com/chemplexity/chromatography}{Chromatography Toolbox} +#' ((c) James Dillon 2014). #' @export -read_chemstation_fid <- function(path, read_metadata = TRUE, + +read_chemstation_ch <- function(path, read_metadata = TRUE, format_out = c("matrix","data.frame")){ format_out <- match.arg(format_out, c("matrix","data.frame")) @@ -15,27 +18,46 @@ read_chemstation_fid <- function(path, read_metadata = TRUE, # HEADER seek(f, 1, "start") version <- readBin(f, "character", n = 1) - version <- match.arg(version, choices = c("8", "81", "179","181")) - # endian <- switch(version, - # "179" = "little", - # "180" = "big", - # "181" = "big") + version <- match.arg(version, choices = c("8", "81", "130", "179","181")) if (version %in% c("179","181")){ - offsets <- list(file_type = 0x15B, - sample_name = 0x35A, - operator = 0x758, - date = 0x957, - instrument = 0x9BC, - method = 0xA0E, - software = 0xC11, - unit = 0x104C, - signal = 0x1075, - num_times = 0x116, - rt_first = 0x11A, - rt_last = 0x11E, - scaling_factor = 0x127C, + offsets <- list(file_type = 347, #0x15B + sample_name = 858, #0x35A + operator = 1880, #0x758 + date = 2391, # 0x957 + instrument = 2492, # 0x9BC + method = 2574, # 0xA0E + software = 3089, # 0xC11 + unit = 4172, # 0x104C + signal = 4213, # 0x1075 + num_times = 278, # 0x116 + rt_first = 282, # 0x11A + rt_last = 286, # 0x11E + scaling_factor = 4732, # 0x127C intercept = 4724, - data_start = 0x1000) + data_start = 4096 # 0x1000 + ) + } else if (version == "130"){ + offsets <- list( + # sequence_line_or_injection = 252, #UINT16 + # injection_or_sequence_line = 256, #UINT16 + # data_offset = 264, # UINT32 + # start_time = 282, + # end_time = 286, + # version_string = 326, # utf16 + file_type = 347, # utf16 + sample_name = 858, # utf16 + operator = 1880, # utf16 + date = 2391, # utf16 + inlet = 2492, # utf16 + instrument = 2533, # utf16'), + method = 2574, # utf16 + software_version = 3601, #utf16'), + software = 3089, # 'utf16'), + software_revision = 3802, #'utf16'), + units = 4172, # 'utf16'), + signal = 4213, # 'utf16'), + zero = 4110, # INT32), + scaling_factor = 4732) #ENDIAN + 'd') } else if (version %in% c("8","81")){ offsets <- list(sample_name = 24, description = 86, @@ -57,6 +79,7 @@ read_chemstation_fid <- function(path, read_metadata = TRUE, decoder <- switch(version, "8" = decode_delta, "81" = decode_double_delta, + "130" = decode_delta, "181" = decode_double_delta, "179" = decode_double_array) @@ -74,38 +97,47 @@ read_chemstation_fid <- function(path, read_metadata = TRUE, seek(f, 282, "start") seek(f, 282, "start") # seek(f, where = 0x11A, origin="start") - if (version == "8"){ + if (version %in% c("8","130")){ xmin <- as.double(readBin(f, "integer", n = 1, size = 4, signed = TRUE, endian = "big")) / 60000 xmax <- as.double(readBin(f, "integer", n = 1, size = 4, signed = TRUE, endian = "big")) / 60000 } else { - xmin <- readBin(f, "numeric", n=1, endian = "big", size=4) / 60000 - xmax <- readBin(f, "numeric", n=1, endian = "big", size=4) / 60000 - + xmin <- readBin(f, "numeric", n = 1, endian = "big", size = 4) / 60000 + xmax <- readBin(f, "numeric", n = 1, endian = "big", size = 4) / 60000 + } times <- seq(xmin, xmax, length.out = length(data)) seek(f, offsets$intercept, "start") - intercept <- readBin(f, "double", n=1, endian = "big", size = 8) + intercept <- readBin(f, "double", n = 1, endian = "big", size = 8) seek(f, offsets$scaling_factor, "start") - scaling_factor <- readBin(f, "double", n=1, endian = "big", size = 8) + scaling_factor <- readBin(f, "double", n = 1, endian = "big", size = 8) data <- data * scaling_factor + intercept - } - data <- data.frame(Intensity=data,row.names=times) - if (format_out == "matrix"){ - data <- as.matrix(data) - } - if (read_metadata){ - meta <- lapply(offsets[1:9], function(offset){ + + data <- data.frame(Intensity = data, row.names = times) + if (format_out == "matrix"){ + data <- as.matrix(data) + } + if (read_metadata){ + meta_slots <- switch(version, "8" = 9, + "81" = 9, + "130" = 12, + "181" = 9, + "179" = 9) + + meta <- lapply(offsets[seq_len(meta_slots)], function(offset){ seek(f, where = offset, origin = "start") n <- get_nchar(f) cc_collapse(readBin(f, "character", n = n)) }) - data <- structure(data, version = version, sample_name = meta$sample_name, - run_date = meta$date, instrument = meta$instrument, method = meta$method, - software = meta$software, unit = meta$unit, signal=meta$signal, + + data <- structure(data, file_version = version, sample_name = meta$sample_name, + run_date = meta$date, instrument = meta$instrument, + method = meta$method, software_version = meta$software_version, + software = meta$software, software_rev = meta$software_revision, + signal = meta$signal, unit = meta$unit, time_range = c(xmin, xmax), - data_format = "long", parser="chromConverter") + data_format = "long", parser = "chromConverter") } data } @@ -138,8 +170,8 @@ get_nchar <- function(f){ decode_double_delta <- function(file, offset) { - seek(file, 0, 'end'); - fsize = seek(file, NA, "current"); + seek(file, 0, 'end') + fsize <- seek(file, NA, "current") # Read data @@ -174,9 +206,8 @@ decode_double_delta <- function(file, offset) { #' @note This function was adapted from the \href{https://github.com/chemplexity/chromatography}{Chromatography Toolbox} ((c) James Dillon 2014). decode_double_array <- function(file, offset) { - - seek(file, 0, 'end'); - fsize = seek(file, NA, "current"); + seek(file, 0, 'end') + fsize <- seek(file, NA, "current") offset <- 0x1800 # Read data seek(file, offset, "start") @@ -189,49 +220,48 @@ decode_double_array <- function(file, offset) { #' @noRd #' @note This function was adapted from the \href{https://github.com/chemplexity/chromatography}{Chromatography Toolbox} ((c) James Dillon 2014). -# decode_delta <- function(file, offset) { -# if (ftell(file) == -1) { -# signal <- c() -# return(signal) -# } else { -# fseek(file, 0, "start") -# stop <- ftell(file) -# -# fseek(file, offset, "end") -# start <- ftell(file) -# } -# -# signal <- rep(0, round((stop - start)/2)) -# buffer <- rep(0, 4) -# index <- 1 -# -# while (seek(file, NA, "current") < fsize) { -# buffer[1] <- readBin(file, "integer", n=1, endian = "big", size = 2) -# buffer[2] <- buffer[4] -# -# if (bitshift(buffer[1], 12, "int16") == 0) { -# signal <- signal[1:index-1] -# break -# } -# -# for (i in 1:bitand(buffer[1], 4095, "int16")) { -# buffer[3] <- readBin(file, "integer", n=1, endian = "big", size = 2) -# -# if (buffer[3] != -32768) { -# buffer[2] <- buffer[2] + buffer[3] -# } else { -# buffer[2] <- readBin(file, "integer", n = 1, endian = "big", size = 4) -# } -# -# signal[index] <- buffer[2] -# index <- index + 1 -# } -# -# buffer[4] <- buffer[2] -# } -# -# return(signal) -# } -decode_delta <-function(){ - stop("Unfortunately, version 8 FID files are not yet supported.") +# file <- f +decode_delta <- function(file, offset) { + # if (ftell(file) == -1) { + # signal <- c() + # return(signal) + # } else { + seek(file, 0, 'end') + fsize <- seek(file, NA, "current") + + seek(file, offset, "start") + start <- seek(file, NA, "current") + # } + + signal <- rep(0, round((fsize - start)/2)) + buffer <- rep(0, 4) + index <- 1 + + while (seek(file, NA, "current") < fsize) { + buffer[1] <- readBin(file, "integer", n=1, endian = "big", size = 2) + buffer[2] <- buffer[4] + + if (bitShiftL(buffer[1], 12) == 0) { + # signal <- signal[1:index-1] + break + } + + for (i in (1:bitAnd(buffer[1], 4095))) { + buffer[3] <- readBin(file, "integer", n = 1, endian = "big", size = 2) + + if (buffer[3] != -32768) { + buffer[2] <- buffer[2] + buffer[3] + } else { + buffer[2] <- readBin(file, "integer", n = 1, endian = "big", size = 4) + } + + signal[index] <- buffer[2] + index <- index + 1 + } + + buffer[4] <- buffer[2] + } + + return(signal) } + diff --git a/R/read_chroms.R b/R/read_chroms.R index 3ca5dfe..b5bc492 100644 --- a/R/read_chroms.R +++ b/R/read_chroms.R @@ -20,9 +20,10 @@ #' the function with a folder or vector of folders containing the files. #' Otherwise, set to\code{FALSE}. #' @param format_in Format of files to be imported/converted. The current options -#' are: \code{chemstation_uv}, \code{chemstation}, \code{chemstation_csv}, -#' \code{masshunter}, \code{masshunter_dad}, \code{shimadzu_fid}, \code{shimadzu_dad}, -#' \code{chromeleon_uv}, \code{agilent_d}, \code{thermoraw}, \code{mzml}, +#' are: \code{chemstation_uv}, \code{chemstation}, \code{chemstation_ch}, +#' \code{chemstation_csv}, \code{masshunter}, \code{masshunter_dad}, +#' \code{shimadzu_fid}, \code{shimadzu_dad}, \code{chromeleon_uv}, +#' \code{agilent_d}, \code{thermoraw}, \code{mzml}, #' \code{waters_arw}, \code{waters_raw}, \code{msd}, \code{csd}, \code{wsd}, #' or \code{other}. #' @param pattern pattern (e.g. a file extension). Defaults to NULL, in which @@ -59,7 +60,8 @@ read_chroms <- function(paths, find_files, format_in=c("agilent_d", "chemstation", "chemstation_uv", - "chemstation_csv", "chemstation_fid", "masshunter_dad", + "chemstation_csv", "chemstation_ch", + "chemstation_fid", "masshunter_dad", "shimadzu_fid", "shimadzu_dad", "chromeleon_uv", "thermoraw", "mzml", "waters_arw", "waters_raw", "msd", "csd", "wsd", "other"), @@ -71,8 +73,12 @@ read_chroms <- function(paths, find_files, export = FALSE, path_out = NULL, export_format = c("csv", "cdf", "mzml", "animl"), read_metadata = TRUE, dat = NULL){ + if (length(format_in) > 1){ + stop("Please specify the file format of your chromatograms by setting the `format_in` argument.") + } format_in <- match.arg(format_in, c("agilent_d", "chemstation", "chemstation_uv", - "chemstation_csv", "chemstation_fid", "masshunter_dad", + "chemstation_ch", "chemstation_fid", + "chemstation_csv", "masshunter_dad", "shimadzu_fid", "shimadzu_dad", "chromeleon_uv", "thermoraw", "mzml", "waters_arw", "waters_raw", "msd", "csd", "wsd", "other")) @@ -182,11 +188,11 @@ read_chroms <- function(paths, find_files, } else if (format_in == "chemstation_csv"){ pattern <- ifelse(is.null(pattern), ".csv|.CSV", pattern) converter <- partial(read_chemstation_csv, format_out = format_out) - } else if (format_in == "chemstation_fid"){ + } else if (format_in %in% c("chemstation_fid", "chemstation_ch")){ data_format <- "long" pattern <- ifelse(is.null(pattern), ".ch", pattern) converter <- switch(parser, - "chromconverter" = partial(read_chemstation_fid, read_metadata = read_metadata, + "chromconverter" = partial(read_chemstation_ch, read_metadata = read_metadata, format_out = format_out), "rainbow" = rainbow_parser, "entab" = entab_parser) diff --git a/R/utils.R b/R/utils.R index 52a7ab0..129e753 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,10 +1,13 @@ utils::globalVariables(names = c('.')) # Globals <- list() +#' @noRd check_parser <- function(format_in, parser=NULL, find = FALSE){ allowed_formats <- list(openchrom = c("msd","csd","wsd"), - chromconverter = c("chemstation_csv", "shimadzu_fid", "shimadzu_dad", - "chromeleon_uv", "waters_arw", "mzml", "chemstation_fid"), + chromconverter = c("chemstation_csv", "shimadzu_fid", + "shimadzu_dad", "chromeleon_uv", + "waters_arw", "mzml", + "chemstation_fid", "chemstation_ch"), aston = c("chemstation_uv", "masshunter_dad", "other"), entab = c("chemstation_uv", "chemstation_fid", "masshunter_dad", "thermoraw", "other"), rainbow = c("chemstation_uv", "waters_raw", @@ -36,10 +39,11 @@ check_parser <- function(format_in, parser=NULL, find = FALSE){ } } +#' @noRd find_files <- function(paths, pattern){ files <- unlist(lapply(paths, function(path){ files <- list.files(path = path, pattern = pattern, - full.names = TRUE, recursive = TRUE) + full.names = TRUE, recursive = TRUE, ignore.case = TRUE) if (length(files)==0){ if (!dir.exists(path)){ warning(paste0("The directory '", basename(path), "' does not exist.")) @@ -52,12 +56,14 @@ find_files <- function(paths, pattern){ })) } +#' @noRd export_csvs <- function(data, path.out){ sapply(seq_along(data), function(i){ write.csv(data[[i]], file = paste0(paste0(path.out, names(data)[i]),".CSV")) }) } +#' @noRd set_temp_directory <- function(){ ans <- readline("Export directory not specified! Export files to `temp` directory (y/n)?") if (ans %in% c("y","Y")){ @@ -70,25 +76,8 @@ set_temp_directory <- function(){ } } - -extract_header <- function(x, chrom.idx){ - index <- chrom.idx+1 - line <- x[index] - l <- length(strsplit(line,"\t")[[1]]) - header <- strsplit(line,"\t")[[1]] - while (l > 1) { - index <- index+1 - line <- strsplit(x[index], "\t")[[1]] - l <- length(line) - if (l == 1 | suppressWarnings(!is.na(as.numeric(line[1])))) - break - header <- rbind(header, line) - } - list(header,index) -} - -#' check path -#' check that path is properly formatted +#' Check path +#' Check that path is properly formatted #' @param path path as character string #' @noRd check_path <- function(path){ @@ -109,3 +98,21 @@ check_path <- function(path){ } path } + +#' Extract header from Shimadzu ascii files +#' @noRd +extract_header <- function(x, chrom.idx, sep){ + index <- chrom.idx+1 + line <- x[index] + l <- length(strsplit(x = line, split = sep)[[1]]) + header <- strsplit(x = line, split = sep)[[1]] + while (l > 1) { + index <- index+1 + line <- strsplit(x = x[index], split = sep)[[1]] + l <- length(line) + if (l == 1 | suppressWarnings(!is.na(as.numeric(line[1])))) + break + header <- rbind(header, line) + } + list(header,index) +} diff --git a/README.md b/README.md index 75ffb84..e2897b6 100644 --- a/README.md +++ b/README.md @@ -130,6 +130,10 @@ chromConverter includes some options to extract metadata from the provided files For downstream analyses of chromatographic data, you can also check out my package [chromatographR](https://ethanbass.github.io/chromatographR/). For interactive visualization of chromatograms, you can check out my new package [ShinyChromViewer](https://github.com/ethanbass/ShinyChromViewer) (alpha release). +### Other related packages + +- For tidy extraction of mzML data, see [RaMS](https://github.com/wkumler/RaMS/). + ### Citation You can cite chromConverter as follows: @@ -137,7 +141,3 @@ You can cite chromConverter as follows: Bass, E. (2022). chromConverter: chromatographic file converter. http://doi.org/10.5281/zenodo.6792521. If you use external libraries to convert your files, please cite them as well in published work. - -### Other related packages - -- For tidy extraction of mzML data, see [RaMS](https://github.com/wkumler/RaMS/). diff --git a/inst/CITATION b/inst/CITATION index 4d16947..93240f9 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -5,7 +5,7 @@ citEntry( title = "chromConverter: chromatographic file converter", author = "Ethan Bass", year = "2022", - version = "version 0.3.0", + version = "version 0.3.1", doi = "10.5281/zenodo.6792521", url = "https://ethanbass.github.io/chromConverter/", textVersion = paste("Bass, E. (2022).", diff --git a/inst/shell/path_to_openchrom_commandline.txt b/inst/shell/path_to_openchrom_commandline.txt index 0e12ee2..1ccd88a 100644 --- a/inst/shell/path_to_openchrom_commandline.txt +++ b/inst/shell/path_to_openchrom_commandline.txt @@ -1 +1 @@ -/Applications/OpenChrom_CL.app/Contents/MacOS/openchrom +/Applications/Eclipse.app/Contents/MacOS/openchrom \ No newline at end of file diff --git a/man/read_chemstation_fid.Rd b/man/read_chemstation_ch.Rd similarity index 55% rename from man/read_chemstation_fid.Rd rename to man/read_chemstation_ch.Rd index e1d596f..96bbca9 100644 --- a/man/read_chemstation_fid.Rd +++ b/man/read_chemstation_ch.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/read_chemstation_fid.R -\name{read_chemstation_fid} -\alias{read_chemstation_fid} -\title{Parser for reading Agilent FID (.ch) files into R} +% Please edit documentation in R/read_chemstation_ch.R +\name{read_chemstation_ch} +\alias{read_chemstation_ch} +\title{Parser for reading Agilent ('.ch') files into R} \usage{ -read_chemstation_fid( +read_chemstation_ch( path, read_metadata = TRUE, format_out = c("matrix", "data.frame") @@ -18,10 +18,11 @@ read_chemstation_fid( \item{format_out}{Matrix or data.frame} } \description{ -Parser for reading Agilent FID (.ch) files into R +Parser for reading Agilent ('.ch') files into R } \note{ -This function was adapted from the \href{https://github.com/chemplexity/chromatography}{Chromatography Toolbox} ((c) James Dillon 2014). +This function was adapted from the \href{https://github.com/chemplexity/chromatography}{Chromatography Toolbox} +((c) James Dillon 2014). } \author{ Ethan Bass diff --git a/man/read_chroms.Rd b/man/read_chroms.Rd index ad59a44..76015af 100644 --- a/man/read_chroms.Rd +++ b/man/read_chroms.Rd @@ -8,8 +8,9 @@ read_chroms( paths, find_files, format_in = c("agilent_d", "chemstation", "chemstation_uv", "chemstation_csv", - "chemstation_fid", "masshunter_dad", "shimadzu_fid", "shimadzu_dad", "chromeleon_uv", - "thermoraw", "mzml", "waters_arw", "waters_raw", "msd", "csd", "wsd", "other"), + "chemstation_ch", "chemstation_fid", "masshunter_dad", "shimadzu_fid", + "shimadzu_dad", "chromeleon_uv", "thermoraw", "mzml", "waters_arw", "waters_raw", + "msd", "csd", "wsd", "other"), pattern = NULL, parser = c("", "chromconverter", "aston", "entab", "thermoraw", "openchrom", "rainbow"), format_out = c("matrix", "data.frame"), @@ -29,9 +30,10 @@ the function with a folder or vector of folders containing the files. Otherwise, set to\code{FALSE}.} \item{format_in}{Format of files to be imported/converted. The current options -are: \code{chemstation_uv}, \code{chemstation}, \code{chemstation_csv}, -\code{masshunter}, \code{masshunter_dad}, \code{shimadzu_fid}, \code{shimadzu_dad}, -\code{chromeleon_uv}, \code{agilent_d}, \code{thermoraw}, \code{mzml}, +are: \code{chemstation_uv}, \code{chemstation}, \code{chemstation_ch}, +\code{chemstation_csv}, \code{masshunter}, \code{masshunter_dad}, +\code{shimadzu_fid}, \code{shimadzu_dad}, \code{chromeleon_uv}, +\code{agilent_d}, \code{thermoraw}, \code{mzml}, \code{waters_arw}, \code{waters_raw}, \code{msd}, \code{csd}, \code{wsd}, or \code{other}.} diff --git a/man/read_shimadzu.Rd b/man/read_shimadzu.Rd index baf205f..73a51ef 100644 --- a/man/read_shimadzu.Rd +++ b/man/read_shimadzu.Rd @@ -22,8 +22,8 @@ read_shimadzu( \item{data_format}{Whether to return data in \code{wide} or \code{long} format.} -\item{what}{Whether to extract \code{chromatogram}, \code{peak_table} or -\code{both}.} +\item{what}{Whether to extract \code{chromatogram} and/or \code{peak_table}. +Accepts multiple arguments.} \item{read_metadata}{Whether to read metadata from file.} } diff --git a/tests/testthat/test-read_chroms.R b/tests/testthat/test-read_chroms.R index daaafad..b481460 100644 --- a/tests/testthat/test-read_chroms.R +++ b/tests/testthat/test-read_chroms.R @@ -52,7 +52,7 @@ test_that("shimadzu parser works", { expect_equal(attributes(x[[1]])$instrument, "GC-2014") }) -test_that("check_path works", { +test_that("check_path works on unix/linux", { skip_on_os("windows") expect_equal(check_path("~/Downloads"), "~/Downloads/") expect_equal(check_path("Downloads"), "/Downloads/") @@ -63,7 +63,6 @@ test_that("check_path works", { expect_equal(check_path("Users/foo"), "/Users/foo/") }) - # test_that("thermoraw parser works",{ # skip_if_not(configure_thermo_parser(check = TRUE)) # file <- "/Users/ethanbass/Downloads/chrom_files/small.RAW"