v0.3.1 with support for ChemStation .ch files (versions 130 & 8)

ethanbass · Feb 5, 2023 · 3089624 · 3089624
1 parent f17124a
commit 3089624
Show file tree

Hide file tree

Showing 15 changed files with 271 additions and 182 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: chromConverter
 Title: Chromatographic File Converter
-Version: 0.3.0
+Version: 0.3.1
 Authors@R: c(
     person(given = "Ethan", family = "Bass", email = "[email protected]",
                   role = c("aut", "cre"),
@@ -15,6 +15,7 @@ License: GPL (>= 3)
 URL: https://ethanbass.github.io/chromConverter, https://github.com/ethanbass/chromConverter
 BugReports: https://github.com/ethanbass/chromConverter/issues
 Imports:
+    bitops,
     purrr,
     readxl,
     reticulate,

diff --git a/NAMESPACE b/NAMESPACE
@@ -5,8 +5,8 @@ export(call_openchrom)
 export(call_rainbow)
 export(configure_aston)
 export(configure_rainbow)
+export(read_chemstation_ch)
 export(read_chemstation_csv)
-export(read_chemstation_fid)
 export(read_chromeleon)
 export(read_chroms)
 export(read_mzml)
@@ -19,6 +19,8 @@ import(magrittr)
 import(reticulate)
 import(xml2)
 importFrom(RaMS,grabMSdata)
+importFrom(bitops,bitAnd)
+importFrom(bitops,bitShiftL)
 importFrom(purrr,partial)
 importFrom(readxl,read_xls)
 importFrom(stats,reshape)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,11 @@
+## chromConverter 0.3.1
+
+* Added support for "Chemstation" UV (`.ch`) files (version 130).
+* Added provisional support for "Chemstation" FID (`.ch`) files (version 8).
+* Changed name of `read_chemstation_fid` function to `read_chemstation_ch`.
+* Ignore case when matching file extensions in `read_chroms`.
+* Added note to README about configuring RStudio correctly for accessing Python parsers.
+
 ## chromConverter 0.3.0
 
 * Fixed bug causing "Chromeleon" metadata parser to fail.

diff --git a/R/aston_parsers.R b/R/aston_parsers.R
@@ -13,6 +13,7 @@
 #' @return A chromatogram in \code{data.frame} format (retention time x wavelength).
 #' @import reticulate
 #' @export sp_converter
+
 sp_converter <- function(file, format_out = c("matrix", "data.frame"),
                          data_format = c("wide","long"),
                          read_metadata = TRUE){

diff --git a/R/parsers.R b/R/parsers.R
@@ -43,61 +43,77 @@ read_chromeleon <- function(file, format_out = c("matrix","data.frame"),
 #' @param format_out R format. Either \code{matrix} or \code{data.frame}.
 #' @param data_format Whether to return data in \code{wide} or \code{long} format.
 #' @param read_metadata Whether to read metadata from file.
-#' @param what Whether to extract \code{chromatogram}, \code{peak_table} or
-#' \code{both}.
+#' @param what What to extract: \code{chromatogram}, \code{peak_table}, or both.
+#' Accepts a character vector containing one or more of these values.
 #' @return A chromatogram in the format specified by \code{format_out}
 #' (retention time x wavelength).
 #' @author Ethan Bass
 #' @export
+
 read_shimadzu <- function(file, format_in,
                           format_out = c("matrix","data.frame"),
                           data_format = c("wide","long"),
-                          what = "chromatogram", read_metadata = TRUE){
+                          what = "chromatogram",
+                          read_metadata = TRUE){
   if (missing(format_in))
     stop("`format_in` must be specified. The options are `fid` or `dad`.")
   format_out <- match.arg(format_out, c("matrix","data.frame"))
   data_format <- match.arg(data_format, c("wide","long"))
   what <- match.arg(what, c("chromatogram", "peak_table"), several.ok = TRUE)
   x <- readLines(file)
+  sep <- substr(x[2], 17, 17)
   headings <- grep("\\[*\\]", x)
   peaktab.idx <- grep("\\[Peak Table", x)
   chrom_heading <- switch(format_in,
                           "fid" = "\\[Chromatogram .*]",
                           "dad" = "\\[PDA 3D]")
   chrom.idx <- grep(chrom_heading, x)
-  header <- extract_header(x, chrom.idx)
-  met <- header[[1]]
-  decimal_separator <- ifelse(grepl(",", met[2,2]),",",".")
-  if (decimal_separator == ","){
-    met[2:3,2] <- gsub(",",".",met[2:3,2])
-  }
+
   if (any(what == "chromatogram")){
-    if (format_in == "fid"){
-      xx <- read.csv(file, skip = header[[2]], sep="\t", colClasses="numeric",
-                     na.strings=c("[FractionCollectionReport]","#ofFractions"),
-                     dec = decimal_separator)
+    if (length(chrom.idx) != 0){
+      header <- try(extract_header(x = x, chrom.idx = chrom.idx, sep = sep))
+      met <- header[[1]]
+      decimal_separator <- ifelse(grepl(",", met[2,2]),",",".")
+      if (decimal_separator == ","){
+        met[c(2:3), 2] <- gsub(",", ".", met[c(2:3), 2])
+      }
 
-      xx <- as.matrix(xx[!is.na(xx[,1]),])
-      rownames(xx) <- xx[,1]
-      xx <- xx[, 2, drop = FALSE]
-      colnames(xx) <- "Intensity"
-      data_format <- "long"
-    } else if (format_in == "dad"){
-        xx <- read.csv(file, skip = header[[2]], sep="\t", colClasses="numeric",
-                       na.strings=c("[FractionCollectionReport]","#ofFractions"), row.names = 1,
-                       nrows = as.numeric(met[7,2]), dec = decimal_separator)
-        xx <- as.matrix(xx[!is.na(xx[,1]),])
-        times <- round(seq(met[2,2], met[3,2], length.out = as.numeric(met[7,2])),2)
-        wavelengths <- round(seq(met[4,2], met[5,2], length.out = as.numeric(met[6,2])),2)
-        colnames(xx) <- wavelengths
-        if (data_format == "long"){
-          xx <- reshape_chrom(xx)
+        if (format_in == "fid"){
+          xx <- read.csv(file, skip = header[[2]], sep = sep, colClasses="numeric",
+                         na.strings=c("[FractionCollectionReport]","#ofFractions"),
+                         dec = decimal_separator)
+
+          xx <- as.matrix(xx[!is.na(xx[,1]),])
+          rownames(xx) <- xx[,1]
+          xx <- xx[, 2, drop = FALSE]
+          colnames(xx) <- "Intensity"
+          data_format <- "long"
+        } else if (format_in == "dad"){
+            xx <- read.csv(file, skip = header[[2]], sep = sep, colClasses="numeric",
+                           na.strings=c("[FractionCollectionReport]","#ofFractions"), row.names = 1,
+                           nrows = as.numeric(met[7,2]), dec = decimal_separator)
+            xx <- as.matrix(xx[!is.na(xx[,1]),])
+            times <- round(seq(met[2,2], met[3,2], length.out = as.numeric(met[7,2])),2)
+            wavelengths <- round(seq(met[4,2], met[5,2], length.out = as.numeric(met[6,2])),2)
+            colnames(xx) <- wavelengths
+            if (data_format == "long"){
+              xx <- reshape_chrom(xx)
+            }
+          }
+          if (format_out == "data.frame"){
+            xx <- as.data.frame(xx)
+          }
+      } else{
+        if (length(what) == 1){
+          stop("Chromatogram not found.")
+        } else{
+          warning("Chromatogram not found.")
+          what = "peak_table"
         }
       }
-      if (format_out == "data.frame"){
-        xx <- as.data.frame(xx)
-      }
-    }
+  }
+
+  ### extract peak_table
   if (any(what == "peak_table")){
     if (length(peaktab.idx) == 0){
       if (length(what) == 1){
@@ -108,9 +124,15 @@ read_shimadzu <- function(file, format_in,
       }
     }
     peak_tab <- lapply(peaktab.idx, function(idx){
-      nrows <- as.numeric(strsplit(x[idx+1],"\t")[[1]][2])
-      peak_tab <- read.csv(file, skip = (idx+1), sep = "\t", nrows = nrows,
-                           dec=decimal_separator)
+      nrows <- as.numeric(strsplit(x = x[idx+1], split = sep)[[1]][2])
+      if (!is.na(nrows) && nrows > 0){
+        time_column <- grep("R.Time", strsplit(x = x[[idx+2]], split = sep)[[1]])
+        t1 <- strsplit(x = x[[idx+3]], split = sep)[[1]][time_column]
+        decimal_separator <- ifelse(grepl(".", t1), ".", ",")
+
+        peak_tab <- read.csv(file, skip = (idx+1), sep = sep, nrows = nrows,
+                             dec = decimal_separator)
+      } else{NA}
     })
     names(peak_tab) <- gsub("\\[|\\]","", x[peaktab.idx])
   }
@@ -133,17 +155,27 @@ read_shimadzu <- function(file, format_in,
     meta <- x[(meta_start+1):(meta_end-1)]
     meta <- meta[meta!=""]
     meta <- meta[-grep("\\[", meta)]
-    meta <- stringr::str_split_fixed(meta, "\t", n = 2)
-    meta <- rbind(meta, met)
+    meta <- stringr::str_split_fixed(meta, pattern = sep, n = 2)
+    if (exists("met")){
+      meta <- rbind(meta, met)
+    }
     rownames(meta) <- meta[, 1]
     meta <- as.list(meta[,2])
     data_format <- switch(format_in,
                           "fid" = "long",
                           "dad" = "wide")
-    xx <- attach_metadata(xx, meta, format_in = "shimadzu", format_out = format_out,
-                          data_format = data_format,
-                          parser = "chromConverter")
+    if (inherits(xx, "list")){
+      xx <- lapply(xx, function(xxx){
+        attach_metadata(xxx, meta, format_in = "shimadzu", format_out = format_out,
+                        data_format = data_format,
+                        parser = "chromConverter")
+      })
+    } else{
+      xx <- attach_metadata(xx, meta, format_in = "shimadzu", format_out = format_out,
+                      data_format = data_format,
+                      parser = "chromConverter")
     }
+  }
   xx
 }