v0.4.1

* Added support for "Chemstation" UV (`.ch`) files (version 30). * Updated `read_chromeleon` to better deal with comma decimal separators in metadata. * Updated `read_chromeleon` to deal with more datetime formats. * Updated `read_chromeleon` to deal with unicode microliters. * Added tests for rainbow parser and `read_chemstation_ch`. ### Bug fixes * Fixed bug preventing compilation of PDF manual. * Fixed new bug causing failure to correctly read names of chemstation files from .D directory.
ethanbass · May 30, 2023 · 7c4c283 · 7c4c283
1 parent 96af6d1
commit 7c4c283
Show file tree

Hide file tree

Showing 17 changed files with 159 additions and 70 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: chromConverter
 Title: Chromatographic File Converter
-Version: 0.4.0
+Version: 0.4.1
 Authors@R: c(
     person(given = "Ethan", family = "Bass", email = "[email protected]",
                   role = c("aut", "cre"),

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,21 @@
+## chromConverter 0.4.1
+
+### New features 
+
+* Added support for "Chemstation" UV (`.ch`) files (version 30).
+
+### Minor improvements
+
+* Updated `read_chromeleon` to better deal with comma decimal separators in metadata.
+* Updated `read_chromeleon` to deal with more datetime formats.
+* Updated `read_chromeleon` to deal with unicode microliters.
+* Added tests for rainbow parser and `read_chemstation_ch`.
+
+### Bug fixes
+
+* Fixed bug preventing compilation of PDF manual.
+* Fixed new bug causing failure to correctly read names of chemstation files from .D directory.
+
 ## chromConverter 0.4.0
 
 ### New features

diff --git a/R/aston_parsers.R b/R/aston_parsers.R
@@ -121,11 +121,15 @@ trace_converter <- function(file, format_out = c("matrix", "data.frame"),
 #'
 #' Configures reticulate to use Aston file parsers.
 #' @name configure_aston
-#' @return No return value.
+#' @param return_boolean Logical. Whether to return a Boolean value indicating
+#' if the chromConverter environment is correctly configured.
+#' @return If \code{return_boolean} is \code{TRUE}, returns a Boolean value
+#' indicating whether the chromConverter environment is configured correctly.
+#' Otherwise, there is no return value.
 #' @author Ethan Bass
 #' @import reticulate
 #' @export
-configure_aston <- function(){
+configure_aston <- function(return_boolean=FALSE){
   install <- FALSE
   # path <- miniconda_path()
   if (!dir.exists(miniconda_path())){
@@ -146,6 +150,9 @@ configure_aston <- function(){
     }
   }
   assign_trace_file()
+  if (return_boolean){
+    return(env)
+  }
 }
 
 #' @noRd

diff --git a/R/attach_metadata.R b/R/attach_metadata.R
@@ -65,14 +65,27 @@ attach_metadata <- function(x, meta, format_in, format_out, data_format, parser
               parser = "chromConverter",
               format_out = format_out)
   }, "chromeleon" = {
+    datetime.idx <- unlist(sapply(c("Date$","Time$"), function(str) grep(str, names(meta))))
+    datetime <- unlist(meta[datetime.idx])
+    if (length(datetime > 1)){
+      datetime <- paste(datetime, collapse=" ")
+    }
+    datetime <- as.POSIXct(datetime, format = c("%m/%d/%Y %H:%M:%S", "%d.%m.%Y %H:%M:%S",
+                                      "%m/%d/%Y %H:%M:%S %p %z"))
+    datetime <- datetime[!is.na(datetime)]
+    time_interval_unit <- tryCatch({
+      get_time_unit(grep("Average Step", names(meta), value = TRUE)[1],
+                    format_in = "chromeleon")}, error = function(err) NA)
+    time_unit <- tryCatch({
+      get_time_unit(grep("Time Min.", names(meta), value = TRUE)[1],
+                    format_in="chromeleon")}, error = function(err) NA)
     structure(x, instrument = NA,
               detector = meta$Detector,
               software = meta$`Generating Data System`,
               method = meta$`Instrument Method`,
               batch = NA,
               operator = meta$`Operator`,
-              run_datetime = as.POSIXct(paste(meta$`Injection Date`, meta$`Injection Time`),
-                                        format = "%m/%d/%Y %H:%M:%S"),
+              run_datetime = datetime,
               # run_date = meta$`Injection Date`,
               # run_time = meta$`Injection Time`,
               sample_name = meta$Injection,
@@ -82,13 +95,9 @@ attach_metadata <- function(x, meta, format_in, format_out, data_format, parser
               time_range = c(meta$`Time Min. (min)`, meta$`Time Max. (min)`),
               # start_time = meta$`Time Min. (min)`,
               # end_time = meta$`Time Max. (min)`,
-              time_interval = meta$`Average Step (s)`,
-              time_interval_unit <- get_time_unit(
-                grep("Average Step", names(meta), value = TRUE)[1],
-                format_in = "chromeleon"),
-              time_unit = get_time_unit(
-                grep("Time Min.", names(meta), value = TRUE)[1],
-                format_in="chromeleon"),
+              time_interval = meta[[grep("Average Step", names(meta))]],
+              time_interval_unit = time_interval_unit,
+              time_unit = time_unit,
               # uniform_sampling = meta$`Min. Step (s)` == meta$`Max. Step (s)`,
               detector_range = NA,
               detector_unit = meta$`Signal Unit`,

diff --git a/R/call_openchrom.R b/R/call_openchrom.R
@@ -140,7 +140,9 @@ write_openchrom_batchfile <- function(files, path_out,
 #' @param path Path to 'OpenChrom' executable (Optional). The supplied path will
 #' overwrite the current path.
 #' @importFrom utils read.table write.table
-#' @return Returns path to OpenChrom command-line application
+#' @return If \code{cli} is set to \code{"status"}, returns a Boolean value
+#' indicating whether 'OpenChrom' is configured correctly. Otherwise, returns
+#' the path to OpenChrom command-line application.
 #' @author Ethan Bass
 #' @export
 

diff --git a/R/call_rainbow.R b/R/call_rainbow.R
@@ -113,11 +113,15 @@ extract_rb_names <- function(xx){
 #'
 #' Configures reticulate to use rainbow file parsers.
 #' @name configure_rainbow
-#' @return No return value.
+#' @param return_boolean Logical. Whether to return a Boolean value indicating
+#' if the chromConverter environment is correctly configured.
+#' @return If \code{return_boolean} is \code{TRUE}, returns a Boolean value
+#' indicating whether the chromConverter environment is configured correctly.
+#' Otherwise, there is no return value.
 #' @author Ethan Bass
 #' @import reticulate
 #' @export
-configure_rainbow <- function(){
+configure_rainbow <- function(return_boolean = FALSE){
   install <- FALSE
   if (!dir.exists(miniconda_path())){
     install <- readline("It is recommended to install miniconda in your R library to use rainbow parsers. Install miniconda now? (y/n)")
@@ -134,6 +138,9 @@ configure_rainbow <- function(){
     }
   }
   assign_rb_read()
+  if (return_boolean){
+    env
+  }
 }
 
 #' @noRd

diff --git a/R/parsers.R b/R/parsers.R
@@ -16,10 +16,13 @@ read_chromeleon <- function(file, format_out = c("matrix","data.frame"),
   format_out <- match.arg(format_out, c("matrix","data.frame"))
   data_format <- match.arg(data_format, c("wide","long"))
   xx <- readLines(file)
+  xx <- remove_unicode_chars(xx)
   start <- tail(grep("Data:", xx), 1)
-  x <- read.csv(file, skip = start, sep="\t")
+  x <- read.csv(file, skip = start, sep="\t", row.names = NULL)
   x <- x[,-2, drop = FALSE]
+  x <- x[,colSums(is.na(x)) < nrow(x)]
   if (any(grepl(",",as.data.frame(x)[-1,2]))){
+    decimal_separator <- ","
     x <- apply(x, 2, function(x) gsub("\\.", "", x))
     x <- apply(x, 2, function(x) gsub(",", ".", x))
   }
@@ -34,6 +37,9 @@ read_chromeleon <- function(file, format_out = c("matrix","data.frame"),
   }
   if (read_metadata){
     meta <- try(read_chromeleon_metadata(xx))
+    if (decimal_separator == ","){
+      meta <- lapply(meta, function(x) gsub(",",".",x))
+    }
     if (!inherits(meta, "try-error")){
       x <- attach_metadata(x, meta, format_in = "chromeleon", format_out = format_out,
                            data_format = "wide", parser = "chromConverter",

diff --git a/R/read_chemstation_ch.R b/R/read_chemstation_ch.R
@@ -24,11 +24,12 @@ read_chemstation_ch <- function(path, format_out = c("matrix","data.frame"),
   # HEADER
   seek(f, 1, "start")
   version <- readBin(f, "character", n = 1)
-  version <- match.arg(version, choices = c("8", "81", "130", "179", "181"))
+  version <- match.arg(version, choices = c("8", "81", "30", "130", "179", "181"))
   offsets <- get_agilent_offsets(version)
   decoder <- switch(version,
                     "8" = decode_delta,
                     "81" = decode_double_delta,
+                    "30" = decode_delta,
                     "130" = decode_delta,
                     "181" = decode_double_delta,
                     "179" = decode_double_array)
@@ -46,7 +47,7 @@ read_chemstation_ch <- function(path, format_out = c("matrix","data.frame"),
   seek(f, where = 282, origin = "start")
   seek(f, where = 282, origin = "start")
 
-  if (version %in% c("8","130")){
+  if (version %in% c("8", "30", "130")){
     xmin <- as.double(readBin(f, "integer", n = 1, size = 4, signed = TRUE, endian = "big")) / 60000
     xmax <- as.double(readBin(f, "integer", n = 1, size = 4, signed = TRUE, endian = "big")) / 60000
   } else {
@@ -74,14 +75,19 @@ read_chemstation_ch <- function(path, format_out = c("matrix","data.frame"),
     if (read_metadata){
       meta_slots <- switch(version, "8" = 9,
                                     "81" = 9,
+                                    "30" = 11,
                                     "130" = 12,
                                     "181" = 9,
                                     "179" = 9)
 
     meta <- lapply(offsets[seq_len(meta_slots)], function(offset){
       seek(f, where = offset, origin = "start")
       n <- get_nchar(f)
-      cc_collapse(readBin(f, "character", n = n))
+      if (version == "30"){
+        readBin(f, what = "character")
+      } else{
+        cc_collapse(readBin(f, "character", n = n))
+      }
     })
   if (read_metadata){
     datetime_regex <- "(\\d{2}-[A-Za-z]{3}-\\d{2}, \\d{2}:\\d{2}:\\d{2})|(\\d{2}/\\d{2}/\\d{4} \\d{1,2}:\\d{2}:\\d{2} (?:AM|PM)?)"
@@ -177,7 +183,7 @@ decode_double_array <- function(file, offset) {
   seek(file, offset, "start")
   signal <- readBin(file, what = "double", size = 4, endian = "little",
                     n = (fsize - offset))
-  signal <- signal[seq(2,length(signal),2)]
+  signal <- signal[seq(2, length(signal), 2)]
   return(signal)
 }
 
@@ -264,12 +270,12 @@ get_agilent_offsets <- function(version){
       software_revision = 3802, #'utf16'
       units = 4172, # 'utf16'
       signal = 4213, # 'utf16'
-      zero = 4110, # INT32
+      intercept = 4110, # INT32
       scaling_factor = 4732) #ENDIAN + 'd'
   } else if (version == 30){
     offsets <- list(
       file_type = 4, # utf16
-      # sample_name = 858, # utf16
+      sample_name = 24, # utf16
       operator = 148, # utf16
       date = 178, # utf16
       # inlet = 2492, # utf16
@@ -280,8 +286,8 @@ get_agilent_offsets <- function(version){
       software_revision = 405, #'utf16'
       units = 580, # 'utf16'
       signal = 596, # 'utf16'
-      zero = 4110, # INT32
-      scaling_factor = 4732,
+      intercept = 636, # INT32
+      scaling_factor = 644,
       data_start = 1024 #ENDIAN + 'd'
     )
   } else if (version %in% c("8","81")){

diff --git a/R/read_chroms.R b/R/read_chroms.R
@@ -107,7 +107,7 @@ read_chroms <- function(paths, find_files,
   }
   if (length(format_in) > 1){
     if (!find_files){
-      format_in <- get_filetype(ifelse(length(paths)>1, paths[[1]], paths))
+      format_in <- get_filetype(paths[1])
     } else{
         stop("Please specify the file format of your chromatograms by setting the `format_in` argument.")
     }
@@ -269,14 +269,15 @@ read_chroms <- function(paths, find_files,
     }
     }
   }
-
   if (all(grepl(".d$", files, ignore.case = TRUE))){
     file_names <- strsplit(files, "/")
     file_names <- gsub("\\.[Dd]", "",
                        sapply(file_names, function(n){
                          ifelse(any(grepl("\\.[Dd]", n)), grep("\\.[Dd]", n, value = TRUE), tail(n,1))
                        }))
-  } else {file_names <- sapply(strsplit(basename(files),"\\."), function(x) x[1])}
+  } else {
+    file_names <- sapply(strsplit(basename(files),"\\."), function(x) x[1])
+  }
   if (parser != "openchrom"){
     laplee <- choose_apply_fnc(progress_bar)
     data <- laplee(X = files, function(file){

diff --git a/R/utils.R b/R/utils.R
@@ -1,6 +1,7 @@
 utils::globalVariables(names = c('.'))
 # Globals <- list()
 
+#' Check parser
 #' @noRd
 check_parser <- function(format_in, parser=NULL, find = FALSE){
   allowed_formats <- list(openchrom = c("msd","csd","wsd"),
@@ -42,6 +43,13 @@ check_parser <- function(format_in, parser=NULL, find = FALSE){
   }
 }
 
+#' Remove unicode characters
+#' @noRd
+remove_unicode_chars <- function(x){
+  stringr::str_replace_all(x, "\xb5", "micro")
+}
+
+#' Format extension
 #' @noRd
 format_to_extension <- function(format_in){
   switch(format_in,
@@ -135,6 +143,7 @@ choose_apply_fnc <- function(progress_bar, parallel = FALSE, cl = NULL){
   fn
 }
 
+#' Transfer metadata
 #'@noRd
 transfer_metadata <- function (new_object, old_object, exclude = c("names", "row.names",
                                               "class", "dim", "dimnames"))
@@ -145,6 +154,7 @@ transfer_metadata <- function (new_object, old_object, exclude = c("names", "row
   new_object
 }
 
+#' Get filetype
 #' @noRd
 get_filetype <- function(file, out = c("format_in", "filetype")){
   out <- match.arg(out, c("format_in", "filetype"))
@@ -154,7 +164,7 @@ get_filetype <- function(file, out = c("format_in", "filetype")){
   filetype <- switch(magic,
                      "x01/x32/x00/x00" = "AgilentChemstationMS",
                      "x02/x02/x00/x00" = "AgilentMasshunterDADHeader",
-                     "x02/x33/x30/x00" = "AgilentChemstationMWD",
+                     # "x02/x33/x30/x00" = "AgilentChemstationMWD",
                      "x02/x33/x31/x00" = "AgilentChemstationDAD",
                      "x02/x38/x31/x00" = "AgilentChemstationFID", #81
                      "x03/x02/x00/x00" = "AgilentMasshunterDAD",

diff --git a/inst/CITATION b/inst/CITATION
@@ -5,7 +5,7 @@ citEntry(
   title    = "chromConverter: chromatographic file converter",
   author   = "Ethan Bass",
   year     = "2023",
-  version  = "version 0.4.0",
+  version  = "version 0.4.1",
   doi      = "10.5281/zenodo.6792521",
   url      = "https://ethanbass.github.io/chromConverter/",
   textVersion = paste("Bass, E. (2022).",

diff --git a/man/configure_aston.Rd b/man/configure_aston.Rd
diff --git a/man/configure_openchrom.Rd b/man/configure_openchrom.Rd
diff --git a/man/configure_rainbow.Rd b/man/configure_rainbow.Rd
diff --git a/tests/testthat/helpers.R b/tests/testthat/helpers.R
@@ -5,7 +5,7 @@ elementwise.all.equal <- Vectorize(function(x, y, ...) {isTRUE(all.equal(x, y, .
 # helper function to skip tests if we don't have the right python dependencies
 skip_if_missing_dependecies <- function() {
   reqs <- c("scipy","numpy", "aston", "pandas")
-  have_reqs <- sapply(reqs, py_module_available)
+  have_reqs <- sapply(reqs, reticulate::py_module_available)
   if (mean(have_reqs) < 1)
     skip(paste("required packages", reqs[!have_reqs],
                "not available for testing"))