Merge pull request #23 from ethanbass/dev

* Added parser for reading ANDI MS (`.cdf`) files. * Fixed parsing of Agilent MS files with 'entab' reader. * Fixed `read_chemstation_ch` parser to correctly read "Mustang Chemstation" 179 files with 8-byte encoding. * Re-factored `read_shimadzu` function and added support for new types of chromatograms (e.g. status, uv and total ion chromatograms). Added support for reading multiple types of chromatograms at once. * Added support for reading MS spectra from 'Shimadzu' ascii files using `read_shimadzu`. * Exported `write_cdf` and added additional arguments (`lambda` and `force`) for greater control by users. * Added internal parser for 1D 'Waters RAW' chromatograms (`read_waters_raw`). * Added `collapse` argument to `call_rainbow` to collapse superfluous lists. * Added `...` argument to `read_chroms` for supplying additional arguments to parsers. * Added alias to `read_chroms` for reading `mzxml` files with `RaMS`. * Added `precision` argument to `call_rainbow` to control number of digits "mz" values are rounded to. (Also changed default behavior so values are rounded to one decimal by default). * Fixed bug in `read_shimadzu_lcd` on Windows due to issue with passing escaped paths to Python. * Updated documentation of various functions.
ethanbass · Dec 25, 2023 · a50105e · a50105e
2 parents 2873753 + 474effe
commit a50105e
Show file tree

Hide file tree

Showing 49 changed files with 1,470 additions and 698 deletions.
diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml
@@ -52,7 +52,7 @@ jobs:
           install.packages("chromConverterExtraTests", repos = "https://ethanbass.github.io/drat/")
           reticulate::install_miniconda()
           reticulate::conda_create('r-reticulate', packages = c('python==3.9', 'numpy', 'scipy', 'pandas'))
-          reticulate::conda_install('r-reticulate', packages = c('aston', "olefile"), pip=TRUE)
+          reticulate::conda_install('r-reticulate', packages = c('aston', "olefile", "rainbow-api"), pip=TRUE)
         shell: Rscript {0}
 
       - if: runner.os == 'macOS'

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: chromConverter
 Title: Chromatographic File Converter
-Version: 0.5.0
+Version: 0.6.0
 Authors@R: c(
     person(given = "Ethan", family = "Bass", email = "[email protected]",
                   role = c("aut", "cre"),

diff --git a/NAMESPACE b/NAMESPACE
@@ -3,10 +3,8 @@
 export(call_entab)
 export(call_openchrom)
 export(call_rainbow)
-export(configure_aston)
-export(configure_olefile)
 export(configure_openchrom)
-export(configure_rainbow)
+export(configure_python_environment)
 export(extract_metadata)
 export(read_agilent_dx)
 export(read_cdf)
@@ -24,8 +22,10 @@ export(read_shimadzu_lcd)
 export(read_thermoraw)
 export(read_varian_peaklist)
 export(read_waters_arw)
+export(read_waters_raw)
 export(sp_converter)
 export(uv_converter)
+export(write_cdf)
 import(magrittr)
 import(reticulate)
 import(xml2)

diff --git a/NEWS.md b/NEWS.md
@@ -1,14 +1,30 @@
+## chromConverter 0.6.0
+
+* Added parser for reading ANDI MS (`.cdf`) files.
+* Fixed parsing of Agilent MS files with 'entab' reader.
+* Fixed `read_chemstation_ch` parser to correctly read "Mustang Chemstation" 179 files with 8-byte encoding.
+* Re-factored `read_shimadzu` function and added support for new types of chromatograms (e.g. status, uv and total ion chromatograms). Added support for reading multiple types of chromatograms at once.
+* Added support for reading MS spectra from 'Shimadzu' ascii files using `read_shimadzu`.
+* Exported `write_cdf` and added additional arguments (`lambda` and `force`) for greater control by users.
+* Added internal parser for 1D 'Waters RAW' chromatograms (`read_waters_raw`).
+* Added `collapse` argument to `call_rainbow` to collapse superfluous lists.
+* Added `...` argument to `read_chroms` for supplying additional arguments to parsers.
+* Added alias to `read_chroms` for reading `mzxml` files with `RaMS`.
+* Added `precision` argument to `call_rainbow` to control number of digits "mz" values are rounded to. (Also changed default behavior so values are rounded to one decimal by default).
+* Fixed bug in `read_shimadzu_lcd` on Windows due to issue with passing escaped paths to Python.
+* Updated documentation of various functions.
+
 ## chromConverter 0.5.0
 
 ### New features
 
 * Added support for parallel processing through `pbapply` package. (**Note**: The `pbapply` package must be manually installed to enable parallel processing). 
-* Added internal parser for 'Agilent Chemstation' version 31 files (through `read_chemstation_uv` function).
+* Added internal parser for 'Agilent ChemStation' version 31 files (through `read_chemstation_uv` function).
 * Added support for 'Agilent OpenLab' version 131 files through internal parser. 
 * Added preliminary support for reading 'Agilent' (`.dx`) files (through `read_agilentdx` function).
-* Added support for reading 'Chemstation' REPORT files.
+* Added support for reading 'ChemStation' REPORT files.
 * Added parser for Shimadzu `.lcd` files through the `read_shimadzu_lcd` function. Only the PDA stream (not MS) is currently supported.
-* Added `read_peaklist` function for reading peak lists. Currently 'Agilent Chemstation' and 'Shimadzu ASCII' formats are supported.
+* Added `read_peaklist` function for reading peak lists. Currently 'Agilent ChemStation' and 'Shimadzu ASCII' formats are supported.
 * Added `verbose` argument to control console output for external parsers ('OpenChrom' and 'ThermoRawFileParser').
 
 ### Other Improvements
@@ -40,7 +56,7 @@
 
 ### New features 
 
-* Added support for "Chemstation" UV (`.ch`) files (version 30).
+* Added support for "ChemStation" UV (`.ch`) files (version 30).
 
 ### Minor improvements
 
@@ -52,7 +68,7 @@
 ### Bug fixes
 
 * Fixed bug preventing compilation of PDF manual.
-* Fixed new bug causing failure to correctly read names of chemstation files from .D directory.
+* Fixed new bug causing failure to correctly read names of 'ChemStation' files from .D directory.
 
 ## chromConverter 0.4.0
 
@@ -73,13 +89,13 @@
 data origin.
 * Standardized datetime stamps so they are always converted to POSIXct format.
 * Now use `fs` package for parsing paths, eliminating buggy `check_paths` function.
-* Fixed bug causing sloppy 'Chemstation' FID metadata.
-* Fixed bug that caused padding of 'Chemstation 130' files with extra zeros.
+* Fixed bug causing sloppy 'ChemStation' FID metadata.
+* Fixed bug that caused padding of 'ChemStation 130' files with extra zeros.
 * Added additional tests.
 
 ## chromConverter 0.3.3
 
-* Added R-based parser for "Chemstation" UV (`.uv`) files (version 131) through
+* Added R-based parser for "ChemStation" UV (`.uv`) files (version 131) through
 the `read_chemstation_uv` function.
 * Added `extract_metadata` function for extracting metadata from a list of chromatograms
 and returning it as a `data.frame` or `tibble`.
@@ -98,21 +114,21 @@ and returning it as a `data.frame` or `tibble`.
 
 ## chromConverter 0.3.1
 
-* Added support for "Chemstation" UV (`.ch`) files (version 130).
-* Added provisional support for "Chemstation" FID (version 8).
+* Added support for "ChemStation" UV (`.ch`) files (version 130).
+* Added provisional support for "ChemStation" FID (version 8).
 * Changed name of `read_chemstation_fid` function to `read_chemstation_ch`.
 * Ignore case when matching file extensions in `read_chroms`.
 * Added note to README about configuring RStudio correctly for accessing python parsers.
 
 ## chromConverter 0.3.0
 
 * Fixed bug causing "Chromeleon" metadata parser to fail.
-* Fixed bug in "Chemstation" metadata parser.
+* Fixed bug in "ChemStation" metadata parser.
 * Changed `format_data` argument to `data_format` to select wide or long format.
 * Added support for parsing `mzML` files with `RaMS`.
 * Added support for parsing "Agilent" (`.D`) and "Waters" (`.raw`) files with [rainbow](https://rainbow-api.readthedocs.io/).
 * Made `data_format` option available consistently for choosing `wide` or `long` format.
-* Added parser in R for "Chemstation" FID (`.ch`) data (versions 81, 179 & 181).
+* Added parser in R for "ChemStation" FID (`.ch`) data (versions 81, 179 & 181).
 * Improved error handling when loading python modules.
 * Improved error-handling for parsing metadata so small problems no longer error out the whole program.
 

diff --git a/R/attach_metadata.R b/R/attach_metadata.R
@@ -47,23 +47,21 @@ attach_metadata <- function(x, meta, format_in, format_out, data_format, parser
                 data_format = data_format,
                 parser = "chromconverter",
                 format_out = format_out)
-  }, "shimadzu" = {
+  }, "shimadzu_dad" = {
     structure(x,
-              instrument = meta$`Instrument Name`,
-              detector = meta$`Detector Name`,
-              software_name = meta$`Application Name`,
-              software_version = meta$Version,
-              method = meta$`Method File`,
-              batch = meta$`Batch File`,
-              operator = meta$`Operator Name`,
+              instrument = get_metadata_field(meta, "Instrument Name"),
+              detector = get_metadata_field(meta, "Detector Name"),
+              software_name = get_metadata_field(meta, "Application Name"),
+              software_version = get_metadata_field(meta, "Version"),
+              method = get_metadata_field(meta, "Method File"),
+              batch = get_metadata_field(meta, "Batch File"),
+              operator = get_metadata_field(meta, "Operator Name"),
               run_datetime = as.POSIXct(meta$Acquired, format = "%m/%d/%Y %I:%M:%S %p"),
-              sample_name = meta$`Sample Name`,
-              sample_id = meta$`Sample ID`,
-              sample_injection_volume = meta$`Injection Volume`,
-              sample_amount = meta$`Injection Volume`,
+              sample_name = get_metadata_field(meta, "Sample Name"),
+              sample_id = get_metadata_field(meta, "Sample ID"),
+              sample_injection_volume = get_metadata_field(meta, "Injection Volume"),
+              sample_amount = get_metadata_field(meta, "Injection Volume"),
               time_range = c(meta$`Start Time(min)`, meta$`End Time(min)`),
-              # start_time = meta$`Start Time(min)`,
-              # end_time = meta$`End Time(min)`,
               time_interval = meta$`Interval(msec)`,
               time_interval_unit = get_time_unit(
                 grep("Interval", names(meta), value = TRUE)[1], format_in = "shimadzu"),
@@ -76,6 +74,34 @@ attach_metadata <- function(x, meta, format_in, format_out, data_format, parser
               data_format = data_format,
               parser = "chromconverter",
               format_out = format_out)
+  }, "shimadzu_chrom" = {
+    structure(x,
+              instrument = get_metadata_field(meta, "Instrument Name"),
+              detector = get_metadata_field(meta, "Detector Name"),
+              software_name = get_metadata_field(meta, "Application Name"),
+              software_version = get_metadata_field(meta, "Version"),
+              method = get_metadata_field(meta, "Method File"),
+              batch = get_metadata_field(meta, "Batch File"),
+              operator = get_metadata_field(meta, "Operator Name"),
+              run_datetime = as.POSIXct(meta$Acquired, format = "%m/%d/%Y %I:%M:%S %p"),
+              sample_name = get_metadata_field(meta, "Sample Name"),
+              sample_id = get_metadata_field(meta, "Sample ID"),
+              sample_injection_volume = get_metadata_field(meta, "Injection Volume"),
+              sample_amount = get_metadata_field(meta, "Injection Volume"),
+              time_range = c(meta$`Start Time(min)`, meta$`End Time(min)`),
+              time_interval = meta$`Interval(msec)`,
+              time_interval_unit = get_time_unit(
+                grep("Interval", names(meta), value = TRUE)[1], format_in = "shimadzu"),
+              time_unit = get_time_unit(
+                grep("Start Time", names(meta), value=TRUE)[1], format_in = "shimadzu"),
+              wavelength = get_metadata_field(meta, "Wavelength(nm)"),
+              bandwidth = get_metadata_field(meta, "Bandwidth(nm)"),
+              # detector_end = meta$`End Wavelength(nm)`,
+              detector_unit = get_metadata_field(meta, "Intensity Units"),
+              source_file = source_file,
+              data_format = data_format,
+              parser = "chromconverter",
+              format_out = format_out)
   }, "chromeleon" = {
     datetime.idx <- unlist(sapply(c("Date$", "Time$"), function(str) grep(str, names(meta))))
     datetime <- unlist(meta[datetime.idx])
@@ -118,23 +144,6 @@ attach_metadata <- function(x, meta, format_in, format_out, data_format, parser
               data_format = data_format,
               parser = "chromconverter"
               )
-  # } else if (format_in == "entab"){
-  #   structure(x, instrument = meta$instrument,
-  #             detector = NA,
-  #             software = meta$Version,
-  #             method = meta$method,
-  #             batch = meta$SeqPathAndFile,
-  #             operator = meta$operator,
-  #             run_datetime = meta$run_date,
-  #             sample_name = meta$sample,
-  #             sample_id = NA,
-  #             injection_volume = meta$InjVolume,
-  #             time_range = NA,
-  #             time_interval = NA,
-  #             detector_range = NA,
-  #             format = data_format,
-  #             parser = "entab",
-  #             format_out = format_out)
   }, "chemstation" = {
     datetime_formats <- c("%d-%b-%y, %H:%M:%S", "%m/%d/%Y %I:%M:%S %p", "%d/%m/%Y %I:%M:%S %p")
     meta$date <- as.POSIXct(meta$date, tz = "UTC", tryFormats = datetime_formats)

diff --git a/R/aston_parsers.R → R/call_aston.R b/R/aston_parsers.R → R/call_aston.R
@@ -131,48 +131,13 @@ trace_converter <- function(file, format_out = c("matrix", "data.frame"),
   x
 }
 
-#' Configure Aston
-#'
-#' Configures reticulate to use Aston file parsers.
-#' @name configure_aston
-#' @param return_boolean Logical. Whether to return a Boolean value indicating
-#' if the chromConverter environment is correctly configured.
-#' @return If \code{return_boolean} is \code{TRUE}, returns a Boolean value
-#' indicating whether the chromConverter environment is configured correctly.
-#' Otherwise, there is no return value.
-#' @author Ethan Bass
-#' @import reticulate
-#' @export
-configure_aston <- function(return_boolean = FALSE){
-  install <- FALSE
-  if (!dir.exists(miniconda_path())){
-    install <- readline("It is recommended to install miniconda in your R library to use Aston parsers. Install miniconda now? (y/n)")
-    if (install %in% c('y', "Y", "YES", "yes", "Yes")){
-      install_miniconda()
-    }
-  }
-  env <- reticulate::configure_environment("chromConverter")
-  if (!env){
-    reqs <- c("pandas","scipy","numpy","aston")
-    reqs_available <- sapply(reqs, reticulate::py_module_available)
-    if (!all(reqs_available)){
-      conda_install(envname = "chromConverter", reqs[which(!reqs_available)],
-                    pip = TRUE)
-    }
-  }
-  assign_trace_file()
-  if (return_boolean){
-    return(env)
-  }
-}
-
 #' @noRd
 check_aston_configuration <- function(){
   assign_trace_file()
   if (length(trace_file) == 0){
     ans <- readline("Aston not found. Configure Aston? (y/n)?")
     if (ans %in% c('y', "Y", "YES", "yes", "Yes")){
-      configure_aston()
+      configure_python_environment(parser = "aston")
     }
   }
 }

diff --git a/R/call_entab.R b/R/call_entab.R
@@ -12,7 +12,7 @@
 #' @export
 
 call_entab <- function(file, data_format = c("wide", "long"),
-                       format_in = NULL,
+                       format_in = "",
                        format_out = c("matrix", "data.frame"),
                        read_metadata = TRUE,
                        metadata_format = c("chromconverter", "raw")){
@@ -28,17 +28,29 @@ call_entab <- function(file, data_format = c("wide", "long"),
   metadata_format <- switch(metadata_format,
                             chromconverter = format_in, raw = "raw")
   r <- entab::Reader(file)
+  file_format <- r$parser()
   x <- entab::as.data.frame(r)
-  signal.idx <- grep("signal", colnames(x))
-  if (length(signal.idx) == 1){
-    colnames(x)[signal.idx] <- "wavelength"
-  }
-  if (data_format == "wide"){
+  if (grepl("dad$|uv$", file_format)){
+    signal.idx <- grep("signal", colnames(x))
+    if (length(signal.idx) == 1){
+      colnames(x)[signal.idx] <- "wavelength"
+    }
+    if (data_format == "wide"){
       x <- reshape_chrom_wide(x, time_var = "time", lambda_var = "wavelength",
                               value_var = "intensity")
-  }
-  if (format_out == "matrix"){
-    x <- as.matrix(x)
+      if (format_out == "matrix"){
+        x <- as.matrix(x)
+      }
+    }
+  } else if (grepl("fid$", file_format)){
+    if (data_format == "wide"){
+      x <- data.frame(row.names = x$time, intensity = x$intensity)
+    }
+    if (format_out == "matrix"){
+      x <- as.matrix(x)
+    }
+  } else if (grepl("ms$", file_format)){
+    colnames(x)[1] <- "rt"
   }
   if (read_metadata){
     meta <- r$metadata()

diff --git a/R/call_openchrom.R b/R/call_openchrom.R
@@ -5,7 +5,7 @@
 #' [OpenChrom](https://lablicate.com/platform/openchrom) (version 0.4.0) must be
 #' manually installed. The command line interface is no longer supported in the
 #' latest versions of OpenChrom (starting with version 0.5.0), so the function
-#' will not work with these new versions.
+#' will not work with these newer versions.
 #'
 #' The \code{call_openchrom} works by creating an \code{xml} batchfile and
 #' feeding it to the OpenChrom command-line interface. OpenChrom batchfiles
@@ -34,9 +34,12 @@
 #' @param return_paths Logical. If TRUE, the function will return a character
 #' vector of paths to the newly created files.
 #' @param verbose Logical. Whether to print output from OpenChrom to the console.
-#' @return If \code{return_paths} is TRUE, the function will return a vector of paths to the newly created files.
-#' If \code{return_paths} is FALSE and \code{export_format} is \code{csv}, the function will return a list
-#' of chromatograms in \code{data.frame} format. Otherwise, it will not return anything.
+#' @return If \code{return_paths} is \code{FALSE}, the function will return a
+#' list of chromatograms (if an appropriate parser is available to import the
+#' files into R). The chromatograms will be returned in \code{matrix} or
+#' \code{data.frame} format according to the value of \code{format_out}. If
+#' \code{return_paths} is \code{TRUE}, the function will return a character
+#' vector of paths to the newly created files.
 #' @section Side effects: Chromatograms will be exported in the format specified
 #' by \code{export_format} in the folder specified by \code{path_out}.
 #' @author Ethan Bass
@@ -95,6 +98,7 @@ call_openchrom <- function(files, path_out = NULL, format_in,
 }
 
 #' Writes OpenChrom XML batch file
+#' This function is called internally by \code{call_openchrom}.
 #' @import xml2
 #' @import magrittr
 #' @param files Paths to files for conversion
@@ -177,6 +181,9 @@ configure_openchrom <- function(cli = c("null", "true", "false", "status"), path
   } else{
     path_parser <- path
   }
+  if (grepl("app/?$", path_parser)){
+    path_parser <- fs::path(path_parser, "Contents/MacOS/openchrom")
+  }
   writeLines(path_parser,
              con = system.file('shell/path_to_openchrom_commandline.txt',
                                package='chromConverter'))