Merge pull request #20 from ethanbass/dev

v0.5.0
ethanbass · Nov 10, 2023 · 2873753 · 2873753
2 parents f5b14ae + ba4ca88
commit 2873753
Show file tree

Hide file tree

Showing 54 changed files with 2,538 additions and 914 deletions.
diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml
@@ -47,11 +47,12 @@ jobs:
       - name: Install dependencies
         run: |
           install.packages(c("remotes", "rcmdcheck"))
-          install.packages(c("ncdf4"))
           remotes::install_deps(dependencies = TRUE)
+          install.packages(c("ncdf4"))
+          install.packages("chromConverterExtraTests", repos = "https://ethanbass.github.io/drat/")
           reticulate::install_miniconda()
           reticulate::conda_create('r-reticulate', packages = c('python==3.9', 'numpy', 'scipy', 'pandas'))
-          reticulate::conda_install('r-reticulate', packages = c('aston'), pip=TRUE)
+          reticulate::conda_install('r-reticulate', packages = c('aston', "olefile"), pip=TRUE)
         shell: Rscript {0}
 
       - if: runner.os == 'macOS'

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: chromConverter
 Title: Chromatographic File Converter
-Version: 0.4.3
+Version: 0.5.0
 Authors@R: c(
     person(given = "Ethan", family = "Bass", email = "[email protected]",
                   role = c("aut", "cre"),
@@ -32,15 +32,17 @@ Imports:
     xml2
 Suggests: 
     entab,
-    mzR,
     ncdf4,
     pbapply,
-    testthat (>= 3.0.0)
-Config/reticulate: list( packages = list( list(package = "scipy"),
+    testthat (>= 3.0.0),
+    mzR,
+    chromConverterExtraTests
+Config/reticulate: list( packages = list(list(package = "scipy"),
                                     list(package="numpy"),
                                     list(package = "pandas"),
                                     list(package="aston", pip = TRUE),
-                                    list(package="rainbow-api", pip = TRUE)) )
+                                    list(package="rainbow-api", pip = TRUE),
+                                    list(package = "olefile", pip = TRUE)) )
 Encoding: UTF-8
 Language: en-US
 Roxygen: list(markdown = TRUE)

diff --git a/NAMESPACE b/NAMESPACE
@@ -4,18 +4,23 @@ export(call_entab)
 export(call_openchrom)
 export(call_rainbow)
 export(configure_aston)
+export(configure_olefile)
 export(configure_openchrom)
 export(configure_rainbow)
 export(extract_metadata)
+export(read_agilent_dx)
 export(read_cdf)
 export(read_chemstation_ch)
 export(read_chemstation_csv)
+export(read_chemstation_reports)
 export(read_chemstation_uv)
 export(read_chromeleon)
 export(read_chroms)
 export(read_mdf)
 export(read_mzml)
+export(read_peaklist)
 export(read_shimadzu)
+export(read_shimadzu_lcd)
 export(read_thermoraw)
 export(read_varian_peaklist)
 export(read_waters_arw)
@@ -31,12 +36,12 @@ importFrom(purrr,partial)
 importFrom(readxl,read_xls)
 importFrom(stats,reshape)
 importFrom(stringr,str_split_fixed)
-importFrom(tidyr,pivot_wider)
 importFrom(utils,file_test)
 importFrom(utils,head)
 importFrom(utils,read.csv)
 importFrom(utils,read.table)
 importFrom(utils,tail)
+importFrom(utils,unzip)
 importFrom(utils,write.csv)
 importFrom(utils,write.table)
 importFrom(xml2,read_xml)

diff --git a/NEWS.md b/NEWS.md
@@ -1,6 +1,36 @@
+## chromConverter 0.5.0
+
+### New features
+
+* Added support for parallel processing through the `pbapply` package. (**Note**: The `pbapply` package must be installed manually to enable parallel processing.)
+* Added internal parser for 'Agilent Chemstation' version 31 files (through the `read_chemstation_uv` function).
+* Added support for 'Agilent OpenLab' version 131 files through an internal parser.
+* Added preliminary support for reading 'Agilent' (`.dx`) files (through the `read_agilent_dx` function).
+* Added support for reading 'Chemstation' REPORT files.
+* Added parser for Shimadzu `.lcd` files through the `read_shimadzu_lcd` function. Only the PDA stream (not MS) is currently supported.
+* Added `read_peaklist` function for reading peak lists. Currently 'Agilent Chemstation' and 'Shimadzu ASCII' formats are supported.
+* Added `verbose` argument to control console output for external parsers ('OpenChrom' and 'ThermoRawFileParser').
+
+### Other improvements
+
+* Improved automatic filetype detection by `read_chroms`.
+* Refactored `read_thermoraw` function to simplify paths.
+* The `thermoraw` and `openchrom` parsers now use a proper temp directory if an export directory is not specified through the `path_out` argument.
+* Refactored `reshape_chroms`, speeding up conversion from wide to long format.
+* Added additional tests, attaining 82% test coverage.
+* Changed default `openchrom` export format to `mzml`.
+* Minor changes to some metadata fields to better standardize results across different file formats and parsers.
+
+### Bug fixes
+
+* Corrected 'Shimadzu' DAD parser so it reads wavelengths from the file instead of inferring them.
+* Fixed bug causing failure of 'Shimadzu' ASCII parser (when `what == "peak_table"` and `read_metadata == TRUE`).
+* Fixed bug causing 'MDF' files to export as data.frames when `format_out == "matrix"`.
+* Fixed misleading `data_format` attributes in 'Waters ARW' and 'Chromeleon' parsers.
+
 ## chromConverter 0.4.3
 
-* Fixed bug in `chemstation_ch` parser ([#17](https://github.com/ethanbass/chromConverter/issues/17))
+* Fixed bug in `chemstation_ch` parser (version 130) ([#17](https://github.com/ethanbass/chromConverter/issues/17))
 
 ## chromConverter 0.4.2
 

diff --git a/R/aston_parsers.R b/R/aston_parsers.R
@@ -11,28 +11,35 @@
 #' @param data_format Whether to return data in \code{wide} or \code{long} format.
 #' @param read_metadata Logical. Whether to read metadata and attach it to the
 #' chromatogram.
+#' @param metadata_format Format to output metadata. Either \code{chromconverter}
+#' or \code{raw}.
 #' @return A chromatogram in \code{data.frame} format (retention time x wavelength).
 #' @import reticulate
 #' @export sp_converter
 
 sp_converter <- function(file, format_out = c("matrix", "data.frame"),
                          data_format = c("wide","long"),
-                         read_metadata = TRUE){
+                         read_metadata = TRUE,
+                         metadata_format = c("chromconverter", "raw")){
   check_aston_configuration()
   format_out <- match.arg(format_out, c("matrix","data.frame"))
   data_format <- match.arg(data_format, c("wide","long"))
+  metadata_format <- match.arg(metadata_format, c("chromconverter", "raw"))
+  metadata_format <- switch(metadata_format,
+                            chromconverter = "masshunter_dad", raw = "raw")
+
   x <- trace_file$agilent_uv$AgilentDAD(file)
   x <- pd$DataFrame(x$data$values, columns = x$data$columns,
                     index = x$data$index)
   if (data_format == "long"){
-    x <- reshape_chrom(x)
+    x <- reshape_chrom(x, data_format = "long")
   }
   if (format_out == "matrix"){
     x <- as.matrix(x)
   }
   if (read_metadata){
     meta <- read_masshunter_metadata(file)
-    x <- attach_metadata(x, meta, format_in = "masshunter_dad",
+    x <- attach_metadata(x, meta, format_in = metadata_format,
                          format_out = format_out, data_format = "wide",
                          parser = "aston", source_file = file)
   }
@@ -54,22 +61,28 @@ sp_converter <- function(file, format_out = c("matrix", "data.frame"),
 #' TRUE.
 #' @param read_metadata Logical. Whether to read metadata and attach it to the
 #' chromatogram.
+#' @param metadata_format Format to output metadata. Either \code{chromconverter}
+#' or \code{raw}.
 #' @return A chromatogram in \code{data.frame} format (retention time x wavelength).
 #' @import reticulate
 #' @export uv_converter
 uv_converter <- function(file, format_out = c("matrix","data.frame"),
                          data_format = c("wide","long"),
-                         correction=TRUE, read_metadata = TRUE){
+                         correction = TRUE, read_metadata = TRUE,
+                         metadata_format = c("chromconverter", "raw")){
   check_aston_configuration()
   format_out <- match.arg(format_out, c("matrix","data.frame"))
   data_format <- match.arg(data_format, c("wide","long"))
+  metadata_format <- match.arg(metadata_format, c("chromconverter", "raw"))
+  metadata_format <- switch(metadata_format,
+                            chromconverter = "chemstation_uv", raw = "raw")
   trace_file <- reticulate::import("aston.tracefile")
   pd <- reticulate::import("pandas")
   x <- trace_file$TraceFile(file)
   x <- pd$DataFrame(x$data$values, columns=x$data$columns,
                     index=x$data$index)
   if (data_format == "long"){
-    x <- reshape_chrom(x)
+    x <- reshape_chrom(x, data_format = "long")
   }
   if (format_out == "matrix"){
     x <- as.matrix(x)
@@ -81,7 +94,7 @@ uv_converter <- function(file, format_out = c("matrix","data.frame"),
   }
   if (read_metadata){
     meta <- read_chemstation_metadata(file)
-    x <- attach_metadata(x, meta, format_in = "chemstation_uv",
+    x <- attach_metadata(x, meta, format_in = metadata_format,
                          format_out = format_out, data_format = "wide",
                          parser = "Aston", source_file = file)
   }
@@ -100,17 +113,17 @@ uv_converter <- function(file, format_out = c("matrix","data.frame"),
 #' @import reticulate
 #' @noRd
 trace_converter <- function(file, format_out = c("matrix", "data.frame"),
-                            data_format = c("wide","long")){
+                            data_format = c("wide", "long")){
   check_aston_configuration()
-  format_out <- match.arg(format_out, c("matrix","data.frame"))
-  data_format <- match.arg(data_format, c("wide","long"))
+  format_out <- match.arg(format_out, c("matrix", "data.frame"))
+  data_format <- match.arg(data_format, c("wide", "long"))
   trace_file <- reticulate::import("aston.tracefile")
   pd <- reticulate::import("pandas")
   x <- trace_file$TraceFile(file)
-  x <- pd$DataFrame(x$data$values, columns=x$data$columns,
-                    index=x$data$index)
+  x <- pd$DataFrame(x$data$values, columns = x$data$columns,
+                    index = x$data$index)
   if (data_format == "long"){
-    x <- reshape_chrom(x)
+    x <- reshape_chrom(x, data_format = "long")
   }
   if (format_out == "matrix"){
     x <- as.matrix(x)