Skip to content

Commit

Permalink
v 0.6.2, additional shimadzu support
Browse files Browse the repository at this point in the history
  • Loading branch information
ethanbass committed Aug 6, 2024
1 parent 28883be commit 2dafc43
Show file tree
Hide file tree
Showing 18 changed files with 605 additions and 63 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: chromConverter
Title: Chromatographic File Converter
Version: 0.6.1
Version: 0.6.2
Authors@R: c(
person(given = "Ethan", family = "Bass", email = "[email protected]",
role = c("aut", "cre"),
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@ export(read_mdf)
export(read_mzml)
export(read_peaklist)
export(read_shimadzu)
export(read_shimadzu_gcd)
export(read_shimadzu_lcd)
export(read_shimadzu_lcd_2D)
export(read_shimadzu_lcd_3D)
export(read_thermoraw)
export(read_varian_peaklist)
export(read_waters_arw)
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
## chromConverter 0.6.2

* Updated `read_shimadzu_lcd` function to correctly determine the number of blocks in the "Shimadzu" LCD PDA stream.
* Added preliminary support for 2D data streams from "Shimadzu LCD" files.
* Added parser for 'Shimadzu GCD' files (from GC-FID).

## chromConverter 0.6.1

* Added support for 'Shimadzu ascii' files with '[LC Chromatogram...]' sub-header.
Expand Down
24 changes: 15 additions & 9 deletions R/read_chroms.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
#' 'Agilent ChemStation' (\code{.uv}, \code{.ch}, \code{.dx}), 'Agilent
#' MassHunter' (\code{.dad}), 'Thermo RAW' (\code{.raw}), 'Waters ARW' (\code{.arw}),
#' 'Waters RAW' (\code{.raw}), 'Chromeleon ASCII' (\code{.txt}), 'Shimadzu ASCII'
#' (\code{.txt}), and 'Shimadzu LCD' files (preliminary support). Also, wraps
#' 'OpenChrom' parsers, which include many additional formats. To use 'Entab',
#' (\code{.txt}), 'Shimadzu GCD', and 'Shimadzu LCD' files (preliminary support).
#' Also, wraps 'OpenChrom' parsers, which include many additional formats. To use 'Entab',
#' 'ThermoRawFileParser', or 'OpenChrom' parsers, they must be manually installed.
#' Please see the instructions in the
#' [README](https://ethanbass.github.io/chromConverter/) for further details.
Expand Down Expand Up @@ -89,12 +89,12 @@
#' @export read_chroms

read_chroms <- function(paths, find_files,
format_in=c("agilent_d", "agilent_dx", "chemstation",
format_in = c("agilent_d", "agilent_dx", "chemstation",
"chemstation_fid", "chemstation_ch",
"chemstation_csv", "chemstation_uv",
"masshunter_dad", "chromeleon_uv",
"shimadzu_ascii",
"shimadzu_fid", "shimadzu_dad",
"shimadzu_ascii", "shimadzu_dad",
"shimadzu_fid", "shimadzu_gcd",
"shimadzu_lcd", "thermoraw", "mzml",
"mzxml", "waters_arw", "waters_raw",
"msd", "csd", "wsd", "mdf", "other"),
Expand Down Expand Up @@ -147,9 +147,10 @@ read_chroms <- function(paths, find_files,
"chemstation_130", "chemstation_131",
"openlab_131", "chemstation_179",
"chemstation_81", "chemstation_181",
"chemstation_fid", "chemstation_csv", "masshunter_dad",
"shimadzu_fid", "shimadzu_dad",
"shimadzu_ascii", "shimadzu_lcd",
"chemstation_fid", "chemstation_csv",
"masshunter_dad", "shimadzu_ascii",
"shimadzu_dad", "shimadzu_fid",
"shimadzu_gcd", "shimadzu_lcd",
"chromeleon_uv", "thermoraw", "mzml", "mzxml",
"waters_arw", "waters_raw", "msd", "csd",
"wsd", "mdf", "cdf", "other"))
Expand Down Expand Up @@ -245,10 +246,15 @@ read_chroms <- function(paths, find_files,
data_format = data_format,
read_metadata = read_metadata,
metadata_format = metadata_format, ...)
} else if (format_in == "shimadzu_gcd"){
converter <- partial(read_shimadzu_gcd, format_out = format_out,
data_format = data_format,
read_metadata = read_metadata,
metadata_format = metadata_format, ...)
} else if (format_in == "shimadzu_lcd"){
converter <- partial(read_shimadzu_lcd, format_out = format_out,
data_format = data_format,
read_metadata = read_metadata)
read_metadata = read_metadata, ...)
} else if (format_in == "thermoraw"){
converter <- switch(parser,
"thermoraw" = partial(read_thermoraw, path_out = path_out,
Expand Down
97 changes: 97 additions & 0 deletions R/read_shimadzu_gcd.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#' Shimadzu GCD parser
#'
#' Read chromatogram data stream(s) from 'Shimadzu' GCD files.
#'
#' A parser to read chromatogram data streams from 'Shimadzu' \code{.gcd} files.
#' GCD files are encoded as 'Microsoft' OLE documents. The parser relies on the
#' [olefile](https://pypi.org/project/olefile/) package in Python to unpack the
#' files. The PDA data is encoded in a stream called \code{PDA 3D Raw Data:3D Raw Data}.
#' The GCD data stream contains a segment for each retention time, beginning
#' with a 24-byte header.
#'
#' The 24 byte header consists of the following fields:
#' * 4 bytes: segment label (\code{17234}).
#' * 4 bytes: Little-endian integer specifying the sampling interval in milliseconds.
#' * 4 bytes: Little-endian integer specifying the number of values in the file.
#' * 4 bytes: Little-endian integer specifying the total number of bytes in the file
#' (However, this seems to be off by a few bytes?).
#' * 8 bytes of \code{00}s
#'
#' After the header, the data are simply encoded as 64-bit (little-endian)
#' floating-point numbers. The retention times can be (approximately?) derived
#' from the number of values and the sampling interval encoded in the header.
#' @param path Path to GCD file.
#' @param format_out Matrix or data.frame.
#' @param data_format Either \code{wide} (default) or \code{long}.
#' @param read_metadata Logical. Whether to attach metadata.
#' @param metadata_format Format to output metadata. Either \code{chromconverter}
#' or \code{raw}.
#' @author Ethan Bass
#' @return A 2D chromatogram from the chromatogram stream in \code{matrix} or
#' \code{data.frame} format, according to the value of \code{format_out}.
#' The chromatograms will be returned in \code{wide} or \code{long} format
#' according to the value of \code{data_format}.
#' @note This parser is experimental and may still need some work. It is not
#' yet able to interpret much metadata from the files.
#' @export

read_shimadzu_gcd <- function(path, format_out = c("matrix", "data.frame"),
                              data_format = c("wide", "long"),
                              read_metadata = TRUE,
                              metadata_format = c("chromconverter","raw")){
  # Resolve the enumerated arguments up front so invalid choices fail early.
  # Note: `read_metadata` and `metadata_format` are accepted (and the latter
  # validated) but are not otherwise used by this function yet.
  format_out <- match.arg(format_out, c("matrix", "data.frame"))
  data_format <- match.arg(data_format, c("wide", "long"))
  metadata_format <- match.arg(metadata_format, c("chromconverter","raw"))

  # Set up the Python environment if the 'olefile' module is not available.
  if (!reticulate::py_module_available("olefile")){
    configure_python_environment(parser = "olefile")
  }

  stream_names <- check_streams(path, what = "chromatogram")
  want_wide <- data_format == "wide"
  want_matrix <- format_out == "matrix"

  # Decode each chromatogram stream and shape it as requested.
  chroms <- lapply(stream_names, function(stream_name){
    chrom <- decode_shimadzu_gcd(path, stream = stream_name)
    if (want_wide){
      # wide format: intensities in a single column, retention times as row names
      chrom <- data.frame(int = chrom$int, row.names = chrom$rt)
    }
    if (want_matrix){
      chrom <- as.matrix(chrom)
    }
    chrom
  })

  # A file with a single chromatogram stream is returned unwrapped.
  if (length(chroms) == 1){
    return(chroms[[1]])
  }
  chroms
}

#' Decode a 'Shimadzu' GCD chromatogram stream
#'
#' Exports a single stream with \code{export_stream}, parses its 24-byte header
#' and reads the signal that follows as 64-bit little-endian floating-point
#' numbers. Retention times are derived from the sampling interval (in
#' milliseconds) and the number of values read.
#' @param path Path to GCD file.
#' @param stream Stream to decode; passed on to \code{export_stream}.
#' @return A \code{data.frame} with columns \code{rt} (retention time, converted
#' from milliseconds to minutes) and \code{int} (signal).
#' @noRd
decode_shimadzu_gcd <- function(path, stream){
  path_stream <- export_stream(path, stream = stream)

  f <- file(path_stream, "rb")
  on.exit(close(f), add = TRUE)

  # 24-byte header: segment label (4 bytes), sampling interval in ms (4 bytes),
  # number of values (4 bytes), followed by 12 bytes that are not used here.
  readBin(f, what = "raw", n = 4) # skip segment label
  interval <- readBin(f, what = "integer", size = 4, endian = "little")
  nval <- readBin(f, what = "integer", size = 4, endian = "little")
  readBin(f, what = "raw", n = 12) # skip remainder of header

  if (length(interval) != 1 || length(nval) != 1 || is.na(interval) ||
      is.na(nval) || nval < 0){
    stop("Could not read the header of the chromatogram stream.",
         call. = FALSE)
  }

  signal <- readBin(f, what = "double", n = nval, size = 8, endian = "little")
  if (length(signal) < nval){
    warning("Stream ended after ", length(signal), " of ", nval,
            " expected values.", call. = FALSE)
  }

  # Build the time axis from the interval stored in the header rather than a
  # hard-coded 40 ms start, so it always has one entry per signal value.
  # Double arithmetic avoids integer overflow for long runs.
  times <- (seq_along(signal) * as.numeric(interval)) / 60000
  data.frame(rt = times, int = signal)
}
Loading

0 comments on commit 2dafc43

Please sign in to comment.