Skip to content

Commit

Permalink
Merge pull request #31 from ethanbass/dev
Browse files Browse the repository at this point in the history
v0.7.0
  • Loading branch information
ethanbass authored Jan 26, 2024
2 parents 9f4d90b + babf42a commit 708ab3a
Show file tree
Hide file tree
Showing 67 changed files with 2,956 additions and 944 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: chromatographR
Title: Chromatographic Data Analysis Toolset
Version: 0.6.1
Version: 0.7.0
Authors@R: c(
person("Ethan", "Bass", , "[email protected]", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-6175-6739")),
Expand Down Expand Up @@ -43,7 +43,6 @@ Imports:
parallel,
ptw,
purrr,
pvclust,
scales,
stats,
utils,
Expand All @@ -55,6 +54,7 @@ Suggests:
openxlsx,
pbapply,
plotly,
pvclust,
reticulate,
rmarkdown,
rsvg,
Expand All @@ -68,4 +68,4 @@ Encoding: UTF-8
Language: en-US
LazyData: true
LazyDataCompression: xz
RoxygenNote: 7.2.3
RoxygenNote: 7.3.0
9 changes: 3 additions & 6 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@ export(filter_peaks)
export(filter_peaktable)
export(find_peaks)
export(fit_peaks)
export(get_agilent_threshold)
export(get_peaks)
export(get_peaktable)
export(load_chroms)
export(get_purity)
export(merge_peaks)
export(mirror_plot)
export(normalize_data)
Expand All @@ -33,8 +34,8 @@ export(read_chroms)
export(reshape_chroms)
export(reshape_peaktable)
export(scan_chrom)
export(trim_peak)
export(write_peaktable)
import(chromConverter)
import(ptw)
importFrom(Formula,Formula)
importFrom(caTools,runmean)
Expand Down Expand Up @@ -62,9 +63,6 @@ importFrom(lattice,stripplot)
importFrom(methods,new)
importFrom(minpack.lm,nlsLM)
importFrom(parallel,mclapply)
importFrom(pvclust,pvclust)
importFrom(pvclust,pvpick)
importFrom(pvclust,pvrect)
importFrom(scales,alpha)
importFrom(scales,rescale)
importFrom(stats,aggregate)
Expand Down Expand Up @@ -92,6 +90,5 @@ importFrom(stats,smooth.spline)
importFrom(stats,terms)
importFrom(stats,var)
importFrom(utils,head)
importFrom(utils,read.csv)
importFrom(utils,tail)
importFrom(utils,write.csv)
34 changes: 33 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,36 @@
# chromatographR 0.7.0

* Updated `correct_peaks` function so it works properly for correcting retention times in peak lists.
* Added `fixed_levels` argument to `reshape_peaktable` so features can be plotted in the order they're provided by the user.
* Added option for summing split peaks using the `merge_peaks` function by selecting `method = "sum"`.
* Updated `get_peaktable` so that the `use.cor` argument works correctly (to use corrected retention times stored in a separate column).
* Fixed `mirror_plot` so it can take numeric input for lambdas.
* Changed default setting of `verbose` argument in `correct_rt` from `FALSE`
to default setting.
* Removed `load_chroms` function. Use `read_chroms` instead.
* Eliminated spurious warning from `attach_ref_spectra` function.
* Changed name of `index` argument in `plot.peak_list` to `idx`. The original argument is now deprecated.
* Fixed bug affecting `plot_purity` argument in `plot.peak_list`.
* Fixed bug in `reshape_chroms` so empty metadata column no longer appears.
* The `plot_spectrum` function now includes the peak names when plotting spectra.
* Fixed `correct_rt` so it no longer requires user-provided `lambdas` for 1D chromatograms.
* Added `subset.peak_table` function for easily subsetting peak_tables (e.g. to exclude specific
peaks or samples).
* Added `what` argument for `plot_all_spectra` (e.g. to plot multiple spectra at a particular retention time).

#### Refactoring of `cluster_spectra` function:

* For simplicity, `cluster_spectra` now requires reference spectra to be attached to peak table.
* Accordingly, the `chrom_list` argument is no longer needed.
* Saving to RDS is now turned off by default.
* The `pvclust` package is now suggested instead of being required.

#### Updates to vignette and documentation

* Suggest numeric input to lambdas instead of character input to reduce unnecessary confusion.
* Made other minor changes to text of vignette to (hopefully) improve clarity.
* Added a short section on the attachment of reference spectra.

# chromatographR 0.6.1

* Fixed bug in plot functions (e.g. `plot_chroms` and `plot_spectrum`) causing error when retention times are inconsistent between chromatograms.
Expand All @@ -12,7 +45,6 @@
* Updated `get_peaktable` for greater flexibility (e.g. for usage of 'ChemStation' peak lists as input).

#### Other changes

* Made some minor changes to vignette to improve clarity (e.g. using single wavelength for integration, etc.)

#### Bug fixes
Expand Down
11 changes: 7 additions & 4 deletions R/attach_metadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ attach_metadata <- function(peak_table, metadata, column){
missing_meta <- !(meta[, column] %in% metadata[, column])
if (sum(missing_meta) > 0)
warning("The supplied metadata does not include all samples.")
meta <- keep_order(meta, merge, y=metadata, all.x = TRUE, all.y = FALSE,
meta <- keep_order(meta, merge, y = metadata, all.x = TRUE, all.y = FALSE,
sort = FALSE, by = column)
peak_table$sample_meta <- meta
return(peak_table)
Expand All @@ -52,7 +52,7 @@ attach_metadata <- function(peak_table, metadata, column){
#' @noRd
keep_order <- function(data, fn, ...) {
col <- ".sortColumn"
data[,col] <- 1:nrow(data)
data[,col] <- seq_len(nrow(data))
out <- fn(data, ...)
if (!col %in% colnames(out)) stop("Ordering column not preserved by function")
out <- out[order(out[,col]),]
Expand Down Expand Up @@ -100,11 +100,13 @@ get_reference_spectra <- function(peak_table, chrom_list,
apply(x, 2, as.numeric)
})
sp.ref <- sapply(seq_along(sp.l), function(i){
sp.l[[i]][,which.max(colMeans(cor(sp.l[[i]][,which(apply((sp.l[[i]]), 2, sd)!=0), drop=FALSE])))]
sp.l[[i]][, which.max(
colMeans(cor(sp.l[[i]][, which(apply((sp.l[[i]]), 2, sd)!=0),
drop = FALSE])))]
})
} else {
sp.ref <- sapply(colnames(peak_table$tab), function(pk){
try(plot_spectrum(loc = pk, peak_table, chrom_list, plot_trace=FALSE,
try(plot_spectrum(loc = pk, peak_table, chrom_list, plot_trace = FALSE,
plot_spectrum = FALSE, export_spectrum = TRUE,
verbose = FALSE,
scale_spectrum = TRUE, engine = "base")
Expand Down Expand Up @@ -144,6 +146,7 @@ get_reference_spectra <- function(peak_table, chrom_list,

attach_ref_spectra <- function(peak_table, chrom_list, ref = c("max.cor","max.int")){
check_peaktable(peak_table)
ref <- match.arg(ref, c("max.cor","max.int"))
peak_table$ref_spectra <- get_reference_spectra(peak_table, chrom_list, ref = ref)
peak_table$args["reference_spectra"] <- ref
return(peak_table)
Expand Down
69 changes: 42 additions & 27 deletions R/chromatographR-package.R
Original file line number Diff line number Diff line change
@@ -1,57 +1,72 @@


#' chromatographR
#'
#' chromatographR
#'
#' \tabular{ll}{ Package: \tab chromatographR\cr Type: \tab Package\cr Version:
#' \tab 0.6.1 \cr Date: \tab 2023-10-23\cr License: GPL (>= 2) }
#' Chromatographic Data Analysis Toolset
#'
#' @name chromatographR-package
#' @aliases chromatographR-package chromatographR
#' @docType package
#' Tools for high-throughput analysis of HPLC-DAD/UV
#' chromatograms (or similar data). Includes functions for preprocessing, alignment,
#' peak-finding and fitting, peak-table construction, data-visualization, etc.
#' Preprocessing and peak-table construction follow the rough formula laid out
#' in alsace (Wehrens, R., Bloemberg, T.G., and Eilers P.H.C., 2015.
#' \doi{10.1093/bioinformatics/btv299}). Alignment of chromatograms is available
#' using parametric time warping (ptw) (Wehrens, R., Bloemberg, T.G., and Eilers
#' P.H.C. 2015. \doi{10.1093/bioinformatics/btv299}) or variable penalty dynamic
#' time warping (VPdtw) (Clifford, D., & Stone, G. 2012. \doi{10.18637/jss.v047.i08}).
#' Peak-finding relies on the algorithm suggested by Tom O'Haver in his
#' \href{https://terpconnect.umd.edu/~toh/spectrum/PeakFindingandMeasurement.htm}{
#' Pragmatic Introduction to Signal Processing}. Peaks are then fitted to a
#' gaussian or exponential-gaussian hybrid peak shape using non-linear least
#' squares (Lan, K. & Jorgenson, J. W. 2001. \doi{10.1016/S0021-9673(01)00594-5}).
#' More details on package usage and a suggested workflow can be found in the
#' vignette.
#' @author Ethan Bass
#'
#' Maintainer: Ethan Bass
#' @keywords package
NULL
"_PACKAGE"

#' HPLC-DAD data of goldenrod root extracts.
#' Raw goldenrod root chromatograms
#'
#' Four HPLC-DAD data matrices of *Solidago altissima* roots extracted in 90
#' percent methanol.
#' A list of four HPLC-DAD data matrices of \emph{Solidago altissima} roots
#' extracted in 90\% methanol. Retention times are stored in rows and
#' wavelengths are stored in columns.
#'
#' @name Sa
#' @docType data
#' @format A list of four matrices (time x wavelength).
#' @keywords data
#' @usage data(Sa)
#' @format A list of four matrices (1301 times x 60 wavelengths).
NULL

#' HPLC-DAD data of goldenrod root extracts.
#' Preprocessed goldenrod root chromatograms
#'
#' Pre-processed chromatograms.
#' A list of four pre-processed HPLC-DAD chromatograms derived from the raw data
#' stored in \code{Sa}. Retention times are stored in rows and wavelengths
#' are stored in columns. The time axis is compressed to save space and
#' processing time so the data are a little choppy.
#'
#' @name Sa_pr
#' @keywords data
#' @docType data
#' @format Four pre-processed matrices (time x wavelength) to use in examples.
#' @usage data(Sa_pr)
#' @format A list of four pre-processed matrices (434 retention times x 60 wavelengths).
NULL

#' HPLC-DAD data of goldenrod root extracts.
#' Warped goldenrod root chromatograms.
#'
#' Pre-processed and warped chromatograms.
#' A list of four pre-processed and warped goldenrod root chromatograms derived
#' from the raw data stored in \code{Sa}.
#'
#' @name Sa_warp
#' @docType data
#' @format Four pre-processed and warped matrices (time x wavelength) to use in
#' examples.
#' @keywords data
#' @usage data(Sa_warp)
#' @format A list of four pre-processed and warped matrices (434 times x 60 wavelengths).
NULL

#' Goldenrod peak table
#'
#' Peak table generated from example goldenrod extracts for examples.
#' Peak table generated from exemplary goldenrod root extracts.
#'
#' @name pk_tab
#' @docType data
#' @keywords data
#' @usage data(pk_tab)
#' @format A \code{peak_table} object.
NULL


110 changes: 56 additions & 54 deletions R/cluster_spectra.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,10 @@ setClass("cluster", representation(peaks = "character", pval = "numeric"))
#' the confidence threshold will be returned.
#'
#' @name cluster_spectra
#' @importFrom pvclust pvclust pvrect pvpick
#' @importFrom stats cor
#' @importFrom methods new
#' @importFrom graphics matplot
#' @param peak_table Peak table from \code{\link{get_peaktable}}.
#' @param chrom_list A list of chromatograms in matrix format (timepoints x
#' wavelengths). If no argument is provided here, the function will try to find
#' the \code{chrom_list} object used to create the provided \code{peak_table}.
#' @param peak_no Minimum and maximum thresholds for the number of peaks a
#' cluster may have.
#' @param alpha Confidence threshold for inclusion of cluster.
Expand Down Expand Up @@ -78,69 +74,75 @@ setClass("cluster", representation(peaks = "character", pval = "numeric"))
#' @examples \donttest{
#' data(pk_tab)
#' data(Sa_warp)
#' pk_tab <- attach_ref_spectra(pk_tab, Sa_warp, ref="max.int")
#' cl <- cluster_spectra(pk_tab, nboot=100, max.only = FALSE, save = FALSE, alpha = .97)
#' }
#' @export cluster_spectra
#' @md

cluster_spectra <- function(peak_table, chrom_list, peak_no = c(5,100),
alpha=0.95, nboot=1000, plot_dend=TRUE,
plot_spectra=TRUE, verbose=TRUE, save=TRUE,
parallel=TRUE, max.only=FALSE,
output=c("clusters", "pvclust", "both"),
cluster_spectra <- function(peak_table, peak_no = c(5,100), alpha = 0.95,
nboot = 1000, plot_dend = TRUE, plot_spectra = TRUE,
verbose = getOption("verbose"),
save = FALSE, parallel = TRUE, max.only = FALSE,
output = c("pvclust", "clusters"),
...){
check_for_pkg("pvclust")
check_peaktable(peak_table)
if (missing(chrom_list)){
chrom_list <- get_chrom_list(peak_table)
} else get_chrom_list(peak_table, chrom_list)
output <- match.arg(output, c("clusters","pvclust","both"))
output <- match.arg(output, c("pvclust", "clusters"), several.ok = TRUE)
if (is.data.frame(peak_table$ref_spectra) | is.matrix(peak_table$ref_spectra)){
rep <- peak_table$ref_spectra
} else{
if (verbose)
print('...collecting representative spectra')
rep <- sapply(colnames(peak_table[[1]]), function(j){
sp <- plot_spectrum(loc=j, peak_table=peak_table, chrom_list = chrom_list,
scale_spectrum=TRUE, plot_trace=FALSE,
export_spectrum=TRUE, plot_spectrum=FALSE, verbose=verbose)
})
rep <- data.frame(do.call(cbind,rep))
names(rep) <- paste0('V',seq_len(ncol(rep)))
spectra <- peak_table$ref_spectra
} else {
stop("Please attach reference spectra (using the `attach_ref_spectra` function) before running `cluster_spectra`.")
}
rm <- which(apply(spectra, 2, sd) == 0)
if (length(rm) > 0){
if (verbose){
message(paste0("Removing peaks due to bad spectra: ", paste(sQuote(colnames(spectra)[rm]),collapse=", ")))
}
spectra <- spectra[, -rm]
}
rm <- which(apply(rep,2,sd)==0)
if (length(rm)>0)
rep <- rep[,-rm]
d <- 1 - abs(cor(rep, method="pearson"))
if (verbose)
print('...clustering spectra')
result <- suppressWarnings(pvclust(rep, method.dist = "cor",
nboot = nboot, parallel = parallel, quiet=!verbose, ...)
message('...clustering spectra')
result <- suppressWarnings(pvclust::pvclust(spectra, method.dist = "cor",
nboot = nboot, parallel = parallel,
quiet = !verbose, ...)
)
if (plot_dend){
plot(result,labels = FALSE, cex.pv=0.5, print.pv = 'au',print.num = FALSE)
pvrect(result, alpha = alpha, max.only = max.only)
plot(result, labels = FALSE, cex.pv = 0.5, print.pv = 'au',
print.num = FALSE)
pvclust::pvrect(result, alpha = alpha, max.only = max.only)
}
if (save){
saveRDS(result, 'pvclust.RDS')
}
if (save) saveRDS(result, 'pvclust.RDS')
p <- pvpick(result, alpha = alpha, max.only = max.only)
l <- sapply(p$clusters, length)
sub <- p$clusters[which(l > peak_no[1] & l < peak_no[2])]
pval <- 1-result$edges[p$edges[which(l > peak_no[1] & l < peak_no[2])],'au']
sub <- lapply(seq_along(sub), function(i){
new("cluster", peaks=sub[[i]], pval=pval[i])})
pval <- format(round(
result$edges[p$edges[which(l > peak_no[1] & l < peak_no[2])],'au'],2),
nsmall=2)
names(sub) <- paste0('c', seq_along(sub))
picks <- pvclust::pvpick(result, alpha = alpha, max.only = max.only)
cl_size <- sapply(picks$clusters, length)

if (plot_spectra){
if (verbose) print('...plotting clustered spectra')
new.lambdas <- colnames(chrom_list[[1]])
sapply(seq_along(sub), function(i){
matplot(new.lambdas,rep[,sub[[i]]@peaks],
type='l', ylab='', yaxt='n', xlab=expression(lambda),
main=paste0('cluster ', i, '; p = ',
format(round(sub[[i]]@pval,2),nsmall=2))
)})
## filter clusters ##
cl_idx <- which(cl_size > peak_no[1] & cl_size < peak_no[2])
if (verbose && length(cl_idx) < length(cl_size)){
message(paste0("Removing ", length(cl_size) - length(cl_idx), " under- or oversized clusters from results."))
}
clusters <- picks$clusters[cl_idx]
pval <- 1 - result$edges[picks$edges[cl_idx], 'au']
clusters <- lapply(seq_along(clusters), function(i){
new("cluster", peaks = clusters[[i]], pval = pval[i])
})
if (length(clusters) != 0){
names(clusters) <- paste0('c', seq_along(clusters))

if (plot_spectra){
if (verbose) message('...plotting clustered spectra')
lambdas <- as.numeric(rownames(spectra))
sapply(seq_along(clusters), function(i){
matplot(lambdas, spectra[, clusters[[i]]@peaks],
type = 'l', ylab = '', yaxt = 'n', xlab = expression(lambda),
main = paste0('cluster ', i, '; p = ',
format(round(clusters[[i]]@pval, 2), nsmall = 2))
)
})
}
}
switch(output, "clusters" = sub, "pvclust" = result, "both" = list(result,sub))
pvclust <- result
mget(output)
}
Loading

0 comments on commit 708ab3a

Please sign in to comment.