Skip to content

Commit

Permalink
Merge pull request #23 from ethanbass/improve_parallelization
Browse files Browse the repository at this point in the history
v0.5.0
  • Loading branch information
ethanbass committed May 7, 2023
2 parents 562025e + 1c22b69 commit 0ac1312
Show file tree
Hide file tree
Showing 44 changed files with 1,252 additions and 331 deletions.
15 changes: 8 additions & 7 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,14 @@ jobs:
fail-fast: false
matrix:
config:
- {os: macos-latest, r: 'release'}
- {os: macos-latest, r: 'release', visual_tests: true}
- {os: windows-latest, r: 'release'}
- {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
- {os: ubuntu-latest, r: 'release'}
- {os: ubuntu-latest, r: 'oldrel-1'}

env:
VISUAL_TESTS: ${{ matrix.config.visual_tests }}
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
R_KEEP_PKG_SOURCE: yes

Expand All @@ -39,6 +40,11 @@ jobs:
http-user-agent: ${{ matrix.config.http-user-agent }}
use-public-rspm: true

- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: any::rcmdcheck
needs: check

- name: Set up Python 3.9
uses: actions/setup-python@v3
with:
Expand All @@ -51,12 +57,7 @@ jobs:
Rscript -e "reticulate::conda_install('r-reticulate', 'python-kaleido')"
Rscript -e "reticulate::conda_install('r-reticulate', 'plotly', channel = 'plotly')"
Rscript -e "reticulate::use_miniconda('r-reticulate')"
- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: any::rcmdcheck
needs: check

- uses: r-lib/actions/check-r-package@v2
with:
upload-snapshots: true
14 changes: 11 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
Type: Package
Package: chromatographR
Title: Chromatographic Data Analysis Toolset
Version: 0.4.8
Authors@R:
Version: 0.5.0
Authors@R: c(
person("Ethan", "Bass", , "[email protected]", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-6175-6739"))
comment = c(ORCID = "0000-0002-6175-6739")),
person(c("Hans","W"), "Borchers", role = c("ctb", "cph"),
comment = c("Author of savgol and pinv functions bundled from pracma"))
)
Maintainer: Ethan Bass <[email protected]>
Description: Tools for high-throughput analysis of HPLC-DAD/UV
chromatograms (or similar data). Includes functions for preprocessing, alignment,
Expand All @@ -31,25 +34,30 @@ Imports:
dynamicTreeCut,
fastcluster,
Formula,
fs,
graphics,
grDevices,
lattice,
methods,
minpack.lm,
parallel,
ptw,
purrr,
pvclust,
scales,
stats,
utils,
VPdtw
Suggests:
cowplot,
ggplot2,
knitr,
openxlsx,
pbapply,
plotly,
reticulate,
rmarkdown,
rsvg,
spelling,
testthat (>= 3.0.0),
vdiffr
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ importFrom(lattice,panel.stripplot)
importFrom(lattice,stripplot)
importFrom(methods,new)
importFrom(minpack.lm,nlsLM)
importFrom(parallel,mclapply)
importFrom(pvclust,pvclust)
importFrom(pvclust,pvpick)
importFrom(pvclust,pvrect)
Expand Down
35 changes: 33 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,38 @@
# chromatographR 0.5.0

#### New features

* Added `ggplot2` option to `plot_spectrum`, `plot.peak_table` and `plot_all_spectra` functions.
* Reworked `write_chroms` for more sensible handling of paths and added `filename` argument.
* Updated `get_purity` function to improve speed.
* Added additional argument to `reshape_chroms` function for subsetting data by
retention times (`rts`).
* Added parallel processing through the `pbapply` package for the `correct_rt`,
`get_peaks`, and `preprocess` functions by setting the `cl` argument.

#### Other changes

* Changed behavior of `preprocess` when inferring retention times so chromatograms are no longer rounded down to the largest integer.
* In `preprocess`, spectral smoothing is no longer applied on 2D chromatograms, removing error message when preprocess is used with default settings.
* Moved position of `...` argument to end in `plot.peak_table`.
* Changed `progress_bar` argument to `show_progress` in `correct_rt`, `preprocess`
and `get_peaks` to fix strange `pmatch` behavior with additional arguments to
preprocess.
* Changed orientation of "plotly" plots generated by `plot_spectrum` to match other
plotting engines.
* Deprecated the `mc.cores` argument in `correct_rt` is now deprecated in favor of the new
`cl` argument.
* Deprecated the `parallel` argument in `preprocess` in favor of just using `cl`.
* Changed name of first argument in `mirror_plot` from `peak_table` to `x`. Otherwise the function has not changed.
* Added additional tests, improving test coverage to 80%.
* Updated `get_chrom_list` (internal) to allow parsing of subsetted lists.

# chromatographR 0.4.8

* Fixed `merge_peaks` function so it works properly (to combine 2 or more peaks in peak table).
* Fixed bug in `merge_peaks` function so it works properly (to combine 2 or more
peaks in a peak table).
* Fixed bugs in `plot_chroms` preventing plotting with `ggplot2` and plotting wrong chromatograms in base R.
* Added additional tests for `plot_chroms`, reshape functions.
* Added additional tests for `plot_chroms` and reshape functions.

# chromatographR 0.4.7

Expand Down Expand Up @@ -33,6 +63,7 @@
# chromatographR 0.4.5

### New Features

* Added `reshape_chroms` function for converting chromatograms to "long" format.
* Added `write_peaktable` function to easily write peak_table to `csv` or `xlsx`.
* Added `get_purity` function for assessing peak purity.
Expand Down
4 changes: 2 additions & 2 deletions R/attach_metadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ get_reference_spectra <- function(peak_table, chrom_list,

attach_ref_spectra <- function(peak_table, chrom_list, ref = c("max.cor","max.int")){
check_peaktable(peak_table)
peak_table$ref_spectra <- get_reference_spectra(peak_table, chrom_list, ref)
peak_table$ref_spectra <- get_reference_spectra(peak_table, chrom_list, ref = ref)
peak_table$args["reference_spectra"] <- ref
return(peak_table)
}
Expand Down Expand Up @@ -193,7 +193,7 @@ normalize_data <- function(peak_table, column, chrom_list,
})))
rownames(pktab) <- rownames(peak_table$tab)
peak_table$tab <- pktab
peak_table$args[c("normalized","normalization_by")] <- c(TRUE, column)
peak_table$args[c("normalized", "normalization_by")] <- c(TRUE, column)
return(peak_table)
} else if (what == "chrom_list"){
if (missing(chrom_list)){
Expand Down
13 changes: 7 additions & 6 deletions R/correct_rt.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,13 @@
#' @param maxshift Integer. Maximum allowable shift for \code{\link[VPdtw]{VPdtw}}.
#' Defaults to 50.
#' @param verbose Whether to print verbose output.
#' @param progress_bar Logical. Whether to show progress bar. Defaults to
#' @param show_progress Logical. Whether to show progress bar. Defaults to
#' \code{TRUE} if \code{\link[pbapply]{pbapply}} is installed. Currently works
#' only for \code{ptw} alignments.
#' @param cl Argument to \code{\link[pbapply]{pblapply}} or \code{\link[parallel]{mclapply}}.
#' Either an integer specifying the number of clusters to use for parallel
#' processing or a cluster object created by \code{\link[parallel]{makeCluster}}.
#' Defaults to 2. On Windows integer values will be ignored.
#' @param \dots Optional arguments for the \code{\link[ptw:ptw]{ptw}} function.
#' The only argument that cannot be changed is \code{warp.type}: this is always
#' equal to \code{"global"}.
Expand Down Expand Up @@ -80,7 +84,7 @@ correct_rt <- function(chrom_list, lambdas, models = NULL, reference = 'best',
init.coef = c(0, 1, 0), n.traces = NULL, n.zeros = 0,
scale = FALSE, trwdth = 200, plot_it = FALSE,
penalty = 5, maxshift = 50,
verbose = FALSE, progress_bar, ...){
verbose = FALSE, show_progress = NULL, cl = 2, ...){
what <- match.arg(what, c("corrected.values", "models"))
alg <- match.arg(alg, c("ptw", "vpdtw"))

Expand Down Expand Up @@ -134,10 +138,7 @@ correct_rt <- function(chrom_list, lambdas, models = NULL, reference = 'best',
penalty = penalty, maxshift = maxshift))
if (alg == "ptw"){
if (is.null(models)){
if (missing(progress_bar)){
progress_bar <- check_for_pkg("pbapply", return_boolean = TRUE)
}
laplee <- choose_apply_fnc(progress_bar)
laplee <- choose_apply_fnc(show_progress, cl = cl)
models <- laplee(seq_len(dim(allmats)[3]), function(ii){
ptw(allmats.t[,, reference],
allmats.t[,, ii], selected.traces = traces, init.coef = init.coef,
Expand Down
16 changes: 10 additions & 6 deletions R/fit_peaks.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,12 @@
find_peaks <- function(y, smooth_type=c("gaussian", "box", "savgol", "mva","tmva","none"),
smooth_window = .001, slope_thresh = 0, amp_thresh = 0,
bounds = TRUE){
if (!is.vector(y)){
stop("Please provide a vector to argument `y` to proceed.")
}
smooth_type <- match.arg(smooth_type, c("gaussian", "box","savgol", "mva","tmva","none"))
if (smooth_window < 1){
smooth_window <- length(y)*smooth_window
smooth_window <- max(length(y) * smooth_window, 1)
}
# compute derivative (with or without smoothing)
if (smooth_type == "savgol"){
Expand All @@ -71,7 +74,7 @@ find_peaks <- function(y, smooth_type=c("gaussian", "box", "savgol", "mva","tmva
} else if (smooth_type == "box"){
d <- diff(ksmooth(seq_along(y), y, kernel = "box", bandwidth = smooth_window)$y)
} else if (smooth_type == "tmva"){
d <- runmean(runmean(diff(y), k=smooth_window), k=smooth_window)
d <- runmean(runmean(diff(y), k = smooth_window), k = smooth_window)
} else{
d <- diff(y)
}
Expand Down Expand Up @@ -159,9 +162,10 @@ find_peaks <- function(y, smooth_type=c("gaussian", "box", "savgol", "mva","tmva
#' good model for chromatographic peaks in isocratic HPLC? \emph{Chromatographia},
#' /bold{26}: 285-296. \doi{10.1007/BF02268168}.
#' @export fit_peaks

fit_peaks <- function (x, lambda, pos = NULL, sd.max = 50,
fit = c("egh", "gaussian", "raw"), max.iter = 1000,
estimate_purity = TRUE, noise_threshold=.001, ...){
estimate_purity = TRUE, noise_threshold = .001, ...){
y <- x[,lambda]
fit <- match.arg(fit, c("egh", "gaussian", "raw"))
if (is.null(pos)){
Expand Down Expand Up @@ -209,7 +213,7 @@ fit_peaks <- function (x, lambda, pos = NULL, sd.max = 50,
#' Gaussian function
#' @note: Adapted from \href{https://github.com/robertdouglasmorrison/DuffyTools/blob/master/R/gaussian.R}
#' @noRd
gaussian <- function(x, center=0, width=1, height=NULL, floor=0) {
gaussian <- function(x, center = 0, width = 1, height=NULL, floor=0) {

# adapted from Earl F. Glynn; Stowers Institute for Medical Research, 2007
twoVar <- 2 * width * width
Expand Down Expand Up @@ -315,7 +319,7 @@ fit_egh <- function(x1, y1, start.center=NULL, start.width=NULL, start.tau=NULL,
if (is.null( start.floor)) start.floor <- quantile( y1, seq(0,1,0.1))[2]
starts <- c(starts, "floor"=start.floor)
nlsAns <- try(nlsLM(y1 ~ egh(x1, center, width, height, tau, floor),
start=starts, control=controlList), silent=TRUE)
start = starts, control = controlList), silent = TRUE)
}

# package up the results to pass back
Expand Down Expand Up @@ -398,7 +402,7 @@ fitpk_raw <- function(x, pos, lambda, max.iter,
#' @param fl Filter length (for instance fl = 51..151), has to be odd.
#' @param forder filter order Filter order (2 = quadratic filter, 4 = quartic).
#' @param dorder Derivative order (0 = smoothing, 1 = first derivative, etc.).
#' @note This function is ported from \href{https://cran.r-project.org/web/packages/pracma/index.html}{pracma},
#' @note This function is bundled from \href{https://cran.r-project.org/web/packages/pracma/index.html}{pracma},
#' where it is licensed under GPL (>= 3).
#' @importFrom stats convolve
#' @noRd
Expand Down
13 changes: 7 additions & 6 deletions R/get_peaks.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,12 @@
#' @param estimate_purity Logical. Whether to estimate purity or not. Defaults
#' to FALSE. (If TRUE, this will slow down the function significantly).
#' @param noise_threshold Noise threshold. Argument to \code{get_purity}.
#' @param progress_bar Logical. Whether to show progress bar. Defaults to
#' @param show_progress Logical. Whether to show progress bar. Defaults to
#' \code{TRUE} if \code{\link[pbapply]{pbapply}} is installed.
#' @param cl Argument to \code{\link[pbapply]{pblapply}} or \code{\link[parallel]{mclapply}}.
#' Either an integer specifying the number of clusters to use for parallel
#' processing or a cluster object created by \code{\link[parallel]{makeCluster}}.
#' Defaults to 2. On Windows integer values will be ignored.
#' @param \dots Additional arguments to \code{\link{find_peaks}}.
#' @return The result is an S3 object of class \code{peak_list}, containing a nested
#' list of data.frames containing information about the peaks fitted for each
Expand Down Expand Up @@ -69,7 +73,7 @@ get_peaks <- function(chrom_list, lambdas, fit = c("egh", "gaussian", "raw"),
sd.max = 50, max.iter = 100,
time.units = c("min", "s", "ms"),
estimate_purity = FALSE, noise_threshold = .001,
progress_bar, ...){
show_progress = NULL, cl = 2, ...){
time.units <- match.arg(time.units, c("min", "s", "ms"))
tfac <- switch(time.units, "min" = 1, "s" = 60, "ms" = 60*1000)
fit <- match.arg(fit, c("egh", "gaussian", "raw"))
Expand All @@ -90,10 +94,7 @@ get_peaks <- function(chrom_list, lambdas, fit = c("egh", "gaussian", "raw"),
names(chrom_list) <- seq_along(chrom_list)
}
peaks<-list()
if (missing(progress_bar)){
progress_bar <- check_for_pkg("pbapply", return_boolean = TRUE)
}
laplee <- choose_apply_fnc(progress_bar)
laplee <- choose_apply_fnc(show_progress, cl = cl)
result <- laplee(seq_along(chrom_list), function(sample){
suppressWarnings(ptable <- lapply(lambdas, function(lambda){
cbind(sample = names(chrom_list)[sample], lambda,
Expand Down
16 changes: 8 additions & 8 deletions R/get_peaktable.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@
#' get_peaktable(pks, response = "area")
#' @seealso \code{\link{attach_ref_spectra}} \code{\link{attach_metadata}}
#' @export get_peaktable
#'

get_peaktable <- function(peak_list, chrom_list, response = c("area", "height"),
use.cor = FALSE, hmax = 0.2, plot_it = FALSE,
ask = plot_it, clust = c("rt","sp.rt"),
Expand Down Expand Up @@ -132,28 +132,28 @@ get_peaktable <- function(peak_list, chrom_list, response = c("area", "height"),
}
if (clust == 'sp.rt'){
if (is.null(sigma.t)){
sigma.t <- .5*mean(do.call(rbind,unlist(pkLst,recursive = FALSE))$end -
sigma.t <- 0.5 * mean(do.call(rbind,unlist(pkLst,recursive = FALSE))$end -
do.call(rbind,unlist(pkLst,recursive = FALSE))$start)
}
ts<- as.numeric(rownames(chrom_list[[1]]))
ts <- as.numeric(rownames(chrom_list[[1]]))
sp <- sapply(seq_along(pkcenters), function(i){
rescale(t(chrom_list[[file.idx[i]]][
which(elementwise.all.equal(ts, pkcenters[i])),]))
}, simplify=TRUE)
cor.matrix <- cor(sp, method = "pearson")
mint <- abs(outer(unlist(pkcenters), unlist(pkcenters), FUN="-"))
S <- (exp((-(1-abs(cor.matrix))^2)/(2*sigma.r^2)))*exp(-(mint^2)/(2*sigma.t^2))
D <- 1-S
S <- (exp((-(1 - abs(cor.matrix))^2)/(2*sigma.r^2)))*exp(-(mint^2)/(2*sigma.t^2))
D <- 1 - S
linkage <- "average"
pkcenters.hcl <- hclust(as.dist(D), method = linkage)
pkcenters.cl <- cutreeDynamicTree(pkcenters.hcl, maxTreeHeight = hmax,
deepSplit = deepSplit, minModuleSize = 2)
sing <- which(pkcenters.cl == 0)
pkcenters.cl[sing] <- max(pkcenters.cl) + seq_along(sing)
}
cl.centers <- aggregate(cbind(rt,start,end,sd,tau,FWHM,r.squared,purity) ~
pkcenters.cl, data=xx, "mean", na.rm=TRUE,
na.action="na.pass")[,-1]
vars <- c("rt", "start", "end", "sd", "tau", "FWHM", "r.squared", "purity")
cl.centers <- aggregate(xx[,vars], by = list(pkcenters.cl), FUN = "mean",
na.rm = TRUE, na.action = "na.pass")[,-1]
ncl <- length(cl.centers$rt)

## re-order clusters from small to large rt
Expand Down
Loading

0 comments on commit 0ac1312

Please sign in to comment.