From 9345a7865428e3cbbcfe00c9c6a8579611871585 Mon Sep 17 00:00:00 2001
From: Philippine Louail <127301965+philouail@users.noreply.github.com>
Date: Wed, 15 Jan 2025 15:23:40 +0100
Subject: [PATCH] Move functions to ProtGenerics.

---
 DESCRIPTION                             |   4 +-
 NAMESPACE                               |  12 +-
 NEWS.md                                 |   7 +
 R/Spectra-functions.R                   | 154 ----------------------
 R/Spectra.R                             | 167 +++++++++++++++++++++++-
 man/Spectra.Rd                          |   2 +-
 man/addProcessing.Rd                    |  53 ++++----
 man/processingChunkSize.Rd              |  17 +--
 tests/testthat/test_Spectra-functions.R |  98 --------------
 tests/testthat/test_Spectra.R           |  98 ++++++++++++++
 10 files changed, 318 insertions(+), 294 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index b355aa2d..7c1bc499 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: Spectra
 Title: Spectra Infrastructure for Mass Spectrometry Data
-Version: 1.17.4
+Version: 1.17.5
 Description: The Spectra package defines an efficient infrastructure
     for storing and handling mass spectrometry spectra and functionality to
     subset, process, visualize and compare spectra data. It provides different
@@ -42,7 +42,7 @@ Depends:
     S4Vectors,
     BiocParallel
 Imports:
-    ProtGenerics (>= 1.39.1),
+    ProtGenerics (>= 1.39.2),
    methods,
    IRanges,
    MsCoreUtils (>= 1.7.5),
diff --git a/NAMESPACE b/NAMESPACE
index c3dd72ab..45f8da1a 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,13 +1,11 @@
 # Generated by roxygen2: do not edit by hand

-export("processingChunkSize<-")
 export(MsBackendCached)
 export(MsBackendDataFrame)
 export(MsBackendHdf5Peaks)
 export(MsBackendMemory)
 export(MsBackendMzR)
 export(PrecursorMzParam)
-export(applyProcessing)
 export(asDataFrame)
 export(chunkapply)
 export(combinePeaksData)
@@ -30,8 +28,6 @@ export(plotMzDelta)
 export(plotSpectra)
 export(plotSpectraOverlay)
 export(ppm)
-export(processingChunkFactor)
-export(processingChunkSize)
 export(processingLog)
 export(reduceSpectra)
 export(scalePeaks)
@@ -61,6 +57,7 @@ exportMethods("mz<-")
 exportMethods("peaksData<-")
 exportMethods("polarity<-")
 exportMethods("precursorMz<-")
+exportMethods("processingChunkSize<-")
 exportMethods("rtime<-")
 exportMethods("smoothed<-")
 exportMethods("spectraData<-")
@@ -68,6 +65,7 @@ exportMethods("spectraNames<-")
 exportMethods(Spectra)
 exportMethods(acquisitionNum)
 exportMethods(addProcessing)
+exportMethods(applyProcessing)
 exportMethods(backendBpparam)
 exportMethods(backendInitialize)
 exportMethods(backendMerge)
@@ -131,6 +129,8 @@ exportMethods(precScanNum)
 exportMethods(precursorCharge)
 exportMethods(precursorIntensity)
 exportMethods(precursorMz)
+exportMethods(processingChunkFactor)
+exportMethods(processingChunkSize)
 exportMethods(replaceIntensitiesBelow)
 exportMethods(reset)
 exportMethods(rtime)
@@ -240,12 +240,14 @@ importMethodsFrom(ProtGenerics,"mz<-")
 importMethodsFrom(ProtGenerics,"peaksData<-")
 importMethodsFrom(ProtGenerics,"polarity<-")
 importMethodsFrom(ProtGenerics,"precursorMz<-")
+importMethodsFrom(ProtGenerics,"processingChunkSize<-")
 importMethodsFrom(ProtGenerics,"rtime<-")
 importMethodsFrom(ProtGenerics,"smoothed<-")
 importMethodsFrom(ProtGenerics,"spectraData<-")
 importMethodsFrom(ProtGenerics,"spectraNames<-")
 importMethodsFrom(ProtGenerics,acquisitionNum)
 importMethodsFrom(ProtGenerics,addProcessing)
+importMethodsFrom(ProtGenerics,applyProcessing)
 importMethodsFrom(ProtGenerics,backendBpparam)
 importMethodsFrom(ProtGenerics,backendInitialize)
 importMethodsFrom(ProtGenerics,backendMerge)
@@ -292,6 +294,8 @@ importMethodsFrom(ProtGenerics,precScanNum)
 importMethodsFrom(ProtGenerics,precursorCharge)
 importMethodsFrom(ProtGenerics,precursorIntensity)
 importMethodsFrom(ProtGenerics,precursorMz)
+importMethodsFrom(ProtGenerics,processingChunkFactor)
+importMethodsFrom(ProtGenerics,processingChunkSize)
 importMethodsFrom(ProtGenerics,rtime)
 importMethodsFrom(ProtGenerics,scanIndex)
 importMethodsFrom(ProtGenerics,setBackend)
diff --git a/NEWS.md b/NEWS.md
index d9088114..fe134ff2 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,12 @@
 # Spectra 1.17

+## Change in 1.17.5
+
+- Move the generics `processingChunkSize()`, `processingChunkFactor()` and
+  `applyProcessing()` to *ProtGenerics*. Requires *ProtGenerics* version
+  1.39.2 or higher. These generics were moved so that they can also be
+  implemented in the *Chromatograms* package.
+
 ## Change in 1.17.4

 - Import `extractByIndex()` from *ProtGenerics*.
diff --git a/R/Spectra-functions.R b/R/Spectra-functions.R
index 48ede719..01861f59 100644
--- a/R/Spectra-functions.R
+++ b/R/Spectra-functions.R
@@ -213,59 +213,6 @@ NULL
     bplapply(split(x, f), FUN = FUN, ..., BPPARAM = BPPARAM)
 }

-#' @export applyProcessing
-#'
-#' @rdname addProcessing
-applyProcessing <- function(object, f = processingChunkFactor(object),
-                            BPPARAM = bpparam(), ...) {
-    queue <- object@processingQueue
-    if (!length(queue))
-        return(object)
-    if (isReadOnly(object@backend))
-        stop(class(object@backend), " is read-only. 'applyProcessing' works ",
-             "only with backends that support writing data.")
-    BPPARAM <- backendBpparam(object@backend, BPPARAM)
-    svars <- .processingQueueVariables(object)
-    pv <- peaksVariables(object)
-    if (length(f)) {
-        if (!is.factor(f))
-            f <- factor(f, levels = unique(f))
-        if (length(f) != length(object))
-            stop("length 'f' has to be equal to the length of 'object' (",
-                 length(object), ")")
-        bknds <- bplapply(
-            split(object@backend, f = f), function(z, queue, pv, svars) {
-                if (length(svars))
-                    spd <- as.data.frame(spectraData(z, columns = svars))
-                else spd <- NULL
-                peaksData(z) <- .apply_processing_queue(
-                    peaksData(z, columns = pv), spd, queue)
-                z
-            }, queue = queue, pv = pv, svars = svars, BPPARAM = BPPARAM)
-        bknds <- backendMerge(bknds)
-        if (is.unsorted(f))
-            bknds <- extractByIndex(
-                bknds, order(unlist(split(seq_along(bknds), f),
-                                    use.names = FALSE)))
-        object@backend <- bknds
-    } else {
-        if (length(svars))
-            spd <- as.data.frame(spectraData(object@backend, columns = svars))
-        else spd <- NULL
-        peaksData(object@backend) <- .apply_processing_queue(
-            peaksData(object@backend, columns = pv), spd, queue)
-    }
-    object@processing <- .logging(object@processing,
-                                  "Applied processing queue with ",
-                                  length(object@processingQueue),
-                                  " steps")
-    object@processingQueue <- list()
-    if (!.hasSlot(object, "processingQueueVariables"))
-        object <- updateObject(object, check = FALSE)
-    object@processingQueueVariables <- character()
-    object
-}
-
 #' @description
 #'
 #' Simple helper function to test parameter msLevel. Returns `TRUE` if parameter
@@ -983,107 +930,6 @@ filterPrecursorPeaks <- function(object, tolerance = 0, ppm = 20,
     else backendParallelFactor(x@backend)
 }

-#' @title Parallel and chunk-wise processing of `Spectra`
-#'
-#' @description
-#'
-#' Many operations on `Spectra` objects, specifically those working with
-#' the actual MS data (peaks data), allow a chunk-wise processing in which
-#' the `Spectra` is splitted into smaller parts (chunks) that are
-#' iteratively processed.
This enables parallel processing of the data (by -#' data chunk) and also reduces the memory demand since only the MS data -#' of the currently processed subset is loaded into memory and processed. -#' This chunk-wise processing, which is by default disabled, can be enabled -#' by setting the processing chunk size of a `Spectra` with the -#' `processingChunkSize()` function to a value which is smaller than the -#' length of the `Spectra` object. Setting `processingChunkSize(sps) <- 1000` -#' will cause any data manipulation operation on the `sps`, such as -#' `filterIntensity()` or `bin()`, to be performed eventually in parallel for -#' sets of 1000 spectra in each iteration. -#' -#' Such chunk-wise processing is specifically useful for `Spectra` objects -#' using an *on-disk* backend or for very large experiments. For small data -#' sets or `Spectra` using an in-memory backend, a direct processing might -#' however be more efficient. Setting the chunk size to `Inf` will disable -#' the chunk-wise processing. -#' -#' For some backends a certain type of splitting and chunk-wise processing -#' might be preferable. The `MsBackendMzR` backend for example needs to load -#' the MS data from the original (mzML) files, hence chunk-wise processing -#' on a per-file basis would be ideal. The [backendParallelFactor()] function -#' for `MsBackend` allows backends to suggest a preferred splitting of the -#' data by returning a `factor` defining the respective data chunks. The -#' `MsBackendMzR` returns for example a `factor` based on the *dataStorage* -#' spectra variable. A `factor` of length 0 is returned if no particular -#' preferred splitting should be performed. The suggested chunk definition -#' will be used if no finite `processingChunkSize()` is defined. Setting -#' the `processingChunkSize` overrides `backendParallelFactor`. -#' -#' See the *Large-scale data handling and processing with Spectra* for more -#' information and examples. -#' -#' Functions to configure parallel or chunk-wise processing: -#' -#' - `processingChunkSize()`: allows to get or set the size of the chunks for -#' parallel processing or chunk-wise processing of a `Spectra` in general. -#' With a value of `Inf` (the default) no chunk-wise processing will be -#' performed. -#' -#' - `processingChunkFactor()`: returns a `factor` defining the chunks into -#' which a `Spectra` will be split for chunk-wise (parallel) processing. -#' A `factor` of length 0 indicates that no chunk-wise processing will be -#' performed. -#' -#' @note -#' -#' Some backends might not support parallel processing at all. -#' For these, the `backendBpparam()` function will always return a -#' `SerialParam()` independently on how parallel processing was defined. -#' -#' @param BPPARAM Parallel setup configuration. See [BiocParallel::bpparam()] -#' for more information. -#' -#' @param object `Spectra` object. -#' -#' @param x `Spectra`. -#' -#' @param value `integer(1)` defining the chunk size. -#' -#' @return `processingChunkSize()` returns the currently defined processing -#' chunk size (or `Inf` if it is not defined). `processingChunkFactor()` -#' returns a `factor` defining the chunks into which `x` will be split -#' for (parallel) chunk-wise processing or a `factor` of length 0 if -#' no splitting is defined. 
-#' -#' @author Johannes Rainer -#' -#' @export -processingChunkSize <- function(x) { - if (.hasSlot(x, "processingChunkSize")) - x@processingChunkSize - else Inf -} - -#' @rdname processingChunkSize -#' -#' @export -`processingChunkSize<-` <- function(x, value) { - if (length(value) != 1L) - stop("'value' has to be of length 1") - if (!.hasSlot(x, "processingChunkSize")) - x <- updateObject(x) - x@processingChunkSize <- value - x -} - -#' @rdname processingChunkSize -#' -#' @export -processingChunkFactor <- function(x) { - if (!inherits(x, "Spectra")) - stop("'x' is supposed to be a 'Spectra' object") - .parallel_processing_factor(x) -} #' @title Filter peaks based on spectra and peaks variable ranges #' diff --git a/R/Spectra.R b/R/Spectra.R index 86fa6c8e..98d8ceca 100644 --- a/R/Spectra.R +++ b/R/Spectra.R @@ -31,7 +31,7 @@ NULL #' #' - [spectraData()] for accessing and using MS data through `Spectra` objects. #' - [filterMsLevel()] to subset and filter `Spectra` objects. -#' - [plotSpectra()] for visualization of `Spectra` orbjects. +#' - [plotSpectra()] for visualization of `Spectra` objects. #' - [processingChunkSize()] for information on parallel and chunk-wise data #' processing. #' - [combineSpectra()] for merging, aggregating and splitting of `Spectra` @@ -3276,6 +3276,64 @@ setMethod("addProcessing", "Spectra", function(object, FUN, ..., object }) +#' @importMethodsFrom ProtGenerics applyProcessing +#' +#' @exportMethod applyProcessing +#' +#' @rdname addProcessing +setMethod("applyProcessing", + signature(object = "Spectra"), + function(object, + f = processingChunkFactor(object), + BPPARAM = bpparam(), ...) { + queue <- object@processingQueue + if (!length(queue)) + return(object) + if (isReadOnly(object@backend)) + stop(class(object@backend), " is read-only. 
'applyProcessing' works ",
+                       "only with backends that support writing data.")
+              BPPARAM <- backendBpparam(object@backend, BPPARAM)
+              svars <- .processingQueueVariables(object)
+              pv <- peaksVariables(object)
+              if (length(f)) {
+                  if (!is.factor(f))
+                      f <- factor(f, levels = unique(f))
+                  if (length(f) != length(object))
+                      stop("length 'f' has to be equal to the length of 'object' (",
+                           length(object), ")")
+                  bknds <- bplapply(
+                      split(object@backend, f = f), function(z, queue, pv, svars) {
+                          if (length(svars))
+                              spd <- as.data.frame(spectraData(z, columns = svars))
+                          else spd <- NULL
+                          peaksData(z) <- .apply_processing_queue(
+                              peaksData(z, columns = pv), spd, queue)
+                          z
+                      }, queue = queue, pv = pv, svars = svars, BPPARAM = BPPARAM)
+                  bknds <- backendMerge(bknds)
+                  if (is.unsorted(f))
+                      bknds <- extractByIndex(
+                          bknds, order(unlist(split(seq_along(bknds), f),
+                                              use.names = FALSE)))
+                  object@backend <- bknds
+              } else {
+                  if (length(svars))
+                      spd <- as.data.frame(spectraData(object@backend, columns = svars))
+                  else spd <- NULL
+                  peaksData(object@backend) <- .apply_processing_queue(
+                      peaksData(object@backend, columns = pv), spd, queue)
+              }
+              object@processing <- .logging(object@processing,
+                                            "Applied processing queue with ",
+                                            length(object@processingQueue),
+                                            " steps")
+              object@processingQueue <- list()
+              if (!.hasSlot(object, "processingQueueVariables"))
+                  object <- updateObject(object, check = FALSE)
+              object@processingQueueVariables <- character()
+              object
+          })
+
 #' @rdname addProcessing
 #'
 #' @importMethodsFrom ProtGenerics bin
@@ -3498,6 +3556,113 @@ setMethod("spectrapply", "Spectra", function(object, FUN, ...,
                         BPPARAM = backendBpparam(object@backend, BPPARAM))
 })

+#' @title Parallel and chunk-wise processing of `Spectra`
+#'
+#' @rdname processingChunkSize
+#'
+#' @aliases processingChunkSize processingChunkSize<-
+#' @aliases processingChunkFactor
+#'
+#' @description
+#'
+#' Many operations on `Spectra` objects, specifically those working with
+#' the actual MS data (peaks data), allow chunk-wise processing in which
+#' the `Spectra` is split into smaller parts (chunks) that are
+#' iteratively processed. This enables parallel processing of the data (by
+#' data chunk) and also reduces the memory demand since only the MS data
+#' of the currently processed subset is loaded into memory and processed.
+#' This chunk-wise processing, which is by default disabled, can be enabled
+#' by setting the processing chunk size of a `Spectra` with the
+#' `processingChunkSize()` function to a value smaller than the
+#' length of the `Spectra` object. Setting `processingChunkSize(sps) <- 1000`
+#' will cause any data manipulation operation on `sps`, such as
+#' `filterIntensity()` or `bin()`, to be performed, eventually in parallel,
+#' on sets of 1000 spectra in each iteration.
+#'
+#' Such chunk-wise processing is specifically useful for `Spectra` objects
+#' using an *on-disk* backend or for very large experiments. For small data
+#' sets or `Spectra` using an in-memory backend, direct processing might
+#' however be more efficient. Setting the chunk size to `Inf` will disable
+#' the chunk-wise processing.
+#'
+#' For some backends a certain type of splitting and chunk-wise processing
+#' might be preferable. The `MsBackendMzR` backend for example needs to load
+#' the MS data from the original (mzML) files, hence chunk-wise processing
+#' on a per-file basis would be ideal.
The [backendParallelFactor()] function
+#' for `MsBackend` allows backends to suggest a preferred splitting of the
+#' data by returning a `factor` defining the respective data chunks. The
+#' `MsBackendMzR` returns for example a `factor` based on the *dataStorage*
+#' spectra variable. A `factor` of length 0 is returned if no particular
+#' preferred splitting should be performed. The suggested chunk definition
+#' will be used if no finite `processingChunkSize()` is defined. Setting
+#' the `processingChunkSize` overrides `backendParallelFactor`.
+#'
+#' See the *Large-scale data handling and processing with Spectra* vignette
+#' for more information and examples.
+#'
+#' Functions to configure parallel or chunk-wise processing:
+#'
+#' - `processingChunkSize()`: allows getting or setting the size of the chunks
+#'   for parallel processing or chunk-wise processing of a `Spectra` in
+#'   general. With a value of `Inf` (the default) no chunk-wise processing
+#'   will be performed.
+#'
+#' - `processingChunkFactor()`: returns a `factor` defining the chunks into
+#'   which a `Spectra` will be split for chunk-wise (parallel) processing.
+#'   A `factor` of length 0 indicates that no chunk-wise processing will be
+#'   performed.
+#'
+#' @note
+#'
+#' Some backends might not support parallel processing at all.
+#' For these, the `backendBpparam()` function will always return a
+#' `SerialParam()` independently of how parallel processing was defined.
+#'
+#' @param BPPARAM Parallel setup configuration. See [BiocParallel::bpparam()]
+#'     for more information.
+#'
+#' @param object `Spectra` object.
+#'
+#' @param value `integer(1)` defining the chunk size.
+#'
+#' @return `processingChunkSize()` returns the currently defined processing
+#'     chunk size (or `Inf` if it is not defined). `processingChunkFactor()`
+#'     returns a `factor` defining the chunks into which `object` will be
+#'     split for (parallel) chunk-wise processing or a `factor` of length 0
+#'     if no splitting is defined.
+#'
+#' @author Johannes Rainer
+#'
+#' @importMethodsFrom ProtGenerics processingChunkSize processingChunkSize<-
+#'
+#' @exportMethod processingChunkSize
+setMethod("processingChunkSize", "Spectra", function(object) {
+    if (.hasSlot(object, "processingChunkSize"))
+        object@processingChunkSize
+    else Inf
+})
+
+#' @rdname processingChunkSize
+#'
+#' @exportMethod processingChunkSize<-
+setReplaceMethod("processingChunkSize", "Spectra", function(object, value) {
+    if (length(value) != 1L)
+        stop("'value' has to be of length 1")
+    object <- updateObject(object)
+    object@processingChunkSize <- value
+    object
+})
+
+#' @rdname processingChunkSize
+#'
+#' @importMethodsFrom ProtGenerics processingChunkFactor
+#'
+#' @exportMethod processingChunkFactor
+setMethod("processingChunkFactor", "Spectra", function(object) {
+    .parallel_processing_factor(object)
+})
+
+
 #' @title Estimate Precursor Intensities
 #'
 #' @aliases estimatePrecursorIntensity
diff --git a/man/Spectra.Rd b/man/Spectra.Rd
index 1d7efdf0..82b2c117 100644
--- a/man/Spectra.Rd
+++ b/man/Spectra.Rd
@@ -117,7 +117,7 @@ Documentation on other topics and functionality of \code{Spectra}can
 be found in
 \itemize{
 \item \code{\link[=spectraData]{spectraData()}} for accessing and using MS data through \code{Spectra} objects.
 \item \code{\link[=filterMsLevel]{filterMsLevel()}} to subset and filter \code{Spectra} objects.
-\item \code{\link[=plotSpectra]{plotSpectra()}} for visualization of \code{Spectra} orbjects.
+\item \code{\link[=plotSpectra]{plotSpectra()}} for visualization of \code{Spectra} objects. \item \code{\link[=processingChunkSize]{processingChunkSize()}} for information on parallel and chunk-wise data processing. \item \code{\link[=combineSpectra]{combineSpectra()}} for merging, aggregating and splitting of \code{Spectra} diff --git a/man/addProcessing.Rd b/man/addProcessing.Rd index e431bb21..1746659b 100644 --- a/man/addProcessing.Rd +++ b/man/addProcessing.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/Spectra-functions.R, R/Spectra.R -\name{applyProcessing} -\alias{applyProcessing} +\name{processingLog} \alias{processingLog} \alias{scalePeaks} \alias{addProcessing} +\alias{applyProcessing} \alias{bin} \alias{containsMz} \alias{containsNeutralLoss} @@ -15,6 +15,7 @@ \alias{smooth} \alias{spectrapply} \alias{addProcessing,Spectra-method} +\alias{applyProcessing,Spectra-method} \alias{bin,Spectra-method} \alias{containsMz,Spectra-method} \alias{containsNeutralLoss,Spectra-method} @@ -27,19 +28,19 @@ \alias{spectrapply,Spectra-method} \title{Data manipulation and analysis methods} \usage{ -applyProcessing( - object, - f = processingChunkFactor(object), - BPPARAM = bpparam(), - ... -) - processingLog(x) scalePeaks(x, by = sum, msLevel. = uniqueMsLevels(x)) \S4method{addProcessing}{Spectra}(object, FUN, ..., spectraVariables = character()) +\S4method{applyProcessing}{Spectra}( + object, + f = processingChunkFactor(object), + BPPARAM = bpparam(), + ... +) + \S4method{bin}{Spectra}( x, binSize = 1L, @@ -109,23 +110,6 @@ scalePeaks(x, by = sum, msLevel. = uniqueMsLevels(x)) ) } \arguments{ -\item{object}{A \code{Spectra} object.} - -\item{f}{For \code{spectrapply()} and \code{applyProcessing()}: \code{factor} defining -how \code{object} should be splitted for eventual parallel processing. -Defaults to \code{factor()} for \code{spectrapply()} hence the object is not -splitted while it defaults to \code{f = processingChunkSize(object)} for -\code{applyProcessing()} splitting thus the object by default into chunks -depending on \code{\link[=processingChunkSize]{processingChunkSize()}}.} - -\item{BPPARAM}{Parallel setup configuration. See \code{\link[BiocParallel:register]{BiocParallel::bpparam()}} -for more information. This is passed directly to the -\code{\link[=backendInitialize]{backendInitialize()}} method of the \linkS4class{MsBackend}. See also -\code{\link[=processingChunkSize]{processingChunkSize()}} for additional information on parallel -processing.} - -\item{...}{Additional arguments passed to internal and downstream functions.} - \item{x}{A \code{Spectra}.} \item{by}{For \code{scalePeaks()}: function to calculate a single \code{numeric} from @@ -137,6 +121,8 @@ spectrum.} \item{msLevel.}{\code{integer} defining the MS level(s) of the spectra to which the function should be applied (defaults to all MS levels of \code{object}.} +\item{object}{A \code{Spectra} object.} + \item{FUN}{For \code{addProcessing()}: function to be applied to the peak matrix of each spectrum in \code{object}. For \code{bin()}: function to aggregate intensity values of peaks falling @@ -144,10 +130,25 @@ into the same bin. Defaults to \code{FUN = sum} thus summing up intensities. 
For \code{spectrapply()} and \code{chunkapply()}: function to be applied
to each individual or each chunk of \code{Spectra}.}

+\item{...}{Additional arguments passed to internal and downstream functions.}
+
 \item{spectraVariables}{For \code{addProcessing()}: \code{character} with additional
 spectra variables that should be passed along to the function defined with
 \code{FUN}. See function description for details.}

+\item{f}{For \code{spectrapply()} and \code{applyProcessing()}: \code{factor} defining
+how \code{object} should be split for eventual parallel processing.
+Defaults to \code{factor()} for \code{spectrapply()}, i.e. the object is not
+split, while it defaults to \code{f = processingChunkFactor(object)} for
+\code{applyProcessing()}, thus splitting the object by default into chunks
+depending on \code{\link[=processingChunkSize]{processingChunkSize()}}.}
+
+\item{BPPARAM}{Parallel setup configuration. See \code{\link[BiocParallel:register]{BiocParallel::bpparam()}}
+for more information. This is passed directly to the
+\code{\link[=backendInitialize]{backendInitialize()}} method of the \linkS4class{MsBackend}. See also
+\code{\link[=processingChunkSize]{processingChunkSize()}} for additional information on parallel
+processing.}
+
 \item{binSize}{For \code{bin()}: \code{numeric(1)} defining the size for the m/z bins.
 Defaults to \code{binSize = 1}.}
diff --git a/man/processingChunkSize.Rd b/man/processingChunkSize.Rd
index ce32edb0..6b3a4703 100644
--- a/man/processingChunkSize.Rd
+++ b/man/processingChunkSize.Rd
@@ -1,27 +1,28 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/Spectra-functions.R, R/Spectra.R
-\name{processingChunkSize}
+% Please edit documentation in R/Spectra.R
+\name{processingChunkSize,Spectra-method}
+\alias{processingChunkSize,Spectra-method}
 \alias{processingChunkSize}
 \alias{processingChunkSize<-}
 \alias{processingChunkFactor}
+\alias{processingChunkSize<-,Spectra-method}
+\alias{processingChunkFactor,Spectra-method}
 \alias{backendBpparam,Spectra-method}
 \title{Parallel and chunk-wise processing of \code{Spectra}}
 \usage{
-processingChunkSize(x)
+\S4method{processingChunkSize}{Spectra}(object)

-processingChunkSize(x) <- value
+\S4method{processingChunkSize}{Spectra}(object) <- value

-processingChunkFactor(x)
+\S4method{processingChunkFactor}{Spectra}(object)

 \S4method{backendBpparam}{Spectra}(object, BPPARAM = bpparam())
 }
 \arguments{
-\item{x}{\code{Spectra}.}
+\item{object}{\code{Spectra} object.}

 \item{value}{\code{integer(1)} defining the chunk size.}

-\item{object}{\code{Spectra} object.}
-
 \item{BPPARAM}{Parallel setup configuration. See \code{\link[BiocParallel:register]{BiocParallel::bpparam()}}
 for more information.}
 }
diff --git a/tests/testthat/test_Spectra-functions.R b/tests/testthat/test_Spectra-functions.R
index 2dbcf372..a1f0f90a 100644
--- a/tests/testthat/test_Spectra-functions.R
+++ b/tests/testthat/test_Spectra-functions.R
@@ -14,76 +14,6 @@ test_that("addProcessing works", {
     show(tst)
 })

-test_that("applyProcessing works", {
-    ## Initialize required objects.
-    sps_mzr <- filterRt(Spectra(sciex_mzr), rt = c(10, 20))
-    ## Add processings.
-    centroided(sps_mzr) <- TRUE
-    sps_mzr <- replaceIntensitiesBelow(sps_mzr, threshold = 5000,
-                                       value = NA_real_)
-    sps_mzr <- filterIntensity(sps_mzr)
-    expect_true(length(sps_mzr@processingQueue) == 2)
-    expect_error(applyProcessing(sps_mzr), "is read-only")
-
-    ## Create writeable backends.
- sps_mem <- setBackend(sps_mzr, backend = MsBackendDataFrame()) - sps_h5 <- setBackend(sps_mzr, backend = MsBackendHdf5Peaks(), - files = c(tempfile(), tempfile()), - f = rep(1, length(sps_mzr))) - expect_true(length(sps_mem@processingQueue) == 2) - expect_true(length(sps_h5@processingQueue) == 2) - expect_identical(peaksData(sps_mzr), peaksData(sps_mem)) - expect_identical(peaksData(sps_h5), peaksData(sps_mem)) - - ## MsBackendDataFrame - res <- applyProcessing(sps_mem) - expect_true(length(res@processingQueue) == 0) - expect_true(length(res@processing) > length(sps_mem@processing)) - expect_identical(rtime(res), rtime(sps_mem)) - expect_identical(peaksData(res), peaksData(sps_mem)) - - ## MsBackendHdf5Peaks - res <- applyProcessing(sps_h5) - expect_true(length(res@processingQueue) == 0) - expect_true(length(res@processing) > length(sps_h5@processing)) - expect_identical(rtime(res), rtime(sps_mem)) - expect_identical(peaksData(res), peaksData(sps_mem)) - expect_true(all(res@backend@modCount > sps_h5@backend@modCount)) - - ## Applying the processing queue invalidated the original object! - expect_error(peaksData(sps_h5)) - sps_h5 <- setBackend(sps_mzr, backend = MsBackendHdf5Peaks(), - files = c(tempfile(), tempfile()), - f = rep(1, length(sps_mzr))) - - ## Use an arbitrary splitting factor ensuring that the results are still OK. - f <- rep(letters[1:9], 8) - f <- sample(f) - - ## MsBackendHdf5Peaks - res <- applyProcessing(sps_mem, f = f) - expect_true(length(res@processingQueue) == 0) - expect_true(length(res@processing) > length(sps_mem@processing)) - expect_identical(rtime(res), rtime(sps_mem)) - expect_identical(peaksData(res), peaksData(sps_mem)) - - ## MsBackendHdf5Peaks: throws an error, because the factor f does not - ## match the dataStorage. 
- expect_error(applyProcessing(sps_h5, f = f)) - - sps_h5 <- setBackend(sps_mzr, backend = MsBackendHdf5Peaks(), - files = c(tempfile(), tempfile()), - f = rep(1, length(sps_mzr))) - res <- applyProcessing(sps_h5, f = rep(1, length(sps_h5))) - expect_true(length(res@processingQueue) == 0) - expect_true(length(res@processing) > length(sps_h5@processing)) - expect_identical(rtime(res), rtime(sps_mem)) - expect_identical(peaksData(res), peaksData(sps_mem)) - expect_true(all(res@backend@modCount > sps_h5@backend@modCount)) - - expect_error(applyProcessing(sps_mem, f = 1:2), "has to be equal to the") -}) - test_that(".check_ms_level works", { expect_true(.check_ms_level(sciex_mzr, 1)) expect_warning(.check_ms_level(sciex_mzr, 2)) @@ -798,34 +728,6 @@ test_that("filterPrecursorPeaks,Spectra works", { expect_equal(lengths(res)[msLevel(x) == 1L], lengths(x)[msLevel(x) == 1L]) }) -test_that("processingChunkSize works", { - s <- Spectra() - expect_equal(processingChunkSize(s), Inf) - processingChunkSize(s) <- 1000 - expect_equal(processingChunkSize(s), 1000) - expect_error(processingChunkSize(s) <- c(1, 2), "length 1") - expect_error(processingChunkSize(s) <- "A", "character") -}) - -test_that("processingChunkFactor works", { - s <- Spectra() - expect_equal(processingChunkFactor(s), factor()) - tmp <- Spectra(sciex_mzr) - - expect_equal(length(processingChunkFactor(tmp)), length(tmp)) - expect_true(is.factor(processingChunkFactor(tmp))) - - processingChunkSize(tmp) <- 1000 - res <- processingChunkFactor(tmp) - expect_true(is.factor(res)) - expect_true(length(res) == length(tmp)) - expect_equal(levels(res), c("1", "2")) - - expect_equal(.parallel_processing_factor(tmp), processingChunkFactor(tmp)) - - expect_error(processingChunkFactor("a"), "Spectra") -}) - test_that("filterPeaksRanges,Spectra works", { df <- data.frame(rtime = 123.3, new_var = 4, msLevel = 2L) df$mz <- list(c(100.1, 100.2, 100.3, 100.4, 200.1, 200.2, 200.3, diff --git a/tests/testthat/test_Spectra.R b/tests/testthat/test_Spectra.R index 4cc721d9..53598428 100644 --- a/tests/testthat/test_Spectra.R +++ b/tests/testthat/test_Spectra.R @@ -1992,3 +1992,101 @@ test_that("precursorMz<-,Spectra works", { precursorMz(a) <- c(12.3, 1.1, 34.3) expect_equal(precursorMz(a), c(12.3, 1.1, 34.3)) }) + +test_that("processingChunkSize works", { + s <- Spectra() + expect_equal(processingChunkSize(s), Inf) + processingChunkSize(s) <- 1000 + expect_equal(processingChunkSize(s), 1000) + expect_error(processingChunkSize(s) <- c(1, 2), "length 1") + expect_error(processingChunkSize(s) <- "A", "character") +}) + +test_that("processingChunkFactor works", { + s <- Spectra() + expect_equal(processingChunkFactor(s), factor()) + tmp <- Spectra(sciex_mzr) + + expect_equal(length(processingChunkFactor(tmp)), length(tmp)) + expect_true(is.factor(processingChunkFactor(tmp))) + + processingChunkSize(tmp) <- 1000 + res <- processingChunkFactor(tmp) + expect_true(is.factor(res)) + expect_true(length(res) == length(tmp)) + expect_equal(levels(res), c("1", "2")) + + expect_equal(.parallel_processing_factor(tmp), processingChunkFactor(tmp)) + + expect_error(processingChunkFactor("a"), "signature") +}) + +test_that("applyProcessing works", { + ## Initialize required objects. + sps_mzr <- filterRt(Spectra(sciex_mzr), rt = c(10, 20)) + ## Add processings. 
+    centroided(sps_mzr) <- TRUE
+    sps_mzr <- replaceIntensitiesBelow(sps_mzr, threshold = 5000,
+                                       value = NA_real_)
+    sps_mzr <- filterIntensity(sps_mzr)
+    expect_true(length(sps_mzr@processingQueue) == 2)
+    expect_error(applyProcessing(sps_mzr), "is read-only")
+
+    ## Create writeable backends.
+    sps_mem <- setBackend(sps_mzr, backend = MsBackendDataFrame())
+    sps_h5 <- setBackend(sps_mzr, backend = MsBackendHdf5Peaks(),
+                         files = c(tempfile(), tempfile()),
+                         f = rep(1, length(sps_mzr)))
+    expect_true(length(sps_mem@processingQueue) == 2)
+    expect_true(length(sps_h5@processingQueue) == 2)
+    expect_identical(peaksData(sps_mzr), peaksData(sps_mem))
+    expect_identical(peaksData(sps_h5), peaksData(sps_mem))
+
+    ## MsBackendDataFrame
+    res <- applyProcessing(sps_mem)
+    expect_true(length(res@processingQueue) == 0)
+    expect_true(length(res@processing) > length(sps_mem@processing))
+    expect_identical(rtime(res), rtime(sps_mem))
+    expect_identical(peaksData(res), peaksData(sps_mem))
+
+    ## MsBackendHdf5Peaks
+    res <- applyProcessing(sps_h5)
+    expect_true(length(res@processingQueue) == 0)
+    expect_true(length(res@processing) > length(sps_h5@processing))
+    expect_identical(rtime(res), rtime(sps_mem))
+    expect_identical(peaksData(res), peaksData(sps_mem))
+    expect_true(all(res@backend@modCount > sps_h5@backend@modCount))
+
+    ## Applying the processing queue invalidated the original object!
+    expect_error(peaksData(sps_h5))
+    sps_h5 <- setBackend(sps_mzr, backend = MsBackendHdf5Peaks(),
+                         files = c(tempfile(), tempfile()),
+                         f = rep(1, length(sps_mzr)))
+
+    ## Use an arbitrary splitting factor ensuring that the results are still OK.
+    f <- rep(letters[1:9], 8)
+    f <- sample(f)
+
+    ## MsBackendDataFrame
+    res <- applyProcessing(sps_mem, f = f)
+    expect_true(length(res@processingQueue) == 0)
+    expect_true(length(res@processing) > length(sps_mem@processing))
+    expect_identical(rtime(res), rtime(sps_mem))
+    expect_identical(peaksData(res), peaksData(sps_mem))
+
+    ## MsBackendHdf5Peaks: throws an error, because the factor f does not
+    ## match the dataStorage.
+    expect_error(applyProcessing(sps_h5, f = f))
+
+    sps_h5 <- setBackend(sps_mzr, backend = MsBackendHdf5Peaks(),
+                         files = c(tempfile(), tempfile()),
+                         f = rep(1, length(sps_mzr)))
+    res <- applyProcessing(sps_h5, f = rep(1, length(sps_h5)))
+    expect_true(length(res@processingQueue) == 0)
+    expect_true(length(res@processing) > length(sps_h5@processing))
+    expect_identical(rtime(res), rtime(sps_mem))
+    expect_identical(peaksData(res), peaksData(sps_mem))
+    expect_true(all(res@backend@modCount > sps_h5@backend@modCount))
+
+    expect_error(applyProcessing(sps_mem, f = 1:2), "has to be equal to the")
+})
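
For illustration, a minimal usage sketch of the chunk-wise processing API after
this change -- the generics behave as before, only their definition now lives in
*ProtGenerics*. The example mzML files from the *msdata* package, the chunk
size of 1000 and the intensity cutoff are illustrative assumptions, not part of
the patch:

    library(Spectra)

    ## Load the mzML example files shipped with the msdata package into a
    ## Spectra with an on-disk (read-only) MsBackendMzR backend.
    fls <- dir(system.file("sciex", package = "msdata"), full.names = TRUE)
    sps <- Spectra(fls, source = MsBackendMzR())

    ## Enable chunk-wise processing: subsequent operations work on chunks
    ## of 1000 spectra, possibly in parallel.
    processingChunkSize(sps) <- 1000
    processingChunkFactor(sps)   ## factor defining the chunks

    ## Queue a processing step; it is applied on-the-fly until
    ## applyProcessing() makes it persistent, which requires a writeable
    ## backend such as MsBackendMemory.
    sps <- filterIntensity(sps, intensity = c(100, Inf))
    sps <- setBackend(sps, MsBackendMemory())
    sps <- applyProcessing(sps)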
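
And a hypothetical sketch of what the move enables for downstream packages such
as *Chromatograms*: with the generics defined in *ProtGenerics*, a package can
provide methods for its own classes without depending on *Spectra*. The class
name and slot below are invented for illustration only:

    library(ProtGenerics)

    ## Hypothetical class storing its chunk size in a slot, mirroring the
    ## Spectra implementation in this patch.
    setClass("MyMsObject",
             slots = c(processingChunkSize = "numeric"),
             prototype = prototype(processingChunkSize = Inf))

    setMethod("processingChunkSize", "MyMsObject", function(object)
        object@processingChunkSize)

    setReplaceMethod("processingChunkSize", "MyMsObject",
                     function(object, value) {
        if (length(value) != 1L)
            stop("'value' has to be of length 1")
        object@processingChunkSize <- value
        object
    })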