
Commit

Merge pull request #345 from rformassspectrometry/phili
Changes of functions to method and move to ProtGenerics
jorainer authored Jan 16, 2025
2 parents 6bcbc94 + 9345a78 commit 31ab50b
Showing 10 changed files with 318 additions and 294 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
@@ -1,6 +1,6 @@
Package: Spectra
Title: Spectra Infrastructure for Mass Spectrometry Data
Version: 1.17.4
Version: 1.17.5
Description: The Spectra package defines an efficient infrastructure
for storing and handling mass spectrometry spectra and functionality to
subset, process, visualize and compare spectra data. It provides different
@@ -42,7 +42,7 @@ Depends:
S4Vectors,
BiocParallel
Imports:
ProtGenerics (>= 1.39.1),
ProtGenerics (>= 1.39.2),
methods,
IRanges,
MsCoreUtils (>= 1.7.5),
12 changes: 8 additions & 4 deletions NAMESPACE
@@ -1,13 +1,11 @@
# Generated by roxygen2: do not edit by hand

export("processingChunkSize<-")
export(MsBackendCached)
export(MsBackendDataFrame)
export(MsBackendHdf5Peaks)
export(MsBackendMemory)
export(MsBackendMzR)
export(PrecursorMzParam)
export(applyProcessing)
export(asDataFrame)
export(chunkapply)
export(combinePeaksData)
@@ -30,8 +28,6 @@ export(plotMzDelta)
export(plotSpectra)
export(plotSpectraOverlay)
export(ppm)
export(processingChunkFactor)
export(processingChunkSize)
export(processingLog)
export(reduceSpectra)
export(scalePeaks)
@@ -61,13 +57,15 @@ exportMethods("mz<-")
exportMethods("peaksData<-")
exportMethods("polarity<-")
exportMethods("precursorMz<-")
exportMethods("processingChunkSize<-")
exportMethods("rtime<-")
exportMethods("smoothed<-")
exportMethods("spectraData<-")
exportMethods("spectraNames<-")
exportMethods(Spectra)
exportMethods(acquisitionNum)
exportMethods(addProcessing)
exportMethods(applyProcessing)
exportMethods(backendBpparam)
exportMethods(backendInitialize)
exportMethods(backendMerge)
@@ -131,6 +129,8 @@ exportMethods(precScanNum)
exportMethods(precursorCharge)
exportMethods(precursorIntensity)
exportMethods(precursorMz)
exportMethods(processingChunkFactor)
exportMethods(processingChunkSize)
exportMethods(replaceIntensitiesBelow)
exportMethods(reset)
exportMethods(rtime)
@@ -240,12 +240,14 @@ importMethodsFrom(ProtGenerics,"mz<-")
importMethodsFrom(ProtGenerics,"peaksData<-")
importMethodsFrom(ProtGenerics,"polarity<-")
importMethodsFrom(ProtGenerics,"precursorMz<-")
importMethodsFrom(ProtGenerics,"processingChunkSize<-")
importMethodsFrom(ProtGenerics,"rtime<-")
importMethodsFrom(ProtGenerics,"smoothed<-")
importMethodsFrom(ProtGenerics,"spectraData<-")
importMethodsFrom(ProtGenerics,"spectraNames<-")
importMethodsFrom(ProtGenerics,acquisitionNum)
importMethodsFrom(ProtGenerics,addProcessing)
importMethodsFrom(ProtGenerics,applyProcessing)
importMethodsFrom(ProtGenerics,backendBpparam)
importMethodsFrom(ProtGenerics,backendInitialize)
importMethodsFrom(ProtGenerics,backendMerge)
@@ -292,6 +294,8 @@ importMethodsFrom(ProtGenerics,precScanNum)
importMethodsFrom(ProtGenerics,precursorCharge)
importMethodsFrom(ProtGenerics,precursorIntensity)
importMethodsFrom(ProtGenerics,precursorMz)
importMethodsFrom(ProtGenerics,processingChunkFactor)
importMethodsFrom(ProtGenerics,processingChunkSize)
importMethodsFrom(ProtGenerics,rtime)
importMethodsFrom(ProtGenerics,scanIndex)
importMethodsFrom(ProtGenerics,setBackend)
7 changes: 7 additions & 0 deletions NEWS.md
@@ -1,5 +1,12 @@
# Spectra 1.17

## Change in 1.17.5

- Move generics `processingChunkSize()`, `processingChunkFactor()` and
  `applyProcessing()` to *ProtGenerics*. Requires *ProtGenerics* version
  1.39.2 or higher. The generics were moved so that they can also be
  implemented in the *Chromatograms* package.

## Change in 1.17.4

- Import `extractByIndex()` from *ProtGenerics*.
154 changes: 0 additions & 154 deletions R/Spectra-functions.R
@@ -213,59 +213,6 @@ NULL
bplapply(split(x, f), FUN = FUN, ..., BPPARAM = BPPARAM)
}

#' @export applyProcessing
#'
#' @rdname addProcessing
applyProcessing <- function(object, f = processingChunkFactor(object),
BPPARAM = bpparam(), ...) {
queue <- object@processingQueue
if (!length(queue))
return(object)
if (isReadOnly(object@backend))
stop(class(object@backend), " is read-only. 'applyProcessing' works ",
"only with backends that support writing data.")
BPPARAM <- backendBpparam(object@backend, BPPARAM)
svars <- .processingQueueVariables(object)
pv <- peaksVariables(object)
if (length(f)) {
if (!is.factor(f))
f <- factor(f, levels = unique(f))
if (length(f) != length(object))
stop("length 'f' has to be equal to the length of 'object' (",
length(object), ")")
bknds <- bplapply(
split(object@backend, f = f), function(z, queue, pv, svars) {
if (length(svars))
spd <- as.data.frame(spectraData(z, columns = svars))
else spd <- NULL
peaksData(z) <- .apply_processing_queue(
peaksData(z, columns = pv), spd, queue)
z
}, queue = queue, pv = pv, svars = svars, BPPARAM = BPPARAM)
bknds <- backendMerge(bknds)
if (is.unsorted(f))
bknds <- extractByIndex(
bknds, order(unlist(split(seq_along(bknds), f),
use.names = FALSE)))
object@backend <- bknds
} else {
if (length(svars))
spd <- as.data.frame(spectraData(object@backend, columns = svars))
else spd <- NULL
peaksData(object@backend) <- .apply_processing_queue(
peaksData(object@backend, columns = pv), spd, queue)
}
object@processing <- .logging(object@processing,
"Applied processing queue with ",
length(object@processingQueue),
" steps")
object@processingQueue <- list()
if (!.hasSlot(object, "processingQueueVariables"))
object <- updateObject(object, check = FALSE)
object@processingQueueVariables <- character()
object
}

#' @description
#'
#' Simple helper function to test parameter msLevel. Returns `TRUE` if parameter
@@ -983,107 +930,6 @@ filterPrecursorPeaks <- function(object, tolerance = 0, ppm = 20,
else backendParallelFactor(x@backend)
}

#' @title Parallel and chunk-wise processing of `Spectra`
#'
#' @description
#'
#' Many operations on `Spectra` objects, specifically those working with
#' the actual MS data (peaks data), allow chunk-wise processing in which
#' the `Spectra` is split into smaller parts (chunks) that are processed
#' iteratively. This enables parallel processing of the data (by data
#' chunk) and also reduces the memory demand, since only the MS data of
#' the currently processed subset is loaded into memory. This chunk-wise
#' processing, which is disabled by default, can be enabled by setting the
#' processing chunk size of a `Spectra` with the `processingChunkSize()`
#' function to a value smaller than the length of the `Spectra` object.
#' Setting `processingChunkSize(sps) <- 1000` will cause any data
#' manipulation operation on `sps`, such as `filterIntensity()` or
#' `bin()`, to be performed in parallel on sets of 1000 spectra in each
#' iteration.
#'
#' Such chunk-wise processing is particularly useful for `Spectra` objects
#' using an *on-disk* backend or for very large experiments. For small data
#' sets or `Spectra` using an in-memory backend, direct processing might
#' however be more efficient. Setting the chunk size to `Inf` disables
#' chunk-wise processing.
#'
#' For some backends a certain type of splitting and chunk-wise processing
#' might be preferable. The `MsBackendMzR` backend, for example, needs to
#' load the MS data from the original (mzML) files, hence chunk-wise
#' processing on a per-file basis would be ideal. The
#' [backendParallelFactor()] function for `MsBackend` allows backends to
#' suggest a preferred splitting of the data by returning a `factor` that
#' defines the respective data chunks. The `MsBackendMzR`, for example,
#' returns a `factor` based on the *dataStorage* spectra variable. A
#' `factor` of length 0 is returned if no particular splitting is
#' preferred. The suggested chunk definition will be used if no finite
#' `processingChunkSize()` is defined; setting the `processingChunkSize`
#' overrides `backendParallelFactor`.
#'
#' See the *Large-scale data handling and processing with Spectra* vignette
#' for more information and examples.
#'
#' Functions to configure parallel or chunk-wise processing:
#'
#' - `processingChunkSize()`: gets or sets the size of the chunks for
#'   parallel processing or chunk-wise processing of a `Spectra` in general.
#'   With a value of `Inf` (the default) no chunk-wise processing is
#'   performed.
#'
#' - `processingChunkFactor()`: returns a `factor` defining the chunks into
#' which a `Spectra` will be split for chunk-wise (parallel) processing.
#' A `factor` of length 0 indicates that no chunk-wise processing will be
#' performed.
#'
#' @note
#'
#' Some backends might not support parallel processing at all.
#' For these, the `backendBpparam()` function will always return a
#' `SerialParam()` regardless of how parallel processing was defined.
#'
#' @param BPPARAM Parallel setup configuration. See [BiocParallel::bpparam()]
#' for more information.
#'
#' @param object `Spectra` object.
#'
#' @param x `Spectra`.
#'
#' @param value `integer(1)` defining the chunk size.
#'
#' @return `processingChunkSize()` returns the currently defined processing
#' chunk size (or `Inf` if it is not defined). `processingChunkFactor()`
#' returns a `factor` defining the chunks into which `x` will be split
#' for (parallel) chunk-wise processing or a `factor` of length 0 if
#' no splitting is defined.
#'
#' @author Johannes Rainer
#'
#' @export
processingChunkSize <- function(x) {
if (.hasSlot(x, "processingChunkSize"))
x@processingChunkSize
else Inf
}

#' @rdname processingChunkSize
#'
#' @export
`processingChunkSize<-` <- function(x, value) {
if (length(value) != 1L)
stop("'value' has to be of length 1")
if (!.hasSlot(x, "processingChunkSize"))
x <- updateObject(x)
x@processingChunkSize <- value
x
}

#' @rdname processingChunkSize
#'
#' @export
processingChunkFactor <- function(x) {
if (!inherits(x, "Spectra"))
stop("'x' is supposed to be a 'Spectra' object")
.parallel_processing_factor(x)
}

#' @title Filter peaks based on spectra and peaks variable ranges
#'
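
A minimal usage sketch of the chunk-wise processing interface documented above. The toy data, chunk size and intensity threshold are illustrative assumptions for this example and are not part of the commit; `applyProcessing()` requires a backend that supports writing data (the default in-memory backend used here does).

library(Spectra)

## Toy in-memory Spectra object with two spectra.
spd <- DataFrame(msLevel = c(1L, 2L), rtime = c(1.1, 2.2))
spd$mz <- list(c(100.1, 200.2), c(150.1, 300.3))
spd$intensity <- list(c(10, 20), c(5, 300))
sps <- Spectra(spd)

## Enable chunk-wise processing: subsequent peaks data operations such as
## filterIntensity() or bin() are evaluated on chunks of (here) 1 spectrum.
processingChunkSize(sps) <- 1
processingChunkFactor(sps)             # factor defining the chunks

## Queue an operation and make it persistent; applyProcessing() evaluates
## the processing queue chunk by chunk (in parallel, if configured).
sps <- filterIntensity(sps, intensity = c(10, Inf))
sps <- applyProcessing(sps)

## Setting the chunk size to Inf disables chunk-wise processing again.
processingChunkSize(sps) <- Inf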
