feat: add c and coerce methods for XcmsExperiment

- Add `c()` method to combine `XcmsExperiment` objects.
- Add a method to coerce from `XCMSnExp` to `XcmsExperiment` objects.
- Fix references in documentation.
sneumann · Dec 17, 2024 · 6a67604 · 6a67604
1 parent 2d658b6
commit 6a67604
Show file tree

Hide file tree

Showing 34 changed files with 408 additions and 151 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -95,7 +95,7 @@ S3method(plot, xcmsEIC)
 S3method(split, xcmsSet)
 S3method(c, xcmsSet)
 S3method(c, XCMSnExp)
-
+S3method(c, XcmsExperiment)
 S3method(split, xcmsRaw)
 
 exportClasses(

diff --git a/NEWS.md b/NEWS.md
@@ -10,6 +10,8 @@
   chromatographic peak detection with parameter `verboseBetaColumns = TRUE`.
 - Add `chromPeakSummary` generic (issue #705).
 - Add `chromPeakSummary()` method to calculate the *beta* quality metrics.
+- Add `c()` method to combine multiple `XcmsExperiment` objects into one.
+- Add a method to coerce from `XCMSnExp` to `XcmsExperiment` objects.
 
 ## Changes in version 4.5.2
 

diff --git a/R/AllGenerics.R b/R/AllGenerics.R
@@ -98,7 +98,7 @@ setGeneric("addProcessHistory", function(object, ...)
 #'     parameter in \code{\link{profile-matrix}} documentation for more details.
 #'
 #' @param BPPARAM parallel processing setup. Defaults to `BPPARAM = bpparam()`.
-#'     See [bpparam()] for details.
+#'     See [BiocParallel::bpparam()] for details.
 #'
 #' @param centerSample \code{integer(1)} defining the index of the center sample
 #'     in the experiment. It defaults to
@@ -143,7 +143,7 @@ setGeneric("addProcessHistory", function(object, ...)
 #'
 #' @param family For `PeakGroupsParam`: `character(1)` defining the method for
 #'     loess smoothing. Allowed values are `"gaussian"` and `"symmetric"`. See
-#'     [loess()] for more information.
+#'     [stats::loess()] for more information.
 #'
 #' @param gapExtend For `ObiwarpParam`: `numeric(1)` defining the penalty for
 #'     gap enlargement. The default value for `gapExtend` depends on the value
@@ -177,8 +177,8 @@ setGeneric("addProcessHistory", function(object, ...)
 #' @param msLevel For `adjustRtime`: `integer(1)` defining the MS level on
 #'     which the alignment should be performed.
 #'
-#' @param object For `adjustRtime`: an [OnDiskMSnExp()], [XCMSnExp()],
-#'     [MsExperiment()] or [XcmsExperiment()] object.
+#' @param object For `adjustRtime`: an [MSnbase::OnDiskMSnExp()], [XCMSnExp()],
+#'     [MsExperiment::MsExperiment()] or [XcmsExperiment()] object.
 #'
 #' @param param The parameter object defining the alignment method (and its
 #'     setting).
@@ -212,7 +212,7 @@ setGeneric("addProcessHistory", function(object, ...)
 #'
 #' @param span For `PeakGroupsParam`: `numeric(1)` defining
 #'     the degree of smoothing (if `smooth = "loess"`). This parameter is
-#'     passed to the internal call to [loess()].
+#'     passed to the internal call to [stats::loess()].
 #'
 #' @param subset For `ObiwarpParam` and `PeakGroupsParam`: `integer` with the
 #'     indices of samples within the experiment on which the alignment models
@@ -463,7 +463,8 @@ setGeneric("chromPeakData<-", function(object, value)
 #'    The columns will be named as they are written in the `chromPeaks` object
 #'    with a prefix `"chrom_peak_"`. Defaults to `c("mz", "rt")`.
 #'
-#' @param BPPARAM parallel processing setup. Defaults to [bpparam()].
+#' @param BPPARAM parallel processing setup. Defaults to
+#'    [BiocParallel::bpparam()].
 #'
 #' @param ... ignored.
 #'
@@ -567,7 +568,8 @@ setGeneric("chromPeakSpectra", function(object, ...)
 #'   to a bell curve and the signal-to-noise ratio calculated on the residuals
 #'   of this test.
 #'
-#' @param BPPARAM Parallel processing setup. See [bpparam()] for details.
+#' @param BPPARAM Parallel processing setup. See
+#'     [BiocParallel::bpparam()] for details.
 #'
 #' @param chunkSize `integer(1)` defining the number of samples from which data
 #'     should be loaded and processed at a time.
@@ -701,8 +703,8 @@ setGeneric("family<-", function(object, value) standardGeneric("family<-"))
 #'     chromatogram.
 #'
 #' @param BPPARAM For `object` being an `XcmsExperiment`: parallel processing
-#'     setup. Defaults to `BPPARAM = bpparam()`. See [bpparam()] for more
-#'     information.
+#'     setup. Defaults to `BPPARAM = bpparam()`. See [BiocParallel::bpparam()]
+#'     for more information.
 #'
 #' @param chunkSize For `object` being an `XcmsExperiment`: `integer(1)`
 #'     defining the number of files from which the data should be loaded at
@@ -869,7 +871,8 @@ setGeneric("featureDefinitions<-", function(object, value)
 #' spectra per feature).
 #'
 #' The information from `featureDefinitions` for each feature can be included
-#' in the returned [Spectra()] object using the `featureColumns` parameter.
+#' in the returned [Spectra::Spectra()] object using the `featureColumns`
+#' parameter.
 #' This is useful for keeping details such as the median retention time (`rtmed`)
 #' or median m/z (`mzmed`). The columns will retain their names as specified
 #' in the `featureDefinitions` object, prefixed by `"feature_"`
@@ -878,9 +881,11 @@ setGeneric("featureDefinitions<-", function(object, value)
 #' as a metadata column named `"feature_id"`.
 #'
 #' See also [chromPeakSpectra()], as it supports a similar parameter for
-#' including columns from the chromatographic peaks in the returned spectra object.
+#' including columns from the chromatographic peaks in the returned spectra
+#' object.
 #' These parameters can be used in combination to include information from both
-#' the chromatographic peaks and the features in the returned [Spectra()].
+#' the chromatographic peaks and the features in the returned
+#' [Spectra::Spectra()].
 #' The *peak ID* (i.e., the row name of the peak in the `chromPeaks` matrix)
 #' is added as a metadata column named `"chrom_peak_id"`.
 #'
@@ -906,7 +911,8 @@ setGeneric("featureDefinitions<-", function(object, value)
 #'
 #' @return
 #'
-#' The function returns either a [Spectra()] (for `return.type = "Spectra"`)
+#' The function returns either a [Spectra::Spectra()] (for
+#' `return.type = "Spectra"`)
 #' or a `List` of `Spectra` (for `return.type = "List"`). For the latter,
 #' the order and the length matches parameter `features` (or if no `features`
 #' is defined the order of the features in `featureDefinitions(object)`).
@@ -1205,7 +1211,7 @@ setGeneric("filterFeatureDefinitions", function(object, ...)
 #'     object will remove previous results.
 #'
 #' @param BPPARAM Parallel processing setup. Uses by default the system-wide
-#'     default setup. See [bpparam()] for more details.
+#'     default setup. See [BiocParallel::bpparam()] for more details.
 #'
 #' @param chunkSize `integer(1)` for `object` being an `MsExperiment` or
 #'     [XcmsExperiment()]: defines the number of files (samples) for which the
@@ -1224,14 +1230,15 @@ setGeneric("filterFeatureDefinitions", function(object, ...)
 #'     will thus in most settings cause an out-of-memory error.
 #'     By setting `chunkSize = -1` the peak detection will be performed
 #'     separately, and in parallel, for each sample. This will however not work
-#'     for all `Spectra` *backends* (see eventually [Spectra()] for details).
+#'     for all `Spectra` *backends* (see [Spectra::Spectra()] for
+#'     details).
 #'
 #' @param msLevel `integer(1)` defining the MS level on which the
 #'     chromatographic peak detection should be performed.
 #'
 #' @param object The data object on which to perform the peak detection. Can be
-#'     an [OnDiskMSnExp()], [XCMSnExp()], [MChromatograms()] or [MsExperiment()]
-#'     object.
+#'     an [MSnbase::OnDiskMSnExp()], [XCMSnExp()], [MSnbase::MChromatograms()]
+#'     or [MsExperiment::MsExperiment()] object.
 #'
 #' @param param The parameter object selecting and configuring the algorithm.
 #'
@@ -1301,7 +1308,8 @@ setGeneric("findChromPeaks", function(object, param, ...)
 #'     more information.
 #'
 #' @param BPPARAM if `object` is an `MsExperiment` or `XcmsExperiment`:
-#'     parallel processing setup. See [bpparam()] for more information.
+#'     parallel processing setup. See [BiocParallel::bpparam()] for more
+#'     information.
 #'
 #' @param ... currently not used.
 #'
@@ -1596,7 +1604,8 @@ setGeneric("loadRaw", function(object, ...) standardGeneric("loadRaw"))
 #' chromatographic peaks into features by providing their index in the
 #' object's `chromPeaks` matrix.
 #'
-#' @param BPPARAM parallel processing settings (see [bpparam()] for details).
+#' @param BPPARAM parallel processing settings (see [BiocParallel::bpparam()]
+#'     for details).
 #'
 #' @param chromPeaks For `manualChromPeaks`: `matrix` defining the boundaries
 #'     of the chromatographic peaks with one row per chromatographic peak and
@@ -1804,9 +1813,9 @@ setGeneric("rawMZ", function(object, ...) standardGeneric("rawMZ"))
 #'   Each MS2 chromatographic peak selected for an MS1 peak will thus represent
 #'   one **mass peak** in the reconstructed spectrum.
 #'
-#' The resulting [Spectra()] object provides also the peak IDs of the MS2
-#' chromatographic peaks for each spectrum as well as their correlation value
-#' with spectra variables *ms2_peak_id* and *ms2_peak_cor*.
+#' The resulting [Spectra::Spectra()] object provides also the peak IDs of
+#' the MS2 chromatographic peaks for each spectrum as well as their
+#' correlation value with spectra variables *ms2_peak_id* and *ms2_peak_cor*.
 #'
 #' @param object `XCMSnExp` with identified chromatographic peaks.
 #'
@@ -1833,8 +1842,8 @@ setGeneric("rawMZ", function(object, ...) standardGeneric("rawMZ"))
 #'     `chromPeaks`) of MS1 peaks for which MS2 spectra should be reconstructed.
 #'     By default they are reconstructed for all MS1 chromatographic peaks.
 #'
-#' @param BPPARAM parallel processing setup. See [bpparam()] for more
-#'     information.
+#' @param BPPARAM parallel processing setup. See [BiocParallel::bpparam()]
+#'     for more information.
 #'
 #' @param return.type `character(1)` defining the type of the returned object.
 #'     Only `return.type = "Spectra"` is supported, `return.type = "MSpectra"`
@@ -1844,14 +1853,14 @@ setGeneric("rawMZ", function(object, ...) standardGeneric("rawMZ"))
 #'
 #' @return
 #'
-#' - [Spectra()] object (defined in the `Spectra` package) with the
+#' - [Spectra::Spectra()] object (defined in the `Spectra` package) with the
 #'   reconstructed MS2 spectra for all MS1 peaks in `object`. Contains
 #'   empty spectra (i.e. without m/z and intensity values) for MS1 peaks for
 #'   which reconstruction was not possible (either no MS2 signal was recorded
 #'   or the correlation of the MS2 chromatographic peaks with the MS1
 #'   chromatographic peak was below threshold `minCor`). Spectra variables
-#'   `"ms2_peak_id"` and `"ms2_peak_cor"` (of type [CharacterList()]
-#'   and [NumericList()] with length equal to the number of peaks per
+#'   `"ms2_peak_id"` and `"ms2_peak_cor"` (of type [IRanges::CharacterList()]
+#'   and [IRanges::NumericList()] with length equal to the number of peaks per
 #'   reconstructed MS2 spectrum) providing the IDs and the correlation of the
 #'   MS2 chromatographic peaks from which the MS2 spectrum was reconstructed.
 #'   As retention time the median retention times of all MS2 chromatographic
@@ -1947,7 +1956,7 @@ setGeneric("reconstructChromPeakSpectra", function(object, ...)
 #'
 #' @param BPPARAM parameter object to set up parallel processing. Uses the
 #'     default parallel processing setup returned by `bpparam()`. See
-#'     [bpparam()] for details and examples.
+#'     [BiocParallel::bpparam()] for details and examples.
 #'
 #' @param chunkSize For `refineChromPeaks` if `object` is either an
 #'     `XcmsExperiment`: `integer(1)` defining the number of files (samples)

diff --git a/R/MsExperiment-functions.R b/R/MsExperiment-functions.R
@@ -546,3 +546,34 @@
     x@sampleDataLinks[["spectra"]] <- sdl
     x
 }
+
+#' Combine a `list` of `MsExperiment` objects into a single object.
+#'
+#' WARNING: this only joins @sampleData, @spectra and
+#' `@sampleDataLinks[["spectra"]]`! All other slots are taken unchanged from
+#' the first element of `x`. Objects with non-empty `experimentFiles`,
+#' `qdata` or `otherData` slots are rejected with an error.
+#'
+#' @param x `list` of objects extending `MsExperiment`.
+#'
+#' @return The first element of `x` with its sample data, spectra and
+#'     sample-to-spectra links replaced by the combined content of all
+#'     elements of `x`.
+#'
+#' @noRd
+.mse_combine <- function(x) {
+    if (!all(vapply(x, inherits, NA, "MsExperiment")))
+        stop("Only objects extending 'MsExperiment' accepted as input.")
+    ## check other slots
+    lapply(x, function(z) {
+        if (length(z@experimentFiles) || length(z@qdata) || length(z@otherData))
+            stop("Slots 'experimentFiles', 'qdata' or 'otherData' are not ",
+                 "empty! Can only combine objects for which these data slots ",
+                 "are empty.", call. = FALSE)
+    })
+    res <- x[[1L]]
+    res@sampleData <- do.call(MsCoreUtils::rbindFill, lapply(x, sampleData))
+    res@spectra <- do.call(c, lapply(x, spectra))
+    sl <- lapply(x, function(z) z@sampleDataLinks[["spectra"]])
+    ## Offset for column 1 of each link matrix: cumulative length of all
+    ## preceding objects.
+    nsamp <- lengths(x)
+    nsamp <- c(0, cumsum(nsamp)[-length(nsamp)])
+    ## Offset for column 2 of each link matrix: cumulative number of *spectra*
+    ## of all preceding objects, because the spectra are concatenated in full
+    ## above. The number of link rows must not be used here: it differs from
+    ## the number of spectra if spectra are unlinked or linked more than once.
+    nspec <- vapply(x, function(z) length(spectra(z)), NA_integer_)
+    nspec <- c(0, cumsum(nspec)[-length(nspec)])
+    res@sampleDataLinks[["spectra"]] <- do.call(
+        rbind, mapply(function(z, i, j) {
+            z[, 1L] <- z[, 1L] + i
+            z[, 2L] <- z[, 2L] + j
+            z
+        }, sl, nsamp, nspec, SIMPLIFY = FALSE, USE.NAMES = FALSE))
+    res
+}
diff --git a/R/XcmsExperiment-functions.R b/R/XcmsExperiment-functions.R
@@ -533,7 +533,7 @@
                 )
                 if ("beta_cor" %in% cn) {
                     res[i, c("beta_cor", "beta_snr")] <- .get_beta_values(
-                        vapply(xsub[nr > 0], function(z) sum(z[, "intensity"]), 
+                        vapply(xsub[nr > 0], function(z) sum(z[, "intensity"]),
                                NA_real_),
                         rt[keep][nr > 0])
                 }
@@ -544,7 +544,7 @@
 }
 
 
-#' Calculates quality metrics for a chromatographic peak. 
+#' Calculates quality metrics for a chromatographic peak.
 #'
 #' @param x `list` of peak matrices (from a single MS level and from a single
 #'     file/sample).
@@ -1209,6 +1209,70 @@ XcmsExperiment <- function() {
     as(MsExperiment(), "XcmsExperiment")
 }
 
+#' Convert an `XCMSnExp` to an `XcmsExperiment`.
+#'
+#' The data is read with `readMsExperiment()` from `fileNames(from)` and then
+#' restricted to the (raw) retention time range of `from`. The process
+#' history is copied; adjusted retention times, chromatographic peaks and
+#' feature definitions are copied if present in `from`.
+#'
+#' @param from `XCMSnExp` object. Its spectra processing queue has to be
+#'     empty.
+#'
+#' @return An `XcmsExperiment` object.
+#'
+#' @noRd
+.xcms_n_exp_to_xcms_experiment <- function(from) {
+    ## `requireNamespace()` returns `FALSE` instead of throwing an error, so
+    ## its result has to be checked explicitly.
+    if (!requireNamespace("MsExperiment", quietly = TRUE))
+        stop("Package 'MsExperiment' is required to convert an 'XCMSnExp' ",
+             "to an 'XcmsExperiment' object.")
+    ## Check requirements:
+    ## - an empty processing queue
+    if (length(from@spectraProcessingQueue))
+        stop("Processing queue is not empty. Can only convert objects with ",
+             "an empty spectra processing queue.")
+    res <- readMsExperiment(spectraFiles = fileNames(from),
+                            sampleData = MSnbase::pData(from))
+    res <- as(res, "XcmsExperiment")
+    res@processHistory <- from@.processHistory
+    ## Restrict the newly read data to the raw retention time range of `from`.
+    res <- filterSpectra(
+        res, filterRt, rt = range(rtime(from, adjusted = FALSE)))
+    if (hasAdjustedRtime(from)) {
+        rts <- rtime(from)
+        ## Adjusted retention times are assigned by position, so the number
+        ## of spectra has to match.
+        if (length(rts) != length(res@spectra))
+            stop("Number of spectra don't match. Was the XCMSnExp subset?")
+        res@spectra$rtime_adjusted <- unname(rts)
+    }
+    if (hasChromPeaks(from)) {
+        res@chromPeaks <- chromPeaks(from)
+        res@chromPeakData <- as.data.frame(chromPeakData(from))
+    }
+    if (hasFeatures(from))
+        res@featureDefinitions <- as.data.frame(featureDefinitions(from))
+    res
+}
+
+#' Concatenate several `XcmsExperiment` objects into a single one.
+#' Chromatographic peaks are the only preprocessing results that are kept:
+#' feature definitions (correspondence) and adjusted retention times
+#' (alignment) are dropped from each object before merging.
+#'
+#' @param x `list` of `XcmsExperiment` objects.
+#'
+#' @return `XcmsExperiment` with the combined data, chromatographic peaks
+#'     and process history of all elements of `x`.
+#'
+#' @noRd
+.xmse_combine <- function(x) {
+    ## Validate one element and strip results that can not be combined.
+    drop_results <- function(obj) {
+        if (!is(obj, "XcmsExperiment"))
+            stop("Only 'XcmsExperiment' objects accepted.")
+        if (hasFeatures(obj))
+            obj <- dropFeatureDefinitions(obj)
+        if (hasAdjustedRtime(obj))
+            obj <- dropAdjustedRtime(obj)
+        obj
+    }
+    x <- lapply(x, drop_results)
+    combined <- .mse_combine(x)
+    ## Shift applied to the "sample" column of each object's peak matrix:
+    ## cumulative length of all preceding objects.
+    n <- lengths(x)
+    shifts <- c(0, cumsum(n)[-length(n)])
+    peak_list <- mapply(function(obj, shift) {
+        pks <- chromPeaks(obj)
+        pks[, "sample"] <- pks[, "sample"] + shift
+        pks
+    }, x, shifts, SIMPLIFY = FALSE, USE.NAMES = FALSE)
+    combined@chromPeaks <- do.call(rbindFill, peak_list)
+    ## Peak IDs are re-generated, IDs of the individual objects are not kept.
+    rownames(combined@chromPeaks) <- .featureIDs(
+        nrow(combined@chromPeaks), "CP")
+    peak_data <- lapply(x, chromPeakData, return.type = "data.frame")
+    combined@chromPeakData <- do.call(rbindFill, peak_data)
+    rownames(combined@chromPeakData) <- rownames(combined@chromPeaks)
+    combined@processHistory <- do.call(c, lapply(x, processHistory))
+    combined
+}
+
 #' function to convert an XcmsExperiment into an XCMSnExp.
 #'
 #' @author Johannes Rainer
@@ -1231,7 +1295,7 @@ XcmsExperiment <- function() {
              " of the Spectra object is not empty.")
     ## -> OnDiskMSnExp
     n@processingData <- new("MSnProcess",
-                            processing = paste0("Data converted [", date(), "]"),
+                            processing = paste0("Data converted [", date(),"]"),
                             files = fileNames(from),
                             smoothed = NA)
     n@phenoData <- new("NAnnotatedDataFrame", as.data.frame(sampleData(from)))