diff --git a/DESCRIPTION b/DESCRIPTION index 2057828e..b1a33c7d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Spectra Title: Spectra Infrastructure for Mass Spectrometry Data -Version: 1.17.1 +Version: 1.17.2 Description: The Spectra package defines an efficient infrastructure for storing and handling mass spectrometry spectra and functionality to subset, process, visualize and compare spectra data. It provides different diff --git a/NAMESPACE b/NAMESPACE index 8d8185f9..d70ef776 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -75,6 +75,7 @@ exportMethods(backendParallelFactor) exportMethods(backendRequiredSpectraVariables) exportMethods(bin) exportMethods(c) +exportMethods(cbind2) exportMethods(centroided) exportMethods(collisionEnergy) exportMethods(combinePeaks) @@ -309,4 +310,5 @@ importMethodsFrom(S4Vectors,extractROWS) importMethodsFrom(S4Vectors,isEmpty) importMethodsFrom(S4Vectors,lapply) importMethodsFrom(S4Vectors,split) +importMethodsFrom(methods,cbind2) importMethodsFrom(methods,show) diff --git a/NEWS.md b/NEWS.md index f498b62d..40622b24 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,11 @@ # Spectra 1.17 +## Change in 1.17.2 + +- Add `cbind2()` method to easily add multiple `spectraVariables` and their + content to the `spectraData` of a `Spectra` object. + See also [issue #342](https://github.com/rformassspectrometry/Spectra/issues/342) + ## Changes in 1.17.1 - Refactor `containsMz()` to support chunk-wise processing. diff --git a/R/MsBackend.R b/R/MsBackend.R index 186f26c6..92b8214a 100644 --- a/R/MsBackend.R +++ b/R/MsBackend.R @@ -182,10 +182,15 @@ #' @param value replacement value for `<-` methods. See individual #' method description or expected data type. #' -#' @param values for `filterValues()`: A `numeric` vector that define the +#' @param values For `filterValues()`: A `numeric` vector that define the #' values to filter the `object`. 
`values` needs to be of same length than #' parameter `spectraVariables` and in the same order. #' +#' @param y For `cbind2()`: A `data.frame` or `DataFrame` with the +#' spectra variables to be added to the backend. Need to be of the same +#' length as the number of spectra in the backend. The number of rows and +#' their order has to match the number of spectra and their order in x. +#' #' @param x Object extending `MsBackend`. #' #' @param ... Additional arguments. @@ -313,6 +318,12 @@ #' `dropNaSpectraVariables()` might still show columns containing `NA` values #' for *core* spectra variables. #' +#' - `cbind2()`: allows appending multiple spectra variables to the backend at +#' once. The `Spectra` and the values for the new spectra variables have to +#' be in a matching order. Replacing existing spectra variables is not +#' supported through this function. For a more controlled way of adding +#' spectra variables, the `joinSpectraData()` should be used. +#' #' - `centroided()`, `centroided<-`: gets or sets the centroiding #' information of the spectra. `centroided()` returns a `logical` #' vector of length equal to the number of spectra with `TRUE` if a @@ -1022,6 +1033,29 @@ setMethod("peaksVariables", "MsBackend", function(object) { c("mz", "intensity") }) + +setClassUnion("dataframeOrDataFrameOrmatrix", c("data.frame", "DataFrame", "matrix")) +#' @exportMethod cbind2 +#' +#' @importMethodsFrom methods cbind2 +#' +#' @rdname MsBackend +setMethod("cbind2", signature = c("MsBackend", "dataframeOrDataFrameOrmatrix"), + function(x, y = data.frame(), ...)
{ + if (is(y, "matrix")) + y <- as.data.frame(y) + if (any(colnames(spectraData(x)) %in% colnames(y))) + stop("spectra variables in 'y' are already present in 'x' ", + "replacing them is not allowed") + if (nrow(y) != length(x)) + stop("Number of rows in 'y' does not match the number of spectra in 'x'") + for (i in colnames(y)) { + x[[i]] <- y[[i]] + } + x +}) + + #' @exportMethod centroided #' #' @aliases centroided<-,MsBackend-method @@ -1344,7 +1378,7 @@ setMethod("filterRanges", "MsBackend", return(object) if (!is.numeric(ranges)) stop("filterRanges only support filtering for numerical ", - "'spectraVariables'") + "'spectraVariables'") match <- match.arg(match) if (is.character(spectraVariables)){ if(!all(spectraVariables %in% spectraVariables(object))) @@ -1354,7 +1388,7 @@ setMethod("filterRanges", "MsBackend", "function to list possible values.") } else stop("The 'spectraVariables' parameter needs to be of type ", - "'character'.") + "'character'.") if (length(spectraVariables) != length(ranges) / 2) stop("Length of 'ranges' needs to be twice the length of ", "the parameter 'spectraVariables' and define the lower ", diff --git a/R/MsBackendDataFrame.R b/R/MsBackendDataFrame.R index 6959d771..28d692cc 100644 --- a/R/MsBackendDataFrame.R +++ b/R/MsBackendDataFrame.R @@ -567,6 +567,25 @@ setMethod("[", "MsBackendDataFrame", function(x, i, j, ..., drop = FALSE) { .subset_backend_data_frame(x, i) }) +#' @importMethodsFrom methods cbind2 +#' +#' @rdname hidden_aliases +setMethod("cbind2", signature = c("MsBackendDataFrame", + "dataframeOrDataFrameOrmatrix"), + function(x, y = data.frame(), ...)
{ + if (is(y, "matrix")) + y <- as.data.frame(y) + if (any(colnames(spectraData(x)) %in% colnames(y))) + stop("spectra variables in 'y' are already present in 'x' ", + "replacing them is not allowed") + if (nrow(y) != length(x)) + stop("Number of rows in 'y' does not match the number of ", + "spectra in 'x'") + x@spectraData <- cbind(x@spectraData, y) + validObject(x) + x + }) + #' @rdname hidden_aliases setMethod("split", "MsBackendDataFrame", function(x, f, drop = FALSE, ...) { if (!is.factor(f)) diff --git a/R/MsBackendMemory.R b/R/MsBackendMemory.R index 4bde69ac..4d232496 100644 --- a/R/MsBackendMemory.R +++ b/R/MsBackendMemory.R @@ -670,6 +670,26 @@ setMethod("[", "MsBackendMemory", function(x, i, j, ..., drop = FALSE) { .df_subset(x, i) }) +#' @importMethodsFrom methods cbind2 +#' +#' @rdname hidden_aliases +setMethod("cbind2", signature = c("MsBackendMemory", + "dataframeOrDataFrameOrmatrix"), + function(x, y = data.frame(), ...) { + if (is(y, "matrix")) + y <- as.data.frame(y) + if (any(colnames(spectraData(x)) %in% colnames(y))) + stop("spectra variables in 'y' are already present in 'x' ", + "replacing them is not allowed") + + if (nrow(y) != length(x)) + stop("Number of rows in 'y' does not match the number of ", + "spectra in 'x'") + x@spectraData <- cbind(x@spectraData, y) + validObject(x) + x + }) + #' @rdname hidden_aliases setMethod("split", "MsBackendMemory", function(x, f, drop = FALSE, ...) { if (!is.factor(f)) diff --git a/R/Spectra.R b/R/Spectra.R index 73520422..aa45025a 100644 --- a/R/Spectra.R +++ b/R/Spectra.R @@ -1447,6 +1447,7 @@ setReplaceMethod("[[", "Spectra", function(x, i, j, ..., value) { #' @aliases combineSpectra #' @aliases split #' @aliases joinSpectraData +#' @aliases cbind2 #' #' @description #' @@ -1463,6 +1464,16 @@ setReplaceMethod("[[", "Spectra", function(x, i, j, ..., value) { #' function and to eventually (if needed) apply the processing queue using #' the [applyProcessing()] function.
#' +#' - `cbind2()`: Appends multiple spectra variables from a `data.frame`, +#' `DataFrame` or `matrix` to the `Spectra` object at once. It does so +#' *blindly* (e.g. do not check rownames compatibility) and is therefore at +#' the risk of the user. The function also does not allow to replace existing +#' spectra variables. For a more controlled way of adding spectra +#' variables, the `joinSpectraData()` should be used. It will return a +#' `Spectra` object with the appended spectra variables. `cbind2()` does +#' check however that the number of rows of the `data.frame` or `DataFrame` +#' matches the number of spectra in the `Spectra` object. +#' #' - `combineSpectra()`: combines sets of spectra (defined with parameter `f`) #' into a single spectrum per set aggregating their MS data (i.e. their #' *peaks data* matrices with the *m/z* and intensity values of their @@ -1507,6 +1518,8 @@ setReplaceMethod("[[", "Spectra", function(x, i, j, ..., value) { #' should be explored and ideally be removed using for #' `QFeatures::reduceDataFrame()`, `PMS::reducePSMs()` or similar #' functions. +#' For a more general function that allows to append `data.frame`, +#' `DataFrame` and `matrix` see `cbind2()`. #' #' - `split()`: splits the `Spectra` object based on parameter `f` into a `list` #' of `Spectra` objects. @@ -1543,7 +1556,10 @@ setReplaceMethod("[[", "Spectra", function(x, i, j, ..., value) { #' #' @param x A `Spectra` object. #' -#' @param y A `DataFrame` with the spectra variables to join/add. +#' @param y For `joinSpectraData()`: `DataFrame` with the spectra variables +#' to join/add. For `cbind2()`: a `data.frame`, `DataFrame` or +#' `matrix`. The number of rows and their order has to match the +#' number of spectra in `x`, respectively their order. #' #' @param ... Additional arguments. 
#' @@ -1660,6 +1676,10 @@ setReplaceMethod("[[", "Spectra", function(x, i, j, ..., value) { #' #' spectraVariables(sciex2) #' spectraData(sciex2)[1:13, c("spectrumId", "var1", "var2")] +#' +#' ## Append new spectra variables with cbind2() +#' df <- data.frame(cola = seq_len(length(sciex1)), colb = "b") +#' data_append <- cbind2(sciex1, df) NULL #' @rdname combineSpectra @@ -1669,6 +1689,16 @@ setMethod("c", "Spectra", function(x, ...) { .concatenate_spectra(unname(list(unname(x), ...))) }) +#' @rdname combineSpectra +#' +#' @export +setMethod("cbind2", signature(x = "Spectra", + y = "dataframeOrDataFrameOrmatrix"), + function(x, y, ...) { + x@backend <- cbind2(x@backend, y, ...) + x + }) + #' @rdname combineSpectra setMethod("split", "Spectra", function(x, f, drop = FALSE, ...) { bcknds <- split(x@backend, f, ...) diff --git a/inst/test_backends/test_MsBackend/test_spectra_subsetting.R b/inst/test_backends/test_MsBackend/test_spectra_subsetting.R index 93adce0d..76d676a4 100644 --- a/inst/test_backends/test_MsBackend/test_spectra_subsetting.R +++ b/inst/test_backends/test_MsBackend/test_spectra_subsetting.R @@ -59,6 +59,28 @@ test_that("[", { expect_equal(res, be[which(l)]) }) +test_that("cbind2 works", { + seql <- length(be) + df <- data.frame(cola = seq_len(seql), colb = "b", colz = "z") + res <- cbind2(be, df) + expect_true(validObject(res)) + expect_equal(ncol(spectraData(res)), length(spectraVariables(be)) + 3) + expect_equal(res$cola, seq_len(seql)) + expect_equal(res$colb, rep("b", seql)) + expect_equal(res$colz, rep("z", seql)) + df2 <- data.frame(cola = seq_len(seql + 1L), colb = "b", colz = "z") + expect_error(cbind2(be, df2), "does not match") + ## with matrix + m <- matrix(seq_len(seql), ncol = 1, dimnames = list(NULL, "m")) + res <- cbind2(be, m) + expect_true(validObject(res)) + expect_equal(ncol(spectraData(res)), length(spectraVariables(be)) + 1) + expect_equal(res$m, seq_len(seql)) + ## no replacing + expect_error(cbind2(be, data.frame(scanIndex = seq_len(seql))), + "are already
present") +}) + #' extractByIndex. Uses [ if not implemented test_that("extractByIndex", { i <- sample(seq_along(be), floor(length(be) / 2)) diff --git a/man/MsBackend.Rd b/man/MsBackend.Rd index 279576a5..3ff8c4e5 100644 --- a/man/MsBackend.Rd +++ b/man/MsBackend.Rd @@ -34,6 +34,7 @@ \alias{acquisitionNum,MsBackend-method} \alias{peaksData,MsBackend-method} \alias{peaksVariables,MsBackend-method} +\alias{cbind2,MsBackend,dataframeOrDataFrameOrmatrix-method} \alias{centroided,MsBackend-method} \alias{centroided<-,MsBackend-method} \alias{collisionEnergy,MsBackend-method} @@ -133,6 +134,8 @@ \S4method{peaksVariables}{MsBackend}(object) +\S4method{cbind2}{MsBackend,dataframeOrDataFrameOrmatrix}(x, y = data.frame(), ...) + \S4method{centroided}{MsBackend}(object) \S4method{centroided}{MsBackend}(object) <- value @@ -325,6 +328,13 @@ in the individual \code{matrix} of the returned \code{list}. Defaults to \code{peaksVariables(object)} and depends on what \emph{peaks variables} the backend provides.} +\item{x}{Object extending \code{MsBackend}.} + +\item{y}{For \code{cbind2()}: A \code{data.frame} or \code{DataFrame} with the +spectra variables to be added to the backend. Need to be of the same +length as the number of spectra in the backend. The number of rows and +their order has to match the number of spectra and their order in x.} + \item{value}{replacement value for \verb{<-} methods. See individual method description or expected data type.} @@ -410,12 +420,10 @@ to be used to subset/filter \code{object}.} \item{msLevel.}{same as \code{msLevel} above.} -\item{values}{for \code{filterValues()}: A \code{numeric} vector that define the +\item{values}{For \code{filterValues()}: A \code{numeric} vector that define the values to filter the \code{object}.
\code{values} needs to be of same length than parameter \code{spectraVariables} and in the same order.} -\item{x}{Object extending \code{MsBackend}.} - \item{use.names}{For \code{lengths()}: whether spectrum names should be used.} \item{drop}{For \code{[}: not considered.} @@ -600,6 +608,11 @@ object's \code{spectraData} that contain only missing values (\code{NA}). Note t while columns with only \code{NA}s are removed, a \code{spectraData()} call after \code{dropNaSpectraVariables()} might still show columns containing \code{NA} values for \emph{core} spectra variables. +\item \code{cbind2()}: allows appending multiple spectra variables to the backend at +once. The \code{Spectra} and the values for the new spectra variables have to +be in a matching order. Replacing existing spectra variables is not +supported through this function. For a more controlled way of adding +spectra variables, the \code{joinSpectraData()} should be used. \item \code{centroided()}, \verb{centroided<-}: gets or sets the centroiding information of the spectra. \code{centroided()} returns a \code{logical} vector of length equal to the number of spectra with \code{TRUE} if a diff --git a/man/combineSpectra.Rd b/man/combineSpectra.Rd index d4f7bdb0..19fcded3 100644 --- a/man/combineSpectra.Rd +++ b/man/combineSpectra.Rd @@ -5,7 +5,9 @@ \alias{combineSpectra} \alias{joinSpectraData} \alias{split} +\alias{cbind2} \alias{c,Spectra-method} +\alias{cbind2,Spectra,dataframeOrDataFrameOrmatrix-method} \alias{split,Spectra,ANY-method} \title{Merging, aggregating and splitting Spectra} \usage{ @@ -24,6 +26,8 @@ joinSpectraData(x, y, by.x = "spectrumId", by.y, suffix.y = ".y") \S4method{c}{Spectra}(x, ...) +\S4method{cbind2}{Spectra,dataframeOrDataFrameOrmatrix}(x, y, ...) + \S4method{split}{Spectra,ANY}(x, f, drop = FALSE, ...) } \arguments{ @@ -48,7 +52,10 @@ of the spectra. Defaults to \code{\link[=combinePeaksData]{combinePeaksData()}}. information.
This is passed directly to the \code{\link[=backendInitialize]{backendInitialize()}} method of the \linkS4class{MsBackend}.} -\item{y}{A \code{DataFrame} with the spectra variables to join/add.} +\item{y}{For \code{joinSpectraData()}: \code{DataFrame} with the spectra variables +to join/add. For \code{cbind2()}: a \code{data.frame}, \code{DataFrame} or +\code{matrix}. The number of rows and their order has to match the +number of spectra in \code{x}, respectively their order.} \item{by.x}{A \code{character(1)} specifying the spectra variable used for merging. Default is \code{"spectrumId"}.} @@ -76,6 +83,15 @@ objects. In such cases it is suggested to first change the backends of all \code{Spectra} to the same type of backend (using the \code{\link[=setBackend]{setBackend()}} function and to eventually (if needed) apply the processing queue using the \code{\link[=applyProcessing]{applyProcessing()}} function. +\item \code{cbind2()}: Appends multiple spectra variables from a \code{data.frame}, +\code{DataFrame} or \code{matrix} to the \code{Spectra} object at once. It does so +\emph{blindly} (e.g. do not check rownames compatibility) and is therefore at +the risk of the user. The function also does not allow to replace existing +spectra variables. For a more controlled way of adding spectra +variables, the \code{joinSpectraData()} should be used. It will return a +\code{Spectra} object with the appended spectra variables. \code{cbind2()} does +check however that the number of rows of the \code{data.frame} or \code{DataFrame} +matches the number of spectra in the \code{Spectra} object. \item \code{combineSpectra()}: combines sets of spectra (defined with parameter \code{f}) into a single spectrum per set aggregating their MS data (i.e. their \emph{peaks data} matrices with the \emph{m/z} and intensity values of their @@ -120,6 +136,8 @@ throw a warning and only the last occurrence is kept. 
These should be explored and ideally be removed using for \code{QFeatures::reduceDataFrame()}, \code{PMS::reducePSMs()} or similar functions. +For a more general function that allows to append \code{data.frame}, +\code{DataFrame} and \code{matrix} see \code{cbind2()}. } \item \code{split()}: splits the \code{Spectra} object based on parameter \code{f} into a \code{list} of \code{Spectra} objects. @@ -228,6 +246,10 @@ sciex2 <- joinSpectraData(sciex1, spv, by.y = "spectrumId") spectraVariables(sciex2) spectraData(sciex2)[1:13, c("spectrumId", "var1", "var2")] + +## Append new spectra variables with cbind2() +df <- data.frame(cola = seq_len(length(sciex1)), colb = "b") +data_append <- cbind2(sciex1, df) } \seealso{ \itemize{ diff --git a/man/hidden_aliases.Rd b/man/hidden_aliases.Rd index ce4e63e9..c03adb62 100644 --- a/man/hidden_aliases.Rd +++ b/man/hidden_aliases.Rd @@ -59,6 +59,7 @@ \alias{tic,MsBackendDataFrame-method} \alias{$,MsBackendDataFrame-method} \alias{$<-,MsBackendDataFrame-method} +\alias{cbind2,MsBackendDataFrame,dataframeOrDataFrameOrmatrix-method} \alias{split,MsBackendDataFrame,ANY-method} \alias{filterAcquisitionNum,MsBackendDataFrame-method} \alias{backendRequiredSpectraVariables,MsBackendHdf5Peaks-method} @@ -133,6 +134,7 @@ \alias{$,MsBackendMemory-method} \alias{$<-,MsBackendMemory-method} \alias{[,MsBackendMemory-method} +\alias{cbind2,MsBackendMemory,dataframeOrDataFrameOrmatrix-method} \alias{split,MsBackendMemory,ANY-method} \alias{filterAcquisitionNum,MsBackendMemory-method} \alias{backendRequiredSpectraVariables,MsBackendMzR-method} @@ -272,6 +274,8 @@ \S4method{[}{MsBackendDataFrame}(x, i, j, ..., drop = FALSE) +\S4method{cbind2}{MsBackendDataFrame,dataframeOrDataFrameOrmatrix}(x, y = data.frame(), ...) + \S4method{split}{MsBackendDataFrame,ANY}(x, f, drop = FALSE, ...) 
\S4method{filterAcquisitionNum}{MsBackendDataFrame}( @@ -432,6 +436,8 @@ \S4method{[}{MsBackendMemory}(x, i, j, ..., drop = FALSE) +\S4method{cbind2}{MsBackendMemory,dataframeOrDataFrameOrmatrix}(x, y = data.frame(), ...) + \S4method{split}{MsBackendMemory,ANY}(x, f, drop = FALSE, ...) \S4method{filterAcquisitionNum}{MsBackendMemory}( diff --git a/tests/testthat/test_MsBackendDataFrame.R b/tests/testthat/test_MsBackendDataFrame.R index 238e945a..2cc04795 100644 --- a/tests/testthat/test_MsBackendDataFrame.R +++ b/tests/testthat/test_MsBackendDataFrame.R @@ -635,6 +635,23 @@ test_that("[,MsBackendDataFrame works", { expect_equal(res, res_2) }) +test_that("cbind2, MsBackendDataFrame works", { + be <- MsBackendDataFrame() + df <- DataFrame(scanIndex = 1:2, a = "a", b = "b") + be <- backendInitialize(be, df) + df2 <- data.frame(cola = 3:4, colb = "b", colz = "z") + res <- cbind2(be, df2) + expect_true(validObject(res)) + expect_equal(ncol(spectraData(res)), ncol(spectraData(be)) + 3) + expect_equal(res$cola, c(3, 4)) + expect_equal(res$colb, c("b", "b")) + expect_equal(res$colz, c("z", "z")) + expect_equal(res$scanIndex, 1:2) + df3 <- data.frame(colv = 1:6, colw = "b") + expect_error(cbind2(be, df3), "does not match") + expect_error(cbind2(be, data.frame(scanIndex = 1:2)), "already present") +}) + test_that("selectSpectraVariables,MsBackendDataFrame works", { be <- MsBackendDataFrame() res <- selectSpectraVariables(be, c("dataStorage", "msLevel")) diff --git a/tests/testthat/test_MsBackendMemory.R b/tests/testthat/test_MsBackendMemory.R index 119e2c56..c4df695f 100644 --- a/tests/testthat/test_MsBackendMemory.R +++ b/tests/testthat/test_MsBackendMemory.R @@ -564,6 +564,23 @@ test_that("[,MsBackendMemory works", { expect_equal(res, res_2) }) +test_that("cbind2, MsBackendMemory works", { + be <- new("MsBackendMemory") + df <- data.frame(scanIndex = 1:2, a = "a", b = "b") + be <- backendInitialize(be, df) + df2 <- data.frame(cola = 3:4, colb = "b", colz = "z") + res <- cbind2(be, df2) + expect_true(validObject(res)) +
expect_equal(ncol(spectraData(res)), ncol(spectraData(be)) + 3) + expect_equal(res$cola, c(3, 4)) + expect_equal(res$colb, c("b", "b")) + expect_equal(res$colz, c("z", "z")) + expect_equal(res$scanIndex, 1:2) + df3 <- data.frame(colv = 1:6, colw = "b") + expect_error(cbind2(be, df3), "does not match") + expect_error(cbind2(be, data.frame(scanIndex = 1:2)), "already present") +}) + test_that("split,MsBackendMemory works", { be <- new("MsBackendMemory") be <- backendInitialize(be, test_df)