diff --git a/.Rbuildignore b/.Rbuildignore index 37442c70..22a5d1be 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,7 +1,6 @@ ^\.github$ .editorconfig .travis.yml -man/figures* local_data favicon logo.png diff --git a/.editorconfig b/.editorconfig index 0cebcc70..71842659 100644 --- a/.editorconfig +++ b/.editorconfig @@ -6,7 +6,7 @@ root = true charset = utf-8 end_of_line = lf trim_trailing_whitespace = true -insert_final_newline = true +insert_final_newline = false [*.R] indent_style = space @@ -22,4 +22,4 @@ indent_style = tab [*.yml] indent_style = space -indent_size = 2 \ No newline at end of file +indent_size = 2 diff --git a/.github/workflows/check-bioc.yml b/.github/workflows/check-bioc.yml index 5c1ebc66..c7c036d7 100644 --- a/.github/workflows/check-bioc.yml +++ b/.github/workflows/check-bioc.yml @@ -22,8 +22,7 @@ on: push: - paths-ignore: - - 'README.md' + pull_request: name: R-CMD-check-bioc @@ -53,9 +52,9 @@ jobs: fail-fast: false matrix: config: - - { os: ubuntu-latest, r: 'devel', bioc: 'devel', cont: "bioconductor/bioconductor_docker:devel", rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" } - - { os: macOS-latest, r: '4.4', bioc: '3.20'} - - { os: windows-latest, r: '4.4', bioc: '3.20'} + - { os: ubuntu-latest, r: 'devel', bioc: '3.19', cont: "bioconductor/bioconductor_docker:devel", rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" } + - { os: macOS-latest, r: 'next', bioc: '3.19'} + - { os: windows-latest, r: 'next', bioc: '3.19'} env: R_REMOTES_NO_ERRORS_FROM_WARNINGS: true RSPM: ${{ matrix.config.rspm }} @@ -126,7 +125,32 @@ jobs: - name: Install macOS system dependencies if: matrix.config.os == 'macOS-latest' run: | - shell: Rscript {0} + ## Enable installing XML from source if needed + brew install libxml2 + echo "XML_CONFIG=/usr/local/opt/libxml2/bin/xml2-config" >> $GITHUB_ENV + + ## Required to install magick as noted at + ## 
https://github.com/r-lib/usethis/commit/f1f1e0d10c1ebc75fd4c18fa7e2de4551fd9978f#diff-9bfee71065492f63457918efcd912cf2 + brew install imagemagick@6 + + ## For textshaping, required by ragg, and required by pkgdown + brew install harfbuzz fribidi + + brew install libgit2 + ## Helps compile RCurl from source + ## brew uninstall curl + + ## required for ncdf4 - can not use the homebrew one because that uses GCC + ## Use pre-compiled libraries from https://mac.r-project.org/libs-4/ + curl -O https://mac.r-project.org/libs-4/netcdf-4.7.4-darwin.17-x86_64.tar.gz + tar fvxzm netcdf-4.7.4-darwin.17-x86_64.tar.gz -C / + rm netcdf-4.7.4-darwin.17-x86_64.tar.gz + curl -O https://mac.r-project.org/libs-4/hdf5-1.12.0-darwin.17-x86_64.tar.gz + tar fvxzm hdf5-1.12.0-darwin.17-x86_64.tar.gz -C / + rm hdf5-1.12.0-darwin.17-x86_64.tar.gz + curl -O https://mac.r-project.org/libs-4/szip-2.1.1-darwin.17-x86_64.tar.gz + tar fvxzm szip-2.1.1-darwin.17-x86_64.tar.gz -C / + rm szip-2.1.1-darwin.17-x86_64.tar.gz - name: Install Windows system dependencies if: runner.os == 'Windows' @@ -239,7 +263,7 @@ jobs: - name: Test coverage if: github.ref == 'refs/heads/main' && env.run_covr == 'true' && runner.os == 'Linux' run: | - covr::codecov(token = "${{ secrets.CODECOV_TOKEN }}") + covr::codecov() shell: Rscript {0} - name: Install package diff --git a/DESCRIPTION b/DESCRIPTION index b1a33c7d..6608f837 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Spectra Title: Spectra Infrastructure for Mass Spectrometry Data -Version: 1.17.2 +Version: 1.15.0 Description: The Spectra package defines an efficient infrastructure for storing and handling mass spectrometry spectra and functionality to subset, process, visualize and compare spectra data. 
It provides different @@ -40,9 +40,9 @@ Authors@R: c(person(given = "RforMassSpectrometry Package Maintainer", Depends: R (>= 4.0.0), S4Vectors, - BiocParallel + BiocParallel, + ProtGenerics (>= 1.35.4) Imports: - ProtGenerics (>= 1.37.1), methods, IRanges, MsCoreUtils (>= 1.7.5), diff --git a/NAMESPACE b/NAMESPACE index d70ef776..3e9d518a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,7 +8,6 @@ export(MsBackendMemory) export(MsBackendMzR) export(PrecursorMzParam) export(applyProcessing) -export(asDataFrame) export(chunkapply) export(combinePeaksData) export(combineSpectra) @@ -17,8 +16,8 @@ export(concatenateSpectra) export(coreSpectraVariables) export(countIdentifications) export(deisotopeSpectra) +export(estimatePrecursorIntensity) export(estimatePrecursorMz) -export(filterPeaksRanges) export(filterPrecursorIsotopes) export(filterPrecursorMaxIntensity) export(filterPrecursorPeaks) @@ -51,16 +50,13 @@ exportMethods("centroided<-") exportMethods("collisionEnergy<-") exportMethods("dataOrigin<-") exportMethods("dataStorage<-") -exportMethods("dataStorageBasePath<-") exportMethods("intensity<-") exportMethods("isolationWindowLowerMz<-") exportMethods("isolationWindowTargetMz<-") exportMethods("isolationWindowUpperMz<-") -exportMethods("msLevel<-") exportMethods("mz<-") exportMethods("peaksData<-") exportMethods("polarity<-") -exportMethods("precursorMz<-") exportMethods("rtime<-") exportMethods("smoothed<-") exportMethods("spectraData<-") @@ -72,7 +68,6 @@ exportMethods(backendBpparam) exportMethods(backendInitialize) exportMethods(backendMerge) exportMethods(backendParallelFactor) -exportMethods(backendRequiredSpectraVariables) exportMethods(bin) exportMethods(c) exportMethods(cbind2) @@ -84,12 +79,9 @@ exportMethods(containsMz) exportMethods(containsNeutralLoss) exportMethods(dataOrigin) exportMethods(dataStorage) -exportMethods(dataStorageBasePath) exportMethods(dropNaSpectraVariables) exportMethods(entropy) -exportMethods(estimatePrecursorIntensity) 
exportMethods(export) -exportMethods(extractByIndex) exportMethods(filterAcquisitionNum) exportMethods(filterDataOrigin) exportMethods(filterDataStorage) @@ -166,7 +158,6 @@ importFrom(MsCoreUtils,coefMA) importFrom(MsCoreUtils,coefSG) importFrom(MsCoreUtils,coefWMA) importFrom(MsCoreUtils,common) -importFrom(MsCoreUtils,common_path) importFrom(MsCoreUtils,entropy) importFrom(MsCoreUtils,group) importFrom(MsCoreUtils,i2index) @@ -210,7 +201,6 @@ importFrom(methods,.hasSlot) importFrom(methods,.valueClassTest) importFrom(methods,as) importFrom(methods,callNextMethod) -importFrom(methods,existsMethod) importFrom(methods,is) importFrom(methods,new) importFrom(methods,setAs) @@ -235,11 +225,9 @@ importMethodsFrom(ProtGenerics,"intensity<-") importMethodsFrom(ProtGenerics,"isolationWindowLowerMz<-") importMethodsFrom(ProtGenerics,"isolationWindowTargetMz<-") importMethodsFrom(ProtGenerics,"isolationWindowUpperMz<-") -importMethodsFrom(ProtGenerics,"msLevel<-") importMethodsFrom(ProtGenerics,"mz<-") importMethodsFrom(ProtGenerics,"peaksData<-") importMethodsFrom(ProtGenerics,"polarity<-") -importMethodsFrom(ProtGenerics,"precursorMz<-") importMethodsFrom(ProtGenerics,"rtime<-") importMethodsFrom(ProtGenerics,"smoothed<-") importMethodsFrom(ProtGenerics,"spectraData<-") @@ -256,7 +244,6 @@ importMethodsFrom(ProtGenerics,collisionEnergy) importMethodsFrom(ProtGenerics,compareSpectra) importMethodsFrom(ProtGenerics,dataOrigin) importMethodsFrom(ProtGenerics,dataStorage) -importMethodsFrom(ProtGenerics,estimatePrecursorIntensity) importMethodsFrom(ProtGenerics,filterAcquisitionNum) importMethodsFrom(ProtGenerics,filterDataOrigin) importMethodsFrom(ProtGenerics,filterDataStorage) diff --git a/NEWS.md b/NEWS.md index 2e469261..e76fb0bb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,80 +1,10 @@ -# Spectra 1.17 +# Spectra 1.15 -## Changes in 1.17.2 +## Changes in 1.15.0 - Add `cbind2()` method to easily add multiple `spectraVariables` to the `spectraData` -## Changes in 1.17.1 - -- 
Refactor `containsMz()` to support chunk-wise processing. - -# Spectra 1.15 - -## Changes in 1.15.13 - -- Add `precursorMz<-` method [issue #336](https://github.com/rformassspectrometry/Spectra/issues/336). - -## Changes in 1.15.12 - -- Add generic `backendRequiredSpectraVariables()` to allow definition of - mandatory spectra variables for a backend. - -## Changes in 1.15.11 - -- Add reference to `MsBackendMetaboLights`. - -## Changes in 1.15.10 - -- Add new `extractSpectra()` generic and implementation for `MsBackend`. Fixes - [issue #5](https://github.com/rformassspectrometry/MsBackendMetaboLights/issues/5). - -## Changes in 1.15.9 - -- Restructure and reorganize documentation for `Spectra`. - -## Changes in 1.15.8 - -- Refactor the `Spectra()` constructor method: better support for - initialization of backends that define their own specific parameters. - -## Changes in 1.15.7 - -- Change `estimatePrecursorIntensity()` to a method to avoid overrides/clashes - with the same-named implementation in *xcms*. - -## Changes in 1.15.6 - -- Fix in `selectSpectraVariables()` for `MsBackendMzR`: ensure peaks variables - `"mz"` and `"intensity"` are not by default removed. - -## Changes in 1.15.5 - -- Add new `filterPeaksRanges()` function to filter mass peaks by ranges on - numeric spectra or peak variables. - -## Changes in 1.15.3 - -- For evaluation of the `Spectra`'s processing queue: call functions from the - *MetaboCoreUtils* directly through their namespace (`MsCoreUtils::`) to avoid - errors if performed in parallel on Windows machines or if called on a - re-loaded object. -- New `asDataFrame()` function to convert a (small) `Spectra` object - into a long `DataFrame`. - -## Changes in 1.15.2 - -- Add `dataStorageDataPath()` and `dataStorageDataPath<-` methods to allow - updating/adapting the path of the data storage files of backends supporting - that [issue #321](https://github.com/rformassspectrometry/Spectra/issues/321). 
- -## Changes in 1.15.1 - -- Improve documentation for `combineSpectra()` and `combinePeaks()` [issue - #320](https://github.com/rformassspectrometry/Spectra/issues/320). - -# Spectra 1.13 - ## Changes in 1.13.8 - Add `estimatePrecursorMz()` function to *estimate* the precursor m/z for DDA diff --git a/R/AllGenerics.R b/R/AllGenerics.R index 856cb69e..f68500ad 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -1,40 +1,42 @@ #' @include hidden_aliases.R NULL -setGeneric("backendRequiredSpectraVariables", function(object, ...) - standardGeneric("backendRequiredSpectraVariables")) #' @rdname hidden_aliases setMethod("bin", "numeric", MsCoreUtils::bin) setGeneric("combinePeaks", function(object, ...) standardGeneric("combinePeaks")) +#' @rdname hidden_aliases setGeneric("containsMz", function(object, ...) standardGeneric("containsMz")) +#' @rdname hidden_aliases setGeneric("containsNeutralLoss", function(object, ...) standardGeneric("containsNeutralLoss")) -setGeneric("dataStorageBasePath", function(object, ...) - standardGeneric("dataStorageBasePath")) -setGeneric("dataStorageBasePath<-", function(object, ..., value) - standardGeneric("dataStorageBasePath<-")) +#' @rdname hidden_aliases setGeneric("dropNaSpectraVariables", function(object, ...) standardGeneric("dropNaSpectraVariables")) +#' @rdname hidden_aliases setGeneric("entropy", function(object, ...) standardGeneric("entropy")) +#' @rdname hidden_aliases setGeneric("export", function(object, ...) standardGeneric("export")) -setGeneric("extractByIndex", function(object, i) - standardGeneric("extractByIndex")) setGeneric("filterFourierTransformArtefacts", function(object, ...) standardGeneric("filterFourierTransformArtefacts")) +#' @rdname neutralLoss setGeneric("neutralLoss", function(object, param, ...) standardGeneric("neutralLoss")) +#' @rdname hidden_aliases setGeneric("pickPeaks", function(object, ...) standardGeneric("pickPeaks")) setGeneric("plotSpectraMirror", function(x, y, ...) 
standardGeneric("plotSpectraMirror")) +#' @rdname hidden_aliases setGeneric("replaceIntensitiesBelow", function(object, threshold = min, ...) standardGeneric("replaceIntensitiesBelow")) +#' @rdname hidden_aliases setGeneric("reset", function(object, ...) standardGeneric("reset")) +#' @rdname hidden_aliases setGeneric("selectSpectraVariables", function(object, ...) standardGeneric("selectSpectraVariables")) setGeneric("Spectra", function(object, ...) standardGeneric("Spectra")) diff --git a/R/MsBackend.R b/R/MsBackend.R index 010dc963..74945cbf 100644 --- a/R/MsBackend.R +++ b/R/MsBackend.R @@ -11,14 +11,6 @@ #' @aliases backendInitialize #' @aliases backendParallelFactor,MsBackendMzR-method #' @aliases backendParallelFactor,MsBackendHdf5Peaks-method -#' @aliases dataStorageBasePath -#' @aliases dataStorageBasePath,MsBackendMzR-method -#' @aliases dataStorageBasePath<- -#' @aliases dataStorageBasePath<-,MsBackendMzR-method -#' @aliases extractByIndex -#' @aliases msLeveL<-,MsBackend-method -#' @aliases backendRequiredSpectraVariables -#' @aliases backendRequiredSpectraVariables,MsBackend-method #' #' @description #' @@ -230,9 +222,7 @@ #' allowed. Parameter `i` should support `integer` indices and `logical` #' and should throw an error if `i` is out of bounds. The #' `MsCoreUtils::i2index` could be used to check the input `i`. -#' For `i = integer()` an empty backend should be returned. Implementation -#' of this method is optional, as the default calls the `extractByIndex()` -#' method (which has to be implemented as the main subsetting method). +#' For `i = integer()` an empty backend should be returned. #' #' - `$`, `$<-`: access or set/add a single spectrum variable (column) in the #' backend. Using a `value` of `NULL` should allow deleting the specified @@ -286,13 +276,6 @@ #' `MsBackendMzR` on the other hand returns `factor(dataStorage(object))` #' hence suggesting to split the object by data file. 
#' -#' - `backendRequiredSpectraVariables()`: returns a `character` with spectra -#' variable names that are mandatory for a specific backend. The default -#' returns an empty `character()`. The implementation for `MsBackendMzR` -#' returns `c("dataStorage", "scanIndex")` as these two spectra variables -#' are required to load the MS data on-the-fly. This method needs only to -#' be implemented if a backend requires specific variables to be defined. -#' #' - `dataOrigin()`: gets a `character` of length equal to the number of #' spectra in `object` with the *data origin* of each spectrum. This could #' e.g. be the mzML file from which the data was read. @@ -301,16 +284,6 @@ #' spectra in `object` with the data storage of each spectrum. Note that #' missing values (`NA_character_`) are not supported for `dataStorage`. #' -#' - `dataStorageBasePath()`, `dataStorageBasePath<-: gets or sets the common -#' *base* path of the directory containing all data files. If supported, -#' the function is expected to return (or accept) a `character` of length 1. -#' Most backends (such as for example the `MsBackendMemory` will not support -#' this function and `dataStorageBasePath()` will return `NA_character_`. -#' For `MsBackendMzR`, this function allows to get or change the path to the -#' directory containing the original data files, which is required if e.g. -#' a serialized `MsBackendMzR` instance gets copied to another computer or -#' file system. -#' #' - `dropNaSpectraVariables()`: removes spectra variables (i.e. columns in the #' object's `spectraData` that contain only missing values (`NA`). Note that #' while columns with only `NA`s are removed, a `spectraData()` call after @@ -349,17 +322,6 @@ #' *mzML* or *mzXML* format. See the documentation for the `MsBackendMzR` #' class below for more information. #' -#' - `extractByIndex()`: function to subset a backend to selected elements -#' defined by the provided index. 
Similar to `[`, this method should allow -#' extracting (or to subset) the data in any order. In contrast to `[`, -#' however, `i` is expected to be an `integer` (while `[` should also -#' support `logical` and eventually `character`). While being apparently -#' redundant to `[`, this methods avoids package namespace errors/problems -#' that can result in implementations of `[` being not found by R (which -#' can happen sometimes in parallel processing using the [SnowParam()]). This -#' method is used internally by `Spectra` to extract/subset its backend. -#' Implementation of this method is mandatory. -#' #' - `filterAcquisitionNum()`: filters the object keeping only spectra matching #' the provided acquisition numbers (argument `n`). If `dataOrigin` or #' `dataStorage` is also provided, `object` is subsetted to the spectra with @@ -507,8 +469,6 @@ #' vector (of length equal to the number of spectra) with the MS #' level for each spectrum (or `NA_integer_` if not available). #' -#' - `msLevel<-`: replaces the spectra's MS level. -#' #' - `mz()`: gets the mass-to-charge ratios (m/z) from the #' spectra. Returns a [NumericList()] or length equal to the number of #' spectra, each element a `numeric` vector with the m/z values of @@ -737,7 +697,7 @@ #' #' The parameters are: #' - `object`: an instance of the `MsBackendMzR` class. -#' - `x`: the [Spectra] object to be exported. +#' - `x`: the [Spectra-class] object to be exported. #' - `file`: `character` with the (full) output file name(s). Should be #' of length 1 or equal `length(x)`. If a single file is specified, all #' spectra are exported to that file. Alternatively it is possible to specify @@ -750,7 +710,7 @@ #' backend and if `dataOrigin(x)` contains the original MS data file names. #' - `BPPARAM`: parallel processing settings. #' -#' See examples in [Spectra] or the vignette for more details and +#' See examples in [Spectra-class] or the vignette for more details and #' examples. 
#' #' The `MsBackendMzR` ignores parameter `columns` of the `peaksData()` @@ -809,7 +769,7 @@ #' #' @return See documentation of respective function. #' -#' @author Johannes Rainer, Sebastian Gibb, Laurent Gatto, Philippine Louail +#' @author Johannes Rainer, Sebastian Gibb, Laurent Gatto #' #' @md #' @@ -937,8 +897,6 @@ setValidity("MsBackend", function(object) { #' @exportMethod backendBpparam #' #' @rdname MsBackend -#' -#' @export setMethod("backendBpparam", signature = "MsBackend", function(object, BPPARAM = bpparam()) { BPPARAM @@ -949,8 +907,6 @@ setMethod("backendBpparam", signature = "MsBackend", #' @importMethodsFrom ProtGenerics backendInitialize #' #' @rdname MsBackend -#' -#' @export setMethod("backendInitialize", signature = "MsBackend", function(object, ...) { validObject(object) object @@ -966,8 +922,6 @@ setMethod("backendMerge", "list", function(object, ...) { #' @exportMethod backendMerge #' #' @rdname MsBackend -#' -#' @export setMethod("backendMerge", "MsBackend", function(object, ...) { stop("Not implemented for ", class(object), ".") }) @@ -977,21 +931,11 @@ setMethod("backendMerge", "MsBackend", function(object, ...) { #' @exportMethod backendParallelFactor #' #' @rdname MsBackend -#' -#' @export setMethod("backendParallelFactor", "MsBackend", function(object, ...) { factor() }) -#' @export -setMethod("backendRequiredSpectraVariables", "MsBackend", - function(object, ...) { - character() - }) - #' @rdname MsBackend -#' -#' @export setMethod("export", "MsBackend", function(object, ...) { stop(class(object), " does not support export of data; please provide a ", "backend that supports data export with parameter 'backend'.") @@ -1002,8 +946,6 @@ setMethod("export", "MsBackend", function(object, ...) 
{ #' @importMethodsFrom ProtGenerics acquisitionNum #' #' @rdname MsBackend -#' -#' @export setMethod("acquisitionNum", "MsBackend", function(object) { stop("Not implemented for ", class(object), ".") }) @@ -1013,8 +955,6 @@ setMethod("acquisitionNum", "MsBackend", function(object) { #' @importMethodsFrom ProtGenerics peaksData #' #' @rdname MsBackend -#' -#' @export setMethod("peaksData", "MsBackend", function(object, columns = c("mz", "intensity")) { stop("Not implemented for ", class(object), ".") @@ -1025,8 +965,6 @@ setMethod("peaksData", "MsBackend", function(object, #' @importMethodsFrom ProtGenerics peaksVariables #' #' @rdname MsBackend -#' -#' @export setMethod("peaksVariables", "MsBackend", function(object) { c("mz", "intensity") }) @@ -1058,8 +996,6 @@ setMethod("cbind2", signature = c("MsBackend", "dataframeOrDataFrameOrmatrix"), #' @importMethodsFrom ProtGenerics centroided #' #' @rdname MsBackend -#' -#' @export setMethod("centroided", "MsBackend", function(object) { stop("Not implemented for ", class(object), ".") }) @@ -1069,8 +1005,6 @@ setMethod("centroided", "MsBackend", function(object) { #' @importMethodsFrom ProtGenerics centroided<- #' #' @rdname MsBackend -#' -#' @export setReplaceMethod("centroided", "MsBackend", function(object, value) { stop("Not implemented for ", class(object), ".") }) @@ -1080,8 +1014,6 @@ setReplaceMethod("centroided", "MsBackend", function(object, value) { #' @importMethodsFrom ProtGenerics collisionEnergy #' #' @rdname MsBackend -#' -#' @export setMethod("collisionEnergy", "MsBackend", function(object) { stop("Not implemented for ", class(object), ".") }) @@ -1091,8 +1023,6 @@ setMethod("collisionEnergy", "MsBackend", function(object) { #' @importMethodsFrom ProtGenerics collisionEnergy<- #' #' @rdname MsBackend -#' -#' @export setReplaceMethod("collisionEnergy", "MsBackend", function(object, value) { stop("Not implemented for ", class(object), ".") }) @@ -1102,8 +1032,6 @@ setReplaceMethod("collisionEnergy", 
"MsBackend", function(object, value) { #' @importMethodsFrom ProtGenerics dataOrigin #' #' @rdname MsBackend -#' -#' @export setMethod("dataOrigin", "MsBackend", function(object) { stop("Not implemented for ", class(object), ".") }) @@ -1113,8 +1041,6 @@ setMethod("dataOrigin", "MsBackend", function(object) { #' @importMethodsFrom ProtGenerics dataOrigin<- #' #' @rdname MsBackend -#' -#' @export setReplaceMethod("dataOrigin", "MsBackend", function(object, value) { stop("Not implemented for ", class(object), ".") }) @@ -1124,8 +1050,6 @@ setReplaceMethod("dataOrigin", "MsBackend", function(object, value) { #' @importMethodsFrom ProtGenerics dataStorage #' #' @rdname MsBackend -#' -#' @export setMethod("dataStorage", "MsBackend", function(object) { stop("Method 'dataStorage' is not implemented for ", class(object), ".") }) @@ -1135,8 +1059,6 @@ setMethod("dataStorage", "MsBackend", function(object) { #' @importMethodsFrom ProtGenerics dataStorage<- #' #' @rdname MsBackend -#' -#' @export setReplaceMethod("dataStorage", "MsBackend", function(object, value) { stop("Method 'dataStorage' is not implemented for ", class(object), ".") }) @@ -1144,12 +1066,9 @@ setReplaceMethod("dataStorage", "MsBackend", function(object, value) { #' @exportMethod dropNaSpectraVariables #' #' @rdname MsBackend -#' -#' @export setMethod("dropNaSpectraVariables", "MsBackend", function(object) { svs <- spectraVariables(object) - req_cols <- c(backendRequiredSpectraVariables(object), c("mz", "intensity")) - svs <- svs[!(svs %in% req_cols)] + svs <- svs[!(svs %in% c("mz", "intensity"))] spd <- spectraData(object, columns = svs) keep <- !vapply1l(spd, function(z) { allna <- all(is.na(z)) @@ -1157,25 +1076,7 @@ setMethod("dropNaSpectraVariables", "MsBackend", function(object) { FALSE else allna }) - selectSpectraVariables(object, c(svs[keep], req_cols)) -}) - -#' @rdname MsBackend -#' -#' @importFrom methods existsMethod -#' -#' @export -setMethod("extractByIndex", c("MsBackend", "ANY"), 
function(object, i) { - if (existsMethod("[", class(object)[1L])) - object[i = i] - else stop("'extractByIndex' not implemented for ", class(object)[1L], ".") -}) - -#' @rdname MsBackend -#' -#' @export -setMethod("extractByIndex", c("MsBackend", "missing"), function(object, i) { - object + selectSpectraVariables(object, c(svs[keep], "mz", "intensity")) }) #' @exportMethod filterAcquisitionNum @@ -1183,8 +1084,6 @@ setMethod("extractByIndex", c("MsBackend", "missing"), function(object, i) { #' @importMethodsFrom ProtGenerics filterAcquisitionNum #' #' @rdname MsBackend -#' -#' @export setMethod("filterAcquisitionNum", "MsBackend", function(object, n, file, ...) { stop("Not implemented for ", class(object), ".") }) @@ -1194,8 +1093,6 @@ setMethod("filterAcquisitionNum", "MsBackend", function(object, n, file, ...) { #' @importMethodsFrom ProtGenerics filterDataOrigin #' #' @rdname MsBackend -#' -#' @export setMethod("filterDataOrigin", "MsBackend", function(object, dataOrigin = character()) { if (length(dataOrigin)) { @@ -1211,8 +1108,6 @@ setMethod("filterDataOrigin", "MsBackend", #' @importMethodsFrom ProtGenerics filterDataStorage #' #' @rdname MsBackend -#' -#' @export setMethod("filterDataStorage", "MsBackend", function(object, dataStorage = character()) { if (length(dataStorage)) { @@ -1228,8 +1123,6 @@ setMethod("filterDataStorage", "MsBackend", #' @importMethodsFrom ProtGenerics filterEmptySpectra #' #' @rdname MsBackend -#' -#' @export setMethod("filterEmptySpectra", "MsBackend", function(object, ...) { if (!length(object)) return(object) object[as.logical(lengths(object))] @@ -1240,8 +1133,6 @@ setMethod("filterEmptySpectra", "MsBackend", function(object, ...) { #' @importMethodsFrom ProtGenerics filterIsolationWindow #' #' @rdname MsBackend -#' -#' @export setMethod("filterIsolationWindow", "MsBackend", function(object, mz = numeric(), ...) 
{ if (length(mz)) { @@ -1258,8 +1149,6 @@ setMethod("filterIsolationWindow", "MsBackend", #' @importMethodsFrom ProtGenerics filterMsLevel #' #' @rdname MsBackend -#' -#' @export setMethod("filterMsLevel", "MsBackend", function(object, msLevel = integer()) { if (length(msLevel)) { @@ -1272,8 +1161,6 @@ setMethod("filterMsLevel", "MsBackend", #' @importMethodsFrom ProtGenerics filterPolarity #' #' @rdname MsBackend -#' -#' @export setMethod("filterPolarity", "MsBackend", function(object, polarity = integer()) { if (length(polarity)) @@ -1286,8 +1173,6 @@ setMethod("filterPolarity", "MsBackend", #' @importMethodsFrom ProtGenerics filterPrecursorMzRange #' #' @rdname MsBackend -#' -#' @export setMethod("filterPrecursorMzRange", "MsBackend", function(object, mz = numeric()) { if (length(mz)) { @@ -1300,8 +1185,6 @@ setMethod("filterPrecursorMzRange", "MsBackend", #' @importMethodsFrom ProtGenerics filterPrecursorMz #' #' @rdname MsBackend -#' -#' @export setMethod("filterPrecursorMz", "MsBackend", function(object, mz = numeric()) { filterPrecursorMzRange(object, mz) @@ -1312,8 +1195,6 @@ setMethod("filterPrecursorMz", "MsBackend", #' @importMethodsFrom ProtGenerics filterPrecursorMzValues #' #' @rdname MsBackend -#' -#' @export setMethod("filterPrecursorMzValues", "MsBackend", function(object, mz = numeric(), ppm = 20, tolerance = 0) { if (length(mz)) { @@ -1327,8 +1208,6 @@ setMethod("filterPrecursorMzValues", "MsBackend", #' @importMethodsFrom ProtGenerics filterPrecursorCharge #' #' @rdname MsBackend -#' -#' @export setMethod("filterPrecursorCharge", "MsBackend", function(object, z = integer()) { if (length(z)) { @@ -1342,8 +1221,6 @@ setMethod("filterPrecursorCharge", "MsBackend", #' @importMethodsFrom ProtGenerics filterPrecursorScan #' #' @rdname MsBackend -#' -#' @export setMethod("filterPrecursorScan", "MsBackend", function(object, acquisitionNum = integer(), f = dataOrigin(object)) { if (length(acquisitionNum) && length(f)) { @@ -1364,8 +1241,6 @@ 
setMethod("filterPrecursorScan", "MsBackend", #' @importFrom MsCoreUtils between #' #' @rdname MsBackend -#' -#' @export setMethod("filterRanges", "MsBackend", function(object, spectraVariables = character(), ranges = numeric(), match = c("all", "any")){ @@ -1407,8 +1282,6 @@ setMethod("filterRanges", "MsBackend", #' @importMethodsFrom ProtGenerics filterRt #' #' @rdname MsBackend -#' -#' @export setMethod("filterRt", "MsBackend", function(object, rt = numeric(), msLevel. = uniqueMsLevels(object)) { if (length(rt)) { @@ -1426,8 +1299,6 @@ setMethod("filterRt", "MsBackend", #' @importFrom MsCoreUtils ppm #' #' @rdname MsBackend -#' -#' @export setMethod("filterValues", "MsBackend", function(object, spectraVariables = character(), values = numeric(), ppm = 0, tolerance = 0, match = c("all", "any")){ @@ -1473,8 +1344,6 @@ setMethod("filterValues", "MsBackend", #' @importMethodsFrom ProtGenerics intensity #' #' @rdname MsBackend -#' -#' @export setMethod("intensity", "MsBackend", function(object) { stop("Not implemented for ", class(object), ".") }) @@ -1484,8 +1353,6 @@ setMethod("intensity", "MsBackend", function(object) { #' @importMethodsFrom ProtGenerics intensity<- #' #' @rdname MsBackend -#' -#' @export setReplaceMethod("intensity", "MsBackend", function(object, value) { stop("Not implemented for ", class(object), ".") }) @@ -1497,8 +1364,6 @@ setReplaceMethod("intensity", "MsBackend", function(object, value) { #' @importFrom MsCoreUtils vapply1d #' #' @rdname MsBackend -#' -#' @export setMethod("ionCount", "MsBackend", function(object) { vapply1d(intensity(object), sum, na.rm = TRUE) }) @@ -1509,8 +1374,6 @@ setMethod("ionCount", "MsBackend", function(object) { #' @importFrom MsCoreUtils vapply1l #' #' @rdname MsBackend -#' -#' @export setMethod("isCentroided", "MsBackend", function(object, ...) { vapply1l(peaksData(object), .peaks_is_centroided) }) @@ -1520,8 +1383,6 @@ setMethod("isCentroided", "MsBackend", function(object, ...) 
{ #' @rdname MsBackend #' #' @importMethodsFrom S4Vectors isEmpty -#' -#' @export setMethod("isEmpty", "MsBackend", function(x) { stop("Not implemented for ", class(x), ".") }) @@ -1531,8 +1392,6 @@ setMethod("isEmpty", "MsBackend", function(x) { #' @importMethodsFrom ProtGenerics isolationWindowLowerMz #' #' @rdname MsBackend -#' -#' @export setMethod("isolationWindowLowerMz", "MsBackend", function(object) { stop("Not implemented for ", class(object), ".") }) @@ -1542,8 +1401,6 @@ setMethod("isolationWindowLowerMz", "MsBackend", function(object) { #' @importMethodsFrom ProtGenerics isolationWindowLowerMz<- #' #' @rdname MsBackend -#' -#' @export setReplaceMethod("isolationWindowLowerMz", "MsBackend", function(object, value) { stop("Not implemented for ", class(object), ".") @@ -1554,8 +1411,6 @@ setReplaceMethod("isolationWindowLowerMz", "MsBackend", function(object, #' @importMethodsFrom ProtGenerics isolationWindowTargetMz #' #' @rdname MsBackend -#' -#' @export setMethod("isolationWindowTargetMz", "MsBackend", function(object) { stop("Not implemented for ", class(object), ".") }) @@ -1565,8 +1420,6 @@ setMethod("isolationWindowTargetMz", "MsBackend", function(object) { #' @importMethodsFrom ProtGenerics isolationWindowTargetMz<- #' #' @rdname MsBackend -#' -#' @export setReplaceMethod("isolationWindowTargetMz", "MsBackend", function(object, value) { stop("Not implemented for ", class(object), ".") @@ -1577,8 +1430,6 @@ setReplaceMethod("isolationWindowTargetMz", "MsBackend", function(object, #' @importMethodsFrom ProtGenerics isolationWindowUpperMz #' #' @rdname MsBackend -#' -#' @export setMethod("isolationWindowUpperMz", "MsBackend", function(object) { stop("Not implemented for ", class(object), ".") }) @@ -1588,8 +1439,6 @@ setMethod("isolationWindowUpperMz", "MsBackend", function(object) { #' @importMethodsFrom ProtGenerics isolationWindowUpperMz<- #' #' @rdname MsBackend -#' -#' @export setReplaceMethod("isolationWindowUpperMz", "MsBackend", 
function(object, value) { stop("Not implemented for ", class(object), ".") @@ -1600,8 +1449,6 @@ setReplaceMethod("isolationWindowUpperMz", "MsBackend", function(object, #' @importMethodsFrom ProtGenerics isReadOnly #' #' @rdname MsBackend -#' -#' @export setMethod("isReadOnly", "MsBackend", function(object) { object@readonly }) @@ -1609,8 +1456,6 @@ setMethod("isReadOnly", "MsBackend", function(object) { #' @exportMethod length #' #' @rdname MsBackend -#' -#' @export setMethod("length", "MsBackend", function(x) { stop("Not implemented for ", class(x), ".") }) @@ -1620,28 +1465,15 @@ setMethod("length", "MsBackend", function(x) { #' @importMethodsFrom ProtGenerics msLevel #' #' @rdname MsBackend -#' -#' @export setMethod("msLevel", "MsBackend", function(object) { stop("Not implemented for ", class(object), ".") }) -#' @importMethodsFrom ProtGenerics msLevel<- -#' -#' @rdname MsBackend -#' -#' @export -setReplaceMethod("msLevel", "MsBackend", function(object, value) { - stop("Not implemented for ", class(object), ".") -}) - #' @exportMethod mz #' #' @importMethodsFrom ProtGenerics mz #' #' @rdname MsBackend -#' -#' @export setMethod("mz", "MsBackend", function(object) { stop("Not implemented for ", class(object), ".") }) @@ -1651,8 +1483,6 @@ setMethod("mz", "MsBackend", function(object) { #' @importMethodsFrom ProtGenerics mz<- #' #' @rdname MsBackend -#' -#' @export setReplaceMethod("mz", "MsBackend", function(object, value) { stop("Not implemented for ", class(object), ".") }) @@ -1667,8 +1497,6 @@ setMethod("lengths", "MsBackend", function(x, use.names = FALSE) { #' @importMethodsFrom ProtGenerics polarity #' #' @rdname MsBackend -#' -#' @export setMethod("polarity", "MsBackend", function(object) { stop("Not implemented for ", class(object), ".") }) @@ -1678,8 +1506,6 @@ setMethod("polarity", "MsBackend", function(object) { #' @importMethodsFrom ProtGenerics polarity<- #' #' @rdname MsBackend -#' -#' @export setReplaceMethod("polarity", "MsBackend", 
function(object, value) { stop("Not implemented for ", class(object), ".") }) @@ -1689,8 +1515,6 @@ setReplaceMethod("polarity", "MsBackend", function(object, value) { #' @importMethodsFrom ProtGenerics precScanNum #' #' @rdname MsBackend -#' -#' @export setMethod("precScanNum", "MsBackend", function(object) { stop("Not implemented for ", class(object), ".") }) @@ -1700,8 +1524,6 @@ setMethod("precScanNum", "MsBackend", function(object) { #' @importMethodsFrom ProtGenerics precursorCharge #' #' @rdname MsBackend -#' -#' @export setMethod("precursorCharge", "MsBackend", function(object) { stop("Not implemented for ", class(object), ".") }) @@ -1711,8 +1533,6 @@ setMethod("precursorCharge", "MsBackend", function(object) { #' @importMethodsFrom ProtGenerics precursorIntensity #' #' @rdname MsBackend -#' -#' @export setMethod("precursorIntensity", "MsBackend", function(object) { stop("Not implemented for ", class(object), ".") }) @@ -1722,31 +1542,14 @@ setMethod("precursorIntensity", "MsBackend", function(object) { #' @importMethodsFrom ProtGenerics precursorMz #' #' @rdname MsBackend -#' -#' @export setMethod("precursorMz", "MsBackend", function(object) { stop("Not implemented for ", class(object), ".") }) -#' @exportMethod precursorMz<- -#' -#' @importMethodsFrom ProtGenerics precursorMz<- -#' -#' @rdname MsBackend -#' -#' @export -setReplaceMethod("precursorMz", "MsBackend", function(object, ..., value) { - object$precursorMz <- value - object -}) - #' @exportMethod peaksData<- #' #' @importMethodsFrom ProtGenerics peaksData<- -#' #' @rdname MsBackend -#' -#' @export setReplaceMethod("peaksData", "MsBackend", function(object, value) { stop("Not implemented for ", class(object), ".") }) @@ -1754,8 +1557,6 @@ setReplaceMethod("peaksData", "MsBackend", function(object, value) { #' @exportMethod reset #' #' @rdname MsBackend -#' -#' @export setMethod("reset", "MsBackend", function(object) { object }) @@ -1765,8 +1566,6 @@ setMethod("reset", "MsBackend", 
function(object) { #' @importMethodsFrom ProtGenerics rtime #' #' @rdname MsBackend -#' -#' @export setMethod("rtime", "MsBackend", function(object) { stop("Not implemented for ", class(object), ".") }) @@ -1776,8 +1575,6 @@ setMethod("rtime", "MsBackend", function(object) { #' @importMethodsFrom ProtGenerics rtime<- #' #' @rdname MsBackend -#' -#' @export setReplaceMethod("rtime", "MsBackend", function(object, value) { stop("Not implemented for ", class(object), ".") }) @@ -1787,8 +1584,6 @@ setReplaceMethod("rtime", "MsBackend", function(object, value) { #' @importMethodsFrom ProtGenerics scanIndex #' #' @rdname MsBackend -#' -#' @export setMethod("scanIndex", "MsBackend", function(object) { stop("Not implemented for ", class(object), ".") }) @@ -1796,8 +1591,6 @@ setMethod("scanIndex", "MsBackend", function(object) { #' @exportMethod selectSpectraVariables #' #' @rdname MsBackend -#' -#' @export setMethod( "selectSpectraVariables", "MsBackend", function(object, spectraVariables = spectraVariables(object)) { @@ -1809,8 +1602,6 @@ setMethod( #' @importMethodsFrom ProtGenerics smoothed #' #' @rdname MsBackend -#' -#' @export setMethod("smoothed", "MsBackend", function(object) { stop("Not implemented for ", class(object), ".") }) @@ -1822,8 +1613,6 @@ setMethod("smoothed", "MsBackend", function(object) { #' @importMethodsFrom ProtGenerics smoothed<- #' #' @rdname MsBackend -#' -#' @export setReplaceMethod("smoothed", "MsBackend", function(object, value) { stop("Not implemented for ", class(object), ".") }) @@ -1831,8 +1620,6 @@ setReplaceMethod("smoothed", "MsBackend", function(object, value) { #' @exportMethod spectraData #' #' @rdname MsBackend -#' -#' @export setMethod( "spectraData", "MsBackend", function(object, columns = spectraVariables(object)) { @@ -1842,8 +1629,6 @@ setMethod( #' @exportMethod spectraData<- #' #' @rdname MsBackend -#' -#' @export setReplaceMethod("spectraData", "MsBackend", function(object, value) { stop("Not implemented for ", 
class(object), ".") }) @@ -1853,8 +1638,6 @@ setReplaceMethod("spectraData", "MsBackend", function(object, value) { #' @importMethodsFrom ProtGenerics spectraNames #' #' @rdname MsBackend -#' -#' @export setMethod("spectraNames", "MsBackend", function(object) { stop("Not implemented for ", class(object), ".") }) @@ -1864,8 +1647,6 @@ setMethod("spectraNames", "MsBackend", function(object) { #' @importMethodsFrom ProtGenerics spectraNames<- #' #' @rdname MsBackend -#' -#' @export setReplaceMethod("spectraNames", "MsBackend", function(object, value) { stop("Not implemented for ", class(object), ".") }) @@ -1875,8 +1656,6 @@ setReplaceMethod("spectraNames", "MsBackend", function(object, value) { #' @importMethodsFrom ProtGenerics spectraVariables #' #' @rdname MsBackend -#' -#' @export setMethod("spectraVariables", "MsBackend", function(object) { stop("Not implemented for ", class(object), ".") }) @@ -1886,8 +1665,6 @@ setMethod("spectraVariables", "MsBackend", function(object) { #' @importMethodsFrom S4Vectors split #' #' @rdname MsBackend -#' -#' @export setMethod("split", "MsBackend", function(x, f, drop = FALSE, ...) { split.default(x, f, drop = drop, ...) }) @@ -1897,8 +1674,6 @@ setMethod("split", "MsBackend", function(x, f, drop = FALSE, ...) { #' @exportMethod supportsSetBackend #' #' @rdname MsBackend -#' -#' @export setMethod("supportsSetBackend", "MsBackend", function(object, ...) { !isReadOnly(object) }) @@ -1908,8 +1683,6 @@ setMethod("supportsSetBackend", "MsBackend", function(object, ...) 
{ #' @importMethodsFrom ProtGenerics tic #' #' @rdname MsBackend -#' -#' @export setMethod("tic", "MsBackend", function(object, initial = TRUE) { stop("Not implemented for ", class(object), ".") }) @@ -1917,17 +1690,13 @@ setMethod("tic", "MsBackend", function(object, initial = TRUE) { #' @exportMethod [ #' #' @rdname MsBackend -#' -#' @export setMethod("[", "MsBackend", function(x, i, j, ..., drop = FALSE) { - extractByIndex(x, i2index(i, length = length(x))) + stop("Not implemented for ", class(x), ".") }) #' @exportMethod $ #' #' @rdname MsBackend -#' -#' @export setMethod("$", "MsBackend", function(x, name) { stop("Not implemented for ", class(x), ".") }) @@ -1935,8 +1704,6 @@ setMethod("$", "MsBackend", function(x, name) { #' @exportMethod $<- #' #' @rdname MsBackend -#' -#' @export setReplaceMethod("$", "MsBackend", function(x, name, value) { stop("Not implemented for ", class(x), ".") }) @@ -1944,8 +1711,6 @@ setReplaceMethod("$", "MsBackend", function(x, name, value) { #' @exportMethod [[ #' #' @rdname MsBackend -#' -#' @export setMethod("[[", "MsBackend", function(x, i, j, ...) { if (!is.character(i)) stop("'i' is supposed to be a character defining the spectra ", @@ -1958,8 +1723,6 @@ setMethod("[[", "MsBackend", function(x, i, j, ...) { #' @exportMethod [[<- #' #' @rdname MsBackend -#' -#' @export setReplaceMethod("[[", "MsBackend", function(x, i, j, ..., value) { if (!is.character(i)) stop("'i' is supposed to be a character defining the spectra ", @@ -1974,29 +1737,6 @@ setReplaceMethod("[[", "MsBackend", function(x, i, j, ..., value) { #' @importMethodsFrom ProtGenerics uniqueMsLevels #' #' @rdname MsBackend -#' -#' @export setMethod("uniqueMsLevels", "MsBackend", function(object, ...) 
{ unique(msLevel(object)) }) - -#' @exportMethod dataStorageBasePath -#' -#' @rdname MsBackend -#' -#' @export -setMethod("dataStorageBasePath", "MsBackend", function(object) { - NA_character_ -}) - -#' @exportMethod dataStorageBasePath<- -#' -#' @rdname MsBackend -#' -#' @export -setReplaceMethod( - "dataStorageBasePath", "MsBackend", function(object, value) { - warning(class(object)[1L], " does not support changing", - " 'dataStorageBasePath'.") - object - }) diff --git a/R/MsBackendCached.R b/R/MsBackendCached.R index e2f4d4d2..5628037d 100644 --- a/R/MsBackendCached.R +++ b/R/MsBackendCached.R @@ -294,15 +294,6 @@ setMethod("dataStorage", "MsBackendCached", function(object) { rep("", length(object)) }) -#' @rdname MsBackendCached -setMethod("extractByIndex", c("MsBackendCached", "ANY"), - function(object, i) { - slot(object, "localData", check = FALSE) <- - object@localData[i, , drop = FALSE] - object@nspectra <- nrow(object@localData) - object -}) - #' @rdname MsBackendCached setMethod("length", "MsBackendCached", function(x) { x@nspectra @@ -437,7 +428,7 @@ setMethod("show", "MsBackendCached", function(object) { cat(class(object), "with", n, "spectra\n") if (n) { idx <- unique(c(1L:min(6L, n), max(1L, n-5L):n)) - spd <- spectraData(extractByIndex(object, idx), + spd <- spectraData(object[idx, ], c("msLevel", "precursorMz", "polarity")) if (!length(rownames(spd))) rownames(spd) <- idx @@ -464,6 +455,7 @@ setMethod("centroided", "MsBackendCached", function(object) { #' @rdname MsBackendCached setReplaceMethod("centroided", "MsBackendCached", function(object, value) { object$centroided <- value + validObject(object) object }) @@ -475,6 +467,7 @@ setMethod("collisionEnergy", "MsBackendCached", function(object) { #' @rdname MsBackendCached setReplaceMethod("collisionEnergy", "MsBackendCached", function(object, value) { object$collisionEnergy <- value + validObject(object) object }) @@ -486,6 +479,7 @@ setMethod("dataOrigin", "MsBackendCached", function(object) { 
#' @rdname MsBackendCached setReplaceMethod("dataOrigin", "MsBackendCached", function(object, value) { object$dataOrigin <- value + validObject(object) object }) @@ -522,6 +516,7 @@ setMethod("isolationWindowLowerMz", "MsBackendCached", function(object) { setReplaceMethod("isolationWindowLowerMz", "MsBackendCached", function(object, value) { object$isolationWindowLowerMz <- value + validObject(object) object }) @@ -534,6 +529,7 @@ setMethod("isolationWindowTargetMz", "MsBackendCached", function(object) { setReplaceMethod("isolationWindowTargetMz", "MsBackendCached", function(object, value) { object$isolationWindowTargetMz <- value + validObject(object) object }) @@ -546,6 +542,7 @@ setMethod("isolationWindowUpperMz", "MsBackendCached", function(object) { setReplaceMethod("isolationWindowUpperMz", "MsBackendCached", function(object, value) { object$isolationWindowUpperMz <- value + validObject(object) object }) @@ -568,6 +565,7 @@ setMethod("polarity", "MsBackendCached", function(object) { setReplaceMethod("polarity", "MsBackendCached", function(object, value) { if (is.numeric(value)) value <- as.integer(value) object$polarity <- value + validObject(object) object }) @@ -594,6 +592,7 @@ setMethod("rtime", "MsBackendCached", function(object) { #' @rdname MsBackendCached setReplaceMethod("rtime", "MsBackendCached", function(object, value) { object$rtime <- value + validObject(object) object }) @@ -610,5 +609,6 @@ setMethod("smoothed", "MsBackendCached", function(object) { #' @rdname MsBackendCached setReplaceMethod("smoothed", "MsBackendCached", function(object, value) { object$smoothed <- value + validObject(object) object }) diff --git a/R/MsBackendDataFrame.R b/R/MsBackendDataFrame.R index b83b0b72..a97a36fc 100644 --- a/R/MsBackendDataFrame.R +++ b/R/MsBackendDataFrame.R @@ -22,8 +22,7 @@ setClass("MsBackendDataFrame", version = "0.2")) setValidity("MsBackendDataFrame", function(object) { - msg <- .valid_spectra_data_required_columns( - object@spectraData, 
backendRequiredSpectraVariables(object)) + msg <- .valid_spectra_data_required_columns(object@spectraData) if (length(msg)) return(msg) msg <- c( @@ -93,12 +92,6 @@ setMethod("backendMerge", "MsBackendDataFrame", function(object, ...) { res }) -#' @rdname hidden_aliases -setMethod("backendRequiredSpectraVariables", "MsBackendDataFrame", - function(object, ...) { - "dataStorage" - }) - ## Data accessors #' @rdname hidden_aliases @@ -188,14 +181,6 @@ setReplaceMethod("dataStorage", "MsBackendDataFrame", function(object, value) { object }) -#' @rdname hidden_aliases -setMethod("extractByIndex", c("MsBackendDataFrame", "ANY"), - function(object, i) { - slot(object, "spectraData", check = FALSE) <- - extractROWS(object@spectraData, i) - object - }) - #' @rdname hidden_aliases setMethod("intensity", "MsBackendDataFrame", function(object) { if (any(colnames(object@spectraData) == "intensity")) @@ -420,18 +405,16 @@ setMethod("selectSpectraVariables", "MsBackendDataFrame", paste(spectraVariables[!(spectraVariables %in% spectraVariables(object))], collapse = ", "), " not available") - bv <- backendRequiredSpectraVariables(object) - if (!all(bv %in% spectraVariables)) - stop("Spectra variables ", - paste(bv[!bv %in% spectraVariables], collapse = ","), - " are required by the backend") keep <- spectraVariables[spectraVariables %in% - colnames(object@spectraData)] + colnames(object@spectraData)] if (length(keep)) object@spectraData <- object@spectraData[, keep, drop = FALSE] + msg <- .valid_spectra_data_required_columns(object@spectraData) + if (length(msg)) + stop(msg) object@peaksVariables <- intersect(object@peaksVariables, - spectraVariables) + colnames(object@spectraData)) validObject(object) object }) @@ -561,8 +544,6 @@ setReplaceMethod("$", "MsBackendDataFrame", function(x, name, value) { #' @importFrom MsCoreUtils i2index #' #' @rdname hidden_aliases -#' -#' @export setMethod("[", "MsBackendDataFrame", function(x, i, j, ..., drop = FALSE) { 
.subset_backend_data_frame(x, i) }) @@ -602,5 +583,5 @@ setMethod("filterAcquisitionNum", "MsBackendDataFrame", "acquisition number(s) for sub-setting") sel_file <- .sel_file(object, dataStorage, dataOrigin) sel_acq <- acquisitionNum(object) %in% n & sel_file - extractByIndex(object, which(sel_acq | !sel_file)) + object[sel_acq | !sel_file] }) diff --git a/R/MsBackendHdf5Peaks.R b/R/MsBackendHdf5Peaks.R index 27f14753..e5482803 100644 --- a/R/MsBackendHdf5Peaks.R +++ b/R/MsBackendHdf5Peaks.R @@ -26,8 +26,8 @@ setClass("MsBackendHdf5Peaks", prototype = prototype(version = "0.1", readonly = FALSE)) setValidity("MsBackendHdf5Peaks", function(object) { - msg <- .valid_spectra_data_required_columns( - object@spectraData, backendRequiredSpectraVariables(object)) + msg <- .valid_spectra_data_required_columns(object@spectraData, + c("dataStorage", "scanIndex")) fls <- unique(object@spectraData$dataStorage) msg <- c(msg, .valid_ms_backend_mod_count(object@modCount, fls)) msg <- c(msg, .valid_ms_backend_files_exist(fls)) @@ -36,12 +36,6 @@ setValidity("MsBackendHdf5Peaks", function(object) { else msg }) -#' @rdname hidden_aliases -setMethod("backendRequiredSpectraVariables", "MsBackendHdf5Peaks", - function(object, ...) { - c("dataStorage", "scanIndex") - }) - #' @rdname hidden_aliases #' #' @importFrom fs path_sanitize @@ -297,20 +291,6 @@ setMethod("[", "MsBackendHdf5Peaks", function(x, i, j, ..., drop = FALSE) { x }) -#' @rdname hidden_aliases -#' -#' @aliases [,MsBackendHdf5Peaks-method -setMethod("extractByIndex", c("MsBackendHdf5Peaks", "ANY"), - function(object, i) { - fls <- unique(object@spectraData$dataStorage) - slot(object, "spectraData", check = FALSE) <- - extractROWS(object@spectraData, i) - slot(object, "modCount", check = FALSE) <- - object@modCount[match( - unique(object@spectraData$dataStorage), fls)] - object -}) - #' @rdname hidden_aliases setMethod("backendMerge", "MsBackendHdf5Peaks", function(object, ...) 
{ object <- unname(c(object, ...)) diff --git a/R/MsBackendMemory.R b/R/MsBackendMemory.R index 52b6a75a..3f6770c2 100644 --- a/R/MsBackendMemory.R +++ b/R/MsBackendMemory.R @@ -122,12 +122,6 @@ setMethod("backendMerge", "MsBackendMemory", function(object, ...) { res }) -#' @rdname hidden_aliases -setMethod("backendRequiredSpectraVariables", "MsBackendMemory", - function(object, ...) { - "dataStorage" - }) - ## Data accessors #' @rdname hidden_aliases @@ -198,18 +192,6 @@ setReplaceMethod("dataStorage", "MsBackendMemory", function(object, value) { object }) -#' @rdname hidden_aliases -setMethod("extractByIndex", c("MsBackendMemory", "ANY"), function(object, i) { - slot(object, "spectraData", check = FALSE) <- - object@spectraData[i, , drop = FALSE] - if (length(object@peaksData)) - slot(object, "peaksData", check = FALSE) <- object@peaksData[i] - if (length(object@peaksDataFrame)) - slot(object, "peaksDataFrame", check = FALSE) <- - object@peaksDataFrame[i] - object -}) - #' @rdname hidden_aliases setMethod("intensity", "MsBackendMemory", function(object) { if (length(object)) { @@ -520,8 +502,7 @@ setMethod("selectSpectraVariables", "MsBackendMemory", z[, keep, drop = FALSE]) } } - msg <- .valid_spectra_data_required_columns( - object@spectraData, backendRequiredSpectraVariables(object)) + msg <- .valid_spectra_data_required_columns(object@spectraData) if (length(msg)) stop(msg) validObject(object) diff --git a/R/MsBackendMzR.R b/R/MsBackendMzR.R index a7930e0d..74b00308 100644 --- a/R/MsBackendMzR.R +++ b/R/MsBackendMzR.R @@ -24,20 +24,14 @@ setClass("MsBackendMzR", prototype = prototype(version = "0.1", readonly = TRUE)) setValidity("MsBackendMzR", function(object) { - msg <- .valid_spectra_data_required_columns( - object@spectraData, backendRequiredSpectraVariables(object)) + msg <- .valid_spectra_data_required_columns(object@spectraData, + c("dataStorage", "scanIndex")) msg <- c(msg, .valid_ms_backend_files_exist( unique(object@spectraData$dataStorage))) if 
(length(msg)) msg else TRUE }) -#' @rdname hidden_aliases -setMethod("backendRequiredSpectraVariables", "MsBackendMzR", - function(object, ...) { - c("dataStorage", "scanIndex") - }) - #' @rdname hidden_aliases #' #' @importFrom methods callNextMethod @@ -49,14 +43,12 @@ setMethod("backendRequiredSpectraVariables", "MsBackendMzR", #' @importFrom BiocParallel bpparam setMethod("backendInitialize", "MsBackendMzR", function(object, files, ..., BPPARAM = bpparam()) { - if (missing(files)) + if (missing(files) || !length(files)) stop("Parameter 'files' is mandatory for 'MsBackendMzR'") if (!is.character(files)) stop("Parameter 'files' is expected to be a character vector", " with the files names from where data should be", " imported") - if (!length(files)) - return(object) files <- normalizePath(files, mustWork = FALSE) msg <- .valid_ms_backend_files_exist(files) if (length(msg)) @@ -222,21 +214,3 @@ setMethod("export", "MsBackendMzR", function(object, x, file = tempfile(), setMethod("backendParallelFactor", "MsBackendMzR", function(object) { factor(dataStorage(object), levels = unique(dataStorage(object))) }) - -#' @importFrom MsCoreUtils common_path -setMethod("dataStorageBasePath", "MsBackendMzR", function(object) { - common_path(dataStorage(object)) -}) - -setReplaceMethod( - "dataStorageBasePath", "MsBackendMzR", function(object, value) { - ds <- dataStorage(object) - ds <- gsub("\\", "/", ds, fixed = TRUE) - value <- gsub("\\", "/", value, fixed = TRUE) - cp <- common_path(ds) - ds <- sub(cp, value, ds, fixed = TRUE) - if (!all(file.exists(unique(ds)))) - stop("Provided path does not contain all data files.") - dataStorage(object) <- normalizePath(ds) - object - }) diff --git a/R/Spectra-estimatePrecursorMz.R b/R/Spectra-estimatePrecursorMz.R index ad6ff630..72743d57 100644 --- a/R/Spectra-estimatePrecursorMz.R +++ b/R/Spectra-estimatePrecursorMz.R @@ -55,10 +55,6 @@ #' #' @author Mar Garcia-Aloy, Johannes Rainer #' -#' @seealso -#' -#' [addProcessing()] for 
other data analysis and manipulation functions. -#' #' @export #' #' @examples diff --git a/R/Spectra-functions.R b/R/Spectra-functions.R index 93d9f2db..12a82aea 100644 --- a/R/Spectra-functions.R +++ b/R/Spectra-functions.R @@ -63,13 +63,7 @@ NULL #' @description #' #' This function applies the processing queue and an arbitrary function to -#' the peaks matrix of each spectrum of the `Spectra` object `object`. It has -#' build-in parallel and/or chunk-wise processing enabled through parameter -#' `f`, that allows to define how the `Spectra` (or rather its backend) needs -#' to be splitted. The default `f = .parallel_processing_factor(object)` splits -#' the backend by chunk (if a finite chunk size is defined for the `Spectra`) -#' or by it's optimal parallel processing factor. See the description of -#' the `.parallel_processing_factor()` function below for information. +#' the peaks matrix of each spectrum of the `Spectra` object `object`. #' #' @param object `Spectra` object. #' @@ -84,8 +78,7 @@ NULL #' #' @param f `factor` or `vector` that can be coerced to one defining how the #' data should be split for parallel processing. Set to `NULL` or -#' `factor()` to disable splitting and parallel processing. See function -#' description above for details and information. +#' `factor()` to disable splitting and parallel processing. #' #' @param columns `character` defining the columns that should be returned. #' This will be passed to the backend's `peaksData` function. @@ -214,7 +207,7 @@ NULL #' @export applyProcessing #' -#' @rdname addProcessing +#' @rdname Spectra applyProcessing <- function(object, f = processingChunkFactor(object), BPPARAM = bpparam(), ...) 
{ queue <- object@processingQueue @@ -243,9 +236,8 @@ applyProcessing <- function(object, f = processingChunkFactor(object), }, queue = queue, pv = pv, svars = svars, BPPARAM = BPPARAM) bknds <- backendMerge(bknds) if (is.unsorted(f)) - bknds <- extractByIndex( - bknds, order(unlist(split(seq_along(bknds), f), - use.names = FALSE))) + bknds <- bknds[order(unlist(split(seq_along(bknds), f), + use.names = FALSE))] object@backend <- bknds } else { if (length(svars)) @@ -546,14 +538,14 @@ applyProcessing <- function(object, f = processingChunkFactor(object), #' @export concatenateSpectra #' -#' @rdname combineSpectra +#' @rdname Spectra concatenateSpectra <- function(x, ...) { .concatenate_spectra(unlist(unname(list(unname(x), ...)))) } #' @export combineSpectra #' -#' @rdname combineSpectra +#' @rdname Spectra combineSpectra <- function(x, f = x$dataStorage, p = x$dataStorage, FUN = combinePeaksData, ..., BPPARAM = bpparam()) { if (!is.factor(f)) @@ -578,8 +570,39 @@ combineSpectra <- function(x, f = x$dataStorage, p = x$dataStorage, #' @description #' -#' Check for presence of an m/z value in each spectrum. Each spectrum gets -#' its own m/z. +#' Internal function to check if any (or all) of the provided `mz` values are +#' in the spectras' m/z. +#' +#' @param x `Spectra` object +#' +#' @param mz `numeric` of m/z value(s) to check in each spectrum of `x`. +#' +#' @param tolarance `numeric(1)` with the tolerance. +#' +#' @param ppm `numeric(1)` with the ppm. +#' +#' @param condFun `function` such as `any` or `all`. +#' +#' @param parallel `BiocParallel` parameter object. +#' +#' @return `logical` same length than `x`. 
+#' +#' @author Johannes Rainer +#' +#' @importFrom MsCoreUtils common +#' +#' @noRd +.has_mz <- function(x, mz = numeric(), tolerance = 0, ppm = 20, condFun = any, + parallel = SerialParam()) { + mzs <- mz(x, BPPARAM = parallel) + vapply(mzs, FUN = function(z) + condFun(common(mz, z, tolerance = tolerance, ppm = ppm)), logical(1)) +} + +#' @description +#' +#' Same as `.has_mz` only that a different `mz` is used for each spectrum in +#' `x`. Length of `mz` is thus expected to be equal to length of `x`. #' #' @param mz `numeric` **same length as `x`**. #' @@ -599,7 +622,7 @@ combineSpectra <- function(x, f = x$dataStorage, p = x$dataStorage, #' @export joinSpectraData #' -#' @rdname combineSpectra +#' @rdname Spectra joinSpectraData <- function(x, y, by.x = "spectrumId", by.y, @@ -662,11 +685,87 @@ joinSpectraData <- function(x, y, #' @export #' -#' @rdname addProcessing +#' @rdname Spectra processingLog <- function(x) { x@processing } +#' @title Estimate Precursor Intensities +#' +#' @description +#' +#' Some MS instrument manufacturers don't provide precursor intensities for +#' fragment spectra. These can however be estimated, given that also MS1 +#' spectra are available. The `estimatePrecursorIntensity()` funtion defines the +#' precursor intensities for MS2 spectra using the intensity of the matching +#' MS1 peak from the closest MS1 spectrum (i.e. the last MS1 spectrum measured +#' before the respective MS2 spectrum). With `method = "interpolation"` it is +#' also possible to calculate the precursor intensity based on an interpolation +#' of intensity values (and retention times) of the matching MS1 peaks from the +#' previous and next MS1 spectrum. See below for an example. +#' +#' @param x `Spectra` with MS1 and MS2 spectra. +#' +#' @param ppm `numeric(1)` with the maximal allowed relative difference of m/z +#' values between the precursor m/z of a spectrum and the m/z of the +#' respective ion on the MS1 scan. 
+#' +#' @param tolerance `numeric(1)` with the maximal allowed difference of m/z +#' values between the precursor m/z of a spectrum and the m/z of the +#' respective ion on the MS1 scan. +#' +#' @param method `character(1)` defining whether the precursor intensity +#' should be estimated on the previous MS1 spectrum (`method = "previous"`, +#' the default) or based on an interpolation on the previous and next +#' MS1 spectrum (`method = "interpolation"`). +#' +#' @param msLevel. `integer(1)` the MS level for which precursor intensities +#' should be estimated. Defaults to `2L`. +#' +#' @param f `factor` (or vector to be coerced to `factor`) defining which +#' spectra belong to the same original data file (sample). +#' Defaults to `f = dataOrigin(x)`. +#' +#' @param BPPARAM Parallel setup configuration. See [bpparam()] for more +#' information. This is passed directly to the [backendInitialize()] method +#' of the [MsBackend-class]. +#' +#' @author Johannes Rainer with feedback and suggestions from Corey Broeckling +#' +#' @export +#' +#' @rdname estimatePrecursorIntensity +#' +#' @examples +#' +#' #' ## Calculating the precursor intensity for MS2 spectra: +#' ## +#' ## Some MS instrument manufacturer don't report the precursor intensities +#' ## for MS2 spectra. The `estimatePrecursorIntensity` function can be used +#' ## in these cases to calculate the precursor intensity on MS1 data. Below +#' ## we load an mzML file from a vendor providing precursor intensities and +#' ## compare the estimated and reported precursor intensities. +#' tmt <- Spectra(msdata::proteomics(full.names = TRUE)[5], +#' backend = MsBackendMzR()) +#' pmi <- estimatePrecursorIntensity(tmt) +#' plot(pmi, precursorIntensity(tmt)) +#' +#' ## We can also replace the original precursor intensity values with the +#' ## newly calculated ones +#' tmt$precursorIntensity <- pmi +estimatePrecursorIntensity <- function(x, ppm = 20, tolerance = 0, + method = c("previous", "interpolation"), + msLevel. 
= 2L, f = dataOrigin(x), + BPPARAM = bpparam()) { + if (is.factor(f)) + f <- as.character(f) + f <- factor(f, levels = unique(f)) + BPPARAM <- backendBpparam(x@backend, BPPARAM) + unlist(bplapply(split(x, f), FUN = .estimate_precursor_intensity, ppm = ppm, + tolerance = tolerance, method = method, msLevel = msLevel., + BPPARAM = BPPARAM), use.names = FALSE) +} + #' estimate precursor intensities based on MS1 peak intensity. This function #' assumes that `x` is a `Spectra` with data **from a single file/sample**. #' @@ -808,7 +907,9 @@ chunkapply <- function(x, FUN, ..., chunkSize = 1000L, chunks = factor()) { as.factor(rep(1:ceiling(len / chunkSize), each = chunkSize)[seq_len(len)]) } -#' @rdname filterMsLevel +#' @rdname Spectra +#' +#' @author Nir Shahaf, Johannes Rainer #' #' @export deisotopeSpectra <- @@ -820,7 +921,9 @@ deisotopeSpectra <- substDefinition = im, charge = charge) } -#' @rdname filterMsLevel +#' @rdname Spectra +#' +#' @author Nir Shahaf, Johannes Rainer #' #' @export reduceSpectra <- function(x, tolerance = 0, ppm = 20) { @@ -829,7 +932,9 @@ reduceSpectra <- function(x, tolerance = 0, ppm = 20) { addProcessing(x, .peaks_reduce, tolerance = tolerance, ppm = ppm) } -#' @rdname filterMsLevel +#' @rdname Spectra +#' +#' @author Nir Shahaf #' #' @export filterPrecursorMaxIntensity <- function(x, tolerance = 0, ppm = 20) { @@ -862,7 +967,9 @@ filterPrecursorMaxIntensity <- function(x, tolerance = 0, ppm = 20) { x } -#' @rdname filterMsLevel +#' @rdname Spectra +#' +#' @author Nir Shahaf #' #' @export filterPrecursorIsotopes <- @@ -895,7 +1002,9 @@ filterPrecursorIsotopes <- x } -#' @rdname addProcessing +#' @rdname Spectra +#' +#' @author Johannes Rainer #' #' @export scalePeaks <- function(x, by = sum, msLevel. = uniqueMsLevels(x)) { @@ -908,7 +1017,7 @@ scalePeaks <- function(x, by = sum, msLevel. 
= uniqueMsLevels(x)) { x } -#' @rdname filterMsLevel +#' @rdname Spectra #' #' @export filterPrecursorPeaks <- function(object, tolerance = 0, ppm = 20, @@ -959,11 +1068,6 @@ filterPrecursorPeaks <- function(object, tolerance = 0, ppm = 20, #' per file parallel processing if `f` or `chunkSize` is not defined. #' Other on-disk backends: only if requested by the user. #' -#' @param BPPARAM Parallel setup configuration. See [bpparam()] for more -#' information. -#' -#' @param object `Spectra` object. -#' #' @param x `Spectra` object. #' #' @param chunkSize `integer` defining the size of chunks into which `x` should @@ -1039,11 +1143,6 @@ filterPrecursorPeaks <- function(object, tolerance = 0, ppm = 20, #' For these, the `backendBpparam()` function will always return a #' `SerialParam()` independently on how parallel processing was defined. #' -#' @param BPPARAM Parallel setup configuration. See [bpparam()] for more -#' information. -#' -#' @param object `Spectra` object. -#' #' @param x `Spectra`. #' #' @param value `integer(1)` defining the chunk size. @@ -1083,189 +1182,3 @@ processingChunkFactor <- function(x) { stop("'x' is supposed to be a 'Spectra' object") .parallel_processing_factor(x) } - -#' @title Filter peaks based on spectra and peaks variable ranges -#' -#' @description -#' -#' The `filterPeaksRanges()` function allows to filter the peaks matrices of a -#' [Spectra] object using any set of range-based filters on numeric spectra -#' variables or peaks variables. These ranges can be passed to the function -#' using the `...` as ` = ` pairs. `` -#' has to be an available spectra or peaks variable. `` can be a -#' `numeric` of length 2 defining the lower and upper boundary, or a `numeric` -#' two-column matrix (multi-row matrices are also supported, see further -#' below). `filterPeaksRanges(s, mz = c(200, 300))` would for example reduce -#' the peaks matrices of the `Spectra` object `s` to mass peaks with an m/z -#' value between 200 and 300. 
`filterPeaksRanges()` returns the original -#' `Spectra` object with the filter operation added to the processing queue. -#' Thus, the filter gets **only** applied when the peaks data gets extracted -#' with `mz()`, `intensity()` or `peaksData()`. If ranges for both spectra -#' **and** peaks variables are defined, the function evaluates first whether -#' the spectra variable value for a spectrum is within the provided range and, -#' if so, applies also the peaks variable-based filter (otherwise an empty -#' peaks matrix is returned). -#' -#' If more than one spectra variable and/or peaks variable are defined, their -#' filter results are combined with a logical AND: a peak matrix is only -#' returned for a spectrum if all values of spectra variables are within the -#' provided (respective) ranges for spectra variables, and this matrix is -#' further filtered to contain only those peaks which values are within the -#' provided peaks variable ranges. -#' -#' **Filtering with multiple ranges** per spectra and peaks variables is also -#' supported: ranges can also be provided as multi-row numeric (two-column) -#' matrices. In this case, the above described procedure is applied for each -#' row separately and their results are combined with a logical OR, i.e. -#' peaks matrices are returned that match any of the conditions/filters -#' of a row. The number of rows of the provided ranges (being it for spectra -#' or peaks variables) have to match. -#' -#' **Missing value handling**: any comparison which involves a missing value -#' (being it a spectra variable value, a peaks variable value or a value -#' in one of the provided ranges) is treated as a logical `FALSE`. For -#' example, if the retention time of a spectrum is `NA` and the data is -#' filtered using a retention time range, an empty peaks matrix is returned -#' (for `keep = TRUE`, for `keep = FALSE` the full peaks matrix is returned). 
-#' -#' @note -#' -#' In contrast to some other *filter* functions, this function does not provide -#' a `msLevel` parameter that allows to define the MS level of spectra on which -#' the filter should be applied. The filter(s) will always be applied to -#' **all** spectra (irrespectively of their MS level). Through combination of -#' multiple filter ranges it is however possible to apply MS level-dependent -#' filters (see examples below for details). -#' -#' The filter will not be applied immediately to the data but only executed when -#' the mass peak data is accessed (through `peaksData()`, `mz()` or -#' `intensity()`) or by calling `applyProcessing()`. -#' -#' @param object A [Spectra] object. -#' -#' @param ... the ranges for the spectra and/or peaks variables. Has to be -#' provided as ` = ` pairs with `` being the name of a -#' spectra or peaks variable (of numeric data type) and `` being -#' either a `numeric` of length 2 or a `numeric` two column matrix (see -#' function desription above for details), -#' -#' @param keep `logical(1)` whether to keep (default) or remove peaks that -#' match the provided range(s). 
-#' -#' @author Johannes Rainer -#' -#' @name filterPeaksRanges -#' -#' @export -#' -#' @examples -#' -#' ## Define a test Spectra -#' d <- data.frame(rtime = c(123.2, 134.2), msLevel = c(1L, 2L)) -#' d$mz <- list(c(100.1, 100.2, 100.3, 200.1, 200.2, 300.3), -#' c(100.3, 100.4, 200.2, 400.3, 400.4)) -#' ## Use the index of the mass peak within the spectrum as index for -#' ## better illustration of filtering results -#' d$intensity <- list(c(1:6), 1:5) -#' s <- Spectra(d) -#' s -#' -#' ## Filter peaks removing all mass peaks with an m/z between 200 and 300 -#' res <- filterPeaksRanges(s, mz = c(200, 300), keep = FALSE) -#' res -#' -#' ## The Spectra object has still the same length and spectra variables -#' length(res) -#' res$rtime -#' -#' ## The filter gets applied when mass peak data gets extracted, using either -#' ## `mz()`, `intensity()` or `peaksData()`. The filtered peaks data does -#' ## not contain any mass peaks with m/z values between 200 and 300: -#' peaksData(res)[[1L]] -#' peaksData(res)[[2L]] -#' -#' ## We next combine spectra and filter variables. We want to keep only mass -#' ## peaks of MS2 spectra that have an m/z between 100 and 110. -#' res <- filterPeaksRanges(s, mz = c(100, 110), msLevel = c(2, 2)) -#' res -#' length(res) -#' -#' ## Only data for peaks are returned for which the spectra's MS level is -#' ## between 2 and 2 and with an m/z between 100 and 110. The peaks data for -#' ## the first spectrum, that has MS level 1, is thus empty: -#' peaksData(res)[[1L]] -#' -#' ## While the peaks matrix for the second spectrum (with MS level 2) contains -#' ## the mass peaks with m/z between 100 and 110. -#' peaksData(res)[[2L]] -#' -#' ## To keep also the peaks data for the first spectrum, we need to define -#' ## an additional set of ranges, which we define using a second row in each -#' ## ranges matrix. We use the same filter as above, i.e. 
keeping only mass -#' ## peaks with an m/z between 100 and 110 for spectra with MS level 2, but -#' ## add an additional row for MS level 1 spectra keeping mass peaks with an -#' ## m/z between 0 and 2000. Filter results of different rows are combined -#' ## using a logical OR, i.e. peaks matrices with mass peaks are returned -#' ## matching either the first, or the second row. -#' res <- filterPeaksRanges(s, mz = rbind(c(100, 110), c(0, 1000)), -#' msLevel = rbind(c(2, 2), c(1, 1))) -#' -#' ## The results for the MS level 2 spectrum are the same as before, but with -#' ## the additional row we keep the full peaks matrix of the MS1 spectrum: -#' peaksData(res)[[1L]] -#' peaksData(res)[[2L]] -#' -#' ## As a last example we define a filter that keeps all mass peaks with an -#' ## m/z either between 100 and 200, or between 300 and 400. -#' res <- filterPeaksRanges(s, mz = rbind(c(100, 200), c(300, 400))) -#' peaksData(res)[[1L]] -#' peaksData(res)[[2L]] -#' -#' ## Such filters could thus be defined to restrict/filter the MS data to -#' ## specific e.g. retention time and m/z ranges. -filterPeaksRanges <- function(object, ..., keep = TRUE) { - if (!inherits(object, "Spectra")) - stop("'object' is expected to be a 'Spectra' object.") - dots <- list(...) - variables <- names(dots) - if (!length(variables)) - return(object) - ## check that: - ## - variables are in spectraVariables - pvars <- peaksVariables(object) - svars <- spectraVariables(object) - if (!all(variables %in% c(svars, pvars))) - stop("Provided filter variable(s): ", - paste0("\"", variables[!variables %in% c(svars, pvars)], "\"", - collapse = ", "), " are not valid spectra variables. 
", - "Use 'spectraVariables(object)' and 'peaksVariables()' to list ", - "available variables.") - ## - range parameters are defined correctly - err <- paste0("Range parameters have to be either a 'numeric' of length ", - "2 or a 'numeric' matrix with two columns.") - dots <- lapply(dots, function(z) { - if (is.null(nrow(z))) { - if (length(z) != 2) - stop(err) - z <- matrix(z, ncol = 2) - } - if (!is.matrix(z) | !is.numeric(z)) stop(err) - z - }) - ## - number for rows of matrices matches. - nr <- unlist(lapply(dots, nrow), use.names = FALSE) - if (any(nr != nr[1L])) - stop("Number of rows of the range matrices have to match.") - ## OK, now proceed to split by svar and pvar and pass to the peaks function. - pvars <- intersect(variables, pvars) - svars <- intersect(variables, svars) - object <- addProcessing(object, .peaks_filter_ranges, ranges = dots, - svars = svars, pvars = pvars, - spectraVariables = c(svars, "msLevel"), keep = keep) - if (keep) keep_or_remove <- "select" - else keep_or_remove <- "remove" - object@processing <- .logging( - object@processing, "Filter: ", keep_or_remove, " peaks based on ", - "user-provided ranges for ", length(variables), " variables") - object -} diff --git a/R/Spectra-neutralLoss.R b/R/Spectra-neutralLoss.R index dc9cf32c..53f3b2b5 100644 --- a/R/Spectra-neutralLoss.R +++ b/R/Spectra-neutralLoss.R @@ -87,10 +87,6 @@ setClassUnion("functionOrNull", c("function", "NULL")) #' Analysis in METLIN. Journal of the American Society for Mass Spectrometry. #' \doi{10.1021/jasms.1c00343} #' -#' @seealso -#' -#' [addProcessing()] for other data analysis and manipulation functions. -#' #' @examples #' #' ## Create a simple example Spectra object with some MS1, MS2 and MS3 spectra. 
diff --git a/R/Spectra.R b/R/Spectra.R index 8bf0565a..0e78b954 100644 --- a/R/Spectra.R +++ b/R/Spectra.R @@ -1,106 +1,68 @@ #' @include hidden_aliases.R NULL -################################################################################ -## -## Spectra class, creation, data representation, export -## -################################################################################ - #' @title The Spectra class to manage and access MS data #' -#' @name Spectra +#' @aliases Spectra-class [,Spectra-method +#' @aliases uniqueMsLevels uniqueMsLevels,Spectra-method +#' @aliases combinePeaks #' -#' @aliases Spectra-class -#' @aliases Spectra -#' @aliases setBackend -#' @aliases export +#' @name Spectra #' #' @description #' -#' The `Spectra` class encapsules spectral mass spectrometry (MS) data and -#' related metadata. The MS data is represented by a *backend* extending the -#' virual [MsBackend] class which provides the data to the `Spectra` object. -#' The `Spectra` class implements only data accessor, filtering and analysis -#' methods for the MS data and relies on its *backend* to provide the MS data. -#' This allows to change data representations of a `Spectra` object depending -#' on the user's needs and properties of the data. Different backends and -#' their properties are explained in the [MsBackend] documentation. -#' -#' Documentation on other topics and functionality of `Spectra`can be found in: -#' -#' - [spectraData()] for accessing and using MS data through `Spectra` objects. -#' - [filterMsLevel()] to subset and filter `Spectra` objects. -#' - [plotSpectra()] for visualization of `Spectra` orbjects. -#' - [processingChunkSize()] for information on parallel and chunk-wise data -#' processing. -#' - [combineSpectra()] for merging, aggregating and splitting of `Spectra` -#' objects. -#' - [combinePeaks()] for merging and aggregating `Spectra`'s mass peaks data. -#' - [addProcessing()] for data analysis functions. 
-#' - [compareSpectra()] for spectra similarity calculations.
-#'
-#' @param backend For `Spectra()`: [MsBackend-class] to be used as backend. See
-#' section on creation of `Spectra` objects for details. For `setBackend()`:
-#' instance of [MsBackend-class] that supports `setBackend()` (i.e. for
-#' which `supportsSetBackend()` returns `TRUE`). Such backends have a
-#' parameter `data` in their `backendInitialize()` function that support
-#' passing the full spectra data to the initialize method. See section on
-#' creation of `Spectra` objects for details.
-#' For `export()`: [MsBackend-class] to be used to export the data.
-#'
-#' @param BPPARAM Parallel setup configuration. See [bpparam()] for more
-#' information. This is passed directly to the [backendInitialize()] method
-#' of the [MsBackend-class].
+#' The `Spectra` class encapsulates spectral mass spectrometry data and
+#' related metadata.
#'
-#' @param f For `setBackend()`: factor defining how to split the data
-#' for parallelized copying of the spectra data to the new backend. For
-#' some backends changing this parameter can lead to errors. Defaults to
-#' [processingChunkFactor()].
+#' It supports multiple data backends, e.g. in-memory ([MsBackendMemory],
+#' [MsBackendDataFrame()]), on-disk as mzML ([MsBackendMzR()]) or HDF5
+#' ([MsBackendHdf5Peaks()]).
#'
-#' @param metadata For `Spectra()`: optional `list` with metadata information.
+#' @details
#'
-#' @param object For `Spectra()`: an object to instantiate the `Spectra`
-#' object and initialize the with data.. See section on creation of
-#' `Spectra` objects for details. For all other methods a `Spectra` object.
+#' The `Spectra` class uses by default a lazy data manipulation strategy,
+#' i.e. data manipulations such as performed with `replaceIntensitiesBelow()`
+#' are not applied immediately to the data, but applied on-the-fly to the
+#' spectrum data once it is retrieved. 
For some backends that allow to write
+#' data back to the data storage (such as the [MsBackendMemory()],
+#' [MsBackendDataFrame()] and [MsBackendHdf5Peaks()]) it is possible to apply
+#' the queue with the `applyProcessing` function. See the *Data manipulation and
+#' analysis* methods section below for more details.
#'
-#' @param processingQueue For `Spectra()`: optional `list` of
-#' [ProcessingStep-class] objects.
+#' For more information on parallel or chunk-wise processing (especially
+#' helpful for very large data sets) see [processingChunkSize()].
#'
-#' @param source For `Spectra()`: instance of [MsBackend-class] that can be
-#' used to import spectrum data from the provided files. See section
-#' *Creation of objects* for more details.
+#' To apply arbitrary functions to a `Spectra` use the `spectrapply()` function
+#' (or directly [chunkapply()] for chunk-wise processing). See description of
+#' the `spectrapply()` function below for details.
#'
-#' @param value For `dataStorageBasePath()`: A `character` vector that defines
-#' the base directory where the data storage files can be found.
+#' For details on plotting spectra, see [plotSpectra()].
#'
-#' @param ... Additional arguments.
+#' Clarifications regarding scan/acquisition numbers and indices:
#'
-#' @section Data stored in a `Spectra` object:
+#' - A `spectrumId` (or `spectrumID`) is a vendor specific field in
+#' the mzML file that contains some information about the
+#' run/spectrum, e.g.: `controllerType=0 controllerNumber=1
+#' scan=5281 file=2`
#'
-#' The `Spectra` object is a container for MS data that includes mass peak
-#' data (*m/z* and related intensity values, also referred to as *peaks data*
-#' in the context of `Spectra`) and metadata of individual spectra (so called
-#' *spectra variables*). 
While a core set of spectra variables (the
-#' `coreSpectraVariables()`) are guaranteed to be provided by a
-#' `Spectra`, it is possible to add arbitrary additional spectra variables to
-#' a `Spectra` object.
+#' - `acquisitionNum` is a more or less sanitized spectrum id generated
+#' from the `spectrumId` field by `mzR` (see
+#' [here](https://github.com/sneumann/mzR/blob/master/src/pwiz/data/msdata/MSData.cpp#L552-L580)).
#'
-#' The `Spectra` object is designed to contain MS data of a (large) set of mass
-#' spectra. The data is organized *linearly* and can be thought of a list of
-#' mass spectra, i.e. each element in the `Spectra` is one spectrum.
+#' - `scanIndex` is the `mzR` generated sequence number of the
+#' spectrum in the raw file (which doesn't have to be the same as
+#' the `acquisitionNum`)
#'
+#' See also [this issue](https://github.com/lgatto/MSnbase/issues/525).
#'
-#' @section Creation of objects:
+#' @section Creation of objects, conversion, changing the backend and export:
#'
#' `Spectra` classes can be created with the `Spectra()` constructor function
#' which supports the following formats:
#'
#' - parameter `object` is a `data.frame` or `DataFrame` containing the
-#' full spectrum data (spectra variables in columns as well as columns
-#' with the individual MS peak data, *m/z* and intensity). The provided
-#' `backend` (by default a [MsBackendMemory-class]) will be initialized
-#' with that data.
+#' spectrum data. The provided `backend` (by default a
+#' [MsBackendMemory-class]) will be initialized with that data.
#'
#' - parameter `object` is a [MsBackend-class] (assumed to be already
#' initialized).
@@ -117,80 +79,45 @@ NULL
#'
#' With `...` additional arguments can be passed to the backend's
#' [backendInitialize()] method. Parameter `backend` allows to specify which
-#' [MsBackend-class] should be used for data representation and storage. 
-#' -#' -#' @section Data representation of a `Spectra`: -#' -#' The MS data which can be accessed through the `Spectra` object is -#' *represented* by its backend, which means that this backend defines how -#' and where the data is stored (e.g. in memory or on disk). The `Specrta` -#' object relies on the backend to provide the MS data whenever it needs it -#' for data processing. -#' Different backends with different properties, such as minimal memory -#' requirement or fast data access, are defined in the *Spectra* package or -#' one of the MsBackend* packages. More information on backends and their -#' properties is provided in the documentation of [MsBackend]. -#' -#' On-disk backends keep only a limited amount of data in memory retrieving -#' most of the data (usually the MS peak data) upon request on-the-fly from -#' their on-disk data representations. Moving the on-disk data storage of such -#' a backend or a serialized object to a different location in the file -#' system will cause data corruption. The `dataStorageBasePath()` and -#' `dataStorageBasePath<-` functions allow in such cases (and if thebackend -#' classes support this operation), to get or change the *base* -#' path to the directory of the backend's data storage. In-memory backends -#' such as [MsBackendMemory] or [MsBackendDataFrame] keeping all MS data in -#' memory don't support, and need, this function, but for [MsBackendMzR] this -#' function can be used to update/adapt the path to the directory containing -#' the original data files. Thus, for `Spectra` objects (using this backend) -#' that were moved to another file system or computer, these functions allow to -#' adjust/adapt the base file path. -#' -#' -#' @section Changing data representation of a `Spectra`: -#' -#' The data representation, i.e. the backend of a `Spectra` object can be -#' changed with the `setBackend()` method that takes an instance of the new -#' backend as second parameter `backend`. 
A call to -#' `setBackend(sps, backend = MsBackendDataFrame())` +#' [MsBackend-class] should be used for data storage. +#' +#' The backend of a `Spectra` object can be changed with the `setBackend()` +#' method that takes an instance of the new backend as second parameter +#' `backend`. A call to `setBackend(sps, backend = MsBackendDataFrame())` #' would for example change the backend of `sps` to the *in-memory* #' `MsBackendDataFrame`. Changing to a backend is only supported if that #' backend has a `data` parameter in its `backendInitialize()` method and if #' `supportsSetBackend()` returns `TRUE` for that backend. `setBackend()` will -#' transfer the full spectra data from the originating backend as a `DataFrame` -#' to the new backend. -#' -#' Generally, it is not possible to change **to** a read-only backend such as -#' the [MsBackendMzR()] backend. +#' transfer the full spectra data from the originating backend as a +#' `DataFrame` to the new backend. +#' Most *read-only* backends do not support `setBackend()`. It is for example +#' not possible to change the backend to a *read-only* backend (such as +#' the [MsBackendMzR()] backend). #' #' The definition of the function is: #' `setBackend(object, backend, ..., f = dataStorage(object), #' BPPARAM = bpparam())` and its parameters are: #' -#' - `object`: the `Spectra` object. +#' - parameter `object`: the `Spectra` object. #' -#' - `backend`: an instance of the new backend, e.g. `[MsBackendMemory()]`. +#' - parameter `backend`: an instance of the new backend, e.g. +#' `[MsBackendMemory()]`. #' -#' - `f`: factor allowing to parallelize the change of the backends. By -#' default the process of copying the spectra data from the original to the +#' - parameter `f`: factor allowing to parallelize the change of the backends. +#' By default the process of copying the spectra data from the original to the #' new backend is performed separately (and in parallel) for each file. 
Users #' are advised to use the default setting. #' -#' - `...`: optional additional arguments passed to the [backendInitialize()] -#' method of the new `backend`. +#' - parameter `...`: optional additional arguments passed to the +#' [backendInitialize()] method of the new `backend`. #' -#' - `BPPARAM`: setup for the parallel processing. See [bpparam()] for +#' - parameter `BPPARAM`: setup for the parallel processing. See [bpparam()] for #' details. #' -#' -#' @section Exporting data from a `Spectra` object: -#' #' Data from a `Spectra` object can be **exported** to a file with the -#' `export()` function. The actual export of the data is performed by +#' `export()` function. The actual export of the data has to be performed by #' the `export` method of the [MsBackend] class defined with the mandatory -#' parameter `backend` which defines also the format in which the data -#' is exported. Note however that not all backend classes support +#' parameter `backend`. Note however that not all backend classes support #' export of data. From the `MsBackend` classes in the `Spectra` package #' currently only the `MsBackendMzR` backend supports data export (to #' mzML/mzXML file(s)); see the help page of the [MsBackend-class] for @@ -210,3118 +137,2441 @@ NULL #' parameter `backend`. #' #' -#' @details -#' -#' The `Spectra` class uses by default a lazy data manipulation strategy, -#' i.e. data manipulations such as performed with `replaceIntensitiesBelow()` -#' are not applied immediately to the data, but applied on-the-fly to the -#' spectrum data once it is retrieved. This enables data manipulation -#' operations also for *read only* data representations. For some backends that -#' allow to write data back to the data storage (such as the -#' [MsBackendMemory()], [MsBackendDataFrame()] and [MsBackendHdf5Peaks()]) it -#' is possible to apply to queue with the [applyProcessing()] function (see -#' the [applyProcessing()] function for details). 
-#' -#' Clarifications regarding scan/acquisition numbers and indices: +#' @section Accessing spectra data: #' -#' - A `spectrumId` (or `spectrumID`) is a vendor specific field in -#' the mzML file that contains some information about the -#' run/spectrum, e.g.: `controllerType=0 controllerNumber=1 -#' scan=5281 file=2` +#' - `$`, `$<-`: gets (or sets) a spectra variable for all spectra in `object`. +#' See examples for details. Note that replacing values of a peaks variable +#' is not supported with a non-empty processing queue, i.e. if any filtering +#' or data manipulations on the peaks data was performed. In these cases +#' [applyProcessing()] needs to be called first to apply all cached data +#' operations. #' -#' - `acquisitionNum` is a more a less sanitize spectrum id generated -#' from the `spectrumId` field by `mzR` (see -#' [here](https://github.com/sneumann/mzR/blob/master/src/pwiz/data/msdata/MSData.cpp#L552-L580)). +#' - `[[`, `[[<-`: access or set/add a single spectrum variable (column) in the +#' backend. #' -#' - `scanIndex` is the `mzR` generated sequence number of the -#' spectrum in the raw file (which doesn't have to be the same as -#' the `acquisitionNum`) +#' - `acquisitionNum()`: returns the acquisition number of each +#' spectrum. Returns an `integer` of length equal to the number of +#' spectra (with `NA_integer_` if not available). #' -#' See also [this issue](https://github.com/lgatto/MSnbase/issues/525). +#' - `centroided()`, `centroided<-`: gets or sets the centroiding +#' information of the spectra. `centroided()` returns a `logical` +#' vector of length equal to the number of spectra with `TRUE` if a +#' spectrum is centroided, `FALSE` if it is in profile mode and `NA` +#' if it is undefined. See also `isCentroided()` for estimating from +#' the spectrum data whether the spectrum is centroided. `value` +#' for `centroided<-` is either a single `logical` or a `logical` of +#' length equal to the number of spectra in `object`. 
#' -#' @md +#' - `collisionEnergy()`, `collisionEnergy<-`: gets or sets the +#' collision energy for all spectra in `object`. `collisionEnergy()` +#' returns a `numeric` with length equal to the number of spectra +#' (`NA_real_` if not present/defined), `collisionEnergy<-` takes a +#' `numeric` of length equal to the number of spectra in `object`. #' -#' @author Sebastian Gibb, Johannes Rainer, Laurent Gatto, Philippine Louail +#' - `coreSpectraVariables()`: returns the *core* spectra variables along with +#' their expected data type. #' -#' @exportClass Spectra +#' - `dataOrigin()`, `dataOrigin<-`: gets or sets the *data origin* for each +#' spectrum. `dataOrigin()` returns a `character` vector (same length than +#' `object`) with the origin of the spectra. `dataOrigin<-` expects a +#' `character` vector (same length than `object`) with the replacement +#' values for the data origin of each spectrum. #' -#' @exportMethod Spectra +#' - `dataStorage()`: returns a `character` vector (same length than `object`) +#' with the data storage location of each spectrum. #' -#' @examples +#' - `intensity()`: gets the intensity values from the spectra. Returns +#' a [NumericList()] of `numeric` vectors (intensity values for each +#' spectrum). The length of the list is equal to the number of +#' `spectra` in `object`. #' -#' ## -------- CREATION OF SPECTRA OBJECTS -------- +#' - `ionCount()`: returns a `numeric` with the sum of intensities for +#' each spectrum. If the spectrum is empty (see `isEmpty()`), +#' `NA_real_` is returned. #' -#' ## Create a Spectra providing a `DataFrame` containing the spectrum data. +#' - `isCentroided()`: a heuristic approach assessing if the spectra in +#' `object` are in profile or centroided mode. The function takes +#' the `qtl`th quantile top peaks, then calculates the difference +#' between adjacent m/z value and returns `TRUE` if the first +#' quartile is greater than `k`. (See `Spectra:::.isCentroided()` for +#' the code.) 
#' -#' spd <- DataFrame(msLevel = c(1L, 2L), rtime = c(1.1, 1.2)) -#' spd$mz <- list(c(100, 103.2, 104.3, 106.5), c(45.6, 120.4, 190.2)) -#' spd$intensity <- list(c(200, 400, 34.2, 17), c(12.3, 15.2, 6.8)) +#' - `isEmpty()`: checks whether a spectrum in `object` is empty +#' (i.e. does not contain any peaks). Returns a `logical` vector of +#' length equal number of spectra. #' -#' data <- Spectra(spd) -#' data +#' - `isolationWindowLowerMz()`, `isolationWindowLowerMz<-`: gets or sets the +#' lower m/z boundary of the isolation window. #' -#' ## Create a Spectra from mzML files and use the `MsBackendMzR` on-disk -#' ## backend. -#' sciex_file <- dir(system.file("sciex", package = "msdata"), -#' full.names = TRUE) -#' sciex <- Spectra(sciex_file, backend = MsBackendMzR()) -#' sciex +#' - `isolationWindowTargetMz()`, `isolationWindowTargetMz<-`: gets or sets the +#' target m/z of the isolation window. #' +#' - `isolationWindowUpperMz()`, `isolationWindowUpperMz<-`: gets or sets the +#' upper m/z boundary of the isolation window. #' -#' ## -------- CHANGING DATA REPRESENTATIONS -------- +#' - `containsMz()`: checks for each of the spectra whether they contain mass +#' peaks with an m/z equal to `mz` (given acceptable difference as defined by +#' parameters `tolerance` and `ppm` - see [common()] for details). Parameter +#' `which` allows to define whether any (`which = "any"`, the default) or +#' all (`which = "all"`) of the `mz` have to match. The function returns +#' `NA` if `mz` is of length 0 or is `NA`. #' -#' ## The MS data is on disk and will be read into memory on-demand. We can -#' ## however change the backend to a MsBackendMemory backend which will -#' ## keep all of the data in memory. 
-#' sciex_im <- setBackend(sciex, MsBackendMemory()) -#' sciex_im +#' - `containsNeutralLoss()`: checks for each spectrum in `object` if it has a +#' peak with an m/z value equal to its precursor m/z - `neutralLoss` (given +#' acceptable difference as defined by parameters `tolerance` and `ppm`). +#' Returns `NA` for MS1 spectra (or spectra without a precursor m/z). #' -#' ## The `MsBackendMemory()` supports the `setBackend()` method: -#' supportsSetBackend(MsBackendMemory()) +#' - `length()`: gets the number of spectra in the object. #' -#' ## Thus, it is possible to change to that backend with `setBackend()`. Most -#' ## read-only backends however don't support that, such as the -#' ## `MsBackendMzR` and `setBackend()` would fail to change to that backend. -#' supportsSetBackend(MsBackendMzR()) +#' - `lengths()`: gets the number of peaks (m/z-intensity values) per +#' spectrum. Returns an `integer` vector (length equal to the +#' number of spectra). For empty spectra, `0` is returned. #' -#' ## The on-disk object `sciex` is light-weight, because it does not keep the -#' ## MS peak data in memory. The `sciex_im` object in contrast keeps all the -#' ## data in memory and its size is thus much larger. -#' object.size(sciex) -#' object.size(sciex_im) +#' - `msLevel()`: gets the spectra's MS level. Returns an integer vector (names +#' being spectrum names, length equal to the number of spectra) with the MS +#' level for each spectrum. #' -#' ## The spectra variable `dataStorage` returns for each spectrum the location -#' ## where the data is stored. For in-memory objects: -#' head(dataStorage(sciex_im)) +#' - `mz()`: gets the mass-to-charge ratios (m/z) from the +#' spectra. Returns a [NumericList()] or length equal to the number of +#' spectra, each element a `numeric` vector with the m/z values of +#' one spectrum. #' -#' ## While objects that use an on-disk backend will list the files where the -#' ## data is stored. 
-#' head(dataStorage(sciex)) +#' - `peaksData()`: gets the *peaks* data for all spectra in `object`. Peaks +#' data consist of the m/z and intensity values as well as possible additional +#' annotations (variables) of all peaks of each spectrum. The function +#' returns a [SimpleList()] of two dimensional arrays (either `matrix` or +#' `data.frame`), with each array providing the values for the requested +#' *peak variables* (by default `"mz"` and `"intensity"`). Optional parameter +#' `columns` is passed to the backend's `peaksData()` function to allow +#' the selection of specific (or additional) peaks variables (columns) that +#' should be extracted (if available). Importantly, +#' it is **not** guaranteed that each backend supports this parameter (while +#' each backend must support extraction of `"mz"` and `"intensity"` columns). +#' Parameter `columns` defaults to `c("mz", "intensity")` but any value +#' returned by `peaksVariables(object)` is supported. +#' Note also that it is possible to extract the peak data with +#' `as(x, "list")` and `as(x, "SimpleList")` as a `list` and `SimpleList`, +#' respectively. Note however that, in contrast to `peaksData()`, `as()` +#' does not support the parameter `columns`. #' -#' ## The spectra variable `dataOrigin` returns for each spectrum the *origin* -#' ## of the data. If the data is read from e.g. mzML files, this will be the -#' ## original mzML file name: -#' head(dataOrigin(sciex)) -#' head(dataOrigin(sciex_im)) +#' - `peaksVariables()`: lists the available variables for mass peaks provided +#' by the backend. Default peak variables are `"mz"` and `"intensity"` (which +#' all backends need to support and provide), but some backends might provide +#' additional variables. +#' These variables correspond to the column names of the peak data array +#' returned by `peaksData()`. #' +#' - `polarity()`, `polarity<-`: gets or sets the polarity for each +#' spectrum. 
`polarity()` returns an `integer` vector (length equal
+#' to the number of spectra), with `0` and `1` representing negative
+#' and positive polarities, respectively. `polarity<-` expects an
+#' `integer` vector of length 1 or equal to the number of spectra.
#'
-#' ## -------- DATA EXPORT --------
+#' - `precursorCharge()`, `precursorIntensity()`, `precursorMz()`,
+#' `precScanNum()`, `precAcquisitionNum()`: gets the charge (`integer`),
+#' intensity (`numeric`), m/z (`numeric`), scan index (`integer`)
+#' and acquisition number (`integer`) of the precursor for MS level >=
+#' 2 spectra from the object. Returns a vector of length equal to
+#' the number of spectra in `object`. `NA` are reported for MS1
+#' spectra or if no precursor information is available.
#'
-#' ## Some `MsBackend` classes provide an `export()` method to export the data
-#' ## to the file format supported by the backend.
-#' ## The `MsBackendMzR` for example allows to export MS data to mzML or
-#' ## mzXML file(s), the `MsBackendMgf` (defined in the MsBackendMgf R package)
-#' ## would allow to export the data in mgf file format.
-#' ## Below we export the MS data in `data`. We call the `export()` method on
-#' ## this object, specify the backend that should be used to export the data
-#' ## (and which also defines the output format) and provide a file name.
-#' fl <- tempfile()
-#' export(data, MsBackendMzR(), file = fl)
+#' - `rtime()`, `rtime<-`: gets or sets the retention times (in seconds)
+#' for each spectrum. `rtime()` returns a `numeric` vector (length
+#' equal to the number of spectra) with the retention time for each
+#' spectrum. `rtime<-` expects a numeric vector with length equal
+#' to the number of spectra.
#'
-#' ## This exported our data in mzML format. Below we read the first 6 lines
-#' ## from that file.
-#' readLines(fl, n = 6)
+#' - `scanIndex()`: returns an `integer` vector with the *scan index*
+#' for each spectrum. 
This represents the relative index of the +#' spectrum within each file. Note that this can be different to the +#' `acquisitionNum` of the spectrum which represents the index of the +#' spectrum during acquisition/measurement (as reported in the mzML file). #' -#' ## If only a single file name is provided, all spectra are exported to that -#' ## file. To export data with the `MsBackendMzR` backend to different files, a -#' ## file name for each individual spectrum has to be provided. -#' ## Below we export each spectrum to its own file. -#' fls <- c(tempfile(), tempfile()) -#' export(data, MsBackendMzR(), file = fls) +#' - `smoothed()`,`smoothed<-`: gets or sets whether a spectrum is +#' *smoothed*. `smoothed()` returns a `logical` vector of length equal +#' to the number of spectra. `smoothed<-` takes a `logical` vector +#' of length 1 or equal to the number of spectra in `object`. #' -#' ## Reading the data from the first file -#' res <- Spectra(backendInitialize(MsBackendMzR(), fls[1])) +#' - `spectraData()`: gets general spectrum metadata (annotation, also called +#' header). `spectraData()` returns a `DataFrame`. Note that this +#' method does by default **not** return m/z or intensity values. #' -#' mz(res) -#' mz(data) -NULL - -#' The Spectra class +#' - `spectraData<-`: **replaces** the full spectra data of the `Spectra` +#' object with the one provided with `value`. The `spectraData<-` function +#' expects a `DataFrame` to be passed as value with the same number of rows +#' as there a spectra in `object`. Note that replacing values of +#' peaks variables is not supported with a non-empty processing queue, i.e. +#' if any filtering or data manipulations on the peaks data was performed. +#' In these cases [applyProcessing()] needs to be called first to apply all +#' cached data operations and empty the processing queue. #' -#' The [Spectra] class encapsulates data and meta-data for mass -#' spectrometry experiments. 
+#' - `spectraNames()`, `spectraNames<-`: gets or sets the spectra names. #' -#' @slot backend A derivate of [MsBackend-class] holding/controlling the spectra -#' data. +#' - `spectraVariables()`: returns a `character` vector with the +#' available spectra variables (columns, fields or attributes of each +#' spectrum) available in `object`. Note that `spectraVariables()` does not +#' list the *peak variables* (`"mz"`, `"intensity"` and eventual additional +#' annotations for each MS peak). Peak variables are returned by +#' `peaksVariables()`. #' -#' @slot processingQueue `list` of `ProcessingStep` objects. +#' - `tic()`: gets the total ion current/count (sum of signal of a +#' spectrum) for all spectra in `object`. By default, the value +#' reported in the original raw data file is returned. For an empty +#' spectrum, `0` is returned. #' -#' @slot processingQueueVariables `character` of spectraVariables that should -#' be passed to the processing step function. +#' - `uniqueMsLevels()`: get the unique MS levels available in `object`. This +#' function is supposed to be more efficient than `unique(msLevel(object))`. #' -#' @slot processing A `character` storing logging information. +#' @section Data subsetting, filtering and merging: #' -#' @slot metadata A `list` storing experiment metadata. +#' Subsetting and filtering of `Spectra` objects can be performed with the below +#' listed methods. #' -#' @slot version A `character(1)` containing the class version. +#' - `[`: subsets the spectra keeping only selected elements (`i`). The method +#' **always** returns a `Spectra` object. #' -#' @docType class +#' - `cbind2()`: Appends multiple spectra variables from a `data.frame`, +#' `DataFrame` or `matrix` to the `Spectra` object at once. It does so +#' *blindly* (e.g. do not check rownames compatibility) and is therefore at +#' the risk of the user. For a more controlled way of adding spectra +#' variables, the `joinSpectraData()` should be used. 
It will return a +#' `Spectra` object with the appended spectra variables. `cbind2()` does +#' check however that the number of rows of the `data.frame` or `DataFrame` +#' matches the number of spectra in the `Spectra` object. #' -#' @author Sebastian Gibb \email{mail@@sebastiangibb.de} +#' - `deisotopeSpectra()`: *deisotopes* each spectrum keeping only the +#' monoisotopic peak for groups of isotopologues. Isotopologues are +#' estimated using the [isotopologues()] function from the +#' *MetaboCoreUtils* package. Note that +#' the default parameters for isotope prediction/detection have been +#' determined using data from the Human Metabolome Database (HMDB) and +#' isotopes for elements other than CHNOPS might not be detected. See +#' parameter `substDefinition` in the documentation of [isotopologues()] for +#' more information. The approach and code to define the parameters for +#' isotope prediction is described +#' [here](https://github.com/EuracBiomedicalResearch/isotopologues). #' -#' @importClassesFrom S4Vectors DataFrame +#' - `dropNaSpectraVariables()`: removes spectra variables (i.e. columns in the +#' object's `spectraData` that contain only missing values (`NA`). Note that +#' while columns with only `NA`s are removed, a `spectraData()` call after +#' `dropNaSpectraVariables()` might still show columns containing `NA` values +#' for *core* spectra variables. #' -#' @importMethodsFrom S4Vectors lapply +#' - `filterAcquisitionNum()`: filters the object keeping only spectra matching +#' the provided acquisition numbers (argument `n`). If `dataOrigin` or +#' `dataStorage` is also provided, `object` is subsetted to the spectra with +#' an acquisition number equal to `n` **in spectra with matching dataOrigin +#' or dataStorage values** retaining all other spectra. +#' Returns the filtered `Spectra`. #' -#' @importFrom S4Vectors DataFrame +#' - `filterDataOrigin()`: filters the object retaining spectra matching the +#' provided `dataOrigin`. 
Parameter `dataOrigin` has to be of type +#' `character` and needs to match exactly the data origin value of the +#' spectra to subset. +#' Returns the filtered `Spectra` object (with spectra ordered according to +#' the provided `dataOrigin` parameter). #' -#' @noRd -setClass( - "Spectra", - slots = c( - backend = "MsBackend", - processingQueue = "list", - processingQueueVariables = "character", - ## logging - processing = "character", - ## metadata - metadata = "list", - processingChunkSize = "numeric", - version = "character" - ), - prototype = prototype(version = "0.3", - processingChunkSize = Inf) -) - -setValidity("Spectra", function(object) { - msg <- .valid_processing_queue(object@processingQueue) - if (length(msg)) msg - else TRUE -}) - -#' @rdname hidden_aliases +#' - `filterDataStorage()`: filters the object retaining spectra stored in the +#' specified `dataStorage`. Parameter `dataStorage` has to be of type +#' `character` and needs to match exactly the data storage value of the +#' spectra to subset. +#' Returns the filtered `Spectra` object (with spectra ordered according to +#' the provided `dataStorage` parameter). #' -#' @importMethodsFrom methods show +#' - `filterEmptySpectra()`: removes empty spectra (i.e. spectra without peaks). +#' Returns the filtered `Spectra` object (with spectra in their +#' original order). #' -#' @importFrom utils capture.output +#' - `filterFourierTransformArtefacts()`: removes (Orbitrap) fast fourier +#' artefact peaks from spectra (see examples below). The function iterates +#' through all intensity ordered peaks in a spectrum and removes all peaks +#' with an m/z within +/- `halfWindowSize` of the current peak if their +#' intensity is lower than `threshold` times the current peak's intensity. 
+#' Additional parameters `keepIsotopes`, `maxCharge` and `isotopeTolerance`
+#' allow to avoid removing of potential `[13]C` isotope peaks (`maxCharge`
+#' being the maximum charge that should be considered and `isotopeTolerance`
+#' the absolute acceptable tolerance for matching their m/z).
+#' See [filterFourierTransformArtefacts()] for details and background and
+#' `deisotopeSpectra()` for an alternative.
#'
-#' @exportMethod show
-setMethod("show", "Spectra",
- function(object) {
- cat("MSn data (", class(object)[1L], ") with ",
- length(object@backend), " spectra in a ", class(object@backend),
- " backend:\n", sep = "")
- if (length(object@backend)) {
- txt <- capture.output(show(object@backend))
- cat(txt[-1], sep = "\n")
- }
- if (length(object@processingQueue))
- cat("Lazy evaluation queue:", length(object@processingQueue),
- "processing step(s)\n")
- lp <- length(object@processing)
- if (lp) {
- lps <- object@processing
- if (lp > 3) {
- lps <- lps[1:3]
- lps <- c(lps, paste0("...", lp - 3, " more processings. ",
- "Use 'processingLog' to list all."))
- }
- cat("Processing:\n", paste(lps, collapse="\n "), "\n")
- }
- })
-
-#' @rdname Spectra
-setMethod("Spectra", "missing", function(object, processingQueue = list(),
- metadata = list(), ...,
- backend = MsBackendMemory(),
- BPPARAM = bpparam()) {
- if (length(backend))
- new("Spectra", metadata = metadata, processingQueue = processingQueue,
- backend = backend)
- else callNextMethod()
-})
-
-#' @rdname Spectra
-setMethod("Spectra", "MsBackend", function(object, processingQueue = list(),
- metadata = list(), ...,
- BPPARAM = bpparam()) {
- new("Spectra", metadata = metadata, processingQueue = processingQueue,
- backend = object)
-})
-
-#' @rdname Spectra
+#' - `filterIntensity()`: filters each spectrum keeping only peaks with
+#' intensities that are within the provided range or match the criteria of
+#' the provided function. 
For the former, parameter `intensity` has to be a +#' `numeric` defining the intensity range, for the latter a `function` that +#' takes the intensity values of the spectrum and returns a `logical` whether +#' the peak should be retained or not (see examples below for details) - +#' additional parameters to the function can be passed with `...`. To +#' remove only peaks with intensities below a certain threshold, say 100, use +#' `intensity = c(100, Inf)`. Note: also a single value can be passed with +#' the `intensity` parameter in which case an upper limit of `Inf` is used. +#' Note that this function removes also peaks with missing intensities +#' (i.e. an intensity of `NA`). Parameter `msLevel.` allows to restrict the +#' filtering to spectra of the specified MS level(s). #' -#' @importFrom methods callNextMethod -setMethod("Spectra", "character", function(object, processingQueue = list(), - metadata = list(), - source = MsBackendMzR(), - backend = source, - ..., BPPARAM = bpparam()) { - sp <- .create_spectra(object, processingQueue = processingQueue, - metadata = metadata, backend = source, - ..., BPPARAM = BPPARAM) - if (class(source)[1L] != class(backend)[1L]) - setBackend(sp, backend, ..., BPPARAM = backendBpparam(backend, BPPARAM)) - else sp -}) - -#' @rdname Spectra -setMethod("Spectra", "ANY", function(object, processingQueue = list(), - metadata = list(), - source = MsBackendMemory(), - backend = source, - ..., BPPARAM = bpparam()) { - sp <- .create_spectra(object, processingQueue = processingQueue, - metadata = metadata, backend = source, - ..., BPPARAM = BPPARAM) - if (class(source)[1L] != class(backend)[1L]) - setBackend(sp, backend, ..., BPPARAM = backendBpparam(backend, BPPARAM)) - else sp -}) - -.create_spectra <- function(object, processingQueue = list(), metadata = list(), - backend = MsBackendMemory(), ..., - BPPARAM = bpparam()) { - if (missing(object)) - backend <- backendInitialize( - backend, ..., BPPARAM = backendBpparam(backend, BPPARAM)) 
- else backend <- backendInitialize( - backend, object, ..., BPPARAM = backendBpparam(backend, BPPARAM)) - new("Spectra", metadata = metadata, processingQueue = processingQueue, - backend = backend) -} - -#' @rdname Spectra +#' - `filterIsolationWindow()`: retains spectra that contain `mz` in their +#' isolation window m/z range (i.e. with an `isolationWindowLowerMz` <= `mz` +#' and `isolationWindowUpperMz` >= `mz`. Returns the filtered `Spectra` +#' object (with spectra in their original order). #' -#' @importMethodsFrom ProtGenerics setBackend +#' - `filterMsLevel()`: filters object by MS level keeping only spectra matching +#' the MS level specified with argument `msLevel`. Returns the filtered +#' `Spectra` (with spectra in their original order). #' -#' @exportMethod setBackend -setMethod( - "setBackend", c("Spectra", "MsBackend"), - function(object, backend, f = processingChunkFactor(object), ..., - BPPARAM = bpparam()) { - backend_class <- class(object@backend)[1L] - BPPARAM <- backendBpparam(object@backend, BPPARAM) - BPPARAM <- backendBpparam(backend, BPPARAM) - if (!supportsSetBackend(backend)) - stop(class(backend), " does not support 'setBackend'") - if (!length(object)) { - bknds <- backendInitialize( - backend, data = spectraData(object@backend), ...) - } else { - if (!is.factor(f)) - f <- force(factor(f, levels = unique(f))) - if (length(f) && (length(levels(f)) > 1)) { - if (length(f) != length(object)) - stop("length of 'f' has to match the length of 'object'") - bknds <- bplapply( - split(object@backend, f = f), - function(z, ...) { - backendInitialize(backend, - data = spectraData(z), ..., - BPPARAM = SerialParam()) - }, ..., BPPARAM = BPPARAM) - bknds <- backendMerge(bknds) - ## That below ensures the backend is returned in its original - ## order - unsplit does unfortunately not work. 
- if (is.unsorted(f)) - bknds <- extractByIndex( - bknds, order(unlist(split(seq_along(bknds), f), - use.names = FALSE))) - } else { - bknds <- backendInitialize( - backend, data = spectraData(object@backend), ...) - } - } - object@backend <- bknds - object@processing <- .logging(object@processing, - "Switch backend from ", - backend_class, " to ", - class(object@backend)) - object - }) - -#' @rdname Spectra +#' - `filterMzRange()`: filters the object keeping or removing peaks in each +#' spectrum that are within the provided m/z range. Whether peaks are +#' retained or removed can be configured with parameter `keep` (default +#' `keep = TRUE`). #' -#' @export -setMethod("export", "Spectra", - function(object, backend, ...) { - if (missing(backend)) - stop("Parameter 'backend' is required.") - export(backend, object, ...) - }) - -#' @rdname Spectra -setMethod("dataStorageBasePath", "Spectra", function(object) { - dataStorageBasePath(object@backend) -}) - -#' @rdname Spectra -setReplaceMethod("dataStorageBasePath", "Spectra", function(object, value) { - dataStorageBasePath(object@backend) <- value - object -}) - -################################################################################ -## -## Accessing and adding/setting/changing MS data. 
-## -################################################################################ - -#' @title Accessing mass spectrometry data -#' -#' @name spectraData -#' -#' @aliases acquisitionNum -#' @aliases centroided -#' @aliases collisionEnergy -#' @aliases dataOrigin -#' @aliases dataStorage -#' @aliases intensity -#' @aliases ionCount -#' @aliases isCentroided -#' @aliases isEmpty -#' @aliases isolationWindowLowerMz -#' @aliases isolationWindowUpperMz -#' @aliases isolationWindowTargetMz -#' @aliases lengths -#' @aliases msLevel -#' @aliases mz -#' @aliases peaksData -#' @aliases peaksVariables -#' @aliases polarity -#' @aliases precursorCharge -#' @aliases precursorIntensity -#' @aliases precursorMz -#' @aliases rtime -#' @aliases scanIndex -#' @aliases smoothed -#' @aliases spectraData -#' @aliases spectraNames -#' @aliases spectraVariables -#' @aliases tic -#' @aliases uniqueMsLevels +#' - `filterMzValues()`: filters the object keeping **all** peaks in each +#' spectrum that match the provided m/z value(s) (for `keep = TRUE`, the +#' default) or removing **all** of them (for `keep = FALSE`). The m/z +#' matching considers also the absolute `tolerance` and m/z-relative +#' `ppm` values. `tolerance` and `ppm` have to be of length 1. #' -#' @description +#' - `filterPolarity()`: filters the object keeping only spectra matching the +#' provided polarity. Returns the filtered `Spectra` (with spectra in their +#' original order). #' -#' As detailed in the documentation of the [Spectra] class, a `Spectra` object -#' is a container for mass spectrometry (MS) data that includes both the mass -#' peaks data (or *peaks data*, generally *m/z* and intensity values) as well -#' as spectra metadata (so called *spectra variables*). Spectra variables -#' generally define one value per spectrum, while for peaks variables one value -#' per mass peak is defined and hence multiple values per spectrum (depending -#' on the number of mass peaks of a spectrum). 
+#' - `filterPrecursorCharge()`: retains spectra with the defined precursor +#' charge(s). #' -#' Data can be extracted from a `Spectra` object using dedicated accessor -#' functions or also using the `$` operator. Depending on the backend class -#' used by the `Spectra` to represent the data, data can also be added or -#' replaced (again, using dedicated functions or using `$<-`). +#' - `filterPrecursorIsotopes()`: groups MS2 spectra based on their precursor +#' m/z and precursor intensity into predicted isotope groups and keep for each +#' only the spectrum representing the monoisotopic precursor. MS1 spectra +#' are returned as is. See documentation for `deisotopeSpectra()` below for +#' details on isotope prediction and parameter description. #' +#' - `filterPrecursorMaxIntensity()`: filters the `Spectra` keeping for groups +#' of (MS2) spectra with similar precursor m/z values (given parameters +#' `ppm` and `tolerance`) the one with the highest precursor intensity. The +#' function filters only MS2 spectra and returns all MS1 spectra. If +#' precursor intensities are `NA` for all spectra within a spectra group, the +#' first spectrum of that groups is returned. +#' Note: some manufacturers don't provide precursor intensities. These can +#' however also be estimated with [estimatePrecursorIntensity()]. #' -#' @param BPPARAM Parallel setup configuration. See [bpparam()] for more -#' information. See also [processingChunkSize()] for more information -#' on parallel processing. +#' - `filterPrecursorMzRange()` (previously `filterPrecursorMz()` which is now +#' deprecated): retains spectra with a precursor m/z within the +#' provided m/z range. See examples for details on selecting spectra with +#' a precursor m/z for a target m/z accepting a small difference in *ppm*. #' -#' @param columns For `spectraData()` accessor: optional `character` with -#' column names (spectra variables) that should be included in the -#' returned `DataFrame`. 
By default, all columns are returned.
-#' For `peaksData()` accessor: optional `character` with requested columns
-#' in the individual `matrix` of the returned `list`. Defaults to
-#' `c("mz", "value")` but any values returned by `peaksVariables(object)`
-#' with `object` being the `Spectra` object are supported.
+#' - `filterPrecursorMzValues()`: retains spectra with precursor m/z matching
+#' any of the provided m/z values (given `ppm` and `tolerance`). Spectra with
+#' missing precursor m/z value (e.g. MS1 spectra) are dropped.
#'
-#' @param f For `intensity()`, `mz()` and `peaksData()`: factor defining how
-#' data should be chunk-wise loaded an processed. Defaults to
-#' [processingChunkFactor()].
+#' - `filterPrecursorPeaks()`: removes peaks from each spectrum in `object` with
+#' an m/z equal or larger than the m/z of the precursor, depending on the
+#' value of parameter `mz`: for `mz = "=="` (the default) peaks with matching
+#' m/z (considering an absolute and relative acceptable difference depending
+#' on `tolerance` and `ppm`, respectively) are removed. For `mz = ">="` all
+#' peaks with an m/z larger or equal to the precursor m/z (minus `tolerance`
+#' and the `ppm` of the precursor m/z) are removed. Parameter `msLevel.`
+#' allows to restrict the filter to certain MS levels (by default the filter
+#' is applied to all MS levels). Note that no peaks are removed if the
+#' precursor m/z is `NA` (e.g. typically for MS1 spectra).
#'
-#' @param i For `asDataFrame()`: A `numeric` indicating which scans to coerce
-#' to a `DataFrame` (default is `seq_along(object)`).
+#' - `filterPrecursorScan()`: retains parent (e.g. MS1) and children scans (e.g.
+#' MS2) of acquisition number `acquisitionNum`. Returns the filtered
+#' `Spectra` (with spectra in their original order). Parameter `f` allows to
+#' define which spectra belong to the same sample or original data file (
+#' defaults to `f = dataOrigin(object)`). 
#'
-#' @param initial For `tic()`: `logical(1)` whether the initially
-#' reported total ion current should be reported, or whether the
-#' total ion current should be (re)calculated on the actual data
-#' (`initial = FALSE`, same as `ionCount()`).
+#' - `filterRt()`: retains spectra of MS level `msLevel` with retention
+#' times (in seconds) within (`>=`) `rt[1]` and (`<=`)
+#' `rt[2]`. Returns the filtered `Spectra` (with spectra in their
+#' original order).
#'
-#' @param j For `[`: not supported.
+#' - `filterRanges()`: allows filtering of the `Spectra` object based on user
+#' defined *numeric* ranges (parameter `ranges`) for one or more available
+#' spectra variables in object (spectra variable names can be specified with
+#' parameter `spectraVariables`). Spectra for which the value of a spectra
+#' variable is within its defined range are retained. If multiple
+#' ranges/spectra variables are defined, the `match` parameter can be used
+#' to specify whether all conditions (`match = "all"`; the default) or if
+#' any of the conditions must match (`match = "any"`; all spectra for which
+#' values are within any of the provided ranges are retained).
#'
-#' @param name For `$` and `$<-`: the name of the spectra variable to return
-#' or set.
+#' - `filterValues()`: allows filtering of the `Spectra` object based on
+#' similarities of *numeric* values of one or more `spectraVariables(object)`
+#' (parameter `spectraVariables`) to provided values (parameter `values`)
+#' given acceptable differences (parameters tolerance and ppm). If multiple
+#' values/spectra variables are defined, the `match` parameter can be used
+#' to specify whether all conditions (`match = "all"`; the default) or if
+#' any of the conditions must match (`match = "any"`; all spectra for which
+#' values are within any of the provided ranges are retained).
#'
-#' @param object A `Spectra` object. 
+#' - `reduceSpectra()`: for groups of peaks within highly similar m/z values
+#' within each spectrum (given `ppm` and `tolerance`), this function keeps
+#' only the peak with the highest intensity removing all other peaks hence
+#' *reducing* each spectrum to the highest intensity peaks per *peak group*.
+#' Peak groups are defined using the [group()] function from the
+#' *MsCoreUtils* package.
#'
-#' @param spectraVars `character()` indicating what spectra variables to add to
-#' the `DataFrame`. Default is `spectraVariables(object)`, i.e. all
-#' available variables.
+#' - `reset()`: restores the data to its original state (as much as possible):
+#' removes any processing steps from the lazy processing queue and calls
+#' `reset()` on the backend which, depending on the backend, can also undo
+#' e.g. data filtering operations. Note that a `reset()` call after
+#' `applyProcessing()` will not have any effect. See examples below for more
+#' information.
#'
-#' @param use.names For `lengths()`: ignored.
+#' - `selectSpectraVariables()`: reduces the information within the object to
+#' the selected spectra variables: all data for variables not specified will
+#' be dropped. For mandatory columns (i.e., those listed by
+#' [coreSpectraVariables()], such as *msLevel*, *rtime* ...) only
+#' the values will be dropped but not the variable itself. Additional (or
+#' user defined) spectra variables will be completely removed.
+#' Returns the filtered `Spectra`.
#'
-#' @param value A vector with values to replace the respective spectra
-#' variable. Needs to be of the correct data type for the spectra variable.
+#' - `split()`: splits the `Spectra` object based on parameter `f` into a `list`
+#' of `Spectra` objects.
#'
-#' @param x A `Spectra` object.
+#' - `joinSpectraData()`: Individual spectra variables can be directly
+#' added with the `$<-` or `[[<-` syntax. The `joinSpectraData()`
+#' function allows to merge a `DataFrame` to the existing spectra
+#' data. 
This function diverges from the [merge()] method in two
+#' main ways:
+#' - The `by.x` and `by.y` column names must be of length 1.
+#' - If variable names are shared in `x` and `y`, the spectra
+#' variables of `x` are not modified. It's only the `y`
+#' variables that are appended the suffix defined in
+#' `suffix.y`. This is to avoid modifying any core spectra
+#' variables that would lead to an invalid object.
+#' - Duplicated Spectra keys (i.e. `x[[by.x]]`) are not
+#' allowed. Duplicated keys in the `DataFrame` (i.e. `y[[by.y]]`)
+#' throw a warning and only the last occurrence is kept. These
+#' should be explored and ideally be removed using
+#' `QFeatures::reduceDataFrame()`, `PMS::reducePSMs()` or similar
+#' functions.
+#' For a more general function that allows to append `data.frame`,
+#' `DataFrame` and `matrix` see `cbind2()`.
#'
-#' @param ... Additional arguments.
+#' Several `Spectra` objects can be concatenated into a single object with the
+#' `c()` or the `concatenateSpectra()` function. Concatenation will fail if the
+#' processing queue of any of the `Spectra` objects is not empty or if
+#' different backends are used in the `Spectra` objects. The spectra variables
+#' of the resulting `Spectra` object are the union of the spectra variables of
+#' the individual `Spectra` objects.
#'
#'
-#' @section Spectra variables:
-#'
-#' A common set of *core spectra variables* are defined for `Spectra`. These
-#' have a pre-defined data type and each `Spectra` will return a value for
-#' these if requested. If no value for a spectra variable is defined, a missing
-#' value (of the correct data type) is returned. The list of core spectra
-#' variables and their respective data type is:
-#'
-#' - *acquisitionNum* `integer(1)`: the index of acquisition of a spectrum
-#' during an MS run.
-#' - *centroided* `logical(1)`: whether the spectrum is in profile or centroid
-#' mode. 
-#' - *collisionEnergy* `numeric(1)`: collision energy used to create an MSn -#' spectrum. -#' - *dataOrigin* `character(1)`: the *origin* of the spectrum's data, e.g. the -#' mzML file from which it was read. -#' - *dataStorage* `character(1)`: the (current) storage location of the -#' spectrum data. This value depends on the backend used to handle and -#' provide the data. For an *in-memory* backend like the `MsBackendDataFrame` -#' this will be `""`, for an on-disk backend such as the -#' `MsBackendHdf5Peaks` it will be the name of the HDF5 file where the -#' spectrum's peak data is stored. -#' - *isolationWindowLowerMz* `numeric(1)`: lower m/z for the isolation -#' window in which the (MSn) spectrum was measured. -#' - *isolationWindowTargetMz* `numeric(1)`: the target m/z for the isolation -#' window in which the (MSn) spectrum was measured. -#' - *isolationWindowUpperMz* `numeric(1)`: upper m/z for the isolation window -#' in which the (MSn) spectrum was measured. -#' - *msLevel* `integer(1)`: the MS level of the spectrum. -#' - *polarity* `integer(1)`: the polarity of the spectrum (`0` and `1` -#' representing negative and positive polarity, respectively). -#' - *precScanNum* `integer(1)`: the scan (acquisition) number of the precursor -#' for an MSn spectrum. -#' - *precursorCharge* `integer(1)`: the charge of the precursor of an MSn -#' spectrum. -#' - *precursorIntensity* `numeric(1)`: the intensity of the precursor of an -#' MSn spectrum. -#' - *precursorMz* `numeric(1)`: the m/z of the precursor of an MSn spectrum. -#' - *rtime* `numeric(1)`: the retention time of a spectrum. -#' - *scanIndex* `integer(1)`: the index of a spectrum within a (raw) file. -#' - *smoothed* `logical(1)`: whether the spectrum was smoothed. -#' -#' For each of these spectra variable a dedicated accessor function is defined -#' (such as `msLevel()` or `rtime()`) that allows to extract the values of -#' that spectra variable for all spectra in a `Spectra` object. 
Also, -#' replacement functions are defined, but not all backends might support -#' replacing values for spectra variables. As described above, additional -#' spectra variables can be defined or added. The `spectraVariables()` function -#' can be used to -#' -#' Values for multiple spectra variables, or all spectra vartiables* can be -#' extracted with the `spectraData()` function. -#' -#' -#' @section Peaks variables: -#' -#' `Spectra` also provide mass peak data with the *m/z* and intensity values -#' being the *core* peaks variables: -#' -#' - *intensity* `numeric`: intensity values for the spectrum's peaks. -#' - *mz* `numeric`: the m/z values for the spectrum's peaks. -#' -#' Values for these can be extracted with the `mz()` and `intensity()` -#' functions, or the `peaksData()` function. The former functions return a -#' `NumericList` with the respective values, while the latter returns a `List` -#' with `numeric` two-column matrices. The list of peaks matrices can also -#' be extracted using `as(x, "list")` or `as(x, "SimpleList")` with `x` being -#' a `Spectra` object. -#' -#' Some `Spectra`/backends provide also values for additional peaks variables. -#' The set of available peaks variables can be extracted with the -#' `peaksVariables()` function. -#' -#' -#' @section Functions to access MS data: -#' -#' The set of available functions to extract data from, or set data in, a -#' `Spectra` object are (in alphabetical order) listed below. Note that there -#' are also other functions to extract information from a `Spectra` object -#' documented in [addProcessing()]. +#' @section Data manipulation and analysis methods: #' -#' - `$`, `$<-`: gets (or sets) a spectra variable for all spectra in `object`. -#' See examples for details. Note that replacing values of a peaks variable -#' is not supported with a non-empty processing queue, i.e. if any filtering -#' or data manipulations on the peaks data was performed. 
In these cases -#' [applyProcessing()] needs to be called first to apply all cached data -#' operations. +#' Many data manipulation operations, such as those listed in this section, are +#' not applied immediately to the spectra, but added to a +#' *lazy processing/manipulation queue*. Operations stored in this queue are +#' applied on-the-fly to spectra data each time it is accessed. This lazy +#' execution guarantees the same functionality for `Spectra` objects with +#' any backend, i.e. backends supporting to save changes to spectrum data +#' ([MsBackendMemory()], [MsBackendDataFrame()] or [MsBackendHdf5Peaks()]) as +#' well as read-only backends (such as the [MsBackendMzR()]). +#' Note that for the former it is possible to apply the processing queue and +#' write the modified peak data back to the data storage with the +#' `applyProcessing()` function. #' -#' - `[[`, `[[<-`: access or set/add a single spectrum variable (column) in the -#' backend. +#' - `addProcessing()`: adds an arbitrary function that should be applied to the +#' peaks matrix of every spectrum in `object`. The function (can be passed +#' with parameter `FUN`) is expected to take a peaks matrix as input and to +#' return a peaks matrix. A peaks matrix is a numeric matrix with two columns, +#' the first containing the m/z values of the peaks and the second the +#' corresponding intensities. The function has to have `...` in its +#' definition. Additional arguments can be passed with `...`. With parameter +#' `spectraVariables` it is possible to define additional spectra variables +#' from `object` that should be passed to the function `FUN`. These will be +#' passed by their name (e.g. specifying `spectraVariables = "precursorMz"` +#' will pass the spectra's precursor m/z as a parameter named `precursorMz` +#' to the function. The only exception is the spectra's MS level, these will +#' be passed to the function as a parameter called `spectrumMsLevel` (i.e. 
+#' with `spectraVariables = "msLevel"` the MS levels of each spectrum will be
+#' submitted to the function as a parameter called `spectrumMsLevel`).
+#' Examples are provided in the package vignette.
#'
-#' - `acquisitionNum()`: returns the acquisition number of each
-#' spectrum. Returns an `integer` of length equal to the number of
-#' spectra (with `NA_integer_` if not available).
+#' - `applyProcessing()`: for `Spectra` objects that use a **writeable** backend
+#' only: apply all steps from the lazy processing queue to the peak data and
+#' write it back to the data storage. Parameter `f` allows to specify how
+#' `object` should be split for parallel processing. This should either be
+#' equal to the `dataStorage`, or `f = rep(1, length(object))` to disable
+#' parallel processing altogether. Other partitionings might result in
+#' errors (especially if a `MsBackendHdf5Peaks` backend is used).
#'
-#' - `asDataFrame()`: converts the `Spectra` to a `DataFrame` (in long format)
-#' contining all data. Returns a `DataFrame`.
+#' - `bin()`: aggregates individual spectra into discrete (m/z) bins. Binning is
+#' performed only on spectra of the specified MS level(s) (parameter
+#' `msLevel`, by default all MS levels of `x`). The bins can be defined with
+#' parameter `breaks` which by default are equally sized bins, with size
+#' being defined by parameter `binSize`, from the minimal to the maximal m/z
+#' of all spectra (of MS level `msLevel`) within `x`. The same bins are used
+#' for all spectra in `x`. All intensity values for peaks falling into the
+#' same bin are aggregated using the function provided with parameter `FUN`
+#' (defaults to `FUN = sum`, i.e. all intensities are summed up). Note that
+#' the binning operation is applied to the peak data on-the-fly upon data
+#' access and it is possible to *revert* the operation with the `reset()`
+#' function (see description of `reset()` above). 
+#' +#' - `combinePeaks()`: combines mass peaks within each spectrum with a +#' difference in their m/z values that is smaller than the maximal +#' acceptable difference defined by `ppm` and `tolerance`. Parameters +#' `intensityFun` and `mzFun` allow to define functions to aggregate the +#' intensity and m/z values for each such group of peaks. With +#' `weighted = TRUE` (the default), the m/z value of the combined peak is +#' calculated using an intensity-weighted mean and parameter `mzFun` is +#' ignored. The [MsCoreUtils::group()] function is used for the grouping of +#' mass peaks. Parameter `msLevel.` allows to define selected MS levels for +#' which peaks should be combined. This function returns a `Spectra` with +#' the same number of spectra as the input object, but with possibly +#' combined peaks within each spectrum. +#' Additional peak variables (other than `"mz"` and `"intensity"`) are +#' dropped (i.e. their values are replaced with `NA`) for combined peaks +#' unless they are constant across the combined peaks. See also +#' `reduceSpectra()` for a function to select a single *representative* +#' mass peak for each peak group. +#' +#' - `combineSpectra()`: combines sets of spectra into a single spectrum per +#' set. For each spectrum group (set), spectra variables from the first +#' spectrum are used and the peak matrices are combined using the function +#' specified with `FUN`, which defaults to [combinePeaksData()]. Please +#' refer to the [combinePeaksData()] help page for details and options of +#' the actual combination of peaks across the sets of spectra and to the +#' package vignette for examples and alternative ways to aggregate spectra. +#' The sets of spectra can be specified with parameter `f`. +#' In addition it is possible to define, with parameter `p`, if and how to +#' split the input data for parallel processing. 
+#' This defaults to `p = x$dataStorage` and hence a per-file parallel +#' processing is applied for `Spectra` with file-based backends (such as the +#' [MsBackendMzR()]). +#' Prior to combining the spectra, all processing steps queued in the lazy +#' evaluation queue are applied. Be aware that calling `combineSpectra()` on a +#' `Spectra` object with certain backends that allow modifications might +#' **overwrite** the original data. This does not happen with a +#' `MsBackendMemory` or `MsBackendDataFrame` backend, but with a +#' `MsBackendHdf5Peaks` backend the m/z and intensity values in the original +#' hdf5 file(s) will be overwritten. +#' The function returns a `Spectra` of length equal to the unique levels +#' of `f`. +#' +#' - `compareSpectra()`: compares each spectrum in `x` with each spectrum in `y` +#' using the function provided with `FUN` (defaults to [ndotproduct()]). If +#' `y` is missing, each spectrum in `x` is compared with each other spectrum +#' in `x`. +#' The matching/mapping of peaks between the compared spectra is done with the +#' `MAPFUN` function. The default [joinPeaks()] matches peaks of both spectra +#' and allows to keep all peaks from the first spectrum (`type = "left"`), +#' from the second (`type = "right"`), from both (`type = "outer"`) and to +#' keep only matching peaks (`type = "inner"`); see [joinPeaks()] for more +#' information and examples. The `MAPFUN` function should have parameters +#' `x`, `y`, `xPrecursorMz` and `yPrecursorMz` as these values are passed to +#' the function. In addition to `joinPeaks()` also [joinPeaksGnps()] is +#' supported for GNPS-like similarity score calculations. Note that +#' `joinPeaksGnps()` should only be used in combination with +#' `FUN = MsCoreUtils::gnps` (see [joinPeaksGnps()] for more information and +#' details). Use `MAPFUN = joinPeaksNone` to disable internal peak +#' matching/mapping if a similarity scoring function is used that performs +#' the matching internally. 
+#' `FUN` is supposed to be a function to compare intensities of (matched) +#' peaks of the two spectra that are compared. The function needs to take two +#' matrices with columns `"mz"` and `"intensity"` as input and is supposed +#' to return a single numeric as result. In addition to the two peak matrices +#' the spectra's precursor m/z values are passed to the function as parameters +#' `xPrecursorMz` (precursor m/z of the `x` peak matrix) and `yPrecursorMz` +#' (precursor m/z of the `y` peak matrix). Additional parameters to functions +#' `FUN` and `MAPFUN` can be passed with `...`. Parameters `ppm` and +#' `tolerance` are passed to both `MAPFUN` and `FUN`. +#' The function returns a `matrix` with the results of `FUN` for each +#' comparison, number of rows equal to `length(x)` and number of columns +#' equal `length(y)` (i.e. element in row 2 and column 3 is the result from +#' the comparison of `x[2]` with `y[3]`). If `SIMPLIFY = TRUE` the `matrix` +#' is *simplified* to a `numeric` if length of `x` or `y` is one. See also +#' the vignette for additional examples, such as using spectral entropy +#' similarity in the scoring. #' -#' - `centroided()`, `centroided<-`: gets or sets the centroiding -#' information of the spectra. `centroided()` returns a `logical` -#' vector of length equal to the number of spectra with `TRUE` if a -#' spectrum is centroided, `FALSE` if it is in profile mode and `NA` -#' if it is undefined. See also `isCentroided()` for estimating from -#' the spectrum data whether the spectrum is centroided. `value` -#' for `centroided<-` is either a single `logical` or a `logical` of -#' length equal to the number of spectra in `object`. +#' - `deisotopeSpectra()`: *deisotopes* each spectrum keeping only the +#' monoisotopic peak for groups of isotopologues. Isotopologues are +#' estimated using the [isotopologues()] function from the *MetaboCoreUtils* +#' package. 
Note that the default parameters for isotope +#' prediction/detection have been determined using data from the Human +#' Metabolome Database (HMDB) and isotopes for elements other than CHNOPS +#' might not be detected. See parameter `substDefinition` in the +#' documentation of [isotopologues()] for more information. The approach +#' and code to define the parameters for isotope prediction is described +#' [here](https://github.com/EuracBiomedicalResearch/isotopologues). #' -#' - `collisionEnergy()`, `collisionEnergy<-`: gets or sets the -#' collision energy for all spectra in `object`. `collisionEnergy()` -#' returns a `numeric` with length equal to the number of spectra -#' (`NA_real_` if not present/defined), `collisionEnergy<-` takes a -#' `numeric` of length equal to the number of spectra in `object`. +#' - `entropy()`: calculates the entropy of each spectra based on the metrics +#' suggested by Li et al. (https://doi.org/10.1038/s41592-021-01331-z). +#' See also [nentropy()] in the *MsCoreUtils* package for details. #' -#' - `coreSpectraVariables()`: returns the *core* spectra variables along with -#' their expected data type. +#' - `estimatePrecursorIntensity()`: defines the precursor intensities for MS2 +#' spectra using the intensity of the matching MS1 peak from the +#' closest MS1 spectrum (i.e. the last MS1 spectrum measured before the +#' respective MS2 spectrum). With `method = "interpolation"` it is also +#' possible to calculate the precursor intensity based on an interpolation of +#' intensity values (and retention times) of the matching MS1 peaks from the +#' previous and next MS1 spectrum. See [estimatePrecursorIntensity()] for +#' examples and more details. #' -#' - `dataOrigin()`, `dataOrigin<-`: gets or sets the *data origin* for each -#' spectrum. `dataOrigin()` returns a `character` vector (same length than -#' `object`) with the origin of the spectra. 
`dataOrigin<-` expects a -#' `character` vector (same length than `object`) with the replacement -#' values for the data origin of each spectrum. +#' - `estimatePrecursorMz()`: **for DDA data**: allows to estimate a fragment +#' spectra's precursor m/z based on the reported precursor m/z and the data +#' from the previous MS1 spectrum. See [estimatePrecursorMz()] for details. #' -#' - `dataStorage()`: returns a `character` vector (same length than `object`) -#' with the data storage location of each spectrum. +#' - `neutralLoss()`: calculates neutral loss spectra for fragment spectra. See +#' [neutralLoss()] for detailed documentation. #' -#' - `intensity()`: gets the intensity values from the spectra. Returns -#' a [NumericList()] of `numeric` vectors (intensity values for each -#' spectrum). The length of the list is equal to the number of -#' `spectra` in `object`. +#' - `processingLog()`: returns a `character` vector with the processing log +#' messages. #' -#' - `ionCount()`: returns a `numeric` with the sum of intensities for -#' each spectrum. If the spectrum is empty (see `isEmpty()`), -#' `NA_real_` is returned. +#' - `reduceSpectra()`: keeps for groups of peaks with similar m/z values in +#' (given `ppm` and `tolerance`) in each spectrum only the peak with the +#' highest intensity removing all other peaks hence *reducing* each +#' spectrum to the highest intensity peaks per *peak group*. +#' Peak groups are defined using the [group()] function from the +#' *MsCoreUtils* package. See also the `combinePeaks()` function for an +#' alternative function to combine peaks within each spectrum. #' -#' - `isCentroided()`: a heuristic approach assessing if the spectra in -#' `object` are in profile or centroided mode. The function takes -#' the `qtl`th quantile top peaks, then calculates the difference -#' between adjacent m/z value and returns `TRUE` if the first -#' quartile is greater than `k`. (See `Spectra:::.isCentroided()` for -#' the code.) 
+#' - `scalePeaks()`: scales intensities of peaks within each spectrum depending +#' on parameter `by`. With `by = sum` (the default) peak intensities are +#' divided by the sum of peak intensities within each spectrum. The sum of +#' intensities is thus 1 for each spectrum after scaling. Parameter +#' `msLevel.` allows to apply the scaling of spectra of a certain MS level. +#' By default (`msLevel. = uniqueMsLevels(x)`) intensities for all +#' spectra will be scaled. #' -#' - `isEmpty()`: checks whether a spectrum in `object` is empty -#' (i.e. does not contain any peaks). Returns a `logical` vector of -#' length equal number of spectra. +#' - `spectrapply()`: applies a given function to each individual spectrum or +#' sets of a `Spectra` object. By default, the `Spectra` is split into +#' individual spectra (i.e. `Spectra` of length 1) and the function `FUN` +#' is applied to each of them. An alternative splitting can be defined with +#' parameter `f`. Parameters for `FUN` can be passed using `...`. +#' The returned result and its order depend on the function `FUN` and how +#' `object` is split (hence on `f`, if provided). Parallel processing is +#' supported and can be configured with parameter `BPPARAM`, is however only +#' suggested for computational intense `FUN`. +#' As an alternative to the (eventual parallel) processing of the full +#' `Spectra`, `spectrapply()` supports also a chunk-wise processing. For this, +#' parameter `chunkSize` needs to be specified. `object` is then split into +#' chunks of size `chunkSize` which are then (stepwise) processed by `FUN`. +#' This guarantees a lower memory demand (especially for on-disk backends) +#' since only the data for one chunk needs to be loaded into memory in each +#' iteration. Note that by specifying `chunkSize`, parameters `f` and +#' `BPPARAM` will be ignored. +#' See also [chunkapply()] or examples below for details on chunk-wise +#' processing. 
#' -#' - `isolationWindowLowerMz()`, `isolationWindowLowerMz<-`: gets or sets the -#' lower m/z boundary of the isolation window. -#' -#' - `isolationWindowTargetMz()`, `isolationWindowTargetMz<-`: gets or sets the -#' target m/z of the isolation window. +#' - `smooth()`: smooths individual spectra using a moving window-based approach +#' (window size = `2 * halfWindowSize`). Currently, the +#' Moving-Average- (`method = "MovingAverage"`), +#' Weighted-Moving-Average- (`method = "WeightedMovingAverage"`, +#' weights depending on the distance of the center and calculated +#' `1/2^(-halfWindowSize:halfWindowSize)`) and +#' Savitzky-Golay-Smoothing (`method = "SavitzkyGolay"`) are supported. +#' For details how to choose the correct `halfWindowSize` please see +#' [`MsCoreUtils::smooth()`]. #' -#' - `isolationWindowUpperMz()`, `isolationWindowUpperMz<-`: gets or sets the -#' upper m/z boundary of the isolation window. +#' - `pickPeaks()`: picks peaks on individual spectra using a moving +#' window-based approach (window size = `2 * halfWindowSize`). For noisy +#' spectra there are currently two different noise estimators available, +#' the *M*edian *A*bsolute *D*eviation (`method = "MAD"`) and +#' Friedman's Super Smoother (`method = "SuperSmoother"`), +#' as implemented in the [`MsCoreUtils::noise()`]. +#' The method supports also to optionally *refine* the m/z value of +#' the identified centroids by considering data points that belong (most +#' likely) to the same mass peak. Therefore the m/z value is calculated as an +#' intensity weighted average of the m/z values within the peak region. +#' The peak region is defined as the m/z values (and their respective +#' intensities) of the `2 * k` closest signals to the centroid or the closest +#' valleys (`descending = TRUE`) in the `2 * k` region. For the latter the `k` +#' has to be chosen generally larger. See [`MsCoreUtils::refineCentroids()`] for +#' details. 
+#' If the ratio of the signal to the highest intensity of the peak is below +#' `threshold` it will be ignored for the weighted average. #' -#' - `length()`: gets the number of spectra in the object. +#' - `replaceIntensitiesBelow()`: replaces intensities below a specified +#' threshold with the provided `value`. Parameter `threshold` can be either +#' a single numeric value or a function which is applied to all non-`NA` +#' intensities of each spectrum to determine a threshold value for each +#' spectrum. The default is `threshold = min` which replaces all values +#' which are <= the minimum intensity in a spectrum with `value` (the +#' default for `value` is `0`). Note that the function specified with +#' `threshold` is expected to have a parameter `na.rm` since `na.rm = TRUE` +#' will be passed to the function. If the spectrum is in profile mode, +#' ranges of successive non-0 peaks <= `threshold` are set to 0. +#' Parameter `msLevel.` allows to apply this to only spectra of certain MS +#' level(s). #' -#' - `lengths()`: gets the number of peaks (m/z-intensity values) per -#' spectrum. Returns an `integer` vector (length equal to the -#' number of spectra). For empty spectra, `0` is returned. #' -#' - `msLevel()`: gets the spectra's MS level. Returns an integer vector (names -#' being spectrum names, length equal to the number of spectra) with the MS -#' level for each spectrum. +#' @return See individual method description for the return value. #' -#' - `mz()`: gets the mass-to-charge ratios (m/z) from the -#' spectra. Returns a [NumericList()] or length equal to the number of -#' spectra, each element a `numeric` vector with the m/z values of -#' one spectrum. +#' @param acquisitionNum for `filterPrecursorScan()`: `integer` with the +#' acquisition number of the spectra to which the object should be +#' subsetted. #' -#' - `peaksData()`: gets the *peaks* data for all spectra in `object`. 
Peaks -#' data consist of the m/z and intensity values as well as possible additional -#' annotations (variables) of all peaks of each spectrum. The function -#' returns a [SimpleList()] of two dimensional arrays (either `matrix` or -#' `data.frame`), with each array providing the values for the requested -#' *peak variables* (by default `"mz"` and `"intensity"`). Optional parameter -#' `columns` is passed to the backend's `peaksData()` function to allow -#' the selection of specific (or additional) peaks variables (columns) that -#' should be extracted (if available). Importantly, -#' it is **not** guaranteed that each backend supports this parameter (while -#' each backend must support extraction of `"mz"` and `"intensity"` columns). -#' Parameter `columns` defaults to `c("mz", "intensity")` but any value -#' returned by `peaksVariables(object)` is supported. -#' Note also that it is possible to extract the peak data with -#' `as(x, "list")` and `as(x, "SimpleList")` as a `list` and `SimpleList`, -#' respectively. Note however that, in contrast to `peaksData()`, `as()` -#' does not support the parameter `columns`. +#' @param backend For `Spectra()`: [MsBackend-class] to be used as backend. See +#' section on creation of `Spectra` objects for details. For `setBackend()`: +#' instance of [MsBackend-class] that supports `setBackend()` (i.e. for +#' which `supportsSetBackend()` returns `TRUE`). Such backends have a +#' parameter `data` in their `backendInitialize()` function that support +#' passing the full spectra data to the initialize method. See section on +#' creation of `Spectra` objects for details. +#' For `export()`: [MsBackend-class] to be used to export the data. #' -#' - `peaksVariables()`: lists the available variables for mass peaks provided -#' by the backend. Default peak variables are `"mz"` and `"intensity"` (which -#' all backends need to support and provide), but some backends might provide -#' additional variables. 
-#' These variables correspond to the column names of the peak data array -#' returned by `peaksData()`. +#' @param binSize For `bin()`: `numeric(1)` defining the size for the m/z bins. +#' Defaults to `binSize = 1`. #' -#' - `polarity()`, `polarity<-`: gets or sets the polarity for each -#' spectrum. `polarity()` returns an `integer` vector (length equal -#' to the number of spectra), with `0` and `1` representing negative -#' and positive polarities, respectively. `polarity<-` expects an -#' `integer` vector of length 1 or equal to the number of spectra. +#' @param BPPARAM Parallel setup configuration. See [bpparam()] for more +#' information. This is passed directly to the [backendInitialize()] method +#' of the [MsBackend-class]. #' -#' - `precursorCharge()`, `precursorIntensity()`, `precursorMz()`, -#' `precScanNum()`, `precAcquisitionNum()`: gets the charge (`integer`), -#' intensity (`numeric`), m/z (`numeric`), scan index (`integer`) -#' and acquisition number (`interger`) of the precursor for MS level > -#' 2 spectra from the object. Returns a vector of length equal to -#' the number of spectra in `object`. `NA` are reported for MS1 -#' spectra of if no precursor information is available. +#' @param breaks For `bin()`: `numeric` defining the m/z breakpoints between +#' bins. #' -#' - `rtime()`, `rtime<-`: gets or sets the retention times (in seconds) -#' for each spectrum. `rtime()` returns a `numeric` vector (length -#' equal to the number of spectra) with the retention time for each -#' spectrum. `rtime<-` expects a numeric vector with length equal -#' to the number of spectra. +#' @param by For `scalePeaks()`: function to calculate a single `numeric` from +#' intensity values of a spectrum by which all intensities (of +#' that spectrum) should be divided by. The default `by = sum` will +#' divide intensities of each spectrum by the sum of intensities of that +#' spectrum. 
#' -#' - `scanIndex()`: returns an `integer` vector with the *scan index* -#' for each spectrum. This represents the relative index of the -#' spectrum within each file. Note that this can be different to the -#' `acquisitionNum` of the spectrum which represents the index of the -#' spectrum during acquisition/measurement (as reported in the mzML file). +#' @param by.x A `character(1)` specifying the spectra variable used +#' for merging. Default is `"spectrumId"`. #' -#' - `smoothed()`,`smoothed<-`: gets or sets whether a spectrum is -#' *smoothed*. `smoothed()` returns a `logical` vector of length equal -#' to the number of spectra. `smoothed<-` takes a `logical` vector -#' of length 1 or equal to the number of spectra in `object`. +#' @param by.y A `character(1)` specifying the column used for +#' merging. Set to `by.x` if missing. #' -#' - `spectraData()`: gets general spectrum metadata (annotation, also called -#' header). `spectraData()` returns a `DataFrame`. Note that this -#' method does by default **not** return m/z or intensity values. +#' @param charge For `deisotopeSpectra()`: expected charge of the ionized +#' compounds. See [isotopologues()] for details. #' -#' - `spectraData<-`: **replaces** the full spectra data of the `Spectra` -#' object with the one provided with `value`. The `spectraData<-` function -#' expects a `DataFrame` to be passed as value with the same number of rows -#' as there a spectra in `object`. Note that replacing values of -#' peaks variables is not supported with a non-empty processing queue, i.e. -#' if any filtering or data manipulations on the peaks data was performed. -#' In these cases [applyProcessing()] needs to be called first to apply all -#' cached data operations and empty the processing queue. +#' @param chunkSize For `spectrapply()`: size of the chunks into which `Spectra` +#' should be split. This parameter overrides parameters `f` and `BPPARAM`. 
#' -#' - `spectraNames()`, `spectraNames<-`: gets or sets the spectra names. +#' @param columns For `spectraData()` accessor: optional `character` with +#' column names (spectra variables) that should be included in the +#' returned `DataFrame`. By default, all columns are returned. +#' For `peaksData()` accessor: optional `character` with requested columns +#' in the individual `matrix` of the returned `list`. Defaults to +#' `c("mz", "intensity")` but any values returned by `peaksVariables(object)` +#' with `object` being the `Spectra` object are supported. #' -#' - `spectraVariables()`: returns a `character` vector with the -#' available spectra variables (columns, fields or attributes of each -#' spectrum) available in `object`. Note that `spectraVariables()` does not -#' list the *peak variables* (`"mz"`, `"intensity"` and eventual additional -#' annotations for each MS peak). Peak variables are returned by -#' `peaksVariables()`. +#' @param match For `filterRanges()` and `filterValues()`: `character(1)` +#' defining whether the condition has to match for all provided +#' `ranges`/`values` (`match = "all"`; the default), or for any of them +#' (`match = "any"`) for spectra to be retained. #' -#' - `tic()`: gets the total ion current/count (sum of signal of a -#' spectrum) for all spectra in `object`. By default, the value -#' reported in the original raw data file is returned. For an empty -#' spectrum, `0` is returned. +#' @param dataOrigin For `filterDataOrigin()`: `character` to define which +#' spectra to keep. +#' For `filterAcquisitionNum()`: optionally specify if filtering should +#' occur only for spectra of selected `dataOrigin`. #' -#' - `uniqueMsLevels()`: get the unique MS levels available in `object`. This -#' function is supposed to be more efficient than `unique(msLevel(object))`. +#' @param dataStorage For `filterDataStorage()`: `character` to define which +#' spectra to keep. 
+#' For `filterAcquisitionNum()`: optionally specify if filtering should +#' occur only for spectra of selected `dataStorage`. #' -#' @md +#' @param descending For `pickPeaks()`: `logical`, if `TRUE` just values between +#' the nearest valleys around the peak centroids are used. +#' +#' @param drop For `[`, `split()`: not considered. #' -#' @seealso +#' @param f For `split()`: factor defining how to split `x`. See [base::split()] +#' for details. For `setBackend()`: factor defining how to split the data +#' for parallelized copying of the spectra data to the new backend. For some +#' backends changing this parameter can lead to errors. +#' For `combineSpectra()`: `factor` defining the grouping of the spectra +#' that should be combined. For `spectrapply()`: `factor` how `object` +#' should be split. For `filterPrecursorScan()`: defining which spectra +#' belong to the same original data file (sample): Defaults to +#' `f = dataOrigin(x)`. +#' For `intensity()`, `mz()` and `peaksData()`: factor defining how data +#' should be chunk-wise loaded and processed. Defaults to +#' [processingChunkFactor()]. #' -#' - [addProcessing()] for functions to analyze `Spectra`. +#' @param FUN For `addProcessing()`: function to be applied to the peak matrix +#' of each spectrum in `object`. For `compareSpectra()`: function to compare +#' intensities of peaks between two spectra with each other. +#' For `combineSpectra()`: function to combine the (peak matrices) of the +#' spectra. See section *Data manipulations* and examples below for more +#' details. +#' For `bin()`: function to aggregate intensity values of peaks falling +#' into the same bin. Defaults to `FUN = sum` thus summing up intensities. +#' For `spectrapply()` and `chunkapply()`: function to be applied to +#' `Spectra`. #' -#' - [Spectra] for a general description of the `Spectra` object. 
+#' @param halfWindowSize +#' - For `pickPeaks()`: `integer(1)`, used in the +#' identification of the mass peaks: a local maximum has to be the maximum +#' in the window from `(i - halfWindowSize):(i + halfWindowSize)`. +#' - For `smooth()`: `integer(1)`, used in the smoothing algorithm, the +#' window reaches from `(i - halfWindowSize):(i + halfWindowSize)`. +#' - For `filterFourierTransformArtefacts()`: `numeric(1)` defining the m/z +#' window left and right of a peak where to remove fourier transform +#' artefacts. #' -#' @author Sebastian Gibb, Johannes Rainer, Laurent Gatto, Philippine Louail +#' @param i For `[`: `integer`, `logical` or `character` to subset the object. #' -#' @examples +#' @param j For `[`: not supported. #' -#' ## Create a Spectra from mzML files and use the `MsBackendMzR` on-disk -#' ## backend. -#' sciex_file <- dir(system.file("sciex", package = "msdata"), -#' full.names = TRUE) -#' sciex <- Spectra(sciex_file, backend = MsBackendMzR()) -#' sciex +#' @param initial For `tic()`: `logical(1)` whether the initially +#' reported total ion current should be reported, or whether the +#' total ion current should be (re)calculated on the actual data +#' (`initial = FALSE`, same as `ionCount()`). #' -#' ## Get the number of spectra in the data set -#' length(sciex) +#' @param intensity For `filterIntensity()`: `numeric` of length 1 or 2 +#' defining either the lower or the lower and upper intensity limit for the +#' filtering, or a `function` that takes the intensities as input and +#' returns a `logical` (same length then peaks in the spectrum) whether the +#' peak should be retained or not. Defaults to `intensity = c(0, Inf)` thus +#' only peaks with `NA` intensity are removed. #' -#' ## Get the number of mass peaks per spectrum - limit to the first 6 -#' lengths(sciex) |> head() +#' @param intensityFun For `combinePeaks()`: function to be used to aggregate +#' intensities for all peaks in each peak group into a single intensity +#' value. 
#' -#' ## Get the MS level for each spectrum - limit to the first 6 spectra -#' msLevel(sciex) |> head() +#' @param isotopeTolerance For `filterFourierTransformArtefacts()`: the m/z +#' `tolerance` to be used to define whether peaks might be isotopes of +#' the current tested peak. #' -#' ## Alternatively, we could also use $ to access a specific spectra variable. -#' ## This could also be used to add additional spectra variables to the -#' ## object (see further below). -#' sciex$msLevel |> head() +#' @param k For `pickPeaks()`: `integer(1)`, number of values left and right of +#' the peak that should be considered in the weighted mean calculation. #' -#' ## Get the intensity and m/z values. -#' intensity(sciex) -#' mz(sciex) +#' @param keep For `filterMzValues()` and `filterMzRange()`: `logical(1)` +#' whether the matching peaks should be retained (`keep = TRUE`, the +#' default) or dropped (`keep = FALSE`). #' -#' ## Convert a subset of the Spectra object to a long DataFrame. -#' asDataFrame(sciex, i = 1:3, spectraVars = c("rtime", "msLevel")) +#' @param keepIsotopes For `filterFourierTransformArtefacts()`: whether isotope +#' peaks should not be removed as fourier artefacts. #' -#' ## Create a Spectra providing a `DataFrame` containing the spectrum data. +#' @param maxCharge For `filterFourierTransformArtefacts()`: the maximum charge +#' to be considered for isotopes. #' -#' spd <- DataFrame(msLevel = c(1L, 2L), rtime = c(1.1, 1.2)) -#' spd$mz <- list(c(100, 103.2, 104.3, 106.5), c(45.6, 120.4, 190.2)) -#' spd$intensity <- list(c(200, 400, 34.2, 17), c(12.3, 15.2, 6.8)) +#' @param MAPFUN For `compareSpectra()`: function to map/match peaks between the +#' two compared spectra. See [joinPeaks()] for more information and possible +#' functions. 
#' -#' s <- Spectra(spd) -#' s +#' @param method +#' - For `pickPeaks()`: `character(1)`, the noise estimators that +#' should be used, currently the *M*edian *A*bsolute *D*eviation +#' (`method = "MAD"`) and Friedman's Super Smoother +#' (`method = "SuperSmoother"`) are supported. +#' - For `smooth()`: `character(1)`, the smoothing function that should be +#' used, currently, the Moving-Average- (`method = "MovingAverage"`), +#' Weighted-Moving-Average- (`method = "WeightedMovingAverage"`), +#' Savitzky-Golay-Smoothing (`method = "SavitzkyGolay"`) are supported. #' -#' ## List all available spectra variables (i.e. spectrum data and metadata). -#' spectraVariables(s) +#' @param metadata For `Spectra()`: optional `list` with metadata information. #' -#' ## For all *core* spectrum variables accessor functions are available. These -#' ## return NA if the variable was not set. -#' centroided(s) -#' dataStorage(s) -#' rtime(s) -#' precursorMz(s) +#' @param msLevel. `integer` defining the MS level(s) of the spectra to which +#' the function should be applied (defaults to all MS levels of `object`). +#' For `filterMsLevel()`: the MS level to which `object` should be +#' subsetted. #' -#' ## The core spectra variables are: -#' coreSpectraVariables() +#' @param mz For `filterIsolationWindow()`: `numeric(1)` with the m/z value to +#' filter the object. For `filterPrecursorMz()` and `filterMzRange()`: +#' `numeric(2)` defining the lower and upper m/z boundary. +#' For `filterMzValues()` and `filterPrecursorMzValues()`: `numeric` with +#' the m/z values to match peaks or precursor m/z against. #' -#' ## Add an additional metadata column. -#' s$spectrum_id <- c("sp_1", "sp_2") +#' @param mzFun For `combinePeaks()`: function to aggregate m/z values for all +#' peaks within each peak group into a single m/z value. This parameter +#' is ignored if `weighted = TRUE` (the default). 
#' -#' ## List spectra variables, "spectrum_id" is now also listed -#' spectraVariables(s) +#' @param n for `filterAcquisitionNum()`: `integer` with the acquisition +#' numbers to filter for. #' -#' ## Get the values for the new spectra variable -#' s$spectrum_id +#' @param name For `$` and `$<-`: the name of the spectra variable to return +#' or set. #' -#' ## Extract specific spectra variables. -#' spectraData(s, columns = c("spectrum_id", "msLevel")) +#' @param neutralLoss for `containsNeutralLoss()`: `numeric(1)` defining the +#' value which should be subtracted from the spectrum's precursor m/z. #' +#' @param normalized for `entropy()`: `logical(1)` whether the normalized +#' entropy should be calculated (default). See also [nentropy()] for +#' details. #' -#' ## -------- PEAKS VARIABLES AND DATA -------- +#' @param object For `Spectra()`: either a `DataFrame` or `missing`. See +#' section on creation of `Spectra` objects for details. For all other +#' methods a `Spectra` object. #' -#' ## Get the peak data (m/z and intensity values). -#' pks <- peaksData(s) -#' pks +#' @param p For `combineSpectra()`: `factor` defining how to split the input +#' `Spectra` for parallel processing. Defaults to `x$dataStorage`, i.e., +#' depending on the used backend, per-file parallel processing will be +#' performed. +#' +#' @param polarity for `filterPolarity()`: `integer` specifying the polarity +#' to subset `object`. +#' +#' @param ppm For `compareSpectra()`, `containsMz()`, `deisotopeSpectra()`, +#' `filterMzValues()` and `reduceSpectra()`: `numeric(1)` +#' defining a relative, m/z-dependent, maximal accepted difference between +#' m/z values for peaks to be matched (or grouped). +#' For `filterPrecursorMaxIntensity()`: `numeric(1)` defining the relative +#' maximal accepted difference of precursor m/z values of spectra for +#' grouping them into *precursor groups*. For `filterPrecursorIsotopes()`: +#' passed directly to the [isotopologues()] function. 
+#' For `filterValues()`: `numeric` of any length allowing to define +#' a maximal accepted difference between user input `values` and the +#' `spectraVariables` values. If it is not equal to the length of the +#' value provided with parameter `spectraVariables`, `ppm[1]` will be +#' recycled. +#' +#' @param processingQueue For `Spectra()`: optional `list` of +#' [ProcessingStep-class] objects. +#' +#' @param ranges for `filterRanges()`: A `numeric` vector of paired values +#' (upper and lower boundary) that define the ranges to filter the `object`. +#' These paired values need to be in the same order as the +#' `spectraVariables` parameter (see below). +#' +#' @param rt for `filterRt()`: `numeric(2)` defining the retention time range to +#' be used to subset/filter `object`. +#' +#' @param SIMPLIFY For `compareSpectra()` whether the result matrix should be +#' *simplified* to a `numeric` if possible (i.e. if either `x` or `y` is +#' of length 1). +#' +#' @param snr For `pickPeaks()`: `double(1)` defining the +#' *S*ignal-to-*N*oise-*R*atio. The intensity of a local maximum has to be +#' higher than `snr * noise` to be considered as peak. +#' +#' @param source For `Spectra()`: instance of [MsBackend-class] that can be used +#' to import spectrum data from the provided files. See section *Creation +#' of objects, conversion and changing the backend* for more details. +#' +#' @param spectraVariables +#' - For `selectSpectraVariables()`: `character` with the +#' names of the spectra variables to which the backend should be +#' subsetted. +#' - For `addProcessing()`: `character` with additional spectra variables +#' that should be passed along to the function defined with `FUN`. See +#' function description for details. 
+#'     - For `filterRanges()` and `filterValues()`: `character` vector +#'       specifying the column(s) from `spectraData(object)` on which to filter +#'       the data and that correspond to the names of the spectra variables +#'       that should be used for the filtering. +#' +#' @param substDefinition For `deisotopeSpectra()` and +#'     `filterPrecursorIsotopes()`: `matrix` or `data.frame` with definitions +#'     of isotopic substitutions. Uses by default isotopic substitutions +#'     defined from all compounds in the Human Metabolome Database (HMDB). See +#'     [isotopologues()] or [isotopicSubstitutionMatrix()] for details. +#' +#' @param suffix.y A `character(1)` specifying the suffix to be used +#'     for making the names of columns in the merged spectra variables +#'     unique. This suffix will be used to amend `names(y)`, while +#'     `spectraVariables(x)` will remain unchanged. +#' +#' @param tolerance For `compareSpectra()`, `containsMz()`, +#'     `deisotopeSpectra()`, `filterMzValues()` and `reduceSpectra()`: +#'     `numeric(1)` allowing to define a constant maximal accepted difference +#'     between m/z values for peaks to be matched (or grouped). For +#'     `containsMz()` it can also be of length equal `mz` to specify a different +#'     tolerance for each m/z value. +#'     For `filterPrecursorMaxIntensity()`: `numeric(1)` defining the +#'     (constant) maximal accepted difference of precursor m/z values of +#'     spectra for grouping them into *precursor groups*. For +#'     `filterPrecursorIsotopes()`: passed directly to the [isotopologues()] +#'     function. For `filterValues()`: `numeric` of any length allowing to +#'     define a maximal accepted difference between user input `values` and the +#'     `spectraVariables` values. If it is not equal to the length of the +#'     value provided with parameter `spectraVariables`, `tolerance[1]` will be +#'     recycled. Default is `tolerance = 0`. +#' +#' @param threshold +#'     - For `pickPeaks()`: a `double(1)` defining the proportion of the maximal +#'       peak intensity.
Just values above are used for the weighted mean +#' calculation. +#' - For `replaceIntensitiesBelow()`: a `numeric(1)` defining the threshold +#' or a `function` to calculate the threshold for each spectrum on its +#' intensity values. Defaults to `threshold = min`. +#' - For `filterFourierTransformArtefacts()`: the relative intensity (to a +#' peak) below which peaks are considered fourier artefacts. Defaults to +#' `threshold = 0.2` hence removing peaks that have an intensity below 0.2 +#' times the intensity of the tested peak (within the selected +#' `halfWindowSize`). +#' +#' @param use.names For `lengths()`: ignored. +#' +#' @param value replacement value for `<-` methods. See individual +#' method description or expected data type. +#' +#' @param values for `filterValues()`: A `numeric` vector that define the +#' values to filter the Spectra data. These values need to be in the same +#' order as the `spectraVariables` parameter. +#' +#' @param weighted For `combinePeaks()`: `logical(1)` whether m/z values of +#' peaks within each peak group should be aggregated into a single m/z +#' value using an intensity-weighted mean. Defaults to `weighted = TRUE`. +#' +#' @param which for `containsMz()`: either `"any"` or `"all"` defining whether +#' any (the default) or all provided `mz` have to be present in the +#' spectrum. +#' +#' @param x A `Spectra` object. +#' +#' @param y A `Spectra` object. +#' - For `joinSpectraData()`: a `DataFrame`. +#' - For `cbind2()` a `data.frame`, `DataFrame` or `matrix`. +#' +#' @param z For `filterPrecursorCharge()`: `integer()` with the precursor +#' charges to be used as filter. +#' +#' @param zero.rm `logical`. For `bin()`: indicating whether to remove bins +#' with zero intensity. Defaults to `TRUE`, meaning the function will +#' discard bins created with an intensity of 0 to enhance memory efficiency. +#' +#' @param ... Additional arguments. 
+#' +#' @author Sebastian Gibb, Johannes Rainer, Laurent Gatto +#' +#' @md +#' +#' @exportClass Spectra +#' +#' @exportMethod Spectra +#' +#' @examples +#' +#' ## Create a Spectra providing a `DataFrame` containing the spectrum data. +#' +#' spd <- DataFrame(msLevel = c(1L, 2L), rtime = c(1.1, 1.2)) +#' spd$mz <- list(c(100, 103.2, 104.3, 106.5), c(45.6, 120.4, 190.2)) +#' spd$intensity <- list(c(200, 400, 34.2, 17), c(12.3, 15.2, 6.8)) +#' +#' data <- Spectra(spd) +#' data +#' +#' ## Get the number of spectra +#' length(data) +#' +#' ## Get the number of peaks per spectrum +#' lengths(data) +#' +#' ## Create a Spectra from mzML files and use the `MsBackendMzR` on-disk +#' ## backend. +#' sciex_file <- dir(system.file("sciex", package = "msdata"), +#' full.names = TRUE) +#' sciex <- Spectra(sciex_file, backend = MsBackendMzR()) +#' sciex +#' +#' ## The MS data is on disk and will be read into memory on-demand. We can +#' ## however change the backend to a MsBackendMemory backend which will +#' ## keep all of the data in memory. +#' sciex_im <- setBackend(sciex, MsBackendMemory()) +#' sciex_im +#' +#' ## The `MsBackendMemory()` supports the `setBackend()` method: +#' supportsSetBackend(MsBackendMemory()) +#' +#' ## Thus, it is possible to change to that backend with `setBackend()`. Most +#' ## read-only backends however don't support that, such as the +#' ## `MsBackendMzR` and `setBackend()` would fail to change to that backend. +#' supportsSetBackend(MsBackendMzR()) +#' +#' ## The on-disk object `sciex` is light-weight, because it does not keep the +#' ## MS peak data in memory. The `sciex_im` object in contrast keeps all the +#' ## data in memory and its size is thus much larger. +#' object.size(sciex) +#' object.size(sciex_im) +#' +#' ## The spectra variable `dataStorage` returns for each spectrum the location +#' ## where the data is stored. 
For in-memory objects: +#' head(dataStorage(sciex_im)) +#' +#' ## While objects that use an on-disk backend will list the files where the +#' ## data is stored. +#' head(dataStorage(sciex)) +#' +#' ## The spectra variable `dataOrigin` returns for each spectrum the *origin* +#' ## of the data. If the data is read from e.g. mzML files, this will be the +#' ## original mzML file name: +#' head(dataOrigin(sciex)) +#' head(dataOrigin(sciex_im)) +#' +#' +#' ## ---- ACCESSING AND ADDING DATA ---- +#' +#' ## Get the MS level for each spectrum. +#' msLevel(data) +#' +#' ## Alternatively, we could also use $ to access a specific spectra variable. +#' ## This could also be used to add additional spectra variables to the +#' ## object (see further below). +#' data$msLevel +#' +#' ## Get the intensity and m/z values. +#' intensity(data) +#' mz(data) +#' +#' ## Determine whether one of the spectra has a specific m/z value +#' containsMz(data, mz = 120.4) +#' +#' ## Accessing spectra variables works for all backends: +#' intensity(sciex) +#' intensity(sciex_im) +#' +#' ## Get the m/z for the first spectrum. +#' mz(data)[[1]] +#' +#' ## Get the peak data (m/z and intensity values). +#' pks <- peaksData(data) +#' pks #' pks[[1]] #' pks[[2]] #' #' ## Note that we could get the same resulb by coercing the `Spectra` to #' ## a `list` or `SimpleList`: -#' as(s, "list") -#' as(s, "SimpleList") +#' as(data, "list") +#' as(data, "SimpleList") #' -#' ## Or use `mz()` and `intensity()` to extract the m/z and intensity values -#' ## separately -#' mz(s) -#' intensity(s) +#' ## List all available spectra variables (i.e. spectrum data and metadata). +#' spectraVariables(data) #' -#' ## Some `MsBackend` classes provide support for arbitrary peaks variables -#' ## (in addition to the mandatory `"mz"` and `"intensity"` values. Below -#' ## we create a simple data frame with an additional peak variable `"pk_ann"` -#' ## and create a `Spectra` with a `MsBackendMemory` for that data. 
-#' ## Importantly the number of values (per spectrum) need to be the same -#' ## for all peak variables. +#' ## For all *core* spectrum variables accessor functions are available. These +#' ## return NA if the variable was not set. +#' centroided(data) +#' dataStorage(data) +#' rtime(data) +#' precursorMz(data) #' -#' tmp <- data.frame(msLevel = c(2L, 2L), rtime = c(123.2, 123.5)) -#' tmp$mz <- list(c(103.1, 110.4, 303.1), c(343.2, 453.1)) -#' tmp$intensity <- list(c(130.1, 543.1, 40), c(0.9, 0.45)) -#' tmp$pk_ann <- list(c(NA_character_, "A", "P"), c("B", "P")) +#' ## The core spectra variables are: +#' coreSpectraVariables() #' -#' ## Create the Spectra. With parameter `peaksVariables` we can define -#' ## the columns in `tmp` that contain peaks variables. -#' sps <- Spectra(tmp, source = MsBackendMemory(), -#' peaksVariables = c("mz", "intensity", "pk_ann")) -#' peaksVariables(sps) +#' ## Add an additional metadata column. +#' data$spectrum_id <- c("sp_1", "sp_2") #' -#' ## Extract just the m/z and intensity values -#' peaksData(sps)[[1L]] +#' ## List spectra variables, "spectrum_id" is now also listed +#' spectraVariables(data) #' -#' ## Extract the full peaks data -#' peaksData(sps, columns = peaksVariables(sps))[[1L]] +#' ## Get the values for the new spectra variable +#' data$spectrum_id #' -#' ## Access just the pk_ann variable -#' sps$pk_ann +#' ## Extract specific spectra variables. +#' spectraData(data, columns = c("spectrum_id", "msLevel")) #' +#' ## Drop spectra variable data and/or columns. +#' res <- selectSpectraVariables(data, c("mz", "intensity")) #' -NULL - -#' @importFrom methods setAs -setAs("Spectra", "list", function(from, to) { - .peaksapply(from) -}) - -setAs("Spectra", "SimpleList", function(from, to) { - peaksData(from) -}) - -#' @export +#' ## This removed the additional columns "spectrum_id" and deleted all values +#' ## for all spectra variables, except "mz" and "intensity". 
+#' spectraData(res) #' -#' @rdname spectraData -asDataFrame <- function(object, i = seq_along(object), - spectraVars = spectraVariables(object)) { - stopifnot(inherits(object, "Spectra")) - object <- object[i] - n <- sapply(peaksData(object), nrow) - v <- spectraData(object)[rep(seq_along(object), n), spectraVars] - p <- do.call(rbind, as.list(peaksData(object))) - cbind(p, v) -} - -#' @rdname spectraData +#' ## Compared to the data before selectSpectraVariables. +#' spectraData(data) #' -#' @export -setMethod("acquisitionNum", "Spectra", function(object) - acquisitionNum(object@backend)) - -#' @rdname spectraData -setMethod("centroided", "Spectra", function(object) { - centroided(object@backend) -}) - -#' @rdname spectraData -setReplaceMethod("centroided", "Spectra", function(object, value) { - centroided(object@backend) <- value - object -}) - -#' @rdname spectraData -setMethod("collisionEnergy", "Spectra", function(object) { - collisionEnergy(object@backend) -}) - -#' @rdname spectraData -setReplaceMethod("collisionEnergy", "Spectra", function(object, value) { - collisionEnergy(object@backend) <- value - object -}) - -#' @rdname spectraData #' -#' @export -coreSpectraVariables <- function() .SPECTRA_DATA_COLUMNS - -#' @rdname spectraData -setMethod("dataOrigin", "Spectra", function(object) dataOrigin(object@backend)) - -#' @rdname spectraData -setReplaceMethod("dataOrigin", "Spectra", function(object, value) { - dataOrigin(object@backend) <- value - object -}) - -#' @rdname spectraData -setMethod("dataStorage", "Spectra", - function(object) dataStorage(object@backend)) - -#' @rdname spectraData -setMethod("intensity", "Spectra", function(object, - f = processingChunkFactor(object), - ...) { - if (length(object@processingQueue) || length(f)) - NumericList(.peaksapply(object, FUN = function(z, ...) 
z[, 2], - f = f, ...), compress = FALSE) - else intensity(object@backend) -}) - -#' @rdname spectraData -setMethod("ionCount", "Spectra", function(object) { - if (length(object)) - unlist(.peaksapply( - object, FUN = function(pks, ...) sum(pks[, 2], na.rm = TRUE)), - use.names = FALSE) - else numeric() -}) - -#' @rdname spectraData -setMethod("isCentroided", "Spectra", function(object, ...) { - if (length(object)) - unlist(.peaksapply(object, FUN = .peaks_is_centroided), - use.names = FALSE) - else logical() -}) - -#' @rdname spectraData -setMethod("isEmpty", "Spectra", function(x) { - if (length(x)) - unlist(.peaksapply(x, FUN = function(pks, ...) nrow(pks) == 0), - use.names = FALSE) - else logical() -}) - -#' @rdname spectraData -setMethod("isolationWindowLowerMz", "Spectra", function(object) { - isolationWindowLowerMz(object@backend) -}) - -#' @rdname spectraData -setReplaceMethod("isolationWindowLowerMz", "Spectra", function(object, value) { - isolationWindowLowerMz(object@backend) <- value - object -}) - -#' @rdname spectraData -setMethod("isolationWindowTargetMz", "Spectra", function(object) { - isolationWindowTargetMz(object@backend) -}) - -#' @rdname spectraData -setReplaceMethod("isolationWindowTargetMz", "Spectra", function(object, value) { - isolationWindowTargetMz(object@backend) <- value - object -}) - -#' @rdname spectraData -setMethod("isolationWindowUpperMz", "Spectra", function(object) { - isolationWindowUpperMz(object@backend) -}) - -#' @rdname spectraData -setReplaceMethod("isolationWindowUpperMz", "Spectra", function(object, value) { - isolationWindowUpperMz(object@backend) <- value - object -}) - -#' @rdname spectraData +#' ## ---- SUBSETTING, FILTERING AND COMBINING #' -#' @exportMethod length -setMethod("length", "Spectra", function(x) length(x@backend)) - -#' @rdname spectraData +#' ## Subset to all MS2 spectra. 
+#' data[msLevel(data) == 2] #' -#' @exportMethod lengths -setMethod("lengths", "Spectra", function(x, use.names = FALSE) { - f <- .parallel_processing_factor(x) - if (length(x)) { - if (length(x@processingQueue) || length(f)) - unlist(.peaksapply(x, FUN = function(pks, ...) nrow(pks)), - use.names = use.names) - else lengths(x@backend, use.names = use.names) - } else integer() -}) - -#' @rdname spectraData -setMethod("msLevel", "Spectra", function(object) msLevel(object@backend)) - -#' @rdname spectraData -setMethod("mz", "Spectra", function(object, f = processingChunkFactor(object), - ...) { - if (length(object@processingQueue) || length(f)) - NumericList(.peaksapply(object, FUN = function(z, ...) z[, 1], - f = f, ...), compress = FALSE) - else mz(object@backend) -}) - -#' @rdname spectraData +#' ## Append new `spectraVariables` to the `spectraData` +#' df <- data.frame(cola = 4:5, colb = "b") +#' data_append <- cbind2(data, df) #' -#' @export -setMethod( - "peaksData", "Spectra", - function(object, columns = c("mz", "intensity"), - f = processingChunkFactor(object), ..., BPPARAM = bpparam()) { - if (length(object@processingQueue) || length(f)) - SimpleList(.peaksapply(object, columns = columns, f = f)) - else SimpleList(peaksData(object@backend, columns = columns)) - }) - -#' @rdname spectraData -setMethod("peaksVariables", "Spectra", function(object) - peaksVariables(object@backend)) - -#' @rdname spectraData -setMethod("polarity", "Spectra", function(object) { - polarity(object@backend) -}) - -#' @rdname spectraData -setReplaceMethod("polarity", "Spectra", function(object, value) { - polarity(object@backend) <- value - object -}) - -#' @rdname spectraData -setMethod("precScanNum", "Spectra", function(object) { - precScanNum(object@backend) -}) - -#' @rdname spectraData -setMethod("precursorCharge", "Spectra", function(object) { - precursorCharge(object@backend) -}) - -#' @rdname spectraData -setMethod("precursorIntensity", "Spectra", function(object) { - 
precursorIntensity(object@backend) -}) - -#' @rdname spectraData -setMethod("precursorMz", "Spectra", function(object) { - precursorMz(object@backend) -}) - -#' @rdname spectraData -setReplaceMethod("precursorMz", "Spectra", function(object, ..., value) { - precursorMz(object@backend) <- value - object -}) - -#' @rdname spectraData -setMethod("rtime", "Spectra", function(object) { - rtime(object@backend) -}) - -#' @rdname spectraData -setReplaceMethod("rtime", "Spectra", function(object, value) { - rtime(object@backend) <- value - object -}) - -#' @rdname spectraData -setMethod("scanIndex", "Spectra", function(object) { - scanIndex(object@backend) -}) - -#' @rdname spectraData -setMethod("smoothed", "Spectra", function(object) { - smoothed(object@backend) -}) - -#' @rdname spectraData -setReplaceMethod("smoothed", "Spectra", function(object, value) { - smoothed(object@backend) <- value - object -}) - -#' @rdname spectraData +#' ## Same with the filterMsLevel function +#' filterMsLevel(data, 2) #' -#' @importMethodsFrom ProtGenerics spectraData +#' ## Below we combine the `data` and `sciex_im` objects into a single one. +#' data_comb <- c(data, sciex_im) #' -#' @exportMethod spectraData -setMethod( - "spectraData", "Spectra", - function(object, columns = spectraVariables(object)) { - if (length(object@processingQueue) && - length(pcns <- intersect(columns, peaksVariables(object)))) { - ## If user requests peaks variables we need to ensure that the - ## processing queue is executed. - scns <- setdiff(columns, pcns) - if (length(scns)) - spd <- spectraData(object@backend, columns = scns) - else - spd <- make_zero_col_DFrame(nrow = length(object)) - pkd <- peaksData(object, columns = pcns) - ## Add individual peaks variables to the `DataFrame`. 
- for (pcn in pcns) { - vals <- lapply(pkd, `[`, , pcn) - if (pcn %in% c("mz", "intensity")) - vals <- NumericList(vals, compress = FALSE) - spd <- do.call(`[[<-`, list(spd, i = pcn, value = vals)) - } - spd - } else - spectraData(object@backend, columns = columns) - }) - -#' @rdname spectraData +#' ## The combined Spectra contains a union of all spectra variables: +#' head(data_comb$spectrum_id) +#' head(data_comb$rtime) +#' head(data_comb$dataStorage) +#' head(data_comb$dataOrigin) #' -#' @importMethodsFrom ProtGenerics spectraData<- +#' ## Filter a Spectra for a target precursor m/z with a tolerance of 10ppm +#' spd$precursorMz <- c(323.4, 543.2302) +#' data_filt <- Spectra(spd) +#' filterPrecursorMzRange(data_filt, mz = 543.23 + ppm(c(-543.23, 543.23), 10)) #' -#' @exportMethod spectraData<- -setReplaceMethod("spectraData", "Spectra", function(object, value) { - if (!inherits(value, "DataFrame")) - stop("'spectraData<-' expects a 'DataFrame' as input.", call. = FALSE) - pvs <- peaksVariables(object) - if (length(object@processingQueue) && - any(colnames(value) %in% pvs)) - stop("Can not replace peaks variables with a non-empty processing ", - "queue. Please use 'object <- applyProcessing(object)' to apply ", - "and clear the processing queue. Note that 'applyProcessing' ", - "requires a *writeable* backend. Use e.g. 
'object <- ", - "setBackend(object, MsBackendMemory())' if needed.") - pvs <- setdiff(pvs, colnames(value)) - if (length(pvs)) { - sd <- spectraData(object, pvs) - for (pv in pvs) { - value <- do.call("$<-", list(value, name = pv, sd[, pv])) - } - object@processingQueue <- list() - } - spectraData(object@backend) <- value - object -}) - -#' @rdname spectraData -setMethod("spectraNames", "Spectra", function(object) { - spectraNames(object@backend) -}) - -#' @rdname spectraData -setReplaceMethod("spectraNames", "Spectra", function(object, value) { - spectraNames(object@backend) <- value - object -}) - -#' @rdname spectraData -setMethod("spectraVariables", "Spectra", function(object) { - setdiff(spectraVariables(object@backend), peaksVariables(object@backend)) -}) - -#' @rdname spectraData -setMethod("tic", "Spectra", function(object, initial = TRUE) { - if (!length(object)) - return(numeric()) - if (initial) - tic(object@backend, initial = initial) - else ionCount(object) -}) - -#' @rdname spectraData -setMethod("uniqueMsLevels", "Spectra", function(object, ...) { - uniqueMsLevels(object@backend, ...) -}) - -#' @rdname spectraData +#' ## Filter a Spectra keeping only peaks matching certain m/z values +#' sps_sub <- filterMzValues(data, mz = c(103, 104), tolerance = 0.3) +#' mz(sps_sub) #' -#' @importMethodsFrom S4Vectors $ +#' ## This function can also be used to remove specific peaks from a spectrum +#' ## by setting `keep = FALSE`. +#' sps_sub <- filterMzValues(data, mz = c(103, 104), +#' tolerance = 0.3, keep = FALSE) +#' mz(sps_sub) #' -#' @export -setMethod("$", "Spectra", function(x, name) { - if (!(name %in% c(spectraVariables(x@backend), peaksVariables(x@backend)))) - stop("No spectra variable '", name, "' available") - if (name == "mz") - mz(x) - else if (name == "intensity") - intensity(x) - else { - if (length(x@processingQueue) && name %in% peaksVariables(x)) - .peaksapply(x, FUN = function(z, ...) 
z[, name], - columns = c("mz", "intensity", name)) - else - do.call("$", list(x@backend, name)) - } -}) - -#' @rdname spectraData +#' ## Note that `filterMzValues()` keeps or removes all peaks with a matching +#' ## m/z given the provided `ppm` and `tolerance` parameters. #' -#' @export -setReplaceMethod("$", "Spectra", function(x, name, value) { - if (length(x@processingQueue) && - any(name %in% peaksVariables(x))) - stop("Can not replace peaks variables with a non-empty processing ", - "queue. Please use 'object <- applyProcessing(object)' to apply ", - "and clear the processing queue. Note that 'applyProcessing' ", - "requires a *writeable* backend. Use e.g. 'object <- ", - "setBackend(object, MsBackendMemory())' if needed.") - x@backend <- do.call("$<-", list(x@backend, name, value)) - x -}) - -#' @rdname spectraData +#' ## Filter a Spectra keeping only peaks within a m/z range +#' sps_sub <- filterMzRange(data, mz = c(100, 300)) +#' mz(sps_sub) #' -#' @export -setMethod("[[", "Spectra", function(x, i, j, ...) 
{ - if (!is.character(i)) - stop("'i' is supposed to be a character defining the spectra ", - "variable to access.") - if (!missing(j)) - stop("'j' is not supported.") - if (!(i %in% c(spectraVariables(x), "mz", "intensity"))) - stop("No spectra variable '", i, "' available") - if (i == "mz") - mz(x) - else if (i == "intensity") - intensity(x) - else - do.call("[[", list(x@backend, i)) -}) - -#' @rdname spectraData -#' -#' @export -setReplaceMethod("[[", "Spectra", function(x, i, j, ..., value) { - if (!is.character(i)) - stop("'i' is supposed to be a character defining the spectra ", - "variable to replace or create.") - if (!missing(j)) - stop("'j' is not supported.") - x@backend <- do.call("[[<-", list(x@backend, i = i, value = value)) - x -}) - - -################################################################################ -## -## Merging, splitting and aggregating Spectra: length of Spectra is changed -## -################################################################################ - -#' @title Merging, aggregating and splitting Spectra +#' ## Remove empty spectra variables +#' sciex_noNA <- dropNaSpectraVariables(sciex) #' -#' @name combineSpectra +#' ## Available spectra variables before and after `dropNaSpectraVariables()` +#' spectraVariables(sciex) +#' spectraVariables(sciex_noNA) #' -#' @aliases combineSpectra -#' @aliases split -#' @aliases joinSpectraData #' -#' @description +#' ## Adding new spectra variables +#' sciex1 <- filterDataOrigin(sciex, dataOrigin(sciex)[1]) +#' spv <- DataFrame(spectrumId = sciex1$spectrumId[3:12], ## used for merging +#' var1 = rnorm(10), +#' var2 = sample(letters, 10)) +#' spv #' -#' Various functions are availabe to combine, aggregate or split data from one -#' of more `Spectra` objects. These are: -#' -#' - `c()` and `concatenateSpectra()`: combines several `Spectra` objects into -#' a single object. The resulting `Spectra` contains all data from all -#' individual `Spectra`, i.e. 
the union of all their spectra variables. -#' Concatenation will fail if the processing queue of any of the `Spectra` -#' objects is not empty or if different backends are used for the `Spectra` -#' objects. In such cases it is suggested to first change the backends of -#' all `Spectra` to the same type of backend (using the [setBackend()] -#' function and to eventually (if needed) apply the processing queue using -#' the [applyProcessing()] function. -#' -#' - `combineSpectra()`: combines sets of spectra (defined with parameter `f`) -#' into a single spectrum per set aggregating their MS data (i.e. their -#' *peaks data* matrices with the *m/z* and intensity values of their -#' mass peaks). The spectra variable values of the first spectrum per set -#' are reported for the combined spectrum. The peak matrices of the spectra -#' per set are combined using the function specified with parameter `FUN` -#' which uses by default the [combinePeaksData()] function. See the -#' documentation of [combinePeaksData()] for details on the aggregation of -#' the peak data and the package vignette for examples. -#' The sets of spectra can be specified with parameter `f` which is expected -#' to be a `factor` or `vector` of length equal to the length of the -#' `Spectra` specifying to which set a spectrum belongs to. The function -#' returns a `Spectra` of length equal to the unique levels of `f`. The -#' optional parameter `p` allows to define how the `Spectra` should be -#' split for potential parallel processing. The default is -#' `p = x$dataStorage` and hence a per storage file parallel processing is -#' applied for `Spectra` with on disk data representations (such as the -#' [MsBackendMzR()]). This also prevents that spectra from different data -#' files/samples are combined (eventually use e.g. `p = x$dataOrigin` or any -#' other spectra variables defining the originating samples for a spectrum). 
-#'   Before combining the peaks data, all eventual present processing steps are -#'   applied (by calling [applyProcessing()] on the `Spectra`). This function -#'   will replace the original *m/z* and intensity values of a `Spectra` hence -#'   it can not be called on a `Spectra` with a *read-only* backend. In such -#'   cases, the backend should be changed to a *writeable* backend before -#'   using the [setBackend()] function (to e.g. a [MsBackendMemory()] backend). +#' sciex2 <- joinSpectraData(sciex1, spv, by.y = "spectrumId") #' -#' - `joinSpectraData()`: Individual spectra variables can be directly -#'   added with the `$<-` or `[[<-` syntax. The `joinSpectraData()` -#'   function allows to merge a `DataFrame` to the existing spectra -#'   data of a `Spectra`. This function diverges from the [merge()] method in -#'   two main ways: -#'   - The `by.x` and `by.y` column names must be of length 1. -#'   - If variable names are shared in `x` and `y`, the spectra -#'     variables of `x` are not modified. It's only the `y` -#'     variables that are appended with the suffix defined in -#'     `suffix.y`. This is to avoid modifying any core spectra -#'     variables that would lead to an invalid object. -#'   - Duplicated Spectra keys (i.e. `x[[by.x]]`) are not -#'     allowed. Duplicated keys in the `DataFrame` (i.e `y[[by.y]]`) -#'     throw a warning and only the last occurrence is kept. These -#'     should be explored and ideally be removed using for -#'     `QFeatures::reduceDataFrame()`, `PMS::reducePSMs()` or similar -#'     functions. +#' spectraVariables(sciex2) +#' spectraData(sciex2)[1:13, c("spectrumId", "var1", "var2")] #' -#' - `split()`: splits the `Spectra` object based on parameter `f` into a `list` -#'   of `Spectra` objects. +#' ## Removing Fourier transform artefacts seen in Orbitrap data. #' -#' @param BPPARAM Parallel setup configuration. See [bpparam()] for more -#'     information. This is passed directly to the [backendInitialize()] method -#'     of the [MsBackend-class].
+#' ## Loading an Orbitrap spectrum with artefacts. +#' data(fft_spectrum) +#' plotSpectra(fft_spectrum, xlim = c(264.5, 265.5)) +#' plotSpectra(fft_spectrum, xlim = c(264.5, 265.5), ylim = c(0, 5e6)) #' -#' @param by.x A `character(1)` specifying the spectra variable used -#'     for merging. Default is `"spectrumId"`. +#' fft_spectrum <- filterFourierTransformArtefacts(fft_spectrum) +#' fft_spectrum +#' plotSpectra(fft_spectrum, xlim = c(264.5, 265.5), ylim = c(0, 5e6)) #' -#' @param by.y A `character(1)` specifying the column used for -#'     merging. Set to `by.x` if missing. +#' ## Using a few example peaks in your data you can optimize the parameters +#' fft_spectrum_filtered <- filterFourierTransformArtefacts(fft_spectrum, +#'                                                  halfWindowSize = 0.2, +#'                                                  threshold = 0.005, +#'                                                  keepIsotopes = TRUE, +#'                                                  maxCharge = 5, +#'                                                  isotopeTolerance = 0.005 +#'                                                  ) #' -#' @param drop For `split()`: not considered. +#' fft_spectrum_filtered +#' length(mz(fft_spectrum_filtered)[[1]]) +#' plotSpectra(fft_spectrum_filtered, xlim = c(264.5, 265.5), ylim = c(0, 5e6)) #' -#' @param f For `split()`: factor defining how to split `x`. See [base::split()] -#'     for details. -#'     For `combineSpectra()`: `factor` defining the grouping of the spectra -#'     that should be combined. Defaults to `x$dataStorage`. +#' ## Using filterRanges to filter spectra object based on variables available +#' ## in `spectraData`. +#' ## First, determine the variable(s) on which to base the filtering: +#' sv <- c("rtime", "precursorMz", "peaksCount") +#' ## Note that ANY variables can be chosen here, and as many as wanted. #' -#' @param FUN For `combineSpectra()`: function to combine the (peak matrices) -#'     of the spectra. Defaults to [combinePeaksData()]. +#' ## Define the ranges (pairs of values with lower and upper boundary) to be +#' ## used for the individual spectra variables. The first two values will be +#' ## used for the first spectra variable (e.g., rtime here), the next two for +#' ## the second (e.g.
precursorMz here) and so on: +#' ranges <- c(30, 350, 200,500, 350, 600) #' -#' @param p For `combineSpectra()`: `factor` defining how to split the input -#' `Spectra` for parallel processing. Defaults to `x$dataStorage`, i.e., -#' depending on the used backend, per-file parallel processing will be -#' performed. +#' ## Input the parameters within the filterRanges function: +#' filt_spectra <- filterRanges(sciex, spectraVariables = sv, +#' ranges = ranges) #' -#' @param suffix.y A `character(1)` specifying the suffix to be used -#' for making the names of columns in the merged spectra variables -#' unique. This suffix will be used to amend `names(y)`, while -#' `spectraVariables(x)` will remain unchanged. +#' ## Using `filterRanges()` to filter spectra object with multiple ranges for +#' ## the same `spectraVariable` (e.g, here rtime) +#' sv <- c("rtime", "rtime") +#' ranges <- c(30, 100, 200, 300) +#' filt_spectra <- filterRanges(sciex, spectraVariables = sv, +#' ranges = ranges, match = "any") #' -#' @param x A `Spectra` object. +#' ## Using filterValues in a similar way to a filter spectra object based on +#' ## variables available in `spectraData`. However, this time not based on +#' ## ranges but similarities to user input single values with given +#' ## tolerance/ppm +#' ## First determine the variable(s) on which to base the filtering: +#' sv <- c("rtime", "precursorMz") +#' ## Note that ANY variables can be chosen here, and as many as wanted. #' -#' @param y A `DataFrame` with the spectra variables to join/add. +#' ## Define the values that will be used to filter the spectra based on their +#' ## similarities to their respective spectraVariables. +#' ## The first values in the parameters values, tolerance and ppm will be +#' ## used for the first spectra variable (e.g. rtime here), the next for the +#' ## second (e.g. precursorMz here) and so on: +#' values <- c(350, 400) +#' tolerance <- c(100, 0) +#' ppm <- c(0,50) #' -#' @param ... Additional arguments. 
+#' ## Input the parameters within the `filterValues()` function: +#' filt_spectra <- filterValues(sciex, spectraVariables = sv, +#' values = values, tolerance = tolerance, ppm = ppm) #' -#' @seealso +#' ## ---- DATA MANIPULATIONS AND OTHER OPERATIONS ---- #' -#' - [combinePeaks()] for functions to aggregate mass peaks data. +#' ## Set the data to be centroided +#' centroided(data) <- TRUE #' -#' - [Spectra] for a general description of the `Spectra` object. +#' ## Replace peak intensities below 40 with 3. +#' res <- replaceIntensitiesBelow(data, threshold = 40, value = 3) +#' res #' -#' @importFrom MsCoreUtils vapply1c +#' ## Get the intensities of the first and second spectrum. +#' intensity(res)[[1]] +#' intensity(res)[[2]] #' -#' @author Sebastian Gibb, Johannes Rainer, Laurent Gatto +#' ## Remove all peaks with an intensity below 40. +#' res <- filterIntensity(res, intensity = c(40, Inf)) #' -#' @examples +#' ## Get the intensities of the first and second spectrum. +#' intensity(res)[[1]] +#' intensity(res)[[2]] #' -#' ## Create a Spectra providing a `DataFrame` containing a MS data. +#' ## Lengths of spectra is now different +#' lengths(mz(res)) +#' lengths(mz(data)) #' -#' spd <- DataFrame(msLevel = c(1L, 2L), rtime = c(1.1, 1.2)) -#' spd$mz <- list(c(100, 103.2, 104.3, 106.5), c(45.6, 120.4, 190.2)) -#' spd$intensity <- list(c(200, 400, 34.2, 17), c(12.3, 15.2, 6.8)) +#' ## In addition it is possible to pass a function to `filterIntensity()`: in +#' ## the example below we want to keep only peaks that have an intensity which +#' ## is larger than one third of the maximal peak intensity in that spectrum. +#' keep_peaks <- function(x, prop = 3) { +#' x > max(x, na.rm = TRUE) / prop +#' } +#' res2 <- filterIntensity(data, intensity = keep_peaks) +#' intensity(res2)[[1L]] +#' intensity(data)[[1L]] #' -#' s <- Spectra(spd) -#' s +#' ## We can also change the proportion by simply passing the `prop` parameter +#' ## to the function. 
To keep only peaks that have an intensity which is +#' ## larger than half of the maximum intensity: +#' res2 <- filterIntensity(data, intensity = keep_peaks, prop = 2) +#' intensity(res2)[[1L]] +#' intensity(data)[[1L]] #' -#' ## Create a second Spectra from mzML files and use the `MsBackendMzR` -#' ## on-disk backend. -#' sciex_file <- dir(system.file("sciex", package = "msdata"), -#' full.names = TRUE) -#' sciex <- Spectra(sciex_file, backend = MsBackendMzR()) -#' sciex +#' ## Since data manipulation operations are by default not directly applied to +#' ## the data but only added to the internal lazy evaluation queue, it is also +#' ## possible to remove these data manipulations with the `reset()` function: +#' res_rest <- reset(res) +#' res_rest +#' lengths(mz(res_rest)) +#' lengths(mz(res)) +#' lengths(mz(data)) +#' +#' ## `reset()` after an `applyProcessing()` can not restore the data, because +#' ## the data in the backend was changed. Similarly, `reset()` after any +#' ## filter operations can not restore data for a `Spectra` with a +#' ## `MsBackendMemory` or `MsBackendDataFrame`. +#' res_2 <- applyProcessing(res) +#' res_rest <- reset(res_2) +#' lengths(mz(res)) +#' lengths(mz(res_rest)) #' -#' ## Subset to the first 100 spectra to reduce running time of the examples -#' sciex <- sciex[1:100] #' +#' ## Compare spectra: comparing spectra 2 and 3 against spectra 10:20 using +#' ## the normalized dotproduct method. +#' res <- compareSpectra(sciex_im[2:3], sciex_im[10:20]) +#' ## first row contains comparisons of spectrum 2 with spectra 10 to 20 and +#' ## the second row comparisons of spectrum 3 with spectra 10 to 20 +#' res #' -#' ## -------- COMBINE SPECTRA -------- +#' ## To use a simple Pearson correlation instead we can define a function +#' ## that takes the two peak matrices and calculates the correlation for +#' ## their second columns (containing the intensity values). +#' correlateSpectra <- function(x, y, use = "pairwise.complete.obs", ...) 
{ +#' cor(x[, 2], y[, 2], use = use) +#' } +#' res <- compareSpectra(sciex_im[2:3], sciex_im[10:20], +#' FUN = correlateSpectra) +#' res #' -#' ## Combining the `Spectra` object `s` with the MS data from `sciex`. -#' ## Calling directly `c(s, sciex)` would result in an error because -#' ## both backends use a different backend. We thus have to first change -#' ## the backends to the same backend. We change the backend of the `sciex` -#' ## `Spectra` to a `MsBackendMemory`, the backend used by `s`. +#' ## Use compareSpectra to determine the number of common (matching) peaks +#' ## with a ppm of 10: +#' ## type = "inner" uses an *inner join* to match peaks, i.e. keeps only +#' ## peaks that can be mapped between both spectra. The provided FUN returns +#' ## simply the number of matching peaks. +#' compareSpectra(sciex_im[2:3], sciex_im[10:20], ppm = 10, type = "inner", +#' FUN = function(x, y, ...) nrow(x)) #' -#' sciex <- setBackend(sciex, MsBackendMemory()) +#' ## Apply an arbitrary function to each spectrum in a Spectra. +#' ## In the example below we calculate the mean intensity for each spectrum +#' ## in a subset of the sciex_im data. Note that we can access all variables +#' ## of each individual spectrum either with the `$` operator or the +#' ## corresponding method. +#' res <- spectrapply(sciex_im[1:20], FUN = function(x) mean(x$intensity[[1]])) +#' head(res) #' -#' ## Combine the two `Spectra` -#' all <- c(s, sciex) -#' all +#' ## It is however important to note that dedicated methods to access the +#' ## data (such as `intensity`) are much more efficient than using `lapply()`: +#' res <- lapply(intensity(sciex_im[1:20]), mean) +#' head(res) #' -#' ## The new `Spectra` objects contains the union of spectra variables from -#' ## both: -#' spectraVariables(all) +#' ## As an alternative, applying a function `FUN` to a `Spectra` can be +#' ## performed *chunk-wise*. 
The advantage of this is, that only the data for +#' ## one chunk at a time needs to be loaded into memory reducing the memory +#' ## demand. This type of processing can be performed by specifying the size +#' ## of the chunks (i.e. number of spectra per chunk) with the `chunkSize` +#' ## parameter +#' spectrapply(sciex_im[1:20], lengths, chunkSize = 5L) #' -#' ## The spectra variables that were not present in `s`: -#' setdiff(spectraVariables(all), spectraVariables(s)) +#' ## ---- DATA EXPORT ---- #' -#' ## The values for these were filled with missing values for spectra from -#' ## `s`: -#' all$peaksCount |> head() +#' ## Some `MsBackend` classes provide an `export()` method to export the data +#' ## to the file format supported by the backend. +#' ## The `MsBackendMzR` for example allows to export MS data to mzML or +#' ## mzXML file(s), the `MsBackendMgf` (defined in the MsBackendMgf R package) +#' ## would allow to export the data in mgf file format. +#' ## Below we export the MS data in `data`. We call the `export()` method on +#' ## this object, specify the backend that should be used to export the data +#' ## (and which also defines the output format) and provide a file name. +#' fl <- tempfile() +#' export(data, MsBackendMzR(), file = fl) #' +#' ## This exported our data in mzML format. Below we read the first 6 lines +#' ## from that file. +#' readLines(fl, n = 6) #' -#' ## -------- AGGREGATE SPECTRA -------- +#' ## If only a single file name is provided, all spectra are exported to that +#' ## file. To export data with the `MsBackendMzR` backend to different files, a +#' ## file name for each individual spectrum has to be provided. +#' ## Below we export each spectrum to its own file. +#' fls <- c(tempfile(), tempfile()) +#' export(data, MsBackendMzR(), file = fls) #' -#' ## Sets of spectra can be combined into a single, representative spectrum -#' ## per set using `combineSpectra()`. This aggregates the peaks data (i.e. 
-#' ## the spectra's m/z and intensity values) while using the values for all -#' ## spectra variables from the first spectrum per set. Below we define the -#' ## sets as all spectra measured in the *same second*, i.e. rounding their -#' ## retention time to the next closer integer value. -#' f <- round(rtime(sciex)) -#' head(f) +#' ## Reading the data from the first file +#' res <- Spectra(backendInitialize(MsBackendMzR(), fls[1])) #' -#' cmp <- combineSpectra(sciex, f = f) +#' mz(res) +#' mz(data) #' -#' ## The length of `cmp` is now equal to the length of unique levels in `f`: -#' length(cmp) +#' ## ---- PEAKS VARIABLES AND DATA ---- #' -#' ## The spectra variable value from the first spectrum per set is used in -#' ## the representative/combined spectrum: -#' cmp$rtime +#' ## Some `MsBackend` classes provide support for arbitrary peaks variables +#' ## (in addition to the mandatory `"mz"` and `"intensity"` values. Below +#' ## we create a simple data frame with an additional peak variable `"pk_ann"` +#' ## and create a `Spectra` with a `MsBackendMemory` for that data. +#' ## Importantly the number of values (per spectrum) need to be the same +#' ## for all peak variables. #' -#' ## The peaks data was aggregated: the number of mass peaks of the first six -#' ## spectra from the original `Spectra`: -#' lengths(sciex) |> head() +#' tmp <- data.frame(msLevel = c(2L, 2L), rtime = c(123.2, 123.5)) +#' tmp$mz <- list(c(103.1, 110.4, 303.1), c(343.2, 453.1)) +#' tmp$intensity <- list(c(130.1, 543.1, 40), c(0.9, 0.45)) +#' tmp$pk_ann <- list(c(NA_character_, "A", "P"), c("B", "P")) #' -#' ## and for the first aggreagated spectra: -#' lengths(cmp) |> head() +#' ## Create the Spectra. With parameter `peaksVariables` we can define +#' ## the columns in `tmp` that contain peaks variables. 
+#' sps <- Spectra(tmp, source = MsBackendMemory(), +#' peaksVariables = c("mz", "intensity", "pk_ann")) +#' peaksVariables(sps) #' -#' ## The default peaks data aggregation method joins all mass peaks. See -#' ## documentation of the `combinePeaksData()` function for more options. +#' ## Extract just the m/z and intensity values +#' peaksData(sps)[[1L]] #' +#' ## Extract the full peaks data +#' peaksData(sps, columns = peaksVariables(sps))[[1L]] #' -#' ## -------- SPLITTING DATA -------- +#' ## Access just the pk_ann variable +#' sps$pk_ann +NULL + +#' The Spectra class #' -#' ## A `Spectra` can be split into a `list` of `Spectra` objects using the -#' ## `split()` function defining the sets into which the `Spectra` should -#' ## be splitted into with parameter `f`. -#' sciex_split <- split(sciex, f) +#' The [Spectra-class] encapsulates data and meta-data for mass +#' spectrometry experiments. #' -#' length(sciex_split) -#' sciex_split |> head() #' +#' @slot backend A derivative of [MsBackend-class] holding/controlling the spectra +#' data. +#' @slot processingQueue `list` of `ProcessingStep` objects. +#' @slot processingQueueVariables `character` of spectraVariables that should +#' be passed to the processing step function. +#' @slot processing A `character` storing logging information. +#' @slot metadata A `list` storing experiment metadata. +#' @slot version A `character(1)` containing the class version. 
#' -#' ## -------- ADDING SPECTRA DATA -------- +#' @name Spectra-class +#' @docType class +#' @author Sebastian Gibb \email{mail@@sebastiangibb.de} #' -#' ## Adding new spectra variables -#' sciex1 <- filterDataOrigin(sciex, dataOrigin(sciex)[1]) -#' spv <- DataFrame(spectrumId = sciex1$spectrumId[3:12], ## used for merging -#' var1 = rnorm(10), -#' var2 = sample(letters, 10)) -#' spv +#' @importClassesFrom S4Vectors DataFrame #' -#' sciex2 <- joinSpectraData(sciex1, spv, by.y = "spectrumId") +#' @importMethodsFrom S4Vectors lapply #' -#' spectraVariables(sciex2) -#' spectraData(sciex2)[1:13, c("spectrumId", "var1", "var2")] -NULL - -#' @rdname combineSpectra +#' @importFrom S4Vectors DataFrame #' -#' @exportMethod c -setMethod("c", "Spectra", function(x, ...) { - .concatenate_spectra(unname(list(unname(x), ...))) -}) +#' @noRd +setClass( + "Spectra", + slots = c( + backend = "MsBackend", + processingQueue = "list", + processingQueueVariables = "character", + ## logging + processing = "character", + ## metadata + metadata = "list", + processingChunkSize = "numeric", + version = "character" + ), + prototype = prototype(version = "0.3", + processingChunkSize = Inf) +) -#' @rdname combineSpectra -setMethod("split", "Spectra", function(x, f, drop = FALSE, ...) { - bcknds <- split(x@backend, f, ...) 
- lapply(bcknds, function(b) { - slot(x, "backend", check = FALSE) <- b - x - }) +setValidity("Spectra", function(object) { + msg <- .valid_processing_queue(object@processingQueue) + if (length(msg)) msg + else TRUE }) - -################################################################################ -## -## Aggregating peaks data -## -################################################################################ - -#' @title Aggregating and combining mass peaks data -#' -#' @name combinePeaks -#' -#' @description -#' -#' In addition to aggregating content of spectra variables (describe in -#' [combineSpectra()]) it is also possible to aggregate and combine mass peaks -#' data from individual spectra within a `Spectra`. These `combinePeaks()` -#' function combines mass peaks **within each spectrum** with a difference in -#' their m/z values that is smaller than the maximal acceptable difference -#' defined by `ppm` and `tolerance`. Parameters `intensityFun` and `mzFun` -#' allow to define functions to aggregate the intensity and m/z values for -#' each such group of peaks. With `weighted = TRUE` (the default), the m/z -#' value of the combined peak is calculated using an intensity-weighted mean -#' and parameter `mzFun` is ignored. The [MsCoreUtils::group()] function is -#' used for the grouping of mass peaks. Parameter `msLevel.` allows to define -#' selected MS levels for which peaks should be combined. This function -#' returns a `Spectra` with the same number of spectra than the input object, -#' but with possibly combined peaks within each spectrum. -#' Additional peak variables (other than `"mz"` and `"intensity"`) are -#' dropped (i.e. their values are replaced with `NA`) for combined peaks -#' unless they are constant across the combined peaks. See also -#' [reduceSpectra()] for a function to select a single *representative* -#' mass peak for each peak group. 
-#' -#' @param intensityFun Function to aggregate intensities for all peaks in -#' each peak group into a single intensity value. -#' -#' @param msLevel. `integer` defining the MS level(s) of the spectra to which -#' the function should be applied (defaults to all MS levels of `object`. -#' -#' @param mzFun Function to aggregate m/z values for all mass peaks within -#' each peak group into a single m/z value. This parameter is ignored if -#' `weighted = TRUE` (the default). -#' -#' @param object A `Spectra` object. -#' -#' @param ppm `numeric(1)` defining a relative, m/z-dependent, maximal -#' accepted difference between m/z values for peaks to be grouped. Default -#' is `ppm = 20`. -#' -#' @param tolerance `numeric(1)` allowing to define a constant maximal -#' accepted difference between m/z values for peaks to be grouped. Default -#' is `tolerance = 0`. -#' -#' @param weighted `logical(1)` whether m/z values of peaks within each peak -#' group should be aggregated into a single m/z value using an -#' intensity-weighted mean. Defaults to `weighted = TRUE`. -#' -#' @param ... ignored. -#' -#' @md -#' -#' @seealso +#' @rdname hidden_aliases #' -#' - [combineSpectra()] for functions to combine or aggregate `Spectra`'s -#' spectra data. +#' @importMethodsFrom methods show #' -#' - [combinePeaksData()] for the function to combine the mass peaks data. +#' @importFrom utils capture.output #' -#' - [reduceSpectra()] and similar functions to filter mass peaks data. 
+#' @exportMethod show +setMethod("show", "Spectra", + function(object) { + cat("MSn data (", class(object)[1L], ") with ", + length(object@backend), " spectra in a ", class(object@backend), + " backend:\n", sep = "") + if (length(object@backend)) { + txt <- capture.output(show(object@backend)) + cat(txt[-1], sep = "\n") + } + if (length(object@processingQueue)) + cat("Lazy evaluation queue:", length(object@processingQueue), + "processing step(s)\n") + lp <- length(object@processing) + if (lp) { + lps <- object@processing + if (lp > 3) { + lps <- lps[1:3] + lps <- c(lps, paste0("...", lp - 3, " more processings. ", + "Use 'processingLog' to list all.")) + } + cat("Processing:\n", paste(lps, collapse="\n "), "\n") + } + }) + +#' @rdname Spectra +setMethod("Spectra", "missing", function(object, processingQueue = list(), + metadata = list(), ..., + backend = MsBackendMemory(), + BPPARAM = bpparam()) { + new("Spectra", metadata = metadata, processingQueue = processingQueue, + backend = backend) +}) + +#' @rdname Spectra +setMethod("Spectra", "MsBackend", function(object, processingQueue = list(), + metadata = list(), ..., + BPPARAM = bpparam()) { + new("Spectra", metadata = metadata, processingQueue = processingQueue, + backend = object) +}) + +#' @rdname Spectra #' -#' - [Spectra] for a general description of the `Spectra` object. 
+#' @importFrom methods callNextMethod +setMethod("Spectra", "character", function(object, processingQueue = list(), + metadata = list(), + source = MsBackendMzR(), + backend = source, + ..., BPPARAM = bpparam()) { + if (!length(object)) + Spectra(backend, metadata = metadata, + processingQueue = processingQueue) + else + callNextMethod(object = object, processingQueue = processingQueue, + metadata = metadata, source = source, backend = backend, + ..., BPPARAM = BPPARAM) +}) + +#' @rdname Spectra +setMethod("Spectra", "ANY", function(object, processingQueue = list(), + metadata = list(), + source = MsBackendMemory(), + backend = source, + ..., BPPARAM = bpparam()) { + sp <- new("Spectra", metadata = metadata, processingQueue = processingQueue, + backend = backendInitialize( + source, object, ..., + BPPARAM = backendBpparam(source, BPPARAM))) + if (class(source)[1L] != class(backend)[1L]) + setBackend(sp, backend, ..., BPPARAM = backendBpparam(backend, BPPARAM)) + else sp +}) + +#' @rdname Spectra #' -#' @author Sebastian Gibb, Johannes Rainer, Laurent Gatto +#' @importMethodsFrom ProtGenerics setBackend #' -#' @examples +#' @exportMethod setBackend +setMethod( + "setBackend", c("Spectra", "MsBackend"), + function(object, backend, f = processingChunkFactor(object), ..., + BPPARAM = bpparam()) { + backend_class <- class(object@backend)[1L] + BPPARAM <- backendBpparam(object@backend, BPPARAM) + BPPARAM <- backendBpparam(backend, BPPARAM) + if (!supportsSetBackend(backend)) + stop(class(backend), " does not support 'setBackend'") + if (!length(object)) { + bknds <- backendInitialize( + backend, data = spectraData(object@backend), ...) + } else { + if (!is.factor(f)) + f <- force(factor(f, levels = unique(f))) + if (length(f) && (length(levels(f)) > 1)) { + if (length(f) != length(object)) + stop("length of 'f' has to match the length of 'object'") + bknds <- bplapply( + split(object@backend, f = f), + function(z, ...) 
{ + backendInitialize(backend, + data = spectraData(z), ..., + BPPARAM = SerialParam()) + }, ..., BPPARAM = BPPARAM) + bknds <- backendMerge(bknds) + ## That below ensures the backend is returned in its original + ## order - unsplit does unfortunately not work. + if (is.unsorted(f)) + bknds <- bknds[order(unlist(split(seq_along(bknds), f), + use.names = FALSE))] + } else { + bknds <- backendInitialize( + backend, data = spectraData(object@backend), ...) + } + } + object@backend <- bknds + object@processing <- .logging(object@processing, + "Switch backend from ", + backend_class, " to ", + class(object@backend)) + object + }) + +#' @rdname Spectra #' -#' ## Create a Spectra from mzML files and use the `MsBackendMzR` on-disk -#' ## backend. -#' sciex_file <- dir(system.file("sciex", package = "msdata"), -#' full.names = TRUE) -#' sciex <- Spectra(sciex_file, backend = MsBackendMzR()) +#' @importFrom MsCoreUtils vapply1c #' -#' ## Combine mass peaks per spectrum with a difference in their m/z value -#' ## that is smaller than 20 ppm. The intensity values of such peaks are -#' ## combined by summing their values, while for the m/z values the median -#' ## is reported -#' sciex_comb <- combinePeaks(sciex, ppm = 20, -#' intensityFun = sum, mzFun = median) -#' -#' ## Comparing the number of mass peaks before and after aggregation -#' lengths(sciex) |> head() -#' lengths(sciex_comb) |> head() -#' -#' ## Plotting the first spectrum before and after aggregation -#' par(mfrow = c(1, 2)) -#' plotSpectra(sciex[2L]) -#' plotSpectra(sciex_comb[2L]) -#' -#' ## Using `reduceSpectra()` to keep for each group of mass peaks with a -#' ## difference in their m/z values < 20ppm the one with the highest intensity. -#' sciex_red <- reduceSpectra(sciex, ppm = 20) -#' -#' ## Comparing the number of mass peaks before and after the operation -#' lengths(sciex) |> head() -#' lengths(sciex_red) |> head() -NULL +#' @exportMethod c +setMethod("c", "Spectra", function(x, ...) 
{ + .concatenate_spectra(unname(list(unname(x), ...))) +}) -#' @rdname hidden_aliases -setMethod("combinePeaks", "list", function(object, ...) { - .Deprecated("combinePeaksData", old = "combinePeaks", - msg = paste0("'combinePeaks' for lists of peak matrices is ", - "deprecated; please use 'combinePeaksData' ", - "instead.")) - combinePeaksData(object, ...) +#' @rdname Spectra +setMethod("split", "Spectra", function(x, f, drop = FALSE, ...) { + bcknds <- split(x@backend, f, ...) + lapply(bcknds, function(b) { + slot(x, "backend", check = FALSE) <- b + x + }) }) -#' @rdname combinePeaks +#' @rdname Spectra #' -#' @exportMethod combinePeaks -setMethod("combinePeaks", "Spectra", function(object, tolerance = 0, ppm = 20, - intensityFun = base::mean, - mzFun = base::mean, - weighted = TRUE, - msLevel. = uniqueMsLevels(object), - ...) { - object <- addProcessing( - object, .peaks_combine, ppm = ppm, tolerance = tolerance, - intensityFun = intensityFun, mzFun = mzFun, weighted = weighted, - msLevel = force(msLevel.), spectraVariables = "msLevel") - object@processing <- .logging( - object@processing, "Combining peaks within each spectrum with ppm = ", - ppm, " and tolerance = ", tolerance, ".") +#' @export +setMethod("export", "Spectra", + function(object, backend, ...) { + if (missing(backend)) + stop("Parameter 'backend' is required.") + export(backend, object, ...) 
+ }) + +#### --------------------------------------------------------------------------- +## +## ACCESSOR METHODS +## +#### --------------------------------------------------------------------------- + +#' @rdname Spectra +setMethod("acquisitionNum", "Spectra", function(object) + acquisitionNum(object@backend)) + +#' @rdname Spectra +setMethod( + "peaksData", "Spectra", + function(object, columns = c("mz", "intensity"), + f = processingChunkFactor(object), ..., BPPARAM = bpparam()) { + if (length(object@processingQueue) || length(f)) + SimpleList(.peaksapply(object, columns = columns, f = f)) + else SimpleList(peaksData(object@backend, columns = columns)) + }) + +#' @rdname Spectra +setMethod("peaksVariables", "Spectra", function(object) + peaksVariables(object@backend)) + +#' @importFrom methods setAs +setAs("Spectra", "list", function(from, to) { + .peaksapply(from) +}) + +setAs("Spectra", "SimpleList", function(from, to) { + peaksData(from) +}) + +#' @rdname Spectra +setMethod("centroided", "Spectra", function(object) { + centroided(object@backend) +}) + +#' @rdname Spectra +setReplaceMethod("centroided", "Spectra", function(object, value) { + centroided(object@backend) <- value object }) +#' @rdname Spectra +setMethod("collisionEnergy", "Spectra", function(object) { + collisionEnergy(object@backend) +}) -################################################################################ -## -## Filtering, subsetting Spectra: subsetting Spectra and its data content. 
-## -################################################################################ - -#' @title Filter and subset Spectra objects -#' -#' @name filterMsLevel -#' -#' @aliases [,Spectra-method -#' @aliases filterAcquisitionNum -#' @aliases filterDataOrigin -#' @aliases filterDataStorage -#' @aliases filterEmptySpectra -#' @aliases filterIsolationWindow -#' @aliases filterMsLevel -#' @aliases filterPolarity -#' @aliases filterPrecursorCharge -#' @aliases filterPrecursorIsotopes -#' @aliases filterPrecursorMzRange -#' @aliases filterPrecursorMzValues -#' @aliases filterPrecursorScan -#' @aliases filterRanges -#' @aliases filterRt -#' @aliases filterValues -#' @aliases dropNaSpectraVariables -#' @aliases selectSpectraVariables -#' @aliases filterIntensity -#' @aliases filterMzRange -#' @aliases filterMzValues -#' @aliases reduceSpectra -#' -#' @description -#' -#' A variety of functions to filter or subset `Spectra` objects are available. -#' These can be generally separated into two main classes: I) *classical* -#' subset operations that immediately reduce the number of spectra in the -#' object and II) filters that reduce the **content** of the object without -#' changing its length (i.e. the number of spectra). The latter can be further -#' subdivided into functions that affect the content of the `spectraData` (i.e. -#' the general spectrum metadata) and those that reduce the content of the -#' object's `peaksData` (i.e. the m/z and intensity values of a spectrum's -#' mass peaks). -#' -#' A description of functions from these 3 different categories are given below -#' in sections *Subset `Spectra`*, *Filter content of `spectraData()`* and -#' *Filter content of `peaksData()`*, respectively. -#' -#' -#' @section Subset `Spectra`: -#' -#' These functions affect the number of spectra in a `Spectra` object creating -#' a subset of the original object without affecting its content. -#' -#' - `[`: subsets the spectra keeping only selected elements (`i`). 
The method -#' **always** returns a `Spectra` object. -#' -#' - `cbind2()`: Appends multiple spectra variables from a `data.frame`, -#' `DataFrame` or `matrix` to the `Spectra` object at once. It does so -#' *blindly* (e.g. do not check rownames compatibility) and is therefore at -#' the risk of the user. For a more controlled way of adding spectra -#' variables, the `joinSpectraData()` should be used. It will return a -#' `Spectra` object with the appended spectra variables. `cbind2()` does -#' check however that the number of rows of the `data.frame` or `DataFrame` -#' matches the number of spectra in the `Spectra` object. -#' -#' - `deisotopeSpectra()`: *deisotopes* each spectrum keeping only the -#' monoisotopic peak for groups of isotopologues. Isotopologues are -#' estimated using the [isotopologues()] function from the -#' *MetaboCoreUtils* package. Note that -#' the default parameters for isotope prediction/detection have been -#' determined using data from the Human Metabolome Database (HMDB) and -#' isotopes for elements other than CHNOPS might not be detected. See -#' parameter `substDefinition` in the documentation of [isotopologues()] for -#' more information. The approach and code to define the parameters for -#' isotope prediction is described -#' [here](https://github.com/EuracBiomedicalResearch/isotopologues). -#' -#' - `dropNaSpectraVariables()`: removes spectra variables (i.e. columns in the -#' object's `spectraData` that contain only missing values (`NA`). Note that -#' while columns with only `NA`s are removed, a `spectraData()` call after -#' `dropNaSpectraVariables()` might still show columns containing `NA` values -#' for *core* spectra variables. -#' -#' - `filterAcquisitionNum()`: filters the object keeping only spectra matching -#' the provided acquisition numbers (argument `n`). 
If `dataOrigin` or -#' `dataStorage` is also provided, `object` is subsetted to the spectra with -#' an acquisition number equal to `n` **in spectra with matching dataOrigin -#' or dataStorage values** retaining all other spectra. -#' Returns the filtered `Spectra`. -#' -#' - `filterDataOrigin()`: filters the object retaining spectra matching the -#' provided `dataOrigin`. Parameter `dataOrigin` has to be of type -#' `character` and needs to match exactly the data origin value of the -#' spectra to subset. -#' Returns the filtered `Spectra` object (with spectra ordered according to -#' the provided `dataOrigin` parameter). -#' -#' - `filterDataStorage()`: filters the object retaining spectra stored in the -#' specified `dataStorage`. Parameter `dataStorage` has to be of type -#' `character` and needs to match exactly the data storage value of the -#' spectra to subset. -#' Returns the filtered `Spectra` object (with spectra ordered according to -#' the provided `dataStorage` parameter). -#' -#' - `filterEmptySpectra()`: removes empty spectra (i.e. spectra without peaks). -#' Returns the filtered `Spectra` object (with spectra in their -#' original order). -#' -#' - `filterIsolationWindow()`: retains spectra that contain `mz` in their -#' isolation window m/z range (i.e. with an `isolationWindowLowerMz` <= `mz` -#' and `isolationWindowUpperMz` >= `mz`. Returns the filtered `Spectra` -#' object (with spectra in their original order). -#' -#' - `filterMsLevel()`: filters object by MS level keeping only spectra matching -#' the MS level specified with argument `msLevel`. Returns the filtered -#' `Spectra` (with spectra in their original order). -#' -#' - `filterPolarity()`: filters the object keeping only spectra matching the -#' provided polarity. Returns the filtered `Spectra` (with spectra in their -#' original order). -#' -#' - `filterPrecursorCharge()`: retains spectra with the defined precursor -#' charge(s). 
-#' -#' - `filterPrecursorIsotopes()`: groups MS2 spectra based on their precursor -#' m/z and precursor intensity into predicted isotope groups and keep for each -#' only the spectrum representing the monoisotopic precursor. MS1 spectra -#' are returned as is. See documentation for `deisotopeSpectra()` below for -#' details on isotope prediction and parameter description. -#' -#' - `filterPrecursorMaxIntensity()`: filters the `Spectra` keeping for groups -#' of (MS2) spectra with similar precursor m/z values (given parameters -#' `ppm` and `tolerance`) the one with the highest precursor intensity. The -#' function filters only MS2 spectra and returns all MS1 spectra. If -#' precursor intensities are `NA` for all spectra within a spectra group, the -#' first spectrum of that groups is returned. -#' Note: some manufacturers don't provide precursor intensities. These can -#' however also be estimated with [estimatePrecursorIntensity()]. -#' -#' - `filterPrecursorMzRange()` (previously `filterPrecursorMz()` which is now -#' deprecated): retains spectra with a precursor m/z within the -#' provided m/z range. See examples for details on selecting spectra with -#' a precursor m/z for a target m/z accepting a small difference in *ppm*. -#' -#' - `filterPrecursorMzValues()`: retains spectra with precursor m/z matching -#' any of the provided m/z values (given `ppm` and `tolerance`). Spectra with -#' missing precursor m/z value (e.g. MS1 spectra) are dropped. -#' -#' - `filterPrecursorScan()`: retains parent (e.g. MS1) and children scans (e.g. -#' MS2) of acquisition number `acquisitionNum`. Returns the filtered -#' `Spectra` (with spectra in their original order). Parameter `f` allows to -#' define which spectra belong to the same sample or original data file ( -#' defaults to `f = dataOrigin(object)`). 
-#' -#' - `filterRanges()`: allows filtering of the `Spectra` object based on user -#' defined *numeric* ranges (parameter `ranges`) for one or more available -#' spectra variables in object (spectra variable names can be specified with -#' parameter `spectraVariables`). Spectra for which the value of a spectra -#' variable is within it's defined range are retained. If multiple -#' ranges/spectra variables are defined, the `match` parameter can be used -#' to specify whether all conditions (`match = "all"`; the default) or if -#' any of the conditions must match (`match = "any"`; all spectra for which -#' values are within any of the provided ranges are retained). -#' -#' - `filterRt()`: retains spectra of MS level `msLevel` with retention -#' times (in seconds) within (`>=`) `rt[1]` and (`<=`) -#' `rt[2]`. Returns the filtered `Spectra` (with spectra in their -#' original order). -#' -#' - `filterValues()`: allows filtering of the `Spectra` object based on -#' similarities of *numeric* values of one or more `spectraVariables(object)` -#' (parameter `spectraVariables`) to provided values (parameter `values`) -#' given acceptable differences (parameters tolerance and ppm). If multiple -#' values/spectra variables are defined, the `match` parameter can be used -#' to specify whether all conditions (`match = "all"`; the default) or if -#' any of the conditions must match (`match = "any"`; all spectra for which -#' values are within any of the provided ranges are retained). -#' -#' -#' @section Filter content of `spectraData()`: -#' -#' The functions described in this section filter the content from a -#' `Spectra`'s spectra data, i.e. affect values of, or complete, spectra -#' variables. None of these functions reduces the object's number of spectra. -#' -#' - `dropNaSpectraVariables()`: removes spectra variables (i.e. columns in the -#' object's `spectraData` that contain only missing values (`NA`). 
Note that -#' while columns with only `NA`s are removed, a `spectraData()` call after -#' `dropNaSpectraVariables()` might still show columns containing `NA` values -#' for *core* spectra variables. The total number of spectra is not changed -#' by this function. -#' -#' - `selectSpectraVariables()`: reduces the information within the object to -#' the selected spectra variables: all data for variables not specified will -#' be dropped. For mandatory columns (i.e., those listed by -#' [coreSpectraVariables()], such as *msLevel*, *rtime* ...) only -#' the values will be dropped but not the variable itself. Additional (or -#' user defined) spectra variables will be completely removed. -#' Returns the filtered `Spectra`. -#' -#' -#' - `joinSpectraData()`: Individual spectra variables can be directly -#' added with the `$<-` or `[[<-` syntax. The `joinSpectraData()` -#' function allows to merge a `DataFrame` to the existing spectra -#' data. This function diverges from the [merge()] method in two -#' main ways: -#' - The `by.x` and `by.y` column names must be of length 1. -#' - If variable names are shared in `x` and `y`, the spectra -#' variables of `x` are not modified. It's only the `y` -#' variables that are appended the suffix defined in -#' `suffix.y`. This is to avoid modifying any core spectra -#' variables that would lead to an invalid object. -#' - Duplicated Spectra keys (i.e. `x[[by.x]]`) are not -#' allowed. Duplicated keys in the `DataFrame` (i.e `y[[by.y]]`) -#' throw a warning and only the last occurrence is kept. These -#' should be explored and ideally be removed using for -#' `QFeatures::reduceDataFrame()`, `PMS::reducePSMs()` or similar -#' functions. -#' For a more general function that allows to append `data.frame`, -#' `DataFrame` and `matrix` see `cbind2()`. -#' -#' @section Filter content of `peaksData()`: -#' -#' The functions described in this section filter the content of the -#' `Spectra`'s peaks data, i.e. 
either the number or the values (*m/z* or -#' intensity values) of the mass peaks. Also, the actual operation is only -#' executed once peaks data is accessed (through `peaksData()`, -#' `mz()` or `intensity()`) or `applyProcessing()` is called. -#' These operations don't affect the number of spectra in the `Spectra` object. -#' -#' - `deisotopeSpectra()`: *deisotopes* each spectrum keeping only the -#' monoisotopic peak for groups of isotopologues. Isotopologues are -#' estimated using the [isotopologues()] function from the -#' *MetaboCoreUtils* package. Note that -#' the default parameters for isotope prediction/detection have been -#' determined using data from the Human Metabolome Database (HMDB) and -#' isotopes for elements other than CHNOPS might not be detected. See -#' parameter `substDefinition` in the documentation of [isotopologues()] for -#' more information. The approach and code to define the parameters for -#' isotope prediction is described -#' [here](https://github.com/EuracBiomedicalResearch/isotopologues). -#' -#' - `filterFourierTransformArtefacts()`: removes (Orbitrap) fast fourier -#' artefact peaks from spectra (see examples below). The function iterates -#' through all intensity ordered peaks in a spectrum and removes all peaks -#' with an m/z within +/- `halfWindowSize` of the current peak if their -#' intensity is lower than `threshold` times the current peak's intensity. -#' Additional parameters `keepIsotopes`, `maxCharge` and `isotopeTolerance` -#' allow to avoid removing of potential `[13]C` isotope peaks (`maxCharge` -#' being the maximum charge that should be considered and `isotopeTolerance` -#' the absolute acceptable tolerance for matching their m/z). -#' See [filterFourierTransformArtefacts()] for details and background and -#' `deisitopeSpectra()` for an alternative. 
-#' -#' - `filterIntensity()`: filters mass peaks in each spectrum keeping only -#' those with intensities that are within the provided range or match the -#' criteria of the provided function. For the former, parameter `intensity` -#' has to be a `numeric` defining the intensity range, for the latter a -#' `function` that takes the intensity values of the spectrum and returns -#' a `logical` whether the peak should be retained or not (see examples -#' below for details) - additional parameters to the function can be passed -#' with `...`. -#' To remove only peaks with intensities below a certain threshold, say -#' 100, use `intensity = c(100, Inf)`. Note: also a single value can be -#' passed with the `intensity` parameter in which case an upper limit of -#' `Inf` is used. -#' Note that this function removes also peaks with missing intensities -#' (i.e. an intensity of `NA`). Parameter `msLevel.` allows to restrict the -#' filtering to spectra of the specified MS level(s). -#' -#' - `filterMzRange()`: filters mass peaks in the object keeping or removing -#' those in each spectrum that are within the provided m/z range. Whether -#' peaks are retained or removed can be configured with parameter `keep` -#' (default `keep = TRUE`). -#' -#' - `filterMzValues()`: filters mass peaks in the object keeping all -#' peaks in each spectrum that match the provided m/z value(s) (for -#' `keep = TRUE`, the default) or removing all of them (for `keep = FALSE`). -#' The m/z matching considers also the absolute `tolerance` and m/z-relative -#' `ppm` values. `tolerance` and `ppm` have to be of length 1. -#' -#' - `filterPeaksRanges()`: filters mass peaks of a `Spectra` object using any -#' set of range-based filters on numeric spectra or peaks variables. See -#' [filterPeaksRanges()] for more information. 
-#' -#' - `filterPrecursorPeaks()`: removes peaks from each spectrum in `object` with -#' an m/z equal or larger than the m/z of the precursor, depending on the -#' value of parameter `mz`: for `mz = ==" (the default) peaks with matching -#' m/z (considering an absolute and relative acceptable difference depending -#' on `tolerance` and `ppm`, respectively) are removed. For `mz = ">="` all -#' peaks with an m/z larger or equal to the precursor m/z (minus `tolerance` -#' and the `ppm` of the precursor m/z) are removed. Parameter `msLevel.` -#' allows to restrict the filter to certain MS levels (by default the filter -#' is applied to all MS levels). Note that no peaks are removed if the -#' precursor m/z is `NA` (e.g. typically for MS1 spectra). -#' -#' - `reduceSpectra()`: keeps for groups of peaks with similar m/z values in -#' (given `ppm` and `tolerance`) in each spectrum only the mass peak with the -#' highest intensity removing all other peaks hence *reducing* each -#' spectrum to the highest intensity peaks per *peak group*. -#' Peak groups are defined using the [group()] function from the -#' *MsCoreUtils* package. See also the [combinePeaks()] function for an -#' alternative function to combine peaks within each spectrum. -#' -#' @param acquisitionNum for `filterPrecursorScan()`: `integer` with the -#' acquisition number of the spectra to which the object should be -#' subsetted. -#' -#' @param charge For `deisotopeSpectra()`: expected charge of the ionized -#' compounds. See [isotopologues()] for details. -#' -#' @param dataOrigin For `filterDataOrigin()`: `character` to define which -#' spectra to keep. -#' For `filterAcquisitionNum()`: optionally specify if filtering should -#' occurr only for spectra of selected `dataOrigin`. -#' -#' @param dataStorage For `filterDataStorage()`: `character` to define which -#' spectra to keep. -#' For `filterAcquisitionNum()`: optionally specify if filtering should -#' occur only for spectra of selected `dataStorage`. 
-#' -#' @param drop For `[`: not considered. -#' -#' @param f For `filterPrecursorScan()`: defining which spectra -#' belong to the same original data file (sample): Defaults to -#' `f = dataOrigin(x)`. -#' -#' @param halfWindowSize For `filterFourierTransformArtefacts()`: `numeric(1)` -#' defining the m/z window left and right of a peak where to remove -#' fourier transform artefacts. -#' -#' @param i For `[`: `integer`, `logical` or `character` to subset the -#' object. -#' -#' @param intensity For `filterIntensity()`: `numeric` of length 1 or 2 -#' defining either the lower or the lower and upper intensity limit for the -#' filtering, or a `function` that takes the intensities as input and -#' returns a `logical` (same length then peaks in the spectrum) whether the -#' peak should be retained or not. Defaults to `intensity = c(0, Inf)` thus -#' only peaks with `NA` intensity are removed. -#' -#' @param isotopeTolerance For `filterFourierTransformArtefacts()`: the m/z -#' `tolerance` to be used to define whether peaks might be isotopes of -#' the current tested peak. -#' -#' @param j For `[`: not supported. -#' -#' @param keep For `filterMzValues()` and `filterMzRange()`: `logical(1)` -#' whether the matching peaks should be retained (`keep = TRUE`, the -#' default) or dropped (`keep = FALSE`). -#' -#' @param keepIsotopes For `filterFourierTransformArtefacts()`: whether isotope -#' peaks should not be removed as fourier artefacts. -#' -#' @param match For `filterRanges()` and `filterValues()`: `character(1) ` -#' defining whether the condition has to match for all provided -#' `ranges`/`values` (`match = "all"`; the default), or for any of them -#' (`match = "any"`) for spectra to be retained. -#' -#' @param maxCharge For `filterFourierTransformArtefacts()`: the maximum charge -#' to be considered for isotopes. -#' -#' @param msLevel. 
`integer` defining the MS level(s) of the spectra to which -#' the function should be applied (defaults to all MS levels of `object`. -#' For `filterMsLevel()`: the MS level to which `object` should be -#' subsetted. -#' -#' @param mz For `filterIsolationWindow()`: `numeric(1)` with the m/z value to -#' filter the object. For `filterPrecursorMz()` and `filterMzRange()`: -#' `numeric(2)` defining the lower and upper m/z boundary. -#' For `filterMzValues()` and `filterPrecursorMzValues()`: `numeric` with -#' the m/z values to match peaks or precursor m/z against. -#' For `filterPrecursorPeaks()`: `character(1)` defining whether mass peaks -#' with an m/z matching the spectrum's precursor m/z (`mz = "=="`, -#' the default) or mass peaks with a m/z that is equal or larger -#' (`mz = ">="`) should be removed. -#' -#' @param n for `filterAcquisitionNum()`: `integer` with the acquisition -#' numbers to filter for. -#' -#' @param object `Spectra` object. -#' -#' @param polarity for `filterPolarity()`: `integer` specifying the polarity to -#' to subset `object`. -#' -#' @param ppm For `filterMzValues()` and `reduceSpectra()`: `numeric(1)` -#' defining a relative, m/z-dependent, maximal accepted difference between -#' m/z values for peaks to be matched (or grouped). -#' For `filterPrecursorMaxIntensity()`: `numeric(1)` defining the relative -#' maximal accepted difference of precursor m/z values of spectra for -#' grouping them into *precursor groups*. For `filterPrecursorIsotopes()`: -#' passed directly to the [isotopologues()] function. -#' For `filterValues()`: `numeric` of any length allowing to define -#' a maximal accepted difference between user input `values` and the -#' `spectraVariables` values. If it is not equal to the length of the -#' value provided with parameter `spectraVariables`, `ppm[1]` will be -#' recycled. 
-#' -#' @param ranges for `filterRanges()`: A `numeric` vector of paired values -#' (upper and lower boundary) that define the ranges to filter the `object`. -#' These paired values need to be in the same order as the -#' `spectraVariables` parameter (see below). -#' -#' @param rt for `filterRt()`: `numeric(2)` defining the retention time range to -#' be used to subset/filter `object`. -#' -#' @param spectraVariables For `selectSpectraVariables()`: `character` with the -#' names of the spectra variables to which the backend should be -#' subsetted. For `filterRanges()` and `filterValues()`: `character` -#' vector specifying the column(s) from `spectraData(object)` on which -#' to filter the data and that correspond to the the names of the -#' spectra variables that should be used for the filtering. -#' -#' @param substDefinition For `deisotopeSpectra()` and -#' `filterPrecursorIsotopes()`: `matrix` or `data.frame` with definitions -#' of isotopic substitutions. Uses by default isotopic substitutions -#' defined from all compounds in the Human Metabolome Database (HMDB). See -#' [isotopologues()] or [isotopicSubstitutionMatrix()] in the -#' *MetaboCoreUtils* for details. -#' -#' @param threshold For `filterFourierTransformArtefacts()`: the relative -#' intensity (to a peak) below which peaks are considered fourier -#' artefacts. Defaults to `threshold = 0.2` hence removing peaks that -#' have an intensity below 0.2 times the intensity of the tested peak -#' (within the selected `halfWindowSize`). -#' -#' @param tolerance For `filterMzValues()` and `reduceSpectra()`: -#' `numeric(1)` allowing to define a constant maximal accepted difference -#' between m/z values for peaks to be matched (or grouped). For -#' `containsMz()` it can also be of length equal `mz` to specify a different -#' tolerance for each m/z value. 
-#' For `filterPrecursorMaxIntensity()`: `numeric(1)` defining the -#' (constant) maximal accepted difference of precursor m/z values of -#' spectra for grouping them into *precursor groups*. For -#' `filterPrecursorIsotopes()`: passed directly to the [isotopologues()] -#' function. For `filterValues()`: `numeric` of any length allowing to -#' define a maximal accepted difference between user input `values` and the -#' `spectraVariables` values. If it is not equal to the length of the -#' value provided with parameter `spectraVariables`, `tolerance[1]` will be -#' recycled. Default is `tolerance = 0`. -#' -#' @param values for `filterValues()`: A `numeric` vector that define the -#' values to filter the Spectra data. These values need to be in the same -#' order as the `spectraVariables` parameter. -#' -#' @param weighted For `combinePeaks()`: `logical(1)` whether m/z values of -#' peaks within each peak group should be aggregated into a single m/z -#' value using an intensity-weighted mean. Defaults to `weighted = TRUE`. -#' -#' @param which for `containsMz()`: either `"any"` or `"all"` defining whether -#' any (the default) or all provided `mz` have to be present in the -#' spectrum. -#' -#' @param x A `Spectra` object. -#' -#' @param y A `Spectra` object. -#' - For `joinSpectraData()`: a `DataFrame`. -#' - For `cbind2()` a `data.frame`, `DataFrame` or `matrix`. -#' -#' @param x `Spectra` object. -#' -#' @param z For `filterPrecursorCharge()`: `integer()` with the precursor -#' charges to be used as filter. -#' -#' @param ... Additional arguments. -#' -#' @seealso -#' -#' - [combineSpectra()] for functions to combine or aggregate `Spectra`. -#' -#' - [combinePeaks()] for functions to combine or aggregate a `Spectra`'s -#' `peaksData()` -#' -#' @md -#' -#' @author Sebastian Gibb, Johannes Rainer, Laurent Gatto, Philippine Louail, Nir Shahaf -#' -#' @examples -#' -#' ## Load a `Spectra` object with LC-MS/MS data. 
-#' fl <- system.file("TripleTOF-SWATH", "PestMix1_DDA.mzML", -#' package = "msdata") -#' sps_dda <- Spectra(fl) -#' sps_dda -#' -#' -#' ## -------- SUBSET SPECTRA -------- -#' -#' ## Subset to the first 3 spectra -#' tmp <- sps_dda[1:3] -#' tmp -#' length(tmp) -#' -#' ## Subset to all MS2 spectra; this could be done with [, or, more -#' ## efficiently, with the `filterMsLevel` function: -#' sps_dda[msLevel(sps_dda) == 2L] -#' filterMsLevel(sps_dda, 2L) -#' -#' ## Filter the object keeping only MS2 spectra with an precursor m/z value -#' ## between a specified range: -#' filterPrecursorMzRange(sps_dda, c(80, 90)) -#' -#' ## Filter the object to MS2 spectra with an precursor m/z matching a -#' ## pre-defined value (given ppm and tolerance) -#' filterPrecursorMzValues(sps_dda, 85, ppm = 5, tolerance = 0.1) -#' -#' ## The `filterRanges()` function allows to filter a `Spectra` based on -#' ## numerical ranges of any of its (numerical) spectra variables. -#' ## First, determine the variable(s) on which to base the filtering: -#' sv <- c("rtime", "precursorMz", "peaksCount") -#' ## Note that ANY variables can be chosen here, and as many as wanted. -#' -#' ## Define the ranges (pairs of values with lower and upper boundary) to be -#' ## used for the individual spectra variables. The first two values will be -#' ## used for the first spectra variable (e.g., `"rtime"` here), the next two -#' ## for the second (e.g. 
`"precursorMz"` here) and so on: -#' ranges <- c(30, 350, 200, 500, 350, 600) -#' -#' ## Input the parameters within the filterRanges function: -#' filt_spectra <- filterRanges(sps_dda, spectraVariables = sv, -#' ranges = ranges) -#' filt_spectra -#' -#' ## `filterRanges()` can also be used to filter a `Spectra` object with -#' ## multiple ranges for the same `spectraVariable` (e.g, here `"rtime"`) -#' sv <- c("rtime", "rtime") -#' ranges <- c(30, 100, 200, 300) -#' filt_spectra <- filterRanges(sps_dda, spectraVariables = sv, -#' ranges = ranges, match = "any") -#' filt_spectra -#' -#' ## While `filterRanges()` filtered on numeric ranges, `filterValues()` -#' ## allows to filter an object matching spectra variable values to user -#' ## provided values (allowing to configure allowed differences using the -#' ## `ppm` and `tolerance` parameters). -#' ## First determine the variable(s) on which to base the filtering: -#' sv <- c("rtime", "precursorMz") -#' ## Note that ANY variables can be chosen here, and as many as wanted. -#' -#' ## Define the values that will be used to filter the spectra based on their -#' ## similarities to their respective `spectraVariables`. -#' ## The first values in the parameters values, tolerance and ppm will be -#' ## used for the first spectra variable (e.g. `"rtime"` here), the next for -#' ## the second (e.g. `"precursorMz"` here) and so on: -#' values <- c(350, 80) -#' tolerance <- c(100, 0.1) -#' ppm <- c(0, 50) -#' -#' ## Input the parameters within the `filterValues()` function: -#' filt_spectra <- filterValues(sps_dda, spectraVariables = sv, -#' values = values, tolerance = tolerance, ppm = ppm) -#' filt_spectra -#' -#' -#' ## -------- FILTER SPECTRA DATA -------- -#' -#' ## Remove spectra variables without content (i.e. 
with only missing values) -#' sps_noNA <- dropNaSpectraVariables(sps_dda) -#' -#' ## Append new `spectraVariables` to the `spectraData` -#' df <- data.frame(cola = 4:5, colb = "b") -#' data_append <- cbind2(data, df) -#' -#' ## Same with the filterMsLevel function -#' filterMsLevel(data, 2) -#' -#' ## This reduced the size of the object slightly -#' print(object.size(sps_dda), unit = "MB") -#' print(object.size(sps_noNA), unit = "MB") -#' -#' ## With the `selectSpectraVariables()` function it is in addition possible -#' ## to subset the data of a `Spectra` to the selected columns/variables, -#' ## keeping only their data: -#' tmp <- selectSpectraVariables(sps_dda, c("msLevel", "mz", "intensity", -#' "scanIndex")) -#' print(object.size(tmp), units = "MB") -#' -#' ## Except the selected variables, all data is now removed. Accessing -#' ## core spectra variables still works, but returns only NA -#' rtime(tmp) |> head() -#' -#' -#' ## -------- FILTER PEAKS DATA -------- -#' -#' ## `filterMzValues()` filters the mass peaks data of a `Spectra` retaining -#' ## only those mass peaks with an m/z value matching the provided value(s). -#' sps_sub <- filterMzValues(sps_dda, mz = c(103, 104), tolerance = 0.3) -#' -#' ## The filtered `Spectra` has the same length -#' length(sps_dda) -#' length(sps_sub) -#' -#' ## But the number of mass peaks changed -#' lengths(sps_dda) |> head() -#' lengths(sps_sub) |> head() -#' -#' ## This function can also be used to remove specific peaks from a spectrum -#' ## by setting `keep = FALSE`. -#' sps_sub <- filterMzValues(sps_dda, mz = c(103, 104), -#' tolerance = 0.3, keep = FALSE) -#' lengths(sps_sub) |> head() -#' -#' ## With the `filterMzRange()` function it is possible to keep (or remove) -#' ## mass peaks with m/z values within a specified numeric range. 
-#' sps_sub <- filterMzRange(sps_dda, mz = c(100, 150)) -#' lengths(sps_sub) |> head() -#' -#' ## See also the `filterPeaksRanges()` function for a more flexible framework -#' ## to filter mass peaks -#' -#' -#' ## Removing fourier transform artefacts seen in Orbitra data. -#' -#' ## Loading an Orbitrap spectrum with artefacts. -#' data(fft_spectrum) -#' plotSpectra(fft_spectrum, xlim = c(264.5, 265.5)) -#' plotSpectra(fft_spectrum, xlim = c(264.5, 265.5), ylim = c(0, 5e6)) -#' -#' fft_spectrum <- filterFourierTransformArtefacts(fft_spectrum) -#' fft_spectrum -#' plotSpectra(fft_spectrum, xlim = c(264.5, 265.5), ylim = c(0, 5e6)) -#' -#' ## Using a few examples peaks in your data you can optimize the parameters -#' fft_spectrum_filtered <- filterFourierTransformArtefacts(fft_spectrum, -#' halfWindowSize = 0.2, -#' threshold = 0.005, -#' keepIsotopes = TRUE, -#' maxCharge = 5, -#' isotopeTolerance = 0.005 -#' ) -#' -#' fft_spectrum_filtered -#' length(mz(fft_spectrum_filtered)[[1]]) -#' plotSpectra(fft_spectrum_filtered, xlim = c(264.5, 265.5), ylim = c(0, 5e6)) -#' -#' -#' ## *Reducing* a `Spectra` keeping for groups of mass peaks (characterized -#' ## by similarity of their m/z values) only one representative peak. This -#' ## function helps cleaning fragment spectra. -#' ## Filter the data set to MS2 spectra -#' ms2 <- filterMsLevel(sps_dda, 2L) -#' -#' ## For groups of fragment peaks with a difference in m/z < 0.1, keep only -#' ## the largest one. 
-#' ms2_red <- reduceSpectra(ms2, ppm = 0, tolerance = 0.1) -#' lengths(ms2) |> tail() -#' lengths(ms2_red) |> tail() -NULL - -#' @rdname filterMsLevel -setMethod("dropNaSpectraVariables", "Spectra", function(object) { - object@backend <- dropNaSpectraVariables(object@backend) - object -}) - -#' @rdname filterMsLevel -setMethod( - "selectSpectraVariables", "Spectra", - function(object, spectraVariables = union(spectraVariables(object), - peaksVariables(object))) { - spectraVariables <- union(spectraVariables, "dataStorage") - object@backend <- selectSpectraVariables( - object@backend, spectraVariables = spectraVariables) - object - }) - -#' @rdname filterMsLevel -#' -#' @export -setMethod("[", "Spectra", function(x, i, j, ..., drop = FALSE) { - if (!missing(j)) - stop("Subsetting 'Spectra' by columns is not (yet) supported") - if (missing(i)) - return(x) - slot(x, "backend", check = FALSE) <- extractByIndex( - x@backend, i2index(i, length(x))) - x -}) - -setClassUnion("dataframeOrDataFrame", c("data.frame", "DataFrame")) -#' @rdname Spectra -#' -#' @export -setMethod("cbind2", signature(x = "Spectra", - y = "dataframeOrDataFrame"), function(x, y, ...) { - x@backend <- cbind2(x@backend, y, ...) 
- x - }) - -#' @rdname filterMsLevel -setMethod("filterAcquisitionNum", "Spectra", function(object, n = integer(), - dataStorage = character(), - dataOrigin = character()) { - if (length(dataStorage) && !is.character(dataStorage)) - stop("'dataStorage' is expected to be of type character") - if (length(dataOrigin) && !is.character(dataOrigin)) - stop("'dataOrigin' is expected to be of type character") - object@backend <- filterAcquisitionNum(object@backend, n, - dataStorage, dataOrigin) - object@processing <- .logging(object@processing, - "Filter: select by: ", length(n), - " acquisition number(s) in ", - max(length(dataStorage), length(dataOrigin)), - " file(s)") - object -}) - -#' @rdname filterMsLevel -setMethod("filterEmptySpectra", "Spectra", function(object) { - object@backend <- extractByIndex(object@backend, - which(as.logical(lengths(object)))) - object@processing <- .logging(object@processing, - "Filter: removed empty spectra.") - object -}) - -#' @rdname filterMsLevel -setMethod("filterDataOrigin", "Spectra", function(object, - dataOrigin = character()) { - if (length(dataOrigin) && !is.character(dataOrigin)) - stop("'dataOrigin' is expected to be of type character") - object@backend <- filterDataOrigin(object@backend, dataOrigin = dataOrigin) - object@processing <- .logging(object@processing, - "Filter: select data origin(s) ", - paste0(dataOrigin, collapse = ", ")) - object -}) - -#' @rdname filterMsLevel -setMethod("filterDataStorage", "Spectra", function(object, - dataStorage = character()) { - if (length(dataStorage) && !is.character(dataStorage)) - stop("'dataStorage' is expected to be of type character") - object@backend <- filterDataStorage(object@backend, dataStorage) - object@processing <- .logging(object@processing, - "Filter: select data storage(s) ", - paste0(dataStorage, collapse = ", ")) - object -}) - -#' @rdname filterMsLevel -#' -#' @exportMethod filterFourierTransformArtefacts -setMethod("filterFourierTransformArtefacts", "Spectra", - 
function(object, halfWindowSize = 0.05, threshold = 0.2, - keepIsotopes = TRUE, maxCharge = 5, - isotopeTolerance = 0.005) { - object <- addProcessing(object, .peaks_remove_fft_artifact, - halfWindowSize = halfWindowSize, - threshold = threshold, - keepIsotopes = keepIsotopes, - maxCharge = maxCharge, - isotopeTolerance = isotopeTolerance) - object@processing <- .logging( - object@processing, "Remove fast fourier artefacts.") - object - }) - -#' @rdname filterMsLevel -#' -#' @importMethodsFrom ProtGenerics filterIntensity -#' -#' @exportMethod filterIntensity -setMethod("filterIntensity", "Spectra", - function(object, intensity = c(0, Inf), - msLevel. = uniqueMsLevels(object), ...) { - if (!.check_ms_level(object, msLevel.)) - return(object) - if (is.numeric(intensity)) { - if (length(intensity) == 1) - intensity <- c(intensity, Inf) - if (length(intensity) != 2) - stop("'intensity' should be of length specifying a ", - "lower intensity limit or of length two defining ", - "a lower and upper limit.") - object <- addProcessing(object, .peaks_filter_intensity, - intensity = intensity, - msLevel = msLevel., - spectraVariables = "msLevel") - object@processing <- .logging( - object@processing, "Remove peaks with intensities ", - "outside [", intensity[1], ", ", intensity[2], - "] in spectra of MS level(s) ", - paste0(msLevel., collapse = ", "), ".") - } else { - if (is.function(intensity)) { - object <- addProcessing( - object, .peaks_filter_intensity_function, - intfun = intensity, msLevel = msLevel., - args = list(...), spectraVariables = "msLevel") - object@processing <- .logging( - object@processing, "Remove peaks based on their ", - "intensities and a user-provided function ", - "in spectra of MS level(s) ", - paste0(msLevel., collapse = ", "), ".") - } - else stop("'intensity' has to be numeric or a function") - } - object - }) - - -#' @rdname filterMsLevel -setMethod("filterIsolationWindow", "Spectra", function(object, mz = numeric()) { - object@backend <- 
filterIsolationWindow(object@backend, mz = mz) - object@processing <- .logging(object@processing, - "Filter: select spectra containing m/z ", - mz, " in their isolation window") - object -}) - -#' @rdname filterMsLevel -setMethod("filterMsLevel", "Spectra", function(object, msLevel. = integer()) { - object@backend <- filterMsLevel(object@backend, msLevel = msLevel.) - object@processing <- .logging(object@processing, - "Filter: select MS level(s) ", - paste0(unique(msLevel.), collapse = " ")) +#' @rdname Spectra +setReplaceMethod("collisionEnergy", "Spectra", function(object, value) { + collisionEnergy(object@backend) <- value object }) -#' @rdname filterMsLevel -#' -#' @importMethodsFrom ProtGenerics filterMzRange -#' -#' @export -setMethod("filterMzRange", "Spectra", - function(object, mz = numeric(), msLevel. = uniqueMsLevels(object), - keep = TRUE) { - if (!.check_ms_level(object, msLevel.)) - return(object) - if (!length(mz)) mz <- c(-Inf, Inf) - else mz <- range(mz) - object <- addProcessing(object, .peaks_filter_mz_range, mz = mz, - msLevel = msLevel., keep = keep, - spectraVariables = "msLevel") - if (keep) keep_or_remove <- "select" - else keep_or_remove <- "remove" - object@processing <- .logging( - object@processing, "Filter: ", keep_or_remove, - " peaks with an m/z within [", mz[1L], ", ", mz[2L], "]") - object - }) - -#' @rdname filterMsLevel -#' -#' @importMethodsFrom ProtGenerics filterMzValues -#' -#' @export -setMethod("filterMzValues", "Spectra", - function(object, mz = numeric(), tolerance = 0, ppm = 20, - msLevel. 
= uniqueMsLevels(object), keep = TRUE) { - if (!.check_ms_level(object, msLevel.)) - return(object) - l <- length(mz) - if (length(tolerance) != 1) - stop("'tolerance' should be of length 1") - if (length(ppm) != 1) - stop("'ppm' should be of length 1") - if (is.unsorted(mz)) { - idx <- order(mz) - mz <- mz[idx] - if (length(tolerance) == l) - tolerance <- tolerance[idx] - if (length(ppm) == l) - ppm <- ppm[idx] - } - object <- addProcessing(object, .peaks_filter_mz_value, - mz = mz, tolerance = tolerance, - ppm = ppm, msLevel = msLevel., - keep = keep, spectraVariables = "msLevel") - if (length(mz) <= 3) - what <- paste0(format(mz, digits = 4), collapse = ", ") - else what <- "" - if (keep) - keep_or_remove <- "select" - else keep_or_remove <- "remove" - object@processing <- .logging( - object@processing, "Filter: ", keep_or_remove, - " peaks matching provided m/z values ", what) - object - }) +#' @rdname Spectra +setMethod("dataOrigin", "Spectra", function(object) dataOrigin(object@backend)) -#' @rdname filterMsLevel -setMethod("filterPolarity", "Spectra", function(object, polarity = integer()) { - object@backend <- filterPolarity(object@backend, polarity = polarity) - object@processing <- .logging(object@processing, - "Filter: select spectra with polarity ", - paste0(polarity, collapse = " ")) +#' @rdname Spectra +setReplaceMethod("dataOrigin", "Spectra", function(object, value) { + dataOrigin(object@backend) <- value object }) -#' @rdname filterMsLevel -#' -#' @export -setMethod("filterPrecursorMz", "Spectra", - function(object, mz = numeric()) { - .Deprecated( - msg = paste0("'filterPrecursorMz' is deprecated. 
Please use", - " 'filterPrecursorMzRange' instead.")) - object@backend <- filterPrecursorMzRange(object@backend, mz) - object@processing <- .logging( - object@processing, - "Filter: select spectra with a precursor m/z within [", - paste0(mz, collapse = ", "), "]") - object - }) - -#' @rdname filterMsLevel -setMethod("filterPrecursorMzRange", "Spectra", - function(object, mz = numeric()) { - object@backend <- filterPrecursorMzRange(object@backend, mz) - object@processing <- .logging( - object@processing, - "Filter: select spectra with a precursor m/z within [", - paste0(mz, collapse = ", "), "]") - object - }) - -#' @rdname filterMsLevel -setMethod("filterPrecursorMzValues", "Spectra", - function(object, mz = numeric(), ppm = 20, tolerance = 0) { - object@backend <- filterPrecursorMzValues( - object@backend, sort(mz), ppm = ppm, tolerance = tolerance) - object@processing <- .logging( - object@processing, - "Filter: select spectra with precursor m/z matching ", - paste0(mz, collapse = ", "), "") - object - }) - -#' @rdname filterMsLevel -setMethod("filterPrecursorCharge", "Spectra", - function(object, z = integer()) { - z <- unique(z) - object@backend <- filterPrecursorCharge(object@backend, z) - object@processing <- .logging( - object@processing, - "Filter: select spectra with a precursor charge ", - paste0(z, collapse = ", ")) - object - }) - -#' @rdname filterMsLevel -setMethod("filterPrecursorScan", "Spectra", - function(object, acquisitionNum = integer(), f = dataOrigin(object)) { - if (!all(f %in% unique(dataOrigin(object)))) - stop("'f' must be in dataOrigin().") - object@backend <- filterPrecursorScan(object@backend, - acquisitionNum, - f = dataOrigin(object)) - object@backend <- filterDataOrigin(object@backend, dataOrigin = f) - object@processing <- .logging( - object@processing, - "Filter: select parent/children scans for ", - paste0(acquisitionNum, collapse = " ")) - object - }) - -#' @rdname filterMsLevel -setMethod("filterRt", "Spectra", - 
function(object, rt = numeric(), msLevel. = uniqueMsLevels(object)) { - if (!is.numeric(msLevel.)) - stop("Please provide a numeric MS level.") - if (length(rt) != 2L || !is.numeric(rt) || rt[1] >= rt[2]) - stop("Please provide a lower and upper numeric retention", - " time range.") - if (length(rt)) - rt <- range(rt) - else rt <- c(-Inf, Inf) - object@backend <- filterRt(object@backend, rt, msLevel.) - object@processing <- .logging( - object@processing, - "Filter: select retention time [", rt[1], "..", rt[2], - "] on MS level(s) ", paste0(msLevel., collapse = " ")) - object - }) - -#' @rdname filterMsLevel -setMethod("filterRanges", "Spectra", - function(object, spectraVariables = character(), ranges = numeric(), - match = c("all", "any")){ - object@backend <- filterRanges(object@backend, spectraVariables, - ranges, match) - object@processing <- .logging(object@processing, - "Filter: select spectra with a ", - spectraVariables, " within: [", - ranges[seq(ranges)%% 2 != 0], ", ", - ranges[seq(ranges)%% 2 == 0], "]" - ) - object - }) - -#' @rdname filterMsLevel -setMethod("filterValues", "Spectra", - function(object, spectraVariables = character(), values = numeric(), - ppm = 0, tolerance = 0, match = c("all", "any")){ - object@backend <- filterValues(object@backend, spectraVariables, - values, ppm, tolerance, match) - object@processing <- .logging(object@processing, - "Filter: select spectra with a ", - spectraVariables, " similar to: ", - values) - object - }) +#' @rdname Spectra +setMethod("dataStorage", "Spectra", + function(object) dataStorage(object@backend)) +#' @rdname Spectra +setMethod("dropNaSpectraVariables", "Spectra", function(object) { + object@backend <- dropNaSpectraVariables(object@backend) + object +}) -################################################################################ -## -## Data manipulation and analysis operations (lazy processing) -## -################################################################################ +#' @rdname 
Spectra +setMethod("intensity", "Spectra", function(object, + f = processingChunkFactor(object), + ...) { + if (length(object@processingQueue) || length(f)) + NumericList(.peaksapply(object, FUN = function(z, ...) z[, 2], + f = f, ...), compress = FALSE) + else intensity(object@backend) +}) -#' @title Data manipulation and analysis methods -#' -#' @name addProcessing -#' -#' @aliases addProcessing -#' @aliases applyProcessing -#' @aliases bin -#' @aliases containsMz -#' @aliases containsNeutralLoss -#' @aliases entropy -#' @aliases pickPeaks -#' @aliases replaceIntensitiesBelow -#' @aliases reset -#' @aliases smooth -#' @aliases spectrapply -#' -#' @description -#' -#' Various data analysis functions are available for `Spectra` objects. These -#' can be categorized into functions that either return a `Spectra` object -#' (with the manipulated data) and functions that directly return the -#' result from the calculation. For the former category, the data manipulations -#' are cached in the result object's *processing queue* and only exectuted -#' on-the-fly when the respective data gets extracted from the `Spectra` (see -#' section *The processing queue* for more information). -#' -#' For the second category, the calculations are directly executed and the -#' result, usually one value per spectrum, returned. Generally, to reduce -#' memory demand, a chunk-wise processing of the data is performed. -#' -#' -#' @section Data analysis methods returning a `Spectra`: -#' -#' The methods listed here return a `Spectra` object as a result. -#' -#' - `addProcessing()`: adds an arbitrary function that should be applied to the -#' peaks matrix of every spectrum in `object`. The function (can be passed -#' with parameter `FUN`) is expected to take a peaks matrix as input and to -#' return a peaks matrix. A peaks matrix is a numeric matrix with two columns, -#' the first containing the m/z values of the peaks and the second the -#' corresponding intensities. 
The function has to have `...` in its -#' definition. Additional arguments can be passed with `...`. With parameter -#' `spectraVariables` it is possible to define additional spectra variables -#' from `object` that should be passed to the function `FUN`. These will be -#' passed by their name (e.g. specifying `spectraVariables = "precursorMz"` -#' will pass the spectra's precursor m/z as a parameter named `precursorMz` -#' to the function. The only exception is the spectra's MS level, these will -#' be passed to the function as a parameter called `spectrumMsLevel` (i.e. -#' with `spectraVariables = "msLevel"` the MS levels of each spectrum will be -#' submitted to the function as a parameter called `spectrumMsLevel`). -#' Examples are provided in the package vignette. -#' -#' - `bin()`: aggregates individual spectra into discrete (m/z) bins. Binning is -#' performed only on spectra of the specified MS level(s) (parameter -#' `msLevel`, by default all MS levels of `x`). The bins can be defined with -#' parameter `breaks` which by default are equally sized bins, with size -#' being defined by parameter `binSize`, from the minimal to the maximal m/z -#' of all spectra (of MS level `msLevel`) within `x`. The same bins are used -#' for all spectra in `x`. All intensity values for peaks falling into the -#' same bin are aggregated using the function provided with parameter `FUN` -#' (defaults to `FUN = sum`, i.e. all intensities are summed up). Note that -#' the binning operation is applied to the peak data on-the-fly upon data -#' access and it is possible to *revert* the operation with the `reset()` -#' function (see description of `reset()` below). -#' -#' - `countIdentifications`: counts the number of identifications each scan has -#' led to. See [countIdentifications()] for more details. -#' -#' - `pickPeaks()`: picks peaks on individual spectra using a moving -#' window-based approach (window size = `2 * halfWindowSize`). 
For noisy -#' spectra there are currently two different noise estimators available, -#' the *M*edian *A*bsolute *D*eviation (`method = "MAD"`) and -#' Friedman's Super Smoother (`method = "SuperSmoother"`), -#' as implemented in the [`MsCoreUtils::noise()`]. -#' The method supports also to optionally *refine* the m/z value of -#' the identified centroids by considering data points that belong (most -#' likely) to the same mass peak. Therefore the m/z value is calculated as an -#' intensity weighted average of the m/z values within the peak region. -#' The peak region is defined as the m/z values (and their respective -#' intensities) of the `2 * k` closest signals to the centroid or the closest -#' valleys (`descending = TRUE`) in the `2 * k` region. For the latter the `k` -#' has to be chosen general larger. See [`MsCoreUtils::refineCentroids()`] for -#' details. -#' If the ratio of the signal to the highest intensity of the peak is below -#' `threshold` it will be ignored for the weighted average. -#' -#' - `replaceIntensitiesBelow()`: replaces intensities below a specified -#' threshold with the provided `value`. Parameter `threshold` can be either -#' a single numeric value or a function which is applied to all non-`NA` -#' intensities of each spectrum to determine a threshold value for each -#' spectrum. The default is `threshold = min` which replaces all values -#' which are <= the minimum intensity in a spectrum with `value` (the -#' default for `value` is `0`). Note that the function specified with -#' `threshold` is expected to have a parameter `na.rm` since `na.rm = TRUE` -#' will be passed to the function. If the spectrum is in profile mode, -#' ranges of successive non-0 peaks <= `threshold` are set to 0. -#' Parameter `msLevel.` allows to apply this to only spectra of certain MS -#' level(s). -#' -#' - `scalePeaks()`: scales intensities of peaks within each spectrum depending -#' on parameter `by`. 
With `by = sum` (the default) peak intensities are -#' divided by the sum of peak intensities within each spectrum. The sum of -#' intensities is thus 1 for each spectrum after scaling. Parameter -#' `msLevel.` allows to apply the scaling of spectra of a certain MS level. -#' By default (`msLevel. = uniqueMsLevels(x)`) intensities for all -#' spectra will be scaled. -#' -#' - `smooth()`: smooths individual spectra using a moving window-based approach -#' (window size = `2 * halfWindowSize`). Currently, the -#' Moving-Average- (`method = "MovingAverage"`), -#' Weighted-Moving-Average- (`method = "WeightedMovingAverage")`, -#' weights depending on the distance of the center and calculated -#' `1/2^(-halfWindowSize:halfWindowSize)`) and -#' Savitzky-Golay-Smoothing (`method = "SavitzkyGolay"`) are supported. -#' For details how to choose the correct `halfWindowSize` please see -#' [`MsCoreUtils::smooth()`]. -#' -#' -#' @section Data analysis methods returning the result from the calculation: -#' -#' The functions listed in this section return immediately the result from the -#' calculation. To reduce memory demand (and allow parallel processing) the -#' calculations a chunk-wise processing is generally performed. -#' -#' - `chunkapply()`: apply an arbitrary function to chunks of spectra. See -#' [chunkapply()] for details and examples. -#' -#' - `containsMz()`: checks for each of the spectra whether they contain mass -#' peaks with an m/z equal to `mz` (given acceptable difference as defined by -#' parameters `tolerance` and `ppm` - see [common()] for details). Parameter -#' `which` allows to define whether any (`which = "any"`, the default) or -#' all (`which = "all"`) of the `mz` have to match. The function returns -#' `NA` if `mz` is of length 0 or is `NA`. 
-#' -#' - `containsNeutralLoss()`: checks for each spectrum in `object` if it has a -#' peak with an m/z value equal to its precursor m/z - `neutralLoss` (given -#' acceptable difference as defined by parameters `tolerance` and `ppm`). -#' Returns `NA` for MS1 spectra (or spectra without a precursor m/z). -#' -#' - `entropy()`: calculates the entropy of each spectra based on the metrics -#' suggested by Li et al. (https://doi.org/10.1038/s41592-021-01331-z). -#' See also [nentropy()] in the *MsCoreUtils* package for details. -#' -#' - `estimatePrecursorIntensity()`: defines the precursor intensities for MS2 -#' spectra using the intensity of the matching MS1 peak from the -#' closest MS1 spectrum (i.e. the last MS1 spectrum measured before the -#' respective MS2 spectrum). With `method = "interpolation"` it is also -#' possible to calculate the precursor intensity based on an interpolation of -#' intensity values (and retention times) of the matching MS1 peaks from the -#' previous and next MS1 spectrum. See [estimatePrecursorIntensity()] for -#' examples and more details. -#' -#' - `estimatePrecursorMz()`: **for DDA data**: allows to estimate a fragment -#' spectra's precursor m/z based on the reported precursor m/z and the data -#' from the previous MS1 spectrum. See [estimatePrecursorMz()] for details. -#' -#' - `neutralLoss()`: calculates neutral loss spectra for fragment spectra. See -#' [neutralLoss()] for detailed documentation. -#' -#' - `spectrapply()`: applies a given function to each individual spectrum or -#' sets of a `Spectra` object. By default, the `Spectra` is split into -#' individual spectra (i.e. `Spectra` of length 1) and the function `FUN` -#' is applied to each of them. An alternative splitting can be defined with -#' parameter `f`. Parameters for `FUN` can be passed using `...`. -#' The returned result and its order depend on the function `FUN` and how -#' `object` is split (hence on `f`, if provided). 
Parallel processing is -#' supported and can be configured with parameter `BPPARAM`, is however only -#' suggested for computational intense `FUN`. -#' As an alternative to the (eventual parallel) processing of the full -#' `Spectra`, `spectrapply()` supports also a chunk-wise processing. For this, -#' parameter `chunkSize` needs to be specified. `object` is then split into -#' chunks of size `chunkSize` which are then (stepwise) processed by `FUN`. -#' This guarantees a lower memory demand (especially for on-disk backends) -#' since only the data for one chunk needs to be loaded into memory in each -#' iteration. Note that by specifying `chunkSize`, parameters `f` and -#' `BPPARAM` will be ignored. -#' See also `chunkapply()` above or examples below for details on chunk-wise -#' processing. -#' -#' -#' @section The processing queue: -#' -#' Operations that modify mass peak data, i.e. the m/z and intensity values of -#' a `Spectra` are generally not applied immediately to the data but are -#' *cached* within the object's *processing queue*. These operations are then -#' applied to the data only upon request, for example when m/z and/or -#' intensity values are extracted. This lazy execution guarantees that the -#' same functionality can be applied to any `Spectra` object, regardless of -#' the type of backend that is used. Thus, data manipulation operations can -#' also be applied to data that is *read only*. As a side effect, this enables -#' also to *undo* operations using the `reset()` function. -#' -#' Functions related to the processing queue are: -#' -#' - `applyProcessing()`: for `Spectra` objects that use a **writeable** backend -#' only: apply all steps from the lazy processing queue to the peak data and -#' write it back to the data storage. Parameter `f` allows to specify how -#' `object` should be split for parallel processing. 
This should either be -#' equal to the `dataStorage`, or `f = rep(1, length(object))` to disable -#' parallel processing alltogether. Other partitionings might result in -#' errors (especially if a `MsBackendHdf5Peaks` backend is used). -#' -#' - `processingLog()`: returns a `character` vector with the processing log -#' messages. -#' -#' - `reset()`: restores the data to its original state (as much as possible): -#' removes any processing steps from the lazy processing queue and calls -#' `reset()` on the backend which, depending on the backend, can also undo -#' e.g. data filtering operations. Note that a `reset*(` call after -#' `applyProcessing()` will not have any effect. See examples below for more -#' information. -#' -#' @param binSize For `bin()`: `numeric(1)` defining the size for the m/z bins. -#' Defaults to `binSize = 1`. -#' -#' @param BPPARAM Parallel setup configuration. See [bpparam()] for more -#' information. This is passed directly to the [backendInitialize()] method -#' of the [MsBackend-class]. See also [processingChunkSize()] for -#' additional information on parallel processing. -#' -#' @param breaks For `bin()`: `numeric` defining the m/z breakpoints between -#' bins. -#' -#' @param by For `scalePeaks()`: function to calculate a single `numeric` from -#' intensity values of a spectrum by which all intensities (of -#' that spectrum) should be divided by. The default `by = sum` will -#' divide intensities of each spectrum by the sum of intensities of that -#' spectrum. -#' -#' @param chunkSize For `spectrapply()`: size of the chunks into which the -#' `Spectra` should be split. This parameter overrides parameters -#' `f` and `BPPARAM`. -#' -#' @param descending For `pickPeaks()`: `logical`, if `TRUE` just values -#' betwee the nearest valleys around the peak centroids are used. -# -#' @param f For `spectrapply()` and `applyProcessing()`: `factor` defining -#' how `object` should be splitted for eventual parallel processing. 
-#' Defaults to `factor()` for `spectrapply()` hence the object is not -#' splitted while it defaults to `f = processingChunkSize(object)` for -#' `applyProcessing()` splitting thus the object by default into chunks -#' depending on [processingChunkSize()]. -#' -#' @param FUN For `addProcessing()`: function to be applied to the peak matrix -#' of each spectrum in `object`. -#' For `bin()`: function to aggregate intensity values of peaks falling -#' into the same bin. Defaults to `FUN = sum` thus summing up intensities. -#' For `spectrapply()` and `chunkapply()`: function to be applied to -#' each individual or each chunk of `Spectra`. -#' -#' @param halfWindowSize For `pickPeaks()`: `integer(1)`, used in the -#' identification of the mass peaks: a local maximum has to be the -#' maximum in the window from `(i - halfWindowSize):(i + halfWindowSize)`. -#' For `smooth()`: `integer(1)`, used in the smoothing algorithm, the -#' window reaches from `(i - halfWindowSize):(i + halfWindowSize)`. -#' -#' @param k For `pickPeaks()`: `integer(1)`, number of values left and right of -#' the peak that should be considered in the weighted mean calculation. -#' -#' @param method For `pickPeaks()`: `character(1)`, the noise estimators that -#' should be used, currently the the *M*edian *A*bsolute *D*eviation -#' (`method = "MAD"`) and Friedman's Super Smoother -#' (`method = "SuperSmoother"`) are supported. -#' For `smooth()`: `character(1)`, the smoothing function that should be -#' used, currently, the Moving-Average- (`method = "MovingAverage"`), -#' Weighted-Moving-Average- (`method = "WeightedMovingAverage")`, -#' Savitzky-Golay-Smoothing (`method = "SavitzkyGolay"`) are supported. -#' -#' @param msLevel. `integer` defining the MS level(s) of the spectra to which -#' the function should be applied (defaults to all MS levels of `object`. -#' -#' @param mz For `containsMz()`: `numeric` with the m/z value(s) of the mass -#' peaks to check. 
-#' -#' @param neutralLoss for `containsNeutralLoss()`: `numeric(1)` defining the -#' value which should be subtracted from the spectrum's precursor m/z. -#' -#' @param normalized for `entropy()`: `logical(1)` whether the normalized -#' entropy should be calculated (default). See also [nentropy()] for -#' details. -#' -#' @param object A `Spectra` object. -#' -#' @param ppm For `containsMz()` and `neutralLoss()`: `numeric(1)` defining a -#' relative, m/z-dependent, maximal accepted difference between m/z values -#' for peaks to be matched. -#' -#' @param snr For `pickPeaks()`: `double(1)` defining the -#' *S*ignal-to-*N*oise-*R*atio. The intensity of a local maximum has to be -#' higher than `snr * noise` to be considered as peak. -#' -#' @param spectraVariables For `addProcessing()`: `character` with additional -#' spectra variables that should be passed along to the function defined -#' with `FUN`. See function description for details. -#' -#' @param threshold For `pickPeaks()`: a `numeric(1)` defining the proportion -#' of the maximal peak intensity. Only values above the threshold are -#' used for the weighted mean calculation. -#' For `replaceIntensitiesBelow()`: a `numeric(1)` defining the threshold -#' or a `function` to calculate the threshold for each spectrum on its -#' intensity values. Defaults to `threshold = min`. -#' -#' @param tolerance For `containsMz()` and `neutralLoss()`: -#' `numeric(1)` allowing to define a constant maximal accepted difference -#' between m/z values for peaks to be matched. -#' -#' @param value For `replaceIntensitiesBelow()`: `numeric(1)` defining the -#' value with which intensities should be replaced with. -#' -#' @param which For `containsMz()`: either `"any"` or `"all"` defining whether -#' any (the default) or all provided `mz` have to be present in the -#' spectrum. -#' -#' @param x A `Spectra`. -#' -#' @param zero.rm For `bin()`: `logical(1)` indicating whether to remove bins -#' with zero intensity. 
Defaults to `TRUE`, meaning the function will -#' discard bins created with an intensity of 0 to enhance memory -#' efficiency. -#' -#' @param ... Additional arguments passed to internal and downstream functions. -#' -#' @return -#' -#' See the documentation of the individual functions for a description of the -#' return value. -#' -#' @md -#' -#' @seealso -#' -#' - [compareSpectra()] for calculation of spectra similarity scores. -#' -#' - [processingChunkSize()] for information on parallel and chunk-wise data -#' processing. +#' @rdname Spectra +setMethod("ionCount", "Spectra", function(object) { + if (length(object)) + unlist(.peaksapply( + object, FUN = function(pks, ...) sum(pks[, 2], na.rm = TRUE)), + use.names = FALSE) + else numeric() +}) + +#' @rdname Spectra +setMethod("isCentroided", "Spectra", function(object, ...) { + if (length(object)) + unlist(.peaksapply(object, FUN = .peaks_is_centroided), + use.names = FALSE) + else logical() +}) + +#' @rdname Spectra +setMethod("isEmpty", "Spectra", function(x) { + if (length(x)) + unlist(.peaksapply(x, FUN = function(pks, ...) 
nrow(pks) == 0), + use.names = FALSE) + else logical() +}) + +#' @rdname Spectra +setMethod("isolationWindowLowerMz", "Spectra", function(object) { + isolationWindowLowerMz(object@backend) +}) + +#' @rdname Spectra +setReplaceMethod("isolationWindowLowerMz", "Spectra", function(object, value) { + isolationWindowLowerMz(object@backend) <- value + object +}) + +#' @rdname Spectra +setMethod("isolationWindowTargetMz", "Spectra", function(object) { + isolationWindowTargetMz(object@backend) +}) + +#' @rdname Spectra +setReplaceMethod("isolationWindowTargetMz", "Spectra", function(object, value) { + isolationWindowTargetMz(object@backend) <- value + object +}) + +#' @rdname Spectra +setMethod("isolationWindowUpperMz", "Spectra", function(object) { + isolationWindowUpperMz(object@backend) +}) + +#' @rdname Spectra +setReplaceMethod("isolationWindowUpperMz", "Spectra", function(object, value) { + isolationWindowUpperMz(object@backend) <- value + object +}) + +#' @rdname Spectra #' -#' - [Spectra] for a general description of the `Spectra` object. +#' @exportMethod containsMz +setMethod("containsMz", "Spectra", function(object, mz = numeric(), + tolerance = 0, + ppm = 20, which = c("any", "all"), + BPPARAM = bpparam()) { + cond_fun <- match.fun(match.arg(which)) + if (all(is.na(mz))) + return(rep(NA, length(object))) + mz <- unique(sort(mz)) + BPPARAM <- backendBpparam(object@backend, BPPARAM) + ## TODO: fix to use .peaksapply instead. 
+ if (is(BPPARAM, "SerialParam")) + .has_mz(object, mz, tolerance = tolerance, ppm = ppm, + condFun = cond_fun, parallel = BPPARAM) + else { + sp <- SerialParam() + f <- as.factor(dataStorage(object)) + res <- .lapply(object, FUN = .has_mz, mz = mz, tolerance = tolerance, + condFun = cond_fun, parallel = sp, f = f, + BPPARAM = BPPARAM) + unsplit(res, f = f) + } +}) + +#' @rdname Spectra #' -#' @author Sebastian Gibb, Johannes Rainer, Laurent Gatto, Philippine Louail, Nir Shahaf, Mar Garcia-Aloy +#' @exportMethod containsNeutralLoss +setMethod("containsNeutralLoss", "Spectra", function(object, neutralLoss = 0, + tolerance = 0, ppm = 20, + BPPARAM = bpparam()) { + BPPARAM <- backendBpparam(object@backend, BPPARAM) + ## TODO: FIX me to use chunk size. + if (is(BPPARAM, "SerialParam")) { + .has_mz_each(object, precursorMz(object) - neutralLoss, + tolerance = tolerance, ppm = ppm, parallel = BPPARAM) + } else { + sp <- SerialParam() + f <- as.factor(dataStorage(object)) + res <- .lapply(object, FUN = function(obj, n, tol, ppm, par) { + .has_mz_each(obj, precursorMz(obj) - n, tolerance = tol, + ppm = ppm, parallel = sp) + }, n = neutralLoss, tol = tolerance, ppm = ppm, par = sp, f = f, + BPPARAM = BPPARAM) + unsplit(res, f = f) + } +}) + +#' @rdname Spectra #' -#' @examples +#' @importMethodsFrom ProtGenerics spectrapply #' -#' ## Load a `Spectra` object with LC-MS/MS data. 
-#' fl <- system.file("TripleTOF-SWATH", "PestMix1_DDA.mzML", -#' package = "msdata") -#' sps_dda <- Spectra(fl) -#' sps_dda +#' @exportMethod spectrapply +setMethod("spectrapply", "Spectra", function(object, FUN, ..., + chunkSize = integer(), + f = factor(), + BPPARAM = SerialParam()) { + if (missing(FUN)) + FUN <- identity + if (length(chunkSize)) + return(chunkapply(object, FUN, ..., chunkSize = chunkSize)) + if (!length(f)) + f <- as.factor(seq_along(object)) + .lapply(object, FUN = FUN, f = f, ..., + BPPARAM = backendBpparam(object@backend, BPPARAM)) +}) + +#' @rdname Spectra #' +#' @exportMethod length +setMethod("length", "Spectra", function(x) length(x@backend)) + +#' @rdname Spectra +setMethod("msLevel", "Spectra", function(object) msLevel(object@backend)) + +#' @rdname Spectra +setMethod("mz", "Spectra", function(object, f = processingChunkFactor(object), + ...) { + if (length(object@processingQueue) || length(f)) + NumericList(.peaksapply(object, FUN = function(z, ...) z[, 1], + f = f, ...), compress = FALSE) + else mz(object@backend) +}) + +#' @rdname Spectra #' -#' ## -------- FUNCTIONS RETURNING A SPECTRA -------- +#' @exportMethod lengths +setMethod("lengths", "Spectra", function(x, use.names = FALSE) { + f <- .parallel_processing_factor(x) + if (length(x)) { + if (length(x@processingQueue) || length(f)) + unlist(.peaksapply(x, FUN = function(pks, ...) 
nrow(pks)), + use.names = use.names) + else lengths(x@backend, use.names = use.names) + } else integer() +}) + +#' @rdname Spectra +setMethod("polarity", "Spectra", function(object) { + polarity(object@backend) +}) + +#' @rdname Spectra +setReplaceMethod("polarity", "Spectra", function(object, value) { + polarity(object@backend) <- value + object +}) + +#' @rdname Spectra +setMethod("precScanNum", "Spectra", function(object) { + precScanNum(object@backend) +}) + +#' @rdname Spectra +setMethod("precursorCharge", "Spectra", function(object) { + precursorCharge(object@backend) +}) + +#' @rdname Spectra +setMethod("precursorIntensity", "Spectra", function(object) { + precursorIntensity(object@backend) +}) + +#' @rdname Spectra +setMethod("precursorMz", "Spectra", function(object) { + precursorMz(object@backend) +}) + +#' @rdname Spectra +setMethod("rtime", "Spectra", function(object) { + rtime(object@backend) +}) + +#' @rdname Spectra +setReplaceMethod("rtime", "Spectra", function(object, value) { + rtime(object@backend) <- value + object +}) + +#' @rdname Spectra +setMethod("scanIndex", "Spectra", function(object) { + scanIndex(object@backend) +}) + +#' @rdname Spectra +setMethod( + "selectSpectraVariables", "Spectra", + function(object, spectraVariables = union(spectraVariables(object), + peaksVariables(object))) { + spectraVariables <- union(spectraVariables, "dataStorage") + object@backend <- selectSpectraVariables( + object@backend, spectraVariables = spectraVariables) + object + }) + +#' @rdname Spectra +setMethod("smoothed", "Spectra", function(object) { + smoothed(object@backend) +}) + +#' @rdname Spectra +setReplaceMethod("smoothed", "Spectra", function(object, value) { + smoothed(object@backend) <- value + object +}) + +#' @rdname Spectra #' -#' ## Replace peak intensities below 40 with a value of 1 -#' sps_mod <- replaceIntensitiesBelow(sps_dda, threshold = 20, value = 1) -#' sps_mod +#' @importMethodsFrom ProtGenerics spectraData #' -#' ## Get the 
intensities of the first spectrum before and after the -#' ## operation -#' intensity(sps_dda[1]) -#' intensity(sps_mod[1]) +#' @exportMethod spectraData +setMethod( + "spectraData", "Spectra", + function(object, columns = spectraVariables(object)) { + if (length(object@processingQueue) && + length(pcns <- intersect(columns, peaksVariables(object)))) { + ## If user requests peaks variables we need to ensure that the + ## processing queue is executed. + scns <- setdiff(columns, pcns) + if (length(scns)) + spd <- spectraData(object@backend, columns = scns) + else + spd <- make_zero_col_DFrame(nrow = length(object)) + pkd <- peaksData(object, columns = pcns) + ## Add individual peaks variables to the `DataFrame`. + for (pcn in pcns) { + vals <- lapply(pkd, `[`, , pcn) + if (pcn %in% c("mz", "intensity")) + vals <- NumericList(vals, compress = FALSE) + spd <- do.call(`[[<-`, list(spd, i = pcn, value = vals)) + } + spd + } else + spectraData(object@backend, columns = columns) + }) + +#' @rdname Spectra #' -#' ## Remove all peaks with an intensity below 5. -#' sps_mod <- filterIntensity(sps_dda, intensity = c(5, Inf)) +#' @importMethodsFrom ProtGenerics spectraData<- #' -#' intensity(sps_mod) +#' @exportMethod spectraData<- +setReplaceMethod("spectraData", "Spectra", function(object, value) { + if (!inherits(value, "DataFrame")) + stop("'spectraData<-' expects a 'DataFrame' as input.", call. = FALSE) + pvs <- peaksVariables(object) + if (length(object@processingQueue) && + any(colnames(value) %in% pvs)) + stop("Can not replace peaks variables with a non-empty processing ", + "queue. Please use 'object <- applyProcessing(object)' to apply ", + "and clear the processing queue. Note that 'applyProcessing' ", + "requires a *writeable* backend. Use e.g. 
'object <- ", + "setBackend(object, MsBackendMemory())' if needed.") + pvs <- setdiff(pvs, colnames(value)) + if (length(pvs)) { + sd <- spectraData(object, pvs) + for (pv in pvs) { + value <- do.call("$<-", list(value, name = pv, sd[, pv])) + } + object@processingQueue <- list() + } + spectraData(object@backend) <- value + object +}) + +#' @rdname Spectra +setMethod("spectraNames", "Spectra", function(object) { + spectraNames(object@backend) +}) + +#' @rdname Spectra +setReplaceMethod("spectraNames", "Spectra", function(object, value) { + spectraNames(object@backend) <- value + object +}) + +#' @rdname Spectra +setMethod("spectraVariables", "Spectra", function(object) { + setdiff(spectraVariables(object@backend), peaksVariables(object@backend)) +}) + +#' @rdname Spectra +setMethod("tic", "Spectra", function(object, initial = TRUE) { + if (!length(object)) + return(numeric()) + if (initial) + tic(object@backend, initial = initial) + else ionCount(object) +}) + +#' @rdname Spectra #' -#' ## In addition it is possible to pass a function to `filterIntensity()`: in -#' ## the example below we want to keep only peaks that have an intensity which -#' ## is larger than one third of the maximal peak intensity in that spectrum. -#' keep_peaks <- function(x, prop = 3) { -#' x > max(x, na.rm = TRUE) / prop -#' } -#' sps_mod <- filterIntensity(sps_dda, intensity = keep_peaks) -#' intensity(sps_mod) +#' @importMethodsFrom S4Vectors $ #' -#' ## We can also change the proportion by simply passing the `prop` parameter -#' ## to the function. To keep only peaks that have an intensity which is -#' ## larger than half of the maximum intensity: -#' sps_mod <- filterIntensity(sps_dda, intensity = keep_peaks, prop = 2) -#' intensity(sps_mod) -#' -#' ## With the `scalePeaks()` function we can alternatively scale the -#' ## intensities of mass peaks per spectrum to relative intensities. This -#' ## is specifically useful for fragment (MS2) spectra. 
We below thus -#' ## scale the intensities per spectrum by the total sum of intensities -#' ## (such that the sum of all intensities per spectrum is 1). -#' ## Below we scale the intensities of all MS2 spectra in our data set. -#' sps_mod <- scalePeaks(sps_dda, msLevel = 2L) -#' -#' ## MS1 spectra were not affected -#' sps_mod |> -#' filterMsLevel(1L) |> -#' intensity() -#' -#' ## Intensities of MS2 spectra were scaled -#' sps_mod |> -#' filterMsLevel(2L) |> -#' intensity() +#' @export +setMethod("$", "Spectra", function(x, name) { + if (!(name %in% c(spectraVariables(x@backend), peaksVariables(x@backend)))) + stop("No spectra variable '", name, "' available") + if (name == "mz") + mz(x) + else if (name == "intensity") + intensity(x) + else { + if (length(x@processingQueue) && name %in% peaksVariables(x)) + .peaksapply(x, FUN = function(z, ...) z[, name], + columns = c("mz", "intensity", name)) + else + do.call("$", list(x@backend, name)) + } +}) + +#' @rdname Spectra #' -#' ## Since data manipulation operations are by default not directly applied to -#' ## the data but only cached in the internal processing queue, it is also -#' ## possible to remove these data manipulations with the `reset()` function: -#' tmp <- reset(sps_mod) -#' tmp -#' lengths(sps_dda) |> head() -#' lengths(sps_mod) |> head() -#' lengths(tmp) |> head() -#' -#' ## Data manipulation operations cached in the processing queue can also be -#' ## applied to the mass peaks data with the `applyProcessing()` function, if -#' ## the `Spectra` uses a backend that supports that (i.e. allows replacing -#' ## the mass peaks data). 
Below we first change the backend to a -#' ## `MsBackendMemory()` and then use the `applyProcessing()` to modify the -#' ## mass peaks data -#' sps_dda <- setBackend(sps_dda, MsBackendMemory()) -#' sps_mod <- filterIntensity(sps_dda, intensity = c(5, Inf)) -#' sps_mod <- applyProcessing(sps_mod) -#' sps_mod -#' -#' ## While we can't *undo* this filtering operation now using the `reset()` -#' ## function, accessing the data would now be faster, because the operation -#' ## does no longer to be applied to the original data before returning to the -#' ## user. -#' -#' -#' ## -------- FUNCTIONS RETURNING THE RESULT -------- -#' -#' ## With the `spectrapply()` function it is possible to apply an -#' ## arbitrary function to each spectrum in a Spectra. -#' ## In the example below we calculate the mean intensity for each spectrum -#' ## in a subset of the sciex_im data. Note that we can access all variables -#' ## of each individual spectrum either with the `$` operator or the -#' ## corresponding method. -#' res <- spectrapply(sps_dda[1:20], FUN = function(x) mean(x$intensity[[1]])) -#' head(res) +#' @export +setReplaceMethod("$", "Spectra", function(x, name, value) { + if (length(x@processingQueue) && + any(name %in% peaksVariables(x))) + stop("Can not replace peaks variables with a non-empty processing ", + "queue. Please use 'object <- applyProcessing(object)' to apply ", + "and clear the processing queue. Note that 'applyProcessing' ", + "requires a *writeable* backend. Use e.g. 'object <- ", + "setBackend(object, MsBackendMemory())' if needed.") + x@backend <- do.call("$<-", list(x@backend, name, value)) + x +}) + +#' @rdname Spectra #' -#' ## As an alternative, applying a function `FUN` to a `Spectra` can be -#' ## performed *chunk-wise*. The advantage of this is, that only the data for -#' ## one chunk at a time needs to be loaded into memory reducing the memory -#' ## demand. 
This type of processing can be performed by specifying the size -#' ## of the chunks (i.e. number of spectra per chunk) with the `chunkSize` -#' ## parameter -#' spectrapply(sps_dda[1:20], lengths, chunkSize = 5L) +#' @export +setMethod("[[", "Spectra", function(x, i, j, ...) { + if (!is.character(i)) + stop("'i' is supposed to be a character defining the spectra ", + "variable to access.") + if (!missing(j)) + stop("'j' is not supported.") + if (!(i %in% c(spectraVariables(x), "mz", "intensity"))) + stop("No spectra variable '", i, "' available") + if (i == "mz") + mz(x) + else if (i == "intensity") + intensity(x) + else + do.call("[[", list(x@backend, i)) +}) + +#' @rdname Spectra #' -#' ## Precursor intensity estimation. Some manufacturers don't report the -#' ## precursor intensity for MS2 spectra: -#' sps_dda |> -#' filterMsLevel(2L) |> -#' precursorIntensity() +#' @export +setReplaceMethod("[[", "Spectra", function(x, i, j, ..., value) { + if (!is.character(i)) + stop("'i' is supposed to be a character defining the spectra ", + "variable to replace or create.") + if (!missing(j)) + stop("'j' is not supported.") + x@backend <- do.call("[[<-", list(x@backend, i = i, value = value)) + x +}) + +#### --------------------------------------------------------------------------- +## +## FILTERING AND SUBSETTING +## +#### --------------------------------------------------------------------------- + +#' @rdname Spectra +setMethod("[", "Spectra", function(x, i, j, ..., drop = FALSE) { + if (!missing(j)) + stop("Subsetting 'Spectra' by columns is not (yet) supported") + if (missing(i)) + return(x) + slot(x, "backend", check = FALSE) <- x@backend[i = i] + x +}) + +setClassUnion("dataframeOrDataFrame", c("data.frame", "DataFrame")) +#' @rdname Spectra #' -#' ## This intensity can however be estimated from the previously measured -#' ## MS1 scan with the `estimatePrecursorIntensity()` function: -#' pi <- estimatePrecursorIntensity(sps_dda) +#' @export +setMethod("cbind2", 
signature(x = "Spectra", + y = "dataframeOrDataFrame"), function(x, y, ...) { + x@backend <- cbind2(x@backend, y, ...) + x + }) + +#' @rdname Spectra +setMethod("filterAcquisitionNum", "Spectra", function(object, n = integer(), + dataStorage = character(), + dataOrigin = character()) { + if (length(dataStorage) && !is.character(dataStorage)) + stop("'dataStorage' is expected to be of type character") + if (length(dataOrigin) && !is.character(dataOrigin)) + stop("'dataOrigin' is expected to be of type character") + object@backend <- filterAcquisitionNum(object@backend, n, + dataStorage, dataOrigin) + object@processing <- .logging(object@processing, + "Filter: select by: ", length(n), + " acquisition number(s) in ", + max(length(dataStorage), length(dataOrigin)), + " file(s)") + object +}) + +#' @rdname Spectra +setMethod("filterEmptySpectra", "Spectra", function(object) { + object@backend <- object@backend[as.logical(lengths(object))] + object@processing <- .logging(object@processing, + "Filter: removed empty spectra.") + object +}) + +#' @rdname Spectra +setMethod("filterDataOrigin", "Spectra", function(object, + dataOrigin = character()) { + if (length(dataOrigin) && !is.character(dataOrigin)) + stop("'dataOrigin' is expected to be of type character") + object@backend <- filterDataOrigin(object@backend, dataOrigin = dataOrigin) + object@processing <- .logging(object@processing, + "Filter: select data origin(s) ", + paste0(dataOrigin, collapse = ", ")) + object +}) + +#' @rdname Spectra +setMethod("filterDataStorage", "Spectra", function(object, + dataStorage = character()) { + if (length(dataStorage) && !is.character(dataStorage)) + stop("'dataStorage' is expected to be of type character") + object@backend <- filterDataStorage(object@backend, dataStorage) + object@processing <- .logging(object@processing, + "Filter: select data storage(s) ", + paste0(dataStorage, collapse = ", ")) + object +}) + +#' @rdname Spectra #' -#' ## This function returned the result as a 
`numeric` vector with one -#' ## value per spectrum: -#' pi +#' @exportMethod filterFourierTransformArtefacts +setMethod("filterFourierTransformArtefacts", "Spectra", + function(object, halfWindowSize = 0.05, threshold = 0.2, + keepIsotopes = TRUE, maxCharge = 5, + isotopeTolerance = 0.005) { + object <- addProcessing(object, .peaks_remove_fft_artifact, + halfWindowSize = halfWindowSize, + threshold = threshold, + keepIsotopes = keepIsotopes, + maxCharge = maxCharge, + isotopeTolerance = isotopeTolerance) + object@processing <- .logging( + object@processing, "Remove fast fourier artefacts.") + object + }) + +#' @rdname Spectra #' -#' ## We can replace the precursor intensity values of the originating -#' ## object: -#' sps_dda$precursorIntensity <- pi -#' sps_dda |> -#' filterMsLevel(2L) |> -#' precursorIntensity() +#' @importMethodsFrom ProtGenerics filterIntensity #' -NULL +#' @exportMethod filterIntensity +setMethod("filterIntensity", "Spectra", + function(object, intensity = c(0, Inf), + msLevel. = uniqueMsLevels(object), ...) 
{ + if (!.check_ms_level(object, msLevel.)) + return(object) + if (is.numeric(intensity)) { + if (length(intensity) == 1) + intensity <- c(intensity, Inf) + if (length(intensity) != 2) + stop("'intensity' should be of length specifying a ", + "lower intensity limit or of length two defining ", + "a lower and upper limit.") + object <- addProcessing(object, .peaks_filter_intensity, + intensity = intensity, + msLevel = msLevel., + spectraVariables = "msLevel") + object@processing <- .logging( + object@processing, "Remove peaks with intensities ", + "outside [", intensity[1], ", ", intensity[2], + "] in spectra of MS level(s) ", + paste0(msLevel., collapse = ", "), ".") + } else { + if (is.function(intensity)) { + object <- addProcessing( + object, .peaks_filter_intensity_function, + intfun = intensity, msLevel = msLevel., + args = list(...), spectraVariables = "msLevel") + object@processing <- .logging( + object@processing, "Remove peaks based on their ", + "intensities and a user-provided function ", + "in spectra of MS level(s) ", + paste0(msLevel., collapse = ", "), ".") + } + else stop("'intensity' has to be numeric or a function") + } + object + }) -#' @exportMethod addProcessing -#' -#' @importFrom ProtGenerics ProcessingStep + +#' @rdname Spectra +setMethod("filterIsolationWindow", "Spectra", function(object, mz = numeric()) { + object@backend <- filterIsolationWindow(object@backend, mz = mz) + object@processing <- .logging(object@processing, + "Filter: select spectra containing m/z ", + mz, " in their isolation window") + object +}) + +#' @rdname Spectra +setMethod("filterMsLevel", "Spectra", function(object, msLevel. = integer()) { + object@backend <- filterMsLevel(object@backend, msLevel = msLevel.) 
+ object@processing <- .logging(object@processing, + "Filter: select MS level(s) ", + paste0(unique(msLevel.), collapse = " ")) + object +}) + +#' @rdname Spectra #' -#' @importMethodsFrom ProtGenerics addProcessing +#' @importMethodsFrom ProtGenerics filterMzRange #' -#' @importClassesFrom ProtGenerics ProcessingStep +#' @export +setMethod("filterMzRange", "Spectra", + function(object, mz = numeric(), msLevel. = uniqueMsLevels(object), + keep = TRUE) { + if (!.check_ms_level(object, msLevel.)) + return(object) + if (!length(mz)) mz <- c(-Inf, Inf) + else mz <- range(mz) + object <- addProcessing(object, .peaks_filter_mz_range, mz = mz, + msLevel = msLevel., keep = keep, + spectraVariables = "msLevel") + if (keep) keep_or_remove <- "select" + else keep_or_remove <- "remove" + object@processing <- .logging( + object@processing, "Filter: ", keep_or_remove, + " peaks with an m/z within [", mz[1L], ", ", mz[2L], "]") + object + }) + +#' @rdname Spectra #' -#' @importFrom methods .hasSlot +#' @importMethodsFrom ProtGenerics filterMzValues #' -#' @importFrom BiocGenerics updateObject +#' @export +setMethod("filterMzValues", "Spectra", + function(object, mz = numeric(), tolerance = 0, ppm = 20, + msLevel. 
= uniqueMsLevels(object), keep = TRUE) { + if (!.check_ms_level(object, msLevel.)) + return(object) + l <- length(mz) + if (length(tolerance) != 1) + stop("'tolerance' should be of length 1") + if (length(ppm) != 1) + stop("'ppm' should be of length 1") + if (is.unsorted(mz)) { + idx <- order(mz) + mz <- mz[idx] + if (length(tolerance) == l) + tolerance <- tolerance[idx] + if (length(ppm) == l) + ppm <- ppm[idx] + } + object <- addProcessing(object, .peaks_filter_mz_value, + mz = mz, tolerance = tolerance, + ppm = ppm, msLevel = msLevel., + keep = keep, spectraVariables = "msLevel") + if (length(mz) <= 3) + what <- paste0(format(mz, digits = 4), collapse = ", ") + else what <- "" + if (keep) + keep_or_remove <- "select" + else keep_or_remove <- "remove" + object@processing <- .logging( + object@processing, "Filter: ", keep_or_remove, + " peaks matching provided m/z values ", what) + object + }) + +#' @rdname Spectra +setMethod("filterPolarity", "Spectra", function(object, polarity = integer()) { + object@backend <- filterPolarity(object@backend, polarity = polarity) + object@processing <- .logging(object@processing, + "Filter: select spectra with polarity ", + paste0(polarity, collapse = " ")) + object +}) + +#' @rdname Spectra #' -#' @rdname addProcessing -setMethod("addProcessing", "Spectra", function(object, FUN, ..., - spectraVariables = character()) { - if (missing(FUN)) - return(object) - object@processingQueue <- c(object@processingQueue, - list(ProcessingStep(FUN, ARGS = list(...)))) +#' @export +setMethod("filterPrecursorMz", "Spectra", + function(object, mz = numeric()) { + .Deprecated( + msg = paste0("'filterPrecursorMz' is deprecated. 
Please use", + " 'filterPrecursorMzRange' instead.")) + object@backend <- filterPrecursorMzRange(object@backend, mz) + object@processing <- .logging( + object@processing, + "Filter: select spectra with a precursor m/z within [", + paste0(mz, collapse = ", "), "]") + object + }) + +#' @rdname Spectra +setMethod("filterPrecursorMzRange", "Spectra", + function(object, mz = numeric()) { + object@backend <- filterPrecursorMzRange(object@backend, mz) + object@processing <- .logging( + object@processing, + "Filter: select spectra with a precursor m/z within [", + paste0(mz, collapse = ", "), "]") + object + }) + +#' @rdname Spectra +setMethod("filterPrecursorMzValues", "Spectra", + function(object, mz = numeric(), ppm = 20, tolerance = 0) { + object@backend <- filterPrecursorMzValues( + object@backend, sort(mz), ppm = ppm, tolerance = tolerance) + object@processing <- .logging( + object@processing, + "Filter: select spectra with precursor m/z matching ", + paste0(mz, collapse = ", "), "") + object + }) + +#' @rdname Spectra +setMethod("filterPrecursorCharge", "Spectra", + function(object, z = integer()) { + z <- unique(z) + object@backend <- filterPrecursorCharge(object@backend, z) + object@processing <- .logging( + object@processing, + "Filter: select spectra with a precursor charge ", + paste0(z, collapse = ", ")) + object + }) + +#' @rdname Spectra +setMethod("filterPrecursorScan", "Spectra", + function(object, acquisitionNum = integer(), f = dataOrigin(object)) { + if (!all(f %in% unique(dataOrigin(object)))) + stop("'f' must be in dataOrigin().") + object@backend <- filterPrecursorScan(object@backend, + acquisitionNum, + f = dataOrigin(object)) + object@backend <- filterDataOrigin(object@backend, dataOrigin = f) + object@processing <- .logging( + object@processing, + "Filter: select parent/children scans for ", + paste0(acquisitionNum, collapse = " ")) + object + }) + +#' @rdname Spectra +setMethod("filterRt", "Spectra", + function(object, rt = numeric(), msLevel. 
= uniqueMsLevels(object)) { + if (!is.numeric(msLevel.)) + stop("Please provide a numeric MS level.") + if (length(rt) != 2L || !is.numeric(rt) || rt[1] >= rt[2]) + stop("Please provide a lower and upper numeric retention", + " time range.") + if (length(rt)) + rt <- range(rt) + else rt <- c(-Inf, Inf) + object@backend <- filterRt(object@backend, rt, msLevel.) + object@processing <- .logging( + object@processing, + "Filter: select retention time [", rt[1], "..", rt[2], + "] on MS level(s) ", paste0(msLevel., collapse = " ")) + object + }) + +#' @rdname Spectra +setMethod("reset", "Spectra", function(object, ...) { + object@backend <- reset(object@backend) + object@processingQueue <- list() if (!.hasSlot(object, "processingQueueVariables")) - object <- updateObject(object) - object@processingQueueVariables <- union(object@processingQueueVariables, - spectraVariables) - validObject(object) + object <- updateObject(object, check = FALSE) + object@processingQueueVariables <- character() + object@processing <- .logging(object@processing, "Reset object.") object }) -#' @rdname addProcessing +#' @rdname Spectra +setMethod("filterRanges", "Spectra", + function(object, spectraVariables = character(), ranges = numeric(), + match = c("all", "any")){ + object@backend <- filterRanges(object@backend, spectraVariables, + ranges, match) + object@processing <- .logging(object@processing, + "Filter: select spectra with a ", + spectraVariables, " within: [", + ranges[seq(ranges)%% 2 != 0], ", ", + ranges[seq(ranges)%% 2 == 0], "]" + ) + object + }) + +#' @rdname Spectra +setMethod("filterValues", "Spectra", + function(object, spectraVariables = character(), values = numeric(), + ppm = 0, tolerance = 0, match = c("all", "any")){ + object@backend <- filterValues(object@backend, spectraVariables, + values, ppm, tolerance, match) + object@processing <- .logging(object@processing, + "Filter: select spectra with a ", + spectraVariables, " similar to: ", + values) + object + }) + +#### 
--------------------------------------------------------------------------- +## +## DATA MANIPULATION METHODS +## +#### --------------------------------------------------------------------------- + +#' @rdname Spectra #' #' @importMethodsFrom ProtGenerics bin #' @@ -3330,90 +2580,68 @@ setMethod("bin", "Spectra", function(x, binSize = 1L, breaks = NULL, msLevel. = uniqueMsLevels(x), FUN = sum, zero.rm = TRUE) { if (!.check_ms_level(x, msLevel.)) - return(x) - if (!length(breaks)) { - mzr <- range(.peaksapply(filterMsLevel(x, msLevel.), - function(z, ...) z[c(1L, nrow(z))] - ), na.rm = TRUE) - breaks <- seq(floor(mzr[1]), ceiling(mzr[2]), by = binSize) - breaks <- MsCoreUtils:::.fix_breaks(breaks, mzr) - } - mids <- (breaks[-length(breaks)] + breaks[-1L]) / 2 - x <- addProcessing(x, .peaks_bin, breaks = breaks, mids = mids, - agg_fun = FUN, msLevel = msLevel., zero.rm = zero.rm, - spectraVariables = "msLevel") - x@processing <- .logging(x@processing, - "Spectra of MS level(s) ", - paste0(msLevel., collapse = ", "), - " binned.") - x -}) - -#' @rdname addProcessing -#' -#' @exportMethod containsMz -setMethod("containsMz", "Spectra", function(object, mz = numeric(), - tolerance = 0, - ppm = 20, which = c("any", "all"), - BPPARAM = bpparam()) { - if (length(object)) { - cond_fun <- match.fun(match.arg(which)) - if (all(is.na(mz))) - return(rep(NA, length(object))) - mz <- unique(sort(mz)) - BPPARAM <- backendBpparam(object@backend, BPPARAM) - unlist(.peaksapply( - object, FUN = .peaks_contain_mz, mz = mz, tolerance = tolerance, - ppm = ppm, condFun = cond_fun, BPPARAM = BPPARAM), - use.names = FALSE - ) - } else logical() -}) - -#' @rdname addProcessing -#' -#' @exportMethod containsNeutralLoss -setMethod("containsNeutralLoss", "Spectra", function(object, neutralLoss = 0, - tolerance = 0, ppm = 20, - BPPARAM = bpparam()) { - BPPARAM <- backendBpparam(object@backend, BPPARAM) - ## TODO: FIX me to use chunk size. 
- if (is(BPPARAM, "SerialParam")) { - .has_mz_each(object, precursorMz(object) - neutralLoss, - tolerance = tolerance, ppm = ppm, parallel = BPPARAM) - } else { - sp <- SerialParam() - f <- as.factor(dataStorage(object)) - res <- .lapply(object, FUN = function(obj, n, tol, ppm, par) { - .has_mz_each(obj, precursorMz(obj) - n, tolerance = tol, - ppm = ppm, parallel = sp) - }, n = neutralLoss, tol = tolerance, ppm = ppm, par = sp, f = f, - BPPARAM = BPPARAM) - unsplit(res, f = f) + return(x) + if (!length(breaks)) { + mzr <- range(.peaksapply(filterMsLevel(x, msLevel.), + function(z, ...) z[c(1L, nrow(z))] + ), na.rm = TRUE) + breaks <- seq(floor(mzr[1]), ceiling(mzr[2]), by = binSize) + breaks <- MsCoreUtils:::.fix_breaks(breaks, mzr) } + mids <- (breaks[-length(breaks)] + breaks[-1L]) / 2 + x <- addProcessing(x, .peaks_bin, breaks = breaks, mids = mids, + agg_fun = FUN, msLevel = msLevel., zero.rm = zero.rm, + spectraVariables = "msLevel") + x@processing <- .logging(x@processing, + "Spectra of MS level(s) ", + paste0(msLevel., collapse = ", "), + " binned.") + x }) -#' @rdname addProcessing +#' @rdname Spectra #' -#' @importFrom MsCoreUtils entropy nentropy +#' @exportMethod compareSpectra #' -#' @export -setMethod("entropy", "Spectra", function(object, normalized = TRUE) { - if (length(object)) { - if (normalized) entropy_fun <- nentropy - else entropy_fun <- entropy - unlist(.peaksapply( - object, FUN = function(pks, ...) entropy_fun(pks[, "intensity"])), - use.names = FALSE - ) - } else numeric() -}) -#' @rdname addProcessing -setMethod("entropy", "ANY", function(object, ...) 
{ - MsCoreUtils::entropy(object) -}) +#' @importFrom MsCoreUtils ndotproduct +#' +#' @importMethodsFrom ProtGenerics compareSpectra +#' +#' @exportMethod compareSpectra +setMethod("compareSpectra", signature(x = "Spectra", y = "Spectra"), + function(x, y, MAPFUN = joinPeaks, tolerance = 0, ppm = 20, + FUN = ndotproduct, ..., SIMPLIFY = TRUE) { + mat <- .compare_spectra_chunk(x, y, MAPFUN = MAPFUN, + tolerance = tolerance, + ppm = ppm, FUN = FUN, ...) + if (SIMPLIFY && (length(x) == 1 || length(y) == 1)) + mat <- as.vector(mat) + mat + }) +#' @rdname Spectra +setMethod("compareSpectra", signature(x = "Spectra", y = "missing"), + function(x, y = NULL, MAPFUN = joinPeaks, tolerance = 0, ppm = 20, + FUN = ndotproduct, ..., SIMPLIFY = TRUE) { + if (length(x) == 1) + return(compareSpectra(x, x, MAPFUN = MAPFUN, + tolerance = tolerance, + ppm = ppm, FUN = FUN, ..., + SIMPLIFY = SIMPLIFY)) + mat <- .compare_spectra_self(x, MAPFUN = MAPFUN, FUN = FUN, + tolerance = tolerance, ppm = ppm, + ...) + if (SIMPLIFY && length(x) == 1) + mat <- as.vector(mat) + mat + }) + +## estimateMzResolution -#' @rdname addProcessing +## estimateNoise + +## normalize + +#' @rdname Spectra #' #' @exportMethod pickPeaks setMethod("pickPeaks", "Spectra", @@ -3455,7 +2683,11 @@ setMethod("pickPeaks", "Spectra", object }) -#' @rdname addProcessing +## quantify + +## removeReporters + +#' @rdname Spectra #' #' @exportMethod replaceIntensitiesBelow setMethod("replaceIntensitiesBelow", "Spectra", @@ -3482,18 +2714,8 @@ setMethod("replaceIntensitiesBelow", "Spectra", object }) -#' @rdname addProcessing -setMethod("reset", "Spectra", function(object, ...) 
{ - object@backend <- reset(object@backend) - object@processingQueue <- list() - if (!.hasSlot(object, "processingQueueVariables")) - object <- updateObject(object, check = FALSE) - object@processingQueueVariables <- character() - object@processing <- .logging(object@processing, "Reset object.") - object -}) -#' @rdname addProcessing +#' @rdname Spectra #' #' @importFrom ProtGenerics smooth #' @importFrom MsCoreUtils coefMA coefWMA coefSG @@ -3524,264 +2746,93 @@ setMethod("smooth", "Spectra", x }) -#' @rdname addProcessing -#' -#' @importMethodsFrom ProtGenerics spectrapply -#' -#' @exportMethod spectrapply -setMethod("spectrapply", "Spectra", function(object, FUN, ..., - chunkSize = integer(), - f = factor(), - BPPARAM = SerialParam()) { - if (missing(FUN)) - FUN <- identity - if (length(chunkSize)) - return(chunkapply(object, FUN, ..., chunkSize = chunkSize)) - if (!length(f)) - f <- as.factor(seq_along(object)) - .lapply(object, FUN = FUN, f = f, ..., - BPPARAM = backendBpparam(object@backend, BPPARAM)) -}) - -#' @title Estimate Precursor Intensities -#' -#' @aliases estimatePrecursorIntensity -#' -#' @description -#' -#' Some MS instrument manufacturers don't provide precursor intensities for -#' fragment spectra. These can however be estimated, given that also MS1 -#' spectra are available. The `estimatePrecursorIntensity()` funtion defines the -#' precursor intensities for MS2 spectra using the intensity of the matching -#' MS1 peak from the closest MS1 spectrum (i.e. the last MS1 spectrum measured -#' before the respective MS2 spectrum). With `method = "interpolation"` it is -#' also possible to calculate the precursor intensity based on an interpolation -#' of intensity values (and retention times) of the matching MS1 peaks from the -#' previous and next MS1 spectrum. See below for an example. -#' -#' @param object `Spectra` with MS1 and MS2 spectra. 
-#' -#' @param ppm `numeric(1)` with the maximal allowed relative difference of m/z -#' values between the precursor m/z of a spectrum and the m/z of the -#' respective ion on the MS1 scan. -#' -#' @param tolerance `numeric(1)` with the maximal allowed difference of m/z -#' values between the precursor m/z of a spectrum and the m/z of the -#' respective ion on the MS1 scan. -#' -#' @param method `character(1)` defining whether the precursor intensity -#' should be estimated on the previous MS1 spectrum (`method = "previous"`, -#' the default) or based on an interpolation on the previous and next -#' MS1 spectrum (`method = "interpolation"`). -#' -#' @param msLevel. `integer(1)` the MS level for which precursor intensities -#' should be estimated. Defaults to `2L`. -#' -#' @param f `factor` (or vector to be coerced to `factor`) defining which -#' spectra belong to the same original data file (sample). -#' Defaults to `f = dataOrigin(x)`. -#' -#' @param BPPARAM Parallel setup configuration. See [bpparam()] for more -#' information. This is passed directly to the [backendInitialize()] method -#' of the [MsBackend-class]. +#' @exportMethod addProcessing #' -#' @author Johannes Rainer with feedback and suggestions from Corey Broeckling +#' @importFrom ProtGenerics ProcessingStep #' -#' @importMethodsFrom ProtGenerics estimatePrecursorIntensity +#' @importMethodsFrom ProtGenerics addProcessing #' -#' @exportMethod estimatePrecursorIntensity +#' @importClassesFrom ProtGenerics ProcessingStep #' -#' @rdname estimatePrecursorIntensity +#' @importFrom methods .hasSlot #' -#' @examples +#' @importFrom BiocGenerics updateObject #' -#' #' ## Calculating the precursor intensity for MS2 spectra: -#' ## -#' ## Some MS instrument manufacturer don't report the precursor intensities -#' ## for MS2 spectra. The `estimatePrecursorIntensity` function can be used -#' ## in these cases to calculate the precursor intensity on MS1 data. 
Below -#' ## we load an mzML file from a vendor providing precursor intensities and -#' ## compare the estimated and reported precursor intensities. -#' tmt <- Spectra(msdata::proteomics(full.names = TRUE)[5], -#' backend = MsBackendMzR()) -#' pmi <- estimatePrecursorIntensity(tmt) -#' plot(pmi, precursorIntensity(tmt)) -#' -#' ## We can also replace the original precursor intensity values with the -#' ## newly calculated ones -#' tmt$precursorIntensity <- pmi -setMethod( - "estimatePrecursorIntensity", "Spectra", - function(object, ppm = 20, tolerance = 0, - method = c("previous", "interpolation"), - msLevel. = 2L, f = dataOrigin(object), BPPARAM = bpparam()) { - if (is.factor(f)) - f <- as.character(f) - f <- factor(f, levels = unique(f)) - BPPARAM <- backendBpparam(object@backend, BPPARAM) - unlist(bplapply(split(object, f), - FUN = .estimate_precursor_intensity, ppm = ppm, - tolerance = tolerance, method = method, - msLevel = msLevel., BPPARAM = BPPARAM), - use.names = FALSE) - }) +#' @rdname Spectra +setMethod("addProcessing", "Spectra", function(object, FUN, ..., + spectraVariables = character()) { + if (missing(FUN)) + return(object) + object@processingQueue <- c(object@processingQueue, + list(ProcessingStep(FUN, ARGS = list(...)))) + if (!.hasSlot(object, "processingQueueVariables")) + object <- updateObject(object) + object@processingQueueVariables <- union(object@processingQueueVariables, + spectraVariables) + validObject(object) + object +}) +#' @rdname Spectra +#' +#' @export +coreSpectraVariables <- function() .SPECTRA_DATA_COLUMNS -################################################################################ -## -## Spectra similarity calculations -## -################################################################################ +#' @rdname Spectra +setMethod("uniqueMsLevels", "Spectra", function(object, ...) { + uniqueMsLevels(object@backend, ...) 
+}) -#' @title Spectra similarity calculations -#' -#' @name compareSpectra -#' -#' @aliases compareSpectra -#' -#' @description -#' -#' `compareSpectra()` compares each spectrum in `x` with each spectrum in `y` -#' using the function provided with `FUN` (defaults to [ndotproduct()]). If -#' `y` is missing, each spectrum in `x` is compared with each other spectrum -#' in `x`. -#' The matching/mapping of peaks between the compared spectra is done with the -#' `MAPFUN` function. The default [joinPeaks()] matches peaks of both spectra -#' and allows to keep all peaks from the first spectrum (`type = "left"`), -#' from the second (`type = "right"`), from both (`type = "outer"`) and to -#' keep only matching peaks (`type = "inner"`); see [joinPeaks()] for more -#' information and examples). The `MAPFUN` function should have parameters -#' `x`, `y`, `xPrecursorMz` and `yPrecursorMz` as these values are passed to -#' the function. -#' -#' In addition to `joinPeaks()` also [joinPeaksGnps()] is supported for -#' GNPS-like similarity score calculations. Note that `joinPeaksGnps()` should -#' only be used in combination with `FUN = MsCoreUtils::gnps` -#' (see [joinPeaksGnps()] for more information and details). Use -#' `MAPFUN = joinPeaksNone` to disable internal peak matching/mapping if a -#' similarity scoring function is used that performs the matching internally. -#' -#' `FUN` is supposed to be a function to compare intensities of (matched) -#' peaks of the two spectra that are compared. The function needs to take two -#' matrices with columns `"mz"` and `"intensity"` as input and is supposed -#' to return a single numeric as result. In addition to the two peak matrices -#' the spectra's precursor m/z values are passed to the function as parameters -#' `xPrecursorMz` (precursor m/z of the `x` peak matrix) and `yPrecursorMz` -#' (precursor m/z of the `y` peak matrix). Additional parameters to functions -#' `FUN` and `MAPFUN` can be passed with `...`. 
Parameters `ppm` and -#' `tolerance` are passed to both `MAPFUN` and `FUN`. -#' The function returns a `matrix` with the results of `FUN` for each -#' comparison, number of rows equal to `length(x)` and number of columns -#' equal `length(y)` (i.e. element in row 2 and column 3 is the result from -#' the comparison of `x[2]` with `y[3]`). If `SIMPLIFY = TRUE` the `matrix` -#' is *simplified* to a `numeric` if length of `x` or `y` is one. See also -#' the vignette for additional examples, such as using spectral entropy -#' similarity in the scoring. -#' -#' @param FUN function to compare intensities of peaks between two spectra. -#' Defaults to [ndotproduct()]. -#' -#' @param MAPFUN For `compareSpectra()`: function to map/match peaks between -#' the two compared spectra. See [joinPeaks()] for more information and -#' possible functions. Defaults to [joinPeaks()]. -#' -#' @param ppm `numeric(1)` defining a relative, m/z-dependent, maximal -#' accepted difference between m/z values for peaks to be matched. This -#' parameter is directly passed to `MAPFUN`. -#' -#' @param tolerance `numeric(1)` allowing to define a constant maximal -#' accepted difference between m/z values for peaks to be matched. This -#' parameter is directly passed to `MAPFUN`. -#' -#' @param x A `Spectra` object. -#' -#' @param y A `Spectra` object. -#' -#' @param SIMPLIFY `logical(1)` defining whether the result matrix should be -#' *simplified* to a `numeric` if possible (i.e. if either `x` or `y` is -#' of length 1). -#' -#' @param ... Additional arguments passed to the internal functions. -#' -#' @importFrom MsCoreUtils ndotproduct -#' -#' @importMethodsFrom ProtGenerics compareSpectra -#' -#' @exportMethod compareSpectra -#' -#' @author Sebastian Gibb, Johannes Rainer, Laurent Gatto -#' -#' @examples -#' -#' ## Load a `Spectra` object with LC-MS/MS data. 
-#' fl <- system.file("TripleTOF-SWATH", "PestMix1_DDA.mzML", -#' package = "msdata") -#' sps_dda <- Spectra(fl) -#' sps_dda -#' -#' ## Restrict to MS2 (fragment) spectra: -#' sps_ms2 <- filterMsLevel(sps_dda, msLevel = 2L) -#' -#' ## Compare spectra: comparing spectra 2 and 3 against spectra 10:20 using -#' ## the normalized dotproduct method. -#' res <- compareSpectra(sps_ms2[2:3], sps_ms2[10:20]) -#' ## first row contains comparisons of spectrum 2 with spectra 10 to 20 and -#' ## the second row comparisons of spectrum 3 with spectra 10 to 20 -#' res -#' -#' ## We next calculate the pairwise similarity for the first 10 spectra -#' compareSpectra(sps_ms2[1:10]) -#' -#' ## Use compareSpectra to determine the number of common (matching) peaks -#' ## with a ppm of 10: -#' ## type = "inner" uses a *inner join* to match peaks, i.e. keeps only -#' ## peaks that can be mapped betwen both spectra. The provided FUN returns -#' ## simply the number of matching peaks. -#' compareSpectra(sps_ms2[2:3], sps_ms2[10:20], ppm = 10, type = "inner", -#' FUN = function(x, y, ...) nrow(x)) -#' -#' ## We repeat this calculation between all pairwise combinations -#' ## of the first 20 spectra -#' compareSpectra(sps_ms2[1:20], ppm = 10, type = "inner", -#' FUN = function(x, y, ...) nrow(x)) -NULL +#' @rdname Spectra +setMethod("backendBpparam", "Spectra", function(object, BPPARAM = bpparam()) { + backendBpparam(object@backend, BPPARAM) +}) -#' @rdname compareSpectra -setMethod("compareSpectra", signature(x = "Spectra", y = "Spectra"), - function(x, y, MAPFUN = joinPeaks, tolerance = 0, ppm = 20, - FUN = ndotproduct, ..., SIMPLIFY = TRUE) { - mat <- .compare_spectra_chunk(x, y, MAPFUN = MAPFUN, - tolerance = tolerance, - ppm = ppm, FUN = FUN, ...) 
- if (SIMPLIFY && (length(x) == 1 || length(y) == 1)) - mat <- as.vector(mat) - mat - }) -#' @rdname compareSpectra -setMethod("compareSpectra", signature(x = "Spectra", y = "missing"), - function(x, y = NULL, MAPFUN = joinPeaks, tolerance = 0, ppm = 20, - FUN = ndotproduct, ..., SIMPLIFY = TRUE) { - if (length(x) == 1) - return(compareSpectra(x, x, MAPFUN = MAPFUN, - tolerance = tolerance, - ppm = ppm, FUN = FUN, ..., - SIMPLIFY = SIMPLIFY)) - mat <- .compare_spectra_self(x, MAPFUN = MAPFUN, FUN = FUN, - tolerance = tolerance, ppm = ppm, - ...) - if (SIMPLIFY && length(x) == 1) - mat <- as.vector(mat) - mat - }) +#' @rdname hidden_aliases +setMethod("combinePeaks", "list", function(object, ...) { + .Deprecated("combinePeaksData", old = "combinePeaks", + msg = paste0("'combinePeaks' for lists of peak matrices is ", + "deprecated; please use 'combinePeaksData' ", + "instead.")) + combinePeaksData(object, ...) +}) +#' @rdname Spectra +#' +#' @exportMethod combinePeaks +setMethod("combinePeaks", "Spectra", function(object, tolerance = 0, ppm = 20, + intensityFun = base::mean, + mzFun = base::mean, + weighted = TRUE, + msLevel. = uniqueMsLevels(object), + ...) 
{ + object <- addProcessing( + object, .peaks_combine, ppm = ppm, tolerance = tolerance, + intensityFun = intensityFun, mzFun = mzFun, weighted = weighted, + msLevel = force(msLevel.), spectraVariables = "msLevel") + object@processing <- .logging( + object@processing, "Combining peaks within each spectrum with ppm = ", + ppm, " and tolerance = ", tolerance, ".") + object +}) -################################################################################ -## -## methods with documentation in Spectra-functions.R -## -################################################################################ -#' @rdname processingChunkSize -setMethod("backendBpparam", "Spectra", function(object, BPPARAM = bpparam()) { - backendBpparam(object@backend, BPPARAM) +#' @rdname Spectra +#' +#' @importFrom MsCoreUtils entropy nentropy +#' +#' @export +setMethod("entropy", "Spectra", function(object, normalized = TRUE) { + if (length(object)) { + if (normalized) entropy_fun <- nentropy + else entropy_fun <- entropy + unlist(.peaksapply( + object, FUN = function(pks, ...) entropy_fun(pks[, "intensity"])), + use.names = FALSE + ) + } else numeric() +}) +#' @rdname Spectra +setMethod("entropy", "ANY", function(object, ...) { + MsCoreUtils::entropy(object) }) diff --git a/R/countIdentifications.R b/R/countIdentifications.R index 2f3e8c15..b7ddb687 100644 --- a/R/countIdentifications.R +++ b/R/countIdentifications.R @@ -40,10 +40,6 @@ #' spectra variable `countIdentifications` with the number of #' identification for each scan. #' -#' @seealso -#' -#' [addProcessing()] for other data analysis functions. -#' #' @author Laurent Gatto #' #' @export diff --git a/R/peaks-functions.R b/R/peaks-functions.R index dc19e353..7419dd44 100644 --- a/R/peaks-functions.R +++ b/R/peaks-functions.R @@ -87,7 +87,7 @@ NULL msLevel = spectrumMsLevel, ...) 
{ if (!spectrumMsLevel %in% msLevel || !length(x)) return(x) - x[which(MsCoreUtils::between(x[, "intensity"], intensity)), , drop = FALSE] + x[which(between(x[, "intensity"], intensity)), , drop = FALSE] } #' @description @@ -146,9 +146,8 @@ NULL keep = TRUE, ...) { if (!spectrumMsLevel %in% msLevel || !length(x)) return(x) - no_match <- is.na(MsCoreUtils::closest(x[, "mz"], mz, tolerance = tolerance, - ppm = ppm, duplicates = "keep", - .check = FALSE)) + no_match <- is.na(closest(x[, "mz"], mz, tolerance = tolerance, ppm = ppm, + duplicates = "keep", .check = FALSE)) if (keep) x[!no_match, , drop = FALSE] else x[no_match, , drop = FALSE] } @@ -171,8 +170,8 @@ NULL if (!spectrumMsLevel %in% msLevel || !length(x)) return(x) if (keep) - x[MsCoreUtils::between(x[, "mz"], mz), , drop = FALSE] - else x[!MsCoreUtils::between(x[, "mz"], mz), , drop = FALSE] + x[between(x[, "mz"], mz), , drop = FALSE] + else x[!between(x[, "mz"], mz), , drop = FALSE] } #' @description @@ -308,13 +307,7 @@ NULL #' #' @author Johannes Rainer, Michael Witting #' -#' @seealso -#' -#' - [compareSpectra()] for the function to calculate similarities between -#' spectra. -#' -#' - [gnps()] in the *MsCoreUtils* package for more information on the GNPS -#' similarity score. +#' @seealso [gnps()] #' #' @importFrom MsCoreUtils join ppm #' @@ -422,14 +415,14 @@ joinPeaksNone <- function(x, y, ...) { return(x) } - n <- MsCoreUtils::noise(x[, 1L], x[, 2L], method = method, ...) + n <- noise(x[, 1L], x[, 2L], method = method, ...) - l <- MsCoreUtils::localMaxima(x[, 2L], hws = halfWindowSize) + l <- localMaxima(x[, 2L], hws = halfWindowSize) p <- which(l & x[, 2L] > (snr * n)) if (k > 0L) { - cbind(mz = MsCoreUtils::refineCentroids(x = x[, 1L], y = x[, 2L], p = p, + cbind(mz = refineCentroids(x = x[, 1L], y = x[, 2L], p = p, k = k, threshold = threshold, descending = descending), intensity = x[p, 2L]) @@ -559,10 +552,9 @@ joinPeaksNone <- function(x, y, ...) 
{ .peaks_deisotope <- function(x, substDefinition = isotopicSubstitutionMatrix("HMDB_NEUTRAL"), tolerance = 0, ppm = 10, charge = 1, ...) { - iso_grps <- MetaboCoreUtils::isotopologues( - x, substDefinition = substDefinition, - tolerance = tolerance, ppm = ppm, - charge = charge) + iso_grps <- isotopologues(x, substDefinition = substDefinition, + tolerance = tolerance, ppm = ppm, + charge = charge) if (length(iso_grps)) { rem <- unique(unlist(lapply(iso_grps, `[`, -1), use.names = FALSE)) x[-rem, , drop = FALSE] @@ -622,7 +614,7 @@ joinPeaksNone <- function(x, y, ...) { msLevel = spectrumMsLevel, ...) { if (!spectrumMsLevel %in% msLevel || !length(x)) return(x) - grps <- MsCoreUtils::group(x[, "mz"], tolerance = tolerance, ppm = ppm) + grps <- group(x[, "mz"], tolerance = tolerance, ppm = ppm) lg <- length(grps) if (grps[lg] == lg) return(x) @@ -657,7 +649,7 @@ joinPeaksNone <- function(x, y, ...) { msLevel = spectrumMsLevel, ...) { if (!spectrumMsLevel %in% msLevel || !nrow(x)) return(x) - keep <- is.na(MsCoreUtils::closest(x[, "mz"], precursorMz, ppm = ppm, + keep <- is.na(closest(x[, "mz"], precursorMz, ppm = ppm, tolerance = tolerance, duplicates = "keep", .check = FALSE)) x[keep, , drop = FALSE] @@ -678,72 +670,3 @@ joinPeaksNone <- function(x, y, ...) { pmz <- precursorMz - tolerance - ppm(precursorMz, ppm = ppm) x[x[, "mz"] < pmz, , drop = FALSE] } - -#' filter a peak matrix `x` by (arbitrary) numeric ranges for spectra and/or -#' peaks variables. ranges for spectra and peaks variables are combined using -#' a logical AND, rows in the provided range matrices with a logical OR. -#' -#' Used by `filterPeaksRanges()` function for `Spectra`. -#' -#' @param svars `character` with the spectra variables for which filter ranges -#' where provided. -#' -#' @param pvars `character` with the peaks variables for which filter ranges -#' where provided. -#' -#' @param ranges `list` with `numeric` two-column matrices with the -#' user-provided ranges. 
The number of rows of all matrices is expected -#' to match. -#' -#' @param spectrumMsLevel `integer(1)` with the MS level of the peak matrix' -#' spectrum. -#' -#' @param keep `logical(1)` whether mass peaks that match the filters should be -#' kept or removed. -#' -#' @param ... values for all spectra variables defined in `svars` are expected -#' to be passed through `...` as `name = value` pairs. -#' -#' @author Johannes Rainer -#' -#' @noRd -.peaks_filter_ranges <- function(x, svars = character(), - pvars = character(), - ranges, spectrumMsLevel, - keep = TRUE, ...) { - svalue <- list(..., msLevel = spectrumMsLevel) - nx <- nrow(x) - sel <- rep(FALSE, nx) - for (i in seq_len(nrow(ranges[[1L]]))) { - ## check ranges for spectra variables - svars_ok <- vapply(svars, function(z) - MsCoreUtils::between(svalue[[z]], ranges[[z]][i, ]), TRUE, - USE.NAMES = FALSE) - if (!anyNA(svars_ok) && all(svars_ok)) { - if (length(pvars)) { - ## check ranges for peaks variables - tmp <- rowSums(do.call(cbind, lapply(pvars, function(z) { - MsCoreUtils::between(x[, z], ranges[[z]][i, ]) - }))) == length(pvars) - tmp[is.na(tmp)] <- FALSE - sel <- sel | tmp - } else { - ## No need to check further, because we have a match - if (keep) return(x) - else return(x[logical(), , drop = FALSE]) - } - } - } - if (keep) x[sel, , drop = FALSE] - else x[!sel, , drop = FALSE] -} - -#' Check for presence of peaks defined by their m/z value. Note that this -#' function does **not** return a peak matrix, but only a logical of length 1! -#' -#' @return `logical(1)` -#' @noRd -.peaks_contain_mz <- function(x, mz = numeric(), tolerance = 0, ppm = 20, - condFun = any, ...) { - condFun(common(mz, x[, "mz"], tolerance = tolerance, ppm = ppm)) -} diff --git a/README.md b/README.md index 3df3e6d7..be839639 100644 --- a/README.md +++ b/README.md @@ -19,81 +19,58 @@ footprint. 
A (possibly incomplete) list of available backends (along with a link to the R
package providing it) is shown below:

-- `MsBackendCompDb` (package
-  [*CompoundDb*](https://github.com/rformassspectrometry/CompoundDb): provides
-  access to spectra data (spectra and peaks variables) from a *CompDb*
-  database. Has a small memory footprint because all data (except precursor m/z
-  values) are retrieved on-the-fly from the database.
-
+- `MsBackendMemory` (package: *Spectra*): *default* backend which keeps all data
+  in memory. Optimized for fast processing.
 - `MsBackendDataFrame` (package: *Spectra*): alternative to the
   `MsBackendMemory` also keeping all data in memory, but supporting `S4` objects
   as spectra variables because the data is stored internally in a `DataFrame`.
-
+- `MsBackendMzR` (package: *Spectra*): by using the `mzR` package it supports
+  import of MS data from mzML, mzXML and CDF files. This backend keeps only
+  general spectra variables in memory and retrieves the peaks data (m/z and
+  intensity values) on-the-fly from the original data files. The backend has
+  thus a smaller memory footprint compared to in-memory backends.
 - `MsBackendHdf5Peaks` (package: *Spectra*): on-disk backend similar to
   `MsBackendMzR`, but the peaks data is stored in HDF5 files (general spectra
   variables are kept in memory).
-
-- `MsBackendHmdbXml` (package
-  [*MsbackendHmdb*](https://github.com/rformassspectrometry/MsBackendHmdb)):
-  allows import of MS data from xml files of the Human Metabolome Database
-  (HMDB). Extends the `MsBackendDataFrame` and keeps thus all data, after
-  import, in memory.
-
+- `MsBackendMgf` (package
+  [*MsBackendMgf*](https://github.com/rformassspectrometry/MsBackendMgf)): allows
+  to import/export data in mascot generic format (MGF). Extends the
+  `MsBackendDataFrame` and keeps thus all data, after import, in memory.
+- `MsBackendMsp` (package
+  [*MsbackendMsp*](https://github.com/rformassspectrometry/MsBackendMsp)): allows
+  to import/export data in NIST MSP format. Extends the `MsBackendDataFrame` and
+  keeps thus all data, after import, in memory.
 - `MsBackendMassbank` (package
   [*MsBackendMassbank*](https://github.com/rformassspectrometry/MsBackendMassbank)):
   allows to import/export data in MassBank text file format. Extends the
   `MsBackendDataFrame` and keeps thus all data, after import, in memory.
-
 - `MsBackendMassbankSql` (package
   [*MsBackendMassbank*](https://github.com/rformassspectrometry/MsBackendMassbank)):
   allows to directly connect to a MassBank SQL database to retrieve all MS data
   and variables. Has a minimal memory footprint because all data is retrieved
   on-the-fly from the SQL database.
-
-- `MsBackendMemory` (package: *Spectra*): *default* backend which keeps all data
-  in memory. Optimized for fast processing.
-
-- `MsBackendMetaboLights` (package
-  [*MsBackendMetaboLights*](https://github.com/rformassspectrometry/MsBackendMetaboLights)):
-  retrieves and caches MS data files from MetaboLights.
-
-- `MsBackendMgf` (package
-  [*MsBackendMgf*](https://github.com/rformassspectrometry/MsBackendMgf)): allows
-  to import/export data in mascot generic format (MGF). Extends the
-  `MsBackendDataFrame` and keeps thus all data, after import, in memory.
-
-- `MsBackendMsp` (package
-  [*MsbackendMsp*](https://github.com/rformassspectrometry/MsBackendMsp)): allows
-  to import/export data in NIST MSP format. Extends the `MsBackendDataFrame` and
-  keeps thus all data, after import, in memory.
-
-- `MsBackendMzR` (package: *Spectra*): by using the `mzR` package it supports
-  import of MS data from mzML, mzXML and CDF files. This backend keeps only
-  general spectra variables in memory and retrieves the peaks data (m/z and
-  intensity values) on-the-fly from the original data files. The backend has
-  thus a smaller memory footprint compared to in-memory backends.
-
-- `MsBackendOfflineSql` (package
-  [*MsBackendSql*](https://github.com/rformassspectrometry/MsBackendSql)):
-  stores all MS data in a SQL database and has thus a minimal memory footprint.
-  Does, in contrast to `MsBackendSql`, not keep an active SQL database
-  connection and can thus support parallel processing.
-
 - `MsBackendRawFileReader` (package
   [*MsBackendRawFileReader*](https://github.com/fgcz/MsBackendRawFileReader)):
   implements a backend for reading MS data from Thermo Fisher Scientific's raw
   data files using the manufacturer's NewRawFileReader .Net libraries. The
   package generalizes the functionality introduced by the `rawrr` package.
-
+- `MsBackendHmdbXml` (package
+  [*MsbackendHmdb*](https://github.com/rformassspectrometry/MsBackendHmdb)):
+  allows import of MS data from xml files of the Human Metabolome Database
+  (HMDB). Extends the `MsBackendDataFrame` and keeps thus all data, after
+  import, in memory.
 - `MsBackendSql` (package
   [*MsBackendSql*](https://github.com/rformassspectrometry/MsBackendSql)):
   stores all MS data in a SQL database and has thus a minimal memory footprint.
-
+- `MsBackendCompDb` (package
+  [*CompoundDb*](https://github.com/rformassspectrometry/CompoundDb)): provides
+  access to spectra data (spectra and peaks variables) from a *CompDb*
+  database. Has a small memory footprint because all data (except precursor m/z
+  values) are retrieved on-the-fly from the database.
 - `MsBackendTimsTof` (package
   [*MsBackendTimsTof*](https://github.com/rformassspectrometry/MsBackendTimsTof):
   allows import of data from Bruker TimsTOF raw data files (using the
   `opentimsr` R package).
-
 - `MsBackendWeizMass` (package
   [*MsBackendWeizMass*](https://github.com/rformassspectrometry/MsBackendWeizMass):
   allows to access MS data from WeizMass MS/MS spectral databases.
@@ -118,6 +95,4 @@ BiocManager::install("Spectra") Contributions are highly welcome and should follow the [contribution guidelines](https://rformassspectrometry.github.io/RforMassSpectrometry/articles/RforMassSpectrometry.html#contributions). Also, please check the coding style guidelines in the [RforMassSpectrometry -vignette](https://rformassspectrometry.github.io/RforMassSpectrometry/articles/RforMassSpectrometry.html) -and importantly, follow our [code of -conduct](https://rformassspectrometry.github.io/RforMassSpectrometry/articles/RforMassSpectrometry.html#code-of-conduct). +vignette](https://rformassspectrometry.github.io/RforMassSpectrometry/articles/RforMassSpectrometry.html). diff --git a/inst/test_backends/test_MsBackend/test_spectra_subsetting.R b/inst/test_backends/test_MsBackend/test_spectra_subsetting.R index 98788c2d..84a69f60 100644 --- a/inst/test_backends/test_MsBackend/test_spectra_subsetting.R +++ b/inst/test_backends/test_MsBackend/test_spectra_subsetting.R @@ -49,24 +49,6 @@ test_that("[", { res <- be[integer()] expect_s4_class(res, class(be)[1L]) expect_true(length(res) == 0L) - - ## logical - l <- rep(FALSE, length(be)) - l[sample(seq_along(l), floor(length(l) / 2))] <- TRUE - res <- be[l] - expect_true(validObject(res)) - expect_true(length(res) == sum(l)) - expect_equal(res, be[which(l)]) -}) - -#' extractByIndex. 
Uses [ if not implemented -test_that("extractByIndex", { - i <- sample(seq_along(be), floor(length(be) / 2)) - res <- extractByIndex(be, i) - expect_true(validObject(res)) - expect_equal(length(res), length(i)) - expect_equal(msLevel(res), msLevel(be)[i]) - expect_equal(rtime(res), rtime(be)[i]) }) test_that("cbind2 works", { @@ -105,9 +87,8 @@ test_that("selectSpectraVariables", { if (!isReadOnly(be) || inherits(be, "MsBackendCached") || inherits(be, "MsBackendDataFrame")) { tmp <- be - res <- selectSpectraVariables( - tmp, union(c("mz", "intensity", "dataStorage", "scanIndex"), - backendRequiredSpectraVariables(be))) + res <- selectSpectraVariables(tmp, c("mz", "intensity", + "dataStorage", "scanIndex")) expect_true(all(names(coreSpectraVariables()) %in% spectraVariables(res))) expect_true(all(is.na(res$msLevel))) diff --git a/man/MsBackend.Rd b/man/MsBackend.Rd index 7b22f696..dc410ae4 100644 --- a/man/MsBackend.Rd +++ b/man/MsBackend.Rd @@ -17,14 +17,6 @@ \alias{backendInitialize} \alias{backendParallelFactor,MsBackendMzR-method} \alias{backendParallelFactor,MsBackendHdf5Peaks-method} -\alias{dataStorageBasePath} -\alias{dataStorageBasePath,MsBackendMzR-method} -\alias{dataStorageBasePath<-} -\alias{dataStorageBasePath<-,MsBackendMzR-method} -\alias{extractByIndex} -\alias{msLeveL<-,MsBackend-method} -\alias{backendRequiredSpectraVariables} -\alias{backendRequiredSpectraVariables,MsBackend-method} \alias{backendBpparam,MsBackend-method} \alias{backendInitialize,MsBackend-method} \alias{backendMerge,list-method} @@ -44,8 +36,6 @@ \alias{dataStorage,MsBackend-method} \alias{dataStorage<-,MsBackend-method} \alias{dropNaSpectraVariables,MsBackend-method} -\alias{extractByIndex,MsBackend,ANY-method} -\alias{extractByIndex,MsBackend,missing-method} \alias{filterAcquisitionNum,MsBackend-method} \alias{filterDataOrigin,MsBackend-method} \alias{filterDataStorage,MsBackend-method} @@ -75,7 +65,6 @@ \alias{isReadOnly,MsBackend-method} \alias{length,MsBackend-method} 
\alias{msLevel,MsBackend-method} -\alias{msLevel<-,MsBackend-method} \alias{mz,MsBackend-method} \alias{mz<-,MsBackend-method} \alias{lengths,MsBackend-method} @@ -85,7 +74,6 @@ \alias{precursorCharge,MsBackend-method} \alias{precursorIntensity,MsBackend-method} \alias{precursorMz,MsBackend-method} -\alias{precursorMz<-,MsBackend-method} \alias{peaksData<-,MsBackend-method} \alias{reset,MsBackend-method} \alias{rtime,MsBackend-method} @@ -106,8 +94,6 @@ \alias{$<-,MsBackend-method} \alias{[[,MsBackend-method} \alias{[[<-,MsBackend-method} -\alias{dataStorageBasePath,MsBackend-method} -\alias{dataStorageBasePath<-,MsBackend-method} \alias{MsBackendDataFrame} \alias{backendInitialize,MsBackendDataFrame-method} \alias{MsBackendHdf5Peaks} @@ -154,10 +140,6 @@ \S4method{dropNaSpectraVariables}{MsBackend}(object) -\S4method{extractByIndex}{MsBackend,ANY}(object, i) - -\S4method{extractByIndex}{MsBackend,missing}(object, i) - \S4method{filterAcquisitionNum}{MsBackend}(object, n, file, ...) \S4method{filterDataOrigin}{MsBackend}(object, dataOrigin = character()) @@ -228,8 +210,6 @@ \S4method{msLevel}{MsBackend}(object) -\S4method{msLevel}{MsBackend}(object) <- value - \S4method{mz}{MsBackend}(object) \S4method{mz}{MsBackend}(object) <- value @@ -248,8 +228,6 @@ \S4method{precursorMz}{MsBackend}(object) -\S4method{precursorMz}{MsBackend}(object, ...) <- value - \S4method{peaksData}{MsBackend}(object) <- value \S4method{reset}{MsBackend}(object) @@ -294,10 +272,6 @@ \S4method{uniqueMsLevels}{MsBackend}(object, ...) -\S4method{dataStorageBasePath}{MsBackend}(object) - -\S4method{dataStorageBasePath}{MsBackend}(object) <- value - MsBackendDataFrame() \S4method{backendInitialize}{MsBackendDataFrame}(object, data, peaksVariables = c("mz", "intensity"), ...) @@ -337,8 +311,6 @@ length as the number of spectra in the backend.} \item{value}{replacement value for \verb{<-} methods. 
See individual method description or expected data type.} -\item{i}{For \code{[}: \code{integer}, \code{logical} or \code{character} to subset the object.} - \item{n}{for \code{filterAcquisitionNum()}: \code{integer} with the acquisition numbers to filter for.} @@ -432,6 +404,8 @@ reported total ion current should be reported, or whether the total ion current should be (re)calculated on the actual data (\code{initial = FALSE}).} +\item{i}{For \code{[}: \code{integer}, \code{logical} or \code{character} to subset the object.} + \item{j}{For \code{[}: not supported.} \item{name}{For \code{$} and \verb{$<-}: the name of the spectra variable to return @@ -540,9 +514,7 @@ detailed description and examples): allowed. Parameter \code{i} should support \code{integer} indices and \code{logical} and should throw an error if \code{i} is out of bounds. The \code{MsCoreUtils::i2index} could be used to check the input \code{i}. -For \code{i = integer()} an empty backend should be returned. Implementation -of this method is optional, as the default calls the \code{extractByIndex()} -method (which has to be implemented as the main subsetting method). +For \code{i = integer()} an empty backend should be returned. \item \code{$}, \verb{$<-}: access or set/add a single spectrum variable (column) in the backend. Using a \code{value} of \code{NULL} should allow deleting the specified spectra variable. An error should be thrown if the spectra variable is not @@ -588,20 +560,12 @@ The default implementation returns a factor of length 0 (\code{factor()}) providing thus no default splitting. \code{backendParallelFactor()} for \code{MsBackendMzR} on the other hand returns \code{factor(dataStorage(object))} hence suggesting to split the object by data file. -\item \code{backendRequiredSpectraVariables()}: returns a \code{character} with spectra -variable names that are mandatory for a specific backend. The default -returns an empty \code{character()}. 
The implementation for \code{MsBackendMzR} -returns \code{c("dataStorage", "scanIndex")} as these two spectra variables -are required to load the MS data on-the-fly. This method needs only to -be implemented if a backend requires specific variables to be defined. \item \code{dataOrigin()}: gets a \code{character} of length equal to the number of spectra in \code{object} with the \emph{data origin} of each spectrum. This could e.g. be the mzML file from which the data was read. \item \code{dataStorage()}: gets a \code{character} of length equal to the number of spectra in \code{object} with the data storage of each spectrum. Note that missing values (\code{NA_character_}) are not supported for \code{dataStorage}. -\item \code{dataStorageBasePath()}, \verb{dataStorageBasePath<-: gets or sets the common *base* path of the directory containing all data files. If supported, the function is expected to return (or accept) a }character\verb{of length 1. Most backends (such as for example the}MsBackendMemory\verb{will not support this function and}dataStorageBasePath()\verb{will return}NA_character_\verb{. For }MsBackendMzR\verb{, this function allows to get or change the path to the directory containing the original data files, which is required if e.g. a serialized }MsBackendMzR` instance gets copied to another computer or -file system. \item \code{dropNaSpectraVariables()}: removes spectra variables (i.e. columns in the object's \code{spectraData} that contain only missing values (\code{NA}). Note that while columns with only \code{NA}s are removed, a \code{spectraData()} call after @@ -635,16 +599,6 @@ queue) are applied prior to export - this would not be possible with only a for the \code{MsBackendMzR} backend that supports export of the data in \emph{mzML} or \emph{mzXML} format. See the documentation for the \code{MsBackendMzR} class below for more information. 
-\item \code{extractByIndex()}: function to subset a backend to selected elements -defined by the provided index. Similar to \code{[}, this method should allow -extracting (or to subset) the data in any order. In contrast to \code{[}, -however, \code{i} is expected to be an \code{integer} (while \code{[} should also -support \code{logical} and eventually \code{character}). While being apparently -redundant to \code{[}, this methods avoids package namespace errors/problems -that can result in implementations of \code{[} being not found by R (which -can happen sometimes in parallel processing using the \code{\link[=SnowParam]{SnowParam()}}). This -method is used internally by \code{Spectra} to extract/subset its backend. -Implementation of this method is mandatory. \item \code{filterAcquisitionNum()}: filters the object keeping only spectra matching the provided acquisition numbers (argument \code{n}). If \code{dataOrigin} or \code{dataStorage} is also provided, \code{object} is subsetted to the spectra with @@ -765,7 +719,6 @@ number of spectra). For empty spectra, \code{0} is returned. \item \code{msLevel()}: gets the spectra's MS level. Returns an \code{integer} vector (of length equal to the number of spectra) with the MS level for each spectrum (or \code{NA_integer_} if not available). -\item \verb{msLevel<-}: replaces the spectra's MS level. \item \code{mz()}: gets the mass-to-charge ratios (m/z) from the spectra. Returns a \code{\link[=NumericList]{NumericList()}} or length equal to the number of spectra, each element a \code{numeric} vector with the m/z values of @@ -984,7 +937,7 @@ This backend provides an \code{export()} method to export data from a \code{Spec The parameters are: \itemize{ \item \code{object}: an instance of the \code{MsBackendMzR} class. -\item \code{x}: the \link{Spectra} object to be exported. +\item \code{x}: the \linkS4class{Spectra} object to be exported. \item \code{file}: \code{character} with the (full) output file name(s). 
Should be of length 1 or equal \code{length(x)}. If a single file is specified, all spectra are exported to that file. Alternatively it is possible to specify @@ -998,7 +951,7 @@ backend and if \code{dataOrigin(x)} contains the original MS data file names. \item \code{BPPARAM}: parallel processing settings. } -See examples in \link{Spectra} or the vignette for more details and +See examples in \linkS4class{Spectra} or the vignette for more details and examples. The \code{MsBackendMzR} ignores parameter \code{columns} of the \code{peaksData()} @@ -1136,5 +1089,5 @@ be$peak_ann <- NULL peaksVariables(be) } \author{ -Johannes Rainer, Sebastian Gibb, Laurent Gatto, Philippine Louail +Johannes Rainer, Sebastian Gibb, Laurent Gatto } diff --git a/man/MsBackendCached.Rd b/man/MsBackendCached.Rd index ae8c6687..e65e41e9 100644 --- a/man/MsBackendCached.Rd +++ b/man/MsBackendCached.Rd @@ -5,7 +5,6 @@ \alias{MsBackendCached-class} \alias{backendInitialize,MsBackendCached-method} \alias{dataStorage,MsBackendCached-method} -\alias{extractByIndex,MsBackendCached,ANY-method} \alias{length,MsBackendCached-method} \alias{spectraVariables,MsBackendCached-method} \alias{spectraData,MsBackendCached-method} @@ -58,8 +57,6 @@ MsBackendCached() \S4method{dataStorage}{MsBackendCached}(object) -\S4method{extractByIndex}{MsBackendCached,ANY}(object, i) - \S4method{length}{MsBackendCached}(x) \S4method{spectraVariables}{MsBackendCached}(object) @@ -153,8 +150,6 @@ variables to keep.} \item{...}{ignored} -\item{i}{For \code{[}: \code{integer} with the indices to subset the object.} - \item{x}{A \code{MsBackendCached} object.} \item{columns}{For \code{spectraData()}: \code{character} with the names of the spectra @@ -163,6 +158,8 @@ variables to retrieve.} \item{value}{replacement value for \verb{<-} methods. 
See individual method description or expected data type.} +\item{i}{For \code{[}: \code{integer} with the indices to subset the object.} + \item{j}{For \code{[}: ignored.} \item{drop}{For \code{[}: not considered.} diff --git a/man/Spectra.Rd b/man/Spectra.Rd index 5e4baaf7..f6a2ebc3 100644 --- a/man/Spectra.Rd +++ b/man/Spectra.Rd @@ -1,21 +1,173 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Spectra.R -\name{Spectra} +% Please edit documentation in R/Spectra-functions.R, R/Spectra.R +\name{applyProcessing} +\alias{applyProcessing} +\alias{concatenateSpectra} +\alias{combineSpectra} +\alias{joinSpectraData} +\alias{processingLog} +\alias{deisotopeSpectra} +\alias{reduceSpectra} +\alias{filterPrecursorMaxIntensity} +\alias{filterPrecursorIsotopes} +\alias{scalePeaks} +\alias{filterPrecursorPeaks} \alias{Spectra} \alias{Spectra-class} -\alias{setBackend} -\alias{export} +\alias{[,Spectra-method} +\alias{uniqueMsLevels} +\alias{uniqueMsLevels,Spectra-method} +\alias{combinePeaks} \alias{Spectra,missing-method} \alias{Spectra,MsBackend-method} \alias{Spectra,character-method} \alias{Spectra,ANY-method} \alias{setBackend,Spectra,MsBackend-method} +\alias{c,Spectra-method} +\alias{split,Spectra,ANY-method} \alias{export,Spectra-method} +<<<<<<< HEAD \alias{dataStorageBasePath,Spectra-method} \alias{dataStorageBasePath<-,Spectra-method} \alias{cbind2,Spectra,dataframeOrDataFrame-method} +======= +\alias{acquisitionNum,Spectra-method} +\alias{peaksData,Spectra-method} +\alias{peaksVariables,Spectra-method} +\alias{centroided,Spectra-method} +\alias{centroided<-,Spectra-method} +\alias{collisionEnergy,Spectra-method} +\alias{collisionEnergy<-,Spectra-method} +\alias{dataOrigin,Spectra-method} +\alias{dataOrigin<-,Spectra-method} +\alias{dataStorage,Spectra-method} +\alias{dropNaSpectraVariables,Spectra-method} +\alias{intensity,Spectra-method} +\alias{ionCount,Spectra-method} +\alias{isCentroided,Spectra-method} 
+\alias{isEmpty,Spectra-method} +\alias{isolationWindowLowerMz,Spectra-method} +\alias{isolationWindowLowerMz<-,Spectra-method} +\alias{isolationWindowTargetMz,Spectra-method} +\alias{isolationWindowTargetMz<-,Spectra-method} +\alias{isolationWindowUpperMz,Spectra-method} +\alias{isolationWindowUpperMz<-,Spectra-method} +\alias{containsMz,Spectra-method} +\alias{containsNeutralLoss,Spectra-method} +\alias{spectrapply,Spectra-method} +\alias{length,Spectra-method} +\alias{msLevel,Spectra-method} +\alias{mz,Spectra-method} +\alias{lengths,Spectra-method} +\alias{polarity,Spectra-method} +\alias{polarity<-,Spectra-method} +\alias{precScanNum,Spectra-method} +\alias{precursorCharge,Spectra-method} +\alias{precursorIntensity,Spectra-method} +\alias{precursorMz,Spectra-method} +\alias{rtime,Spectra-method} +\alias{rtime<-,Spectra-method} +\alias{scanIndex,Spectra-method} +\alias{selectSpectraVariables,Spectra-method} +\alias{smoothed,Spectra-method} +\alias{smoothed<-,Spectra-method} +\alias{spectraData,Spectra-method} +\alias{spectraData<-,Spectra-method} +\alias{spectraNames,Spectra-method} +\alias{spectraNames<-,Spectra-method} +\alias{spectraVariables,Spectra-method} +\alias{tic,Spectra-method} +\alias{$,Spectra-method} +\alias{$<-,Spectra-method} +\alias{[[,Spectra-method} +\alias{[[<-,Spectra-method} +\alias{cbind2,Spectra,dataframeOrDataFrame-method} +\alias{filterAcquisitionNum,Spectra-method} +\alias{filterEmptySpectra,Spectra-method} +\alias{filterDataOrigin,Spectra-method} +\alias{filterDataStorage,Spectra-method} +\alias{filterFourierTransformArtefacts,Spectra-method} +\alias{filterIntensity,Spectra-method} +\alias{filterIsolationWindow,Spectra-method} +\alias{filterMsLevel,Spectra-method} +\alias{filterMzRange,Spectra-method} +\alias{filterMzValues,Spectra-method} +\alias{filterPolarity,Spectra-method} +\alias{filterPrecursorMz,Spectra-method} +\alias{filterPrecursorMzRange,Spectra-method} +\alias{filterPrecursorMzValues,Spectra-method} 
+\alias{filterPrecursorCharge,Spectra-method} +\alias{filterPrecursorScan,Spectra-method} +\alias{filterRt,Spectra-method} +\alias{reset,Spectra-method} +\alias{filterRanges,Spectra-method} +\alias{filterValues,Spectra-method} +\alias{bin,Spectra-method} +\alias{compareSpectra,Spectra,Spectra-method} +\alias{compareSpectra,Spectra,missing-method} +\alias{pickPeaks,Spectra-method} +\alias{replaceIntensitiesBelow,Spectra-method} +\alias{smooth,Spectra-method} +\alias{addProcessing,Spectra-method} +\alias{coreSpectraVariables} +\alias{backendBpparam,Spectra-method} +\alias{combinePeaks,Spectra-method} +\alias{entropy,Spectra-method} +\alias{entropy,ANY-method} +>>>>>>> parent of 7326508 (Merge branch 'main' into phili) \title{The Spectra class to manage and access MS data} \usage{ +applyProcessing( + object, + f = processingChunkFactor(object), + BPPARAM = bpparam(), + ... +) + +concatenateSpectra(x, ...) + +combineSpectra( + x, + f = x$dataStorage, + p = x$dataStorage, + FUN = combinePeaksData, + ..., + BPPARAM = bpparam() +) + +joinSpectraData(x, y, by.x = "spectrumId", by.y, suffix.y = ".y") + +processingLog(x) + +deisotopeSpectra( + x, + substDefinition = isotopicSubstitutionMatrix("HMDB_NEUTRAL"), + tolerance = 0, + ppm = 20, + charge = 1 +) + +reduceSpectra(x, tolerance = 0, ppm = 20) + +filterPrecursorMaxIntensity(x, tolerance = 0, ppm = 20) + +filterPrecursorIsotopes( + x, + tolerance = 0, + ppm = 20, + substDefinition = isotopicSubstitutionMatrix("HMDB_NEUTRAL") +) + +scalePeaks(x, by = sum, msLevel. = uniqueMsLevels(x)) + +filterPrecursorPeaks( + object, + tolerance = 0, + ppm = 20, + mz = c("==", ">="), + msLevel. = uniqueMsLevels(object) +) + \S4method{Spectra}{missing}( object, processingQueue = list(), @@ -61,26 +213,306 @@ BPPARAM = bpparam() ) +\S4method{c}{Spectra}(x, ...) + +\S4method{split}{Spectra,ANY}(x, f, drop = FALSE, ...) + \S4method{export}{Spectra}(object, backend, ...) 
-\S4method{dataStorageBasePath}{Spectra}(object) +\S4method{acquisitionNum}{Spectra}(object) \S4method{dataStorageBasePath}{Spectra}(object) <- value \S4method{cbind2}{Spectra,dataframeOrDataFrame}(x, y, ...) +<<<<<<< HEAD } \arguments{ \item{object}{For \code{Spectra()}: an object to instantiate the \code{Spectra} object and initialize the with data.. See section on creation of \code{Spectra} objects for details. For all other methods a \code{Spectra} object.} +======= + +\S4method{filterAcquisitionNum}{Spectra}( + object, + n = integer(), + dataStorage = character(), + dataOrigin = character() +) + +\S4method{filterEmptySpectra}{Spectra}(object) + +\S4method{filterDataOrigin}{Spectra}(object, dataOrigin = character()) + +\S4method{filterDataStorage}{Spectra}(object, dataStorage = character()) + +\S4method{filterFourierTransformArtefacts}{Spectra}( + object, + halfWindowSize = 0.05, + threshold = 0.2, + keepIsotopes = TRUE, + maxCharge = 5, + isotopeTolerance = 0.005 +) + +\S4method{filterIntensity}{Spectra}( + object, + intensity = c(0, Inf), + msLevel. = uniqueMsLevels(object), + ... +) + +\S4method{filterIsolationWindow}{Spectra}(object, mz = numeric()) + +\S4method{filterMsLevel}{Spectra}(object, msLevel. = integer()) + +\S4method{filterMzRange}{Spectra}( + object, + mz = numeric(), + msLevel. = uniqueMsLevels(object), + keep = TRUE +) + +\S4method{filterMzValues}{Spectra}( + object, + mz = numeric(), + tolerance = 0, + ppm = 20, + msLevel. 
= uniqueMsLevels(object), + keep = TRUE +) + +\S4method{filterPolarity}{Spectra}(object, polarity = integer()) + +\S4method{filterPrecursorMz}{Spectra}(object, mz = numeric()) + +\S4method{filterPrecursorMzRange}{Spectra}(object, mz = numeric()) + +\S4method{filterPrecursorMzValues}{Spectra}(object, mz = numeric(), ppm = 20, tolerance = 0) + +\S4method{filterPrecursorCharge}{Spectra}(object, z = integer()) + +\S4method{filterPrecursorScan}{Spectra}(object, acquisitionNum = integer(), f = dataOrigin(object)) + +\S4method{filterRt}{Spectra}(object, rt = numeric(), msLevel. = uniqueMsLevels(object)) + +\S4method{reset}{Spectra}(object, ...) + +\S4method{filterRanges}{Spectra}( + object, + spectraVariables = character(), + ranges = numeric(), + match = c("all", "any") +) + +\S4method{filterValues}{Spectra}( + object, + spectraVariables = character(), + values = numeric(), + ppm = 0, + tolerance = 0, + match = c("all", "any") +) + +\S4method{bin}{Spectra}( + x, + binSize = 1L, + breaks = NULL, + msLevel. = uniqueMsLevels(x), + FUN = sum, + zero.rm = TRUE +) + +\S4method{compareSpectra}{Spectra,Spectra}( + x, + y, + MAPFUN = joinPeaks, + tolerance = 0, + ppm = 20, + FUN = ndotproduct, + ..., + SIMPLIFY = TRUE +) + +\S4method{compareSpectra}{Spectra,missing}( + x, + y = NULL, + MAPFUN = joinPeaks, + tolerance = 0, + ppm = 20, + FUN = ndotproduct, + ..., + SIMPLIFY = TRUE +) + +\S4method{pickPeaks}{Spectra}( + object, + halfWindowSize = 2L, + method = c("MAD", "SuperSmoother"), + snr = 0, + k = 0L, + descending = FALSE, + threshold = 0, + msLevel. = uniqueMsLevels(object), + ... +) + +\S4method{replaceIntensitiesBelow}{Spectra}( + object, + threshold = min, + value = 0, + msLevel. = uniqueMsLevels(object) +) + +\S4method{smooth}{Spectra}( + x, + halfWindowSize = 2L, + method = c("MovingAverage", "WeightedMovingAverage", "SavitzkyGolay"), + msLevel. = uniqueMsLevels(x), + ... 
+)
+
+\S4method{addProcessing}{Spectra}(object, FUN, ..., spectraVariables = character())
+
+coreSpectraVariables()
+
+\S4method{uniqueMsLevels}{Spectra}(object, ...)
+
+\S4method{backendBpparam}{Spectra}(object, BPPARAM = bpparam())
+
+\S4method{combinePeaks}{Spectra}(
+  object,
+  tolerance = 0,
+  ppm = 20,
+  intensityFun = base::mean,
+  mzFun = base::mean,
+  weighted = TRUE,
+  msLevel. = uniqueMsLevels(object),
+  ...
+)
+
+\S4method{entropy}{Spectra}(object, normalized = TRUE)
+
+\S4method{entropy}{ANY}(object, ...)
+}
+\arguments{
+\item{object}{For \code{Spectra()}: either a \code{DataFrame} or \code{missing}. See
+section on creation of \code{Spectra} objects for details. For all other
+methods a \code{Spectra} object.}
+
+\item{f}{For \code{split()}: factor defining how to split \code{x}. See \code{\link[base:split]{base::split()}}
+for details. For \code{setBackend()}: factor defining how to split the data
+for parallelized copying of the spectra data to the new backend. For some
+backends changing this parameter can lead to errors.
+For \code{combineSpectra()}: \code{factor} defining the grouping of the spectra
+that should be combined. For \code{spectrapply()}: \code{factor} how \code{object}
+should be split. For \code{filterPrecursorScan()}: defining which spectra
+belong to the same original data file (sample): Defaults to
+\code{f = dataOrigin(x)}.
+For \code{intensity()}, \code{mz()} and \code{peaksData()}: factor defining how data
+should be chunk-wise loaded and processed. Defaults to
+\code{\link[=processingChunkFactor]{processingChunkFactor()}}.}
+
+\item{BPPARAM}{Parallel setup configuration. See \code{\link[=bpparam]{bpparam()}} for more
+information.
This is passed directly to the \code{\link[=backendInitialize]{backendInitialize()}} method +of the \linkS4class{MsBackend}.} + +\item{...}{Additional arguments.} + +\item{x}{A \code{Spectra} object.} + +\item{p}{For \code{combineSpectra()}: \code{factor} defining how to split the input +\code{Spectra} for parallel processing. Defaults to \code{x$dataStorage}, i.e., +depending on the used backend, per-file parallel processing will be +performed.} + +\item{FUN}{For \code{addProcessing()}: function to be applied to the peak matrix +of each spectrum in \code{object}. For \code{compareSpectra()}: function to compare +intensities of peaks between two spectra with each other. +For \code{combineSpectra()}: function to combine the (peak matrices) of the +spectra. See section \emph{Data manipulations} and examples below for more +details. +For \code{bin()}: function to aggregate intensity values of peaks falling +into the same bin. Defaults to \code{FUN = sum} thus summing up intensities. +For \code{spectrapply()} and \code{chunkapply()}: function to be applied to +\code{Spectra}.} + +\item{y}{A \code{Spectra} object. +- For \code{joinSpectraData()}: a \code{DataFrame}. +- For \code{cbind2()} a \code{data.frame}, \code{DataFrame} or \code{matrix}.} + +\item{by.x}{A \code{character(1)} specifying the spectra variable used +for merging. Default is \code{"spectrumId"}.} + +\item{by.y}{A \code{character(1)} specifying the column used for +merging. Set to \code{by.x} if missing.} + +\item{suffix.y}{A \code{character(1)} specifying the suffix to be used +for making the names of columns in the merged spectra variables +unique. This suffix will be used to amend \code{names(y)}, while +\code{spectraVariables(x)} will remain unchanged.} + +\item{substDefinition}{For \code{deisotopeSpectra()} and +\code{filterPrecursorIsotopes()}: \code{matrix} or \code{data.frame} with definitions +of isotopic substitutions. 
Uses by default isotopic substitutions +defined from all compounds in the Human Metabolome Database (HMDB). See +\code{\link[=isotopologues]{isotopologues()}} or \code{\link[=isotopicSubstitutionMatrix]{isotopicSubstitutionMatrix()}} for details.} + +\item{tolerance}{For \code{compareSpectra()}, \code{containsMz()}, +\code{deisotopeSpectra()}, \code{filterMzValues()} and \code{reduceSpectra()}: +\code{numeric(1)} allowing to define a constant maximal accepted difference +between m/z values for peaks to be matched (or grouped). For +\code{containsMz()} it can also be of length equal \code{mz} to specify a different +tolerance for each m/z value. +For \code{filterPrecursorMaxIntensity()}: \code{numeric(1)} defining the +(constant) maximal accepted difference of precursor m/z values of +spectra for grouping them into \emph{precursor groups}. For +\code{filterPrecursorIsotopes()}: passed directly to the \code{\link[=isotopologues]{isotopologues()}} +function. For \code{filterValues()}: \code{numeric} of any length allowing to +define a maximal accepted difference between user input \code{values} and the +\code{spectraVariables} values. If it is not equal to the length of the +value provided with parameter \code{spectraVariables}, \code{tolerance[1]} will be +recycled. Default is \code{tolerance = 0}} + +\item{ppm}{For \code{compareSpectra()}, \code{containsMz()}, \code{deisotopeSpectra()}, +\code{filterMzValues()} and \code{reduceSpectra()}: \code{numeric(1)} +defining a relative, m/z-dependent, maximal accepted difference between +m/z values for peaks to be matched (or grouped). +For \code{filterPrecursorMaxIntensity()}: \code{numeric(1)} defining the relative +maximal accepted difference of precursor m/z values of spectra for +grouping them into \emph{precursor groups}. For \code{filterPrecursorIsotopes()}: +passed directly to the \code{\link[=isotopologues]{isotopologues()}} function. 
+For \code{filterValues()}: \code{numeric} of any length allowing to define +a maximal accepted difference between user input \code{values} and the +\code{spectraVariables} values. If it is not equal to the length of the +value provided with parameter \code{spectraVariables}, \code{ppm[1]} will be +recycled.} + +\item{charge}{For \code{deisotopeSpectra()}: expected charge of the ionized +compounds. See \code{\link[=isotopologues]{isotopologues()}} for details.} + +\item{by}{For \code{scalePeaks()}: function to calculate a single \code{numeric} from +intensity values of a spectrum by which all intensities (of +that spectrum) should be divided by. The default \code{by = sum} will +divide intensities of each spectrum by the sum of intensities of that +spectrum.} + +\item{msLevel.}{\code{integer} defining the MS level(s) of the spectra to which +the function should be applied (defaults to all MS levels of \code{object}. +For \code{filterMsLevel()}: the MS level to which \code{object} should be +subsetted.} + +\item{mz}{For \code{filterIsolationWindow()}: \code{numeric(1)} with the m/z value to +filter the object. For \code{filterPrecursorMz()} and \code{filterMzRange()}: +\code{numeric(2)} defining the lower and upper m/z boundary. +For \code{filterMzValues()} and \code{filterPrecursorMzValues()}: \code{numeric} with +the m/z values to match peaks or precursor m/z against.} +>>>>>>> parent of 7326508 (Merge branch 'main' into phili) \item{processingQueue}{For \code{Spectra()}: optional \code{list} of \linkS4class{ProcessingStep} objects.} \item{metadata}{For \code{Spectra()}: optional \code{list} with metadata information.} -\item{...}{Additional arguments.} - \item{backend}{For \code{Spectra()}: \linkS4class{MsBackend} to be used as backend. See section on creation of \code{Spectra} objects for details. For \code{setBackend()}: instance of \linkS4class{MsBackend} that supports \code{setBackend()} (i.e. 
for @@ -90,56 +522,238 @@ passing the full spectra data to the initialize method. See section on creation of \code{Spectra} objects for details. For \code{export()}: \linkS4class{MsBackend} to be used to export the data.} -\item{BPPARAM}{Parallel setup configuration. See \code{\link[=bpparam]{bpparam()}} for more -information. This is passed directly to the \code{\link[=backendInitialize]{backendInitialize()}} method -of the \linkS4class{MsBackend}.} +\item{source}{For \code{Spectra()}: instance of \linkS4class{MsBackend} that can be used +to import spectrum data from the provided files. See section \emph{Creation +of objects, conversion and changing the backend} for more details.} -\item{source}{For \code{Spectra()}: instance of \linkS4class{MsBackend} that can be -used to import spectrum data from the provided files. See section -\emph{Creation of objects} for more details.} +\item{drop}{For \code{[}, \code{split()}: not considered.} -\item{f}{For \code{setBackend()}: factor defining how to split the data -for parallelized copying of the spectra data to the new backend. For -some backends changing this parameter can lead to errors. Defaults to -\code{\link[=processingChunkFactor]{processingChunkFactor()}}.} +\item{columns}{For \code{spectraData()} accessor: optional \code{character} with +column names (spectra variables) that should be included in the +returned \code{DataFrame}. By default, all columns are returned. +For \code{peaksData()} accessor: optional \code{character} with requested columns +in the individual \code{matrix} of the returned \code{list}. Defaults to +\code{c("mz", "value")} but any values returned by \code{peaksVariables(object)} +with \code{object} being the \code{Spectra} object are supported.} + +\item{value}{replacement value for \verb{<-} methods. 
See individual +method description or expected data type.} + +\item{which}{for \code{containsMz()}: either \code{"any"} or \code{"all"} defining whether +any (the default) or all provided \code{mz} have to be present in the +spectrum.} + +\item{neutralLoss}{for \code{containsNeutralLoss()}: \code{numeric(1)} defining the +value which should be subtracted from the spectrum's precursor m/z.} -\item{value}{For \code{dataStorageBasePath()}: A \code{character} vector that defines -the base directory where the data storage files can be found.} +\item{chunkSize}{For \code{spectrapply()}: size of the chunks into which \code{Spectra} +should be split. This parameter overrides parameters \code{f} and \code{BPPARAM}.} + +\item{use.names}{For \code{lengths()}: ignored.} + +\item{spectraVariables}{\itemize{ +\item For \code{selectSpectraVariables()}: \code{character} with the +names of the spectra variables to which the backend should be +subsetted. +\itemize{ +\item For \code{addProcessing()}: \code{character} with additional spectra variables +that should be passed along to the function defined with \code{FUN}. See +function description for details. +\item For \code{filterRanges()} and \code{filterValues()}: \code{character} vector +specifying the column(s) from \code{spectraData(object)} on which to filter +the data and that correspond to the the names of the spectra variables +that should be used for the filtering. } -\description{ -The \code{Spectra} class encapsules spectral mass spectrometry (MS) data and -related metadata. The MS data is represented by a \emph{backend} extending the -virual \link{MsBackend} class which provides the data to the \code{Spectra} object. -The \code{Spectra} class implements only data accessor, filtering and analysis -methods for the MS data and relies on its \emph{backend} to provide the MS data. -This allows to change data representations of a \code{Spectra} object depending -on the user's needs and properties of the data. 
Different backends and -their properties are explained in the \link{MsBackend} documentation. - -Documentation on other topics and functionality of \code{Spectra}can be found in: +}} + +\item{initial}{For \code{tic()}: \code{logical(1)} whether the initially +reported total ion current should be reported, or whether the +total ion current should be (re)calculated on the actual data +(\code{initial = FALSE}, same as \code{ionCount()}).} + +\item{name}{For \code{$} and \verb{$<-}: the name of the spectra variable to return +or set.} + +\item{i}{For \code{[}: \code{integer}, \code{logical} or \code{character} to subset the object.} + +\item{j}{For \code{[}: not supported.} + +\item{n}{for \code{filterAcquisitionNum()}: \code{integer} with the acquisition +numbers to filter for.} + +\item{dataStorage}{For \code{filterDataStorage()}: \code{character} to define which +spectra to keep. +For \code{filterAcquisitionNum()}: optionally specify if filtering should +occur only for spectra of selected \code{dataStorage}.} + +\item{dataOrigin}{For \code{filterDataOrigin()}: \code{character} to define which +spectra to keep. +For \code{filterAcquisitionNum()}: optionally specify if filtering should +occurr only for spectra of selected \code{dataOrigin}.} + +\item{halfWindowSize}{\itemize{ +\item For \code{pickPeaks()}: \code{integer(1)}, used in the +identification of the mass peaks: a local maximum has to be the maximum +in the window from \code{(i - halfWindowSize):(i + halfWindowSize)}. \itemize{ -\item \code{\link[=spectraData]{spectraData()}} for accessing and using MS data through \code{Spectra} objects. -\item \code{\link[=filterMsLevel]{filterMsLevel()}} to subset and filter \code{Spectra} objects. -\item \code{\link[=plotSpectra]{plotSpectra()}} for visualization of \code{Spectra} orbjects. -\item \code{\link[=processingChunkSize]{processingChunkSize()}} for information on parallel and chunk-wise data -processing. 
-\item \code{\link[=combineSpectra]{combineSpectra()}} for merging, aggregating and splitting of \code{Spectra} -objects. -\item \code{\link[=combinePeaks]{combinePeaks()}} for merging and aggregating \code{Spectra}'s mass peaks data. -\item \code{\link[=addProcessing]{addProcessing()}} for data analysis functions. -\item \code{\link[=compareSpectra]{compareSpectra()}} for spectra similarity calculations. +\item For \code{smooth()}: \code{integer(1)}, used in the smoothing algorithm, the +window reaches from \code{(i - halfWindowSize):(i + halfWindowSize)}. +\item For \code{filterFourierTransformArtefacts()}: \code{numeric(1)} defining the m/z +window left and right of a peak where to remove fourier transform +artefacts. } +}} + +\item{threshold}{\itemize{ +\item For \code{pickPeaks()}: a \code{double(1)} defining the proportion of the maximal +peak intensity. Just values above are used for the weighted mean +calculation. +\itemize{ +\item For \code{replaceIntensitiesBelow()}: a \code{numeric(1)} defining the threshold +or a \code{function} to calculate the threshold for each spectrum on its +intensity values. Defaults to \code{threshold = min}. +\item For \code{filterFourierTransformArtefacts()}: the relative intensity (to a +peak) below which peaks are considered fourier artefacts. Defaults to +\code{threshold = 0.2} hence removing peaks that have an intensity below 0.2 +times the intensity of the tested peak (within the selected +\code{halfWindowSize}). 
+} +}} + +\item{keepIsotopes}{For \code{filterFourierTransformArtefacts()}: whether isotope +peaks should not be removed as fourier artefacts.} + +\item{maxCharge}{For \code{filterFourierTransformArtefacts()}: the maximum charge +to be considered for isotopes.} + +\item{isotopeTolerance}{For \code{filterFourierTransformArtefacts()}: the m/z +\code{tolerance} to be used to define whether peaks might be isotopes of +the current tested peak.} + +\item{intensity}{For \code{filterIntensity()}: \code{numeric} of length 1 or 2 +defining either the lower or the lower and upper intensity limit for the +filtering, or a \code{function} that takes the intensities as input and +returns a \code{logical} (same length then peaks in the spectrum) whether the +peak should be retained or not. Defaults to \code{intensity = c(0, Inf)} thus +only peaks with \code{NA} intensity are removed.} + +\item{keep}{For \code{filterMzValues()} and \code{filterMzRange()}: \code{logical(1)} +whether the matching peaks should be retained (\code{keep = TRUE}, the +default) or dropped (\code{keep = FALSE}).} + +\item{polarity}{for \code{filterPolarity()}: \code{integer} specifying the polarity to +to subset \code{object}.} + +\item{z}{For \code{filterPrecursorCharge()}: \code{integer()} with the precursor +charges to be used as filter.} + +\item{acquisitionNum}{for \code{filterPrecursorScan()}: \code{integer} with the +acquisition number of the spectra to which the object should be +subsetted.} + +\item{rt}{for \code{filterRt()}: \code{numeric(2)} defining the retention time range to +be used to subset/filter \code{object}.} + +\item{ranges}{for \code{filterRanges()}: A \code{numeric} vector of paired values +(upper and lower boundary) that define the ranges to filter the \code{object}. 
+These paired values need to be in the same order as the
+\code{spectraVariables} parameter (see below).}
+
+\item{match}{For \code{filterRanges()} and \code{filterValues()}: \code{character(1)}
+defining whether the condition has to match for all provided
+\code{ranges}/\code{values} (\code{match = "all"}; the default), or for any of them
+(\code{match = "any"}) for spectra to be retained.}
+
+\item{values}{for \code{filterValues()}: A \code{numeric} vector that defines the
+values to filter the Spectra data. These values need to be in the same
+order as the \code{spectraVariables} parameter.}
+
+\item{binSize}{For \code{bin()}: \code{numeric(1)} defining the size for the m/z bins.
+Defaults to \code{binSize = 1}.}
+
+\item{breaks}{For \code{bin()}: \code{numeric} defining the m/z breakpoints between
+bins.}
+
+\item{zero.rm}{\code{logical}. For \code{bin()}: indicating whether to remove bins
+with zero intensity. Defaults to \code{TRUE}, meaning the function will
+discard bins created with an intensity of 0 to enhance memory efficiency.}
+
+\item{MAPFUN}{For \code{compareSpectra()}: function to map/match peaks between the
+two compared spectra. See \code{\link[=joinPeaks]{joinPeaks()}} for more information and possible
+functions.}
+
+\item{SIMPLIFY}{For \code{compareSpectra()} whether the result matrix should be
+\emph{simplified} to a \code{numeric} if possible (i.e. if either \code{x} or \code{y} is
+of length 1).}
+
+\item{method}{\itemize{
+\item For \code{pickPeaks()}: \code{character(1)}, the noise estimators that
+should be used, currently the \emph{M}edian \emph{A}bsolute \emph{D}eviation
+(\code{method = "MAD"}) and Friedman's Super Smoother
+(\code{method = "SuperSmoother"}) are supported.
+\itemize{
+\item For \code{smooth()}: \code{character(1)}, the smoothing function that should be
+used, currently, the Moving-Average- (\code{method = "MovingAverage"}),
+Weighted-Moving-Average- (\verb{method = "WeightedMovingAverage")},
+Savitzky-Golay-Smoothing (\code{method = "SavitzkyGolay"}) are supported.
+}
+}}
+
+\item{snr}{For \code{pickPeaks()}: \code{double(1)} defining the
+\emph{S}ignal-to-\emph{N}oise-\emph{R}atio. The intensity of a local maximum has to be
+higher than \code{snr * noise} to be considered as a peak.}
+
+\item{k}{For \code{pickPeaks()}: \code{integer(1)}, number of values left and right of
+the peak that should be considered in the weighted mean calculation.}
+
+\item{descending}{For \code{pickPeaks()}: \code{logical}, if \code{TRUE} just values between
+the nearest valleys around the peak centroids are used.}
+
+\item{intensityFun}{For \code{combinePeaks()}: function to be used to aggregate
+intensities for all peaks in each peak group into a single intensity
+value.}
+
+\item{mzFun}{For \code{combinePeaks()}: function to aggregate m/z values for all
+peaks within each peak group into a single m/z value. This parameter
+is ignored if \code{weighted = TRUE} (the default).}
+
+\item{weighted}{For \code{combinePeaks()}: \code{logical(1)} whether m/z values of
+peaks within each peak group should be aggregated into a single m/z
+value using an intensity-weighted mean. Defaults to \code{weighted = TRUE}.}
+
+\item{normalized}{for \code{entropy()}: \code{logical(1)} whether the normalized
+entropy should be calculated (default). See also \code{\link[=nentropy]{nentropy()}} for
+details.}
+}
+\value{
+See individual method description for the return value.
+}
+\description{
+The \code{Spectra} class encapsulates spectral mass spectrometry data and
+related metadata.
+
+It supports multiple data backends, e.g.
in-memory (\link{MsBackendMemory}, +\code{\link[=MsBackendDataFrame]{MsBackendDataFrame()}}), on-disk as mzML (\code{\link[=MsBackendMzR]{MsBackendMzR()}}) or HDF5 +(\code{\link[=MsBackendHdf5Peaks]{MsBackendHdf5Peaks()}}). } \details{ The \code{Spectra} class uses by default a lazy data manipulation strategy, i.e. data manipulations such as performed with \code{replaceIntensitiesBelow()} are not applied immediately to the data, but applied on-the-fly to the -spectrum data once it is retrieved. This enables data manipulation -operations also for \emph{read only} data representations. For some backends that -allow to write data back to the data storage (such as the -\code{\link[=MsBackendMemory]{MsBackendMemory()}}, \code{\link[=MsBackendDataFrame]{MsBackendDataFrame()}} and \code{\link[=MsBackendHdf5Peaks]{MsBackendHdf5Peaks()}}) it -is possible to apply to queue with the \code{\link[=applyProcessing]{applyProcessing()}} function (see -the \code{\link[=applyProcessing]{applyProcessing()}} function for details). +spectrum data once it is retrieved. For some backends that allow to write +data back to the data storage (such as the \code{\link[=MsBackendMemory]{MsBackendMemory()}}, +\code{\link[=MsBackendDataFrame]{MsBackendDataFrame()}} and \code{\link[=MsBackendHdf5Peaks]{MsBackendHdf5Peaks()}}) it is possible to apply +to queue with the \code{applyProcessing} function. See the *Data manipulation and +analysis \emph{methods} section below for more details. + +For more information on parallel or chunk-wise processing (especially +helpful for very large data sets) see \code{\link[=processingChunkSize]{processingChunkSize()}}. + +To apply arbitrary functions to a \code{Spectra} use the \code{spectrapply()} function +(or directly \code{\link[=chunkapply]{chunkapply()}} for chunk-wise processing). See description of +the \code{spectrapply()} function below for details. + +For details on plotting spectra, see \code{\link[=plotSpectra]{plotSpectra()}}. 
Clarifications regarding scan/acquisition numbers and indices: \itemize{ @@ -156,33 +770,15 @@ the \code{acquisitionNum}) See also \href{https://github.com/lgatto/MSnbase/issues/525}{this issue}. } -\section{Data stored in a \code{Spectra} object}{ - - -The \code{Spectra} object is a container for MS data that includes mass peak -data (\emph{m/z} and related intensity values, also referred to as \emph{peaks data} -in the context of \code{Spectra}) and metadata of individual spectra (so called -\emph{spectra variables}). While a core set of spectra variables (the -\code{coreSpectraVariables()}) are guaranteed to be provided by a -\code{Spectra}, it is possible to add arbitrary additional spectra variables to -a \code{Spectra} object. - -The \code{Spectra} object is designed to contain MS data of a (large) set of mass -spectra. The data is organized \emph{linearly} and can be thought of a list of -mass spectra, i.e. each element in the \code{Spectra} is one spectrum. -} - -\section{Creation of objects}{ +\section{Creation of objects, conversion, changing the backend and export}{ \code{Spectra} classes can be created with the \code{Spectra()} constructor function which supports the following formats: \itemize{ \item parameter \code{object} is a \code{data.frame} or \code{DataFrame} containing the -full spectrum data (spectra variables in columns as well as columns -with the individual MS peak data, \emph{m/z} and intensity). The provided -\code{backend} (by default a \linkS4class{MsBackendMemory}) will be initialized -with that data. +spectrum data. The provided \code{backend} (by default a +\linkS4class{MsBackendMemory}) will be initialized with that data. \item parameter \code{object} is a \linkS4class{MsBackend} (assumed to be already initialized). \item parameter \code{object} is missing, in which case it is supposed that the data @@ -197,79 +793,41 @@ which allows to import spectra data from mzML, mzXML or CDF files. 
With \code{...} additional arguments can be passed to the backend's \code{\link[=backendInitialize]{backendInitialize()}} method. Parameter \code{backend} allows to specify which -\linkS4class{MsBackend} should be used for data representation and storage. -} - -\section{Data representation of a \code{Spectra}}{ - - -The MS data which can be accessed through the \code{Spectra} object is -\emph{represented} by its backend, which means that this backend defines how -and where the data is stored (e.g. in memory or on disk). The \code{Specrta} -object relies on the backend to provide the MS data whenever it needs it -for data processing. -Different backends with different properties, such as minimal memory -requirement or fast data access, are defined in the \emph{Spectra} package or -one of the MsBackend* packages. More information on backends and their -properties is provided in the documentation of \link{MsBackend}. - -On-disk backends keep only a limited amount of data in memory retrieving -most of the data (usually the MS peak data) upon request on-the-fly from -their on-disk data representations. Moving the on-disk data storage of such -a backend or a serialized object to a different location in the file -system will cause data corruption. The \code{dataStorageBasePath()} and -\verb{dataStorageBasePath<-} functions allow in such cases (and if thebackend -classes support this operation), to get or change the \emph{base} -path to the directory of the backend's data storage. In-memory backends -such as \link{MsBackendMemory} or \link{MsBackendDataFrame} keeping all MS data in -memory don't support, and need, this function, but for \link{MsBackendMzR} this -function can be used to update/adapt the path to the directory containing -the original data files. Thus, for \code{Spectra} objects (using this backend) -that were moved to another file system or computer, these functions allow to -adjust/adapt the base file path. 
-} - -\section{Changing data representation of a \code{Spectra}}{ +\linkS4class{MsBackend} should be used for data storage. - -The data representation, i.e. the backend of a \code{Spectra} object can be -changed with the \code{setBackend()} method that takes an instance of the new -backend as second parameter \code{backend}. A call to -\code{setBackend(sps, backend = MsBackendDataFrame())} +The backend of a \code{Spectra} object can be changed with the \code{setBackend()} +method that takes an instance of the new backend as second parameter +\code{backend}. A call to \code{setBackend(sps, backend = MsBackendDataFrame())} would for example change the backend of \code{sps} to the \emph{in-memory} \code{MsBackendDataFrame}. Changing to a backend is only supported if that backend has a \code{data} parameter in its \code{backendInitialize()} method and if \code{supportsSetBackend()} returns \code{TRUE} for that backend. \code{setBackend()} will -transfer the full spectra data from the originating backend as a \code{DataFrame} -to the new backend. - -Generally, it is not possible to change \strong{to} a read-only backend such as -the \code{\link[=MsBackendMzR]{MsBackendMzR()}} backend. +transfer the full spectra data from the originating backend as a +\code{DataFrame} to the new backend. +Most \emph{read-only} backends do not support \code{setBackend()}. It is for example +not possible to change the backend to a \emph{read-only} backend (such as +the \code{\link[=MsBackendMzR]{MsBackendMzR()}} backend). The definition of the function is: \code{setBackend(object, backend, ..., f = dataStorage(object), BPPARAM = bpparam())} and its parameters are: \itemize{ -\item \code{object}: the \code{Spectra} object. -\item \code{backend}: an instance of the new backend, e.g. \verb{[MsBackendMemory()]}. -\item \code{f}: factor allowing to parallelize the change of the backends. 
By -default the process of copying the spectra data from the original to the +\item parameter \code{object}: the \code{Spectra} object. +\item parameter \code{backend}: an instance of the new backend, e.g. +\verb{[MsBackendMemory()]}. +\item parameter \code{f}: factor allowing to parallelize the change of the backends. +By default the process of copying the spectra data from the original to the new backend is performed separately (and in parallel) for each file. Users are advised to use the default setting. -\item \code{...}: optional additional arguments passed to the \code{\link[=backendInitialize]{backendInitialize()}} -method of the new \code{backend}. -\item \code{BPPARAM}: setup for the parallel processing. See \code{\link[=bpparam]{bpparam()}} for +\item parameter \code{...}: optional additional arguments passed to the +\code{\link[=backendInitialize]{backendInitialize()}} method of the new \code{backend}. +\item parameter \code{BPPARAM}: setup for the parallel processing. See \code{\link[=bpparam]{bpparam()}} for details. } -} - -\section{Exporting data from a \code{Spectra} object}{ - Data from a \code{Spectra} object can be \strong{exported} to a file with the -\code{export()} function. The actual export of the data is performed by +\code{export()} function. The actual export of the data has to be performed by the \code{export} method of the \link{MsBackend} class defined with the mandatory -parameter \code{backend} which defines also the format in which the data -is exported. Note however that not all backend classes support +parameter \code{backend}. Note however that not all backend classes support export of data. From the \code{MsBackend} classes in the \code{Spectra} package currently only the \code{MsBackendMzR} backend supports data export (to mzML/mzXML file(s)); see the help page of the \linkS4class{MsBackend} for @@ -290,8 +848,6 @@ parameter \code{backend}. 
\examples{
-## -------- CREATION OF SPECTRA OBJECTS --------
-
 ## Create a Spectra providing a `DataFrame` containing the spectrum data.
 
 spd <- DataFrame(msLevel = c(1L, 2L), rtime = c(1.1, 1.2))
@@ -301,6 +857,12 @@ spd$intensity <- list(c(200, 400, 34.2, 17), c(12.3, 15.2, 6.8))
 data <- Spectra(spd)
 data
 
+## Get the number of spectra
+length(data)
+
+## Get the number of peaks per spectrum
+lengths(data)
+
 ## Create a Spectra from mzML files and use the `MsBackendMzR` on-disk
 ## backend.
 sciex_file <- dir(system.file("sciex", package = "msdata"),
@@ -308,9 +870,6 @@ sciex_file <- dir(system.file("sciex", package = "msdata"),
 sciex <- Spectra(sciex_file, backend = MsBackendMzR())
 sciex
 
-## -------- CHANGING DATA REPRESENTATIONS --------
-
 ## The MS data is on disk and will be read into memory on-demand. We can
 ## however change the backend to a MsBackendMemory backend which will
 ## keep all of the data in memory.
@@ -346,7 +905,319 @@ head(dataOrigin(sciex))
 head(dataOrigin(sciex_im))
 
+<<<<<<< HEAD
 ## -------- DATA EXPORT --------
+=======
+## ---- ACCESSING AND ADDING DATA ----
+
+## Get the MS level for each spectrum.
+msLevel(data)
+
+## Alternatively, we could also use $ to access a specific spectra variable.
+## This could also be used to add additional spectra variables to the
+## object (see further below).
+data$msLevel
+
+## Get the intensity and m/z values.
+intensity(data)
+mz(data)
+
+## Determine whether one of the spectra has a specific m/z value
+containsMz(data, mz = 120.4)
+
+## Accessing spectra variables works for all backends:
+intensity(sciex)
+intensity(sciex_im)
+
+## Get the m/z for the first spectrum.
+mz(data)[[1]]
+
+## Get the peak data (m/z and intensity values).
+pks <- peaksData(data)
+pks
+pks[[1]]
+pks[[2]]
+
+## Note that we could get the same result by coercing the `Spectra` to
+## a `list` or `SimpleList`:
+as(data, "list")
+as(data, "SimpleList")
+
+## List all available spectra variables (i.e. spectrum data and metadata).
+spectraVariables(data) + +## For all *core* spectrum variables accessor functions are available. These +## return NA if the variable was not set. +centroided(data) +dataStorage(data) +rtime(data) +precursorMz(data) + +## The core spectra variables are: +coreSpectraVariables() + +## Add an additional metadata column. +data$spectrum_id <- c("sp_1", "sp_2") + +## List spectra variables, "spectrum_id" is now also listed +spectraVariables(data) + +## Get the values for the new spectra variable +data$spectrum_id + +## Extract specific spectra variables. +spectraData(data, columns = c("spectrum_id", "msLevel")) + +## Drop spectra variable data and/or columns. +res <- selectSpectraVariables(data, c("mz", "intensity")) + +## This removed the additional columns "spectrum_id" and deleted all values +## for all spectra variables, except "mz" and "intensity". +spectraData(res) + +## Compared to the data before selectSpectraVariables. +spectraData(data) + + +## ---- SUBSETTING, FILTERING AND COMBINING + +## Subset to all MS2 spectra. +data[msLevel(data) == 2] + +## Append new `spectraVariables` to the `spectraData` +df <- data.frame(cola = 4:5, colb = "b") +data_append <- cbind2(data, df) + +## Same with the filterMsLevel function +filterMsLevel(data, 2) + +## Below we combine the `data` and `sciex_im` objects into a single one. 
+data_comb <- c(data, sciex_im)
+
+## The combined Spectra contains a union of all spectra variables:
+head(data_comb$spectrum_id)
+head(data_comb$rtime)
+head(data_comb$dataStorage)
+head(data_comb$dataOrigin)
+
+## Filter a Spectra for a target precursor m/z with a tolerance of 10ppm
+spd$precursorMz <- c(323.4, 543.2302)
+data_filt <- Spectra(spd)
+filterPrecursorMzRange(data_filt, mz = 543.23 + ppm(c(-543.23, 543.23), 10))
+
+## Filter a Spectra keeping only peaks matching certain m/z values
+sps_sub <- filterMzValues(data, mz = c(103, 104), tolerance = 0.3)
+mz(sps_sub)
+
+## This function can also be used to remove specific peaks from a spectrum
+## by setting `keep = FALSE`.
+sps_sub <- filterMzValues(data, mz = c(103, 104),
+    tolerance = 0.3, keep = FALSE)
+mz(sps_sub)
+
+## Note that `filterMzValues()` keeps or removes all peaks with a matching
+## m/z given the provided `ppm` and `tolerance` parameters.
+
+## Filter a Spectra keeping only peaks within a m/z range
+sps_sub <- filterMzRange(data, mz = c(100, 300))
+mz(sps_sub)
+
+## Remove empty spectra variables
+sciex_noNA <- dropNaSpectraVariables(sciex)
+
+## Available spectra variables before and after `dropNaSpectraVariables()`
+spectraVariables(sciex)
+spectraVariables(sciex_noNA)
+
+
+## Adding new spectra variables
+sciex1 <- filterDataOrigin(sciex, dataOrigin(sciex)[1])
+spv <- DataFrame(spectrumId = sciex1$spectrumId[3:12], ## used for merging
+                 var1 = rnorm(10),
+                 var2 = sample(letters, 10))
+spv
+
+sciex2 <- joinSpectraData(sciex1, spv, by.y = "spectrumId")
+
+spectraVariables(sciex2)
+spectraData(sciex2)[1:13, c("spectrumId", "var1", "var2")]
+
+## Removing Fourier transform artefacts seen in Orbitrap data.
+
+## Loading an Orbitrap spectrum with artefacts.
+data(fft_spectrum) +plotSpectra(fft_spectrum, xlim = c(264.5, 265.5)) +plotSpectra(fft_spectrum, xlim = c(264.5, 265.5), ylim = c(0, 5e6)) + +fft_spectrum <- filterFourierTransformArtefacts(fft_spectrum) +fft_spectrum +plotSpectra(fft_spectrum, xlim = c(264.5, 265.5), ylim = c(0, 5e6)) + +## Using a few examples peaks in your data you can optimize the parameters +fft_spectrum_filtered <- filterFourierTransformArtefacts(fft_spectrum, + halfWindowSize = 0.2, + threshold = 0.005, + keepIsotopes = TRUE, + maxCharge = 5, + isotopeTolerance = 0.005 + ) + +fft_spectrum_filtered +length(mz(fft_spectrum_filtered)[[1]]) +plotSpectra(fft_spectrum_filtered, xlim = c(264.5, 265.5), ylim = c(0, 5e6)) + +## Using filterRanges to filter spectra object based on variables available +## in `spectraData`. +## First, determine the variable(s) on which to base the filtering: +sv <- c("rtime", "precursorMz", "peaksCount") +## Note that ANY variables can be chosen here, and as many as wanted. + +## Define the ranges (pairs of values with lower and upper boundary) to be +## used for the individual spectra variables. The first two values will be +## used for the first spectra variable (e.g., rtime here), the next two for +## the second (e.g. precursorMz here) and so on: +ranges <- c(30, 350, 200,500, 350, 600) + +## Input the parameters within the filterRanges function: +filt_spectra <- filterRanges(sciex, spectraVariables = sv, + ranges = ranges) + +## Using `filterRanges()` to filter spectra object with multiple ranges for +## the same `spectraVariable` (e.g, here rtime) +sv <- c("rtime", "rtime") +ranges <- c(30, 100, 200, 300) +filt_spectra <- filterRanges(sciex, spectraVariables = sv, + ranges = ranges, match = "any") + +## Using filterValues in a similar way to a filter spectra object based on +## variables available in `spectraData`. 
However, this time not based on +## ranges but similarities to user input single values with given +## tolerance/ppm +## First determine the variable(s) on which to base the filtering: +sv <- c("rtime", "precursorMz") +## Note that ANY variables can be chosen here, and as many as wanted. + +## Define the values that will be used to filter the spectra based on their +## similarities to their respective spectraVariables. +## The first values in the parameters values, tolerance and ppm will be +## used for the first spectra variable (e.g. rtime here), the next for the +## second (e.g. precursorMz here) and so on: +values <- c(350, 400) +tolerance <- c(100, 0) +ppm <- c(0,50) + +## Input the parameters within the `filterValues()` function: +filt_spectra <- filterValues(sciex, spectraVariables = sv, + values = values, tolerance = tolerance, ppm = ppm) + +## ---- DATA MANIPULATIONS AND OTHER OPERATIONS ---- + +## Set the data to be centroided +centroided(data) <- TRUE + +## Replace peak intensities below 40 with 3. +res <- replaceIntensitiesBelow(data, threshold = 40, value = 3) +res + +## Get the intensities of the first and second spectrum. +intensity(res)[[1]] +intensity(res)[[2]] + +## Remove all peaks with an intensity below 40. +res <- filterIntensity(res, intensity = c(40, Inf)) + +## Get the intensities of the first and second spectrum. +intensity(res)[[1]] +intensity(res)[[2]] + +## Lengths of spectra is now different +lengths(mz(res)) +lengths(mz(data)) + +## In addition it is possible to pass a function to `filterIntensity()`: in +## the example below we want to keep only peaks that have an intensity which +## is larger than one third of the maximal peak intensity in that spectrum. +keep_peaks <- function(x, prop = 3) { + x > max(x, na.rm = TRUE) / prop +} +res2 <- filterIntensity(data, intensity = keep_peaks) +intensity(res2)[[1L]] +intensity(data)[[1L]] + +## We can also change the proportion by simply passing the `prop` parameter +## to the function. 
To keep only peaks that have an intensity which is +## larger than half of the maximum intensity: +res2 <- filterIntensity(data, intensity = keep_peaks, prop = 2) +intensity(res2)[[1L]] +intensity(data)[[1L]] + +## Since data manipulation operations are by default not directly applied to +## the data but only added to the internal lazy evaluation queue, it is also +## possible to remove these data manipulations with the `reset()` function: +res_rest <- reset(res) +res_rest +lengths(mz(res_rest)) +lengths(mz(res)) +lengths(mz(data)) + +## `reset()` after a `applyProcessing()` can not restore the data, because +## the data in the backend was changed. Similarly, `reset()` after any +## filter operations can not restore data for a `Spectra` with a +## `MsBackendMemory` or `MsBackendDataFrame`. +res_2 <- applyProcessing(res) +res_rest <- reset(res_2) +lengths(mz(res)) +lengths(mz(res_rest)) + + +## Compare spectra: comparing spectra 2 and 3 against spectra 10:20 using +## the normalized dotproduct method. +res <- compareSpectra(sciex_im[2:3], sciex_im[10:20]) +## first row contains comparisons of spectrum 2 with spectra 10 to 20 and +## the second row comparisons of spectrum 3 with spectra 10 to 20 +res + +## To use a simple Pearson correlation instead we can define a function +## that takes the two peak matrices and calculates the correlation for +## their second columns (containing the intensity values). +correlateSpectra <- function(x, y, use = "pairwise.complete.obs", ...) { + cor(x[, 2], y[, 2], use = use) +} +res <- compareSpectra(sciex_im[2:3], sciex_im[10:20], + FUN = correlateSpectra) +res + +## Use compareSpectra to determine the number of common (matching) peaks +## with a ppm of 10: +## type = "inner" uses a *inner join* to match peaks, i.e. keeps only +## peaks that can be mapped betwen both spectra. The provided FUN returns +## simply the number of matching peaks. 
+compareSpectra(sciex_im[2:3], sciex_im[10:20], ppm = 10, type = "inner", + FUN = function(x, y, ...) nrow(x)) + +## Apply an arbitrary function to each spectrum in a Spectra. +## In the example below we calculate the mean intensity for each spectrum +## in a subset of the sciex_im data. Note that we can access all variables +## of each individual spectrum either with the `$` operator or the +## corresponding method. +res <- spectrapply(sciex_im[1:20], FUN = function(x) mean(x$intensity[[1]])) +head(res) + +## It is however important to note that dedicated methods to access the +## data (such as `intensity`) are much more efficient than using `lapply()`: +res <- lapply(intensity(sciex_im[1:20]), mean) +head(res) + +## As an alternative, applying a function `FUN` to a `Spectra` can be +## performed *chunk-wise*. The advantage of this is, that only the data for +## one chunk at a time needs to be loaded into memory reducing the memory +## demand. This type of processing can be performed by specifying the size +## of the chunks (i.e. number of spectra per chunk) with the `chunkSize` +## parameter +spectrapply(sciex_im[1:20], lengths, chunkSize = 5L) + +## ---- DATA EXPORT ---- +>>>>>>> parent of 7326508 (Merge branch 'main' into phili) ## Some `MsBackend` classes provide an `export()` method to export the data ## to the file format supported by the backend. @@ -375,7 +1246,42 @@ res <- Spectra(backendInitialize(MsBackendMzR(), fls[1])) mz(res) mz(data) + +## ---- PEAKS VARIABLES AND DATA ---- + +## Some `MsBackend` classes provide support for arbitrary peaks variables +## (in addition to the mandatory `"mz"` and `"intensity"` values. Below +## we create a simple data frame with an additional peak variable `"pk_ann"` +## and create a `Spectra` with a `MsBackendMemory` for that data. +## Importantly the number of values (per spectrum) need to be the same +## for all peak variables. 
+ +tmp <- data.frame(msLevel = c(2L, 2L), rtime = c(123.2, 123.5)) +tmp$mz <- list(c(103.1, 110.4, 303.1), c(343.2, 453.1)) +tmp$intensity <- list(c(130.1, 543.1, 40), c(0.9, 0.45)) +tmp$pk_ann <- list(c(NA_character_, "A", "P"), c("B", "P")) + +## Create the Spectra. With parameter `peaksVariables` we can define +## the columns in `tmp` that contain peaks variables. +sps <- Spectra(tmp, source = MsBackendMemory(), + peaksVariables = c("mz", "intensity", "pk_ann")) +peaksVariables(sps) + +## Extract just the m/z and intensity values +peaksData(sps)[[1L]] + +## Extract the full peaks data +peaksData(sps, columns = peaksVariables(sps))[[1L]] + +## Access just the pk_ann variable +sps$pk_ann } \author{ -Sebastian Gibb, Johannes Rainer, Laurent Gatto, Philippine Louail +Nir Shahaf, Johannes Rainer + +Nir Shahaf + +Johannes Rainer + +Sebastian Gibb, Johannes Rainer, Laurent Gatto } diff --git a/man/addProcessing.Rd b/man/addProcessing.Rd deleted file mode 100644 index 787aeabe..00000000 --- a/man/addProcessing.Rd +++ /dev/null @@ -1,547 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Spectra-functions.R, R/Spectra.R -\name{applyProcessing} -\alias{applyProcessing} -\alias{processingLog} -\alias{scalePeaks} -\alias{addProcessing} -\alias{bin} -\alias{containsMz} -\alias{containsNeutralLoss} -\alias{entropy} -\alias{pickPeaks} -\alias{replaceIntensitiesBelow} -\alias{reset} -\alias{smooth} -\alias{spectrapply} -\alias{addProcessing,Spectra-method} -\alias{bin,Spectra-method} -\alias{containsMz,Spectra-method} -\alias{containsNeutralLoss,Spectra-method} -\alias{entropy,Spectra-method} -\alias{entropy,ANY-method} -\alias{pickPeaks,Spectra-method} -\alias{replaceIntensitiesBelow,Spectra-method} -\alias{reset,Spectra-method} -\alias{smooth,Spectra-method} -\alias{spectrapply,Spectra-method} -\title{Data manipulation and analysis methods} -\usage{ -applyProcessing( - object, - f = processingChunkFactor(object), - BPPARAM = bpparam(), - 
... -) - -processingLog(x) - -scalePeaks(x, by = sum, msLevel. = uniqueMsLevels(x)) - -\S4method{addProcessing}{Spectra}(object, FUN, ..., spectraVariables = character()) - -\S4method{bin}{Spectra}( - x, - binSize = 1L, - breaks = NULL, - msLevel. = uniqueMsLevels(x), - FUN = sum, - zero.rm = TRUE -) - -\S4method{containsMz}{Spectra}( - object, - mz = numeric(), - tolerance = 0, - ppm = 20, - which = c("any", "all"), - BPPARAM = bpparam() -) - -\S4method{containsNeutralLoss}{Spectra}( - object, - neutralLoss = 0, - tolerance = 0, - ppm = 20, - BPPARAM = bpparam() -) - -\S4method{entropy}{Spectra}(object, normalized = TRUE) - -\S4method{entropy}{ANY}(object, ...) - -\S4method{pickPeaks}{Spectra}( - object, - halfWindowSize = 2L, - method = c("MAD", "SuperSmoother"), - snr = 0, - k = 0L, - descending = FALSE, - threshold = 0, - msLevel. = uniqueMsLevels(object), - ... -) - -\S4method{replaceIntensitiesBelow}{Spectra}( - object, - threshold = min, - value = 0, - msLevel. = uniqueMsLevels(object) -) - -\S4method{reset}{Spectra}(object, ...) - -\S4method{smooth}{Spectra}( - x, - halfWindowSize = 2L, - method = c("MovingAverage", "WeightedMovingAverage", "SavitzkyGolay"), - msLevel. = uniqueMsLevels(x), - ... -) - -\S4method{spectrapply}{Spectra}( - object, - FUN, - ..., - chunkSize = integer(), - f = factor(), - BPPARAM = SerialParam() -) -} -\arguments{ -\item{object}{A \code{Spectra} object.} - -\item{f}{For \code{spectrapply()} and \code{applyProcessing()}: \code{factor} defining -how \code{object} should be splitted for eventual parallel processing. -Defaults to \code{factor()} for \code{spectrapply()} hence the object is not -splitted while it defaults to \code{f = processingChunkSize(object)} for -\code{applyProcessing()} splitting thus the object by default into chunks -depending on \code{\link[=processingChunkSize]{processingChunkSize()}}.} - -\item{BPPARAM}{Parallel setup configuration. See \code{\link[=bpparam]{bpparam()}} for more -information. 
This is passed directly to the \code{\link[=backendInitialize]{backendInitialize()}} method -of the \linkS4class{MsBackend}. See also \code{\link[=processingChunkSize]{processingChunkSize()}} for -additional information on parallel processing.} - -\item{...}{Additional arguments passed to internal and downstream functions.} - -\item{x}{A \code{Spectra}.} - -\item{by}{For \code{scalePeaks()}: function to calculate a single \code{numeric} from -intensity values of a spectrum by which all intensities (of -that spectrum) should be divided by. The default \code{by = sum} will -divide intensities of each spectrum by the sum of intensities of that -spectrum.} - -\item{msLevel.}{\code{integer} defining the MS level(s) of the spectra to which -the function should be applied (defaults to all MS levels of \code{object}.} - -\item{FUN}{For \code{addProcessing()}: function to be applied to the peak matrix -of each spectrum in \code{object}. -For \code{bin()}: function to aggregate intensity values of peaks falling -into the same bin. Defaults to \code{FUN = sum} thus summing up intensities. -For \code{spectrapply()} and \code{chunkapply()}: function to be applied to -each individual or each chunk of \code{Spectra}.} - -\item{spectraVariables}{For \code{addProcessing()}: \code{character} with additional -spectra variables that should be passed along to the function defined -with \code{FUN}. See function description for details.} - -\item{binSize}{For \code{bin()}: \code{numeric(1)} defining the size for the m/z bins. -Defaults to \code{binSize = 1}.} - -\item{breaks}{For \code{bin()}: \code{numeric} defining the m/z breakpoints between -bins.} - -\item{zero.rm}{For \code{bin()}: \code{logical(1)} indicating whether to remove bins -with zero intensity. 
Defaults to \code{TRUE}, meaning the function will -discard bins created with an intensity of 0 to enhance memory -efficiency.} - -\item{mz}{For \code{containsMz()}: \code{numeric} with the m/z value(s) of the mass -peaks to check.} - -\item{tolerance}{For \code{containsMz()} and \code{neutralLoss()}: -\code{numeric(1)} allowing to define a constant maximal accepted difference -between m/z values for peaks to be matched.} - -\item{ppm}{For \code{containsMz()} and \code{neutralLoss()}: \code{numeric(1)} defining a -relative, m/z-dependent, maximal accepted difference between m/z values -for peaks to be matched.} - -\item{which}{For \code{containsMz()}: either \code{"any"} or \code{"all"} defining whether -any (the default) or all provided \code{mz} have to be present in the -spectrum.} - -\item{neutralLoss}{for \code{containsNeutralLoss()}: \code{numeric(1)} defining the -value which should be subtracted from the spectrum's precursor m/z.} - -\item{normalized}{for \code{entropy()}: \code{logical(1)} whether the normalized -entropy should be calculated (default). See also \code{\link[=nentropy]{nentropy()}} for -details.} - -\item{halfWindowSize}{For \code{pickPeaks()}: \code{integer(1)}, used in the -identification of the mass peaks: a local maximum has to be the -maximum in the window from \code{(i - halfWindowSize):(i + halfWindowSize)}. -For \code{smooth()}: \code{integer(1)}, used in the smoothing algorithm, the -window reaches from \code{(i - halfWindowSize):(i + halfWindowSize)}.} - -\item{method}{For \code{pickPeaks()}: \code{character(1)}, the noise estimators that -should be used, currently the the \emph{M}edian \emph{A}bsolute \emph{D}eviation -(\code{method = "MAD"}) and Friedman's Super Smoother -(\code{method = "SuperSmoother"}) are supported. 
-For \code{smooth()}: \code{character(1)}, the smoothing function that should be -used, currently, the Moving-Average- (\code{method = "MovingAverage"}), -Weighted-Moving-Average- (\verb{method = "WeightedMovingAverage")}, -Savitzky-Golay-Smoothing (\code{method = "SavitzkyGolay"}) are supported.} - -\item{snr}{For \code{pickPeaks()}: \code{double(1)} defining the -\emph{S}ignal-to-\emph{N}oise-\emph{R}atio. The intensity of a local maximum has to be -higher than \code{snr * noise} to be considered as peak.} - -\item{k}{For \code{pickPeaks()}: \code{integer(1)}, number of values left and right of -the peak that should be considered in the weighted mean calculation.} - -\item{descending}{For \code{pickPeaks()}: \code{logical}, if \code{TRUE} just values -betwee the nearest valleys around the peak centroids are used.} - -\item{threshold}{For \code{pickPeaks()}: a \code{numeric(1)} defining the proportion -of the maximal peak intensity. Only values above the threshold are -used for the weighted mean calculation. -For \code{replaceIntensitiesBelow()}: a \code{numeric(1)} defining the threshold -or a \code{function} to calculate the threshold for each spectrum on its -intensity values. Defaults to \code{threshold = min}.} - -\item{value}{For \code{replaceIntensitiesBelow()}: \code{numeric(1)} defining the -value with which intensities should be replaced with.} - -\item{chunkSize}{For \code{spectrapply()}: size of the chunks into which the -\code{Spectra} should be split. This parameter overrides parameters -\code{f} and \code{BPPARAM}.} -} -\value{ -See the documentation of the individual functions for a description of the -return value. -} -\description{ -Various data analysis functions are available for \code{Spectra} objects. These -can be categorized into functions that either return a \code{Spectra} object -(with the manipulated data) and functions that directly return the -result from the calculation. 
For the former category, the data manipulations -are cached in the result object's \emph{processing queue} and only exectuted -on-the-fly when the respective data gets extracted from the \code{Spectra} (see -section \emph{The processing queue} for more information). - -For the second category, the calculations are directly executed and the -result, usually one value per spectrum, returned. Generally, to reduce -memory demand, a chunk-wise processing of the data is performed. -} -\section{Data analysis methods returning a \code{Spectra}}{ - - -The methods listed here return a \code{Spectra} object as a result. -\itemize{ -\item \code{addProcessing()}: adds an arbitrary function that should be applied to the -peaks matrix of every spectrum in \code{object}. The function (can be passed -with parameter \code{FUN}) is expected to take a peaks matrix as input and to -return a peaks matrix. A peaks matrix is a numeric matrix with two columns, -the first containing the m/z values of the peaks and the second the -corresponding intensities. The function has to have \code{...} in its -definition. Additional arguments can be passed with \code{...}. With parameter -\code{spectraVariables} it is possible to define additional spectra variables -from \code{object} that should be passed to the function \code{FUN}. These will be -passed by their name (e.g. specifying \code{spectraVariables = "precursorMz"} -will pass the spectra's precursor m/z as a parameter named \code{precursorMz} -to the function. The only exception is the spectra's MS level, these will -be passed to the function as a parameter called \code{spectrumMsLevel} (i.e. -with \code{spectraVariables = "msLevel"} the MS levels of each spectrum will be -submitted to the function as a parameter called \code{spectrumMsLevel}). -Examples are provided in the package vignette. -\item \code{bin()}: aggregates individual spectra into discrete (m/z) bins. 
Binning is -performed only on spectra of the specified MS level(s) (parameter -\code{msLevel}, by default all MS levels of \code{x}). The bins can be defined with -parameter \code{breaks} which by default are equally sized bins, with size -being defined by parameter \code{binSize}, from the minimal to the maximal m/z -of all spectra (of MS level \code{msLevel}) within \code{x}. The same bins are used -for all spectra in \code{x}. All intensity values for peaks falling into the -same bin are aggregated using the function provided with parameter \code{FUN} -(defaults to \code{FUN = sum}, i.e. all intensities are summed up). Note that -the binning operation is applied to the peak data on-the-fly upon data -access and it is possible to \emph{revert} the operation with the \code{reset()} -function (see description of \code{reset()} below). -\item \code{countIdentifications}: counts the number of identifications each scan has -led to. See \code{\link[=countIdentifications]{countIdentifications()}} for more details. -\item \code{pickPeaks()}: picks peaks on individual spectra using a moving -window-based approach (window size = \code{2 * halfWindowSize}). For noisy -spectra there are currently two different noise estimators available, -the \emph{M}edian \emph{A}bsolute \emph{D}eviation (\code{method = "MAD"}) and -Friedman's Super Smoother (\code{method = "SuperSmoother"}), -as implemented in the \code{\link[MsCoreUtils:noise]{MsCoreUtils::noise()}}. -The method supports also to optionally \emph{refine} the m/z value of -the identified centroids by considering data points that belong (most -likely) to the same mass peak. Therefore the m/z value is calculated as an -intensity weighted average of the m/z values within the peak region. -The peak region is defined as the m/z values (and their respective -intensities) of the \code{2 * k} closest signals to the centroid or the closest -valleys (\code{descending = TRUE}) in the \code{2 * k} region. 
For the latter the \code{k} -has to be chosen general larger. See \code{\link[MsCoreUtils:refineCentroids]{MsCoreUtils::refineCentroids()}} for -details. -If the ratio of the signal to the highest intensity of the peak is below -\code{threshold} it will be ignored for the weighted average. -\item \code{replaceIntensitiesBelow()}: replaces intensities below a specified -threshold with the provided \code{value}. Parameter \code{threshold} can be either -a single numeric value or a function which is applied to all non-\code{NA} -intensities of each spectrum to determine a threshold value for each -spectrum. The default is \code{threshold = min} which replaces all values -which are <= the minimum intensity in a spectrum with \code{value} (the -default for \code{value} is \code{0}). Note that the function specified with -\code{threshold} is expected to have a parameter \code{na.rm} since \code{na.rm = TRUE} -will be passed to the function. If the spectrum is in profile mode, -ranges of successive non-0 peaks <= \code{threshold} are set to 0. -Parameter \code{msLevel.} allows to apply this to only spectra of certain MS -level(s). -\item \code{scalePeaks()}: scales intensities of peaks within each spectrum depending -on parameter \code{by}. With \code{by = sum} (the default) peak intensities are -divided by the sum of peak intensities within each spectrum. The sum of -intensities is thus 1 for each spectrum after scaling. Parameter -\code{msLevel.} allows to apply the scaling of spectra of a certain MS level. -By default (\code{msLevel. = uniqueMsLevels(x)}) intensities for all -spectra will be scaled. -\item \code{smooth()}: smooths individual spectra using a moving window-based approach -(window size = \code{2 * halfWindowSize}). 
Currently, the -Moving-Average- (\code{method = "MovingAverage"}), -Weighted-Moving-Average- (\verb{method = "WeightedMovingAverage")}, -weights depending on the distance of the center and calculated -\code{1/2^(-halfWindowSize:halfWindowSize)}) and -Savitzky-Golay-Smoothing (\code{method = "SavitzkyGolay"}) are supported. -For details how to choose the correct \code{halfWindowSize} please see -\code{\link[MsCoreUtils:smooth]{MsCoreUtils::smooth()}}. -} -} - -\section{Data analysis methods returning the result from the calculation}{ - - -The functions listed in this section return immediately the result from the -calculation. To reduce memory demand (and allow parallel processing) the -calculations a chunk-wise processing is generally performed. -\itemize{ -\item \code{chunkapply()}: apply an arbitrary function to chunks of spectra. See -\code{\link[=chunkapply]{chunkapply()}} for details and examples. -\item \code{containsMz()}: checks for each of the spectra whether they contain mass -peaks with an m/z equal to \code{mz} (given acceptable difference as defined by -parameters \code{tolerance} and \code{ppm} - see \code{\link[=common]{common()}} for details). Parameter -\code{which} allows to define whether any (\code{which = "any"}, the default) or -all (\code{which = "all"}) of the \code{mz} have to match. The function returns -\code{NA} if \code{mz} is of length 0 or is \code{NA}. -\item \code{containsNeutralLoss()}: checks for each spectrum in \code{object} if it has a -peak with an m/z value equal to its precursor m/z - \code{neutralLoss} (given -acceptable difference as defined by parameters \code{tolerance} and \code{ppm}). -Returns \code{NA} for MS1 spectra (or spectra without a precursor m/z). -\item \code{entropy()}: calculates the entropy of each spectra based on the metrics -suggested by Li et al. (https://doi.org/10.1038/s41592-021-01331-z). -See also \code{\link[=nentropy]{nentropy()}} in the \emph{MsCoreUtils} package for details. 
-\item \code{estimatePrecursorIntensity()}: defines the precursor intensities for MS2 -spectra using the intensity of the matching MS1 peak from the -closest MS1 spectrum (i.e. the last MS1 spectrum measured before the -respective MS2 spectrum). With \code{method = "interpolation"} it is also -possible to calculate the precursor intensity based on an interpolation of -intensity values (and retention times) of the matching MS1 peaks from the -previous and next MS1 spectrum. See \code{\link[=estimatePrecursorIntensity]{estimatePrecursorIntensity()}} for -examples and more details. -\item \code{estimatePrecursorMz()}: \strong{for DDA data}: allows to estimate a fragment -spectra's precursor m/z based on the reported precursor m/z and the data -from the previous MS1 spectrum. See \code{\link[=estimatePrecursorMz]{estimatePrecursorMz()}} for details. -\item \code{neutralLoss()}: calculates neutral loss spectra for fragment spectra. See -\code{\link[=neutralLoss]{neutralLoss()}} for detailed documentation. -\item \code{spectrapply()}: applies a given function to each individual spectrum or -sets of a \code{Spectra} object. By default, the \code{Spectra} is split into -individual spectra (i.e. \code{Spectra} of length 1) and the function \code{FUN} -is applied to each of them. An alternative splitting can be defined with -parameter \code{f}. Parameters for \code{FUN} can be passed using \code{...}. -The returned result and its order depend on the function \code{FUN} and how -\code{object} is split (hence on \code{f}, if provided). Parallel processing is -supported and can be configured with parameter \code{BPPARAM}, is however only -suggested for computational intense \code{FUN}. -As an alternative to the (eventual parallel) processing of the full -\code{Spectra}, \code{spectrapply()} supports also a chunk-wise processing. For this, -parameter \code{chunkSize} needs to be specified. 
\code{object} is then split into -chunks of size \code{chunkSize} which are then (stepwise) processed by \code{FUN}. -This guarantees a lower memory demand (especially for on-disk backends) -since only the data for one chunk needs to be loaded into memory in each -iteration. Note that by specifying \code{chunkSize}, parameters \code{f} and -\code{BPPARAM} will be ignored. -See also \code{chunkapply()} above or examples below for details on chunk-wise -processing. -} -} - -\section{The processing queue}{ - - -Operations that modify mass peak data, i.e. the m/z and intensity values of -a \code{Spectra} are generally not applied immediately to the data but are -\emph{cached} within the object's \emph{processing queue}. These operations are then -applied to the data only upon request, for example when m/z and/or -intensity values are extracted. This lazy execution guarantees that the -same functionality can be applied to any \code{Spectra} object, regardless of -the type of backend that is used. Thus, data manipulation operations can -also be applied to data that is \emph{read only}. As a side effect, this enables -also to \emph{undo} operations using the \code{reset()} function. - -Functions related to the processing queue are: -\itemize{ -\item \code{applyProcessing()}: for \code{Spectra} objects that use a \strong{writeable} backend -only: apply all steps from the lazy processing queue to the peak data and -write it back to the data storage. Parameter \code{f} allows to specify how -\code{object} should be split for parallel processing. This should either be -equal to the \code{dataStorage}, or \code{f = rep(1, length(object))} to disable -parallel processing alltogether. Other partitionings might result in -errors (especially if a \code{MsBackendHdf5Peaks} backend is used). -\item \code{processingLog()}: returns a \code{character} vector with the processing log -messages. 
-\item \code{reset()}: restores the data to its original state (as much as possible):
-removes any processing steps from the lazy processing queue and calls
-\code{reset()} on the backend which, depending on the backend, can also undo
-e.g. data filtering operations. Note that a \code{reset()} call after
-\code{applyProcessing()} will not have any effect. See examples below for more
-information.
-}
-}
-
-\examples{
-
-## Load a `Spectra` object with LC-MS/MS data.
-fl <- system.file("TripleTOF-SWATH", "PestMix1_DDA.mzML",
-    package = "msdata")
-sps_dda <- Spectra(fl)
-sps_dda
-
-
-## -------- FUNCTIONS RETURNING A SPECTRA --------
-
-## Replace peak intensities below 20 with a value of 1
-sps_mod <- replaceIntensitiesBelow(sps_dda, threshold = 20, value = 1)
-sps_mod
-
-## Get the intensities of the first spectrum before and after the
-## operation
-intensity(sps_dda[1])
-intensity(sps_mod[1])
-
-## Remove all peaks with an intensity below 5.
-sps_mod <- filterIntensity(sps_dda, intensity = c(5, Inf))
-
-intensity(sps_mod)
-
-## In addition it is possible to pass a function to `filterIntensity()`: in
-## the example below we want to keep only peaks that have an intensity which
-## is larger than one third of the maximal peak intensity in that spectrum.
-keep_peaks <- function(x, prop = 3) {
-    x > max(x, na.rm = TRUE) / prop
-}
-sps_mod <- filterIntensity(sps_dda, intensity = keep_peaks)
-intensity(sps_mod)
-
-## We can also change the proportion by simply passing the `prop` parameter
-## to the function. To keep only peaks that have an intensity which is
-## larger than half of the maximum intensity:
-sps_mod <- filterIntensity(sps_dda, intensity = keep_peaks, prop = 2)
-intensity(sps_mod)
-
-## With the `scalePeaks()` function we can alternatively scale the
-## intensities of mass peaks per spectrum to relative intensities. This
-## is specifically useful for fragment (MS2) spectra. 
We below thus
-## scale the intensities per spectrum by the total sum of intensities
-## (such that the sum of all intensities per spectrum is 1).
-## Below we scale the intensities of all MS2 spectra in our data set.
-sps_mod <- scalePeaks(sps_dda, msLevel = 2L)
-
-## MS1 spectra were not affected
-sps_mod |>
-    filterMsLevel(1L) |>
-    intensity()
-
-## Intensities of MS2 spectra were scaled
-sps_mod |>
-    filterMsLevel(2L) |>
-    intensity()
-
-## Since data manipulation operations are by default not directly applied to
-## the data but only cached in the internal processing queue, it is also
-## possible to remove these data manipulations with the `reset()` function:
-tmp <- reset(sps_mod)
-tmp
-lengths(sps_dda) |> head()
-lengths(sps_mod) |> head()
-lengths(tmp) |> head()
-
-## Data manipulation operations cached in the processing queue can also be
-## applied to the mass peaks data with the `applyProcessing()` function, if
-## the `Spectra` uses a backend that supports that (i.e. allows replacing
-## the mass peaks data). Below we first change the backend to a
-## `MsBackendMemory()` and then use the `applyProcessing()` to modify the
-## mass peaks data
-sps_dda <- setBackend(sps_dda, MsBackendMemory())
-sps_mod <- filterIntensity(sps_dda, intensity = c(5, Inf))
-sps_mod <- applyProcessing(sps_mod)
-sps_mod
-
-## While we can't *undo* this filtering operation now using the `reset()`
-## function, accessing the data would now be faster, because the operation
-## no longer needs to be applied to the original data before returning to the
-## user.
-
-
-## -------- FUNCTIONS RETURNING THE RESULT --------
-
-## With the `spectrapply()` function it is possible to apply an
-## arbitrary function to each spectrum in a Spectra.
-## In the example below we calculate the mean intensity for each spectrum
-## in a subset of the sciex_im data. Note that we can access all variables
-## of each individual spectrum either with the `$` operator or the
-## corresponding method. 
-res <- spectrapply(sps_dda[1:20], FUN = function(x) mean(x$intensity[[1]])) -head(res) - -## As an alternative, applying a function `FUN` to a `Spectra` can be -## performed *chunk-wise*. The advantage of this is, that only the data for -## one chunk at a time needs to be loaded into memory reducing the memory -## demand. This type of processing can be performed by specifying the size -## of the chunks (i.e. number of spectra per chunk) with the `chunkSize` -## parameter -spectrapply(sps_dda[1:20], lengths, chunkSize = 5L) - -## Precursor intensity estimation. Some manufacturers don't report the -## precursor intensity for MS2 spectra: -sps_dda |> - filterMsLevel(2L) |> - precursorIntensity() - -## This intensity can however be estimated from the previously measured -## MS1 scan with the `estimatePrecursorIntensity()` function: -pi <- estimatePrecursorIntensity(sps_dda) - -## This function returned the result as a `numeric` vector with one -## value per spectrum: -pi - -## We can replace the precursor intensity values of the originating -## object: -sps_dda$precursorIntensity <- pi -sps_dda |> - filterMsLevel(2L) |> - precursorIntensity() - -} -\seealso{ -\itemize{ -\item \code{\link[=compareSpectra]{compareSpectra()}} for calculation of spectra similarity scores. -\item \code{\link[=processingChunkSize]{processingChunkSize()}} for information on parallel and chunk-wise data -processing. -\item \link{Spectra} for a general description of the \code{Spectra} object. 
-} -} -\author{ -Sebastian Gibb, Johannes Rainer, Laurent Gatto, Philippine Louail, Nir Shahaf, Mar Garcia-Aloy -} diff --git a/man/combinePeaks.Rd b/man/combinePeaks.Rd deleted file mode 100644 index a59b8f24..00000000 --- a/man/combinePeaks.Rd +++ /dev/null @@ -1,110 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Spectra.R -\name{combinePeaks} -\alias{combinePeaks} -\alias{combinePeaks,Spectra-method} -\title{Aggregating and combining mass peaks data} -\usage{ -\S4method{combinePeaks}{Spectra}( - object, - tolerance = 0, - ppm = 20, - intensityFun = base::mean, - mzFun = base::mean, - weighted = TRUE, - msLevel. = uniqueMsLevels(object), - ... -) -} -\arguments{ -\item{object}{A \code{Spectra} object.} - -\item{tolerance}{\code{numeric(1)} allowing to define a constant maximal -accepted difference between m/z values for peaks to be grouped. Default -is \code{tolerance = 0}.} - -\item{ppm}{\code{numeric(1)} defining a relative, m/z-dependent, maximal -accepted difference between m/z values for peaks to be grouped. Default -is \code{ppm = 20}.} - -\item{intensityFun}{Function to aggregate intensities for all peaks in -each peak group into a single intensity value.} - -\item{mzFun}{Function to aggregate m/z values for all mass peaks within -each peak group into a single m/z value. This parameter is ignored if -\code{weighted = TRUE} (the default).} - -\item{weighted}{\code{logical(1)} whether m/z values of peaks within each peak -group should be aggregated into a single m/z value using an -intensity-weighted mean. 
Defaults to \code{weighted = TRUE}.}
-
-\item{msLevel.}{\code{integer} defining the MS level(s) of the spectra to which
-the function should be applied (defaults to all MS levels of \code{object}).}
-
-\item{...}{ignored.}
-}
-\description{
-In addition to aggregating content of spectra variables (described in
-\code{\link[=combineSpectra]{combineSpectra()}}) it is also possible to aggregate and combine mass peaks
-data from individual spectra within a \code{Spectra}. This \code{combinePeaks()}
-function combines mass peaks \strong{within each spectrum} with a difference in
-their m/z values that is smaller than the maximal acceptable difference
-defined by \code{ppm} and \code{tolerance}. Parameters \code{intensityFun} and \code{mzFun}
-allow to define functions to aggregate the intensity and m/z values for
-each such group of peaks. With \code{weighted = TRUE} (the default), the m/z
-value of the combined peak is calculated using an intensity-weighted mean
-and parameter \code{mzFun} is ignored. The \code{\link[MsCoreUtils:group]{MsCoreUtils::group()}} function is
-used for the grouping of mass peaks. Parameter \code{msLevel.} allows to define
-selected MS levels for which peaks should be combined. This function
-returns a \code{Spectra} with the same number of spectra as the input object,
-but with possibly combined peaks within each spectrum.
-Additional peak variables (other than \code{"mz"} and \code{"intensity"}) are
-dropped (i.e. their values are replaced with \code{NA}) for combined peaks
-unless they are constant across the combined peaks. See also
-\code{\link[=reduceSpectra]{reduceSpectra()}} for a function to select a single \emph{representative}
-mass peak for each peak group.
-}
-\examples{
-
-## Create a Spectra from mzML files and use the `MsBackendMzR` on-disk
-## backend. 
-sciex_file <- dir(system.file("sciex", package = "msdata"), - full.names = TRUE) -sciex <- Spectra(sciex_file, backend = MsBackendMzR()) - -## Combine mass peaks per spectrum with a difference in their m/z value -## that is smaller than 20 ppm. The intensity values of such peaks are -## combined by summing their values, while for the m/z values the median -## is reported -sciex_comb <- combinePeaks(sciex, ppm = 20, - intensityFun = sum, mzFun = median) - -## Comparing the number of mass peaks before and after aggregation -lengths(sciex) |> head() -lengths(sciex_comb) |> head() - -## Plotting the first spectrum before and after aggregation -par(mfrow = c(1, 2)) -plotSpectra(sciex[2L]) -plotSpectra(sciex_comb[2L]) - -## Using `reduceSpectra()` to keep for each group of mass peaks with a -## difference in their m/z values < 20ppm the one with the highest intensity. -sciex_red <- reduceSpectra(sciex, ppm = 20) - -## Comparing the number of mass peaks before and after the operation -lengths(sciex) |> head() -lengths(sciex_red) |> head() -} -\seealso{ -\itemize{ -\item \code{\link[=combineSpectra]{combineSpectra()}} for functions to combine or aggregate \code{Spectra}'s -spectra data. -\item \code{\link[=combinePeaksData]{combinePeaksData()}} for the function to combine the mass peaks data. -\item \code{\link[=reduceSpectra]{reduceSpectra()}} and similar functions to filter mass peaks data. -\item \link{Spectra} for a general description of the \code{Spectra} object. 
-} -} -\author{ -Sebastian Gibb, Johannes Rainer, Laurent Gatto -} diff --git a/man/combineSpectra.Rd b/man/combineSpectra.Rd deleted file mode 100644 index d4f7bdb0..00000000 --- a/man/combineSpectra.Rd +++ /dev/null @@ -1,240 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Spectra-functions.R, R/Spectra.R -\name{concatenateSpectra} -\alias{concatenateSpectra} -\alias{combineSpectra} -\alias{joinSpectraData} -\alias{split} -\alias{c,Spectra-method} -\alias{split,Spectra,ANY-method} -\title{Merging, aggregating and splitting Spectra} -\usage{ -concatenateSpectra(x, ...) - -combineSpectra( - x, - f = x$dataStorage, - p = x$dataStorage, - FUN = combinePeaksData, - ..., - BPPARAM = bpparam() -) - -joinSpectraData(x, y, by.x = "spectrumId", by.y, suffix.y = ".y") - -\S4method{c}{Spectra}(x, ...) - -\S4method{split}{Spectra,ANY}(x, f, drop = FALSE, ...) -} -\arguments{ -\item{x}{A \code{Spectra} object.} - -\item{...}{Additional arguments.} - -\item{f}{For \code{split()}: factor defining how to split \code{x}. See \code{\link[base:split]{base::split()}} -for details. -For \code{combineSpectra()}: \code{factor} defining the grouping of the spectra -that should be combined. Defaults to \code{x$dataStorage}.} - -\item{p}{For \code{combineSpectra()}: \code{factor} defining how to split the input -\code{Spectra} for parallel processing. Defaults to \code{x$dataStorage}, i.e., -depending on the used backend, per-file parallel processing will be -performed.} - -\item{FUN}{For \code{combineSpectra()}: function to combine the (peak matrices) -of the spectra. Defaults to \code{\link[=combinePeaksData]{combinePeaksData()}}.} - -\item{BPPARAM}{Parallel setup configuration. See \code{\link[=bpparam]{bpparam()}} for more -information. 
This is passed directly to the \code{\link[=backendInitialize]{backendInitialize()}} method
-of the \linkS4class{MsBackend}.}
-
-\item{y}{A \code{DataFrame} with the spectra variables to join/add.}
-
-\item{by.x}{A \code{character(1)} specifying the spectra variable used
-for merging. Default is \code{"spectrumId"}.}
-
-\item{by.y}{A \code{character(1)} specifying the column used for
-merging. Set to \code{by.x} if missing.}
-
-\item{suffix.y}{A \code{character(1)} specifying the suffix to be used
-for making the names of columns in the merged spectra variables
-unique. This suffix will be used to amend \code{names(y)}, while
-\code{spectraVariables(x)} will remain unchanged.}
-
-\item{drop}{For \code{split()}: not considered.}
-}
-\description{
-Various functions are available to combine, aggregate or split data from one
-or more \code{Spectra} objects. These are:
-\itemize{
-\item \code{c()} and \code{concatenateSpectra()}: combines several \code{Spectra} objects into
-a single object. The resulting \code{Spectra} contains all data from all
-individual \code{Spectra}, i.e. the union of all their spectra variables.
-Concatenation will fail if the processing queue of any of the \code{Spectra}
-objects is not empty or if different backends are used for the \code{Spectra}
-objects. In such cases it is suggested to first change the backends of
-all \code{Spectra} to the same type of backend (using the \code{\link[=setBackend]{setBackend()}}
-function and to eventually (if needed) apply the processing queue using
-the \code{\link[=applyProcessing]{applyProcessing()}} function).
-\item \code{combineSpectra()}: combines sets of spectra (defined with parameter \code{f})
-into a single spectrum per set aggregating their MS data (i.e. their
-\emph{peaks data} matrices with the \emph{m/z} and intensity values of their
-mass peaks). The spectra variable values of the first spectrum per set
-are reported for the combined spectrum. 
The peak matrices of the spectra -per set are combined using the function specified with parameter \code{FUN} -which uses by default the \code{\link[=combinePeaksData]{combinePeaksData()}} function. See the -documentation of \code{\link[=combinePeaksData]{combinePeaksData()}} for details on the aggregation of -the peak data and the package vignette for examples. -The sets of spectra can be specified with parameter \code{f} which is expected -to be a \code{factor} or \code{vector} of length equal to the length of the -\code{Spectra} specifying to which set a spectrum belongs to. The function -returns a \code{Spectra} of length equal to the unique levels of \code{f}. The -optional parameter \code{p} allows to define how the \code{Spectra} should be -split for potential parallel processing. The default is -\code{p = x$dataStorage} and hence a per storage file parallel processing is -applied for \code{Spectra} with on disk data representations (such as the -\code{\link[=MsBackendMzR]{MsBackendMzR()}}). This also prevents that spectra from different data -files/samples are combined (eventually use e.g. \code{p = x$dataOrigin} or any -other spectra variables defining the originating samples for a spectrum). -Before combining the peaks data, all eventual present processing steps are -applied (by calling \code{\link[=applyProcessing]{applyProcessing()}} on the \code{Spectra}). This function -will replace the original \emph{m/z} and intensity values of a \code{Spectra} hence -it can not be called on a \code{Spectra} with a \emph{read-only} backend. In such -cases, the backend should be changed to a \emph{writeable} backend before -using the \code{\link[=setBackend]{setBackend()}} function (to e.g. a \code{\link[=MsBackendMemory]{MsBackendMemory()}} backend). -\item \code{joinSpectraData()}: Individual spectra variables can be directly -added with the \verb{$<-} or \verb{[[<-} syntax. 
The \code{joinSpectraData()} -function allows to merge a \code{DataFrame} to the existing spectra -data of a \code{Spectra}. This function diverges from the \code{\link[=merge]{merge()}} method in -two main ways: -\itemize{ -\item The \code{by.x} and \code{by.y} column names must be of length 1. -\item If variable names are shared in \code{x} and \code{y}, the spectra -variables of \code{x} are not modified. It's only the \code{y} -variables that are appended with the suffix defined in -\code{suffix.y}. This is to avoid modifying any core spectra -variables that would lead to an invalid object. -\item Duplicated Spectra keys (i.e. \code{x[[by.x]]}) are not -allowed. Duplicated keys in the \code{DataFrame} (i.e \code{y[[by.y]]}) -throw a warning and only the last occurrence is kept. These -should be explored and ideally be removed using for -\code{QFeatures::reduceDataFrame()}, \code{PMS::reducePSMs()} or similar -functions. -} -\item \code{split()}: splits the \code{Spectra} object based on parameter \code{f} into a \code{list} -of \code{Spectra} objects. -} -} -\examples{ - -## Create a Spectra providing a `DataFrame` containing a MS data. - -spd <- DataFrame(msLevel = c(1L, 2L), rtime = c(1.1, 1.2)) -spd$mz <- list(c(100, 103.2, 104.3, 106.5), c(45.6, 120.4, 190.2)) -spd$intensity <- list(c(200, 400, 34.2, 17), c(12.3, 15.2, 6.8)) - -s <- Spectra(spd) -s - -## Create a second Spectra from mzML files and use the `MsBackendMzR` -## on-disk backend. -sciex_file <- dir(system.file("sciex", package = "msdata"), - full.names = TRUE) -sciex <- Spectra(sciex_file, backend = MsBackendMzR()) -sciex - -## Subset to the first 100 spectra to reduce running time of the examples -sciex <- sciex[1:100] - - -## -------- COMBINE SPECTRA -------- - -## Combining the `Spectra` object `s` with the MS data from `sciex`. -## Calling directly `c(s, sciex)` would result in an error because -## both backends use a different backend. 
We thus have to first change
-## the backends to the same backend. We change the backend of the `sciex`
-## `Spectra` to a `MsBackendMemory`, the backend used by `s`.
-
-sciex <- setBackend(sciex, MsBackendMemory())
-
-## Combine the two `Spectra`
-all <- c(s, sciex)
-all
-
-## The new `Spectra` object contains the union of spectra variables from
-## both:
-spectraVariables(all)
-
-## The spectra variables that were not present in `s`:
-setdiff(spectraVariables(all), spectraVariables(s))
-
-## The values for these were filled with missing values for spectra from
-## `s`:
-all$peaksCount |> head()
-
-
-## -------- AGGREGATE SPECTRA --------
-
-## Sets of spectra can be combined into a single, representative spectrum
-## per set using `combineSpectra()`. This aggregates the peaks data (i.e.
-## the spectra's m/z and intensity values) while using the values for all
-## spectra variables from the first spectrum per set. Below we define the
-## sets as all spectra measured in the *same second*, i.e. rounding their
-## retention time to the next closer integer value.
-f <- round(rtime(sciex))
-head(f)
-
-cmp <- combineSpectra(sciex, f = f)
-
-## The length of `cmp` is now equal to the length of unique levels in `f`:
-length(cmp)
-
-## The spectra variable value from the first spectrum per set is used in
-## the representative/combined spectrum:
-cmp$rtime
-
-## The peaks data was aggregated: the number of mass peaks of the first six
-## spectra from the original `Spectra`:
-lengths(sciex) |> head()
-
-## and for the first aggregated spectra:
-lengths(cmp) |> head()
-
-## The default peaks data aggregation method joins all mass peaks. See
-## documentation of the `combinePeaksData()` function for more options.
-
-
-## -------- SPLITTING DATA --------
-
-## A `Spectra` can be split into a `list` of `Spectra` objects using the
-## `split()` function defining the sets into which the `Spectra` should
-## be split with parameter `f`. 
-sciex_split <- split(sciex, f) - -length(sciex_split) -sciex_split |> head() - - -## -------- ADDING SPECTRA DATA -------- - -## Adding new spectra variables -sciex1 <- filterDataOrigin(sciex, dataOrigin(sciex)[1]) -spv <- DataFrame(spectrumId = sciex1$spectrumId[3:12], ## used for merging - var1 = rnorm(10), - var2 = sample(letters, 10)) -spv - -sciex2 <- joinSpectraData(sciex1, spv, by.y = "spectrumId") - -spectraVariables(sciex2) -spectraData(sciex2)[1:13, c("spectrumId", "var1", "var2")] -} -\seealso{ -\itemize{ -\item \code{\link[=combinePeaks]{combinePeaks()}} for functions to aggregate mass peaks data. -\item \link{Spectra} for a general description of the \code{Spectra} object. -} -} -\author{ -Sebastian Gibb, Johannes Rainer, Laurent Gatto -} diff --git a/man/compareSpectra.Rd b/man/compareSpectra.Rd deleted file mode 100644 index 375671c4..00000000 --- a/man/compareSpectra.Rd +++ /dev/null @@ -1,131 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Spectra.R -\name{compareSpectra} -\alias{compareSpectra} -\alias{compareSpectra,Spectra,Spectra-method} -\alias{compareSpectra,Spectra,missing-method} -\title{Spectra similarity calculations} -\usage{ -\S4method{compareSpectra}{Spectra,Spectra}( - x, - y, - MAPFUN = joinPeaks, - tolerance = 0, - ppm = 20, - FUN = ndotproduct, - ..., - SIMPLIFY = TRUE -) - -\S4method{compareSpectra}{Spectra,missing}( - x, - y = NULL, - MAPFUN = joinPeaks, - tolerance = 0, - ppm = 20, - FUN = ndotproduct, - ..., - SIMPLIFY = TRUE -) -} -\arguments{ -\item{x}{A \code{Spectra} object.} - -\item{y}{A \code{Spectra} object.} - -\item{MAPFUN}{For \code{compareSpectra()}: function to map/match peaks between -the two compared spectra. See \code{\link[=joinPeaks]{joinPeaks()}} for more information and -possible functions. 
Defaults to \code{\link[=joinPeaks]{joinPeaks()}}.} - -\item{tolerance}{\code{numeric(1)} allowing to define a constant maximal -accepted difference between m/z values for peaks to be matched. This -parameter is directly passed to \code{MAPFUN}.} - -\item{ppm}{\code{numeric(1)} defining a relative, m/z-dependent, maximal -accepted difference between m/z values for peaks to be matched. This -parameter is directly passed to \code{MAPFUN}.} - -\item{FUN}{function to compare intensities of peaks between two spectra. -Defaults to \code{\link[=ndotproduct]{ndotproduct()}}.} - -\item{...}{Additional arguments passed to the internal functions.} - -\item{SIMPLIFY}{\code{logical(1)} defining whether the result matrix should be -\emph{simplified} to a \code{numeric} if possible (i.e. if either \code{x} or \code{y} is -of length 1).} -} -\description{ -\code{compareSpectra()} compares each spectrum in \code{x} with each spectrum in \code{y} -using the function provided with \code{FUN} (defaults to \code{\link[=ndotproduct]{ndotproduct()}}). If -\code{y} is missing, each spectrum in \code{x} is compared with each other spectrum -in \code{x}. -The matching/mapping of peaks between the compared spectra is done with the -\code{MAPFUN} function. The default \code{\link[=joinPeaks]{joinPeaks()}} matches peaks of both spectra -and allows to keep all peaks from the first spectrum (\code{type = "left"}), -from the second (\code{type = "right"}), from both (\code{type = "outer"}) and to -keep only matching peaks (\code{type = "inner"}); see \code{\link[=joinPeaks]{joinPeaks()}} for more -information and examples). The \code{MAPFUN} function should have parameters -\code{x}, \code{y}, \code{xPrecursorMz} and \code{yPrecursorMz} as these values are passed to -the function. - -In addition to \code{joinPeaks()} also \code{\link[=joinPeaksGnps]{joinPeaksGnps()}} is supported for -GNPS-like similarity score calculations. 
Note that \code{joinPeaksGnps()} should -only be used in combination with \code{FUN = MsCoreUtils::gnps} -(see \code{\link[=joinPeaksGnps]{joinPeaksGnps()}} for more information and details). Use -\code{MAPFUN = joinPeaksNone} to disable internal peak matching/mapping if a -similarity scoring function is used that performs the matching internally. - -\code{FUN} is supposed to be a function to compare intensities of (matched) -peaks of the two spectra that are compared. The function needs to take two -matrices with columns \code{"mz"} and \code{"intensity"} as input and is supposed -to return a single numeric as result. In addition to the two peak matrices -the spectra's precursor m/z values are passed to the function as parameters -\code{xPrecursorMz} (precursor m/z of the \code{x} peak matrix) and \code{yPrecursorMz} -(precursor m/z of the \code{y} peak matrix). Additional parameters to functions -\code{FUN} and \code{MAPFUN} can be passed with \code{...}. Parameters \code{ppm} and -\code{tolerance} are passed to both \code{MAPFUN} and \code{FUN}. -The function returns a \code{matrix} with the results of \code{FUN} for each -comparison, number of rows equal to \code{length(x)} and number of columns -equal \code{length(y)} (i.e. element in row 2 and column 3 is the result from -the comparison of \code{x[2]} with \code{y[3]}). If \code{SIMPLIFY = TRUE} the \code{matrix} -is \emph{simplified} to a \code{numeric} if length of \code{x} or \code{y} is one. See also -the vignette for additional examples, such as using spectral entropy -similarity in the scoring. -} -\examples{ - -## Load a `Spectra` object with LC-MS/MS data. -fl <- system.file("TripleTOF-SWATH", "PestMix1_DDA.mzML", - package = "msdata") -sps_dda <- Spectra(fl) -sps_dda - -## Restrict to MS2 (fragment) spectra: -sps_ms2 <- filterMsLevel(sps_dda, msLevel = 2L) - -## Compare spectra: comparing spectra 2 and 3 against spectra 10:20 using -## the normalized dotproduct method. 
-res <- compareSpectra(sps_ms2[2:3], sps_ms2[10:20])
-## first row contains comparisons of spectrum 2 with spectra 10 to 20 and
-## the second row comparisons of spectrum 3 with spectra 10 to 20
-res
-
-## We next calculate the pairwise similarity for the first 10 spectra
-compareSpectra(sps_ms2[1:10])
-
-## Use compareSpectra to determine the number of common (matching) peaks
-## with a ppm of 10:
-## type = "inner" uses an *inner join* to match peaks, i.e. keeps only
-## peaks that can be mapped between both spectra. The provided FUN returns
-## simply the number of matching peaks.
-compareSpectra(sps_ms2[2:3], sps_ms2[10:20], ppm = 10, type = "inner",
-    FUN = function(x, y, ...) nrow(x))
-
-## We repeat this calculation between all pairwise combinations
-## of the first 20 spectra
-compareSpectra(sps_ms2[1:20], ppm = 10, type = "inner",
-    FUN = function(x, y, ...) nrow(x))
-}
-\author{
-Sebastian Gibb, Johannes Rainer, Laurent Gatto
-}
diff --git a/man/countIdentifications.Rd b/man/countIdentifications.Rd
index 08afd04b..c7904ef6 100644
--- a/man/countIdentifications.Rd
+++ b/man/countIdentifications.Rd
@@ -109,9 +109,6 @@ sp <- countIdentifications(sp)
 ## and three PSMs respectively.
 table(sp$countIdentifications, sp$msLevel)
 }
-\seealso{
-\code{\link[=addProcessing]{addProcessing()}} for other data analysis functions. 
-} \author{ Laurent Gatto } diff --git a/man/estimatePrecursorIntensity.Rd b/man/estimatePrecursorIntensity.Rd index 8780aab4..e4a7efd9 100644 --- a/man/estimatePrecursorIntensity.Rd +++ b/man/estimatePrecursorIntensity.Rd @@ -1,22 +1,21 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Spectra.R -\name{estimatePrecursorIntensity,Spectra-method} -\alias{estimatePrecursorIntensity,Spectra-method} +% Please edit documentation in R/Spectra-functions.R +\name{estimatePrecursorIntensity} \alias{estimatePrecursorIntensity} \title{Estimate Precursor Intensities} \usage{ -\S4method{estimatePrecursorIntensity}{Spectra}( - object, +estimatePrecursorIntensity( + x, ppm = 20, tolerance = 0, method = c("previous", "interpolation"), msLevel. = 2L, - f = dataOrigin(object), + f = dataOrigin(x), BPPARAM = bpparam() ) } \arguments{ -\item{object}{\code{Spectra} with MS1 and MS2 spectra.} +\item{x}{\code{Spectra} with MS1 and MS2 spectra.} \item{ppm}{\code{numeric(1)} with the maximal allowed relative difference of m/z values between the precursor m/z of a spectrum and the m/z of the diff --git a/man/estimatePrecursorMz.Rd b/man/estimatePrecursorMz.Rd index 7bc9e6cd..f79bfa24 100644 --- a/man/estimatePrecursorMz.Rd +++ b/man/estimatePrecursorMz.Rd @@ -83,9 +83,6 @@ plot(precursorMz(s), precursorMz(s) - pmz, xlab = "precursor m/z", ## we could then replace the reported precursor m/z values s$precursorMz <- pmz } -\seealso{ -\code{\link[=addProcessing]{addProcessing()}} for other data analysis and manipulation functions. 
-} \author{ Mar Garcia-Aloy, Johannes Rainer } diff --git a/man/filterPeaksRanges.Rd b/man/filterPeaksRanges.Rd deleted file mode 100644 index db713c3b..00000000 --- a/man/filterPeaksRanges.Rd +++ /dev/null @@ -1,142 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Spectra-functions.R -\name{filterPeaksRanges} -\alias{filterPeaksRanges} -\title{Filter peaks based on spectra and peaks variable ranges} -\usage{ -filterPeaksRanges(object, ..., keep = TRUE) -} -\arguments{ -\item{object}{A \link{Spectra} object.} - -\item{...}{the ranges for the spectra and/or peaks variables. Has to be -provided as \verb{ = } pairs with \verb{} being the name of a -spectra or peaks variable (of numeric data type) and \verb{} being -either a \code{numeric} of length 2 or a \code{numeric} two column matrix (see -function desription above for details),} - -\item{keep}{\code{logical(1)} whether to keep (default) or remove peaks that -match the provided range(s).} -} -\description{ -The \code{filterPeaksRanges()} function allows to filter the peaks matrices of a -\link{Spectra} object using any set of range-based filters on numeric spectra -variables or peaks variables. These ranges can be passed to the function -using the \code{...} as \verb{ = } pairs. \verb{} -has to be an available spectra or peaks variable. \verb{} can be a -\code{numeric} of length 2 defining the lower and upper boundary, or a \code{numeric} -two-column matrix (multi-row matrices are also supported, see further -below). \code{filterPeaksRanges(s, mz = c(200, 300))} would for example reduce -the peaks matrices of the \code{Spectra} object \code{s} to mass peaks with an m/z -value between 200 and 300. \code{filterPeaksRanges()} returns the original -\code{Spectra} object with the filter operation added to the processing queue. -Thus, the filter gets \strong{only} applied when the peaks data gets extracted -with \code{mz()}, \code{intensity()} or \code{peaksData()}. 
If ranges for both spectra -\strong{and} peaks variables are defined, the function evaluates first whether -the spectra variable value for a spectrum is within the provided range and, -if so, applies also the peaks variable-based filter (otherwise an empty -peaks matrix is returned). - -If more than one spectra variable and/or peaks variable are defined, their -filter results are combined with a logical AND: a peak matrix is only -returned for a spectrum if all values of spectra variables are within the -provided (respective) ranges for spectra variables, and this matrix is -further filtered to contain only those peaks which values are within the -provided peaks variable ranges. - -\strong{Filtering with multiple ranges} per spectra and peaks variables is also -supported: ranges can also be provided as multi-row numeric (two-column) -matrices. In this case, the above described procedure is applied for each -row separately and their results are combined with a logical OR, i.e. -peaks matrices are returned that match any of the conditions/filters -of a row. The number of rows of the provided ranges (being it for spectra -or peaks variables) have to match. - -\strong{Missing value handling}: any comparison which involves a missing value -(being it a spectra variable value, a peaks variable value or a value -in one of the provided ranges) is treated as a logical \code{FALSE}. For -example, if the retention time of a spectrum is \code{NA} and the data is -filtered using a retention time range, an empty peaks matrix is returned -(for \code{keep = TRUE}, for \code{keep = FALSE} the full peaks matrix is returned). -} -\note{ -In contrast to some other \emph{filter} functions, this function does not provide -a \code{msLevel} parameter that allows to define the MS level of spectra on which -the filter should be applied. The filter(s) will always be applied to -\strong{all} spectra (irrespectively of their MS level). 
Through combination of -multiple filter ranges it is however possible to apply MS level-dependent -filters (see examples below for details). - -The filter will not be applied immediately to the data but only executed when -the mass peak data is accessed (through \code{peaksData()}, \code{mz()} or -\code{intensity()}) or by calling \code{applyProcessing()}. -} -\examples{ - -## Define a test Spectra -d <- data.frame(rtime = c(123.2, 134.2), msLevel = c(1L, 2L)) -d$mz <- list(c(100.1, 100.2, 100.3, 200.1, 200.2, 300.3), - c(100.3, 100.4, 200.2, 400.3, 400.4)) -## Use the index of the mass peak within the spectrum as index for -## better illustration of filtering results -d$intensity <- list(c(1:6), 1:5) -s <- Spectra(d) -s - -## Filter peaks removing all mass peaks with an m/z between 200 and 300 -res <- filterPeaksRanges(s, mz = c(200, 300), keep = FALSE) -res - -## The Spectra object has still the same length and spectra variables -length(res) -res$rtime - -## The filter gets applied when mass peak data gets extracted, using either -## `mz()`, `intensity()` or `peaksData()`. The filtered peaks data does -## not contain any mass peaks with m/z values between 200 and 300: -peaksData(res)[[1L]] -peaksData(res)[[2L]] - -## We next combine spectra and filter variables. We want to keep only mass -## peaks of MS2 spectra that have an m/z between 100 and 110. -res <- filterPeaksRanges(s, mz = c(100, 110), msLevel = c(2, 2)) -res -length(res) - -## Only data for peaks are returned for which the spectra's MS level is -## between 2 and 2 and with an m/z between 100 and 110. The peaks data for -## the first spectrum, that has MS level 1, is thus empty: -peaksData(res)[[1L]] - -## While the peaks matrix for the second spectrum (with MS level 2) contains -## the mass peaks with m/z between 100 and 110. 
-peaksData(res)[[2L]] - -## To keep also the peaks data for the first spectrum, we need to define -## an additional set of ranges, which we define using a second row in each -## ranges matrix. We use the same filter as above, i.e. keeping only mass -## peaks with an m/z between 100 and 110 for spectra with MS level 2, but -## add an additional row for MS level 1 spectra keeping mass peaks with an -## m/z between 0 and 2000. Filter results of different rows are combined -## using a logical OR, i.e. peaks matrices with mass peaks are returned -## matching either the first, or the second row. -res <- filterPeaksRanges(s, mz = rbind(c(100, 110), c(0, 1000)), - msLevel = rbind(c(2, 2), c(1, 1))) - -## The results for the MS level 2 spectrum are the same as before, but with -## the additional row we keep the full peaks matrix of the MS1 spectrum: -peaksData(res)[[1L]] -peaksData(res)[[2L]] - -## As a last example we define a filter that keeps all mass peaks with an -## m/z either between 100 and 200, or between 300 and 400. -res <- filterPeaksRanges(s, mz = rbind(c(100, 200), c(300, 400))) -peaksData(res)[[1L]] -peaksData(res)[[2L]] - -## Such filters could thus be defined to restrict/filter the MS data to -## specific e.g. retention time and m/z ranges. 
-} -\author{ -Johannes Rainer -} diff --git a/man/hidden_aliases.Rd b/man/hidden_aliases.Rd index c03adb62..de5a31b8 100644 --- a/man/hidden_aliases.Rd +++ b/man/hidden_aliases.Rd @@ -8,9 +8,17 @@ \alias{[,MsBackendDataFrame-method} \alias{ppm} \alias{bin,numeric-method} +\alias{containsMz} +\alias{containsNeutralLoss} +\alias{dropNaSpectraVariables} +\alias{entropy} +\alias{export} +\alias{pickPeaks} +\alias{replaceIntensitiesBelow} +\alias{reset} +\alias{selectSpectraVariables} \alias{show,MsBackendDataFrame-method} \alias{backendMerge,MsBackendDataFrame-method} -\alias{backendRequiredSpectraVariables,MsBackendDataFrame-method} \alias{acquisitionNum,MsBackendDataFrame-method} \alias{peaksData,MsBackendDataFrame-method} \alias{centroided,MsBackendDataFrame-method} @@ -21,7 +29,6 @@ \alias{dataOrigin<-,MsBackendDataFrame-method} \alias{dataStorage,MsBackendDataFrame-method} \alias{dataStorage<-,MsBackendDataFrame-method} -\alias{extractByIndex,MsBackendDataFrame,ANY-method} \alias{intensity,MsBackendDataFrame-method} \alias{intensity<-,MsBackendDataFrame-method} \alias{isEmpty,MsBackendDataFrame-method} @@ -62,7 +69,6 @@ \alias{cbind2,MsBackendDataFrame,dataframeOrDataFrameOrmatrix-method} \alias{split,MsBackendDataFrame,ANY-method} \alias{filterAcquisitionNum,MsBackendDataFrame-method} -\alias{backendRequiredSpectraVariables,MsBackendHdf5Peaks-method} \alias{backendInitialize,MsBackendHdf5Peaks-method} \alias{show,MsBackendHdf5Peaks-method} \alias{peaksData,MsBackendHdf5Peaks-method} @@ -79,11 +85,9 @@ \alias{spectraData<-,MsBackendHdf5Peaks-method} \alias{$<-,MsBackendHdf5Peaks-method} \alias{[,MsBackendHdf5Peaks-method} -\alias{extractByIndex,MsBackendHdf5Peaks,ANY-method} \alias{backendMerge,MsBackendHdf5Peaks-method} \alias{show,MsBackendMemory-method} \alias{backendMerge,MsBackendMemory-method} -\alias{backendRequiredSpectraVariables,MsBackendMemory-method} \alias{acquisitionNum,MsBackendMemory-method} \alias{centroided,MsBackendMemory-method} 
\alias{centroided<-,MsBackendMemory-method} @@ -93,7 +97,6 @@ \alias{dataOrigin<-,MsBackendMemory-method} \alias{dataStorage,MsBackendMemory-method} \alias{dataStorage<-,MsBackendMemory-method} -\alias{extractByIndex,MsBackendMemory,ANY-method} \alias{intensity,MsBackendMemory-method} \alias{intensity<-,MsBackendMemory-method} \alias{ionCount,MsBackendMemory-method} @@ -137,7 +140,6 @@ \alias{cbind2,MsBackendMemory,dataframeOrDataFrameOrmatrix-method} \alias{split,MsBackendMemory,ANY-method} \alias{filterAcquisitionNum,MsBackendMemory-method} -\alias{backendRequiredSpectraVariables,MsBackendMzR-method} \alias{backendInitialize,MsBackendMzR-method} \alias{show,MsBackendMzR-method} \alias{peaksData,MsBackendMzR-method} @@ -170,12 +172,28 @@ .check = TRUE ) +containsMz(object, ...) + +containsNeutralLoss(object, ...) + +dropNaSpectraVariables(object, ...) + +entropy(object, ...) + +export(object, ...) + +pickPeaks(object, ...) + +replaceIntensitiesBelow(object, threshold = min, ...) + +reset(object, ...) + +selectSpectraVariables(object, ...) + \S4method{show}{MsBackendDataFrame}(object) \S4method{backendMerge}{MsBackendDataFrame}(object, ...) -\S4method{backendRequiredSpectraVariables}{MsBackendDataFrame}(object, ...) - \S4method{acquisitionNum}{MsBackendDataFrame}(object) \S4method{peaksData}{MsBackendDataFrame}(object, columns = c("mz", "intensity")) @@ -196,8 +214,6 @@ \S4method{dataStorage}{MsBackendDataFrame}(object) <- value -\S4method{extractByIndex}{MsBackendDataFrame,ANY}(object, i) - \S4method{intensity}{MsBackendDataFrame}(object) \S4method{intensity}{MsBackendDataFrame}(object) <- value @@ -285,8 +301,6 @@ dataOrigin = character() ) -\S4method{backendRequiredSpectraVariables}{MsBackendHdf5Peaks}(object, ...) 
- \S4method{backendInitialize}{MsBackendHdf5Peaks}( object, files = character(), @@ -326,16 +340,12 @@ \S4method{[}{MsBackendHdf5Peaks}(x, i, j, ..., drop = FALSE) -\S4method{extractByIndex}{MsBackendHdf5Peaks,ANY}(object, i) - \S4method{backendMerge}{MsBackendHdf5Peaks}(object, ...) \S4method{show}{MsBackendMemory}(object) \S4method{backendMerge}{MsBackendMemory}(object, ...) -\S4method{backendRequiredSpectraVariables}{MsBackendMemory}(object, ...) - \S4method{acquisitionNum}{MsBackendMemory}(object) \S4method{centroided}{MsBackendMemory}(object) @@ -354,8 +364,6 @@ \S4method{dataStorage}{MsBackendMemory}(object) <- value -\S4method{extractByIndex}{MsBackendMemory,ANY}(object, i) - \S4method{intensity}{MsBackendMemory}(object) \S4method{intensity}{MsBackendMemory}(object) <- value @@ -447,8 +455,6 @@ dataOrigin = character() ) -\S4method{backendRequiredSpectraVariables}{MsBackendMzR}(object, ...) - \S4method{backendInitialize}{MsBackendMzR}(object, files, ..., BPPARAM = bpparam()) \S4method{show}{MsBackendMzR}(object) diff --git a/man/joinPeaks.Rd b/man/joinPeaks.Rd index bc1fa688..29cabc8d 100644 --- a/man/joinPeaks.Rd +++ b/man/joinPeaks.Rd @@ -142,12 +142,7 @@ joinPeaksGnps(x, y, pmz_x, pmz_y) joinPeaksGnps(x, y, pmz_x, yPrecursorMz = NA) } \seealso{ -\itemize{ -\item \code{\link[=compareSpectra]{compareSpectra()}} for the function to calculate similarities between -spectra. -\item \code{\link[=gnps]{gnps()}} in the \emph{MsCoreUtils} package for more information on the GNPS -similarity score. 
-} +\code{\link[=gnps]{gnps()}} } \author{ Johannes Rainer, Michael Witting diff --git a/man/neutralLoss.Rd b/man/neutralLoss.Rd index d27cd3c8..da1a887e 100644 --- a/man/neutralLoss.Rd +++ b/man/neutralLoss.Rd @@ -1,11 +1,13 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Spectra-neutralLoss.R +% Please edit documentation in R/AllGenerics.R, R/Spectra-neutralLoss.R \name{neutralLoss} \alias{neutralLoss} \alias{PrecursorMzParam} \alias{neutralLoss,Spectra,PrecursorMzParam-method} \title{Calculate Neutral Loss Spectra} \usage{ +neutralLoss(object, param, ...) + PrecursorMzParam( filterPeaks = c("none", "abovePrecursor", "belowPrecursor", "removePrecursor"), msLevel = c(2L, NA_integer_), @@ -16,6 +18,13 @@ PrecursorMzParam( \S4method{neutralLoss}{Spectra,PrecursorMzParam}(object, param, ...) } \arguments{ +\item{object}{\code{\link[=Spectra]{Spectra()}} object with the fragment spectra for which neutral +loss spectra should be calculated.} + +\item{param}{One of the \emph{parameter} objects discussed below.} + +\item{...}{Currently ignored.} + \item{filterPeaks}{For \code{PrecursorMzParam()}: \code{character(1)} or \code{function} defining if and how fragment peaks should be filtered before calculation. Pre-defined options are: \code{"none"} (keep all peaks), \code{"abovePrecursor"} @@ -38,13 +47,6 @@ for details.} \item{tolerance}{\code{numeric(1)} with absolute acceptable difference in m/z values to filter peaks. Defaults to \code{tolerance = 0}. See function description for details.} - -\item{object}{\code{\link[=Spectra]{Spectra()}} object with the fragment spectra for which neutral -loss spectra should be calculated.} - -\item{param}{One of the \emph{parameter} objects discussed below.} - -\item{...}{Currently ignored.} } \value{ A \code{\link[=Spectra]{Spectra()}} object with calculated neutral loss spectra. @@ -134,9 +136,6 @@ Aisporna A, Benton PH, Chen A, Derks RJE, Galano JM, Giera M and Siuzdak G Analysis in METLIN. 
Journal of the American Society for Mass Spectrometry. \doi{10.1021/jasms.1c00343} } -\seealso{ -\code{\link[=addProcessing]{addProcessing()}} for other data analysis and manipulation functions. -} \author{ Johannes Rainer } diff --git a/man/processingChunkSize.Rd b/man/processingChunkSize.Rd index a9382611..b47d8c69 100644 --- a/man/processingChunkSize.Rd +++ b/man/processingChunkSize.Rd @@ -1,10 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Spectra-functions.R, R/Spectra.R +% Please edit documentation in R/Spectra-functions.R \name{processingChunkSize} \alias{processingChunkSize} \alias{processingChunkSize<-} \alias{processingChunkFactor} -\alias{backendBpparam,Spectra-method} \title{Parallel and chunk-wise processing of \code{Spectra}} \usage{ processingChunkSize(x) @@ -12,18 +11,11 @@ processingChunkSize(x) processingChunkSize(x) <- value processingChunkFactor(x) - -\S4method{backendBpparam}{Spectra}(object, BPPARAM = bpparam()) } \arguments{ \item{x}{\code{Spectra}.} \item{value}{\code{integer(1)} defining the chunk size.} - -\item{object}{\code{Spectra} object.} - -\item{BPPARAM}{Parallel setup configuration. 
See \code{\link[=bpparam]{bpparam()}} for more -information.} } \value{ \code{processingChunkSize()} returns the currently defined processing diff --git a/man/spectraData.Rd b/man/spectraData.Rd deleted file mode 100644 index 2aad735f..00000000 --- a/man/spectraData.Rd +++ /dev/null @@ -1,601 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Spectra.R -\name{spectraData} -\alias{spectraData} -\alias{acquisitionNum} -\alias{centroided} -\alias{collisionEnergy} -\alias{dataOrigin} -\alias{dataStorage} -\alias{intensity} -\alias{ionCount} -\alias{isCentroided} -\alias{isEmpty} -\alias{isolationWindowLowerMz} -\alias{isolationWindowUpperMz} -\alias{isolationWindowTargetMz} -\alias{lengths} -\alias{msLevel} -\alias{mz} -\alias{peaksData} -\alias{peaksVariables} -\alias{polarity} -\alias{precursorCharge} -\alias{precursorIntensity} -\alias{precursorMz} -\alias{rtime} -\alias{scanIndex} -\alias{smoothed} -\alias{spectraNames} -\alias{spectraVariables} -\alias{tic} -\alias{uniqueMsLevels} -\alias{asDataFrame} -\alias{acquisitionNum,Spectra-method} -\alias{centroided,Spectra-method} -\alias{centroided<-,Spectra-method} -\alias{collisionEnergy,Spectra-method} -\alias{collisionEnergy<-,Spectra-method} -\alias{coreSpectraVariables} -\alias{dataOrigin,Spectra-method} -\alias{dataOrigin<-,Spectra-method} -\alias{dataStorage,Spectra-method} -\alias{intensity,Spectra-method} -\alias{ionCount,Spectra-method} -\alias{isCentroided,Spectra-method} -\alias{isEmpty,Spectra-method} -\alias{isolationWindowLowerMz,Spectra-method} -\alias{isolationWindowLowerMz<-,Spectra-method} -\alias{isolationWindowTargetMz,Spectra-method} -\alias{isolationWindowTargetMz<-,Spectra-method} -\alias{isolationWindowUpperMz,Spectra-method} -\alias{isolationWindowUpperMz<-,Spectra-method} -\alias{length,Spectra-method} -\alias{lengths,Spectra-method} -\alias{msLevel,Spectra-method} -\alias{mz,Spectra-method} -\alias{peaksData,Spectra-method} 
-\alias{peaksVariables,Spectra-method} -\alias{polarity,Spectra-method} -\alias{polarity<-,Spectra-method} -\alias{precScanNum,Spectra-method} -\alias{precursorCharge,Spectra-method} -\alias{precursorIntensity,Spectra-method} -\alias{precursorMz,Spectra-method} -\alias{precursorMz<-,Spectra-method} -\alias{rtime,Spectra-method} -\alias{rtime<-,Spectra-method} -\alias{scanIndex,Spectra-method} -\alias{smoothed,Spectra-method} -\alias{smoothed<-,Spectra-method} -\alias{spectraData,Spectra-method} -\alias{spectraData<-,Spectra-method} -\alias{spectraNames,Spectra-method} -\alias{spectraNames<-,Spectra-method} -\alias{spectraVariables,Spectra-method} -\alias{tic,Spectra-method} -\alias{uniqueMsLevels,Spectra-method} -\alias{$,Spectra-method} -\alias{$<-,Spectra-method} -\alias{[[,Spectra-method} -\alias{[[<-,Spectra-method} -\title{Accessing mass spectrometry data} -\usage{ -asDataFrame( - object, - i = seq_along(object), - spectraVars = spectraVariables(object) -) - -\S4method{acquisitionNum}{Spectra}(object) - -\S4method{centroided}{Spectra}(object) - -\S4method{centroided}{Spectra}(object) <- value - -\S4method{collisionEnergy}{Spectra}(object) - -\S4method{collisionEnergy}{Spectra}(object) <- value - -coreSpectraVariables() - -\S4method{dataOrigin}{Spectra}(object) - -\S4method{dataOrigin}{Spectra}(object) <- value - -\S4method{dataStorage}{Spectra}(object) - -\S4method{intensity}{Spectra}(object, f = processingChunkFactor(object), ...) - -\S4method{ionCount}{Spectra}(object) - -\S4method{isCentroided}{Spectra}(object, ...) 
- -\S4method{isEmpty}{Spectra}(x) - -\S4method{isolationWindowLowerMz}{Spectra}(object) - -\S4method{isolationWindowLowerMz}{Spectra}(object) <- value - -\S4method{isolationWindowTargetMz}{Spectra}(object) - -\S4method{isolationWindowTargetMz}{Spectra}(object) <- value - -\S4method{isolationWindowUpperMz}{Spectra}(object) - -\S4method{isolationWindowUpperMz}{Spectra}(object) <- value - -\S4method{length}{Spectra}(x) - -\S4method{lengths}{Spectra}(x, use.names = FALSE) - -\S4method{msLevel}{Spectra}(object) - -\S4method{mz}{Spectra}(object, f = processingChunkFactor(object), ...) - -\S4method{peaksData}{Spectra}( - object, - columns = c("mz", "intensity"), - f = processingChunkFactor(object), - ..., - BPPARAM = bpparam() -) - -\S4method{peaksVariables}{Spectra}(object) - -\S4method{polarity}{Spectra}(object) - -\S4method{polarity}{Spectra}(object) <- value - -\S4method{precScanNum}{Spectra}(object) - -\S4method{precursorCharge}{Spectra}(object) - -\S4method{precursorIntensity}{Spectra}(object) - -\S4method{precursorMz}{Spectra}(object) - -\S4method{precursorMz}{Spectra}(object, ...) <- value - -\S4method{rtime}{Spectra}(object) - -\S4method{rtime}{Spectra}(object) <- value - -\S4method{scanIndex}{Spectra}(object) - -\S4method{smoothed}{Spectra}(object) - -\S4method{smoothed}{Spectra}(object) <- value - -\S4method{spectraData}{Spectra}(object, columns = spectraVariables(object)) - -\S4method{spectraData}{Spectra}(object) <- value - -\S4method{spectraNames}{Spectra}(object) - -\S4method{spectraNames}{Spectra}(object) <- value - -\S4method{spectraVariables}{Spectra}(object) - -\S4method{tic}{Spectra}(object, initial = TRUE) - -\S4method{uniqueMsLevels}{Spectra}(object, ...) - -\S4method{$}{Spectra}(x, name) - -\S4method{$}{Spectra}(x, name) <- value - -\S4method{[[}{Spectra}(x, i, j, ...) - -\S4method{[[}{Spectra}(x, i, j, ...) 
<- value -} -\arguments{ -\item{object}{A \code{Spectra} object.} - -\item{i}{For \code{asDataFrame()}: A \code{numeric} indicating which scans to coerce -to a \code{DataFrame} (default is \code{seq_along(object)}).} - -\item{spectraVars}{\code{character()} indicating what spectra variables to add to -the \code{DataFrame}. Default is \code{spectraVariables(object)}, i.e. all -available variables.} - -\item{value}{A vector with values to replace the respective spectra -variable. Needs to be of the correct data type for the spectra variable.} - -\item{f}{For \code{intensity()}, \code{mz()} and \code{peaksData()}: factor defining how -data should be chunk-wise loaded an processed. Defaults to -\code{\link[=processingChunkFactor]{processingChunkFactor()}}.} - -\item{...}{Additional arguments.} - -\item{x}{A \code{Spectra} object.} - -\item{use.names}{For \code{lengths()}: ignored.} - -\item{columns}{For \code{spectraData()} accessor: optional \code{character} with -column names (spectra variables) that should be included in the -returned \code{DataFrame}. By default, all columns are returned. -For \code{peaksData()} accessor: optional \code{character} with requested columns -in the individual \code{matrix} of the returned \code{list}. Defaults to -\code{c("mz", "value")} but any values returned by \code{peaksVariables(object)} -with \code{object} being the \code{Spectra} object are supported.} - -\item{BPPARAM}{Parallel setup configuration. See \code{\link[=bpparam]{bpparam()}} for more -information. 
See also \code{\link[=processingChunkSize]{processingChunkSize()}} for more information -on parallel processing.} - -\item{initial}{For \code{tic()}: \code{logical(1)} whether the initially -reported total ion current should be reported, or whether the -total ion current should be (re)calculated on the actual data -(\code{initial = FALSE}, same as \code{ionCount()}).} - -\item{name}{For \code{$} and \verb{$<-}: the name of the spectra variable to return -or set.} - -\item{j}{For \code{[}: not supported.} -} -\description{ -As detailed in the documentation of the \link{Spectra} class, a \code{Spectra} object -is a container for mass spectrometry (MS) data that includes both the mass -peaks data (or \emph{peaks data}, generally \emph{m/z} and intensity values) as well -as spectra metadata (so called \emph{spectra variables}). Spectra variables -generally define one value per spectrum, while for peaks variables one value -per mass peak is defined and hence multiple values per spectrum (depending -on the number of mass peaks of a spectrum). - -Data can be extracted from a \code{Spectra} object using dedicated accessor -functions or also using the \code{$} operator. Depending on the backend class -used by the \code{Spectra} to represent the data, data can also be added or -replaced (again, using dedicated functions or using \verb{$<-}). -} -\section{Spectra variables}{ - - -A common set of \emph{core spectra variables} are defined for \code{Spectra}. These -have a pre-defined data type and each \code{Spectra} will return a value for -these if requested. If no value for a spectra variable is defined, a missing -value (of the correct data type) is returned. The list of core spectra -variables and their respective data type is: -\itemize{ -\item \emph{acquisitionNum} \code{integer(1)}: the index of acquisition of a spectrum -during an MS run. -\item \emph{centroided} \code{logical(1)}: whether the spectrum is in profile or centroid -mode. 
-\item \emph{collisionEnergy} \code{numeric(1)}: collision energy used to create an MSn -spectrum. -\item \emph{dataOrigin} \code{character(1)}: the \emph{origin} of the spectrum's data, e.g. the -mzML file from which it was read. -\item \emph{dataStorage} \code{character(1)}: the (current) storage location of the -spectrum data. This value depends on the backend used to handle and -provide the data. For an \emph{in-memory} backend like the \code{MsBackendDataFrame} -this will be \code{""}, for an on-disk backend such as the -\code{MsBackendHdf5Peaks} it will be the name of the HDF5 file where the -spectrum's peak data is stored. -\item \emph{isolationWindowLowerMz} \code{numeric(1)}: lower m/z for the isolation -window in which the (MSn) spectrum was measured. -\item \emph{isolationWindowTargetMz} \code{numeric(1)}: the target m/z for the isolation -window in which the (MSn) spectrum was measured. -\item \emph{isolationWindowUpperMz} \code{numeric(1)}: upper m/z for the isolation window -in which the (MSn) spectrum was measured. -\item \emph{msLevel} \code{integer(1)}: the MS level of the spectrum. -\item \emph{polarity} \code{integer(1)}: the polarity of the spectrum (\code{0} and \code{1} -representing negative and positive polarity, respectively). -\item \emph{precScanNum} \code{integer(1)}: the scan (acquisition) number of the precursor -for an MSn spectrum. -\item \emph{precursorCharge} \code{integer(1)}: the charge of the precursor of an MSn -spectrum. -\item \emph{precursorIntensity} \code{numeric(1)}: the intensity of the precursor of an -MSn spectrum. -\item \emph{precursorMz} \code{numeric(1)}: the m/z of the precursor of an MSn spectrum. -\item \emph{rtime} \code{numeric(1)}: the retention time of a spectrum. -\item \emph{scanIndex} \code{integer(1)}: the index of a spectrum within a (raw) file. -\item \emph{smoothed} \code{logical(1)}: whether the spectrum was smoothed. 
-} - -For each of these spectra variable a dedicated accessor function is defined -(such as \code{msLevel()} or \code{rtime()}) that allows to extract the values of -that spectra variable for all spectra in a \code{Spectra} object. Also, -replacement functions are defined, but not all backends might support -replacing values for spectra variables. As described above, additional -spectra variables can be defined or added. The \code{spectraVariables()} function -can be used to - -Values for multiple spectra variables, or all spectra vartiables* can be -extracted with the \code{spectraData()} function. -} - -\section{Peaks variables}{ - - -\code{Spectra} also provide mass peak data with the \emph{m/z} and intensity values -being the \emph{core} peaks variables: -\itemize{ -\item \emph{intensity} \code{numeric}: intensity values for the spectrum's peaks. -\item \emph{mz} \code{numeric}: the m/z values for the spectrum's peaks. -} - -Values for these can be extracted with the \code{mz()} and \code{intensity()} -functions, or the \code{peaksData()} function. The former functions return a -\code{NumericList} with the respective values, while the latter returns a \code{List} -with \code{numeric} two-column matrices. The list of peaks matrices can also -be extracted using \code{as(x, "list")} or \code{as(x, "SimpleList")} with \code{x} being -a \code{Spectra} object. - -Some \code{Spectra}/backends provide also values for additional peaks variables. -The set of available peaks variables can be extracted with the -\code{peaksVariables()} function. -} - -\section{Functions to access MS data}{ - - -The set of available functions to extract data from, or set data in, a -\code{Spectra} object are (in alphabetical order) listed below. Note that there -are also other functions to extract information from a \code{Spectra} object -documented in \code{\link[=addProcessing]{addProcessing()}}. 
-\itemize{ -\item \code{$}, \verb{$<-}: gets (or sets) a spectra variable for all spectra in \code{object}. -See examples for details. Note that replacing values of a peaks variable -is not supported with a non-empty processing queue, i.e. if any filtering -or data manipulations on the peaks data was performed. In these cases -\code{\link[=applyProcessing]{applyProcessing()}} needs to be called first to apply all cached data -operations. -\item \code{[[}, \verb{[[<-}: access or set/add a single spectrum variable (column) in the -backend. -\item \code{acquisitionNum()}: returns the acquisition number of each -spectrum. Returns an \code{integer} of length equal to the number of -spectra (with \code{NA_integer_} if not available). -\item \code{asDataFrame()}: converts the \code{Spectra} to a \code{DataFrame} (in long format) -contining all data. Returns a \code{DataFrame}. -\item \code{centroided()}, \verb{centroided<-}: gets or sets the centroiding -information of the spectra. \code{centroided()} returns a \code{logical} -vector of length equal to the number of spectra with \code{TRUE} if a -spectrum is centroided, \code{FALSE} if it is in profile mode and \code{NA} -if it is undefined. See also \code{isCentroided()} for estimating from -the spectrum data whether the spectrum is centroided. \code{value} -for \verb{centroided<-} is either a single \code{logical} or a \code{logical} of -length equal to the number of spectra in \code{object}. -\item \code{collisionEnergy()}, \verb{collisionEnergy<-}: gets or sets the -collision energy for all spectra in \code{object}. \code{collisionEnergy()} -returns a \code{numeric} with length equal to the number of spectra -(\code{NA_real_} if not present/defined), \verb{collisionEnergy<-} takes a -\code{numeric} of length equal to the number of spectra in \code{object}. -\item \code{coreSpectraVariables()}: returns the \emph{core} spectra variables along with -their expected data type. 
-\item \code{dataOrigin()}, \verb{dataOrigin<-}: gets or sets the \emph{data origin} for each -spectrum. \code{dataOrigin()} returns a \code{character} vector (same length than -\code{object}) with the origin of the spectra. \verb{dataOrigin<-} expects a -\code{character} vector (same length than \code{object}) with the replacement -values for the data origin of each spectrum. -\item \code{dataStorage()}: returns a \code{character} vector (same length than \code{object}) -with the data storage location of each spectrum. -\item \code{intensity()}: gets the intensity values from the spectra. Returns -a \code{\link[=NumericList]{NumericList()}} of \code{numeric} vectors (intensity values for each -spectrum). The length of the list is equal to the number of -\code{spectra} in \code{object}. -\item \code{ionCount()}: returns a \code{numeric} with the sum of intensities for -each spectrum. If the spectrum is empty (see \code{isEmpty()}), -\code{NA_real_} is returned. -\item \code{isCentroided()}: a heuristic approach assessing if the spectra in -\code{object} are in profile or centroided mode. The function takes -the \code{qtl}th quantile top peaks, then calculates the difference -between adjacent m/z value and returns \code{TRUE} if the first -quartile is greater than \code{k}. (See \code{Spectra:::.isCentroided()} for -the code.) -\item \code{isEmpty()}: checks whether a spectrum in \code{object} is empty -(i.e. does not contain any peaks). Returns a \code{logical} vector of -length equal number of spectra. -\item \code{isolationWindowLowerMz()}, \verb{isolationWindowLowerMz<-}: gets or sets the -lower m/z boundary of the isolation window. -\item \code{isolationWindowTargetMz()}, \verb{isolationWindowTargetMz<-}: gets or sets the -target m/z of the isolation window. -\item \code{isolationWindowUpperMz()}, \verb{isolationWindowUpperMz<-}: gets or sets the -upper m/z boundary of the isolation window. -\item \code{length()}: gets the number of spectra in the object. 
-\item \code{lengths()}: gets the number of peaks (m/z-intensity values) per -spectrum. Returns an \code{integer} vector (length equal to the -number of spectra). For empty spectra, \code{0} is returned. -\item \code{msLevel()}: gets the spectra's MS level. Returns an integer vector (names -being spectrum names, length equal to the number of spectra) with the MS -level for each spectrum. -\item \code{mz()}: gets the mass-to-charge ratios (m/z) from the -spectra. Returns a \code{\link[=NumericList]{NumericList()}} or length equal to the number of -spectra, each element a \code{numeric} vector with the m/z values of -one spectrum. -\item \code{peaksData()}: gets the \emph{peaks} data for all spectra in \code{object}. Peaks -data consist of the m/z and intensity values as well as possible additional -annotations (variables) of all peaks of each spectrum. The function -returns a \code{\link[=SimpleList]{SimpleList()}} of two dimensional arrays (either \code{matrix} or -\code{data.frame}), with each array providing the values for the requested -\emph{peak variables} (by default \code{"mz"} and \code{"intensity"}). Optional parameter -\code{columns} is passed to the backend's \code{peaksData()} function to allow -the selection of specific (or additional) peaks variables (columns) that -should be extracted (if available). Importantly, -it is \strong{not} guaranteed that each backend supports this parameter (while -each backend must support extraction of \code{"mz"} and \code{"intensity"} columns). -Parameter \code{columns} defaults to \code{c("mz", "intensity")} but any value -returned by \code{peaksVariables(object)} is supported. -Note also that it is possible to extract the peak data with -\code{as(x, "list")} and \code{as(x, "SimpleList")} as a \code{list} and \code{SimpleList}, -respectively. Note however that, in contrast to \code{peaksData()}, \code{as()} -does not support the parameter \code{columns}. 
-\item \code{peaksVariables()}: lists the available variables for mass peaks provided -by the backend. Default peak variables are \code{"mz"} and \code{"intensity"} (which -all backends need to support and provide), but some backends might provide -additional variables. -These variables correspond to the column names of the peak data array -returned by \code{peaksData()}. -\item \code{polarity()}, \verb{polarity<-}: gets or sets the polarity for each -spectrum. \code{polarity()} returns an \code{integer} vector (length equal -to the number of spectra), with \code{0} and \code{1} representing negative -and positive polarities, respectively. \verb{polarity<-} expects an -\code{integer} vector of length 1 or equal to the number of spectra. -\item \code{precursorCharge()}, \code{precursorIntensity()}, \code{precursorMz()}, -\code{precScanNum()}, \code{precAcquisitionNum()}: gets the charge (\code{integer}), -intensity (\code{numeric}), m/z (\code{numeric}), scan index (\code{integer}) -and acquisition number (\code{interger}) of the precursor for MS level > -2 spectra from the object. Returns a vector of length equal to -the number of spectra in \code{object}. \code{NA} are reported for MS1 -spectra of if no precursor information is available. -\item \code{rtime()}, \verb{rtime<-}: gets or sets the retention times (in seconds) -for each spectrum. \code{rtime()} returns a \code{numeric} vector (length -equal to the number of spectra) with the retention time for each -spectrum. \verb{rtime<-} expects a numeric vector with length equal -to the number of spectra. -\item \code{scanIndex()}: returns an \code{integer} vector with the \emph{scan index} -for each spectrum. This represents the relative index of the -spectrum within each file. Note that this can be different to the -\code{acquisitionNum} of the spectrum which represents the index of the -spectrum during acquisition/measurement (as reported in the mzML file). 
-\item \code{smoothed()},\verb{smoothed<-}: gets or sets whether a spectrum is -\emph{smoothed}. \code{smoothed()} returns a \code{logical} vector of length equal -to the number of spectra. \verb{smoothed<-} takes a \code{logical} vector -of length 1 or equal to the number of spectra in \code{object}. -\item \code{spectraData()}: gets general spectrum metadata (annotation, also called -header). \code{spectraData()} returns a \code{DataFrame}. Note that this -method does by default \strong{not} return m/z or intensity values. -\item \verb{spectraData<-}: \strong{replaces} the full spectra data of the \code{Spectra} -object with the one provided with \code{value}. The \verb{spectraData<-} function -expects a \code{DataFrame} to be passed as value with the same number of rows -as there a spectra in \code{object}. Note that replacing values of -peaks variables is not supported with a non-empty processing queue, i.e. -if any filtering or data manipulations on the peaks data was performed. -In these cases \code{\link[=applyProcessing]{applyProcessing()}} needs to be called first to apply all -cached data operations and empty the processing queue. -\item \code{spectraNames()}, \verb{spectraNames<-}: gets or sets the spectra names. -\item \code{spectraVariables()}: returns a \code{character} vector with the -available spectra variables (columns, fields or attributes of each -spectrum) available in \code{object}. Note that \code{spectraVariables()} does not -list the \emph{peak variables} (\code{"mz"}, \code{"intensity"} and eventual additional -annotations for each MS peak). Peak variables are returned by -\code{peaksVariables()}. -\item \code{tic()}: gets the total ion current/count (sum of signal of a -spectrum) for all spectra in \code{object}. By default, the value -reported in the original raw data file is returned. For an empty -spectrum, \code{0} is returned. -\item \code{uniqueMsLevels()}: get the unique MS levels available in \code{object}. 
This -function is supposed to be more efficient than \code{unique(msLevel(object))}. -} -} - -\examples{ - -## Create a Spectra from mzML files and use the `MsBackendMzR` on-disk -## backend. -sciex_file <- dir(system.file("sciex", package = "msdata"), - full.names = TRUE) -sciex <- Spectra(sciex_file, backend = MsBackendMzR()) -sciex - -## Get the number of spectra in the data set -length(sciex) - -## Get the number of mass peaks per spectrum - limit to the first 6 -lengths(sciex) |> head() - -## Get the MS level for each spectrum - limit to the first 6 spectra -msLevel(sciex) |> head() - -## Alternatively, we could also use $ to access a specific spectra variable. -## This could also be used to add additional spectra variables to the -## object (see further below). -sciex$msLevel |> head() - -## Get the intensity and m/z values. -intensity(sciex) -mz(sciex) - -## Convert a subset of the Spectra object to a long DataFrame. -asDataFrame(sciex, i = 1:3, spectraVars = c("rtime", "msLevel")) - -## Create a Spectra providing a `DataFrame` containing the spectrum data. - -spd <- DataFrame(msLevel = c(1L, 2L), rtime = c(1.1, 1.2)) -spd$mz <- list(c(100, 103.2, 104.3, 106.5), c(45.6, 120.4, 190.2)) -spd$intensity <- list(c(200, 400, 34.2, 17), c(12.3, 15.2, 6.8)) - -s <- Spectra(spd) -s - -## List all available spectra variables (i.e. spectrum data and metadata). -spectraVariables(s) - -## For all *core* spectrum variables accessor functions are available. These -## return NA if the variable was not set. -centroided(s) -dataStorage(s) -rtime(s) -precursorMz(s) - -## The core spectra variables are: -coreSpectraVariables() - -## Add an additional metadata column. -s$spectrum_id <- c("sp_1", "sp_2") - -## List spectra variables, "spectrum_id" is now also listed -spectraVariables(s) - -## Get the values for the new spectra variable -s$spectrum_id - -## Extract specific spectra variables. 
-spectraData(s, columns = c("spectrum_id", "msLevel")) - - -## -------- PEAKS VARIABLES AND DATA -------- - -## Get the peak data (m/z and intensity values). -pks <- peaksData(s) -pks -pks[[1]] -pks[[2]] - -## Note that we could get the same resulb by coercing the `Spectra` to -## a `list` or `SimpleList`: -as(s, "list") -as(s, "SimpleList") - -## Or use `mz()` and `intensity()` to extract the m/z and intensity values -## separately -mz(s) -intensity(s) - -## Some `MsBackend` classes provide support for arbitrary peaks variables -## (in addition to the mandatory `"mz"` and `"intensity"` values. Below -## we create a simple data frame with an additional peak variable `"pk_ann"` -## and create a `Spectra` with a `MsBackendMemory` for that data. -## Importantly the number of values (per spectrum) need to be the same -## for all peak variables. - -tmp <- data.frame(msLevel = c(2L, 2L), rtime = c(123.2, 123.5)) -tmp$mz <- list(c(103.1, 110.4, 303.1), c(343.2, 453.1)) -tmp$intensity <- list(c(130.1, 543.1, 40), c(0.9, 0.45)) -tmp$pk_ann <- list(c(NA_character_, "A", "P"), c("B", "P")) - -## Create the Spectra. With parameter `peaksVariables` we can define -## the columns in `tmp` that contain peaks variables. -sps <- Spectra(tmp, source = MsBackendMemory(), - peaksVariables = c("mz", "intensity", "pk_ann")) -peaksVariables(sps) - -## Extract just the m/z and intensity values -peaksData(sps)[[1L]] - -## Extract the full peaks data -peaksData(sps, columns = peaksVariables(sps))[[1L]] - -## Access just the pk_ann variable -sps$pk_ann - - -} -\seealso{ -\itemize{ -\item \code{\link[=addProcessing]{addProcessing()}} for functions to analyze \code{Spectra}. -\item \link{Spectra} for a general description of the \code{Spectra} object. 
-} -} -\author{ -Sebastian Gibb, Johannes Rainer, Laurent Gatto, Philippine Louail -} diff --git a/tests/testthat/_snaps/plotMzDelta/plotmzdelta-1000.svg b/tests/testthat/_snaps/plotMzDelta/plotmzdelta-1000.svg index e041fc61..e16506da 100644 --- a/tests/testthat/_snaps/plotMzDelta/plotmzdelta-1000.svg +++ b/tests/testthat/_snaps/plotMzDelta/plotmzdelta-1000.svg @@ -1,254 +1,579 @@ - - + + - - - - - - - - - -Histogram of Mass Delta Distributions -M/Z delta -Frequency - - - - - -50 -100 -150 -200 - - - - - - - -0 -500 -1000 -1500 -2000 -2500 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -peg -A -R -N -D -C -E -Q/K -G -H -I/L -M -F -P -S -T -W -Y -V + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/testthat/test_MsBackend.R b/tests/testthat/test_MsBackend.R index 5e91a0fe..3d3f7e28 100644 --- a/tests/testthat/test_MsBackend.R +++ b/tests/testthat/test_MsBackend.R @@ -56,33 +56,6 @@ test_that("MsBackend methods throw errors", { expect_error(dm[1], "implemented for") expect_error(dm$a, "implemented for") expect_error(dm$a <- "a", "implemented for") - expect_error(extractByIndex(dm, 1), "implemented for") - expect_equal(backendRequiredSpectraVariables(dm), character()) - expect_error(precursorMz(dm) <- 12.3, "implemented for") -}) - -test_that("extractByIndex not implemented fallback", { - ## Backends that don't implement a dedicated `extractByIndex` method should - ## fall back to the [ method. - setClass("DummyBackend", - contains = "MsBackend", - slots = c(d = "integer")) - dm <- new("DummyBackend") - expect_error(extractByIndex(dm, 1L), "'extractByIndex' not implemented") - - dm@d <- 1:4 - - ## Have an implementation for [ but not extractByIndex: - setMethod("[", "DummyBackend", function(x, i, j, ..., drop = FALSE) { - x@d <- x@d[i] - x - }) - - res <- dm[c(3, 1)] - expect_equal(res@d, c(3L, 1L)) - - res <- extractByIndex(dm, c(3, 1)) - expect_equal(res@d, c(3L, 1L)) }) test_that("reset,MsBackend works", { @@ -102,9 +75,3 @@ test_that("backendBpparam,MsBackend works", { test_that("backendParallelFactor,MsBackend works", { expect_equal(backendParallelFactor(MsBackendMemory()), factor()) }) - -test_that("dataStorageBasePath,MsExperiment works", { - expect_identical(dataStorageBasePath(MsBackendMemory()), NA_character_) - tmp <- MsBackendMemory() - expect_warning(dataStorageBasePath(tmp) <- "/", "not support") -}) diff --git a/tests/testthat/test_MsBackendCached.R b/tests/testthat/test_MsBackendCached.R index e547b190..6ff1b7ee 100644 --- a/tests/testthat/test_MsBackendCached.R +++ 
b/tests/testthat/test_MsBackendCached.R @@ -87,24 +87,12 @@ test_that("[,MsBackendCached works", { res <- be[c(1, 4, 3), ] expect_true(length(res) == 3) expect_true(nrow(res@localData) == 3) - res_2 <- extractByIndex(be, c(1, 4, 3)) - expect_equal(res, res_2) df <- data.frame(msLevel = 1L, b = 1:6) be <- backendInitialize(be, data = df) res <- be[c(6, 1, 3)] expect_true(length(res) == 3) expect_equal(res@localData$b, c(6, 1, 3)) - res_2 <- extractByIndex(be, c(6, 1, 3)) - expect_equal(res, res_2) - - res <- be[c(6, 1, 3, 1)] - expect_true(length(res) == 4) - expect_equal(res@localData$b, c(6, 1, 3, 1)) - res_2 <- extractByIndex(be, c(6, 1, 3, 1)) - expect_equal(res, res_2) - - expect_equal(extractByIndex(be), be) }) test_that("$,MsBackendCached works", { @@ -302,10 +290,3 @@ test_that("lengths,MsBackendCached works", { res <- lengths(be) expect_true(all(res == 0)) }) - -test_that("precursorMz<-,MsBackendCached works", { - be <- backendInitialize(MsBackendCached(), nspectra = 4) - expect_true(all(is.na(precursorMz(be)))) - precursorMz(be) <- c(1.1, 1.2, 1.3, 1.34) - expect_equal(precursorMz(be), c(1.1, 1.2, 1.3, 1.34)) -}) diff --git a/tests/testthat/test_MsBackendDataFrame.R b/tests/testthat/test_MsBackendDataFrame.R index 2cc04795..ec3aeec6 100644 --- a/tests/testthat/test_MsBackendDataFrame.R +++ b/tests/testthat/test_MsBackendDataFrame.R @@ -576,42 +576,24 @@ test_that("show,MsBackendDataFrame works", { test_that("[,MsBackendDataFrame works", { be <- MsBackendDataFrame() expect_error(be[1]) - - expect_equal(extractByIndex(be), be) - df <- DataFrame(scanIndex = 1:2, a = "a", b = "b") be <- backendInitialize(be, df) res <- be[1] expect_true(validObject(res)) expect_equal(be@spectraData[1, ], res@spectraData[1, ]) - res_2 <- extractByIndex(be, 1) - expect_equal(res, res_2) res <- be[2] expect_true(validObject(res)) expect_equal(be@spectraData[2, ], res@spectraData[1, ]) - res_2 <- extractByIndex(be, 2) - expect_equal(res, res_2) res <- be[2:1] 
expect_true(validObject(res)) expect_equal(be@spectraData[2:1, ], res@spectraData) - res_2 <- extractByIndex(be, 2:1) - expect_equal(res, res_2) - - res <- be[c(2, 1, 2)] - expect_equal(res$scanIndex, c(2, 1, 2)) - res_2 <- extractByIndex(be, c(2, 1, 2)) - expect_equal(res, res_2) res <- be[c(FALSE, FALSE)] expect_true(validObject(res)) expect_true(length(res) == 0) - res_2 <- extractByIndex(be, integer()) - expect_equal(res, res_2) res <- be[c(FALSE, TRUE)] expect_true(validObject(res)) expect_equal(be@spectraData[2, ], res@spectraData[1, ]) - res_2 <- extractByIndex(be, 2) - expect_equal(res, res_2) expect_error(be[TRUE], "match the length of") expect_error(be["a"], "does not have names") @@ -624,15 +606,11 @@ test_that("[,MsBackendDataFrame works", { expect_true(validObject(res)) expect_equal(dataStorage(res), "2") expect_equal(res@spectraData$file, "b") - res_2 <- extractByIndex(be, 3) - expect_equal(res, res_2) res <- be[c(3, 1)] expect_true(validObject(res)) expect_equal(dataStorage(res), c("2", "1")) expect_equal(res@spectraData$file, c("b", "a")) - res_2 <- extractByIndex(be, c(3, 1)) - expect_equal(res, res_2) }) test_that("cbind2, MsBackendDataFrame works", { @@ -660,32 +638,15 @@ test_that("selectSpectraVariables,MsBackendDataFrame works", { be <- backendInitialize(MsBackendDataFrame(), df) res <- selectSpectraVariables(be, c("dataStorage", "other_col")) - - expect_equal(res@peaksVariables, be@peaksVariables) expect_equal(colnames(res@spectraData), c("dataStorage", "other_col")) expect_equal(msLevel(res), c(NA_integer_, NA_integer_)) res <- selectSpectraVariables(be, c("dataStorage", "rtime")) expect_equal(colnames(res@spectraData), c("dataStorage", "rtime")) - expect_equal(res@peaksVariables, be@peaksVariables) - expect_error(selectSpectraVariables(be, "rtime"), "are required") + expect_error(selectSpectraVariables(be, "rtime"), "dataStorage is/are missing") expect_error(selectSpectraVariables(be, "something"), "something not available") - - df$mz <- 
list(c(1.2, 1.4), c(5.3, 34.5, 52.1)) - df$intensity <- list(c(123, 121.1), c(1231.1, 343.1, 21.1)) - be <- backendInitialize(MsBackendDataFrame(), df) - res <- selectSpectraVariables(be, c("dataStorage", "other_col")) - expect_equal(colnames(res@spectraData), c("dataStorage", "other_col")) - expect_equal(msLevel(res), c(NA_integer_, NA_integer_)) - expect_equal(res@peaksVariables, character()) - - be <- backendInitialize(MsBackendDataFrame(), df) - res <- selectSpectraVariables(be, c("dataStorage", "mz", "intensity")) - expect_equal(colnames(res@spectraData), c("dataStorage", "mz", "intensity")) - expect_equal(msLevel(res), c(NA_integer_, NA_integer_)) - expect_equal(res@peaksVariables, c("mz", "intensity")) }) test_that("$,$<-,MsBackendDataFrame works", { @@ -1040,8 +1001,3 @@ test_that("[[,[[<-,MsBackendDataFrame works", { test_that("supportsSetBackend,MsBackendDataFrame", { expect_true(supportsSetBackend(MsBackendDataFrame())) }) - -test_that("backendRequiredSpectraVariables,MsBackendDataFrame works", { - expect_equal(backendRequiredSpectraVariables(MsBackendDataFrame()), - "dataStorage") -}) diff --git a/tests/testthat/test_MsBackendHdf5Peaks.R b/tests/testthat/test_MsBackendHdf5Peaks.R index 17495169..b7afdf37 100644 --- a/tests/testthat/test_MsBackendHdf5Peaks.R +++ b/tests/testthat/test_MsBackendHdf5Peaks.R @@ -334,16 +334,12 @@ test_that("[,MsBackendHdf5Peaks works", { expect_identical(peaksData(res), sciex_pks[idx]) expect_identical(rtime(res), rtime(sciex_mzr)[idx]) expect_identical(msLevel(res), msLevel(sciex_mzr)[idx]) - res_2 <- extractByIndex(be, idx) - expect_equal(res, res_2) idx <- dataStorage(be) == fls[2] res <- be[idx, ] expect_true(validObject(res)) expect_true(all(dataStorage(res) == fls[2])) expect_identical(peaksData(res), sciex_pks[idx]) - res_2 <- extractByIndex(be, idx) - expect_equal(res, res_2) }) test_that("backendMerge,MsBackendHdf5Peaks works", { @@ -413,8 +409,3 @@ test_that("backendParallelFactor,MsBackendHdf5Peaks", { 
factor(dataStorage(sciex_hd5), levels = unique(dataStorage(sciex_hd5)))) }) - -test_that("backendRequiredSpectraVariables,MsBackendHdf5Peaks works", { - expect_equal(backendRequiredSpectraVariables(MsBackendHdf5Peaks()), - c("dataStorage", "scanIndex")) -}) diff --git a/tests/testthat/test_MsBackendMemory.R b/tests/testthat/test_MsBackendMemory.R index c4df695f..2cdacc37 100644 --- a/tests/testthat/test_MsBackendMemory.R +++ b/tests/testthat/test_MsBackendMemory.R @@ -501,67 +501,41 @@ test_that("$<-,MsBackendMemory works", { test_that("[,MsBackendMemory works", { be <- new("MsBackendMemory") - res <- extractByIndex(be) - expect_equal(res, be) - df <- data.frame(scanIndex = 1:2, a = "a", b = "b") be <- backendInitialize(be, df) res <- be[1] expect_true(validObject(res)) expect_equal(be@spectraData[1, ], res@spectraData[1, ]) - res_2 <- extractByIndex(be, 1) - expect_equal(res, res_2) - res <- be[2] expect_true(validObject(res)) expect_equal(be@spectraData[2, ], res@spectraData[1, ]) - res_2 <- extractByIndex(be, 2) - expect_equal(res, res_2) - res <- be[2:1] expect_true(validObject(res)) expect_equal(be@spectraData[2:1, ], res@spectraData) - res_2 <- extractByIndex(be, 2:1) - expect_equal(res, res_2) res <- be[c(FALSE, FALSE)] expect_true(validObject(res)) expect_true(length(res) == 0) - res_2 <- extractByIndex(be, integer()) - expect_equal(res, res_2) - res <- be[c(FALSE, TRUE)] expect_true(validObject(res)) expect_equal(be@spectraData[2, ], res@spectraData[1, ]) - res_2 <- extractByIndex(be, 2) - expect_equal(res, res_2) expect_error(be[TRUE], "match the length of") expect_error(be["a"], "names") df <- data.frame(scanIndex = c(1L, 2L, 1L, 2L), - file = c("a", "a", "b", "b"), - idx = 1:4) + file = c("a", "a", "b", "b")) be <- backendInitialize(be, df) dataStorage(be) <- c("1", "1", "2", "2") res <- be[3] expect_true(validObject(res)) expect_equal(dataStorage(res), "2") expect_equal(res@spectraData$file, "b") - res_2 <- extractByIndex(be, 3) - expect_equal(res, 
res_2) res <- be[c(3, 1)] expect_true(validObject(res)) expect_equal(dataStorage(res), c("2", "1")) expect_equal(res@spectraData$file, c("b", "a")) - res_2 <- extractByIndex(be, c(3, 1)) - expect_equal(res, res_2) - - res <- be[c(3, 1, 3)] - expect_equal(res$idx, c(3, 1, 3)) - res_2 <- extractByIndex(be, c(3, 1, 3)) - expect_equal(res, res_2) }) test_that("cbind2, MsBackendMemory works", { @@ -960,8 +934,3 @@ test_that("tic,MsBackendMemory works", { test_that("supportsSetBackend,MsBackendMemory", { expect_true(supportsSetBackend(MsBackendMemory())) }) - -test_that("backendRequiredSpectraVariables,MsBackendMemory works", { - expect_equal(backendRequiredSpectraVariables(MsBackendMemory()), - "dataStorage") -}) diff --git a/tests/testthat/test_MsBackendMzR.R b/tests/testthat/test_MsBackendMzR.R index 44d38cd2..ff891738 100644 --- a/tests/testthat/test_MsBackendMzR.R +++ b/tests/testthat/test_MsBackendMzR.R @@ -474,8 +474,6 @@ test_that("[,MsBackendMzR works", { expect_equal(length(tmp), 13) expect_equal(tmp@spectraData$scanIndex, 13:25) expect_true(all(is.na(smoothed(tmp)))) - tmp_2 <- extractByIndex(sciex_mzr, 13:25) - expect_equal(tmp, tmp_2) ints <- intensity(tmp) spd <- spectraData(tmp) @@ -495,16 +493,8 @@ test_that("selectSpectraVariables,MsBackendMzR works", { "scanIndex")) expect_equal(colnames(res@spectraData), c("dataStorage", "msLevel", "rtime", "scanIndex")) - expect_equal(res@peaksVariables, character()) - - res <- selectSpectraVariables(be, c("dataStorage", "msLevel", "rtime", - "scanIndex", "mz", "intensity")) - expect_equal(colnames(res@spectraData), c("dataStorage", "msLevel", "rtime", - "scanIndex")) - expect_equal(res@peaksVariables, c("mz", "intensity")) - expect_error(selectSpectraVariables(be, c("dataStorage", "msLevel")), - "required") + "scanIndex is/are missing") }) test_that("$,$<-,MsBackendMzR works", { @@ -569,7 +559,6 @@ test_that("dropNaSpectraVariables works with MsBackendMzR", { expect_equal(mz(res[1]), mz(sciex_mzr[1])) 
expect_true(length(spectraVariables(res)) < length(spectraVariables(sciex_mzr))) - expect_equal(res@peaksVariables, sciex_mzr@peaksVariables) }) test_that("supportsSetBackend,MsBackendMzR", { @@ -581,31 +570,3 @@ test_that("backendParallelFactor,MsBackendMzR", { factor(dataStorage(sciex_mzr), levels = unique(dataStorage(sciex_mzr)))) }) - -test_that("dataStorageBasePath,dataStorageBasePath<-,MsBackendMzR works", { - tmpd <- normalizePath(tempdir()) - file.copy(sciex_file, tmpd) - - expect_equal(dataStorageBasePath(sciex_mzr), - MsCoreUtils::common_path(sciex_file)) - tmp <- sciex_mzr - dataStorageBasePath(tmp) <- tmpd - expect_true(validObject(tmp)) - bp <- normalizePath(dataStorageBasePath(tmp)) - expect_equal(bp, tmpd) - - #' errors - expect_error(dataStorageBasePath(tmp) <- "some path", "Provided path") -}) - -test_that("backendRequiredSpectraVariables,MsBackendMzR works", { - tmp <- MsBackendMzR() - expect_equal(backendRequiredSpectraVariables(tmp), - c("dataStorage", "scanIndex")) -}) - -test_that("precursorMz<-,MsbackendMzR works", { - a <- sciex_mzr[1:3] - precursorMz(a) <- c(12.2, 1.2, 1.4) - expect_equal(precursorMz(a), c(12.2, 1.2, 1.4)) -}) diff --git a/tests/testthat/test_Spectra-functions.R b/tests/testthat/test_Spectra-functions.R index 2dbcf372..ec73a72f 100644 --- a/tests/testthat/test_Spectra-functions.R +++ b/tests/testthat/test_Spectra-functions.R @@ -352,6 +352,32 @@ test_that("dropNaSpectraVariables works", { function(z) !any(is.na(z))))) }) +test_that(".has_mz works", { + sps <- Spectra(sciex_mzr)[1:10] + sps <- setBackend(sps, MsBackendDataFrame()) + mzs <- mz(sps) + x <- c(mzs[[2]][5], mzs[[3]][8]) + + res <- .has_mz(sps, mz = x, ppm = 0) + expect_true(length(res) == length(sps)) + expect_true(is.logical(res)) + + spd <- DataFrame(msLevel = c(2L, 2L, 2L), rtime = c(1, 2, 3)) + spd$mz <- list(c(12, 14, 45, 56), c(14.1, 34, 56.1), c(12.1, 14.15, 34.1)) + spd$intensity <- list(c(10, 20, 30, 40), c(11, 21, 31), c(12, 22, 32)) + sps <- 
Spectra(spd) + + res <- .has_mz(sps, mz = c(14, 34)) + expect_equal(res, c(TRUE, TRUE, FALSE)) + res <- .has_mz(sps, mz = c(14, 34), tolerance = 0.15) + expect_equal(res, c(TRUE, TRUE, TRUE)) + + res <- .has_mz(sps, mz = c(14, 34), condFun = all) + expect_true(all(!res)) + res <- .has_mz(sps, mz = c(14, 34), condFun = all, tolerance = 0.15) + expect_equal(res, c(FALSE, TRUE, TRUE)) +}) + test_that(".has_mz_each works", { spd <- DataFrame(msLevel = c(2L, 2L, 2L), rtime = c(1, 2, 3)) spd$mz <- list(c(12, 14, 45, 56), c(14.1, 34, 56.1), c(12.1, 14.15, 34.1)) @@ -664,6 +690,16 @@ test_that(".estimate_precursor_intensity works", { expect_true(all(is.na(res))) }) +test_that("estimatePrecursorIntensity works", { + fls <- msdata::proteomics(full.names = TRUE)[c(5, 3)] + second <- Spectra(fls[2], backend = MsBackendMzR()) + both <- Spectra(fls, backend = MsBackendMzR()) + + res_second <- estimatePrecursorIntensity(second) + res_both <- estimatePrecursorIntensity(both) + expect_equal(res_second, res_both[510:length(res_both)]) +}) + test_that(".chunk_factor works", { res <- .chunk_factor(10, chunkSize = 3) expect_equal(res, as.factor(c(1, 1, 1, 2, 2, 2, 3, 3, 3, 4))) @@ -825,75 +861,3 @@ test_that("processingChunkFactor works", { expect_error(processingChunkFactor("a"), "Spectra") }) - -test_that("filterPeaksRanges,Spectra works", { - df <- data.frame(rtime = 123.3, new_var = 4, msLevel = 2L) - df$mz <- list(c(100.1, 100.2, 100.3, 100.4, 200.1, 200.2, 200.3, - 300.1, 300.3, 300.4, 300.5)) - df$intensity <- list(1:11) - s <- Spectra(df) - ## Check errors - expect_error(filterPeaksRanges(3), "'Spectra' object") - expect_error(filterPeaksRanges(s, rtime = c(1, 2), not_exist = c(1, 2)), - "valid spectra variables") - expect_error(filterPeaksRanges(s, rtime = 2, mz = c(1, 2)), - "'numeric' of length 2") - expect_error(filterPeaksRanges( - s, rtime = rbind(c(1, 2), c(2, 3)), mz = c(1, 2)), - "Number of rows of the range matrices") - - ## Single range per variable - res <- 
filterPeaksRanges(s, rtime = c(100, 200), mz = cbind(200, 300)) - expect_true(inherits(res, "Spectra")) - expect_true(length(res@processingQueue) > 0L) - expect_equal(res@processingQueueVariables, c("rtime", "msLevel")) - expect_equal(length(res@processing), 1L) - a <- peaksData(res)[[1L]] - expect_equal(a[, 2L], c(5:7)) - res <- filterPeaksRanges(s, rtime = c(100, 200), mz = cbind(200, 300), - keep = FALSE) - a <- peaksData(res)[[1L]] - expect_equal(a[, 2L], c(1:4, 8:11)) - - ## Multiple ranges per variable - res <- filterPeaksRanges( - s, new_var = rbind(c(1, 8), c(1, 4), c(1, 5)), - rtime = rbind(c(100, 200), c(400, 500), c(100, 200)), - mz = rbind(c(100, 100.3), c(0, 500), c(300.3, 310))) - expect_true(inherits(res, "Spectra")) - a <- peaksData(res)[[1L]] - expect_equal(a[, 2L], c(1:3, 9:11)) - res <- filterPeaksRanges( - s, new_var = rbind(c(1, 8), c(1, 4), c(1, 5)), - rtime = rbind(c(100, 200), c(400, 500), c(100, 200)), - mz = rbind(c(100, 100.3), c(0, 500), c(300.3, 310)), keep = FALSE) - expect_true(inherits(res, "Spectra")) - a <- peaksData(res)[[1L]] - expect_equal(a[, 2L], c(4:8)) - - ## Filter also with msLevel; to have the same behaviour as with other - ## filters we would need to add a second filter for e.g. 
MS level 2 - s <- c(s, s) - s$msLevel <- c(1L, 2L) - res <- filterPeaksRanges(s, rtime = c(100, 200), msLevel = c(1, 1), - mz = c(100, 200)) - a <- peaksData(res)[[1L]] - expect_equal(a[, 2L], 1:4) - a <- peaksData(res)[[2L]] - expect_true(nrow(a) == 0L) - res <- filterPeaksRanges(s, rtime = rbind(c(100, 200), c(100, 200)), - msLevel = rbind(c(1, 1), c(2, 2)), - mz = rbind(c(100, 200), c(0, 400))) - a <- peaksData(res)[[1L]] - expect_equal(a[, 2L], 1:4) - a <- peaksData(res)[[2L]] - expect_equal(a[, 2L], 1:11) - res <- filterPeaksRanges(s, rtime = rbind(c(100, 200), c(100, 200)), - msLevel = rbind(c(1, 1), c(2, 2)), - mz = rbind(c(100, 200), c(0, 400)), - keep = FALSE) - a <- peaksData(res)[[1L]] - expect_equal(a[, 2L], 5:11) - a <- peaksData(res)[[2L]] - expect_true(nrow(a) == 0) -}) diff --git a/tests/testthat/test_Spectra.R b/tests/testthat/test_Spectra.R index 4cc721d9..3f8090fc 100644 --- a/tests/testthat/test_Spectra.R +++ b/tests/testthat/test_Spectra.R @@ -13,48 +13,32 @@ test_that("Spectra,ANY works", { df$polarity <- "NEG" expect_error(Spectra(df), "wrong data type: polarity") - - res <- Spectra(files = sciex_file, source = MsBackendMzR()) - expect_s4_class(res@backend, "MsBackendMzR") - expect_true(length(res) > 1) }) test_that("Spectra,missing works", { res <- Spectra() expect_true(length(res) == 0) - expect_s4_class(res@backend, "MsBackendMemory") - - res <- Spectra(backend = MsBackendDataFrame()) - expect_true(length(res) == 0) - expect_s4_class(res@backend, "MsBackendDataFrame") - - res <- Spectra(source = MsBackendDataFrame()) - expect_true(length(res) == 0) - expect_s4_class(res@backend, "MsBackendDataFrame") be <- backendInitialize(MsBackendDataFrame(), DataFrame(msLevel = c(1L, 2L), fromFile = 1L)) res <- Spectra(backend = be) - expect_s4_class(res@backend, "MsBackendDataFrame") expect_true(length(res) == 2) expect_identical(msLevel(res), c(1L, 2L)) }) test_that("Spectra,MsBackend works", { - be <- backendInitialize(MsBackendDataFrame(), - 
DataFrame(msLevel = c(1L, 2L), - fromFile = 1L)) + res <- Spectra() + expect_true(length(res) == 0) + + be <- backendInitialize(MsBackendDataFrame(), DataFrame(msLevel = c(1L, 2L), + fromFile = 1L)) res <- Spectra(be) expect_true(length(res) == 2) expect_identical(msLevel(res), c(1L, 2L)) }) test_that("Spectra,character works", { - res <- Spectra(sciex_file) - expect_true(is(res@backend, "MsBackendMzR")) - expect_true(length(res) > 0) - - res <- Spectra(sciex_file, source = MsBackendMzR()) + res <- Spectra(sciex_file, backend = MsBackendMzR()) expect_true(is(res@backend, "MsBackendMzR")) expect_equal(unique(res@backend$dataStorage), sciex_file) expect_identical(rtime(res), rtime(sciex_mzr)) @@ -67,7 +51,7 @@ test_that("Spectra,character works", { show(res) ## Empty character - res <- Spectra(character()) + res <- Spectra(character(), backend = MsBackendMzR()) expect_s4_class(res, "Spectra") expect_s4_class(res@backend, "MsBackendMzR") expect_true(length(res) == 0) @@ -78,37 +62,6 @@ test_that("Spectra,character works", { expect_true(length(res) == 0) }) -test_that(".create_spectra works, ", { - ## missing object - res <- .create_spectra() - expect_true(length(res) == 0) - expect_s4_class(res@backend, "MsBackendMemory") - expect_error(res <- .create_spectra(backend = MsBackendMzR()), "mandatory") - - ## object being a character, backend a MsBackendMemory -> error - res <- expect_error(.create_spectra(sciex_file), "DataFrame") - ## object being a character, backend a MsBackendMzR - res <- .create_spectra(sciex_file, backend = MsBackendMzR()) - expect_s4_class(res@backend, "MsBackendMzR") - dta <- spectraData(res@backend) - - ## object being a DataFrame, backend a MsBackendDataFrame - res <- .create_spectra(dta, backend = MsBackendDataFrame()) - expect_s4_class(res@backend, "MsBackendDataFrame") - expect_equal(res$msLevel, dta$msLevel) - - ## object missing but providing files - res <- .create_spectra(files = sciex_file, backend = MsBackendMzR()) - 
expect_s4_class(res@backend, "MsBackendMzR") - expect_equal(res$msLevel, dta$msLevel) - - ## object missing but providing data - res <- .create_spectra(data = dta, backend = MsBackendMemory()) - expect_s4_class(res@backend, "MsBackendMemory") - expect_equal(res$msLevel, dta$msLevel) - -}) - test_that("setBackend,Spectra works", { df <- DataFrame(rtime = as.numeric(1:9), fact = c(2L, 1L, 2L, 1L, 3L, 2L, 3L, 3L, 1L)) @@ -1938,57 +1891,4 @@ test_that("entropy,Spectra works", { expect_identical(res, vapply(df$intensity, MsCoreUtils::entropy, numeric(1))) }) -test_that("dataStorageBasePath,dataStorageBasePath<-,MsBackendMzR works", { - tmpd <- normalizePath(tempdir()) - file.copy(sciex_file, tmpd) - tmp <- Spectra(sciex_mzr) - expect_equal(dataStorageBasePath(tmp), - MsCoreUtils::common_path(sciex_file)) - tmp <- sciex_mzr - tmp <- Spectra(tmp) - dataStorageBasePath(tmp) <- tmpd - expect_true(validObject(tmp@backend)) - bp <- normalizePath(dataStorageBasePath(tmp)) - expect_equal(bp, tmpd) - - #' errors - expect_error(dataStorageBasePath(tmp) <- "some path", "Provided path") -}) - - -test_that("asDataFrame works", { - sciex_file <- normalizePath( - dir(system.file("sciex", package = "msdata"), full.names = TRUE)) - sp <- Spectra(sciex_file) - ## Full dataframe - df <- asDataFrame(sp) - expect_identical(nrow(df), sum(sapply(peaksData(sp), nrow))) - expect_identical(ncol(df), length(spectraVariables(sp)) + 2L) - expect_identical(names(df), c("mz", "intensity", spectraVariables(sp))) - ## Three first scans and 2 spectra variables - df <- asDataFrame(sp, i = 1:3, spectraVars = c("msLevel", "rtime")) - expect_identical(nrow(df), sum(sapply(peaksData(sp[1:3]), nrow))) - expect_identical(ncol(df), 2L + 2L) - ## Three first scans and no spectra variables - df <- asDataFrame(sp, i = 1:3, spectraVars = NULL) - expect_identical(nrow(df), sum(sapply(peaksData(sp[1:3]), nrow))) - expect_identical(ncol(df), 2L) - expect_identical(names(df), c("mz", "intensity")) -}) - 
-test_that("estimatePrecursorIntensity works", { - fls <- msdata::proteomics(full.names = TRUE)[c(5, 3)] - second <- Spectra(fls[2], backend = MsBackendMzR()) - both <- Spectra(fls, backend = MsBackendMzR()) - - res_second <- estimatePrecursorIntensity(second) - res_both <- estimatePrecursorIntensity(both) - expect_equal(res_second, res_both[510:length(res_both)]) -}) - -test_that("precursorMz<-,Spectra works", { - a <- sps_dda[1:3] - precursorMz(a) <- c(12.3, 1.1, 34.3) - expect_equal(precursorMz(a), c(12.3, 1.1, 34.3)) -}) diff --git a/tests/testthat/test_peaks-functions.R b/tests/testthat/test_peaks-functions.R index ef0978c8..b7204b46 100644 --- a/tests/testthat/test_peaks-functions.R +++ b/tests/testthat/test_peaks-functions.R @@ -490,248 +490,3 @@ test_that(".peaks_filter_precursor_keep_below works", { precursorMz = 14.2, tolerance = 0.1) expect_equal(unname(res[, "intensity"]), 1) }) - -test_that(".peaks_filter_ranges works", { - ## Testing all possible combinations, with/without spectra and/or peaks - ## variables, single/multiple variables, single/multiple rows, NA handling - x <- cbind(mz = c(100.1, 100.2, 100.3, 100.4, - 104.1, 104.2, - 200.3, 200.4, 200.5, - 300.1, 300.2), - intensity = 1:11) - ## res <- .peaks_filter_ranges(x, spectrumMsLevel = 1L, msLevel = 2L) - ## expect_equal(res, x) - - ## Single filters. - ranges <- list(rtime = cbind(1, 2), new_var = cbind(3, 4), - mz = cbind(200, 201), intensity = cbind(8, 9)) - - ## * No peaks variables. 
- pvars <- character() - svars <- c("rtime", "new_var") - res <- .peaks_filter_ranges(x, svars = svars, pvars = pvars, rtime = 1, - new_var = 3, ranges = ranges, - spectrumMsLevel = 1L) - expect_true(is.matrix(res)) - expect_equal(colnames(res), colnames(x)) - expect_equal(res, x) - res <- .peaks_filter_ranges(x, svars = svars, pvars = pvars, rtime = 1, - new_var = 3, ranges = ranges, - spectrumMsLevel = 1L, keep = FALSE) - expect_true(is.matrix(res)) - expect_equal(colnames(res), colnames(x)) - expect_true(nrow(res) == 0) - res <- .peaks_filter_ranges(x, svars = svars, pvars = pvars, rtime = 4, - new_var = 3, ranges = ranges, - spectrumMsLevel = 1L) - expect_true(is.matrix(res)) - expect_equal(colnames(res), colnames(x)) - expect_true(nrow(res) == 0) - res <- .peaks_filter_ranges(x, svars = svars, pvars = pvars, rtime = 4, - new_var = 3, ranges = ranges, - spectrumMsLevel = 1L, keep = FALSE) - expect_true(is.matrix(res)) - expect_equal(colnames(res), colnames(x)) - expect_equal(res, x) - - ## * No spectra variables. 
- pvars <- c("mz", "intensity") - svars <- character() - res <- .peaks_filter_ranges(x, svars = svars, pvars = pvars, rtime = 1, - new_var = 3, ranges = ranges, - spectrumMsLevel = 1L) - expect_true(is.matrix(res)) - expect_equal(colnames(res), colnames(x)) - expect_true(nrow(res) < nrow(x)) - expect_equal(res[, "intensity"], 8:9) - res <- .peaks_filter_ranges(x, svars = svars, pvars = pvars, rtime = 1, - new_var = 3, ranges = ranges, - spectrumMsLevel = 1L, keep = FALSE) - expect_true(is.matrix(res)) - expect_equal(colnames(res), colnames(x)) - expect_true(nrow(res) < nrow(x)) - expect_equal(res[, "intensity"], c(1:7, 10:11)) - ranges$mz <- cbind(100, 106) - res <- .peaks_filter_ranges(x, svars = svars, pvars = pvars, rtime = 1, - new_var = 3, ranges = ranges, - spectrumMsLevel = 1L) - expect_true(is.matrix(res)) - expect_equal(colnames(res), colnames(x)) - expect_true(nrow(res) < nrow(x)) - expect_true(nrow(res) == 0) - res <- .peaks_filter_ranges(x, svars = svars, pvars = pvars, rtime = 1, - new_var = 3, ranges = ranges, - spectrumMsLevel = 1L, keep = FALSE) - expect_true(is.matrix(res)) - expect_equal(colnames(res), colnames(x)) - expect_equal(res, x) - ranges$mz <- cbind(200, 201) - - ## * Spectra and peaks variables. - svars <- c("rtime") - pvars <- c("mz") - res <- .peaks_filter_ranges(x, svars = svars, pvars = pvars, rtime = 1, - ranges = ranges, spectrumMsLevel = 1L) - expect_true(is.matrix(res)) - expect_equal(colnames(res), colnames(x)) - expect_true(nrow(res) < nrow(x)) - expect_equal(res[, "intensity"], 7:9) - res <- .peaks_filter_ranges(x, svars = svars, pvars = pvars, rtime = 1, - ranges = ranges, spectrumMsLevel = 1L, - keep = FALSE) - expect_true(is.matrix(res)) - expect_equal(colnames(res), colnames(x)) - expect_true(nrow(res) < nrow(x)) - expect_equal(res[, "intensity"], c(1:6, 10:11)) - - ## Multiple filters. 
- ranges <- list(rtime = rbind(c(1, 2), c(0, 4), c(2, 3)), - new_var = rbind(c(3, 4), c(1, 9), c(3, 5)), - mz = rbind(c(200, 201), c(100, 101), c(200, 201)), - intensity = rbind(c(8, 9), c(1, 20), c(3, 12))) - - ## * No peaks variables. - svars <- c("rtime", "new_var") - pvars <- character() - res <- .peaks_filter_ranges(x, svars = svars, pvars = pvars, rtime = 1, - new_var = 3, ranges = ranges, - spectrumMsLevel = 1L) - expect_true(is.matrix(res)) - expect_equal(colnames(res), colnames(x)) - expect_equal(res, x) - res <- .peaks_filter_ranges(x, svars = svars, pvars = pvars, rtime = 1, - new_var = 3, ranges = ranges, - spectrumMsLevel = 1L, keep = FALSE) - expect_true(is.matrix(res)) - expect_equal(colnames(res), colnames(x)) - expect_true(nrow(res) == 0) - res <- .peaks_filter_ranges(x, svars = svars, pvars = pvars, rtime = 4, - new_var = 3, ranges = ranges, - spectrumMsLevel = 1L) - expect_true(is.matrix(res)) - expect_equal(colnames(res), colnames(x)) - expect_equal(res, x) - res <- .peaks_filter_ranges(x, svars = svars, pvars = pvars, rtime = 4, - new_var = 3, ranges = ranges, - spectrumMsLevel = 1L, keep = FALSE) - expect_true(is.matrix(res)) - expect_equal(colnames(res), colnames(x)) - expect_true(nrow(res) == 0) - res <- .peaks_filter_ranges(x, svars = svars, pvars = pvars, rtime = 20, - new_var = 3, ranges = ranges, - spectrumMsLevel = 1L) - expect_true(is.matrix(res)) - expect_equal(colnames(res), colnames(x)) - expect_true(nrow(res) == 0L) - res <- .peaks_filter_ranges(x, svars = svars, pvars = pvars, rtime = 20, - new_var = 3, ranges = ranges, - spectrumMsLevel = 1L, keep = FALSE) - expect_true(is.matrix(res)) - expect_equal(colnames(res), colnames(x)) - expect_equal(res, x) - - ## * No spectra variables. 
- svars <- character() - pvars <- c("mz", "intensity") - res <- .peaks_filter_ranges(x, pvars = pvars, ranges = ranges, - spectrumMsLevel = 1L) - expect_true(is.matrix(res)) - expect_equal(colnames(res), colnames(x)) - expect_true(nrow(res) < nrow(x)) - expect_equal(res[, 2L], sort(c(8, 9, 1, 2, 3, 4, 7))) - res <- .peaks_filter_ranges(x, pvars = pvars, ranges = ranges, - spectrumMsLevel = 1L, keep = FALSE) - expect_true(is.matrix(res)) - expect_equal(colnames(res), colnames(x)) - expect_true(nrow(res) < nrow(x)) - expect_equal(res[, 2L], c(5:6, 10:11)) - res <- .peaks_filter_ranges(x, pvars = c("intensity"), ranges = ranges, - spectrumMsLevel = 1L) - expect_equal(res, x) - res <- .peaks_filter_ranges(x, pvars = c("intensity"), ranges = ranges, - spectrumMsLevel = 1L, keep = FALSE) - expect_equal(res, x[logical(), , drop = FALSE]) - - ## * Spectra and peaks variables. - svars <- c("rtime") - pvars <- c("mz") - res <- .peaks_filter_ranges(x, svars = svars, pvars = pvars, rtime = 2, - ranges = ranges, spectrumMsLevel = 1L) - expect_true(is.matrix(res)) - expect_equal(colnames(res), colnames(x)) - expect_equal(res[, 2L], c(1:4, 7:9)) - res <- .peaks_filter_ranges(x, svars = svars, pvars = pvars, rtime = 2, - ranges = ranges, spectrumMsLevel = 1L, - keep = FALSE) - expect_true(is.matrix(res)) - expect_equal(colnames(res), colnames(x)) - expect_equal(res[, 2L], c(5:6, 10:11)) - res <- .peaks_filter_ranges(x, svars = svars, pvars = pvars, rtime = 4, - ranges = ranges, spectrumMsLevel = 1L) - expect_true(is.matrix(res)) - expect_equal(colnames(res), colnames(x)) - expect_equal(res[, 2L], 1:4) - res <- .peaks_filter_ranges(x, svars = svars, pvars = pvars, rtime = 4, - ranges = ranges, spectrumMsLevel = 1L, - keep = FALSE) - expect_true(is.matrix(res)) - expect_equal(colnames(res), colnames(x)) - expect_equal(res[, 2L], 5:11) - - ## Handling NA - ## * spectra variable value is NA - ranges <- lapply(ranges, function(z) z[1, , drop = FALSE]) - svars <- "rtime" - pvars <- 
c("mz", "intensity") - res <- .peaks_filter_ranges(x, svars = svars, pvars = pvars, rtime = NA, - ranges = ranges, spectrumMsLevel = 1L) - expect_true(is.matrix(res)) - expect_equal(colnames(res), colnames(x)) - expect_equal(res, x[logical(), , drop = FALSE]) - res <- .peaks_filter_ranges(x, svars = svars, pvars = pvars, rtime = NA, - ranges = ranges, spectrumMsLevel = 1L, - keep = FALSE) - expect_equal(res, x) - - svars <- c("rtime", "new_var") - res <- .peaks_filter_ranges(x, svars = svars, pvars = pvars, rtime = NA, - ranges = ranges, spectrumMsLevel = 1L, - new_var = 3) - expect_true(is.matrix(res)) - expect_equal(colnames(res), colnames(x)) - expect_equal(res, x[logical(), , drop = FALSE]) - - ## * peaks variable value is NA - x[8, 2L] <- NA_real_ - res <- .peaks_filter_ranges(x, pvars = c("mz", "intensity"), - ranges = ranges, spectrumMsLevel = 1L) - expect_true(is.matrix(res)) - expect_equal(colnames(res), colnames(x)) - expect_equal(unname(res[, 2L]), 9L) - - ## * range value is NA - ranges$rtime <- cbind(NA, 2) - res <- .peaks_filter_ranges(x, svars = c("rtime", "new_var"), rtime = 2, - new_var = 3, spectrumMsLevel = 1L, - ranges = ranges) - expect_true(is.matrix(res)) - expect_equal(colnames(res), colnames(x)) - expect_equal(res, x[logical(), , drop = FALSE]) - res <- .peaks_filter_ranges(x, svars = c("rtime", "new_var"), rtime = 2, - new_var = 3, spectrumMsLevel = 1L, - ranges = ranges, keep = FALSE) - expect_equal(res, x) -}) - -test_that(".peaks_contain_mz works", { - pks <- cbind(mz = c(1.3, 1.5, 32.1, 45.6), c(1, 2, 3, 4)) - - expect_false(.peaks_contain_mz(pks)) - expect_true(.peaks_contain_mz(pks, 1.5)) - expect_false(.peaks_contain_mz(pks, c(1.5, 32.2), condFun = all)) - expect_true(.peaks_contain_mz(pks, c(1.5, 32.2), condFun = any)) - expect_true(.peaks_contain_mz(pks, c(1.5, 32.2), condFun = any, - tolerance = 0.1)) - expect_true(.peaks_contain_mz(pks, c(1.5, 32.2), condFun = all, - tolerance = 0.1)) -}) diff --git a/vignettes/MsBackend.Rmd 
b/vignettes/MsBackend.Rmd index c74f82e8..a6423e63 100644 --- a/vignettes/MsBackend.Rmd +++ b/vignettes/MsBackend.Rmd @@ -563,39 +563,35 @@ additionally available variables and the `columns` parameter of the (in addition to the required `"mz"` and `"intensity"` variables). -### `extractByIndex()` and `[` - -The `extractByIndex()` and `[` methods allows to subset `MsBackend` objects. -This operation is expected to reduce a `MsBackend` object to the selected -spectra. These methods must also support duplication (e.g. `[c(1, 1, 1)]` and -extraction in any arbitrary order (e.g. `[c(3, 1, 5, 3)]`). While both methods -subset the object, `extractByIndex()` only supports to subset with an `integer` -index, while `[`, to be compliant with the base R implementation, should support -to subset by indices or logical vectors. An error should be thrown if indices -are out of bounds, but the method should also support returning an empty backend -with `[integer()]`. Note that the `MsCoreUtils::i2index` function can be used to +### `[` + +The `[` method allows to subset `MsBackend` objects. This operation is expected +to reduce a `MsBackend` object to the selected spectra. The method should +support to subset by indices or logical vectors and should also support +duplicating elements (i.e. when duplicated indices are used) as well as to +subset in arbitrary order. An error should be thrown if indices are out of +bounds, but the method should also support returning an empty backend with +`[integer()]`. Note that the `MsCoreUtils::i2index` function can be used to check for correct input (and convert the input to an `integer` index). -The `extractByIndex()` method is used by the data operation and analysis methods -on `Spectra` objects, while the `[` is intended to be used by the end user (if -needed). Below we implement `extractByIndex()` for our backend: +Below we implement a possible `[` for our test backend class. 
We ignore the +parameters `j` from the definition of the `[` generic, since we treat our data +to be one-dimensional (with each spectrum being one element). ```{r} -setMethod("extractByIndex", c("MsBackendTest", "ANY"), function(object, i) { - object@spectraVars <- object@spectraVars[i, ] - object@mz <- object@mz[i] - object@intensity <- object@intensity[i] - object +setMethod("[", "MsBackendTest", function(x, i, j, ..., drop = FALSE) { + i <- MsCoreUtils::i2index(i, length = length(x)) + x@spectraVars <- x@spectraVars[i, ] + x@mz <- x@mz[i] + x@intensity <- x@intensity[i] + x }) ``` -The `[` does not need to be defined because a default implementation for -the base `MsBackend` exists. - We can now subset our backend to the last two spectra. ```{r} -a <- extractByIndex(be, 2:3) +a <- be[2:3] spectraData(a) ``` @@ -1590,23 +1586,6 @@ setMethod("backendParallelFactor", "MsBackend", function(object, ...) { ``` -### `backendRequiredSpectraVariables()` - -The `backendRequiredSpectraVariables()` method can be implemented if a backend -needs specific spectra variables to work. The default implementation is: - -```{r} -setMethod("backendRequiredSpectraVariables", "MsBackend", - function(object, ...) { - character() - }) -``` - -The implementation for `MsBackendMzR` returns `c("dataStorage", "scanIndex")` as -the backend needs these two spectra variables to load the MS data on-the-fly -from the original data files. - - ### `dropNaSpectraVariables()` The `dropNaSpectraVariables()` is supposed to allow removing all spectra @@ -1677,21 +1656,6 @@ This method thus retrieves first the MS levels of all spectra and then calls operation by selecting the unique MS levels directly using an SQL call. -### `precursorMz<-` - -Replace the values for the *precursor m/z* spectra -variable. Parameter `value` has to be of type `numeric` (`NA_real_` missing -values are supported, e.g. for MS1 spectra). 
The default implementation uses the -`$<-` method: - -```{r} -setReplaceMethod("precursorMz", "MsBackend", function(object, ..., value) { - object$precursorMz <- value - object -}) -``` - - ### `ionCount()` The `ionCount()` method should return a `numeric` (length equal to the number of diff --git a/vignettes/Spectra.Rmd b/vignettes/Spectra.Rmd index 35e0dfbb..74bcb1fb 100644 --- a/vignettes/Spectra.Rmd +++ b/vignettes/Spectra.Rmd @@ -360,39 +360,25 @@ Similar to spectra variables it is also possible to replace values for **existing** peaks variables using the `$<-` function. -## Filtering, aggregating and merging spectra data +## Filtering, subsetting and merging -Various functions are available to filter, subset and merge `Spectra` -objects. These can be generally subdivided into functions that subset or filter -*spectra data* and operations that filter *mass peak data*. A third category of -function allows to aggregate data within a `Spectra` or to merge and combine -multiple `Spectra` objects into one. Functions of the various categories are -described in the following subsections. Please refer to the function's -documentation for more details and information. +Apart from *classical* subsetting operations such as `[` and `split()`, a set of +filter functions are defined for `Spectra` objects (for detailed help please see +the `?Spectra` help). Filter and subset functions either reduce the number of +spectra within a `Spectra` object, or affect the number of +peaks (by either aggregating or subset) within each spectrum. Filter functions +affecting the total number of spectra are (in alphabetic order): - -### Filter spectra data - -These functions comprise subset operations that reduce the total number of -spectra in a `Spectra` object as well as filter functions that reduce the -content of the `Spectra`'s spectra data (i.e. the content of its -`spectraVariables()`). 
These functions thus don't change or affect the mass -peaks data of the `Spectra`'s individual spectra. - -- `[`: operation to reduce a `Spectra` object to selected elements. -- `dropNaSpectraVariables()`: drops `spectraVariables()` that contain only - missing values. The function returns a `Spectra` object with the same number - of elements, but with eventually fewer spectra variables. - `filterAcquisitionNum()`: retains spectra with certain acquisition numbers. - `filterDataOrigin()`: subsets to spectra from specific origins. - `filterDataStorage()`: subsets to spectra from certain data storage files. - `filterEmptySpectra()`: removes spectra without mass peaks. +- `filterMzRange()`: subsets spectra keeping only peaks with an m/z within the + provided m/z range. - `filterIsolationWindow()`: keeps spectra with the provided `mz` in their isolation window (m/z range). - `filterMsLevel()`: filters by MS level. - `filterPolarity()`: filters by polarity. -- `filterPrecursorCharge()`: retains (MSn) spectra with specified - precursor charge(s). - `filterPrecursorIsotopes()`: identifies precursor ions (from fragment spectra) that could represent isotopes of the same molecule. For each of these spectra groups only the spectrum of the monoisotopic precursor ion is returned. MS1 @@ -404,59 +390,50 @@ peaks data of the `Spectra`'s individual spectra. the provided m/z range. - `filterPrecursorMzValues(()`: retains (MSn) spectra with precursor m/z value matching the provided value(s) considering also a `tolerance` and `ppm`. +- `filterPrecursorCharge()`: retains (MSn) spectra with specified + precursor charge(s). - `filterPrecursorScan()`: retains (parent and children) scans of an acquisition number. -- `filterRanges()`: filters a `Spectra` object based on (multiple) user - defined *numeric* ranges for one or more available (numeric) spectra - variables. 
+- `filterRanges()`: allows filtering of the `Spectra` object based on user + defined *numeric* ranges (parameter `ranges`) for one or more available + spectra variables in object (spectra variable names can be specified with + parameter `spectraVariables`). Spectra for which the value of a spectra + variable is within its defined range are retained. If multiple + ranges/spectra variables are defined, the `match` parameter can be used + to specify whether all conditions (`match = "all"`; the default) or if + any of the conditions must match (`match = "any"`; all spectra for which + values are within any of the provided ranges are retained). - `filterRt()`: filters based on retention time range. -- `filterValues()`: filters a `Spectra` object based on similarities of - *numeric* values of one or more available spectra variables. -- `selectSpectraVariables()`: reduces the (spectra) data within the object to - the selected spectra variables. - - -### Filter or aggregate mass peak data - -These function filter or aggregate the mass peak data (`peaksData()`) of each -spectrum in a `Spectra` without changing the total number of spectra. - -- `combinePeaks()`: groups peaks **within each spectrum** based on similarity of +- `filterValues()`: allows filtering of the `Spectra` object based on + similarities of *numeric* values of one or more `spectraVariables(object)` + (parameter `spectraVariables`) to provided values (parameter `values`) + given acceptable differences (parameters tolerance and ppm). If multiple + values/spectra variables are defined, the `match` parameter can be used + to specify whether all conditions (`match = "all"`; the default) or if + any of the conditions must match (`match = "any"`; all spectra for which + values are within any of the provided ranges are retained). 
+ +Filter functions that return the same number of spectra, but affect/subset the +peaks data (m/z and intensity values) within each spectrum are: + +- `combinePeaks()`: groups peaks within each spectrum based on similarity of their m/z values and combines these into a single peak per peak group. - `deisotopeSpectra()`: deisotopes each individual spectrum keeping only the monoisotopic peak for peaks groups of potential isotopologues. -- `filterFourierTransformArtefacts()`: removes (Orbitrap) fast fourier transform - artifact peaks from spectra. - `filterIntensity()`: filter each spectrum keeping only peaks with intensities meeting certain criteria. -- `filterMzRange()`: filters mass peaks keeping (or removing) those with an - m/z within the provided m/z range. -- `filterMzValues()`: filters mass peaks within each spectrum keeping (or - removing) those with an m/z matching the provided value(s). -- `filterPeaksRanges()`: filters mass peaks using any set of range-based filters - on numeric spectra or peaks variables. +- `filterMzRange()`: subsets peaks data within each spectrum keeping only peaks + with their m/z values within the specified m/z range. - `filterPrecursorPeaks()`: removes peaks with either an m/z value matching the precursor m/z of the respective spectrum (with parameter `mz = "=="`) or peaks with an m/z value larger or equal to the precursor m/z (with parameter `mz = ">="`). +- `filterMzValues()`: subsets peaks within each spectrum keeping or removing + (all) peaks matching provided m/z value(s) (given parameters `ppm` and + `tolerance`). - `reduceSpectra()`: filters individual spectra keeping only the largest peak for groups of peaks with similar m/z values. - -### Merging, aggregating and splitting - - -- `c()`: combine several `Spectra` into a single `Spectra` object. -- `combineSpectra()`: allows to combine the MS data from sets of spectra into a - single spectrum per set. Thus, instead of filtering the data, this function - aggregates it. 
-- `joinSpectraData()`: merge a `DataFrame` to the existing spectra data. -- `split()`: splits the `Spectra` object based on a provided grouping factor. - - - -### Examples and use cases for filter operations - In this example, we use the `filterValues()` function to retain spectra with a base peak m/z close to 100 (+/- 30 ppm) and a retention time around 230 (+/- 5 s). @@ -894,26 +871,18 @@ See also `?plotSpectra` for more plotting options and examples. The `Spectra` package provides the `combineSpectra()` function that allows to *aggregate* multiple spectra into a single one. The main parameters of this -function are `f`, which defines the sets of spectra that should be combined, and -`FUN`, which allows to define the function that performs the actual -aggregation. The default aggregation function is `combinePeaksData()` (see -`?combinePeaksData` for details) that combines multiple spectra into a single -spectrum with all peaks from all input spectra (with additional paramter `peaks -= "union"`), or peaks that are present in a certain proportion of input spectra -(with parameter `peaks = "intersect"`; parameter `minProp` allows to define the -minimum required proportion of spectra in which a peak needs to be present. It -is important to mention that, by default, the function combines all mass peaks -from all spectra with a similar m/z value into a single, representative mass -peak aggregating all their intensities into one. To avoid the resulting -intensity to be affected by potential noise peaks it might be advised to first -*clean* the individual mass spectra using e.g. the `combinePeaks()` or -`reduceSpectra()` functions that first aggregate mass peaks **within** each -individual spectrum. - -In this example we below we use `combineSpectra()` to combine the spectra for -1-methylhistidine and caffeine into a single spectrum for each compound. 
We use -the spectra variable `$name`, that contains the names of the compounds, to -define which spectra should be grouped together. +function are `f`, which defines the grouping of the spectra, and `FUN` which +allows to define the function that performs the actual aggregation. The default +aggregation function is `combinePeaksData()` (see `?combinePeaksData` for +details) that combines multiple spectra into a single spectrum with all peaks +from all input spectra (with additional parameter `peaks = "union"`), or peaks +that are present in a certain proportion of input spectra (with parameter +`peaks = "intersect"`; parameter `minProp` allows to define the minimum +required proportion of spectra in which a peak needs to be present). Below we +use this function to combine the spectra for 1-methylhistidine and caffeine +into a single spectrum for each compound. We use the spectra variable `$name`, +that contains the names of the compounds, to define which spectra should be +grouped together. ```{r} sps_agg <- combineSpectra(sps, f = sps$name) ``` @@ -1244,51 +1213,38 @@ head(basename(dataStorage(sps_tmt))) A (possibly incomplete) list of R packages providing additional backends that add support for additional data types or storage options is provided below: +- `r BiocStyle::Biocpkg("MsBackendMgf")`: support for import/export of mass + spectrometry files in mascot generic format (MGF). +- `r BiocStyle::Biocpkg("MsBackendMsp")`: allows to import/export data in NIST + MSP format. Extends the `MsBackendDataFrame` and keeps thus all data, after + import, in memory. +- `MsBackendMassbank` (package `r BiocStyle::Biocpkg("MsBackendMassbank")`): + allows to import/export data in MassBank text file format. Extends the + `MsBackendDataFrame` and keeps thus all data, after import, in memory. +- `MsBackendMassbankSql` (package `r BiocStyle::Biocpkg("MsBackendMassbank")`): + allows to directly connect to a MassBank SQL database to retrieve all MS data + and variables. 
Has a minimal memory footprint because all data is retrieved + on-the-fly from the SQL database. +- `r BiocStyle::Biocpkg("MsBackendSql")`: stores all MS data in a SQL database + and has thus a minimal memory footprint. - `MsBackendCompDb` (package `r BiocStyle::Biocpkg("CompoundDb")`): provides access to spectra data (spectra and peaks variables) from a *CompDb* database. Has a small memory footprint because all data (except precursor m/z values) are retrieved on-the-fly from the database. - +- `r Biocpkg("MsBackendRawFileReader")`: implements a backend for reading MS + data from Thermo Fisher Scientific's raw data files using the manufacturer's + NewRawFileReader .Net libraries. The package generalizes the functionality + introduced by the `r Biocpkg("rawrr")` package, see also + [@kockmann_rawrr_2021]. - `MsBackendHmdbXml` (package [`MsbackendHmdb`](https://github.com/rformassspectrometry/MsBackendHmdb)): allows import of MS data from xml files of the Human Metabolome Database (HMDB). Extends the `MsBackendDataFrame` and keeps thus all data, after import, in memory. - -- `MsBackendMassbank` (package `r BiocStyle::Biocpkg("MsBackendMassbank")`): - allows to import/export data in MassBank text file format. Extends the - `MsBackendDataFrame` and keeps thus all data, after import, in memory. - -- `MsBackendMassbankSql` (package `r BiocStyle::Biocpkg("MsBackendMassbank")`): - allows to directly connect to a MassBank SQL database to retrieve all MS data - and variables. Has a minimal memory footprint because all data is retrieved - on-the-fly from the SQL database. - -- `MsBackendMetaboLights` (package `r - BiocStyle::Biocpkg("MsBackendMetaboLights")`): retrieves and caches MS data - files from the MetaboLights repository. - -- `MsBackendMgf`: (package `r BiocStyle::Biocpkg("MsBackendMgf")`): support for - import/export of mass spectrometry files in mascot generic format (MGF). 
- -- `MsBackendMsp`: (package `r BiocStyle::Biocpkg("MsBackendMsp")`): allows to - import/export data in NIST MSP format. Extends the `MsBackendDataFrame` and - keeps thus all data, after import, in memory. - -- `MsBackendRawFileReader` (package `r Biocpkg("MsBackendRawFileReader")`): - implements a backend for reading MS data from Thermo Fisher Scientific's raw - data files using the manufacturer's NewRawFileReader .Net libraries. The - package generalizes the functionality introduced by the `r Biocpkg("rawrr")` - package, see also [@kockmann_rawrr_2021]. - -- `MsBackendSql` (package `r BiocStyle::Biocpkg("MsBackendSql")`): stores all MS - data in a SQL database and has thus a minimal memory footprint. - - `MsBackendTimsTof` (package [`MsBackendTimsTof`](https://github.com/rformassspectrometry/MsBackendTimsTof): allows import of data from Bruker TimsTOF raw data files (using the `opentimsr` R package). - - `MsBackendWeizMass` (package [`MsBackendWeizMass`](https://github.com/rformassspectrometry/MsBackendWeizMass): allows to access MS data from WeizMass MS/MS spectral databases. @@ -1320,60 +1276,6 @@ a `lengths(sps)` call, the number of peaks per spectra could also be determined 5000L)`. In that way only peak data of 5000 spectra at a time will be loaded into memory. - -# Serializing (saving), moving and loading serialized `Spectra` objects - -Serializing and re-loading variables/objects during an analysis using e.g. the -`save()` and `load()` functions are common in many workflows, especially if some -of the tasks are computationally intensive and take long time. Sometimes such -serialized objects might even be moved from one computer (or file system) to -another. These operations are unproblematic for `Spectra` objects with -*in-memory* backends such as the `MsBackendMemory` or `MsBackendDataFrame`, that -keep all data in memory, would however break for *on-disk* backends such as the -`MsBackendMzR` if the file path to the original data files is not identical. 
It -is thus suggested (if the size of the MS data respectively the available system -memory allows it) to change the backend for such `Spectra` objects to a -`MsBackendMemory` before serializing the object with `save()`. For `Spectra` -objects with a `MsBackendMzR` an alternative option would be to eventually -update/adapt the path to the directory containing the raw (e.g. mzML) data -files: assuming these data files are available on both computers, the path to -the directory containing these can be updated with the `dataStorageBasePath<-` -function allowing thus to move/copy serialized `Spectra` objects between -computers or file systems. - -An example workflow could be: - -files *a.mzML*, *b.mzML* are stored in a directory */data/mzML/* on one -computer. These get loaded as a `Spectra` object with `MsBackendMzR` and then -serialized to a file *A.RData*. - -```{r, eval = FALSE} -A <- Spectra(c("/data/mzML/a.mzML", "/data/mzML/b.mzML")) -save(A, file = "A.RData") -``` - -Assuming this file gets now copied to another computer (where the data is not -available in a folder */data/mzML/*) and loaded with `load()`. - -```{r, eval = FALSE} -load("A.RData") -``` - -This `Spectra` object would not be valid because its `MsBackendMzR` can no -longer access the MS data in the original data files. Assuming the user also -copied the data files *a.mzML* and *b.mzML*, but to a folder -*/some_other_folder/*, the base storage path of the object would need to be -adapted to match the directory where the data files are available on the second -computer: - -```{r, eval = FALSE} -dataStorageBasePath(A) <- "/some_other_folder" -``` - -By pointing now the storage path to the new storage location of the data files, -the `Spectra` object `A` would also be usable on the second computer. - - # Session information ```{r si}