diff --git a/.Rbuildignore b/.Rbuildignore index 1747a1d..f7a93f8 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,9 +1,10 @@ -^RHermes\.Rproj$ -^\.Rproj\.user$ -^README\.Rmd$ -^codecov\.yml$ -^_pkgdown\.yml$ -^docs$ -^pkgdown$ -^doc$ -^Meta$ +^RHermes\.Rproj$ +^\.Rproj\.user$ +^README\.Rmd$ +^codecov\.yml$ +^_pkgdown\.yml$ +^docs$ +^pkgdown$ +^doc$ +^Meta$ +^\.github$ diff --git a/.github/.gitignore b/.github/.gitignore new file mode 100644 index 0000000..2d19fc7 --- /dev/null +++ b/.github/.gitignore @@ -0,0 +1 @@ +*.html diff --git a/.github/workflows/check-bioc.yml b/.github/workflows/check-bioc.yml new file mode 100644 index 0000000..980ef04 --- /dev/null +++ b/.github/workflows/check-bioc.yml @@ -0,0 +1,322 @@ +## Read more about GitHub actions the features of this GitHub Actions workflow +## at https://lcolladotor.github.io/biocthis/articles/biocthis.html#use_bioc_github_action +## +## For more details, check the biocthis developer notes vignette at +## https://lcolladotor.github.io/biocthis/articles/biocthis_dev_notes.html +## +## You can add this workflow to other packages using: +## > biocthis::use_bioc_github_action() +## +## Using GitHub Actions exposes you to many details about how R packages are +## compiled and installed in several operating system.s +### If you need help, please follow the steps listed at +## https://github.com/r-lib/actions#where-to-find-help +## +## If you found an issue specific to biocthis's GHA workflow, please report it +## with the information that will make it easier for others to help you. +## Thank you! + +## Acronyms: +## * GHA: GitHub Action +## * OS: operating system + +on: + push: + pull_request: + +name: R-CMD-check-bioc + +## These environment variables control whether to run GHA code later on that is +## specific to testthat, covr, and pkgdown. +## +## If you need to clear the cache of packages, update the number inside +## cache-version as discussed at https://github.com/r-lib/actions/issues/86. +## Note that you can always run a GHA test without the cache by using the word +## "/nocache" in the commit message. +env: + has_testthat: 'false' + run_covr: 'true' + run_pkgdown: 'false' + has_RUnit: 'false' + cache-version: 'cache-v1' + run_docker: 'false' + +jobs: + build-check: + runs-on: ${{ matrix.config.os }} + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + container: ${{ matrix.config.cont }} + ## Environment variables unique to this job. + + strategy: + fail-fast: false + matrix: + config: + - { os: ubuntu-latest, r: '4.2', bioc: '3.15', cont: "bioconductor/bioconductor_docker:RELEASE_3_15", rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" } + # - { os: macOS-latest, r: '4.2', bioc: '3.15'} + - { os: windows-latest, r: '4.2', bioc: '3.15'} + ## Check https://github.com/r-lib/actions/tree/master/examples + ## for examples using the http-user-agent + env: + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + RSPM: ${{ matrix.config.rspm }} + NOT_CRAN: true + TZ: UTC + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + steps: + + ## Set the R library to the directory matching the + ## R packages cache step further below when running on Docker (Linux). + - name: Set R Library home on Linux + if: runner.os == 'Linux' + run: | + mkdir /__w/_temp/Library + echo ".libPaths('/__w/_temp/Library')" > ~/.Rprofile + + ## Most of these steps are the same as the ones in + ## https://github.com/r-lib/actions/blob/master/examples/check-standard.yaml + ## If they update their steps, we will also need to update ours. + - name: Checkout Repository + uses: actions/checkout@v3 + + ## R is already included in the Bioconductor docker images + - name: Setup R from r-lib + if: runner.os != 'Linux' + uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + + ## pandoc is already included in the Bioconductor docker images + - name: Setup pandoc from r-lib + if: runner.os != 'Linux' + uses: r-lib/actions/setup-pandoc@v2 + + - name: Query dependencies + run: | + install.packages('remotes') + saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) + shell: Rscript {0} + + - name: Restore R package cache + if: "!contains(github.event.head_commit.message, '/nocache') && runner.os != 'Linux'" + uses: actions/cache@v3 + with: + path: ${{ env.R_LIBS_USER }} + key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE_3_15-r-4.2-${{ hashFiles('.github/depends.Rds') }} + restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE_3_15-r-4.2- + + - name: Cache R packages on Linux + if: "!contains(github.event.head_commit.message, '/nocache') && runner.os == 'Linux' " + uses: actions/cache@v3 + with: + path: /home/runner/work/_temp/Library + key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE_3_15-r-4.2-${{ hashFiles('.github/depends.Rds') }} + restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE_3_15-r-4.2- + + - name: Install Linux system dependencies + if: runner.os == 'Linux' + run: | + sysreqs=$(Rscript -e 'cat("apt-get update -y && apt-get install -y", paste(gsub("apt-get install -y ", "", remotes::system_requirements("ubuntu", "20.04")), collapse = " "))') + echo $sysreqs + sudo -s eval "$sysreqs" + + - name: Install macOS system dependencies + if: matrix.config.os == 'macOS-latest' + run: | + ## Enable installing XML from source if needed + brew install libxml2 + echo "XML_CONFIG=/usr/local/opt/libxml2/bin/xml2-config" >> $GITHUB_ENV + + ## Required to install magick as noted at + ## https://github.com/r-lib/usethis/commit/f1f1e0d10c1ebc75fd4c18fa7e2de4551fd9978f#diff-9bfee71065492f63457918efcd912cf2 + brew install imagemagick@6 + + ## For textshaping, required by ragg, and required by pkgdown + brew install harfbuzz fribidi + + ## For installing usethis's dependency gert + brew install libgit2 + + ## Required for tcltk + brew install xquartz --cask + + - name: Install Windows system dependencies + if: runner.os == 'Windows' + run: | + ## Edit below if you have any Windows system dependencies + shell: Rscript {0} + + - name: Install BiocManager + run: | + message(paste('****', Sys.time(), 'installing BiocManager ****')) + remotes::install_cran("BiocManager") + shell: Rscript {0} + + - name: Set BiocVersion + run: | + BiocManager::install(version = "${{ matrix.config.bioc }}", ask = FALSE, force = TRUE) + shell: Rscript {0} + + - name: Install dependencies pass 1 + run: | + ## Try installing the package dependencies in steps. First the local + ## dependencies, then any remaining dependencies to avoid the + ## issues described at + ## https://stat.ethz.ch/pipermail/bioc-devel/2020-April/016675.html + ## https://github.com/r-lib/remotes/issues/296 + ## Ideally, all dependencies should get installed in the first pass. + + ## Set the repos source depending on the OS + ## Alternatively use https://storage.googleapis.com/bioconductor_docker/packages/ + ## though based on https://bit.ly/bioc2021-package-binaries + ## the Azure link will be the main one going forward. + gha_repos <- if( + .Platform$OS.type == "unix" && Sys.info()["sysname"] != "Darwin" + ) c( + "AnVIL" = "https://bioconductordocker.blob.core.windows.net/packages/3.15/bioc", + BiocManager::repositories() + ) else BiocManager::repositories() + + ## For running the checks + message(paste('****', Sys.time(), 'installing rcmdcheck and BiocCheck ****')) + install.packages(c("rcmdcheck", "BiocCheck", "webshot", "markdown"), repos = gha_repos) # Added webshot for compiling vignettes with plotly HTML elements + webshot::install_phantomjs() + + ## Pass #1 at installing dependencies + ## This pass uses AnVIL-powered fast binaries + ## details at https://github.com/nturaga/bioc2021-bioconductor-binaries + ## The speed gains only apply to the docker builds. + message(paste('****', Sys.time(), 'pass number 1 at installing dependencies: local dependencies ****')) + remotes::install_local(dependencies = TRUE, repos = gha_repos, build_vignettes = FALSE, upgrade = TRUE) + continue-on-error: true + shell: Rscript {0} + + - name: Install dependencies pass 2 + run: | + ## Pass #2 at installing dependencies + ## This pass does not use AnVIL and will thus update any packages + ## that have seen been updated in Bioconductor + message(paste('****', Sys.time(), 'pass number 2 at installing dependencies: any remaining dependencies ****')) + remotes::install_local(dependencies = TRUE, repos = BiocManager::repositories(), build_vignettes = FALSE, upgrade = TRUE, force = TRUE) + shell: Rscript {0} + + - name: Install BiocGenerics + if: env.has_RUnit == 'true' + run: | + ## Install BiocGenerics + BiocManager::install("BiocGenerics") + shell: Rscript {0} + + - name: Install covr + if: github.ref == 'refs/heads/master' && env.run_covr == 'true' && runner.os == 'Linux' + run: | + remotes::install_cran("covr") + shell: Rscript {0} + + - name: Install pkgdown + if: github.ref == 'refs/heads/master' && env.run_pkgdown == 'true' && runner.os == 'Linux' + run: | + remotes::install_cran("pkgdown") + shell: Rscript {0} + + - name: Session info + run: | + options(width = 100) + pkgs <- installed.packages()[, "Package"] + sessioninfo::session_info(pkgs, include_base = TRUE) + shell: Rscript {0} + + - name: Run CMD check + env: + _R_CHECK_CRAN_INCOMING_: false + DISPLAY: 99.0 + run: | + options(crayon.enabled = TRUE) + rcmdcheck::rcmdcheck( + args = c("--no-manual", "--no-vignettes", "--timings"), + build_args = c("--no-manual", "--keep-empty-dirs", "--no-resave-data"), + error_on = "warning", + check_dir = "check" + ) + shell: Rscript {0} + + ## Might need an to add this to the if: && runner.os == 'Linux' + - name: Reveal testthat details + if: env.has_testthat == 'true' + run: find . -name testthat.Rout -exec cat '{}' ';' + + - name: Run RUnit tests + if: env.has_RUnit == 'true' + run: | + BiocGenerics:::testPackage() + shell: Rscript {0} + + - name: Run BiocCheck + env: + DISPLAY: 99.0 + run: | + BiocCheck::BiocCheck( + dir('check', 'tar.gz$', full.names = TRUE), + `quit-with-status` = TRUE, + `no-check-R-ver` = TRUE, + `no-check-bioc-help` = TRUE + ) + shell: Rscript {0} + + - name: Test coverage + if: github.ref == 'refs/heads/master' && env.run_covr == 'true' && runner.os == 'Linux' + run: | + covr::codecov() + shell: Rscript {0} + + - name: Install package + if: github.ref == 'refs/heads/master' && env.run_pkgdown == 'true' && runner.os == 'Linux' + run: R CMD INSTALL . + + - name: Build pkgdown site + if: github.ref == 'refs/heads/master' && env.run_pkgdown == 'true' && runner.os == 'Linux' + run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) + shell: Rscript {0} + ## Note that you need to run pkgdown::deploy_to_branch(new_process = FALSE) + ## at least one locally before this will work. This creates the gh-pages + ## branch (erasing anything you haven't version controlled!) and + ## makes the git history recognizable by pkgdown. + + - name: Install deploy dependencies + if: github.ref == 'refs/heads/master' && env.run_pkgdown == 'true' && runner.os == 'Linux' + run: | + apt-get update && apt-get -y install rsync + + - name: Deploy pkgdown site to GitHub pages 🚀 + if: github.ref == 'refs/heads/master' && env.run_pkgdown == 'true' && runner.os == 'Linux' + uses: JamesIves/github-pages-deploy-action@releases/v4 + with: + clean: false + branch: gh-pages + folder: docs + + - name: Upload check results + if: failure() + uses: actions/upload-artifact@master + with: + name: ${{ runner.os }}-biocversion-RELEASE_3_15-r-4.2-results + path: check + + ## Note that DOCKER_PASSWORD is really a token for your dockerhub + ## account, not your actual dockerhub account password. + ## This comes from + ## https://seandavi.github.io/BuildABiocWorkshop/articles/HOWTO_BUILD_WORKSHOP.html#6-add-secrets-to-github-repo + ## Check https://github.com/docker/build-push-action/tree/releases/v1 + ## for more details. + - uses: docker/build-push-action@v1 + if: "!contains(github.event.head_commit.message, '/nodocker') && env.run_docker == 'true' && runner.os == 'Linux' " + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + repository: rogerginber/rhermes + tag_with_ref: true + tag_with_sha: true + tags: latest diff --git a/.gitignore b/.gitignore index 1196e39..4b6b7d7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,7 @@ -myDB.csv -.Rproj.user -RHermes.Rproj -..Rcheck/00check.log -.Rhistory -doc -Meta +.Rproj.user +RHermes.Rproj +..Rcheck/00check.log .Rhistory +doc +Meta +check diff --git a/DESCRIPTION b/DESCRIPTION index 3681c04..e968ba3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,33 +1,35 @@ Package: RHermes -Title: Targeting the metabolome, the broad-scoped way +Title: Improving metabolome characterization using molecular formula databases Version: 0.99.0 -Authors@R: c(person("Roger", "Gine", email = "alrgibe9@gmail.com", role = c("aut", "cre")), - person("Jordi", "Capellades", role = "ctb"), - person("Jordi", "Rofes", role = "ctb")) +Authors@R: + c(person("Roger", "Gine", email = "roger.gine@estudiants.urv.cat", + role = c("aut", "cre"), + comment = c(ORCID = "0000-0003-0288-9619")), + person("Jordi", "Capellades", role = "ctb", + comment = c(ORCID = "0000-0002-6502-8202")), + person("Jordi", "Rofes", role = "ctb", + comment = c(ORCID = "0000-0002-3914-0233"))) Description: RHermes is a broad-scoped targeted metabolomics package to identify compounds from biological and environmental samples using a context-specific database of formulas and adducts. RHermes offers a comprehensive and sample-specific MS1 and MS2 compound identification strategy. Additionally, - the package offers an easy-to-use GUI to help with the processing and to - visualize the results. + the package offers an easy-to-use GUI to process and visualize the data. License: GPL-3 Encoding: UTF-8 -LazyData: true -RoxygenNote: 7.1.2 +RoxygenNote: 7.2.0 VignetteBuilder: knitr biocViews: MassSpectrometry, Metabolomics, GUI BugReports: https://github.com/RogerGinBer/RHermes/issues Depends: R (>= 3.5.0) Imports: - BiocParallel, dplyr, enviPat, + MSnbase, mzR, xcms, Spectra, + MetaboCoreUtils, reticulate, - keras, - tensorflow, ggplot2, plotly, methods, @@ -51,5 +53,4 @@ Imports: Suggests: readxl, knitr, - testthat, - covr + testthat diff --git a/NAMESPACE b/NAMESPACE index 209a05b..ddbb9f0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -43,13 +43,11 @@ export(readTime) export(remAd) export(removeISF) export(removeSOI) -export(setCluster) export(setDB) export(setExpParam) export(setTime) export(ssNetwork) exportMethods(show) -import(BiocParallel) import(KEGGREST) import(data.table, except = between) import(ggplot2) @@ -60,18 +58,17 @@ import(networkD3) import(philentropy) import(plotly, except = c(groups, last_plot)) import(reticulate) -import(tensorflow) import(visNetwork) -importFrom(BiocParallel,bplapply) +importFrom(MSnbase,readMSData) +importFrom(MetaboCoreUtils,addElements) +importFrom(MetaboCoreUtils,adducts) +importFrom(MetaboCoreUtils,subtractElements) +importFrom(data.table,data.table) importFrom(dplyr,between) importFrom(dplyr,distinct) importFrom(dplyr,filter) importFrom(grDevices,colorRamp) importFrom(grDevices,rgb) -importFrom(keras,array_reshape) -importFrom(keras,k_argmax) -importFrom(keras,load_model_hdf5) -importFrom(keras,predict_classes) importFrom(stats,IQR) importFrom(stats,approx) importFrom(stats,median) @@ -81,3 +78,5 @@ importFrom(utils,data) importFrom(utils,read.csv) importFrom(utils,read.csv2) importFrom(utils,write.csv) +importFrom(xcms,chromPeaks) +importFrom(xcms,findChromPeaks) diff --git a/R/Classes.R b/R/Classes.R index 3a7e994..a061621 100644 --- a/R/Classes.R +++ b/R/Classes.R @@ -1,308 +1,290 @@ -#Avoids R CMD CHECK notes on data.table package -if(getRversion() >= "2.15.1") utils::globalVariables(c(".")) - -#' @title ExpParam -#' @family Params -#' @description ExpParam holds all experimental data as well as database-related -#' info generated during the processing steps. Use setExpParams to save it into -#' your RHermesExp object -#' @slot ppm The mass spectrometer (MS) mass error in -#' parts per million (ppm) -#' @slot res The MS resolution at m/z = 200 -#' @slot nthr The noise threshold to consider. Any signal -#' weaker than it will NOT be considered in the PL. Useful to filter -#' 'grass-like' peaks in a Q-TOF instrument. Defaults to 1000. -#' @slot minmz Minimum mz registered in the MS1 experiment. -#' Defaults to 80 -#' @slot maxmz Maximum mz registered in the MS1 experiment. -#' Defaults to 1000. -#' @slot ion Polarity used in the experiment. RHermes currently -#' only supports one polarity at a time per RHermesExp, so if you're -#' performing both + and - polarity you will have to use 2 different -#' RHermesExp objects, one for each. Important: must be described ONLY as -#' '+' or '-'. -#' @slot instr MS instrument type used to acquire the data. It can be -#' either QTOF' or 'Orbitrap'. -#' @slot DB Formula database -#' @slot adlist Adduct list -#' @slot ionF Ionic formula list. Formed by all formula-adduct -#' combinations -#' @slot isoList Isotopic pattern exploration results. -#' @export ExpParam -#' @return A ExpParam object -#' @examples -#' -#' #Default object -#' exp <- ExpParam() -#' #Setting some values - check all possible slots above -#' exp2 <- ExpParam(ppm = 3, res = 150000, instr = "Orbitrap") -#' -ExpParam <- setClass("ExpParam", slots = list(ppm = "numeric", - res = "numeric", nthr = "numeric", minmz = "numeric", maxmz = "numeric", - ion = "character", instr = "character", DB = "list", adlist = "data.frame", - ionF = "list", isoList = "list"), prototype = list(ppm = 5, - res = 30000, nthr = 1000, minmz = 80, maxmz = 1200, ion = "+", - instr = "QTOF", DB = list(), adlist = data.frame(), ionF = list(), - isoList = list())) - -#'@title SOIParam -#'@family Params -#'@description The SOIParam class contains all details regarding SOI detection, -#' such as the number of RT bins and their width, minimum data point intensity, -#' usage of blank substraction, etc. -#'@details It's a part of the RHermesSOI object, and is an input for findSOI(). -#' This object is usually generated by getSOIParam(), which features -#' already-made templates. -#'@slot specs A data frame containing the RT bin information. Each row -#' represents a binning step that will be applied to the annotated data points -#' to detect the SOIs. -#'@slot maxlen Maximum SOI length in seconds. If the SOI is longer tha maxlen, a -#' broad peak picking will divide the long SOI into smaller sized SOIs. -#'@slot minint Minimum data point intensity. If a point is smaller than minint, -#' it will not be considered in the SOI detection. -#'@slot blanksub Logical. Whether to perform blanksub. It should NOT be entered -#' by the user in this point, since findSOI will fill in this information. -#'@slot blankname Character. The corresponding blank file name. As before, it -#' should not be entered by the user, findSOI will fill it automatically. -#'@author Roger Gine -#'@seealso RHermesSOI findSOI getSOIParam -#'@return A SOIParam object -#'@examples SOIParam() -#'@export SOIParam -SOIParam <- setClass("SOIParam", slots = list(specs = "data.frame", - maxlen = "numeric", minint = "numeric", blanksub = "logical", - blankname = "character"), prototype = list(specs = data.frame(), - maxlen = 30, minint = 1000, blanksub = FALSE, blankname = "None")) - - - -#'@title RHermesPL -#'@family Containers -#'@description The RHermesPL class contains all the annotation details from a -#' single MS1 mzML file, as well as some raw information that is useful for -#' other steps of the workflow. RHermesPL objects are generated by processMS1 -#' and should not be declared manually. -#'@slot peaklist Data.table containing all annotated data points. It stores, rt -#' values, mz, intensities, ionic formulas and isotope annotations. -#'@slot header Dataframe with headers from the mzML file. Extracted with -#' mzR::header() and filtered empty scans (if any). -#'@slot raw Dataframe of raw data points from the mzML file. Extracted with -#' mzR::peaks() and filtered by the noise parameter from ExpParam. -#'@slot labelled Logical. Whether all possible 13C isotopologues were searched -#' for in the data. Usually FALSE, only TRUE if you're dealing with 13C -#' labelled data. Should not be set by the user here. Instead, tell processMS1 -#' using labelled = TRUE. -#'@slot filename Filename of the original mzML file. -#'@author Roger Gine -#'@seealso processMS1 ExpParam -#'@export RHermesPL -#'@return An RHermesPL object -#'@examples -#'\dontshow{struct <- readRDS(system.file("extdata", "exampleObject.rds", -#' package = "RHermes"))} -#'PL(struct, 1) #Access the object using the getter function. -RHermesPL <- setClass("RHermesPL", slots = list(peaklist = "data.table", - header = "data.frame", raw = "data.table", labelled = "logical", - filename = "character")) - - - -#'@title RHermesSOI -#'@family Containers -#'@description The RHermesSOI class contains the SOI list, some precalculated -#' information for plotting (in functions such as plotSOI) -#'@slot SOIList The SOI list itself. -#'@slot PlotDF A dataframe contaning all SOI data points with annotations, ready -#' to be plotted when needed. -#'@slot SOIParam A SOIParam object containing all information about SOI generation -#'@slot filename The filename of the mzML used to generate the SOI list. -#'@author Roger Gine -#'@seealso findSOI plotSOI plotIso -#'@export RHermesSOI -#'@return An RHermesSOI object -#'@examples -#'\dontshow{struct <- readRDS(system.file("extdata", "exampleObject.rds", -#' package = "RHermes"))} -#'RHermesSOI() #Do NOT generate a SOI object directly, use findSOI -#'SOI(struct, 1) #Access the object using the getter function. -RHermesSOI <- setClass("RHermesSOI", slots = list(SOIList = "data.table", - PlotDF = "data.table", SOIParam = "SOIParam", filename = "character")) - - -#'@title ILParam -#'@family Params -#'@description The ILParam class contains all details regarding IL generation, -#' such as m/z window for grouping close SOIs, whether to prioritize or -#' restrict the inclusion list to a subset of adducts, etc. -#'@details It's a part of the RHermesIL object and is an input for generateIL(). -#'@slot filtermz Deltamz used to group SOIs with similar RTs. Defaults to -#' +-0.5Da. It should be matched with the m/z isolation window of your MS2 -#' method settings. -#'@slot filterrt Delta RT used to group SOIs with similar RTs. Defaults to 10s. -#'@slot rtmargin Similar to filterrt. Defaults to 5s. -#'@slot priorization Whether to prioritze adducts or not. Options are "only" to -#' select only a set of adduct annotations, "full" to cover all SOIs -#' independently of the adduct annotations or "yes" to specify some adducts to -#' be prioritized in order. -#'@slot ad The specified adducts. See priorization. -#'@author Roger Gine -#'@seealso RHermesIL generateIL -#'@export ILParam -#'@return An ILParam object -#'@examples ILParam() -ILParam <- setClass("ILParam", slots = list(filtermz = "numeric", - filterrt = "numeric", rtmargin = "numeric", priorization = "character", - ad = "character"), prototype = list(filtermz = 0.5, filterrt = 10, - rtmargin = 5, priorization = "only", ad = c("M+H"))) - - -#'@title RHermesIL -#'@family Containers -#'@description The RHermesIL contains an inclusion list (IL) to perform the MS2 -#' experiment with it. It also contains information about whether adduct -#' annotations have been prioritized over others, restricted, etc. -#'@details Should not be created directly by the user. Instead, use generateIL. -#'@slot IL The inclusion list itself -#'@slot annotation A list of the SOIs corresponding to each IL entry -#'@slot SOInum Which SOI list was used to generate the IL -#'@slot ILParam An ILParam object containing all IL generation parameters -#'@author Roger Gine -#'@seealso generateIL exportIL plotIL filterIL ILParam -#'@export RHermesIL -#'@return An RHermesIL object -#'@examples -#'\dontshow{struct <- readRDS(system.file("extdata", "exampleObject.rds", -#' package = "RHermes"))} -#'RHermesIL() #Do NOT generate an IL object directly -#'IL(struct, 1) #Access the object using the getter function. -#' -RHermesIL <- setClass("RHermesIL", slots = list(IL = "data.table", - annotation = "list", SOInum = "numeric", ILParam = "ILParam")) - - -#'@title RHermesMS2Exp -#'@family Containers -#'@description The RHermesMS2Exp contains global information about the MS2 steps -#' of the workflow. Particularly, it contains the inclusion list to be used, -#' the MS2Data that has been acquired according to the IL and a list of -#' purified MS2 spectra and identifications against a reference MS2 database -#' (if performed) -#'@slot IL An RHermesIL object -#'@slot MS2Data A list that contains header and peaks of the MS2 data acquired -#' for each IL entry. It gets its information after running processMS2. -#'@slot Ident A list of different relevant information about the deconvoluted -#' MS2 spectra generated by processMS2. -#'@author Roger Gine -#'@seealso processMS2 generateIL IL Ident -#'@export RHermesMS2Exp -#'@return An RHermesMS2Exp object -#' @examples RHermesMS2Exp() #Do not generate the object manually -RHermesMS2Exp <- setClass("RHermesMS2Exp", slots = list(IL = "RHermesIL", - MS2Data = "list", Ident = "list")) - - -#' @title setCluster -#' @description Returns the most appropriate BiocParallel backend for RHermes -#' according to your OS and RAM. It is entirely optional, but recommended -#' nonetheless. -#' @author Roger Gine -#' @import BiocParallel -#' @examples -#' BiocParallel::register(setCluster()) #Registers the most appropriate backend -#' @return A BiocParallel backend -#' @export -setCluster <- function(){ - if (Sys.info()[1] == "Windows") { - ram <- system2("wmic", args = "OS get FreePhysicalMemory /Value", - stdout = TRUE) - ram <- ram[grepl("FreePhysicalMemory", ram)] - ram <- gsub("FreePhysicalMemory=", "", ram, fixed = TRUE) - ram <- gsub("\r", "", ram, fixed = TRUE) - ram <- as.integer(ram) - - #Suppose max 2GB per worker - nwork <- min(floor(ram/2e6), BiocParallel::snowWorkers()) - if (nwork <= 1) { - warning(paste("Maybe you have too little available RAM.", - "Proceeding with SerialParam()")) - return(backend <- BiocParallel::SerialParam(progressbar = TRUE)) - } - return(backend <- BiocParallel::SnowParam(nwork, progressbar = TRUE)) - } else { - backend <- BiocParallel::MulticoreParam( - BiocParallel::multicoreWorkers() - 2, - progressbar = TRUE) - } - return(backend) -} - -#'@title RHermesMeta -#'@description The metadata storage class. It holds the experimental parameters, -#' all the names of the processed files and the timestamps for all operations -#' performed on the parental RHermesExp object -#'@slot ExpParam Contains all experimental information (ppm error, resolution, -#' polarity, etc.), as well as the formula and adduct databases used. -#'@slot filenames All filenames of the processed files. This info is also -#' available for individual PL and SOI objects in their respective slot. -#'@slot timestamps Timestamps generated when any operation was performed on the -#' parental object. You can easily check them with readTime(). -#'@author Roger Gine -#'@seealso ExpParam readTime setTime setCluster Cluster -RHermesMeta <- setClass("RHermesMeta", - slots = list( - ExpParam = "ExpParam", - filenames = "character", - timestamps = "character" - ), prototype = list( - ExpParam = ExpParam(), - filenames = character(0), - timestamps = c(paste("System info:", - paste(Sys.info(), collapse = "/")), - paste("RHermes version:", - packageVersion("RHermes"))) - ) -) - -#'@title RHermesData -#'@author Roger Gine -#'@description The experimental data storage class. Holds all -#' information of the peaklists, SOI lists, inclusion lists, MS2 -#' data and identifications. -#'@slot PL A list object that holds all RHermesPL objects -#' containing a PL each. -#'@slot SOI List that holds RHermesSOI objects with a SOI list -#' inside each. -#'@slot MS2Exp List to hold inclusion lists, MSMS experimental -#' data and compound identifications. -#'@seealso RHermesPL RHermesSOI -RHermesData <- setClass("RHermesData", - slots = list(PL = "list", SOI = "list", - MS2Exp = "list") -) - -#' @title RHermesExp -#' @author Roger Gine -#' @description The main RHermes class: The RHermesExp. It is a container for -#' all generated information. All main RHermes functions use it and return an -#' updated version of the object. -#' @slot metadata Where all the complementary info is stored (experimental -#' parameters, timestamps, databases, etc.). See [RHermes]{RHermesMeta} for -#' more info. -#' @slot data All experimental info is stored in data. It is divided into PL -#' (peaklist) SOI (scans of interest) and MS2Exp (for the IL, MS2 data and -#' identifications). See [RHermes]{RHermesData} for more info. -#' @seealso setDB setExpParam processMS1 -#' @return An RHermesExp object -#' @export RHermesExp -#' @examples -#' if(FALSE){ -#' myHermes <- RHermesExp() #Initializing empty object -#' } -RHermesExp <- setClass("RHermesExp", - slots = list(metadata = "RHermesMeta", - data = "RHermesData") -) - - - - +#Avoids R CMD CHECK notes on data.table package +if(getRversion() >= "2.15.1") utils::globalVariables(c(".")) + +#' @title ExpParam +#' @family Params +#' @description ExpParam holds all experimental data as well as database-related +#' info generated during the processing steps. Use setExpParams to save it into +#' your RHermesExp object +#' @slot ppm The mass spectrometer (MS) mass error in +#' parts per million (ppm) +#' @slot res The MS resolution at m/z = 200 +#' @slot nthr The noise threshold to consider. Any signal +#' weaker than it will NOT be considered in the PL. Useful to filter +#' 'grass-like' peaks in a Q-TOF instrument. Defaults to 1000. +#' @slot minmz Minimum mz registered in the MS1 experiment. +#' Defaults to 80 +#' @slot maxmz Maximum mz registered in the MS1 experiment. +#' Defaults to 1000. +#' @slot ion Polarity used in the experiment. RHermes currently +#' only supports one polarity at a time per RHermesExp, so if you're +#' performing both + and - polarity you will have to use 2 different +#' RHermesExp objects, one for each. Important: must be described ONLY as +#' '+' or '-'. +#' @slot instr MS instrument type used to acquire the data. It can be +#' either QTOF' or 'Orbitrap'. +#' @slot DB Formula database +#' @slot adlist Adduct list +#' @slot ionF Ionic formula list. Formed by all formula-adduct +#' combinations +#' @slot isoList Isotopic pattern exploration results. +#' @export ExpParam +#' @return A ExpParam object +#' @examples +#' +#' #Default object +#' exp <- ExpParam() +#' #Setting some values - check all possible slots above +#' exp2 <- ExpParam(ppm = 3, res = 150000, instr = "Orbitrap") +#' +ExpParam <- setClass("ExpParam", slots = list(ppm = "numeric", + res = "numeric", nthr = "numeric", minmz = "numeric", maxmz = "numeric", + ion = "character", instr = "character", DB = "list", adlist = "data.frame", + ionF = "list", isoList = "list"), prototype = list(ppm = 5, + res = 30000, nthr = 1000, minmz = 80, maxmz = 1200, ion = "+", + instr = "QTOF", DB = list(), adlist = data.frame(), ionF = list(), + isoList = list())) + +#'@title SOIParam +#'@family Params +#'@description The SOIParam class contains all details regarding SOI detection, +#' such as the number of RT bins and their width, minimum data point intensity, +#' usage of blank substraction, etc. +#'@details It's a part of the RHermesSOI object, and is an input for findSOI(). +#' This object is usually generated by getSOIParam(), which features +#' already-made templates. +#'@slot specs A data frame containing the RT bin information. Each row +#' represents a binning step that will be applied to the annotated data points +#' to detect the SOIs. +#'@slot maxlen Maximum SOI length in seconds. If the SOI is longer tha maxlen, a +#' broad peak picking will divide the long SOI into smaller sized SOIs. +#'@slot minint Minimum data point intensity. If a point is smaller than minint, +#' it will not be considered in the SOI detection. +#'@slot blanksub Logical. Whether to perform blanksub. It should NOT be entered +#' by the user in this point, since findSOI will fill in this information. +#'@slot blankname Character. The corresponding blank file name. As before, it +#' should not be entered by the user, findSOI will fill it automatically. +#'@author Roger Gine +#'@seealso RHermesSOI findSOI getSOIParam +#'@return A SOIParam object +#'@examples SOIParam() +#'@export SOIParam +SOIParam <- setClass("SOIParam", + slots = list(specs = "data.frame", + maxlen = "numeric", + minint = "numeric", + blanksub = "logical", + blankname = "character", + mode = "character", + cwp = "CentWaveParam" + ), + prototype = list(specs = data.frame(), + maxlen = 30, + minint = 1000, + blanksub = FALSE, + blankname = "None", + mode = "regular", + cwp = xcms::CentWaveParam(ppm = 6, + peakwidth = c(8,60), prefilter = c(0,100), + snthresh = 0, noise = 0, fitgauss = FALSE, + firstBaselineCheck = FALSE) + )) + + + +#'@title RHermesPL +#'@family Containers +#'@description The RHermesPL class contains all the annotation details from a +#' single MS1 mzML file, as well as some raw information that is useful for +#' other steps of the workflow. RHermesPL objects are generated by processMS1 +#' and should not be declared manually. +#'@slot peaklist Data.table containing all annotated data points. It stores, rt +#' values, mz, intensities, ionic formulas and isotope annotations. +#'@slot header Dataframe with headers from the mzML file. Extracted with +#' mzR::header() and filtered empty scans (if any). +#'@slot raw Dataframe of raw data points from the mzML file. Extracted with +#' mzR::peaks() and filtered by the noise parameter from ExpParam. +#'@slot labelled Logical. Whether all possible 13C isotopologues were searched +#' for in the data. Usually FALSE, only TRUE if you're dealing with 13C +#' labelled data. Should not be set by the user here. Instead, tell processMS1 +#' using labelled = TRUE. +#'@slot filename Filename of the original mzML file. +#'@author Roger Gine +#'@seealso processMS1 ExpParam +#'@export RHermesPL +#'@return An RHermesPL object +#'@examples +#'\dontshow{struct <- readRDS(system.file("extdata", "exampleObject.rds", +#' package = "RHermes"))} +#'PL(struct, 1) #Access the object using the getter function. +RHermesPL <- setClass("RHermesPL", slots = list(peaklist = "data.table", + header = "data.frame", raw = "data.table", labelled = "logical", + filename = "character")) + + + +#'@title RHermesSOI +#'@family Containers +#'@description The RHermesSOI class contains the SOI list, some precalculated +#' information for plotting (in functions such as plotSOI) +#'@slot SOIList The SOI list itself. +#'@slot PlotDF A dataframe contaning all SOI data points with annotations, ready +#' to be plotted when needed. +#'@slot SOIParam A SOIParam object containing all information about SOI generation +#'@slot filename The filename of the mzML used to generate the SOI list. +#'@author Roger Gine +#'@seealso findSOI plotSOI plotIso +#'@export RHermesSOI +#'@return An RHermesSOI object +#'@examples +#'\dontshow{struct <- readRDS(system.file("extdata", "exampleObject.rds", +#' package = "RHermes"))} +#'RHermesSOI() #Do NOT generate a SOI object directly, use findSOI +#'SOI(struct, 1) #Access the object using the getter function. +RHermesSOI <- setClass("RHermesSOI", slots = list(SOIList = "data.table", + PlotDF = "data.table", SOIParam = "SOIParam", filename = "character")) + + +#'@title ILParam +#'@family Params +#'@description The ILParam class contains all details regarding IL generation, +#' such as m/z window for grouping close SOIs, whether to prioritize or +#' restrict the inclusion list to a subset of adducts, etc. +#'@details It's a part of the RHermesIL object and is an input for generateIL(). +#'@slot filtermz Deltamz used to group SOIs with similar RTs. Defaults to +#' +-0.5Da. It should be matched with the m/z isolation window of your MS2 +#' method settings. +#'@slot filterrt Delta RT used to group SOIs with similar RTs. Defaults to 10s. +#'@slot rtmargin Similar to filterrt. Defaults to 5s. +#'@slot priorization Whether to prioritze adducts or not. Options are "only" to +#' select only a set of adduct annotations, "full" to cover all SOIs +#' independently of the adduct annotations or "yes" to specify some adducts to +#' be prioritized in order. +#'@slot ad The specified adducts. See priorization. +#'@author Roger Gine +#'@seealso RHermesIL generateIL +#'@export ILParam +#'@return An ILParam object +#'@examples ILParam() +ILParam <- setClass("ILParam", slots = list(filtermz = "numeric", + filterrt = "numeric", rtmargin = "numeric", priorization = "character", + ad = "character"), prototype = list(filtermz = 0.5, filterrt = 10, + rtmargin = 5, priorization = "only", ad = c("M+H"))) + + +#'@title RHermesIL +#'@family Containers +#'@description The RHermesIL contains an inclusion list (IL) to perform the MS2 +#' experiment with it. It also contains information about whether adduct +#' annotations have been prioritized over others, restricted, etc. +#'@details Should not be created directly by the user. Instead, use generateIL. +#'@slot IL The inclusion list itself +#'@slot annotation A list of the SOIs corresponding to each IL entry +#'@slot SOInum Which SOI list was used to generate the IL +#'@slot ILParam An ILParam object containing all IL generation parameters +#'@author Roger Gine +#'@seealso generateIL exportIL plotIL filterIL ILParam +#'@export RHermesIL +#'@return An RHermesIL object +#'@examples +#'\dontshow{struct <- readRDS(system.file("extdata", "exampleObject.rds", +#' package = "RHermes"))} +#'RHermesIL() #Do NOT generate an IL object directly +#'IL(struct, 1) #Access the object using the getter function. +#' +RHermesIL <- setClass("RHermesIL", slots = list(IL = "data.table", + annotation = "list", SOInum = "numeric", ILParam = "ILParam")) + + +#'@title RHermesMS2Exp +#'@family Containers +#'@description The RHermesMS2Exp contains global information about the MS2 steps +#' of the workflow. Particularly, it contains the inclusion list to be used, +#' the MS2Data that has been acquired according to the IL and a list of +#' purified MS2 spectra and identifications against a reference MS2 database +#' (if performed) +#'@slot IL An RHermesIL object +#'@slot MS2Data A list that contains header and peaks of the MS2 data acquired +#' for each IL entry. It gets its information after running processMS2. +#'@slot Ident A list of different relevant information about the deconvoluted +#' MS2 spectra generated by processMS2. +#'@author Roger Gine +#'@seealso processMS2 generateIL IL Ident +#'@export RHermesMS2Exp +#'@return An RHermesMS2Exp object +#' @examples RHermesMS2Exp() #Do not generate the object manually +RHermesMS2Exp <- setClass("RHermesMS2Exp", slots = list(IL = "RHermesIL", + MS2Data = "list", Ident = "list")) + + + +#'@title RHermesMeta +#'@description The metadata storage class. It holds the experimental parameters, +#' all the names of the processed files and the timestamps for all operations +#' performed on the parental RHermesExp object +#'@slot ExpParam Contains all experimental information (ppm error, resolution, +#' polarity, etc.), as well as the formula and adduct databases used. +#'@slot filenames All filenames of the processed files. This info is also +#' available for individual PL and SOI objects in their respective slot. +#'@slot timestamps Timestamps generated when any operation was performed on the +#' parental object. You can easily check them with readTime(). +#'@author Roger Gine +#'@seealso ExpParam readTime setTime setCluster Cluster +RHermesMeta <- setClass("RHermesMeta", + slots = list( + ExpParam = "ExpParam", + filenames = "character", + timestamps = "character" + ), prototype = list( + ExpParam = ExpParam(), + filenames = character(0), + timestamps = c(paste("System info:", + paste(Sys.info(), collapse = "/")), + paste("RHermes version:", + packageVersion("RHermes"))) + ) +) + +#'@title RHermesData +#'@author Roger Gine +#'@description The experimental data storage class. Holds all +#' information of the peaklists, SOI lists, inclusion lists, MS2 +#' data and identifications. +#'@slot PL A list object that holds all RHermesPL objects +#' containing a PL each. +#'@slot SOI List that holds RHermesSOI objects with a SOI list +#' inside each. +#'@slot MS2Exp List to hold inclusion lists, MSMS experimental +#' data and compound identifications. +#'@seealso RHermesPL RHermesSOI +RHermesData <- setClass("RHermesData", + slots = list(PL = "list", SOI = "list", + MS2Exp = "list") +) + +#' @title RHermesExp +#' @author Roger Gine +#' @description The main RHermes class: The RHermesExp. It is a container for +#' all generated information. All main RHermes functions use it and return an +#' updated version of the object. +#' @slot metadata Where all the complementary info is stored (experimental +#' parameters, timestamps, databases, etc.). See [RHermes]{RHermesMeta} for +#' more info. +#' @slot data All experimental info is stored in data. It is divided into PL +#' (peaklist) SOI (scans of interest) and MS2Exp (for the IL, MS2 data and +#' identifications). See [RHermes]{RHermesData} for more info. +#' @seealso setDB setExpParam processMS1 +#' @return An RHermesExp object +#' @export RHermesExp +#' @examples +#' if(FALSE){ +#' myHermes <- RHermesExp() #Initializing empty object +#' } +RHermesExp <- setClass("RHermesExp", + slots = list(metadata = "RHermesMeta", + data = "RHermesData") +) + + + + diff --git a/R/IL_functions.R b/R/IL_functions.R index cba595a..0e29530 100644 --- a/R/IL_functions.R +++ b/R/IL_functions.R @@ -65,9 +65,9 @@ inclusionList <- function(struct, params, id) { which(oSoi$start==y$start & oSoi$end==y$end & oSoi$mass==y$mass & - oSoi$formula==y$formula) + oSoi$formula==y$formula)[1] })) - } else { + }else{ GL <- oSoi[, c("start", "end", "formula", "mass", "MaxInt", "anot")] GL$originalSOI <- seq_len(nrow(GL)) } @@ -107,21 +107,21 @@ inclusionList <- function(struct, params, id) { }, character(1)) GL$entrynames <- unlist(GL$entrynames) - PL_id <- which(struct@metadata@filenames == SoiList@filename)[1] + PL_id <- which(struct@metadata@filenames %in% SoiList@filename)[1] raw <- PL(struct, PL_id)@raw if(nrow(raw) != 0){ - # GL$XIC <- lapply(seq_len(nrow(GL)), function(x){ - # cur <- GL[x,] - # # mzs <- c(min(cur$mass) * (1 - ppm * 1e-6), - # # max(cur$mass) * (1 + ppm * 1e-6)) - # # mzs <- c(min(cur$mass) - filtermz, - # # max(cur$mass) + filtermz) - # mzs <- c(min(cur$jointentries[[1]]$mass) * (1 - ppm * 1e-6), - # max(cur$jointentries[[1]]$mass) * (1 + ppm * 1e-6)) - # calculate_XIC_estimation(raw, - # mzs, - # c(cur$start, cur$end)) - # }) + GL$XIC <- lapply(seq_len(nrow(GL)), function(x){ + cur <- GL[x,] + # mzs <- c(min(cur$mass) * (1 - ppm * 1e-6), + # max(cur$mass) * (1 + ppm * 1e-6)) + # mzs <- c(min(cur$mass) - filtermz, + # max(cur$mass) + filtermz) + mzs <- c(min(cur$jointentries[[1]]$mass) * (1 - ppm * 1e-6), + max(cur$jointentries[[1]]$mass) * (1 + ppm * 1e-6)) + calculate_XIC_estimation(raw, + mzs, + c(cur$start, cur$end)) + }) } GL <- RHermesIL(IL = as.data.table(GL[, -5]), annotation = GL$jointentries, SOInum = id, ILParam = params) @@ -175,8 +175,8 @@ GLgroup <- function(GL, rtmargin, ppm) { idx <- which(between(GL$start, st - rtmargin, end + rtmargin) & between(GL$end, st - rtmargin, end + rtmargin) & between(GL$mass, m - m * ppm/1e+06, m + m * ppm/1e+06)) - if(rtmargin <= 0){idx <- 1} + out <- c(out, list(GL[idx, ])) GL <- GL[-idx, ] if (nrow(GL) == 0) { diff --git a/R/MS1_functions.R b/R/MS1_functions.R index 2d0672a..c82edef 100644 --- a/R/MS1_functions.R +++ b/R/MS1_functions.R @@ -70,10 +70,15 @@ function(struct, files, labelled = FALSE) { preprocessing <- function(struct){ - F_DB <- struct@metadata@ExpParam@DB[,c("MolecularFormula", "EnviPatMass")] - #Could break if colname isn't exactly "MolecularFormula" - F_DB <- dplyr::distinct_at(F_DB, "MolecularFormula", - .keep_all = T) + ## Fix to keep backwards compatibility with already processed files + if("EnviPatMass" %in% colnames(DB(struct))){ + colnames(struct@metadata@ExpParam@DB)[colnames(DB(struct)) == "EnviPatMass"] <- "ExactMass" + } + if (!all(c("MolecularFormula", "ExactMass") %in% colnames(DB(struct)))){ + stop("DB does not have the columns MolecularFormula and ExactMass, please rerun setDB") + } + F_DB <- struct@metadata@ExpParam@DB[,c("MolecularFormula", "ExactMass")] + F_DB <- dplyr::distinct_at(F_DB, "MolecularFormula", .keep_all = T) colnames(F_DB) <- c("fms", "m") IF_DB <- IonicForm(F_DB, struct@metadata@ExpParam@adlist) @@ -86,26 +91,27 @@ preprocessing <- function(struct){ return(list(IF_DB, IC)) } +#'@importFrom data.table data.table import_and_filter <- function(lf, minpks = 20, noise = 1000) { - #Opening the connection to a single mzML file + ## Open the connection to a single mzML file fileml <- mzR::openMSfile(lf) plist <- mzR::peaks(fileml) h <- mzR::header(fileml) - #Filtering header + ## Filter empty headers h <- h[, -which(vapply(h, function(x) all(x == 0), FUN.VALUE = logical(1)))] if (any(h$peaksCount < minpks)) { - #Removing scans with very very few peaks detected + #Remove scans with very very few peaks detected plist <- plist[-which(h$peaksCount < minpks)] h <- h[-which(h$peaksCount < minpks), ] } - #Removing all MS>1 scans + #Remove all MS>1 scans if(any(h$msLevel != 1)){ plist <- plist[-which(h$msLevel != 1)] h <- h[-which(h$msLevel != 1), ] } raw <- lapply(seq_along(plist), function(x) { - #Extracting raw data into a DT + #Extract raw data into a DT rpeaks <- plist[[x]] rt <- h$retentionTime[x] return(data.table(mz = rpeaks[, 1], rtiv = rpeaks[, 2], rt = rt)) @@ -125,13 +131,14 @@ IonicForm <- function(F_DB, Ad_DB) { return(list(db, connections)) } +#' @importFrom MetaboCoreUtils addElements subtractElements calculate_ionic_forms <- function(i, F_DB, Ad_DB){ f <- as.character(F_DB$fms[i]) j <- apply(Ad_DB, 1, function(x) { current_f <- f if (x[3] != 1) current_f <- multform(current_f, as.numeric(x[3])) - if (x[6] != "FALSE") current_f <- sumform(current_f, x[6]) - if (x[7] != "FALSE") current_f <- subform(current_f, x[7]) + if (x[6] != "FALSE") current_f <- addElements(current_f, x[6]) + if (x[7] != "FALSE") current_f <- subtractElements(current_f, x[7]) if (is.na(current_f)) return(NA) ch <- ifelse(x[5] == "positive", "+", "-") @@ -164,90 +171,6 @@ calculate_ionic_forms <- function(i, F_DB, Ad_DB){ return(list(db, con)) } -sumform <- function(f1, f2) { - f1 <- tryCatch({ - CHNOSZ::count.elements(f1) - }, error = function(cond){return(NA)}) - if (any(is.na(f1))) return(NA) - n1 <- names(f1) - f2 <- CHNOSZ::count.elements(f2) - n2 <- names(f2) - interAB <- n1[which(n1 %in% n2)] - if (length(interAB) != 0) { - s1 <- vapply(interAB, function(x) { - f1[[x]] + f2[[x]] - }, numeric(1)) - names(s1) <- interAB - } else { - s1 <- integer() - } - uniqueA <- n1[which(!(n1 %in% n2))] - if (length(uniqueA) != 0) {s1 <- c(s1, f1[uniqueA])} - uniqueB <- n2[which(!(n2 %in% n1))] - if (length(uniqueB) != 0) {s1 <- c(s1, f2[uniqueB])} - alln <- unique(c(n1, n2)) - alln <- alln[!(alln %in% c("C", "H"))] - ord <- c() - if ("C" %in% unique(c(n1, n2))) {ord <- c("C")} - if ("H" %in% unique(c(n1, n2))) {ord <- c(ord, "H")} - ord <- c(ord, sort(alln)) - s1 <- s1[ord] - - output <- paste0(mapply(function(x, y) { - if (y == 1) { - return(x) - } - return(paste0(x, y)) - }, names(s1), s1), collapse = "") - - return(output) -} - -subform <- function(f1, f2) { - f1 <- tryCatch({ - CHNOSZ::count.elements(f1) - }, error = function(cond){return(NA)}) - if (any(is.na(f1))) {return(NA)} - n1 <- names(f1) - f2 <- CHNOSZ::count.elements(f2) - n2 <- names(f2) - interAB <- n1[which(n1 %in% n2)] - if (length(interAB) < length(n2)) {return(NA)} - if (length(interAB) != 0) { - s1 <- vapply(interAB, function(x) { - res <- ifelse(f1[[x]] - f2[[x]] > 0, f1[[x]] - f2[[x]], NA) - }, numeric(1)) - if (any(is.na(s1))) {return(NA)} - if (any(s1 == 0)) { - n1 <- n1[which(n1 %in% n2)[s1 != 0]] - n2 <- n2[which(n2 %in% n1)[s1 != 0]] - s1 <- s1[s1 != 0] - } - names(s1) <- interAB - } else { - s1 <- integer() - } - uniqueA <- n1[which(!(n1 %in% n2))] - if (length(uniqueA) != 0) {s1 <- c(s1, f1[uniqueA])} - uniqueB <- n2[which(!(n2 %in% n1))] - if (length(uniqueB) != 0) {s1 <- c(s1, f2[uniqueB])} - alln <- unique(c(n1, n2)) - alln <- alln[!(alln %in% c("C", "H"))] - ord <- c() - if ("C" %in% unique(c(n1, n2))) {ord <- c("C")} - if ("H" %in% unique(c(n1, n2))) {ord <- c(ord, "H")} - ord <- c(ord, sort(alln)) - s1 <- s1[ord] - - output <- paste0(mapply(function(x, y) { - if (y == 1) { - return(x) - } - return(paste0(x, y)) - }, names(s1), s1), collapse = "") - - return(output) -} multform <- function(f, k) { f <- CHNOSZ::count.elements(f) @@ -333,7 +256,7 @@ isocalc_parallel <- function(x, kTHR, resol_factor, isotopecode, isOrbi){ } else { limitfactor <- 2 * kTHR * x[1, 1]/resol_factor } - breaks <- which(diff(x[,1]) > 5*median(diff(x[,1]))) #Find iso clusters + breaks <- which(diff(x[,1]) > 5 * median(diff(x[,1]))) #Find iso clusters breaks <- data.frame(st = c(1, breaks + 1), end = c(breaks, nrow(x))) clust <- lapply(seq_len(nrow(breaks)), function(i){ x[seq(breaks$st[i], breaks$end[i]), , drop = FALSE] diff --git a/R/MS2_functions.R b/R/MS2_functions.R index 4e35e9c..ad66707 100644 --- a/R/MS2_functions.R +++ b/R/MS2_functions.R @@ -207,7 +207,6 @@ CliqueMSMS <- function(MS2Exp, idx, contaminant = 173.5, delta = 0.1, return(sub(pattern = " ", replacement = "", x = res)) }) %>% unlist() %>% unique() }) - #MS2 processing function selection processingFun <- switch(sstype, "regular" = generate_ss_continuous_ms2, diff --git a/R/MS2_methods.R b/R/MS2_methods.R index 5fbb477..c47efa6 100644 --- a/R/MS2_methods.R +++ b/R/MS2_methods.R @@ -96,6 +96,8 @@ exportMGF <- function(struct, id, file, whichSpec = NA) { #' @author Jordi Capellades #' @inheritParams exportMSP #' @param fname Name of the output file, without the .mzML termination +#' @param collisionEnergy Numeric. Desired collision energy (in %NCE, eV, or any +#' other unit) #' @description Exports the superspectra of a given MS2Exp ID into an #' mzML format file. Warning! The retention time information is not #' included in the mzML file. @@ -389,7 +391,6 @@ function(struct, id, file) { }) ms2$anot <- unlist(ms2$anot) ms2$hits <- unlist(ms2$hits) - write.csv(ms2, file = file) }) diff --git a/R/Metadata_functions.R b/R/Metadata_functions.R index bb2cb92..0dcc6eb 100644 --- a/R/Metadata_functions.R +++ b/R/Metadata_functions.R @@ -1,37 +1,43 @@ adductTables <- function(ch_max = 1, mult_max = 1) { - data(adducts, package = "enviPat", envir = environment()) - adducts$Mass[49] <- adducts$Mass[49] * (-1) #Fixed wrong one - - negative.envi <- adducts[which(adducts$Ion_mode == "negative"), ] - positive.envi <- adducts[which(adducts$Ion_mode == "positive"), ] - - ad_positive <- which( - positive.envi$Charge %in% as.character(seq(ch_max)) & - positive.envi$Mult %in% seq(mult_max) - ) - ad_negative <- which( - negative.envi$Charge %in% as.character(seq(from = -1, to = -ch_max)) & - negative.envi$Mult %in% seq(mult_max) - ) - - positive.envi <- positive.envi[ad_positive, ] - negative.envi <- negative.envi[ad_negative, ] - - negative.ad <- negative.envi[, -c(2, 9)] - colnames(negative.ad)[c(1, 4)] <- c("adduct", "massdiff") - negative.ad$massdiff <- negative.ad$massdiff * abs(negative.ad$Charge) - - positive.ad <- positive.envi[, -c(2, 9)] - colnames(positive.ad)[c(1, 4)] <- c("adduct", "massdiff") - positive.ad$massdiff <- positive.ad$massdiff * abs(positive.ad$Charge) + ads <- adducts_MetaboCore_to_Hermes() + ads <- filter(ads, abs(Charge) <= ch_max) + ads <- filter(ads, abs(Mult) <= mult_max) + return(list(ads[ads$Ion_mode == "negative", ], + ads[ads$Ion_mode == "positive", ])) +} - return(list(negative.ad, positive.ad)) +#' @importFrom MetaboCoreUtils adducts +adducts_MetaboCore_to_Hermes <- function(){ + ads <- rbind(MetaboCoreUtils::adducts("positive"), + MetaboCoreUtils::adducts("negative")) + + #Reorder columns + ads <- ads[,c("name", "charge", "mass_multi", "mass_add", "positive", + "formula_add", "formula_sub")] + + #Adapt names and format + names(ads) <- c("adduct", "Charge", "Mult", "massdiff", "Ion_mode", + "Formula_add", "Formula_ded") + ads$Ion_mode <- ifelse(ads$Ion_mode, "positive", "negative") + ads$Mult <- ads$Mult * abs(ads$Charge) + ads$massdiff <- ads$massdiff * abs(ads$Charge) + + #Adapt atoms to add and subtract + incomplete <- grepl("[A-Za-z]$", ads$Formula_add) + ads$Formula_add[incomplete] <- paste0(ads$Formula_add[incomplete], "1") + + incomplete <- grepl("[A-Za-z]$", ads$Formula_ded) + ads$Formula_ded[incomplete] <- paste0(ads$Formula_ded[incomplete], "1") + ads$Formula_ded[ads$Formula_ded == ""] <- "C0" + + ads } + #' @importFrom utils data read.csv read.csv2 write.csv -database_importer <- function(template = "hmdb", - filename = "./app/www/norman.xls", - minmass = 70, maxmass = 750, keggpath = "") { +database_importer <- function(template = "custom", + filename, + minmass = 50, maxmass = 1200, keggpath = "") { if (template == "hmdb") { db <- read.csv(system.file("extdata", "hmdb.csv", package = "RHermes")) db <- db[!grepl("\\.", db$MolecularFormula), ] @@ -43,21 +49,21 @@ database_importer <- function(template = "hmdb", } else if (template == "custom") { if (grepl(pattern = "csv", x = filename)) { db <- read.csv(filename) - if(ncol(db) == 1){ + if (ncol(db) == 1){ db <- read.csv2(filename) } } else { db <- readxl::read_excel(filename) } if (!all(c("MolecularFormula", "Name") %in% names(db))) { - stop("The colnames aren't adequate. The file must contain the + stop("The colnames are not correct. The file must contain the 'MolecularFormula' and 'Name' columns at least") } } else if (template == "kegg_p") { - suppressWarnings( + suppressWarnings({ splitpath <- split(seq_along(keggpath), seq_len(ceiling(length(keggpath)/10))) - ) + }) comp <- lapply(splitpath, function(x) { pathdata <- KEGGREST::keggGet(keggpath[x]) lapply(pathdata, function(y) { @@ -65,8 +71,8 @@ database_importer <- function(template = "hmdb", }) }) comp <- unique(unlist(comp)) - suppressWarnings(splitcomp <- split(seq_along(comp), - seq_len(ceiling(length(comp)/10)))) + suppressWarnings({splitcomp <- split(seq_along(comp), + seq_len(ceiling(length(comp)/10)))}) db <- lapply(splitcomp, function(x) { data <- KEGGREST::keggGet(comp[x]) data <- lapply(data, function(y) { @@ -82,6 +88,9 @@ database_importer <- function(template = "hmdb", 'norman' and 'custom'") } + #Clean molecule names + db$Name <- gsub('[^[:graph:][:space:]]', '', db$Name) + #Clean molecular formulas that contain unknown elements db <- db[grepl("^C.?.?", db$MolecularFormula), ] db <- db[!grepl("[", db$MolecularFormula, fixed = TRUE), ] @@ -91,28 +100,22 @@ database_importer <- function(template = "hmdb", db <- db[!grepl(".", db$MolecularFormula, fixed = TRUE), ] db <- db[!grepl(")", db$MolecularFormula, fixed = TRUE), ] db$MolecularFormula <- as.character(db$MolecularFormula) - - #Calculate M0 mass from formula - isotopes <- NULL #To appease R CMD Check "no visible binding" - data(isotopes, package = "enviPat", envir = environment()) - db$EnviPatMass <- lapply(db$MolecularFormula, function(x) { - res <- tryCatch(enviPat::isopattern(isotopes, x, threshold = 99, - verbose = FALSE), - error = function(cond){NA}) - # Isotopes should be loaded first - if (is.matrix(res[[1]])) { - res <- res[[1]][1, 1] - res <- as.numeric(unname(res)) - return(res) - } else { - return(NA) - } - }) - if (any(is.na(db$EnviPatMass))) { - db <- db[!is.na(db$EnviPatMass), ] + + # Clean deuterium in the formulae and substitute by [2H] + db$MolecularFormula <- gsub(pattern = "D([0-9]*)", + replacement = "\\[2H\\1\\]", + x = db$MolecularFormula) + + #Calculate exact mass from formula + db$ExactMass <- MetaboCoreUtils::calculateMass(db$MolecularFormula) + if (any(is.na(db$ExactMass))) { + invalid <- which(is.na(db$ExactMass)) + warning("Careful, some of the molecular formulas were not valid, first 10 are:", + db$MolecularFormula[invalid[seq(1, min(10, length(invalid)))]]) + db <- db[!invalid, ] } - db$EnviPatMass <- as.numeric(db$EnviPatMass) - db <- filter(db, dplyr::between(as.numeric(db$EnviPatMass), - minmass, maxmass)) + db$ExactMass <- as.numeric(db$ExactMass) + db <- dplyr::filter(db, dplyr::between(as.numeric(db$ExactMass), + minmass, maxmass)) return(db) } diff --git a/R/Plot_functions.R b/R/Plot_functions.R index 074b076..2c405cc 100644 --- a/R/Plot_functions.R +++ b/R/Plot_functions.R @@ -188,7 +188,7 @@ function(struct, id, formula, ads = NA, rtrange= c(0, 1e4), dynamicaxis = TRUE, "Is everything OK?") } datafile <- struct@data@PL[[plid]]@peaklist - datafile <- datafile[.data$isov == "M0", ] + datafile <- datafile[datafile$isov == "M0", ] Class <- NULL; isov <- NULL #To appease R CMD Check "no visible binding" datafile[, Class := "Sample"] @@ -302,7 +302,7 @@ function(struct, id, formula, ads = NA, rtrange= c(0, 1e4), dynamicaxis = TRUE, #'@examples #'\dontshow{struct <- readRDS(system.file("extdata", "exampleObject.rds", #' package = "RHermes"))} -#'p <- plotFidelity(struct, 1, 9) +#'p <- plotFidelity(struct, 1, 8) #'@export setGeneric("plotFidelity", function(struct, id, entry, plot = TRUE) { standardGeneric("plotFidelity") @@ -314,7 +314,7 @@ function(struct, id, entry, plot = TRUE) { #Extract SOI and PL information from the selected SOI list SOI <- struct@data@SOI[[id]] fname <- SOI@filename - correspondingPL <- which(struct@metadata@filenames == fname) + correspondingPL <- which(struct@metadata@filenames == fname)[1] PL <- struct@data@PL[[correspondingPL]]@peaklist #Filter the PL to the selected SOI region diff --git a/R/SOI_functions.R b/R/SOI_functions.R index bd34d76..29c55bc 100644 --- a/R/SOI_functions.R +++ b/R/SOI_functions.R @@ -38,10 +38,8 @@ #' #'@export #' -#'@importFrom keras load_model_hdf5 predict_classes #'@importFrom dplyr distinct filter #'@import magrittr -#'@import tensorflow #'@import reticulate setGeneric("findSOI", function(struct, params, fileID, blankID = numeric(0)) { standardGeneric("findSOI") @@ -86,9 +84,6 @@ function(struct, params, fileID, blankID = numeric(0)) { params@blanksub <- FALSE) } if (blankID[i] != 0) { - reticulate::py_available(initialize = TRUE) - reticulate::py_module_available("keras") - reticulate::py_module_available("tensorflow") cur@blanksub <- TRUE cur@blankname <- struct@data@PL[[blankID[i]]]@filename blankPL <- struct@data@PL[[blankID[i]]]@peaklist @@ -126,11 +121,13 @@ PLprocesser <- function(PL, ExpParam, SOIParam, blankPL = NA, filename) { h <- PL@header formulaDB <- ExpParam@ionF[[1]] FA_to_ion <- ExpParam@ionF[[2]] + ppm <- ExpParam@ppm params <- SOIParam@specs maxlen <- SOIParam@maxlen noise <- SOIParam@minint useblank <- SOIParam@blanksub + mode <- tryCatch(SOIParam@mode, error = function(cond){"regular"}) ## Setting up PeakList DataPL <- as.data.table(DataPL) @@ -141,53 +138,64 @@ PLprocesser <- function(PL, ExpParam, SOIParam, blankPL = NA, filename) { setkeyv(formulaDB, c("f")) setkeyv(FA_to_ion, c("ion")) - ## Density filtering - message("Starting density filtering: ") - GR <- apply(params, 1, function(x) { - densityProc(x, DataPL, h) - }) - ## Grouping different filtered results - message("Now Grouping:") - Groups <- GR[[1]] - setkeyv(Groups, c("formula", "start", "end")) - if (length(GR) > 1) { - for (i in 2:length(GR)) { - Groups <- groupGrouper(GR, i, Groups) + if(mode == "regular"){ + ## Density filtering + message("Starting density filtering: ") + GR <- apply(params, 1, function(x) { + densityProc(x, DataPL, h) + }) + ## Grouping different filtered results + message("Now Grouping:") + Groups <- GR[[1]] + setkeyv(Groups, c("formula", "start", "end")) + if (length(GR) > 1) { + for (i in 2:length(GR)) { + Groups <- groupGrouper(GR, i, Groups) + } } + Groups <- distinct(Groups) + } else { + message("Starting Centwave-based SOI detection") + cwp <- SOIParam@cwp + Groups <- calculateSOICentwave(filename, formulaDB, cwp, ppm = ppm) } - Groups <- distinct(Groups) - ## Initial Peak Retrieval - message("Initial peak retrieval:") - peakscol <- lapply(seq_len(nrow(Groups)), retrievePeaks, + message("Retrieving datapoints from SOIs:") + Groups$peaks <- lapply(seq_len(nrow(Groups)), retrievePeaks, Groups, DataPL) - Groups$peaks <- peakscol - ## Group Characterization - message("") - message("Starting group characterization:") + + + ## SOI Characterization + message("Starting SOI characterization:") + message("Mass calculation:") setkeyv(Groups, "formula") Groups$mass <- formulaDB[.(Groups$formula),2] - if (any(Groups$length > maxlen)) { - Groups <- groupShort(Groups, maxlen) - } - + message("Number of scans:") - nscans <- apply(Groups, 1, function(x) { + Groups$nscans <- apply(Groups, 1, function(x) { d <- x["peaks"][[1]] return(dim(d)[1]) }) - Groups$nscans <- nscans Groups <- Groups[Groups$nscans > 5, ] - + + if (any(Groups$length > maxlen) & mode == "regular") { + Groups <- groupShort(Groups[,1:6], maxlen) + Groups$nscans <- apply(Groups, 1, function(x) { + d <- x["peaks"][[1]] + return(dim(d)[1]) + }) + Groups <- Groups[Groups$nscans > 5, ] + } + ## Blank substraction if (useblank) { Groups <- blankSubstraction(Groups, blankPL) if (nrow(Groups) == 0) {return(RHermesSOI())} } + ## Rest of characterization - message("") message("Width calculation:") width <- apply(Groups, 1, function(x) { d <- x["peaks"][[1]] @@ -219,7 +227,8 @@ PLprocesser <- function(PL, ExpParam, SOIParam, blankPL = NA, filename) { message("Calculating chaos:") Groups$chaos <- lapply(Groups$peaks, function(x) { - rho_chaos(x, nlevels = 10, fillGaps = TRUE) + tryCatch({rho_chaos(x, nlevels = 10, fillGaps = TRUE)}, + error = function(cond){return(0)}) }) %>% as.numeric() setkeyv(Groups, c("formula")) @@ -233,8 +242,8 @@ PLprocesser <- function(PL, ExpParam, SOIParam, blankPL = NA, filename) { output <- RHermesSOI(SOIList = Groups, PlotDF = as.data.table(plist), SOIParam = SOIParam, filename = filename) - return(output) - } + return(output) +} densityProc <- function(x, DataPL, h){ rtbin <- x[1] @@ -339,13 +348,38 @@ groupGrouper <- function(GR, i, Groups){ return(res) } -retrievePeaks <- function(i, Groups, PL){ +#' @importFrom MSnbase readMSData +#' @importFrom xcms findChromPeaks chromPeaks +#' @importFrom data.table as.data.table +calculateSOICentwave <- function(filename, DB, CentWaveParam, ppm){ + msdata <- readMSData(filename, mode = "onDisk") + pks <- findChromPeaks(msdata, CentWaveParam) + pks <- as.data.frame(chromPeaks(pks)) + pks$anot <- lapply(pks$mz, function(mz){ + range <- mz * c(1 - ppm * 1e-6, + 1 + ppm * 1e-6) + DB$f[between(DB$m, range[1], range[2])] + }) + pks <- pks[sapply(pks$anot, length) > 0, ] + soi <- lapply(seq_len(nrow(pks)), function(i){ + peak <- pks[i, ] + data.frame(start = peak$rtmin, + end = peak$rtmax, + length = peak$rtmax - peak$rtmin, + formula = peak$anot[[1]]) + }) + soi <- do.call(rbind, soi) + soi <- as.data.table(soi) + return(soi) +} + +retrievePeaks <- function(i, Groups, PL, delta = 0){ x <- Groups[i, ] rtmin <- x[1, 1] rtmax <- x[1, 2] f <- x[1, 4] - pks <- PL[.(f)] %>% filter(., .data$rt > rtmin[[1]] & - .data$rt < rtmax[[1]]) + pks <- PL[.(f)] + pks <- filter(pks, between(pks$rt, rtmin[[1]] - delta, rtmax[[1]] + delta)) return(pks[, c(1, 2, 5)]) } @@ -353,14 +387,15 @@ groupShort <- function(Groups, maxlen, BPPARAM = bpparam()){ message("Shortening and selecting long groups:") SG <- filter(Groups, length <= maxlen) LG <- filter(Groups, length > maxlen) - LG <- lapply(seq_len(nrow(LG)), parallelGroupShort, LG, - maxlen) + LG <- lapply(seq_len(nrow(LG)), parallelGroupShort, LG, maxlen) LG <- do.call(rbind, LG) return(rbind(SG, LG)) } + +#Experimental Centwave approach to SOI partitioning + parallelGroupShort <- function(i, LG, maxlen){ - #Experimental Centwave approach to SOI partitioning ms1data <- LG$peaks[[i]] curGR <- LG[i,] tryCatch({ @@ -371,8 +406,11 @@ parallelGroupShort <- function(i, LG, maxlen){ snthresh = 0, noise = 0, fitgauss = FALSE, firstBaselineCheck = FALSE) ) - }, error = function(cond){}) + }, error = function(cond){ + pks <- data.frame() #To avoid problems, act as if no peaks found + }) + #If XCMS detects any peak if (nrow(pks) != 0) { pks <- as.data.frame(pks) pks <- pks[order(pks[,2]), ] @@ -397,7 +435,8 @@ parallelGroupShort <- function(i, LG, maxlen){ starts <- c(starts, max(ends)) ends <- c(ends, max(ms1data[, 1])) known_peak <- c(known_peak, FALSE) - } + } + #Split remaining long traces into smaller ones if (any(!known_peak)) { too_long <- ends - starts > maxlen if (any(too_long & !known_peak)) { @@ -417,7 +456,7 @@ parallelGroupShort <- function(i, LG, maxlen){ length = ends - starts, formula = curGR$formula, peaks = curGR$peaks, mass = curGR$mass) } else { - #Divide long group into equal-sized smaller groups + #Divide long traces into equal-sized smaller traces times <- seq(from = curGR[1, 1][[1]], to = curGR[1, 2][[1]], length.out = ceiling(curGR[1, 3][[1]] / maxlen) + 1) deltat <- times[2] - times[1] @@ -425,8 +464,7 @@ parallelGroupShort <- function(i, LG, maxlen){ length = deltat, formula = curGR$formula, peaks = curGR$peaks, mass = curGR$mass) } - - #Reestructuration of the groups: data point 'splitting' + #Data point redistribution within each new SOI NewGR[, "peaks"] <- apply(NewGR, 1, function(x) { pks <- x[5][[1]] return(pks[between(pks$rt, x[1][[1]], x[2][[1]]), ]) @@ -434,73 +472,33 @@ parallelGroupShort <- function(i, LG, maxlen){ return(NewGR) } -#' @importFrom keras k_argmax array_reshape blankSubstraction <- function(Groups, blankPL){ message("Blank substraction:") + blankPL <- blankPL[blankPL$isov == "M0", ] setkeyv(blankPL, c("formv", "rt")) message("First cleaning") toKeep <- lapply(seq_len(nrow(Groups)), firstCleaning, Groups, blankPL) %>% unlist() sure <- Groups[which(toKeep), ] if (any(toKeep)) {Groups <- Groups[-which(toKeep),]} - reticulate::py_available(initialize = TRUE) #Start Python connection - if (reticulate::py_module_available("keras") & - reticulate::py_module_available("tensorflow")) { - model <- load_model_hdf5(system.file("extdata", "ImprovedModel.h5", - package = "RHermes")) #ANN - setkeyv(blankPL, "formv") - - message("Preparing input for ANN") - RES <- lapply(seq_len(nrow(Groups)), prepareNetInput, Groups, blankPL) - Groups$MLdata <- RES - NAgroups <- do.call(rbind, lapply(RES, function(x){is.na(x[1])})) - - if (any(NAgroups)) { - Groups <- Groups[-which(NAgroups), ] - RES <- RES[-which(NAgroups)] - } - organizeddata <- do.call(rbind, lapply(RES, function(x) { - return(c(x[1, ], x[2, ])) - })) - if (nrow(organizeddata) != 0) { - organizeddata <- keras::array_reshape(organizeddata, - c(nrow(organizeddata), 400), - order = "C") #ANN input - q <- model %>% predict(organizeddata) %>% k_argmax() - - Groups <- Groups[-which(q$numpy == 0), ] #ANN output - Groups <- Groups[, -c("MLdata")] - - } - } else { - setkeyv(blankPL, "formv") - message("Preparing input for cosine similarity") - RES <- lapply(seq_len(nrow(Groups)), prepareNetInput, Groups, blankPL) - cosines <- sapply(RES, function(x){ - if(is.na(x)[1]){return(Inf)} - philentropy::cosine_dist(x[1,] + 1e-12, x[2,] + 1e-12, - testNA = FALSE) - }) - Groups <- Groups[cosines < 0.8, ] - warning("A Keras installation was not found and ANN blank ", - "subtraction was not performed. A less-accurate ", - "cosine similarity score was applied instead.") - } + setkeyv(blankPL, "formv") + message("Preparing input for cosine similarity") + RES <- lapply(seq_len(nrow(Groups)), prepareNetInput, Groups, blankPL) + cosines <- sapply(RES, function(x){ + if(is.na(x)[1]){return(Inf)} + philentropy::cosine_dist(x[1,] + 1e-12, x[2,] + 1e-12, + testNA = FALSE) + }) + Groups <- Groups[cosines < 0.8, ] Groups <- rbind(sure, Groups) return(Groups) } #'@importFrom stats IQR quantile firstCleaning <- function(i, Groups, blankPL){ - cur <- Groups[i, ] - st <- cur$start - end <- cur$end - f <- cur$formula + peaks <- Groups$peaks[[i]] deltat <- 10 - peaks <- cur$peaks[[1]] - blankpks <- blankPL[.(f)] %>% filter(., .data$rt >= st - deltat & - .data$rt <= end + deltat & - .data$isov == "M0") + blankpks <- retrievePeaks(i, Groups, blankPL, deltat) blankpks <- distinct(blankpks[, c(1, 2)]) if (nrow(blankpks) < 5) {return(TRUE)} #No blank signals @@ -541,9 +539,7 @@ prepareNetInput <- function(i, Groups, blankPL){ length.out = Npoints), rule = 1, ties = min)) smooth_pks[is.na(smooth_pks[, "y"]), "y"] <- 0 - blankpks <- blankPL[.(f)] %>% filter(., .data$rt >= st - deltat & - .data$rt <= end + deltat & - .data$isov == "M0") + blankpks <- retrievePeaks(i, Groups, blankPL, deltat) blankpks <- distinct(blankpks[, c(1, 2)]) if (length(unique(blankpks$rt)) < 2) { blankpks <- data.frame(rt = c(st, end), rtiv = c(0, 0)) @@ -583,7 +579,7 @@ preparePlottingDF <- function(i, Groups){ } parallelFilter <- function(anot, ScanResults, bins, timebin){ - data <- as.vector(ScanResults[.(anot), "rt"]) + data <- ScanResults[.(anot), "rt"][[1]] mint <- min(data) maxt <- max(data) res <- rep(0, length(bins)) @@ -649,7 +645,7 @@ rho_chaos <- function(data, nlevels = 20, fillGaps = TRUE){ nr <- nrow(data) #Filling 1-gaps if(fillGaps){ - for(i in seq_len(nr-2)){ + for(i in seq_len(nr - 2)){ if(data$above[i] & !data$above[i+1] & data$above[i+2]){ data$above[i+1] <- TRUE } @@ -712,7 +708,7 @@ setMethod("filterSOI", signature = c("RHermesExp", "numeric", "ANY", "ANY", "ANY "Aborting filter") return(struct) } - + ##Filter by isotopic fidelity if (isofidelity) { # Isotopic elution similarity @@ -727,7 +723,7 @@ setMethod("filterSOI", signature = c("RHermesExp", "numeric", "ANY", "ANY", "ANY " similarity. Aborting filter") return(struct) } - + # Isotopic pattern similarity message("Calculating isotopic fidelity metrics:") isodata <- lapply(with_isos, plotFidelity, struct = struct, @@ -742,7 +738,7 @@ setMethod("filterSOI", signature = c("RHermesExp", "numeric", "ANY", "ANY", "ANY "fidelity. Aborting filter") return(struct) } - + rtmargin <- 20 # Removing confirmed isotopic signals soilist <- soilist[order(-soilist$MaxInt), ] @@ -975,7 +971,6 @@ generateDiffDB <- function(DB, formulas, polarity = 1){ } #' @rawNamespace import(data.table, except = between) -#' @importFrom BiocParallel bplapply anotateISF <- function(SL, DB, polarity = 1){ DB$df_spectra <- as.data.table(DB$df_spectra) DB$df_metabolite <- as.data.table(DB$df_metabolite) @@ -987,9 +982,8 @@ anotateISF <- function(SL, DB, polarity = 1){ setkeyv(DB$df_metabolite, c("REFformula")) setkeyv(DB$list_fragments, c("ID_spectra")) - SL$ISF <- bplapply(seq_len(nrow(SL)), anotateParallelISF, - SL = SL, DB = DB, polarity = polarity, - BPPARAM = bpparam()) + SL$ISF <- lapply(seq_len(nrow(SL)), anotateParallelISF, + SL = SL, DB = DB, polarity = polarity) return(SL) } @@ -1111,7 +1105,6 @@ cleanupISF <- function(SL){ #' removed. If using justAnnotate = TRUE, the ISF aren't removed. #' @seealso plotISF #' @export -#' @importFrom BiocParallel bplapply #' @examples #' if(FALSE){ #' #You need a MS2 Database with the proper format diff --git a/R/SOI_methods.R b/R/SOI_methods.R index ba04707..a0a8052 100644 --- a/R/SOI_methods.R +++ b/R/SOI_methods.R @@ -57,6 +57,10 @@ duplicateSOI <- function(struct, id){ #' These are all stored in /app/www/SOIFilterParams.csv, feel free to #' locally change them or add new ones for your use (if you know what #' you're doing). +#' @param mode Whether SOI detection should use the regular density-based +#' algorithm or xcms peak detection for defining the SOIs +#' @param cwp A CentWaveParam object used for either SOI detection (xcms mode) +#' or long SOI splitting (regular mode) #' @return A SoiParam object #' @examples #' if(FALSE){ @@ -64,19 +68,24 @@ duplicateSOI <- function(struct, id){ #' par2 <- getSOIpar('triple-x') #Etc. etc. #' } #'@export -setGeneric("getSOIpar", function(tag = "double") { +setGeneric("getSOIpar", function(tag = "double", mode = "regular", cwp = NA) { standardGeneric("getSOIpar") }) #' @rdname getSOIpar -setMethod("getSOIpar", c("ANY"), function(tag = "double") { +setMethod("getSOIpar", c("ANY", "ANY", "ANY"), + function(tag = "double", mode = "regular", cwp = NA) { temp <- read.csv2(system.file("extdata", "SOITemplates.csv", package = "RHermes")) specdf <- filter(temp, .data$name == tag)[, seq(2, 6)] if (nrow(specdf) == 0) { stop("No templated was found with that ID", call. = FALSE, ) } - return(SOIParam(specs = specdf[, seq(1, 3)], maxlen = specdf[1, 4], - minint = specdf[1, 5])) + obj <- SOIParam(specs = specdf[, seq(1, 3)], + maxlen = specdf[1, 4], + minint = specdf[1, 5], + mode = mode) + if(is(cwp, "CentWaveParam")) obj@cwp <- cwp + return(obj) }) diff --git a/README.Rmd b/README.Rmd index f887500..b3d0fd6 100644 --- a/README.Rmd +++ b/README.Rmd @@ -1,159 +1,110 @@ ---- -output: github_document ---- - - - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>", - fig.path = "man/figures/README-", - out.width = "100%" -) -``` - -# RHermes - - -[![Codecov test coverage](https://codecov.io/gh/RogerGinBer/RHermes/branch/master/graph/badge.svg?token=HL73R4GHFJ)](https://codecov.io/gh/RogerGinBer/RHermes?branch=master) - - - - -`RHermes` is a broad-scoped targeted metabolomics software designed to analyse -LC-MS and LC-MS/MS data to identify compounds in biological or environmental -samples. - -The `RHermes` workflow works with both Orbitrap and q-TOF instrument data and -comes with an easy to use GUI that will guide you every step of the way. - -You are in control of your metabolites: whether it's natural products, biomedical or -enviormental samples, `RHermes` has you covered. By restricting the formula database, -you can focus on just the compounds you are interested in and achieve greater -metabolome coverage depth. - -Have you ever wished you could just **see** the metabolites in your data? With -`RHermes` you can do that and much more. Say goodbye to manually calculating m/z's -and plotting XIC of different adducts: with our GUI you are just one click away -from a metabolite-centric plot. - -For more info, check out the documentation [here](https://rogerginber.github.io/RHermes/) -and our article preprint [here](https://www.biorxiv.org/content/10.1101/2021.03.08.434466v1.full.pdf) - - -## System requirements - -The recommended system requirements are: - -* At least 8-16 GB of RAM -* An internet connection to perform KEGG queries - -## Installation - -You can download the development version from -[GitHub](https://github.com/RogerGinBer/RHermes) with: - -```{r, eval = FALSE} -if(!requireNamespace("devtools", quietly = TRUE)) - install.packages("devtools") -devtools::install_github("RogerGinBer/RHermes") -``` - -## Setup -`RHermes` can perform almost all its functions after installation, but the SOI -Blank Substraction step requires a valid `keras` and `tensorflow` installation -(which rely on Python). - -### Option 1: Default installation -For most users, both `keras` and `tensorflow` can be configured with: - -```{r, eval = FALSE} -reticulate::install_miniconda() -keras::install_keras() -tensorflow::install_tensorflow() -``` -After which you can check the following: -```{r, eval = FALSE} -tensorflow::tf_config() -model <- keras::load_model_hdf5(system.file("extdata", "ImprovedModel.h5", - package = "RHermes")) -is(model, "python.builtin.object") #Gives TRUE if the loading is successful. -``` -If both commands don't give any error (the "Your CPU supports ..." warning is -fine) the installation has been successful. -If it fails (which can happen in some users with previous Python installations, -try Option 2). - -### Option 2: Manual installation -First install Miniconda: -```{r, eval = FALSE} -reticulate::install_miniconda() -``` -Now find the Miniconda Prompt in your computer. Instead of relying on the -default r-reticulate environment, type the following to create a new -environment: - -`conda create -n newenv python=3.6 tensorflow keras` - -When finished, type in R: -```{r, eval = FALSE} -reticulate::use_condaenv("newenv", required = TRUE) -tensorflow::tf_config() -model <- keras::load_model_hdf5(system.file("extdata", "ImprovedModel.h5", - package = "RHermes")) -``` - -Everything should run smoothly. If not, try manually installing Anaconda from -their website and letting `reticulate` know where to find the environment. - -Also check out [Keras](https://tensorflow.rstudio.com/tutorials/beginners/basic-ml/) and [Tensorflow](https://tensorflow.rstudio.com/tutorials/) R tutorials. - -## Analyzing LC-MS data with RHermes -Once installed, you can use `RHermes` programmatically like this: -```{r example, eval = FALSE} -library(RHermes) -#Generate a Exp object -example <- RHermesExp() - -#Set your formula and adduct database -example <- setDB(example, db = "hmdb") - -#Process your MS1 files -example <- processMS1(example, - system.file("extdata", "MS1TestData.mzML", - package = "RHermes")) -#Generate SOIs -example <- findSOI(example, getSOIpar(), 1) - -#Generate an IL (Inclusion List) -example <- generateIL(example, 1, ILParam()) -``` - -With the generated inclusion list, you can export it and run a Parallel Reaction -Monitoring (PRM) MS2 experiment to reveal coeluting isomers or use any other MS2 mode -you see fit. - -Or start the interactive GUI typing: -```{r, eval = FALSE} -RHermesGUI() -``` -In the GUI you will find abundant help pages to guide you along the processing :+1: - -Please check the User Guide [vignette](https://rogerginber.github.io/RHermes/articles/RHermes_UserGuide.html) for more detailed info and real examples. - - -## Bug reporting - -Suggestions and bug reports are more than welcome at: https://github.com/RogerGinBer/RHermes/issues - - -## Citation - -Please cite this package as: - -HERMES: a molecular formula-oriented method to target the metabolome -Roger Giné, Jordi Capellades, Josep M. Badia, Dennis Vughs, Michaela Schwaiger-Haber, Maria Vinaixa, Andrea M. Brunner, Gary J. Patti, Oscar Yanes -bioRxiv 2021.03.08.434466; doi: https://doi.org/10.1101/2021.03.08.434466 - +--- +output: github_document +--- + + + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + fig.path = "man/figures/README-", + out.width = "100%" +) +``` + +# RHermes + + +[![Codecov test coverage](https://codecov.io/gh/RogerGinBer/RHermes/coverage.svg?branch=master)](https://codecov.io/gh/RogerGinBer/RHermes?branch=master) +[![R-CMD-check](https://github.com/RogerGinBer/RHermes/actions/workflows/check-bioc.yml/badge.svg)](https://github.com/RogerGinBer/RHermes/actions/workflows/check-bioc.yml) +[![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) + + + + +`RHermes` is a broad-scoped targeted metabolomics package designed to analyse +LC-MS and LC-MS/MS data to identify compounds in biological or environmental +samples. + +The `RHermes` workflow works with both Orbitrap and qTOF instrument data and +comes with an interactive GUI to process and visualize the data. + +Whether it's natural products, biomedical or enviormental samples, `RHermes` can +help you improve your matrix characterization. By selecting an appropiate +formula database, you can focus on just the compounds you are interested in and +improve your coverage. + +With `RHermes` you can **see** the metabolites in your data and much +more. There's no need to manually calculate m/z's to plot the XIC of different +adducts: with the GUI you are just one click away from a metabolite-centric +plot. + +For more info, check out the documentation [here](https://rogerginber.github.io/RHermes/) +and our article [here](https://www.nature.com/articles/s41592-021-01307-z). + + +## Installation + +You can download the development version from +[GitHub](https://github.com/RogerGinBer/RHermes) with: + +```{r, eval = FALSE} +if(!requireNamespace("devtools", quietly = TRUE)) + install.packages("devtools") +devtools::install_github("RogerGinBer/RHermes") +``` + +## Analyzing LC-MS data with RHermes + +Once installed, you can use `RHermes` programmatically like this: + +```{r example, eval = FALSE} +library(RHermes) +#Generate a Exp object +example <- RHermesExp() + +#Set your formula and adduct database +example <- setDB(example, db = "hmdb") + +#Process your MS1 files +example <- processMS1(example, + system.file("extdata", "MS1TestData.mzML", + package = "RHermes")) +#Generate SOIs +example <- findSOI(example, getSOIpar(), 1) + +#Generate an IL (Inclusion List) +example <- generateIL(example, 1, ILParam()) +``` + +With the generated inclusion list, you can export it and run a Parallel Reaction +Monitoring (PRM) MS2 experiment to reveal coeluting isomers or use any other MS2 mode +you see fit. + +Or start the interactive GUI typing: +```{r, eval = FALSE} +RHermesGUI() +``` +In the GUI you will find abundant help pages to guide you along the processing :+1: + +Please check the User Guide [vignette](https://rogerginber.github.io/RHermes/articles/RHermes_UserGuide.html) for more detailed info and real examples. + +## Database availability + +We have compiled some molecular formula open databases ready to be used with RHermes for all sorts of samples: HMDB, ChEBI, NORMAN, LipidMaps, COCONUT, etc. They are freely available at [this Zenodo repository](https://zenodo.org/record/5025560). + +## Bug reporting + +Suggestions and bug reports are more than welcome at: https://github.com/RogerGinBer/RHermes/issues + + +## Citation + +Please cite this package as: + +Giné, R., Capellades, J., Badia, J.M. et al. HERMES: a molecular-formula-oriented method to target the metabolome. Nat Methods 18, 1370–1376 (2021). https://doi.org/10.1038/s41592-021-01307-z + + + diff --git a/README.md b/README.md index 8c10125..85f9b7b 100644 --- a/README.md +++ b/README.md @@ -1,165 +1,108 @@ - - - -# RHermes - - - -[![Codecov test -coverage](https://codecov.io/gh/RogerGinBer/RHermes/branch/master/graph/badge.svg?token=HL73R4GHFJ)](https://codecov.io/gh/RogerGinBer/RHermes?branch=master) - - - - - -`RHermes` is a broad-scoped targeted metabolomics software designed to -analyse LC-MS and LC-MS/MS data to identify compounds in biological or -environmental samples. - -The `RHermes` workflow works with both Orbitrap and q-TOF instrument -data and comes with an easy to use GUI that will guide you every step of -the way. - -You are in control of your metabolites: whether it’s natural products, -biomedical or enviormental samples, `RHermes` has you covered. By -restricting the formula database, you can focus on just the compounds -you are interested in and achieve greater metabolome coverage depth. - -Have you ever wished you could just **see** the metabolites in your -data? With `RHermes` you can do that and much more. Say goodbye to -manually calculating m/z’s and plotting XIC of different adducts: with -our GUI you are just one click away from a metabolite-centric plot. - -For more info, check out the documentation -[here](https://rogerginber.github.io/RHermes/) and the Nature Methods article -[here](https://www.nature.com/articles/s41592-021-01307-z) - -## System requirements - -The recommended system requirements are: - -- At least 8-16 GB of RAM -- An internet connection to perform KEGG queries - -## Installation - -You can download the development version from -[GitHub](https://github.com/RogerGinBer/RHermes) with: - -``` r -if(!requireNamespace("devtools", quietly = TRUE)) - install.packages("devtools") -devtools::install_github("RogerGinBer/RHermes") -``` - -## Setup - -`RHermes` can perform almost all its functions after installation, but -the SOI Blank Substraction step requires a valid `keras` and -`tensorflow` installation (which rely on Python). - -### Option 1: Default installation - -For most users, both `keras` and `tensorflow` can be configured with: - -``` r -reticulate::install_miniconda() -keras::install_keras() -tensorflow::install_tensorflow() -``` - -After which you can check the following: - -``` r -tensorflow::tf_config() -model <- keras::load_model_hdf5(system.file("extdata", "ImprovedModel.h5", - package = "RHermes")) -is(model, "python.builtin.object") #Gives TRUE if the loading is successful. -``` - -If both commands don’t give any error (the “Your CPU supports …” warning -is fine) the installation has been successful. If it fails (which can -happen in some users with previous Python installations, try Option 2). - -### Option 2: Manual installation - -First install Miniconda: - -``` r -reticulate::install_miniconda() -``` - -Now find the Miniconda Prompt in your computer. Instead of relying on -the default r-reticulate environment, type the following to create a new -environment: - -`conda create -n newenv python=3.6 tensorflow keras` - -When finished, type in R: - -``` r -reticulate::use_condaenv("newenv", required = TRUE) -tensorflow::tf_config() -model <- keras::load_model_hdf5(system.file("extdata", "ImprovedModel.h5", - package = "RHermes")) -``` - -Everything should run smoothly. If not, try manually installing Anaconda -from their website and letting `reticulate` know where to find the -environment. - -Also check out -[Keras](https://tensorflow.rstudio.com/tutorials/beginners/basic-ml/) -and [Tensorflow](https://tensorflow.rstudio.com/tutorials/) R tutorials. - -## Analyzing LC-MS data with RHermes - -Once installed, you can use `RHermes` programmatically like this: - -``` r -library(RHermes) -#Generate a Exp object -example <- RHermesExp() - -#Set your formula and adduct database -example <- setDB(example, db = "hmdb") - -#Process your MS1 files -example <- processMS1(example, - system.file("extdata", "MS1TestData.mzML", - package = "RHermes")) -#Generate SOIs -example <- findSOI(example, getSOIpar(), 1) - -#Generate an IL (Inclusion List) -example <- generateIL(example, 1, ILParam()) -``` - -With the generated inclusion list, you can export it and run a Parallel -Reaction Monitoring (PRM) MS2 experiment to reveal coeluting isomers or -use any other MS2 mode you see fit. - -Or start the interactive GUI typing: - -``` r -RHermesGUI() -``` - -In the GUI you will find abundant help pages to guide you along the -processing :+1: - -Please check the User Guide -[vignette](https://rogerginber.github.io/RHermes/articles/RHermes_UserGuide.html) -for more detailed info and real examples. - -## Bug reporting - -Suggestions and bug reports are more than welcome at: - - -## Citation - -Please cite this package as: - -Giné, R., Capellades, J., Badia, J.M. et al. HERMES: a molecular-formula-oriented method to target the metabolome. Nat Methods 18, 1370–1376 (2021). https://doi.org/10.1038/s41592-021-01307-z + + + +# RHermes + + + +[![Codecov test +coverage](https://codecov.io/gh/RogerGinBer/RHermes/branch/master/graph/badge.svg?token=HL73R4GHFJ)](https://codecov.io/gh/RogerGinBer/RHermes?branch=master) +[![R-CMD-check](https://github.com/RogerGinBer/RHermes/actions/workflows/check-bioc.yml/badge.svg)](https://github.com/RogerGinBer/RHermes/actions/workflows/check-bioc.yml) +[![Lifecycle: +experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) + + + + +`RHermes` is a broad-scoped targeted metabolomics package designed to +analyse LC-MS and LC-MS/MS data to identify compounds in biological or +environmental samples. + +The `RHermes` workflow works with both Orbitrap and qTOF instrument data +and comes with an interactive GUI to process and visualize the data. + +Whether it’s natural products, biomedical or enviormental samples, +`RHermes` can help you improve your matrix characterization. By +selecting an appropiate formula database, you can focus on just the +compounds you are interested in and improve your coverage. + +With `RHermes` you can **see** the metabolites in your data and much +more. There’s no need to manually calculate m/z’s to plot the XIC of +different adducts: with the GUI you are just one click away from a +metabolite-centric plot. + +For more info, check out the documentation +[here](https://rogerginber.github.io/RHermes/) and our article +[here](https://www.nature.com/articles/s41592-021-01307-z). + +## Installation + +You can download the development version from +[GitHub](https://github.com/RogerGinBer/RHermes) with: + +``` r +if(!requireNamespace("devtools", quietly = TRUE)) + install.packages("devtools") +devtools::install_github("RogerGinBer/RHermes") +``` + +## Analyzing LC-MS data with RHermes + +Once installed, you can use `RHermes` programmatically like this: + +``` r +library(RHermes) +#Generate a Exp object +example <- RHermesExp() + +#Set your formula and adduct database +example <- setDB(example, db = "hmdb") + +#Process your MS1 files +example <- processMS1(example, + system.file("extdata", "MS1TestData.mzML", + package = "RHermes")) +#Generate SOIs +example <- findSOI(example, getSOIpar(), 1) + +#Generate an IL (Inclusion List) +example <- generateIL(example, 1, ILParam()) +``` + +With the generated inclusion list, you can export it and run a Parallel +Reaction Monitoring (PRM) MS2 experiment to reveal coeluting isomers or +use any other MS2 mode you see fit. + +Or start the interactive GUI typing: + +``` r +RHermesGUI() +``` + +In the GUI you will find abundant help pages to guide you along the +processing :+1: + +Please check the User Guide +[vignette](https://rogerginber.github.io/RHermes/articles/RHermes_UserGuide.html) +for more detailed info and real examples. + +## Database availability + +We have compiled some molecular formula open databases ready to be used +with RHermes for all sorts of samples: HMDB, ChEBI, NORMAN, LipidMaps, +COCONUT, etc. They are freely available at [this Zenodo +repository](https://zenodo.org/record/5025560). + +## Bug reporting + +Suggestions and bug reports are more than welcome at: + + +## Citation + +Please cite this package as: + +Giné, R., Capellades, J., Badia, J.M. et al. HERMES: a +molecular-formula-oriented method to target the metabolome. Nat Methods +18, 1370–1376 (2021). diff --git a/_pkgdown.yaml b/_pkgdown.yaml deleted file mode 100644 index 723c6b0..0000000 --- a/_pkgdown.yaml +++ /dev/null @@ -1,60 +0,0 @@ -title: RHermes -url: https://rogerginber.github.io/RHermes -template: - params: - bootswatch: flatly -reference: - - title: Processing functions - desc: Needed in the RHermes workflow - contents: - - '`setExpParam`' - - '`setDB`' - - '`processMS1`' - - '`findSOI`' - - '`filterSOI`' - - '`removeISF`' - - '`generateIL`' - - '`filterIL`' - - '`processMS2`' - - title: Getters - desc: Access to the RHermesExp subobjects - contents: - - '`adlist`' - - '`Cluster`' - - '`DB`' - - '`PL`' - - '`SOI`' - - '`IL`' - - '`MS2Data`' - - '`Ident`' - - title: Exporting functions - desc: Export your IL and MS2 spectras - contents: - - '`exportIL`' - - '`exportMGF`' - - '`exportMSP`' - - '`exportmzML`' - - '`exportSIRIUS`' - - title: Plotting functions - desc: Bring your data alive - contents: - - '`plotPL`' - - '`plotSOI`' - - '`plotFidelity`' - - '`plotISF`' - - '`plotIL`' - - '`plotSS`' - - '`plotRawMS2`' - - title: Accessory functions - desc: Getting the job done - contents: - - '`addAd`' - - '`remAd`' - - '`getSOIPar`' - - '`readTime`' - - '`setTime`' - - '`SOIsim`' - - '`ssNetwork`' - - '`setCluster`' -navbar: - type: default diff --git a/inst/app/ExtraInfo_UI.R b/inst/app/ExtraInfo_UI.R index 408e3dc..92fd17f 100644 --- a/inst/app/ExtraInfo_UI.R +++ b/inst/app/ExtraInfo_UI.R @@ -38,7 +38,7 @@ ExtraInfo_UI <- function(id){ sidebarPanel( div(radioButtons(inputId = ns("MS2files"), label = "Select inclusion list with MS2 data:", choices = "", selected = ""), - selectInput(ns("MS2ILentry"), "Select IL entry to check:", choices = "", selected = ""), + selectizeInput(ns("MS2ILentry"), "Select IL entry to check:", choices = NULL), style = "margin-left: 5%; margin-top: 3%"), width = "AUTO"), sidebarPanel( plotlyOutput(ns("MS2raw")), @@ -155,7 +155,7 @@ ExtraInfoServer <- function(id, struct){ },{ if(struct$hasMS2){ numMS2entry <- seq_along(struct$dataset@data@MS2Exp[[as.numeric(input$MS2files)]]@MS2Data) - updateSelectInput(session, "MS2ILentry", choices = numMS2entry, selected = numMS2entry[1]) + updateSelectizeInput(session, "MS2ILentry", choices = numMS2entry, selected = numMS2entry[1], server = TRUE) } }) diff --git a/inst/app/MS2PlotUI.R b/inst/app/MS2PlotUI.R index f0e6472..91c8a89 100644 --- a/inst/app/MS2PlotUI.R +++ b/inst/app/MS2PlotUI.R @@ -1,430 +1,477 @@ -MS2PlotUI <- function(id) { - ns <- NS(id) - tagList( - tags$br(), - - radioGroupButtons(inputId = ns("selectclass"), label = "Select a graph :", - choices = c(` Identifications` = "ident", - ` Raw MSMS data` = "raw"), - justified = TRUE), - radioGroupButtons(ns("id"), "Select MS2Exp:", choices = "", - selected = ""), - br(), - - conditionalPanel("input.selectclass == 'ident'", ns = ns, - radioGroupButtons(ns("selectplot"), - choices = c(`Superspectra Plot` = "sup", - `Superspectra Comparison` = "comp", - `Compound matches` = "hits"), justified = TRUE - ), - conditionalPanel("input.selectplot == 'sup'", ns = ns, - fluidRow(column(6, selectizeInput(ns("selectss"), - label = "Select superspectrum to plot", - choices = NULL, selected = NULL))), - plotlyOutput(ns("ssplot"))), - conditionalPanel("input.selectplot == 'hits'", ns = ns, - fluidRow( - column(6, - selectizeInput(ns("selectident"), - label = "Select the superspectra entry to check:", - choices = NULL, selected = NULL)), - column(6, - pickerInput(ns("selecthits"), - label = "Select which hits to check against:", - choices = NULL, selected = NULL, multiple = TRUE, - options = list( - `actions-box` = TRUE)) - ) - ), - plotlyOutput(ns("mirrorplot")) - ), - - conditionalPanel("input.selectplot == 'comp'", ns = ns, - fluidRow( - column(6, - selectizeInput(ns("selectident2"), - label = "Select the superspectra entry to check:", - choices = NULL, selected = NULL)), - column(6, - pickerInput(inputId = ns("otherss"), - label = "Select which superspectra/s to compare with:", - choices = NULL, selected = NULL, multiple = TRUE, - options = list(`max-options` = 10)) - ) - ), - plotlyOutput(ns("versusplot")) - ) - ), - conditionalPanel("input.selectclass == 'raw'", ns = ns, - fluidRow( - column(6, - checkboxGroupButtons( - inputId = ns("rawset"), - label = "Select what to plot:", - choices = c("Raw data plot", "Network", "Peak table"), - checkIcon = list( - yes = tags$i(class = "fa fa-check-square", - style = "color: #4D4263"), - no = tags$i(class = "fa fa-square-o", - style = "color: #4D4263")), - selected = c("Raw data plot", "Network", "Peak table") - ) - ) - ), - fluidRow(column(width = 6, - selectizeInput(ns("selectILentry"), - label = "Select the IL entry to check:", choices = NULL, - selected = NULL - ) - ), - column(width = 6, tags$b("Select plot mode:"), - switchInput(ns("bymz"), onLabel = "By m/z", - offLabel = "By group", value = TRUE, - labelWidth = "AUTO"))), - fluidRow( - conditionalPanel("input.rawset.includes('Raw data plot')", - ns = ns, - conditionalPanel("input.bymz", ns = ns, - plotlyOutput(ns("rawMSMS_bymz"), height = "800")), - conditionalPanel("!input.bymz", ns = ns, - plotlyOutput(ns("rawMSMS_bygroup"), height = "800")), - ), - conditionalPanel("input.rawset.includes('Network')", - ns = ns, visNetworkOutput(ns("rawss"))), - conditionalPanel("input.rawset.includes('Peak table')", - ns = ns, - column(12, align="center", - tableOutput(ns('rawpks')))) - ) - ), - - ) - -} - -MS2PlotServer <- function(id, struct) { - moduleServer(id, function(input, output, session) { - # observeEvent({},{}, ignoreNULL = TRUE, ignoreInit = TRUE) - - ####Updates to buttons, selections, etc.#### - observeEvent({ - struct$hasMS2 - struct$dataset@data@MS2Exp - }, { - if (struct$hasMS2) { - #Determine which have ms2 data - whichMS2 <- vapply(struct$dataset@data@MS2Exp, - function(ms2) { - return(length(ms2@MS2Data) != 0) - }, logical(1)) - whichMS2 <- which(whichMS2) - - #Update accordingly - updateRadioGroupButtons(session, "id", choices = whichMS2, - selected = whichMS2[1]) - - } else { - #Hide panels again - updateRadioGroupButtons(session, "id", choices = NULL, - selected = NULL) - } - }, ignoreNULL = TRUE, ignoreInit = TRUE, priority = 100) - - observeEvent({ - input$id - }, - { - ms2 <- struct$dataset@data@MS2Exp[[as.numeric(input$id)]] - sslist <- ms2@Ident[["MS2Features"]] - with_hits <- which(vapply(sslist$results, - function(x){is.data.frame(x)}, - logical(1))) - original_IL <- unique(sslist$ILentry) - updateSelectizeInput(session, "selectident", - choices = as.character(with_hits), server = TRUE, - selected = as.character(with_hits[1]), - options = list(maxOptions = 50000)) - updateSelectizeInput(session, "selectILentry", - choices = as.character(original_IL), - selected = as.character(original_IL[1]), - server = TRUE, options = list(maxOptions = 50000)) - updateSelectizeInput(session, "selectident2", - choices = as.character(seq_len(nrow((sslist)[1]))), server = TRUE, - selected = as.character(1), - options = list(maxOptions = 50000)) - updateSelectizeInput(session, "selectss", - choices = as.character(seq_len(nrow((sslist)[1]))), server = TRUE, - selected = as.character(1), - options = list(maxOptions = 50000)) - - }, ignoreNULL = TRUE, ignoreInit = TRUE, priority = 50) - - observeEvent({input$selectident2},{ - ms2 <- struct$dataset@data@MS2Exp[[as.numeric(input$id)]] - sslist <- ms2@Ident[["MS2Features"]] - - otherss <- seq_len(nrow(sslist))[-as.numeric(input$selectident2)] - - updatePickerInput(session, "otherss", - choices = as.character(otherss), - selected = as.character(otherss[1])) - }, ignoreNULL = TRUE, ignoreInit = TRUE - ) - - #### Plots #### - observeEvent({ - input$selectILentry - input$id - }, { - if (input$selectILentry != "" & !is.na(input$selectILentry)) { - tryCatch({ - rawplots <- plotRawMS2(struct$dataset, - as.numeric(input$id), as.numeric(input$selectILentry)) - rtrange <- range(rawplots$p_bymz$data$rt) - - filtermz <- IL(struct$dataset, as.numeric(input$id))@ILParam@filtermz - mzprec <- IL(struct$dataset, as.numeric(input$id))@IL$mass[as.numeric(input$selectILentry)] - mzrange <- c(mzprec - filtermz, mzprec + filtermz) - - original_file <- SOI(struct$dataset, - IL(struct$dataset, as.numeric(input$id))@SOInum)@filename - original_PL <- which(vapply(struct$dataset@data@PL, function(x) { - return(x@filename == original_file) - }, logical(1)))[1] - original_PL <- PL(struct$dataset, original_PL) - - annotated_scans <- filter(original_PL@peaklist, - between(rt, rtrange[1], rtrange[2]) & - between(mz, mzrange[1], mzrange[2])) - raw_scans <- filter(original_PL@raw, - between(rt, rtrange[1], rtrange[2]) & - between(mz, mzrange[1], mzrange[2])) - suppressWarnings({ - ms1plot <- ggplot() + - geom_point(aes(x=rt, y=rtiv, mz = mz), data = raw_scans, color = "grey") + - geom_point(aes(x=rt, y=rtiv, color = formv, mz = mz), data = annotated_scans) + - labs(color = "Ion. Formula") + - ggtitle("Above: continuous MS2 scans
Below: MS1 data within isolation window")+ - theme_minimal() - }) - - output$rawMSMS_bymz <- renderPlotly( - subplot(rawplots[["p_bymz"]], ggplotly(ms1plot), nrows = 2, - shareX = TRUE) - ) - output$rawMSMS_bygroup <- renderPlotly( - subplot(rawplots[["p_bygroup"]], ggplotly(ms1plot), nrows = 2, - shareX = TRUE) - ) - - output$rawpks <- renderTable(rawplots[["pks"]]) - if (is(rawplots[["net"]], "visNetwork")) { - output$rawss <- renderVisNetwork(rawplots[["net"]]) - } - }, error = function(e){message("Raw MS2 plot failed")}) - - } - }, ignoreNULL = TRUE, ignoreInit = TRUE, priority = -1) - - observeEvent({ - input$selectident - input$id - },{ - tryCatch({ - ms2 <- struct$dataset@data@MS2Exp[[as.numeric(input$id)]] - sslist <- ms2@Ident[["MS2Features"]] - if(is.data.frame(sslist) & !is.na(as.numeric(input$selectident))){ - if(as.numeric(input$selectident) <= nrow(sslist)){ - cur <- sslist$results[[as.numeric(input$selectident)]] - if(is.data.frame(cur)){ - updatePickerInput(session, "selecthits", choices = cur$formula, - selected = cur$formula[[1]]) - } - } - } - }, error = function(cond){}) - }, ignoreNULL = TRUE, ignoreInit = TRUE, priority = 50) - - #Identification plot (against MS2 DB hits) - observeEvent({ - input$selectident - input$selecthits - input$id - }, { - if (input$selectident != "" & !is.na(input$selectident) & - length(input$selecthits) != 0) { - tryCatch({ - ms2 <- struct$dataset@data@MS2Exp[[as.numeric(input$id)]] - sslist <- ms2@Ident[["MS2Features"]] - with_hits <- which(vapply(sslist$results, - function(x){is.data.frame(x)}, - logical(1))) - if(as.numeric(input$selectident) %in% with_hits){ - identplots <- plotMirror(struct$dataset, - as.numeric(input$id), as.numeric(input$selectident), - input$selecthits, mode = "hits") - output$mirrorplot <- renderPlotly(identplots) - } - }, error = function(cond){warning("Identity plot failed")}) - } - }, ignoreNULL = TRUE, ignoreInit = TRUE, priority = -2) - - #Pairwise MS2 spectrum plot - observeEvent({ - input$selectident2 - input$otherss - input$id - }, { - if (input$selectident2 != "" & !is.na(input$selectident2) & - length(input$otherss) != 0) { - tryCatch({ - ms2 <- struct$dataset@data@MS2Exp[[as.numeric(input$id)]] - sslist <- ms2@Ident[["MS2Features"]] - identplots <- plotMirror(struct$dataset, - as.numeric(input$id), as.numeric(input$selectident2), - as.numeric(input$otherss), mode = "versus") - output$versusplot <- renderPlotly(identplots) - }, error = function(e){message("Mirrorplot failed")}) - } - }, ignoreNULL = TRUE, ignoreInit = TRUE, priority = -2) - - #Single MS2 spectrum plot - observeEvent({ - input$selectss - input$id - }, { - if (!is.na(input$selectss) & input$selectss != "") { - identplots <- plotSS(struct$dataset, as.numeric(input$id), - as.numeric(input$selectss)) - output$ssplot <- renderPlotly(identplots) - } - }, ignoreNULL = TRUE, ignoreInit = TRUE, priority = -2) - }) -} - -plotMirror <- function(struct, id, ssnumber, patform, mode = "hits") { - entry <- struct@data@MS2Exp[[id]]@Ident$MS2Features[ssnumber,] - query <- entry$ssdata[[1]] - maxint <- max(query$int) - query$int <- query$int/max(query$int) * 100 - bestdf <- query[query$int > 10, ] - bestdf$mz <- round(bestdf$mz, 4) - molecmass <- entry$precmass - baseline <- 1000 - subtitle <- "" - title <- "" - baseline <- baseline/maxint * 100 - - f <- list( - family = "Open Sans", - size = 16, - color = "black" - ) - - if(mode == "hits"){ - ref <- struct@data@MS2Exp[[id]]@Ident$MS2_correspondance[[ssnumber]] - pattern <- struct@data@MS2Exp[[id]]@Ident$DatabaseSpectra[ref] - pattern <- unname(pattern) #Avoids "name.subname" when unlisting - pattern <- unlist(pattern, recursive = FALSE, use.names = TRUE) - } - - mirrplot <- lapply(patform, function(x){ - pl <- ggplot() - if(mode == "hits"){ - index <- entry$results[[1]]$id[entry$results[[1]]$formula == x] - if(length(index) == 0){ - refspec <- NULL - } else { - refspec <- pattern[[x]][[index]] - spec_energy <- names(pattern[[x]])[index] - comp_name <- strsplit(patform, split = "#")[[which(patform == x)[1]]][3] - a <- list( - text = paste(comp_name, spec_energy), - font = f, - xref = "paper", - yref = "paper", - yanchor = "bottom", - xanchor = "center", - align = "center", - x = 0.5, - y = 1, - showarrow = FALSE - ) - } - if(is.null(refspec)){return(ggplotly(pl))} - refspec <- as.data.frame(t(refspec)) - pl <- pl + scale_x_continuous(limits = c(min(c(refspec$mz, - query$mz, molecmass)) - 20, - max(c(refspec$mz, query$mz, - molecmass)) + 20)) - } - if(mode == "versus"){ - refmass<-struct@data@MS2Exp[[id]]@Ident$MS2Features$precmass[[x]] - refspec <- struct@data@MS2Exp[[id]]@Ident$MS2Features$ssdata[[x]] - if(is.null(refspec)){return(ggplotly(pl))} - - a <- list( - text = paste("SS comparison between", ssnumber, "and", x), - font = f, - xref = "paper", - yref = "paper", - yanchor = "bottom", - xanchor = "center", - align = "center", - x = 0.5, - y = 1, - showarrow = FALSE - ) - - refdf <- data.frame(mz = refmass) - pl <- pl + - geom_point(data = refdf, aes(x = .data$mz, y = 0), shape = 25, - size = 4, color = "red", fill = "red") + - scale_x_continuous(limits = c(min(c(refspec$mz, query$mz, - molecmass, refmass)) - 20, - max(c(refspec$mz, query$mz, molecmass, - refmass)) + 20)) - } - colnames(refspec) <- c("mz", "int") - refspec$int <- refspec$int/max(refspec$int) * 100 - - moldf <- data.frame(mz = molecmass) - bldf <- data.frame(xmin = min(c(refspec$mz, query$mz, - molecmass)) - 5, - xmax = max(c(refspec$mz,query$mz, - molecmass) + 5), - y = baseline) - - pl <- pl + geom_segment(data = query, aes(x = .data$mz, - xend = .data$mz, y = 0, yend = .data$int), - color = "black") + - geom_segment(data = refspec, aes(x = .data$mz, - xend = .data$mz, y = 0, yend = -.data$int), - color = "red") + - geom_segment(data = bldf, aes(x = .data$xmin, - xend = .data$xmax, y = .data$y, yend = .data$y), - linetype = "dashed", color = "black", alpha = 0.3) + - geom_segment(data = bldf, aes(x = .data$xmin, - xend = .data$xmax, y = -.data$y, yend = -.data$y), - linetype = "dashed", color = "red", alpha = 0.3) + - geom_point(data = moldf, aes(x = .data$mz, y = 0), - shape = 17, size = 2) + - theme_minimal() + ylab("% Intensity") + - theme(plot.margin = unit(c(1, 0.7, 1, 0.8), "cm"), - text = element_text(size = 11, - family = "Segoe UI Light"), - plot.title = element_text(hjust = 0.5)) + - geom_text(data = bestdf, aes(x = .data$mz, - y = .data$int + 5, - label = .data$mz), - family = "Segoe UI Light", check_overlap = TRUE) - - base_height <- ifelse(length(patform)<5, 850/length(patform), 200) - ggplotly(pl, height = base_height * length(patform)) %>% - layout(annotations = a) - }) - - return(subplot(mirrplot, nrows = length(mirrplot), shareX = TRUE, - which_layout = 1)) -} - +MS2PlotUI <- function(id) { + ns <- NS(id) + tagList( + tags$br(), + + radioGroupButtons(inputId = ns("selectclass"), label = "Select a graph :", + choices = c(` Identifications` = "ident", + ` Raw MSMS data` = "raw"), + justified = TRUE), + radioGroupButtons(ns("id"), "Select MS2Exp:", choices = "", + selected = ""), + br(), + + conditionalPanel("input.selectclass == 'ident'", ns = ns, + radioGroupButtons(ns("selectplot"), + choices = c(`Superspectra Plot` = "sup", + `Superspectra Comparison` = "comp", + `Compound matches` = "hits"), justified = TRUE + ), + conditionalPanel("input.selectplot == 'sup'", ns = ns, + fluidRow(column(6, selectizeInput(ns("selectss"), + label = "Select superspectrum to plot", + choices = NULL, selected = NULL))), + plotlyOutput(ns("ssplot"))), + conditionalPanel("input.selectplot == 'hits'", ns = ns, + fluidRow( + column(6, + selectizeInput(ns("selectident"), + label = "Select the superspectra entry to check:", + choices = NULL, selected = NULL)), + column(6, + pickerInput(ns("selecthits"), + label = "Select which hits to check against:", + choices = NULL, selected = NULL, multiple = TRUE, + options = list( + `actions-box` = TRUE)) + ) + ), + plotlyOutput(ns("mirrorplot")) + ), + + conditionalPanel("input.selectplot == 'comp'", ns = ns, + fluidRow( + column(6, + selectizeInput(ns("selectident2"), + label = "Select the superspectra entry to check:", + choices = NULL, selected = NULL)), + column(6, + pickerInput(inputId = ns("otherss"), + label = "Select which superspectra/s to compare with:", + choices = NULL, selected = NULL, multiple = TRUE, + options = list(`max-options` = 10)) + ) + ), + plotlyOutput(ns("versusplot")) + ) + ), + conditionalPanel("input.selectclass == 'raw'", ns = ns, + fluidRow( + column(6, + checkboxGroupButtons( + inputId = ns("rawset"), + label = "Select what to plot:", + choices = c("Raw data plot", "Network", "Peak table"), + checkIcon = list( + yes = tags$i(class = "fa fa-check-square", + style = "color: #4D4263"), + no = tags$i(class = "fa fa-square-o", + style = "color: #4D4263")), + selected = c("Raw data plot", "Network", "Peak table") + ) + ) + ), + fluidRow(column(width = 6, + selectizeInput(ns("selectILentry"), + label = "Select the IL entry to check:", choices = NULL, + selected = NULL + ) + ), + column(width = 6, tags$b("Select plot mode:"), + switchInput(ns("bymz"), onLabel = "By m/z", + offLabel = "By group", value = TRUE, + labelWidth = "AUTO"))), + fluidRow( + conditionalPanel("input.rawset.includes('Raw data plot')", + ns = ns, + conditionalPanel("input.bymz", ns = ns, + plotlyOutput(ns("rawMSMS_bymz"), height = "800")), + conditionalPanel("!input.bymz", ns = ns, + plotlyOutput(ns("rawMSMS_bygroup"), height = "800")), + ), + conditionalPanel("input.rawset.includes('Network')", + ns = ns, visNetworkOutput(ns("rawss"))), + conditionalPanel("input.rawset.includes('Peak table')", + ns = ns, + column(12, align="center", + tableOutput(ns('rawpks')))) + ) + ), + + ) + +} + +MS2PlotServer <- function(id, struct) { + moduleServer(id, function(input, output, session) { + # observeEvent({},{}, ignoreNULL = TRUE, ignoreInit = TRUE) + + #### Updates to buttons, selections, etc. #### + observeEvent({ + struct$hasMS2 + struct$dataset@data@MS2Exp + }, { + if (struct$hasMS2) { + ## Determine which have ms2 data + whichMS2 <- vapply(struct$dataset@data@MS2Exp, + function(ms2) { + return(length(ms2@MS2Data) != 0) + }, logical(1)) + whichMS2 <- which(whichMS2) + + #Update accordingly + updateRadioGroupButtons(session, "id", choices = whichMS2, + selected = whichMS2[1]) + + } else { + #Hide panels again + updateRadioGroupButtons(session, "id", choices = NULL, + selected = NULL) + } + }, ignoreNULL = TRUE, ignoreInit = TRUE, priority = 100) + + observeEvent({ + input$id + }, + { + ms2 <- struct$dataset@data@MS2Exp[[as.numeric(input$id)]] + sslist <- ms2@Ident[["MS2Features"]] + + + ## Identification against DB mirror plot selector + with_hits <- which(vapply(sslist$results, + function(x){is.data.frame(x)}, + logical(1))) + updateSelectizeInput(session, "selectident", + choices = as.character(with_hits), server = TRUE, + selected = as.character(with_hits[1]), + options = list(maxOptions = 50000)) + + ## Raw MS2 plot selector + original_IL <- unique(sslist$ILentry) + ## Add some useful info to IL labels + entries_list <- as.list(as.character(original_IL)) + if (length(original_IL != 0)){ + labels <- lapply(original_IL, function(id){ + entries <- sslist[sslist$ILentry == id, ] + paste(id, + "RT:", round(min(entries$start), 2), "-", round(max(entries$end),2), + "mz:", round(mean(entries$precmass), 4)) + }) + names(entries_list) <- labels + } + updateSelectizeInput(session, "selectILentry", + choices = entries_list, + selected = entries_list[1], + server = TRUE, options = list(maxOptions = 50000)) + + + + #MS2 spectra plotter and mirror plot among MS2 spectra + spectra_info <- paste(seq_len(nrow(sslist)), + "RT:", round(sslist$start, 2), round(sslist$apex, 2), round(sslist$end, 2), + "mz:", round(sslist$precmass, 4)) + spectra_list <- as.list(seq_len(nrow(sslist))) + names(spectra_list) <- spectra_info + + + updateSelectizeInput(session, "selectident2", + choices = spectra_list, + server = TRUE, + selected = NULL, + options = list(maxOptions = 50000)) + updateSelectizeInput(session, "selectss", + choices = spectra_list, + server = TRUE, + selected = NULL, + options = list(maxOptions = 50000)) + + }, ignoreNULL = TRUE, ignoreInit = TRUE, priority = 50) + + observeEvent({input$selectident2},{ + if(input$selectident2 != ""){ + ms2 <- struct$dataset@data@MS2Exp[[as.numeric(input$id)]] + sslist <- ms2@Ident[["MS2Features"]] + + spectra_info <- paste(seq_len(nrow(sslist)), + "RT:", round(sslist$start, 2), round(sslist$apex, 2), round(sslist$end, 2), + "mz:", round(sslist$precmass, 4)) + spectra_list <- as.list(seq_len(nrow(sslist))) + names(spectra_list) <- spectra_info + + #Remove picker1 selection from picker2's options + updatePickerInput(session, "otherss", + choices = spectra_list[-as.numeric(input$selectident2)], + selected = NULL) + } + }, ignoreNULL = TRUE, ignoreInit = TRUE + ) + + #### Plots #### + observeEvent({ + input$selectILentry + input$id + }, { + if (input$selectILentry != "" & !is.na(input$selectILentry)) { + tryCatch({ + rawplots <- plotRawMS2(struct$dataset, + as.numeric(input$id), as.numeric(input$selectILentry)) + rtrange <- range(rawplots$p_bymz$data$rt) + + filtermz <- IL(struct$dataset, as.numeric(input$id))@ILParam@filtermz + mzprec <- IL(struct$dataset, as.numeric(input$id))@IL$mass[as.numeric(input$selectILentry)] + mzrange <- c(mzprec - filtermz, mzprec + filtermz) + + original_file <- SOI(struct$dataset, + IL(struct$dataset, as.numeric(input$id))@SOInum)@filename + original_PL <- which(vapply(struct$dataset@data@PL, function(x) { + return(x@filename == original_file) + }, logical(1)))[1] + original_PL <- PL(struct$dataset, original_PL) + + annotated_scans <- dplyr::filter(original_PL@peaklist, + between(rt, rtrange[1], rtrange[2]) & + between(mz, mzrange[1], mzrange[2])) + suppressWarnings({ + ms1plot <- ggplot() + + geom_point(aes(x=rt, y=rtiv, color = formv, mz = mz), data = annotated_scans) + + labs(color = "Ion. Formula") + + ggtitle("Above: continuous MS2 scans
Below: MS1 data within isolation window")+ + theme_minimal() + }) + + raw_scans <- original_PL@raw + if(nrow(raw_scans) != 0){ + raw_scans <- dplyr::filter(original_PL@raw, + between(rt, rtrange[1], rtrange[2]) & + between(mz, mzrange[1], mzrange[2])) + raw_scans <- dplyr::anti_join(raw_scans, annotated_scans) + ms1plot <- ms1plot + + geom_point(aes(x=rt, y=rtiv, mz = mz, color = "Unknown"), data = raw_scans) + + } + output$rawMSMS_bymz <- renderPlotly( + subplot(rawplots[["p_bymz"]], ggplotly(ms1plot), nrows = 2, + shareX = TRUE) + ) + output$rawMSMS_bygroup <- renderPlotly( + subplot(rawplots[["p_bygroup"]], ggplotly(ms1plot), nrows = 2, + shareX = TRUE) + ) + + output$rawpks <- renderTable(rawplots[["pks"]]) + if (is(rawplots[["net"]], "visNetwork")) { + output$rawss <- renderVisNetwork(rawplots[["net"]]) + } + }, error = function(e){ + sendSweetAlert(session = session, title = "Error", + text = "Raw MS2 plot failed, meaning no enough points were acquired for this entry", + type = "warning") + message("Raw MS2 plot failed") + }) + + } + }, ignoreNULL = TRUE, ignoreInit = TRUE, priority = -1) + + observeEvent({ + input$selectident + input$id + },{ + tryCatch({ + ms2 <- struct$dataset@data@MS2Exp[[as.numeric(input$id)]] + sslist <- ms2@Ident[["MS2Features"]] + if(is.data.frame(sslist) & !is.na(as.numeric(input$selectident))){ + if(as.numeric(input$selectident) <= nrow(sslist)){ + cur <- sslist$results[[as.numeric(input$selectident)]] + if(is.data.frame(cur)){ + updatePickerInput(session, "selecthits", choices = cur$formula, + selected = cur$formula[[1]]) + } + } + } + }, error = function(cond){}) + }, ignoreNULL = TRUE, ignoreInit = TRUE, priority = 50) + + #Identification plot (against MS2 DB hits) + observeEvent({ + input$selectident + input$selecthits + input$id + }, { + if (input$selectident != "" & !is.na(input$selectident) & + length(input$selecthits) != 0) { + tryCatch({ + ms2 <- struct$dataset@data@MS2Exp[[as.numeric(input$id)]] + sslist <- ms2@Ident[["MS2Features"]] + with_hits <- which(vapply(sslist$results, + function(x){is.data.frame(x)}, + logical(1))) + if(as.numeric(input$selectident) %in% with_hits){ + identplots <- plotMirror(struct$dataset, + as.numeric(input$id), as.numeric(input$selectident), + input$selecthits, mode = "hits") + output$mirrorplot <- renderPlotly(identplots) + } + }, error = function(cond){warning("Identity plot failed")}) + } + }, ignoreNULL = TRUE, ignoreInit = TRUE, priority = -2) + + #Pairwise MS2 spectrum plot + observeEvent({ + input$selectident2 + input$otherss + input$id + }, { + if (input$selectident2 != "" & !is.na(input$selectident2) & + length(input$otherss) != 0) { + tryCatch({ + ms2 <- struct$dataset@data@MS2Exp[[as.numeric(input$id)]] + sslist <- ms2@Ident[["MS2Features"]] + identplots <- plotMirror(struct$dataset, + as.numeric(input$id), as.numeric(input$selectident2), + as.numeric(input$otherss), mode = "versus") + output$versusplot <- renderPlotly(identplots) + }, error = function(e){message("Mirrorplot failed")}) + } + }, ignoreNULL = TRUE, ignoreInit = TRUE, priority = -2) + + #Single MS2 spectrum plot + observeEvent({ + input$selectss + input$id + }, { + if (!is.na(input$selectss) & input$selectss != "") { + identplots <- plotSS(struct$dataset, as.numeric(input$id), + as.numeric(input$selectss)) + output$ssplot <- renderPlotly(identplots) + } + }, ignoreNULL = TRUE, ignoreInit = TRUE, priority = -2) + }) +} + +plotMirror <- function(struct, id, ssnumber, patform, mode = "hits") { + entry <- struct@data@MS2Exp[[id]]@Ident$MS2Features[ssnumber,] + query <- entry$ssdata[[1]] + maxint <- max(query$int) + query$int <- query$int/max(query$int) * 100 + bestdf <- query[query$int > 10, ] + bestdf$mz <- round(bestdf$mz, 4) + molecmass <- entry$precmass + baseline <- 1000 + subtitle <- "" + title <- "" + baseline <- baseline/maxint * 100 + + f <- list( + family = "Open Sans", + size = 16, + color = "black" + ) + + if(mode == "hits"){ + ref <- struct@data@MS2Exp[[id]]@Ident$MS2_correspondance[[ssnumber]] + pattern <- struct@data@MS2Exp[[id]]@Ident$DatabaseSpectra[ref] + pattern <- unname(pattern) #Avoids "name.subname" when unlisting + pattern <- unlist(pattern, recursive = FALSE, use.names = TRUE) + } + + mirrplot <- lapply(patform, function(x){ + pl <- ggplot() + if(mode == "hits"){ + index <- entry$results[[1]]$id[entry$results[[1]]$formula == x] + if(length(index) == 0){ + refspec <- NULL + } else { + refspec <- pattern[[x]][[index]] + spec_energy <- names(pattern[[x]])[index] + comp_name <- strsplit(patform, split = "#")[[which(patform == x)[1]]][3] + a <- list( + text = paste(comp_name, spec_energy), + font = f, + xref = "paper", + yref = "paper", + yanchor = "bottom", + xanchor = "center", + align = "center", + x = 0.5, + y = 1, + showarrow = FALSE + ) + } + if(is.null(refspec)){return(ggplotly(pl))} + refspec <- as.data.frame(t(refspec)) + pl <- pl + scale_x_continuous(limits = c(min(c(refspec$mz, + query$mz, molecmass)) - 20, + max(c(refspec$mz, query$mz, + molecmass)) + 20)) + } + if(mode == "versus"){ + refmass<-struct@data@MS2Exp[[id]]@Ident$MS2Features$precmass[[x]] + refspec <- struct@data@MS2Exp[[id]]@Ident$MS2Features$ssdata[[x]] + if(is.null(refspec)){return(ggplotly(pl))} + + a <- list( + text = paste("SS comparison between", ssnumber, "and", x), + font = f, + xref = "paper", + yref = "paper", + yanchor = "bottom", + xanchor = "center", + align = "center", + x = 0.5, + y = 1, + showarrow = FALSE + ) + + refdf <- data.frame(mz = refmass) + pl <- pl + + geom_point(data = refdf, aes(x = .data$mz, y = 0), shape = 25, + size = 4, color = "red", fill = "red") + + scale_x_continuous(limits = c(min(c(refspec$mz, query$mz, + molecmass, refmass)) - 20, + max(c(refspec$mz, query$mz, molecmass, + refmass)) + 20)) + } + colnames(refspec) <- c("mz", "int") + refspec$int <- refspec$int/max(refspec$int) * 100 + + moldf <- data.frame(mz = molecmass) + bldf <- data.frame(xmin = min(c(refspec$mz, query$mz, + molecmass)) - 5, + xmax = max(c(refspec$mz,query$mz, + molecmass) + 5), + y = baseline) + + pl <- pl + geom_segment(data = query, aes(x = .data$mz, + xend = .data$mz, y = 0, yend = .data$int), + color = "black") + + geom_segment(data = refspec, aes(x = .data$mz, + xend = .data$mz, y = 0, yend = -.data$int), + color = "red") + + geom_segment(data = bldf, aes(x = .data$xmin, + xend = .data$xmax, y = .data$y, yend = .data$y), + linetype = "dashed", color = "black", alpha = 0.3) + + geom_segment(data = bldf, aes(x = .data$xmin, + xend = .data$xmax, y = -.data$y, yend = -.data$y), + linetype = "dashed", color = "red", alpha = 0.3) + + geom_point(data = moldf, aes(x = .data$mz, y = 0), + shape = 17, size = 2) + + theme_minimal() + ylab("% Intensity") + + theme(plot.margin = unit(c(1, 0.7, 1, 0.8), "cm"), + text = element_text(size = 11, + family = "Segoe UI Light"), + plot.title = element_text(hjust = 0.5)) + + geom_text(data = bestdf, aes(x = .data$mz, + y = .data$int + 5, + label = .data$mz), + family = "Segoe UI Light", check_overlap = TRUE) + + base_height <- ifelse(length(patform)<5, 850/length(patform), 200) + ggplotly(pl, height = base_height * length(patform)) %>% + layout(annotations = a) + }) + + return(subplot(mirrplot, nrows = length(mirrplot), shareX = TRUE, + which_layout = 1)) +} + diff --git a/inst/app/PLPlotUI.R b/inst/app/PLPlotUI.R index 039894a..48ba7a9 100644 --- a/inst/app/PLPlotUI.R +++ b/inst/app/PLPlotUI.R @@ -64,7 +64,8 @@ PLPlotUI <- function(id){ column(4, switchInput(ns("mzmetric"), "Use ppm?", value = TRUE, onLabel = "ppm", offLabel = "absolute")), column(8, numericInput(ns("mztol"), "mz tolerance", value = 3, min = 0, max = 5000, step = 0.001)), sliderInput(ns("RTinterval_raw"), label = "Select an RT interval:", - min = 0, max = 1800, value = c(0,1800)) + min = 0, max = 1800, value = c(0,1800)), + sliderInput(ns("Intensity_raw"), "Select a log10 intensity range:", min = 0, max = 10, step = 0.05, value=c(0,10)) ) ), width = 12), width = "100%"), @@ -148,6 +149,7 @@ PLPlotServer <- function(id, struct){ input$mzmetric input$mztol input$PLfiles_raw + input$Intensity_raw },{ if(struct$hasPL){ d <- struct$dataset@data@PL[[as.numeric(input$PLfiles_raw)]]@raw @@ -159,13 +161,17 @@ PLPlotServer <- function(id, struct){ tol <- as.numeric(input$mztol) } d <- filter(d, between(mz, target - tol, target + tol) & - between(rt, as.numeric(input$RTinterval_raw[[1]]), - as.numeric(input$RTinterval_raw[[2]]))) + between(rt, + as.numeric(input$RTinterval_raw[[1]]), + as.numeric(input$RTinterval_raw[[2]])), + between(log10(rtiv), + as.numeric(input$Intensity_raw[[1]]), + as.numeric(input$Intensity_raw[[2]]))) if(nrow(d) < 1e5){ p1 <- ggplotly(ggplot(d) + geom_point(aes(x=rt, y=mz, color = log10(rtiv)))+ labs(color = "log10(Intensity)") + - scale_colour_gradient(low = "#CFB7E5", high = "#15004D") + + scale_color_viridis_c(option = "inferno") + xlab("Retention time (s)") + ylab("m/z") ) @@ -173,7 +179,7 @@ PLPlotServer <- function(id, struct){ p2 <- ggplotly(ggplot(d) + geom_point(aes(x=rt, y=log10(rtiv), color = mz)) + labs(color = "mz") + - scale_colour_gradient(low = "#CFB7E5", high = "#15004D") + + scale_color_viridis_c(option = "inferno") + xlab("Retention time (s)") + ylab("Intensity") ) diff --git a/inst/app/SOI_UI.R b/inst/app/SOI_UI.R index 9d73c14..4d9c569 100644 --- a/inst/app/SOI_UI.R +++ b/inst/app/SOI_UI.R @@ -279,9 +279,7 @@ SOIServer <- function(id, struct){ tags$li(tags$b("Maximum SOI RT"), ": The algorithm will split long SOIs (larger than the selected value), into smaller ones. Longer values take shorter (since there aren't as many SOI entries), but blank substraction problems can arise and it's not really worth it. Better left as default."), - tags$li(tags$b("Blank Substraction"), ": Whether to perform Blank Substraction (Optional but recommended).", - tags$b("Important:"), "The blank substraction step requires setting up Keras and Tensorflow first. - If you haven't done it please check the package Readme file to see how to do it.") + tags$li(tags$b("Blank Substraction"), ": Whether to perform Blank Substraction (Optional but recommended).") ), tags$p("To generate the SOI list (or lists) you have to set up the desired parameters and click \"Add selected config\". When you do so, you will see that the input parameters appear in the box below the button. You can either add more configs diff --git a/inst/app/Settings_UI.R b/inst/app/Settings_UI.R deleted file mode 100644 index 73a3aab..0000000 --- a/inst/app/Settings_UI.R +++ /dev/null @@ -1,57 +0,0 @@ -Settings_UI <- function(id){ - ns <- NS(id) - tagList( - sidebarPanel( - fluidRow( - column(selectInput(inputId = ns("partype"), - label = "Select Parallelization type:", - choices = c("serial", "snow"), selected = "snow"), - width = 4), - column(numericInput(inputId = ns("coreN"), label = "Number of cores", - value = 1, min = 1, - max = BiocParallel::bpworkers(), - step = 1), width = 4), - ), - fluidRow( - column(actionButton(ns("saveSet"), label = "Save Settings", - icon = icon("save")), width = 4) - ), - width = 12) - ) -} - -SettingsServer <- function(id, struct){ - moduleServer(id, function(input, output, session) { - - if(Sys.info()["sysname"] != "Windows"){ - updateSelectInput(session = session, inputId = "partype", - choices = c(serial = "serial", snow = "snow", - "multicore (FORK)" = "multicore")) - } - - observeEvent({ - input$partype - }, { - if(input$partype == "serial"){updateNumericInput(session = session, - inputId = "coreN", - value = 1, max = 1) - } else {updateNumericInput(session = session, inputId = "coreN", - max = BiocParallel::bpworkers())} - }) - - toReturn <- reactiveValues(dataset = RHermesExp(), trigger = 0) - observeEvent({ - input$saveSet - }, { - backend <- switch(input$partype, - serial = BiocParallel::SerialParam(), - snow = BiocParallel::SnowParam(workers = as.numeric(input$coreN)), - multicore = BiocParallel::MulticoreParam(workers = - as.numeric(input$coreN)) - ) - BiocParallel::register(backend) - }, ignoreInit = TRUE, ignoreNULL = TRUE) - - return(toReturn) - }) -} diff --git a/inst/app/app.R b/inst/app/app.R index e9e74b0..31bffb3 100644 --- a/inst/app/app.R +++ b/inst/app/app.R @@ -1,180 +1,171 @@ -for(i in list.files(system.file("app", package = "RHermes"), pattern = "UI", - full.names = TRUE)){ - source(i, local = TRUE) -} - -library(shiny) -library(shinyFiles) -library(shinydashboard) -library(shinyWidgets) -library(RHermes) -library(data.table, quietly = TRUE) -library(enviPat) -library(igraph) -require(mzR) -require(magrittr) -require(CHNOSZ) -require(ggplot2) -library(keras) -library(plotly) -library(visNetwork) -library(KEGGREST) -library(slickR) -library(BiocParallel) -library(DT) - -header <- dashboardHeader( - title = p("RHermes", style = "font-family: Open Sans, -apple-system, BlinkMacSystemFont, Segoe UI, Roboto, Arial; font-size: 22px"), - dropdownMenuOutput(outputId = "dropdown_m") - ) - -sidebar <- dashboardSidebar( - sidebarMenu( - menuItem("Intro", tabName = "intro", icon = icon("info")), - menuItem("From mzML to PL", tabName = "mz2PL", icon = icon("bolt")), - menuItem("SOI detection", tabName = "SOIdet", icon = icon("barcode")), - menuItem("Inclusion list generation", tabName = "IL", icon = icon("list")), - menuItem("MSMS data processing", tabName = "MSMS", icon = icon("atom")), - menuItem("Identifications", tabName = "ident", icon = icon("fingerprint")), - menuItem("Plots", tabName = "Plots", icon = icon("chart-bar"), - menuSubItem("PL Exploration", tabName = "PLplot"), - menuSubItem("SOI Exploration", tabName = "SOIplot"), - menuSubItem("MS2 Exploration", tabName = "MS2plot") - ), - menuItem("Tables", tabName = "extra", icon = icon("table")), - menuItem("Settings", tabName = "sett", icon = icon("file-alt")), - menuItem("Loading/Saving", tabName = "IO", icon = icon("save")), - div(actionButton("shutdown", label="Shutdown app", icon = icon("power-off"), - style ="background-color: #222d32; color: #b8c7ce; border-color: #b8c7ce; border-width:1px"), - style = "margin-top:100%; width: 220px; display: inline-grid; position:fixed; bottom:0") - ) -) - -body <- dashboardBody( - tags$head(tags$link(rel="stylesheet", href="https://cdnjs.cloudflare.com/ajax/libs/animate.css/4.0.0/animate.min.css")), ##Adds animate.css animations - tags$script(HTML("$('body').addClass('fixed');")), - includeCSS("./www/customStyle.css"), - tabItems( - # First tab content - tabItem(tabName = "intro", - titlePanel( - div(class="animate__animated animate__pulse", h1(tags$b("RHermes"), align = "center"), style = "text-align: center;")), - mainPanel(width = 20, - HTML("

RHermes is a broad-scoped targeted metabolomics software designed to identify compounds in biological and environmental samples. - We invert the classical peak-first metabolite detection workflows by first annotating the LC-MS1 raw data points themselves - using a database of plasusible formulas and adducts. This ultimately results in a very sample-specific inclusion list of ions to monitor in a MS2 experiment

-

RHermes has shown that up to 90% of a typical data-dependent acquisition (DDA) MS2 experiment is wasted in redundant and non-biological ions, leading to fake positive - identifications while missing the true compounds in the sample.

"), - HTML("

We believe there's a better way to characterize the metabolites in our samples

- RHermes

"), - HTML("

You can find the molecular formula databases and sample mzML files in the paper in our Zenodo dataset

"), - hr(), - HTML("

The Workflow

"), - br(), - tags$script(src = 'scroll.js'), - HTML("

"), - img(src = "step1.svg", width = "100%", class = "steps"), - HTML("

"), - img(src = "step2.svg", width = "100%", class = "steps"), - HTML("

"), - img(src = "step3.svg", width = "100%", class = "steps"), - hr(), - h2("Bug reports"), - HTML("

In case you find any abnormal behaviour (eg: app unexpectedly crashes, etc.) feel free to email - the mantainer at roger.gine@estudiants.urv or open a Github issue. - You should provide a detailed description about what happened, screenshots of the error and, desirably, a minimally reproducible example

"), - h2("Citation"), - HTML("

Please cite this software as:

"), - HTML("

HERMES: a molecular formula-oriented method to target the metabolome

- Roger Giné, Jordi Capellades, Josep M. Badia, Dennis Vughs, Michaela Schwaiger-Haber, Maria Vinaixa, Andrea M. Brunner, Gary J. Patti, Oscar Yanes

- bioRxiv 2021.03.08.434466; doi: https://doi.org/10.1101/2021.03.08.434466

"), - style = "padding : 20px 50px 50px 50px") - ), - - tabItem(tabName = "mz2PL", PL_UI("PL_UI")), - tabItem(tabName = "SOIdet", SOI_UI("SOI_UI")), - tabItem(tabName = "IL", IL_UI("IL_UI")), - tabItem(tabName = "MSMS", MS2_UI("MS2_UI")), - tabItem(tabName = "Plots", verticalLayout()), - tabItem(tabName = "PLplot", PLPlotUI("PLPlotUI")), - tabItem(tabName = "SOIplot", SOIPlotUI("SOIPlotUI")), - tabItem(tabName = "MS2plot", MS2PlotUI("MS2PlotUI")), - tabItem(tabName = "ident", Ident_UI("Identifications")), - tabItem(tabName = "extra", ExtraInfo_UI("ExtraInfo_UI")), - tabItem(tabName = "sett", Settings_UI("Settings_UI")), - tabItem(tabName = "IO", IO_UI("IO_UI")) - ) -) - -ui <- dashboardPage(header, sidebar, body, title = "RHermes") - -server <- function(input, output, session){ - - struct <- reactiveValues(dataset=RHermesExp(), hasPL = FALSE, hasSOI = FALSE, - hasIL = FALSE, hasMS2 = FALSE) - - observe({ - struct$hasPL <- ifelse(length(struct$dataset@data@PL)!=0, TRUE, FALSE) - struct$hasSOI <- ifelse(length(struct$dataset@data@SOI)!=0, TRUE, FALSE) - struct$hasIL <- ifelse(length(struct$dataset@data@MS2Exp)!=0, TRUE, FALSE) - if(struct$hasIL){ - struct$hasMS2 <- any(vapply(struct$dataset@data@MS2Exp, function(x){ - length(x@Ident)!=0 - }, logical(1))) } - }) - - output$dropdown_m <- renderMenu({ - dropdownMenu(type = "tasks", badgeStatus = "primary", - taskItem(value = ifelse(struct$hasPL, 100, 0), color = "green", - "The object has PL/s" - ), - taskItem(value = ifelse(struct$hasSOI, 100, 0), color = "aqua", - "The object has SOI list/s" - ), - taskItem(value = ifelse(struct$hasIL, 100, 0),color = "yellow", - "The object has IL/s" - ), - taskItem(value = ifelse(struct$hasMS2, 100, 0), color = "red", - "The object has MS2 data" - ) - )}) - - PLresults <- PLServer("PL_UI", struct = struct) - observeEvent(PLresults$trigger, { - struct$dataset <- PLresults$dataset - }, ignoreNULL = TRUE, ignoreInit = TRUE) - - SOIresults <- SOIServer("SOI_UI", struct = struct) - observeEvent(SOIresults$trigger, { - struct$dataset <- SOIresults$dataset - }, ignoreNULL = TRUE, ignoreInit = TRUE) - - ILresults <- ILServer("IL_UI", struct = struct) - observeEvent(ILresults$trigger, { - struct$dataset <- ILresults$dataset - }, ignoreNULL = TRUE, ignoreInit = TRUE) - - MS2results <- MS2Server("MS2_UI", struct) - observeEvent(MS2results$trigger, { - struct$dataset <- MS2results$dataset - }, ignoreNULL = TRUE, ignoreInit = TRUE) - - IOresults <- IOServer("IO_UI", struct = struct) - observeEvent(IOresults$trigger, { - struct$dataset <- IOresults$dataset - }, ignoreNULL = TRUE, ignoreInit = TRUE) - - PLPlotServer("PLPlotUI", struct = struct) - SOIPlotServer("SOIPlotUI", struct = struct) - MS2PlotServer("MS2PlotUI", struct = struct) - IdentServer("Identifications", struct = struct) - ExtraInfoServer("ExtraInfo_UI", struct = struct) - - setResults <- SettingsServer("Settings_UI", struct = struct) - observeEvent(setResults$trigger, { - struct$dataset <- setResults$dataset - }, ignoreNULL = TRUE, ignoreInit = TRUE) - - observeEvent(input$shutdown, {shiny::stopApp()}) -} - -shinyApp(ui, server) +for(i in list.files(system.file("app", package = "RHermes"), pattern = "UI", + full.names = TRUE)){ + source(i, local = TRUE) +} + +library(shiny) +library(shinyFiles) +library(shinydashboard) +library(shinyWidgets) +library(RHermes) +library(data.table, quietly = TRUE) +library(enviPat) +library(igraph) +library(mzR) +library(magrittr) +library(CHNOSZ) +library(ggplot2) +library(plotly) +library(visNetwork) +library(KEGGREST) +library(slickR) +library(DT) + +header <- dashboardHeader( + title = p("RHermes", style = "font-family: Open Sans, -apple-system, BlinkMacSystemFont, Segoe UI, Roboto, Arial; font-size: 22px"), + dropdownMenuOutput(outputId = "dropdown_m") + ) + +sidebar <- dashboardSidebar( + sidebarMenu( + menuItem("Intro", tabName = "intro", icon = icon("info")), + menuItem("From mzML to PL", tabName = "mz2PL", icon = icon("bolt")), + menuItem("SOI detection", tabName = "SOIdet", icon = icon("barcode")), + menuItem("Inclusion list generation", tabName = "IL", icon = icon("list")), + menuItem("MSMS data processing", tabName = "MSMS", icon = icon("atom")), + menuItem("Identifications", tabName = "ident", icon = icon("fingerprint")), + menuItem("Plots", tabName = "Plots", icon = icon("chart-bar"), + menuSubItem("PL Exploration", tabName = "PLplot"), + menuSubItem("SOI Exploration", tabName = "SOIplot"), + menuSubItem("MS2 Exploration", tabName = "MS2plot") + ), + menuItem("Tables", tabName = "extra", icon = icon("table")), + menuItem("Loading/Saving", tabName = "IO", icon = icon("save")), + div(actionButton("shutdown", label="Shutdown app", icon = icon("power-off"), + style ="background-color: #222d32; color: #b8c7ce; border-color: #b8c7ce; border-width:1px"), + style = "margin-top:100%; width: 220px; display: inline-grid; position:fixed; bottom:0") + ) +) + +body <- dashboardBody( + tags$head(tags$link(rel="stylesheet", href="https://cdnjs.cloudflare.com/ajax/libs/animate.css/4.0.0/animate.min.css")), ##Adds animate.css animations + tags$script(HTML("$('body').addClass('fixed');")), + includeCSS("./www/customStyle.css"), + tabItems( + # First tab content + tabItem(tabName = "intro", + titlePanel( + div(class="animate__animated animate__pulse", h1(tags$b("RHermes"), align = "center"), style = "text-align: center;")), + mainPanel(width = 20, + HTML("

RHermes is a broad-scoped targeted metabolomics software designed to identify compounds in biological and environmental samples. + We invert the classical peak-first metabolite detection workflows by first annotating the LC-MS1 raw data points themselves + using a database of plasusible formulas and adducts. This ultimately results in a very sample-specific inclusion list of ions to monitor in a MS2 experiment

+

RHermes has shown that up to 90% of a typical data-dependent acquisition (DDA) MS2 experiment is wasted in redundant and non-biological ions, leading to fake positive + identifications while missing the true compounds in the sample.

"), + HTML("

We believe there's a better way to characterize the metabolites in our samples

- RHermes

"), + HTML("

You can find the molecular formula databases and sample mzML files in the paper in our Zenodo dataset

"), + hr(), + HTML("

The Workflow

"), + br(), + tags$script(src = 'scroll.js'), + HTML("

"), + img(src = "step1.svg", width = "100%", class = "steps"), + HTML("

"), + img(src = "step2.svg", width = "100%", class = "steps"), + HTML("

"), + img(src = "step3.svg", width = "100%", class = "steps"), + hr(), + h2("Bug reports"), + HTML("

In case you find any abnormal behaviour (eg: app unexpectedly crashes, etc.) feel free to email + the mantainer at roger.gine@estudiants.urv or open a Github issue. + You should provide a detailed description about what happened, screenshots of the error and, desirably, a minimally reproducible example

"), + h2("Citation"), + HTML("

Please cite this software as:

"), + HTML("

HERMES: a molecular formula-oriented method to target the metabolome

+ Roger Giné, Jordi Capellades, Josep M. Badia, Dennis Vughs, Michaela Schwaiger-Haber, Maria Vinaixa, Andrea M. Brunner, Gary J. Patti, Oscar Yanes

+ bioRxiv 2021.03.08.434466; doi: https://doi.org/10.1101/2021.03.08.434466

"), + style = "padding : 20px 50px 50px 50px") + ), + + tabItem(tabName = "mz2PL", PL_UI("PL_UI")), + tabItem(tabName = "SOIdet", SOI_UI("SOI_UI")), + tabItem(tabName = "IL", IL_UI("IL_UI")), + tabItem(tabName = "MSMS", MS2_UI("MS2_UI")), + tabItem(tabName = "Plots", verticalLayout()), + tabItem(tabName = "PLplot", PLPlotUI("PLPlotUI")), + tabItem(tabName = "SOIplot", SOIPlotUI("SOIPlotUI")), + tabItem(tabName = "MS2plot", MS2PlotUI("MS2PlotUI")), + tabItem(tabName = "ident", Ident_UI("Identifications")), + tabItem(tabName = "extra", ExtraInfo_UI("ExtraInfo_UI")), + tabItem(tabName = "IO", IO_UI("IO_UI")) + ) +) + +ui <- dashboardPage(header, sidebar, body, title = "RHermes") + +server <- function(input, output, session){ + + struct <- reactiveValues(dataset=RHermesExp(), hasPL = FALSE, hasSOI = FALSE, + hasIL = FALSE, hasMS2 = FALSE) + + observe({ + struct$hasPL <- ifelse(length(struct$dataset@data@PL)!=0, TRUE, FALSE) + struct$hasSOI <- ifelse(length(struct$dataset@data@SOI)!=0, TRUE, FALSE) + struct$hasIL <- ifelse(length(struct$dataset@data@MS2Exp)!=0, TRUE, FALSE) + if(struct$hasIL){ + struct$hasMS2 <- any(vapply(struct$dataset@data@MS2Exp, function(x){ + length(x@Ident)!=0 + }, logical(1))) } + }) + + output$dropdown_m <- renderMenu({ + dropdownMenu(type = "tasks", badgeStatus = "primary", + taskItem(value = ifelse(struct$hasPL, 100, 0), color = "green", + "The object has PL/s" + ), + taskItem(value = ifelse(struct$hasSOI, 100, 0), color = "aqua", + "The object has SOI list/s" + ), + taskItem(value = ifelse(struct$hasIL, 100, 0),color = "yellow", + "The object has IL/s" + ), + taskItem(value = ifelse(struct$hasMS2, 100, 0), color = "red", + "The object has MS2 data" + ) + )}) + + PLresults <- PLServer("PL_UI", struct = struct) + observeEvent(PLresults$trigger, { + struct$dataset <- PLresults$dataset + }, ignoreNULL = TRUE, ignoreInit = TRUE) + + SOIresults <- SOIServer("SOI_UI", struct = struct) + observeEvent(SOIresults$trigger, { + struct$dataset <- SOIresults$dataset + }, ignoreNULL = TRUE, ignoreInit = TRUE) + + ILresults <- ILServer("IL_UI", struct = struct) + observeEvent(ILresults$trigger, { + struct$dataset <- ILresults$dataset + }, ignoreNULL = TRUE, ignoreInit = TRUE) + + MS2results <- MS2Server("MS2_UI", struct) + observeEvent(MS2results$trigger, { + struct$dataset <- MS2results$dataset + }, ignoreNULL = TRUE, ignoreInit = TRUE) + + IOresults <- IOServer("IO_UI", struct = struct) + observeEvent(IOresults$trigger, { + struct$dataset <- IOresults$dataset + }, ignoreNULL = TRUE, ignoreInit = TRUE) + + PLPlotServer("PLPlotUI", struct = struct) + SOIPlotServer("SOIPlotUI", struct = struct) + MS2PlotServer("MS2PlotUI", struct = struct) + IdentServer("Identifications", struct = struct) + ExtraInfoServer("ExtraInfo_UI", struct = struct) + + observeEvent(input$shutdown, {shiny::stopApp()}) +} + +shinyApp(ui, server) diff --git a/inst/extdata/ImprovedModel.h5 b/inst/extdata/ImprovedModel.h5 deleted file mode 100644 index 15896a3..0000000 Binary files a/inst/extdata/ImprovedModel.h5 and /dev/null differ diff --git a/inst/extdata/exampleObject.rds b/inst/extdata/exampleObject.rds index 911ae98..4a0e805 100644 Binary files a/inst/extdata/exampleObject.rds and b/inst/extdata/exampleObject.rds differ diff --git a/man/exportIL.Rd b/man/exportIL.Rd index d1fb3f2..d0cc9ff 100644 --- a/man/exportIL.Rd +++ b/man/exportIL.Rd @@ -12,7 +12,7 @@ exportIL( mode = "both", maxOver = 5, defaultIT = 100, - sepFiles = FALSE, + sepFiles = TRUE, maxInjections = 9999 ) @@ -23,7 +23,7 @@ exportIL( mode = "both", maxOver = 5, defaultIT = 100, - sepFiles = FALSE, + sepFiles = TRUE, maxInjections = 9999 ) } @@ -53,8 +53,7 @@ aplicable in Orbitrap instruments and for "continuous" and "both" modes). Defaults to 100ms.} \item{sepFiles}{Logical, whether to generate a single csv file or multiple -csvs, each corresponding to each injection/chromatographic run. From our -experience with an Orbitrap Fusion, separate csvs will simplify the task.} +csvs, each corresponding to each injection/chromatographic run.} \item{maxInjections}{Numeric, the maximum number of planned injections to export. Defaults to 9999 to export all of them.} diff --git a/man/exportmzML.Rd b/man/exportmzML.Rd index adec979..1102933 100644 --- a/man/exportmzML.Rd +++ b/man/exportmzML.Rd @@ -4,7 +4,7 @@ \alias{exportmzML} \title{exportmzML} \usage{ -exportmzML(struct, id, fname, whichSpec = NA) +exportmzML(struct, id, fname, whichSpec = NA, collisionEnergy = 35) } \arguments{ \item{struct}{RHermesExp object} @@ -14,6 +14,9 @@ exportmzML(struct, id, fname, whichSpec = NA) \item{fname}{Name of the output file, without the .mzML termination} \item{whichSpec}{Which entries to export. Defaults to all of them.} + +\item{collisionEnergy}{Numeric. Desired collision energy (in %NCE, eV, or any +other unit)} } \value{ An .mzML file diff --git a/man/getSOIpar.Rd b/man/getSOIpar.Rd index 6f520d8..1f57b32 100644 --- a/man/getSOIpar.Rd +++ b/man/getSOIpar.Rd @@ -5,9 +5,9 @@ \alias{getSOIpar,ANY-method} \title{getSOIpar} \usage{ -getSOIpar(tag = "double") +getSOIpar(tag = "double", mode = "regular", cwp = NA) -\S4method{getSOIpar}{ANY}(tag = "double") +\S4method{getSOIpar}{ANY}(tag = "double", mode = "regular", cwp = NA) } \arguments{ \item{tag}{A character string that tells which premade SOI parameter @@ -17,6 +17,12 @@ chromatography experiments, 'single-x', 'double-x' and 'triple-x'. These are all stored in /app/www/SOIFilterParams.csv, feel free to locally change them or add new ones for your use (if you know what you're doing).} + +\item{mode}{Whether SOI detection should use the regular density-based +algorithm or xcms peak detection for defining the SOIs} + +\item{cwp}{A CentWaveParam object used for either SOI detection (xcms mode) +or long SOI splitting (regular mode)} } \value{ A SoiParam object diff --git a/man/plotFidelity.Rd b/man/plotFidelity.Rd index 434a909..d35ae5d 100644 --- a/man/plotFidelity.Rd +++ b/man/plotFidelity.Rd @@ -33,7 +33,7 @@ Plots the selected SOI isotopic profile and a \examples{ \dontshow{struct <- readRDS(system.file("extdata", "exampleObject.rds", package = "RHermes"))} -p <- plotFidelity(struct, 1, 9) +p <- plotFidelity(struct, 1, 8) } \seealso{ Other plots: diff --git a/man/setCluster.Rd b/man/setCluster.Rd deleted file mode 100644 index 1576276..0000000 --- a/man/setCluster.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Classes.R -\name{setCluster} -\alias{setCluster} -\title{setCluster} -\usage{ -setCluster() -} -\value{ -A BiocParallel backend -} -\description{ -Returns the most appropriate BiocParallel backend for RHermes - according to your OS and RAM. It is entirely optional, but recommended - nonetheless. -} -\examples{ -BiocParallel::register(setCluster()) #Registers the most appropriate backend -} -\author{ -Roger Gine -} diff --git a/tests/testthat/test-Methods.R b/tests/testthat/test-Methods.R index 09182eb..f5e6472 100644 --- a/tests/testthat/test-Methods.R +++ b/tests/testthat/test-Methods.R @@ -18,7 +18,7 @@ test_that("RHermesExp methods work", { toadd = "Cs") expect_warning(addAd(myHermes, "M+Cs", deltam = 132.905, ch = 0, mult = 1, toadd = "Cs")) - remAd(myHermes, "M-H") + remAd(myHermes, "[M-H]-") expect_warning(remAd(myHermes, "M+2Rb")) }) diff --git a/tests/testthat/test-PL.R b/tests/testthat/test-PL.R index 7fc34bd..96a0943 100644 --- a/tests/testthat/test-PL.R +++ b/tests/testthat/test-PL.R @@ -1,61 +1,59 @@ -context("Peaklist generation works fine") - -test_that("Raw data can be loaded",{ - ms1data <- RHermes:::import_and_filter(lf = system.file("extdata", - "MS1TestData.mzML", - package = "RHermes")) - expect_length(ms1data, 3) - expect_equal(nrow(ms1data[[1]]), 40040) -}) - -test_that("ScanSearch works",{ - BiocParallel::register(BiocParallel::SerialParam()) - myHermes <- readRDS(system.file("extdata", - "exampleObject.rds", - package = "RHermes")) - myHermes <- processMS1(myHermes, system.file("extdata", - "MS1TestData.mzML", - package = "RHermes")) - #Same result as precalculated version - expect_equal(nrow(myHermes@data@PL[[2]]@peaklist), - nrow(myHermes@data@PL[[1]]@peaklist)) -}) - -test_that("Labelled proc works",{ - BiocParallel::register(BiocParallel::SerialParam()) - myHermes <- readRDS(system.file("extdata", - "exampleObject.rds", - package = "RHermes")) - myHermes <- processMS1(myHermes, - system.file("extdata", - "MS1TestData.mzML", - package = "RHermes"), - labelled = TRUE) - - expect_equal(nrow(myHermes@data@PL[[2]]@peaklist), 1378) -}) - -test_that("PL plot works", { - myHermes <- readRDS(system.file("extdata", - "exampleObject.rds", - package = "RHermes")) - - p <- RHermes::plotPL(myHermes, 1, "C3H7NO2", rtrange = c(0,1500), - dynamicaxis = TRUE, ads = NA) - expect_true(is(p, "plotly")) -}) - - -test_that("Coverage plot works", { - myHermes <- readRDS(system.file("extdata", - "exampleObject.rds", - package = "RHermes")) - myHermes <- processMS1(myHermes, - system.file("extdata", - "MS1TestData.mzML", - package = "RHermes"), - labelled = TRUE) - p <- RHermes:::plotCoverage(myHermes, 2) - expect_true(is(p[[1]], "plotly") & is(p[[2]], "plotly")) -}) - +context("Peaklist generation works fine") + +test_that("Raw data can be loaded",{ + ms1data <- RHermes:::import_and_filter(lf = system.file("extdata", + "MS1TestData.mzML", + package = "RHermes")) + expect_length(ms1data, 3) + expect_equal(nrow(ms1data[[1]]), 40040) +}) + +test_that("ScanSearch works",{ + myHermes <- readRDS(system.file("extdata", + "exampleObject.rds", + package = "RHermes")) + myHermes <- processMS1(myHermes, system.file("extdata", + "MS1TestData.mzML", + package = "RHermes")) + #Same result as precalculated version + expect_equal(nrow(myHermes@data@PL[[2]]@peaklist), + nrow(myHermes@data@PL[[1]]@peaklist)) +}) + +test_that("Labelled proc works",{ + myHermes <- readRDS(system.file("extdata", + "exampleObject.rds", + package = "RHermes")) + myHermes <- processMS1(myHermes, + system.file("extdata", + "MS1TestData.mzML", + package = "RHermes"), + labelled = TRUE) + + expect_equal(nrow(myHermes@data@PL[[2]]@peaklist), 1378) +}) + +test_that("PL plot works", { + myHermes <- readRDS(system.file("extdata", + "exampleObject.rds", + package = "RHermes")) + + p <- RHermes::plotPL(myHermes, 1, "C3H7NO2", rtrange = c(0,1500), + dynamicaxis = TRUE, ads = NA) + expect_true(is(p, "plotly")) +}) + + +test_that("Coverage plot works", { + myHermes <- readRDS(system.file("extdata", + "exampleObject.rds", + package = "RHermes")) + myHermes <- processMS1(myHermes, + system.file("extdata", + "MS1TestData.mzML", + package = "RHermes"), + labelled = TRUE) + p <- RHermes:::plotCoverage(myHermes, 2) + expect_true(is(p[[1]], "plotly") & is(p[[2]], "plotly")) +}) + diff --git a/tests/testthat/test-SOI.R b/tests/testthat/test-SOI.R index 551e553..7b4e554 100644 --- a/tests/testthat/test-SOI.R +++ b/tests/testthat/test-SOI.R @@ -8,7 +8,6 @@ test_that("A SOI param class can be created",{ }) test_that("SOI generation works",{ - BiocParallel::register(BiocParallel::SerialParam()) myHermes <- readRDS(system.file("extdata", "exampleObject.rds", package = "RHermes")) @@ -16,31 +15,13 @@ test_that("SOI generation works",{ expect_equal(nrow(myHermes@data@SOI[[2]]@SOIList), 11) }) -test_that("Blank substraction is configured and works",{ - #Skip on bioconductor because this part requires that the user previously - #configures Keras and Tensorflow. - skip_on_bioc() - library(reticulate) - library(keras) +test_that("Blank substraction is configured and works", { library(data.table) - - #Has Python, Keras and Tensorflow - skip_if(!py_available(initialize = TRUE)) - expect(py_module_available("keras"), failure_message = "No Keras") - expect(py_module_available("tensorflow"), failure_message = "No TensorFlow") - - #Can load the model - model <- load_model_hdf5(system.file("extdata", - "ImprovedModel.h5", - package = "RHermes")) - expect(is(model, "python.builtin.object"), - failure_message = "Model doesn't load") - set.seed(1234) #Does the model work as intended? blank <- data.table(rt = seq(0,20,0.2), rtiv = rnorm(101, 10, 3), - formv = "foo", isov = "M0") + formv = "foo", isov = "M0", mz = 100) setkeyv(blank, "formv") group <- dplyr::tibble(start = c(0,10), end = c(10,20), peaks = list( @@ -59,16 +40,6 @@ test_that("Blank substraction is configured and works",{ expect_false(RHermes:::firstCleaning(1, group, blank)) #Blank-like expect_true(RHermes:::firstCleaning(2, group, blank)) #Totally different - #Check that the interpolation works and the network generates the right - #result (0, meaning sample and blank are the "same") - organizeddata <- RHermes:::prepareNetInput(1, group, blank) - organizeddata <- c(organizeddata[1, ], organizeddata[2, ]) - organizeddata <- rbind(organizeddata,organizeddata,organizeddata) - organizeddata <- keras::array_reshape(organizeddata, - c(nrow(organizeddata), 400), - order = "C") #ANN input - q <- model %>% predict(organizeddata) %>% k_argmax() - expect_true(all(q$numpy() == 0)) }) test_that("SOI plot works", { diff --git a/tests/testthat/test-prePL.R b/tests/testthat/test-prePL.R index 72d696b..340a2ce 100644 --- a/tests/testthat/test-prePL.R +++ b/tests/testthat/test-prePL.R @@ -1,70 +1,64 @@ -context("Formula and adduct table functionality") -test_that("Database importer works", { - hmdb <- RHermes:::database_importer(template = "hmdb") - expect_equal(nrow(hmdb), 59) - - norman <- RHermes:::database_importer(template = "norman") - expect_equal(nrow(norman), 106) - - custom <- RHermes:::database_importer( - template = "custom", filename = system.file("extdata", "hmdb.csv", - package = "RHermes")) - expect_equal(nrow(custom), 59) - - custom2 <- RHermes:::database_importer( - template = "custom", filename = system.file("extdata", "norman.xlsx", - package = "RHermes")) - expect_equal(nrow(custom2), 106) - - skip_if_offline() - kegg <- RHermes:::database_importer(template = "kegg_p", - keggpath = "hsa00010") - expect_equal(nrow(kegg), 24) -}) - -test_that("Adduct tables generate successfully",{ - ad <- RHermes:::adductTables(1,1) - expect_equal(nrow(ad[[1]]), 10) - expect_equal(nrow(ad[[2]]), 14) - expect_equal(ad[[2]][1,"adduct"], "M+H") -}) - - -context("Ionic formulas") -test_that("Ionic formulas generate correctly", { - BiocParallel::register(BiocParallel::SerialParam()) - hmdb <- RHermes:::database_importer(template = "hmdb") - ad <- RHermes:::adductTables(1,1) - colnames(hmdb)[c(2,3)] <- c("m","fms") - ionf <- RHermes:::IonicForm(hmdb[1:5,], ad[[2]]) - expect_length(ionf, 2) - expect_equal(nrow(ionf[[1]]), 70) -}) - - -context("Isotopic distribution calculation") -test_that("Isotopes generate nicely", { - BiocParallel::register(BiocParallel::SerialParam()) - ppm <- 2 - minmass <- 80 - maxmass <- 1050 - noiselevel <- 1e3 - FWHM <- 120000 - ion <- "+" - par <- ExpParam(ppm = ppm, res = FWHM, nthr = noiselevel, - minmz = minmass, maxmz = maxmass, ion = ion) - hmdb <- RHermes:::database_importer(template = "hmdb", minmass = 50, - maxmass = 100) - ad <- RHermes:::adductTables(1,1) - colnames(hmdb)[c(2,3)] <- c("m","fms") - test <- RHermes:::IonicForm(hmdb[1:5,], ad[[2]][1:5, ]) - IC <- RHermes:::IsoCalc(test[[1]], FWHM = par@res, intTHR = 0.2, kTHR = 1) - expect_length(IC, 2) - expect_length(IC[[1]], 25) - expect_equal(nrow(IC[[2]]), 5) -}) - -context("Parallel backend selection") -test_that("setCluster works", { - RHermes:::setCluster() -}) +context("Formula and adduct table functionality") +test_that("Database importer works", { + hmdb <- RHermes:::database_importer(template = "hmdb") + expect_equal(nrow(hmdb), 59) + + norman <- RHermes:::database_importer(template = "norman") + expect_equal(nrow(norman), 118) + + custom <- RHermes:::database_importer( + template = "custom", filename = system.file("extdata", "hmdb.csv", + package = "RHermes")) + expect_equal(nrow(custom), 59) + + custom2 <- RHermes:::database_importer( + template = "custom", filename = system.file("extdata", "norman.xlsx", + package = "RHermes")) + expect_equal(nrow(custom2), 118) + + skip_if_offline() + kegg <- RHermes:::database_importer(template = "kegg_p", + keggpath = "hsa00010") + expect_equal(nrow(kegg), 26) +}) + +test_that("Adduct tables generate successfully",{ + ad <- RHermes:::adductTables(1,1) + expect_equal(nrow(ad[[1]]), 11) + expect_equal(nrow(ad[[2]]), 21) + expect_equal(ad[[2]][1,"adduct"], "[M+H]+") +}) + + +context("Ionic formulas") +test_that("Ionic formulas generate correctly", { + hmdb <- RHermes:::database_importer(template = "hmdb") + ad <- RHermes:::adductTables(1,1) + colnames(hmdb)[c(2,3)] <- c("m","fms") + ionf <- RHermes:::IonicForm(hmdb[1:5,], ad[[2]][1:5,]) + expect_length(ionf, 2) + expect_equal(nrow(ionf[[1]]), 24) +}) + + +context("Isotopic distribution calculation") +test_that("Isotopes generate nicely", { + ppm <- 2 + minmass <- 80 + maxmass <- 1050 + noiselevel <- 1e3 + FWHM <- 120000 + ion <- "+" + par <- ExpParam(ppm = ppm, res = FWHM, nthr = noiselevel, + minmz = minmass, maxmz = maxmass, ion = ion) + hmdb <- RHermes:::database_importer(template = "hmdb", minmass = 50, + maxmass = 100) + ad <- RHermes:::adductTables(1,1) + colnames(hmdb)[c(2,3)] <- c("m","fms") + test <- RHermes:::IonicForm(hmdb[1:5,], ad[[2]][1:5, ]) + IC <- RHermes:::IsoCalc(test[[1]], FWHM = par@res, intTHR = 0.2, kTHR = 1) + expect_length(IC, 2) + expect_length(IC[[1]], 24) + expect_equal(nrow(IC[[2]]), 6) +}) + diff --git a/vignettes/RHermes_UserGuide.Rmd b/vignettes/RHermes_UserGuide.Rmd index e787108..076f0c0 100644 --- a/vignettes/RHermes_UserGuide.Rmd +++ b/vignettes/RHermes_UserGuide.Rmd @@ -14,7 +14,6 @@ vignette: | --- ```{r setup, include=FALSE} -BiocParallel::register(BiocParallel::SerialParam()) knitr::opts_chunk$set(echo = TRUE, warning=FALSE, message = FALSE) knitr::opts_chunk$set(collapse = TRUE) library(plotly) @@ -28,11 +27,6 @@ This document will guide you through the package functions and show you what a typical RHermes workflow looks like. ## Installation -The recommended specs for RHermes are: - - - At least a 4 core processor - - 16 GB of RAM or more - - An internet connection to perform KEGG queries You can download the development version from [GitHub](https://github.com/RogerGinBer/RHermes) with: @@ -45,6 +39,7 @@ devtools::install_github("RogerGinBer/RHermes") ## MS1 workflow ### Setting up the RHermesExp object + All the information generated by RHermes is stored in a single RHermesExp object. To generate it, start with: @@ -99,7 +94,7 @@ multiplicity <= 2 (for instance [2M+H]+, [M+2H]2+ and so on). You may also just select a subset of adducts using with `adlist` param: ```{r} myHermes <- setDB(myHermes, db = "hmdb", admult = 2, adcharge = 2, - adlist = c("M+H", "M+Na", "M+K", "M+2H", "2M+H")) + adlist = c("[M+H]+", "[M+Na]+", "[M+K]+", "[M+NH4]+", "[2M+H]+")) ``` @@ -112,16 +107,18 @@ new adduct profiles with addAd() and remove them with remAd(): ```{r} #We will manually add the "M+2H" adduct -myHermes <- addAd(myHermes, name = "M+2H", deltam = 2*1.00727600, ch = 2, +myHermes <- addAd(myHermes, name = "[M+2H]2+", deltam = 2*1.00727600, ch = 2, mult = 1, toadd = "H2") #For instance, remove adducts of unused solvents myHermes <- remAd(myHermes, c("M+DMSO+H", "M+IsoProp+H")) ``` + ```{r eval=FALSE} adlist(myHermes) ``` + ```{r echo=FALSE} knitr::kable(adlist(myHermes)) ``` @@ -324,21 +321,6 @@ RHermesGUI() ``` And the app should start in your default browser. -## Advanced functionality -### Parallelization -Another parameter you want to set up (especially if you're dealing with -many formulas and adducts - say >10K formulas) is a custom parallel -backend. - -RHermes functions use your default backend (bpparam()), but you can set any -backend you like with register(). You can use setCluster to automatically -select an appropiate backend based on your OS and RAM. - -```{r} -BiocParallel::register(setCluster()) -``` - - ## Session Info ```{r}