From b1922c5dcdcaa2960fe1cdd47ed1351ea89d052e Mon Sep 17 00:00:00 2001
From: jorainer <johannes.rainer@gmail.com>
Date: Tue, 2 Apr 2024 09:21:49 +0200
Subject: [PATCH 01/14] ci: force installation of MsCoreUtils from github

---
 .github/workflows/check-bioc.yml | 5 +++--
 DESCRIPTION                      | 4 ++--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/check-bioc.yml b/.github/workflows/check-bioc.yml
index a9d90f242..75482f579 100644
--- a/.github/workflows/check-bioc.yml
+++ b/.github/workflows/check-bioc.yml
@@ -54,8 +54,8 @@ jobs:
       matrix:
         config:
           - { os: ubuntu-latest, r: 'devel', bioc: 'devel', cont: "bioconductor/bioconductor_docker:devel", rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" }
-          - { os: macOS-latest, r: 'devel', bioc: '3.19'}
-          - { os: windows-latest, r: 'devel', bioc: '3.19'}
+          - { os: macOS-latest, r: 'next', bioc: '3.19'}
+          - { os: windows-latest, r: 'next', bioc: '3.19'}
     env:
       R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
       RSPM: ${{ matrix.config.rspm }}
@@ -178,6 +178,7 @@ jobs:
           ## Ideally, all dependencies should get installed in the first pass.
 
           ## Pass #1 at installing dependencies
+          BiocManager::install("RforMassSpectrometry/MsCoreUtils")
           BiocManager::install(c("ncdf4", "mzR"))
           message(paste('****', Sys.time(), 'pass number 1 at installing dependencies: local dependencies ****'))
           remotes::install_local(dependencies = TRUE, repos =
diff --git a/DESCRIPTION b/DESCRIPTION
index d5e084bc5..d63fae793 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -41,8 +41,8 @@ Authors@R: c(
 	   role = "ctb",
 	   comment = c(ORCID = "0000-0002-5492-6904")),
 	   person(given = "Carl", family = "Brunius",
-	   email = "carl.brunius@chalmers.se", 
-	   role = "ctb", 
+	   email = "carl.brunius@chalmers.se",
+	   role = "ctb",
 	   comment = c(ORCID = "0000-0003-3957-870X"))
 	   )
 Depends:

From 8fb85251dfe89b71b52aeeba1432d61d10b99380 Mon Sep 17 00:00:00 2001
From: jorainer <johannes.rainer@gmail.com>
Date: Tue, 2 Apr 2024 10:06:01 +0200
Subject: [PATCH 02/14] ci: force installation of some packages from source

---
 .github/workflows/check-bioc.yml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.github/workflows/check-bioc.yml b/.github/workflows/check-bioc.yml
index 75482f579..20e710c8d 100644
--- a/.github/workflows/check-bioc.yml
+++ b/.github/workflows/check-bioc.yml
@@ -177,6 +177,11 @@ jobs:
           ## https://github.com/r-lib/remotes/issues/296
           ## Ideally, all dependencies should get installed in the first pass.
 
+          ## Workaround for problems with cached S4objects in binary packages
+          BiocManager::install("GenomeInfoDb", force = TRUE, type = "source")
+          BiocManager::install("S4Vectors", force = TRUE, type = "source")
+          BiocManager::install("SummarizedExperiment", force = TRUE, type = "source")
+
           ## Pass #1 at installing dependencies
           BiocManager::install("RforMassSpectrometry/MsCoreUtils")
           BiocManager::install(c("ncdf4", "mzR"))

From f2f447521a5abd5393c04d802053ab2abd97035c Mon Sep 17 00:00:00 2001
From: jorainer <johannes.rainer@gmail.com>
Date: Tue, 2 Apr 2024 13:38:48 +0200
Subject: [PATCH 03/14] ci: install packages from source

---
 .github/workflows/check-bioc.yml | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/check-bioc.yml b/.github/workflows/check-bioc.yml
index 20e710c8d..1a4dc65ea 100644
--- a/.github/workflows/check-bioc.yml
+++ b/.github/workflows/check-bioc.yml
@@ -186,8 +186,7 @@ jobs:
           BiocManager::install("RforMassSpectrometry/MsCoreUtils")
           BiocManager::install(c("ncdf4", "mzR"))
           message(paste('****', Sys.time(), 'pass number 1 at installing dependencies: local dependencies ****'))
-          remotes::install_local(dependencies = TRUE, repos =
-          BiocManager::repositories(), build_vignettes = FALSE, upgrade = TRUE)
+          remotes::install_local(dependencies = TRUE, repos = BiocManager::repositories(), build_vignettes = FALSE, upgrade = TRUE, type = "source")
 
           BiocManager::install(c("rmarkdown", "BiocStyle"))
         continue-on-error: true
@@ -197,19 +196,12 @@ jobs:
         run: |
           ## Pass #2 at installing dependencies
           message(paste('****', Sys.time(), 'pass number 2 at installing dependencies: any remaining dependencies ****'))
-          remotes::install_local(dependencies = TRUE, repos = BiocManager::repositories(), build_vignettes = FALSE, upgrade = TRUE)
+          remotes::install_local(dependencies = TRUE, repos = BiocManager::repositories(), build_vignettes = FALSE, upgrade = TRUE, type = "source")
 
           ## Manually install packages that seem to be skipped.
           message(paste('****', Sys.time(), 'force installation of selected packages  ****'))
           BiocManager::install(c("faahKO"))
-          BiocManager::install("ProtGenerics")
-          BiocManager::install("MSnbase")
-          BiocManager::install("mzR", type = "source", force = TRUE)
-          BiocManager::install("Spectra")
-          BiocManager::install("MsBackendMgf")
-          BiocManager::install("MetaboCoreUtils")
           BiocManager::install("magick")
-          BiocManager::install("RforMassSpectrometry/MsExperiment")
 
           ## For running the checks
           message(paste('****', Sys.time(), 'installing rcmdcheck and BiocCheck ****'))

From e1504ab2b4c954bf3762a89c96a8610e4d2cc007 Mon Sep 17 00:00:00 2001
From: jorainer <johannes.rainer@gmail.com>
Date: Tue, 2 Apr 2024 14:32:21 +0200
Subject: [PATCH 04/14] ci: some more tentative fixes

---
 .github/workflows/check-bioc.yml | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/check-bioc.yml b/.github/workflows/check-bioc.yml
index 1a4dc65ea..facf49c0f 100644
--- a/.github/workflows/check-bioc.yml
+++ b/.github/workflows/check-bioc.yml
@@ -183,10 +183,8 @@ jobs:
           BiocManager::install("SummarizedExperiment", force = TRUE, type = "source")
 
           ## Pass #1 at installing dependencies
-          BiocManager::install("RforMassSpectrometry/MsCoreUtils")
-          BiocManager::install(c("ncdf4", "mzR"))
           message(paste('****', Sys.time(), 'pass number 1 at installing dependencies: local dependencies ****'))
-          remotes::install_local(dependencies = TRUE, repos = BiocManager::repositories(), build_vignettes = FALSE, upgrade = TRUE, type = "source")
+          remotes::install_local(dependencies = TRUE, repos = BiocManager::repositories(), build_vignettes = FALSE, force = TRUE, type = "source")
 
           BiocManager::install(c("rmarkdown", "BiocStyle"))
         continue-on-error: true
@@ -196,11 +194,12 @@ jobs:
         run: |
           ## Pass #2 at installing dependencies
           message(paste('****', Sys.time(), 'pass number 2 at installing dependencies: any remaining dependencies ****'))
-          remotes::install_local(dependencies = TRUE, repos = BiocManager::repositories(), build_vignettes = FALSE, upgrade = TRUE, type = "source")
+          remotes::install_local(dependencies = TRUE, repos = BiocManager::repositories(), build_vignettes = FALSE, force = TRUE, type = "source")
 
           ## Manually install packages that seem to be skipped.
           message(paste('****', Sys.time(), 'force installation of selected packages  ****'))
-          BiocManager::install(c("faahKO"))
+          BiocManager::install("xcms", type = "source")
+          BiocManager::install("faahKO", type = "source", force = TRUE)
           BiocManager::install("magick")
 
           ## For running the checks

From cdef6dc481f5b7e78027e7b018b37477ccc01b94 Mon Sep 17 00:00:00 2001
From: jorainer <johannes.rainer@gmail.com>
Date: Tue, 2 Apr 2024 16:17:34 +0200
Subject: [PATCH 05/14] ci: force installation of MsCoreUtils from github

---
 .github/workflows/check-bioc.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/check-bioc.yml b/.github/workflows/check-bioc.yml
index facf49c0f..a573c9dc1 100644
--- a/.github/workflows/check-bioc.yml
+++ b/.github/workflows/check-bioc.yml
@@ -198,6 +198,7 @@ jobs:
 
           ## Manually install packages that seem to be skipped.
           message(paste('****', Sys.time(), 'force installation of selected packages  ****'))
+          BiocManager::install("RforMassSpectrometry/MsCoreUtils", force = TRUE)
           BiocManager::install("xcms", type = "source")
           BiocManager::install("faahKO", type = "source", force = TRUE)
           BiocManager::install("magick")

From b52457ae41319d08082cedac2e243a52f6fa0c17 Mon Sep 17 00:00:00 2001
From: jorainer <johannes.rainer@gmail.com>
Date: Tue, 2 Apr 2024 16:48:24 +0200
Subject: [PATCH 06/14] ci: install all packages from source

---
 .github/workflows/check-bioc.yml | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/check-bioc.yml b/.github/workflows/check-bioc.yml
index a573c9dc1..1b389dc59 100644
--- a/.github/workflows/check-bioc.yml
+++ b/.github/workflows/check-bioc.yml
@@ -160,12 +160,12 @@ jobs:
       - name: Install BiocManager
         run: |
           message(paste('****', Sys.time(), 'installing BiocManager ****'))
-          remotes::install_cran("BiocManager")
+          remotes::install_cran("BiocManager", type = "source")
         shell: Rscript {0}
 
       - name: Set BiocVersion
         run: |
-          BiocManager::install(version = "${{ matrix.config.bioc }}", ask = FALSE)
+          BiocManager::install(version = "${{ matrix.config.bioc }}", ask = FALSE, type = "source")
         shell: Rscript {0}
 
       - name: Install dependencies pass 1
@@ -186,7 +186,7 @@ jobs:
           message(paste('****', Sys.time(), 'pass number 1 at installing dependencies: local dependencies ****'))
           remotes::install_local(dependencies = TRUE, repos = BiocManager::repositories(), build_vignettes = FALSE, force = TRUE, type = "source")
 
-          BiocManager::install(c("rmarkdown", "BiocStyle"))
+          BiocManager::install(c("rmarkdown", "BiocStyle"), type = "source")
         continue-on-error: true
         shell: Rscript {0}
 
@@ -201,19 +201,19 @@ jobs:
           BiocManager::install("RforMassSpectrometry/MsCoreUtils", force = TRUE)
           BiocManager::install("xcms", type = "source")
           BiocManager::install("faahKO", type = "source", force = TRUE)
-          BiocManager::install("magick")
+          BiocManager::install("magick", type = "source")
 
           ## For running the checks
           message(paste('****', Sys.time(), 'installing rcmdcheck and BiocCheck ****'))
-          remotes::install_cran("rcmdcheck")
-          BiocManager::install(c("BiocCheck", "DBI"))
+          remotes::install_cran("rcmdcheck", type = "source")
+          BiocManager::install("BiocCheck", type = "source")
         shell: Rscript {0}
 
       - name: Install BiocGenerics
         if:  env.has_RUnit == 'true'
         run: |
           ## Install BiocGenerics
-          BiocManager::install("BiocGenerics")
+          BiocManager::install("BiocGenerics", type = "source")
         shell: Rscript {0}
 
       - name: Install covr

From 93a45c3c030ca9ce08e0e8de50f6ea2cf9b4564e Mon Sep 17 00:00:00 2001
From: jorainer <johannes.rainer@gmail.com>
Date: Wed, 3 Apr 2024 07:21:27 +0200
Subject: [PATCH 07/14] ci: install also RCurl

---
 .github/workflows/check-bioc.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/check-bioc.yml b/.github/workflows/check-bioc.yml
index 1b389dc59..12bc7502e 100644
--- a/.github/workflows/check-bioc.yml
+++ b/.github/workflows/check-bioc.yml
@@ -202,6 +202,8 @@ jobs:
           BiocManager::install("xcms", type = "source")
           BiocManager::install("faahKO", type = "source", force = TRUE)
           BiocManager::install("magick", type = "source")
+          BiocManager::install("RCurl", type = "source", force = TRUE)
+          BiocManager::install("mzR")
 
           ## For running the checks
           message(paste('****', Sys.time(), 'installing rcmdcheck and BiocCheck ****'))

From 78156480a40922d4421062a9d75c7d932106d7b7 Mon Sep 17 00:00:00 2001
From: jorainer <johannes.rainer@gmail.com>
Date: Wed, 3 Apr 2024 08:25:20 +0200
Subject: [PATCH 08/14] ci: add RCurl to Suggests

---
 .github/workflows/check-bioc.yml | 1 +
 DESCRIPTION                      | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/check-bioc.yml b/.github/workflows/check-bioc.yml
index 12bc7502e..35a8030b9 100644
--- a/.github/workflows/check-bioc.yml
+++ b/.github/workflows/check-bioc.yml
@@ -179,6 +179,7 @@ jobs:
 
           ## Workaround for problems with cached S4objects in binary packages
           BiocManager::install("GenomeInfoDb", force = TRUE, type = "source")
+          BiocManager::install("BiocParallel", force = TRUE, type = "source")
           BiocManager::install("S4Vectors", force = TRUE, type = "source")
           BiocManager::install("SummarizedExperiment", force = TRUE, type = "source")
 
diff --git a/DESCRIPTION b/DESCRIPTION
index d63fae793..44f3546a7 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -84,7 +84,8 @@ Suggests:
     multtest,
     MsBackendMgf,
     signal,
-    mgcv
+    mgcv,
+    RCurl
 Enhances:
     Rgraphviz,
     rgl

From 706b730a00186f40ed88c7fe30a372e87213c9a7 Mon Sep 17 00:00:00 2001
From: jorainer <johannes.rainer@gmail.com>
Date: Wed, 3 Apr 2024 09:42:30 +0200
Subject: [PATCH 09/14] ci: ensure packages are indeed installed from source

---
 .github/workflows/check-bioc.yml | 14 ++++++++------
 DESCRIPTION                      |  3 +--
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/check-bioc.yml b/.github/workflows/check-bioc.yml
index 35a8030b9..6372778e9 100644
--- a/.github/workflows/check-bioc.yml
+++ b/.github/workflows/check-bioc.yml
@@ -182,10 +182,16 @@ jobs:
           BiocManager::install("BiocParallel", force = TRUE, type = "source")
           BiocManager::install("S4Vectors", force = TRUE, type = "source")
           BiocManager::install("SummarizedExperiment", force = TRUE, type = "source")
+          ## install xcms with dependencies - to ensure dependencies are installed from source;
+          ## somehow install_local installs binary packages instead.
+          BiocManager::install("mzR", force = TRUE)
+          BiocManager::install("MSnbase", force = TRUE, type = "source")
+          BiocManager::install("xcms", force = TRUE, type = "source", dependencies = TRUE)
+          BiocManager::install("faahKO", type = "source")
 
           ## Pass #1 at installing dependencies
           message(paste('****', Sys.time(), 'pass number 1 at installing dependencies: local dependencies ****'))
-          remotes::install_local(dependencies = TRUE, repos = BiocManager::repositories(), build_vignettes = FALSE, force = TRUE, type = "source")
+          remotes::install_local(dependencies = TRUE, repos = BiocManager::repositories(), build_vignettes = FALSE, build_manual = FALSE, type = "source")
 
           BiocManager::install(c("rmarkdown", "BiocStyle"), type = "source")
         continue-on-error: true
@@ -195,16 +201,12 @@ jobs:
         run: |
           ## Pass #2 at installing dependencies
           message(paste('****', Sys.time(), 'pass number 2 at installing dependencies: any remaining dependencies ****'))
-          remotes::install_local(dependencies = TRUE, repos = BiocManager::repositories(), build_vignettes = FALSE, force = TRUE, type = "source")
+          remotes::install_local(dependencies = TRUE, repos = BiocManager::repositories(), build_vignettes = FALSE, type = "source")
 
           ## Manually install packages that seem to be skipped.
           message(paste('****', Sys.time(), 'force installation of selected packages  ****'))
           BiocManager::install("RforMassSpectrometry/MsCoreUtils", force = TRUE)
-          BiocManager::install("xcms", type = "source")
-          BiocManager::install("faahKO", type = "source", force = TRUE)
           BiocManager::install("magick", type = "source")
-          BiocManager::install("RCurl", type = "source", force = TRUE)
-          BiocManager::install("mzR")
 
           ## For running the checks
           message(paste('****', Sys.time(), 'installing rcmdcheck and BiocCheck ****'))
diff --git a/DESCRIPTION b/DESCRIPTION
index 44f3546a7..d63fae793 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -84,8 +84,7 @@ Suggests:
     multtest,
     MsBackendMgf,
     signal,
-    mgcv,
-    RCurl
+    mgcv
 Enhances:
     Rgraphviz,
     rgl

From fd09f2c9b9adfc76458d5320a33a227ae7e8f4d2 Mon Sep 17 00:00:00 2001
From: jorainer <johannes.rainer@gmail.com>
Date: Wed, 3 Apr 2024 11:43:34 +0200
Subject: [PATCH 10/14] tests: disable parallel processing in unit tests

---
 tests/testthat.R | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/tests/testthat.R b/tests/testthat.R
index a3f2a3018..015120e3e 100644
--- a/tests/testthat.R
+++ b/tests/testthat.R
@@ -3,14 +3,8 @@ library(xcms)
 library(faahKO)
 library(MSnbase)
 library(msdata)
-
-if (.Platform$OS.type == "unix") {
-    prm <- MulticoreParam(3)
-} else {
-    # prm <- SnowParam(3)
-    prm <- SerialParam()
-}
-register(bpstart(prm))
+library(BiocParallel)
+register(SerialParam())
 
 ## Create some objects we can re-use in different tests:
 faahko_3_files <- c(system.file('cdf/KO/ko15.CDF', package = "faahKO"),
@@ -19,7 +13,7 @@ faahko_3_files <- c(system.file('cdf/KO/ko15.CDF', package = "faahKO"),
 
 cwp <- CentWaveParam(noise = 10000, snthresh = 40, prefilter = c(3, 10000))
 faahko_od <- readMSData(faahko_3_files, mode = "onDisk")
-faahko_xod <- findChromPeaks(faahko_od, param = cwp)
+faahko_xod <- findChromPeaks(faahko_od, param = cwp, BPPARAM = SerialParam())
 od_x <- faahko_od
 mzr <- matrix(c(335, 335, 344, 344), ncol = 2, byrow = TRUE)
 od_chrs <- chromatogram(od_x, mz = mzr)

From 92b7016ed65b530dfdc61c6c340f6477a9577e75 Mon Sep 17 00:00:00 2001
From: jorainer <johannes.rainer@gmail.com>
Date: Wed, 3 Apr 2024 12:50:46 +0200
Subject: [PATCH 11/14] tests: remove obsolete bpstop() call, keep prm as SerialParam

---
 tests/testthat.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/testthat.R b/tests/testthat.R
index 015120e3e..6034409a2 100644
--- a/tests/testthat.R
+++ b/tests/testthat.R
@@ -4,6 +4,8 @@ library(faahKO)
 library(MSnbase)
 library(msdata)
 library(BiocParallel)
+prm <- SerialParam()
+
 register(SerialParam())
 
 ## Create some objects we can re-use in different tests:
@@ -79,5 +81,3 @@ ref_mz_rt <- featureDefinitions(ref)[, c("mzmed","rtmed")]
 tst <- loadXcmsData("faahko_sub2")
 
 test_check("xcms")
-
-bpstop(prm)

From ff2fadc971ac5a779671a4412011cac0d682078b Mon Sep 17 00:00:00 2001
From: jorainer <johannes.rainer@gmail.com>
Date: Wed, 3 Apr 2024 14:22:13 +0200
Subject: [PATCH 12/14] docs: small fix in the vignette

---
 vignettes/xcms.Rmd | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/vignettes/xcms.Rmd b/vignettes/xcms.Rmd
index 924e1b870..9434c2667 100644
--- a/vignettes/xcms.Rmd
+++ b/vignettes/xcms.Rmd
@@ -1293,7 +1293,7 @@ laboratories and over time, the same samples may result in variation in
 retention time, especially because the LC system can be quite unstable. In these
 cases, an alignment step using the `adjustRtime()` function with the
 `LamaParam` parameter can allow the user to perform this type of alignment.
-We will go through this step by step below. 
+We will go through this step by step below.
 
 Let's load an already analyzed dataset `ref` and our previous dataset before
 alignment, which will be `tst`. We will first restrict their retention time
@@ -1307,10 +1307,10 @@ tst <- loadXcmsData("faahko_sub2")
 Now, we will attempt to align these two samples with the previous dataset. The
 first step is to extract landmark features (referred to as `lamas`). To achieve
 this, we will identify the features present in every QC sample of the `ref`
-dataset. To do so, we will categorize (using `factor()`) our data by 
+dataset. To do so, we will categorize (using `factor()`) our data by
 `sample_type` and only retain the QC samples. This variable will be utilized to
-filter the features using the `PercentMissingFilter()` parameter within the 
-`filterFeatures()` function (see section above for more information on this 
+filter the features using the `PercentMissingFilter()` parameter within the
+`filterFeatures()` function (see section above for more information on this
 method)
 
 ```{r}
@@ -1318,7 +1318,7 @@ f <- sampleData(ref)$sample_type
 f[f != "QC"] <- NA
 ref <- filterFeatures(ref, PercentMissingFilter(threshold = 0, f = f))
 ref_mz_rt <- featureDefinitions(ref)[, c("mzmed","rtmed")]
-ref_mz_rt
+head(ref_mz_rt)
 ```
 
 This is what the `lamas` input should look like for alignment. In terms of
@@ -1404,16 +1404,16 @@ chromatographic peaks along with the fitted model line.
 ```{r}
 #access summary of matches and model information
 summary <- summarizeLamaMatch(param)
-summary 
+summary
 
 # coverage for each file
 summary$Matched_peaks / summary$Total_peaks * 100
 
-#access the information on the model of for the first file 
+#access the information on the model of for the first file
 summary$model_summary[[1]]
 
-# Plot obs vs. ref with fitting line 
-plot(param, index = 1L, main = "ChromPeaks versus Lamas for the first file", 
+# Plot obs vs. ref with fitting line
+plot(param, index = 1L, main = "ChromPeaks versus Lamas for the first file",
      colPoint = "red")
 ```
 

From ab847b08faa271d0472ab57063f5352b53daf732 Mon Sep 17 00:00:00 2001
From: jorainer <johannes.rainer@gmail.com>
Date: Thu, 4 Apr 2024 11:35:37 +0200
Subject: [PATCH 13/14] docs: fix typos mentioned by Carl

---
 vignettes/xcms.Rmd | 244 +++++++++++++++++++++++----------------------
 1 file changed, 125 insertions(+), 119 deletions(-)

diff --git a/vignettes/xcms.Rmd b/vignettes/xcms.Rmd
index 9434c2667..1931aaa5c 100644
--- a/vignettes/xcms.Rmd
+++ b/vignettes/xcms.Rmd
@@ -1,5 +1,5 @@
 ---
-title: "LC-MS data pre-processing and analysis with xcms"
+title: "LC-MS data preprocessing and analysis with xcms"
 package: xcms
 output:
   BiocStyle::html_document:
@@ -7,7 +7,7 @@ output:
     includes:
       in_header: xcms.bioschemas.html
 vignette: >
-  %\VignetteIndexEntry{LC-MS data prep-rocessing and analysis with xcms}
+  %\VignetteIndexEntry{LC-MS data preprocessing and analysis with xcms}
   %\VignetteEngine{knitr::rmarkdown}
   %\VignetteEncoding{UTF-8}
   %\VignetteDepends{xcms,RColorBrewer,faahKO,pander,BiocStyle,pheatmap,SummarizedExperiment}
@@ -40,8 +40,8 @@ register(SerialParam())
 # Introduction
 
 The `r Biocpkg("xcms")` package provides the functionality to perform the
-pre-processing of LC-MS, GC-MS or LC-MS/MS data in which raw signals from mzML,
-mzXML or CDF files are processed into *feature* abundances. This pre-processing
+preprocessing of LC-MS, GC-MS or LC-MS/MS data in which raw signals from mzML,
+mzXML or CDF files are processed into *feature* abundances. This preprocessing
 includes chromatographic peak detection, sample alignment and correspondence
 analysis.
 
@@ -50,13 +50,13 @@ and has since been updated and modernized in several rounds to better integrate
 it with other R-based packages for the analysis of untargeted metabolomics
 data. This includes version 3 of *xcms* that used the `r Biocpkg("MSnbase")`
 package for MS data representation [@gattoMSnbaseEfficientElegant2020a]. The
-most recent update (*xcms* version 4) enables in addition pre-processing of MS
+most recent update (*xcms* version 4) enables in addition preprocessing of MS
 data represented by the modern `r Biocpkg("MsExperiment")` and
 `r Biocpkg("Spectra")` packages which provides an even better integration with
 the [RforMassSpectrometry](https://rformassspectrometry.org) R package ecosystem
 simplifying e.g. also compound annotation [@rainer_modular_2022].
 
-This document describes data import, exploration and pre-processing of a simple
+This document describes data import, exploration and preprocessing of a simple
 test LC-MS data set with the *xcms* package version >= 4. The same functions can
 be applied to the older *MSnbase*-based workflows (xcms version 3). Additional
 documents and tutorials covering also other topics of untargeted metabolomics
@@ -65,7 +65,7 @@ tutorial](https://jorainer.github.io/xcmsTutorials) available with more examples
 and details.
 
 
-# Pre-processing of LC-MS data
+# Preprocessing of LC-MS data
 
 ## Data import
 
@@ -89,7 +89,7 @@ Below we load all required packages, locate the raw CDF files within the
 *faahKO* package and build a *phenodata* `data.frame` describing the
 experimental setup. Generally, such data frames should contain all relevant
 experimental variables and sample descriptions (including also the names of the
-raw data files) and will be imported into R using either the `read.table`
+raw data files) and will be imported into R using either the `read.table()`
 function (if the file is in *csv* or tabulator delimited text file format) or
 also using functions from the *readxl* R package if it is in Excel file format.
 
@@ -140,21 +140,22 @@ more details on `Spectra` backends and how to change between them.
 
 The `MsExperiment` object is a simple and flexible container for MS
 experiments. The *raw* MS data is stored as a `Spectra` object that can be
-accessed through the `spectra` function.
+accessed through the `spectra()` function.
 
 ```{r}
 spectra(faahko)
 ```
 
-All spectra are organized *sequentially* (i.e., not by file) but the `fromFile`
-function can be used to get for each spectrum the information to which of the
-data files it belongs. Below we simply count the number of spectra per file.
+All spectra are organized *sequentially* (i.e., not by file) but the
+`fromFile()` function can be used to get for each spectrum the information to
+which of the data files it belongs. Below we simply count the number of spectra
+per file.
 
 ```{r}
 table(fromFile(faahko))
 ```
 
-Information on samples can be retrieved through the `sampleData` function.
+Information on samples can be retrieved through the `sampleData()` function.
 
 ```{r}
 sampleData(faahko)
@@ -171,7 +172,7 @@ sampleData(faahko_3)
 ```
 
 As a first evaluation of the data we below plot the base peak chromatogram (BPC)
-for each file in our experiment. We use the `chromatogram` method and set the
+for each file in our experiment. We use the `chromatogram()` method and set the
 `aggregationFun` to `"max"` to return for each spectrum the maximal intensity
 and hence create the BPC from the raw data. To create a total ion chromatogram
 we could set `aggregationFun` to `"sum"`.
@@ -187,7 +188,7 @@ names(group_colors) <- c("KO", "WT")
 plot(bpis, col = group_colors[sampleData(faahko)$sample_group])
 ```
 
-The `chromatogram` method returned a `MChromatograms` object that organizes
+The `chromatogram()` method returned a `MChromatograms` object that organizes
 individual `Chromatogram` objects (which in fact contain the chromatographic
 data) in a two-dimensional array: columns represent samples and rows
 (optionally) m/z and/or retention time ranges. Below we extract the chromatogram
@@ -201,7 +202,7 @@ intensity(bpi_1) |> head()
 
 From the BPC above it seems that after around 4200 seconds no signal is measured
 anymore. Thus, we filter below the full data set to a retention time range from
-2550 to 4250 seconds using the `filterRt` function. Note that at present this
+2550 to 4250 seconds using the `filterRt()` function. Note that at present this
 will only subset the spectra within the `MsExperiment`. Subsequently we
 re-create also the BPC.
 
@@ -213,8 +214,8 @@ bpis <- chromatogram(faahko, aggregationFun = "max")
 
 We next create boxplots representing the distribution of the total ion currents
 per data file. Such plots can be very useful to spot potentially problematic MS
-runs. To extract this information, we use the `tic` function on the `Spectra`
-object within `faahko` and split the values by file using `fromFile`.
+runs. To extract this information, we use the `tic()` function on the `Spectra`
+object within `faahko` and split the values by file using `fromFile()`.
 
 ```{r data-inspection-tic-boxplot, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4, fig.cap = "Distribution of total ion currents per file." }
 ## Get the total ion current by file
@@ -229,7 +230,7 @@ In addition, we can also cluster the samples based on similarity of their base
 peak chromatograms. Samples would thus be grouped based on similarity of their
 LC runs. For that we need however to *bin* the data along the retention time
 axis, since retention times will generally differ between samples. Below we use
-the `bin` function on the BPC to bin intensities into 2 second wide retention
+the `bin()` function on the BPC to bin intensities into 2 second wide retention
 time bins. The clustering is then performed using complete linkage hierarchical
 clustering on the pairwise correlations of the binned base peak chromatograms.
 
@@ -258,7 +259,7 @@ same sample index having the most similar BPC.
 
 Chromatographic peak detection aims at identifying all signal in each sample
 created from ions of the same originating compound species. Chromatographic peak
-detection can be performed in *xcms* with the `findChromPeaks` function and a
+detection can be performed in *xcms* with the `findChromPeaks()` function and a
 *parameter* object which defines and configures the algorithm that should be
 used (see `?findChromPeaks` for a list of supported algorithms). Before running
 any peak detection it is however strongly suggested to first visually inspect
@@ -267,7 +268,7 @@ be present in the samples in order to evaluate and adapt the settings of the
 peak detection algorithm since the default settings will not be appropriate for
 most LC-MS setups.
 
-Below we extract the EIC for one compound using the `chromatogram` function by
+Below we extract the EIC for one compound using the `chromatogram()` function by
 specifying in addition the m/z and retention time range where we would expect
 the signal for that compound.
 
@@ -280,10 +281,10 @@ chr_raw <- chromatogram(faahko, mz = mzr, rt = rtr)
 plot(chr_raw, col = group_colors[chr_raw$sample_group])
 ```
 
-Note that `Chromatogram` objects extracted by the `chromatogram` method contain
-an `NA` value if in a certain scan (i.e. in a spectrum for a specific retention
-time) no signal was measured in the respective m/z range. This is reflected by
-the lines not being drawn as continuous lines in the plot above.
+Note that `Chromatogram` objects extracted by the `chromatogram()` method
+contain an `NA` value if in a certain scan (i.e. in a spectrum for a specific
+retention time) no signal was measured in the respective m/z range. This is
+reflected by the lines not being drawn as continuous lines in the plot above.
 
 The peak above has thus a width of about 50 seconds. We can use this information
 to define the `peakwidth` parameter of the *centWave* peak detection method
@@ -337,7 +338,7 @@ parameter. Also, since less data is available to the algorithms, background
 signal estimation is performed differently and different settings for `snthresh`
 will need to be used (generally a lower `snthresh` will be used for EICs since
 the estimated background signal tends to be higher for data subsets than for the
-full data). Below we perform the peak detection with the `findChromPeaks`
+full data). Below we perform the peak detection with the `findChromPeaks()`
 function on the EIC generated above. The submitted *parameter* object defines
 which algorithm will be used and allows to define the settings for this
 algorithm. We use a `CentWaveParam` parameter object to use and configure the
@@ -347,27 +348,27 @@ algorithm. We use a `CentWaveParam` parameter object to use and configure the
 xchr <- findChromPeaks(chr_raw, param = CentWaveParam(snthresh = 2))
 ```
 
-We can access the identified chromatographic peaks with the `chromPeaks`
+We can access the identified chromatographic peaks with the `chromPeaks()`
 function.
 
 ```{r peak-detection-eic-chromPeaks}
 chromPeaks(xchr)
 ```
 
-Parallel to the `chromPeaks` matrix there is also a `chromPeakData` data frame
-that allows to add arbitrary annotations to each chromatographic peak, such as
-e.g. the MS level in which the peak was detected:
+Parallel to the `chromPeaks()` matrix there is also a `chromPeakData()` data
+frame that allows to add arbitrary annotations to each chromatographic peak,
+such as e.g. the MS level in which the peak was detected:
 
 ```{r peak-detection-chromatogram-chromPeakData}
 chromPeakData(xchr)
 ```
 
 Below we plot the EIC along with all identified chromatographic peaks using the
-`plot` function on the result object from above. Additional parameters `peakCol`
-and `peakBg` allow to define a foreground and background (fill) color for each
-identified chromatographic peak in the provided result object (i.e., we need to
-define one color for each row of `chromPeaks(xchr)` - column `"column"` (or
-`"sample"` if present) in that peak matrix specifies the sample in which the
+`plot()` function on the result object from above. Additional parameters
+`peakCol` and `peakBg` allow to define a foreground and background (fill) color
+for each identified chromatographic peak in the provided result object (i.e., we
+need to define one color for each row of `chromPeaks(xchr)` - column `"column"`
+(or `"sample"` if present) in that peak matrix specifies the sample in which the
 peak was identified).
 
 ```{r peak-detection-eic-plot, message = FALSE, fig.align = "center", fig.width = 10, fig.height = 8, fig.cap = "Signal for an example peak. Red and blue colors represent KO and wild type samples, respectively. Peak area of identified chromatographic peaks are highlighted in the sample group color."}
@@ -394,12 +395,12 @@ cwp <- CentWaveParam(peakwidth = c(20, 80), noise = 5000,
 faahko <- findChromPeaks(faahko, param = cwp)
 ```
 
-The results of `findChromPeaks` on a `MsExperiment` object are returned as an
+The results of `findChromPeaks()` on a `MsExperiment` object are returned as an
 `XcmsExperiment` object. This object extends `MsExperiment` directly (hence
 providing the same access to all raw data) and contains all *xcms*
-pre-processing results. Note also that additional rounds of chromatographic peak
+preprocessing results. Note also that additional rounds of chromatographic peak
 detections could be performed and their results being added to existing peak
-detection results by additional calls to `findChromPeaks` on the result object
+detection results by additional calls to `findChromPeaks()` on the result object
 and using parameter `add = TRUE`.
 
 The `chromPeaks` function can also here be used to access the results from the
@@ -411,7 +412,7 @@ chromPeaks(faahko) |>
     head()
 ```
 
-Columns of this `chromPeaks` matrix might differ depending on the used peak
+Columns of this `chromPeaks()` matrix might differ depending on the used peak
 detection algorithm. Columns that all algorithms have to provide are: `"mz"`,
 `"mzmin"`, `"mzmax"`, `"rt"`, `"rtmin"` and `"rtmax"` that define the m/z and
 retention time range of the chromatographic peak (i.e. all mass peaks within
@@ -422,7 +423,7 @@ intensity. Finally, `"sample"` provides the index of the sample in which the
 peak was identified.
 
 Additional annotations for each individual peak can be extracted with the
-`chromPeakData` function. This data frame could also be used to add/store
+`chromPeakData()` function. This data frame could also be used to add/store
 arbitrary annotations for each detected peak (that don't necessarily need to be
 numeric).
 
@@ -433,9 +434,9 @@ chromPeakData(faahko)
 Peak detection will not always work perfectly for all types of peak shapes
 present in the data set leading to peak detection artifacts, such as (partially
 or completely) overlapping peaks or artificially split peaks (common issues
-especially for *centWave*). *xcms* provides the `refineChromPeaks` function that
-can be called on peak detection results in order to *refine* (or clean) peak
-detection results by either removing identified peaks not passing a certain
+especially for *centWave*). *xcms* provides the `refineChromPeaks()` function
+that can be called on peak detection results in order to *refine* (or clean)
+peak detection results by either removing identified peaks not passing a certain
 criteria or by merging artificially split or partially or completely overlapping
 chromatographic peaks. Different algorithms are available that can again be
 configured with their respective parameter objects: `CleanPeaksParam` and
@@ -508,7 +509,7 @@ peak refinement step.
 faahko <- faahko_pp
 ```
 
-Below we use the data from the `chromPeaks` matrix to calculate per-file
+Below we use the data from the `chromPeaks()` matrix to calculate per-file
 summaries of the peak detection results, such as the number of peaks per file as
 well as the distribution of the retention time widths.
 
@@ -529,7 +530,7 @@ pandoc.table(
                      "peaks."))
 ```
 
-While by default `chromPeaks` will return all identified chromatographic peaks
+While by default `chromPeaks()` will return all identified chromatographic peaks
 in a result object it is also possible to extract only chromatographic peaks for
 a specified m/z and/or rt range:
 
@@ -538,7 +539,7 @@ chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900))
 ```
 
 We can also plot the location of the identified chromatographic peaks in the
-m/z - retention time space for one file using the `plotChromPeaks`
+m/z - retention time space for one file using the `plotChromPeaks()`
 function. Below we plot this information for the third sample.
 
 ```{r peak-detection-chrom-peaks-plot, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 8, fig.cap = "Identified chromatographic peaks in the m/z by retention time space for one sample." }
@@ -561,7 +562,7 @@ peaks within that region. This can thus also be used to validate and verify that
 the used peak detection settings identified e.g. peaks for known compounds or
 internal standards properly. Below we extract the ion chromatogram for the m/z -
 rt region above and access the detected peaks in that region using the
-`chromPeaks` function.
+`chromPeaks()` function.
 
 ```{r peak-detection-eic-example-peak, message = FALSE}
 chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr)
@@ -610,8 +611,8 @@ detected peaks (indicated by the smaller width of the boxes).
 
 Note that in addition to the above described identification of chromatographic
 peaks, it is also possible to *manually* define and add chromatographic peaks
-with the `manualChromPeaks` function (see `?manualChromPeaks` help page for more
-information).
+with the `manualChromPeaks()` function (see `?manualChromPeaks` help page for
+more information).
 
 
 ## Alignment
@@ -625,7 +626,7 @@ between different samples within an experiment.
 
 A plethora of alignment algorithms exist (see [@Smith:2013gr]), with some of
 them being also implemented in *xcms*. Alignment of LC-MS data can be performed
-in *xcms* using the `adjustRtime` method and an algorithm-specific parameter
+in *xcms* using the `adjustRtime()` method and an algorithm-specific parameter
 class (see `?adjustRtime` for an overview of available methods in *xcms*).
 
 In the example below we use the *obiwarp* method [@Prince:2006jj] to align the
@@ -637,11 +638,12 @@ experiment.
 faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6))
 ```
 
-Note that `adjustRtime`, besides calculating adjusted retention times for each
+Note that `adjustRtime()`, besides calculating adjusted retention times for each
 spectrum, adjusts also the retention times of the identified chromatographic
 peaks in the *xcms* result object. Adjusted retention times of individual
-spectra can be extracted from the result object using either the `adjustedRtime`
-function or using `rtime` with parameter `adjusted = TRUE` (the default):
+spectra can be extracted from the result object using either the
+`adjustedRtime()` function or using `rtime()` with parameter `adjusted = TRUE`
+(the default):
 
 ```{r alignment-rtime, message = FALSE }
 ## Extract adjusted retention times
@@ -656,9 +658,9 @@ rtime(faahko, adjusted = FALSE) |> head()
 
 To evaluate the impact of the alignment we plot the BPC on the adjusted data. In
 addition we plot also the differences between the adjusted and the raw retention
-times per sample using the `plotAdjustedRtime` function. To disable the
+times per sample using the `plotAdjustedRtime()` function. To disable the
 automatic extraction of all identified chromatographic peaks by the
-`chromatogram` function (which would not make much sense for a BPC) we use
+`chromatogram()` function (which would not make much sense for a BPC) we use
 `chromPeaks = "none"` below.
 
 ```{r alignment-obiwarp-plot, message = FALSE, fig.align = "center", fig.width = 12, fig.height = 8, fig.cap = "Obiwarp aligned data. Base peak chromatogram before (top) and after alignment (middle) and difference between adjusted and raw retention times along the retention time axis (bottom)." }
@@ -694,7 +696,7 @@ grid()
 both the raw and the adjusted retention times for all spectra and subset
 operation will in many cases drop adjusted retention times. Thus it might
 sometimes be useful to immediately **replace** the raw retention times in the
-data using the `applyAdjustedRtime` function.
+data using the `applyAdjustedRtime()` function.
 
 
 ### Subset-based alignment
@@ -752,7 +754,7 @@ Note that for any subset-alignment all parameters such as `minFraction` are
 relative to the `subset`, not the full experiment!
 
 Below we first remove any previous alignment results with the
-`dropAdjustedRtime` function to allow a fresh alignment using the subset-based
+`dropAdjustedRtime()` function to allow a fresh alignment using the subset-based
 option outlined above. In addition to removing adjusted retention times for all
 spectra, this function will also *restore* the original retention times for
 identified chromatographic peaks.
@@ -819,18 +821,18 @@ highly similar to those of the subset sample which was used for adjustment.
 
 ## Correspondence
 
-Correspondence is usually the final step in LC-MS data pre-processing in which
+Correspondence is usually the final step in LC-MS data preprocessing in which
 data, presumably representing signal from the same originating ions, is matched
 across samples. As a result, chromatographic peaks from different samples with
 similar m/z and retention times get grouped into LC-MS *features*. The function
-to perform the correspondence in *xcms* is called `groupChromPeaks` that again
+to perform the correspondence in *xcms* is called `groupChromPeaks()` that again
 supports different algorithms which can be selected and configured with a
 specific parameter object (see `?groupChromPeaks` for an overview). For our
 example we will use the *peak density* method [@Smith:2006ic] that, within small
 slices along the m/z dimension, combines chromatographic peaks depending on the
 density of these peaks along the retention time axis. To illustrate this, we
 *simulate* below the peak grouping for an m/z slice containing multiple
-chromatoghaphic peaks within each sample using the `plotChromPeakDensity`
+chromatographic peaks within each sample using the `plotChromPeakDensity()`
 function and a `PeakDensityParam` object with parameter `minFraction = 0.4`
 (features are only defined if in at least 40% of samples a chromatographic peak
 was present) - parameter `sampleGroups` is used to define to which sample group
@@ -922,7 +924,7 @@ points(featureDefinitions(faahko)$mzmed, mzw, pch = 21,
 ```
 
 Results from the correspondence analysis can be accessed with the
-`featureDefinitions` and `featureValues` function. The former returns a data
+`featureDefinitions()` and `featureValues()` functions. The former returns a data
 frame with general information on each of the defined features, with each row
 being one feature and columns providing information on the median m/z and
 retention time as well as the indices of the chromatographic peaks assigned to
@@ -933,9 +935,9 @@ features.
 featureDefinitions(faahko) |> head()
 ```
 
-The `featureValues` function returns a `matrix` with rows being features and
+The `featureValues()` function returns a `matrix` with rows being features and
 columns samples. The content of this matrix can be defined using the `value`
-argument which can be any column name in the `chromPeaks` matrix. With the
+argument which can be any column name in the `chromPeaks()` matrix. With the
 default `value = "into"` a matrix with the integrated signal of the peaks
 corresponding to a feature in a sample are returned. This is then generally used
 as the intensity matrix for downstream analysis. Below we extract the
@@ -956,17 +958,17 @@ section).
 
 The performance of peak detection, alignment and correspondence should always be
 evaluated by inspecting extracted ion chromatograms e.g. of known compounds,
-internal standards or identified features in general. The `featureChromatograms`
-function allows to extract chromatograms for each feature present in
-`featureDefinitions`. The returned `MChromatograms` object contains an ion
-chromatogram for each feature (each row containing the data for one feature) and
-sample (each column representing containing data for one sample). Parameter
-`features` allows to define specific features for which the EIC should be
-returned. These can be specified with their index or their ID (i.e. their row
-name in the `featureDefinitions` data frame. If `features` is not defined, EICs
-are returned for **all** features in a data set, which can take also a
-considerable amount of time. Below we extract the chromatograms for the first 4
-features.
+internal standards or identified features in general. The
+`featureChromatograms()` function allows to extract chromatograms for each
+feature present in `featureDefinitions()`. The returned `MChromatograms` object
+contains an ion chromatogram for each feature (each row containing the data for
+one feature) and sample (each column containing data for one
+sample). Parameter `features` allows to define specific features for which the
+EIC should be returned. These can be specified with their index or their ID
+(i.e. their row name in the `featureDefinitions()` data frame). If `features` is
+not defined, EICs are returned for **all** features in a data set, which can
+take also a considerable amount of time. Below we extract the chromatograms for
+the first 4 features.
 
 ```{r featureChromatograms, message = FALSE }
 feature_chroms <- featureChromatograms(faahko, features = 1:4)
@@ -1001,7 +1003,7 @@ intensity signal). The aim of the gap filling step is to reduce the number of
 such missing values by integrating signals from the original data files for
 samples in which no chromatographic peak was found from the m/z - rt region
 where signal from the ion is expected. Gap filling can be performed in *xcms*
-with the `fillChromPeaks` function and a parameter object selecting and
+with the `fillChromPeaks()` function and a parameter object selecting and
 configuring the gap filling algorithm. The method of choice is
 `ChromPeakAreaParam` that integrates the signal (in samples in which no
 chromatographic peak was found for a feature) in the m/z - rt region that is
@@ -1033,15 +1035,15 @@ the LC-MS feature grouping vignette for details) we could also extract all
 results as a `SummarizedExperiment` object. This is the *standard* data
 container for Bioconductor defined in the `r Biocpkg("SummarizedExperiment")`
 package and integration with other Bioconductor packages might thus be easier
-using that type of object. Below we use the `quantify` function to extract the
-*xcms* pre-processing results as such a `SummarizedExperiment`
-object. Internally, the `featureValues` function is used to generate the feature
-value matrix. We can pass any parameters from that function to the `quantify`
-call. Below we use `value = "into"` and `method = "sum"` to report the
-integrated peak signal as intensity and to sum these values in samples in which
-more than one chromatographic peak was assigned to a feature (for that option it
-is important to run `refineChromPeaks` like described above to merge overlapping
-peaks in each sample).
+using that type of object. Below we use the `quantify()` function to extract the
+*xcms* preprocessing results as such a `SummarizedExperiment`
+object. Internally, the `featureValues()` function is used to generate the
+feature value matrix. We can pass any parameters from that function to the
+`quantify()` call. Below we use `value = "into"` and `method = "sum"` to report
+the integrated peak signal as intensity and to sum these values in samples in
+which more than one chromatographic peak was assigned to a feature (for that
+option it is important to run `refineChromPeaks()` as described above to merge
+overlapping peaks in each sample).
 
 ```{r}
 library(SummarizedExperiment)
@@ -1049,24 +1051,24 @@ res <- quantify(faahko, value = "into", method = "sum")
 res
 ```
 
-The information from `featureDefinitions` is now stored in the `rowData` of this
-object. The `rowData` provides annotations and information for each **row** in
-the `SummarizedExperiment` (which in our case are the **features**).
+The information from `featureDefinitions()` is now stored in the `rowData()` of
+this object. The `rowData()` provides annotations and information for each
+**row** in the `SummarizedExperiment` (which in our case are the **features**).
 
 ```{r}
 rowData(res)
 ```
 
 Annotations for **columns** (in our case **samples**) are stored as
-`colData`. In this data frame each row contains annotations for one sample (and
-hence one column in the feature values matrix).
+`colData()`. In this data frame each row contains annotations for one sample
+(and hence one column in the feature values matrix).
 
 ```{r}
 colData(res)
 ```
 
-Finally, the feature matrix is stored as an `assay` within the object. Note that
-a `SummarizedExperiment` can have multiple assays which have to be numeric
+Finally, the feature matrix is stored as an *assay* within the object. Note
+that a `SummarizedExperiment` can have multiple assays which have to be numeric
 matrices with the number of rows and columns matching the number of features and
 samples, respectively. Below we list the names of the available assays.
 
@@ -1074,7 +1076,7 @@ samples, respectively. Below we list the names of the available assays.
 assayNames(res)
 ```
 
-And we can access the actual data using the `assay` function, optionally also
+And we can access the actual data using the `assay()` function, optionally also
 providing the name of the assay we want to access. Below we show the first 6
 lines of that matrix.
 
@@ -1102,7 +1104,7 @@ And we can extract the feature values without gap-filling:
 assay(res, "raw_nofill") |> head()
 ```
 
-Finally, a history of the full processing with *xcms* is available as `metadata`
+Finally, a history of the full processing with *xcms* is available as *metadata*
 in the `SummarizedExperiment`.
 
 ```{r}
@@ -1110,7 +1112,7 @@ metadata(res)
 ```
 
 This same information can also be extracted from the *xcms* result object using
-the `processHistory` function. Below we extract the information for the first
+the `processHistory()` function. Below we extract the information for the first
 processing step.
 
 ```{r}
@@ -1162,20 +1164,21 @@ properties of the mice analyzed (sex, age, litter mates etc).
 
 ## Quality-based filtering of features
 
-When dealing with metabolomics results, it is often necessary to filter
-features based on certain criteria. These criteria are typically derived from
-statistical formulas applied to full rows of data, where each row represents a
-feature. The `filterFeatures` function provides a robust solution for filtering
-features based on these conventional quality assessment criteria. It supports
-multiple types of filtering, allowing users to tailor the filtering process to
-their specific needs, all controlled by the `filter` argument. This function
-and its implementations are applicable to both `XcmsExperiment` results objects
-and `SummarizedExperiment` objects.
-
-We will demonstrate how to use the `filterFeatures` function to perform quality
-assessment and filtering on both the `faahko` and `res` variables defined above.
-The `filter` argument can accommodate various types of input, each determining the
-specific type of quality assessment and filtering to be performed.
+When dealing with metabolomics results, it is often necessary to filter features
+based on certain criteria. These criteria are typically derived from statistical
+formulas applied to full rows of data, where each row represents a feature. The
+`filterFeatures()` function provides a robust solution for filtering features
+based on these conventional quality assessment criteria. It supports multiple
+types of filtering, allowing users to tailor the filtering process to their
+specific needs, all controlled by the `filter` argument. This function and its
+implementations are applicable to both `XcmsExperiment` results objects and
+`SummarizedExperiment` objects.
+
+We will demonstrate how to use the `filterFeatures()` function to perform
+quality assessment and filtering on both the `faahko` and `res` variables
+defined above.  The `filter` argument can accommodate various types of input,
+each determining the specific type of quality assessment and filtering to be
+performed.
 
 The `RsdFilter` enable users to filter features based on their relative
 standard deviation (coefficient of variation) for a specified `threshold`. It
@@ -1277,12 +1280,13 @@ samples. More information can be found in the documentation of the filter:
 
 Normalizing features' signal intensities is required, but at present not (yet)
 supported in `xcms` (some methods might be added in near future). It is advised
-to use the `SummarizedExperiment` returned by the `quantify` method for any
+to use the `SummarizedExperiment` returned by the `quantify()` method for any
 further data processing, as this type of object stores feature definitions,
 sample annotations as well as feature abundances in the same object. For the
 identification of e.g. features with significant different
 intensities/abundances it is suggested to use functionality provided in other R
-packages, such as Bioconductor's excellent `limma` package.
+packages, such as Bioconductor's excellent *limma* package.
+
 
 ## Alignment to an external reference dataset
 
@@ -1305,11 +1309,11 @@ tst <- loadXcmsData("faahko_sub2")
 ```
 
 Now, we will attempt to align these two samples with the previous dataset. The
-first step is to extract landmark features (referred to as `lamas`). To achieve
+first step is to extract landmark features (referred to as *lamas*). To achieve
 this, we will identify the features present in every QC sample of the `ref`
 dataset. To do so, we will categorize (using `factor()`) our data by
 `sample_type` and only retain the QC samples. This variable will be utilized to
-filter the features using the `PercentMissingFilter()` parameter within the
+filter the features using the `PercentMissingFilter` parameter within the
 `filterFeatures()` function (see section above for more information on this
 method)
 
@@ -1410,13 +1414,14 @@ summary
 summary$Matched_peaks / summary$Total_peaks * 100
 
 #access the information on the model of for the first file
-summary$model_summary[[1]]
+summary$Model_summary[[1]]
 
 # Plot obs vs. ref with fitting line
 plot(param, index = 1L, main = "ChromPeaks versus Lamas for the first file",
      colPoint = "red")
 ```
 
+
 # Additional details and notes
 
 ## Subsetting and filtering
@@ -1424,7 +1429,7 @@ plot(param, index = 1L, main = "ChromPeaks versus Lamas for the first file",
 *xcms* result objects can be subset/filtered by sample using the `[`
 method or one of the `filter*` functions (although the `XcmsExperiment` supports
 at present only few selected filter functions). In some cases filtering can
-remove pre-processing results, but most filter functions support parameters
+remove preprocessing results, but most filter functions support parameters
 `keepFeatures` and `keepAdjustedRtime` that can be set to `TRUE` to avoid their
 removal.
 
@@ -1442,8 +1447,8 @@ memory. This needs also to be considered, when the number of parallel processes
 is defined.
 
 Unix-based systems (Linux, macOS) support `multicore`-based parallel
-processing. To configure it globally we `register` the parameter class. Note
-also that `bpstart` is used below to initialize the parallel processes.
+processing. To configure it globally we `register()` the parameter class. Note
+also that `bpstart()` is used below to initialize the parallel processes.
 
 ```{r multicore, message = FALSE, eval = FALSE }
 register(bpstart(MulticoreParam(2)))
@@ -1468,9 +1473,10 @@ register(bpstart(SnowParam(2)))
 Some of the documentations listed here are still based on xcms version 3 but
 will be subsequently updated.
 
-- [Metabolomics pre-processing with
-  `xcms`](https://jorainer.github.io/metabolomics2018): more detailed
-  description of the pre-processing of LC-MS data with *xcms*.
+- [Exploring and analyzing LC-MS data with Spectra and
+  xcms](https://jorainer.github.io/xcmsTutorials/): tutorial explaining general
+  data handling using the *Spectra* package and LC-MS data preprocessing with
+  *xcms*.
 - [MetaboAnnotationTutorials](https://jorainer.github.io/MetaboAnnotationTutorials):
   examples for annotation of metabolomics data from [@rainer_modular_2022].
 - [@gattoMSnbaseEfficientElegant2020a]: describes the concept of the *on-disk*

From 9bb7bd5c80519b937e59c0c045990fc8cfea52f3 Mon Sep 17 00:00:00 2001
From: jorainer <johannes.rainer@gmail.com>
Date: Thu, 4 Apr 2024 13:03:21 +0200
Subject: [PATCH 14/14] fix: plot,LamaParama method

- Fix `plot,LamaParama`: x and y coordinates were switched.
- Small changes in the vignette.
- Ensure .R files are loaded in the correct order.
---
 DESCRIPTION           |  2 +-
 R/PlainTextParam.R    |  2 +
 R/methods-Params.R    |  3 +-
 man/XcmsExperiment.Rd | 92 +++++++++++++++++++++----------------------
 vignettes/xcms.Rmd    | 35 +++++++++++-----
 5 files changed, 77 insertions(+), 57 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index d63fae793..f889cc5ce 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -104,11 +104,11 @@ Collate:
     'MPI.R'
     'MsExperiment-functions.R'
     'MsExperiment.R'
+    'XcmsExperiment.R'
     'PlainTextParam.R'
     'RDataParam.R'
     'XcmsExperiment-functions.R'
     'XcmsExperiment-plotting.R'
-    'XcmsExperiment.R'
     'c.R'
     'cwTools.R'
     'databases.R'
diff --git a/R/PlainTextParam.R b/R/PlainTextParam.R
index 73e7c33d8..4eabe7b84 100644
--- a/R/PlainTextParam.R
+++ b/R/PlainTextParam.R
@@ -1,3 +1,5 @@
+#' @include XcmsExperiment.R
+
 #' @title Store contents of `MsExperiment` and `XcmsExperiment` objects as
 #' plain text files
 #'
diff --git a/R/methods-Params.R b/R/methods-Params.R
index e08ffda42..e1f848ee6 100644
--- a/R/methods-Params.R
+++ b/R/methods-Params.R
@@ -1265,7 +1265,8 @@ setMethod("plot", signature(x = "LamaParama"),
                               zero_weight = x@zeroWeight,
                               bs = x@bs)
     datap <- x@rtMap[[index]]
-    plot(datap, type = "p", xlab = xlab, ylab = ylab, col = colPoints, ...)
+    plot(datap[, 2L], datap[, 1L], type = "p", xlab = xlab, ylab = ylab,
+         col = colPoints, ...)
     points(model, type = "l", col = colFit)
 })
 
diff --git a/man/XcmsExperiment.Rd b/man/XcmsExperiment.Rd
index 1c12584c3..de7e1c35b 100644
--- a/man/XcmsExperiment.Rd
+++ b/man/XcmsExperiment.Rd
@@ -1,7 +1,7 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/AllGenerics.R, R/MsExperiment.R,
-%   R/XcmsExperiment-functions.R, R/XcmsExperiment-plotting.R,
-%   R/XcmsExperiment.R, R/methods-XCMSnExp.R
+%   R/XcmsExperiment.R, R/XcmsExperiment-functions.R,
+%   R/XcmsExperiment-plotting.R, R/methods-XCMSnExp.R
 \name{filterFeatureDefinitions}
 \alias{filterFeatureDefinitions}
 \alias{filterRt,MsExperiment-method}
@@ -16,8 +16,6 @@
 \alias{polarity,MsExperiment-method}
 \alias{filterIsolationWindow,MsExperiment-method}
 \alias{chromatogram,MsExperiment-method}
-\alias{featureArea}
-\alias{plot,MsExperiment,missing-method}
 \alias{XcmsExperiment}
 \alias{XcmsExperiment-class}
 \alias{show,XcmsExperiment-method}
@@ -50,6 +48,8 @@
 \alias{chromatogram,XcmsExperiment-method}
 \alias{processHistory,XcmsExperiment-method}
 \alias{filterFile,XcmsExperiment-method}
+\alias{featureArea}
+\alias{plot,MsExperiment,missing-method}
 \title{Next Generation \code{xcms} Result Object}
 \usage{
 filterFeatureDefinitions(object, ...)
@@ -88,17 +88,6 @@ filterFeatureDefinitions(object, ...)
   BPPARAM = bpparam()
 )
 
-featureArea(
-  object,
-  mzmin = min,
-  mzmax = max,
-  rtmin = min,
-  rtmax = max,
-  features = character()
-)
-
-\S4method{plot}{MsExperiment,missing}(x, y, msLevel = 1L, peakCol = "#ff000060", ...)
-
 \S4method{[}{XcmsExperiment,ANY,ANY,ANY}(x, i, j, ..., drop = TRUE)
 
 \S4method{filterIsolationWindow}{XcmsExperiment}(object, mz = numeric())
@@ -204,6 +193,17 @@ featureArea(
   keepFeatures = FALSE,
   ...
 )
+
+featureArea(
+  object,
+  mzmin = min,
+  mzmax = max,
+  rtmin = min,
+  rtmax = max,
+  features = character()
+)
+
+\S4method{plot}{MsExperiment,missing}(x, y, msLevel = 1L, peakCol = "#ff000060", ...)
 }
 \arguments{
 \item{object}{An \code{XcmsExperiment} object.}
@@ -272,39 +272,8 @@ type of the returned object. Currently only
 \item{BPPARAM}{For \code{chromatogram}: parallel processing setup. Defaults
 to \code{BPPARAM = bpparam()}. See \code{\link[=bpparam]{bpparam()}} for more information.}
 
-\item{mzmin}{For \code{featureArea}: function to calculate the \code{"mzmin"} of
-a feature based on the \code{"mzmin"} values of the individual
-chromatographic peaks assigned to that feature. Defaults to
-\code{mzmin = min}.}
-
-\item{mzmax}{For \code{featureArea}: function to calculate the \code{"mzmax"} of
-a feature based on the \code{"mzmax"} values of the individual
-chromatographic peaks assigned to that feature. Defaults to
-\code{mzmax = max}.}
-
-\item{rtmin}{For \code{featureArea}: function to calculate the \code{"rtmin"} of
-a feature based on the \code{"rtmin"} values of the individual
-chromatographic peaks assigned to that feature. Defaults to
-\code{rtmin = min}.}
-
-\item{rtmax}{For \code{featureArea}: function to calculate the \code{"rtmax"} of
-a feature based on the \code{"rtmax"} values of the individual
-chromatographic peaks assigned to that feature. Defaults to
-\code{rtmax = max}.}
-
-\item{features}{For \code{filterFeatureDefinitions} and \code{featureArea}: \code{logical},
-\code{integer} or \code{character} defining the features to keep or from which
-to extract the feature area, respectively. See function description
-for more information.}
-
 \item{x}{An \code{XcmsExperiment} object.}
 
-\item{y}{For \code{plot}: should not be defined as it is not supported.}
-
-\item{peakCol}{For \code{plot}: defines the border color of the rectangles
-indicating the identified chromatographic peaks. Only a single color
-is supported. Defaults to `peakCol = "#ff000060".}
-
 \item{i}{For \code{[}: \code{integer} or \code{logical} defining the samples/files to
 subset.}
 
@@ -366,6 +335,11 @@ For \code{filterChromPeaks}: currently only \code{method = "keep"} is supported.
 retention times should be returned. The default is to return adjusted
 retention times, if available.}
 
+\item{features}{For \code{filterFeatureDefinitions} and \code{featureArea}: \code{logical},
+\code{integer} or \code{character} defining the features to keep or from which
+to extract the feature area, respectively. See function description
+for more information.}
+
 \item{intensity}{For \code{featureValues}: \code{character(1)} specifying the name
 of the column in the \code{chromPeaks(objects)} matrix containing the
 intensity value of the peak that should be used for the conflict
@@ -396,6 +370,32 @@ also parameter \code{type} below for additional information.}
 \item{keepFeatures}{for most subsetting functions (\code{[}, \code{filterFile}):
 \code{logical(1)}: wheter eventually present feature definitions should
 be retained in the returned (filtered) object.}
+
+\item{mzmin}{For \code{featureArea}: function to calculate the \code{"mzmin"} of
+a feature based on the \code{"mzmin"} values of the individual
+chromatographic peaks assigned to that feature. Defaults to
+\code{mzmin = min}.}
+
+\item{mzmax}{For \code{featureArea}: function to calculate the \code{"mzmax"} of
+a feature based on the \code{"mzmax"} values of the individual
+chromatographic peaks assigned to that feature. Defaults to
+\code{mzmax = max}.}
+
+\item{rtmin}{For \code{featureArea}: function to calculate the \code{"rtmin"} of
+a feature based on the \code{"rtmin"} values of the individual
+chromatographic peaks assigned to that feature. Defaults to
+\code{rtmin = min}.}
+
+\item{rtmax}{For \code{featureArea}: function to calculate the \code{"rtmax"} of
+a feature based on the \code{"rtmax"} values of the individual
+chromatographic peaks assigned to that feature. Defaults to
+\code{rtmax = max}.}
+
+\item{y}{For \code{plot}: should not be defined as it is not supported.}
+
+\item{peakCol}{For \code{plot}: defines the border color of the rectangles
+indicating the identified chromatographic peaks. Only a single color
+is supported. Defaults to \code{peakCol = "#ff000060"}.}
 }
 \description{
 The \code{XcmsExperiment} is a data container for \code{xcms} preprocessing results
diff --git a/vignettes/xcms.Rmd b/vignettes/xcms.Rmd
index 1931aaa5c..3f59a2e92 100644
--- a/vignettes/xcms.Rmd
+++ b/vignettes/xcms.Rmd
@@ -1323,6 +1323,7 @@ f[f != "QC"] <- NA
 ref <- filterFeatures(ref, PercentMissingFilter(threshold = 0, f = f))
 ref_mz_rt <- featureDefinitions(ref)[, c("mzmed","rtmed")]
 head(ref_mz_rt)
+nrow(ref_mz_rt)
 ```
 
 This is what the `lamas` input should look like for alignment. In terms of
@@ -1396,29 +1397,45 @@ abline(v = mtch[[1]]$obs)
 ```
 
 The overlay of BPC above provides insight into the correlation between accurate
-alignment and the presence of peaks matching with `lamas.` Furthermore, a more
-detailed examination of the matching and the model used for fitting each file
-is possible. Numerical information can be obtained using the
+alignment and the presence of peaks matching with `lamas`. For this particular
+sample, no chromatographic peaks were matched to the `lamas` between 2500 and
+3000 seconds, and hence the alignment in that region was poor. For the second
+file, chromatographic peaks could also be matched in that region, resulting in
+a better alignment.
+
+```{r}
+par(mfrow = c(1, 1))
+plot(bpc[1, 2], col = "#00000080", main = "Distribution CP matched to Lamas")
+points(rtime(bpc_tst_adj[1, 2]), intensity(bpc_tst_adj[1, 2]), type = "l",
+       col = "#0000ff80")
+grid()
+abline(v = mtch[[2]]$obs)
+```
+
+Furthermore, a more detailed examination of the matching and the model used for
+fitting each file is possible. Numerical information can be obtained using the
 `summarizeLamaMatch()` function. From this, the percentage of chromatographic
 peaks utilized for alignment can be computed relative to the total number of
-peaks in the file. Additionally, it is feasible to directly `plot()` the
-`param` object for the file of interest, showcasing the distribution of these
+peaks in the file. Additionally, it is feasible to directly `plot()` the `param`
+object for the file of interest, showcasing the distribution of these
 chromatographic peaks along with the fitted model line.
 
 ```{r}
-#access summary of matches and model information
+#' access summary of matches and model information
 summary <- summarizeLamaMatch(param)
 summary
 
-# coverage for each file
+#' coverage for each file
 summary$Matched_peaks / summary$Total_peaks * 100
 
-#access the information on the model of for the first file
+#' access the information on the model for the first file
 summary$Model_summary[[1]]
 
-# Plot obs vs. ref with fitting line
+#' Plot obs vs. ref with fitting line
 plot(param, index = 1L, main = "ChromPeaks versus Lamas for the first file",
      colPoint = "red")
+abline(0, 1, lty = 3, col = "grey")
+grid()
 ```