mottensmann · Jun 19, 2022 · Jun 19, 2022 · Jun 19, 2022 · Jun 19, 2022 · Jun 20, 2023
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -12,3 +12,4 @@ NEWS.html
 internal
 vignettes/test_GCalignR*
 README.html
+^CRAN-SUBMISSION$
diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION
@@ -0,0 +1,3 @@
+Version: 1.0.7
+Date: 2024-07-03 17:46:40 UTC
+SHA: ed565043a1a73846929ba038570c4ea7290166b4
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: GCalignR
 Title: Simple Peak Alignment for Gas-Chromatography Data
-Version: 1.0.4
-Date: 2022-02-09
+Version: 1.0.7.1
+Date: 2024-09-29
 Encoding: UTF-8
 Authors@R: c(
     person("Meinolf", "Ottensmann", email = "meinolf.ottensmann@web.de", role = c("aut","cre"), 
@@ -28,11 +28,12 @@ Imports:
     stringr,
     utils,
     pbapply,
+    methods,
     tibble
 License: GPL (>= 2) | file LICENSE
 Language: en-GB
 LazyData: true
-RoxygenNote: 7.1.2
+RoxygenNote: 7.3.2
 Suggests:
     knitr,
     pander,

diff --git a/NEWS.md b/NEWS.md
@@ -1,110 +1,135 @@
 
+# GCalignR 1.0.7
+
+------------------------------------------------------------------------
+
+- replacing defunct vegan::adonis by vegan::adonis2
+
+# GCalignR 1.0.6
+
+------------------------------------------------------------------------
+
+- Removing unused argument `gc_peak_df` from `align_peaks`
+
+# GCalignR 1.0.5
+
+------------------------------------------------------------------------
+
+- Bugfix in `choose_optimal_reference` that always selected the first
+  sample as a reference. Thanks to Heberto del Rio who pointed this out
+  on <https://github.com/mottensmann/GCalignR/issues/27>
+
 # GCalignR 1.0.3.9
 
-  - Small bug fixed that caused problems when plotting x-axis labels in
-    `gc_heatmap`. An error occurred in case of two peaks that were
-    identical when rounded to decimal places.
-  - Added a test for detecting inconsistently ordered retention times
-    within samples. Retention times are expected in increasing order,
-    starting with the lowest number. If this assumption is violated,
-    retention times are reordered as indicated by a warning.
+- **Speedboost** when setting `max_diff_peak2mean = 0`: In this special
+  case there is no need to use a time-consuming iterative approach but
+  peaks can be sorted simply based on absolute values. This is
+  implemented in two steps. (1) Across all samples, unique retention
+  times are extracted, sorted in increasing temporal order and written
+  to a template data frame. (2) For each sample, peaks are matched to
+  the corresponding row of the template data frame.
+- Small bug fixed that caused problems when plotting x-axis labels in
+  `gc_heatmap`.
+- Added a test for detecting inconsistently ordered retention times
+  within samples. Retention times are expected in increasing order,
+  starting with the lowest number. If this assumption is violated,
+  retention times are reordered and a warning is shown.
 
 # GCalignR 1.0.3
 
------
-
-  - Added `fill = TRUE` as a parameter in `utils::read.table` when
-    reading data from text within internal functions. *Loading GC data
-    with utils::read.table failed in cases of missing values in a column
-    (i.e. empty). This is the correct behaviour as missing data should
-    always be coded explicitly by ‘NA’*
-  - Tibbles are now coerced to data frames
-  - Added a new boolean parameter `remove_empty` for the main function
-    `align_chromatograms`. If samples are empty (i.e.. no peak) this
-    parameter allows to remove those cases from the dataset to avoid
-    problems in post-hoc analyses. By default `FALSE`, i.e.. all but the
-    blank samples are kept.
-  - Added a new boolean parameter `permute` for the functions
-    `align_chromatograms` and `align_peaks`. This allows to change the
-    default behaviour of random permutation of samples during the
-    alignment and might be useful if exact replication is needed.
+------------------------------------------------------------------------
+
+- Added `fill = TRUE` as a parameter in `utils::read.table` when reading
+  data from text within internal functions. *Loading GC data with
+  utils::read.table failed in cases of missing values in a column
+  (i.e. empty). This is the correct behaviour as missing data should
+  always be coded explicitly by ‘NA’*
+- Tibbles are now coerced to data frames
+- Added a new boolean parameter `remove_empty` for the main function
+  `align_chromatograms`. If samples are empty (i.e. no peak) this
+  parameter allows to remove those cases from the dataset to avoid
+  problems in post-hoc analyses. By default `FALSE`, i.e. all but the
+  blank samples are kept.
+- Added a new boolean parameter `permute` for the functions
+  `align_chromatograms` and `align_peaks`. This allows to change the
+  default behaviour of random permutation of samples during the
+  alignment and might be useful if exact replication is needed.
 
 # GCalignR 1.0.2
 
------
+------------------------------------------------------------------------
 
-  - The accompanying manuscript is published
-    <https://doi.org/10.1371/journal.pone.0198311> and the citation has
-    been added
-  - The function *beta* `read_empower2` allows to import HPLC data that
-    has been generated using the EMPOWER 2 software
+- The accompanying manuscript is published
+  <https://doi.org/10.1371/journal.pone.0198311> and the citation has
+  been added
+- The function *beta* `read_empower2` allows to import HPLC data that
+  has been generated using the EMPOWER 2 software
 
 # GCalignR 1.0.1
 
------
+------------------------------------------------------------------------
 
 **Bugfixes**
 
-  - A bugfix was applied for handling multiple blanks correctly.
-  - Progressbars are removed in non-interactive R sessions
+- A bugfix was applied for handling multiple blanks correctly.
+- Progressbars are removed in non-interactive R sessions
 
------
+------------------------------------------------------------------------
 
 # GCalignR 1.0.0
 
 **New functions implemented**
 
-  - `choose_optimal_reference` offers an automatism to pick suitable
-    references.
-  - `draw_chromatograms` allows to represent a peak list in form of
-    chromatogram.
-  - `remove_blanks`allows to get rid of peaks that represent
-    contamination after aligning a dataset
-  - `remove_singletons` allows to remove single peaks from the dataset
-    after aligning
-  - `merge_redundant_rows` allows to merge rows that were not recognised
-    as redundant during the alignment by increasing the threshold value
-    for the evaluation of similarity
+- `choose_optimal_reference` offers an automatism to pick suitable
+  references.
+- `draw_chromatograms` allows to represent a peak list in form of
+  chromatogram.
+- `remove_blanks` allows to get rid of peaks that represent contamination
+  after aligning a dataset
+- `remove_singletons` allows to remove single peaks from the dataset
+  after aligning
+- `merge_redundant_rows` allows to merge rows that were not recognised
+  as redundant during the alignment by increasing the threshold value
+  for the evaluation of similarity
 
 **Algorithm**
 
-  - Using `pbapply`, we implemented progress bars to inform the user
-    about the progress and the estimated running time of intermediate
-    steps in the alignment of peak lists.
-  - By implementing more efficient code, we were able to speed up the
-    processing, especially picking references is faster by an order of
-    magnitude.
-  - Retention times are not rounded to two decimals anymore.
-    Calculations still capture a precision of two decimals for
-    computational reasons.
-  - Within the aligned results, retention times correspond to the input
-    values. Linear adjustments are only used internally and are
-    documented within the Logfile accessible in the output.
-  - Reference samples that are used for the coarse alignment of
-    retention times can be picked using a novel algorithm that
-    determines the average similarity across the dataset.
+- Using `pbapply`, we implemented progress bars to inform the user about
+  the progress and the estimated running time of intermediate steps in
+  the alignment of peak lists.
+- By implementing more efficient code, we were able to speed up the
+  processing, especially picking references is faster by an order of
+  magnitude.
+- Retention times are not rounded to two decimals anymore. Calculations
+  still capture a precision of two decimals for computational reasons.
+- Within the aligned results, retention times correspond to the input
+  values. Linear adjustments are only used internally and are documented
+  within the Logfile accessible in the output.
+- Reference samples that are used for the coarse alignment of retention
+  times can be picked using a novel algorithm that determines the
+  average similarity across the dataset.
 
 **warning messages**
 
-  - Warnings addressing formatting issues are now more explicit and
-    partly rephrased to avoid ambiguity.
+- Warnings addressing formatting issues are now more explicit and partly
+  rephrased to avoid ambiguity.
 
 **Plots**
 
-  - Added horizontal axis to barplots summarising peak numbers in
-    `plot.GCalign`.
-  - Changed to more prominent colours in binary heatmaps with
-    `gc_heatmap`.
-  - The function `draw_chromatograms` was added as another visualisation
-    tool.
+- Added horizontal axis to barplots summarising peak numbers in
+  `plot.GCalign`.
+- Changed to more prominent colours in binary heatmaps with
+  `gc_heatmap`.
+- The function `draw_chromatograms` was added as another visualisation
+  tool.
 
 **Vignettes**
 
-  - We included a second vignette that explains the algorithm and the
-    supported data in detail.
+- We included a second vignette that explains the algorithm and the
+  supported data in detail.
 
 **Documentation**
 
-  - Helpfiles were rewritten to enhance clarity.
+- Helpfiles were rewritten to enhance clarity.
 
------
+------------------------------------------------------------------------
diff --git a/NEWS.rmd b/NEWS.rmd
@@ -3,6 +3,19 @@ output: github_document
 html_preview: false
 ---
 
+# GCalignR 1.0.7
+___
+* replacing defunct vegan::adonis by vegan::adonis2
+
+# GCalignR 1.0.6
+___
+* Removing unused argument `gc_peak_df` from `align_peaks`
+
+# GCalignR 1.0.5
+___
+
+* Bugfix in `choose_optimal_reference` that always selected the first sample as a reference. Thanks to Heberto del Rio who pointed this out on https://github.com/mottensmann/GCalignR/issues/27
+
 # GCalignR 1.0.3.9
 
 * **Speedboost** when setting `max_diff_peak2mean = 0`: In this special case there is no need to use a time-consuming iterative approach but peaks can be sorted simply based on absolute values. This is implemented in two steps. (1) Across all samples, unique retention times are extracted, sorted in increasing temporal order and written to a template data frame. (2) For each sample, peaks are matched to the corresponding row of the template data frame. 

diff --git a/R/GCalignR.R b/R/GCalignR.R
@@ -30,7 +30,6 @@
 #'@details
 #' More details on the package are found in the vignettes that can be accessed via \code{browseVignettes("GCalignR")}.
 #'
-#' @docType package
 #' @name GCalignR
 #'
-NULL
+"_PACKAGE"
diff --git a/R/align_peaks.R b/R/align_peaks.R
@@ -24,10 +24,6 @@
 #'\strong{max_diff_peak2mean} around the mean retention time no shifting is done
 #'and the algorithm proceeds with the following sample.
 #'
-#'@param gc_peak_df data.frame containing GC-data (e.g. retention time, peak
-#'  area, peak height) of one sample. Variables are stored in columns, rows
-#'  represent peaks.
-#'
 #'@param gc_peak_list List of data.frames. Each data.frame contains GC-data
 #'  (e.g. retention time, peak area, peak height) of one sample. Variables are
 #'  stored in columns. Rows represent distinct peaks. Retention time is a

diff --git a/R/blank_substraction.R b/R/blank_substraction.R
@@ -59,7 +59,7 @@ if (is.null(input)) stop("No input was defined")
 if (is.null(blanks)) stop("Define name(s) of blanks")
 
     # read data and prepare a list
-if (class(input) == "GCalign") {
+if (inherits(input, "GCalign")) {
     if (is.null(conc_col_name)) stop("Define the name of a data frame")
     if (conc_col_name %in%  names(input[["aligned"]])) {
     input2 <- input[["aligned"]][[conc_col_name]]

diff --git a/R/choose_optimal_reference.R b/R/choose_optimal_reference.R
@@ -4,7 +4,7 @@
 #' Full alignments of peak lists require the specification of a fixed reference to which all other samples are aligned to. This function provides an simple algorithm to find the most suitable sample among a dataset. The so defined reference can be used for full alignments using \code{\link{linear_transformation}}. The functions is evoked internally by \code{\link{align_chromatograms}} if no reference was specified by the user.
 #'
 #' @details
-#' Every sample is considered in determining the optimal reference in comparison to all other samples by estimating the similarity to all other samples. For a reference-sample pair, the deviation in retention times between all reference peaks and the always nearest peak in the sample is summed and divided by the number of reference peaks. The calculated value is a similarity score that converges to zero the more similar reference and sample are. For every potential reference, the median score of all pair-wise comparisons is used as a similarity proxy. The optimal sample is then defined by the minimum value among these scores. This functions is used internally in \code{\link{align_chromatograms}} to select a reference if non was specified by the user.
+#' Every sample is considered in determining the optimal reference in comparison to all other samples by estimating the similarity to all other samples. For a reference-sample pair, the deviation in retention times between all reference peaks and the always nearest peak in the sample is summed up and divided by the number of reference peaks. The calculated value is a similarity score that converges to zero the more similar reference and sample are. For every potential reference, the median score of all pair-wise comparisons is used as a similarity proxy. The optimal sample is then defined by the minimum value among these scores. This function is used internally in \code{\link{align_chromatograms}} to select a reference if none was specified by the user.
 #'
 #' @inheritParams align_chromatograms
 #'
@@ -40,11 +40,14 @@ choose_optimal_reference <- function(data = NULL, rt_col_name = NULL, sep = "\t"
     ## get the median scores for shared peaks
     x <- df_median_sim_score(gc_peak_list = gc_peak_list,rt_col_name = rt_col_name, method = method)
 
-    ## take the best, depending on the method choose
+    ## take the best, depending on the method chosen
     if (method == "Match") {
         index <- which(x[["score"]] == max(x[["score"]]))
     } else if (method == "Deviance") {
-        index <- which(min(x[["score"]]/x[["n_peaks"]]) == min(x[["score"]]/x[["n_peaks"]]))
+        # Bugfix (2022-06-19, thanks to hebertodelrio on GitHub, issue #27):
+        # the previous code compared min(score/n_peaks) with itself, so which()
+        # always returned 1 and the first sample was always picked as reference.
+        index <- which(x[["score"]]/x[["n_peaks"]] == min(x[["score"]]/x[["n_peaks"]]))
     }
 
     ## If more than one would get the same score, take the most central run

diff --git a/R/draw_chromatogram.R b/R/draw_chromatogram.R
@@ -78,16 +78,16 @@ draw_chromatogram <- function(data = NULL, rt_col_name = NULL, conc_col_name = N
         out <- check_input(data = data, rt_col_name = rt_col_name, sep = sep, plot = F, message = F)
         if (out == FALSE) stop("Data is not formatted correctly. See check_input for details")
         } else {
-        if (class(data) == "GCalign") {
+        if (inherits(data, "GCalign")) {
             if (!(rt_col_name %in% names(data[["aligned"]])))  stop(print(paste(rt_col_name,"is not a valid variable name. Data contains:",paste(names(data[["aligned"]]),collapse = " & "))))
-        } else if (class(data) == "list") {
+        } else if (inherits(data, "list")) {
             out <- check_input(data = data, rt_col_name = rt_col_name, sep = sep, plot = F, message = F)
             if (out == FALSE) stop("Data is malformed. See check_input for details")
         }
     }
     if (is.character(data)) {
 peak_list <- read_peak_list(data, sep, rt_col_name)
-    } else if (class(data) == "GCalign") {
+    } else if (inherits(data, "GCalign")) {
         step <- match.arg(step, choices = c("aligned","input","shifted"))
         if (step == "input") {
             peak_list <- data[["input_list"]]
@@ -103,7 +103,7 @@ peak_list <- read_peak_list(data, sep, rt_col_name)
                 return(x)
         })
         }
-    } else if (class(data) == "list") {
+    } else if (inherits(data, "list")) {
         peak_list <- lapply(data, FUN = function(x) {
             if (any(is.na(rowSums(x)))) {
                 p <- as.vector(which(is.na(rowSums(x))))

diff --git a/R/merge_redundant_rows.R b/R/merge_redundant_rows.R
@@ -33,7 +33,7 @@
 #' @export
 #'
 merge_redundant_rows <- function(data, min_diff_peak2peak = NULL) {
-    if (class(data) != "GCalign") stop("Only data of type GCalign is supported")
+    if (!methods::is(data, "GCalign")) stop("Only data of type GCalign is supported")
     if (is.null(min_diff_peak2peak)) stop("Specify an numeric threshold value in minutes")
     gc_peak_list_aligned <- data[["aligned_list"]]
 

diff --git a/R/norm_peaks.R b/R/norm_peaks.R
@@ -34,7 +34,7 @@ out <- match.arg(out)
 ## some checks
 if (is.null(conc_col_name)) {stop("List containing peak concentration is not specified. Define conc_col_name")}
 
-if (class(data) == "GCalign") {
+if (inherits(data, "GCalign")) {
     which <-  "aligned"
     conc_list <- data[[which]][[conc_col_name]]
 } else if (is.list(data)) {

diff --git a/R/remove_blanks.R b/R/remove_blanks.R
@@ -24,7 +24,7 @@
 #' @export
 #'
 remove_blanks <- function(data, blanks) {
-    if (class(data) == "GCalign") {
+    if (inherits(data, "GCalign")) {
         rt_col_name <-  data[["Logfile"]][["Call"]][["rt_col_name"]]
         data <- data[["aligned_list"]]
     } else if (is.list(data)) {

diff --git a/R/remove_singletons.R b/R/remove_singletons.R
@@ -19,7 +19,7 @@
 #' @export
 #'
 remove_singletons <- function(data) {
-    if (class(data) == "GCalign") {
+    if (inherits(data, "GCalign")) {
         rt_col_name <-  data[["Logfile"]][["Call"]][["rt_col_name"]]
         data <- data[["aligned_list"]]
     } else if (is.list(data)) {

diff --git a/README-unnamed-chunk-7-1.png b/README-unnamed-chunk-7-1.png