From 4c522d2c6d248e5ee55d914b3c97f4a7b4f2ca85 Mon Sep 17 00:00:00 2001
From: grlloyd <g.r.lloyd@bham.ac.uk>
Date: Mon, 8 Jul 2019 13:42:15 +0100
Subject: [PATCH] update glog to output error_flag and optimised value for
 lambda

Also updated tests
---
 R/glog_transformation.R                   | 54 +++++++++++------------
 tests/testthat/test-glog_transformation.R | 12 ++---
 2 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/R/glog_transformation.R b/R/glog_transformation.R
index fd54f07..b6f1815 100644
--- a/R/glog_transformation.R
+++ b/R/glog_transformation.R
@@ -1,5 +1,5 @@
-#' @importFrom stats optimize 
-#' 
+#' @importFrom stats optimize
+#'
 
 NULL
 
@@ -17,7 +17,7 @@ glog <- function(y, y0=0, lambda){
 }
 
 #' Internal function for max. likelihood optimisation of glog params
-#' Calculates the alternative Jacobian fcn described in 
+#' Calculates the alternative Jacobian fcn described in
 #' https://doi.org/10.1186/1471-2105-8-234
 #'
 #' @param y values.
@@ -28,7 +28,7 @@ glog <- function(y, y0=0, lambda){
 jglog <- function(y, y0=0, lambda){
     z <- glog(y, y0, lambda)
     D <- log(sqrt((y - y0)^2 + lambda))
-    
+
     # average over all features (bins)
     gmn <- exp(apply(D, 2, mean, na.rm=TRUE))
     zj <- z * gmn  # ML estimate
@@ -45,16 +45,16 @@ jglog <- function(y, y0=0, lambda){
 SSE <- function(lambda, y0=0, y) {
     # calculate ML estimate
     z <- jglog(y, y0, lambda)
-    
+
     # average over all features
     mean_spec <- apply(z, 1, mean, na.rm=TRUE)
-    
+
     # calculate sum of squared difference between true and estimate
     s <- sum((z - mean_spec)^2, na.rm=TRUE)
     return(s)
 }
 
-#' Performs glog transformation on the data set, 
+#' Performs glog transformation on the data set,
 #' using QC samples to estimate lambda.
 #'
 #' https://doi.org/10.1186/1471-2105-8-234
@@ -64,12 +64,12 @@ SSE <- function(lambda, y0=0, y) {
 #' @param qc_label Class label for QC sample
 #' @param store_lambda If value of optimised lambda parameter needs to be
 #' returned
-#' @examples 
+#' @examples
 #' attach (testData)
 #' out <- mv_imputation(df=testData$data, method='knn')
 #' out <- glog_transformation (df=out, classes=testData$class,
 #'  qc_label='QC')
-#' 
+#'
 #' @return data frame, peak inntensity matrix after glog transformation
 #' @export glog_transformation
 
@@ -80,22 +80,22 @@ glog_transformation <- function(df, classes, qc_label, store_lambda=FALSE) {
     Please check your data and qc_label parameter.")
     }
     df <- check_peak_matrix(peak_data=df, classes=classes)
-    
+
     # data for the QC samples only
     df_qc <- df[, classes == qc_label]
-    
+
     # set offset to the minimum of the QC samples
     offset <- min(df_qc, na.rm=TRUE)
-    
+
     # set minimum of qc data to 0
     df_qc <- df_qc - offset
-    
+
     # stop optimisation when improvement less than this value
     step_threshold <- 1e-16
-    
+
     # variance of all features
     VF <- apply(df_qc, 1, var, na.rm=TRUE)
-    
+
     # if variance of any feature is 0
     if (min(VF) == 0) {
         # set min value to minimum greater than 0
@@ -103,24 +103,24 @@ glog_transformation <- function(df, classes, qc_label, store_lambda=FALSE) {
     } else {
         newminVar <- min(VF)
     }
-    
+
     # set lower  and upper limit of optimisation
     low_lim <- 0
     upper_lim <- max(pmax(VF, max(VF) / newminVar))
-    
-    # search for optimal value of lambda. 
+
+    # search for optimal value of lambda.
     # NB y0 set to default of 0 as not being implemented here
     lambda <- optimize(f=SSE, interval=c(low_lim, upper_lim), y0=0,
         y=df_qc, tol=step_threshold)
-    
+
     # make sure value of objective is numeric
     lambda <- as.numeric(lambda[[1]])
-    print(lambda)
-    
+    lambda_opt = lambda
+
     # If SSE optimisation fails use fixed lambda value
     lambda_std <- 5.0278 * 10^(-9)
     error_flag <- FALSE
-    
+
     # if optimisation reached upper limit then trigger use of fixed value
     if (abs(upper_lim - lambda) <= 1e-05) {
         cat("Error!Lambda tending to infinity!Using standard\n")
@@ -130,12 +130,12 @@ glog_transformation <- function(df, classes, qc_label, store_lambda=FALSE) {
         cat("Error!Lambda tending to -infinity!Using standard\n")
         error_flag <- TRUE
     }
-    
+
     # if flag triggered then apply scale factor
     if (error_flag) {
         # set lambda to default
         lambda <- lambda_std
-        
+
         # set scaling factor to 1 / mean (total signal) over all samples
         scal_fact <- apply(df, 2, sum, na.rm=TRUE)
         scal_fact <- mean(scal_fact)
@@ -143,14 +143,14 @@ glog_transformation <- function(df, classes, qc_label, store_lambda=FALSE) {
         # apply scaling factor
         df <- df * scal_fact
     }
-    
+
     # set minimum over all values to 0
     df <- df - min(df, na.rm=TRUE)
     # apply glog using optimised values
     df_glog <- as.data.frame(glog(df, 0, lambda))
-    
+
     if (store_lambda){
-        return(list(df_glog, lambda))
+        return(list(df_glog, lambda, lambda_opt, error_flag))
     } else {
         return(df_glog)
     }
diff --git a/tests/testthat/test-glog_transformation.R b/tests/testthat/test-glog_transformation.R
index 9733c44..7b57c9a 100644
--- a/tests/testthat/test-glog_transformation.R
+++ b/tests/testthat/test-glog_transformation.R
@@ -2,13 +2,13 @@ context("test-glog_transformation")
 
 test_that("Glog function returns expected output", {
   out <- mv_imputation(df=testData$data, method="knn")
-  expect_equal (glog_transformation (df=out, classes=testData$class, 
+  expect_equal (glog_transformation (df=out, classes=testData$class,
     qc_label="QC"), testData$glog_transformation)
 })
 
 test_that("Glog function fails if qc_label is wrong or QC samples don't exist", {
   out <- mv_imputation(df=testData$data, method="knn")
-  expect_error (glog_transformation (df=out, classes=testData$class, 
+  expect_error (glog_transformation (df=out, classes=testData$class,
     qc_label="A"))
 })
 
@@ -16,23 +16,23 @@ test_that("Scale factor is applied if optimisation fails", {
     df <- iris
     classes <- iris$Species
     df <- df[,-5]
-    expect_output(glog_transformation(df,classes,qc_label='versicolor'), 
+    expect_output(glog_transformation(df,classes,qc_label='versicolor'),
       regexp='Error!Lambda')
 })
 
 test_that("If feature variance is 0 replace it with small value", {
     out <- mv_imputation(df=testData$data, method="knn")
     out[3,] <- 3
-    expect_output(out <- glog_transformation (df=out, classes=testData$class, 
+    expect_output(out <- glog_transformation (df=out, classes=testData$class,
       qc_label="QC")[3,], regexp='Error!Lambda')
     testthat::expect_true(all(out[1:3] == out[4:6]))
 })
 
 test_that("glog function returns optimised lambda value if requested", {
   out <- mv_imputation(df=testData$data, method="knn")
-  out <- glog_transformation (df=out, classes=testData$class, 
+  out <- glog_transformation (df=out, classes=testData$class,
     qc_label="QC", store_lambda=TRUE)
   lambda <- as.integer(7865716)
-  testthat::expect_true(length(out) == 2)
+  testthat::expect_true(length(out) == 4)
   testthat::expect_true(lambda == as.integer(out[[2]]))
 })