From 4c522d2c6d248e5ee55d914b3c97f4a7b4f2ca85 Mon Sep 17 00:00:00 2001 From: grlloyd Date: Mon, 8 Jul 2019 13:42:15 +0100 Subject: [PATCH] update glog to output error_flag and optimised value for lambda Also updated tests --- R/glog_transformation.R | 54 +++++++++++------------ tests/testthat/test-glog_transformation.R | 12 ++--- 2 files changed, 33 insertions(+), 33 deletions(-) diff --git a/R/glog_transformation.R b/R/glog_transformation.R index fd54f07..b6f1815 100644 --- a/R/glog_transformation.R +++ b/R/glog_transformation.R @@ -1,5 +1,5 @@ -#' @importFrom stats optimize -#' +#' @importFrom stats optimize +#' NULL @@ -17,7 +17,7 @@ glog <- function(y, y0=0, lambda){ } #' Internal function for max. likelihood optimisation of glog params -#' Calculates the alternative Jacobian fcn described in +#' Calculates the alternative Jacobian fcn described in #' https://doi.org/10.1186/1471-2105-8-234 #' #' @param y values. @@ -28,7 +28,7 @@ glog <- function(y, y0=0, lambda){ jglog <- function(y, y0=0, lambda){ z <- glog(y, y0, lambda) D <- log(sqrt((y - y0)^2 + lambda)) - + # average over all features (bins) gmn <- exp(apply(D, 2, mean, na.rm=TRUE)) zj <- z * gmn # ML estimate @@ -45,16 +45,16 @@ jglog <- function(y, y0=0, lambda){ SSE <- function(lambda, y0=0, y) { # calculate ML estimate z <- jglog(y, y0, lambda) - + # average over all features mean_spec <- apply(z, 1, mean, na.rm=TRUE) - + # calculate sum of squared difference between true and estimate s <- sum((z - mean_spec)^2, na.rm=TRUE) return(s) } -#' Performs glog transformation on the data set, +#' Performs glog transformation on the data set, #' using QC samples to estimate lambda. #' #' https://doi.org/10.1186/1471-2105-8-234 @@ -64,12 +64,12 @@ SSE <- function(lambda, y0=0, y) { #' @param qc_label Class label for QC sample #' @param store_lambda If value of optimised lambda parameter needs to be #' returned -#' @examples +#' @examples #' attach (testData) #' out <- mv_imputation(df=testData$data, method='knn') #' out <- glog_transformation (df=out, classes=testData$class, #' qc_label='QC') -#' +#' #' @return data frame, peak inntensity matrix after glog transformation #' @export glog_transformation @@ -80,22 +80,22 @@ glog_transformation <- function(df, classes, qc_label, store_lambda=FALSE) { Please check your data and qc_label parameter.") } df <- check_peak_matrix(peak_data=df, classes=classes) - + # data for the QC samples only df_qc <- df[, classes == qc_label] - + # set offset to the minimum of the QC samples offset <- min(df_qc, na.rm=TRUE) - + # set minimum of qc data to 0 df_qc <- df_qc - offset - + # stop optimisation when improvement less than this value step_threshold <- 1e-16 - + # variance of all features VF <- apply(df_qc, 1, var, na.rm=TRUE) - + # if variance of any feature is 0 if (min(VF) == 0) { # set min value to minimum greater than 0 @@ -103,24 +103,24 @@ glog_transformation <- function(df, classes, qc_label, store_lambda=FALSE) { } else { newminVar <- min(VF) } - + # set lower and upper limit of optimisation low_lim <- 0 upper_lim <- max(pmax(VF, max(VF) / newminVar)) - - # search for optimal value of lambda. + + # search for optimal value of lambda. # NB y0 set to default of 0 as not being implemented here lambda <- optimize(f=SSE, interval=c(low_lim, upper_lim), y0=0, y=df_qc, tol=step_threshold) - + # make sure value of objective is numeric lambda <- as.numeric(lambda[[1]]) - print(lambda) - + lambda_opt = lambda + # If SSE optimisation fails use fixed lambda value lambda_std <- 5.0278 * 10^(-9) error_flag <- FALSE - + # if optimisation reached upper limit then trigger use of fixed value if (abs(upper_lim - lambda) <= 1e-05) { cat("Error!Lambda tending to infinity!Using standard\n") @@ -130,12 +130,12 @@ glog_transformation <- function(df, classes, qc_label, store_lambda=FALSE) { cat("Error!Lambda tending to -infinity!Using standard\n") error_flag <- TRUE } - + # if flag triggered then apply scale factor if (error_flag) { # set lambda to default lambda <- lambda_std - + # set scaling factor to 1 / mean (total signal) over all samples scal_fact <- apply(df, 2, sum, na.rm=TRUE) scal_fact <- mean(scal_fact) @@ -143,14 +143,14 @@ glog_transformation <- function(df, classes, qc_label, store_lambda=FALSE) { # apply scaling factor df <- df * scal_fact } - + # set minimum over all values to 0 df <- df - min(df, na.rm=TRUE) # apply glog using optimised values df_glog <- as.data.frame(glog(df, 0, lambda)) - + if (store_lambda){ - return(list(df_glog, lambda)) + return(list(df_glog, lambda, lambda_opt, error_flag)) } else { return(df_glog) } diff --git a/tests/testthat/test-glog_transformation.R b/tests/testthat/test-glog_transformation.R index 9733c44..7b57c9a 100644 --- a/tests/testthat/test-glog_transformation.R +++ b/tests/testthat/test-glog_transformation.R @@ -2,13 +2,13 @@ context("test-glog_transformation") test_that("Glog function returns expected output", { out <- mv_imputation(df=testData$data, method="knn") - expect_equal (glog_transformation (df=out, classes=testData$class, + expect_equal (glog_transformation (df=out, classes=testData$class, qc_label="QC"), testData$glog_transformation) }) test_that("Glog function fails if qc_label is wrong or QC samples don't exist", { out <- mv_imputation(df=testData$data, method="knn") - expect_error (glog_transformation (df=out, classes=testData$class, + expect_error (glog_transformation (df=out, classes=testData$class, qc_label="A")) }) @@ -16,23 +16,23 @@ test_that("Scale factor is applied if optimisation fails", { df <- iris classes <- iris$Species df <- df[,-5] - expect_output(glog_transformation(df,classes,qc_label='versicolor'), + expect_output(glog_transformation(df,classes,qc_label='versicolor'), regexp='Error!Lambda') }) test_that("If feature variance is 0 replace it with small value", { out <- mv_imputation(df=testData$data, method="knn") out[3,] <- 3 - expect_output(out <- glog_transformation (df=out, classes=testData$class, + expect_output(out <- glog_transformation (df=out, classes=testData$class, qc_label="QC")[3,], regexp='Error!Lambda') testthat::expect_true(all(out[1:3] == out[4:6])) }) test_that("glog function returns optimised lambda value if requested", { out <- mv_imputation(df=testData$data, method="knn") - out <- glog_transformation (df=out, classes=testData$class, + out <- glog_transformation (df=out, classes=testData$class, qc_label="QC", store_lambda=TRUE) lambda <- as.integer(7865716) - testthat::expect_true(length(out) == 2) + testthat::expect_true(length(out) == 4) testthat::expect_true(lambda == as.integer(out[[2]])) })