Adds verbose parameter and implements the best layer-specific learner as meta learner
fouodo committed Sep 26, 2024
1 parent 0dd3ace commit e090d45
Showing 12 changed files with 250 additions and 33 deletions.
72 changes: 72 additions & 0 deletions R/BestSpecificLearner.R
@@ -0,0 +1,72 @@
#' @title The best layer-specific model is used as meta model.
#'
#' @description
#' The meta learner is the best layer-specific learner.
#'
#' @param x `data.frame(1)` \cr
#' \code{data.frame} of predictors.
#' @param y `vector(1)` \cr
#' Target observations. Either a binary or a two-level factor variable.
#' @param perf `function(1)` \cr
#' Function to compute layer-specific performance of learners. If NULL, the Brier Score is used by default.
#' Otherwise, the performance function must accept two parameters: \code{observed} (observed values) and \code{predicted} (predicted values).
#'
#' @return
#' A model object of class \code{bestSpecificLearner}.
#'
#' @export
#'
#' @examples
#' set.seed(20240624L)
#' x = data.frame(x1 = runif(n = 50L, min = 0, max = 1))
#' y = sample(x = 0L:1L, size = 50L, replace = TRUE)
#' my_best_model = bestSpecificLearner(x = x, y = y)
#'
bestSpecificLearner = function (x, y, perf = NULL) {
  if (is.null(perf)) {
    # y must be binomial for Brier Score estimation.
    # If dichotomous, the first category (case) is coded 1 and
    # the second (control) is coded 0.
    if ((length(unique(y)) > 2L) || is.character(y)) {
      stop("y must be either a binary or a two-level factor variable.")
    } else {
      if (!all(y %in% 0:1)) {
        y = 2L - as.integer(y)
      } else {
        if (is.factor(y)) {
          y = as.integer(y) - 1L
        }
      }
      # Brier Score: mean squared difference between predictions and
      # observed 0/1 outcomes, computed per layer.
      perf_values = lapply(X = x, FUN = function (predicted) {
        mean(x = (predicted - y)^2L, na.rm = TRUE)
      })
      perf_values = unlist(perf_values)
    }
  } else {
    if (is.function(perf)) {
      arg_names = names(formals(perf))
      if (all(c("observed", "predicted") %in% arg_names)) {
        # A user-supplied function estimates the performance of each
        # layer-specific learner.
        perf_values = lapply(X = x, FUN = function (predicted) {
          perf_estimate = do.call(what = perf,
                                  args = list(observed = y,
                                              predicted = predicted))
          return(perf_estimate)
        })
        perf_values = unlist(perf_values)
      } else {
        stop("Arguments of the perf function must be 'observed' and 'predicted'.")
      }
    } else {
      stop("perf argument must be a function.")
    }
  }
  # Smaller performance values (errors) yield larger weights; the layer
  # with the largest weight becomes the meta model.
  weights_values = (1L / perf_values) / sum(1L / perf_values)
  max_index = which.max(weights_values)
  weights_values = rep(0L, length(weights_values))
  weights_values[max_index] = 1L
  names(weights_values) = names(x)
  class(weights_values) = "bestSpecificLearner"
  return(weights_values)
}
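Beyond the default Brier Score, perf accepts any metric that exposes 'observed' and 'predicted' arguments. A minimal sketch with a made-up misclassification-rate metric (misclass is illustrative, not part of the package; the columns of x stand in for layer-specific predictions):

set.seed(20240624L)
x = data.frame(layer1 = runif(n = 50L), layer2 = runif(n = 50L))
y = sample(x = 0L:1L, size = 50L, replace = TRUE)
# Custom metric: must accept 'observed' and 'predicted'.
misclass = function (observed, predicted) {
  mean(observed != as.integer(predicted > 0.5), na.rm = TRUE)
}
my_best_model = bestSpecificLearner(x = x, y = y, perf = misclass)
print(my_best_model)  # named 0/1 weights; the best layer gets weight 1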
10 changes: 8 additions & 2 deletions R/Lrner.R
@@ -87,12 +87,16 @@ Lrner <- R6Class("Lrner",
#' Individual ID subset on which the training will be performed.
#' @param use_var_sel `boolean(1)` \cr
#' If TRUE, variable selection is performed before training.
#' @param verbose `boolean(1)` \cr
#' Warning messages are displayed if set to TRUE.
#'
#' @return
#' The resulting model, from class [Model], is returned.
#' @export
#'
train = function (ind_subset = NULL, use_var_sel = FALSE) {
train = function (ind_subset = NULL,
use_var_sel = FALSE,
verbose = TRUE) {
train_data = private$train_layer$getTrainData()
# Train only on complete data
train_data = train_data$clone(deep = FALSE)
@@ -127,7 +131,9 @@
private$var_subset = selected_var
} else {
# nocov start
warning("No selected variable found, so all variables have been used for training.\n")
if (verbose) {
warning("No selected variable found, so all variables have been used for training.\n")
}
private$var_subset = "ALL"
# nocov end
}
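The new verbose flag only gates the warning; training itself is unchanged. A minimal usage sketch, assuming an existing Lrner object lrner already attached to a TrainLayer (object construction omitted):

# Hypothetical: 'lrner' is a configured Lrner bound to a TrainLayer.
# Train silently; the "no selected variable" warning is suppressed.
model = lrner$train(ind_subset = NULL, use_var_sel = TRUE, verbose = FALSE)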
34 changes: 29 additions & 5 deletions R/TrainLayer.R
@@ -82,12 +82,16 @@ TrainLayer <- R6Class("TrainLayer",
#' ID subset of individuals to be used for training.
#' @param use_var_sel `boolean(1)` \cr
#' If TRUE, variable selection is performed before training.
#' @param verbose `boolean(1)` \cr
#' Progress and warning messages are displayed if set to TRUE.
#'
#' @return
#' The current layer is returned with the resulting model.
#' @export
#'
train = function (ind_subset = NULL, use_var_sel = FALSE) {
train = function (ind_subset = NULL,
use_var_sel = FALSE,
verbose = TRUE) {
layer_kc = self$getKeyClass()
# Stop if either the learner or the data is missing on this layer.
if (!("Lrner" %in% layer_kc[ , "class"])){
@@ -101,11 +105,20 @@
lrner_key = layer_kc[layer_kc$class == "Lrner" , "key"]
lrner = self$getFromHashTable(key = lrner_key[1L])
if (use_var_sel & (!self$checkVarSelExist())) {
warning(sprintf("No var. sel. on layer %s.", self$getId()))
if (verbose) {
warning(sprintf("No var. sel. on layer %s.", self$getId()))
}
use_var_sel = FALSE
}
if (verbose) {
message(sprintf("Training on layer %s started.\n", self$getId()))
}
model = lrner$train(ind_subset = ind_subset,
use_var_sel = use_var_sel)
use_var_sel = use_var_sel,
verbose = verbose)
if (verbose) {
message(sprintf("Training on layer %s done.\n", self$getId()))
}
# Updating the training status.
if (!private$status) {
# The training layer has not been trained before.
@@ -122,16 +135,21 @@
#'
#' @param ind_subset `vector(1)` \cr
#' ID subset of individuals to be used for variable selection.
#' @param verbose `boolean(1)` \cr
#' Progress and warning messages are displayed if set to TRUE.
#'
#' @return
#' The current layer is returned with the resulting model.
#' @export
#'
varSelection = function (ind_subset = NULL) {
varSelection = function (ind_subset = NULL,
verbose = TRUE) {
layer_kc = self$getKeyClass()
# Stop if either selector or data is missing on this layer.
if (!("VarSel" %in% layer_kc[ , "class"])) {
warning(sprintf("No var. sel. method on layer %s.", self$getId()))
if (verbose) {
warning(sprintf("No var. sel. method on layer %s.", self$getId()))
}
return(NULL)
} else {
if (!("TrainData" %in% layer_kc[ , "class"])) {
@@ -141,7 +159,13 @@
# Variable selection is performed on the current dataset
varsel_key = layer_kc[layer_kc$class == "VarSel" , "key"]
varsel = self$getFromHashTable(key = varsel_key[1L])
if (verbose) {
message(sprintf("Variable selection on layer %s started.\n", self$getId()))
}
selected = varsel$varSelection(ind_subset = ind_subset)
if (verbose) {
message(sprintf("Variable selection on layer %s done.\n", self$getId()))
}
return(selected)
},
#' @description
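At the layer level, verbose = FALSE silences both the progress messages and the warnings added above. A minimal sketch, assuming a fully configured TrainLayer tl with training data, a VarSel method, and a Lrner already registered (setup omitted):

# Hypothetical: 'tl' is a configured TrainLayer.
selected = tl$varSelection(verbose = FALSE)    # silent variable selection
tl$train(use_var_sel = TRUE, verbose = FALSE)  # silent training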
10 changes: 9 additions & 1 deletion R/TrainMetaLayer.R
@@ -73,12 +73,15 @@ TrainMetaLayer <- R6Class("TrainMetaLayer",
#'
#' @param ind_subset `vector(1)` \cr
#' ID subset of individuals to be used for training.
#' @param verbose `boolean(1)` \cr
#' Progress and warning messages are displayed if set to TRUE.
#'
#' @return
#' The current layer is returned with the resulting model.
#' @export
#'
train = function (ind_subset = NULL) {
train = function (ind_subset = NULL,
verbose = TRUE) {
layer_kc = self$getKeyClass()
# Stop if either the learner or the data is missing on this layer.
if (!("Lrner" %in% layer_kc[ , "class"])){
@@ -239,6 +242,11 @@ TrainMetaLayer <- R6Class("TrainMetaLayer",
ind_col,
data_frame,
target) {
# nocov start
if (sum(!complete.cases(data_frame)) == nrow(data_frame)) {
warning("No individual fully overlaps across all layers.")
}
# nocov end
TrainData$new(id = id,
data_frame = data_frame,
train_layer = self)
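The new guard warns only when not a single individual is complete across all layers. A minimal base-R illustration of the condition (the data frame below is made up):

# Two layers' predictions for three individuals; every row contains an NA,
# so sum(!complete.cases(df)) == nrow(df) is TRUE and the meta layer warns.
df = data.frame(layer1 = c(0.2, NA, 0.7),
                layer2 = c(NA, 0.5, NA))
sum(!complete.cases(df)) == nrow(df)  # TRUE -> "No individual fully overlaps..."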