From b949f141dfaec3c5e77329cb8a67c7cdbce7af4a Mon Sep 17 00:00:00 2001 From: simonpcouch Date: Tue, 16 Jan 2024 15:49:33 -0600 Subject: [PATCH 1/6] check compatibility of `initial` and `param_info` --- R/sim_anneal_helpers.R | 22 +++++++++++++++- tests/testthat/_snaps/sa-overall.md | 15 +++++++++++ tests/testthat/test-sa-overall.R | 41 +++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+), 1 deletion(-) diff --git a/R/sim_anneal_helpers.R b/R/sim_anneal_helpers.R index 494804b..e8eed7a 100644 --- a/R/sim_anneal_helpers.R +++ b/R/sim_anneal_helpers.R @@ -144,11 +144,31 @@ random_real_neighbor <- function(current, hist_values, pset, retain = 1, encode_set_backwards <- function(x, pset, ...) { pset <- pset[pset$id %in% names(x), ] - new_vals <- purrr::map2(pset$object, x, dials::encode_unit, direction = "backward") + new_vals <- mapply(encode_unit_backwards, pset$object, x, + SIMPLIFY = FALSE, USE.NAMES = FALSE) names(new_vals) <- names(x) tibble::as_tibble(new_vals) } +encode_unit_backwards <- function(x, value) { + if (!dials::has_unknowns(x)) { + compl <- value[!is.na(value)] + if (any(compl < 0) | any(compl > 1)) { + cli::cli_abort(c( + "!" = "The parameter set used when tuning generating the initial \\ + results isn't compatible with the parameter set supplied \\ + as {.arg param_info}.", + "i" = "Possible values of parameters in {.arg param_info} should \\ + encompass all values evaluated in the initial grid." + ), + call = rlang::call2("tune_sim_anneal()") + ) + } + } + + dials::encode_unit(x, value, direction = "backward") +} + sample_by_distance <- function(candidates, existing, retain, pset) { if (nrow(existing) > 0) { existing <- tune::encode_set(existing, pset, as_matrix = TRUE) diff --git a/tests/testthat/_snaps/sa-overall.md b/tests/testthat/_snaps/sa-overall.md index f2209cd..bb577ed 100644 --- a/tests/testthat/_snaps/sa-overall.md +++ b/tests/testthat/_snaps/sa-overall.md @@ -218,3 +218,18 @@ 9 ( ) accept suboptimal roc_auc=0.8623 (+/-0.007265) 10 <3 new best roc_auc=0.86273 (+/-0.007569) +# incompatible parameter objects + + Code + res <- tune_sim_anneal(car_wflow, param_info = parameter_set_with_smaller_range, + resamples = car_folds, initial = tune_res_with_bigger_range, iter = 2) + Message + Optimizing rmse + Initial best: 2284.10000 + Condition + Error in `tune_sim_anneal()`: + ! The parameter set used when tuning generating the initial results isn't compatible with the parameter set supplied as `param_info`. + i Possible values of parameters in `param_info` should encompass all values evaluated in the initial grid. + Message + x Optimization stopped prematurely; returning current results. + diff --git a/tests/testthat/test-sa-overall.R b/tests/testthat/test-sa-overall.R index d666677..2a4630c 100644 --- a/tests/testthat/test-sa-overall.R +++ b/tests/testthat/test-sa-overall.R @@ -120,6 +120,47 @@ test_that("unfinalized parameters", { }) }) +test_that("incompatible parameter objects", { + skip_on_cran() + + skip_if_not_installed("ranger") + skip_if_not_installed("modeldata") + skip_if_not_installed("rsample") + + + rf_spec <- parsnip::rand_forest(mode = "regression", mtry = tune::tune()) + + grid_with_bigger_range <- + dials::grid_latin_hypercube(dials::mtry(range = c(1, 16))) + + set.seed(1) + car_folds <- rsample::vfold_cv(car_prices, v = 2) + + car_wflow <- workflows::workflow() %>% + workflows::add_formula(Price ~ .) %>% + workflows::add_model(rf_spec) + + tune_res_with_bigger_range <- tune::tune_grid( + car_wflow, + resamples = car_folds, + grid = grid_with_bigger_range + ) + + parameter_set_with_smaller_range <- + dials::parameters(dials::mtry(range = c(1, 5))) + + expect_snapshot(error = TRUE, { + res <- + tune_sim_anneal( + car_wflow, + param_info = parameter_set_with_smaller_range, + resamples = car_folds, + initial = tune_res_with_bigger_range, + iter = 2 + ) + }) +}) + test_that("set event-level", { # See issue 40 skip_if_not_installed("rpart") From 6f3236ddc24ddf49ebe17f72cd39b0639f47d2b6 Mon Sep 17 00:00:00 2001 From: simonpcouch Date: Wed, 17 Jan 2024 09:14:25 -0600 Subject: [PATCH 2/6] set seed elsewhere where needed --- tests/testthat/_snaps/sa-overall.md | 2 +- tests/testthat/test-sa-overall.R | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/testthat/_snaps/sa-overall.md b/tests/testthat/_snaps/sa-overall.md index bb577ed..8f627cb 100644 --- a/tests/testthat/_snaps/sa-overall.md +++ b/tests/testthat/_snaps/sa-overall.md @@ -225,7 +225,7 @@ resamples = car_folds, initial = tune_res_with_bigger_range, iter = 2) Message Optimizing rmse - Initial best: 2284.10000 + Initial best: 2312.70000 Condition Error in `tune_sim_anneal()`: ! The parameter set used when tuning generating the initial results isn't compatible with the parameter set supplied as `param_info`. diff --git a/tests/testthat/test-sa-overall.R b/tests/testthat/test-sa-overall.R index 2a4630c..6c33754 100644 --- a/tests/testthat/test-sa-overall.R +++ b/tests/testthat/test-sa-overall.R @@ -127,9 +127,9 @@ test_that("incompatible parameter objects", { skip_if_not_installed("modeldata") skip_if_not_installed("rsample") - rf_spec <- parsnip::rand_forest(mode = "regression", mtry = tune::tune()) + set.seed(1) grid_with_bigger_range <- dials::grid_latin_hypercube(dials::mtry(range = c(1, 16))) @@ -146,9 +146,11 @@ test_that("incompatible parameter objects", { grid = grid_with_bigger_range ) + set.seed(1) parameter_set_with_smaller_range <- dials::parameters(dials::mtry(range = c(1, 5))) + set.seed(1) expect_snapshot(error = TRUE, { res <- tune_sim_anneal( From aa37d0f83f1ebd337e8e98fdb8d3fc5d7fd1b81e Mon Sep 17 00:00:00 2001 From: simonpcouch Date: Fri, 19 Jan 2024 14:39:31 -0600 Subject: [PATCH 3/6] one more RNG change --- tests/testthat/_snaps/sa-overall.md | 2 +- tests/testthat/test-sa-overall.R | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/testthat/_snaps/sa-overall.md b/tests/testthat/_snaps/sa-overall.md index 8f627cb..22ca5f6 100644 --- a/tests/testthat/_snaps/sa-overall.md +++ b/tests/testthat/_snaps/sa-overall.md @@ -225,7 +225,7 @@ resamples = car_folds, initial = tune_res_with_bigger_range, iter = 2) Message Optimizing rmse - Initial best: 2312.70000 + Initial best: 2313.60000 Condition Error in `tune_sim_anneal()`: ! The parameter set used when tuning generating the initial results isn't compatible with the parameter set supplied as `param_info`. diff --git a/tests/testthat/test-sa-overall.R b/tests/testthat/test-sa-overall.R index 6c33754..7b1788e 100644 --- a/tests/testthat/test-sa-overall.R +++ b/tests/testthat/test-sa-overall.R @@ -140,6 +140,7 @@ test_that("incompatible parameter objects", { workflows::add_formula(Price ~ .) %>% workflows::add_model(rf_spec) + set.seed(1) tune_res_with_bigger_range <- tune::tune_grid( car_wflow, resamples = car_folds, From 6e30df35e3680ca183beb9686b344b1f0c9c969d Mon Sep 17 00:00:00 2001 From: simonpcouch Date: Fri, 19 Jan 2024 14:43:33 -0600 Subject: [PATCH 4/6] add NEWS entry --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 473e173..f953afe 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # finetune (development version) +* Improved error message from `tune_sim_anneal()` when values in the supplied `param_info` do not encompass all values evaluated in the `initial` grid. This most often happens when a user mistakenly supplies different parameter sets to the function that generated the initial results and `tune_sim_anneal()`. + * `autoplot()` methods for racing objects will now use integers in x-axis breaks (#75). * Enabling the `verbose_elim` control option for `tune_race_anova()` will now additionally introduce a message confirming that the function is evaluating against the burn-in resamples. From 9d89a9be4ad90e8f389b60a9b0772c2bc81b6d79 Mon Sep 17 00:00:00 2001 From: simonpcouch Date: Fri, 19 Jan 2024 15:28:59 -0600 Subject: [PATCH 5/6] scrub Initial best from the snapshot --- tests/testthat/_snaps/sa-overall.md | 2 +- tests/testthat/test-sa-overall.R | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/testthat/_snaps/sa-overall.md b/tests/testthat/_snaps/sa-overall.md index 22ca5f6..c5c4973 100644 --- a/tests/testthat/_snaps/sa-overall.md +++ b/tests/testthat/_snaps/sa-overall.md @@ -225,7 +225,7 @@ resamples = car_folds, initial = tune_res_with_bigger_range, iter = 2) Message Optimizing rmse - Initial best: 2313.60000 + Condition Error in `tune_sim_anneal()`: ! The parameter set used when tuning generating the initial results isn't compatible with the parameter set supplied as `param_info`. diff --git a/tests/testthat/test-sa-overall.R b/tests/testthat/test-sa-overall.R index 7b1788e..4f59452 100644 --- a/tests/testthat/test-sa-overall.R +++ b/tests/testthat/test-sa-overall.R @@ -151,8 +151,14 @@ test_that("incompatible parameter objects", { parameter_set_with_smaller_range <- dials::parameters(dials::mtry(range = c(1, 5))) + scrub_best <- function(lines) { + has_best <- grepl("Initial best", lines) + lines[has_best] <- "" + lines + } + set.seed(1) - expect_snapshot(error = TRUE, { + expect_snapshot(error = TRUE, transform = scrub_best, { res <- tune_sim_anneal( car_wflow, From 245c6fff54a84f9b18d1b973d4eee7e933845b58 Mon Sep 17 00:00:00 2001 From: simonpcouch Date: Wed, 24 Jan 2024 14:22:41 -0600 Subject: [PATCH 6/6] apply suggestions from code review * clarify name of helper * include parameter id in error message --- R/sim_anneal_helpers.R | 15 +++++++-------- tests/testthat/_snaps/sa-overall.md | 2 +- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/R/sim_anneal_helpers.R b/R/sim_anneal_helpers.R index e8eed7a..fe5b00a 100644 --- a/R/sim_anneal_helpers.R +++ b/R/sim_anneal_helpers.R @@ -144,20 +144,21 @@ random_real_neighbor <- function(current, hist_values, pset, retain = 1, encode_set_backwards <- function(x, pset, ...) { pset <- pset[pset$id %in% names(x), ] - new_vals <- mapply(encode_unit_backwards, pset$object, x, - SIMPLIFY = FALSE, USE.NAMES = FALSE) + mapply(check_backwards_encode, pset$object, x, pset$id, + SIMPLIFY = FALSE, USE.NAMES = FALSE) + new_vals <- purrr::map2(pset$object, x, dials::encode_unit, direction = "backward") names(new_vals) <- names(x) tibble::as_tibble(new_vals) } -encode_unit_backwards <- function(x, value) { +check_backwards_encode <- function(x, value, id) { if (!dials::has_unknowns(x)) { compl <- value[!is.na(value)] if (any(compl < 0) | any(compl > 1)) { cli::cli_abort(c( - "!" = "The parameter set used when tuning generating the initial \\ - results isn't compatible with the parameter set supplied \\ - as {.arg param_info}.", + "!" = "The range for parameter {.val {noquote(id)}} used when \\ + generating initial results isn't compatible with the range \\ + supplied in {.arg param_info}.", "i" = "Possible values of parameters in {.arg param_info} should \\ encompass all values evaluated in the initial grid." ), @@ -165,8 +166,6 @@ encode_unit_backwards <- function(x, value) { ) } } - - dials::encode_unit(x, value, direction = "backward") } sample_by_distance <- function(candidates, existing, retain, pset) { diff --git a/tests/testthat/_snaps/sa-overall.md b/tests/testthat/_snaps/sa-overall.md index c5c4973..c4a0be3 100644 --- a/tests/testthat/_snaps/sa-overall.md +++ b/tests/testthat/_snaps/sa-overall.md @@ -228,7 +228,7 @@ Condition Error in `tune_sim_anneal()`: - ! The parameter set used when tuning generating the initial results isn't compatible with the parameter set supplied as `param_info`. + ! The range for parameter mtry used when generating initial results isn't compatible with the range supplied in `param_info`. i Possible values of parameters in `param_info` should encompass all values evaluated in the initial grid. Message x Optimization stopped prematurely; returning current results.