From d7685361ae1876e0044b882a9b3d6b9603979e3d Mon Sep 17 00:00:00 2001 From: "Simon P. Couch" Date: Thu, 14 Mar 2024 13:36:58 -0500 Subject: [PATCH] add support for parallelism with future (#208) * add support for parallelism with future * use future's RNG when using doFuture --- DESCRIPTION | 6 ++++-- NEWS.md | 2 ++ R/fit_members.R | 19 ++++++++++++++----- man/fit_members.Rd | 4 ++-- 4 files changed, 22 insertions(+), 9 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5b1804af..4d16ec9f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: stacks Title: Tidy Model Stacking -Version: 1.0.3.9000 +Version: 1.0.3.9001 Authors@R: c( person("Simon", "Couch", , "simon.couch@posit.co", role = c("aut", "cre")), person("Max", "Kuhn", , "max@posit.co", role = "aut"), @@ -19,8 +19,10 @@ Depends: Imports: butcher (>= 0.1.3), cli, + doFuture, dplyr (>= 1.1.0), foreach, + future, generics, ggplot2, glmnet, @@ -57,4 +59,4 @@ Config/testthat/edition: 3 Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3.9000 +RoxygenNote: 7.3.1 diff --git a/NEWS.md b/NEWS.md index aca8d2ac..04960154 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # stacks (development version) +* Introduced support for parallel processing using the [future](https://www.futureverse.org/) framework. The stacks package previously supported parallelism with foreach, and users can use either framework for now. In a future release, stacks will begin the deprecation cycle for parallelism with foreach, so we encourage users to begin migrating their code now. See [the _Parallel Processing_ section in the tune package's "Optimizations" article](https://tune.tidymodels.org/articles/extras/optimizations.html#parallel-processing) to learn more (#208). + * Improved error message for unsupported model modes (#152). 
# stacks 1.0.3 diff --git a/R/fit_members.R b/R/fit_members.R index b4c3b5b2..99ac3deb 100644 --- a/R/fit_members.R +++ b/R/fit_members.R @@ -15,8 +15,8 @@ #' model contains the necessary components to predict on new data. #' #' @details -#' To fit members in parallel, please register a parallel backend function. -#' See the documentation of [foreach::foreach()] for examples. +#' To fit members in parallel, please create a plan with the future package. +#' See the documentation of [future::plan()] for examples. #' #' @template note_example_data #' @@ -117,15 +117,24 @@ fit_members <- function(model_stack, ...) { dplyr::full_join(metrics_dict, by = c("value" = ".config"), multiple = "all") } - if (foreach::getDoParWorkers() > 1) { - `%do_op%` <- foreach::`%dopar%` + if (foreach::getDoParWorkers() > 1 || future::nbrOfWorkers() > 1) { + `%do_op%` <- switch( + # note some backends can return +Inf + min(future::nbrOfWorkers(), 2), + foreach::`%dopar%`, + doFuture::`%dofuture%` + ) } else { `%do_op%` <- foreach::`%do%` } # fit each of them member_fits <- - foreach::foreach(mem = member_names, .inorder = FALSE) %do_op% { + foreach::foreach( + mem = member_names, + .inorder = FALSE, + .options.future = list(seed = TRUE) + ) %do_op% { asNamespace("stacks")$fit_member( name = mem, wflows = model_stack[["model_defs"]], diff --git a/man/fit_members.Rd b/man/fit_members.Rd index 646b4ee6..f67765f1 100644 --- a/man/fit_members.Rd +++ b/man/fit_members.Rd @@ -24,8 +24,8 @@ stack's predictions, members should be trained on the full training set using \code{fit_members()}. } \details{ -To fit members in parallel, please register a parallel backend function. -See the documentation of \code{\link[foreach:foreach]{foreach::foreach()}} for examples. +To fit members in parallel, please create a plan with the future package. +See the documentation of \code{\link[future:plan]{future::plan()}} for examples. } \section{Example Data}{