From ae3867fc1e3cd5ce90f8f053b4aa82b6105ffdd5 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Thu, 12 May 2022 16:37:05 +0100
Subject: [PATCH 01/11] add cache and acceleration to the stack fields

---
 src/composition/learning_networks/machines.jl |  5 ++--
 src/composition/models/stacking.jl            | 30 ++++++++++++-------
 2 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/src/composition/learning_networks/machines.jl b/src/composition/learning_networks/machines.jl
index 26c4a2a1..a71852fc 100644
--- a/src/composition/learning_networks/machines.jl
+++ b/src/composition/learning_networks/machines.jl
@@ -388,11 +388,12 @@ end
 """
 function return!(mach::Machine{<:Surrogate},
                  model::Union{Model,Nothing},
-                 verbosity)
+                 verbosity;
+                 acceleration=CPU1())
 
     network_model_names_ = network_model_names(model, mach)
 
-    verbosity isa Nothing || fit!(mach, verbosity=verbosity)
+    verbosity isa Nothing || fit!(mach, verbosity=verbosity, acceleration=acceleration)
     setfield!(mach.fitresult, :network_model_names, network_model_names_)
 
     # anonymize the data
diff --git a/src/composition/models/stacking.jl b/src/composition/models/stacking.jl
index 6221aabf..90d8a579 100644
--- a/src/composition/models/stacking.jl
+++ b/src/composition/models/stacking.jl
@@ -31,9 +31,11 @@ mutable struct DeterministicStack{modelnames, inp_scitype, tg_scitype} <: Determ
    metalearner::Deterministic
    resampling
    measures::Union{Nothing,AbstractVector}
-   function DeterministicStack(modelnames, models, metalearner, resampling, measures)
+   cache::Bool
+   acceleration::AbstractResource
+   function DeterministicStack(modelnames, models, metalearner, resampling, measures, cache, acceleration)
         inp_scitype, tg_scitype = input_target_scitypes(models, metalearner)
-        return new{modelnames, inp_scitype, tg_scitype}(models, metalearner, resampling, measures)
+        return new{modelnames, inp_scitype, tg_scitype}(models, metalearner, resampling, measures, cache, acceleration)
    end
 end
 
@@ -42,9 +44,11 @@ mutable struct ProbabilisticStack{modelnames, inp_scitype, tg_scitype} <: Probab
     metalearner::Probabilistic
     resampling
     measures::Union{Nothing,AbstractVector}
-    function ProbabilisticStack(modelnames, models, metalearner, resampling, measures)
+    cache::Bool
+    acceleration::AbstractResource
+    function ProbabilisticStack(modelnames, models, metalearner, resampling, measures, cache, acceleration)
         inp_scitype, tg_scitype = input_target_scitypes(models, metalearner)
-        return new{modelnames, inp_scitype, tg_scitype}(models, metalearner, resampling, measures)
+        return new{modelnames, inp_scitype, tg_scitype}(models, metalearner, resampling, measures, cache, acceleration)
     end
  end
 
@@ -147,7 +151,7 @@ report(mach).cv_report
 ```
 
 """
-function Stack(;metalearner=nothing, resampling=CV(), measure=nothing, measures=measure, named_models...)
+function Stack(;metalearner=nothing, resampling=CV(), measure=nothing, measures=measure, cache=true, acceleration=CPU1(), named_models...)
     metalearner === nothing &&
         throw(ArgumentError("No metalearner specified. Use Stack(metalearner=...)"))
 
@@ -159,9 +163,9 @@ function Stack(;metalearner=nothing, resampling=CV(), measure=nothing, measures=
     end
 
     if metalearner isa Deterministic
-        stack =  DeterministicStack(modelnames, models, metalearner, resampling, measures)
+        stack =  DeterministicStack(modelnames, models, metalearner, resampling, measures, cache, acceleration)
     elseif metalearner isa Probabilistic
-        stack = ProbabilisticStack(modelnames, models, metalearner, resampling, measures)
+        stack = ProbabilisticStack(modelnames, models, metalearner, resampling, measures, cache, acceleration)
     else
         throw(ArgumentError("The metalearner should be a subtype
                     of $(Union{Deterministic, Probabilistic})"))
@@ -209,6 +213,8 @@ function Base.getproperty(stack::Stack{modelnames}, name::Symbol) where modelnam
     name === :metalearner && return getfield(stack, :metalearner)
     name === :resampling && return getfield(stack, :resampling)
     name == :measures && return getfield(stack, :measures)
+    name === :cache && return getfield(stack, :cache)
+    name == :acceleration && return getfield(stack, :acceleration)
     models = getfield(stack, :models)
     for j in eachindex(modelnames)
         name === modelnames[j] && return models[j]
@@ -221,6 +227,8 @@ function Base.setproperty!(stack::Stack{modelnames}, _name::Symbol, val) where m
     _name === :metalearner && return setfield!(stack, :metalearner, val)
     _name === :resampling && return setfield!(stack, :resampling, val)
     _name === :measures && return setfield!(stack, :measures, val)
+    _name === :cache && return setfield!(stack, :cache, val)
+    _name === :acceleration && return setfield!(stack, :acceleration, val)
     idx = findfirst(==(_name), modelnames)
     idx isa Nothing || return getfield(stack, :models)[idx] = val
     error("type Stack has no property $name")
@@ -384,7 +392,7 @@ function oos_set(m::Stack, Xs::Source, ys::Source, tt_pairs)
         # predictions are subsequently used as an input to the metalearner
         Zfold = []
         for model in getfield(m, :models)
-            mach = machine(model, Xtrain, ytrain)
+            mach = machine(model, Xtrain, ytrain, cache=m.cache)
             ypred = predict(mach, Xtest)
             # Internal evaluation on the fold if required
             push!(folds_evaluations, store_for_evaluation(mach, Xtest, ytest, m.measures))
@@ -420,12 +428,12 @@ function fit(m::Stack, verbosity::Int, X, y)
         
     Zval, yval, folds_evaluations = oos_set(m, Xs, ys, tt_pairs)
 
-    metamach = machine(m.metalearner, Zval, yval)
+    metamach = machine(m.metalearner, Zval, yval, cache=m.cache)
 
     # Each model is retrained on the original full training set
     Zpred = []
     for model in getfield(m, :models)
-        mach = machine(model, Xs, ys)
+        mach = machine(model, Xs, ys, cache=m.cache)
         ypred = predict(mach, Xs)
         ypred = pre_judge_transform(ypred, typeof(model), target_scitype(model))
         push!(Zpred, ypred)
@@ -439,5 +447,5 @@ function fit(m::Stack, verbosity::Int, X, y)
     # We can infer the Surrogate by two calls to supertype
     mach = machine(supertype(supertype(typeof(m)))(), Xs, ys; predict=ŷ, internal_report...)
     
-    return!(mach, m, verbosity)
+    return!(mach, m, verbosity, acceleration=m.acceleration)
 end

From 014a0138d01bd76215622b4627c28dbcc44349a4 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Thu, 12 May 2022 16:54:53 +0100
Subject: [PATCH 02/11] add test Project.toml and some tests

---
 src/composition/learning_networks/nodes.jl | 18 ++++++++
 test/Project.toml                          | 25 +++++++++++
 test/composition/models/stacking.jl        | 48 +++++++++++++++++++++-
 3 files changed, 90 insertions(+), 1 deletion(-)
 create mode 100644 test/Project.toml

diff --git a/src/composition/learning_networks/nodes.jl b/src/composition/learning_networks/nodes.jl
index c6690b6a..7faf2295 100644
--- a/src/composition/learning_networks/nodes.jl
+++ b/src/composition/learning_networks/nodes.jl
@@ -230,6 +230,24 @@ function fit!(y::Node, ::CPU1; kwargs...)
 
     return y
 end
+
+function fit!(y::Node, ::CPUThreads; kwargs...)
+    network_machines = machines(y)
+
+    # Flush each machine's `fit_okay` channel before any training task is
+    # spawned.
+    foreach(mach -> flush!(mach.fit_okay), network_machines)
+
+    # Fit every machine in its own task. `@sync` returns only once all of the
+    # spawned tasks have finished.
+    # NOTE(review): the positional `true` is presumably the flag telling
+    # `fit_only!` to wait on upstream machines -- confirm at its definition.
+    @sync for mach in network_machines
+        Threads.@spawn fit_only!(mach, true; kwargs...)
+    end
+    return y
+end
+
 fit!(S::Source; args...) = S
 
 # allow arguments of `Nodes` and `Machine`s to appear
diff --git a/test/Project.toml b/test/Project.toml
new file mode 100644
index 00000000..b17e547f
--- /dev/null
+++ b/test/Project.toml
@@ -0,0 +1,25 @@
+[deps]
+DecisionTree = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb"
+Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
+Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
+Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
+MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411"
+NearestNeighbors = "b8a86587-4115-5ab1-83bc-aa920d37bbce"
+StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+TypedTables = "9d95f2ec-7b3d-5a63-8d20-e2491e220bb9"
+MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea"
+CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
+ComputationalResources = "ed09eef8-17a6-5b46-8889-db040fac31e3"
+Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
+Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
+ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
+LossFunctions = "30fc2ffe-d236-52d8-8643-a9d8f7c094a7"
+ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
+StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
+Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
+Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
\ No newline at end of file
diff --git a/test/composition/models/stacking.jl b/test/composition/models/stacking.jl
index 66190ea5..ae2624b2 100644
--- a/test/composition/models/stacking.jl
+++ b/test/composition/models/stacking.jl
@@ -190,8 +190,10 @@ end
     models = [DeterministicConstantRegressor(), FooBarRegressor(;lambda=0)]
     metalearner = DeterministicConstantRegressor()
     resampling = CV()
+    cache = true
+    acceleration = CPU1()
 
-    MLJBase.DeterministicStack(modelnames, models, metalearner, resampling, nothing)
+    MLJBase.DeterministicStack(modelnames, models, metalearner, resampling, nothing, cache, acceleration)
 
     # Test input_target_scitypes with non matching target_scitypes
     models = [KNNRegressor()]
@@ -528,5 +530,49 @@ end
     end
 end
 
+@testset "Test cache is forwarded to submodels" begin
+    X, y = make_regression(100, 3; rng=rng)
+    mystack = Stack(;
+        metalearner=FooBarRegressor(),
+        cache=false,
+        ridge=FooBarRegressor(),
+        constant=ConstantRegressor(),
+    )
+    stack_mach = machine(mystack, X, y)
+    fit!(stack_mach; verbosity=0)
+    # Neither `data` nor `resampled_data` should have been populated:
+    for submach in fitted_params(stack_mach).machines
+        @test !isdefined(submach, :data)
+        @test !isdefined(submach, :resampled_data)
+    end
+end
+
+@testset "Test multithreaded version" begin
+    X, y = make_regression(100, 5; rng=StableRNG(1234))
+    models = (constant=DeterministicConstantRegressor(),
+                ridge_lambda=FooBarRegressor(;lambda=0.1),
+                ridge=FooBarRegressor(;lambda=0))
+
+    stack = Stack(;metalearner=FooBarRegressor(),
+                    resampling=CV(;nfolds=3),
+                    acceleration=CPU1(),
+                    models...)
+
+    mach = machine(stack, X, y)
+    fit!(mach, verbosity=0)
+    cpu_fp = fitted_params(mach)
+    cpu_ypred = predict(mach)
+
+    stack.acceleration = CPUThreads()
+    mach = machine(stack, X, y)
+    fit!(mach, verbosity=0)
+    thread_fp = fitted_params(mach)
+    thread_ypred = predict(mach)
+
+    @test cpu_ypred ≈ thread_ypred 
+    @test cpu_fp.metalearner ≈ thread_fp.metalearner 
+    @test cpu_fp.ridge ≈ thread_fp.ridge
+end
+
 end
 true

From df274d3e4b7be1e6c636f6b75a33754cccdbf961 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Fri, 13 May 2022 12:22:55 +0100
Subject: [PATCH 03/11] update docstrings

---
 src/composition/learning_networks/machines.jl | 4 ++--
 src/composition/models/stacking.jl            | 7 ++++++-
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/composition/learning_networks/machines.jl b/src/composition/learning_networks/machines.jl
index a71852fc..8e15d86d 100644
--- a/src/composition/learning_networks/machines.jl
+++ b/src/composition/learning_networks/machines.jl
@@ -328,7 +328,7 @@ end
 
 """
 
-    return!(mach::Machine{<:Surrogate}, model, verbosity)
+    return!(mach::Machine{<:Surrogate}, model, verbosity; acceleration=CPU1())
 
 The last call in custom code defining the `MLJBase.fit` method for a
 new composite model type. Here `model` is the instance of the new type
@@ -345,7 +345,7 @@ the following:
   handles smart updating (namely, an `MLJBase.update` fallback for
   composite models).
 
-- Calls `fit!(mach, verbosity=verbosity)`.
+- Calls `fit!(mach, verbosity=verbosity, acceleration=acceleration)`.
 
 - Moves any data in source nodes of the learning network into `cache`
   (for data-anonymization purposes).
diff --git a/src/composition/models/stacking.jl b/src/composition/models/stacking.jl
index 90d8a579..e683ece2 100644
--- a/src/composition/models/stacking.jl
+++ b/src/composition/models/stacking.jl
@@ -58,7 +58,7 @@ const Stack{modelnames, inp_scitype, tg_scitype} =
             ProbabilisticStack{modelnames, inp_scitype, tg_scitype}}
 
 """
-    Stack(;metalearner=nothing, resampling=CV(), name1=model1, name2=model2, ...)
+    Stack(;metalearner=nothing, resampling=CV(), name1=model1, cache=true, acceleration=CPU1(), name2=model2, ...)
 
 Implements the two-layer generalized stack algorithm introduced by
 [Wolpert
@@ -99,6 +99,11 @@ When training a machine bound to such an instance:
   evaluation of the learners in the Stack while training. This is not for the
   evaluation of the Stack itself.
 
+- `cache`: Weither machines created in the learning network will cache data or not.
+
+- `acceleration`: A supported `AbstractResource` to define the training parallelization
+  mode of the learning network.
+
 - `name1=model1, name2=model2, ...`: the `Supervised` model instances
   to be used as base learners.  The provided names become properties
   of the instance created to allow hyper-parameter access

From ec41395aca3cdc9d738aec22f34b0a43d5148942 Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Tue, 17 May 2022 17:53:23 -0700
Subject: [PATCH 04/11] remove extras sections from Project and update some
 docs and logs

---
 Project.toml                               | 15 +--------------
 src/composition/learning_networks/nodes.jl |  2 +-
 src/composition/models/stacking.jl         |  4 ++--
 3 files changed, 4 insertions(+), 17 deletions(-)

diff --git a/Project.toml b/Project.toml
index 727b7685..8a852510 100644
--- a/Project.toml
+++ b/Project.toml
@@ -46,17 +46,4 @@ ScientificTypes = "3"
 StatisticalTraits = "3"
 StatsBase = "0.32, 0.33"
 Tables = "0.2, 1.0"
-julia = "1.6"
-
-[extras]
-DecisionTree = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb"
-Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
-Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
-MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411"
-NearestNeighbors = "b8a86587-4115-5ab1-83bc-aa920d37bbce"
-StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
-TypedTables = "9d95f2ec-7b3d-5a63-8d20-e2491e220bb9"
-
-[targets]
-test = ["DecisionTree", "Distances", "Logging", "MultivariateStats", "NearestNeighbors", "StableRNGs", "Test", "TypedTables"]
+julia = "1.6"
\ No newline at end of file
diff --git a/src/composition/learning_networks/nodes.jl b/src/composition/learning_networks/nodes.jl
index 7faf2295..e358f21c 100644
--- a/src/composition/learning_networks/nodes.jl
+++ b/src/composition/learning_networks/nodes.jl
@@ -208,7 +208,7 @@ fit!(y::Node; acceleration=CPU1(), kwargs...) =
     fit!(y::Node, acceleration; kwargs...)
 
 fit!(y::Node, ::AbstractResource; kwargs...) =
-        error("Only `acceleration=CPU1()` currently supported")
+    error("Only `acceleration=CPU1()` and `acceleration=CPUThreads()` currently supported")
 
 function fit!(y::Node, ::CPU1; kwargs...)
 
diff --git a/src/composition/models/stacking.jl b/src/composition/models/stacking.jl
index e683ece2..67a8966e 100644
--- a/src/composition/models/stacking.jl
+++ b/src/composition/models/stacking.jl
@@ -99,10 +99,10 @@ When training a machine bound to such an instance:
   evaluation of the learners in the Stack while training. This is not for the
   evaluation of the Stack itself.
 
-- `cache`: Weither machines created in the learning network will cache data or not.
+- `cache`: Whether machines created in the learning network will cache data or not.
 
 - `acceleration`: A supported `AbstractResource` to define the training parallelization
-  mode of the learning network.
+  mode of the stack.
 
 - `name1=model1, name2=model2, ...`: the `Supervised` model instances
   to be used as base learners.  The provided names become properties

From 273e7f38928aa0c1e82702ef3581028dbefdba3a Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Wed, 18 May 2022 17:09:08 -0700
Subject: [PATCH 05/11] add some tests for fit!(::Node,
 acceleration=CPUThreads())

---
 test/composition/learning_networks/machines.jl | 8 ++++----
 test/test_utilities.jl                         | 7 +++++++
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/test/composition/learning_networks/machines.jl b/test/composition/learning_networks/machines.jl
index 13412d9d..85c962d6 100644
--- a/test/composition/learning_networks/machines.jl
+++ b/test/composition/learning_networks/machines.jl
@@ -155,9 +155,9 @@ zhat = inverse_transform(standM, uhat)
 yhat = exp(zhat)
 enode = @node mae(ys, yhat)
 
-@testset "replace method for learning network machines" begin
+@testset "replace method for learning network machines, acceleration: $(typeof(accel))" for accel in (CPU1(), CPUThreads())
 
-    fit!(yhat, verbosity=0)
+    fit!(yhat, verbosity=0, acceleration=accel)
 
     # test nested reporting:
     r = MLJBase.report(yhat)
@@ -199,7 +199,7 @@ enode = @node mae(ys, yhat)
     knnM2 = machines(yhat2, knn) |> first
     hotM2 = machines(yhat2, hot) |> first
 
-    @test_mach_sequence(fit!(yhat2, force=true),
+    @test_mach_sequence(fit!(yhat2, force=true, acceleration=accel),
                         [(:train, standM2), (:train, hotM2),
                          (:train, knnM2), (:train, oakM2)],
                         [(:train, hotM2), (:train, standM2),
@@ -218,7 +218,7 @@ enode = @node mae(ys, yhat)
     # this change should trigger retraining of all machines except the
     # univariate standardizer:
     hot2.drop_last = true
-    @test_mach_sequence(fit!(yhat2),
+    @test_mach_sequence(fit!(yhat2, acceleration=accel),
                         [(:skip, standM2), (:update, hotM2),
                          (:train, knnM2), (:train, oakM2)],
                         [(:update, hotM2), (:skip, standM2),
diff --git a/test/test_utilities.jl b/test/test_utilities.jl
index eef0f8a8..0846a91d 100644
--- a/test/test_utilities.jl
+++ b/test/test_utilities.jl
@@ -45,6 +45,13 @@ function testset_accelerated(name::String, var, ex; exclude=[])
     return esc(final_ex)
 end
 
+"""
+    sedate!(fit_ex)
+
+The input is a fit expression of the form `fit!(mach, kws...)`. This function
+throws an error if a verbosity level is already set in that expression, and
+otherwise sets the verbosity level to -5000.
+"""
 function sedate!(fit_ex)
     kwarg_exs = filter(fit_ex.args) do arg
         arg isa Expr && arg.head == :kw

From 28c021228cbc206d8b14f46aaefb8874e7b1f9af Mon Sep 17 00:00:00 2001
From: Olivier Labayle <olabayle@gmail.com>
Date: Fri, 3 Jun 2022 09:02:00 +0100
Subject: [PATCH 06/11] update propertynames

---
 src/composition/models/stacking.jl  | 3 ++-
 test/composition/models/stacking.jl | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/composition/models/stacking.jl b/src/composition/models/stacking.jl
index 67a8966e..d945f6af 100644
--- a/src/composition/models/stacking.jl
+++ b/src/composition/models/stacking.jl
@@ -211,7 +211,8 @@ function MMI.clean!(stack::Stack{modelnames, inp_scitype, tg_scitype}) where {mo
 end
 
 
-Base.propertynames(::Stack{modelnames}) where modelnames = tuple(:resampling, :metalearner, modelnames...)
+Base.propertynames(::Stack{modelnames}) where modelnames = 
+    tuple(:metalearner, :resampling, :measures, :cache, :acceleration, modelnames...)
 
 
 function Base.getproperty(stack::Stack{modelnames}, name::Symbol) where modelnames
diff --git a/test/composition/models/stacking.jl b/test/composition/models/stacking.jl
index ae2624b2..deba3e93 100644
--- a/test/composition/models/stacking.jl
+++ b/test/composition/models/stacking.jl
@@ -74,8 +74,8 @@ end
                     resampling=CV(;nfolds=3),
                     models...)
     # Testing attribute access of the stack
-    @test propertynames(mystack) == (:resampling, :metalearner, :constant,
-                                    :decisiontree, :ridge_lambda, :ridge)
+    @test propertynames(mystack) == (:metalearner, :resampling, :measures, :cache, :acceleration, 
+        :constant, :decisiontree, :ridge_lambda, :ridge)
 
     @test mystack.decisiontree isa DecisionTreeRegressor
 

From a01964e2ce823d085bb0975022078eb562492e1d Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom" <anthony.blaom@gmail.com>
Date: Thu, 9 Jun 2022 16:02:58 +1200
Subject: [PATCH 07/11] fix Stack docstring

---
 src/composition/models/stacking.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/composition/models/stacking.jl b/src/composition/models/stacking.jl
index d945f6af..ff740996 100644
--- a/src/composition/models/stacking.jl
+++ b/src/composition/models/stacking.jl
@@ -58,7 +58,7 @@ const Stack{modelnames, inp_scitype, tg_scitype} =
             ProbabilisticStack{modelnames, inp_scitype, tg_scitype}}
 
 """
-    Stack(;metalearner=nothing, resampling=CV(), name1=model1, cache=true, acceleration=CPU1(), name2=model2, ...)
+    Stack(;metalearner=nothing, name1=model1, name2=model2, keyword_options...)
 
 Implements the two-layer generalized stack algorithm introduced by
 [Wolpert

From c112b91a3e12fbb4ae25925301da3111f769aeee Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom" <anthony.blaom@gmail.com>
Date: Fri, 10 Jun 2022 11:05:00 +1200
Subject: [PATCH 08/11] tweak stack docstring

---
 src/composition/models/stacking.jl | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/composition/models/stacking.jl b/src/composition/models/stacking.jl
index ff740996..c4d8aac3 100644
--- a/src/composition/models/stacking.jl
+++ b/src/composition/models/stacking.jl
@@ -58,7 +58,7 @@ const Stack{modelnames, inp_scitype, tg_scitype} =
             ProbabilisticStack{modelnames, inp_scitype, tg_scitype}}
 
 """
-    Stack(;metalearner=nothing, name1=model1, name2=model2, keyword_options...)
+    Stack(; metalearner=nothing, name1=model1, name2=model2, ..., keyword_options...)
 
 Implements the two-layer generalized stack algorithm introduced by
 [Wolpert
@@ -93,9 +93,9 @@ When training a machine bound to such an instance:
   model will optimize the squared error.
 
 - `resampling`: The resampling strategy used
-  to prepare out-of-sample predictions of the base learners. 
+  to prepare out-of-sample predictions of the base learners.
 
-- `measures`: A measure or iterable over measures, to perform an internal 
+- `measures`: A measure or iterable over measures, to perform an internal
   evaluation of the learners in the Stack while training. This is not for the
   evaluation of the Stack itself.
 
@@ -148,7 +148,7 @@ evaluate!(mach; resampling=Holdout(), measure=rmse)
 
 ```
 
-The internal evaluation report can be accessed like this 
+The internal evaluation report can be accessed like this
 and provides a PerformanceEvaluation object for each model:
 
 ```julia
@@ -211,7 +211,7 @@ function MMI.clean!(stack::Stack{modelnames, inp_scitype, tg_scitype}) where {mo
 end
 
 
-Base.propertynames(::Stack{modelnames}) where modelnames = 
+Base.propertynames(::Stack{modelnames}) where modelnames =
     tuple(:metalearner, :resampling, :measures, :cache, :acceleration, modelnames...)
 
 
@@ -286,7 +286,7 @@ internal_stack_report(m::Stack, verbosity::Int, tt_pairs, folds_evaluations::Var
 """
     internal_stack_report(m::Stack, verbosity::Int, y::AbstractNode, folds_evaluations::Vararg{AbstractNode})
 
-When measure/measures is provided, the folds_evaluation will have been filled by `store_for_evaluation`. This function is 
+When measure/measures is provided, the folds_evaluation will have been filled by `store_for_evaluation`. This function is
 not doing any heavy work (not constructing nodes corresponding to measures) but just unpacking all the folds_evaluations in a single node that
 can be evaluated later.
 """
@@ -318,10 +318,10 @@ function internal_stack_report(stack::Stack{modelnames,}, verbosity::Int, tt_pai
             fitted_params_per_fold=[],
             report_per_fold=[],
             train_test_pairs=tt_pairs
-            ) 
+            )
         for model in getfield(stack, :models)]
     )
-    
+
     # Update the results
     index = 1
     for foldid in 1:nfolds
@@ -344,7 +344,7 @@ function internal_stack_report(stack::Stack{modelnames,}, verbosity::Int, tt_pai
                 end
 
                 # Update per_fold
-                model_results.per_fold[i][foldid] = 
+                model_results.per_fold[i][foldid] =
                     reports_each_observation(measure) ? MLJBase.aggregate(loss, measure) : loss
             end
             index += 1
@@ -380,7 +380,7 @@ end
     oos_set(m::Stack, folds::AbstractNode, Xs::Source, ys::Source)
 
 This function is building the out-of-sample dataset that is later used by the `judge`
-for its own training. It also returns the folds_evaluations object if internal 
+for its own training. It also returns the folds_evaluations object if internal
 cross-validation results are requested.
 """
 function oos_set(m::Stack, Xs::Source, ys::Source, tt_pairs)
@@ -431,7 +431,7 @@ function fit(m::Stack, verbosity::Int, X, y)
 
     Xs = source(X)
     ys = source(y)
-        
+
     Zval, yval, folds_evaluations = oos_set(m, Xs, ys, tt_pairs)
 
     metamach = machine(m.metalearner, Zval, yval, cache=m.cache)
@@ -452,6 +452,6 @@ function fit(m::Stack, verbosity::Int, X, y)
 
     # We can infer the Surrogate by two calls to supertype
     mach = machine(supertype(supertype(typeof(m)))(), Xs, ys; predict=ŷ, internal_report...)
-    
+
     return!(mach, m, verbosity, acceleration=m.acceleration)
 end

From 9a0cd541f255ed1da589902cc235948148ea9784 Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom" <anthony.blaom@gmail.com>
Date: Mon, 13 Jun 2022 16:42:51 +1200
Subject: [PATCH 09/11] add DataFrames to test/Project.toml

---
 test/Project.toml | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/test/Project.toml b/test/Project.toml
index b17e547f..ace6054b 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -1,25 +1,26 @@
 [deps]
+CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
+ComputationalResources = "ed09eef8-17a6-5b46-8889-db040fac31e3"
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 DecisionTree = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb"
 Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
 Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
+Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
+LossFunctions = "30fc2ffe-d236-52d8-8643-a9d8f7c094a7"
+MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea"
 MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411"
 NearestNeighbors = "b8a86587-4115-5ab1-83bc-aa920d37bbce"
-StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
-TypedTables = "9d95f2ec-7b3d-5a63-8d20-e2491e220bb9"
-MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea"
-CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
-ComputationalResources = "ed09eef8-17a6-5b46-8889-db040fac31e3"
-Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
-Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
-ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81"
-LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
-Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
-Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
-LossFunctions = "30fc2ffe-d236-52d8-8643-a9d8f7c094a7"
+Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
 ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
-StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81"
 Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
-Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
\ No newline at end of file
+StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
+Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
+Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+TypedTables = "9d95f2ec-7b3d-5a63-8d20-e2491e220bb9"

From d6cf7a16e31980e16bab03eb4135b483f8689034 Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom" <anthony.blaom@gmail.com>
Date: Tue, 14 Jun 2022 12:57:08 +1200
Subject: [PATCH 10/11] in multithreading stack test replace stack with double
 stack

---
 test/composition/models/stacking.jl | 35 ++++++++++++++++++++---------
 1 file changed, 24 insertions(+), 11 deletions(-)

diff --git a/test/composition/models/stacking.jl b/test/composition/models/stacking.jl
index deba3e93..3c985ed0 100644
--- a/test/composition/models/stacking.jl
+++ b/test/composition/models/stacking.jl
@@ -547,31 +547,44 @@ end
     end
 end
 
-@testset "Test multithreaded version" begin
-    X, y = make_regression(100, 5; rng=StableRNG(1234))
+# a regression `Stack` which has `model` as one of the base models:
+function _stack(model, resource)
     models = (constant=DeterministicConstantRegressor(),
                 ridge_lambda=FooBarRegressor(;lambda=0.1),
-                ridge=FooBarRegressor(;lambda=0))
+                model=model)
+    Stack(;
+        metalearner=FooBarRegressor(;lambda=0.05),
+        resampling=CV(;nfolds=3),
+        acceleration=resource,
+        models...
+    )
+end
 
-    stack = Stack(;metalearner=FooBarRegressor(),
-                    resampling=CV(;nfolds=3),
-                    acceleration=CPU1(),
-                    models...)
+# return a stack whose base model named `model` is itself a stack containing
+# `model`, with both layers accelerated using `resource`:
+_double_stack(model, resource) =
+    _stack(_stack(model, resource), resource)
+
+@testset "Test multithreaded version" begin
+    X, y = make_regression(100, 5; rng=StableRNG(1234))
+
+    stack = _double_stack(FooBarRegressor(;lambda=0.07), CPU1())
 
     mach = machine(stack, X, y)
     fit!(mach, verbosity=0)
     cpu_fp = fitted_params(mach)
     cpu_ypred = predict(mach)
 
-    stack.acceleration = CPUThreads()
+    stack = _double_stack(FooBarRegressor(;lambda=0.07), CPUThreads())
+
     mach = machine(stack, X, y)
     fit!(mach, verbosity=0)
     thread_fp = fitted_params(mach)
     thread_ypred = predict(mach)
 
-    @test cpu_ypred ≈ thread_ypred 
-    @test cpu_fp.metalearner ≈ thread_fp.metalearner 
-    @test cpu_fp.ridge ≈ thread_fp.ridge
+    @test cpu_ypred ≈ thread_ypred
+    @test cpu_fp.metalearner ≈ thread_fp.metalearner
+    @test cpu_fp.ridge_lambda ≈ thread_fp.ridge_lambda
 end
 
 end

From e02f6afab550f2a648a077c0eb477505b4f7cdf7 Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom" <anthony.blaom@gmail.com>
Date: Tue, 14 Jun 2022 13:08:45 +1200
Subject: [PATCH 11/11] update docstring

---
 src/composition/learning_networks/nodes.jl | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/composition/learning_networks/nodes.jl b/src/composition/learning_networks/nodes.jl
index e358f21c..ab4d4601 100644
--- a/src/composition/learning_networks/nodes.jl
+++ b/src/composition/learning_networks/nodes.jl
@@ -201,7 +201,10 @@ end
          acceleration=CPU1())
 
 Train all machines required to call the node `N`, in an appropriate
-order.  These machines are those returned by `machines(N)`.
+order, but parallelizing where possible using the specified `acceleration`
+mode.  These machines are those returned by `machines(N)`.
+
+Supported modes of `acceleration`: `CPU1()`, `CPUThreads()`.
 
 """
 fit!(y::Node; acceleration=CPU1(), kwargs...) =