@@ -31,9 +31,11 @@ mutable struct DeterministicStack{modelnames, inp_scitype, tg_scitype} <: Determ
31
31
metalearner:: Deterministic
32
32
resampling
33
33
measures:: Union{Nothing,AbstractVector}
34
- function DeterministicStack (modelnames, models, metalearner, resampling, measures)
34
+ cache:: Bool
35
+ acceleration:: AbstractResource
36
+ function DeterministicStack (modelnames, models, metalearner, resampling, measures, cache, acceleration)
35
37
inp_scitype, tg_scitype = input_target_scitypes (models, metalearner)
36
- return new {modelnames, inp_scitype, tg_scitype} (models, metalearner, resampling, measures)
38
+ return new {modelnames, inp_scitype, tg_scitype} (models, metalearner, resampling, measures, cache, acceleration )
37
39
end
38
40
end
39
41
@@ -42,9 +44,11 @@ mutable struct ProbabilisticStack{modelnames, inp_scitype, tg_scitype} <: Probab
42
44
metalearner:: Probabilistic
43
45
resampling
44
46
measures:: Union{Nothing,AbstractVector}
45
- function ProbabilisticStack (modelnames, models, metalearner, resampling, measures)
47
+ cache:: Bool
48
+ acceleration:: AbstractResource
49
+ function ProbabilisticStack (modelnames, models, metalearner, resampling, measures, cache, acceleration)
46
50
inp_scitype, tg_scitype = input_target_scitypes (models, metalearner)
47
- return new {modelnames, inp_scitype, tg_scitype} (models, metalearner, resampling, measures)
51
+ return new {modelnames, inp_scitype, tg_scitype} (models, metalearner, resampling, measures, cache, acceleration )
48
52
end
49
53
end
50
54
@@ -54,7 +58,7 @@ const Stack{modelnames, inp_scitype, tg_scitype} =
54
58
ProbabilisticStack{modelnames, inp_scitype, tg_scitype}}
55
59
56
60
"""
57
- Stack(;metalearner=nothing, resampling=CV(), name1=model1, name2=model2, ...)
61
+ Stack(; metalearner=nothing, name1=model1, name2=model2, ..., keyword_options ...)
58
62
59
63
Implements the two-layer generalized stack algorithm introduced by
60
64
[Wolpert
@@ -89,12 +93,17 @@ When training a machine bound to such an instance:
89
93
model will optimize the squared error.
90
94
91
95
- `resampling`: The resampling strategy used
92
- to prepare out-of-sample predictions of the base learners.
96
+ to prepare out-of-sample predictions of the base learners.
93
97
94
- - `measures`: A measure or iterable over measures, to perform an internal
98
+ - `measures`: A measure or iterable over measures, to perform an internal
95
99
evaluation of the learners in the Stack while training. This is not for the
96
100
evaluation of the Stack itself.
97
101
102
+ - `cache`: Whether machines created in the learning network will cache data or not.
103
+
104
+ - `acceleration`: A supported `AbstractResource` to define the training parallelization
105
+ mode of the stack.
106
+
98
107
- `name1=model1, name2=model2, ...`: the `Supervised` model instances
99
108
to be used as base learners. The provided names become properties
100
109
of the instance created to allow hyper-parameter access
@@ -139,15 +148,15 @@ evaluate!(mach; resampling=Holdout(), measure=rmse)
139
148
140
149
```
141
150
142
- The internal evaluation report can be accessed like this
151
+ The internal evaluation report can be accessed like this
143
152
and provides a PerformanceEvaluation object for each model:
144
153
145
154
```julia
146
155
report(mach).cv_report
147
156
```
148
157
149
158
"""
150
- function Stack (;metalearner= nothing , resampling= CV (), measure= nothing , measures= measure, named_models... )
159
+ function Stack (;metalearner= nothing , resampling= CV (), measure= nothing , measures= measure, cache = true , acceleration = CPU1 (), named_models... )
151
160
metalearner === nothing &&
152
161
throw (ArgumentError (" No metalearner specified. Use Stack(metalearner=...)" ))
153
162
@@ -159,9 +168,9 @@ function Stack(;metalearner=nothing, resampling=CV(), measure=nothing, measures=
159
168
end
160
169
161
170
if metalearner isa Deterministic
162
- stack = DeterministicStack (modelnames, models, metalearner, resampling, measures)
171
+ stack = DeterministicStack (modelnames, models, metalearner, resampling, measures, cache, acceleration )
163
172
elseif metalearner isa Probabilistic
164
- stack = ProbabilisticStack (modelnames, models, metalearner, resampling, measures)
173
+ stack = ProbabilisticStack (modelnames, models, metalearner, resampling, measures, cache, acceleration )
165
174
else
166
175
throw (ArgumentError (" The metalearner should be a subtype
167
176
of $(Union{Deterministic, Probabilistic}) " ))
@@ -202,13 +211,16 @@ function MMI.clean!(stack::Stack{modelnames, inp_scitype, tg_scitype}) where {mo
202
211
end
203
212
204
213
205
- Base. propertynames (:: Stack{modelnames} ) where modelnames = tuple (:resampling , :metalearner , modelnames... )
214
+ Base. propertynames (:: Stack{modelnames} ) where modelnames =
215
+ tuple (:metalearner , :resampling , :measures , :cache , :acceleration , modelnames... )
206
216
207
217
208
218
function Base. getproperty (stack:: Stack{modelnames} , name:: Symbol ) where modelnames
209
219
name === :metalearner && return getfield (stack, :metalearner )
210
220
name === :resampling && return getfield (stack, :resampling )
211
221
name == :measures && return getfield (stack, :measures )
222
+ name === :cache && return getfield (stack, :cache )
223
+ name == :acceleration && return getfield (stack, :acceleration )
212
224
models = getfield (stack, :models )
213
225
for j in eachindex (modelnames)
214
226
name === modelnames[j] && return models[j]
@@ -221,6 +233,8 @@ function Base.setproperty!(stack::Stack{modelnames}, _name::Symbol, val) where m
221
233
_name === :metalearner && return setfield! (stack, :metalearner , val)
222
234
_name === :resampling && return setfield! (stack, :resampling , val)
223
235
_name === :measures && return setfield! (stack, :measures , val)
236
+ _name === :cache && return setfield! (stack, :cache , val)
237
+ _name === :acceleration && return setfield! (stack, :acceleration , val)
224
238
idx = findfirst (== (_name), modelnames)
225
239
idx isa Nothing || return getfield (stack, :models )[idx] = val
226
240
error (" type Stack has no property $name " )
@@ -272,7 +286,7 @@ internal_stack_report(m::Stack, verbosity::Int, tt_pairs, folds_evaluations::Var
272
286
"""
273
287
internal_stack_report(m::Stack, verbosity::Int, tt_pairs, folds_evaluations::Vararg{AbstractNode})
274
288
275
- When measure/measures is provided, the folds_evaluation will have been filled by `store_for_evaluation`. This function is
289
+ When measure/measures is provided, the folds_evaluations will have been filled by `store_for_evaluation`. This function is
276
290
not doing any heavy work (not constructing nodes corresponding to measures) but just unpacking all the folds_evaluations in a single node that
277
291
can be evaluated later.
278
292
"""
@@ -304,10 +318,10 @@ function internal_stack_report(stack::Stack{modelnames,}, verbosity::Int, tt_pai
304
318
fitted_params_per_fold= [],
305
319
report_per_fold= [],
306
320
train_test_pairs= tt_pairs
307
- )
321
+ )
308
322
for model in getfield (stack, :models )]
309
323
)
310
-
324
+
311
325
# Update the results
312
326
index = 1
313
327
for foldid in 1 : nfolds
@@ -330,7 +344,7 @@ function internal_stack_report(stack::Stack{modelnames,}, verbosity::Int, tt_pai
330
344
end
331
345
332
346
# Update per_fold
333
- model_results. per_fold[i][foldid] =
347
+ model_results. per_fold[i][foldid] =
334
348
reports_each_observation (measure) ? MLJBase. aggregate (loss, measure) : loss
335
349
end
336
350
index += 1
366
380
oos_set(m::Stack, Xs::Source, ys::Source, tt_pairs)
367
381
368
382
This function is building the out-of-sample dataset that is later used by the `judge`
369
- for its own training. It also returns the folds_evaluations object if internal
383
+ for its own training. It also returns the folds_evaluations object if internal
370
384
cross-validation results are requested.
371
385
"""
372
386
function oos_set (m:: Stack , Xs:: Source , ys:: Source , tt_pairs)
@@ -384,7 +398,7 @@ function oos_set(m::Stack, Xs::Source, ys::Source, tt_pairs)
384
398
# predictions are subsequently used as an input to the metalearner
385
399
Zfold = []
386
400
for model in getfield (m, :models )
387
- mach = machine (model, Xtrain, ytrain)
401
+ mach = machine (model, Xtrain, ytrain, cache = m . cache )
388
402
ypred = predict (mach, Xtest)
389
403
# Internal evaluation on the fold if required
390
404
push! (folds_evaluations, store_for_evaluation (mach, Xtest, ytest, m. measures))
@@ -417,15 +431,15 @@ function fit(m::Stack, verbosity::Int, X, y)
417
431
418
432
Xs = source (X)
419
433
ys = source (y)
420
-
434
+
421
435
Zval, yval, folds_evaluations = oos_set (m, Xs, ys, tt_pairs)
422
436
423
- metamach = machine (m. metalearner, Zval, yval)
437
+ metamach = machine (m. metalearner, Zval, yval, cache = m . cache )
424
438
425
439
# Each model is retrained on the original full training set
426
440
Zpred = []
427
441
for model in getfield (m, :models )
428
- mach = machine (model, Xs, ys)
442
+ mach = machine (model, Xs, ys, cache = m . cache )
429
443
ypred = predict (mach, Xs)
430
444
ypred = pre_judge_transform (ypred, typeof (model), target_scitype (model))
431
445
push! (Zpred, ypred)
@@ -438,6 +452,6 @@ function fit(m::Stack, verbosity::Int, X, y)
438
452
439
453
# We can infer the Surrogate by two calls to supertype
440
454
mach = machine (supertype (supertype (typeof (m)))(), Xs, ys; predict= ŷ, internal_report... )
441
-
442
- return! (mach, m, verbosity)
455
+
456
+ return! (mach, m, verbosity, acceleration = m . acceleration )
443
457
end
0 commit comments