partial_fitted(::GeneralizedLinearMixedModel) (#28)

* pull out partial_fitted meat into own function * efficiency tweaks * GLMM * deprecate hide_progress in tests * minor version bump * update test project and Aqua
palday · Jan 25, 2024 · 97f7e5c · 97f7e5c · palday · Jan 25, 2024
1 parent 236ce89
commit 97f7e5c
Show file tree

Hide file tree

Showing 7 changed files with 102 additions and 47 deletions.
diff --git a/Project.toml b/Project.toml
@@ -1,9 +1,10 @@
 name = "MixedModelsExtras"
 uuid = "781a26e1-49f4-409a-8f4c-c3159d78c17e"
 authors = ["Phillip Alday <[email protected]> and contributors"]
-version = "2.0.0"
+version = "2.1.0"
 
 [deps]
+GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 MixedModels = "ff71e718-51f3-5ec2-a782-8ffcbfa3c316"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
@@ -12,14 +13,28 @@ StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d"
 Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
 
 [compat]
+Aqua = "0.8"
+GLM = "1"
+DataFrames = "1"
+Distributions = "0.25"
+LinearAlgebra = "1"
 MixedModels = "4"
+RDatasets = "0.7.7"
+StableRNGs = "1"
+Statistics = "1"
 StatsBase = "0.33, 0.34"
 StatsModels = "0.7.3"
 Tables = "1"
+Test = "1"
 julia = "1.8"
 
 [extras]
+Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
+Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
+RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b"
+StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Test"]
+test = ["Test", "Aqua", "DataFrames", "Distributions", "RDatasets", "StableRNGs"]
diff --git a/src/MixedModelsExtras.jl b/src/MixedModelsExtras.jl
@@ -7,6 +7,7 @@ using StatsBase
 using StatsModels
 using Tables
 
+using GLM: linkinv, Link
 using StatsModels: termnames, vif, gvif
 export termnames, gvif, vif
 

diff --git a/src/remef.jl b/src/remef.jl
@@ -1,10 +1,11 @@
 
 # Intercept always has to be specified
 """
-    partial_fitted(model::LinearMixedModel,
+    partial_fitted(model::MixedModel,
                    fe::AbstractVector{<:AbstractString},
                    re::Dict{Symbol}=Dict(fn => fe for fn in fnames(model));
-                   mode=:include)
+                   mode=:include,
+                   type=:response)
 
 Compute "partial" fitted values.
 
@@ -26,6 +27,20 @@ with any grouping variable.
 The keyword argument `mode` specifies whether the fixed and random effects
 specifications are treated as coefficients to `:include` or `:exclude`.
 
+For `GeneralizedLinearMixedModel`, the keyword argument `type` specifies whether
+the predictions should be returned on the scale of linear predictor (`:linpred`)
+or on the response scale (`:response`).
+
+!!! warning
+    Partial fitted values can be misleading for generalized linear mixed models
+    on the response scale because of the nonlinear nature of the link function.
+    For example, in logistic regression the partial fitted values are computed
+    on the linear predictor scale, i.e. the log odds scale, and then transformed
+    to the response scale, i.e. the probablitiy scale. However, a simple additive
+    contribution on the log odds scale is not additive on the probability scale.
+    More directly, it is impossible to decompose the effects of individual predictors
+    into simple additive contributions on the original scale.
+
 !!! note
     For both the fixed and the random effects, the relevant entities are the
     coefficient names, not the original term names.
@@ -44,6 +59,21 @@ function partial_fitted(model::LinearMixedModel{T},
                         fe::AbstractVector{<:AbstractString},
                         re::Dict{Symbol}=Dict(fn => fe for fn in fnames(model));
                         mode=:include) where {T}
+    return _partial_fitted(model, fe, re; mode)
+end
+
+function partial_fitted(model::GeneralizedLinearMixedModel{T},
+                        fe::AbstractVector{<:AbstractString},
+                        re::Dict{Symbol}=Dict(fn => fe for fn in fnames(model));
+                        mode=:include, type=:linpred) where {T}
+    type in (:linpred, :response) || throw(ArgumentError("Invalid value for type: $(type)"))
+    y = _partial_fitted(model, fe, re; mode)
+    return type == :linpred ? y : broadcast!(Base.Fix1(linkinv, Link(model)), y, y)
+end
+
+function _partial_fitted(model::MixedModel{T},
+                         fe::AbstractVector{<:AbstractString},
+                         re::Dict{Symbol}; mode) where {T}
     # @debug fe
     # @debug re
     issubset(fe, coefnames(model)) ||
@@ -60,8 +90,8 @@ function partial_fitted(model::LinearMixedModel{T},
     mode == :exclude && (fe_idx = .!fe_idx)
     # @debug fe_idx
     # XXX does this work properly for rank-deficient models?
-    X = view(model.X, :, fe_idx)
-    vv = mul!(Vector{T}(undef, nobs(model)), X, fixef(model)[fe_idx])
+    X = view(modelmatrix(model), :, fe_idx)
+    vv = mul!(Vector{T}(undef, nobs(model)), X, view(fixef(model), fe_idx))
 
     for (rt, bb) in zip(model.reterms, ranef(model))
         group = Symbol(string(rt.trm))
@@ -85,10 +115,12 @@ function partial_fitted(model::LinearMixedModel{T},
         # @debug re_idx_reps
         # XXX no appropriate mul! method
         # mul!(vv, view(rt, :, re_idx_reps), view(bb, re_idx, :), one(T), one(T))
+        mul!(vv, view(rt, :, re_idx_reps), vec(view(bb, re_idx, :)), one(T), one(T))
+
         # should re-write this as a loop to avoid allocating the intermediate allocation
         # @debug size(view(rt, :, re_idx_reps))
         # @debug size(view(bb, re_idx, :))
-        vv .+= view(rt, :, re_idx_reps) * vec(view(bb, re_idx, :))
+        # vv .+= view(rt, :, re_idx_reps) * vec(view(bb, re_idx, :))
     end
 
     return vv

diff --git a/test/Project.toml b/test/Project.toml
diff --git a/test/icc.jl b/test/icc.jl
@@ -22,7 +22,7 @@ progress = false
     @test icc(model, :subj) + icc(model, :item) ≈ icc(model)
 
     @testset "bootstrap" begin
-        boot = parametricbootstrap(StableRNG(42), 100, model; hide_progress=!progress)
+        boot = parametricbootstrap(StableRNG(42), 100, model; progress)
         iccboot_subj = icc(boot, :subj)
         iccboot_item = icc(boot, :item)
         @test iccboot_subj + iccboot_item ≈ icc(boot)
@@ -53,7 +53,7 @@ end
     @test icc(modelbern, Symbol("urban & dist")) ≈ icc(modelbin, Symbol("urban & dist"))
 
     @testset "bootstrap" begin
-        boot = parametricbootstrap(StableRNG(42), 100, modelbern; hide_progress=!progress)
+        boot = parametricbootstrap(StableRNG(42), 100, modelbern; progress)
         @test_throws ArgumentError icc(boot)
         iccboot = icc(boot, Bernoulli())
         ci = shortestcovint(iccboot)

diff --git a/test/remef.jl b/test/remef.jl
@@ -1,27 +1,47 @@
+using GLM: linkinv, Link
 using MixedModels
 using MixedModels: dataset
 using MixedModelsExtras
 using Test
 
 progress = false
 
-fm1 = fit(MixedModel,
-          @formula(reaction ~ 1 + days + (1 + days | subj)),
-          dataset(:sleepstudy);
-          progress)
-
-@test all(==(0), partial_fitted(fm1, String[]; mode=:include))
-@test all(partial_fitted(fm1, String[]; mode=:exclude) .≈ fitted(fm1))
-@test all(partial_fitted(fm1, ["(Intercept)", "days"], Dict(:subj => String[]);
-                         mode=:include) .≈ fm1.X * fm1.β)
-
-@test_throws(ArgumentError("""specified FE names not subset of ["(Intercept)", "days"]"""),
-             partial_fitted(fm1, ["(Intercept)", "Days"], Dict(:subj => []); mode=:include))
-@test_throws(ArgumentError("""specified RE names for subj not subset of ["(Intercept)", "days"]"""),
-             partial_fitted(fm1, ["(Intercept)", "days"], Dict(:subj => ["Days"]);
-                            mode=:include))
-
-re_only_pf = partial_fitted(fm1, String[], Dict(:subj => String["(Intercept)", "days"]);
-                            mode=:include)
-re_only = fitted(fm1) - fm1.X * fm1.β
-@test all(re_only .≈ re_only_pf)
+@testset "LMM" begin
+    fm1 = fit(MixedModel,
+              @formula(reaction ~ 1 + days + (1 + days | subj)),
+              dataset(:sleepstudy);
+              progress)
+
+    @test all(==(0), partial_fitted(fm1, String[]; mode=:include))
+    @test all(partial_fitted(fm1, String[]; mode=:exclude) .≈ fitted(fm1))
+    @test all(partial_fitted(fm1, ["(Intercept)", "days"], Dict(:subj => String[]);
+                             mode=:include) .≈ fm1.X * fm1.β)
+
+    @test_throws(ArgumentError("""specified FE names not subset of ["(Intercept)", "days"]"""),
+                 partial_fitted(fm1, ["(Intercept)", "Days"], Dict(:subj => []);
+                                mode=:include))
+    @test_throws(ArgumentError("""specified RE names for subj not subset of ["(Intercept)", "days"]"""),
+                 partial_fitted(fm1, ["(Intercept)", "days"], Dict(:subj => ["Days"]);
+                                mode=:include))
+
+    re_only_pf = partial_fitted(fm1, String[], Dict(:subj => String["(Intercept)", "days"]);
+                                mode=:include)
+    re_only = fitted(fm1) - fm1.X * fm1.β
+    @test all(re_only .≈ re_only_pf)
+end
+
+@testset "GLMM" begin
+    contra = dataset(:contra)
+    gm1 = fit(MixedModel, @formula(use ~ 1 + (1 | urban & dist)),
+              contra, Bernoulli(); fast=true, progress)
+
+    pf_all = partial_fitted(gm1, ["(Intercept)"]; mode=:include, type=:response)
+    fitted_vals = fitted(gm1)
+    @test all(pf_all .≈ fitted_vals)
+
+    re_only_pf = partial_fitted(gm1, String[],
+                                Dict(Symbol("urban & dist") => String["(Intercept)"]);
+                                mode=:include, type=:linpred)
+    full = modelmatrix(gm1) * fixef(gm1) + re_only_pf
+    @test all(linkinv.(Link(gm1), full) .≈ fitted_vals)
+end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -1,11 +1,15 @@
 using Aqua
 using LinearAlgebra
+using MixedModels
 using MixedModelsExtras
 using Test
 
 @testset "Aqua" begin
     # it's not piracy for StatsAPI.r2(::MixedModel), it's privateering!
-    Aqua.test_all(MixedModelsExtras; ambiguities=false, piracy=false)
+    Aqua.test_all(MixedModelsExtras; ambiguities=false,
+                  piracies=(;
+                            treat_as_own=[LinearMixedModel, MixedModel,
+                                          GeneralizedLinearMixedModel, RandomEffectsTerm]))
 end
 
 @testset "ICC" begin