From 97f7e5c89c7bad99074bcc83217cf8436d7fe580 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Wed, 24 Jan 2024 22:49:00 -0600 Subject: [PATCH] `partial_fitted(::GeneralizedLinearMixedModel)` (#28) * pull out partial_fitted meat into own function * efficiency tweaks * GLMM * deprecate hide_progress in tests * minor version bump * update test project and Aqua --- Project.toml | 19 +++++++++++-- src/MixedModelsExtras.jl | 1 + src/remef.jl | 42 ++++++++++++++++++++++++---- test/Project.toml | 17 ------------ test/icc.jl | 4 +-- test/remef.jl | 60 ++++++++++++++++++++++++++-------------- test/runtests.jl | 6 +++- 7 files changed, 102 insertions(+), 47 deletions(-) delete mode 100644 test/Project.toml diff --git a/Project.toml b/Project.toml index 3db052b..d92c351 100644 --- a/Project.toml +++ b/Project.toml @@ -1,9 +1,10 @@ name = "MixedModelsExtras" uuid = "781a26e1-49f4-409a-8f4c-c3159d78c17e" authors = ["Phillip Alday and contributors"] -version = "2.0.0" +version = "2.1.0" [deps] +GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MixedModels = "ff71e718-51f3-5ec2-a782-8ffcbfa3c316" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" @@ -12,14 +13,28 @@ StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d" Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" [compat] +Aqua = "0.8" +GLM = "1" +DataFrames = "1" +Distributions = "0.25" +LinearAlgebra = "1" MixedModels = "4" +RDatasets = "0.7.7" +StableRNGs = "1" +Statistics = "1" StatsBase = "0.33, 0.34" StatsModels = "0.7.3" Tables = "1" +Test = "1" julia = "1.8" [extras] +Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" +DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b" +StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Test"] +test = ["Test", "Aqua", "DataFrames", "Distributions", "RDatasets", "StableRNGs"] diff --git a/src/MixedModelsExtras.jl b/src/MixedModelsExtras.jl index 476e874..39e322b 100644 --- a/src/MixedModelsExtras.jl +++ b/src/MixedModelsExtras.jl @@ -7,6 +7,7 @@ using StatsBase using StatsModels using Tables +using GLM: linkinv, Link using StatsModels: termnames, vif, gvif export termnames, gvif, vif diff --git a/src/remef.jl b/src/remef.jl index 6716f4d..73256a5 100644 --- a/src/remef.jl +++ b/src/remef.jl @@ -1,10 +1,11 @@ # Intercept always has to be specified """ - partial_fitted(model::LinearMixedModel, + partial_fitted(model::MixedModel, fe::AbstractVector{<:AbstractString}, re::Dict{Symbol}=Dict(fn => fe for fn in fnames(model)); - mode=:include) + mode=:include, + type=:response) Compute "partial" fitted values. @@ -26,6 +27,20 @@ with any grouping variable. The keyword argument `mode` specifies whether the fixed and random effects specifications are treated as coefficients to `:include` or `:exclude`. +For `GeneralizedLinearMixedModel`, the keyword argument `type` specifies whether +the predictions should be returned on the scale of linear predictor (`:linpred`) +or on the response scale (`:response`). + +!!! warning + Partial fitted values can be misleading for generalized linear mixed models + on the response scale because of the nonlinear nature of the link function. + For example, in logistic regression the partial fitted values are computed + on the linear predictor scale, i.e. the log odds scale, and then transformed + to the response scale, i.e. the probablitiy scale. However, a simple additive + contribution on the log odds scale is not additive on the probability scale. + More directly, it is impossible to decompose the effects of individual predictors + into simple additive contributions on the original scale. + !!! note For both the fixed and the random effects, the relevant entities are the coefficient names, not the original term names. @@ -44,6 +59,21 @@ function partial_fitted(model::LinearMixedModel{T}, fe::AbstractVector{<:AbstractString}, re::Dict{Symbol}=Dict(fn => fe for fn in fnames(model)); mode=:include) where {T} + return _partial_fitted(model, fe, re; mode) +end + +function partial_fitted(model::GeneralizedLinearMixedModel{T}, + fe::AbstractVector{<:AbstractString}, + re::Dict{Symbol}=Dict(fn => fe for fn in fnames(model)); + mode=:include, type=:linpred) where {T} + type in (:linpred, :response) || throw(ArgumentError("Invalid value for type: $(type)")) + y = _partial_fitted(model, fe, re; mode) + return type == :linpred ? y : broadcast!(Base.Fix1(linkinv, Link(model)), y, y) +end + +function _partial_fitted(model::MixedModel{T}, + fe::AbstractVector{<:AbstractString}, + re::Dict{Symbol}; mode) where {T} # @debug fe # @debug re issubset(fe, coefnames(model)) || @@ -60,8 +90,8 @@ function partial_fitted(model::LinearMixedModel{T}, mode == :exclude && (fe_idx = .!fe_idx) # @debug fe_idx # XXX does this work properly for rank-deficient models? - X = view(model.X, :, fe_idx) - vv = mul!(Vector{T}(undef, nobs(model)), X, fixef(model)[fe_idx]) + X = view(modelmatrix(model), :, fe_idx) + vv = mul!(Vector{T}(undef, nobs(model)), X, view(fixef(model), fe_idx)) for (rt, bb) in zip(model.reterms, ranef(model)) group = Symbol(string(rt.trm)) @@ -85,10 +115,12 @@ function partial_fitted(model::LinearMixedModel{T}, # @debug re_idx_reps # XXX no appropriate mul! method # mul!(vv, view(rt, :, re_idx_reps), view(bb, re_idx, :), one(T), one(T)) + mul!(vv, view(rt, :, re_idx_reps), vec(view(bb, re_idx, :)), one(T), one(T)) + # should re-write this as a loop to avoid allocating the intermediate allocation # @debug size(view(rt, :, re_idx_reps)) # @debug size(view(bb, re_idx, :)) - vv .+= view(rt, :, re_idx_reps) * vec(view(bb, re_idx, :)) + # vv .+= view(rt, :, re_idx_reps) * vec(view(bb, re_idx, :)) end return vv diff --git a/test/Project.toml b/test/Project.toml deleted file mode 100644 index 68085b5..0000000 --- a/test/Project.toml +++ /dev/null @@ -1,17 +0,0 @@ -[deps] -Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" -DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" -Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" -GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a" -LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" -MixedModels = "ff71e718-51f3-5ec2-a782-8ffcbfa3c316" -RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b" -StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" -Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" -StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" - -[compat] -Aqua = "0.6" -DataFrames = "1" -StableRNGs = "1" diff --git a/test/icc.jl b/test/icc.jl index 45822f2..522a5d1 100644 --- a/test/icc.jl +++ b/test/icc.jl @@ -22,7 +22,7 @@ progress = false @test icc(model, :subj) + icc(model, :item) ≈ icc(model) @testset "bootstrap" begin - boot = parametricbootstrap(StableRNG(42), 100, model; hide_progress=!progress) + boot = parametricbootstrap(StableRNG(42), 100, model; progress) iccboot_subj = icc(boot, :subj) iccboot_item = icc(boot, :item) @test iccboot_subj + iccboot_item ≈ icc(boot) @@ -53,7 +53,7 @@ end @test icc(modelbern, Symbol("urban & dist")) ≈ icc(modelbin, Symbol("urban & dist")) @testset "bootstrap" begin - boot = parametricbootstrap(StableRNG(42), 100, modelbern; hide_progress=!progress) + boot = parametricbootstrap(StableRNG(42), 100, modelbern; progress) @test_throws ArgumentError icc(boot) iccboot = icc(boot, Bernoulli()) ci = shortestcovint(iccboot) diff --git a/test/remef.jl b/test/remef.jl index c90c88d..3293f4d 100644 --- a/test/remef.jl +++ b/test/remef.jl @@ -1,3 +1,4 @@ +using GLM: linkinv, Link using MixedModels using MixedModels: dataset using MixedModelsExtras @@ -5,23 +6,42 @@ using Test progress = false -fm1 = fit(MixedModel, - @formula(reaction ~ 1 + days + (1 + days | subj)), - dataset(:sleepstudy); - progress) - -@test all(==(0), partial_fitted(fm1, String[]; mode=:include)) -@test all(partial_fitted(fm1, String[]; mode=:exclude) .≈ fitted(fm1)) -@test all(partial_fitted(fm1, ["(Intercept)", "days"], Dict(:subj => String[]); - mode=:include) .≈ fm1.X * fm1.β) - -@test_throws(ArgumentError("""specified FE names not subset of ["(Intercept)", "days"]"""), - partial_fitted(fm1, ["(Intercept)", "Days"], Dict(:subj => []); mode=:include)) -@test_throws(ArgumentError("""specified RE names for subj not subset of ["(Intercept)", "days"]"""), - partial_fitted(fm1, ["(Intercept)", "days"], Dict(:subj => ["Days"]); - mode=:include)) - -re_only_pf = partial_fitted(fm1, String[], Dict(:subj => String["(Intercept)", "days"]); - mode=:include) -re_only = fitted(fm1) - fm1.X * fm1.β -@test all(re_only .≈ re_only_pf) +@testset "LMM" begin + fm1 = fit(MixedModel, + @formula(reaction ~ 1 + days + (1 + days | subj)), + dataset(:sleepstudy); + progress) + + @test all(==(0), partial_fitted(fm1, String[]; mode=:include)) + @test all(partial_fitted(fm1, String[]; mode=:exclude) .≈ fitted(fm1)) + @test all(partial_fitted(fm1, ["(Intercept)", "days"], Dict(:subj => String[]); + mode=:include) .≈ fm1.X * fm1.β) + + @test_throws(ArgumentError("""specified FE names not subset of ["(Intercept)", "days"]"""), + partial_fitted(fm1, ["(Intercept)", "Days"], Dict(:subj => []); + mode=:include)) + @test_throws(ArgumentError("""specified RE names for subj not subset of ["(Intercept)", "days"]"""), + partial_fitted(fm1, ["(Intercept)", "days"], Dict(:subj => ["Days"]); + mode=:include)) + + re_only_pf = partial_fitted(fm1, String[], Dict(:subj => String["(Intercept)", "days"]); + mode=:include) + re_only = fitted(fm1) - fm1.X * fm1.β + @test all(re_only .≈ re_only_pf) +end + +@testset "GLMM" begin + contra = dataset(:contra) + gm1 = fit(MixedModel, @formula(use ~ 1 + (1 | urban & dist)), + contra, Bernoulli(); fast=true, progress) + + pf_all = partial_fitted(gm1, ["(Intercept)"]; mode=:include, type=:response) + fitted_vals = fitted(gm1) + @test all(pf_all .≈ fitted_vals) + + re_only_pf = partial_fitted(gm1, String[], + Dict(Symbol("urban & dist") => String["(Intercept)"]); + mode=:include, type=:linpred) + full = modelmatrix(gm1) * fixef(gm1) + re_only_pf + @test all(linkinv.(Link(gm1), full) .≈ fitted_vals) +end diff --git a/test/runtests.jl b/test/runtests.jl index e9776ac..0ca86e7 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,11 +1,15 @@ using Aqua using LinearAlgebra +using MixedModels using MixedModelsExtras using Test @testset "Aqua" begin # it's not piracy for StatsAPI.r2(::MixedModel), it's privateering! - Aqua.test_all(MixedModelsExtras; ambiguities=false, piracy=false) + Aqua.test_all(MixedModelsExtras; ambiguities=false, + piracies=(; + treat_as_own=[LinearMixedModel, MixedModel, + GeneralizedLinearMixedModel, RandomEffectsTerm])) end @testset "ICC" begin