Skip to content

Commit

Permalink
partial_fitted(::GeneralizedLinearMixedModel) (#28)
Browse files Browse the repository at this point in the history
* pull out partial_fitted meat into own function

* efficiency tweaks

* GLMM

* deprecate hide_progress in tests

* minor version bump

* update test project and Aqua
  • Loading branch information
palday authored Jan 25, 2024
1 parent 236ce89 commit 97f7e5c
Show file tree
Hide file tree
Showing 7 changed files with 102 additions and 47 deletions.
19 changes: 17 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
name = "MixedModelsExtras"
uuid = "781a26e1-49f4-409a-8f4c-c3159d78c17e"
authors = ["Phillip Alday <[email protected]> and contributors"]
version = "2.0.0"
version = "2.1.0"

[deps]
GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MixedModels = "ff71e718-51f3-5ec2-a782-8ffcbfa3c316"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Expand All @@ -12,14 +13,28 @@ StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"

[compat]
Aqua = "0.8"
GLM = "1"
DataFrames = "1"
Distributions = "0.25"
LinearAlgebra = "1"
MixedModels = "4"
RDatasets = "0.7.7"
StableRNGs = "1"
Statistics = "1"
StatsBase = "0.33, 0.34"
StatsModels = "0.7.3"
Tables = "1"
Test = "1"
julia = "1.8"

[extras]
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b"
StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test"]
test = ["Test", "Aqua", "DataFrames", "Distributions", "RDatasets", "StableRNGs"]
1 change: 1 addition & 0 deletions src/MixedModelsExtras.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ using StatsBase
using StatsModels
using Tables

using GLM: linkinv, Link
using StatsModels: termnames, vif, gvif
export termnames, gvif, vif

Expand Down
42 changes: 37 additions & 5 deletions src/remef.jl
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@

# Intercept always has to be specified
"""
partial_fitted(model::LinearMixedModel,
partial_fitted(model::MixedModel,
fe::AbstractVector{<:AbstractString},
re::Dict{Symbol}=Dict(fn => fe for fn in fnames(model));
mode=:include)
mode=:include,
type=:response)
Compute "partial" fitted values.
Expand All @@ -26,6 +27,20 @@ with any grouping variable.
The keyword argument `mode` specifies whether the fixed and random effects
specifications are treated as coefficients to `:include` or `:exclude`.
For `GeneralizedLinearMixedModel`, the keyword argument `type` specifies whether
the predictions should be returned on the scale of linear predictor (`:linpred`)
or on the response scale (`:response`).
!!! warning
Partial fitted values can be misleading for generalized linear mixed models
on the response scale because of the nonlinear nature of the link function.
For example, in logistic regression the partial fitted values are computed
on the linear predictor scale, i.e. the log odds scale, and then transformed
to the response scale, i.e. the probablitiy scale. However, a simple additive
contribution on the log odds scale is not additive on the probability scale.
More directly, it is impossible to decompose the effects of individual predictors
into simple additive contributions on the original scale.
!!! note
For both the fixed and the random effects, the relevant entities are the
coefficient names, not the original term names.
Expand All @@ -44,6 +59,21 @@ function partial_fitted(model::LinearMixedModel{T},
fe::AbstractVector{<:AbstractString},
re::Dict{Symbol}=Dict(fn => fe for fn in fnames(model));
mode=:include) where {T}
return _partial_fitted(model, fe, re; mode)
end

function partial_fitted(model::GeneralizedLinearMixedModel{T},
fe::AbstractVector{<:AbstractString},
re::Dict{Symbol}=Dict(fn => fe for fn in fnames(model));
mode=:include, type=:linpred) where {T}
type in (:linpred, :response) || throw(ArgumentError("Invalid value for type: $(type)"))
y = _partial_fitted(model, fe, re; mode)
return type == :linpred ? y : broadcast!(Base.Fix1(linkinv, Link(model)), y, y)
end

function _partial_fitted(model::MixedModel{T},
fe::AbstractVector{<:AbstractString},
re::Dict{Symbol}; mode) where {T}
# @debug fe
# @debug re
issubset(fe, coefnames(model)) ||
Expand All @@ -60,8 +90,8 @@ function partial_fitted(model::LinearMixedModel{T},
mode == :exclude && (fe_idx = .!fe_idx)
# @debug fe_idx
# XXX does this work properly for rank-deficient models?
X = view(model.X, :, fe_idx)
vv = mul!(Vector{T}(undef, nobs(model)), X, fixef(model)[fe_idx])
X = view(modelmatrix(model), :, fe_idx)
vv = mul!(Vector{T}(undef, nobs(model)), X, view(fixef(model), fe_idx))

for (rt, bb) in zip(model.reterms, ranef(model))
group = Symbol(string(rt.trm))
Expand All @@ -85,10 +115,12 @@ function partial_fitted(model::LinearMixedModel{T},
# @debug re_idx_reps
# XXX no appropriate mul! method
# mul!(vv, view(rt, :, re_idx_reps), view(bb, re_idx, :), one(T), one(T))
mul!(vv, view(rt, :, re_idx_reps), vec(view(bb, re_idx, :)), one(T), one(T))

# should re-write this as a loop to avoid allocating the intermediate allocation
# @debug size(view(rt, :, re_idx_reps))
# @debug size(view(bb, re_idx, :))
vv .+= view(rt, :, re_idx_reps) * vec(view(bb, re_idx, :))
# vv .+= view(rt, :, re_idx_reps) * vec(view(bb, re_idx, :))
end

return vv
Expand Down
17 changes: 0 additions & 17 deletions test/Project.toml

This file was deleted.

4 changes: 2 additions & 2 deletions test/icc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ progress = false
@test icc(model, :subj) + icc(model, :item) icc(model)

@testset "bootstrap" begin
boot = parametricbootstrap(StableRNG(42), 100, model; hide_progress=!progress)
boot = parametricbootstrap(StableRNG(42), 100, model; progress)
iccboot_subj = icc(boot, :subj)
iccboot_item = icc(boot, :item)
@test iccboot_subj + iccboot_item icc(boot)
Expand Down Expand Up @@ -53,7 +53,7 @@ end
@test icc(modelbern, Symbol("urban & dist")) icc(modelbin, Symbol("urban & dist"))

@testset "bootstrap" begin
boot = parametricbootstrap(StableRNG(42), 100, modelbern; hide_progress=!progress)
boot = parametricbootstrap(StableRNG(42), 100, modelbern; progress)
@test_throws ArgumentError icc(boot)
iccboot = icc(boot, Bernoulli())
ci = shortestcovint(iccboot)
Expand Down
60 changes: 40 additions & 20 deletions test/remef.jl
Original file line number Diff line number Diff line change
@@ -1,27 +1,47 @@
using GLM: linkinv, Link
using MixedModels
using MixedModels: dataset
using MixedModelsExtras
using Test

progress = false

fm1 = fit(MixedModel,
@formula(reaction ~ 1 + days + (1 + days | subj)),
dataset(:sleepstudy);
progress)

@test all(==(0), partial_fitted(fm1, String[]; mode=:include))
@test all(partial_fitted(fm1, String[]; mode=:exclude) .≈ fitted(fm1))
@test all(partial_fitted(fm1, ["(Intercept)", "days"], Dict(:subj => String[]);
mode=:include) .≈ fm1.X * fm1.β)

@test_throws(ArgumentError("""specified FE names not subset of ["(Intercept)", "days"]"""),
partial_fitted(fm1, ["(Intercept)", "Days"], Dict(:subj => []); mode=:include))
@test_throws(ArgumentError("""specified RE names for subj not subset of ["(Intercept)", "days"]"""),
partial_fitted(fm1, ["(Intercept)", "days"], Dict(:subj => ["Days"]);
mode=:include))

re_only_pf = partial_fitted(fm1, String[], Dict(:subj => String["(Intercept)", "days"]);
mode=:include)
re_only = fitted(fm1) - fm1.X * fm1.β
@test all(re_only .≈ re_only_pf)
@testset "LMM" begin
fm1 = fit(MixedModel,
@formula(reaction ~ 1 + days + (1 + days | subj)),
dataset(:sleepstudy);
progress)

@test all(==(0), partial_fitted(fm1, String[]; mode=:include))
@test all(partial_fitted(fm1, String[]; mode=:exclude) .≈ fitted(fm1))
@test all(partial_fitted(fm1, ["(Intercept)", "days"], Dict(:subj => String[]);
mode=:include) .≈ fm1.X * fm1.β)

@test_throws(ArgumentError("""specified FE names not subset of ["(Intercept)", "days"]"""),
partial_fitted(fm1, ["(Intercept)", "Days"], Dict(:subj => []);
mode=:include))
@test_throws(ArgumentError("""specified RE names for subj not subset of ["(Intercept)", "days"]"""),
partial_fitted(fm1, ["(Intercept)", "days"], Dict(:subj => ["Days"]);
mode=:include))

re_only_pf = partial_fitted(fm1, String[], Dict(:subj => String["(Intercept)", "days"]);
mode=:include)
re_only = fitted(fm1) - fm1.X * fm1.β
@test all(re_only .≈ re_only_pf)
end

@testset "GLMM" begin
contra = dataset(:contra)
gm1 = fit(MixedModel, @formula(use ~ 1 + (1 | urban & dist)),
contra, Bernoulli(); fast=true, progress)

pf_all = partial_fitted(gm1, ["(Intercept)"]; mode=:include, type=:response)
fitted_vals = fitted(gm1)
@test all(pf_all .≈ fitted_vals)

re_only_pf = partial_fitted(gm1, String[],
Dict(Symbol("urban & dist") => String["(Intercept)"]);
mode=:include, type=:linpred)
full = modelmatrix(gm1) * fixef(gm1) + re_only_pf
@test all(linkinv.(Link(gm1), full) .≈ fitted_vals)
end
6 changes: 5 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
using Aqua
using LinearAlgebra
using MixedModels
using MixedModelsExtras
using Test

@testset "Aqua" begin
# it's not piracy for StatsAPI.r2(::MixedModel), it's privateering!
Aqua.test_all(MixedModelsExtras; ambiguities=false, piracy=false)
Aqua.test_all(MixedModelsExtras; ambiguities=false,
piracies=(;
treat_as_own=[LinearMixedModel, MixedModel,
GeneralizedLinearMixedModel, RandomEffectsTerm]))
end

@testset "ICC" begin
Expand Down

2 comments on commit 97f7e5c

@palday
Copy link
Owner Author

@palday palday commented on 97f7e5c Jan 25, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/99489

Tip: Release Notes

Did you know you can add release notes too? Just add markdown formatted text underneath the comment after the text
"Release notes:" and it will be added to the registry PR, and if TagBot is installed it will also be added to the
release that TagBot creates. i.e.

@JuliaRegistrator register

Release notes:

## Breaking changes

- blah

To add them here just re-invoke and the PR will be updated.

Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v2.1.0 -m "<description of version>" 97f7e5c89c7bad99074bcc83217cf8436d7fe580
git push origin v2.1.0

Please sign in to comment.