diff --git a/.github/workflows/YASG.yml b/.github/workflows/YASG.yml index 4896c32..5aee7b9 100644 --- a/.github/workflows/YASG.yml +++ b/.github/workflows/YASG.yml @@ -11,6 +11,7 @@ on: - 'src/**' - 'test/**' - 'docs/**' + - 'ext/**' - '.github/workflows/YASG.yml' - 'format/**' jobs: diff --git a/Project.toml b/Project.toml index f31d38b..dda2b95 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Effects" uuid = "8f03c58b-bd97-4933-a826-f71b64d2cca2" authors = ["Beacon Biosignals, Inc."] -version = "1.0.2" +version = "1.1.0" [deps] Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" @@ -10,10 +10,19 @@ Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d" Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" +[weakdeps] +GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a" +MixedModels = "ff71e718-51f3-5ec2-a782-8ffcbfa3c316" + +[extensions] +EffectsGLMExt = "GLM" +EffectsMixedModelsExt = ["GLM", "MixedModels"] + [compat] Aqua = "0.5, 0.6" Combinatorics = "1" @@ -27,6 +36,7 @@ MultipleTesting = "0.5, 0.6" RDatasets = "0.7.7" StableRNGs = "1.0" StandardizedPredictors = "0.1, 1" +StatsAPI = "1.6" StatsBase = "0.33, 0.34" StatsModels = "0.6.23, 0.7" Tables = "1" diff --git a/ext/EffectsGLMExt.jl b/ext/EffectsGLMExt.jl new file mode 100644 index 0000000..24e3844 --- /dev/null +++ b/ext/EffectsGLMExt.jl @@ -0,0 +1,14 @@ +module EffectsGLMExt + +using Effects + +using GLM: AbstractGLM, Link, Link01, inverselink +using Effects.StatsModels: TableRegressionModel + +# TODO: upstream a Link(::TableRegressionModel{<:AbstractGLM}) +Effects._model_link(m::TableRegressionModel{<:AbstractGLM}, ::AutoInvLink) = Link(m.model) +Effects._model_link(m::AbstractGLM, ::AutoInvLink) = Link(m) +Effects._invlink_and_deriv(link::Link01, η) = inverselink(link, η)[1:2:3] # (µ, 1 - µ, dμdη) +Effects._invlink_and_deriv(link::Link, η) = inverselink(link, η)[1:2] # (µ, dμdη, NaN) + +end # module diff --git a/ext/EffectsMixedModelsExt.jl b/ext/EffectsMixedModelsExt.jl new file mode 100644 index 0000000..ff83274 --- /dev/null +++ b/ext/EffectsMixedModelsExt.jl @@ -0,0 +1,9 @@ +module EffectsMixedModelsExt + +using Effects +using MixedModels +using GLM: Link + +Effects._model_link(m::GeneralizedLinearMixedModel, ::AutoInvLink) = Link(m) + +end # module diff --git a/format/run.jl b/format/run.jl index 5e7150d..87bfc57 100644 --- a/format/run.jl +++ b/format/run.jl @@ -3,7 +3,7 @@ using JuliaFormatter function main() perfect = true # note: keep in sync with `.github/workflows/format-check.yml` - for d in ["src/", "test/", "docs/"] + for d in ["src/", "ext/", "test/", "docs/"] @info "...linting $d ..." dir_perfect = format(d, YASStyle()) perfect = perfect && dir_perfect diff --git a/src/Effects.jl b/src/Effects.jl index 76dafea..b9a1db5 100644 --- a/src/Effects.jl +++ b/src/Effects.jl @@ -15,7 +15,7 @@ using ForwardDiff include("typical.jl") include("regressionmodel.jl") -export effects, effects!, expand_grid +export effects, effects!, expand_grid, AutoInvLink include("emmeans.jl") export emmeans, empairs diff --git a/src/regressionmodel.jl b/src/regressionmodel.jl index 0b4da18..91b91bc 100644 --- a/src/regressionmodel.jl +++ b/src/regressionmodel.jl @@ -1,3 +1,25 @@ +# determine AutoInvLink automatically if a package with an appropriate extension +# is loaded + +""" + AutoInvLink + +Singleton type indicating that the inverse link should be automatically +determined from the model type. + +!!! compat "Julia 1.9" + Automatic inverse link determination is implemented using package + extensions, which are available beginning in Julia 1.9. + +An error is thrown if the inverse link cannot be determined. This will +always occur with Julia versions prior to 1.9, and will otherwise occur +when no extension has been loaded that specifies the link function for +the model type. + +Currently, this is only implemented for GLM.jl and MixedModels.jl +""" +struct AutoInvLink end + """ effects!(reference_grid::DataFrame, model::RegressionModel; eff_col=nothing, err_col=:err, typical=mean, invlink=identity, @@ -54,6 +76,11 @@ Pointwise standard errors are written into the column specified by `err_col`. automatic differentiation. This means that the `invlink` function must be differentiable and should not involve inplace operations. +On Julia versions 1.9 or later, the special singleton value `AutoInvLink()` +can be used to specify that the appropriate inverse link should be determined +automatically. In that case, a direct or analytic computation of the derivative +is used when possible. + Effects are computed using the model's variance-covariance matrix, which is computed by default using `StatsBas.vcov`. Alternative methods such as the sandwich estimator or robust estimators can be used by specifying `vcov`, @@ -67,7 +94,6 @@ using Vcov myvcov(x) = Base.Fix2(vcov, Vcov.robust()) ``` - The reference grid must contain columns for all predictors in the formula. (Interactions are computed automatically.) Contrasts must match the contrasts used to fit the model; using other contrasts will lead to undefined behavior. @@ -87,6 +113,8 @@ The approach for computing effect is based on the effects plots described here: Fox, John (2003). Effect Displays in R for Generalised Linear Models. Journal of Statistical Software. Vol. 8, No. 15 + +See also [`AutoInvLink`](@ref). """ function effects!(reference_grid::DataFrame, model::RegressionModel; eff_col=nothing, err_col=:err, typical=mean, invlink=identity, @@ -98,10 +126,7 @@ function effects!(reference_grid::DataFrame, model::RegressionModel; X = modelcols(form_typical, reference_grid) eff = X * coef(model) err = sqrt.(diag(X * vcov(model) * X')) - if invlink !== identity - err .*= ForwardDiff.derivative.(invlink, eff) - eff .= invlink.(eff) - end + _difference_method!(eff, err, model, invlink) reference_grid[!, something(eff_col, _responsename(model))] = eff reference_grid[!, err_col] = err return reference_grid @@ -110,6 +135,37 @@ function effects!(reference_grid::DataFrame, model::RegressionModel; # return (; reference_grid..., depvar => eff, err_col => err) end +# TODO: support the transformation method +# in addition to the difference method +# xref https://github.com/JuliaStats/GLM.jl/blob/c13577eaf3f418c58020534dd407532ee57f219b/src/glmfit.jl#L773-L783 + +_invlink_and_deriv(invlink, η) = (invlink(η), ForwardDiff.derivative(invlink, η)) +_invlink_and_deriv(::typeof(identity), η) = (η, 1) +# this isn't the best name because it sometimes returns the inverse link and sometimes the link (Link()) +# for now, this is private API, but we should see how this goes and whether we can make it public API +# so local extensions (instead of Package-Extensions) are better supported +_model_link(::RegressionModel, invlink::Function) = invlink +function _model_link(model::RegressionModel, ::AutoInvLink) + msg = string("cannot automatically determine inverse link for models ", + "of type ", typeof(model)) + @static if isdefined(Base, :get_extension) + msg *= "; no appropriate extension has been loaded" + end + throw(ArgumentError(msg)) +end + +function _difference_method!(eff::Vector{T}, err::Vector{T}, + m::RegressionModel, + invlink) where {T<:AbstractFloat} + link = _model_link(m, invlink) + @inbounds for i in eachindex(eff, err) + μ, dμdη = _invlink_and_deriv(link, eff[i]) + err[i] *= dμdη + eff[i] = μ + end + return eff, err +end + """ expand_grid(design) @@ -155,6 +211,8 @@ fully-crossed design. Additionally, two extra columns are created representing the lower and upper edges of the confidence interval. The default `level=nothing` corresponds to [resp-err, resp+err], while `level=0.95` corresponds to the 95% confidence interval. + +See also [`AutoInvLink`](@ref). """ function effects(design::AbstractDict, model::RegressionModel; eff_col=nothing, err_col=:err, typical=mean, diff --git a/test/delta_method.jl b/test/delta_method.jl index b8960d4..41ebef4 100644 --- a/test/delta_method.jl +++ b/test/delta_method.jl @@ -2,15 +2,16 @@ using DataFrames using Effects using GLM using LinearAlgebra +using MixedModels using RDatasets: dataset as rdataset using StableRNGs using Test @testset "transformed response" begin dat = rdataset("car", "Prestige") - model = lm(@formula(log(Prestige) ~ 1 + Income * Education), dat) design = Dict(:Income => [1, 2], :Education => [3, 4]) + model = lm(@formula(log(Prestige) ~ 1 + Income * Education), dat) eff_original_scale = effects(design, model; invlink=exp) eff_logscale = effects(design, model) @test all(eff_logscale[!, 3] .≈ log.(eff_original_scale[!, 3])) @@ -37,55 +38,99 @@ using Test @test isapprox(only(eff_emm_trans.err), 1.07; atol=0.005) @test isapprox(only(eff_emm_trans.lower), 45.3; atol=0.05) @test isapprox(only(eff_emm_trans.upper), 47.5; atol=0.05) + + @testset "AutoInvLink fails gracefully" begin + # this should work even pre Julia 1.9 because by definition + # no extension is loaded + @test_throws ArgumentError effects(design, model; invlink=AutoInvLink()) + end end @testset "link function" begin dat = rdataset("car", "Cowles") dat[!, :vol] = dat.Volunteer .== "yes" model = glm(@formula(vol ~ Extraversion * Neuroticism), dat, Bernoulli()) - invlink = Base.Fix1(GLM.linkinv, Link(model.model)) design = Dict(:Extraversion => [13], :Neuroticism => [16]) X = [1.0 13.0 16.0 13 * 16] + iv = Base.Fix1(GLM.linkinv, Link(model.model)) + @static if VERSION >= v"1.9" + invlinks = [iv, AutoInvLink()] + @test Effects._model_link(model, AutoInvLink()) == + Effects._model_link(model.model, AutoInvLink()) + else + invlinks = [iv] + end + @testset "invlink = $invlink" for invlink in invlinks + for level in [0.68, 0.95] + eff = effects(design, model; invlink, level) + + # compare with results from GLM.predict + pred = DataFrame(predict(model.model, X; + interval=:confidence, + interval_method=:delta, + level)) + @test all(pred.prediction .≈ eff.vol) + @test all(isapprox.(pred.lower, eff.lower; atol=0.001)) + @test all(isapprox.(pred.upper, eff.upper; atol=0.001)) - for level in [0.68, 0.95] - eff = effects(design, model; invlink, level) + eff_trans = effects(design, model; level) + transform!(eff_trans, + :vol => ByRow(iv), + :lower => ByRow(iv), + :upper => ByRow(iv); renamecols=false) + # for this model, things play out nicely + @test all(eff_trans.vol .≈ eff.vol) + @test all(isapprox.(eff_trans.lower, eff.lower; atol=0.001)) + @test all(isapprox.(eff_trans.upper, eff.upper; atol=0.001)) + end - # compare with results from GLM.predict - pred = DataFrame(predict(model.model, X; - interval=:confidence, - interval_method=:delta, - level)) - @test all(pred.prediction .≈ eff.vol) - @test all(isapprox.(pred.lower, eff.lower; atol=0.001)) - @test all(isapprox.(pred.upper, eff.upper; atol=0.001)) + # compare with results from emmeans in R + # emmeans(model, ~ neuroticism * extraversion, level=0.68) + eff_emm = effects(Dict(:Extraversion => [12.4], :Neuroticism => [11.5]), model) + @test isapprox(only(eff_emm.vol), -0.347; atol=0.005) + @test isapprox(only(eff_emm.err), 0.0549; atol=0.005) + @test isapprox(only(eff_emm.lower), -0.402; atol=0.005) + @test isapprox(only(eff_emm.upper), -0.292; atol=0.005) - eff_trans = effects(design, model; level) - transform!(eff_trans, - :vol => ByRow(invlink), - :lower => ByRow(invlink), - :upper => ByRow(invlink); renamecols=false) - # for this model, things play out nicely - @test all(eff_trans.vol .≈ eff.vol) - @test all(isapprox.(eff_trans.lower, eff.lower; atol=0.001)) - @test all(isapprox.(eff_trans.upper, eff.upper; atol=0.001)) + # emmeans(model, ~ neuroticism * extraversion, level=0.68, transform="response") + eff_emm_trans = effects(Dict(:Extraversion => [12.4], :Neuroticism => [11.5]), + model; + invlink) + @test isapprox(only(eff_emm_trans.vol), 0.414; atol=0.005) + @test isapprox(only(eff_emm_trans.err), 0.0133; atol=0.005) + @test isapprox(only(eff_emm_trans.lower), 0.401; atol=0.005) + @test isapprox(only(eff_emm_trans.upper), 0.427; atol=0.005) end +end - # compare with results from emmeans in R - # emmeans(model, ~ neuroticism * extraversion, level=0.68) - eff_emm = effects(Dict(:Extraversion => [12.4], :Neuroticism => [11.5]), model) - @test isapprox(only(eff_emm.vol), -0.347; atol=0.005) - @test isapprox(only(eff_emm.err), 0.0549; atol=0.005) - @test isapprox(only(eff_emm.lower), -0.402; atol=0.005) - @test isapprox(only(eff_emm.upper), -0.292; atol=0.005) +@static if VERSION >= v"1.9" + @testset "Non Link01 GLM link" begin + dat = rdataset("car", "Cowles") + dat[!, :vol] = dat.Volunteer .== "yes" + # this isn't a particularly sensible model, but it's fine for testing + model = glm(@formula(vol ~ Extraversion * Neuroticism), dat, Poisson()) + design = Dict(:Extraversion => [13], + :Neuroticism => [16]) + X = [1.0 13.0 16.0 13 * 16] + eff_manual = effects(design, model; + invlink=Base.Fix1(GLM.linkinv, Link(model.model))) + eff_auto = effects(design, model; invlink=AutoInvLink()) - # emmeans(model, ~ neuroticism * extraversion, level=0.68, transform="response") - eff_emm_trans = effects(Dict(:Extraversion => [12.4], :Neuroticism => [11.5]), model; - invlink) - @test isapprox(only(eff_emm_trans.vol), 0.414; atol=0.005) - @test isapprox(only(eff_emm_trans.err), 0.0133; atol=0.005) - @test isapprox(only(eff_emm_trans.lower), 0.401; atol=0.005) - @test isapprox(only(eff_emm_trans.upper), 0.427; atol=0.005) + @test all(isapprox.(Matrix(eff_manual), Matrix(eff_auto))) + end + @testset "link function in a MixedModel" begin + model = fit(MixedModel, + @formula(use ~ 1 + age + (1 | urban)), + MixedModels.dataset(:contra), + Bernoulli(); progress=false) + design = Dict(:age => -10:10) + eff_manual = effects(design, model; + invlink=Base.Fix1(GLM.linkinv, Link(model))) + eff_auto = effects(design, model; invlink=AutoInvLink()) + + @test all(isapprox.(Matrix(eff_manual), Matrix(eff_auto))) + end end @testset "identity by another name" begin diff --git a/test/runtests.jl b/test/runtests.jl index 6dbc1de..c40fb4a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,7 +3,9 @@ using Effects using Test @testset "Aqua" begin - Aqua.test_all(Effects; ambiguities=false) + # layout of weakdeps and extensions etc. differs between pre 1.9 and 1.9+ + project_toml_formatting = VERSION >= v"1.9" + Aqua.test_all(Effects; ambiguities=false, project_toml_formatting) end @testset "TypicalTerm" begin