Automatic inverse link (#56)

* GLM ext * test mixed models extension * better test support for Julia <1.9 * YAS * disable Project.toml formatting check on LTS * reduce duplication Co-authored-by: Alex Arslan <[email protected]> * distributions with support outside [0, 1] * phrasing Co-authored-by: Alex Arslan <[email protected]> * break my tests Co-authored-by: Alex Arslan <[email protected]> --------- Co-authored-by: Alex Arslan <[email protected]>
beacon-biosignals · Aug 21, 2023 · 1ca3e9d · 1ca3e9d · palday · Aug 21, 2023
1 parent 1cd621d
commit 1ca3e9d
Show file tree

Hide file tree

Showing 9 changed files with 183 additions and 44 deletions.
diff --git a/.github/workflows/YASG.yml b/.github/workflows/YASG.yml
@@ -11,6 +11,7 @@ on:
       - 'src/**'
       - 'test/**'
       - 'docs/**'
+      - 'ext/**'
       - '.github/workflows/YASG.yml'
       - 'format/**'
 jobs:

diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "Effects"
 uuid = "8f03c58b-bd97-4933-a826-f71b64d2cca2"
 authors = ["Beacon Biosignals, Inc."]
-version = "1.0.2"
+version = "1.1.0"
 
 [deps]
 Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
@@ -10,10 +10,19 @@ Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d"
 Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
 
+[weakdeps]
+GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
+MixedModels = "ff71e718-51f3-5ec2-a782-8ffcbfa3c316"
+
+[extensions]
+EffectsGLMExt = "GLM"
+EffectsMixedModelsExt = ["GLM", "MixedModels"]
+
 [compat]
 Aqua = "0.5, 0.6"
 Combinatorics = "1"
@@ -27,6 +36,7 @@ MultipleTesting = "0.5, 0.6"
 RDatasets = "0.7.7"
 StableRNGs = "1.0"
 StandardizedPredictors = "0.1, 1"
+StatsAPI = "1.6"
 StatsBase = "0.33, 0.34"
 StatsModels = "0.6.23, 0.7"
 Tables = "1"

diff --git a/ext/EffectsGLMExt.jl b/ext/EffectsGLMExt.jl
@@ -0,0 +1,14 @@
+module EffectsGLMExt
+
+using Effects
+
+using GLM: AbstractGLM, Link, Link01, inverselink
+using Effects.StatsModels: TableRegressionModel
+
+# TODO: upstream a Link(::TableRegressionModel{<:AbstractGLM})
+Effects._model_link(m::TableRegressionModel{<:AbstractGLM}, ::AutoInvLink) = Link(m.model)
+Effects._model_link(m::AbstractGLM, ::AutoInvLink) = Link(m)
+Effects._invlink_and_deriv(link::Link01, η) = inverselink(link, η)[1:2:3]  # (µ, 1 - µ, dμdη)
+Effects._invlink_and_deriv(link::Link, η) = inverselink(link, η)[1:2]  # (µ, dμdη, NaN)
+
+end # module
diff --git a/ext/EffectsMixedModelsExt.jl b/ext/EffectsMixedModelsExt.jl
@@ -0,0 +1,9 @@
+module EffectsMixedModelsExt
+
+using Effects
+using MixedModels
+using GLM: Link
+
+Effects._model_link(m::GeneralizedLinearMixedModel, ::AutoInvLink) = Link(m)
+
+end # module
diff --git a/format/run.jl b/format/run.jl
@@ -3,7 +3,7 @@ using JuliaFormatter
 function main()
     perfect = true
     # note: keep in sync with `.github/workflows/format-check.yml`
-    for d in ["src/", "test/", "docs/"]
+    for d in ["src/", "ext/", "test/", "docs/"]
         @info "...linting $d ..."
         dir_perfect = format(d, YASStyle())
         perfect = perfect && dir_perfect

diff --git a/src/Effects.jl b/src/Effects.jl
@@ -15,7 +15,7 @@ using ForwardDiff
 
 include("typical.jl")
 include("regressionmodel.jl")
-export effects, effects!, expand_grid
+export effects, effects!, expand_grid, AutoInvLink
 
 include("emmeans.jl")
 export emmeans, empairs

diff --git a/src/regressionmodel.jl b/src/regressionmodel.jl
@@ -1,3 +1,25 @@
+# determine AutoInvLink automatically if a package with an appropriate extension
+# is loaded
+
+"""
+    AutoInvLink
+
+Singleton type indicating that the inverse link should be automatically
+determined from the model type.
+
+!!! compat "Julia 1.9"
+    Automatic inverse link determination is implemented using package
+    extensions, which are available beginning in Julia 1.9.
+    
+An error is thrown if the inverse link cannot be determined. This will
+always occur with Julia versions prior to 1.9, and will otherwise occur
+when no extension has been loaded that specifies the link function for
+the model type.
+
+Currently, this is only implemented for GLM.jl and MixedModels.jl
+"""
+struct AutoInvLink end
+
 """
     effects!(reference_grid::DataFrame, model::RegressionModel;
              eff_col=nothing, err_col=:err, typical=mean, invlink=identity,
@@ -54,6 +76,11 @@ Pointwise standard errors are written into the column specified by `err_col`.
     automatic differentiation. This means that the `invlink` function must be
     differentiable and should not involve inplace operations.
 
+On Julia versions 1.9 or later, the special singleton value `AutoInvLink()`
+can be used to specify that the appropriate inverse link should be determined
+automatically. In that case, a direct or analytic computation of the derivative
+is used when possible.
+
 Effects are computed using the model's variance-covariance matrix, which is
 computed by default using `StatsBas.vcov`. Alternative methods such as the
 sandwich estimator or robust estimators can be used by specifying `vcov`,
@@ -67,7 +94,6 @@ using Vcov
 myvcov(x) = Base.Fix2(vcov, Vcov.robust())
 ```
 
-
 The reference grid must contain columns for all predictors in the formula.
 (Interactions are computed automatically.) Contrasts must match the contrasts
 used to fit the model; using other contrasts will lead to undefined behavior.
@@ -87,6 +113,8 @@ The approach for computing effect is based on the effects plots described here:
 
 Fox, John (2003). Effect Displays in R for Generalised Linear Models.
 Journal of Statistical Software. Vol. 8, No. 15
+
+See also [`AutoInvLink`](@ref).
 """
 function effects!(reference_grid::DataFrame, model::RegressionModel;
                   eff_col=nothing, err_col=:err, typical=mean, invlink=identity,
@@ -98,10 +126,7 @@ function effects!(reference_grid::DataFrame, model::RegressionModel;
     X = modelcols(form_typical, reference_grid)
     eff = X * coef(model)
     err = sqrt.(diag(X * vcov(model) * X'))
-    if invlink !== identity
-        err .*= ForwardDiff.derivative.(invlink, eff)
-        eff .= invlink.(eff)
-    end
+    _difference_method!(eff, err, model, invlink)
     reference_grid[!, something(eff_col, _responsename(model))] = eff
     reference_grid[!, err_col] = err
     return reference_grid
@@ -110,6 +135,37 @@ function effects!(reference_grid::DataFrame, model::RegressionModel;
     # return (; reference_grid..., depvar => eff, err_col => err)
 end
 
+# TODO: support the transformation method
+# in addition to the difference method
+# xref https://github.com/JuliaStats/GLM.jl/blob/c13577eaf3f418c58020534dd407532ee57f219b/src/glmfit.jl#L773-L783
+
+_invlink_and_deriv(invlink, η) = (invlink(η), ForwardDiff.derivative(invlink, η))
+_invlink_and_deriv(::typeof(identity), η) = (η, 1)
+# this isn't the best name because it sometimes returns the inverse link and sometimes the link (Link())
+# for now, this is private API, but we should see how this goes and whether we can make it public API
+# so local extensions (instead of Package-Extensions) are better supported 
+_model_link(::RegressionModel, invlink::Function) = invlink
+function _model_link(model::RegressionModel, ::AutoInvLink)
+    msg = string("cannot automatically determine inverse link for models ",
+                 "of type ", typeof(model))
+    @static if isdefined(Base, :get_extension)
+        msg *= "; no appropriate extension has been loaded"
+    end
+    throw(ArgumentError(msg))
+end
+
+function _difference_method!(eff::Vector{T}, err::Vector{T},
+                             m::RegressionModel,
+                             invlink) where {T<:AbstractFloat}
+    link = _model_link(m, invlink)
+    @inbounds for i in eachindex(eff, err)
+        μ, dμdη = _invlink_and_deriv(link, eff[i])
+        err[i] *= dμdη
+        eff[i] = μ
+    end
+    return eff, err
+end
+
 """
     expand_grid(design)
 
@@ -155,6 +211,8 @@ fully-crossed design. Additionally, two extra columns are created representing
 the lower and upper edges of the confidence interval.
 The default `level=nothing` corresponds to [resp-err, resp+err], while `level=0.95`
 corresponds to the 95% confidence interval.
+
+See also [`AutoInvLink`](@ref).
 """
 function effects(design::AbstractDict, model::RegressionModel;
                  eff_col=nothing, err_col=:err, typical=mean,

diff --git a/test/delta_method.jl b/test/delta_method.jl
@@ -2,15 +2,16 @@ using DataFrames
 using Effects
 using GLM
 using LinearAlgebra
+using MixedModels
 using RDatasets: dataset as rdataset
 using StableRNGs
 using Test
 
 @testset "transformed response" begin
     dat = rdataset("car", "Prestige")
-    model = lm(@formula(log(Prestige) ~ 1 + Income * Education), dat)
     design = Dict(:Income => [1, 2],
                   :Education => [3, 4])
+    model = lm(@formula(log(Prestige) ~ 1 + Income * Education), dat)
     eff_original_scale = effects(design, model; invlink=exp)
     eff_logscale = effects(design, model)
     @test all(eff_logscale[!, 3] .≈ log.(eff_original_scale[!, 3]))
@@ -37,55 +38,99 @@ using Test
     @test isapprox(only(eff_emm_trans.err), 1.07; atol=0.005)
     @test isapprox(only(eff_emm_trans.lower), 45.3; atol=0.05)
     @test isapprox(only(eff_emm_trans.upper), 47.5; atol=0.05)
+
+    @testset "AutoInvLink fails gracefully" begin
+        # this should work even pre Julia 1.9 because by definition
+        # no extension is loaded
+        @test_throws ArgumentError effects(design, model; invlink=AutoInvLink())
+    end
 end
 
 @testset "link function" begin
     dat = rdataset("car", "Cowles")
     dat[!, :vol] = dat.Volunteer .== "yes"
     model = glm(@formula(vol ~ Extraversion * Neuroticism), dat, Bernoulli())
-    invlink = Base.Fix1(GLM.linkinv, Link(model.model))
     design = Dict(:Extraversion => [13],
                   :Neuroticism => [16])
     X = [1.0 13.0 16.0 13 * 16]
+    iv = Base.Fix1(GLM.linkinv, Link(model.model))
+    @static if VERSION >= v"1.9"
+        invlinks = [iv, AutoInvLink()]
+        @test Effects._model_link(model, AutoInvLink()) ==
+              Effects._model_link(model.model, AutoInvLink())
+    else
+        invlinks = [iv]
+    end
+    @testset "invlink = $invlink" for invlink in invlinks
+        for level in [0.68, 0.95]
+            eff = effects(design, model; invlink, level)
+
+            # compare with results from GLM.predict
+            pred = DataFrame(predict(model.model, X;
+                                     interval=:confidence,
+                                     interval_method=:delta,
+                                     level))
+            @test all(pred.prediction .≈ eff.vol)
+            @test all(isapprox.(pred.lower, eff.lower; atol=0.001))
+            @test all(isapprox.(pred.upper, eff.upper; atol=0.001))
 
-    for level in [0.68, 0.95]
-        eff = effects(design, model; invlink, level)
+            eff_trans = effects(design, model; level)
+            transform!(eff_trans,
+                       :vol => ByRow(iv),
+                       :lower => ByRow(iv),
+                       :upper => ByRow(iv); renamecols=false)
+            # for this model, things play out nicely
+            @test all(eff_trans.vol .≈ eff.vol)
+            @test all(isapprox.(eff_trans.lower, eff.lower; atol=0.001))
+            @test all(isapprox.(eff_trans.upper, eff.upper; atol=0.001))
+        end
 
-        # compare with results from GLM.predict
-        pred = DataFrame(predict(model.model, X;
-                                 interval=:confidence,
-                                 interval_method=:delta,
-                                 level))
-        @test all(pred.prediction .≈ eff.vol)
-        @test all(isapprox.(pred.lower, eff.lower; atol=0.001))
-        @test all(isapprox.(pred.upper, eff.upper; atol=0.001))
+        # compare with results from emmeans in R
+        # emmeans(model, ~ neuroticism * extraversion, level=0.68)
+        eff_emm = effects(Dict(:Extraversion => [12.4], :Neuroticism => [11.5]), model)
+        @test isapprox(only(eff_emm.vol), -0.347; atol=0.005)
+        @test isapprox(only(eff_emm.err), 0.0549; atol=0.005)
+        @test isapprox(only(eff_emm.lower), -0.402; atol=0.005)
+        @test isapprox(only(eff_emm.upper), -0.292; atol=0.005)
 
-        eff_trans = effects(design, model; level)
-        transform!(eff_trans,
-                   :vol => ByRow(invlink),
-                   :lower => ByRow(invlink),
-                   :upper => ByRow(invlink); renamecols=false)
-        # for this model, things play out nicely
-        @test all(eff_trans.vol .≈ eff.vol)
-        @test all(isapprox.(eff_trans.lower, eff.lower; atol=0.001))
-        @test all(isapprox.(eff_trans.upper, eff.upper; atol=0.001))
+        # emmeans(model, ~ neuroticism * extraversion, level=0.68, transform="response")
+        eff_emm_trans = effects(Dict(:Extraversion => [12.4], :Neuroticism => [11.5]),
+                                model;
+                                invlink)
+        @test isapprox(only(eff_emm_trans.vol), 0.414; atol=0.005)
+        @test isapprox(only(eff_emm_trans.err), 0.0133; atol=0.005)
+        @test isapprox(only(eff_emm_trans.lower), 0.401; atol=0.005)
+        @test isapprox(only(eff_emm_trans.upper), 0.427; atol=0.005)
     end
+end
 
-    # compare with results from emmeans in R
-    # emmeans(model, ~ neuroticism * extraversion, level=0.68)
-    eff_emm = effects(Dict(:Extraversion => [12.4], :Neuroticism => [11.5]), model)
-    @test isapprox(only(eff_emm.vol), -0.347; atol=0.005)
-    @test isapprox(only(eff_emm.err), 0.0549; atol=0.005)
-    @test isapprox(only(eff_emm.lower), -0.402; atol=0.005)
-    @test isapprox(only(eff_emm.upper), -0.292; atol=0.005)
+@static if VERSION >= v"1.9"
+    @testset "Non Link01 GLM link" begin
+        dat = rdataset("car", "Cowles")
+        dat[!, :vol] = dat.Volunteer .== "yes"
+        # this isn't a particularly sensible model, but it's fine for testing
+        model = glm(@formula(vol ~ Extraversion * Neuroticism), dat, Poisson())
+        design = Dict(:Extraversion => [13],
+                      :Neuroticism => [16])
+        X = [1.0 13.0 16.0 13 * 16]
+        eff_manual = effects(design, model;
+                             invlink=Base.Fix1(GLM.linkinv, Link(model.model)))
+        eff_auto = effects(design, model; invlink=AutoInvLink())
 
-    # emmeans(model, ~ neuroticism * extraversion, level=0.68, transform="response")
-    eff_emm_trans = effects(Dict(:Extraversion => [12.4], :Neuroticism => [11.5]), model;
-                            invlink)
-    @test isapprox(only(eff_emm_trans.vol), 0.414; atol=0.005)
-    @test isapprox(only(eff_emm_trans.err), 0.0133; atol=0.005)
-    @test isapprox(only(eff_emm_trans.lower), 0.401; atol=0.005)
-    @test isapprox(only(eff_emm_trans.upper), 0.427; atol=0.005)
+        @test all(isapprox.(Matrix(eff_manual), Matrix(eff_auto)))
+    end
+    @testset "link function in a MixedModel" begin
+        model = fit(MixedModel,
+                    @formula(use ~ 1 + age + (1 | urban)),
+                    MixedModels.dataset(:contra),
+                    Bernoulli(); progress=false)
+        design = Dict(:age => -10:10)
+        eff_manual = effects(design, model;
+                             invlink=Base.Fix1(GLM.linkinv, Link(model)))
+        eff_auto = effects(design, model; invlink=AutoInvLink())
+
+        @test all(isapprox.(Matrix(eff_manual), Matrix(eff_auto)))
+    end
 end
 
 @testset "identity by another name" begin

diff --git a/test/runtests.jl b/test/runtests.jl
@@ -3,7 +3,9 @@ using Effects
 using Test
 
 @testset "Aqua" begin
-    Aqua.test_all(Effects; ambiguities=false)
+    # layout of weakdeps and extensions etc. differs between pre 1.9 and 1.9+
+    project_toml_formatting = VERSION >= v"1.9"
+    Aqua.test_all(Effects; ambiguities=false, project_toml_formatting)
 end
 
 @testset "TypicalTerm" begin