Skip to content

Commit

Permalink
Automatic inverse link (#56)
Browse files Browse the repository at this point in the history
* GLM ext

* test mixed models extension

* better test support for Julia <1.9

* YAS

* disable Project.toml formatting check on LTS

* reduce duplication

Co-authored-by: Alex Arslan <[email protected]>

* distributions with support outside [0, 1]

* phrasing

Co-authored-by: Alex Arslan <[email protected]>

* break my tests

Co-authored-by: Alex Arslan <[email protected]>

---------

Co-authored-by: Alex Arslan <[email protected]>
  • Loading branch information
palday and ararslan authored Aug 21, 2023
1 parent 1cd621d commit 1ca3e9d
Show file tree
Hide file tree
Showing 9 changed files with 183 additions and 44 deletions.
1 change: 1 addition & 0 deletions .github/workflows/YASG.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ on:
- 'src/**'
- 'test/**'
- 'docs/**'
- 'ext/**'
- '.github/workflows/YASG.yml'
- 'format/**'
jobs:
Expand Down
12 changes: 11 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "Effects"
uuid = "8f03c58b-bd97-4933-a826-f71b64d2cca2"
authors = ["Beacon Biosignals, Inc."]
version = "1.0.2"
version = "1.1.0"

[deps]
Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
Expand All @@ -10,10 +10,19 @@ Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"

[weakdeps]
GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
MixedModels = "ff71e718-51f3-5ec2-a782-8ffcbfa3c316"

[extensions]
EffectsGLMExt = "GLM"
EffectsMixedModelsExt = ["GLM", "MixedModels"]

[compat]
Aqua = "0.5, 0.6"
Combinatorics = "1"
Expand All @@ -27,6 +36,7 @@ MultipleTesting = "0.5, 0.6"
RDatasets = "0.7.7"
StableRNGs = "1.0"
StandardizedPredictors = "0.1, 1"
StatsAPI = "1.6"
StatsBase = "0.33, 0.34"
StatsModels = "0.6.23, 0.7"
Tables = "1"
Expand Down
14 changes: 14 additions & 0 deletions ext/EffectsGLMExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
module EffectsGLMExt

using Effects

using GLM: AbstractGLM, Link, Link01, inverselink
using Effects.StatsModels: TableRegressionModel

# TODO: upstream a Link(::TableRegressionModel{<:AbstractGLM})
Effects._model_link(m::TableRegressionModel{<:AbstractGLM}, ::AutoInvLink) = Link(m.model)
Effects._model_link(m::AbstractGLM, ::AutoInvLink) = Link(m)
Effects._invlink_and_deriv(link::Link01, η) = inverselink(link, η)[1:2:3] # (µ, 1 - µ, dμdη)
Effects._invlink_and_deriv(link::Link, η) = inverselink(link, η)[1:2] # (µ, dμdη, NaN)

end # module
9 changes: 9 additions & 0 deletions ext/EffectsMixedModelsExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
module EffectsMixedModelsExt

using Effects
using MixedModels
using GLM: Link

Effects._model_link(m::GeneralizedLinearMixedModel, ::AutoInvLink) = Link(m)

end # module
2 changes: 1 addition & 1 deletion format/run.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ using JuliaFormatter
function main()
perfect = true
# note: keep in sync with `.github/workflows/format-check.yml`
for d in ["src/", "test/", "docs/"]
for d in ["src/", "ext/", "test/", "docs/"]
@info "...linting $d ..."
dir_perfect = format(d, YASStyle())
perfect = perfect && dir_perfect
Expand Down
2 changes: 1 addition & 1 deletion src/Effects.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ using ForwardDiff

include("typical.jl")
include("regressionmodel.jl")
export effects, effects!, expand_grid
export effects, effects!, expand_grid, AutoInvLink

include("emmeans.jl")
export emmeans, empairs
Expand Down
68 changes: 63 additions & 5 deletions src/regressionmodel.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,25 @@
# determine AutoInvLink automatically if a package with an appropriate extension
# is loaded

"""
AutoInvLink
Singleton type indicating that the inverse link should be automatically
determined from the model type.
!!! compat "Julia 1.9"
Automatic inverse link determination is implemented using package
extensions, which are available beginning in Julia 1.9.
An error is thrown if the inverse link cannot be determined. This will
always occur with Julia versions prior to 1.9, and will otherwise occur
when no extension has been loaded that specifies the link function for
the model type.
Currently, this is only implemented for GLM.jl and MixedModels.jl
"""
struct AutoInvLink end

"""
effects!(reference_grid::DataFrame, model::RegressionModel;
eff_col=nothing, err_col=:err, typical=mean, invlink=identity,
Expand Down Expand Up @@ -54,6 +76,11 @@ Pointwise standard errors are written into the column specified by `err_col`.
automatic differentiation. This means that the `invlink` function must be
differentiable and should not involve inplace operations.
On Julia versions 1.9 or later, the special singleton value `AutoInvLink()`
can be used to specify that the appropriate inverse link should be determined
automatically. In that case, a direct or analytic computation of the derivative
is used when possible.
Effects are computed using the model's variance-covariance matrix, which is
computed by default using `StatsBas.vcov`. Alternative methods such as the
sandwich estimator or robust estimators can be used by specifying `vcov`,
Expand All @@ -67,7 +94,6 @@ using Vcov
myvcov(x) = Base.Fix2(vcov, Vcov.robust())
```
The reference grid must contain columns for all predictors in the formula.
(Interactions are computed automatically.) Contrasts must match the contrasts
used to fit the model; using other contrasts will lead to undefined behavior.
Expand All @@ -87,6 +113,8 @@ The approach for computing effect is based on the effects plots described here:
Fox, John (2003). Effect Displays in R for Generalised Linear Models.
Journal of Statistical Software. Vol. 8, No. 15
See also [`AutoInvLink`](@ref).
"""
function effects!(reference_grid::DataFrame, model::RegressionModel;
eff_col=nothing, err_col=:err, typical=mean, invlink=identity,
Expand All @@ -98,10 +126,7 @@ function effects!(reference_grid::DataFrame, model::RegressionModel;
X = modelcols(form_typical, reference_grid)
eff = X * coef(model)
err = sqrt.(diag(X * vcov(model) * X'))
if invlink !== identity
err .*= ForwardDiff.derivative.(invlink, eff)
eff .= invlink.(eff)
end
_difference_method!(eff, err, model, invlink)
reference_grid[!, something(eff_col, _responsename(model))] = eff
reference_grid[!, err_col] = err
return reference_grid
Expand All @@ -110,6 +135,37 @@ function effects!(reference_grid::DataFrame, model::RegressionModel;
# return (; reference_grid..., depvar => eff, err_col => err)
end

# TODO: support the transformation method
# in addition to the difference method
# xref https://github.com/JuliaStats/GLM.jl/blob/c13577eaf3f418c58020534dd407532ee57f219b/src/glmfit.jl#L773-L783

_invlink_and_deriv(invlink, η) = (invlink(η), ForwardDiff.derivative(invlink, η))
_invlink_and_deriv(::typeof(identity), η) = (η, 1)
# this isn't the best name because it sometimes returns the inverse link and sometimes the link (Link())
# for now, this is private API, but we should see how this goes and whether we can make it public API
# so local extensions (instead of Package-Extensions) are better supported
_model_link(::RegressionModel, invlink::Function) = invlink
function _model_link(model::RegressionModel, ::AutoInvLink)
msg = string("cannot automatically determine inverse link for models ",
"of type ", typeof(model))
@static if isdefined(Base, :get_extension)
msg *= "; no appropriate extension has been loaded"
end
throw(ArgumentError(msg))
end

function _difference_method!(eff::Vector{T}, err::Vector{T},
m::RegressionModel,
invlink) where {T<:AbstractFloat}
link = _model_link(m, invlink)
@inbounds for i in eachindex(eff, err)
μ, dμdη = _invlink_and_deriv(link, eff[i])
err[i] *= dμdη
eff[i] = μ
end
return eff, err
end

"""
expand_grid(design)
Expand Down Expand Up @@ -155,6 +211,8 @@ fully-crossed design. Additionally, two extra columns are created representing
the lower and upper edges of the confidence interval.
The default `level=nothing` corresponds to [resp-err, resp+err], while `level=0.95`
corresponds to the 95% confidence interval.
See also [`AutoInvLink`](@ref).
"""
function effects(design::AbstractDict, model::RegressionModel;
eff_col=nothing, err_col=:err, typical=mean,
Expand Down
115 changes: 80 additions & 35 deletions test/delta_method.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,16 @@ using DataFrames
using Effects
using GLM
using LinearAlgebra
using MixedModels
using RDatasets: dataset as rdataset
using StableRNGs
using Test

@testset "transformed response" begin
dat = rdataset("car", "Prestige")
model = lm(@formula(log(Prestige) ~ 1 + Income * Education), dat)
design = Dict(:Income => [1, 2],
:Education => [3, 4])
model = lm(@formula(log(Prestige) ~ 1 + Income * Education), dat)
eff_original_scale = effects(design, model; invlink=exp)
eff_logscale = effects(design, model)
@test all(eff_logscale[!, 3] .≈ log.(eff_original_scale[!, 3]))
Expand All @@ -37,55 +38,99 @@ using Test
@test isapprox(only(eff_emm_trans.err), 1.07; atol=0.005)
@test isapprox(only(eff_emm_trans.lower), 45.3; atol=0.05)
@test isapprox(only(eff_emm_trans.upper), 47.5; atol=0.05)

@testset "AutoInvLink fails gracefully" begin
# this should work even pre Julia 1.9 because by definition
# no extension is loaded
@test_throws ArgumentError effects(design, model; invlink=AutoInvLink())
end
end

@testset "link function" begin
dat = rdataset("car", "Cowles")
dat[!, :vol] = dat.Volunteer .== "yes"
model = glm(@formula(vol ~ Extraversion * Neuroticism), dat, Bernoulli())
invlink = Base.Fix1(GLM.linkinv, Link(model.model))
design = Dict(:Extraversion => [13],
:Neuroticism => [16])
X = [1.0 13.0 16.0 13 * 16]
iv = Base.Fix1(GLM.linkinv, Link(model.model))
@static if VERSION >= v"1.9"
invlinks = [iv, AutoInvLink()]
@test Effects._model_link(model, AutoInvLink()) ==
Effects._model_link(model.model, AutoInvLink())
else
invlinks = [iv]
end
@testset "invlink = $invlink" for invlink in invlinks
for level in [0.68, 0.95]
eff = effects(design, model; invlink, level)

# compare with results from GLM.predict
pred = DataFrame(predict(model.model, X;
interval=:confidence,
interval_method=:delta,
level))
@test all(pred.prediction .≈ eff.vol)
@test all(isapprox.(pred.lower, eff.lower; atol=0.001))
@test all(isapprox.(pred.upper, eff.upper; atol=0.001))

for level in [0.68, 0.95]
eff = effects(design, model; invlink, level)
eff_trans = effects(design, model; level)
transform!(eff_trans,
:vol => ByRow(iv),
:lower => ByRow(iv),
:upper => ByRow(iv); renamecols=false)
# for this model, things play out nicely
@test all(eff_trans.vol .≈ eff.vol)
@test all(isapprox.(eff_trans.lower, eff.lower; atol=0.001))
@test all(isapprox.(eff_trans.upper, eff.upper; atol=0.001))
end

# compare with results from GLM.predict
pred = DataFrame(predict(model.model, X;
interval=:confidence,
interval_method=:delta,
level))
@test all(pred.prediction .≈ eff.vol)
@test all(isapprox.(pred.lower, eff.lower; atol=0.001))
@test all(isapprox.(pred.upper, eff.upper; atol=0.001))
# compare with results from emmeans in R
# emmeans(model, ~ neuroticism * extraversion, level=0.68)
eff_emm = effects(Dict(:Extraversion => [12.4], :Neuroticism => [11.5]), model)
@test isapprox(only(eff_emm.vol), -0.347; atol=0.005)
@test isapprox(only(eff_emm.err), 0.0549; atol=0.005)
@test isapprox(only(eff_emm.lower), -0.402; atol=0.005)
@test isapprox(only(eff_emm.upper), -0.292; atol=0.005)

eff_trans = effects(design, model; level)
transform!(eff_trans,
:vol => ByRow(invlink),
:lower => ByRow(invlink),
:upper => ByRow(invlink); renamecols=false)
# for this model, things play out nicely
@test all(eff_trans.vol .≈ eff.vol)
@test all(isapprox.(eff_trans.lower, eff.lower; atol=0.001))
@test all(isapprox.(eff_trans.upper, eff.upper; atol=0.001))
# emmeans(model, ~ neuroticism * extraversion, level=0.68, transform="response")
eff_emm_trans = effects(Dict(:Extraversion => [12.4], :Neuroticism => [11.5]),
model;
invlink)
@test isapprox(only(eff_emm_trans.vol), 0.414; atol=0.005)
@test isapprox(only(eff_emm_trans.err), 0.0133; atol=0.005)
@test isapprox(only(eff_emm_trans.lower), 0.401; atol=0.005)
@test isapprox(only(eff_emm_trans.upper), 0.427; atol=0.005)
end
end

# compare with results from emmeans in R
# emmeans(model, ~ neuroticism * extraversion, level=0.68)
eff_emm = effects(Dict(:Extraversion => [12.4], :Neuroticism => [11.5]), model)
@test isapprox(only(eff_emm.vol), -0.347; atol=0.005)
@test isapprox(only(eff_emm.err), 0.0549; atol=0.005)
@test isapprox(only(eff_emm.lower), -0.402; atol=0.005)
@test isapprox(only(eff_emm.upper), -0.292; atol=0.005)
@static if VERSION >= v"1.9"
@testset "Non Link01 GLM link" begin
dat = rdataset("car", "Cowles")
dat[!, :vol] = dat.Volunteer .== "yes"
# this isn't a particularly sensible model, but it's fine for testing
model = glm(@formula(vol ~ Extraversion * Neuroticism), dat, Poisson())
design = Dict(:Extraversion => [13],
:Neuroticism => [16])
X = [1.0 13.0 16.0 13 * 16]
eff_manual = effects(design, model;
invlink=Base.Fix1(GLM.linkinv, Link(model.model)))
eff_auto = effects(design, model; invlink=AutoInvLink())

# emmeans(model, ~ neuroticism * extraversion, level=0.68, transform="response")
eff_emm_trans = effects(Dict(:Extraversion => [12.4], :Neuroticism => [11.5]), model;
invlink)
@test isapprox(only(eff_emm_trans.vol), 0.414; atol=0.005)
@test isapprox(only(eff_emm_trans.err), 0.0133; atol=0.005)
@test isapprox(only(eff_emm_trans.lower), 0.401; atol=0.005)
@test isapprox(only(eff_emm_trans.upper), 0.427; atol=0.005)
@test all(isapprox.(Matrix(eff_manual), Matrix(eff_auto)))
end
@testset "link function in a MixedModel" begin
model = fit(MixedModel,
@formula(use ~ 1 + age + (1 | urban)),
MixedModels.dataset(:contra),
Bernoulli(); progress=false)
design = Dict(:age => -10:10)
eff_manual = effects(design, model;
invlink=Base.Fix1(GLM.linkinv, Link(model)))
eff_auto = effects(design, model; invlink=AutoInvLink())

@test all(isapprox.(Matrix(eff_manual), Matrix(eff_auto)))
end
end

@testset "identity by another name" begin
Expand Down
4 changes: 3 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ using Effects
using Test

@testset "Aqua" begin
Aqua.test_all(Effects; ambiguities=false)
# layout of weakdeps and extensions etc. differs between pre 1.9 and 1.9+
project_toml_formatting = VERSION >= v"1.9"
Aqua.test_all(Effects; ambiguities=false, project_toml_formatting)
end

@testset "TypicalTerm" begin
Expand Down

2 comments on commit 1ca3e9d

@palday
Copy link
Member Author

@palday palday commented on 1ca3e9d Aug 21, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/90049

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v1.1.0 -m "<description of version>" 1ca3e9d9235780563a6ae7a213dcc0fdff5b0952
git push origin v1.1.0

Please sign in to comment.