Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Automatic inverse link #56

Merged
merged 15 commits into from
Aug 21, 2023
1 change: 1 addition & 0 deletions .github/workflows/YASG.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ on:
- 'src/**'
- 'test/**'
- 'docs/**'
- 'ext/**'
- '.github/workflows/YASG.yml'
- 'format/**'
jobs:
Expand Down
12 changes: 11 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "Effects"
uuid = "8f03c58b-bd97-4933-a826-f71b64d2cca2"
authors = ["Beacon Biosignals, Inc."]
version = "1.0.2"
version = "1.1.0"

[deps]
Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
Expand All @@ -10,10 +10,19 @@ Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"

[weakdeps]
GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
MixedModels = "ff71e718-51f3-5ec2-a782-8ffcbfa3c316"

[extensions]
EffectsGLMExt = "GLM"
EffectsMixedModelsExt = ["GLM", "MixedModels"]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIUC this will only be loaded if both MixedModels and GLM are loaded. I don't think that restriction is necessary since MixedModels depends on GLM.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🤷 I wanted to be explicit


[compat]
Aqua = "0.5, 0.6"
Combinatorics = "1"
Expand All @@ -27,6 +36,7 @@ MultipleTesting = "0.5, 0.6"
RDatasets = "0.7.7"
StableRNGs = "1.0"
StandardizedPredictors = "0.1, 1"
StatsAPI = "1.6"
StatsBase = "0.33, 0.34"
StatsModels = "0.6.23, 0.7"
Tables = "1"
Expand Down
14 changes: 14 additions & 0 deletions ext/EffectsGLMExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
module EffectsGLMExt

using Effects

using GLM: AbstractGLM, Link, Link01, inverselink
using Effects.StatsModels: TableRegressionModel

# TODO: upstream a Link(::TableRegressionModel{<:AbstractGLM})
Effects._model_link(m::TableRegressionModel{<:AbstractGLM}, ::AutoInvLink) = Link(m.model)
Effects._model_link(m::AbstractGLM, ::AutoInvLink) = Link(m)
Effects._invlink_and_deriv(link::Link01, η) = inverselink(link, η)[1:2:3] # (µ, 1 - µ, dμdη)
Effects._invlink_and_deriv(link::Link, η) = inverselink(link, η)[1:2] # (µ, dμdη, NaN)

end # module
9 changes: 9 additions & 0 deletions ext/EffectsMixedModelsExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
module EffectsMixedModelsExt

using Effects
using MixedModels
using GLM: Link

Effects._model_link(m::GeneralizedLinearMixedModel, ::AutoInvLink) = Link(m)

end # module
2 changes: 1 addition & 1 deletion format/run.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ using JuliaFormatter
function main()
perfect = true
# note: keep in sync with `.github/workflows/format-check.yml`
for d in ["src/", "test/", "docs/"]
for d in ["src/", "ext/", "test/", "docs/"]
@info "...linting $d ..."
dir_perfect = format(d, YASStyle())
perfect = perfect && dir_perfect
Expand Down
2 changes: 1 addition & 1 deletion src/Effects.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ using ForwardDiff

include("typical.jl")
include("regressionmodel.jl")
export effects, effects!, expand_grid
export effects, effects!, expand_grid, AutoInvLink

include("emmeans.jl")
export emmeans, empairs
Expand Down
68 changes: 63 additions & 5 deletions src/regressionmodel.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,25 @@
# determine AutoInvLink automatically if a package with an appropriate extension
# is loaded

"""
AutoInvLink

Singleton type indicating that the inverse link should be automatically
determined from the model type.

!!! compat "Julia 1.9"
Automatic inverse link determination is implemented using package
extensions, which are available beginning in Julia 1.9.

An error is thrown if the inverse link cannot be determined. This will
always occur with Julia versions prior to 1.9, and will otherwise occur
when no extension has been loaded that specifies the link function for
the model type.

Currently, this is only implemented for GLM.jl and MixedModels.jl
"""
struct AutoInvLink end

"""
effects!(reference_grid::DataFrame, model::RegressionModel;
eff_col=nothing, err_col=:err, typical=mean, invlink=identity,
Expand Down Expand Up @@ -54,6 +76,11 @@
automatic differentiation. This means that the `invlink` function must be
differentiable and should not involve inplace operations.

On Julia versions 1.9 or later, the special singleton value `AutoInvLink()`
can be used to specify that the appropriate inverse link should be determined
automatically. In that case, a direct or analytic computation of the derivative
is used when possible.

Effects are computed using the model's variance-covariance matrix, which is
computed by default using `StatsBas.vcov`. Alternative methods such as the
sandwich estimator or robust estimators can be used by specifying `vcov`,
Expand All @@ -67,7 +94,6 @@
myvcov(x) = Base.Fix2(vcov, Vcov.robust())
```


The reference grid must contain columns for all predictors in the formula.
(Interactions are computed automatically.) Contrasts must match the contrasts
used to fit the model; using other contrasts will lead to undefined behavior.
Expand All @@ -87,6 +113,8 @@

Fox, John (2003). Effect Displays in R for Generalised Linear Models.
Journal of Statistical Software. Vol. 8, No. 15

See also [`AutoInvLink`](@ref).
"""
function effects!(reference_grid::DataFrame, model::RegressionModel;
eff_col=nothing, err_col=:err, typical=mean, invlink=identity,
Expand All @@ -98,10 +126,7 @@
X = modelcols(form_typical, reference_grid)
eff = X * coef(model)
err = sqrt.(diag(X * vcov(model) * X'))
if invlink !== identity
err .*= ForwardDiff.derivative.(invlink, eff)
eff .= invlink.(eff)
end
_difference_method!(eff, err, model, invlink)
reference_grid[!, something(eff_col, _responsename(model))] = eff
reference_grid[!, err_col] = err
return reference_grid
Expand All @@ -110,6 +135,37 @@
# return (; reference_grid..., depvar => eff, err_col => err)
end

# TODO: support the transformation method
# in addition to the difference method
# xref https://github.com/JuliaStats/GLM.jl/blob/c13577eaf3f418c58020534dd407532ee57f219b/src/glmfit.jl#L773-L783

_invlink_and_deriv(invlink, η) = (invlink(η), ForwardDiff.derivative(invlink, η))
_invlink_and_deriv(::typeof(identity), η) = (η, 1)
# this isn't the best name because it sometimes returns the inverse link and sometimes the link (Link())
# for now, this is private API, but we should see how this goes and whether we can make it public API
# so local extensions (instead of Package-Extensions) are better supported
_model_link(::RegressionModel, invlink::Function) = invlink
function _model_link(model::RegressionModel, ::AutoInvLink)
msg = string("cannot automatically determine inverse link for models ",
"of type ", typeof(model))
@static if isdefined(Base, :get_extension)

Check warning on line 151 in src/regressionmodel.jl

View check run for this annotation

Codecov / codecov/patch

src/regressionmodel.jl#L151

Added line #L151 was not covered by tests
msg *= "; no appropriate extension has been loaded"
end
throw(ArgumentError(msg))
end

function _difference_method!(eff::Vector{T}, err::Vector{T},
m::RegressionModel,
invlink) where {T<:AbstractFloat}
link = _model_link(m, invlink)
@inbounds for i in eachindex(eff, err)
μ, dμdη = _invlink_and_deriv(link, eff[i])
err[i] *= dμdη
eff[i] = μ
end
return eff, err
end

"""
expand_grid(design)

Expand Down Expand Up @@ -155,6 +211,8 @@
the lower and upper edges of the confidence interval.
The default `level=nothing` corresponds to [resp-err, resp+err], while `level=0.95`
corresponds to the 95% confidence interval.

See also [`AutoInvLink`](@ref).
"""
function effects(design::AbstractDict, model::RegressionModel;
eff_col=nothing, err_col=:err, typical=mean,
Expand Down
115 changes: 80 additions & 35 deletions test/delta_method.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,16 @@ using DataFrames
using Effects
using GLM
using LinearAlgebra
using MixedModels
using RDatasets: dataset as rdataset
using StableRNGs
using Test

@testset "transformed response" begin
dat = rdataset("car", "Prestige")
model = lm(@formula(log(Prestige) ~ 1 + Income * Education), dat)
design = Dict(:Income => [1, 2],
:Education => [3, 4])
model = lm(@formula(log(Prestige) ~ 1 + Income * Education), dat)
eff_original_scale = effects(design, model; invlink=exp)
eff_logscale = effects(design, model)
@test all(eff_logscale[!, 3] .≈ log.(eff_original_scale[!, 3]))
Expand All @@ -37,55 +38,99 @@ using Test
@test isapprox(only(eff_emm_trans.err), 1.07; atol=0.005)
@test isapprox(only(eff_emm_trans.lower), 45.3; atol=0.05)
@test isapprox(only(eff_emm_trans.upper), 47.5; atol=0.05)

@testset "AutoInvLink fails gracefully" begin
# this should work even pre Julia 1.9 because by definition
# no extension is loaded
@test_throws ArgumentError effects(design, model; invlink=AutoInvLink())
end
end

@testset "link function" begin
dat = rdataset("car", "Cowles")
dat[!, :vol] = dat.Volunteer .== "yes"
model = glm(@formula(vol ~ Extraversion * Neuroticism), dat, Bernoulli())
invlink = Base.Fix1(GLM.linkinv, Link(model.model))
design = Dict(:Extraversion => [13],
:Neuroticism => [16])
X = [1.0 13.0 16.0 13 * 16]
iv = Base.Fix1(GLM.linkinv, Link(model.model))
@static if VERSION >= v"1.9"
invlinks = [iv, AutoInvLink()]
@test Effects._model_link(model, AutoInvLink()) ==
Effects._model_link(model.model, AutoInvLink())
else
invlinks = [iv]
end
@testset "invlink = $invlink" for invlink in invlinks
for level in [0.68, 0.95]
eff = effects(design, model; invlink, level)

# compare with results from GLM.predict
pred = DataFrame(predict(model.model, X;
interval=:confidence,
interval_method=:delta,
level))
@test all(pred.prediction .≈ eff.vol)
@test all(isapprox.(pred.lower, eff.lower; atol=0.001))
@test all(isapprox.(pred.upper, eff.upper; atol=0.001))
Comment on lines +73 to +75
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
@test all(pred.prediction . eff.vol)
@test all(isapprox.(pred.lower, eff.lower; atol=0.001))
@test all(isapprox.(pred.upper, eff.upper; atol=0.001))
@test pred.prediction eff.vol
@test pred.lower eff.lower atol=0.001
@test pred.upper eff.upper atol=0.001

Any reason not to test the approximate equality of the arrays directly?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

because we want element-wise approximate equality, which is a stronger condition (the approximate equality of vectors is based on the norm of the difference vector)


for level in [0.68, 0.95]
eff = effects(design, model; invlink, level)
eff_trans = effects(design, model; level)
transform!(eff_trans,
:vol => ByRow(iv),
:lower => ByRow(iv),
:upper => ByRow(iv); renamecols=false)
# for this model, things play out nicely
@test all(eff_trans.vol .≈ eff.vol)
@test all(isapprox.(eff_trans.lower, eff.lower; atol=0.001))
@test all(isapprox.(eff_trans.upper, eff.upper; atol=0.001))
Comment on lines +83 to +85
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
@test all(eff_trans.vol . eff.vol)
@test all(isapprox.(eff_trans.lower, eff.lower; atol=0.001))
@test all(isapprox.(eff_trans.upper, eff.upper; atol=0.001))
@test eff_trans.vol eff.vol
@test eff_trans.lower eff.lower atol=0.001
@test eff_trans.upper eff.upper atol=0.001

Same question as above

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same answer as above

end

# compare with results from GLM.predict
pred = DataFrame(predict(model.model, X;
interval=:confidence,
interval_method=:delta,
level))
@test all(pred.prediction .≈ eff.vol)
@test all(isapprox.(pred.lower, eff.lower; atol=0.001))
@test all(isapprox.(pred.upper, eff.upper; atol=0.001))
# compare with results from emmeans in R
# emmeans(model, ~ neuroticism * extraversion, level=0.68)
eff_emm = effects(Dict(:Extraversion => [12.4], :Neuroticism => [11.5]), model)
@test isapprox(only(eff_emm.vol), -0.347; atol=0.005)
@test isapprox(only(eff_emm.err), 0.0549; atol=0.005)
@test isapprox(only(eff_emm.lower), -0.402; atol=0.005)
@test isapprox(only(eff_emm.upper), -0.292; atol=0.005)

eff_trans = effects(design, model; level)
transform!(eff_trans,
:vol => ByRow(invlink),
:lower => ByRow(invlink),
:upper => ByRow(invlink); renamecols=false)
# for this model, things play out nicely
@test all(eff_trans.vol .≈ eff.vol)
@test all(isapprox.(eff_trans.lower, eff.lower; atol=0.001))
@test all(isapprox.(eff_trans.upper, eff.upper; atol=0.001))
# emmeans(model, ~ neuroticism * extraversion, level=0.68, transform="response")
eff_emm_trans = effects(Dict(:Extraversion => [12.4], :Neuroticism => [11.5]),
model;
invlink)
@test isapprox(only(eff_emm_trans.vol), 0.414; atol=0.005)
@test isapprox(only(eff_emm_trans.err), 0.0133; atol=0.005)
@test isapprox(only(eff_emm_trans.lower), 0.401; atol=0.005)
@test isapprox(only(eff_emm_trans.upper), 0.427; atol=0.005)
end
end

# compare with results from emmeans in R
# emmeans(model, ~ neuroticism * extraversion, level=0.68)
eff_emm = effects(Dict(:Extraversion => [12.4], :Neuroticism => [11.5]), model)
@test isapprox(only(eff_emm.vol), -0.347; atol=0.005)
@test isapprox(only(eff_emm.err), 0.0549; atol=0.005)
@test isapprox(only(eff_emm.lower), -0.402; atol=0.005)
@test isapprox(only(eff_emm.upper), -0.292; atol=0.005)
@static if VERSION >= v"1.9"
@testset "Non Link01 GLM link" begin
dat = rdataset("car", "Cowles")
dat[!, :vol] = dat.Volunteer .== "yes"
# this isn't a particularly sensible model, but it's fine for testing
model = glm(@formula(vol ~ Extraversion * Neuroticism), dat, Poisson())
design = Dict(:Extraversion => [13],
:Neuroticism => [16])
X = [1.0 13.0 16.0 13 * 16]
eff_manual = effects(design, model;
invlink=Base.Fix1(GLM.linkinv, Link(model.model)))
eff_auto = effects(design, model; invlink=AutoInvLink())

# emmeans(model, ~ neuroticism * extraversion, level=0.68, transform="response")
eff_emm_trans = effects(Dict(:Extraversion => [12.4], :Neuroticism => [11.5]), model;
invlink)
@test isapprox(only(eff_emm_trans.vol), 0.414; atol=0.005)
@test isapprox(only(eff_emm_trans.err), 0.0133; atol=0.005)
@test isapprox(only(eff_emm_trans.lower), 0.401; atol=0.005)
@test isapprox(only(eff_emm_trans.upper), 0.427; atol=0.005)
@test all(isapprox.(Matrix(eff_manual), Matrix(eff_auto)))
end
@testset "link function in a MixedModel" begin
model = fit(MixedModel,
@formula(use ~ 1 + age + (1 | urban)),
MixedModels.dataset(:contra),
Bernoulli(); progress=false)
design = Dict(:age => -10:10)
eff_manual = effects(design, model;
invlink=Base.Fix1(GLM.linkinv, Link(model)))
eff_auto = effects(design, model; invlink=AutoInvLink())

@test all(isapprox.(Matrix(eff_manual), Matrix(eff_auto)))
end
end

@testset "identity by another name" begin
Expand Down
4 changes: 3 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ using Effects
using Test

@testset "Aqua" begin
Aqua.test_all(Effects; ambiguities=false)
# layout of weakdeps and extensions etc. differs between pre 1.9 and 1.9+
project_toml_formatting = VERSION >= v"1.9"
Aqua.test_all(Effects; ambiguities=false, project_toml_formatting)
end

@testset "TypicalTerm" begin
Expand Down
Loading