Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add CI computation for emmeans and empairs #72

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "Effects"
uuid = "8f03c58b-bd97-4933-a826-f71b64d2cca2"
authors = ["Beacon Biosignals, Inc."]
version = "1.2.0"
version = "1.3.0"

[deps]
Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
Expand Down
62 changes: 55 additions & 7 deletions src/emmeans.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ pooled_sem(sems...) = sqrt(sum(abs2, sems))
# then choose a less full service function
"""
emmeans(model::RegressionModel; eff_col=nothing, err_col=:err,
invlink=identity, levels=Dict(), dof=nothing)
invlink=identity, levels=Dict(), dof=nothing,
ci_level=nothing, lower_col=:lower, upper_col=:upper)

Compute estimated marginal means, a.k.a. least-square (LS) means for a model.

Expand All @@ -39,6 +40,13 @@ a large number of observations, `dof=Inf` may be appropriate.

`invlink`, `eff_col` and `err_col` work exactly as in [`effects!`](@ref).

If `ci_level` is provided, then `ci_level` confidence intervals are computed using
the Wald approximation based on the standard errors and quantiles of the ``t``-distribution.
If `dof` is not provided, then the degrees of freedom are assumed to be infinite,
which is equivalent to using the normal distribution.
The corresponding lower and upper edges of the interval are placed in `lower_col`
and `upper_col`, respectively.

Estimated marginal means are closely related to effects and are also known as
least-square means. The functionality here is a convenience
wrapper for [`effects`](@ref) and maps onto the concept of least-square means
Expand All @@ -49,7 +57,8 @@ not currently supported. The documentation for the [R package emmeans](https://c
explains [the background in more depth](https://cran.r-project.org/web/packages/emmeans/vignettes/basics.html).
"""
function emmeans(model::RegressionModel; eff_col=nothing, err_col=:err,
invlink=identity, levels=Dict(), dof=nothing)
invlink=identity, levels=Dict(), dof=nothing, ci_level=nothing,
lower_col=:lower, upper_col=:upper)
form = formula(model)
typical = mean
defaults = Dict{Symbol,Vector}()
Expand All @@ -73,13 +82,27 @@ function emmeans(model::RegressionModel; eff_col=nothing, err_col=:err,
if !isnothing(dof)
result[!, :dof] .= _dof(dof, model)
end
if !isnothing(ci_level)
# don't include this in the above if because
# we don't want to potentially add a dof column if there is no CI
if isnothing(dof)
# fall back to z value
result[!, :dof] .= Inf
end
# divide by two for twosided
scale = abs.(quantile.(TDist.(result[!, :dof]), (1 - ci_level) / 2))
result[!, lower_col] .= result[!, eff_col] .- result[!, err_col] .* scale
result[!, upper_col] .= result[!, eff_col] .+ result[!, err_col] .* scale
end
return result
end

"""
empairs(model::RegressionModel; eff_col=nothing, err_col=:err,
invlink=identity, levels=Dict(), dof=nothing, padjust=identity)
empairs(df::AbstractDataFrame; eff_col, err_col=:err, padjust=identity)
invlink=identity, levels=Dict(), dof=nothing, padjust=identity,
ci_level=nothing, lower_col=:lower, upper_col=:upper)
empairs(df::AbstractDataFrame; eff_col, err_col=:err, padjust=identity,
ci_level=nothing, lower_col=:lower, upper_col=:upper)

Compute pairwise differences of estimated marginal means.

Expand All @@ -104,9 +127,19 @@ If `padjust` is provided, then it is used to compute adjust the p-values for
multiple comparisons. [`MultipleTesting.jl`](https://juliangehring.github.io/MultipleTesting.jl/stable/)
provides a number of useful possibilities for this.

If `ci_level` is provided, then `ci_level` confidence intervals are computed using
the Wald approximation based on the standard errors and quantiles of the ``t``-distribution.
If `dof` is not provided, then the degrees of freedom are assumed to be infinite,
which is equivalent to using the normal distribution.
The corresponding lower and upper edges of the interval are placed in `lower_col`
and `upper_col`, respectively.

!!! note
`padjust` is silently ignored if `dof` is not provided.

!!! note
Confidence intervals are **not** adjusted for multiple comparisons.

!!! warning
This feature is experimental and the precise column names and presentation of
contrasts/differences may change without being considered breaking.
Expand All @@ -122,13 +155,15 @@ provides a number of useful possibilities for this.
discussed in [the documentation for the R package `emmeans`](https://cran.r-project.org/web/packages/emmeans/vignettes/transformations.html).
"""
function empairs(model::RegressionModel; eff_col=nothing, err_col=:err,
invlink=identity, levels=Dict(), dof=nothing, padjust=identity)
invlink=identity, levels=Dict(), dof=nothing, padjust=identity,
ci_level=nothing, lower_col=:lower, upper_col=:upper)
eff_col = something(eff_col, _responsename(model))
em = emmeans(model; eff_col, err_col, invlink, levels, dof)
return empairs(em; eff_col, err_col, padjust)
return empairs(em; eff_col, err_col, padjust, ci_level, lower_col, upper_col)
end

function empairs(df::AbstractDataFrame; eff_col, err_col=:err, padjust=identity)
function empairs(df::AbstractDataFrame; eff_col, err_col=:err, padjust=identity,
ci_level=nothing, lower_col=:lower, upper_col=:upper)
# need to enforce that we're all the same type
# (mixing string and symbol is an issue with Not
# and a few other things below)
Expand Down Expand Up @@ -170,5 +205,18 @@ function empairs(df::AbstractDataFrame; eff_col, err_col=:err, padjust=identity)
end => "Pr(>|t|)")
transform!(result_df, "Pr(>|t|)" => padjust => "Pr(>|t|)")
end
if !isnothing(ci_level)
# don't include this in the above if because
# we don't want to potentially add a dof column if there is no CI
if "dof" ∉ stats_cols
# fall back to z value
result_df[!, :dof] .= Inf
end
# divide by two for twosided
# 1+level to pull from upper tail
scale = quantile.(TDist.(result_df[!, :dof]), (1 + ci_level) / 2)
result_df[!, lower_col] .= result_df[!, eff_col] .- result_df[!, err_col] .* scale
result_df[!, upper_col] .= result_df[!, eff_col] .+ result_df[!, err_col] .* scale
end
return result_df
end
28 changes: 28 additions & 0 deletions test/emmeans.jl
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,20 @@ model_scaled = lm(@formula(weight ~ 1 + sex * age), growthdata;
end
em = emmeans(m; levels=Dict(:age => 23))
@test all(em.age .== 23)

@testset "confint" begin
em = emmeans(m; ci_level=0.68)
@test all(==(Inf), em[!, :dof])
# 68% CI is approximately one standard error
@test em[!, :weight] + em[!, :err] ≈ em[!, :upper] rtol = 1e-3
@test em[!, :weight] - em[!, :err] ≈ em[!, :lower] rtol = 1e-3

em = emmeans(m; ci_level=0.68, dof=dof_residual)
@test all(==(10), em[!, :dof])
# 68% CI is approximately one standard error
@test em[!, :weight] + em[!, :err] ≈ em[!, :upper] rtol = 1e-3
@test em[!, :weight] - em[!, :err] ≈ em[!, :lower] rtol = 1e-3
end
end

# R> pairs(em)
Expand Down Expand Up @@ -101,6 +115,20 @@ bonferroni(pvals) = adjust(PValues(pvals), Bonferroni())
rtol=0.001))
end

@testset "confint" begin
em = empairs(m; ci_level=0.68)
@test all(==(Inf), em[!, :dof])
# 68% CI is approximately one standard error
@test em[!, :weight] + em[!, :err] ≈ em[!, :upper] rtol = 1e-3
@test em[!, :weight] - em[!, :err] ≈ em[!, :lower] rtol = 1e-3

em = emmeans(m; ci_level=0.68, dof=dof_residual)
@test all(==(10), em[!, :dof])
# 68% CI is approximately one standard error
@test em[!, :weight] + em[!, :err] ≈ em[!, :upper] rtol = 1e-3
@test em[!, :weight] - em[!, :err] ≈ em[!, :lower] rtol = 1e-3
end

@testset "AbstractString crossing" begin
# this model is utter nonsense, but it creates a particular pattern
# with InlineStrings that fails if our type restriction is too tight
Expand Down
Loading