beacon-biosignals · palday · Mar 28, 2024 · Mar 28, 2024 · Mar 28, 2024 · Mar 28, 2024
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "Effects"
 uuid = "8f03c58b-bd97-4933-a826-f71b64d2cca2"
 authors = ["Beacon Biosignals, Inc."]
-version = "1.2.0"
+version = "1.3.0"
 
 [deps]
 Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"

diff --git a/src/emmeans.jl b/src/emmeans.jl
@@ -20,7 +20,8 @@ pooled_sem(sems...) = sqrt(sum(abs2, sems))
 # then choose a less full service function
 """
     emmeans(model::RegressionModel; eff_col=nothing, err_col=:err,
-            invlink=identity, levels=Dict(), dof=nothing)
+            invlink=identity, levels=Dict(), dof=nothing,
+            ci_level=nothing, lower_col=:lower, upper_col=:upper)
 
 Compute estimated marginal means, a.k.a. least-square (LS) means for a model.
 
@@ -39,6 +40,13 @@ a large number of observations, `dof=Inf` may be appropriate.
 
 `invlink`, `eff_col` and `err_col` work exactly as in [`effects!`](@ref).
 
+If `ci_level` is provided, then `ci_level` confidence intervals are computed using
+the Wald approximation based on the standard errors and quantiles of the ``t``-distribution.
+If `dof` is not provided, then the degrees of freedom are assumed to be infinite,
+which is equivalent to using the normal distribution.
+The corresponding lower and upper edges of the interval are placed in `lower_col`
+and `upper_col`, respectively.
+
 Estimated marginal means are closely related to effects and are also known as
 least-square means. The functionality here is a convenience
 wrapper for [`effects`](@ref) and maps onto the concept of least-square means
@@ -49,7 +57,8 @@ not currently supported. The documentation for the [R package emmeans](https://c
 explains [the background in more depth](https://cran.r-project.org/web/packages/emmeans/vignettes/basics.html).
 """
 function emmeans(model::RegressionModel; eff_col=nothing, err_col=:err,
-                 invlink=identity, levels=Dict(), dof=nothing)
+                 invlink=identity, levels=Dict(), dof=nothing, ci_level=nothing,
+                 lower_col=:lower, upper_col=:upper)
     form = formula(model)
     typical = mean
     defaults = Dict{Symbol,Vector}()
@@ -73,13 +82,27 @@ function emmeans(model::RegressionModel; eff_col=nothing, err_col=:err,
     if !isnothing(dof)
         result[!, :dof] .= _dof(dof, model)
     end
+    if !isnothing(ci_level)
+        # don't include this in the above if because
+        # we don't want to potentially add a dof column if there is no CI
+        if isnothing(dof)
+            # fall back to z value
+            result[!, :dof] .= Inf
+        end
+        # divide by two for twosided
+        scale = abs.(quantile.(TDist.(result[!, :dof]), (1 - ci_level) / 2))
+        result[!, lower_col] .= result[!, eff_col] .- result[!, err_col] .* scale
+        result[!, upper_col] .= result[!, eff_col] .+ result[!, err_col] .* scale
+    end
     return result
 end
 
 """
     empairs(model::RegressionModel; eff_col=nothing, err_col=:err,
-            invlink=identity, levels=Dict(), dof=nothing, padjust=identity)
-    empairs(df::AbstractDataFrame; eff_col, err_col=:err, padjust=identity)
+            invlink=identity, levels=Dict(), dof=nothing, padjust=identity,
+            ci_level=nothing, lower_col=:lower, upper_col=:upper)
+    empairs(df::AbstractDataFrame; eff_col, err_col=:err, padjust=identity,
+            ci_level=nothing, lower_col=:lower, upper_col=:upper)
 
 Compute pairwise differences of estimated marginal means.
 
@@ -104,9 +127,19 @@ If `padjust` is provided, then it is used to compute adjust the p-values for
 multiple comparisons. [`MultipleTesting.jl`](https://juliangehring.github.io/MultipleTesting.jl/stable/)
 provides a number of useful possibilities for this.
 
+If `ci_level` is provided, then `ci_level` confidence intervals are computed using
+the Wald approximation based on the standard errors and quantiles of the ``t``-distribution.
+If `dof` is not provided, then the degrees of freedom are assumed to be infinite,
+which is equivalent to using the normal distribution.
+The corresponding lower and upper edges of the interval are placed in `lower_col`
+and `upper_col`, respectively.
+
 !!! note
     `padjust` is silently ignored if `dof` is not provided.
 
+!!! note
+    Confidence intervals are **not** adjusted for multiple comparisons.
+
 !!! warning
     This feature is experimental and the precise column names and presentation of
     contrasts/differences may change without being considered breaking.
@@ -122,13 +155,15 @@ provides a number of useful possibilities for this.
     discussed in [the documentation for the R package `emmeans`](https://cran.r-project.org/web/packages/emmeans/vignettes/transformations.html).
 """
 function empairs(model::RegressionModel; eff_col=nothing, err_col=:err,
-                 invlink=identity, levels=Dict(), dof=nothing, padjust=identity)
+                 invlink=identity, levels=Dict(), dof=nothing, padjust=identity,
+                 ci_level=nothing, lower_col=:lower, upper_col=:upper)
     eff_col = something(eff_col, _responsename(model))
     em = emmeans(model; eff_col, err_col, invlink, levels, dof)
-    return empairs(em; eff_col, err_col, padjust)
+    return empairs(em; eff_col, err_col, padjust, ci_level, lower_col, upper_col)
 end
 
-function empairs(df::AbstractDataFrame; eff_col, err_col=:err, padjust=identity)
+function empairs(df::AbstractDataFrame; eff_col, err_col=:err, padjust=identity,
+                 ci_level=nothing, lower_col=:lower, upper_col=:upper)
     # need to enforce that we're all the same type
     # (mixing string and symbol is an issue with Not
     #  and a few other things below)
@@ -170,5 +205,18 @@ function empairs(df::AbstractDataFrame; eff_col, err_col=:err, padjust=identity)
                    end => "Pr(>|t|)")
         transform!(result_df, "Pr(>|t|)" => padjust => "Pr(>|t|)")
     end
+    if !isnothing(ci_level)
+        # don't include this in the above if because
+        # we don't want to potentially add a dof column if there is no CI
+        if "dof" ∉ stats_cols
+            # fall back to z value
+            result_df[!, :dof] .= Inf
+        end
+        # divide by two for twosided
+        # 1+level to pull from upper tail
+        scale = quantile.(TDist.(result_df[!, :dof]), (1 + ci_level) / 2)
+        result_df[!, lower_col] .= result_df[!, eff_col] .- result_df[!, err_col] .* scale
+        result_df[!, upper_col] .= result_df[!, eff_col] .+ result_df[!, err_col] .* scale
+    end
     return result_df
 end
diff --git a/test/emmeans.jl b/test/emmeans.jl
@@ -53,6 +53,20 @@ model_scaled = lm(@formula(weight ~ 1 + sex * age), growthdata;
     end
     em = emmeans(m; levels=Dict(:age => 23))
     @test all(em.age .== 23)
+
+    @testset "confint" begin
+        em = emmeans(m; ci_level=0.68)
+        @test all(==(Inf), em[!, :dof])
+        # 68% CI is approximately one standard error
+        @test em[!, :weight] + em[!, :err] ≈ em[!, :upper] rtol = 1e-3
+        @test em[!, :weight] - em[!, :err] ≈ em[!, :lower] rtol = 1e-3
+
+        em = emmeans(m; ci_level=0.68, dof=dof_residual)
+        @test all(==(10), em[!, :dof])
+        # 68% CI is approximately one standard error
+        @test em[!, :weight] + em[!, :err] ≈ em[!, :upper] rtol = 1e-3
+        @test em[!, :weight] - em[!, :err] ≈ em[!, :lower] rtol = 1e-3
+    end
 end
 
 # R> pairs(em)
@@ -101,6 +115,20 @@ bonferroni(pvals) = adjust(PValues(pvals), Bonferroni())
                             rtol=0.001))
     end
 
+    @testset "confint" begin
+        em = empairs(m; ci_level=0.68)
+        @test all(==(Inf), em[!, :dof])
+        # 68% CI is approximately one standard error
+        @test em[!, :weight] + em[!, :err] ≈ em[!, :upper] rtol = 1e-3
+        @test em[!, :weight] - em[!, :err] ≈ em[!, :lower] rtol = 1e-3
+
+        em = emmeans(m; ci_level=0.68, dof=dof_residual)
+        @test all(==(10), em[!, :dof])
+        # 68% CI is approximately one standard error
+        @test em[!, :weight] + em[!, :err] ≈ em[!, :upper] rtol = 1e-3
+        @test em[!, :weight] - em[!, :err] ≈ em[!, :lower] rtol = 1e-3
+    end
+
     @testset "AbstractString crossing" begin
         # this model is utter nonsense, but it creates a particular pattern
         # with InlineStrings that fails if our type restriction is too tight