Skip to content
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
a909cea
add diff diagnostics
avehtari Aug 31, 2025
b940192
add comments and option whether the probabilities are printed
avehtari Oct 16, 2025
6fe41bb
Apply Jonah's suggestions from code review
avehtari Oct 17, 2025
fa0d79d
fix argument name
avehtari Oct 17, 2025
65ef286
add more documentation (+ khat threshold 0.5 as we don't yet smooth)
avehtari Oct 17, 2025
b0ebac7
oops, fix khat comparison
avehtari Oct 17, 2025
7410b81
skip pareto-k check for low number of unique values
avehtari Oct 17, 2025
b993b44
remove `simplify` argument (need to check reverse dependencies)
jgabry Oct 17, 2025
b118da2
remove simplify argument from print.compare.loo_ss
jgabry Oct 17, 2025
25f93fd
start fixing tests
jgabry Oct 17, 2025
62027d1
fix issues in preliminary reverse dependency checks
jgabry Oct 17, 2025
cc3edfe
Update loo_compare.R
jgabry Oct 17, 2025
22f442f
make sure p_worse is available
jgabry Oct 17, 2025
ca843f4
use x instead of xcopy
jgabry Oct 17, 2025
d579c06
Update loo_compare.R
jgabry Oct 17, 2025
cf20250
Merge branch 'master' into diff-diagnostics
jgabry Oct 18, 2025
846a891
unify diagnostic messages
avehtari Oct 18, 2025
64b365c
improved loo_compare documentation
avehtari Oct 18, 2025
185e570
add subsections to loo_compare doc and put diagnostic messages in bul…
jgabry Oct 18, 2025
8625e10
minor cleanup
jgabry Oct 18, 2025
64a19c3
Add `model` column to `loo_compare()` output
jgabry Oct 18, 2025
a84154e
remove old loo::compare()
jgabry Oct 18, 2025
16f67d4
improve backwards compatibility
jgabry Oct 18, 2025
9b43e06
Merge branch 'diff-diagnostics' into model-names-as-column
jgabry Oct 18, 2025
4e16d2f
Update loo_compare.R
jgabry Oct 18, 2025
23a79c0
fix failing test
jgabry Oct 18, 2025
cff3c2c
Revert "remove old loo::compare()"
jgabry Oct 18, 2025
8f521a5
update tests
jgabry Oct 18, 2025
dc9db69
cleanup print method
jgabry Oct 19, 2025
3ed1c0c
improve backwards compatibility
jgabry Oct 19, 2025
89d39f5
change diag_pnorm to diag_diff
avehtari Oct 21, 2025
6d73537
change diag_pnorm to diag_diff in tests
avehtari Oct 21, 2025
abcf209
update test snapshots
jgabry Oct 21, 2025
794ffb0
add `model` column instead of row names
jgabry Oct 21, 2025
a375c93
remove row numbers when printing
jgabry Oct 21, 2025
f3fcb28
add diag_elpd
avehtari Oct 22, 2025
359853a
improve loo_compare doc
avehtari Oct 22, 2025
b7db7dd
clarify loo_compare diag_diff khat
avehtari Oct 22, 2025
77e753f
yet another small doc improvement
avehtari Oct 22, 2025
c59414e
Use function()
avehtari Oct 22, 2025
93fdbff
another loo_compare doc edit
avehtari Oct 22, 2025
ae48d69
adjust some diagnostic messages and documentation
avehtari Oct 22, 2025
4f5872b
edit doc, fix tests, move diagnostics to internal functions
jgabry Oct 22, 2025
574af4f
Merge branch 'master' into diff-diagnostics
jgabry Oct 22, 2025
3d008e9
Merge branch 'master' into diff-diagnostics
jgabry Oct 28, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 53 additions & 18 deletions R/loo_compare.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,14 @@
#' standard approach of comparing differences of deviances to a Chi-squared
#' distribution, a practice derived for Gaussian linear models or
#' asymptotically, and which only applies to nested models in any case.
#' Sivula et al. (2022) discuss the conditions when the normal
#' approximation used for SE and `se_diff` is good.
#'
#' The values in the `p_worse` column are computed using the normal
#' approximation and values from the columns `elpd_diff` and
#' `se_diff`. Sivula et al. (2025) discuss the conditions when the
#' normal approximation used for SE and `se_diff` is good, and the
#' column `diag_pnorm` contains possible diagnostic messages: 1)
#' small data (N < 100), 2) similar predictions (|elpd_diff| < 4),
#' or 3) possible outliers (khat > 0.5).
#'
#' If more than \eqn{11} models are compared, we internally recompute the model
#' differences using the median model by ELPD as the baseline model. We then
Expand All @@ -52,7 +58,7 @@
#' selection process. In that case users are recommended to avoid model
#' selection based on LOO-CV, and instead to favor model averaging/stacking or
#' projection predictive inference.
#'
#'
#' @seealso
#' * The [FAQ page](https://mc-stan.org/loo/articles/online-only/faq.html) on
#' the __loo__ website for answers to frequently asked questions.
Expand Down Expand Up @@ -116,7 +122,34 @@ loo_compare.default <- function(x, ...) {
diffs <- mapply(FUN = elpd_diffs, loos[ord[1]], loos[ord])
elpd_diff <- apply(diffs, 2, sum)
se_diff <- apply(diffs, 2, se_elpd_diff)
comp <- cbind(elpd_diff = elpd_diff, se_diff = se_diff, comp)

# compute probabilities that a model has worse elpd than the best model
# using a normal approximation (Sivula et al., 2025)
p_worse <- stats::pnorm(0, elpd_diff, se_diff)
p_worse[elpd_diff == 0] <- NA

# diagnostics to assess whether the normal approximation can be trusted
N <- nrow(diffs)
if (N < 100) {
# small N (Sivula et al., 2025)
diag_pnorm <- rep("N < 100", length(elpd_diff))
diag_pnorm[elpd_diff == 0] <- ""
} else {
diag_pnorm <- rep("", length(elpd_diff))
# similar predictions (Sivula et al., 2025)
diag_pnorm[elpd_diff > -4 & elpd_diff != 0] <- "similar predictions"
# possible outliers in differences (Sivula et al., 2025; Vehtari et al., 2024)
khat_diff <- rep(NA, length(elpd_diff))
khat_diff[elpd_diff != 0] <- apply(
diffs[, elpd_diff != 0, drop = FALSE], 2,
\(x) ifelse(length(unique(x)) <= 20, NA, posterior::pareto_khat(x, tail = "both")
))
diag_pnorm[khat_diff > 0.5] <- paste0("khat_diff > 0.5")
}
rownames(comp) <- rnms
comp <- cbind(data.frame(elpd_diff = elpd_diff, se_diff = se_diff,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This also changes the returned object to a data frame when it used to be a matrix. That could potentially cause reverse dependency issues, but we'll see when I run the checks. This is a necessary change, though, since we're mixing numeric columns with text columns for the diagnostic.

p_worse = p_worse, diag_pnorm = diag_pnorm),
as.data.frame(comp))
rownames(comp) <- rnms

# run order statistics-based checks on models
Expand All @@ -130,25 +163,28 @@ loo_compare.default <- function(x, ...) {
#' @export
#' @param digits For the print method only, the number of digits to use when
#' printing.
#' @param simplify For the print method only, should only the essential columns
#' of the summary matrix be printed? The entire matrix is always returned, but
#' by default only the most important columns are printed.
print.compare.loo <- function(x, ..., digits = 1, simplify = TRUE) {
#' @param p_worse For the print method only, should we include the normal
#' approximation based probability of each model having worse performance than
#' the best model? The default is `TRUE`.
print.compare.loo <- function(x, ..., digits = 1, p_worse = TRUE) {
xcopy <- x
if (inherits(xcopy, "old_compare.loo")) {
if (NCOL(xcopy) >= 2 && simplify) {
patts <- "^elpd_|^se_diff|^p_|^waic$|^looic$"
xcopy <- xcopy[, grepl(patts, colnames(xcopy))]
}
} else if (NCOL(xcopy) >= 2 && simplify) {
xcopy <- xcopy[, c("elpd_diff", "se_diff")]
if (NCOL(xcopy) >= 2) {
xcopy <- xcopy[, c("elpd_diff", "se_diff")]
}
if (p_worse) {
print(
cbind(.fr(xcopy, digits),
p_worse = .fr(x[, "p_worse"], 2),
diag_pnorm = x[, "diag_pnorm"]),
quote = FALSE
)
} else {
print(cbind(.fr(xcopy, digits)), quote = FALSE)
}
print(.fr(xcopy, digits), quote = FALSE)
invisible(x)
}



# internal ----------------------------------------------------------------

#' Compute pointwise elpd differences
Expand All @@ -172,7 +208,6 @@ se_elpd_diff <- function(diffs) {
sqrt(N) * sd(diffs)
}


#' Perform checks on `"loo"` objects before comparison
#' @noRd
#' @param loos List of `"loo"` objects.
Expand Down
18 changes: 12 additions & 6 deletions man/loo_compare.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading