Skip to content
Open
Changes from 2 commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
a909cea
add diff diagnostics
avehtari Aug 31, 2025
b940192
add comments and option whether the probabilities are printed
avehtari Oct 16, 2025
6fe41bb
Apply Jonah's suggestions from code review
avehtari Oct 17, 2025
fa0d79d
fix argument name
avehtari Oct 17, 2025
65ef286
add more documentation (+ khat threshold 0.5 as we don't yet smooth)
avehtari Oct 17, 2025
b0ebac7
oops, fix khat comparison
avehtari Oct 17, 2025
7410b81
skip pareto-k check for low number of unique values
avehtari Oct 17, 2025
b993b44
remove `simplify` argument (need to check reverse dependencies)
jgabry Oct 17, 2025
b118da2
remove simplify argument from print.compare.loo_ss
jgabry Oct 17, 2025
25f93fd
start fixing tests
jgabry Oct 17, 2025
62027d1
fix issues in preliminary reverse dependency checks
jgabry Oct 17, 2025
cc3edfe
Update loo_compare.R
jgabry Oct 17, 2025
22f442f
make sure p_worse is available
jgabry Oct 17, 2025
ca843f4
use x instead of xcopy
jgabry Oct 17, 2025
d579c06
Update loo_compare.R
jgabry Oct 17, 2025
cf20250
Merge branch 'master' into diff-diagnostics
jgabry Oct 18, 2025
846a891
unify diagnostic messages
avehtari Oct 18, 2025
64b365c
improved loo_compare documentation
avehtari Oct 18, 2025
185e570
add subsections to loo_compare doc and put diagnostic messages in bul…
jgabry Oct 18, 2025
8625e10
minor cleanup
jgabry Oct 18, 2025
64a19c3
Add `model` column to `loo_compare()` output
jgabry Oct 18, 2025
a84154e
remove old loo::compare()
jgabry Oct 18, 2025
16f67d4
improve backwards compatibility
jgabry Oct 18, 2025
9b43e06
Merge branch 'diff-diagnostics' into model-names-as-column
jgabry Oct 18, 2025
4e16d2f
Update loo_compare.R
jgabry Oct 18, 2025
23a79c0
fix failing test
jgabry Oct 18, 2025
cff3c2c
Revert "remove old loo::compare()"
jgabry Oct 18, 2025
8f521a5
update tests
jgabry Oct 18, 2025
dc9db69
cleanup print method
jgabry Oct 19, 2025
3ed1c0c
improve backwards compatibility
jgabry Oct 19, 2025
89d39f5
change diag_pnorm to diag_diff
avehtari Oct 21, 2025
6d73537
change diag_pnorm to diag_diff in tests
avehtari Oct 21, 2025
abcf209
update test snapshots
jgabry Oct 21, 2025
794ffb0
add `model` column instead of row names
jgabry Oct 21, 2025
a375c93
remove row numbers when printing
jgabry Oct 21, 2025
f3fcb28
add diag_elpd
avehtari Oct 22, 2025
359853a
improve loo_compare doc
avehtari Oct 22, 2025
b7db7dd
clarify loo_compare diag_diff khat
avehtari Oct 22, 2025
77e753f
yet another small doc improvement
avehtari Oct 22, 2025
c59414e
Use function()
avehtari Oct 22, 2025
93fdbff
another loo_compare doc edit
avehtari Oct 22, 2025
ae48d69
adjust some diagnostic messages and documentation
avehtari Oct 22, 2025
4f5872b
edit doc, fix tests, move diagnostics to internal functions
jgabry Oct 22, 2025
574af4f
Merge branch 'master' into diff-diagnostics
jgabry Oct 22, 2025
3d008e9
Merge branch 'master' into diff-diagnostics
jgabry Oct 28, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 35 additions & 8 deletions R/loo_compare.R
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,30 @@ loo_compare.default <- function(x, ...) {
diffs <- mapply(FUN = elpd_diffs, loos[ord[1]], loos[ord])
elpd_diff <- apply(diffs, 2, sum)
se_diff <- apply(diffs, 2, se_elpd_diff)
comp <- cbind(elpd_diff = elpd_diff, se_diff = se_diff, comp)
# compute probabilities that a model has worse elpd than the best model
# using a normal approximation (Sivula et al., 2025)
p_worse <- pnorm(0, elpd_diff, se_diff)
p_worse[elpd_diff==0] <- NA
N <- nrow(diffs)
# diagnostics to assess whether the normal approximation can be trusted
if (N<100) {
# small N (Sivula et al., 2025)
diag_pnorm <- rep("N < 100", length(elpd_diff))
diag_pnorm[elpd_diff==0] = ""
} else {
diag_pnorm <- rep("", length(elpd_diff))
# similar predictions (Sivula et al., 2025)
diag_pnorm[elpd_diff>-4 & elpd_diff!=0] <- "similar predictions"
# possible outliers in differences (Sivula et al., 2025;
# Vehtari et al., 2024)
khat_diff <- rep(NA, length(elpd_diff))
khat_diff[elpd_diff!=0] <- apply(diffs[,elpd_diff!=0, drop = FALSE], 2, \(x) posterior::pareto_khat(x, tail="both"))
diag_pnorm[khat_diff > ps_khat_threshold(N)] <- paste0("khat_diff > ", .fr(ps_khat_threshold(N), 2))
}
rownames(comp) <- rnms
comp <- cbind(data.frame(elpd_diff = elpd_diff, se_diff = se_diff,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This also changes the returned object to a data frame when it used to be a matrix. That could potentially cause reverse dependency issues, but we'll see when I run the checks. This is a necessary change, though, since we're mixing numeric columns with text columns for the diagnostic.

p_worse = p_worse, diag_pnorm = diag_pnorm),
as.data.frame(comp))
rownames(comp) <- rnms

# run order statistics-based checks on models
Expand All @@ -131,9 +154,10 @@ loo_compare.default <- function(x, ...) {
#' @param digits For the print method only, the number of digits to use when
#' printing.
#' @param simplify For the print method only, should only the essential columns
#' of the summary matrix be printed? The entire matrix is always returned, but
#' by default only the most important columns are printed.
print.compare.loo <- function(x, ..., digits = 1, simplify = TRUE) {
#' of the summary matrix be printed? The entire matrix is always returned, but
#' by default only the most important columns are printed.
#' @param pnorm For the print method only, should we include the normal
#' approximation-based probability of a model having worse performance than
#' the best model?
print.compare.loo <- function(x, ..., digits = 1, simplify = TRUE, pnorm = FALSE) {
xcopy <- x
if (inherits(xcopy, "old_compare.loo")) {
if (NCOL(xcopy) >= 2 && simplify) {
Expand All @@ -143,12 +167,16 @@ print.compare.loo <- function(x, ..., digits = 1, simplify = TRUE) {
} else if (NCOL(xcopy) >= 2 && simplify) {
xcopy <- xcopy[, c("elpd_diff", "se_diff")]
}
print(.fr(xcopy, digits), quote = FALSE)
invisible(x)
if (p_worse) {
print(cbind(.fr(xcopy, digits), p_worse=.fr(x[,"p_worse"],2), diag_pnorm=x[, "diag_pnorm"]), quote = FALSE)
invisible(x)
} else {
print(cbind(.fr(xcopy, digits), quote = FALSE))
invisible(x)
}
}



# internal ----------------------------------------------------------------

#' Compute pointwise elpd differences
Expand All @@ -172,7 +200,6 @@ se_elpd_diff <- function(diffs) {
sqrt(N) * sd(diffs)
}


#' Perform checks on `"loo"` objects before comparison
#' @noRd
#' @param loos List of `"loo"` objects.
Expand Down
Loading