stan-dev · avehtari · Aug 31, 2025 · Oct 16, 2025 · Oct 17, 2025 · Oct 17, 2025
diff --git a/R/loo_compare.R b/R/loo_compare.R
@@ -116,7 +116,30 @@ loo_compare.default <- function(x, ...) {
   diffs <- mapply(FUN = elpd_diffs, loos[ord[1]], loos[ord])
   elpd_diff <- apply(diffs, 2, sum)
   se_diff <- apply(diffs, 2, se_elpd_diff)
-  comp <- cbind(elpd_diff = elpd_diff, se_diff = se_diff, comp)
+  # compute probabilities that a model has worse elpd than the best model
+  # using a normal approximation (Sivula et al., 2025)
+  is_ref <- elpd_diff == 0 # the best model itself (and exact ties with it)
+  p_worse <- pnorm(0, elpd_diff, se_diff)
+  p_worse[is_ref] <- NA
+  N <- nrow(diffs)
+  # diagnostics to assess whether the normal approximation can be trusted
+  diag_pnorm <- rep("", length(elpd_diff))
+  if (N < 100) {
+    # small N (Sivula et al., 2025)
+    diag_pnorm[!is_ref] <- "N < 100"
+  } else {
+    # similar predictions (Sivula et al., 2025)
+    diag_pnorm[elpd_diff > -4 & !is_ref] <- "similar predictions"
+    # possible outlying differences (Sivula et al., 2025; Vehtari et al., 2024)
+    khat_diff <- rep(NA_real_, length(elpd_diff))
+    khat_fun <- function(x) posterior::pareto_khat(x, tail = "both")
+    khat_diff[!is_ref] <- apply(diffs[, !is_ref, drop = FALSE], 2, khat_fun)
+    thr <- ps_khat_threshold(N) # which() below skips the NA of the best model
+    diag_pnorm[which(khat_diff > thr)] <- paste0("khat_diff > ", .fr(thr, 2))
+  }
+  comp <- cbind(data.frame(elpd_diff = elpd_diff, se_diff = se_diff,
+                p_worse = p_worse, diag_pnorm = diag_pnorm),
+                as.data.frame(comp))
   rownames(comp) <- rnms
 
   # run order statistics-based checks on models
@@ -131,9 +154,10 @@ loo_compare.default <- function(x, ...) {
 #' @param digits For the print method only, the number of digits to use when
 #'   printing.
 #' @param simplify For the print method only, should only the essential columns
-#'   of the summary matrix be printed? The entire matrix is always returned, but
-#'   by default only the most important columns are printed.
-print.compare.loo <- function(x, ..., digits = 1, simplify = TRUE) {
+#'   of the summary matrix be printed? The entire matrix is always returned, but by default only the most important columns are printed.
+#' @param pnorm For the print method only, also print the normal-approximation
+#'   probability that a model performs worse than the best model?
+print.compare.loo <- function(x, ..., digits = 1, simplify = TRUE, pnorm = FALSE) {
   xcopy <- x
   if (inherits(xcopy, "old_compare.loo")) {
     if (NCOL(xcopy) >= 2 && simplify) {
@@ -143,12 +167,16 @@ print.compare.loo <- function(x, ..., digits = 1, simplify = TRUE) {
   } else if (NCOL(xcopy) >= 2 && simplify) {
      xcopy <- xcopy[, c("elpd_diff", "se_diff")]
   }
-  print(.fr(xcopy, digits), quote = FALSE)
-  invisible(x)
+  out <- .fr(xcopy, digits)
+  if (pnorm) { # also show the normal-approximation probability and diagnostic
+    out <- cbind(out, p_worse = .fr(x[, "p_worse"], 2),
+                 diag_pnorm = x[, "diag_pnorm"])
+  }
+  print(out, quote = FALSE)
+  invisible(x)
 }
 
 
-
 # internal ----------------------------------------------------------------
 
 #' Compute pointwise elpd differences
@@ -172,7 +200,6 @@ se_elpd_diff <- function(diffs) {
   sqrt(N) * sd(diffs)
 }
 
-
 #' Perform checks on `"loo"` objects before comparison
 #' @noRd
 #' @param loos List of `"loo"` objects.