Skip to content

Commit d0bf67c

Browse files
committed
Cap inline diff payload at 200 lines / 20000 chars
Codex flagged that compute_unified_diff() was unbounded: a 1000-line new file dumped 1003 lines into chat and CLI scrollback (and shipped the same volume across the callr worker boundary). Cap the lines vector with a "[diff truncated: N more lines]" marker so the user can see the size of the change without drowning in it. Summary counts still reflect the full diff so "Added 1000 lines" stays accurate. The new max_lines / max_chars params default to 200 / 20000 — tighter than the 300 / 60000 used by /diff because inline tool diffs live in chat scrollback rather than a one-shot command output. Both budgets are tunable; pass Inf to disable.
1 parent ca497be commit d0bf67c

2 files changed

Lines changed: 94 additions & 6 deletions

File tree

R/diff-render.R

Lines changed: 61 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -65,25 +65,38 @@
6565
#' Returns NULL when the two inputs are byte-identical (signal to the
6666
#' caller that no diff display is warranted). When `diff` isn't on PATH,
6767
#' returns a fallback payload describing the size of the change without
68-
#' the per-line content.
68+
#' the per-line content. Large diffs are truncated to keep the payload
69+
#' bounded — both for serialization across the callr worker boundary
70+
#' and for chat scrollback hygiene — but the `summary` counts always
71+
#' reflect the full diff.
6972
#'
7073
#' @param old_text Character scalar, prior file contents. Empty string
7174
#' means "new file".
7275
#' @param new_text Character scalar, new file contents.
7376
#' @param path Character scalar, the file path the diff describes; used
7477
#' for the `+++` / `---` header labels.
78+
#' @param max_lines Cap on the number of diff lines retained for
79+
#' display. Beyond this, a `[diff truncated: N more lines]` marker is
80+
#' appended in place of the rest. Set to `Inf` to disable.
81+
#' @param max_chars Cap on total characters across retained lines.
82+
#' Tripped if a small number of long lines blow past the budget even
83+
#' though `max_lines` hasn't.
7584
#' @return NULL if identical, else a list with:
7685
#' \itemize{
7786
#' \item \code{path}: input path
78-
#' \item \code{summary}: one-line summary string
87+
#' \item \code{summary}: one-line summary string (always reflects
88+
#' the full diff, not the truncated lines)
7989
#' \item \code{lines}: character vector of uncolored diff lines
8090
#' (header + hunks). May be empty when only the fallback
81-
#' summary is available.
91+
#' summary is available, or truncated for large diffs.
8292
#' \item \code{fallback}: logical TRUE when `diff` was unavailable
8393
#' and the payload is summary-only.
94+
#' \item \code{truncated}: logical TRUE when `lines` was clipped.
8495
#' }
8596
#' @noRd
86-
compute_unified_diff <- function(old_text, new_text, path) {
97+
compute_unified_diff <- function(old_text, new_text, path,
98+
max_lines = 200L,
99+
max_chars = 20000L) {
87100
old_text <- old_text %||% ""
88101
new_text <- new_text %||% ""
89102
path <- path %||% "(unnamed)"
@@ -148,10 +161,52 @@ compute_unified_diff <- function(old_text, new_text, path) {
148161
}
149162

150163
counts <- .diff_summary_counts(res)
164+
full_lines <- as.character(res)
165+
clipped <- .clip_diff_lines(full_lines, max_lines, max_chars)
151166
list(path = path,
152167
summary = .diff_summary_line(counts$added, counts$removed),
153-
lines = as.character(res),
154-
fallback = FALSE)
168+
lines = clipped$lines,
169+
fallback = FALSE,
170+
truncated = clipped$truncated)
171+
}
172+
173+
#' Clip a diff-line vector to the configured budgets.
174+
#'
175+
#' Returns a list with the (possibly clipped) `lines` and a `truncated`
176+
#' flag. When the budget is busted we keep the first N lines and append
177+
#' a `[diff truncated: N more lines]` marker so the reader knows there
178+
#' was more.
179+
#' @noRd
180+
.clip_diff_lines <- function(lines, max_lines, max_chars) {
181+
total <- length(lines)
182+
if (total == 0L) {
183+
return(list(lines = lines, truncated = FALSE))
184+
}
185+
186+
keep <- min(total, as.integer(max_lines))
187+
head <- lines[seq_len(keep)]
188+
189+
# Character budget: walk the kept lines until we'd exceed max_chars,
190+
# then drop the rest. Counts newlines so the budget matches what
191+
# the user actually sees.
192+
if (is.finite(max_chars)) {
193+
widths <- nchar(head, type = "bytes") + 1L
194+
running <- cumsum(widths)
195+
within <- which(running <= as.integer(max_chars))
196+
keep_chars <- if (length(within) == 0L) 0L else max(within)
197+
if (keep_chars < length(head)) {
198+
head <- head[seq_len(keep_chars)]
199+
}
200+
}
201+
202+
truncated <- length(head) < total
203+
if (truncated) {
204+
dropped <- total - length(head)
205+
head <- c(head,
206+
sprintf("[diff truncated: %d more line%s]",
207+
dropped, if (dropped == 1L) "" else "s"))
208+
}
209+
list(lines = head, truncated = truncated)
155210
}
156211

157212
#' Render a diff payload to the terminal.

inst/tinytest/test_diff_render.R

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,3 +68,36 @@ expect_false(is.null(fb))
6868
expect_true(isTRUE(fb$fallback))
6969
expect_identical(fb$lines, character(0L))
7070
expect_true(nzchar(fb$summary))
71+
72+
# Restore the diff-binary cache so the truncation tests below run
73+
# against the real `diff` again.
74+
if (is.null(saved)) {
75+
suppressWarnings(rm(list = "value", envir = cache))
76+
} else {
77+
assign("value", saved, envir = cache)
78+
}
79+
80+
# Truncation: a big new file gets capped so chat scrollback / callr
81+
# serialization don't blow up. Counts in summary still reflect the
82+
# full diff.
83+
big_new <- paste(sprintf("line %04d", seq_len(1000)), collapse = "\n")
84+
big <- corteza:::compute_unified_diff("", big_new, "big.R",
85+
max_lines = 50L)
86+
expect_false(is.null(big))
87+
expect_true(isTRUE(big$truncated))
88+
# 50 retained lines plus the truncation marker.
89+
expect_equal(length(big$lines), 51L)
90+
expect_true(any(grepl("^\\[diff truncated:", big$lines)))
91+
expect_true(grepl("Added 1000 lines", big$summary, fixed = TRUE))
92+
93+
# Char-budget trip: lots of small lines but tight max_chars.
94+
tight <- corteza:::compute_unified_diff("", big_new, "big.R",
95+
max_lines = 1000L,
96+
max_chars = 200L)
97+
expect_false(is.null(tight))
98+
expect_true(isTRUE(tight$truncated))
99+
expect_true(length(tight$lines) < 50L)
100+
101+
# Default budgets leave a small diff untouched.
102+
small <- corteza:::compute_unified_diff("a\nb\nc\n", "a\nB\nc\n", "x.R")
103+
expect_false(isTRUE(small$truncated))

0 commit comments

Comments
 (0)