Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,14 @@ export(cli_worker_spawn)
export(context_limit_for_model)
export(context_usage_pct)
export(default_local_model)
export(default_provider_model)
export(ensure_skills)
export(estimate_history_tokens)
export(estimate_live_context_tokens)
export(estimate_text_tokens)
export(estimate_tool_tokens)
export(format_age)
export(format_live_ctx)
export(format_tokens)
export(install_cli)
export(matrix_archive_all)
Expand Down
13 changes: 5 additions & 8 deletions R/compact-turn.R
Original file line number Diff line number Diff line change
Expand Up @@ -266,14 +266,11 @@ maybe_compact_turn_session <- function(session, config, kind = NULL) {
if (length(history) < min_messages) {
return(invisible(FALSE))
}
model <- session$model_map$cloud %||% NULL
if (is.null(model)) {
model <- switch(session$provider %||% "anthropic",
anthropic = "claude-sonnet-4-20250514",
openai = "gpt-4o",
moonshot = "moonshot-v1-8k",
NULL)
}
# Resolve the same model turn() will run with; mirrors
# subagent_live_token_count() so /agents, compaction, and the
# next API call all reason about the same model identity.
model <- session$model_map$cloud %||%
default_provider_model(session$provider)
# Estimate against the same tools turn() will send. turn()
# resolves tools from session$tools_filter when tools is NULL,
# so passing NULL here would undercount the live context for any
Expand Down
50 changes: 50 additions & 0 deletions R/context-budget.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,26 @@ MODEL_CONTEXT_LIMITS <- list(
"qwen2.5" = 32000L
)

#' Provider-specific default model name.
#'
#' Resolves the actual model a subagent (or chat session) will run
#' with when no explicit \code{model} is set. Mirrors the defaults
#' the CLI script picks at startup so /agents, compaction, and the
#' CLI all show the same model identity. Returns NULL for unknown
#' providers (lets llm.api fall back to its own default).
#'
#' Anything that is not a single, non-missing character string (NULL,
#' \code{character(0)}, \code{NA}, a number, a longer vector) is treated
#' as an unknown provider and also yields NULL.
#' @param provider Provider name (single character string) or NULL.
#' @return Model name (character) or NULL.
#' @keywords internal
#' @export
default_provider_model <- function(provider) {
  # switch() selects by *position* when EXPR is numeric and errors when
  # EXPR is not length one, so validate before dispatching by name.
  if (!is.character(provider) || length(provider) != 1L || is.na(provider)) {
    return(NULL)
  }
  switch(provider,
    anthropic = "claude-sonnet-4-20250514",
    openai = "gpt-4o",
    moonshot = "kimi-k2.6",
    ollama = "llama3.2",
    NULL
  )
}

#' Look up the context window for a given model.
#'
#' Tries exact match, then prefix match either direction (so
Expand Down Expand Up @@ -76,6 +96,36 @@ format_tokens <- function(n) {
}
}

#' Format an age in seconds as a compact string (e.g. "12s", "3m", "2.0h").
#'
#' Ages below one minute are shown as whole seconds, ages below one hour
#' as whole minutes, and anything longer as hours with one decimal.
#' @param seconds Age in seconds; coerced with `as.numeric()`.
#' @return A single string, or "?" when the age is missing, negative,
#'   or not a single value.
#' @keywords internal
#' @export
format_age <- function(seconds) {
  s <- as.numeric(seconds)
  # The length check comes first so NULL / zero-length input yields "?"
  # instead of failing inside the `if` condition.
  if (length(s) != 1L || is.na(s) || s < 0) {
    return("?")
  }
  # Pick the unit from the *rounded* value so an age just under a
  # boundary rolls over to the next unit ("1m") rather than printing
  # "60s" or "60m".
  whole_seconds <- round(s)
  if (whole_seconds < 60) {
    return(sprintf("%ds", as.integer(whole_seconds)))
  }
  whole_minutes <- round(s / 60)
  if (whole_minutes < 60) {
    return(sprintf("%dm", as.integer(whole_minutes)))
  }
  sprintf("%.1fh", s / 3600)
}

#' Format a live-context display like "ctx 4.2K/200K" or "ctx ?".
#'
#' Used by /agents to summarize live tokens versus model limit.
#' Returns "ctx ?" when either value is NULL, NA, or not a single value.
#' @param tokens Live token count for the session.
#' @param limit Context-window limit for the session's model.
#' @return A single string prefixed with "ctx ".
#' @keywords internal
#' @export
format_live_ctx <- function(tokens, limit) {
  # Check length before is.na(): is.na(NULL) is logical(0), which is not
  # a usable `if` condition on its own.
  is_known <- function(x) length(x) == 1L && !is.na(x)
  if (!is_known(tokens) || !is_known(limit)) {
    return("ctx ?")
  }
  sprintf("ctx %s/%s", format_tokens(tokens), format_tokens(limit))
}

#' Rough token estimate from raw text.
#'
#' Returns `ceil(nchar(text) / 4)`. Good enough for budget decisions
Expand Down
195 changes: 163 additions & 32 deletions R/subagent.R
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,10 @@ subagent_seed_history <- function(history) {
#' turn into a sub-subagent (capped by depth_cap).
#'
#' @param prompt User prompt (character).
#' @return Reply text (character).
#' @return A list with `$reply` (character, the LLM reply text) and
#' `$usage` (list with `input_tokens`, `output_tokens`, `total_tokens`,
#' and optionally `cost` — provider-dependent). Callers extract the
#' reply and accumulate usage into the parent-side registry.
#' @keywords internal
#' @export
subagent_turn_prompt <- function(prompt) {
Expand Down Expand Up @@ -275,7 +278,8 @@ subagent_turn_prompt <- function(prompt) {
error = conditionMessage(e), level = "warn")
})

as.character(result$reply %||% "")
list(reply = as.character(result$reply %||% ""),
usage = result$usage %||% list())
}

SUBAGENT_DEFAULTS <- list(
Expand Down Expand Up @@ -496,16 +500,24 @@ subagent_spawn <- function(task, model = NULL, tools = NULL, preset = NULL,
store_update(session_key, list(status = "running"))
seq <- next_subagent_seq()
.subagent_registry[[id]] <- list(
id = id,
seq = seq,
session_key = session_key,
session = session,
task = task,
tools = tools,
model = model,
started_at = Sys.time(),
timeout = Sys.time() + subcfg$timeout_minutes * 60,
depth = child_depth
id = id,
seq = seq,
session_key = session_key,
session = session,
task = task,
tools = tools,
model = spawn_model,
provider = spawn_provider,
started_at = Sys.time(),
timeout = Sys.time() + subcfg$timeout_minutes * 60,
depth = child_depth,
# Usage counters (accumulated across queries; cost is NA when
# the provider doesn't surface it).
cumulative_input_tokens = 0L,
cumulative_output_tokens = 0L,
cumulative_total_tokens = 0L,
cumulative_cost = NA_real_,
query_count = 0L
)
# Initialize the durable transcript file. Disk space is cheap;
# context is expensive — the in-memory child history may later be
Expand Down Expand Up @@ -591,18 +603,20 @@ subagent_query <- function(id, prompt, wait = TRUE, timeout = 60L) {
return(invisible(canonical))
}

reply <- tryCatch(
info$session$run(
function(p) corteza::subagent_turn_prompt(p),
list(p = prompt)
turn_result <- tryCatch(
info$session$run(
function(p) corteza::subagent_turn_prompt(p),
list(p = prompt)
),
error = function(e) {
stop("Subagent query failed: ", conditionMessage(e), call. = FALSE)
}
)
error = function(e) {
stop("Subagent query failed: ", conditionMessage(e),
call. = FALSE)
})
info <- subagent_accumulate_usage(info, turn_result$usage)
.subagent_registry[[canonical]] <- info
log_event("subagent_query", subagent_id = canonical,
prompt_length = nchar(prompt))
as.character(reply)
as.character(turn_result$reply %||% "")
}

#' Collect the result of a previously-fired async subagent query.
Expand Down Expand Up @@ -646,13 +660,16 @@ subagent_collect <- function(id, wait = TRUE, timeout = 60L) {
msg <- info$session$read()
info$pending <- NULL
info$pending_started_at <- NULL
if (is.null(msg$error)) {
info <- subagent_accumulate_usage(info, msg$result$usage)
}
.subagent_registry[[canonical]] <- info
if (!is.null(msg$error)) {
stop("Subagent query failed: ", conditionMessage(msg$error),
call. = FALSE)
}
log_event("subagent_collect", subagent_id = canonical)
as.character(msg$result)
as.character(msg$result$reply %||% "")
}

#' Kill a subagent.
Expand All @@ -678,7 +695,83 @@ subagent_kill <- function(id) {
invisible(TRUE)
}

#' Accumulate per-turn usage into a registry entry.
#'
#' `usage` is the `$usage` field returned by `subagent_turn_prompt()`
#' (originally from `llm.api::agent`). Missing or NA token fields leave
#' the matching counter unchanged. When `usage` carries no cost (the
#' original notes moonshot and ollama as such providers),
#' `cumulative_cost` keeps its previous value, i.e. stays NA if no cost
#' was ever reported. Every counter, including `cumulative_cost`,
#' falls back to its initial value when the registry entry lacks it.
#'
#' A NULL `usage` returns `info` untouched; any other value (even an
#' empty list) counts as one query.
#' @param info Registry entry (list) for one subagent.
#' @param usage Per-turn usage list, or NULL.
#' @return `info` with updated `cumulative_*` fields and `query_count`.
#' @noRd
subagent_accumulate_usage <- function(info, usage) {
  if (is.null(usage)) {
    return(info)
  }
  # TRUE only for a single non-missing value; NULL and zero-length
  # vectors count as "not reported".
  is_reported <- function(x) length(x) == 1L && !is.na(x)
  add_int <- function(prev, new) {
    if (is_reported(new)) prev + as.integer(new) else prev
  }
  # `[[ ]]` rather than `$`: list `$` prefix-matches, which could pick
  # up an unrelated field whose name merely starts the same way.
  info$cumulative_input_tokens <- add_int(
    info[["cumulative_input_tokens"]] %||% 0L,
    usage[["input_tokens"]]
  )
  info$cumulative_output_tokens <- add_int(
    info[["cumulative_output_tokens"]] %||% 0L,
    usage[["output_tokens"]]
  )
  info$cumulative_total_tokens <- add_int(
    info[["cumulative_total_tokens"]] %||% 0L,
    usage[["total_tokens"]]
  )
  cost <- usage[["cost"]]
  if (is_reported(cost)) {
    # Default a missing running total to NA so an entry without the
    # field behaves like a freshly spawned one.
    prev <- info[["cumulative_cost"]] %||% NA_real_
    info$cumulative_cost <- if (is.na(prev)) {
      as.numeric(cost)
    } else {
      prev + as.numeric(cost)
    }
  }
  info$query_count <- (info[["query_count"]] %||% 0L) + 1L
  info
}

#' Best-effort live context-token count for an idle subagent.
#'
#' Runs a probe inside the child through `info$session$run()`. The probe
#' reads the child's session state, resolves its model (explicit
#' `model_map$cloud`, else the provider default), estimates the live
#' context tokens against the tools selected by `tools_filter`, and
#' looks up the context limit for that model.
#'
#' A subagent with a pending query is not probed and reports NA for both
#' values. Any error raised by the probe call is likewise turned into NA
#' values so `/agents` can display `?` instead of crashing.
#' @param info Registry entry (list) for one subagent.
#' @return List with `tokens` and `limit`, plus `model` whenever the
#'   probe was attempted.
#' @noRd
subagent_live_token_count <- function(info) {
  # Exact-match lookup: `$pending` would prefix-match `pending_started_at`.
  has_pending <- !is.null(info[["pending"]])
  if (has_pending) {
    return(list(tokens = NA_integer_, limit = NA_integer_))
  }

  # The probe is executed by the child session, so it references only
  # namespaced corteza functions and its own locals.
  probe <- function() {
    child <- corteza:::.subagent_state$session
    if (is.null(child)) {
      return(list(tokens = NA_integer_, limit = NA_integer_,
                  model = NULL))
    }
    # Same resolution order the child runs with: an explicit cloud model
    # wins, otherwise the provider default. Without the fallback a child
    # spawned on the default model has no name to look a limit up for.
    resolved_model <- child$model_map$cloud %||%
      corteza::default_provider_model(child$provider)
    api_tools <- tryCatch(
      corteza:::skills_as_api_tools(child$tools_filter),
      error = function(e) NULL
    )
    live_tokens <- corteza::estimate_live_context_tokens(
      list(history = child$history %||% list()),
      system_prompt = child$system, tools = api_tools
    )
    ctx_limit <- if (is.null(resolved_model)) {
      NA_integer_
    } else {
      corteza::context_limit_for_model(resolved_model)
    }
    list(tokens = live_tokens, limit = ctx_limit, model = resolved_model)
  }

  tryCatch(
    info$session$run(probe),
    error = function(e) {
      list(tokens = NA_integer_, limit = NA_integer_, model = NULL)
    }
  )
}

#' List active subagents.
#'
#' Returns a list of info objects per agent: id/seq/task/started_at/
#' time_remaining/pending plus model/age/cumulative usage and a
#' best-effort live token count for idle agents (`NA` for busy).
#' @return List of subagent info objects.
#' @export
subagent_list <- function() {
Expand All @@ -688,18 +781,39 @@ subagent_list <- function() {
}
out <- lapply(ids, function(id) {
info <- .subagent_registry[[id]]
live <- subagent_live_token_count(info)
age_seconds <- as.numeric(difftime(Sys.time(),
info$started_at,
units = "secs"))
# Display the actual model the child runs with — explicit
# info$model first, otherwise the resolved default for the
# provider (live$model, which subagent_live_token_count
# already computed inside the child). Falls back to provider
# then "?" only if neither is known.
resolved_model <- info$model %||% live$model %||%
default_provider_model(info$provider) %||%
info$provider %||% "?"
# `[[ ]]` for pending fields: list `$` prefix-matches, so
# info$pending would silently return info$pending_started_at
# whenever pending itself has been NULL-stripped.
list(
id = info$id,
seq = info$seq,
task = info$task,
started_at = info$started_at,
time_remaining = as.numeric(difftime(info$timeout, Sys.time(),
units = "mins")),
pending = info[["pending"]],
pending_started_at = info[["pending_started_at"]]
id = info$id,
seq = info$seq,
task = info$task,
model = resolved_model,
started_at = info$started_at,
age_seconds = age_seconds,
time_remaining = as.numeric(difftime(info$timeout, Sys.time(),
units = "mins")),
live_tokens = live$tokens,
context_limit = live$limit,
cumulative_input_tokens = info$cumulative_input_tokens %||% 0L,
cumulative_output_tokens = info$cumulative_output_tokens %||% 0L,
cumulative_total_tokens = info$cumulative_total_tokens %||% 0L,
cumulative_cost = info$cumulative_cost %||% NA_real_,
query_count = info$query_count %||% 0L,
pending = info[["pending"]],
pending_started_at = info[["pending_started_at"]]
)
})
# Sort by seq ascending so the user-visible numbering is stable.
Expand Down Expand Up @@ -761,8 +875,25 @@ format_subagent_list <- function(agents) {
} else {
" idle"
}
lines <- c(lines, sprintf(" [%s] %s (%s)%s %s", seq_str, a$task,
time_str, state_str, id_short))
# Model / age / live ctx / cumulative tokens / cost. Live ctx
# is "?" when the child is busy (callr can't ask it
# mid-turn). Cost is "?" when the provider doesn't surface it.
model_str <- as.character(a$model %||% "?")
age_str <- format_age(a$age_seconds %||% 0)
ctx_str <- format_live_ctx(a$live_tokens, a$context_limit)
tok_str <- sprintf("%s in / %s out",
format_tokens(a$cumulative_input_tokens %||% 0L),
format_tokens(a$cumulative_output_tokens %||% 0L))
cost_str <- if (is.na(a$cumulative_cost)) {
"?"
} else {
sprintf("$%.4f", a$cumulative_cost)
}
meta <- sprintf("(%s · %s · %s · %s · %s)",
model_str, age_str, ctx_str, tok_str, cost_str)
lines <- c(lines, sprintf(" [%s] %s %s (%s)%s %s",
seq_str, a$task, meta, time_str,
state_str, id_short))
}
paste(c(lines, "",
"Use the sequence number, the 8-char prefix, or the full id with /ask, /collect, and /kill."),
Expand Down
Loading