diff --git a/NAMESPACE b/NAMESPACE index 5195bcc..6b346aa 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,11 +7,14 @@ export(cli_worker_spawn) export(context_limit_for_model) export(context_usage_pct) export(default_local_model) +export(default_provider_model) export(ensure_skills) export(estimate_history_tokens) export(estimate_live_context_tokens) export(estimate_text_tokens) export(estimate_tool_tokens) +export(format_age) +export(format_live_ctx) export(format_tokens) export(install_cli) export(matrix_archive_all) diff --git a/R/compact-turn.R b/R/compact-turn.R index f979b2e..6e18d68 100644 --- a/R/compact-turn.R +++ b/R/compact-turn.R @@ -266,14 +266,11 @@ maybe_compact_turn_session <- function(session, config, kind = NULL) { if (length(history) < min_messages) { return(invisible(FALSE)) } - model <- session$model_map$cloud %||% NULL - if (is.null(model)) { - model <- switch(session$provider %||% "anthropic", - anthropic = "claude-sonnet-4-20250514", - openai = "gpt-4o", - moonshot = "moonshot-v1-8k", - NULL) - } + # Resolve the same model turn() will run with; mirrors + # subagent_live_token_count() so /agents, compaction, and the + # next API call all reason about the same model identity. + model <- session$model_map$cloud %||% + default_provider_model(session$provider) # Estimate against the same tools turn() will send. turn() # resolves tools from session$tools_filter when tools is NULL, # so passing NULL here would undercount the live context for any diff --git a/R/context-budget.R b/R/context-budget.R index 87778aa..d0a6a88 100644 --- a/R/context-budget.R +++ b/R/context-budget.R @@ -39,6 +39,26 @@ MODEL_CONTEXT_LIMITS <- list( "qwen2.5" = 32000L ) +#' Provider-specific default model name. +#' +#' Resolves the actual model a subagent (or chat session) will run +#' with when no explicit \code{model} is set. Mirrors the defaults +#' the CLI script picks at startup so /agents, compaction, and the +#' CLI all show the same model identity. 
Returns NULL for unknown +#' providers (lets llm.api fall back to its own default). +#' @param provider Provider name. +#' @return Model name (character) or NULL. +#' @keywords internal +#' @export +default_provider_model <- function(provider) { + switch(provider %||% "", + anthropic = "claude-sonnet-4-20250514", + openai = "gpt-4o", + moonshot = "kimi-k2.6", + ollama = "llama3.2", + NULL) +} + #' Look up the context window for a given model. #' #' Tries exact match, then prefix match either direction (so @@ -76,6 +96,36 @@ format_tokens <- function(n) { } } +#' Format an age in seconds as a compact string (e.g. "12s", "3m", "2.0h"). +#' @keywords internal +#' @export +format_age <- function(seconds) { + s <- as.numeric(seconds) + if (is.na(s) || s < 0) { + return("?") + } + if (s < 60) { + sprintf("%ds", as.integer(round(s))) + } else if (s < 3600) { + sprintf("%dm", as.integer(round(s / 60))) + } else { + sprintf("%.1fh", s / 3600) + } +} + +#' Format a live-context display like "ctx 4.2K/200.0K" or "ctx ?". +#' +#' Used by /agents to summarize live tokens versus model limit. +#' Returns "ctx ?" when either value is NA or NULL. +#' @keywords internal +#' @export +format_live_ctx <- function(tokens, limit) { + if (is.na(tokens) || is.na(limit) || is.null(tokens) || is.null(limit)) { + return("ctx ?") + } + sprintf("ctx %s/%s", format_tokens(tokens), format_tokens(limit)) +} + #' Rough token estimate from raw text. #' #' Returns `ceil(nchar(text) / 4)`. Good enough for budget decisions diff --git a/R/subagent.R b/R/subagent.R index be38ab9..0aa8b2a 100644 --- a/R/subagent.R +++ b/R/subagent.R @@ -177,7 +177,10 @@ subagent_seed_history <- function(history) { #' turn into a sub-subagent (capped by depth_cap). #' #' @param prompt User prompt (character). -#' @return Reply text (character). +#' @return A list with `$reply` (character, the LLM reply text) and +#' `$usage` (list with `input_tokens`, `output_tokens`, `total_tokens`, +#' and optionally `cost` — provider-dependent). 
Callers extract the +#' reply and accumulate usage into the parent-side registry. #' @keywords internal #' @export subagent_turn_prompt <- function(prompt) { @@ -275,7 +278,8 @@ subagent_turn_prompt <- function(prompt) { error = conditionMessage(e), level = "warn") }) - as.character(result$reply %||% "") + list(reply = as.character(result$reply %||% ""), + usage = result$usage %||% list()) } SUBAGENT_DEFAULTS <- list( @@ -496,16 +500,24 @@ subagent_spawn <- function(task, model = NULL, tools = NULL, preset = NULL, store_update(session_key, list(status = "running")) seq <- next_subagent_seq() .subagent_registry[[id]] <- list( - id = id, - seq = seq, - session_key = session_key, - session = session, - task = task, - tools = tools, - model = model, - started_at = Sys.time(), - timeout = Sys.time() + subcfg$timeout_minutes * 60, - depth = child_depth + id = id, + seq = seq, + session_key = session_key, + session = session, + task = task, + tools = tools, + model = spawn_model, + provider = spawn_provider, + started_at = Sys.time(), + timeout = Sys.time() + subcfg$timeout_minutes * 60, + depth = child_depth, + # Usage counters (accumulated across queries; cost is NA when + # the provider doesn't surface it). + cumulative_input_tokens = 0L, + cumulative_output_tokens = 0L, + cumulative_total_tokens = 0L, + cumulative_cost = NA_real_, + query_count = 0L ) # Initialize the durable transcript file. Disk space is cheap; # context is expensive — the in-memory child history may later be @@ -591,18 +603,20 @@ subagent_query <- function(id, prompt, wait = TRUE, timeout = 60L) { return(invisible(canonical)) } - reply <- tryCatch( - info$session$run( - function(p) corteza::subagent_turn_prompt(p), - list(p = prompt) + turn_result <- tryCatch( + info$session$run( + function(p) corteza::subagent_turn_prompt(p), + list(p = prompt) ), - error = function(e) { - stop("Subagent query failed: ", conditionMessage(e), call. 
= FALSE) - } - ) + error = function(e) { + stop("Subagent query failed: ", conditionMessage(e), + call. = FALSE) + }) + info <- subagent_accumulate_usage(info, turn_result$usage) + .subagent_registry[[canonical]] <- info log_event("subagent_query", subagent_id = canonical, prompt_length = nchar(prompt)) - as.character(reply) + as.character(turn_result$reply %||% "") } #' Collect the result of a previously-fired async subagent query. @@ -646,13 +660,16 @@ subagent_collect <- function(id, wait = TRUE, timeout = 60L) { msg <- info$session$read() info$pending <- NULL info$pending_started_at <- NULL + if (is.null(msg$error)) { + info <- subagent_accumulate_usage(info, msg$result$usage) + } .subagent_registry[[canonical]] <- info if (!is.null(msg$error)) { stop("Subagent query failed: ", conditionMessage(msg$error), call. = FALSE) } log_event("subagent_collect", subagent_id = canonical) - as.character(msg$result) + as.character(msg$result$reply %||% "") } #' Kill a subagent. @@ -678,7 +695,83 @@ subagent_kill <- function(id) { invisible(TRUE) } +#' Accumulate per-turn usage into a registry entry. +#' +#' `usage` is the `$usage` field returned by `subagent_turn_prompt()` +#' (originally from `llm.api::agent`). Missing fields are treated as +#' zero — for providers that don't return cost (moonshot, ollama), +#' `cumulative_cost` stays NA. 
+#' @noRd +subagent_accumulate_usage <- function(info, usage) { + if (is.null(usage)) { + return(info) + } + add_int <- function(prev, new) { + if (is.null(new) || is.na(new)) prev else prev + as.integer(new) + } + info$cumulative_input_tokens <- add_int(info$cumulative_input_tokens %||% 0L, + usage$input_tokens) + info$cumulative_output_tokens <- add_int(info$cumulative_output_tokens %||% 0L, + usage$output_tokens) + info$cumulative_total_tokens <- add_int(info$cumulative_total_tokens %||% 0L, + usage$total_tokens) + if (!is.null(usage$cost) && !is.na(usage$cost)) { + prev <- info$cumulative_cost + info$cumulative_cost <- if (is.na(prev)) { + as.numeric(usage$cost) + } else { + prev + as.numeric(usage$cost) + } + } + info$query_count <- (info$query_count %||% 0L) + 1L + info +} + +#' Best-effort live context-token count for an idle subagent. +#' +#' Calls into the child via `r_session$run()` to compute the same +#' `context_usage_pct()` math the compaction policy uses. Returns NA +#' on any failure (busy child, callr error, etc.) so the caller can +#' display `?` instead of crashing `/agents`. +#' @noRd +subagent_live_token_count <- function(info) { + if (!is.null(info[["pending"]])) { + return(list(tokens = NA_integer_, limit = NA_integer_)) + } + result <- tryCatch( + info$session$run(function() { + sess <- corteza:::.subagent_state$session + if (is.null(sess)) { + return(list(tokens = NA_integer_, limit = NA_integer_, + model = NULL)) + } + # Match the model the child actually runs with: explicit + # model_map$cloud first, otherwise the provider default. + # Without this fallback, child sessions spawned with the + # default model report `ctx ?` because there's no explicit + # model name to look up a limit for. 
+ model <- sess$model_map$cloud %||% + corteza::default_provider_model(sess$provider) + tools <- tryCatch(corteza:::skills_as_api_tools(sess$tools_filter), + error = function(e) NULL) + list( + tokens = corteza::estimate_live_context_tokens( + list(history = sess$history %||% list()), + system_prompt = sess$system, tools = tools), + limit = if (is.null(model)) NA_integer_ else + corteza::context_limit_for_model(model), + model = model) + }), + error = function(e) list(tokens = NA_integer_, limit = NA_integer_, + model = NULL)) + result +} + #' List active subagents. +#' +#' Returns a list of info objects per agent: id/seq/task/started_at/ +#' time_remaining/pending plus model/age/cumulative usage and a +#' best-effort live token count for idle agents (`NA` for busy). #' @return List of subagent info objects. #' @export subagent_list <- function() { @@ -688,18 +781,39 @@ subagent_list <- function() { } out <- lapply(ids, function(id) { info <- .subagent_registry[[id]] + live <- subagent_live_token_count(info) + age_seconds <- as.numeric(difftime(Sys.time(), + info$started_at, + units = "secs")) + # Display the actual model the child runs with — explicit + # info$model first, otherwise the resolved default for the + # provider (live$model, which subagent_live_token_count + # already computed inside the child). Falls back to provider + # then "?" only if neither is known. + resolved_model <- info$model %||% live$model %||% + default_provider_model(info$provider) %||% + info$provider %||% "?" # `[[ ]]` for pending fields: list `$` prefix-matches, so # info$pending would silently return info$pending_started_at # whenever pending itself has been NULL-stripped. 
list( - id = info$id, - seq = info$seq, - task = info$task, - started_at = info$started_at, - time_remaining = as.numeric(difftime(info$timeout, Sys.time(), - units = "mins")), - pending = info[["pending"]], - pending_started_at = info[["pending_started_at"]] + id = info$id, + seq = info$seq, + task = info$task, + model = resolved_model, + started_at = info$started_at, + age_seconds = age_seconds, + time_remaining = as.numeric(difftime(info$timeout, Sys.time(), + units = "mins")), + live_tokens = live$tokens, + context_limit = live$limit, + cumulative_input_tokens = info$cumulative_input_tokens %||% 0L, + cumulative_output_tokens = info$cumulative_output_tokens %||% 0L, + cumulative_total_tokens = info$cumulative_total_tokens %||% 0L, + cumulative_cost = info$cumulative_cost %||% NA_real_, + query_count = info$query_count %||% 0L, + pending = info[["pending"]], + pending_started_at = info[["pending_started_at"]] ) }) # Sort by seq ascending so the user-visible numbering is stable. @@ -761,8 +875,25 @@ format_subagent_list <- function(agents) { } else { " idle" } - lines <- c(lines, sprintf(" [%s] %s (%s)%s %s", seq_str, a$task, - time_str, state_str, id_short)) + # Model / age / live ctx / cumulative tokens / cost. Live ctx + # is "?" when the child is busy (callr can't ask it + # mid-turn). Cost is "?" when the provider doesn't surface it. + model_str <- as.character(a$model %||% "?") + age_str <- format_age(a$age_seconds %||% 0) + ctx_str <- format_live_ctx(a$live_tokens, a$context_limit) + tok_str <- sprintf("%s in / %s out", + format_tokens(a$cumulative_input_tokens %||% 0L), + format_tokens(a$cumulative_output_tokens %||% 0L)) + cost_str <- if (is.na(a$cumulative_cost)) { + "?" 
+ } else { + sprintf("$%.4f", a$cumulative_cost) + } + meta <- sprintf("(%s · %s · %s · %s · %s)", + model_str, age_str, ctx_str, tok_str, cost_str) + lines <- c(lines, sprintf(" [%s] %s %s (%s)%s %s", + seq_str, a$task, meta, time_str, + state_str, id_short)) } paste(c(lines, "", "Use the sequence number, the 8-char prefix, or the full id with /ask, /collect, and /kill."), diff --git a/inst/tinytest/test_agents_visibility.R b/inst/tinytest/test_agents_visibility.R new file mode 100644 index 0000000..43f6ffe --- /dev/null +++ b/inst/tinytest/test_agents_visibility.R @@ -0,0 +1,137 @@ +# /agents visibility — pure-function tests for the new fields. +# The live-tokens callr round-trip is exercised in +# test_subagent_callr.R (at_home). + +# format_age ---- + +expect_equal(corteza::format_age(0), "0s") +expect_equal(corteza::format_age(5), "5s") +expect_equal(corteza::format_age(59), "59s") +expect_equal(corteza::format_age(60), "1m") +expect_equal(corteza::format_age(90), "2m") +expect_equal(corteza::format_age(600), "10m") +expect_equal(corteza::format_age(3600), "1.0h") +expect_equal(corteza::format_age(7200), "2.0h") +expect_equal(corteza::format_age(NA), "?") +expect_equal(corteza::format_age(-1), "?") + +# format_live_ctx ---- + +expect_equal(corteza::format_live_ctx(4200, 200000), "ctx 4.2K/200.0K") +expect_equal(corteza::format_live_ctx(0, 128000), "ctx 0/128.0K") +expect_equal(corteza::format_live_ctx(NA, 128000), "ctx ?") +expect_equal(corteza::format_live_ctx(4200, NA), "ctx ?") +expect_equal(corteza::format_live_ctx(NULL, 128000), "ctx ?") + +# subagent_accumulate_usage ---- + +base <- list(cumulative_input_tokens = 0L, + cumulative_output_tokens = 0L, + cumulative_total_tokens = 0L, + cumulative_cost = NA_real_, + query_count = 0L) + +# NULL usage is a no-op. +expect_identical(corteza:::subagent_accumulate_usage(base, NULL), base) + +# First call accumulates non-NULL fields. 
+after1 <- corteza:::subagent_accumulate_usage( + base, + list(input_tokens = 100L, output_tokens = 20L, total_tokens = 120L)) +expect_equal(after1$cumulative_input_tokens, 100L) +expect_equal(after1$cumulative_output_tokens, 20L) +expect_equal(after1$cumulative_total_tokens, 120L) +expect_true(is.na(after1$cumulative_cost), + info = "no cost in usage -> cumulative stays NA") +expect_equal(after1$query_count, 1L) + +# Second call adds, query_count increments. +after2 <- corteza:::subagent_accumulate_usage( + after1, + list(input_tokens = 50L, output_tokens = 5L, total_tokens = 55L, + cost = 0.001)) +expect_equal(after2$cumulative_input_tokens, 150L) +expect_equal(after2$cumulative_output_tokens, 25L) +expect_equal(after2$cumulative_total_tokens, 175L) +expect_equal(after2$cumulative_cost, 0.001) +expect_equal(after2$query_count, 2L) + +# Third call with cost adds to running cost. +after3 <- corteza:::subagent_accumulate_usage( + after2, + list(input_tokens = 10L, output_tokens = 1L, total_tokens = 11L, + cost = 0.0001)) +expect_equal(after3$cumulative_cost, 0.0011) + +# Missing fields don't crash and don't change those counters. +partial <- corteza:::subagent_accumulate_usage( + base, list(input_tokens = 7L)) +expect_equal(partial$cumulative_input_tokens, 7L) +expect_equal(partial$cumulative_output_tokens, 0L) + +# format_subagent_list with full fields ---- +# Stub a registry-style entry and confirm the formatter renders all +# the new fields without crashing on missing pieces. 
+ +mock_agent <- list( + id = "stub-12345678", seq = 1L, + task = "demo task", + model = "moonshot-v1-8k", + started_at = Sys.time() - 30, + age_seconds = 30, + time_remaining = 29.5, + live_tokens = 1500L, + context_limit = 128000L, + cumulative_input_tokens = 300L, + cumulative_output_tokens = 50L, + cumulative_total_tokens = 350L, + cumulative_cost = NA_real_, + query_count = 2L, + pending = NULL, + pending_started_at = NULL) +out <- corteza:::format_subagent_list(list(mock_agent)) +expect_true(grepl("moonshot-v1-8k", out, fixed = TRUE)) +expect_true(grepl("30s", out, fixed = TRUE)) +expect_true(grepl("ctx 1.5K/128.0K", out, fixed = TRUE)) +expect_true(grepl("300 in / 50 out", out, fixed = TRUE)) +# Cost is "?" when NA. +expect_true(grepl("· ?)", out, fixed = TRUE)) +expect_true(grepl("idle", out, fixed = TRUE)) + +# Busy agent: live tokens NA, state shows the pending prompt. +busy_agent <- mock_agent +busy_agent$pending <- "investigating the deploy log" +busy_agent$live_tokens <- NA_integer_ +busy_agent$context_limit <- NA_integer_ +out_busy <- corteza:::format_subagent_list(list(busy_agent)) +expect_true(grepl("ctx ?", out_busy, fixed = TRUE), + info = "busy agent shows ctx ? (callr can't ask a busy child)") +expect_true(grepl("busy:", out_busy, fixed = TRUE)) +expect_true(grepl("investigating the deploy log", out_busy, fixed = TRUE)) + +# Cost rendering when provider does supply it. +costing_agent <- mock_agent +costing_agent$cumulative_cost <- 0.0153 +out_cost <- corteza:::format_subagent_list(list(costing_agent)) +expect_true(grepl("$0.0153", out_cost, fixed = TRUE)) + +# default_provider_model ---- +# Regression for the case where a subagent spawned with the provider +# default model (no explicit model_map$cloud) used to display the +# provider name as the model and "ctx ?" because the limit lookup +# had no key. The helper now resolves the same default the CLI and +# turn() will use, so /agents shows a real model name and a real +# context window. 
+expect_equal(corteza::default_provider_model("anthropic"), + "claude-sonnet-4-20250514") +expect_equal(corteza::default_provider_model("openai"), "gpt-4o") +expect_equal(corteza::default_provider_model("moonshot"), "kimi-k2.6") +expect_equal(corteza::default_provider_model("ollama"), "llama3.2") +expect_null(corteza::default_provider_model("unknown-provider")) +expect_null(corteza::default_provider_model(NULL)) + +# Resolved default models all have a context-limit entry. +expect_true(corteza::context_limit_for_model( + corteza::default_provider_model("anthropic")) > 0L) +expect_true(corteza::context_limit_for_model( + corteza::default_provider_model("moonshot")) > 0L) diff --git a/man/default_provider_model.Rd b/man/default_provider_model.Rd new file mode 100644 index 0000000..24684b2 --- /dev/null +++ b/man/default_provider_model.Rd @@ -0,0 +1,21 @@ +% tinyrox says don't edit this manually, but it can't stop you! +\name{default_provider_model} +\alias{default_provider_model} +\title{Provider-specific default model name.} +\usage{ +default_provider_model(provider) +} +\arguments{ +\item{provider}{Provider name.} +} +\value{ +Model name (character) or NULL. +} +\description{ +Resolves the actual model a subagent (or chat session) will run +with when no explicit \code{model} is set. Mirrors the defaults +the CLI script picks at startup so /agents, compaction, and the +CLI all show the same model identity. Returns NULL for unknown +providers (lets llm.api fall back to its own default). +} +\keyword{internal} diff --git a/man/format_age.Rd b/man/format_age.Rd new file mode 100644 index 0000000..7b73eec --- /dev/null +++ b/man/format_age.Rd @@ -0,0 +1,11 @@ +% tinyrox says don't edit this manually, but it can't stop you! +\name{format_age} +\alias{format_age} +\title{Format an age in seconds as a compact string (e.g. "12s", "3m", "2.0h").} +\usage{ +format_age(seconds) +} +\description{ +Format an age in seconds as a compact string (e.g. "12s", "3m", "2.0h"). 
+} +\keyword{internal} diff --git a/man/format_live_ctx.Rd b/man/format_live_ctx.Rd new file mode 100644 index 0000000..0d4b73b --- /dev/null +++ b/man/format_live_ctx.Rd @@ -0,0 +1,12 @@ +% tinyrox says don't edit this manually, but it can't stop you! +\name{format_live_ctx} +\alias{format_live_ctx} +\title{Format a live-context display like "ctx 4.2K/200.0K" or "ctx ?".} +\usage{ +format_live_ctx(tokens, limit) +} +\description{ +Used by /agents to summarize live tokens versus model limit. +Returns "ctx ?" when either value is NA or NULL. +} +\keyword{internal} diff --git a/man/subagent_list.Rd b/man/subagent_list.Rd index b276688..745a80b 100644 --- a/man/subagent_list.Rd +++ b/man/subagent_list.Rd @@ -9,5 +9,7 @@ subagent_list() List of subagent info objects. } \description{ -List active subagents. +Returns a list of info objects per agent: id/seq/task/started_at/ +time_remaining/pending plus model/age/cumulative usage and a +best-effort live token count for idle agents (`NA` for busy). } diff --git a/man/subagent_turn_prompt.Rd b/man/subagent_turn_prompt.Rd index 4524800..86ea349 100644 --- a/man/subagent_turn_prompt.Rd +++ b/man/subagent_turn_prompt.Rd @@ -9,7 +9,10 @@ subagent_turn_prompt(prompt) \item{prompt}{User prompt (character).} } \value{ -Reply text (character). +A list with `$reply` (character, the LLM reply text) and + `$usage` (list with `input_tokens`, `output_tokens`, `total_tokens`, + and optionally `cost` — provider-dependent). Callers extract the + reply and accumulate usage into the parent-side registry. } \description{ Captures the pre-turn history length so that, if archival is enabled