2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,6 +1,6 @@
Package: llm.api
Title: Minimal LLM Chat Interface
Version: 0.1.1
Version: 0.1.2
Authors@R: c(
person("Troy", "Hernandez", role = c("aut", "cre"),
email = "troy@cornball.ai",
14 changes: 14 additions & 0 deletions NEWS.md
@@ -1,3 +1,17 @@
# llm.api 0.1.2

* `chat()` now returns `$thinking` and `$finish_reason`. Reasoning models
(DeepSeek-R1, Moonshot Kimi, Anthropic extended thinking, OpenRouter)
return their chain-of-thought in a separate field, which was previously
dropped silently. `$thinking` is normalized across providers
(`reasoning_content`, `reasoning`, Anthropic `thinking` blocks), and
`$finish_reason` is normalized to the OpenAI vocabulary: Anthropic's
`max_tokens` becomes `"length"` and `end_turn` becomes `"stop"`.
* `chat()` now warns when a reasoning model truncates mid-thought
(`finish_reason == "length"` with empty content but populated
thinking). Previously this returned `content == ""` with no
indication; the actionable fix is to raise `max_tokens` (see the
sketch below).
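
A minimal usage sketch of the new fields (the model name is illustrative;
any reasoning-capable model behaves the same way):

```r
res <- chat("Why is the sky blue?", model = "deepseek-reasoner")
res$thinking       # chain-of-thought text, or NULL for non-reasoning models
res$finish_reason  # "stop" normally; "length" when truncated by max_tokens
if (identical(res$finish_reason, "length")) {
  # Budget exhausted mid-thought; partial reasoning is in res$thinking.
  # Retry with a larger max_tokens to get a complete answer.
}
```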

# llm.api 0.1.1

* Initial CRAN submission.
104 changes: 95 additions & 9 deletions R/chat.R
@@ -17,6 +17,15 @@
#'
#' @return A list with:
#' \item{content}{The assistant's response text}
#' \item{thinking}{Chain-of-thought from reasoning models, or NULL.
#' Populated from \code{reasoning_content} (DeepSeek, Moonshot Kimi,
#' vLLM, SGLang), \code{reasoning} (OpenRouter), or Anthropic
#' \code{thinking} blocks. Normalized across providers.}
#' \item{finish_reason}{Why generation stopped. \code{"stop"} on a
#' normal completion, \code{"length"} when truncated by \code{max_tokens}.
#' A reasoning model that returns empty \code{content} with
#' \code{finish_reason == "length"} ran out of budget mid-thought;
#' raise \code{max_tokens}.}
#' \item{model}{Model used}
#' \item{usage}{Token usage (if available)}
#' \item{history}{Updated conversation history}
@@ -108,6 +117,8 @@ chat <- function(

list(
content = result$content,
thinking = result$thinking,
finish_reason = result$finish_reason,
model = model,
usage = result$usage,
history = new_history
@@ -155,14 +166,24 @@ chat <- function(
data <- jsonlite::fromJSON(rawToChar(resp$content))

# Handle both list and data.frame formats from jsonlite
content <- if (is.data.frame(data$choices)) {
data$choices$message$content[1]
if (is.data.frame(data$choices)) {
msg <- data$choices$message
content <- msg$content[1]
thinking <- msg$reasoning_content[1] %||% msg$reasoning[1]
finish_reason <- data$choices$finish_reason[1]
} else {
data$choices[[1]]$message$content
msg <- data$choices[[1]]$message
content <- msg$content
thinking <- msg$reasoning_content %||% msg$reasoning
finish_reason <- data$choices[[1]]$finish_reason
}

.warn_if_truncated(content, thinking, finish_reason)

list(
content = content,
thinking = thinking,
finish_reason = finish_reason,
usage = data$usage
)
}
@@ -229,26 +250,74 @@ chat <- function(

data <- jsonlite::fromJSON(rawToChar(resp$content))

# Handle both data.frame and list formats from jsonlite
content <- if (is.data.frame(data$content)) {
data$content$text[1]
# Handle both data.frame and list formats from jsonlite. content is an
# ordered list of blocks; pull text out of "text" blocks and thinking
# out of "thinking" blocks.
if (is.data.frame(data$content)) {
types <- data$content$type
text_blocks <- data$content$text[types == "text"]
thinking_blocks <- data$content$thinking[types == "thinking"]
} else {
types <- vapply(data$content, function(b) b$type %||% "", character(1))
text_blocks <- vapply(data$content[types == "text"],
function(b) b$text %||% "", character(1))
thinking_blocks <- vapply(data$content[types == "thinking"],
function(b) b$thinking %||% "", character(1))
}

content <- if (length(text_blocks)) paste(text_blocks, collapse = "\n") else ""
thinking <- if (length(thinking_blocks)) {
paste(thinking_blocks, collapse = "\n")
} else {
data$content[[1]]$text
NULL
}
finish_reason <- .normalize_anthropic_stop_reason(data$stop_reason)

.warn_if_truncated(content, thinking, finish_reason)

list(
content = content,
thinking = thinking,
finish_reason = finish_reason,
usage = data$usage
)
}

# Map Anthropic's stop_reason to OpenAI-style finish_reason so callers see
# one vocabulary across providers. "max_tokens" is Anthropic's name for
# what OpenAI calls "length"; "end_turn" maps to "stop". Other values
# ("stop_sequence", "tool_use", "pause_turn", "refusal") pass through.
.normalize_anthropic_stop_reason <- function(stop_reason) {
if (is.null(stop_reason) || !nzchar(stop_reason)) return(NULL)
switch(stop_reason,
"end_turn" = "stop",
"max_tokens" = "length",
stop_reason)
}

# Surface the silent-empty-content failure mode of reasoning models. When
# the model burns its budget on chain-of-thought without ever emitting a
# user-facing answer, callers otherwise see content="" and assume the
# model decided to say nothing.
.warn_if_truncated <- function(content, thinking, finish_reason) {
if (identical(finish_reason, "length") &&
!nzchar(content %||% "") &&
nzchar(thinking %||% "")) {
warning("Model truncated mid-reasoning; partial chain-of-thought ",
"available in $thinking. Increase max_tokens.",
call. = FALSE)
}
}
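
# Caller-side sketch (illustrative helper, not exported): when the truncation
# warning above fires, the actionable fix is a larger budget. Assumes chat()
# accepts a max_tokens argument, as its documentation implies.
.retry_if_truncated <- function(prompt, max_tokens = 1024, ...) {
  res <- chat(prompt, max_tokens = max_tokens, ...)
  if (identical(res$finish_reason, "length")) {
    # Any partial chain-of-thought is kept in res$thinking; double the budget
    # so the model can finish both its reasoning and the visible answer.
    res <- chat(prompt, max_tokens = max_tokens * 2, ...)
  }
  res
}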

#' Stream response with live output
#' @noRd
.stream_response <- function(
url,
handle
) {
full_content <- ""
full_thinking <- ""
finish_reason <- NULL

callback <- function(data) {
lines <- strsplit(rawToChar(data), "\n")[[1]]
@@ -257,11 +326,20 @@
json_str <- substring(line, 7)
tryCatch({
chunk <- jsonlite::fromJSON(json_str)
delta <- chunk$choices[[1]]$delta$content
choice <- chunk$choices[[1]]
delta <- choice$delta$content
if (!is.null(delta)) {
cat(delta)
full_content <<- paste0(full_content, delta)
}
think_delta <- choice$delta$reasoning_content %||%
choice$delta$reasoning
if (!is.null(think_delta)) {
full_thinking <<- paste0(full_thinking, think_delta)
}
if (!is.null(choice$finish_reason)) {
finish_reason <<- choice$finish_reason
}
}, error = function(e) NULL)
}
}
@@ -272,7 +350,15 @@ chat <- function(
curl::curl_fetch_memory(url, handle = handle)
cat("\n")

list(content = full_content, usage = NULL)
thinking <- if (nzchar(full_thinking)) full_thinking else NULL
.warn_if_truncated(full_content, thinking, finish_reason)

list(
content = full_content,
thinking = thinking,
finish_reason = finish_reason,
usage = NULL
)
}

#' Null coalescing operator
2 changes: 1 addition & 1 deletion inst/tinytest/test_providers.R
@@ -59,7 +59,7 @@ expect_equal(cfg$default_model, "gpt-4o-mini")
cfg <- llm.api:::.get_provider_config("anthropic")
expect_equal(cfg$base_url, "https://api.anthropic.com")
expect_equal(cfg$chat_path, "/v1/messages")
expect_equal(cfg$default_model, "claude-3-5-sonnet-latest")
expect_equal(cfg$default_model, "claude-sonnet-4-6")

# Moonshot config
cfg <- llm.api:::.get_provider_config("moonshot")
40 changes: 40 additions & 0 deletions inst/tinytest/test_reasoning.R
@@ -0,0 +1,40 @@
# Tests for reasoning_content / thinking handling.

# Anthropic stop_reason → finish_reason normalization. Anthropic uses
# different vocabulary than OpenAI; chat() exposes the OpenAI vocabulary
# uniformly, so callers don't have to branch on provider.
expect_null(llm.api:::.normalize_anthropic_stop_reason(NULL))
expect_null(llm.api:::.normalize_anthropic_stop_reason(""))
expect_equal(llm.api:::.normalize_anthropic_stop_reason("end_turn"), "stop")
expect_equal(llm.api:::.normalize_anthropic_stop_reason("max_tokens"), "length")
# Pass-through for values that don't have an OpenAI equivalent.
expect_equal(llm.api:::.normalize_anthropic_stop_reason("tool_use"), "tool_use")
expect_equal(llm.api:::.normalize_anthropic_stop_reason("stop_sequence"),
"stop_sequence")

# Truncation warning fires only when content is empty AND thinking is
# populated AND finish_reason is "length". This is the silent-failure
# mode of reasoning models that callers need to know about.
expect_warning(
llm.api:::.warn_if_truncated("", "some reasoning", "length"),
pattern = "truncated mid-reasoning"
)
# Normal completion with both content and thinking: no warning.
expect_silent(
llm.api:::.warn_if_truncated("answer", "thinking", "stop")
)
# Length-truncated but content is non-empty: model hit the cap mid-answer
# but at least said something. Caller can decide; we don't warn here.
expect_silent(
llm.api:::.warn_if_truncated("partial answer", "thinking", "length")
)
# Length-truncated with no thinking and no content: not a reasoning model
# truncation pattern, no warning.
expect_silent(
llm.api:::.warn_if_truncated("", NULL, "length")
)
# Length-truncated with thinking but content is NULL (some providers).
expect_warning(
llm.api:::.warn_if_truncated(NULL, "some reasoning", "length"),
pattern = "truncated mid-reasoning"
)
9 changes: 9 additions & 0 deletions man/chat.Rd
@@ -31,6 +31,15 @@ chat(prompt, model = NULL, system = NULL, history = NULL, temperature = NULL,
\value{
A list with:
\item{content}{The assistant's response text}
\item{thinking}{Chain-of-thought from reasoning models, or NULL.
Populated from \code{reasoning_content} (DeepSeek, Moonshot Kimi,
vLLM, SGLang), \code{reasoning} (OpenRouter), or Anthropic
\code{thinking} blocks. Normalized across providers.}
\item{finish_reason}{Why generation stopped. \code{"stop"} on a
normal completion, \code{"length"} when truncated by \code{max_tokens}.
A reasoning model that returns empty \code{content} with
\code{finish_reason == "length"} ran out of budget mid-thought;
raise \code{max_tokens}.}
\item{model}{Model used}
\item{usage}{Token usage (if available)}
\item{history}{Updated conversation history}