2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,6 +1,6 @@
Package: llm.api
Title: Minimal LLM Chat Interface
Version: 0.1.1
Version: 0.1.2
Authors@R: c(
person("Troy", "Hernandez", role = c("aut", "cre"),
email = "troy@cornball.ai",
14 changes: 14 additions & 0 deletions NEWS.md
@@ -1,3 +1,17 @@
# llm.api 0.1.2

* `chat()` now returns `$thinking` and `$finish_reason`. Reasoning models
(DeepSeek-R1, Moonshot Kimi, Anthropic extended thinking, OpenRouter)
return their chain-of-thought in a separate field, which was previously
dropped silently. `$thinking` is normalized across providers
(`reasoning_content`, `reasoning`, Anthropic `thinking` blocks), and
`$finish_reason` is normalized to the OpenAI vocabulary: Anthropic's
`max_tokens` becomes `"length"` and `end_turn` becomes `"stop"`.
* `chat()` now warns when a reasoning model truncates mid-thought
(`finish_reason == "length"` with empty content but populated
thinking). Previously this returned `content == ""` with no
indication; the actionable fix is to raise `max_tokens` (see the
sketch below).
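
A minimal usage sketch of the new fields (the model name is illustrative;
any reasoning-capable model behaves the same way):

```r
res <- chat("Why is the sky blue?", model = "deepseek-reasoner")
res$thinking       # chain-of-thought text, or NULL for non-reasoning models
res$finish_reason  # "stop" normally; "length" when truncated by max_tokens
if (identical(res$finish_reason, "length")) {
  # Budget exhausted mid-thought; partial reasoning is in res$thinking.
  # Retry with a larger max_tokens to get a complete answer.
}
```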

# llm.api 0.1.1

* Initial CRAN submission.
104 changes: 95 additions & 9 deletions R/chat.R
@@ -17,6 +17,15 @@
#'
#' @return A list with:
#' \item{content}{The assistant's response text}
#' \item{thinking}{Chain-of-thought from reasoning models, or NULL.
#' Populated from \code{reasoning_content} (DeepSeek, Moonshot Kimi,
#' vLLM, SGLang), \code{reasoning} (OpenRouter), or Anthropic
#' \code{thinking} blocks. Normalized across providers.}
#' \item{finish_reason}{Why generation stopped. \code{"stop"} on a
#' normal completion, \code{"length"} when truncated by \code{max_tokens}.
#' A reasoning model that returns empty \code{content} with
#' \code{finish_reason == "length"} ran out of budget mid-thought;
#' raise \code{max_tokens}.}
#' \item{model}{Model used}
#' \item{usage}{Token usage (if available)}
#' \item{history}{Updated conversation history}
@@ -108,6 +117,8 @@ chat <- function(

list(
content = result$content,
thinking = result$thinking,
finish_reason = result$finish_reason,
model = model,
usage = result$usage,
history = new_history
@@ -155,14 +166,24 @@ chat <- function(
data <- jsonlite::fromJSON(rawToChar(resp$content))

# Handle both list and data.frame formats from jsonlite
content <- if (is.data.frame(data$choices)) {
data$choices$message$content[1]
if (is.data.frame(data$choices)) {
msg <- data$choices$message
content <- msg$content[1]
thinking <- msg$reasoning_content[1] %||% msg$reasoning[1]
finish_reason <- data$choices$finish_reason[1]
} else {
data$choices[[1]]$message$content
msg <- data$choices[[1]]$message
content <- msg$content
thinking <- msg$reasoning_content %||% msg$reasoning
finish_reason <- data$choices[[1]]$finish_reason
}

.warn_if_truncated(content, thinking, finish_reason)

list(
content = content,
thinking = thinking,
finish_reason = finish_reason,
usage = data$usage
)
}
@@ -229,26 +250,74 @@ chat <- function(

data <- jsonlite::fromJSON(rawToChar(resp$content))

# Handle both data.frame and list formats from jsonlite
content <- if (is.data.frame(data$content)) {
data$content$text[1]
# Handle both data.frame and list formats from jsonlite. content is an
# ordered list of blocks; pull text out of "text" blocks and thinking
# out of "thinking" blocks.
if (is.data.frame(data$content)) {
types <- data$content$type
text_blocks <- data$content$text[types == "text"]
thinking_blocks <- data$content$thinking[types == "thinking"]
} else {
types <- vapply(data$content, function(b) b$type %||% "", character(1))
text_blocks <- vapply(data$content[types == "text"],
function(b) b$text %||% "", character(1))
thinking_blocks <- vapply(data$content[types == "thinking"],
function(b) b$thinking %||% "", character(1))
}

content <- if (length(text_blocks)) paste(text_blocks, collapse = "\n") else ""
thinking <- if (length(thinking_blocks)) {
paste(thinking_blocks, collapse = "\n")
} else {
data$content[[1]]$text
NULL
}
finish_reason <- .normalize_anthropic_stop_reason(data$stop_reason)

.warn_if_truncated(content, thinking, finish_reason)

list(
content = content,
thinking = thinking,
finish_reason = finish_reason,
usage = data$usage
)
}

# Map Anthropic's stop_reason to OpenAI-style finish_reason so callers see
# one vocabulary across providers. "max_tokens" is Anthropic's name for
# what OpenAI calls "length"; "end_turn" maps to "stop". Other values
# ("stop_sequence", "tool_use", "pause_turn", "refusal") pass through.
.normalize_anthropic_stop_reason <- function(stop_reason) {
if (is.null(stop_reason) || !nzchar(stop_reason)) return(NULL)
switch(stop_reason,
"end_turn" = "stop",
"max_tokens" = "length",
stop_reason)
}

# Surface the silent-empty-content failure mode of reasoning models. When
# the model burns its budget on chain-of-thought without ever emitting a
# user-facing answer, callers otherwise see content="" and assume the
# model decided to say nothing.
.warn_if_truncated <- function(content, thinking, finish_reason) {
if (identical(finish_reason, "length") &&
!nzchar(content %||% "") &&
nzchar(thinking %||% "")) {
warning("Model truncated mid-reasoning; partial chain-of-thought ",
"available in $thinking. Increase max_tokens.",
call. = FALSE)
}
}
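
# Caller-side sketch (illustrative helper, not exported): when the truncation
# warning above fires, the actionable fix is a larger budget. Assumes chat()
# accepts a max_tokens argument, as its documentation implies.
.retry_if_truncated <- function(prompt, max_tokens = 1024, ...) {
  res <- chat(prompt, max_tokens = max_tokens, ...)
  if (identical(res$finish_reason, "length")) {
    # Any partial chain-of-thought is kept in res$thinking; double the budget
    # so the model can finish both its reasoning and the visible answer.
    res <- chat(prompt, max_tokens = max_tokens * 2, ...)
  }
  res
}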

#' Stream response with live output
#' @noRd
.stream_response <- function(
url,
handle
) {
full_content <- ""
full_thinking <- ""
finish_reason <- NULL

callback <- function(data) {
lines <- strsplit(rawToChar(data), "\n")[[1]]
@@ -257,11 +326,20 @@
json_str <- substring(line, 7)
tryCatch({
chunk <- jsonlite::fromJSON(json_str)
delta <- chunk$choices[[1]]$delta$content
choice <- chunk$choices[[1]]
delta <- choice$delta$content
if (!is.null(delta)) {
cat(delta)
full_content <<- paste0(full_content, delta)
}
think_delta <- choice$delta$reasoning_content %||%
choice$delta$reasoning
if (!is.null(think_delta)) {
full_thinking <<- paste0(full_thinking, think_delta)
}
if (!is.null(choice$finish_reason)) {
finish_reason <<- choice$finish_reason
}
}, error = function(e) NULL)
}
}
@@ -272,7 +350,15 @@ chat <- function(
curl::curl_fetch_memory(url, handle = handle)
cat("\n")

list(content = full_content, usage = NULL)
thinking <- if (nzchar(full_thinking)) full_thinking else NULL
.warn_if_truncated(full_content, thinking, finish_reason)

list(
content = full_content,
thinking = thinking,
finish_reason = finish_reason,
usage = NULL
)
}

#' Null coalescing operator
2 changes: 1 addition & 1 deletion inst/tinytest/test_providers.R
@@ -59,7 +59,7 @@ expect_equal(cfg$default_model, "gpt-4o-mini")
cfg <- llm.api:::.get_provider_config("anthropic")
expect_equal(cfg$base_url, "https://api.anthropic.com")
expect_equal(cfg$chat_path, "/v1/messages")
expect_equal(cfg$default_model, "claude-3-5-sonnet-latest")
expect_equal(cfg$default_model, "claude-sonnet-4-6")

# Moonshot config
cfg <- llm.api:::.get_provider_config("moonshot")
40 changes: 40 additions & 0 deletions inst/tinytest/test_reasoning.R
@@ -0,0 +1,40 @@
# Tests for reasoning_content / thinking handling.

# Anthropic stop_reason → finish_reason normalization. Anthropic uses
# different vocabulary than OpenAI; chat() exposes the OpenAI vocabulary
# uniformly, so callers don't have to branch on provider.
expect_null(llm.api:::.normalize_anthropic_stop_reason(NULL))
expect_null(llm.api:::.normalize_anthropic_stop_reason(""))
expect_equal(llm.api:::.normalize_anthropic_stop_reason("end_turn"), "stop")
expect_equal(llm.api:::.normalize_anthropic_stop_reason("max_tokens"), "length")
# Pass-through for values that don't have an OpenAI equivalent.
expect_equal(llm.api:::.normalize_anthropic_stop_reason("tool_use"), "tool_use")
expect_equal(llm.api:::.normalize_anthropic_stop_reason("stop_sequence"),
"stop_sequence")

# Truncation warning fires only when content is empty AND thinking is
# populated AND finish_reason is "length". This is the silent-failure
# mode of reasoning models that callers need to know about.
expect_warning(
llm.api:::.warn_if_truncated("", "some reasoning", "length"),
pattern = "truncated mid-reasoning"
)
# Normal completion with both content and thinking: no warning.
expect_silent(
llm.api:::.warn_if_truncated("answer", "thinking", "stop")
)
# Length-truncated but content is non-empty: model hit the cap mid-answer
# but at least said something. Caller can decide; we don't warn here.
expect_silent(
llm.api:::.warn_if_truncated("partial answer", "thinking", "length")
)
# Length-truncated with no thinking and no content: not a reasoning model
# truncation pattern, no warning.
expect_silent(
llm.api:::.warn_if_truncated("", NULL, "length")
)
# Length-truncated with thinking but content is NULL (some providers).
expect_warning(
llm.api:::.warn_if_truncated(NULL, "some reasoning", "length"),
pattern = "truncated mid-reasoning"
)
9 changes: 9 additions & 0 deletions man/chat.Rd
@@ -31,6 +31,15 @@ chat(prompt, model = NULL, system = NULL, history = NULL, temperature = NULL,
\value{
A list with:
\item{content}{The assistant's response text}
\item{thinking}{Chain-of-thought from reasoning models, or NULL.
Populated from \code{reasoning_content} (DeepSeek, Moonshot Kimi,
vLLM, SGLang), \code{reasoning} (OpenRouter), or Anthropic
\code{thinking} blocks. Normalized across providers.}
\item{finish_reason}{Why generation stopped. \code{"stop"} on a
normal completion, \code{"length"} when truncated by \code{max_tokens}.
A reasoning model that returns empty \code{content} with
\code{finish_reason == "length"} ran out of budget mid-thought;
raise \code{max_tokens}.}
\item{model}{Model used}
\item{usage}{Token usage (if available)}
\item{history}{Updated conversation history}