133 changes: 133 additions & 0 deletions agent/llmagent/empty_response_test.go
@@ -0,0 +1,133 @@
// Copyright 2026 Redpanda Data, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package llmagent_test

import (
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/redpanda-data/ai-sdk-go/agent"
"github.com/redpanda-data/ai-sdk-go/agent/llmagent"
"github.com/redpanda-data/ai-sdk-go/llm"
"github.com/redpanda-data/ai-sdk-go/llm/fakellm"
"github.com/redpanda-data/ai-sdk-go/store/session"
)

// TestRun_EmptyContentNotPersisted locks in the agent-loop guard against
// session poisoning by empty assistant messages.
//
// Sibling case to PR #116. Where #116 surfaced *truncated* tool args
// instead of corrupting state, this guards against *empty* content.
// When a provider returns a Response whose Message has zero content
// blocks (max_tokens before any block was emitted, the only block was
// a partial tool_use that stream finalisation correctly dropped, or
// refusal with no text emitted), Anthropic and most other providers
// reject any subsequent replay with `messages.N.content: Field
// required` — once the empty turn is in session state, every
// following call 400s and the conversation is wedged forever.
//
// The guard sits at the same place adk-go's AppendEvent honours its
// `Event.Partial` flag: the session-store boundary, the single
// chokepoint for "what gets persisted." The MessageEvent still fires
// (observers see what happened) and the FinishReason still propagates
// (terminal-reason handling fires below); only the persistence step
// is skipped.
func TestRun_EmptyContentNotPersisted(t *testing.T) {
t.Parallel()

model := fakellm.NewFakeModel()
model.When(fakellm.Any()).
ThenRespondWith(func(_ *llm.Request, _ *fakellm.CallContext) (*llm.Response, error) {
// max_tokens hit before any content block emitted: provider
// returns a Response with empty content + FinishReasonLength.
return &llm.Response{
Message: llm.Message{
Role: llm.RoleAssistant,
Content: nil,
},
FinishReason: llm.FinishReasonLength,
}, nil
})

ag, err := llmagent.New("test-agent", "You are a helpful assistant", model)
require.NoError(t, err)

sess := &session.State{
ID: "test-session",
Messages: []llm.Message{llm.NewMessage(llm.RoleUser, llm.NewTextPart("hello"))},
}
inv := agent.NewInvocationMetadata(sess, agent.Info{})

events := collectEvents(t, ag.Run(t.Context(), inv))

// FinishReason still propagates so callers can react.
endEvent := findInvocationEndEvent(events)
require.NotNil(t, endEvent)
assert.Equal(t, agent.FinishReasonLength, endEvent.FinishReason,
"finish reason must propagate even when content is empty")

// MessageEvent still fires so observers can see what happened.
messageEvents := filterEvents[agent.MessageEvent](events)
assert.NotEmpty(t, messageEvents, "MessageEvent must still fire so observers see the empty response")

// Session must NOT contain the empty assistant message — that's the
// whole point. Only the original user message stays.
require.Len(t, sess.Messages, 1, "empty assistant message must not be persisted to session")
assert.Equal(t, llm.RoleUser, sess.Messages[0].Role)

// Sanity: every persisted message has non-empty content. This is
// the invariant Anthropic et al. require — every message in a
// replayed array must have at least one content block.
for i, m := range sess.Messages {
assert.NotEmpty(t, m.Content, "session.Messages[%d] must have non-empty content", i)
}
}

// TestRun_NonEmptyContentStillPersisted is the negative control:
// the guard must not over-fire and skip messages that DO have content.
// If this regresses, normal turns stop being persisted and the agent
// loses all state.
func TestRun_NonEmptyContentStillPersisted(t *testing.T) {
t.Parallel()

model := fakellm.NewFakeModel()
model.When(fakellm.Any()).
ThenRespondWith(func(_ *llm.Request, _ *fakellm.CallContext) (*llm.Response, error) {
return &llm.Response{
Message: llm.Message{
Role: llm.RoleAssistant,
Content: []*llm.Part{llm.NewTextPart("hi back")},
},
FinishReason: llm.FinishReasonStop,
}, nil
})

ag, err := llmagent.New("test-agent", "You are a helpful assistant", model)
require.NoError(t, err)

sess := &session.State{
ID: "test-session",
Messages: []llm.Message{llm.NewMessage(llm.RoleUser, llm.NewTextPart("hello"))},
}
inv := agent.NewInvocationMetadata(sess, agent.Info{})

collectEvents(t, ag.Run(t.Context(), inv))

require.Len(t, sess.Messages, 2, "non-empty assistant message must be persisted")
assert.Equal(t, llm.RoleAssistant, sess.Messages[1].Role)
assert.NotEmpty(t, sess.Messages[1].Content)
}
24 changes: 22 additions & 2 deletions agent/llmagent/llmagent.go
@@ -269,8 +269,28 @@ func (a *LLMAgent) executeSingleTurn(
// Update usage tracking
agent.AddUsage(inv, resp.Usage)

// Add assistant message to session (single source of truth)
sess.Messages = append(sess.Messages, resp.Message)
// Add assistant message to session (single source of truth).
//
// Skip persistence when Content is empty. Sources we have observed:
// - max_tokens hit before any content block was emitted
// - the only content block was a partial tool_use that stream
// finalisation correctly dropped (PR #116)
// - refusal / safety filter with no text emitted
//
// In all of these the FinishReason carries the truth (Length,
// ContentFilter, etc.) and the terminal-reason handling below
// surfaces it. The MessageEvent below still fires so observers
// see what happened. Persisting the empty Message would poison
// every subsequent replay — Anthropic and most other providers
// reject `messages.N.content` arrays with `Field required` and
// the conversation is permanently wedged.
//
// Same shape as adk-go's `AppendEvent` check on `Event.Partial`:
// the session-store boundary is the right place to drop responses
// that carry no usable content.
if len(resp.Message.Content) > 0 {
sess.Messages = append(sess.Messages, resp.Message)
}

// Emit message event
if !yield(agent.MessageEvent{
139 changes: 139 additions & 0 deletions docs/empty-content-persistence.md
@@ -0,0 +1,139 @@
# Empty-content responses: open design question

Status: parked. PR #123 ships the minimal fix — agent-loop guard at the
session-store boundary. The deeper question of where the invariant
should live is not resolved.

## The bug

After PR #116 landed (drop partial `tool_use` blocks at stream
finalisation), an unrelated wedge surfaced in production: agent
sessions started failing every call with

```
messages.15.content: Field required
```

from Anthropic. The cause: a previous turn finalised with `Content =
[]` (max_tokens hit before any non-partial block was emitted, or the
only block was a partial tool_use that #116 correctly dropped, or a
refusal with no text). The agent loop's `sess.Messages = append(...)`
ran *before* the FinishReason terminal-handling check, so the empty
`Message` landed in session state. Every subsequent replay then sent
that empty array to Anthropic, which 400'd. Pod restarts didn't help;
the session store is precisely the thing that persists.
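
A minimal sketch of the resulting session state, using the SDK's message
types (the user text is illustrative):

```go
// Roughly what the poisoned state looks like. The second entry has no
// content blocks, so every replay of this session now fails provider-side
// validation before the model ever runs.
sess.Messages = []llm.Message{
	llm.NewMessage(llm.RoleUser, llm.NewTextPart("run the report")),
	{Role: llm.RoleAssistant, Content: nil}, // the empty turn Anthropic rejects
}
```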

## What we shipped (PR #123)

One-line guard at `agent/llmagent/llmagent.go:273`:

```go
if len(resp.Message.Content) > 0 {
sess.Messages = append(sess.Messages, resp.Message)
}
```

The MessageEvent still fires; the FinishReason still propagates; the
terminal-reason handling still terminates the loop cleanly. Only
persistence is skipped.

## What we considered and rejected

### A. Provider returns error on empty content

Move the check into each `response_mapper`: if `len(content) == 0`,
return `ErrResponseMapping`. The agent loop's existing terminal-error
path then fires (`generate()` returns err, loop exits cleanly).
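
Roughly, each mapper would grow a check like this (the surrounding
function and error wrapping are illustrative, not the actual mapper code):

```go
// Sketch only: turn "no content blocks" into a mapping error.
if len(content) == 0 {
	return nil, fmt.Errorf("%w: response contained no content blocks", ErrResponseMapping)
}
```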

Rejected because it swallows the FinishReason signal. `Length + empty`
is a real, valid state (the model ran out of budget before producing
anything) — surfacing it as a generic mapping error loses the
information the caller needs to decide whether to retry with higher
`max_tokens`. Same for `ContentFilter + empty` (refusal). The agent
loop's existing handling for these reasons is correct and should keep
firing.

### B. Provider error only on `Stop + empty`

Conditional version of (A): only error when the stop reason is `Stop`
(end_turn / clean stop) with empty content, since *that* is the
genuinely anomalous case. Length and ContentFilter with empty content
flow through normally.

Rejected as speculative paranoia: `Stop + empty` is theoretical and
hasn't been observed in the wild. Adding code for a phantom case adds
complexity without value. If it ever happens we can revisit.

### C. Add a `Partial bool` field to `llm.Response`

Mirror adk-go's `Event.Partial` flag. Providers set it when the
response is a non-result; the agent loop respects the flag at the
persistence boundary.

Rejected as redundant. adk-go needs Partial because they expose
streaming chunks at the API surface — they have to distinguish
"chunk-with-content" from "complete." This SDK's agent loop sees only
the final aggregated `Response`, so `Partial == (len(Content) == 0)`
in our architecture. The flag adds ceremony without distinguishing
power.

### D. Key persistence on FinishReason instead of content length

"Skip persistence when FinishReason == Length."

Rejected because Length with non-empty content is valid and worth
persisting — e.g., the model completed 3 of 4 parallel tool calls
before hitting `max_tokens` (PR #116's central scenario). The caller
wants those 3 completed tool_use blocks to inspect. Length alone
isn't synonymous with "unusable result"; `len(Content) == 0` is.

## The open question

The session-store boundary is the *right place* to honour "non-result
responses don't get persisted." That part is settled. What's not
settled:

1. **Should providers also be involved?** Right now, the invariant is
enforced only in `llmagent.go`. A non-llmagent caller that drives
`model.Generate()` directly and persists the result has to know to
apply the same guard. That's not great encapsulation. Options:

- Document the invariant: "callers must skip persistence when
`len(resp.Message.Content) == 0`." Cheapest. Relies on docs.
- Add a helper: `resp.IsPersistable() bool` returning
`len(Content) > 0`. Self-documenting at call sites (see the
sketch after this list).
- Move the guard into the session store itself (any
`sess.Append(msg)` helper rejects empty Content). Requires
introducing a session abstraction the SDK doesn't currently
own at this layer.

2. **Should we have telemetry?** Right now empty-content responses
silently disappear into the void. A counter/log would tell us how
often this fires and whether some provider/scenario is leaking
them at an unexpected rate. Cheap to add, useful for spotting
regressions (also sketched after this list).

3. **Is there a class of "incomplete" responses that don't have
`len(Content) == 0` but still shouldn't be persisted?** PR #116's
reproducer (truncated tool args yielding `unexpected end of JSON
input`) was such a case before the fix. Are there other shapes we
haven't seen yet? Hard to know without more production exposure.
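
For the helper option under point 1, a hedged sketch of what it could
look like (this method does not exist in the SDK today; name and
placement are open):

```go
// IsPersistable reports whether the response carries content worth
// writing to session state, centralising the len(Content) > 0
// invariant so non-llmagent callers don't have to rediscover it.
func (r *Response) IsPersistable() bool {
	return len(r.Message.Content) > 0
}
```

The agent-loop guard would then read `if resp.IsPersistable() { ... }`
instead of checking the length inline.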
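
For point 2, the cheapest version is a structured log (or counter)
emitted at the same guard. A sketch, using the standard library's
`log/slog` purely for illustration; the SDK may prefer its own metrics
hook:

```go
// Make dropped empty-content responses visible instead of silent,
// so we can see how often this fires and with which finish reason.
if len(resp.Message.Content) == 0 {
	slog.Warn("dropping empty assistant response from session",
		"session_id", sess.ID,
		"finish_reason", resp.FinishReason)
} else {
	sess.Messages = append(sess.Messages, resp.Message)
}
```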

## Move on

For now: PR #123 fixes the production bug. The architectural
improvements above are not blocking. Revisit when:

- Telemetry shows empty-content responses are happening at a
non-trivial rate, or
- A non-llmagent caller hits the same wedge, or
- A new "incomplete" response shape surfaces that the simple
`len(Content) == 0` check doesn't catch.

## References

- PR #116 — surface truncation instead of corrupting state when
tool_use is cut off (the sibling fix)
- PR #123 — the agent-loop guard shipped here
- adk-go `session/inmemory.go::AppendEvent` — prior art for
guarding the persistence boundary on a "not-final" signal
23 changes: 23 additions & 0 deletions llm/part.go
@@ -167,6 +167,29 @@ type ToolRequest struct {
Arguments json.RawMessage `json:"arguments"`
}

// ArgumentsAsObject decodes Arguments into a JSON object for provider APIs
// that require structured input (Anthropic tool_use.input, Bedrock tool_use
// input, Gemini function_call.args). Empty bytes decode to an empty object,
// which matches what providers actually send for no-arg tools. Invalid JSON
// also decodes to an empty object rather than erroring: corrupt arguments
// can end up in session state when a streaming turn is cut short
// mid-accumulation (e.g. stop_reason=max_tokens during input_json_delta),
// and without this fallback every replay of that session fails message
// mapping with "unexpected end of JSON input". The paired tool_result
// already carries the original parse error so the model has context.
func (t *ToolRequest) ArgumentsAsObject() map[string]any {
if len(t.Arguments) == 0 {
return map[string]any{}
}

var input map[string]any
if err := json.Unmarshal(t.Arguments, &input); err != nil {
return map[string]any{}
}

return input
}

// ToolResponse represents the result of executing a tool.
// This is sent back to the model to continue the conversation.
type ToolResponse struct {
50 changes: 50 additions & 0 deletions llm/part_test.go
@@ -0,0 +1,50 @@
// Copyright 2026 Redpanda Data, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package llm_test

import (
"encoding/json"
"testing"

"github.com/stretchr/testify/assert"

"github.com/redpanda-data/ai-sdk-go/llm"
)

func TestToolRequest_ArgumentsAsObject(t *testing.T) {
t.Parallel()

cases := []struct {
name string
in json.RawMessage
want map[string]any
}{
{"nil", nil, map[string]any{}},
{"empty", json.RawMessage(""), map[string]any{}},
{"truncated", json.RawMessage(`{"query":`), map[string]any{}},
{"not an object", json.RawMessage(`"oops"`), map[string]any{}},
{"valid empty", json.RawMessage(`{}`), map[string]any{}},
{"valid", json.RawMessage(`{"query":"SELECT 1"}`), map[string]any{"query": "SELECT 1"}},
}

for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
t.Parallel()

tr := &llm.ToolRequest{Arguments: tc.in}
assert.Equal(t, tc.want, tr.ArgumentsAsObject())
})
}
}