Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
5d67e8f
feat: enhance session status indicators and AskUserQuestion handling
Gkrumbach07 Feb 25, 2026
615dd0a
refactor: update session handling and agent status management
Gkrumbach07 Feb 26, 2026
4d033de
refactor: update session name generation to use millisecond timestamp
Gkrumbach07 Feb 26, 2026
a1e4ef8
fix: resolve frontend lint errors and remove worktree artifacts
Gkrumbach07 Feb 27, 2026
fb3a8d8
fix: address review findings - lint, race condition, stale agentStatus
Gkrumbach07 Feb 27, 2026
4585a54
fix: resolve frontend build errors - missing import and wrong functio…
Gkrumbach07 Mar 9, 2026
5ed8e63
fix: derive agentStatus from event log and fix AskUserQuestion UI
Gkrumbach07 Mar 10, 2026
014bd0f
fix: add StoredAgentStatus type for consistent agent status typing
Mar 10, 2026
5cd1cec
fix: optimize DeriveAgentStatus to read only last 64KB of event log
Mar 10, 2026
d97c923
test: add comprehensive unit tests for DeriveAgentStatus
Mar 10, 2026
07d469b
fix: address minor code quality issues
Mar 10, 2026
bbe7175
chore: remove old ask-user-question.tsx from components/ui/
Mar 10, 2026
c248f63
fix: address review feedback — remove operator agentStatus writes and…
Mar 11, 2026
d30db53
fix: address review feedback
ambient-code[bot] Mar 11, 2026
9f29f5d
fix: address CodeRabbit critical & accessibility feedback
Mar 11, 2026
7965b66
fix: optimize enrichAgentStatus for paginated sessions and add exhaus…
Gkrumbach07 Mar 11, 2026
082343f
fix: remove unused exhaustive check variable to fix lint error
Gkrumbach07 Mar 11, 2026
044420c
fix: address review feedback - remove unused import and fix namespace
ambient-code[bot] Mar 11, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ dmypy.json

# Claude Code
.claude/settings.local.json
.claude/worktrees/

# mkdocs
/site
Expand Down
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -734,9 +734,9 @@ kind-port-forward: check-kubectl check-local-context ## Port-forward kind servic
@echo ""
@echo "$(COLOR_YELLOW)Press Ctrl+C to stop$(COLOR_RESET)"
@echo ""
@trap 'echo ""; echo "$(COLOR_GREEN)βœ“$(COLOR_RESET) Port forwarding stopped"; exit 0' INT; \
(kubectl port-forward -n ambient-code svc/frontend-service $(KIND_FWD_FRONTEND_PORT):3000 >/dev/null 2>&1 &); \
(kubectl port-forward -n ambient-code svc/backend-service $(KIND_FWD_BACKEND_PORT):8080 >/dev/null 2>&1 &); \
@trap 'kill 0; echo ""; echo "$(COLOR_GREEN)βœ“$(COLOR_RESET) Port forwarding stopped"; exit 0' INT; \
kubectl port-forward -n $(NAMESPACE) svc/frontend-service $(KIND_FWD_FRONTEND_PORT):3000 >/dev/null 2>&1 & \
kubectl port-forward -n $(NAMESPACE) svc/backend-service $(KIND_FWD_BACKEND_PORT):8080 >/dev/null 2>&1 & \
wait

dev-bootstrap: check-kubectl check-local-context ## Bootstrap developer workspace with API key and integrations
Expand Down
38 changes: 36 additions & 2 deletions components/backend/handlers/sessions.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ var (
GetGitHubToken func(context.Context, kubernetes.Interface, dynamic.Interface, string, string) (string, error)
GetGitLabToken func(context.Context, kubernetes.Interface, string, string) (string, error)
DeriveRepoFolderFromURL func(string) string
// DeriveAgentStatusFromEvents derives agentStatus from the persisted event log.
// Set by the websocket package at init to avoid circular imports.
// sessionID should be namespace-qualified (e.g., "namespace/sessionName") to avoid cross-project collisions.
DeriveAgentStatusFromEvents func(sessionID string) string
Comment on lines +44 to +47
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Verify how agent-status derivation is wired and what key the persisted event store uses.
# Expected: the implementation should use a namespace-qualified or otherwise globally unique key.

rg -n -C3 '\bDeriveAgentStatusFromEvents\b|\bDeriveAgentStatus\b'
rg -n -C3 'sdkSession(Id|ID)|sessionName|namespace|project'

Repository: ambient-code/platform

Length of output: 50381


🏁 Script executed:

# Find where sessions are created and how sessionID/name are set
rg -n 'session.*ID|sessionID|metadata\["name"\]|Metadata.*name' components/backend/handlers/sessions.go -A 2 -B 2

# Check what StateBaseDir contains and how session directories are structured  
rg -n 'StateBaseDir|sessions/' components/backend/websocket/ -A 2 -B 2

# Look for session creation to understand ID generation
rg -n 'CreateSession|GenerateName|UnixMilli' components/backend/handlers/sessions.go -A 5 -B 2

Repository: ambient-code/platform

Length of output: 5144


🏁 Script executed:

# Find all calls to persistEvent to see what sessionID value is passed
rg -n 'persistEvent' components/backend/websocket/ -B 3 -A 1

# Search for where events are written to understand the sessionID context
rg -n 'HandleMessage|broadcastEvent|persistEvent' components/backend/websocket/agui_proxy.go -B 5 -A 5

Repository: ambient-code/platform

Length of output: 4352


🏁 Script executed:

# Find function definitions and signatures in agui_proxy.go to understand sessionName and sessionID
rg -n 'func.*sessionName|func.*sessionID|persistStreamedEvent|HandleMessage|HandleAGUI' components/backend/websocket/agui_proxy.go -A 15 | head -100

# Look for where sessionName is set/extracted
rg -n 'sessionName\s*:=|sessionName\s*=' components/backend/websocket/agui_proxy.go -B 3 -A 2

Repository: ambient-code/platform

Length of output: 6105


🏁 Script executed:

# Verify that sessionID in persistStreamedEvent is actually the bare sessionName
rg -n 'persistStreamedEvent' components/backend/websocket/agui_proxy.go -B 10 -A 5

# Check StateBaseDir initialization and scope
rg -n 'StateBaseDir\s*=' components/backend/ -B 2 -A 2

# Understand if StateBaseDir is per-session or shared
rg -n 'STATE_BASE_DIR|StateBaseDir' components/backend/main.go -B 3 -A 3

Repository: ambient-code/platform

Length of output: 3936


🏁 Script executed:

# Check if backend is deployed per-namespace or per-cluster
fd -t f 'backend.*deployment\|backend.*yaml' components/manifests/ --type f | head -20

# Look at deployment manifests to understand scope
rg -n 'namespace|spec:' components/manifests/ -l | grep -i backend | head -5

Repository: ambient-code/platform

Length of output: 371


🏁 Script executed:

# Find and read backend deployment files
find components/manifests -name "*backend*" -type f | grep -E 'deployment|kustomization' | head -10

# Check for kustomization files that define the backend
fd 'kustomization' components/manifests/base -o components/manifests/overlays

# Look for backend pod template specifications
rg -n 'kind: Deployment|kind: Pod|spec:' components/manifests/base/ -l | grep -i backend

Repository: ambient-code/platform

Length of output: 483


🏁 Script executed:

# Read backend deployment to check namespace and StateBaseDir setup
cat -n components/manifests/base/backend-deployment.yaml | head -80

# Check how StateBaseDir is used and if there's any namespace qualification
rg -n 'STATE_BASE_DIR|StateBaseDir|env:' components/manifests/base/backend-deployment.yaml -B 2 -A 2

Repository: ambient-code/platform

Length of output: 3269


🏁 Script executed:

# Check if backend is deployed per-namespace (in kustomization)
rg -n 'backend-deployment' components/manifests/base/kustomization.yaml -B 2 -A 2

# Check namespace in kustomization
rg -n 'namespace:' components/manifests/base/kustomization.yaml

# Verify if sessions can be created with same name in different namespaces
rg -n 'namespace|project' components/backend/handlers/sessions.go | grep -i create -A 10 -B 10 | head -50

Repository: ambient-code/platform

Length of output: 4377


🏁 Script executed:

# Check if backend serves multiple projects/namespaces or is single-tenant
rg -n 'project.*Param|projectName' components/backend/handlers/sessions.go | head -20

# Verify StateBaseDir is shared/mounted and if it's truly unqualified
rg -n 'StateBaseDir|/workspace' components/manifests/ -r

# Check if PVC is shared across namespaces or single-namespace
rg -n 'volumeMounts|persistentVolume' components/manifests/base/backend-deployment.yaml -B 2 -A 5

Repository: ambient-code/platform

Length of output: 2506


Cross-project session collision in agent status derivation — use namespace-qualified keys in event store.

Sessions can be created with identical names in different projects if created within the same millisecond (e.g., session-1709251234567). The event store path uses only the bare session name — {StateBaseDir}/sessions/{sessionName}/agui-events.jsonl — without namespace qualification. This allows two projects' sessions with the same name to collide and read/write the same event file, causing enrichAgentStatus() to derive incorrect agentStatus for affected sessions.

To fix:

  • Qualify the event store key with namespace: e.g., {StateBaseDir}/sessions/{namespace}/{sessionName}/agui-events.jsonl
  • Update persistEvent(), loadEvents(), DeriveAgentStatus(), and all callers to use the namespace-qualified path
  • Ensure enrichAgentStatus() passes both namespace and session name to DeriveAgentStatusFromEvents()

Alternatively, use the session's UID instead of the name as the store key, since UIDs are globally unique within Kubernetes.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@components/backend/handlers/sessions.go` around lines 44 - 46, The event
store currently uses only sessionName which allows cross-project collisions;
update the storage key to include namespace (e.g.,
{StateBaseDir}/sessions/{namespace}/{sessionName}/agui-events.jsonl) and change
all related functions to accept and propagate namespace: modify
persistEvent(...) and loadEvents(...) to build and use the namespace-qualified
path, update the signature and callers of DeriveAgentStatusFromEvents to accept
(namespace, sessionName) and adjust
DeriveAgentStatus/DeriveAgentStatusFromEvents implementation accordingly, and
ensure enrichAgentStatus() calls DeriveAgentStatusFromEvents with both namespace
and sessionName; alternatively you may switch to using the session UID as the
unique store key if preferred, but be consistent across persist/load/derive
callers.

// LEGACY: SendMessageToSession removed - AG-UI server uses HTTP/SSE instead of WebSocket
)

Expand Down Expand Up @@ -361,6 +365,28 @@ func parseStatus(status map[string]interface{}) *types.AgenticSessionStatus {

// V2 API Handlers - Multi-tenant session management

// enrichAgentStatus overwrites session.Status.AgentStatus with the status
// derived from the persisted event log, which is treated as the source of
// truth in place of the value cached on the CR.
//
// The session is left untouched when its status is missing, its phase is not
// "Running", the DeriveAgentStatusFromEvents hook has not been wired up, the
// metadata lacks a non-empty string name or namespace, or the hook returns "".
func enrichAgentStatus(session *types.AgenticSession) {
	status := session.Status
	if status == nil || status.Phase != "Running" || DeriveAgentStatusFromEvents == nil {
		return
	}

	// A failed type assertion yields "", which the emptiness check below rejects.
	namespace, _ := session.Metadata["namespace"].(string)
	name, _ := session.Metadata["name"].(string)
	if namespace == "" || name == "" {
		return
	}

	// The event store is keyed by "<namespace>/<name>" so that sessions sharing
	// a name across projects do not collide.
	derived := DeriveAgentStatusFromEvents(namespace + "/" + name)
	if derived == "" {
		return
	}
	status.AgentStatus = types.StringPtr(derived)
}

func ListSessions(c *gin.Context) {
project := c.GetString("project")

Expand Down Expand Up @@ -431,6 +457,11 @@ func ListSessions(c *gin.Context) {
totalCount := len(sessions)
paginatedSessions, hasMore, nextOffset := paginateSessions(sessions, params.Offset, params.Limit)

// Derive agentStatus from event log only for paginated sessions (performance optimization)
for i := range paginatedSessions {
enrichAgentStatus(&paginatedSessions[i])
}

response := types.PaginatedResponse{
Items: paginatedSessions,
TotalCount: totalCount,
Expand Down Expand Up @@ -645,9 +676,9 @@ func CreateSession(c *gin.Context) {
timeout = *req.Timeout
}

// Generate unique name (timestamp-based)
// Generate unique name (millisecond timestamp for burst-creation safety)
// Note: Runner will create branch as "ambient/{session-name}"
timestamp := time.Now().Unix()
timestamp := time.Now().UnixMilli()
name := fmt.Sprintf("session-%d", timestamp)
Comment on lines +679 to 682
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Millisecond timestamps still allow name collisions.

Two CreateSession requests in the same millisecond for the same namespace will produce the same CR name and one of them will fail with AlreadyExists, which this path currently turns into a 500. Please switch to Kubernetes generateName or retry with a random suffix on collision.

As per coding guidelines, "Handle errors and edge cases explicitly rather than ignoring them."

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@components/backend/handlers/sessions.go` around lines 673 - 676, The current
CreateSession code builds a deterministic CR name using the millisecond
timestamp (variables timestamp and name), which can collide and causes an
AlreadyExists error that surfaces as a 500; change the session CR creation to
use Kubernetes server-side name generation (set ObjectMeta.GenerateName like
"session-<ns>-") or implement a small retry loop that appends a random suffix to
name on apierrors.IsAlreadyExists collisions (and retry a few times)
instead of failing; ensure the handler (CreateSession) explicitly detects
apierrors.IsAlreadyExists and either retries or returns a 409/meaningful client
error rather than an internal 500.


// Create the custom resource
Expand Down Expand Up @@ -903,6 +934,9 @@ func GetSession(c *gin.Context) {
session.Status = parseStatus(status)
}

// Derive agentStatus from event log (source of truth) for running sessions
enrichAgentStatus(&session)

session.AutoBranch = ComputeAutoBranch(sessionName)

c.JSON(http.StatusOK, session)
Expand Down
1 change: 1 addition & 0 deletions components/backend/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ func main() {

// Initialize websocket package
websocket.StateBaseDir = server.StateBaseDir
handlers.DeriveAgentStatusFromEvents = websocket.DeriveAgentStatus

// Normal server mode
if err := server.Run(registerRoutes); err != nil {
Expand Down
7 changes: 7 additions & 0 deletions components/backend/types/agui.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,13 @@ const (
EventTypeMeta = "META"
)

// Agent status values derived from the AG-UI event stream.
// These are the non-empty results of websocket.DeriveAgentStatus, which scans
// a session's persisted event log from newest to oldest.
const (
// AgentStatusWorking is reported when the most recent lifecycle event is
// RUN_STARTED, i.e. a run has begun and no run-end event follows it.
AgentStatusWorking = "working"
// AgentStatusIdle is reported when the latest run ended (RUN_FINISHED or
// RUN_ERROR) and no AskUserQuestion tool call was found for that run.
AgentStatusIdle = "idle"
// AgentStatusWaitingInput is reported when a TOOL_CALL_START for the
// AskUserQuestion tool is the deciding event found in the scan.
AgentStatusWaitingInput = "waiting_input"
)

// AG-UI Message Roles
// See: https://docs.ag-ui.com/concepts/messages
const (
Expand Down
1 change: 1 addition & 0 deletions components/backend/types/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ type AgenticSessionStatus struct {
StartTime *string `json:"startTime,omitempty"`
CompletionTime *string `json:"completionTime,omitempty"`
LastActivityTime *string `json:"lastActivityTime,omitempty"`
AgentStatus *string `json:"agentStatus,omitempty"`
StoppedReason *string `json:"stoppedReason,omitempty"`
ReconciledRepos []ReconciledRepo `json:"reconciledRepos,omitempty"`
ReconciledWorkflow *ReconciledWorkflow `json:"reconciledWorkflow,omitempty"`
Expand Down
44 changes: 33 additions & 11 deletions components/backend/websocket/agui_proxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -257,10 +257,13 @@ func HandleAGUIRunProxy(c *gin.Context) {

log.Printf("AGUI Proxy: run=%s session=%s/%s msgs=%d", truncID(runID), projectName, sessionName, len(rawMessages))

// Use namespace-qualified session ID to avoid cross-project collisions
namespacedSessionID := projectName + "/" + sessionName

sessionLastSeen.Store(sessionName, time.Now())

// Store project→session mapping for activity tracking in persistStreamedEvent
sessionProjectMap.Store(sessionName, projectName)
sessionProjectMap.Store(namespacedSessionID, projectName)

// Resolve and cache the runner port for this session from the registry.
cacheSessionPort(projectName, sessionName)
Expand Down Expand Up @@ -297,7 +300,7 @@ func HandleAGUIRunProxy(c *gin.Context) {
runnerURL := getRunnerEndpoint(projectName, sessionName)

// Start background goroutine to proxy runner SSE → persist + broadcast
go proxyRunnerStream(runnerURL, bodyBytes, sessionName, runID, threadID)
go proxyRunnerStream(runnerURL, bodyBytes, sessionName, namespacedSessionID, runID, threadID)

// Return metadata immediately — events arrive via GET /agui/events
c.JSON(http.StatusOK, gin.H{
Expand All @@ -309,21 +312,22 @@ func HandleAGUIRunProxy(c *gin.Context) {
// proxyRunnerStream connects to the runner's SSE endpoint, reads events,
// persists them, and publishes them to the live broadcast pipe. Runs in
// a background goroutine so the POST /agui/run handler can return immediately.
func proxyRunnerStream(runnerURL string, bodyBytes []byte, sessionName, runID, threadID string) {
// namespacedSessionID is the namespace-qualified session ID (e.g., "namespace/sessionName") for event persistence.
func proxyRunnerStream(runnerURL string, bodyBytes []byte, sessionName, namespacedSessionID, runID, threadID string) {
log.Printf("AGUI Proxy: connecting to runner at %s", runnerURL)
resp, err := connectToRunner(runnerURL, bodyBytes)
if err != nil {
log.Printf("AGUI Proxy: runner unavailable for %s: %v", sessionName, err)
// Publish error events so GET /agui/events subscribers see the failure
publishAndPersistErrorEvents(sessionName, runID, threadID, "Runner is not available")
publishAndPersistErrorEvents(sessionName, namespacedSessionID, runID, threadID, "Runner is not available")
return
}
defer resp.Body.Close()

if resp.StatusCode != http.StatusOK {
body, _ := io.ReadAll(resp.Body)
log.Printf("AGUI Proxy: runner returned %d: %s", resp.StatusCode, string(body))
publishAndPersistErrorEvents(sessionName, runID, threadID, fmt.Sprintf("Runner error: HTTP %d", resp.StatusCode))
publishAndPersistErrorEvents(sessionName, namespacedSessionID, runID, threadID, fmt.Sprintf("Runner error: HTTP %d", resp.StatusCode))
return
}

Expand All @@ -343,7 +347,7 @@ func proxyRunnerStream(runnerURL string, bodyBytes []byte, sessionName, runID, t
// Persist every data event to JSONL
if strings.HasPrefix(trimmed, "data: ") {
jsonData := strings.TrimPrefix(trimmed, "data: ")
persistStreamedEvent(sessionName, runID, threadID, jsonData)
persistStreamedEvent(namespacedSessionID, runID, threadID, jsonData)
}

// Publish raw SSE line to all GET /agui/events subscribers
Expand All @@ -356,14 +360,15 @@ func proxyRunnerStream(runnerURL string, bodyBytes []byte, sessionName, runID, t
// publishAndPersistErrorEvents generates RUN_STARTED + RUN_ERROR events,
// persists them, and publishes to the live broadcast so subscribers get
// notified of runner failures.
func publishAndPersistErrorEvents(sessionName, runID, threadID, message string) {
// sessionName is used for broadcasting; namespacedSessionID is used for persistence.
func publishAndPersistErrorEvents(sessionName, namespacedSessionID, runID, threadID, message string) {
// RUN_STARTED
startEvt := map[string]interface{}{
"type": "RUN_STARTED",
"threadId": threadID,
"runId": runID,
}
persistEvent(sessionName, startEvt)
persistEvent(namespacedSessionID, startEvt)
startData, _ := json.Marshal(startEvt)
publishLine(sessionName, fmt.Sprintf("data: %s\n\n", startData))

Expand All @@ -374,7 +379,7 @@ func publishAndPersistErrorEvents(sessionName, runID, threadID, message string)
"threadId": threadID,
"runId": runID,
}
persistEvent(sessionName, errEvt)
persistEvent(namespacedSessionID, errEvt)
errData, _ := json.Marshal(errEvt)
publishLine(sessionName, fmt.Sprintf("data: %s\n\n", errData))
}
Expand Down Expand Up @@ -436,15 +441,19 @@ func persistStreamedEvent(sessionID, runID, threadID, jsonData string) {

persistEvent(sessionID, event)

// Update lastActivityTime on CR for activity events (debounced).
// Extract event type to check; projectName is derived from the
// Extract event type; projectName is derived from the
// sessionID-to-project mapping populated by HandleAGUIRunProxy.
eventType, _ := event["type"].(string)

// Update lastActivityTime on CR for activity events (debounced).
if isActivityEvent(eventType) {
if projectName, ok := sessionProjectMap.Load(sessionID); ok {
updateLastActivityTime(projectName.(string), sessionID, eventType == types.EventTypeRunStarted)
}
}

// agentStatus is derived at query time from the event log (DeriveAgentStatus).
// No CR updates needed here — the persisted events ARE the source of truth.
}

// ─── POST /agui/interrupt ────────────────────────────────────────────
Expand Down Expand Up @@ -945,3 +954,16 @@ func updateLastActivityTime(projectName, sessionName string, immediate bool) {
}
}()
}

// isAskUserQuestionToolCall reports whether name refers to the AskUserQuestion
// human-in-the-loop tool.
//
// The name is lowercased and every rune outside a-z is discarded before the
// comparison, so spellings such as "ask_user_question" or "Ask-User-Question"
// are accepted. This mirrors the frontend pattern in use-agent-status.ts.
func isAskUserQuestionToolCall(name string) bool {
	lettersOnly := func(r rune) rune {
		if r < 'a' || r > 'z' {
			return -1 // a negative result makes strings.Map drop the rune
		}
		return r
	}
	return strings.Map(lettersOnly, strings.ToLower(name)) == "askuserquestion"
}
105 changes: 105 additions & 0 deletions components/backend/websocket/agui_store.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ package websocket

import (
"ambient-code-backend/types"
"bytes"
"encoding/json"
"fmt"
"log"
Expand Down Expand Up @@ -194,6 +195,110 @@ func loadEvents(sessionID string) []map[string]interface{} {
return events
}

// DeriveAgentStatus reads a session's event log and returns the agent
// status derived from the last significant events.
//
// sessionID should be namespace-qualified (e.g., "namespace/sessionName") to avoid cross-project collisions.
// Returns "" if the status cannot be determined (no events, file missing, etc.).
func DeriveAgentStatus(sessionID string) string {
// sessionID is now namespace-qualified, e.g., "default/session-123"
path := fmt.Sprintf("%s/sessions/%s/agui-events.jsonl", StateBaseDir, sessionID)

// Read only the tail of the file to avoid loading entire event log into memory.
// 64KB is sufficient for recent lifecycle events (scanning backwards).
const maxTailBytes = 64 * 1024

file, err := os.Open(path)
if err != nil {
return ""
}
defer file.Close()

stat, err := file.Stat()
if err != nil {
return ""
}

fileSize := stat.Size()
var data []byte

if fileSize <= maxTailBytes {
// File is small, read it all
data, err = os.ReadFile(path)
if err != nil {
return ""
}
} else {
// File is large, seek to tail and read last N bytes
offset := fileSize - maxTailBytes
_, err = file.Seek(offset, 0)
if err != nil {
return ""
}

data = make([]byte, maxTailBytes)
n, err := file.Read(data)
if err != nil {
return ""
}
data = data[:n]

// Skip partial first line (we seeked into the middle of a line)
if idx := bytes.IndexByte(data, '\n'); idx >= 0 {
data = data[idx+1:]
}
}
Comment on lines +203 to +250
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick | 🔵 Trivial

Consider caching derived status to reduce I/O on list endpoints.

The 64KB tail read optimization is good, but DeriveAgentStatus is called for every "Running" session in ListSessions (see enrichAgentStatus in sessions.go). Each call performs:

  • os.Open + file.Stat + file.Seek + file.Read
  • JSON parsing of each line in the tail

For endpoints listing many sessions, this creates per-session I/O overhead. Consider caching the derived status (with TTL or invalidation on new events) to avoid repeated file reads for the same session within a short window.


lines := splitLines(data)
Comment on lines +203 to +252
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Keep status reads on the same migration path as event replay.

This function bypasses the loader that handles messages.jsonl → agui-events.jsonl migration, so legacy sessions will resolve to "" here until someone opens /agui/events. Status derivation should share the same loading/migration path as replay.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@components/backend/websocket/agui_store.go` around lines 201 - 209,
DeriveAgentStatus currently reads agui-events.jsonl directly (using StateBaseDir
and os.ReadFile) which bypasses the migration/loader for messages.jsonl →
agui-events.jsonl and causes legacy sessions to return "". Replace the direct
file read with the canonical session events loader/migration path used elsewhere
(the function that guarantees messages.jsonl is migrated to
agui-events.jsonl—the same loader used by the /agui/events replay code), then
derive status from the loader's returned events (instead of splitLines on raw
file bytes); keep references to DeriveAgentStatus, StateBaseDir and splitLines
in your changes so the logic stays clear.


// Scan backwards. We only care about lifecycle and AskUserQuestion events.
// RUN_STARTED → "working"
// RUN_FINISHED / RUN_ERROR → "idle", unless same run had AskUserQuestion
// TOOL_CALL_START (AskUserQuestion) → "waiting_input"
var runEndRunID string // set when we hit RUN_FINISHED/RUN_ERROR and need to look deeper
for i := len(lines) - 1; i >= 0; i-- {
if len(lines[i]) == 0 {
continue
}
var evt map[string]interface{}
if err := json.Unmarshal(lines[i], &evt); err != nil {
continue
}
evtType, _ := evt["type"].(string)

switch evtType {
case types.EventTypeRunStarted:
if runEndRunID != "" {
// We were scanning for an AskUserQuestion but hit RUN_STARTED first → idle
return types.AgentStatusIdle
}
return types.AgentStatusWorking

case types.EventTypeRunFinished, types.EventTypeRunError:
if runEndRunID == "" {
// First run-end seen; scan deeper within this run for AskUserQuestion
runEndRunID, _ = evt["runId"].(string)
}

case types.EventTypeToolCallStart:
if runEndRunID != "" {
// Only relevant if we're scanning within the ended run
if evtRunID, _ := evt["runId"].(string); evtRunID != "" && evtRunID != runEndRunID {
return types.AgentStatusIdle
}
}
if toolName, _ := evt["toolCallName"].(string); isAskUserQuestionToolCall(toolName) {
return types.AgentStatusWaitingInput
}
}
}

if runEndRunID != "" {
return types.AgentStatusIdle
}
return ""
}

// ─── Compaction ──────────────────────────────────────────────────────
//
// Go port of @ag-ui/client compactEvents. Concatenates streaming deltas
Expand Down
Loading
Loading