@@ -41,7 +41,16 @@ function log(...args) {
4141}
// Default cap on generated tokens — presumably used when no per-model output
// history is available yet; TODO(review): confirm against createProxy usage.
const DEFAULT_MAX_TOKENS = 4096;
4343// Per-model last output tokens for adaptive max_tokens (avoids cross-request pollution)
// Cap on how many models we remember, so the map cannot grow without bound
// in a long-lived proxy process.
const MAX_TRACKED_MODELS = 50;
// Per-model last output tokens for adaptive max_tokens (avoids cross-request pollution)
const lastOutputByModel = new Map();

/**
 * Record the most recent output-token count for a model, evicting the
 * least-recently-updated entry once MAX_TRACKED_MODELS is reached.
 *
 * @param {string} model - model identifier used as the map key
 * @param {number} tokens - output token count observed in the latest response
 */
function trackOutputTokens(model, tokens) {
  // Evict only when inserting a NEW key would exceed the cap. Updating an
  // existing key does not grow the map, so evicting in that case would
  // needlessly drop an unrelated model's entry (the original bug).
  if (!lastOutputByModel.has(model) && lastOutputByModel.size >= MAX_TRACKED_MODELS) {
    const oldestKey = lastOutputByModel.keys().next().value;
    // Compare against undefined rather than truthiness so a '' key would
    // still be evicted; next().value is undefined only for an empty map,
    // which the size check above already rules out.
    if (oldestKey !== undefined) {
      lastOutputByModel.delete(oldestKey);
    }
  }
  // Delete-then-set refreshes Map insertion order, turning the FIFO cap
  // into LRU: a frequently updated model is never the eviction victim.
  lastOutputByModel.delete(model);
  lastOutputByModel.set(model, tokens);
}
4554// Model shortcuts for quick switching
4655const MODEL_SHORTCUTS = {
4756 // Routing profiles
@@ -388,7 +397,7 @@ export function createProxy(options) {
388397 const inputMatch = fullResponse . match ( / " i n p u t _ t o k e n s " \s * : \s * ( \d + ) / ) ;
389398 if ( lastOutputMatch ) {
390399 const outputTokens = parseInt ( lastOutputMatch [ 1 ] , 10 ) ;
391- lastOutputByModel . set ( finalModel , outputTokens ) ;
400+ trackOutputTokens ( finalModel , outputTokens ) ;
392401 const inputTokens = inputMatch
393402 ? parseInt ( inputMatch [ 1 ] , 10 )
394403 : 0 ;
@@ -419,7 +428,7 @@ export function createProxy(options) {
419428 const parsed = JSON . parse ( text ) ;
420429 if ( parsed . usage ?. output_tokens ) {
421430 const outputTokens = parsed . usage . output_tokens ;
422- lastOutputByModel . set ( finalModel , outputTokens ) ;
431+ trackOutputTokens ( finalModel , outputTokens ) ;
423432 const inputTokens = parsed . usage ?. input_tokens || 0 ;
424433 const latencyMs = Date . now ( ) - requestStartTime ;
425434 const cost = estimateCost ( finalModel , inputTokens , outputTokens ) ;
0 commit comments