steipete · adam91holt · Dec 8, 2025 · Dec 9, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,9 @@
 
 ## 0.5.4 — 2025-12-08
 
+### Added
+- Browser mode: `--agent` flag enables ChatGPT Agent mode for agentic tasks (web browsing, code execution). Response capture waits for the send button to reappear and the output to stop changing, so the full agent output is captured after all actions complete.
+
 ### Changed
 - Docs: README now explicitly warns against `pnpx @steipete/oracle` (pnpx cache breaks sqlite bindings); use `npx -y @steipete/oracle` instead. Thanks Xuanwo for flagging this.
 - Browser uploads: stick to the single reliable file-input path (no drag/drop fallbacks), wait for the composer to render the new “N files” pill/remove-card UI before sending, and prefer non-image inputs. Thanks Peter for the repros and screenshots that caught the regressions.

diff --git a/bin/oracle-cli.ts b/bin/oracle-cli.ts
@@ -121,6 +121,7 @@ interface CliOptions extends OptionValues {
   remoteChrome?: string;
   browserPort?: number;
   browserDebugPort?: number;
+  agent?: boolean;
   remoteHost?: string;
   remoteToken?: string;
   copyMarkdown?: boolean;
@@ -380,6 +381,7 @@ program
     new Option('--browser-inline-files', 'Paste files directly into the ChatGPT composer instead of uploading attachments.').default(false),
   )
   .addOption(new Option('--browser-bundle-files', 'Bundle all attachments into a single archive before uploading.').default(false))
+  .option('--agent', 'Enable ChatGPT Agent mode for agentic tasks (web browsing, code execution). Browser engine only.', false)
   .option(
     '--retain-hours <hours>',
     'Prune stored sessions older than this many hours before running (set 0 to disable).',

diff --git a/docs/browser-mode.md b/docs/browser-mode.md
@@ -52,6 +52,7 @@ You can pass the same payload inline (`--browser-inline-cookies '<json or base64
 - `--browser-url`: override ChatGPT base URL if needed.
 - `--browser-inline-files`: paste resolved files directly into the composer instead of uploading them (debug fallback; useful when the attachment button is broken).
 - `--browser-bundle-files`: bundle all resolved attachments into a single temp file before uploading (useful when you want one upload even with many files).
+- `--agent`: enable ChatGPT Agent mode for agentic tasks (web browsing, code execution). When enabled, Oracle clicks the "+" button and selects "Agent" before submitting the prompt; if the option cannot be found, the run fails. Response capture waits until the stop button is gone, the send button is back, and the output has stopped changing, so the full agent output is captured after all actions complete.
 - sqlite bindings: automatic rebuilds now require `ORACLE_ALLOW_SQLITE_REBUILD=1`. Without it, the CLI logs instructions instead of running `pnpm rebuild` on your behalf.
 - `--model`: the same flag used for API runs controls the ChatGPT picker. Pass descriptive labels such as `--model "ChatGPT 5.1 Instant"` when you want a specific browser variant; canonical API names (`gpt-5.1-pro`, `gpt-5.1`) still work and map to their default picker labels.
 - Cookie sync is mandatory—if we can’t copy cookies from Chrome, the run exits early. Use the hidden `--browser-allow-cookie-errors` flag only when you’re intentionally running logged out (it skips the early exit but still warns).

diff --git a/src/browser/actions/agentMode.ts b/src/browser/actions/agentMode.ts
@@ -0,0 +1,116 @@
+import type { ChromeClient, BrowserLogger } from '../types.js';
+import { buildClickDispatcher } from './domEvents.js';
+
+/**
+ * Switches the ChatGPT composer into Agent mode by evaluating an in-page script that opens the
+ * "+" menu and clicks "Agent". Logs the outcome and resolves with the resulting status.
+ */
+export async function enableAgentMode(
+  Runtime: ChromeClient['Runtime'],
+  logger: BrowserLogger,
+): Promise<{ status: 'enabled' | 'already-enabled' | 'not-found' | 'button-missing' }> {
+  const evaluation = await Runtime.evaluate({
+    expression: buildAgentModeExpression(),
+    awaitPromise: true,
+    returnByValue: true,
+  });
+  const outcome = evaluation.result?.value as { status: string; debug?: string } | undefined;
+
+  if (outcome?.status === 'enabled') {
+    logger('Agent mode: enabled');
+    return { status: 'enabled' };
+  }
+  if (outcome?.status === 'already-enabled') {
+    logger('Agent mode: already active');
+    return { status: 'already-enabled' };
+  }
+  if (outcome?.status === 'not-found') {
+    const detail = outcome.debug ? ` - ${outcome.debug}` : '';
+    logger(`Agent mode: option not found in menu${detail}`);
+    return { status: 'not-found' };
+  }
+  // A missing "+" button and any unrecognised result are both reported as 'button-missing'.
+  const plusMissing = outcome?.status === 'button-missing';
+  logger(plusMissing ? 'Agent mode: plus button not found' : 'Agent mode: unexpected result');
+  return { status: 'button-missing' };
+}
+
+/**
+ * Builds the script that is evaluated inside the ChatGPT page to turn on Agent mode.
+ *
+ * The script clicks the composer "+" button, polls the opened menu for an item whose text or
+ * data-testid contains "agent", and clicks it. It resolves to { status: 'enabled' },
+ * { status: 'button-missing' }, or { status: 'not-found', debug }; it never reports
+ * 'already-enabled'. The output of buildClickDispatcher() is inlined at the top of the script;
+ * dispatchClickSequence is expected to come from it.
+ */
+function buildAgentModeExpression(): string {
+  return `(async () => {
+    ${buildClickDispatcher()}
+
+    const PLUS_BUTTON_SELECTOR = 'button[data-testid="composer-plus-btn"]';
+    // Containers that count as "a menu is rendered" (also used for the not-found debug summary)
+    const MENU_SELECTOR = '[role="menu"], [role="listbox"], [data-radix-menu-content]';
+    const MENU_WAIT_MS = 500;
+    const MAX_ATTEMPTS = 10;
+    const ATTEMPT_INTERVAL_MS = 300;
+
+    // Find the plus button
+    const plusButton = document.querySelector(PLUS_BUTTON_SELECTOR);
+    if (!plusButton) {
+      return { status: 'button-missing' };
+    }
+
+    // Click to open menu
+    dispatchClickSequence(plusButton);
+    await new Promise(r => setTimeout(r, MENU_WAIT_MS));
+
+    // Search for Agent option in opened menus/popups
+    const findAgentOption = () => {
+      // The popper-wrapper selector already covers body-level floating menus, so one pass suffices.
+      const menuContainers = document.querySelectorAll(
+        '[role="menu"], [role="listbox"], [data-radix-menu-content], [data-radix-popper-content-wrapper], [data-state="open"]'
+      );
+
+      for (const container of menuContainers) {
+        // Look for items containing "agent"
+        const items = container.querySelectorAll('button, [role="menuitem"], [role="option"], div[tabindex]');
+        for (const item of items) {
+          const text = (item.textContent || '').toLowerCase().trim();
+          const testId = (item.getAttribute('data-testid') || '').toLowerCase();
+          if (text.includes('agent') || testId.includes('agent')) {
+            return item;
+          }
+        }
+      }
+
+      return null;
+    };
+
+    // Try to find and click the agent option
+    for (let attempt = 0; attempt < MAX_ATTEMPTS; attempt++) {
+      const agentOption = findAgentOption();
+      if (agentOption) {
+        dispatchClickSequence(agentOption);
+        await new Promise(r => setTimeout(r, 200));
+        return { status: 'enabled' };
+      }
+
+      // Re-click only while no menu is rendered: clicking "+" with the menu open would toggle it
+      // closed, hiding a slow-loading Agent item and leaving nothing for the debug summary below.
+      if (attempt > 0 && attempt % 3 === 0 && !document.querySelector(MENU_SELECTOR)) {
+        dispatchClickSequence(plusButton);
+      }
+      await new Promise(r => setTimeout(r, ATTEMPT_INTERVAL_MS));
+    }
+
+    // Collect debug info about what we found
+    const menuContainers = document.querySelectorAll(MENU_SELECTOR);
+    const menuTexts = Array.from(menuContainers).map(m => (m.textContent || '').slice(0, 100)).join(' | ');
+
+    return {
+      status: 'not-found',
+      debug: menuTexts ? \`Found menus: \${menuTexts}\` : 'No menus found'
+    };
+  })()`;
+}
diff --git a/src/browser/actions/assistantResponse.ts b/src/browser/actions/assistantResponse.ts
@@ -6,6 +6,7 @@ import {
   COPY_BUTTON_SELECTOR,
   FINISHED_ACTIONS_SELECTOR,
   STOP_BUTTON_SELECTOR,
+  SEND_BUTTON_SELECTOR,
 } from '../constants.js';
 import { delay } from '../utils.js';
 import { logDomFailure, logConversationSnapshot, buildConversationDebugExpression } from '../domDebug.js';
@@ -17,17 +18,18 @@ export async function waitForAssistantResponse(
   Runtime: ChromeClient['Runtime'],
   timeoutMs: number,
   logger: BrowserLogger,
+  options?: { agentMode?: boolean },
 ): Promise<{ text: string; html?: string; meta: { turnId?: string | null; messageId?: string | null } }> {
   logger('Waiting for ChatGPT response');
-  const expression = buildResponseObserverExpression(timeoutMs);
+  const expression = buildResponseObserverExpression(timeoutMs, options?.agentMode);
   const evaluationPromise = Runtime.evaluate({ expression, awaitPromise: true, returnByValue: true });
   const raceReadyEvaluation = evaluationPromise.then(
     (value) => ({ kind: 'evaluation' as const, value }),
     (error) => {
       throw { source: 'evaluation' as const, error };
     },
   );
-  const pollerPromise = pollAssistantCompletion(Runtime, timeoutMs).then(
+  const pollerPromise = pollAssistantCompletion(Runtime, timeoutMs, options?.agentMode).then(
     (value) => {
       if (!value) {
         throw { source: 'poll' as const, error: new Error(ASSISTANT_POLL_TIMEOUT_ERROR) };
@@ -213,11 +215,13 @@ async function terminateRuntimeExecution(Runtime: ChromeClient['Runtime']): Prom
 async function pollAssistantCompletion(
   Runtime: ChromeClient['Runtime'],
   timeoutMs: number,
+  agentMode?: boolean,
 ): Promise<{ text: string; html?: string; meta: { turnId?: string | null; messageId?: string | null } } | null> {
   const watchdogDeadline = Date.now() + timeoutMs;
   let previousLength = 0;
   let stableCycles = 0;
-  const requiredStableCycles = 6;
+  // Agent mode needs more stable cycles since agents pause between actions
+  const requiredStableCycles = agentMode ? 15 : 6;
   while (Date.now() < watchdogDeadline) {
     const snapshot = await readAssistantSnapshot(Runtime);
     const normalized = normalizeAssistantSnapshot(snapshot);
@@ -229,12 +233,27 @@ async function pollAssistantCompletion(
       } else {
         stableCycles += 1;
       }
-      const [stopVisible, completionVisible] = await Promise.all([
+      const [stopVisible, sendVisible, completionVisible] = await Promise.all([
         isStopButtonVisible(Runtime),
+        isSendButtonVisible(Runtime),
         isCompletionVisible(Runtime),
       ]);
-      if (completionVisible || (!stopVisible && stableCycles >= requiredStableCycles)) {
-        return normalized;
+
+      if (agentMode) {
+        // In agent mode: require send button visible (meaning agent is done)
+        // and stop button gone, with stable content
+        if (sendVisible && !stopVisible && stableCycles >= requiredStableCycles) {
+          return normalized;
+        }
+        // Also accept if completion actions are visible
+        if (completionVisible && !stopVisible && sendVisible) {
+          return normalized;
+        }
+      } else {
+        // Standard mode: return when stop button is gone and response is stable
+        if (completionVisible || (!stopVisible && stableCycles >= requiredStableCycles)) {
+          return normalized;
+        }
       }
     } else {
       previousLength = 0;
@@ -257,6 +276,18 @@ async function isStopButtonVisible(Runtime: ChromeClient['Runtime']): Promise<bo
   }
 }
 
+/** Resolves true when an element matching SEND_BUTTON_SELECTOR exists in the page (presence check only). */
+async function isSendButtonVisible(Runtime: ChromeClient['Runtime']): Promise<boolean> {
+  const expression = `Boolean(document.querySelector('${SEND_BUTTON_SELECTOR}'))`;
+  try {
+    const evaluation = await Runtime.evaluate({ expression, returnByValue: true });
+    return Boolean(evaluation.result?.value);
+  } catch {
+    // Best-effort probe: an evaluation failure is reported as "not visible".
+    return false;
+  }
+}
+
 async function isCompletionVisible(Runtime: ChromeClient['Runtime']): Promise<boolean> {
   try {
     const { result } = await Runtime.evaluate({
@@ -311,13 +342,15 @@ function buildAssistantSnapshotExpression(): string {
   })()`;
 }
 
-function buildResponseObserverExpression(timeoutMs: number): string {
+function buildResponseObserverExpression(timeoutMs: number, agentMode?: boolean): string {
   const selectorsLiteral = JSON.stringify(ANSWER_SELECTORS);
   return `(() => {
     ${buildClickDispatcher()}
     const SELECTORS = ${selectorsLiteral};
     const STOP_SELECTOR = '${STOP_BUTTON_SELECTOR}';
+    const SEND_SELECTOR = '${SEND_BUTTON_SELECTOR}';
     const FINISHED_SELECTOR = '${FINISHED_ACTIONS_SELECTOR}';
+    const AGENT_MODE = ${agentMode ? 'true' : 'false'};
     const settleDelayMs = 800;
     ${buildAssistantExtractor('extractFromTurns')}
 
@@ -364,25 +397,39 @@ function buildResponseObserverExpression(timeoutMs: number): string {
       });
 
     const waitForSettle = async (snapshot) => {
-      const settleWindowMs = 5000;
+      const settleWindowMs = AGENT_MODE ? 30000 : 5000;
       const settleIntervalMs = 400;
       const deadline = Date.now() + settleWindowMs;
       let latest = snapshot;
       let lastLength = snapshot?.text?.length ?? 0;
+      let stableCycles = 0;
+      const requiredStableCycles = AGENT_MODE ? 15 : 3;
       while (Date.now() < deadline) {
         await new Promise((resolve) => setTimeout(resolve, settleIntervalMs));
         const refreshed = extractFromTurns();
-        if (refreshed && (refreshed.text?.length ?? 0) >= lastLength) {
+        if (refreshed && (refreshed.text?.length ?? 0) > lastLength) {
           latest = refreshed;
           lastLength = refreshed.text?.length ?? lastLength;
+          stableCycles = 0;
+        } else {
+          stableCycles++;
         }
         const stopVisible = Boolean(document.querySelector(STOP_SELECTOR));
+        const sendVisible = Boolean(document.querySelector(SEND_SELECTOR));
         const finishedVisible =
           Boolean(document.querySelector(FINISHED_SELECTOR)) ||
           Array.from(document.querySelectorAll('.markdown')).some((n) => (n.textContent || '').trim() === 'Done');
 
-        if (!stopVisible || finishedVisible) {
-          break;
+        if (AGENT_MODE) {
+          // In agent mode: wait for send button to appear (agent is done)
+          if (sendVisible && !stopVisible && stableCycles >= requiredStableCycles) {
+            break;
+          }
+        } else {
+          // Standard mode
+          if (!stopVisible || finishedVisible) {
+            break;
+          }
         }
       }
       return latest ?? snapshot;

diff --git a/src/browser/config.ts b/src/browser/config.ts
@@ -21,6 +21,7 @@ export const DEFAULT_BROWSER_CONFIG: ResolvedBrowserConfig = {
   keepBrowser: false,
   hideWindow: false,
   desiredModel: DEFAULT_MODEL_TARGET,
+  agentMode: false,
   debug: false,
   allowCookieErrors: false,
   remoteChrome: null,
@@ -63,6 +64,7 @@ export function resolveBrowserConfig(config: BrowserAutomationConfig | undefined
     chromeProfile: config?.chromeProfile ?? DEFAULT_BROWSER_CONFIG.chromeProfile,
     chromePath: config?.chromePath ?? DEFAULT_BROWSER_CONFIG.chromePath,
     chromeCookiePath: config?.chromeCookiePath ?? DEFAULT_BROWSER_CONFIG.chromeCookiePath,
+    agentMode: config?.agentMode ?? DEFAULT_BROWSER_CONFIG.agentMode,
     debug: config?.debug ?? DEFAULT_BROWSER_CONFIG.debug,
     allowCookieErrors: config?.allowCookieErrors ?? envAllowCookieErrors ?? DEFAULT_BROWSER_CONFIG.allowCookieErrors,
     manualLogin,

diff --git a/src/browser/index.ts b/src/browser/index.ts
@@ -25,6 +25,7 @@ import {
   uploadAttachmentFile,
   waitForAttachmentCompletion,
   readAssistantSnapshot,
+  enableAgentMode,
 } from './pageActions.js';
 import { uploadAttachmentViaDataTransfer } from './actions/remoteFileTransfer.js';
 import { estimateTokenCount, withRetries, delay } from './utils.js';
@@ -313,6 +314,13 @@ export async function runBrowserMode(options: BrowserRunOptions): Promise<Browse
       await raceWithDisconnect(ensurePromptReady(Runtime, config.inputTimeoutMs, logger));
       logger(`Prompt textarea ready (after model switch, ${promptText.length.toLocaleString()} chars queued)`);
     }
+    // Enable agent mode if requested
+    if (config.agentMode) {
+      const agentResult = await raceWithDisconnect(enableAgentMode(Runtime, logger));
+      if (agentResult.status !== 'enabled' && agentResult.status !== 'already-enabled') {
+        throw new Error(`Failed to enable agent mode: ${agentResult.status}`);
+      }
+    }
     const attachmentNames = attachments.map((a) => path.basename(a.path));
     if (attachments.length > 0) {
       if (!DOM) {
@@ -328,7 +336,7 @@ export async function runBrowserMode(options: BrowserRunOptions): Promise<Browse
     }
     await raceWithDisconnect(submitPrompt({ runtime: Runtime, input: Input, attachmentNames }, promptText, logger));
     stopThinkingMonitor = startThinkingStatusMonitor(Runtime, logger, options.verbose ?? false);
-    const answer = await raceWithDisconnect(waitForAssistantResponse(Runtime, config.timeoutMs, logger));
+    const answer = await raceWithDisconnect(waitForAssistantResponse(Runtime, config.timeoutMs, logger, { agentMode: config.agentMode }));
     answerText = answer.text;
     answerHtml = answer.html ?? '';
     const copiedMarkdown = await raceWithDisconnect(
@@ -733,6 +741,13 @@ async function runRemoteBrowserMode(
       await ensurePromptReady(Runtime, config.inputTimeoutMs, logger);
       logger(`Prompt textarea ready (after model switch, ${promptText.length.toLocaleString()} chars queued)`);
     }
+    // Enable agent mode if requested
+    if (config.agentMode) {
+      const agentResult = await enableAgentMode(Runtime, logger);
+      if (agentResult.status !== 'enabled' && agentResult.status !== 'already-enabled') {
+        throw new Error(`Failed to enable agent mode: ${agentResult.status}`);
+      }
+    }
 
     const attachmentNames = attachments.map((a) => path.basename(a.path));
     if (attachments.length > 0) {
@@ -750,7 +765,7 @@ async function runRemoteBrowserMode(
     }
     await submitPrompt({ runtime: Runtime, input: Input, attachmentNames }, promptText, logger);
     stopThinkingMonitor = startThinkingStatusMonitor(Runtime, logger, options.verbose ?? false);
-    const answer = await waitForAssistantResponse(Runtime, config.timeoutMs, logger);
+    const answer = await waitForAssistantResponse(Runtime, config.timeoutMs, logger, { agentMode: config.agentMode });
     answerText = answer.text;
     answerHtml = answer.html ?? '';
 

diff --git a/src/browser/pageActions.ts b/src/browser/pageActions.ts
@@ -9,3 +9,4 @@ export {
   buildAssistantExtractorForTest,
   buildConversationDebugExpressionForTest,
 } from './actions/assistantResponse.js';
+export { enableAgentMode } from './actions/agentMode.js';