diff --git a/Cargo.lock b/Cargo.lock index 7b828f96a..6ba10110d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5890,6 +5890,7 @@ dependencies = [ "moltis-config", "moltis-metrics", "rand 0.9.2", + "reqwest 0.12.28", "serde", "serde_json", "sysinfo", diff --git a/crates/browser/Cargo.toml b/crates/browser/Cargo.toml index 49e68b075..1a319495c 100644 --- a/crates/browser/Cargo.toml +++ b/crates/browser/Cargo.toml @@ -12,6 +12,7 @@ moltis-common = { workspace = true } moltis-config = { workspace = true } moltis-metrics = { optional = true, workspace = true } rand = { workspace = true } +reqwest = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } sysinfo = { workspace = true } diff --git a/crates/browser/src/manager.rs b/crates/browser/src/manager.rs index 34716847d..8e932afe4 100644 --- a/crates/browser/src/manager.rs +++ b/crates/browser/src/manager.rs @@ -223,6 +223,10 @@ impl BrowserManager { BrowserAction::Back => self.go_back(session_id, sandbox).await, BrowserAction::Forward => self.go_forward(session_id, sandbox).await, BrowserAction::Refresh => self.refresh(session_id, sandbox).await, + BrowserAction::LiveUrl { interactive } => { + self.live_url(session_id, sandbox, browser, interactive) + .await + }, BrowserAction::Close => self.close(session_id, sandbox).await, }; @@ -315,6 +319,36 @@ impl BrowserManager { )) } + /// Return a human-usable live URL for this browser session. + async fn live_url( + &self, + session_id: Option<&str>, + sandbox: bool, + browser: Option, + _interactive: bool, + ) -> Result<(String, BrowserResponse), Error> { + if !sandbox { + return Err(Error::InvalidAction( + "live_url currently requires sandboxed browser sessions".to_string(), + )); + } + + let sid = self + .pool + .get_or_create(session_id, sandbox, browser) + .await?; + // Ensure a page target exists before asking browserless for /json/list + let _ = self.pool.get_page(&sid).await?; + + let http_url = self.pool.sandbox_http_url(&sid).await.ok_or_else(|| { + Error::LaunchFailed("sandbox browser HTTP endpoint not available".to_string()) + })?; + + let live_url = fetch_devtools_live_url(&http_url).await?; + let response = BrowserResponse::success(sid.clone(), 0, true).with_live_url(live_url); + Ok((sid, response)) + } + /// Take a screenshot of the page. async fn screenshot( &self, @@ -875,6 +909,40 @@ fn truncate_url(url: &str) -> String { } } +async fn fetch_devtools_live_url(http_base: &str) -> Result { + let endpoint = format!("{}/json/list", http_base.trim_end_matches('/')); + let response = reqwest::get(&endpoint) + .await + .map_err(|e| Error::Cdp(format!("failed to query browser targets: {e}")))?; + + if !response.status().is_success() { + return Err(Error::Cdp(format!( + "browser target list returned HTTP {}", + response.status() + ))); + } + + let targets = response + .json::>() + .await + .map_err(|e| Error::Cdp(format!("failed to parse browser target list: {e}")))?; + + let target = targets.first().ok_or_else(|| { + Error::Cdp("browser target list is empty; navigate first, then retry live_url".to_string()) + })?; + let frontend = target + .get("devtoolsFrontendUrl") + .and_then(serde_json::Value::as_str) + .filter(|s| !s.is_empty()) + .ok_or_else(|| Error::Cdp("browser target is missing devtoolsFrontendUrl".to_string()))?; + + if frontend.starts_with("http://") || frontend.starts_with("https://") { + Ok(frontend.to_string()) + } else { + Ok(format!("{}{}", http_base.trim_end_matches('/'), frontend)) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/browser/src/pool.rs b/crates/browser/src/pool.rs index 9c0407d1a..bcefdeb01 100644 --- a/crates/browser/src/pool.rs +++ b/crates/browser/src/pool.rs @@ -323,6 +323,19 @@ impl BrowserPool { self.instances.read().await.len() } + /// Return the sandbox browser HTTP base URL for a session, if available. + pub async fn sandbox_http_url(&self, session_id: &str) -> Option { + let instance = { + let instances = self.instances.read().await; + instances.get(session_id).cloned() + }?; + + let inst = instance.lock().await; + inst.container + .as_ref() + .map(|container| container.http_url()) + } + /// Launch a new browser instance. async fn launch_browser( &self, diff --git a/crates/browser/src/types.rs b/crates/browser/src/types.rs index de3f5a281..12788f78e 100644 --- a/crates/browser/src/types.rs +++ b/crates/browser/src/types.rs @@ -70,6 +70,15 @@ pub enum BrowserAction { /// Refresh the page. Refresh, + /// Return a user-facing URL for live manual interaction with this browser + /// session (for login/takeover workflows). + LiveUrl { + /// Optional hint for future transport backends. + /// `true` = full interactive session, `false` = read-only view preferred. + #[serde(default = "default_live_interactive")] + interactive: bool, + }, + /// Close the browser session. Close, } @@ -78,6 +87,10 @@ fn default_wait_timeout_ms() -> u64 { 30000 } +fn default_live_interactive() -> bool { + true +} + /// Known Chromium-family browser engines we can launch. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] @@ -181,6 +194,13 @@ impl fmt::Display for BrowserAction { Self::Back => write!(f, "back"), Self::Forward => write!(f, "forward"), Self::Refresh => write!(f, "refresh"), + Self::LiveUrl { interactive } => { + if *interactive { + write!(f, "live_url(interactive)") + } else { + write!(f, "live_url(read_only)") + } + }, Self::Close => write!(f, "close"), } } @@ -334,6 +354,10 @@ pub struct BrowserResponse { #[serde(skip_serializing_if = "Option::is_none")] pub title: Option, + /// User-facing URL for manual browser viewing/control. + #[serde(skip_serializing_if = "Option::is_none")] + pub live_url: Option, + /// Duration of the action in milliseconds. pub duration_ms: u64, } @@ -351,6 +375,7 @@ impl BrowserResponse { result: None, url: None, title: None, + live_url: None, duration_ms, } } @@ -367,6 +392,7 @@ impl BrowserResponse { result: None, url: None, title: None, + live_url: None, duration_ms, } } @@ -396,6 +422,11 @@ impl BrowserResponse { self.title = Some(title); self } + + pub fn with_live_url(mut self, live_url: String) -> Self { + self.live_url = Some(live_url); + self + } } /// Browser configuration. @@ -616,6 +647,16 @@ mod tests { assert_eq!(value, BrowserPreference::Brave); } + #[test] + fn test_browser_action_live_url_deserialize_defaults_interactive() { + let req: BrowserRequest = serde_json::from_str(r#"{"action":"live_url"}"#) + .unwrap_or_else(|e| panic!("failed to deserialize live_url action: {e}")); + match req.action { + BrowserAction::LiveUrl { interactive } => assert!(interactive), + _ => panic!("expected BrowserAction::LiveUrl"), + } + } + #[test] fn resolved_profile_dir_returns_path_by_default() { // Default config has persist_profile = true diff --git a/crates/tools/src/browser.rs b/crates/tools/src/browser.rs index c465d5f77..3f37c3e41 100644 --- a/crates/tools/src/browser.rs +++ b/crates/tools/src/browser.rs @@ -12,7 +12,12 @@ use { async_trait::async_trait, moltis_agents::tool_registry::AgentTool, moltis_browser::{BrowserManager, BrowserRequest}, - std::{borrow::Cow, collections::HashMap, sync::Arc}, + std::{ + borrow::Cow, + collections::HashMap, + sync::{Arc, LazyLock, RwLock as StdRwLock}, + }, + time::OffsetDateTime, tokio::sync::{OnceCell, RwLock}, tracing::debug, }; @@ -42,6 +47,26 @@ pub struct BrowserTool { session_ids: RwLock>, } +#[derive(Debug, Clone, serde::Serialize)] +pub struct BrowserSessionOverview { + pub chat_session_key: String, + pub browser_session_id: String, + pub sandboxed: bool, + pub last_seen_unix_ms: i128, +} + +static TRACKED_BROWSER_SESSIONS: LazyLock>> = + LazyLock::new(|| StdRwLock::new(HashMap::new())); + +pub fn list_tracked_browser_sessions() -> Vec { + let guard = TRACKED_BROWSER_SESSIONS + .read() + .unwrap_or_else(|e| e.into_inner()); + let mut items: Vec = guard.values().cloned().collect(); + items.sort_by(|a, b| b.last_seen_unix_ms.cmp(&a.last_seen_unix_ms)); + items +} + impl BrowserTool { const DEFAULT_SESSION_KEY: &'static str = "main"; /// Maximum number of tracked browser sessions. When exceeded the oldest @@ -86,11 +111,15 @@ impl BrowserTool { async fn clear_session(&self, session_key: &str) { let mut guard = self.session_ids.write().await; guard.remove(session_key); + let mut tracked = TRACKED_BROWSER_SESSIONS + .write() + .unwrap_or_else(|e| e.into_inner()); + tracked.remove(session_key); } /// Save the browser session ID for future reuse in the same chat/session /// context. - async fn save_session(&self, session_key: &str, session_id: &str) { + async fn save_session(&self, session_key: &str, session_id: &str, sandboxed: bool) { if !session_id.is_empty() { let mut guard = self.session_ids.write().await; // Evict an arbitrary entry when at capacity to bound memory. @@ -100,8 +129,22 @@ impl BrowserTool { { debug!(evicted = %evict_key, "browser session cache full, evicting entry"); guard.remove(&evict_key); + let mut tracked = TRACKED_BROWSER_SESSIONS + .write() + .unwrap_or_else(|e| e.into_inner()); + tracked.remove(&evict_key); } guard.insert(session_key.to_string(), session_id.to_string()); + + let mut tracked = TRACKED_BROWSER_SESSIONS + .write() + .unwrap_or_else(|e| e.into_inner()); + tracked.insert(session_key.to_string(), BrowserSessionOverview { + chat_session_key: session_key.to_string(), + browser_session_id: session_id.to_string(), + sandboxed, + last_seen_unix_ms: OffsetDateTime::now_utc().unix_timestamp_nanos() / 1_000_000, + }); } } @@ -153,7 +196,8 @@ impl AgentTool for BrowserTool { or needs interaction (clicking, forms, screenshots, JavaScript-heavy pages).\n\n\ REQUIRED: You MUST specify an 'action' parameter. Example:\n\ {\"action\": \"navigate\", \"url\": \"https://example.com\"}\n\n\ - Actions: navigate, screenshot, snapshot, click, type, scroll, evaluate, wait, close\n\n\ + Actions: navigate, screenshot, snapshot, click, type, scroll, evaluate, wait, \ + get_url, get_title, back, forward, refresh, live_url, close\n\n\ BROWSER CHOICE: optionally set \"browser\" to choose one (auto, chrome, chromium, \ edge, brave, opera, vivaldi, arc). If no browser is installed, Moltis will try \ to auto-install one.\n\n\ @@ -165,7 +209,8 @@ impl AgentTool for BrowserTool { 2. {\"action\": \"snapshot\"} - get interactive elements with ref numbers\n\ 3. {\"action\": \"click\", \"ref_\": N} - click element by ref number\n\ 4. {\"action\": \"screenshot\"} - capture the current view\n\ - 5. {\"action\": \"close\"} - close the browser when done" + 5. {\"action\": \"live_url\"} - get a manual browser view URL for human login/takeover\n\ + 6. {\"action\": \"close\"} - close the browser when done" } fn parameters_schema(&self) -> serde_json::Value { @@ -175,7 +220,7 @@ impl AgentTool for BrowserTool { "properties": { "action": { "type": "string", - "enum": ["navigate", "screenshot", "snapshot", "click", "type", "scroll", "evaluate", "wait", "get_url", "get_title", "back", "forward", "refresh", "close"], + "enum": ["navigate", "screenshot", "snapshot", "click", "type", "scroll", "evaluate", "wait", "get_url", "get_title", "back", "forward", "refresh", "live_url", "close"], "description": "REQUIRED. The browser action to perform. Use 'navigate' with 'url' to open a page, 'snapshot' to see elements, 'screenshot' to capture." }, "session_id": { @@ -222,6 +267,10 @@ impl AgentTool for BrowserTool { "timeout_ms": { "type": "integer", "description": "Timeout in milliseconds (default: 60000)" + }, + "interactive": { + "type": "boolean", + "description": "For 'live_url': true for interactive takeover, false to prefer read-only view." } } }) @@ -302,7 +351,8 @@ impl AgentTool for BrowserTool { if is_close { self.clear_session(&session_key).await; } else { - self.save_session(&session_key, &response.session_id).await; + self.save_session(&session_key, &response.session_id, response.sandboxed) + .await; } } @@ -367,9 +417,9 @@ mod tests { }; let tool = BrowserTool::from_config(&config).unwrap(); - tool.save_session("web:session:one", "browser-session-one") + tool.save_session("web:session:one", "browser-session-one", true) .await; - tool.save_session("web:session:two", "browser-session-two") + tool.save_session("web:session:two", "browser-session-two", false) .await; assert_eq!( @@ -390,7 +440,7 @@ mod tests { ..Default::default() }; let tool = BrowserTool::from_config(&config).unwrap(); - tool.save_session("web:session:one", "").await; + tool.save_session("web:session:one", "", true).await; assert_eq!(tool.get_saved_session("web:session:one").await, None); } @@ -404,7 +454,7 @@ mod tests { // Fill the cache to capacity for i in 0..BrowserTool::MAX_TRACKED_SESSIONS { - tool.save_session(&format!("session-{i}"), &format!("sid-{i}")) + tool.save_session(&format!("session-{i}"), &format!("sid-{i}"), true) .await; } assert_eq!( @@ -413,7 +463,7 @@ mod tests { ); // Adding one more should evict an entry and stay at capacity - tool.save_session("session-new", "sid-new").await; + tool.save_session("session-new", "sid-new", true).await; let guard = tool.session_ids.read().await; assert_eq!(guard.len(), BrowserTool::MAX_TRACKED_SESSIONS); assert_eq!(guard.get("session-new"), Some(&"sid-new".to_string())); @@ -427,9 +477,9 @@ mod tests { }; let tool = BrowserTool::from_config(&config).unwrap(); - tool.save_session("web:session:one", "browser-session-one") + tool.save_session("web:session:one", "browser-session-one", true) .await; - tool.save_session("web:session:two", "browser-session-two") + tool.save_session("web:session:two", "browser-session-two", false) .await; tool.clear_session("web:session:one").await; @@ -440,4 +490,23 @@ mod tests { Some("browser-session-two".to_string()) ); } + + #[tokio::test] + async fn tracked_browser_sessions_are_listed() { + let config = moltis_config::schema::BrowserConfig { + enabled: true, + ..Default::default() + }; + let tool = BrowserTool::from_config(&config).unwrap(); + + tool.save_session("web:session:list", "browser-session-list", true) + .await; + + let list = list_tracked_browser_sessions(); + assert!(list.iter().any(|entry| { + entry.chat_session_key == "web:session:list" + && entry.browser_session_id == "browser-session-list" + && entry.sandboxed + })); + } } diff --git a/crates/web/src/api.rs b/crates/web/src/api.rs index 3250465ee..f7d1bd4bc 100644 --- a/crates/web/src/api.rs +++ b/crates/web/src/api.rs @@ -41,6 +41,7 @@ const SESSION_LIST_DEFAULT_LIMIT: usize = 40; const SESSION_LIST_MAX_LIMIT: usize = 200; const SESSION_HISTORY_DEFAULT_LIMIT: usize = 120; const SESSION_HISTORY_MAX_LIMIT: usize = 500; +const BROWSER_SESSIONS_LIST_FAILED: &str = "BROWSER_SESSIONS_LIST_FAILED"; fn api_error(code: &str, error: impl Into) -> serde_json::Value { serde_json::json!({ @@ -165,6 +166,22 @@ pub async fn api_sessions_handler( } } +pub async fn api_browser_sessions_handler() -> impl IntoResponse { + let sessions = moltis_tools::browser::list_tracked_browser_sessions(); + if sessions.is_empty() { + return Json(serde_json::json!({ "sessions": [] })).into_response(); + } + + match serde_json::to_value(&sessions) { + Ok(serialized) => Json(serde_json::json!({ "sessions": serialized })).into_response(), + Err(error) => api_error_response( + StatusCode::INTERNAL_SERVER_ERROR, + BROWSER_SESSIONS_LIST_FAILED, + error.to_string(), + ), + } +} + #[derive(serde::Deserialize)] pub struct SessionHistoryQuery { #[serde(default)] diff --git a/crates/web/src/assets/js/page-settings.js b/crates/web/src/assets/js/page-settings.js index 2bd8a11d2..cf28e0cfb 100644 --- a/crates/web/src/assets/js/page-settings.js +++ b/crates/web/src/assets/js/page-settings.js @@ -1731,12 +1731,13 @@ function ToolsSection() { var [loadingTools, setLoadingTools] = useState(true); var [toolData, setToolData] = useState(null); var [nodeInventory, setNodeInventory] = useState([]); + var [browserSessions, setBrowserSessions] = useState([]); var [toolsErr, setToolsErr] = useState(null); function loadToolsOverview() { setLoadingTools(true); setToolsErr(null); - Promise.allSettled([sendRpc("chat.context", {}), sendRpc("node.list", {})]) + Promise.allSettled([sendRpc("chat.context", {}), sendRpc("node.list", {}), fetch("/api/browser/sessions")]) .then((results) => { var contextResult = results[0]; if (contextResult.status !== "fulfilled" || !contextResult.value?.ok) { @@ -1748,8 +1749,20 @@ function ToolsSection() { nodesResult.status === "fulfilled" && nodesResult.value?.ok && Array.isArray(nodesResult.value.payload) ? nodesResult.value.payload : []; + var browserSessionsResult = results[2]; + var nextBrowserSessions = []; + if (browserSessionsResult.status === "fulfilled") { + nextBrowserSessions = browserSessionsResult.value + .json() + .then((payload) => (Array.isArray(payload?.sessions) ? payload.sessions : [])) + .catch(() => []); + } setToolData(nextToolData); setNodeInventory(nextNodeInventory); + return Promise.resolve(nextBrowserSessions); + }) + .then((nextBrowserSessions) => { + setBrowserSessions(nextBrowserSessions); setLoadingTools(false); }) .catch((error) => { @@ -2025,6 +2038,49 @@ function ToolsSection() { + +
+
+

Browser Sessions

+ ${browserSessions.length} +
+
+ Tracked browser sessions mapped to chat sessions. Use this to find active browser IDs before requesting + live_url for manual login/takeover. +
+ ${ + browserSessions.length > 0 + ? html`
+ + + + + + + + + + + ${browserSessions.map( + (entry) => html` + + + + + `, + )} + +
Chat SessionBrowser SessionModeLast Seen
${entry.chat_session_key || "main"}${entry.browser_session_id || "—"} + + ${entry.sandboxed ? "Sandbox" : "Host"} + + + ${entry.last_seen_unix_ms ? new Date(Number(entry.last_seen_unix_ms)).toLocaleString() : "—"} +
+
` + : html`
No tracked browser sessions yet.
` + } +
`; } diff --git a/crates/web/src/lib.rs b/crates/web/src/lib.rs index ee7a6808e..b70cf85da 100644 --- a/crates/web/src/lib.rs +++ b/crates/web/src/lib.rs @@ -189,6 +189,10 @@ fn build_api_routes() -> Router { axum::routing::post(moltis_httpd::tools_routes::restart), ) .route("/api/sessions", get(api::api_sessions_handler)) + .route( + "/api/browser/sessions", + get(api::api_browser_sessions_handler), + ) .route( "/api/sessions/{session_key}/history", get(api::api_session_history_handler), diff --git a/docs/src/browser-automation.md b/docs/src/browser-automation.md index 8c605a222..f5b5b08b6 100644 --- a/docs/src/browser-automation.md +++ b/docs/src/browser-automation.md @@ -119,6 +119,7 @@ also the base domain itself. | `back` | Go back in history | - | | `forward` | Go forward in history | - | | `refresh` | Reload the page | - | +| `live_url` | Get a human-viewable DevTools URL for manual login/takeover (sandbox mode) | `interactive` (optional) | | `close` | Close browser session | - | ### Automatic Session Tracking @@ -146,6 +147,10 @@ pass `session_id` explicitly: This prevents pool exhaustion from LLMs that forget to pass the session_id. +The web UI now exposes tracked browser sessions in **Settings → Tools** so you +can see active chat-to-browser mappings and copy a session ID before requesting +`live_url`. + ### Browser selection You can ask for a specific browser at runtime (host mode):