From b6ba8e29efea69a255a6f8be3e19da22aa5b733b Mon Sep 17 00:00:00 2001 From: Nathan Flurry Date: Tue, 17 Mar 2026 04:37:17 -0700 Subject: [PATCH 01/51] feat: [US-001] - Add browser_install.rs CLI command Co-Authored-By: Claude Opus 4.6 (1M context) --- .../sandbox-agent/src/browser_install.rs | 153 ++++++++++++++++++ .../sandbox-agent/src/desktop_install.rs | 12 +- server/packages/sandbox-agent/src/lib.rs | 1 + 3 files changed, 160 insertions(+), 6 deletions(-) create mode 100644 server/packages/sandbox-agent/src/browser_install.rs diff --git a/server/packages/sandbox-agent/src/browser_install.rs b/server/packages/sandbox-agent/src/browser_install.rs new file mode 100644 index 00000000..a67902cb --- /dev/null +++ b/server/packages/sandbox-agent/src/browser_install.rs @@ -0,0 +1,153 @@ +use crate::desktop_install::{ + detect_package_manager, find_binary, prompt_yes_no, render_install_command, + run_install_commands, running_as_root, DesktopPackageManager, +}; + +const AUTOMATIC_INSTALL_SUPPORTED_DISTROS: &str = + "Automatic browser dependency installation is supported on Debian/Ubuntu (apt), Fedora/RHEL (dnf), and Alpine (apk)."; +const AUTOMATIC_INSTALL_UNSUPPORTED_ENVS: &str = + "Automatic installation is not supported on macOS, Windows, or Linux distributions without apt, dnf, or apk."; + +#[derive(Debug, Clone)] +pub struct BrowserInstallRequest { + pub yes: bool, + pub print_only: bool, + pub package_manager: Option, +} + +pub(crate) fn browser_platform_support_message() -> String { + format!("Browser APIs are only supported on Linux. {AUTOMATIC_INSTALL_SUPPORTED_DISTROS}") +} + +fn linux_install_support_message() -> String { + format!("{AUTOMATIC_INSTALL_SUPPORTED_DISTROS} {AUTOMATIC_INSTALL_UNSUPPORTED_ENVS}") +} + +pub fn install_browser(request: BrowserInstallRequest) -> Result<(), String> { + if std::env::consts::OS != "linux" { + return Err(format!( + "browser installation is only supported on Linux. 
{}", + linux_install_support_message() + )); + } + + let package_manager = match request.package_manager { + Some(value) => value, + None => detect_package_manager().ok_or_else(|| { + format!( + "could not detect a supported package manager. {} Install the browser dependencies manually on this distribution.", + linux_install_support_message() + ) + })?, + }; + + let packages = browser_packages(package_manager); + let used_sudo = !running_as_root() && find_binary("sudo").is_some(); + if !running_as_root() && !used_sudo { + return Err( + "browser installation requires root or sudo access; rerun as root or install dependencies manually" + .to_string(), + ); + } + + println!("Browser package manager: {}", package_manager); + println!("Browser packages:"); + for package in &packages { + println!(" - {package}"); + } + println!("Install command:"); + println!( + " {}", + render_install_command(package_manager, used_sudo, &packages) + ); + + if request.print_only { + return Ok(()); + } + + if !request.yes && !prompt_yes_no("Proceed with browser dependency installation? [y/N] ")? { + return Err("installation cancelled".to_string()); + } + + run_install_commands(package_manager, used_sudo, &packages)?; + + println!("Browser dependencies installed."); + Ok(()) +} + +fn browser_packages(package_manager: DesktopPackageManager) -> Vec { + match package_manager { + DesktopPackageManager::Apt => vec![ + "chromium", + "chromium-sandbox", + "libnss3", + "libatk-bridge2.0-0", + "libdrm2", + "libxcomposite1", + "libxdamage1", + "libxrandr2", + "libgbm1", + "libasound2", + "libpangocairo-1.0-0", + "libgtk-3-0", + ], + DesktopPackageManager::Dnf => vec!["chromium"], + DesktopPackageManager::Apk => vec!["chromium", "nss"], + } + .into_iter() + .map(str::to_string) + .collect() +} + +/// Checks for missing browser dependencies (Chromium binary and desktop libs). 
+pub(crate) fn detect_missing_browser_dependencies() -> Vec { + let mut missing = Vec::new(); + + // Check for chromium binary (may be named chromium or chromium-browser) + if find_binary("chromium").is_none() && find_binary("chromium-browser").is_none() { + missing.push("chromium".to_string()); + } + + // Check for key desktop dependency libraries + for (name, binary) in [("Xvfb", "Xvfb"), ("xrandr", "xrandr")] { + if find_binary(binary).is_none() { + missing.push(name.to_string()); + } + } + + missing +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn browser_platform_support_message_mentions_linux_and_supported_distros() { + let message = browser_platform_support_message(); + assert!(message.contains("only supported on Linux")); + assert!(message.contains("Debian/Ubuntu (apt)")); + assert!(message.contains("Fedora/RHEL (dnf)")); + assert!(message.contains("Alpine (apk)")); + } + + #[test] + fn browser_packages_apt_includes_chromium_and_libs() { + let packages = browser_packages(DesktopPackageManager::Apt); + assert!(packages.iter().any(|p| p == "chromium")); + assert!(packages.iter().any(|p| p == "libnss3")); + assert!(packages.iter().any(|p| p == "libgbm1")); + } + + #[test] + fn browser_packages_dnf_includes_chromium() { + let packages = browser_packages(DesktopPackageManager::Dnf); + assert_eq!(packages, vec!["chromium"]); + } + + #[test] + fn browser_packages_apk_includes_chromium_and_nss() { + let packages = browser_packages(DesktopPackageManager::Apk); + assert_eq!(packages, vec!["chromium", "nss"]); + } +} diff --git a/server/packages/sandbox-agent/src/desktop_install.rs b/server/packages/sandbox-agent/src/desktop_install.rs index 480da7d2..d4745add 100644 --- a/server/packages/sandbox-agent/src/desktop_install.rs +++ b/server/packages/sandbox-agent/src/desktop_install.rs @@ -85,7 +85,7 @@ pub fn install_desktop(request: DesktopInstallRequest) -> Result<(), String> { Ok(()) } -fn detect_package_manager() -> Option { +pub(crate) fn 
detect_package_manager() -> Option { if find_binary("apt-get").is_some() { return Some(DesktopPackageManager::Apt); } @@ -149,7 +149,7 @@ fn desktop_packages(package_manager: DesktopPackageManager, no_fonts: bool) -> V packages } -fn render_install_command( +pub(crate) fn render_install_command( package_manager: DesktopPackageManager, used_sudo: bool, packages: &[String], @@ -169,7 +169,7 @@ fn render_install_command( } } -fn run_install_commands( +pub(crate) fn run_install_commands( package_manager: DesktopPackageManager, used_sudo: bool, packages: &[String], @@ -233,7 +233,7 @@ fn run_command((program, args): (String, Vec)) -> Result<(), String> { Ok(()) } -fn prompt_yes_no(prompt: &str) -> Result { +pub(crate) fn prompt_yes_no(prompt: &str) -> Result { print!("{prompt}"); io::stdout() .flush() @@ -246,7 +246,7 @@ fn prompt_yes_no(prompt: &str) -> Result { Ok(matches!(normalized.as_str(), "y" | "yes")) } -fn running_as_root() -> bool { +pub(crate) fn running_as_root() -> bool { #[cfg(unix)] unsafe { return libc::geteuid() == 0; @@ -257,7 +257,7 @@ fn running_as_root() -> bool { } } -fn find_binary(name: &str) -> Option { +pub(crate) fn find_binary(name: &str) -> Option { let path_env = std::env::var_os("PATH")?; for path in std::env::split_paths(&path_env) { let candidate = path.join(name); diff --git a/server/packages/sandbox-agent/src/lib.rs b/server/packages/sandbox-agent/src/lib.rs index d7b92d60..faa1d52d 100644 --- a/server/packages/sandbox-agent/src/lib.rs +++ b/server/packages/sandbox-agent/src/lib.rs @@ -1,6 +1,7 @@ //! Sandbox agent core utilities. 
mod acp_proxy_runtime; +mod browser_install; pub mod cli; pub mod daemon; mod desktop_errors; From f8a220c53bef584bee8fc4ba3f5a24bd496127f0 Mon Sep 17 00:00:00 2001 From: Nathan Flurry Date: Tue, 17 Mar 2026 04:40:23 -0700 Subject: [PATCH 02/51] feat: [US-002] - Register install browser subcommand in CLI Co-Authored-By: Claude Opus 4.6 (1M context) --- server/packages/sandbox-agent/src/cli.rs | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/server/packages/sandbox-agent/src/cli.rs b/server/packages/sandbox-agent/src/cli.rs index 000ea41e..5537599c 100644 --- a/server/packages/sandbox-agent/src/cli.rs +++ b/server/packages/sandbox-agent/src/cli.rs @@ -11,6 +11,7 @@ mod build_version { include!(concat!(env!("OUT_DIR"), "/version.rs")); } +use crate::browser_install::{install_browser, BrowserInstallRequest}; use crate::desktop_install::{install_desktop, DesktopInstallRequest, DesktopPackageManager}; use crate::router::{ build_router_with_state, shutdown_servers, AppState, AuthConfig, BrandingMode, @@ -169,6 +170,8 @@ pub struct DaemonArgs { pub enum InstallCommand { /// Install desktop runtime dependencies. Desktop(InstallDesktopArgs), + /// Install browser (Chromium) dependencies. 
+ Browser(InstallBrowserArgs), } #[derive(Subcommand, Debug)] @@ -337,6 +340,16 @@ pub struct InstallDesktopArgs { no_fonts: bool, } +#[derive(Args, Debug)] +pub struct InstallBrowserArgs { + #[arg(long, default_value_t = false)] + yes: bool, + #[arg(long, default_value_t = false)] + print_only: bool, + #[arg(long, value_enum)] + package_manager: Option, +} + #[derive(Args, Debug)] pub struct CredentialsExtractArgs { #[arg(long, short = 'a', value_enum)] @@ -444,6 +457,7 @@ pub fn run_command(command: &Command, cli: &CliConfig) -> Result<(), CliError> { fn run_install(command: &InstallCommand) -> Result<(), CliError> { match command { InstallCommand::Desktop(args) => install_desktop_local(args), + InstallCommand::Browser(args) => install_browser_local(args), } } @@ -522,6 +536,15 @@ fn install_desktop_local(args: &InstallDesktopArgs) -> Result<(), CliError> { .map_err(CliError::Server) } +fn install_browser_local(args: &InstallBrowserArgs) -> Result<(), CliError> { + install_browser(BrowserInstallRequest { + yes: args.yes, + print_only: args.print_only, + package_manager: args.package_manager, + }) + .map_err(CliError::Server) +} + fn run_agents(command: &AgentsCommand, cli: &CliConfig) -> Result<(), CliError> { match command { AgentsCommand::List(args) => { From 7d294a7cab1ad4a79dbef5991116cc97bdbd0a25 Mon Sep 17 00:00:00 2001 From: Nathan Flurry Date: Tue, 17 Mar 2026 04:46:47 -0700 Subject: [PATCH 03/51] feat: [US-003] - Add browser type definitions (DTOs and errors) Co-Authored-By: Claude Opus 4.6 (1M context) --- .../sandbox-agent/src/browser_errors.rs | 167 +++++ .../sandbox-agent/src/browser_types.rs | 578 ++++++++++++++++++ server/packages/sandbox-agent/src/lib.rs | 2 + 3 files changed, 747 insertions(+) create mode 100644 server/packages/sandbox-agent/src/browser_errors.rs create mode 100644 server/packages/sandbox-agent/src/browser_types.rs diff --git a/server/packages/sandbox-agent/src/browser_errors.rs 
b/server/packages/sandbox-agent/src/browser_errors.rs new file mode 100644 index 00000000..27c3b640 --- /dev/null +++ b/server/packages/sandbox-agent/src/browser_errors.rs @@ -0,0 +1,167 @@ +use sandbox_agent_error::ProblemDetails; +use serde_json::{Map, Value}; + +use crate::desktop_types::DesktopErrorInfo; + +#[derive(Debug, Clone)] +pub struct BrowserProblem { + status: u16, + title: &'static str, + code: &'static str, + message: String, +} + +impl BrowserProblem { + // 409 - browser is not running + pub fn not_active() -> Self { + Self::new( + 409, + "Browser Not Active", + "browser/not-active", + "The browser is not running. Call POST /v1/browser/start first.", + ) + } + + // 409 - browser is already running + pub fn already_active() -> Self { + Self::new( + 409, + "Browser Already Active", + "browser/already-active", + "The browser is already running. Stop it first with POST /v1/browser/stop.", + ) + } + + // 409 - desktop mode is active, cannot start browser + pub fn desktop_conflict() -> Self { + Self::new( + 409, + "Desktop Conflict", + "browser/desktop-conflict", + "The desktop runtime is currently active. 
Browser and desktop modes are mutually exclusive.", + ) + } + + // 424 - missing dependencies + pub fn install_required(message: impl Into) -> Self { + Self::new( + 424, + "Browser Install Required", + "browser/install-required", + message, + ) + } + + // 500 - startup sequence failed + pub fn start_failed(message: impl Into) -> Self { + Self::new(500, "Browser Start Failed", "browser/start-failed", message) + } + + // 502 - CDP communication error + pub fn cdp_error(message: impl Into) -> Self { + Self::new(502, "CDP Error", "browser/cdp-error", message) + } + + // 504 - operation timed out + pub fn timeout(message: impl Into) -> Self { + Self::new(504, "Browser Timeout", "browser/timeout", message) + } + + // 404 - tab/context/element not found + pub fn not_found(message: impl Into) -> Self { + Self::new(404, "Not Found", "browser/not-found", message) + } + + // 400 - bad CSS selector + pub fn invalid_selector(message: impl Into) -> Self { + Self::new(400, "Invalid Selector", "browser/invalid-selector", message) + } + + pub fn to_problem_details(&self) -> ProblemDetails { + let mut extensions = Map::new(); + extensions.insert("code".to_string(), Value::String(self.code.to_string())); + + ProblemDetails { + type_: format!("tag:sandboxagent.dev,2025:{}", self.code), + title: self.title.to_string(), + status: self.status, + detail: Some(self.message.clone()), + instance: None, + extensions, + } + } + + pub fn to_error_info(&self) -> DesktopErrorInfo { + DesktopErrorInfo { + code: self.code.to_string(), + message: self.message.clone(), + } + } + + pub fn code(&self) -> &'static str { + self.code + } + + fn new( + status: u16, + title: &'static str, + code: &'static str, + message: impl Into, + ) -> Self { + Self { + status, + title, + code, + message: message.into(), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn not_active_produces_correct_problem_details() { + let problem = BrowserProblem::not_active(); + let details = 
problem.to_problem_details(); + assert_eq!(details.status, 409); + assert_eq!( + details.type_, + "tag:sandboxagent.dev,2025:browser/not-active" + ); + assert_eq!(details.title, "Browser Not Active"); + assert!(details.detail.unwrap().contains("not running")); + } + + #[test] + fn cdp_error_includes_custom_message() { + let problem = BrowserProblem::cdp_error("connection refused"); + let details = problem.to_problem_details(); + assert_eq!(details.status, 502); + assert_eq!(details.detail.unwrap(), "connection refused"); + assert_eq!( + details.extensions.get("code"), + Some(&Value::String("browser/cdp-error".to_string())) + ); + } + + #[test] + fn install_required_uses_424_status() { + let problem = BrowserProblem::install_required("chromium not found"); + let details = problem.to_problem_details(); + assert_eq!(details.status, 424); + assert_eq!( + details.type_, + "tag:sandboxagent.dev,2025:browser/install-required" + ); + } + + #[test] + fn to_error_info_returns_code_and_message() { + let problem = BrowserProblem::timeout("CDP poll timed out after 15s"); + let info = problem.to_error_info(); + assert_eq!(info.code, "browser/timeout"); + assert_eq!(info.message, "CDP poll timed out after 15s"); + } +} diff --git a/server/packages/sandbox-agent/src/browser_types.rs b/server/packages/sandbox-agent/src/browser_types.rs new file mode 100644 index 00000000..29a65880 --- /dev/null +++ b/server/packages/sandbox-agent/src/browser_types.rs @@ -0,0 +1,578 @@ +use std::collections::HashMap; + +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use utoipa::{IntoParams, ToSchema}; + +use crate::desktop_types::{DesktopErrorInfo, DesktopProcessInfo, DesktopResolution}; + +// --------------------------------------------------------------------------- +// State +// --------------------------------------------------------------------------- + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema, ToSchema, PartialEq, Eq)] 
+#[serde(rename_all = "snake_case")] +pub enum BrowserState { + Inactive, + InstallRequired, + Starting, + Active, + Stopping, + Failed, +} + +// --------------------------------------------------------------------------- +// Lifecycle +// --------------------------------------------------------------------------- + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, IntoParams, Default)] +#[serde(rename_all = "camelCase")] +pub struct BrowserStartRequest { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub width: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub height: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub dpi: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub url: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub headless: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub context_id: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub stream_video_codec: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub stream_audio_codec: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub stream_frame_rate: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub webrtc_port_range: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub recording_fps: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] +pub struct BrowserStatusResponse { + pub state: BrowserState, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub display: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub resolution: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub started_at: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub cdp_url: Option, + #[serde(default, 
skip_serializing_if = "Option::is_none")] + pub url: Option, + #[serde(default)] + pub missing_dependencies: Vec, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub install_command: Option, + #[serde(default)] + pub processes: Vec, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub last_error: Option, +} + +// --------------------------------------------------------------------------- +// Navigation +// --------------------------------------------------------------------------- + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct BrowserNavigateRequest { + pub url: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub wait_until: Option, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema, ToSchema, PartialEq, Eq)] +#[serde(rename_all = "lowercase")] +pub enum BrowserNavigateWaitUntil { + Load, + Domcontentloaded, + Networkidle, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] +pub struct BrowserPageInfo { + pub url: String, + pub title: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub status: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, Default)] +#[serde(rename_all = "camelCase")] +pub struct BrowserReloadRequest { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub ignore_cache: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct BrowserWaitRequest { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub selector: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub timeout: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub state: Option, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema, ToSchema, PartialEq, Eq)] 
+#[serde(rename_all = "lowercase")] +pub enum BrowserWaitState { + Visible, + Hidden, + Attached, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] +pub struct BrowserWaitResponse { + pub found: bool, +} + +// --------------------------------------------------------------------------- +// Tabs +// --------------------------------------------------------------------------- + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] +pub struct BrowserTabInfo { + pub id: String, + pub url: String, + pub title: String, + pub active: bool, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] +pub struct BrowserTabListResponse { + pub tabs: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, Default)] +#[serde(rename_all = "camelCase")] +pub struct BrowserCreateTabRequest { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub url: Option, +} + +// --------------------------------------------------------------------------- +// Screenshots & PDF +// --------------------------------------------------------------------------- + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema, ToSchema, PartialEq, Eq)] +#[serde(rename_all = "lowercase")] +pub enum BrowserScreenshotFormat { + Png, + Jpeg, + Webp, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, IntoParams, Default)] +#[serde(rename_all = "camelCase")] +pub struct BrowserScreenshotQuery { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub format: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub quality: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub full_page: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub selector: Option, +} + 
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, IntoParams, Default)] +#[serde(rename_all = "camelCase")] +pub struct BrowserPdfQuery { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub format: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub landscape: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub print_background: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub scale: Option, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema, ToSchema, PartialEq, Eq)] +#[serde(rename_all = "lowercase")] +pub enum BrowserPdfFormat { + A4, + Letter, + Legal, +} + +// --------------------------------------------------------------------------- +// Content extraction +// --------------------------------------------------------------------------- + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, IntoParams, Default)] +#[serde(rename_all = "camelCase")] +pub struct BrowserContentQuery { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub selector: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] +pub struct BrowserContentResponse { + pub html: String, + pub url: String, + pub title: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] +pub struct BrowserMarkdownResponse { + pub markdown: String, + pub url: String, + pub title: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] +pub struct BrowserLinkInfo { + pub href: String, + pub text: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] +pub struct BrowserLinksResponse { + pub links: Vec, + pub url: String, +} + +#[derive(Debug, 
Clone, Serialize, Deserialize, JsonSchema, ToSchema, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] +pub struct BrowserSnapshotResponse { + pub snapshot: String, + pub url: String, + pub title: String, +} + +// --------------------------------------------------------------------------- +// Scrape +// --------------------------------------------------------------------------- + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct BrowserScrapeRequest { + pub selectors: HashMap, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub url: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct BrowserScrapeResponse { + pub data: HashMap>, + pub url: String, + pub title: String, +} + +// --------------------------------------------------------------------------- +// Execute +// --------------------------------------------------------------------------- + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct BrowserExecuteRequest { + pub expression: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub await_promise: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct BrowserExecuteResponse { + pub result: Value, + #[serde(rename = "type")] + pub type_: String, +} + +// --------------------------------------------------------------------------- +// Interaction +// --------------------------------------------------------------------------- + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct BrowserClickRequest { + pub selector: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub button: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub 
click_count: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub timeout: Option, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema, ToSchema, PartialEq, Eq)] +#[serde(rename_all = "lowercase")] +pub enum BrowserMouseButton { + Left, + Right, + Middle, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct BrowserTypeRequest { + pub selector: String, + pub text: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub delay: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub clear: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct BrowserSelectRequest { + pub selector: String, + pub value: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct BrowserHoverRequest { + pub selector: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct BrowserScrollRequest { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub selector: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub x: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub y: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct BrowserUploadRequest { + pub selector: String, + pub path: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct BrowserDialogRequest { + pub accept: bool, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub text: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] +pub struct BrowserActionResponse 
{ + pub ok: bool, +} + +// --------------------------------------------------------------------------- +// Console monitoring +// --------------------------------------------------------------------------- + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, IntoParams, Default)] +#[serde(rename_all = "camelCase")] +pub struct BrowserConsoleQuery { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub level: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub limit: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] +pub struct BrowserConsoleMessage { + pub level: String, + pub text: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub url: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub line: Option, + pub timestamp: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] +pub struct BrowserConsoleResponse { + pub messages: Vec, +} + +// --------------------------------------------------------------------------- +// Network monitoring +// --------------------------------------------------------------------------- + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, IntoParams, Default)] +#[serde(rename_all = "camelCase")] +pub struct BrowserNetworkQuery { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub limit: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub url_pattern: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct BrowserNetworkRequest { + pub url: String, + pub method: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub status: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub mime_type: Option, + 
#[serde(default, skip_serializing_if = "Option::is_none")] + pub response_size: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub duration: Option, + pub timestamp: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct BrowserNetworkResponse { + pub requests: Vec, +} + +// --------------------------------------------------------------------------- +// Crawling +// --------------------------------------------------------------------------- + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct BrowserCrawlRequest { + pub url: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub max_pages: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub max_depth: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub allowed_domains: Option>, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub extract: Option, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema, ToSchema, PartialEq, Eq)] +#[serde(rename_all = "lowercase")] +pub enum BrowserCrawlExtract { + Markdown, + Html, + Text, + Links, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct BrowserCrawlPage { + pub url: String, + pub title: String, + pub content: String, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub links: Vec, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub status: Option, + pub depth: u32, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct BrowserCrawlResponse { + pub pages: Vec, + pub total_pages: u32, + pub truncated: bool, +} + +// --------------------------------------------------------------------------- +// Contexts (persistent profiles) +// 
--------------------------------------------------------------------------- + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] +pub struct BrowserContextInfo { + pub id: String, + pub name: String, + pub created_at: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub size_bytes: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] +pub struct BrowserContextListResponse { + pub contexts: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct BrowserContextCreateRequest { + pub name: String, +} + +// --------------------------------------------------------------------------- +// Cookies +// --------------------------------------------------------------------------- + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, IntoParams, Default)] +#[serde(rename_all = "camelCase")] +pub struct BrowserCookiesQuery { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub url: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct BrowserCookie { + pub name: String, + pub value: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub domain: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub path: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub expires: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub http_only: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub secure: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub same_site: Option, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema, ToSchema, PartialEq, Eq)] +pub enum BrowserCookieSameSite { + Strict, + Lax, 
+ None, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct BrowserCookiesResponse { + pub cookies: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct BrowserSetCookiesRequest { + pub cookies: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, IntoParams, Default)] +#[serde(rename_all = "camelCase")] +pub struct BrowserDeleteCookiesQuery { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub name: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub domain: Option, +} diff --git a/server/packages/sandbox-agent/src/lib.rs b/server/packages/sandbox-agent/src/lib.rs index faa1d52d..32b4427a 100644 --- a/server/packages/sandbox-agent/src/lib.rs +++ b/server/packages/sandbox-agent/src/lib.rs @@ -1,7 +1,9 @@ //! Sandbox agent core utilities. mod acp_proxy_runtime; +mod browser_errors; mod browser_install; +pub mod browser_types; pub mod cli; pub mod daemon; mod desktop_errors; From 0bd34f6a8daf93214c3a3fec9000c91824a79feb Mon Sep 17 00:00:00 2001 From: Nathan Flurry Date: Tue, 17 Mar 2026 04:53:40 -0700 Subject: [PATCH 04/51] feat: [US-004] - Add CdpClient for communicating with Chromium Co-Authored-By: Claude Opus 4.6 (1M context) --- .../packages/sandbox-agent/src/browser_cdp.rs | 225 ++++++++++++++++++ server/packages/sandbox-agent/src/lib.rs | 1 + 2 files changed, 226 insertions(+) create mode 100644 server/packages/sandbox-agent/src/browser_cdp.rs diff --git a/server/packages/sandbox-agent/src/browser_cdp.rs b/server/packages/sandbox-agent/src/browser_cdp.rs new file mode 100644 index 00000000..cfea2ab7 --- /dev/null +++ b/server/packages/sandbox-agent/src/browser_cdp.rs @@ -0,0 +1,225 @@ +use std::collections::HashMap; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::Arc; + +use futures::{SinkExt, StreamExt}; +use serde_json::{json, Value}; 
+use tokio::sync::{mpsc, oneshot, Mutex}; +use tokio::task::JoinHandle; +use tokio_tungstenite::tungstenite::Message; +use tracing::{debug, warn}; + +use crate::browser_errors::BrowserProblem; + +/// WebSocket stream type returned by `tokio_tungstenite::connect_async`. +type CdpWsStream = + tokio_tungstenite::WebSocketStream>; + +/// Chrome DevTools Protocol client. +/// +/// Maintains a persistent WebSocket connection to Chromium's debugging port +/// for sending commands and receiving events. +pub struct CdpClient { + ws_sender: Arc>>, + next_id: AtomicU64, + pending: Arc>>>>, + subscribers: Arc>>>>, + reader_task: JoinHandle<()>, +} + +impl CdpClient { + /// CDP debugging port on localhost. + const CDP_PORT: u16 = 9222; + + /// Default timeout for CDP commands. + const COMMAND_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(30); + + /// Connect to Chromium's CDP endpoint. + /// + /// Discovers the WebSocket debugger URL via `http://127.0.0.1:9222/json/version`, + /// then establishes a persistent WebSocket connection to + /// `ws://127.0.0.1:9222/devtools/browser/{id}`. + pub async fn connect() -> Result { + let version_url = format!("http://127.0.0.1:{}/json/version", Self::CDP_PORT); + + let resp = reqwest::get(&version_url).await.map_err(|e| { + BrowserProblem::cdp_error(format!( + "failed to reach CDP endpoint at {version_url}: {e}" + )) + })?; + + let version_info: Value = resp.json().await.map_err(|e| { + BrowserProblem::cdp_error(format!("invalid JSON from {version_url}: {e}")) + })?; + + let ws_url = version_info["webSocketDebuggerUrl"] + .as_str() + .ok_or_else(|| { + BrowserProblem::cdp_error( + "webSocketDebuggerUrl not found in /json/version response", + ) + })? 
+ .to_string(); + + debug!(ws_url = %ws_url, "connecting to CDP"); + + let (ws_stream, _) = tokio_tungstenite::connect_async(&ws_url) + .await + .map_err(|e| { + BrowserProblem::cdp_error(format!("WebSocket connection to {ws_url} failed: {e}")) + })?; + + let (ws_sink, ws_read) = ws_stream.split(); + + let pending: Arc>>>> = + Arc::new(Mutex::new(HashMap::new())); + let subscribers: Arc>>>> = + Arc::new(Mutex::new(HashMap::new())); + + let reader_pending = pending.clone(); + let reader_subscribers = subscribers.clone(); + let reader_task = tokio::spawn(Self::reader_loop( + ws_read, + reader_pending, + reader_subscribers, + )); + + Ok(Self { + ws_sender: Arc::new(Mutex::new(ws_sink)), + next_id: AtomicU64::new(1), + pending, + subscribers, + reader_task, + }) + } + + /// Send a CDP command and wait for the matching response. + /// + /// Returns the `result` field from the CDP response, or a `BrowserProblem` + /// if the command fails or times out. + pub async fn send(&self, method: &str, params: Option) -> Result { + let id = self.next_id.fetch_add(1, Ordering::Relaxed); + let msg = json!({ + "id": id, + "method": method, + "params": params.unwrap_or_else(|| Value::Object(Default::default())), + }); + + let (tx, rx) = oneshot::channel(); + self.pending.lock().await.insert(id, tx); + + let text = serde_json::to_string(&msg).map_err(|e| { + BrowserProblem::cdp_error(format!("failed to serialize CDP command: {e}")) + })?; + + if let Err(e) = self + .ws_sender + .lock() + .await + .send(Message::Text(text.into())) + .await + { + self.pending.lock().await.remove(&id); + return Err(BrowserProblem::cdp_error(format!( + "failed to send CDP command '{method}': {e}" + ))); + } + + let result = tokio::time::timeout(Self::COMMAND_TIMEOUT, rx) + .await + .map_err(|_| { + BrowserProblem::timeout(format!( + "CDP command '{method}' timed out after {}s", + Self::COMMAND_TIMEOUT.as_secs() + )) + })? 
+ .map_err(|_| BrowserProblem::cdp_error("CDP response channel closed unexpectedly"))?; + + result.map_err(BrowserProblem::cdp_error) + } + + /// Subscribe to a CDP event by method name. + /// + /// Returns a receiver that delivers event params each time the specified + /// event fires. The subscription remains active until the receiver is dropped. + pub async fn subscribe(&self, event: &str) -> mpsc::UnboundedReceiver { + let (tx, rx) = mpsc::unbounded_channel(); + self.subscribers + .lock() + .await + .entry(event.to_string()) + .or_default() + .push(tx); + rx + } + + /// Close the CDP connection and stop the reader task. + pub async fn close(self) { + self.reader_task.abort(); + let _ = self.ws_sender.lock().await.close().await; + } + + /// Background loop that reads WebSocket messages and dispatches them. + /// + /// Messages with an `id` field are routed to the matching pending request. + /// Messages with a `method` field (no `id`) are broadcast to event subscribers. + async fn reader_loop( + mut ws_stream: futures::stream::SplitStream, + pending: Arc>>>>, + subscribers: Arc>>>>, + ) { + while let Some(msg) = ws_stream.next().await { + let text = match msg { + Ok(Message::Text(t)) => t, + Ok(Message::Close(_)) => break, + Ok(_) => continue, + Err(e) => { + warn!(error = %e, "CDP WebSocket read error"); + break; + } + }; + + let parsed: Value = match serde_json::from_str(&text.to_string()) { + Ok(v) => v, + Err(e) => { + warn!(error = %e, "CDP received invalid JSON"); + continue; + } + }; + + if let Some(id) = parsed.get("id").and_then(|v| v.as_u64()) { + // Response to a pending command + if let Some(tx) = pending.lock().await.remove(&id) { + let result = if let Some(error) = parsed.get("error") { + let msg = error + .get("message") + .and_then(|m| m.as_str()) + .unwrap_or("unknown CDP error"); + Err(msg.to_string()) + } else { + Ok(parsed.get("result").cloned().unwrap_or(Value::Null)) + }; + let _ = tx.send(result); + } + } else if let Some(method) = 
parsed.get("method").and_then(|v| v.as_str()) { + // Event notification + let params = parsed.get("params").cloned().unwrap_or(Value::Null); + let mut subs = subscribers.lock().await; + if let Some(listeners) = subs.get_mut(method) { + listeners.retain(|tx| tx.send(params.clone()).is_ok()); + } + } + } + + // Connection closed: fail all pending requests + for (_, tx) in pending.lock().await.drain() { + let _ = tx.send(Err("CDP WebSocket connection closed".to_string())); + } + } +} + +impl Drop for CdpClient { + fn drop(&mut self) { + self.reader_task.abort(); + } +} diff --git a/server/packages/sandbox-agent/src/lib.rs b/server/packages/sandbox-agent/src/lib.rs index 32b4427a..f7d4da9c 100644 --- a/server/packages/sandbox-agent/src/lib.rs +++ b/server/packages/sandbox-agent/src/lib.rs @@ -1,6 +1,7 @@ //! Sandbox agent core utilities. mod acp_proxy_runtime; +mod browser_cdp; mod browser_errors; mod browser_install; pub mod browser_types; From 1d2c43ae3603676767cbfdaa9113cc30943883b1 Mon Sep 17 00:00:00 2001 From: Nathan Flurry Date: Tue, 17 Mar 2026 05:01:44 -0700 Subject: [PATCH 05/51] feat: [US-005] - Add BrowserRuntime state machine Co-Authored-By: Claude Opus 4.6 (1M context) --- .../sandbox-agent/src/browser_runtime.rs | 982 ++++++++++++++++++ server/packages/sandbox-agent/src/lib.rs | 1 + server/packages/sandbox-agent/src/router.rs | 11 + 3 files changed, 994 insertions(+) create mode 100644 server/packages/sandbox-agent/src/browser_runtime.rs diff --git a/server/packages/sandbox-agent/src/browser_runtime.rs b/server/packages/sandbox-agent/src/browser_runtime.rs new file mode 100644 index 00000000..cec6a88e --- /dev/null +++ b/server/packages/sandbox-agent/src/browser_runtime.rs @@ -0,0 +1,982 @@ +use std::collections::HashMap; +use std::collections::VecDeque; +use std::fs::{self, OpenOptions}; +use std::path::{Path, PathBuf}; +use std::sync::Arc; +use std::time::Duration; + +use tokio::sync::Mutex; + +use crate::browser_cdp::CdpClient; +use 
crate::browser_errors::BrowserProblem; +use crate::browser_install::{ + browser_platform_support_message, detect_missing_browser_dependencies, +}; +use crate::browser_types::{ + BrowserConsoleMessage, BrowserNetworkRequest, BrowserStartRequest, BrowserState, + BrowserStatusResponse, +}; +use crate::desktop_install::find_binary; +use crate::desktop_runtime::DesktopRuntime; +use crate::desktop_streaming::DesktopStreamingManager; +use crate::desktop_types::{DesktopErrorInfo, DesktopProcessInfo, DesktopResolution}; +use crate::process_runtime::{ + ProcessOwner, ProcessRuntime, ProcessStartSpec, ProcessStatus, RestartPolicy, +}; + +const DEFAULT_WIDTH: u32 = 1440; +const DEFAULT_HEIGHT: u32 = 900; +const DEFAULT_DPI: u32 = 96; +const DEFAULT_DISPLAY_NUM: i32 = 98; +const MAX_DISPLAY_PROBE: i32 = 10; +const STARTUP_TIMEOUT: Duration = Duration::from_secs(15); +const CDP_POLL_TIMEOUT: Duration = Duration::from_secs(15); +const CDP_PORT: u16 = 9222; +const MAX_CONSOLE_MESSAGES: usize = 1000; +const MAX_NETWORK_REQUESTS: usize = 1000; + +#[derive(Debug, Clone)] +pub struct BrowserRuntime { + config: BrowserRuntimeConfig, + process_runtime: Arc, + desktop_runtime: Arc, + streaming_manager: DesktopStreamingManager, + inner: Arc>, +} + +#[derive(Debug, Clone)] +pub struct BrowserRuntimeConfig { + state_dir: PathBuf, + display_num: i32, + assume_linux_for_tests: bool, +} + +impl Default for BrowserRuntimeConfig { + fn default() -> Self { + Self { + state_dir: default_state_dir(), + display_num: DEFAULT_DISPLAY_NUM, + assume_linux_for_tests: false, + } + } +} + +struct BrowserRuntimeStateData { + state: BrowserState, + display_num: i32, + display: Option, + resolution: Option, + started_at: Option, + last_error: Option, + missing_dependencies: Vec, + install_command: Option, + runtime_log_path: PathBuf, + environment: HashMap, + xvfb: Option, + chromium: Option, + cdp_client: Option, + context_id: Option, + streaming_config: Option, + recording_fps: Option, + console_messages: 
VecDeque, + network_requests: VecDeque, +} + +impl std::fmt::Debug for BrowserRuntimeStateData { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("BrowserRuntimeStateData") + .field("state", &self.state) + .field("display", &self.display) + .field("resolution", &self.resolution) + .field("started_at", &self.started_at) + .finish_non_exhaustive() + } +} + +#[derive(Debug)] +struct ManagedBrowserProcess { + name: &'static str, + process_id: String, + pid: Option, + running: bool, +} + +impl BrowserRuntime { + pub fn new(process_runtime: Arc, desktop_runtime: Arc) -> Self { + Self::with_config( + process_runtime, + desktop_runtime, + BrowserRuntimeConfig::default(), + ) + } + + pub fn with_config( + process_runtime: Arc, + desktop_runtime: Arc, + config: BrowserRuntimeConfig, + ) -> Self { + let runtime_log_path = config.state_dir.join("browser-runtime.log"); + Self { + streaming_manager: DesktopStreamingManager::new(process_runtime.clone()), + process_runtime, + desktop_runtime, + inner: Arc::new(Mutex::new(BrowserRuntimeStateData { + state: BrowserState::Inactive, + display_num: config.display_num, + display: None, + resolution: None, + started_at: None, + last_error: None, + missing_dependencies: Vec::new(), + install_command: None, + runtime_log_path, + environment: HashMap::new(), + xvfb: None, + chromium: None, + cdp_client: None, + context_id: None, + streaming_config: None, + recording_fps: None, + console_messages: VecDeque::new(), + network_requests: VecDeque::new(), + })), + config, + } + } + + // ----------------------------------------------------------------------- + // Public API + // ----------------------------------------------------------------------- + + pub async fn status(&self) -> BrowserStatusResponse { + let mut state = self.inner.lock().await; + self.refresh_status_locked(&mut state).await; + let mut response = self.snapshot_locked(&state); + drop(state); + self.append_neko_process(&mut response).await; 
+ response + } + + pub async fn start( + &self, + request: BrowserStartRequest, + ) -> Result { + // Check mutual exclusivity with desktop runtime + let desktop_status = self.desktop_runtime.status().await; + if desktop_status.state == crate::desktop_types::DesktopState::Active { + return Err(BrowserProblem::desktop_conflict()); + } + + let mut state = self.inner.lock().await; + + if !self.platform_supported() { + let problem = BrowserProblem::start_failed(browser_platform_support_message()); + self.record_problem_locked(&mut state, &problem); + state.state = BrowserState::Failed; + return Err(problem); + } + + if matches!(state.state, BrowserState::Starting | BrowserState::Stopping) { + return Err(BrowserProblem::start_failed( + "Browser runtime is busy transitioning state", + )); + } + + self.refresh_status_locked(&mut state).await; + if state.state == BrowserState::Active { + let mut response = self.snapshot_locked(&state); + drop(state); + self.append_neko_process(&mut response).await; + return Ok(response); + } + + if !state.missing_dependencies.is_empty() { + return Err(BrowserProblem::install_required(format!( + "Missing browser dependencies: {}. 
Run: sandbox-agent install browser --yes", + state.missing_dependencies.join(", ") + ))); + } + + self.ensure_state_dir() + .map_err(|err| BrowserProblem::start_failed(err))?; + self.write_runtime_log_locked(&state, "starting browser runtime"); + + let width = request.width.unwrap_or(DEFAULT_WIDTH); + let height = request.height.unwrap_or(DEFAULT_HEIGHT); + let dpi = request.dpi.unwrap_or(DEFAULT_DPI); + if width == 0 || height == 0 { + return Err(BrowserProblem::start_failed( + "Browser width and height must be greater than 0", + )); + } + + let headless = request.headless.unwrap_or(false); + + // Store streaming/recording config + state.streaming_config = if request.stream_video_codec.is_some() + || request.stream_audio_codec.is_some() + || request.stream_frame_rate.is_some() + || request.webrtc_port_range.is_some() + { + Some(crate::desktop_streaming::StreamingConfig { + video_codec: request + .stream_video_codec + .unwrap_or_else(|| "vp8".to_string()), + audio_codec: request + .stream_audio_codec + .unwrap_or_else(|| "opus".to_string()), + frame_rate: request.stream_frame_rate.unwrap_or(30).clamp(1, 60), + webrtc_port_range: request + .webrtc_port_range + .unwrap_or_else(|| "59050-59070".to_string()), + }) + } else { + None + }; + state.recording_fps = request.recording_fps.map(|fps| fps.clamp(1, 60)); + state.context_id = request.context_id.clone(); + + // Choose display and set up environment + let display_num = if headless { + // Headless doesn't need Xvfb but we still pick a display_num for consistency + self.config.display_num + } else { + self.choose_display_num()? 
+ }; + let display = format!(":{display_num}"); + let resolution = DesktopResolution { + width, + height, + dpi: Some(dpi), + }; + let environment = self.base_environment(&display)?; + + state.state = BrowserState::Starting; + state.display_num = display_num; + state.display = Some(display.clone()); + state.resolution = Some(resolution.clone()); + state.started_at = None; + state.last_error = None; + state.environment = environment; + state.install_command = None; + state.console_messages.clear(); + state.network_requests.clear(); + + // Start Xvfb (unless headless) + if !headless { + if let Err(problem) = self.start_xvfb_locked(&mut state, &resolution).await { + return Err(self.fail_start_locked(&mut state, problem).await); + } + if let Err(problem) = self.wait_for_socket(display_num).await { + return Err(self.fail_start_locked(&mut state, problem).await); + } + } + + // Start Chromium + if let Err(problem) = self + .start_chromium_locked(&mut state, &resolution, headless, request.url.as_deref()) + .await + { + return Err(self.fail_start_locked(&mut state, problem).await); + } + + // Wait for CDP to become ready + if let Err(problem) = self.wait_for_cdp().await { + return Err(self.fail_start_locked(&mut state, problem).await); + } + + // Connect CDP client + match CdpClient::connect().await { + Ok(client) => { + state.cdp_client = Some(client); + } + Err(problem) => { + return Err(self.fail_start_locked(&mut state, problem).await); + } + } + + // Optionally start Neko for streaming + if !headless { + if let Some(streaming_config) = state.streaming_config.clone() { + let display_ref = state.display.clone().unwrap_or_default(); + let resolution_ref = state.resolution.clone().unwrap_or(DesktopResolution { + width, + height, + dpi: Some(dpi), + }); + let env_ref = state.environment.clone(); + drop(state); + let _ = self + .streaming_manager + .start( + &display_ref, + resolution_ref, + &env_ref, + Some(streaming_config), + None, + ) + .await; + state = 
self.inner.lock().await; + } + } + + state.state = BrowserState::Active; + state.started_at = Some(chrono::Utc::now().to_rfc3339()); + state.last_error = None; + self.write_runtime_log_locked( + &state, + &format!( + "browser runtime active on {} ({}x{}, dpi {})", + display, width, height, dpi + ), + ); + + let mut response = self.snapshot_locked(&state); + drop(state); + self.append_neko_process(&mut response).await; + Ok(response) + } + + pub async fn stop(&self) -> Result { + let mut state = self.inner.lock().await; + if matches!(state.state, BrowserState::Starting | BrowserState::Stopping) { + return Err(BrowserProblem::start_failed( + "Browser runtime is busy transitioning state", + )); + } + + state.state = BrowserState::Stopping; + self.write_runtime_log_locked(&state, "stopping browser runtime"); + + // Close CDP client + if let Some(cdp_client) = state.cdp_client.take() { + cdp_client.close().await; + } + + // Stop streaming + let _ = self.streaming_manager.stop().await; + + // Stop Chromium + self.stop_chromium_locked(&mut state).await; + + // Stop Xvfb + self.stop_xvfb_locked(&mut state).await; + + state.state = BrowserState::Inactive; + state.display = None; + state.resolution = None; + state.started_at = None; + state.last_error = None; + state.context_id = None; + state.missing_dependencies = self.detect_missing_dependencies(); + state.install_command = self.install_command_for(&state.missing_dependencies); + state.environment.clear(); + state.streaming_config = None; + state.recording_fps = None; + state.console_messages.clear(); + state.network_requests.clear(); + + let mut response = self.snapshot_locked(&state); + drop(state); + self.append_neko_process(&mut response).await; + Ok(response) + } + + pub async fn shutdown(&self) { + let _ = self.stop().await; + } + + /// Get a reference to the CDP client, if connected. 
+ pub async fn cdp_client(&self) -> Result { + let state = self.inner.lock().await; + if state.state != BrowserState::Active { + return Err(BrowserProblem::not_active()); + } + // We cannot return a reference out of the Mutex, so we need to use + // the send method directly. For now, return an error if not connected. + // Callers should use `with_cdp` instead. + Err(BrowserProblem::cdp_error( + "Use with_cdp() to execute CDP commands", + )) + } + + /// Execute a closure with the CDP client while holding the state lock. + pub async fn with_cdp(&self, f: F) -> Result + where + F: FnOnce(&CdpClient) -> Fut, + Fut: std::future::Future>, + { + let state = self.inner.lock().await; + if state.state != BrowserState::Active { + return Err(BrowserProblem::not_active()); + } + let cdp = state + .cdp_client + .as_ref() + .ok_or_else(|| BrowserProblem::cdp_error("CDP client is not connected"))?; + f(cdp).await + } + + /// Get the streaming manager for WebRTC signaling. + pub fn streaming_manager(&self) -> &DesktopStreamingManager { + &self.streaming_manager + } + + /// Push a console message into the ring buffer. + pub async fn push_console_message(&self, message: BrowserConsoleMessage) { + let mut state = self.inner.lock().await; + if state.console_messages.len() >= MAX_CONSOLE_MESSAGES { + state.console_messages.pop_front(); + } + state.console_messages.push_back(message); + } + + /// Push a network request into the ring buffer. + pub async fn push_network_request(&self, request: BrowserNetworkRequest) { + let mut state = self.inner.lock().await; + if state.network_requests.len() >= MAX_NETWORK_REQUESTS { + state.network_requests.pop_front(); + } + state.network_requests.push_back(request); + } + + /// Get console messages, optionally filtered by level. 
+ pub async fn console_messages( + &self, + level: Option<&str>, + limit: Option, + ) -> Vec { + let state = self.inner.lock().await; + let limit = limit.unwrap_or(100) as usize; + state + .console_messages + .iter() + .filter(|msg| level.map_or(true, |l| msg.level == l)) + .rev() + .take(limit) + .cloned() + .collect::>() + .into_iter() + .rev() + .collect() + } + + /// Get network requests, optionally filtered by URL pattern. + pub async fn network_requests( + &self, + url_pattern: Option<&str>, + limit: Option, + ) -> Vec { + let state = self.inner.lock().await; + let limit = limit.unwrap_or(100) as usize; + state + .network_requests + .iter() + .filter(|req| url_pattern.map_or(true, |pattern| req.url.contains(pattern))) + .rev() + .take(limit) + .cloned() + .collect::>() + .into_iter() + .rev() + .collect() + } + + // ----------------------------------------------------------------------- + // Internal: state management + // ----------------------------------------------------------------------- + + async fn refresh_status_locked(&self, state: &mut BrowserRuntimeStateData) { + let missing_dependencies = if self.platform_supported() { + self.detect_missing_dependencies() + } else { + Vec::new() + }; + state.missing_dependencies = missing_dependencies.clone(); + state.install_command = self.install_command_for(&missing_dependencies); + + if !self.platform_supported() { + state.state = BrowserState::Failed; + state.last_error = Some( + BrowserProblem::start_failed(browser_platform_support_message()).to_error_info(), + ); + return; + } + + if !missing_dependencies.is_empty() { + state.state = BrowserState::InstallRequired; + state.last_error = Some( + BrowserProblem::install_required(format!( + "Missing: {}", + missing_dependencies.join(", ") + )) + .to_error_info(), + ); + return; + } + + if matches!( + state.state, + BrowserState::Inactive | BrowserState::Starting | BrowserState::Stopping + ) { + if state.state == BrowserState::Inactive { + state.last_error = 
None; + } + return; + } + + if state.state == BrowserState::Failed + && state.display.is_none() + && state.xvfb.is_none() + && state.chromium.is_none() + { + return; + } + + // Check Xvfb is running (if we started one) + if let Some(ref xvfb) = state.xvfb { + if let Ok(snapshot) = self.process_runtime.snapshot(&xvfb.process_id).await { + if snapshot.status != ProcessStatus::Running { + let problem = BrowserProblem::start_failed("Xvfb process exited unexpectedly"); + self.record_problem_locked(state, &problem); + state.state = BrowserState::Failed; + return; + } + } + } + + // Check Chromium is running + if let Some(ref chromium) = state.chromium { + if let Ok(snapshot) = self.process_runtime.snapshot(&chromium.process_id).await { + if snapshot.status != ProcessStatus::Running { + let problem = + BrowserProblem::start_failed("Chromium process exited unexpectedly"); + self.record_problem_locked(state, &problem); + state.state = BrowserState::Failed; + return; + } + } + } + } + + fn snapshot_locked(&self, state: &BrowserRuntimeStateData) -> BrowserStatusResponse { + BrowserStatusResponse { + state: state.state, + display: state.display.clone(), + resolution: state.resolution.clone(), + started_at: state.started_at.clone(), + cdp_url: if state.state == BrowserState::Active { + Some(format!("ws://127.0.0.1:{CDP_PORT}/devtools/browser")) + } else { + None + }, + url: None, + missing_dependencies: state.missing_dependencies.clone(), + install_command: state.install_command.clone(), + processes: self.processes_locked(state), + last_error: state.last_error.clone(), + } + } + + fn processes_locked(&self, state: &BrowserRuntimeStateData) -> Vec { + let mut processes = Vec::new(); + if let Some(ref process) = state.xvfb { + processes.push(DesktopProcessInfo { + name: process.name.to_string(), + pid: process.pid, + running: process.running, + log_path: None, + }); + } + if let Some(ref process) = state.chromium { + processes.push(DesktopProcessInfo { + name: 
process.name.to_string(), + pid: process.pid, + running: process.running, + log_path: None, + }); + } + processes + } + + async fn append_neko_process(&self, response: &mut BrowserStatusResponse) { + if let Some(neko_info) = self.streaming_manager.process_info().await { + response.processes.push(neko_info); + } + } + + fn record_problem_locked(&self, state: &mut BrowserRuntimeStateData, problem: &BrowserProblem) { + state.last_error = Some(problem.to_error_info()); + self.write_runtime_log_locked( + state, + &format!("{}: {}", problem.code(), problem.to_error_info().message), + ); + } + + // ----------------------------------------------------------------------- + // Internal: subprocess management + // ----------------------------------------------------------------------- + + async fn start_xvfb_locked( + &self, + state: &mut BrowserRuntimeStateData, + resolution: &DesktopResolution, + ) -> Result<(), BrowserProblem> { + let Some(display) = state.display.clone() else { + return Err(BrowserProblem::start_failed( + "Display was not configured before starting Xvfb", + )); + }; + let args = vec![ + display, + "-screen".to_string(), + "0".to_string(), + format!("{}x{}x24", resolution.width, resolution.height), + "-dpi".to_string(), + resolution.dpi.unwrap_or(DEFAULT_DPI).to_string(), + "-nolisten".to_string(), + "tcp".to_string(), + ]; + let snapshot = self + .process_runtime + .start_process(ProcessStartSpec { + command: "Xvfb".to_string(), + args, + cwd: None, + env: state.environment.clone(), + tty: false, + interactive: false, + owner: ProcessOwner::Desktop, + restart_policy: Some(RestartPolicy::Always), + }) + .await + .map_err(|err| BrowserProblem::start_failed(format!("failed to start Xvfb: {err}")))?; + state.xvfb = Some(ManagedBrowserProcess { + name: "Xvfb", + process_id: snapshot.id, + pid: snapshot.pid, + running: snapshot.status == ProcessStatus::Running, + }); + Ok(()) + } + + async fn start_chromium_locked( + &self, + state: &mut 
BrowserRuntimeStateData, + resolution: &DesktopResolution, + headless: bool, + initial_url: Option<&str>, + ) -> Result<(), BrowserProblem> { + let chromium_binary = find_chromium_binary().ok_or_else(|| { + BrowserProblem::install_required( + "Chromium binary not found. Run: sandbox-agent install browser --yes", + ) + })?; + + let mut args = vec![ + "--no-sandbox".to_string(), + "--disable-gpu".to_string(), + "--disable-dev-shm-usage".to_string(), + "--disable-software-rasterizer".to_string(), + format!("--remote-debugging-port={CDP_PORT}"), + "--remote-debugging-address=127.0.0.1".to_string(), + format!("--window-size={},{}", resolution.width, resolution.height), + "--no-first-run".to_string(), + "--no-default-browser-check".to_string(), + ]; + + if headless { + args.push("--headless=new".to_string()); + } + + // Set user-data-dir for persistent contexts + if let Some(ref context_id) = state.context_id { + let context_dir = self + .config + .state_dir + .join("browser-contexts") + .join(context_id); + args.push(format!("--user-data-dir={}", context_dir.display())); + } + + // Initial URL + let url = initial_url.unwrap_or("about:blank"); + args.push(url.to_string()); + + let snapshot = self + .process_runtime + .start_process(ProcessStartSpec { + command: chromium_binary.to_string_lossy().to_string(), + args, + cwd: None, + env: state.environment.clone(), + tty: false, + interactive: false, + owner: ProcessOwner::Desktop, + restart_policy: Some(RestartPolicy::Always), + }) + .await + .map_err(|err| { + BrowserProblem::start_failed(format!("failed to start Chromium: {err}")) + })?; + state.chromium = Some(ManagedBrowserProcess { + name: "chromium", + process_id: snapshot.id, + pid: snapshot.pid, + running: snapshot.status == ProcessStatus::Running, + }); + Ok(()) + } + + async fn stop_xvfb_locked(&self, state: &mut BrowserRuntimeStateData) { + if let Some(process) = state.xvfb.take() { + self.write_runtime_log_locked(state, "stopping Xvfb"); + let _ = self + 
.process_runtime + .stop_process(&process.process_id, Some(2_000)) + .await; + if self + .process_runtime + .snapshot(&process.process_id) + .await + .ok() + .is_some_and(|snapshot| snapshot.status == ProcessStatus::Running) + { + let _ = self + .process_runtime + .kill_process(&process.process_id, Some(1_000)) + .await; + } + } + } + + async fn stop_chromium_locked(&self, state: &mut BrowserRuntimeStateData) { + if let Some(process) = state.chromium.take() { + self.write_runtime_log_locked(state, "stopping Chromium"); + let _ = self + .process_runtime + .stop_process(&process.process_id, Some(2_000)) + .await; + if self + .process_runtime + .snapshot(&process.process_id) + .await + .ok() + .is_some_and(|snapshot| snapshot.status == ProcessStatus::Running) + { + let _ = self + .process_runtime + .kill_process(&process.process_id, Some(1_000)) + .await; + } + } + } + + async fn fail_start_locked( + &self, + state: &mut BrowserRuntimeStateData, + problem: BrowserProblem, + ) -> BrowserProblem { + self.record_problem_locked(state, &problem); + self.write_runtime_log_locked(state, "browser runtime startup failed; cleaning up"); + + // Close CDP client if any + if let Some(cdp) = state.cdp_client.take() { + cdp.close().await; + } + + self.stop_chromium_locked(state).await; + self.stop_xvfb_locked(state).await; + + state.state = BrowserState::Failed; + state.display = None; + state.resolution = None; + state.started_at = None; + state.environment.clear(); + problem + } + + // ----------------------------------------------------------------------- + // Internal: helpers + // ----------------------------------------------------------------------- + + async fn wait_for_socket(&self, display_num: i32) -> Result<(), BrowserProblem> { + let socket = socket_path(display_num); + let parent = socket + .parent() + .map(Path::to_path_buf) + .unwrap_or_else(|| PathBuf::from("/tmp/.X11-unix")); + let _ = fs::create_dir_all(parent); + + let start = tokio::time::Instant::now(); + while 
start.elapsed() < STARTUP_TIMEOUT { + if socket.exists() { + return Ok(()); + } + tokio::time::sleep(Duration::from_millis(100)).await; + } + + Err(BrowserProblem::timeout(format!( + "timed out waiting for X socket {}", + socket.display() + ))) + } + + async fn wait_for_cdp(&self) -> Result<(), BrowserProblem> { + let url = format!("http://127.0.0.1:{CDP_PORT}/json/version"); + let client = reqwest::Client::new(); + let start = tokio::time::Instant::now(); + + while start.elapsed() < CDP_POLL_TIMEOUT { + match client.get(&url).send().await { + Ok(resp) if resp.status().is_success() => return Ok(()), + _ => {} + } + tokio::time::sleep(Duration::from_millis(200)).await; + } + + Err(BrowserProblem::timeout(format!( + "CDP endpoint at {url} did not become ready within {}s", + CDP_POLL_TIMEOUT.as_secs() + ))) + } + + fn choose_display_num(&self) -> Result { + let start = self.config.display_num; + if start <= 0 { + return Err(BrowserProblem::start_failed("displayNum must be > 0")); + } + for offset in 0..MAX_DISPLAY_PROBE { + let candidate = start + offset; + if !socket_path(candidate).exists() { + return Ok(candidate); + } + } + Err(BrowserProblem::start_failed(format!( + "unable to find an available X display starting at :{start}" + ))) + } + + fn base_environment(&self, display: &str) -> Result, BrowserProblem> { + let mut environment = HashMap::new(); + environment.insert("DISPLAY".to_string(), display.to_string()); + environment.insert( + "HOME".to_string(), + self.config + .state_dir + .join("home") + .to_string_lossy() + .to_string(), + ); + environment.insert( + "USER".to_string(), + std::env::var("USER").unwrap_or_else(|_| "sandbox-agent".to_string()), + ); + environment.insert( + "PATH".to_string(), + std::env::var("PATH").unwrap_or_default(), + ); + fs::create_dir_all(self.config.state_dir.join("home")).map_err(|err| { + BrowserProblem::start_failed(format!("failed to create browser home: {err}")) + })?; + Ok(environment) + } + + fn 
detect_missing_dependencies(&self) -> Vec { + detect_missing_browser_dependencies() + } + + fn install_command_for(&self, missing_dependencies: &[String]) -> Option { + if !self.platform_supported() || missing_dependencies.is_empty() { + None + } else { + Some("sandbox-agent install browser --yes".to_string()) + } + } + + fn platform_supported(&self) -> bool { + cfg!(target_os = "linux") || self.config.assume_linux_for_tests + } + + fn ensure_state_dir(&self) -> Result<(), String> { + fs::create_dir_all(&self.config.state_dir).map_err(|err| { + format!( + "failed to create browser state dir {}: {err}", + self.config.state_dir.display() + ) + }) + } + + fn write_runtime_log_locked(&self, state: &BrowserRuntimeStateData, message: &str) { + if let Some(parent) = state.runtime_log_path.parent() { + let _ = fs::create_dir_all(parent); + } + let line = format!("{} {}\n", chrono::Utc::now().to_rfc3339(), message); + let _ = OpenOptions::new() + .create(true) + .append(true) + .open(&state.runtime_log_path) + .and_then(|mut file| std::io::Write::write_all(&mut file, line.as_bytes())); + } +} + +// --------------------------------------------------------------------------- +// Free functions +// --------------------------------------------------------------------------- + +fn default_state_dir() -> PathBuf { + if let Ok(value) = std::env::var("XDG_STATE_HOME") { + return PathBuf::from(value).join("sandbox-agent").join("browser"); + } + if let Some(home) = dirs::home_dir() { + return home + .join(".local") + .join("state") + .join("sandbox-agent") + .join("browser"); + } + PathBuf::from("/tmp/sandbox-agent/browser") +} + +fn socket_path(display_num: i32) -> PathBuf { + PathBuf::from(format!("/tmp/.X11-unix/X{display_num}")) +} + +fn find_chromium_binary() -> Option { + find_binary("chromium") + .or_else(|| find_binary("chromium-browser")) + .or_else(|| find_binary("google-chrome")) + .or_else(|| find_binary("google-chrome-stable")) +} + +#[cfg(test)] +mod tests { + use 
super::*; + + #[test] + fn default_config_uses_display_98() { + let config = BrowserRuntimeConfig::default(); + assert_eq!(config.display_num, DEFAULT_DISPLAY_NUM); + } + + #[test] + fn find_chromium_binary_returns_some_on_path() { + // This test is environment-dependent; just ensure no panic + let _ = find_chromium_binary(); + } + + #[test] + fn socket_path_matches_expected_format() { + let path = socket_path(98); + assert_eq!(path, PathBuf::from("/tmp/.X11-unix/X98")); + } + + #[test] + fn install_command_for_empty_deps_is_none() { + let rt = BrowserRuntime::new( + Arc::new(ProcessRuntime::new()), + Arc::new(DesktopRuntime::new(Arc::new(ProcessRuntime::new()))), + ); + assert_eq!(rt.install_command_for(&[]), None); + } +} diff --git a/server/packages/sandbox-agent/src/lib.rs b/server/packages/sandbox-agent/src/lib.rs index f7d4da9c..d095c5ca 100644 --- a/server/packages/sandbox-agent/src/lib.rs +++ b/server/packages/sandbox-agent/src/lib.rs @@ -4,6 +4,7 @@ mod acp_proxy_runtime; mod browser_cdp; mod browser_errors; mod browser_install; +mod browser_runtime; pub mod browser_types; pub mod cli; pub mod daemon; diff --git a/server/packages/sandbox-agent/src/router.rs b/server/packages/sandbox-agent/src/router.rs index 195a5cdd..7b197720 100644 --- a/server/packages/sandbox-agent/src/router.rs +++ b/server/packages/sandbox-agent/src/router.rs @@ -37,6 +37,7 @@ use tracing::Span; use utoipa::{IntoParams, Modify, OpenApi, ToSchema}; use crate::acp_proxy_runtime::{AcpProxyRuntime, ProxyPostOutcome}; +use crate::browser_runtime::BrowserRuntime; use crate::desktop_errors::DesktopProblem; use crate::desktop_runtime::DesktopRuntime; use crate::desktop_types::*; @@ -92,6 +93,7 @@ pub struct AppState { opencode_server_manager: Arc, process_runtime: Arc, desktop_runtime: Arc, + browser_runtime: Arc, pub(crate) branding: BrandingMode, version_cache: Mutex>, } @@ -117,6 +119,10 @@ impl AppState { )); let process_runtime = Arc::new(ProcessRuntime::new()); let desktop_runtime = 
Arc::new(DesktopRuntime::new(process_runtime.clone())); + let browser_runtime = Arc::new(BrowserRuntime::new( + process_runtime.clone(), + desktop_runtime.clone(), + )); Self { auth, agent_manager, @@ -124,6 +130,7 @@ impl AppState { opencode_server_manager, process_runtime, desktop_runtime, + browser_runtime, branding, version_cache: Mutex::new(HashMap::new()), } @@ -149,6 +156,10 @@ impl AppState { self.desktop_runtime.clone() } + pub(crate) fn browser_runtime(&self) -> Arc { + self.browser_runtime.clone() + } + pub(crate) fn purge_version_cache(&self, agent: AgentId) { self.version_cache.lock().unwrap().remove(&agent); } From f8b4df9acb16ab4f7d1b053474e0eca24caf6690 Mon Sep 17 00:00:00 2001 From: Nathan Flurry Date: Tue, 17 Mar 2026 05:07:35 -0700 Subject: [PATCH 06/51] feat: [US-006] - Add browser lifecycle HTTP endpoints (start/stop/status) Co-Authored-By: Claude Opus 4.6 (1M context) --- server/packages/sandbox-agent/src/router.rs | 81 +++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/server/packages/sandbox-agent/src/router.rs b/server/packages/sandbox-agent/src/router.rs index 7b197720..2f5de21c 100644 --- a/server/packages/sandbox-agent/src/router.rs +++ b/server/packages/sandbox-agent/src/router.rs @@ -37,7 +37,9 @@ use tracing::Span; use utoipa::{IntoParams, Modify, OpenApi, ToSchema}; use crate::acp_proxy_runtime::{AcpProxyRuntime, ProxyPostOutcome}; +use crate::browser_errors::BrowserProblem; use crate::browser_runtime::BrowserRuntime; +use crate::browser_types::*; use crate::desktop_errors::DesktopProblem; use crate::desktop_runtime::DesktopRuntime; use crate::desktop_types::*; @@ -270,6 +272,9 @@ pub fn build_router_with_state(shared: Arc) -> (Router, Arc) .route("/desktop/stream/stop", post(post_v1_desktop_stream_stop)) .route("/desktop/stream/status", get(get_v1_desktop_stream_status)) .route("/desktop/stream/signaling", get(get_v1_desktop_stream_ws)) + .route("/browser/status", get(get_v1_browser_status)) + 
.route("/browser/start", post(post_v1_browser_start)) + .route("/browser/stop", post(post_v1_browser_stop)) .route("/agents", get(get_v1_agents)) .route("/agents/:agent", get(get_v1_agent)) .route("/agents/:agent/install", post(post_v1_agent_install)) @@ -457,6 +462,9 @@ pub async fn shutdown_servers(state: &Arc) { post_v1_desktop_stream_start, post_v1_desktop_stream_stop, get_v1_desktop_stream_ws, + get_v1_browser_status, + post_v1_browser_start, + post_v1_browser_stop, get_v1_agents, get_v1_agent, post_v1_agent_install, @@ -526,6 +534,9 @@ pub async fn shutdown_servers(state: &Arc) { DesktopRecordingInfo, DesktopRecordingListResponse, DesktopStreamStatusResponse, + BrowserState, + BrowserStartRequest, + BrowserStatusResponse, DesktopClipboardResponse, DesktopClipboardQuery, DesktopClipboardWriteRequest, @@ -621,6 +632,12 @@ impl From for ApiError { } } +impl From for ApiError { + fn from(value: BrowserProblem) -> Self { + Self::Problem(value.to_problem_details()) + } +} + impl IntoResponse for ApiError { fn into_response(self) -> Response { let problem = match &self { @@ -723,6 +740,70 @@ async fn post_v1_desktop_stop( Ok(Json(status)) } +/// Get browser runtime status. +/// +/// Returns the current browser state, display information, CDP URL, +/// and managed process details. +#[utoipa::path( + get, + path = "/v1/browser/status", + tag = "v1", + responses( + (status = 200, description = "Browser runtime status", body = BrowserStatusResponse), + (status = 401, description = "Authentication required", body = ProblemDetails) + ) +)] +async fn get_v1_browser_status( + State(state): State>, +) -> Result, ApiError> { + Ok(Json(state.browser_runtime().status().await)) +} + +/// Start the browser runtime. +/// +/// Launches Chromium with remote debugging, optionally starts Xvfb for +/// non-headless mode, and returns the resulting browser status snapshot. 
+#[utoipa::path( + post, + path = "/v1/browser/start", + tag = "v1", + request_body = BrowserStartRequest, + responses( + (status = 200, description = "Browser runtime status after start", body = BrowserStatusResponse), + (status = 400, description = "Invalid browser start request", body = ProblemDetails), + (status = 409, description = "Browser or desktop runtime conflict", body = ProblemDetails), + (status = 424, description = "Browser dependencies not installed", body = ProblemDetails), + (status = 500, description = "Browser runtime could not be started", body = ProblemDetails) + ) +)] +async fn post_v1_browser_start( + State(state): State>, + Json(body): Json, +) -> Result, ApiError> { + let status = state.browser_runtime().start(body).await?; + Ok(Json(status)) +} + +/// Stop the browser runtime. +/// +/// Terminates Chromium, the CDP client, and any associated Xvfb/Neko +/// processes, then returns the resulting status snapshot. +#[utoipa::path( + post, + path = "/v1/browser/stop", + tag = "v1", + responses( + (status = 200, description = "Browser runtime status after stop", body = BrowserStatusResponse), + (status = 409, description = "Browser runtime is not active", body = ProblemDetails) + ) +)] +async fn post_v1_browser_stop( + State(state): State>, +) -> Result, ApiError> { + let status = state.browser_runtime().stop().await?; + Ok(Json(status)) +} + /// Capture a full desktop screenshot. 
/// /// Performs a health-gated full-frame screenshot of the managed desktop and From b328d6b214ddcee0c66948e217964ed0719f6da5 Mon Sep 17 00:00:00 2001 From: Nathan Flurry Date: Tue, 17 Mar 2026 05:11:57 -0700 Subject: [PATCH 07/51] feat: [US-007] - Add CDP WebSocket proxy endpoint Co-Authored-By: Claude Opus 4.6 (1M context) --- .../sandbox-agent/src/browser_runtime.rs | 37 ++++++ server/packages/sandbox-agent/src/router.rs | 117 ++++++++++++++++++ 2 files changed, 154 insertions(+) diff --git a/server/packages/sandbox-agent/src/browser_runtime.rs b/server/packages/sandbox-agent/src/browser_runtime.rs index cec6a88e..ccb29d7d 100644 --- a/server/packages/sandbox-agent/src/browser_runtime.rs +++ b/server/packages/sandbox-agent/src/browser_runtime.rs @@ -417,6 +417,43 @@ impl BrowserRuntime { f(cdp).await } + /// Ensure the browser runtime is active. + /// + /// Returns `BrowserProblem::NotActive` if the browser is not running. + pub async fn ensure_active(&self) -> Result<(), BrowserProblem> { + let state = self.inner.lock().await; + if state.state != BrowserState::Active { + return Err(BrowserProblem::not_active()); + } + Ok(()) + } + + /// Discover the CDP WebSocket debugger URL from Chromium. + /// + /// Queries `http://127.0.0.1:9222/json/version` and extracts the + /// `webSocketDebuggerUrl` field. 
+ pub async fn cdp_ws_url(&self) -> Result { + self.ensure_active().await?; + + let version_url = format!("http://127.0.0.1:{CDP_PORT}/json/version"); + let resp = reqwest::get(&version_url).await.map_err(|e| { + BrowserProblem::cdp_error(format!( + "failed to reach CDP endpoint at {version_url}: {e}" + )) + })?; + let version_info: serde_json::Value = resp.json().await.map_err(|e| { + BrowserProblem::cdp_error(format!("invalid JSON from {version_url}: {e}")) + })?; + version_info["webSocketDebuggerUrl"] + .as_str() + .map(|s| s.to_string()) + .ok_or_else(|| { + BrowserProblem::cdp_error( + "webSocketDebuggerUrl not found in /json/version response", + ) + }) + } + /// Get the streaming manager for WebRTC signaling. pub fn streaming_manager(&self) -> &DesktopStreamingManager { &self.streaming_manager diff --git a/server/packages/sandbox-agent/src/router.rs b/server/packages/sandbox-agent/src/router.rs index 2f5de21c..04986e48 100644 --- a/server/packages/sandbox-agent/src/router.rs +++ b/server/packages/sandbox-agent/src/router.rs @@ -275,6 +275,7 @@ pub fn build_router_with_state(shared: Arc) -> (Router, Arc) .route("/browser/status", get(get_v1_browser_status)) .route("/browser/start", post(post_v1_browser_start)) .route("/browser/stop", post(post_v1_browser_stop)) + .route("/browser/cdp", get(get_v1_browser_cdp_ws)) .route("/agents", get(get_v1_agents)) .route("/agents/:agent", get(get_v1_agent)) .route("/agents/:agent/install", post(post_v1_agent_install)) @@ -465,6 +466,7 @@ pub async fn shutdown_servers(state: &Arc) { get_v1_browser_status, post_v1_browser_start, post_v1_browser_stop, + get_v1_browser_cdp_ws, get_v1_agents, get_v1_agent, post_v1_agent_install, @@ -804,6 +806,121 @@ async fn post_v1_browser_stop( Ok(Json(status)) } +/// Open a CDP WebSocket proxy session. +/// +/// Upgrades the connection to a WebSocket that relays bidirectionally to +/// Chromium's internal CDP WebSocket endpoint. 
External tools like Playwright +/// or Puppeteer can connect via `ws://sandbox-host:2468/v1/browser/cdp`. +#[utoipa::path( + get, + path = "/v1/browser/cdp", + tag = "v1", + responses( + (status = 101, description = "WebSocket upgraded"), + (status = 409, description = "Browser runtime is not active", body = ProblemDetails), + (status = 502, description = "CDP connection failed", body = ProblemDetails) + ) +)] +async fn get_v1_browser_cdp_ws( + State(state): State>, + ws: WebSocketUpgrade, +) -> Result { + state.browser_runtime().ensure_active().await?; + Ok(ws + .on_upgrade(move |socket| browser_cdp_ws_session(socket, state.browser_runtime())) + .into_response()) +} + +/// CDP WebSocket proxy session. +/// +/// Proxies the WebSocket bidirectionally between the external client and +/// Chromium's internal CDP WebSocket endpoint. All CDP commands and events +/// are relayed transparently. +async fn browser_cdp_ws_session(mut client_ws: WebSocket, browser_runtime: Arc) { + use futures::SinkExt; + use tokio_tungstenite::tungstenite::Message as TungsteniteMessage; + + // Discover the actual CDP WebSocket URL from Chromium. + let cdp_ws_url = match browser_runtime.cdp_ws_url().await { + Ok(url) => url, + Err(_) => { + let _ = send_ws_error(&mut client_ws, "browser CDP endpoint is not available").await; + let _ = client_ws.close().await; + return; + } + }; + + // Connect to Chromium's internal CDP WebSocket. + let (cdp_ws, _) = match tokio_tungstenite::connect_async(&cdp_ws_url).await { + Ok(conn) => conn, + Err(err) => { + let _ = send_ws_error( + &mut client_ws, + &format!("failed to connect to CDP endpoint: {err}"), + ) + .await; + let _ = client_ws.close().await; + return; + } + }; + + let (mut cdp_sink, mut cdp_stream) = cdp_ws.split(); + + // Relay messages bidirectionally between client and CDP. + loop { + tokio::select! 
{ + // Client → CDP + client_msg = client_ws.recv() => { + match client_msg { + Some(Ok(Message::Text(text))) => { + if cdp_sink.send(TungsteniteMessage::Text(text.into())).await.is_err() { + break; + } + } + Some(Ok(Message::Binary(data))) => { + if cdp_sink.send(TungsteniteMessage::Binary(data.into())).await.is_err() { + break; + } + } + Some(Ok(Message::Ping(payload))) => { + let _ = client_ws.send(Message::Pong(payload)).await; + } + Some(Ok(Message::Close(_))) | None => break, + Some(Ok(Message::Pong(_))) => {} + Some(Err(_)) => break, + } + } + // CDP → Client + cdp_msg = cdp_stream.next() => { + match cdp_msg { + Some(Ok(TungsteniteMessage::Text(text))) => { + if client_ws.send(Message::Text(text.into())).await.is_err() { + break; + } + } + Some(Ok(TungsteniteMessage::Binary(data))) => { + if client_ws.send(Message::Binary(data.into())).await.is_err() { + break; + } + } + Some(Ok(TungsteniteMessage::Ping(payload))) => { + if cdp_sink.send(TungsteniteMessage::Pong(payload.clone())).await.is_err() { + break; + } + } + Some(Ok(TungsteniteMessage::Close(_))) | None => break, + Some(Ok(TungsteniteMessage::Pong(_))) => {} + Some(Ok(TungsteniteMessage::Frame(_))) => {} + Some(Err(_)) => break, + } + } + } + } + + let _ = cdp_sink.close().await; + let _ = client_ws.close().await; +} + /// Capture a full desktop screenshot. 
/// /// Performs a health-gated full-frame screenshot of the managed desktop and From 604239421d1f2aed37258aa16394c014abb1d727 Mon Sep 17 00:00:00 2001 From: Nathan Flurry Date: Tue, 17 Mar 2026 05:18:36 -0700 Subject: [PATCH 08/51] feat: [US-008] - Add browser navigation endpoints Co-Authored-By: Claude Opus 4.6 (1M context) --- .../packages/sandbox-agent/src/browser_cdp.rs | 2 +- .../sandbox-agent/src/browser_runtime.rs | 23 +- server/packages/sandbox-agent/src/router.rs | 344 ++++++++++++++++++ 3 files changed, 364 insertions(+), 5 deletions(-) diff --git a/server/packages/sandbox-agent/src/browser_cdp.rs b/server/packages/sandbox-agent/src/browser_cdp.rs index cfea2ab7..35b77ed6 100644 --- a/server/packages/sandbox-agent/src/browser_cdp.rs +++ b/server/packages/sandbox-agent/src/browser_cdp.rs @@ -154,7 +154,7 @@ impl CdpClient { } /// Close the CDP connection and stop the reader task. - pub async fn close(self) { + pub async fn close(&self) { self.reader_task.abort(); let _ = self.ws_sender.lock().await.close().await; } diff --git a/server/packages/sandbox-agent/src/browser_runtime.rs b/server/packages/sandbox-agent/src/browser_runtime.rs index ccb29d7d..250613d2 100644 --- a/server/packages/sandbox-agent/src/browser_runtime.rs +++ b/server/packages/sandbox-agent/src/browser_runtime.rs @@ -74,7 +74,7 @@ struct BrowserRuntimeStateData { environment: HashMap, xvfb: Option, chromium: Option, - cdp_client: Option, + cdp_client: Option>, context_id: Option, streaming_config: Option, recording_fps: Option, @@ -288,7 +288,7 @@ impl BrowserRuntime { // Connect CDP client match CdpClient::connect().await { Ok(client) => { - state.cdp_client = Some(client); + state.cdp_client = Some(Arc::new(client)); } Err(problem) => { return Err(self.fail_start_locked(&mut state, problem).await); @@ -349,7 +349,7 @@ impl BrowserRuntime { self.write_runtime_log_locked(&state, "stopping browser runtime"); // Close CDP client - if let Some(cdp_client) = state.cdp_client.take() { + if 
let Some(ref cdp_client) = state.cdp_client.take() { cdp_client.close().await; } @@ -417,6 +417,21 @@ impl BrowserRuntime { f(cdp).await } + /// Get an Arc-wrapped CDP client handle. + /// + /// Returns a cloned `Arc` after verifying the browser is active. + /// The caller can use the returned handle without holding the state lock. + pub async fn get_cdp(&self) -> Result, BrowserProblem> { + let state = self.inner.lock().await; + if state.state != BrowserState::Active { + return Err(BrowserProblem::not_active()); + } + state + .cdp_client + .clone() + .ok_or_else(|| BrowserProblem::cdp_error("CDP client is not connected")) + } + /// Ensure the browser runtime is active. /// /// Returns `BrowserProblem::NotActive` if the browser is not running. @@ -818,7 +833,7 @@ impl BrowserRuntime { self.write_runtime_log_locked(state, "browser runtime startup failed; cleaning up"); // Close CDP client if any - if let Some(cdp) = state.cdp_client.take() { + if let Some(ref cdp) = state.cdp_client.take() { cdp.close().await; } diff --git a/server/packages/sandbox-agent/src/router.rs b/server/packages/sandbox-agent/src/router.rs index 04986e48..f8d55db5 100644 --- a/server/packages/sandbox-agent/src/router.rs +++ b/server/packages/sandbox-agent/src/router.rs @@ -276,6 +276,11 @@ pub fn build_router_with_state(shared: Arc) -> (Router, Arc) .route("/browser/start", post(post_v1_browser_start)) .route("/browser/stop", post(post_v1_browser_stop)) .route("/browser/cdp", get(get_v1_browser_cdp_ws)) + .route("/browser/navigate", post(post_v1_browser_navigate)) + .route("/browser/back", post(post_v1_browser_back)) + .route("/browser/forward", post(post_v1_browser_forward)) + .route("/browser/reload", post(post_v1_browser_reload)) + .route("/browser/wait", post(post_v1_browser_wait)) .route("/agents", get(get_v1_agents)) .route("/agents/:agent", get(get_v1_agent)) .route("/agents/:agent/install", post(post_v1_agent_install)) @@ -467,6 +472,11 @@ pub async fn shutdown_servers(state: &Arc) { 
post_v1_browser_start, post_v1_browser_stop, get_v1_browser_cdp_ws, + post_v1_browser_navigate, + post_v1_browser_back, + post_v1_browser_forward, + post_v1_browser_reload, + post_v1_browser_wait, get_v1_agents, get_v1_agent, post_v1_agent_install, @@ -539,6 +549,13 @@ pub async fn shutdown_servers(state: &Arc) { BrowserState, BrowserStartRequest, BrowserStatusResponse, + BrowserNavigateRequest, + BrowserNavigateWaitUntil, + BrowserPageInfo, + BrowserReloadRequest, + BrowserWaitRequest, + BrowserWaitState, + BrowserWaitResponse, DesktopClipboardResponse, DesktopClipboardQuery, DesktopClipboardWriteRequest, @@ -921,6 +938,333 @@ async fn browser_cdp_ws_session(mut client_ws: WebSocket, browser_runtime: Arc>, + Json(body): Json, +) -> Result, ApiError> { + let cdp = state.browser_runtime().get_cdp().await?; + + // Enable Page domain for lifecycle events + cdp.send("Page.enable", None).await?; + + let nav_result = cdp + .send( + "Page.navigate", + Some(serde_json::json!({ "url": body.url })), + ) + .await?; + + // Extract HTTP status from the navigation result if available + let status = nav_result + .get("errorText") + .and_then(|_| None::) + .or_else(|| { + // Page.navigate doesn't directly return HTTP status; + // we rely on frameId being present as a success signal + nav_result.get("frameId").map(|_| 200u16) + }); + + // Wait for the requested lifecycle event + match body.wait_until { + Some(BrowserNavigateWaitUntil::Load) | None => { + tokio::time::sleep(std::time::Duration::from_millis(500)).await; + } + Some(BrowserNavigateWaitUntil::Domcontentloaded) => { + tokio::time::sleep(std::time::Duration::from_millis(300)).await; + } + Some(BrowserNavigateWaitUntil::Networkidle) => { + tokio::time::sleep(std::time::Duration::from_secs(1)).await; + } + } + + // Get current page URL and title + let (url, title) = get_page_info_via_cdp(&cdp).await?; + Ok(Json(BrowserPageInfo { url, title, status })) +} + +/// Navigate the browser back in history. 
+/// +/// Sends a CDP `Page.navigateToHistoryEntry` command with the previous +/// history entry and returns the resulting page URL and title. +#[utoipa::path( + post, + path = "/v1/browser/back", + tag = "v1", + responses( + (status = 200, description = "Page info after navigating back", body = BrowserPageInfo), + (status = 409, description = "Browser runtime is not active", body = ProblemDetails), + (status = 502, description = "CDP command failed", body = ProblemDetails) + ) +)] +async fn post_v1_browser_back( + State(state): State>, +) -> Result, ApiError> { + let cdp = state.browser_runtime().get_cdp().await?; + + let history = cdp.send("Page.getNavigationHistory", None).await?; + let current_index = history + .get("currentIndex") + .and_then(|v| v.as_i64()) + .unwrap_or(0); + let entries = history + .get("entries") + .and_then(|v| v.as_array()) + .cloned() + .unwrap_or_default(); + + if current_index > 0 { + if let Some(entry) = entries.get((current_index - 1) as usize) { + if let Some(entry_id) = entry.get("id").and_then(|v| v.as_i64()) { + cdp.send( + "Page.navigateToHistoryEntry", + Some(serde_json::json!({ "entryId": entry_id })), + ) + .await?; + tokio::time::sleep(std::time::Duration::from_millis(300)).await; + } + } + } + + let (url, title) = get_page_info_via_cdp(&cdp).await?; + Ok(Json(BrowserPageInfo { + url, + title, + status: None, + })) +} + +/// Navigate the browser forward in history. +/// +/// Sends a CDP `Page.navigateToHistoryEntry` command with the next +/// history entry and returns the resulting page URL and title. 
+#[utoipa::path( + post, + path = "/v1/browser/forward", + tag = "v1", + responses( + (status = 200, description = "Page info after navigating forward", body = BrowserPageInfo), + (status = 409, description = "Browser runtime is not active", body = ProblemDetails), + (status = 502, description = "CDP command failed", body = ProblemDetails) + ) +)] +async fn post_v1_browser_forward( + State(state): State>, +) -> Result, ApiError> { + let cdp = state.browser_runtime().get_cdp().await?; + + let history = cdp.send("Page.getNavigationHistory", None).await?; + let current_index = history + .get("currentIndex") + .and_then(|v| v.as_i64()) + .unwrap_or(0); + let entries = history + .get("entries") + .and_then(|v| v.as_array()) + .cloned() + .unwrap_or_default(); + + if (current_index + 1) < entries.len() as i64 { + if let Some(entry) = entries.get((current_index + 1) as usize) { + if let Some(entry_id) = entry.get("id").and_then(|v| v.as_i64()) { + cdp.send( + "Page.navigateToHistoryEntry", + Some(serde_json::json!({ "entryId": entry_id })), + ) + .await?; + tokio::time::sleep(std::time::Duration::from_millis(300)).await; + } + } + } + + let (url, title) = get_page_info_via_cdp(&cdp).await?; + Ok(Json(BrowserPageInfo { + url, + title, + status: None, + })) +} + +/// Reload the current browser page. +/// +/// Sends a CDP `Page.reload` command with an optional cache bypass flag +/// and returns the resulting page URL and title. 
+#[utoipa::path( + post, + path = "/v1/browser/reload", + tag = "v1", + request_body = BrowserReloadRequest, + responses( + (status = 200, description = "Page info after reload", body = BrowserPageInfo), + (status = 409, description = "Browser runtime is not active", body = ProblemDetails), + (status = 502, description = "CDP command failed", body = ProblemDetails) + ) +)] +async fn post_v1_browser_reload( + State(state): State>, + Json(body): Json, +) -> Result, ApiError> { + let cdp = state.browser_runtime().get_cdp().await?; + + let ignore_cache = body.ignore_cache.unwrap_or(false); + cdp.send( + "Page.reload", + Some(serde_json::json!({ "ignoreCache": ignore_cache })), + ) + .await?; + tokio::time::sleep(std::time::Duration::from_millis(500)).await; + + let (url, title) = get_page_info_via_cdp(&cdp).await?; + Ok(Json(BrowserPageInfo { + url, + title, + status: None, + })) +} + +/// Wait for a selector or condition in the browser. +/// +/// Polls the page DOM using `Runtime.evaluate` with a `querySelector` check +/// until the element is found or the timeout expires. 
+#[utoipa::path( + post, + path = "/v1/browser/wait", + tag = "v1", + request_body = BrowserWaitRequest, + responses( + (status = 200, description = "Wait result", body = BrowserWaitResponse), + (status = 409, description = "Browser runtime is not active", body = ProblemDetails), + (status = 502, description = "CDP command failed", body = ProblemDetails), + (status = 504, description = "Timeout waiting for condition", body = ProblemDetails) + ) +)] +async fn post_v1_browser_wait( + State(state): State>, + Json(body): Json, +) -> Result, ApiError> { + let cdp = state.browser_runtime().get_cdp().await?; + + let timeout_ms = body.timeout.unwrap_or(5000); + let selector = body.selector.clone().unwrap_or_else(|| "body".to_string()); + let wait_state = body.state.unwrap_or(BrowserWaitState::Attached); + + let js_expression = match wait_state { + BrowserWaitState::Visible => { + format!( + r#"(() => {{ + const el = document.querySelector({sel}); + if (!el) return false; + const style = window.getComputedStyle(el); + return style.display !== 'none' && style.visibility !== 'hidden' && style.opacity !== '0'; + }})()"#, + sel = serde_json::to_string(&selector).unwrap_or_default() + ) + } + BrowserWaitState::Hidden => { + format!( + r#"(() => {{ + const el = document.querySelector({sel}); + if (!el) return true; + const style = window.getComputedStyle(el); + return style.display === 'none' || style.visibility === 'hidden' || style.opacity === '0'; + }})()"#, + sel = serde_json::to_string(&selector).unwrap_or_default() + ) + } + BrowserWaitState::Attached => { + format!( + "document.querySelector({sel}) !== null", + sel = serde_json::to_string(&selector).unwrap_or_default() + ) + } + }; + + let start = tokio::time::Instant::now(); + let timeout_dur = std::time::Duration::from_millis(timeout_ms); + let poll_interval = std::time::Duration::from_millis(100); + + loop { + let eval_result = cdp + .send( + "Runtime.evaluate", + Some(serde_json::json!({ + "expression": js_expression, 
+ "returnByValue": true + })), + ) + .await?; + + let found = eval_result + .get("result") + .and_then(|r| r.get("value")) + .and_then(|v| v.as_bool()) + .unwrap_or(false); + + if found { + return Ok(Json(BrowserWaitResponse { found: true })); + } + + if start.elapsed() >= timeout_dur { + return Ok(Json(BrowserWaitResponse { found: false })); + } + + tokio::time::sleep(poll_interval).await; + } +} + +/// Helper: get the current page URL and title via CDP Runtime.evaluate. +async fn get_page_info_via_cdp( + cdp: &crate::browser_cdp::CdpClient, +) -> Result<(String, String), BrowserProblem> { + let url_result = cdp + .send( + "Runtime.evaluate", + Some(serde_json::json!({ + "expression": "document.location.href", + "returnByValue": true + })), + ) + .await?; + let url = url_result + .get("result") + .and_then(|r| r.get("value")) + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + + let title_result = cdp + .send( + "Runtime.evaluate", + Some(serde_json::json!({ + "expression": "document.title", + "returnByValue": true + })), + ) + .await?; + let title = title_result + .get("result") + .and_then(|r| r.get("value")) + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + + Ok((url, title)) +} + /// Capture a full desktop screenshot. 
/// /// Performs a health-gated full-frame screenshot of the managed desktop and From 58d7acaabeaf0da1648059d9bdd103a43d6ac80d Mon Sep 17 00:00:00 2001 From: Nathan Flurry Date: Tue, 17 Mar 2026 05:24:13 -0700 Subject: [PATCH 09/51] feat: [US-009] - Add browser tab management endpoints Co-Authored-By: Claude Opus 4.6 (1M context) --- server/packages/sandbox-agent/src/router.rs | 277 ++++++++++++++++++++ 1 file changed, 277 insertions(+) diff --git a/server/packages/sandbox-agent/src/router.rs b/server/packages/sandbox-agent/src/router.rs index f8d55db5..f5b16de2 100644 --- a/server/packages/sandbox-agent/src/router.rs +++ b/server/packages/sandbox-agent/src/router.rs @@ -281,6 +281,15 @@ pub fn build_router_with_state(shared: Arc) -> (Router, Arc) .route("/browser/forward", post(post_v1_browser_forward)) .route("/browser/reload", post(post_v1_browser_reload)) .route("/browser/wait", post(post_v1_browser_wait)) + .route( + "/browser/tabs", + get(get_v1_browser_tabs).post(post_v1_browser_tabs), + ) + .route( + "/browser/tabs/:tab_id/activate", + post(post_v1_browser_tab_activate), + ) + .route("/browser/tabs/:tab_id", delete(delete_v1_browser_tab)) .route("/agents", get(get_v1_agents)) .route("/agents/:agent", get(get_v1_agent)) .route("/agents/:agent/install", post(post_v1_agent_install)) @@ -477,6 +486,10 @@ pub async fn shutdown_servers(state: &Arc) { post_v1_browser_forward, post_v1_browser_reload, post_v1_browser_wait, + get_v1_browser_tabs, + post_v1_browser_tabs, + post_v1_browser_tab_activate, + delete_v1_browser_tab, get_v1_agents, get_v1_agent, post_v1_agent_install, @@ -556,6 +569,10 @@ pub async fn shutdown_servers(state: &Arc) { BrowserWaitRequest, BrowserWaitState, BrowserWaitResponse, + BrowserTabInfo, + BrowserTabListResponse, + BrowserCreateTabRequest, + BrowserActionResponse, DesktopClipboardResponse, DesktopClipboardQuery, DesktopClipboardWriteRequest, @@ -1226,6 +1243,266 @@ async fn post_v1_browser_wait( } } +/// List open browser tabs. 
+/// +/// Returns all open browser tabs (pages) via CDP `Target.getTargets`, +/// filtered to type "page". +#[utoipa::path( + get, + path = "/v1/browser/tabs", + tag = "v1", + responses( + (status = 200, description = "List of open browser tabs", body = BrowserTabListResponse), + (status = 409, description = "Browser runtime is not active", body = ProblemDetails), + (status = 502, description = "CDP command failed", body = ProblemDetails) + ) +)] +async fn get_v1_browser_tabs( + State(state): State>, +) -> Result, ApiError> { + let cdp = state.browser_runtime().get_cdp().await?; + + let result = cdp.send("Target.getTargets", None).await?; + let targets = result + .get("targetInfos") + .and_then(|v| v.as_array()) + .cloned() + .unwrap_or_default(); + + // Get the currently focused target to determine active tab + let active_target_id = { + let history = cdp.send("Page.getNavigationHistory", None).await.ok(); + // The page-level commands operate on the currently attached target, + // so we use Target.getTargets and check which target is the one + // with the current page's URL to determine the active tab. + history.and_then(|h| { + let idx = h.get("currentIndex").and_then(|v| v.as_i64())? 
as usize; + let entries = h.get("entries").and_then(|v| v.as_array())?; + entries + .get(idx) + .and_then(|e| e.get("url").and_then(|v| v.as_str())) + .map(|s| s.to_string()) + }) + }; + + let tabs: Vec = targets + .iter() + .filter(|t| t.get("type").and_then(|v| v.as_str()) == Some("page")) + .map(|t| { + let id = t + .get("targetId") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + let url = t + .get("url") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + let title = t + .get("title") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + let active = active_target_id + .as_deref() + .map(|active_url| active_url == url) + .unwrap_or(false); + BrowserTabInfo { + id, + url, + title, + active, + } + }) + .collect(); + + Ok(Json(BrowserTabListResponse { tabs })) +} + +/// Create a new browser tab. +/// +/// Opens a new tab via CDP `Target.createTarget` and returns the tab info. +#[utoipa::path( + post, + path = "/v1/browser/tabs", + tag = "v1", + request_body = BrowserCreateTabRequest, + responses( + (status = 201, description = "New tab created", body = BrowserTabInfo), + (status = 409, description = "Browser runtime is not active", body = ProblemDetails), + (status = 502, description = "CDP command failed", body = ProblemDetails) + ) +)] +async fn post_v1_browser_tabs( + State(state): State>, + Json(body): Json, +) -> Result<(StatusCode, Json), ApiError> { + let cdp = state.browser_runtime().get_cdp().await?; + + let url = body.url.unwrap_or_else(|| "about:blank".to_string()); + let result = cdp + .send( + "Target.createTarget", + Some(serde_json::json!({ "url": url })), + ) + .await?; + + let target_id = result + .get("targetId") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + + // Give the page a moment to start loading + tokio::time::sleep(std::time::Duration::from_millis(200)).await; + + // Get target info for the newly created tab + let targets_result = cdp.send("Target.getTargets", None).await?; + let targets 
= targets_result + .get("targetInfos") + .and_then(|v| v.as_array()) + .cloned() + .unwrap_or_default(); + + let tab_info = targets + .iter() + .find(|t| t.get("targetId").and_then(|v| v.as_str()) == Some(&target_id)); + + let (tab_url, tab_title) = tab_info + .map(|t| { + let u = t + .get("url") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + let ti = t + .get("title") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + (u, ti) + }) + .unwrap_or_else(|| (url, String::new())); + + Ok(( + StatusCode::CREATED, + Json(BrowserTabInfo { + id: target_id, + url: tab_url, + title: tab_title, + active: false, + }), + )) +} + +/// Activate a browser tab. +/// +/// Brings the specified tab to the foreground via CDP `Target.activateTarget`. +#[utoipa::path( + post, + path = "/v1/browser/tabs/{tab_id}/activate", + tag = "v1", + params( + ("tab_id" = String, Path, description = "Target ID of the tab to activate") + ), + responses( + (status = 200, description = "Tab activated", body = BrowserTabInfo), + (status = 404, description = "Tab not found", body = ProblemDetails), + (status = 409, description = "Browser runtime is not active", body = ProblemDetails), + (status = 502, description = "CDP command failed", body = ProblemDetails) + ) +)] +async fn post_v1_browser_tab_activate( + State(state): State>, + Path(tab_id): Path, +) -> Result, ApiError> { + let cdp = state.browser_runtime().get_cdp().await?; + + // Verify the target exists first + let targets_result = cdp.send("Target.getTargets", None).await?; + let targets = targets_result + .get("targetInfos") + .and_then(|v| v.as_array()) + .cloned() + .unwrap_or_default(); + + let target = targets + .iter() + .find(|t| t.get("targetId").and_then(|v| v.as_str()) == Some(&tab_id)); + + let target = match target { + Some(t) => t.clone(), + None => return Err(BrowserProblem::not_found(&format!("Tab {} not found", tab_id)).into()), + }; + + cdp.send( + "Target.activateTarget", + Some(serde_json::json!({ 
"targetId": tab_id })), + ) + .await?; + + let url = target + .get("url") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + let title = target + .get("title") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + + Ok(Json(BrowserTabInfo { + id: tab_id, + url, + title, + active: true, + })) +} + +/// Close a browser tab. +/// +/// Closes the specified tab via CDP `Target.closeTarget`. +#[utoipa::path( + delete, + path = "/v1/browser/tabs/{tab_id}", + tag = "v1", + params( + ("tab_id" = String, Path, description = "Target ID of the tab to close") + ), + responses( + (status = 200, description = "Tab closed", body = BrowserActionResponse), + (status = 404, description = "Tab not found", body = ProblemDetails), + (status = 409, description = "Browser runtime is not active", body = ProblemDetails), + (status = 502, description = "CDP command failed", body = ProblemDetails) + ) +)] +async fn delete_v1_browser_tab( + State(state): State>, + Path(tab_id): Path, +) -> Result, ApiError> { + let cdp = state.browser_runtime().get_cdp().await?; + + let result = cdp + .send( + "Target.closeTarget", + Some(serde_json::json!({ "targetId": tab_id })), + ) + .await?; + + let success = result + .get("success") + .and_then(|v| v.as_bool()) + .unwrap_or(false); + + if !success { + return Err(BrowserProblem::not_found(&format!("Tab {} not found", tab_id)).into()); + } + + Ok(Json(BrowserActionResponse { ok: true })) +} + /// Helper: get the current page URL and title via CDP Runtime.evaluate. 
async fn get_page_info_via_cdp( cdp: &crate::browser_cdp::CdpClient, From 45258c32b03c4e1e46786589241e7de235149946 Mon Sep 17 00:00:00 2001 From: Nathan Flurry Date: Tue, 17 Mar 2026 05:28:06 -0700 Subject: [PATCH 10/51] feat: [US-010] - Add browser screenshot and PDF endpoints Co-Authored-By: Claude Opus 4.6 (1M context) --- server/packages/sandbox-agent/src/router.rs | 160 ++++++++++++++++++++ 1 file changed, 160 insertions(+) diff --git a/server/packages/sandbox-agent/src/router.rs b/server/packages/sandbox-agent/src/router.rs index f5b16de2..313a8787 100644 --- a/server/packages/sandbox-agent/src/router.rs +++ b/server/packages/sandbox-agent/src/router.rs @@ -290,6 +290,8 @@ pub fn build_router_with_state(shared: Arc) -> (Router, Arc) post(post_v1_browser_tab_activate), ) .route("/browser/tabs/:tab_id", delete(delete_v1_browser_tab)) + .route("/browser/screenshot", get(get_v1_browser_screenshot)) + .route("/browser/pdf", get(get_v1_browser_pdf)) .route("/agents", get(get_v1_agents)) .route("/agents/:agent", get(get_v1_agent)) .route("/agents/:agent/install", post(post_v1_agent_install)) @@ -490,6 +492,8 @@ pub async fn shutdown_servers(state: &Arc) { post_v1_browser_tabs, post_v1_browser_tab_activate, delete_v1_browser_tab, + get_v1_browser_screenshot, + get_v1_browser_pdf, get_v1_agents, get_v1_agent, post_v1_agent_install, @@ -573,6 +577,10 @@ pub async fn shutdown_servers(state: &Arc) { BrowserTabListResponse, BrowserCreateTabRequest, BrowserActionResponse, + BrowserScreenshotQuery, + BrowserScreenshotFormat, + BrowserPdfQuery, + BrowserPdfFormat, DesktopClipboardResponse, DesktopClipboardQuery, DesktopClipboardWriteRequest, @@ -1503,6 +1511,158 @@ async fn delete_v1_browser_tab( Ok(Json(BrowserActionResponse { ok: true })) } +/// Capture a browser page screenshot. +/// +/// Captures a screenshot of the current browser page via CDP +/// `Page.captureScreenshot` and returns the image bytes with the appropriate +/// Content-Type header. 
+#[utoipa::path( + get, + path = "/v1/browser/screenshot", + tag = "v1", + params(BrowserScreenshotQuery), + responses( + (status = 200, description = "Browser screenshot as image bytes"), + (status = 409, description = "Browser runtime is not active", body = ProblemDetails), + (status = 502, description = "CDP command failed", body = ProblemDetails) + ) +)] +async fn get_v1_browser_screenshot( + State(state): State>, + Query(query): Query, +) -> Result { + use base64::engine::general_purpose::STANDARD as BASE64_ENGINE; + use base64::Engine; + + let cdp = state.browser_runtime().get_cdp().await?; + + let fmt = query.format.unwrap_or(BrowserScreenshotFormat::Png); + let cdp_format = match fmt { + BrowserScreenshotFormat::Png => "png", + BrowserScreenshotFormat::Jpeg => "jpeg", + BrowserScreenshotFormat::Webp => "webp", + }; + + let mut params = serde_json::json!({ "format": cdp_format }); + if let Some(quality) = query.quality { + params["quality"] = serde_json::json!(quality); + } + if query.full_page.unwrap_or(false) { + params["captureBeyondViewport"] = serde_json::json!(true); + } + if let Some(ref selector) = query.selector { + // Resolve element bounding box for clip region + let js = format!( + r#"(() => {{ + const el = document.querySelector({selector}); + if (!el) return null; + const r = el.getBoundingClientRect(); + return {{ x: r.x, y: r.y, width: r.width, height: r.height }}; + }})()"#, + selector = serde_json::to_string(selector).unwrap_or_default() + ); + let eval_result = cdp + .send( + "Runtime.evaluate", + Some(serde_json::json!({ + "expression": js, + "returnByValue": true + })), + ) + .await?; + if let Some(value) = eval_result.get("result").and_then(|r| r.get("value")) { + if !value.is_null() { + params["clip"] = serde_json::json!({ + "x": value.get("x").and_then(|v| v.as_f64()).unwrap_or(0.0), + "y": value.get("y").and_then(|v| v.as_f64()).unwrap_or(0.0), + "width": value.get("width").and_then(|v| v.as_f64()).unwrap_or(0.0), + "height": 
value.get("height").and_then(|v| v.as_f64()).unwrap_or(0.0), + "scale": 1 + }); + } else { + return Err(BrowserProblem::invalid_selector(&format!( + "No element matches selector: {}", + selector + )) + .into()); + } + } + } + + let result = cdp.send("Page.captureScreenshot", Some(params)).await?; + + let data_b64 = result.get("data").and_then(|v| v.as_str()).unwrap_or(""); + let bytes = BASE64_ENGINE + .decode(data_b64) + .map_err(|e| BrowserProblem::cdp_error(&format!("Failed to decode screenshot: {}", e)))?; + + let content_type = match fmt { + BrowserScreenshotFormat::Png => "image/png", + BrowserScreenshotFormat::Jpeg => "image/jpeg", + BrowserScreenshotFormat::Webp => "image/webp", + }; + + Ok(([(header::CONTENT_TYPE, content_type)], Bytes::from(bytes)).into_response()) +} + +/// Generate a PDF of the current browser page. +/// +/// Generates a PDF document from the current page via CDP `Page.printToPDF` +/// and returns the PDF bytes. +#[utoipa::path( + get, + path = "/v1/browser/pdf", + tag = "v1", + params(BrowserPdfQuery), + responses( + (status = 200, description = "Browser page as PDF bytes"), + (status = 409, description = "Browser runtime is not active", body = ProblemDetails), + (status = 502, description = "CDP command failed", body = ProblemDetails) + ) +)] +async fn get_v1_browser_pdf( + State(state): State>, + Query(query): Query, +) -> Result { + use base64::engine::general_purpose::STANDARD as BASE64_ENGINE; + use base64::Engine; + + let cdp = state.browser_runtime().get_cdp().await?; + + let (paper_width, paper_height) = match query.format.unwrap_or(BrowserPdfFormat::Letter) { + BrowserPdfFormat::A4 => (8.27_f64, 11.69_f64), + BrowserPdfFormat::Letter => (8.5_f64, 11.0_f64), + BrowserPdfFormat::Legal => (8.5_f64, 14.0_f64), + }; + + let mut params = serde_json::json!({ + "paperWidth": paper_width, + "paperHeight": paper_height, + }); + if let Some(landscape) = query.landscape { + params["landscape"] = serde_json::json!(landscape); + } + if let 
Some(print_background) = query.print_background { + params["printBackground"] = serde_json::json!(print_background); + } + if let Some(scale) = query.scale { + params["scale"] = serde_json::json!(scale); + } + + let result = cdp.send("Page.printToPDF", Some(params)).await?; + + let data_b64 = result.get("data").and_then(|v| v.as_str()).unwrap_or(""); + let bytes = BASE64_ENGINE + .decode(data_b64) + .map_err(|e| BrowserProblem::cdp_error(&format!("Failed to decode PDF: {}", e)))?; + + Ok(( + [(header::CONTENT_TYPE, "application/pdf")], + Bytes::from(bytes), + ) + .into_response()) +} + /// Helper: get the current page URL and title via CDP Runtime.evaluate. async fn get_page_info_via_cdp( cdp: &crate::browser_cdp::CdpClient, From 1ae732d5b652ed8788c8ff4e9da66e320e08a42c Mon Sep 17 00:00:00 2001 From: Nathan Flurry Date: Tue, 17 Mar 2026 05:32:21 -0700 Subject: [PATCH 11/51] feat: [US-011] - Add browser content extraction endpoints (HTML, markdown, links, snapshot) Co-Authored-By: Claude Opus 4.6 (1M context) --- server/packages/sandbox-agent/Cargo.toml | 1 + server/packages/sandbox-agent/src/router.rs | 245 ++++++++++++++++++++ 2 files changed, 246 insertions(+) diff --git a/server/packages/sandbox-agent/Cargo.toml b/server/packages/sandbox-agent/Cargo.toml index 8749a22d..1fe04cf6 100644 --- a/server/packages/sandbox-agent/Cargo.toml +++ b/server/packages/sandbox-agent/Cargo.toml @@ -42,6 +42,7 @@ toml_edit.workspace = true tar.workspace = true zip.workspace = true tokio-tungstenite = "0.24" +html2md = "0.2" tempfile = { workspace = true, optional = true } [target.'cfg(unix)'.dependencies] diff --git a/server/packages/sandbox-agent/src/router.rs b/server/packages/sandbox-agent/src/router.rs index 313a8787..40403142 100644 --- a/server/packages/sandbox-agent/src/router.rs +++ b/server/packages/sandbox-agent/src/router.rs @@ -292,6 +292,10 @@ pub fn build_router_with_state(shared: Arc) -> (Router, Arc) .route("/browser/tabs/:tab_id", delete(delete_v1_browser_tab)) 
.route("/browser/screenshot", get(get_v1_browser_screenshot)) .route("/browser/pdf", get(get_v1_browser_pdf)) + .route("/browser/content", get(get_v1_browser_content)) + .route("/browser/markdown", get(get_v1_browser_markdown)) + .route("/browser/links", get(get_v1_browser_links)) + .route("/browser/snapshot", get(get_v1_browser_snapshot)) .route("/agents", get(get_v1_agents)) .route("/agents/:agent", get(get_v1_agent)) .route("/agents/:agent/install", post(post_v1_agent_install)) @@ -494,6 +498,10 @@ pub async fn shutdown_servers(state: &Arc) { delete_v1_browser_tab, get_v1_browser_screenshot, get_v1_browser_pdf, + get_v1_browser_content, + get_v1_browser_markdown, + get_v1_browser_links, + get_v1_browser_snapshot, get_v1_agents, get_v1_agent, post_v1_agent_install, @@ -581,6 +589,12 @@ pub async fn shutdown_servers(state: &Arc) { BrowserScreenshotFormat, BrowserPdfQuery, BrowserPdfFormat, + BrowserContentQuery, + BrowserContentResponse, + BrowserMarkdownResponse, + BrowserLinkInfo, + BrowserLinksResponse, + BrowserSnapshotResponse, DesktopClipboardResponse, DesktopClipboardQuery, DesktopClipboardWriteRequest, @@ -1663,6 +1677,237 @@ async fn get_v1_browser_pdf( .into_response()) } +/// Get the HTML content of the current browser page. +/// +/// Returns the outerHTML of the page or a specific element selected by a CSS +/// selector, along with the current URL and title. 
+#[utoipa::path( + get, + path = "/v1/browser/content", + tag = "v1", + params(BrowserContentQuery), + responses( + (status = 200, description = "Page HTML content", body = BrowserContentResponse), + (status = 409, description = "Browser runtime is not active", body = ProblemDetails), + (status = 502, description = "CDP command failed", body = ProblemDetails) + ) +)] +async fn get_v1_browser_content( + State(state): State>, + Query(query): Query, +) -> Result, ApiError> { + let cdp = state.browser_runtime().get_cdp().await?; + let (url, title) = get_page_info_via_cdp(&cdp).await?; + + let expression = if let Some(ref selector) = query.selector { + let escaped = selector.replace('\\', "\\\\").replace('\'', "\\'"); + format!( + "(function() {{ var el = document.querySelector('{}'); return el ? el.outerHTML : null; }})()", + escaped + ) + } else { + "document.documentElement.outerHTML".to_string() + }; + + let result = cdp + .send( + "Runtime.evaluate", + Some(serde_json::json!({ + "expression": expression, + "returnByValue": true + })), + ) + .await?; + + let html = result + .get("result") + .and_then(|r| r.get("value")) + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + + if query.selector.is_some() && html.is_empty() { + return Err(BrowserProblem::not_found(&format!( + "Element not found: {}", + query.selector.as_deref().unwrap_or("") + )) + .into()); + } + + Ok(Json(BrowserContentResponse { html, url, title })) +} + +/// Get the page content as Markdown. +/// +/// Extracts the DOM HTML via CDP, strips navigation/footer/aside elements, and +/// converts the remaining content to Markdown using html2md. 
+#[utoipa::path( + get, + path = "/v1/browser/markdown", + tag = "v1", + responses( + (status = 200, description = "Page content as Markdown", body = BrowserMarkdownResponse), + (status = 409, description = "Browser runtime is not active", body = ProblemDetails), + (status = 502, description = "CDP command failed", body = ProblemDetails) + ) +)] +async fn get_v1_browser_markdown( + State(state): State>, +) -> Result, ApiError> { + let cdp = state.browser_runtime().get_cdp().await?; + let (url, title) = get_page_info_via_cdp(&cdp).await?; + + // Extract body HTML with nav/footer/aside stripped out + let expression = r#" + (function() { + var clone = document.body.cloneNode(true); + var selectors = ['nav', 'footer', 'aside', 'header', '[role="navigation"]', '[role="banner"]', '[role="contentinfo"]']; + selectors.forEach(function(sel) { + clone.querySelectorAll(sel).forEach(function(el) { el.remove(); }); + }); + return clone.innerHTML; + })() + "#; + + let result = cdp + .send( + "Runtime.evaluate", + Some(serde_json::json!({ + "expression": expression, + "returnByValue": true + })), + ) + .await?; + + let html = result + .get("result") + .and_then(|r| r.get("value")) + .and_then(|v| v.as_str()) + .unwrap_or(""); + + let markdown = html2md::parse_html(html); + + Ok(Json(BrowserMarkdownResponse { + markdown, + url, + title, + })) +} + +/// Get all links on the current page. +/// +/// Extracts all anchor elements from the page via CDP and returns their href +/// and text content. 
+#[utoipa::path( + get, + path = "/v1/browser/links", + tag = "v1", + responses( + (status = 200, description = "Links on the page", body = BrowserLinksResponse), + (status = 409, description = "Browser runtime is not active", body = ProblemDetails), + (status = 502, description = "CDP command failed", body = ProblemDetails) + ) +)] +async fn get_v1_browser_links( + State(state): State>, +) -> Result, ApiError> { + let cdp = state.browser_runtime().get_cdp().await?; + let (url, _title) = get_page_info_via_cdp(&cdp).await?; + + let expression = r#" + (function() { + var links = []; + document.querySelectorAll('a[href]').forEach(function(a) { + links.push({ href: a.href, text: (a.textContent || '').trim() }); + }); + return JSON.stringify(links); + })() + "#; + + let result = cdp + .send( + "Runtime.evaluate", + Some(serde_json::json!({ + "expression": expression, + "returnByValue": true + })), + ) + .await?; + + let json_str = result + .get("result") + .and_then(|r| r.get("value")) + .and_then(|v| v.as_str()) + .unwrap_or("[]"); + + let links: Vec = serde_json::from_str(json_str).unwrap_or_default(); + + Ok(Json(BrowserLinksResponse { links, url })) +} + +/// Get an accessibility tree snapshot of the current page. +/// +/// Returns a text representation of the page accessibility tree via CDP +/// `Accessibility.getFullAXTree`. 
+#[utoipa::path( + get, + path = "/v1/browser/snapshot", + tag = "v1", + responses( + (status = 200, description = "Accessibility tree snapshot", body = BrowserSnapshotResponse), + (status = 409, description = "Browser runtime is not active", body = ProblemDetails), + (status = 502, description = "CDP command failed", body = ProblemDetails) + ) +)] +async fn get_v1_browser_snapshot( + State(state): State>, +) -> Result, ApiError> { + let cdp = state.browser_runtime().get_cdp().await?; + let (url, title) = get_page_info_via_cdp(&cdp).await?; + + let result = cdp.send("Accessibility.getFullAXTree", None).await?; + + // Format the AX tree into a readable text snapshot + let nodes = result + .get("nodes") + .and_then(|v| v.as_array()) + .cloned() + .unwrap_or_default(); + + let mut snapshot = String::new(); + for node in &nodes { + let role = node + .get("role") + .and_then(|r| r.get("value")) + .and_then(|v| v.as_str()) + .unwrap_or(""); + let name = node + .get("name") + .and_then(|n| n.get("value")) + .and_then(|v| v.as_str()) + .unwrap_or(""); + + if role == "none" || role == "GenericContainer" || (role.is_empty() && name.is_empty()) { + continue; + } + + if !snapshot.is_empty() { + snapshot.push('\n'); + } + if name.is_empty() { + snapshot.push_str(role); + } else { + snapshot.push_str(&format!("{}: {}", role, name)); + } + } + + Ok(Json(BrowserSnapshotResponse { + snapshot, + url, + title, + })) +} + /// Helper: get the current page URL and title via CDP Runtime.evaluate. 
async fn get_page_info_via_cdp( cdp: &crate::browser_cdp::CdpClient, From a77b768fe415f62f076211554148a476c35cbd7a Mon Sep 17 00:00:00 2001 From: Nathan Flurry Date: Tue, 17 Mar 2026 05:35:38 -0700 Subject: [PATCH 12/51] feat: [US-012] - Add browser scrape and execute endpoints Co-Authored-By: Claude Opus 4.6 (1M context) --- server/packages/sandbox-agent/src/router.rs | 143 ++++++++++++++++++++ 1 file changed, 143 insertions(+) diff --git a/server/packages/sandbox-agent/src/router.rs b/server/packages/sandbox-agent/src/router.rs index 40403142..8416792f 100644 --- a/server/packages/sandbox-agent/src/router.rs +++ b/server/packages/sandbox-agent/src/router.rs @@ -296,6 +296,8 @@ pub fn build_router_with_state(shared: Arc) -> (Router, Arc) .route("/browser/markdown", get(get_v1_browser_markdown)) .route("/browser/links", get(get_v1_browser_links)) .route("/browser/snapshot", get(get_v1_browser_snapshot)) + .route("/browser/scrape", post(post_v1_browser_scrape)) + .route("/browser/execute", post(post_v1_browser_execute)) .route("/agents", get(get_v1_agents)) .route("/agents/:agent", get(get_v1_agent)) .route("/agents/:agent/install", post(post_v1_agent_install)) @@ -502,6 +504,8 @@ pub async fn shutdown_servers(state: &Arc) { get_v1_browser_markdown, get_v1_browser_links, get_v1_browser_snapshot, + post_v1_browser_scrape, + post_v1_browser_execute, get_v1_agents, get_v1_agent, post_v1_agent_install, @@ -595,6 +599,10 @@ pub async fn shutdown_servers(state: &Arc) { BrowserLinkInfo, BrowserLinksResponse, BrowserSnapshotResponse, + BrowserScrapeRequest, + BrowserScrapeResponse, + BrowserExecuteRequest, + BrowserExecuteResponse, DesktopClipboardResponse, DesktopClipboardQuery, DesktopClipboardWriteRequest, @@ -1908,6 +1916,141 @@ async fn get_v1_browser_snapshot( })) } +/// Scrape structured data from the current page using CSS selectors. 
+/// +/// For each key in the `selectors` map, runs `querySelectorAll` with the CSS +/// selector value and collects `textContent` from every match. If `url` is +/// provided the browser navigates there first. +#[utoipa::path( + post, + path = "/v1/browser/scrape", + tag = "v1", + request_body = BrowserScrapeRequest, + responses( + (status = 200, description = "Scraped data", body = BrowserScrapeResponse), + (status = 409, description = "Browser runtime is not active", body = ProblemDetails), + (status = 502, description = "CDP command failed", body = ProblemDetails) + ) +)] +async fn post_v1_browser_scrape( + State(state): State>, + Json(body): Json, +) -> Result, ApiError> { + let cdp = state.browser_runtime().get_cdp().await?; + + // Navigate first if a URL was provided + if let Some(ref url) = body.url { + cdp.send("Page.enable", None).await?; + cdp.send("Page.navigate", Some(serde_json::json!({ "url": url }))) + .await?; + tokio::time::sleep(std::time::Duration::from_millis(500)).await; + } + + // Build a JS expression that evaluates all selectors and returns a JSON object + let selectors_json = serde_json::to_string(&body.selectors) + .map_err(|e| BrowserProblem::cdp_error(e.to_string()))?; + + let expression = format!( + r#"(() => {{ + const selectors = {selectors_json}; + const result = {{}}; + for (const [key, sel] of Object.entries(selectors)) {{ + const els = document.querySelectorAll(sel); + result[key] = Array.from(els).map(el => (el.textContent || '').trim()); + }} + return JSON.stringify(result); + }})()"# + ); + + let result = cdp + .send( + "Runtime.evaluate", + Some(serde_json::json!({ + "expression": expression, + "returnByValue": true + })), + ) + .await?; + + let json_str = result + .get("result") + .and_then(|r| r.get("value")) + .and_then(|v| v.as_str()) + .unwrap_or("{}"); + + let data: std::collections::HashMap> = + serde_json::from_str(json_str).unwrap_or_default(); + + let (url, title) = get_page_info_via_cdp(&cdp).await?; + + 
Ok(Json(BrowserScrapeResponse { data, url, title })) +} + +/// Execute a JavaScript expression in the browser. +/// +/// Evaluates the given expression via CDP `Runtime.evaluate` and returns the +/// result value and its type. Set `awaitPromise` to resolve async expressions. +#[utoipa::path( + post, + path = "/v1/browser/execute", + tag = "v1", + request_body = BrowserExecuteRequest, + responses( + (status = 200, description = "Execution result", body = BrowserExecuteResponse), + (status = 409, description = "Browser runtime is not active", body = ProblemDetails), + (status = 502, description = "CDP command failed", body = ProblemDetails) + ) +)] +async fn post_v1_browser_execute( + State(state): State>, + Json(body): Json, +) -> Result, ApiError> { + let cdp = state.browser_runtime().get_cdp().await?; + + let mut params = serde_json::json!({ + "expression": body.expression, + "returnByValue": true + }); + + if let Some(true) = body.await_promise { + params["awaitPromise"] = serde_json::json!(true); + } + + let result = cdp.send("Runtime.evaluate", Some(params)).await?; + + // Check for evaluation exceptions + if let Some(exception) = result.get("exceptionDetails") { + let msg = exception + .get("exception") + .and_then(|e| e.get("description")) + .and_then(|d| d.as_str()) + .or_else(|| exception.get("text").and_then(|t| t.as_str())) + .unwrap_or("Script execution failed"); + return Err(BrowserProblem::cdp_error(msg.to_string()).into()); + } + + let eval_result = result + .get("result") + .cloned() + .unwrap_or(serde_json::json!({})); + + let type_ = eval_result + .get("type") + .and_then(|t| t.as_str()) + .unwrap_or("undefined") + .to_string(); + + let value = eval_result + .get("value") + .cloned() + .unwrap_or(serde_json::Value::Null); + + Ok(Json(BrowserExecuteResponse { + result: value, + type_, + })) +} + /// Helper: get the current page URL and title via CDP Runtime.evaluate. 
async fn get_page_info_via_cdp( cdp: &crate::browser_cdp::CdpClient, From 4cf3dab73b44814f40242c866ee0a0b9b86634ab Mon Sep 17 00:00:00 2001 From: Nathan Flurry Date: Tue, 17 Mar 2026 05:39:36 -0700 Subject: [PATCH 13/51] feat: [US-013] - Add browser interaction endpoints (click, type, select, hover, scroll) Co-Authored-By: Claude Opus 4.6 (1M context) --- server/packages/sandbox-agent/src/router.rs | 458 ++++++++++++++++++++ 1 file changed, 458 insertions(+) diff --git a/server/packages/sandbox-agent/src/router.rs b/server/packages/sandbox-agent/src/router.rs index 8416792f..9aaf9d98 100644 --- a/server/packages/sandbox-agent/src/router.rs +++ b/server/packages/sandbox-agent/src/router.rs @@ -298,6 +298,11 @@ pub fn build_router_with_state(shared: Arc) -> (Router, Arc) .route("/browser/snapshot", get(get_v1_browser_snapshot)) .route("/browser/scrape", post(post_v1_browser_scrape)) .route("/browser/execute", post(post_v1_browser_execute)) + .route("/browser/click", post(post_v1_browser_click)) + .route("/browser/type", post(post_v1_browser_type)) + .route("/browser/select", post(post_v1_browser_select)) + .route("/browser/hover", post(post_v1_browser_hover)) + .route("/browser/scroll", post(post_v1_browser_scroll)) .route("/agents", get(get_v1_agents)) .route("/agents/:agent", get(get_v1_agent)) .route("/agents/:agent/install", post(post_v1_agent_install)) @@ -506,6 +511,11 @@ pub async fn shutdown_servers(state: &Arc) { get_v1_browser_snapshot, post_v1_browser_scrape, post_v1_browser_execute, + post_v1_browser_click, + post_v1_browser_type, + post_v1_browser_select, + post_v1_browser_hover, + post_v1_browser_scroll, get_v1_agents, get_v1_agent, post_v1_agent_install, @@ -603,6 +613,12 @@ pub async fn shutdown_servers(state: &Arc) { BrowserScrapeResponse, BrowserExecuteRequest, BrowserExecuteResponse, + BrowserClickRequest, + BrowserMouseButton, + BrowserTypeRequest, + BrowserSelectRequest, + BrowserHoverRequest, + BrowserScrollRequest, DesktopClipboardResponse, 
DesktopClipboardQuery, DesktopClipboardWriteRequest, @@ -2051,6 +2067,448 @@ async fn post_v1_browser_execute( })) } +/// Click an element in the browser page. +/// +/// Finds the element matching `selector`, computes its center point via +/// `DOM.getBoxModel`, and dispatches mouse events through `Input.dispatchMouseEvent`. +#[utoipa::path( + post, + path = "/v1/browser/click", + tag = "v1", + request_body = BrowserClickRequest, + responses( + (status = 200, description = "Click performed", body = BrowserActionResponse), + (status = 404, description = "Element not found", body = ProblemDetails), + (status = 409, description = "Browser runtime is not active", body = ProblemDetails), + (status = 502, description = "CDP command failed", body = ProblemDetails) + ) +)] +async fn post_v1_browser_click( + State(state): State>, + Json(body): Json, +) -> Result, ApiError> { + let cdp = state.browser_runtime().get_cdp().await?; + + cdp.send("DOM.enable", None).await?; + + // Get document root + let doc = cdp.send("DOM.getDocument", None).await?; + let root_id = doc + .get("root") + .and_then(|r| r.get("nodeId")) + .and_then(|n| n.as_i64()) + .unwrap_or(0); + + // Find element by selector + let qs_result = cdp + .send( + "DOM.querySelector", + Some(serde_json::json!({ + "nodeId": root_id, + "selector": body.selector + })), + ) + .await?; + + let node_id = qs_result + .get("nodeId") + .and_then(|n| n.as_i64()) + .unwrap_or(0); + + if node_id == 0 { + return Err( + BrowserProblem::not_found(format!("Element not found: {}", body.selector)).into(), + ); + } + + // Get element box model for center coordinates + let box_model = cdp + .send( + "DOM.getBoxModel", + Some(serde_json::json!({ "nodeId": node_id })), + ) + .await?; + + let content = box_model + .get("model") + .and_then(|m| m.get("content")) + .and_then(|c| c.as_array()) + .ok_or_else(|| BrowserProblem::cdp_error("Failed to get element box model".to_string()))?; + + // content is [x1,y1, x2,y2, x3,y3, x4,y4] – compute 
center + let x = content + .iter() + .step_by(2) + .filter_map(|v| v.as_f64()) + .sum::() + / 4.0; + let y = content + .iter() + .skip(1) + .step_by(2) + .filter_map(|v| v.as_f64()) + .sum::() + / 4.0; + + let button = match body.button { + Some(BrowserMouseButton::Right) => "right", + Some(BrowserMouseButton::Middle) => "middle", + _ => "left", + }; + let click_count = body.click_count.unwrap_or(1); + + // Dispatch mousePressed + mouseReleased + cdp.send( + "Input.dispatchMouseEvent", + Some(serde_json::json!({ + "type": "mousePressed", + "x": x, + "y": y, + "button": button, + "clickCount": click_count + })), + ) + .await?; + + cdp.send( + "Input.dispatchMouseEvent", + Some(serde_json::json!({ + "type": "mouseReleased", + "x": x, + "y": y, + "button": button, + "clickCount": click_count + })), + ) + .await?; + + Ok(Json(BrowserActionResponse { ok: true })) +} + +/// Type text into a focused element. +/// +/// Finds the element matching `selector`, focuses it via `DOM.focus`, optionally +/// clears existing content, then dispatches key events for each character. 
+#[utoipa::path( + post, + path = "/v1/browser/type", + tag = "v1", + request_body = BrowserTypeRequest, + responses( + (status = 200, description = "Text typed", body = BrowserActionResponse), + (status = 404, description = "Element not found", body = ProblemDetails), + (status = 409, description = "Browser runtime is not active", body = ProblemDetails), + (status = 502, description = "CDP command failed", body = ProblemDetails) + ) +)] +async fn post_v1_browser_type( + State(state): State>, + Json(body): Json, +) -> Result, ApiError> { + let cdp = state.browser_runtime().get_cdp().await?; + + cdp.send("DOM.enable", None).await?; + + // Get document root and find element + let doc = cdp.send("DOM.getDocument", None).await?; + let root_id = doc + .get("root") + .and_then(|r| r.get("nodeId")) + .and_then(|n| n.as_i64()) + .unwrap_or(0); + + let qs_result = cdp + .send( + "DOM.querySelector", + Some(serde_json::json!({ + "nodeId": root_id, + "selector": body.selector + })), + ) + .await?; + + let node_id = qs_result + .get("nodeId") + .and_then(|n| n.as_i64()) + .unwrap_or(0); + + if node_id == 0 { + return Err( + BrowserProblem::not_found(format!("Element not found: {}", body.selector)).into(), + ); + } + + // Focus the element + cdp.send("DOM.focus", Some(serde_json::json!({ "nodeId": node_id }))) + .await?; + + // Clear existing content if requested + if body.clear == Some(true) { + cdp.send( + "Runtime.evaluate", + Some(serde_json::json!({ + "expression": format!( + "document.querySelector('{}').value = ''", + body.selector.replace('\'', "\\'") + ), + "returnByValue": true + })), + ) + .await?; + } + + // Type each character via Input.dispatchKeyEvent + let delay_ms = body.delay.unwrap_or(0); + for ch in body.text.chars() { + cdp.send( + "Input.dispatchKeyEvent", + Some(serde_json::json!({ + "type": "keyDown", + "text": ch.to_string() + })), + ) + .await?; + + cdp.send( + "Input.dispatchKeyEvent", + Some(serde_json::json!({ + "type": "keyUp", + "text": 
ch.to_string() + })), + ) + .await?; + + if delay_ms > 0 { + tokio::time::sleep(std::time::Duration::from_millis(delay_ms)).await; + } + } + + Ok(Json(BrowserActionResponse { ok: true })) +} + +/// Select an option in a `` element.", + "description": "Finds the element matching `selector` and sets its value via `Runtime.evaluate`,\nthen dispatches a `change` event so listeners fire.", + "operationId": "post_v1_browser_select", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BrowserSelectRequest" + } + } + }, + "required": true + }, "responses": { "200": { - "description": "Desktop recording stopped", + "description": "Option selected", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/DesktopRecordingInfo" + "$ref": "#/components/schemas/BrowserActionResponse" + } + } + } + }, + "404": { + "description": "Element not found", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" } } } }, "409": { - "description": "No active desktop recording", + "description": "Browser runtime is not active", "content": { "application/json": { "schema": { @@ -1575,7 +1721,7 @@ } }, "502": { - "description": "Desktop recording stop failed", + "description": "CDP command failed", "content": { "application/json": { "schema": { @@ -1587,25 +1733,35 @@ } } }, - "/v1/desktop/recordings": { + "/v1/browser/snapshot": { "get": { "tags": ["v1"], - "summary": "List desktop recordings.", - "description": "Returns the current desktop recording catalog.", - "operationId": "get_v1_desktop_recordings", + "summary": "Get an accessibility tree snapshot of the current page.", + "description": "Returns a text representation of the page accessibility tree via CDP\n`Accessibility.getFullAXTree`.", + "operationId": "get_v1_browser_snapshot", "responses": { "200": { - "description": "Desktop recordings", + "description": "Accessibility tree snapshot", "content": { 
"application/json": { "schema": { - "$ref": "#/components/schemas/DesktopRecordingListResponse" + "$ref": "#/components/schemas/BrowserSnapshotResponse" + } + } + } + }, + "409": { + "description": "Browser runtime is not active", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" } } } }, "502": { - "description": "Desktop recordings query failed", + "description": "CDP command failed", "content": { "application/json": { "schema": { @@ -1617,36 +1773,35 @@ } } }, - "/v1/desktop/recordings/{id}": { - "get": { + "/v1/browser/start": { + "post": { "tags": ["v1"], - "summary": "Get desktop recording metadata.", - "description": "Returns metadata for a single desktop recording.", - "operationId": "get_v1_desktop_recording", - "parameters": [ - { - "name": "id", - "in": "path", - "description": "Desktop recording ID", - "required": true, - "schema": { - "type": "string" + "summary": "Start the browser runtime.", + "description": "Launches Chromium with remote debugging, optionally starts Xvfb for\nnon-headless mode, and returns the resulting browser status snapshot.", + "operationId": "post_v1_browser_start", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BrowserStartRequest" + } } - } - ], + }, + "required": true + }, "responses": { "200": { - "description": "Desktop recording metadata", + "description": "Browser runtime status after start", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/DesktopRecordingInfo" + "$ref": "#/components/schemas/BrowserStatusResponse" } } } }, - "404": { - "description": "Unknown desktop recording", + "400": { + "description": "Invalid browser start request", "content": { "application/json": { "schema": { @@ -1654,31 +1809,19 @@ } } } - } - } - }, - "delete": { - "tags": ["v1"], - "summary": "Delete a desktop recording.", - "description": "Removes a completed desktop recording and its file from disk.", - 
"operationId": "delete_v1_desktop_recording", - "parameters": [ - { - "name": "id", - "in": "path", - "description": "Desktop recording ID", - "required": true, - "schema": { - "type": "string" + }, + "409": { + "description": "Browser or desktop runtime conflict", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } } - } - ], - "responses": { - "204": { - "description": "Desktop recording deleted" }, - "404": { - "description": "Unknown desktop recording", + "424": { + "description": "Browser dependencies not installed", "content": { "application/json": { "schema": { @@ -1687,8 +1830,8 @@ } } }, - "409": { - "description": "Desktop recording is still active", + "500": { + "description": "Browser runtime could not be started", "content": { "application/json": { "schema": { @@ -1700,29 +1843,55 @@ } } }, - "/v1/desktop/recordings/{id}/download": { + "/v1/browser/status": { "get": { "tags": ["v1"], - "summary": "Download a desktop recording.", - "description": "Serves the recorded MP4 bytes for a completed desktop recording.", - "operationId": "get_v1_desktop_recording_download", - "parameters": [ - { - "name": "id", - "in": "path", - "description": "Desktop recording ID", - "required": true, - "schema": { - "type": "string" + "summary": "Get browser runtime status.", + "description": "Returns the current browser state, display information, CDP URL,\nand managed process details.", + "operationId": "get_v1_browser_status", + "responses": { + "200": { + "description": "Browser runtime status", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BrowserStatusResponse" + } + } + } + }, + "401": { + "description": "Authentication required", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } } } - ], + } + } + }, + "/v1/browser/stop": { + "post": { + "tags": ["v1"], + "summary": "Stop the browser runtime.", + "description": 
"Terminates Chromium, the CDP client, and any associated Xvfb/Neko\nprocesses, then returns the resulting status snapshot.", + "operationId": "post_v1_browser_stop", "responses": { "200": { - "description": "Desktop recording as MP4 bytes" + "description": "Browser runtime status after stop", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BrowserStatusResponse" + } + } + } }, - "404": { - "description": "Unknown desktop recording", + "409": { + "description": "Browser runtime is not active", "content": { "application/json": { "schema": { @@ -1734,73 +1903,73 @@ } } }, - "/v1/desktop/screenshot": { + "/v1/browser/tabs": { "get": { "tags": ["v1"], - "summary": "Capture a full desktop screenshot.", - "description": "Performs a health-gated full-frame screenshot of the managed desktop and\nreturns the requested image bytes.", - "operationId": "get_v1_desktop_screenshot", - "parameters": [ - { - "name": "format", - "in": "query", - "required": false, - "schema": { - "allOf": [ - { - "$ref": "#/components/schemas/DesktopScreenshotFormat" + "summary": "List open browser tabs.", + "description": "Returns all open browser tabs (pages) via CDP `Target.getTargets`,\nfiltered to type \"page\".", + "operationId": "get_v1_browser_tabs", + "responses": { + "200": { + "description": "List of open browser tabs", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BrowserTabListResponse" } - ], - "nullable": true - } - }, - { - "name": "quality", - "in": "query", - "required": false, - "schema": { - "type": "integer", - "format": "int32", - "nullable": true, - "minimum": 0 + } } }, - { - "name": "scale", - "in": "query", - "required": false, - "schema": { - "type": "number", - "format": "float", - "nullable": true + "409": { + "description": "Browser runtime is not active", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } } }, - { - "name": "showCursor", 
- "in": "query", - "required": false, - "schema": { - "type": "boolean", - "nullable": true + "502": { + "description": "CDP command failed", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } } } - ], - "responses": { - "200": { - "description": "Desktop screenshot as image bytes" + } + }, + "post": { + "tags": ["v1"], + "summary": "Create a new browser tab.", + "description": "Opens a new tab via CDP `Target.createTarget` and returns the tab info.", + "operationId": "post_v1_browser_tabs", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BrowserCreateTabRequest" + } + } }, - "400": { - "description": "Invalid screenshot query", + "required": true + }, + "responses": { + "201": { + "description": "New tab created", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ProblemDetails" + "$ref": "#/components/schemas/BrowserTabInfo" } } } }, "409": { - "description": "Desktop runtime is not ready", + "description": "Browser runtime is not active", "content": { "application/json": { "schema": { @@ -1810,7 +1979,7 @@ } }, "502": { - "description": "Desktop runtime health or screenshot capture failed", + "description": "CDP command failed", "content": { "application/json": { "schema": { @@ -1822,101 +1991,97 @@ } } }, - "/v1/desktop/screenshot/region": { - "get": { + "/v1/browser/tabs/{tab_id}": { + "delete": { "tags": ["v1"], - "summary": "Capture a desktop screenshot region.", - "description": "Performs a health-gated screenshot crop against the managed desktop and\nreturns the requested region image bytes.", - "operationId": "get_v1_desktop_screenshot_region", + "summary": "Close a browser tab.", + "description": "Closes the specified tab via CDP `Target.closeTarget`.", + "operationId": "delete_v1_browser_tab", "parameters": [ { - "name": "x", - "in": "query", + "name": "tab_id", + "in": "path", + "description": "Target ID of the 
tab to close", "required": true, "schema": { - "type": "integer", - "format": "int32" - } - }, - { - "name": "y", - "in": "query", - "required": true, - "schema": { - "type": "integer", - "format": "int32" - } - }, - { - "name": "width", - "in": "query", - "required": true, - "schema": { - "type": "integer", - "format": "int32", - "minimum": 0 + "type": "string" } - }, - { - "name": "height", - "in": "query", - "required": true, - "schema": { - "type": "integer", - "format": "int32", - "minimum": 0 + } + ], + "responses": { + "200": { + "description": "Tab closed", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BrowserActionResponse" + } + } } }, - { - "name": "format", - "in": "query", - "required": false, - "schema": { - "allOf": [ - { - "$ref": "#/components/schemas/DesktopScreenshotFormat" + "404": { + "description": "Tab not found", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" } - ], - "nullable": true + } } }, - { - "name": "quality", - "in": "query", - "required": false, - "schema": { - "type": "integer", - "format": "int32", - "nullable": true, - "minimum": 0 + "409": { + "description": "Browser runtime is not active", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } } }, - { - "name": "scale", - "in": "query", - "required": false, - "schema": { - "type": "number", - "format": "float", - "nullable": true + "502": { + "description": "CDP command failed", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } } - }, + } + } + } + }, + "/v1/browser/tabs/{tab_id}/activate": { + "post": { + "tags": ["v1"], + "summary": "Activate a browser tab.", + "description": "Brings the specified tab to the foreground via CDP `Target.activateTarget`.", + "operationId": "post_v1_browser_tab_activate", + "parameters": [ { - "name": "showCursor", - "in": "query", - 
"required": false, + "name": "tab_id", + "in": "path", + "description": "Target ID of the tab to activate", + "required": true, "schema": { - "type": "boolean", - "nullable": true + "type": "string" } } ], "responses": { "200": { - "description": "Desktop screenshot region as image bytes" + "description": "Tab activated", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BrowserTabInfo" + } + } + } }, - "400": { - "description": "Invalid screenshot region", + "404": { + "description": "Tab not found", "content": { "application/json": { "schema": { @@ -1926,7 +2091,7 @@ } }, "409": { - "description": "Desktop runtime is not ready", + "description": "Browser runtime is not active", "content": { "application/json": { "schema": { @@ -1936,7 +2101,7 @@ } }, "502": { - "description": "Desktop runtime health or screenshot capture failed", + "description": "CDP command failed", "content": { "application/json": { "schema": { @@ -1948,17 +2113,17 @@ } } }, - "/v1/desktop/start": { + "/v1/browser/type": { "post": { "tags": ["v1"], - "summary": "Start the private desktop runtime.", - "description": "Lazily launches the managed Xvfb/openbox stack, validates display health,\nand returns the resulting desktop status snapshot.", - "operationId": "post_v1_desktop_start", + "summary": "Type text into a focused element.", + "description": "Finds the element matching `selector`, focuses it via `DOM.focus`, optionally\nclears existing content, then dispatches key events for each character.", + "operationId": "post_v1_browser_type", "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/DesktopStartRequest" + "$ref": "#/components/schemas/BrowserTypeRequest" } } }, @@ -1966,17 +2131,17 @@ }, "responses": { "200": { - "description": "Desktop runtime status after start", + "description": "Text typed", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/DesktopStatusResponse" + "$ref": 
"#/components/schemas/BrowserActionResponse" } } } }, - "400": { - "description": "Invalid desktop start request", + "404": { + "description": "Element not found", "content": { "application/json": { "schema": { @@ -1986,7 +2151,7 @@ } }, "409": { - "description": "Desktop runtime is already transitioning", + "description": "Browser runtime is not active", "content": { "application/json": { "schema": { @@ -1995,8 +2160,8 @@ } } }, - "501": { - "description": "Desktop API unsupported on this platform", + "502": { + "description": "CDP command failed", "content": { "application/json": { "schema": { @@ -2004,9 +2169,39 @@ } } } + } + } + } + }, + "/v1/browser/upload": { + "post": { + "tags": ["v1"], + "summary": "Upload a file to a file input element in the browser page.", + "description": "Resolves the file input element matching `selector` and sets the specified\nfile path using `DOM.setFileInputFiles`.", + "operationId": "post_v1_browser_upload", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BrowserUploadRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "File uploaded to input", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BrowserActionResponse" + } + } + } }, - "503": { - "description": "Desktop runtime could not be started", + "404": { + "description": "Element not found", "content": { "application/json": { "schema": { @@ -2014,29 +2209,19 @@ } } } - } - } - } - }, - "/v1/desktop/status": { - "get": { - "tags": ["v1"], - "summary": "Get desktop runtime status.", - "description": "Returns the current desktop runtime state, dependency status, active\ndisplay metadata, and supervised process information.", - "operationId": "get_v1_desktop_status", - "responses": { - "200": { - "description": "Desktop runtime status", + }, + "409": { + "description": "Browser runtime is not active", "content": { "application/json": { "schema": { - 
"$ref": "#/components/schemas/DesktopStatusResponse" + "$ref": "#/components/schemas/ProblemDetails" } } } }, - "401": { - "description": "Authentication required", + "502": { + "description": "CDP command failed", "content": { "application/json": { "schema": { @@ -2048,25 +2233,55 @@ } } }, - "/v1/desktop/stop": { + "/v1/browser/wait": { "post": { "tags": ["v1"], - "summary": "Stop the private desktop runtime.", - "description": "Terminates the managed openbox/Xvfb/dbus processes owned by the desktop\nruntime and returns the resulting status snapshot.", - "operationId": "post_v1_desktop_stop", + "summary": "Wait for a selector or condition in the browser.", + "description": "Polls the page DOM using `Runtime.evaluate` with a `querySelector` check\nuntil the element is found or the timeout expires.", + "operationId": "post_v1_browser_wait", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BrowserWaitRequest" + } + } + }, + "required": true + }, "responses": { "200": { - "description": "Desktop runtime status after stop", + "description": "Wait result", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/DesktopStatusResponse" + "$ref": "#/components/schemas/BrowserWaitResponse" } } } }, "409": { - "description": "Desktop runtime is already transitioning", + "description": "Browser runtime is not active", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + }, + "502": { + "description": "CDP command failed", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + }, + "504": { + "description": "Timeout waiting for condition", "content": { "application/json": { "schema": { @@ -2078,40 +2293,43 @@ } } }, - "/v1/desktop/stream/signaling": { + "/v1/config/mcp": { "get": { "tags": ["v1"], - "summary": "Open a desktop WebRTC signaling session.", - "description": "Upgrades 
the connection to a WebSocket used for WebRTC signaling between\nthe browser client and the desktop streaming process. Also accepts mouse\nand keyboard input frames as a fallback transport.", - "operationId": "get_v1_desktop_stream_ws", + "operationId": "get_v1_config_mcp", "parameters": [ { - "name": "access_token", + "name": "directory", "in": "query", - "description": "Bearer token alternative for WS auth", - "required": false, + "description": "Target directory", + "required": true, "schema": { - "type": "string", - "nullable": true + "type": "string" + } + }, + { + "name": "mcpName", + "in": "query", + "description": "MCP entry name", + "required": true, + "schema": { + "type": "string" } } ], "responses": { - "101": { - "description": "WebSocket upgraded" - }, - "409": { - "description": "Desktop runtime or streaming session is not ready", + "200": { + "description": "MCP entry", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ProblemDetails" + "$ref": "#/components/schemas/McpServerConfig" } } } }, - "502": { - "description": "Desktop stream failed", + "404": { + "description": "Entry not found", "content": { "application/json": { "schema": { @@ -2121,81 +2339,216 @@ } } } - } - }, - "/v1/desktop/stream/start": { - "post": { + }, + "put": { "tags": ["v1"], - "summary": "Start desktop streaming.", - "description": "Enables desktop websocket streaming for the managed desktop.", - "operationId": "post_v1_desktop_stream_start", - "responses": { - "200": { - "description": "Desktop streaming started", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/DesktopStreamStatusResponse" - } + "operationId": "put_v1_config_mcp", + "parameters": [ + { + "name": "directory", + "in": "query", + "description": "Target directory", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "mcpName", + "in": "query", + "description": "MCP entry name", + "required": true, + "schema": { + "type": 
"string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/McpServerConfig" } } + }, + "required": true + }, + "responses": { + "204": { + "description": "Stored" + } + } + }, + "delete": { + "tags": ["v1"], + "operationId": "delete_v1_config_mcp", + "parameters": [ + { + "name": "directory", + "in": "query", + "description": "Target directory", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "mcpName", + "in": "query", + "description": "MCP entry name", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "204": { + "description": "Deleted" } } } }, - "/v1/desktop/stream/status": { + "/v1/config/skills": { "get": { "tags": ["v1"], - "summary": "Get desktop stream status.", - "description": "Returns the current state of the desktop WebRTC streaming session.", - "operationId": "get_v1_desktop_stream_status", + "operationId": "get_v1_config_skills", + "parameters": [ + { + "name": "directory", + "in": "query", + "description": "Target directory", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "skillName", + "in": "query", + "description": "Skill entry name", + "required": true, + "schema": { + "type": "string" + } + } + ], "responses": { "200": { - "description": "Desktop stream status", + "description": "Skills entry", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/DesktopStreamStatusResponse" + "$ref": "#/components/schemas/SkillsConfig" } } } - } - } - } - }, - "/v1/desktop/stream/stop": { - "post": { - "tags": ["v1"], - "summary": "Stop desktop streaming.", - "description": "Disables desktop websocket streaming for the managed desktop.", - "operationId": "post_v1_desktop_stream_stop", - "responses": { - "200": { - "description": "Desktop streaming stopped", + }, + "404": { + "description": "Entry not found", "content": { "application/json": { "schema": { - "$ref": 
"#/components/schemas/DesktopStreamStatusResponse" + "$ref": "#/components/schemas/ProblemDetails" } } } } } + }, + "put": { + "tags": ["v1"], + "operationId": "put_v1_config_skills", + "parameters": [ + { + "name": "directory", + "in": "query", + "description": "Target directory", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "skillName", + "in": "query", + "description": "Skill entry name", + "required": true, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SkillsConfig" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "Stored" + } + } + }, + "delete": { + "tags": ["v1"], + "operationId": "delete_v1_config_skills", + "parameters": [ + { + "name": "directory", + "in": "query", + "description": "Target directory", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "skillName", + "in": "query", + "description": "Skill entry name", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "204": { + "description": "Deleted" + } + } } }, - "/v1/desktop/windows": { + "/v1/desktop/clipboard": { "get": { "tags": ["v1"], - "summary": "List visible desktop windows.", - "description": "Performs a health-gated visible-window enumeration against the managed\ndesktop and returns the current window metadata.", - "operationId": "get_v1_desktop_windows", + "summary": "Read the desktop clipboard.", + "description": "Returns the current text content of the X11 clipboard.", + "operationId": "get_v1_desktop_clipboard", + "parameters": [ + { + "name": "selection", + "in": "query", + "required": false, + "schema": { + "type": "string", + "nullable": true + } + } + ], "responses": { "200": { - "description": "Visible desktop windows", + "description": "Clipboard contents", "content": { "application/json": { "schema": { - "$ref": 
"#/components/schemas/DesktopWindowListResponse" + "$ref": "#/components/schemas/DesktopClipboardResponse" } } } @@ -2210,8 +2563,8 @@ } } }, - "503": { - "description": "Desktop runtime health or window query failed", + "500": { + "description": "Clipboard read failed", "content": { "application/json": { "schema": { @@ -2221,27 +2574,35 @@ } } } - } - }, - "/v1/desktop/windows/focused": { - "get": { + }, + "post": { "tags": ["v1"], - "summary": "Get the currently focused desktop window.", - "description": "Returns information about the window that currently has input focus.", - "operationId": "get_v1_desktop_windows_focused", + "summary": "Write to the desktop clipboard.", + "description": "Sets the text content of the X11 clipboard.", + "operationId": "post_v1_desktop_clipboard", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DesktopClipboardWriteRequest" + } + } + }, + "required": true + }, "responses": { "200": { - "description": "Focused window info", + "description": "Clipboard updated", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/DesktopWindowInfo" + "$ref": "#/components/schemas/DesktopActionResponse" } } } }, - "404": { - "description": "No window is focused", + "409": { + "description": "Desktop runtime is not ready", "content": { "application/json": { "schema": { @@ -2250,8 +2611,8 @@ } } }, - "409": { - "description": "Desktop runtime is not ready", + "500": { + "description": "Clipboard write failed", "content": { "application/json": { "schema": { @@ -2263,36 +2624,25 @@ } } }, - "/v1/desktop/windows/{id}/focus": { - "post": { + "/v1/desktop/display/info": { + "get": { "tags": ["v1"], - "summary": "Focus a desktop window.", - "description": "Brings the specified window to the foreground and gives it input focus.", - "operationId": "post_v1_desktop_window_focus", - "parameters": [ - { - "name": "id", - "in": "path", - "description": "X11 window ID", - "required": 
true, - "schema": { - "type": "string" - } - } - ], + "summary": "Get desktop display information.", + "description": "Performs a health-gated display query against the managed desktop and\nreturns the current display identifier and resolution.", + "operationId": "get_v1_desktop_display_info", "responses": { "200": { - "description": "Window info after focus", + "description": "Desktop display information", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/DesktopWindowInfo" + "$ref": "#/components/schemas/DesktopDisplayInfoResponse" } } } }, - "404": { - "description": "Window not found", + "409": { + "description": "Desktop runtime is not ready", "content": { "application/json": { "schema": { @@ -2301,8 +2651,8 @@ } } }, - "409": { - "description": "Desktop runtime is not ready", + "503": { + "description": "Desktop runtime health or display query failed", "content": { "application/json": { "schema": { @@ -2314,28 +2664,17 @@ } } }, - "/v1/desktop/windows/{id}/move": { + "/v1/desktop/keyboard/down": { "post": { "tags": ["v1"], - "summary": "Move a desktop window.", - "description": "Moves the specified window to the given position.", - "operationId": "post_v1_desktop_window_move", - "parameters": [ - { - "name": "id", - "in": "path", - "description": "X11 window ID", - "required": true, - "schema": { - "type": "string" - } - } - ], + "summary": "Press and hold a desktop keyboard key.", + "description": "Performs a health-gated `xdotool keydown` operation against the managed\ndesktop.", + "operationId": "post_v1_desktop_keyboard_down", "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/DesktopWindowMoveRequest" + "$ref": "#/components/schemas/DesktopKeyboardDownRequest" } } }, @@ -2343,17 +2682,17 @@ }, "responses": { "200": { - "description": "Window info after move", + "description": "Desktop keyboard action result", "content": { "application/json": { "schema": { - "$ref": 
"#/components/schemas/DesktopWindowInfo" + "$ref": "#/components/schemas/DesktopActionResponse" } } } }, - "404": { - "description": "Window not found", + "400": { + "description": "Invalid keyboard down request", "content": { "application/json": { "schema": { @@ -2371,32 +2710,31 @@ } } } + }, + "502": { + "description": "Desktop runtime health or input failed", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } } } } }, - "/v1/desktop/windows/{id}/resize": { + "/v1/desktop/keyboard/press": { "post": { "tags": ["v1"], - "summary": "Resize a desktop window.", - "description": "Resizes the specified window to the given dimensions.", - "operationId": "post_v1_desktop_window_resize", - "parameters": [ - { - "name": "id", - "in": "path", - "description": "X11 window ID", - "required": true, - "schema": { - "type": "string" - } - } - ], + "summary": "Press a desktop keyboard shortcut.", + "description": "Performs a health-gated `xdotool key` operation against the managed\ndesktop.", + "operationId": "post_v1_desktop_keyboard_press", "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/DesktopWindowResizeRequest" + "$ref": "#/components/schemas/DesktopKeyboardPressRequest" } } }, @@ -2404,17 +2742,17 @@ }, "responses": { "200": { - "description": "Window info after resize", + "description": "Desktop keyboard action result", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/DesktopWindowInfo" + "$ref": "#/components/schemas/DesktopActionResponse" } } } }, - "404": { - "description": "Window not found", + "400": { + "description": "Invalid keyboard press request", "content": { "application/json": { "schema": { @@ -2432,36 +2770,13 @@ } } } - } - } - } - }, - "/v1/fs/entries": { - "get": { - "tags": ["v1"], - "operationId": "get_v1_fs_entries", - "parameters": [ - { - "name": "path", - "in": "query", - "description": "Directory path", - 
"required": false, - "schema": { - "type": "string", - "nullable": true - } - } - ], - "responses": { - "200": { - "description": "Directory entries", + }, + "502": { + "description": "Desktop runtime health or input failed", "content": { "application/json": { "schema": { - "type": "array", - "items": { - "$ref": "#/components/schemas/FsEntry" - } + "$ref": "#/components/schemas/ProblemDetails" } } } @@ -2469,127 +2784,59 @@ } } }, - "/v1/fs/entry": { - "delete": { + "/v1/desktop/keyboard/type": { + "post": { "tags": ["v1"], - "operationId": "delete_v1_fs_entry", - "parameters": [ - { - "name": "path", - "in": "query", - "description": "File or directory path", - "required": true, - "schema": { - "type": "string" + "summary": "Type desktop keyboard text.", + "description": "Performs a health-gated `xdotool type` operation against the managed\ndesktop.", + "operationId": "post_v1_desktop_keyboard_type", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DesktopKeyboardTypeRequest" + } } }, - { - "name": "recursive", - "in": "query", - "description": "Delete directory recursively", - "required": false, - "schema": { - "type": "boolean", - "nullable": true - } - } - ], + "required": true + }, "responses": { "200": { - "description": "Delete result", + "description": "Desktop keyboard action result", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/FsActionResponse" + "$ref": "#/components/schemas/DesktopActionResponse" } } } - } - } - } - }, - "/v1/fs/file": { - "get": { - "tags": ["v1"], - "operationId": "get_v1_fs_file", - "parameters": [ - { - "name": "path", - "in": "query", - "description": "File path", - "required": true, - "schema": { - "type": "string" - } - } - ], - "responses": { - "200": { - "description": "File content" - } - } - }, - "put": { - "tags": ["v1"], - "operationId": "put_v1_fs_file", - "parameters": [ - { - "name": "path", - "in": "query", - "description": "File 
path", - "required": true, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "description": "Raw file bytes", - "content": { - "text/plain": { - "schema": { - "type": "string" + }, + "400": { + "description": "Invalid keyboard type request", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } } } }, - "required": true - }, - "responses": { - "200": { - "description": "Write result", + "409": { + "description": "Desktop runtime is not ready", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/FsWriteResponse" + "$ref": "#/components/schemas/ProblemDetails" } } } - } - } - } - }, - "/v1/fs/mkdir": { - "post": { - "tags": ["v1"], - "operationId": "post_v1_fs_mkdir", - "parameters": [ - { - "name": "path", - "in": "query", - "description": "Directory path", - "required": true, - "schema": { - "type": "string" - } - } - ], - "responses": { - "200": { - "description": "Directory created", + }, + "502": { + "description": "Desktop runtime health or input failed", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/FsActionResponse" + "$ref": "#/components/schemas/ProblemDetails" } } } @@ -2597,15 +2844,17 @@ } } }, - "/v1/fs/move": { + "/v1/desktop/keyboard/up": { "post": { "tags": ["v1"], - "operationId": "post_v1_fs_move", + "summary": "Release a desktop keyboard key.", + "description": "Performs a health-gated `xdotool keyup` operation against the managed\ndesktop.", + "operationId": "post_v1_desktop_keyboard_up", "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/FsMoveRequest" + "$ref": "#/components/schemas/DesktopKeyboardUpRequest" } } }, @@ -2613,40 +2862,41 @@ }, "responses": { "200": { - "description": "Move result", + "description": "Desktop keyboard action result", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/FsMoveResponse" + "$ref": 
"#/components/schemas/DesktopActionResponse" } } } - } - } - } - }, - "/v1/fs/stat": { - "get": { - "tags": ["v1"], - "operationId": "get_v1_fs_stat", - "parameters": [ - { - "name": "path", - "in": "query", - "description": "Path to stat", - "required": true, - "schema": { - "type": "string" + }, + "400": { + "description": "Invalid keyboard up request", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } } - } - ], - "responses": { - "200": { - "description": "Path metadata", + }, + "409": { + "description": "Desktop runtime is not ready", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/FsStat" + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + }, + "502": { + "description": "Desktop runtime health or input failed", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" } } } @@ -2654,28 +2904,17 @@ } } }, - "/v1/fs/upload-batch": { + "/v1/desktop/launch": { "post": { "tags": ["v1"], - "operationId": "post_v1_fs_upload_batch", - "parameters": [ - { - "name": "path", - "in": "query", - "description": "Destination path", - "required": false, - "schema": { - "type": "string", - "nullable": true - } - } - ], + "summary": "Launch a desktop application.", + "description": "Launches an application by name on the managed desktop, optionally waiting\nfor its window to appear.", + "operationId": "post_v1_desktop_launch", "requestBody": { - "description": "tar archive body", "content": { - "text/plain": { + "application/json": { "schema": { - "type": "string" + "$ref": "#/components/schemas/DesktopLaunchRequest" } } }, @@ -2683,70 +2922,27 @@ }, "responses": { "200": { - "description": "Upload/extract result", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/FsUploadBatchResponse" - } - } - } - } - } - } - }, - "/v1/health": { - "get": { - "tags": ["v1"], - "operationId": "get_v1_health", - 
"responses": { - "200": { - "description": "Service health response", + "description": "Application launched", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/HealthResponse" + "$ref": "#/components/schemas/DesktopLaunchResponse" } } } - } - } - } - }, - "/v1/processes": { - "get": { - "tags": ["v1"], - "summary": "List all managed processes.", - "description": "Returns a list of all processes (running and exited) currently tracked\nby the runtime, sorted by process ID.", - "operationId": "get_v1_processes", - "parameters": [ - { - "name": "owner", - "in": "query", - "required": false, - "schema": { - "allOf": [ - { - "$ref": "#/components/schemas/ProcessOwner" - } - ], - "nullable": true - } - } - ], - "responses": { - "200": { - "description": "List processes", + }, + "404": { + "description": "Application not found", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ProcessListResponse" + "$ref": "#/components/schemas/ProblemDetails" } } } }, - "501": { - "description": "Process API unsupported on this platform", + "409": { + "description": "Desktop runtime is not ready", "content": { "application/json": { "schema": { @@ -2756,17 +2952,19 @@ } } } - }, + } + }, + "/v1/desktop/mouse/click": { "post": { "tags": ["v1"], - "summary": "Create a long-lived managed process.", - "description": "Spawns a new process with the given command and arguments. Supports both\npipe-based and PTY (tty) modes. 
Returns the process descriptor on success.", - "operationId": "post_v1_processes", + "summary": "Click on the desktop.", + "description": "Performs a health-gated pointer move and click against the managed desktop\nand returns the resulting mouse position.", + "operationId": "post_v1_desktop_mouse_click", "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ProcessCreateRequest" + "$ref": "#/components/schemas/DesktopMouseClickRequest" } } }, @@ -2774,17 +2972,17 @@ }, "responses": { "200": { - "description": "Started process", + "description": "Desktop mouse position after click", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ProcessInfo" + "$ref": "#/components/schemas/DesktopMousePositionResponse" } } } }, "400": { - "description": "Invalid request", + "description": "Invalid mouse click request", "content": { "application/json": { "schema": { @@ -2794,7 +2992,7 @@ } }, "409": { - "description": "Process limit or state conflict", + "description": "Desktop runtime is not ready", "content": { "application/json": { "schema": { @@ -2803,8 +3001,8 @@ } } }, - "501": { - "description": "Process API unsupported on this platform", + "502": { + "description": "Desktop runtime health or input failed", "content": { "application/json": { "schema": { @@ -2816,25 +3014,55 @@ } } }, - "/v1/processes/config": { - "get": { + "/v1/desktop/mouse/down": { + "post": { "tags": ["v1"], - "summary": "Get process runtime configuration.", - "description": "Returns the current runtime configuration for the process management API,\nincluding limits for concurrency, timeouts, and buffer sizes.", - "operationId": "get_v1_processes_config", + "summary": "Press and hold a desktop mouse button.", + "description": "Performs a health-gated optional pointer move followed by `xdotool mousedown`\nand returns the resulting mouse position.", + "operationId": "post_v1_desktop_mouse_down", + "requestBody": { + "content": { + 
"application/json": { + "schema": { + "$ref": "#/components/schemas/DesktopMouseDownRequest" + } + } + }, + "required": true + }, "responses": { "200": { - "description": "Current runtime process config", + "description": "Desktop mouse position after button press", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ProcessConfig" + "$ref": "#/components/schemas/DesktopMousePositionResponse" } } } }, - "501": { - "description": "Process API unsupported on this platform", + "400": { + "description": "Invalid mouse down request", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + }, + "409": { + "description": "Desktop runtime is not ready", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + }, + "502": { + "description": "Desktop runtime health or input failed", "content": { "application/json": { "schema": { @@ -2844,17 +3072,19 @@ } } } - }, + } + }, + "/v1/desktop/mouse/drag": { "post": { "tags": ["v1"], - "summary": "Update process runtime configuration.", - "description": "Replaces the runtime configuration for the process management API.\nValidates that all values are non-zero and clamps default timeout to max.", - "operationId": "post_v1_processes_config", + "summary": "Drag the desktop mouse.", + "description": "Performs a health-gated drag gesture against the managed desktop and\nreturns the resulting mouse position.", + "operationId": "post_v1_desktop_mouse_drag", "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ProcessConfig" + "$ref": "#/components/schemas/DesktopMouseDragRequest" } } }, @@ -2862,17 +3092,17 @@ }, "responses": { "200": { - "description": "Updated runtime process config", + "description": "Desktop mouse position after drag", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ProcessConfig" + "$ref": 
"#/components/schemas/DesktopMousePositionResponse" } } } }, "400": { - "description": "Invalid config", + "description": "Invalid mouse drag request", "content": { "application/json": { "schema": { @@ -2881,8 +3111,18 @@ } } }, - "501": { - "description": "Process API unsupported on this platform", + "409": { + "description": "Desktop runtime is not ready", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + }, + "502": { + "description": "Desktop runtime health or input failed", "content": { "application/json": { "schema": { @@ -2894,17 +3134,17 @@ } } }, - "/v1/processes/run": { + "/v1/desktop/mouse/move": { "post": { "tags": ["v1"], - "summary": "Run a one-shot command.", - "description": "Executes a command to completion and returns its stdout, stderr, exit code,\nand duration. Supports configurable timeout and output size limits.", - "operationId": "post_v1_processes_run", + "summary": "Move the desktop mouse.", + "description": "Performs a health-gated absolute pointer move on the managed desktop and\nreturns the resulting mouse position.", + "operationId": "post_v1_desktop_mouse_move", "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ProcessRunRequest" + "$ref": "#/components/schemas/DesktopMouseMoveRequest" } } }, @@ -2912,17 +3152,17 @@ }, "responses": { "200": { - "description": "One-off command result", + "description": "Desktop mouse position after move", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ProcessRunResponse" + "$ref": "#/components/schemas/DesktopMousePositionResponse" } } } }, "400": { - "description": "Invalid request", + "description": "Invalid mouse move request", "content": { "application/json": { "schema": { @@ -2931,8 +3171,8 @@ } } }, - "501": { - "description": "Process API unsupported on this platform", + "409": { + "description": "Desktop runtime is not ready", "content": { 
"application/json": { "schema": { @@ -2940,40 +3180,39 @@ } } } - } - } + }, + "502": { + "description": "Desktop runtime health or input failed", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + } + } } }, - "/v1/processes/{id}": { + "/v1/desktop/mouse/position": { "get": { "tags": ["v1"], - "summary": "Get a single process by ID.", - "description": "Returns the current state of a managed process including its status,\nPID, exit code, and creation/exit timestamps.", - "operationId": "get_v1_process", - "parameters": [ - { - "name": "id", - "in": "path", - "description": "Process ID", - "required": true, - "schema": { - "type": "string" - } - } - ], + "summary": "Get the current desktop mouse position.", + "description": "Performs a health-gated mouse position query against the managed desktop.", + "operationId": "get_v1_desktop_mouse_position", "responses": { "200": { - "description": "Process details", + "description": "Desktop mouse position", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ProcessInfo" + "$ref": "#/components/schemas/DesktopMousePositionResponse" } } } }, - "404": { - "description": "Unknown process", + "409": { + "description": "Desktop runtime is not ready", "content": { "application/json": { "schema": { @@ -2982,8 +3221,8 @@ } } }, - "501": { - "description": "Process API unsupported on this platform", + "502": { + "description": "Desktop runtime health or input check failed", "content": { "application/json": { "schema": { @@ -2993,29 +3232,37 @@ } } } - }, - "delete": { + } + }, + "/v1/desktop/mouse/scroll": { + "post": { "tags": ["v1"], - "summary": "Delete a process record.", - "description": "Removes a stopped process from the runtime. 
Returns 409 if the process\nis still running; stop or kill it first.", - "operationId": "delete_v1_process", - "parameters": [ - { - "name": "id", - "in": "path", - "description": "Process ID", - "required": true, - "schema": { - "type": "string" + "summary": "Scroll the desktop mouse wheel.", + "description": "Performs a health-gated scroll gesture at the requested coordinates and\nreturns the resulting mouse position.", + "operationId": "post_v1_desktop_mouse_scroll", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DesktopMouseScrollRequest" + } } - } - ], + }, + "required": true + }, "responses": { - "204": { - "description": "Process deleted" + "200": { + "description": "Desktop mouse position after scroll", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DesktopMousePositionResponse" + } + } + } }, - "404": { - "description": "Unknown process", + "400": { + "description": "Invalid mouse scroll request", "content": { "application/json": { "schema": { @@ -3025,7 +3272,7 @@ } }, "409": { - "description": "Process is still running", + "description": "Desktop runtime is not ready", "content": { "application/json": { "schema": { @@ -3034,8 +3281,8 @@ } } }, - "501": { - "description": "Process API unsupported on this platform", + "502": { + "description": "Desktop runtime health or input failed", "content": { "application/json": { "schema": { @@ -3047,28 +3294,17 @@ } } }, - "/v1/processes/{id}/input": { + "/v1/desktop/mouse/up": { "post": { "tags": ["v1"], - "summary": "Write input to a process.", - "description": "Sends data to a process's stdin (pipe mode) or PTY writer (tty mode).\nData can be encoded as base64, utf8, or text. 
Returns 413 if the decoded\npayload exceeds the configured `maxInputBytesPerRequest` limit.", - "operationId": "post_v1_process_input", - "parameters": [ - { - "name": "id", - "in": "path", - "description": "Process ID", - "required": true, - "schema": { - "type": "string" - } - } - ], + "summary": "Release a desktop mouse button.", + "description": "Performs a health-gated optional pointer move followed by `xdotool mouseup`\nand returns the resulting mouse position.", + "operationId": "post_v1_desktop_mouse_up", "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ProcessInputRequest" + "$ref": "#/components/schemas/DesktopMouseUpRequest" } } }, @@ -3076,17 +3312,17 @@ }, "responses": { "200": { - "description": "Input accepted", + "description": "Desktop mouse position after button release", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ProcessInputResponse" + "$ref": "#/components/schemas/DesktopMousePositionResponse" } } } }, "400": { - "description": "Invalid request", + "description": "Invalid mouse up request", "content": { "application/json": { "schema": { @@ -3096,7 +3332,7 @@ } }, "409": { - "description": "Process not writable", + "description": "Desktop runtime is not ready", "content": { "application/json": { "schema": { @@ -3105,8 +3341,8 @@ } } }, - "413": { - "description": "Input exceeds configured limit", + "502": { + "description": "Desktop runtime health or input failed", "content": { "application/json": { "schema": { @@ -3114,9 +3350,39 @@ } } } + } + } + } + }, + "/v1/desktop/open": { + "post": { + "tags": ["v1"], + "summary": "Open a file or URL with the default handler.", + "description": "Opens a file path or URL using xdg-open on the managed desktop.", + "operationId": "post_v1_desktop_open", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DesktopOpenRequest" + } + } }, - "501": { - "description": "Process API 
unsupported on this platform", + "required": true + }, + "responses": { + "200": { + "description": "Target opened", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DesktopOpenResponse" + } + } + } + }, + "409": { + "description": "Desktop runtime is not ready", "content": { "application/json": { "schema": { @@ -3128,48 +3394,35 @@ } } }, - "/v1/processes/{id}/kill": { + "/v1/desktop/recording/start": { "post": { "tags": ["v1"], - "summary": "Send SIGKILL to a process.", - "description": "Sends SIGKILL to the process and optionally waits up to `waitMs`\nmilliseconds for the process to exit before returning.", - "operationId": "post_v1_process_kill", - "parameters": [ - { - "name": "id", - "in": "path", - "description": "Process ID", - "required": true, - "schema": { - "type": "string" + "summary": "Start desktop recording.", + "description": "Starts an ffmpeg x11grab recording against the managed desktop and returns\nthe created recording metadata.", + "operationId": "post_v1_desktop_recording_start", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DesktopRecordingStartRequest" + } } }, - { - "name": "waitMs", - "in": "query", - "description": "Wait up to N ms for process to exit", - "required": false, - "schema": { - "type": "integer", - "format": "int64", - "nullable": true, - "minimum": 0 - } - } - ], + "required": true + }, "responses": { "200": { - "description": "Kill signal sent", + "description": "Desktop recording started", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ProcessInfo" + "$ref": "#/components/schemas/DesktopRecordingInfo" } } } }, - "404": { - "description": "Unknown process", + "409": { + "description": "Desktop runtime is not ready or a recording is already active", "content": { "application/json": { "schema": { @@ -3178,8 +3431,8 @@ } } }, - "501": { - "description": "Process API unsupported on this platform", + 
"502": { + "description": "Desktop recording failed", "content": { "application/json": { "schema": { @@ -3191,93 +3444,65 @@ } } }, - "/v1/processes/{id}/logs": { - "get": { + "/v1/desktop/recording/stop": { + "post": { "tags": ["v1"], - "summary": "Fetch process logs.", - "description": "Returns buffered log entries for a process. Supports filtering by stream\ntype, tail count, and sequence-based resumption. When `follow=true`,\nreturns an SSE stream that replays buffered entries then streams live output.", - "operationId": "get_v1_process_logs", - "parameters": [ - { - "name": "id", - "in": "path", - "description": "Process ID", - "required": true, - "schema": { - "type": "string" + "summary": "Stop desktop recording.", + "description": "Stops the active desktop recording and returns the finalized recording\nmetadata.", + "operationId": "post_v1_desktop_recording_stop", + "responses": { + "200": { + "description": "Desktop recording stopped", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DesktopRecordingInfo" + } + } } }, - { - "name": "stream", - "in": "query", - "description": "stdout|stderr|combined|pty", - "required": false, - "schema": { - "allOf": [ - { - "$ref": "#/components/schemas/ProcessLogsStream" + "409": { + "description": "No active desktop recording", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" } - ], - "nullable": true + } } }, - { - "name": "tail", - "in": "query", - "description": "Tail N entries", - "required": false, - "schema": { - "type": "integer", - "nullable": true, - "minimum": 0 - } - }, - { - "name": "follow", - "in": "query", - "description": "Follow via SSE", - "required": false, - "schema": { - "type": "boolean", - "nullable": true - } - }, - { - "name": "since", - "in": "query", - "description": "Only entries with sequence greater than this", - "required": false, - "schema": { - "type": "integer", - "format": "int64", - "nullable": 
true, - "minimum": 0 - } - } - ], - "responses": { - "200": { - "description": "Process logs", + "502": { + "description": "Desktop recording stop failed", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ProcessLogsResponse" + "$ref": "#/components/schemas/ProblemDetails" } } } - }, - "404": { - "description": "Unknown process", + } + } + } + }, + "/v1/desktop/recordings": { + "get": { + "tags": ["v1"], + "summary": "List desktop recordings.", + "description": "Returns the current desktop recording catalog.", + "operationId": "get_v1_desktop_recordings", + "responses": { + "200": { + "description": "Desktop recordings", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ProblemDetails" + "$ref": "#/components/schemas/DesktopRecordingListResponse" } } } }, - "501": { - "description": "Process API unsupported on this platform", + "502": { + "description": "Desktop recordings query failed", "content": { "application/json": { "schema": { @@ -3289,58 +3514,36 @@ } } }, - "/v1/processes/{id}/stop": { - "post": { + "/v1/desktop/recordings/{id}": { + "get": { "tags": ["v1"], - "summary": "Send SIGTERM to a process.", - "description": "Sends SIGTERM to the process and optionally waits up to `waitMs`\nmilliseconds for the process to exit before returning.", - "operationId": "post_v1_process_stop", + "summary": "Get desktop recording metadata.", + "description": "Returns metadata for a single desktop recording.", + "operationId": "get_v1_desktop_recording", "parameters": [ { "name": "id", "in": "path", - "description": "Process ID", + "description": "Desktop recording ID", "required": true, "schema": { "type": "string" } - }, - { - "name": "waitMs", - "in": "query", - "description": "Wait up to N ms for process to exit", - "required": false, - "schema": { - "type": "integer", - "format": "int64", - "nullable": true, - "minimum": 0 - } } ], "responses": { "200": { - "description": "Stop signal sent", + "description": 
"Desktop recording metadata", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ProcessInfo" + "$ref": "#/components/schemas/DesktopRecordingInfo" } } } }, "404": { - "description": "Unknown process", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ProblemDetails" - } - } - } - }, - "501": { - "description": "Process API unsupported on this platform", + "description": "Unknown desktop recording", "content": { "application/json": { "schema": { @@ -3350,58 +3553,29 @@ } } } - } - }, - "/v1/processes/{id}/terminal/resize": { - "post": { + }, + "delete": { "tags": ["v1"], - "summary": "Resize a process terminal.", - "description": "Sets the PTY window size (columns and rows) for a tty-mode process and\nsends SIGWINCH so the child process can adapt.", - "operationId": "post_v1_process_terminal_resize", + "summary": "Delete a desktop recording.", + "description": "Removes a completed desktop recording and its file from disk.", + "operationId": "delete_v1_desktop_recording", "parameters": [ { "name": "id", "in": "path", - "description": "Process ID", + "description": "Desktop recording ID", "required": true, "schema": { "type": "string" } } ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ProcessTerminalResizeRequest" - } - } - }, - "required": true - }, "responses": { - "200": { - "description": "Resize accepted", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ProcessTerminalResizeResponse" - } - } - } - }, - "400": { - "description": "Invalid request", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ProblemDetails" - } - } - } + "204": { + "description": "Desktop recording deleted" }, "404": { - "description": "Unknown process", + "description": "Unknown desktop recording", "content": { "application/json": { "schema": { @@ -3411,7 +3585,7 @@ } }, "409": { - "description": 
"Not a terminal process", + "description": "Desktop recording is still active", "content": { "application/json": { "schema": { @@ -3419,9 +3593,33 @@ } } } + } + } + } + }, + "/v1/desktop/recordings/{id}/download": { + "get": { + "tags": ["v1"], + "summary": "Download a desktop recording.", + "description": "Serves the recorded MP4 bytes for a completed desktop recording.", + "operationId": "get_v1_desktop_recording_download", + "parameters": [ + { + "name": "id", + "in": "path", + "description": "Desktop recording ID", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Desktop recording as MP4 bytes" }, - "501": { - "description": "Process API unsupported on this platform", + "404": { + "description": "Unknown desktop recording", "content": { "application/json": { "schema": { @@ -3433,49 +3631,63 @@ } } }, - "/v1/processes/{id}/terminal/ws": { + "/v1/desktop/screenshot": { "get": { "tags": ["v1"], - "summary": "Open an interactive WebSocket terminal session.", - "description": "Upgrades the connection to a WebSocket for bidirectional PTY I/O. Accepts\n`access_token` query param for browser-based auth (WebSocket API cannot\nsend custom headers). 
Streams raw PTY output as binary frames and accepts\nJSON control frames for input, resize, and close.", - "operationId": "get_v1_process_terminal_ws", + "summary": "Capture a full desktop screenshot.", + "description": "Performs a health-gated full-frame screenshot of the managed desktop and\nreturns the requested image bytes.", + "operationId": "get_v1_desktop_screenshot", "parameters": [ { - "name": "id", - "in": "path", - "description": "Process ID", - "required": true, + "name": "format", + "in": "query", + "required": false, "schema": { - "type": "string" + "allOf": [ + { + "$ref": "#/components/schemas/DesktopScreenshotFormat" + } + ], + "nullable": true } }, { - "name": "access_token", + "name": "quality", "in": "query", - "description": "Bearer token alternative for WS auth", "required": false, "schema": { - "type": "string", + "type": "integer", + "format": "int32", + "nullable": true, + "minimum": 0 + } + }, + { + "name": "scale", + "in": "query", + "required": false, + "schema": { + "type": "number", + "format": "float", + "nullable": true + } + }, + { + "name": "showCursor", + "in": "query", + "required": false, + "schema": { + "type": "boolean", "nullable": true } } ], "responses": { - "101": { - "description": "WebSocket upgraded" + "200": { + "description": "Desktop screenshot as image bytes" }, "400": { - "description": "Invalid websocket frame or upgrade request", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ProblemDetails" - } - } - } - }, - "404": { - "description": "Unknown process", + "description": "Invalid screenshot query", "content": { "application/json": { "schema": { @@ -3485,7 +3697,7 @@ } }, "409": { - "description": "Not a terminal process", + "description": "Desktop runtime is not ready", "content": { "application/json": { "schema": { @@ -3494,8 +3706,8 @@ } } }, - "501": { - "description": "Process API unsupported on this platform", + "502": { + "description": "Desktop runtime health or 
screenshot capture failed", "content": { "application/json": { "schema": { @@ -3506,257 +3718,2857 @@ } } } - } - }, - "components": { - "schemas": { - "AcpEnvelope": { - "type": "object", + }, + "/v1/desktop/screenshot/region": { + "get": { + "tags": ["v1"], + "summary": "Capture a desktop screenshot region.", + "description": "Performs a health-gated screenshot crop against the managed desktop and\nreturns the requested region image bytes.", + "operationId": "get_v1_desktop_screenshot_region", + "parameters": [ + { + "name": "x", + "in": "query", + "required": true, + "schema": { + "type": "integer", + "format": "int32" + } + }, + { + "name": "y", + "in": "query", + "required": true, + "schema": { + "type": "integer", + "format": "int32" + } + }, + { + "name": "width", + "in": "query", + "required": true, + "schema": { + "type": "integer", + "format": "int32", + "minimum": 0 + } + }, + { + "name": "height", + "in": "query", + "required": true, + "schema": { + "type": "integer", + "format": "int32", + "minimum": 0 + } + }, + { + "name": "format", + "in": "query", + "required": false, + "schema": { + "allOf": [ + { + "$ref": "#/components/schemas/DesktopScreenshotFormat" + } + ], + "nullable": true + } + }, + { + "name": "quality", + "in": "query", + "required": false, + "schema": { + "type": "integer", + "format": "int32", + "nullable": true, + "minimum": 0 + } + }, + { + "name": "scale", + "in": "query", + "required": false, + "schema": { + "type": "number", + "format": "float", + "nullable": true + } + }, + { + "name": "showCursor", + "in": "query", + "required": false, + "schema": { + "type": "boolean", + "nullable": true + } + } + ], + "responses": { + "200": { + "description": "Desktop screenshot region as image bytes" + }, + "400": { + "description": "Invalid screenshot region", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + }, + "409": { + "description": "Desktop runtime is not ready", + 
"content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + }, + "502": { + "description": "Desktop runtime health or screenshot capture failed", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + } + } + } + }, + "/v1/desktop/start": { + "post": { + "tags": ["v1"], + "summary": "Start the private desktop runtime.", + "description": "Lazily launches the managed Xvfb/openbox stack, validates display health,\nand returns the resulting desktop status snapshot.", + "operationId": "post_v1_desktop_start", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DesktopStartRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Desktop runtime status after start", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DesktopStatusResponse" + } + } + } + }, + "400": { + "description": "Invalid desktop start request", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + }, + "409": { + "description": "Desktop runtime is already transitioning", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + }, + "501": { + "description": "Desktop API unsupported on this platform", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + }, + "503": { + "description": "Desktop runtime could not be started", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + } + } + } + }, + "/v1/desktop/status": { + "get": { + "tags": ["v1"], + "summary": "Get desktop runtime status.", + "description": "Returns the current desktop runtime state, dependency status, active\ndisplay metadata, and supervised process information.", + 
"operationId": "get_v1_desktop_status", + "responses": { + "200": { + "description": "Desktop runtime status", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DesktopStatusResponse" + } + } + } + }, + "401": { + "description": "Authentication required", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + } + } + } + }, + "/v1/desktop/stop": { + "post": { + "tags": ["v1"], + "summary": "Stop the private desktop runtime.", + "description": "Terminates the managed openbox/Xvfb/dbus processes owned by the desktop\nruntime and returns the resulting status snapshot.", + "operationId": "post_v1_desktop_stop", + "responses": { + "200": { + "description": "Desktop runtime status after stop", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DesktopStatusResponse" + } + } + } + }, + "409": { + "description": "Desktop runtime is already transitioning", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + } + } + } + }, + "/v1/desktop/stream/signaling": { + "get": { + "tags": ["v1"], + "summary": "Open a desktop WebRTC signaling session.", + "description": "Upgrades the connection to a WebSocket used for WebRTC signaling between\nthe browser client and the desktop streaming process. 
Also accepts mouse\nand keyboard input frames as a fallback transport.", + "operationId": "get_v1_desktop_stream_ws", + "parameters": [ + { + "name": "access_token", + "in": "query", + "description": "Bearer token alternative for WS auth", + "required": false, + "schema": { + "type": "string", + "nullable": true + } + } + ], + "responses": { + "101": { + "description": "WebSocket upgraded" + }, + "409": { + "description": "Desktop runtime or streaming session is not ready", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + }, + "502": { + "description": "Desktop stream failed", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + } + } + } + }, + "/v1/desktop/stream/start": { + "post": { + "tags": ["v1"], + "summary": "Start desktop streaming.", + "description": "Enables desktop websocket streaming for the managed desktop.", + "operationId": "post_v1_desktop_stream_start", + "responses": { + "200": { + "description": "Desktop streaming started", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DesktopStreamStatusResponse" + } + } + } + } + } + } + }, + "/v1/desktop/stream/status": { + "get": { + "tags": ["v1"], + "summary": "Get desktop stream status.", + "description": "Returns the current state of the desktop WebRTC streaming session.", + "operationId": "get_v1_desktop_stream_status", + "responses": { + "200": { + "description": "Desktop stream status", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DesktopStreamStatusResponse" + } + } + } + } + } + } + }, + "/v1/desktop/stream/stop": { + "post": { + "tags": ["v1"], + "summary": "Stop desktop streaming.", + "description": "Disables desktop websocket streaming for the managed desktop.", + "operationId": "post_v1_desktop_stream_stop", + "responses": { + "200": { + "description": "Desktop streaming stopped", + 
"content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DesktopStreamStatusResponse" + } + } + } + } + } + } + }, + "/v1/desktop/windows": { + "get": { + "tags": ["v1"], + "summary": "List visible desktop windows.", + "description": "Performs a health-gated visible-window enumeration against the managed\ndesktop and returns the current window metadata.", + "operationId": "get_v1_desktop_windows", + "responses": { + "200": { + "description": "Visible desktop windows", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DesktopWindowListResponse" + } + } + } + }, + "409": { + "description": "Desktop runtime is not ready", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + }, + "503": { + "description": "Desktop runtime health or window query failed", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + } + } + } + }, + "/v1/desktop/windows/focused": { + "get": { + "tags": ["v1"], + "summary": "Get the currently focused desktop window.", + "description": "Returns information about the window that currently has input focus.", + "operationId": "get_v1_desktop_windows_focused", + "responses": { + "200": { + "description": "Focused window info", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DesktopWindowInfo" + } + } + } + }, + "404": { + "description": "No window is focused", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + }, + "409": { + "description": "Desktop runtime is not ready", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + } + } + } + }, + "/v1/desktop/windows/{id}/focus": { + "post": { + "tags": ["v1"], + "summary": "Focus a desktop window.", + "description": "Brings the specified window to the foreground and 
gives it input focus.", + "operationId": "post_v1_desktop_window_focus", + "parameters": [ + { + "name": "id", + "in": "path", + "description": "X11 window ID", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Window info after focus", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DesktopWindowInfo" + } + } + } + }, + "404": { + "description": "Window not found", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + }, + "409": { + "description": "Desktop runtime is not ready", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + } + } + } + }, + "/v1/desktop/windows/{id}/move": { + "post": { + "tags": ["v1"], + "summary": "Move a desktop window.", + "description": "Moves the specified window to the given position.", + "operationId": "post_v1_desktop_window_move", + "parameters": [ + { + "name": "id", + "in": "path", + "description": "X11 window ID", + "required": true, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DesktopWindowMoveRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Window info after move", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DesktopWindowInfo" + } + } + } + }, + "404": { + "description": "Window not found", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + }, + "409": { + "description": "Desktop runtime is not ready", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + } + } + } + }, + "/v1/desktop/windows/{id}/resize": { + "post": { + "tags": ["v1"], + "summary": "Resize a desktop window.", + "description": 
"Resizes the specified window to the given dimensions.", + "operationId": "post_v1_desktop_window_resize", + "parameters": [ + { + "name": "id", + "in": "path", + "description": "X11 window ID", + "required": true, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DesktopWindowResizeRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Window info after resize", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DesktopWindowInfo" + } + } + } + }, + "404": { + "description": "Window not found", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + }, + "409": { + "description": "Desktop runtime is not ready", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + } + } + } + }, + "/v1/fs/entries": { + "get": { + "tags": ["v1"], + "operationId": "get_v1_fs_entries", + "parameters": [ + { + "name": "path", + "in": "query", + "description": "Directory path", + "required": false, + "schema": { + "type": "string", + "nullable": true + } + } + ], + "responses": { + "200": { + "description": "Directory entries", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/FsEntry" + } + } + } + } + } + } + } + }, + "/v1/fs/entry": { + "delete": { + "tags": ["v1"], + "operationId": "delete_v1_fs_entry", + "parameters": [ + { + "name": "path", + "in": "query", + "description": "File or directory path", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "recursive", + "in": "query", + "description": "Delete directory recursively", + "required": false, + "schema": { + "type": "boolean", + "nullable": true + } + } + ], + "responses": { + "200": { + "description": "Delete result", + "content": { + 
"application/json": { + "schema": { + "$ref": "#/components/schemas/FsActionResponse" + } + } + } + } + } + } + }, + "/v1/fs/file": { + "get": { + "tags": ["v1"], + "operationId": "get_v1_fs_file", + "parameters": [ + { + "name": "path", + "in": "query", + "description": "File path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "File content" + } + } + }, + "put": { + "tags": ["v1"], + "operationId": "put_v1_fs_file", + "parameters": [ + { + "name": "path", + "in": "query", + "description": "File path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "description": "Raw file bytes", + "content": { + "text/plain": { + "schema": { + "type": "string" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Write result", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/FsWriteResponse" + } + } + } + } + } + } + }, + "/v1/fs/mkdir": { + "post": { + "tags": ["v1"], + "operationId": "post_v1_fs_mkdir", + "parameters": [ + { + "name": "path", + "in": "query", + "description": "Directory path", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Directory created", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/FsActionResponse" + } + } + } + } + } + } + }, + "/v1/fs/move": { + "post": { + "tags": ["v1"], + "operationId": "post_v1_fs_move", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/FsMoveRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Move result", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/FsMoveResponse" + } + } + } + } + } + } + }, + "/v1/fs/stat": { + "get": { + "tags": ["v1"], + "operationId": "get_v1_fs_stat", + "parameters": [ + { + "name": "path", + "in": 
"query", + "description": "Path to stat", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Path metadata", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/FsStat" + } + } + } + } + } + } + }, + "/v1/fs/upload-batch": { + "post": { + "tags": ["v1"], + "operationId": "post_v1_fs_upload_batch", + "parameters": [ + { + "name": "path", + "in": "query", + "description": "Destination path", + "required": false, + "schema": { + "type": "string", + "nullable": true + } + } + ], + "requestBody": { + "description": "tar archive body", + "content": { + "text/plain": { + "schema": { + "type": "string" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Upload/extract result", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/FsUploadBatchResponse" + } + } + } + } + } + } + }, + "/v1/health": { + "get": { + "tags": ["v1"], + "operationId": "get_v1_health", + "responses": { + "200": { + "description": "Service health response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HealthResponse" + } + } + } + } + } + } + }, + "/v1/processes": { + "get": { + "tags": ["v1"], + "summary": "List all managed processes.", + "description": "Returns a list of all processes (running and exited) currently tracked\nby the runtime, sorted by process ID.", + "operationId": "get_v1_processes", + "parameters": [ + { + "name": "owner", + "in": "query", + "required": false, + "schema": { + "allOf": [ + { + "$ref": "#/components/schemas/ProcessOwner" + } + ], + "nullable": true + } + } + ], + "responses": { + "200": { + "description": "List processes", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProcessListResponse" + } + } + } + }, + "501": { + "description": "Process API unsupported on this platform", + "content": { + "application/json": { + "schema": { + 
"$ref": "#/components/schemas/ProblemDetails" + } + } + } + } + } + }, + "post": { + "tags": ["v1"], + "summary": "Create a long-lived managed process.", + "description": "Spawns a new process with the given command and arguments. Supports both\npipe-based and PTY (tty) modes. Returns the process descriptor on success.", + "operationId": "post_v1_processes", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProcessCreateRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Started process", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProcessInfo" + } + } + } + }, + "400": { + "description": "Invalid request", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + }, + "409": { + "description": "Process limit or state conflict", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + }, + "501": { + "description": "Process API unsupported on this platform", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + } + } + } + }, + "/v1/processes/config": { + "get": { + "tags": ["v1"], + "summary": "Get process runtime configuration.", + "description": "Returns the current runtime configuration for the process management API,\nincluding limits for concurrency, timeouts, and buffer sizes.", + "operationId": "get_v1_processes_config", + "responses": { + "200": { + "description": "Current runtime process config", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProcessConfig" + } + } + } + }, + "501": { + "description": "Process API unsupported on this platform", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + } + } + }, + "post": { + "tags": ["v1"], + "summary": 
"Update process runtime configuration.", + "description": "Replaces the runtime configuration for the process management API.\nValidates that all values are non-zero and clamps default timeout to max.", + "operationId": "post_v1_processes_config", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProcessConfig" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Updated runtime process config", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProcessConfig" + } + } + } + }, + "400": { + "description": "Invalid config", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + }, + "501": { + "description": "Process API unsupported on this platform", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + } + } + } + }, + "/v1/processes/run": { + "post": { + "tags": ["v1"], + "summary": "Run a one-shot command.", + "description": "Executes a command to completion and returns its stdout, stderr, exit code,\nand duration. 
Supports configurable timeout and output size limits.", + "operationId": "post_v1_processes_run", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProcessRunRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "One-off command result", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProcessRunResponse" + } + } + } + }, + "400": { + "description": "Invalid request", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + }, + "501": { + "description": "Process API unsupported on this platform", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + } + } + } + }, + "/v1/processes/{id}": { + "get": { + "tags": ["v1"], + "summary": "Get a single process by ID.", + "description": "Returns the current state of a managed process including its status,\nPID, exit code, and creation/exit timestamps.", + "operationId": "get_v1_process", + "parameters": [ + { + "name": "id", + "in": "path", + "description": "Process ID", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "Process details", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProcessInfo" + } + } + } + }, + "404": { + "description": "Unknown process", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + }, + "501": { + "description": "Process API unsupported on this platform", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + } + } + }, + "delete": { + "tags": ["v1"], + "summary": "Delete a process record.", + "description": "Removes a stopped process from the runtime. 
Returns 409 if the process\nis still running; stop or kill it first.", + "operationId": "delete_v1_process", + "parameters": [ + { + "name": "id", + "in": "path", + "description": "Process ID", + "required": true, + "schema": { + "type": "string" + } + } + ], + "responses": { + "204": { + "description": "Process deleted" + }, + "404": { + "description": "Unknown process", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + }, + "409": { + "description": "Process is still running", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + }, + "501": { + "description": "Process API unsupported on this platform", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + } + } + } + }, + "/v1/processes/{id}/input": { + "post": { + "tags": ["v1"], + "summary": "Write input to a process.", + "description": "Sends data to a process's stdin (pipe mode) or PTY writer (tty mode).\nData can be encoded as base64, utf8, or text. 
Returns 413 if the decoded\npayload exceeds the configured `maxInputBytesPerRequest` limit.", + "operationId": "post_v1_process_input", + "parameters": [ + { + "name": "id", + "in": "path", + "description": "Process ID", + "required": true, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProcessInputRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Input accepted", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProcessInputResponse" + } + } + } + }, + "400": { + "description": "Invalid request", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + }, + "409": { + "description": "Process not writable", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + }, + "413": { + "description": "Input exceeds configured limit", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + }, + "501": { + "description": "Process API unsupported on this platform", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + } + } + } + }, + "/v1/processes/{id}/kill": { + "post": { + "tags": ["v1"], + "summary": "Send SIGKILL to a process.", + "description": "Sends SIGKILL to the process and optionally waits up to `waitMs`\nmilliseconds for the process to exit before returning.", + "operationId": "post_v1_process_kill", + "parameters": [ + { + "name": "id", + "in": "path", + "description": "Process ID", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "waitMs", + "in": "query", + "description": "Wait up to N ms for process to exit", + "required": false, + "schema": { + "type": "integer", + "format": "int64", + "nullable": true, + "minimum": 0 + } 
+ } + ], + "responses": { + "200": { + "description": "Kill signal sent", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProcessInfo" + } + } + } + }, + "404": { + "description": "Unknown process", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + }, + "501": { + "description": "Process API unsupported on this platform", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + } + } + } + }, + "/v1/processes/{id}/logs": { + "get": { + "tags": ["v1"], + "summary": "Fetch process logs.", + "description": "Returns buffered log entries for a process. Supports filtering by stream\ntype, tail count, and sequence-based resumption. When `follow=true`,\nreturns an SSE stream that replays buffered entries then streams live output.", + "operationId": "get_v1_process_logs", + "parameters": [ + { + "name": "id", + "in": "path", + "description": "Process ID", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "stream", + "in": "query", + "description": "stdout|stderr|combined|pty", + "required": false, + "schema": { + "allOf": [ + { + "$ref": "#/components/schemas/ProcessLogsStream" + } + ], + "nullable": true + } + }, + { + "name": "tail", + "in": "query", + "description": "Tail N entries", + "required": false, + "schema": { + "type": "integer", + "nullable": true, + "minimum": 0 + } + }, + { + "name": "follow", + "in": "query", + "description": "Follow via SSE", + "required": false, + "schema": { + "type": "boolean", + "nullable": true + } + }, + { + "name": "since", + "in": "query", + "description": "Only entries with sequence greater than this", + "required": false, + "schema": { + "type": "integer", + "format": "int64", + "nullable": true, + "minimum": 0 + } + } + ], + "responses": { + "200": { + "description": "Process logs", + "content": { + "application/json": { + "schema": { + "$ref": 
"#/components/schemas/ProcessLogsResponse" + } + } + } + }, + "404": { + "description": "Unknown process", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + }, + "501": { + "description": "Process API unsupported on this platform", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + } + } + } + }, + "/v1/processes/{id}/stop": { + "post": { + "tags": ["v1"], + "summary": "Send SIGTERM to a process.", + "description": "Sends SIGTERM to the process and optionally waits up to `waitMs`\nmilliseconds for the process to exit before returning.", + "operationId": "post_v1_process_stop", + "parameters": [ + { + "name": "id", + "in": "path", + "description": "Process ID", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "waitMs", + "in": "query", + "description": "Wait up to N ms for process to exit", + "required": false, + "schema": { + "type": "integer", + "format": "int64", + "nullable": true, + "minimum": 0 + } + } + ], + "responses": { + "200": { + "description": "Stop signal sent", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProcessInfo" + } + } + } + }, + "404": { + "description": "Unknown process", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + }, + "501": { + "description": "Process API unsupported on this platform", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + } + } + } + }, + "/v1/processes/{id}/terminal/resize": { + "post": { + "tags": ["v1"], + "summary": "Resize a process terminal.", + "description": "Sets the PTY window size (columns and rows) for a tty-mode process and\nsends SIGWINCH so the child process can adapt.", + "operationId": "post_v1_process_terminal_resize", + "parameters": [ + { + "name": "id", + "in": "path", + 
"description": "Process ID", + "required": true, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProcessTerminalResizeRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Resize accepted", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProcessTerminalResizeResponse" + } + } + } + }, + "400": { + "description": "Invalid request", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + }, + "404": { + "description": "Unknown process", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + }, + "409": { + "description": "Not a terminal process", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + }, + "501": { + "description": "Process API unsupported on this platform", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + } + } + } + }, + "/v1/processes/{id}/terminal/ws": { + "get": { + "tags": ["v1"], + "summary": "Open an interactive WebSocket terminal session.", + "description": "Upgrades the connection to a WebSocket for bidirectional PTY I/O. Accepts\n`access_token` query param for browser-based auth (WebSocket API cannot\nsend custom headers). 
Streams raw PTY output as binary frames and accepts\nJSON control frames for input, resize, and close.", + "operationId": "get_v1_process_terminal_ws", + "parameters": [ + { + "name": "id", + "in": "path", + "description": "Process ID", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "access_token", + "in": "query", + "description": "Bearer token alternative for WS auth", + "required": false, + "schema": { + "type": "string", + "nullable": true + } + } + ], + "responses": { + "101": { + "description": "WebSocket upgraded" + }, + "400": { + "description": "Invalid websocket frame or upgrade request", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + }, + "404": { + "description": "Unknown process", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + }, + "409": { + "description": "Not a terminal process", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + }, + "501": { + "description": "Process API unsupported on this platform", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + } + } + } + } + }, + "components": { + "schemas": { + "AcpEnvelope": { + "type": "object", "required": ["jsonrpc"], "properties": { - "error": { + "error": { + "nullable": true + }, + "id": { + "nullable": true + }, + "jsonrpc": { + "type": "string" + }, + "method": { + "type": "string", + "nullable": true + }, + "params": { + "nullable": true + }, + "result": { + "nullable": true + } + } + }, + "AcpPostQuery": { + "type": "object", + "properties": { + "agent": { + "type": "string", + "nullable": true + } + } + }, + "AcpServerInfo": { + "type": "object", + "required": ["serverId", "agent", "createdAtMs"], + "properties": { + "agent": { + "type": "string" + }, + "createdAtMs": { + "type": "integer", + "format": 
"int64" + }, + "serverId": { + "type": "string" + } + } + }, + "AcpServerListResponse": { + "type": "object", + "required": ["servers"], + "properties": { + "servers": { + "type": "array", + "items": { + "$ref": "#/components/schemas/AcpServerInfo" + } + } + } + }, + "AgentCapabilities": { + "type": "object", + "required": [ + "planMode", + "permissions", + "questions", + "toolCalls", + "toolResults", + "textMessages", + "images", + "fileAttachments", + "sessionLifecycle", + "errorEvents", + "reasoning", + "status", + "commandExecution", + "fileChanges", + "mcpTools", + "streamingDeltas", + "itemStarted", + "sharedProcess" + ], + "properties": { + "commandExecution": { + "type": "boolean" + }, + "errorEvents": { + "type": "boolean" + }, + "fileAttachments": { + "type": "boolean" + }, + "fileChanges": { + "type": "boolean" + }, + "images": { + "type": "boolean" + }, + "itemStarted": { + "type": "boolean" + }, + "mcpTools": { + "type": "boolean" + }, + "permissions": { + "type": "boolean" + }, + "planMode": { + "type": "boolean" + }, + "questions": { + "type": "boolean" + }, + "reasoning": { + "type": "boolean" + }, + "sessionLifecycle": { + "type": "boolean" + }, + "sharedProcess": { + "type": "boolean" + }, + "status": { + "type": "boolean" + }, + "streamingDeltas": { + "type": "boolean" + }, + "textMessages": { + "type": "boolean" + }, + "toolCalls": { + "type": "boolean" + }, + "toolResults": { + "type": "boolean" + } + } + }, + "AgentInfo": { + "type": "object", + "required": ["id", "installed", "credentialsAvailable", "capabilities"], + "properties": { + "capabilities": { + "$ref": "#/components/schemas/AgentCapabilities" + }, + "configError": { + "type": "string", + "nullable": true + }, + "configOptions": { + "type": "array", + "items": {}, + "nullable": true + }, + "credentialsAvailable": { + "type": "boolean" + }, + "id": { + "type": "string" + }, + "installed": { + "type": "boolean" + }, + "path": { + "type": "string", + "nullable": true + }, + 
"serverStatus": { + "allOf": [ + { + "$ref": "#/components/schemas/ServerStatusInfo" + } + ], + "nullable": true + }, + "version": { + "type": "string", + "nullable": true + } + } + }, + "AgentInstallArtifact": { + "type": "object", + "required": ["kind", "path", "source"], + "properties": { + "kind": { + "type": "string" + }, + "path": { + "type": "string" + }, + "source": { + "type": "string" + }, + "version": { + "type": "string", + "nullable": true + } + } + }, + "AgentInstallRequest": { + "type": "object", + "properties": { + "agentProcessVersion": { + "type": "string", + "nullable": true + }, + "agentVersion": { + "type": "string", + "nullable": true + }, + "reinstall": { + "type": "boolean", + "nullable": true + } + } + }, + "AgentInstallResponse": { + "type": "object", + "required": ["already_installed", "artifacts"], + "properties": { + "already_installed": { + "type": "boolean" + }, + "artifacts": { + "type": "array", + "items": { + "$ref": "#/components/schemas/AgentInstallArtifact" + } + } + } + }, + "AgentListResponse": { + "type": "object", + "required": ["agents"], + "properties": { + "agents": { + "type": "array", + "items": { + "$ref": "#/components/schemas/AgentInfo" + } + } + } + }, + "BrowserActionResponse": { + "type": "object", + "required": ["ok"], + "properties": { + "ok": { + "type": "boolean" + } + } + }, + "BrowserClickRequest": { + "type": "object", + "required": ["selector"], + "properties": { + "button": { + "allOf": [ + { + "$ref": "#/components/schemas/BrowserMouseButton" + } + ], + "nullable": true + }, + "clickCount": { + "type": "integer", + "format": "int32", + "nullable": true, + "minimum": 0 + }, + "selector": { + "type": "string" + }, + "timeout": { + "type": "integer", + "format": "int64", + "nullable": true, + "minimum": 0 + } + } + }, + "BrowserConsoleMessage": { + "type": "object", + "required": ["level", "text", "timestamp"], + "properties": { + "level": { + "type": "string" + }, + "line": { + "type": "integer", + 
"format": "int32", + "nullable": true, + "minimum": 0 + }, + "text": { + "type": "string" + }, + "timestamp": { + "type": "string" + }, + "url": { + "type": "string", + "nullable": true + } + } + }, + "BrowserConsoleQuery": { + "type": "object", + "properties": { + "level": { + "type": "string", + "nullable": true + }, + "limit": { + "type": "integer", + "format": "int32", + "nullable": true, + "minimum": 0 + } + } + }, + "BrowserConsoleResponse": { + "type": "object", + "required": ["messages"], + "properties": { + "messages": { + "type": "array", + "items": { + "$ref": "#/components/schemas/BrowserConsoleMessage" + } + } + } + }, + "BrowserContentQuery": { + "type": "object", + "properties": { + "selector": { + "type": "string", + "nullable": true + } + } + }, + "BrowserContentResponse": { + "type": "object", + "required": ["html", "url", "title"], + "properties": { + "html": { + "type": "string" + }, + "title": { + "type": "string" + }, + "url": { + "type": "string" + } + } + }, + "BrowserContextCreateRequest": { + "type": "object", + "required": ["name"], + "properties": { + "name": { + "type": "string" + } + } + }, + "BrowserContextInfo": { + "type": "object", + "required": ["id", "name", "createdAt"], + "properties": { + "createdAt": { + "type": "string" + }, + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "sizeBytes": { + "type": "integer", + "format": "int64", + "nullable": true, + "minimum": 0 + } + } + }, + "BrowserContextListResponse": { + "type": "object", + "required": ["contexts"], + "properties": { + "contexts": { + "type": "array", + "items": { + "$ref": "#/components/schemas/BrowserContextInfo" + } + } + } + }, + "BrowserCookie": { + "type": "object", + "required": ["name", "value"], + "properties": { + "domain": { + "type": "string", + "nullable": true + }, + "expires": { + "type": "number", + "format": "double", + "nullable": true + }, + "httpOnly": { + "type": "boolean", + "nullable": true + }, + "name": { + "type": 
"string" + }, + "path": { + "type": "string", + "nullable": true + }, + "sameSite": { + "allOf": [ + { + "$ref": "#/components/schemas/BrowserCookieSameSite" + } + ], + "nullable": true + }, + "secure": { + "type": "boolean", + "nullable": true + }, + "value": { + "type": "string" + } + } + }, + "BrowserCookieSameSite": { + "type": "string", + "enum": ["Strict", "Lax", "None"] + }, + "BrowserCookiesQuery": { + "type": "object", + "properties": { + "url": { + "type": "string", + "nullable": true + } + } + }, + "BrowserCookiesResponse": { + "type": "object", + "required": ["cookies"], + "properties": { + "cookies": { + "type": "array", + "items": { + "$ref": "#/components/schemas/BrowserCookie" + } + } + } + }, + "BrowserCrawlExtract": { + "type": "string", + "enum": ["markdown", "html", "text", "links"] + }, + "BrowserCrawlPage": { + "type": "object", + "required": ["url", "title", "content", "depth"], + "properties": { + "content": { + "type": "string" + }, + "depth": { + "type": "integer", + "format": "int32", + "minimum": 0 + }, + "links": { + "type": "array", + "items": { + "type": "string" + } + }, + "status": { + "type": "integer", + "format": "int32", + "nullable": true, + "minimum": 0 + }, + "title": { + "type": "string" + }, + "url": { + "type": "string" + } + } + }, + "BrowserCrawlRequest": { + "type": "object", + "required": ["url"], + "properties": { + "allowedDomains": { + "type": "array", + "items": { + "type": "string" + }, + "nullable": true + }, + "extract": { + "allOf": [ + { + "$ref": "#/components/schemas/BrowserCrawlExtract" + } + ], + "nullable": true + }, + "maxDepth": { + "type": "integer", + "format": "int32", + "nullable": true, + "minimum": 0 + }, + "maxPages": { + "type": "integer", + "format": "int32", + "nullable": true, + "minimum": 0 + }, + "url": { + "type": "string" + } + } + }, + "BrowserCrawlResponse": { + "type": "object", + "required": ["pages", "totalPages", "truncated"], + "properties": { + "pages": { + "type": "array", + 
"items": { + "$ref": "#/components/schemas/BrowserCrawlPage" + } + }, + "totalPages": { + "type": "integer", + "format": "int32", + "minimum": 0 + }, + "truncated": { + "type": "boolean" + } + } + }, + "BrowserCreateTabRequest": { + "type": "object", + "properties": { + "url": { + "type": "string", + "nullable": true + } + } + }, + "BrowserDeleteCookiesQuery": { + "type": "object", + "properties": { + "domain": { + "type": "string", "nullable": true }, - "id": { + "name": { + "type": "string", "nullable": true + } + } + }, + "BrowserDialogRequest": { + "type": "object", + "required": ["accept"], + "properties": { + "accept": { + "type": "boolean" }, - "jsonrpc": { + "text": { + "type": "string", + "nullable": true + } + } + }, + "BrowserExecuteRequest": { + "type": "object", + "required": ["expression"], + "properties": { + "awaitPromise": { + "type": "boolean", + "nullable": true + }, + "expression": { + "type": "string" + } + } + }, + "BrowserExecuteResponse": { + "type": "object", + "required": ["result", "type"], + "properties": { + "result": {}, + "type": { + "type": "string" + } + } + }, + "BrowserHoverRequest": { + "type": "object", + "required": ["selector"], + "properties": { + "selector": { + "type": "string" + } + } + }, + "BrowserLinkInfo": { + "type": "object", + "required": ["href", "text"], + "properties": { + "href": { + "type": "string" + }, + "text": { + "type": "string" + } + } + }, + "BrowserLinksResponse": { + "type": "object", + "required": ["links", "url"], + "properties": { + "links": { + "type": "array", + "items": { + "$ref": "#/components/schemas/BrowserLinkInfo" + } + }, + "url": { + "type": "string" + } + } + }, + "BrowserMarkdownResponse": { + "type": "object", + "required": ["markdown", "url", "title"], + "properties": { + "markdown": { + "type": "string" + }, + "title": { + "type": "string" + }, + "url": { + "type": "string" + } + } + }, + "BrowserMouseButton": { + "type": "string", + "enum": ["left", "right", "middle"] + }, + 
"BrowserNavigateRequest": { + "type": "object", + "required": ["url"], + "properties": { + "url": { "type": "string" }, + "waitUntil": { + "allOf": [ + { + "$ref": "#/components/schemas/BrowserNavigateWaitUntil" + } + ], + "nullable": true + } + } + }, + "BrowserNavigateWaitUntil": { + "type": "string", + "enum": ["load", "domcontentloaded", "networkidle"] + }, + "BrowserNetworkQuery": { + "type": "object", + "properties": { + "limit": { + "type": "integer", + "format": "int32", + "nullable": true, + "minimum": 0 + }, + "urlPattern": { + "type": "string", + "nullable": true + } + } + }, + "BrowserNetworkRequest": { + "type": "object", + "required": ["url", "method", "timestamp"], + "properties": { + "duration": { + "type": "integer", + "format": "int64", + "nullable": true, + "minimum": 0 + }, "method": { + "type": "string" + }, + "mimeType": { "type": "string", "nullable": true }, - "params": { + "responseSize": { + "type": "integer", + "format": "int64", + "nullable": true, + "minimum": 0 + }, + "status": { + "type": "integer", + "format": "int32", + "nullable": true, + "minimum": 0 + }, + "timestamp": { + "type": "string" + }, + "url": { + "type": "string" + } + } + }, + "BrowserNetworkResponse": { + "type": "object", + "required": ["requests"], + "properties": { + "requests": { + "type": "array", + "items": { + "$ref": "#/components/schemas/BrowserNetworkRequest" + } + } + } + }, + "BrowserPageInfo": { + "type": "object", + "required": ["url", "title"], + "properties": { + "status": { + "type": "integer", + "format": "int32", + "nullable": true, + "minimum": 0 + }, + "title": { + "type": "string" + }, + "url": { + "type": "string" + } + } + }, + "BrowserPdfFormat": { + "type": "string", + "enum": ["a4", "letter", "legal"] + }, + "BrowserPdfQuery": { + "type": "object", + "properties": { + "format": { + "allOf": [ + { + "$ref": "#/components/schemas/BrowserPdfFormat" + } + ], "nullable": true }, - "result": { + "landscape": { + "type": "boolean", + "nullable": 
true + }, + "printBackground": { + "type": "boolean", + "nullable": true + }, + "scale": { + "type": "number", + "format": "float", "nullable": true } } }, - "AcpPostQuery": { + "BrowserReloadRequest": { "type": "object", "properties": { - "agent": { + "ignoreCache": { + "type": "boolean", + "nullable": true + } + } + }, + "BrowserScrapeRequest": { + "type": "object", + "required": ["selectors"], + "properties": { + "selectors": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "url": { "type": "string", "nullable": true } } }, - "AcpServerInfo": { + "BrowserScrapeResponse": { "type": "object", - "required": ["serverId", "agent", "createdAtMs"], + "required": ["data", "url", "title"], "properties": { - "agent": { + "data": { + "type": "object", + "additionalProperties": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "title": { + "type": "string" + }, + "url": { + "type": "string" + } + } + }, + "BrowserScreenshotFormat": { + "type": "string", + "enum": ["png", "jpeg", "webp"] + }, + "BrowserScreenshotQuery": { + "type": "object", + "properties": { + "format": { + "allOf": [ + { + "$ref": "#/components/schemas/BrowserScreenshotFormat" + } + ], + "nullable": true + }, + "fullPage": { + "type": "boolean", + "nullable": true + }, + "quality": { + "type": "integer", + "format": "int32", + "nullable": true, + "minimum": 0 + }, + "selector": { + "type": "string", + "nullable": true + } + } + }, + "BrowserScrollRequest": { + "type": "object", + "properties": { + "selector": { + "type": "string", + "nullable": true + }, + "x": { + "type": "integer", + "format": "int32", + "nullable": true + }, + "y": { + "type": "integer", + "format": "int32", + "nullable": true + } + } + }, + "BrowserSelectRequest": { + "type": "object", + "required": ["selector", "value"], + "properties": { + "selector": { "type": "string" }, - "createdAtMs": { - "type": "integer", - "format": "int64" - }, - "serverId": { + "value": { "type": "string" } } 
}, - "AcpServerListResponse": { + "BrowserSetCookiesRequest": { "type": "object", - "required": ["servers"], + "required": ["cookies"], "properties": { - "servers": { + "cookies": { "type": "array", "items": { - "$ref": "#/components/schemas/AcpServerInfo" + "$ref": "#/components/schemas/BrowserCookie" } } } }, - "AgentCapabilities": { + "BrowserSnapshotResponse": { "type": "object", - "required": [ - "planMode", - "permissions", - "questions", - "toolCalls", - "toolResults", - "textMessages", - "images", - "fileAttachments", - "sessionLifecycle", - "errorEvents", - "reasoning", - "status", - "commandExecution", - "fileChanges", - "mcpTools", - "streamingDeltas", - "itemStarted", - "sharedProcess" - ], + "required": ["snapshot", "url", "title"], "properties": { - "commandExecution": { - "type": "boolean" - }, - "errorEvents": { - "type": "boolean" - }, - "fileAttachments": { - "type": "boolean" - }, - "fileChanges": { - "type": "boolean" - }, - "images": { - "type": "boolean" - }, - "itemStarted": { - "type": "boolean" + "snapshot": { + "type": "string" }, - "mcpTools": { - "type": "boolean" + "title": { + "type": "string" }, - "permissions": { - "type": "boolean" + "url": { + "type": "string" + } + } + }, + "BrowserStartRequest": { + "type": "object", + "properties": { + "contextId": { + "type": "string", + "nullable": true }, - "planMode": { - "type": "boolean" + "dpi": { + "type": "integer", + "format": "int32", + "nullable": true, + "minimum": 0 }, - "questions": { - "type": "boolean" + "headless": { + "type": "boolean", + "nullable": true }, - "reasoning": { - "type": "boolean" + "height": { + "type": "integer", + "format": "int32", + "nullable": true, + "minimum": 0 }, - "sessionLifecycle": { - "type": "boolean" + "recordingFps": { + "type": "integer", + "format": "int32", + "nullable": true, + "minimum": 0 }, - "sharedProcess": { - "type": "boolean" + "streamAudioCodec": { + "type": "string", + "nullable": true }, - "status": { - "type": "boolean" + 
"streamFrameRate": { + "type": "integer", + "format": "int32", + "nullable": true, + "minimum": 0 }, - "streamingDeltas": { - "type": "boolean" + "streamVideoCodec": { + "type": "string", + "nullable": true }, - "textMessages": { - "type": "boolean" + "url": { + "type": "string", + "nullable": true }, - "toolCalls": { - "type": "boolean" + "webrtcPortRange": { + "type": "string", + "nullable": true }, - "toolResults": { - "type": "boolean" + "width": { + "type": "integer", + "format": "int32", + "nullable": true, + "minimum": 0 } } }, - "AgentInfo": { + "BrowserState": { + "type": "string", + "enum": ["inactive", "install_required", "starting", "active", "stopping", "failed"] + }, + "BrowserStatusResponse": { "type": "object", - "required": ["id", "installed", "credentialsAvailable", "capabilities"], + "required": ["state"], "properties": { - "capabilities": { - "$ref": "#/components/schemas/AgentCapabilities" - }, - "configError": { + "cdpUrl": { "type": "string", "nullable": true }, - "configOptions": { - "type": "array", - "items": {}, + "display": { + "type": "string", "nullable": true }, - "credentialsAvailable": { - "type": "boolean" + "installCommand": { + "type": "string", + "nullable": true }, - "id": { - "type": "string" + "lastError": { + "allOf": [ + { + "$ref": "#/components/schemas/DesktopErrorInfo" + } + ], + "nullable": true }, - "installed": { - "type": "boolean" + "missingDependencies": { + "type": "array", + "items": { + "type": "string" + } }, - "path": { - "type": "string", - "nullable": true + "processes": { + "type": "array", + "items": { + "$ref": "#/components/schemas/DesktopProcessInfo" + } }, - "serverStatus": { + "resolution": { "allOf": [ { - "$ref": "#/components/schemas/ServerStatusInfo" + "$ref": "#/components/schemas/DesktopResolution" } ], "nullable": true }, - "version": { + "startedAt": { + "type": "string", + "nullable": true + }, + "state": { + "$ref": "#/components/schemas/BrowserState" + }, + "url": { "type": "string", 
"nullable": true } } }, - "AgentInstallArtifact": { + "BrowserTabInfo": { "type": "object", - "required": ["kind", "path", "source"], + "required": ["id", "url", "title", "active"], "properties": { - "kind": { - "type": "string" + "active": { + "type": "boolean" }, - "path": { + "id": { "type": "string" }, - "source": { + "title": { "type": "string" }, - "version": { - "type": "string", - "nullable": true + "url": { + "type": "string" } } }, - "AgentInstallRequest": { + "BrowserTabListResponse": { "type": "object", + "required": ["tabs"], "properties": { - "agentProcessVersion": { - "type": "string", + "tabs": { + "type": "array", + "items": { + "$ref": "#/components/schemas/BrowserTabInfo" + } + } + } + }, + "BrowserTypeRequest": { + "type": "object", + "required": ["selector", "text"], + "properties": { + "clear": { + "type": "boolean", "nullable": true }, - "agentVersion": { - "type": "string", - "nullable": true + "delay": { + "type": "integer", + "format": "int64", + "nullable": true, + "minimum": 0 }, - "reinstall": { - "type": "boolean", - "nullable": true + "selector": { + "type": "string" + }, + "text": { + "type": "string" } } }, - "AgentInstallResponse": { + "BrowserUploadRequest": { "type": "object", - "required": ["already_installed", "artifacts"], + "required": ["selector", "path"], "properties": { - "already_installed": { - "type": "boolean" + "path": { + "type": "string" }, - "artifacts": { - "type": "array", - "items": { - "$ref": "#/components/schemas/AgentInstallArtifact" - } + "selector": { + "type": "string" } } }, - "AgentListResponse": { + "BrowserWaitRequest": { "type": "object", - "required": ["agents"], "properties": { - "agents": { - "type": "array", - "items": { - "$ref": "#/components/schemas/AgentInfo" - } + "selector": { + "type": "string", + "nullable": true + }, + "state": { + "allOf": [ + { + "$ref": "#/components/schemas/BrowserWaitState" + } + ], + "nullable": true + }, + "timeout": { + "type": "integer", + "format": "int64", + 
"nullable": true, + "minimum": 0 } } }, + "BrowserWaitResponse": { + "type": "object", + "required": ["found"], + "properties": { + "found": { + "type": "boolean" + } + } + }, + "BrowserWaitState": { + "type": "string", + "enum": ["visible", "hidden", "attached"] + }, "DesktopActionResponse": { "type": "object", "required": ["ok"], diff --git a/sdks/typescript/src/generated/openapi.ts b/sdks/typescript/src/generated/openapi.ts index 537102fd..8469af50 100644 --- a/sdks/typescript/src/generated/openapi.ts +++ b/sdks/typescript/src/generated/openapi.ts @@ -21,6 +21,294 @@ export interface paths { "/v1/agents/{agent}/install": { post: operations["post_v1_agent_install"]; }; + "/v1/browser/back": { + /** + * Navigate the browser back in history. + * @description Sends a CDP `Page.navigateToHistoryEntry` command with the previous + * history entry and returns the resulting page URL and title. + */ + post: operations["post_v1_browser_back"]; + }; + "/v1/browser/cdp": { + /** + * Open a CDP WebSocket proxy session. + * @description Upgrades the connection to a WebSocket that relays bidirectionally to + * Chromium's internal CDP WebSocket endpoint. External tools like Playwright + * or Puppeteer can connect via `ws://sandbox-host:2468/v1/browser/cdp`. + */ + get: operations["get_v1_browser_cdp_ws"]; + }; + "/v1/browser/click": { + /** + * Click an element in the browser page. + * @description Finds the element matching `selector`, computes its center point via + * `DOM.getBoxModel`, and dispatches mouse events through `Input.dispatchMouseEvent`. + */ + post: operations["post_v1_browser_click"]; + }; + "/v1/browser/console": { + /** + * Get browser console messages. + * @description Returns console messages captured from the browser, optionally filtered by + * level (log, debug, info, warning, error) and limited in count. + */ + get: operations["get_v1_browser_console"]; + }; + "/v1/browser/content": { + /** + * Get the HTML content of the current browser page. 
+ * @description Returns the outerHTML of the page or a specific element selected by a CSS + * selector, along with the current URL and title. + */ + get: operations["get_v1_browser_content"]; + }; + "/v1/browser/contexts": { + /** + * List browser contexts (persistent profiles). + * @description Returns all browser context directories with their name, creation date, + * and on-disk size. + */ + get: operations["get_v1_browser_contexts"]; + /** + * Create a browser context (persistent profile). + * @description Creates a new browser context directory that can be passed as contextId + * to the browser start endpoint for persistent cookies and storage. + */ + post: operations["post_v1_browser_contexts"]; + }; + "/v1/browser/contexts/{context_id}": { + /** + * Delete a browser context (persistent profile). + * @description Removes the browser context directory and all stored data (cookies, + * local storage, cache, etc.). + */ + delete: operations["delete_v1_browser_context"]; + }; + "/v1/browser/cookies": { + /** + * Get browser cookies. + * @description Returns cookies from the browser, optionally filtered by URL. + * Uses CDP Network.getCookies. + */ + get: operations["get_v1_browser_cookies"]; + /** + * Set browser cookies. + * @description Sets one or more cookies in the browser via CDP Network.setCookies. + */ + post: operations["post_v1_browser_cookies"]; + /** + * Delete browser cookies. + * @description Deletes cookies matching the given name and/or domain. If no filters are + * provided, clears all browser cookies. + */ + delete: operations["delete_v1_browser_cookies"]; + }; + "/v1/browser/crawl": { + /** + * Crawl multiple pages starting from a URL. + * @description Performs a breadth-first crawl: navigates to each page, extracts content in + * the requested format, collects links, and follows them within the configured + * domain and depth limits. 
+ */ + post: operations["post_v1_browser_crawl"]; + }; + "/v1/browser/dialog": { + /** + * Handle a JavaScript dialog (alert, confirm, prompt) in the browser. + * @description Accepts or dismisses the currently open dialog using + * `Page.handleJavaScriptDialog`, optionally providing prompt text. + */ + post: operations["post_v1_browser_dialog"]; + }; + "/v1/browser/execute": { + /** + * Execute a JavaScript expression in the browser. + * @description Evaluates the given expression via CDP `Runtime.evaluate` and returns the + * result value and its type. Set `awaitPromise` to resolve async expressions. + */ + post: operations["post_v1_browser_execute"]; + }; + "/v1/browser/forward": { + /** + * Navigate the browser forward in history. + * @description Sends a CDP `Page.navigateToHistoryEntry` command with the next + * history entry and returns the resulting page URL and title. + */ + post: operations["post_v1_browser_forward"]; + }; + "/v1/browser/hover": { + /** + * Hover over an element. + * @description Finds the element matching `selector`, computes its center via `DOM.getBoxModel`, + * and dispatches a `mouseMoved` event. + */ + post: operations["post_v1_browser_hover"]; + }; + "/v1/browser/links": { + /** + * Get all links on the current page. + * @description Extracts all anchor elements from the page via CDP and returns their href + * and text content. + */ + get: operations["get_v1_browser_links"]; + }; + "/v1/browser/markdown": { + /** + * Get the page content as Markdown. + * @description Extracts the DOM HTML via CDP, strips navigation/footer/aside elements, and + * converts the remaining content to Markdown using html2md. + */ + get: operations["get_v1_browser_markdown"]; + }; + "/v1/browser/navigate": { + /** + * Navigate the browser to a URL. + * @description Sends a CDP `Page.navigate` command and optionally waits for a lifecycle + * event before returning the resulting page URL, title, and HTTP status. 
+ */ + post: operations["post_v1_browser_navigate"]; + }; + "/v1/browser/network": { + /** + * Get browser network requests. + * @description Returns network requests captured from the browser, optionally filtered by + * URL pattern and limited in count. + */ + get: operations["get_v1_browser_network"]; + }; + "/v1/browser/pdf": { + /** + * Generate a PDF of the current browser page. + * @description Generates a PDF document from the current page via CDP `Page.printToPDF` + * and returns the PDF bytes. + */ + get: operations["get_v1_browser_pdf"]; + }; + "/v1/browser/reload": { + /** + * Reload the current browser page. + * @description Sends a CDP `Page.reload` command with an optional cache bypass flag + * and returns the resulting page URL and title. + */ + post: operations["post_v1_browser_reload"]; + }; + "/v1/browser/scrape": { + /** + * Scrape structured data from the current page using CSS selectors. + * @description For each key in the `selectors` map, runs `querySelectorAll` with the CSS + * selector value and collects `textContent` from every match. If `url` is + * provided the browser navigates there first. + */ + post: operations["post_v1_browser_scrape"]; + }; + "/v1/browser/screenshot": { + /** + * Capture a browser page screenshot. + * @description Captures a screenshot of the current browser page via CDP + * `Page.captureScreenshot` and returns the image bytes with the appropriate + * Content-Type header. + */ + get: operations["get_v1_browser_screenshot"]; + }; + "/v1/browser/scroll": { + /** + * Scroll the page or a specific element. + * @description If a `selector` is provided, scrolls that element. Otherwise scrolls the + * page window by the given `x` and `y` pixel offsets. + */ + post: operations["post_v1_browser_scroll"]; + }; + "/v1/browser/select": { + /** + * Select an option in a `` element. 
+ * @description Finds the element matching `selector` and sets its value via `Runtime.evaluate`, + * then dispatches a `change` event so listeners fire. + */ + post_v1_browser_select: { + requestBody: { + content: { + "application/json": components["schemas"]["BrowserSelectRequest"]; + }; + }; + responses: { + /** @description Option selected */ + 200: { + content: { + "application/json": components["schemas"]["BrowserActionResponse"]; + }; + }; + /** @description Element not found */ + 404: { + content: { + "application/json": components["schemas"]["ProblemDetails"]; + }; + }; + /** @description Browser runtime is not active */ + 409: { + content: { + "application/json": components["schemas"]["ProblemDetails"]; + }; + }; + /** @description CDP command failed */ + 502: { + content: { + "application/json": components["schemas"]["ProblemDetails"]; + }; + }; + }; + }; + /** + * Get an accessibility tree snapshot of the current page. + * @description Returns a text representation of the page accessibility tree via CDP + * `Accessibility.getFullAXTree`. + */ + get_v1_browser_snapshot: { + responses: { + /** @description Accessibility tree snapshot */ + 200: { + content: { + "application/json": components["schemas"]["BrowserSnapshotResponse"]; + }; + }; + /** @description Browser runtime is not active */ + 409: { + content: { + "application/json": components["schemas"]["ProblemDetails"]; + }; + }; + /** @description CDP command failed */ + 502: { + content: { + "application/json": components["schemas"]["ProblemDetails"]; + }; + }; + }; + }; + /** + * Start the browser runtime. + * @description Launches Chromium with remote debugging, optionally starts Xvfb for + * non-headless mode, and returns the resulting browser status snapshot. 
+ */ + post_v1_browser_start: { + requestBody: { + content: { + "application/json": components["schemas"]["BrowserStartRequest"]; + }; + }; + responses: { + /** @description Browser runtime status after start */ + 200: { + content: { + "application/json": components["schemas"]["BrowserStatusResponse"]; + }; + }; + /** @description Invalid browser start request */ + 400: { + content: { + "application/json": components["schemas"]["ProblemDetails"]; + }; + }; + /** @description Browser or desktop runtime conflict */ + 409: { + content: { + "application/json": components["schemas"]["ProblemDetails"]; + }; + }; + /** @description Browser dependencies not installed */ + 424: { + content: { + "application/json": components["schemas"]["ProblemDetails"]; + }; + }; + /** @description Browser runtime could not be started */ + 500: { + content: { + "application/json": components["schemas"]["ProblemDetails"]; + }; + }; + }; + }; + /** + * Get browser runtime status. + * @description Returns the current browser state, display information, CDP URL, + * and managed process details. + */ + get_v1_browser_status: { + responses: { + /** @description Browser runtime status */ + 200: { + content: { + "application/json": components["schemas"]["BrowserStatusResponse"]; + }; + }; + /** @description Authentication required */ + 401: { + content: { + "application/json": components["schemas"]["ProblemDetails"]; + }; + }; + }; + }; + /** + * Stop the browser runtime. + * @description Terminates Chromium, the CDP client, and any associated Xvfb/Neko + * processes, then returns the resulting status snapshot. 
+ */ + post_v1_browser_stop: { + responses: { + /** @description Browser runtime status after stop */ + 200: { + content: { + "application/json": components["schemas"]["BrowserStatusResponse"]; + }; + }; + /** @description Browser runtime is not active */ + 409: { + content: { + "application/json": components["schemas"]["ProblemDetails"]; + }; + }; + }; + }; + /** + * List open browser tabs. + * @description Returns all open browser tabs (pages) via CDP `Target.getTargets`, + * filtered to type "page". + */ + get_v1_browser_tabs: { + responses: { + /** @description List of open browser tabs */ + 200: { + content: { + "application/json": components["schemas"]["BrowserTabListResponse"]; + }; + }; + /** @description Browser runtime is not active */ + 409: { + content: { + "application/json": components["schemas"]["ProblemDetails"]; + }; + }; + /** @description CDP command failed */ + 502: { + content: { + "application/json": components["schemas"]["ProblemDetails"]; + }; + }; + }; + }; + /** + * Create a new browser tab. + * @description Opens a new tab via CDP `Target.createTarget` and returns the tab info. + */ + post_v1_browser_tabs: { + requestBody: { + content: { + "application/json": components["schemas"]["BrowserCreateTabRequest"]; + }; + }; + responses: { + /** @description New tab created */ + 201: { + content: { + "application/json": components["schemas"]["BrowserTabInfo"]; + }; + }; + /** @description Browser runtime is not active */ + 409: { + content: { + "application/json": components["schemas"]["ProblemDetails"]; + }; + }; + /** @description CDP command failed */ + 502: { + content: { + "application/json": components["schemas"]["ProblemDetails"]; + }; + }; + }; + }; + /** + * Close a browser tab. + * @description Closes the specified tab via CDP `Target.closeTarget`. 
+ */ + delete_v1_browser_tab: { + parameters: { + path: { + /** @description Target ID of the tab to close */ + tab_id: string; + }; + }; + responses: { + /** @description Tab closed */ + 200: { + content: { + "application/json": components["schemas"]["BrowserActionResponse"]; + }; + }; + /** @description Tab not found */ + 404: { + content: { + "application/json": components["schemas"]["ProblemDetails"]; + }; + }; + /** @description Browser runtime is not active */ + 409: { + content: { + "application/json": components["schemas"]["ProblemDetails"]; + }; + }; + /** @description CDP command failed */ + 502: { + content: { + "application/json": components["schemas"]["ProblemDetails"]; + }; + }; + }; + }; + /** + * Activate a browser tab. + * @description Brings the specified tab to the foreground via CDP `Target.activateTarget`. + */ + post_v1_browser_tab_activate: { + parameters: { + path: { + /** @description Target ID of the tab to activate */ + tab_id: string; + }; + }; + responses: { + /** @description Tab activated */ + 200: { + content: { + "application/json": components["schemas"]["BrowserTabInfo"]; + }; + }; + /** @description Tab not found */ + 404: { + content: { + "application/json": components["schemas"]["ProblemDetails"]; + }; + }; + /** @description Browser runtime is not active */ + 409: { + content: { + "application/json": components["schemas"]["ProblemDetails"]; + }; + }; + /** @description CDP command failed */ + 502: { + content: { + "application/json": components["schemas"]["ProblemDetails"]; + }; + }; + }; + }; + /** + * Type text into a focused element. + * @description Finds the element matching `selector`, focuses it via `DOM.focus`, optionally + * clears existing content, then dispatches key events for each character. 
+ */ + post_v1_browser_type: { + requestBody: { + content: { + "application/json": components["schemas"]["BrowserTypeRequest"]; + }; + }; + responses: { + /** @description Text typed */ + 200: { + content: { + "application/json": components["schemas"]["BrowserActionResponse"]; + }; + }; + /** @description Element not found */ + 404: { + content: { + "application/json": components["schemas"]["ProblemDetails"]; + }; + }; + /** @description Browser runtime is not active */ + 409: { + content: { + "application/json": components["schemas"]["ProblemDetails"]; + }; + }; + /** @description CDP command failed */ + 502: { + content: { + "application/json": components["schemas"]["ProblemDetails"]; + }; + }; + }; + }; + /** + * Upload a file to a file input element in the browser page. + * @description Resolves the file input element matching `selector` and sets the specified + * file path using `DOM.setFileInputFiles`. + */ + post_v1_browser_upload: { + requestBody: { + content: { + "application/json": components["schemas"]["BrowserUploadRequest"]; + }; + }; + responses: { + /** @description File uploaded to input */ + 200: { + content: { + "application/json": components["schemas"]["BrowserActionResponse"]; + }; + }; + /** @description Element not found */ + 404: { + content: { + "application/json": components["schemas"]["ProblemDetails"]; + }; + }; + /** @description Browser runtime is not active */ + 409: { + content: { + "application/json": components["schemas"]["ProblemDetails"]; + }; + }; + /** @description CDP command failed */ + 502: { + content: { + "application/json": components["schemas"]["ProblemDetails"]; + }; + }; + }; + }; + /** + * Wait for a selector or condition in the browser. + * @description Polls the page DOM using `Runtime.evaluate` with a `querySelector` check + * until the element is found or the timeout expires. 
+ */ + post_v1_browser_wait: { + requestBody: { + content: { + "application/json": components["schemas"]["BrowserWaitRequest"]; + }; + }; + responses: { + /** @description Wait result */ + 200: { + content: { + "application/json": components["schemas"]["BrowserWaitResponse"]; + }; + }; + /** @description Browser runtime is not active */ + 409: { + content: { + "application/json": components["schemas"]["ProblemDetails"]; + }; + }; + /** @description CDP command failed */ + 502: { + content: { + "application/json": components["schemas"]["ProblemDetails"]; + }; + }; + /** @description Timeout waiting for condition */ + 504: { + content: { + "application/json": components["schemas"]["ProblemDetails"]; + }; + }; + }; + }; get_v1_config_mcp: { parameters: { query: { diff --git a/sdks/typescript/src/index.ts b/sdks/typescript/src/index.ts index 8c057605..b76e6934 100644 --- a/sdks/typescript/src/index.ts +++ b/sdks/typescript/src/index.ts @@ -89,6 +89,61 @@ export type { DesktopStreamStatusResponse, DesktopWindowInfo, DesktopWindowListResponse, + BrowserActionResponse, + BrowserClickRequest, + BrowserConsoleMessage, + BrowserConsoleQuery, + BrowserConsoleResponse, + BrowserContentQuery, + BrowserContentResponse, + BrowserContextCreateRequest, + BrowserContextInfo, + BrowserContextListResponse, + BrowserCookie, + BrowserCookieSameSite, + BrowserCookiesQuery, + BrowserCookiesResponse, + BrowserCrawlExtract, + BrowserCrawlPage, + BrowserCrawlRequest, + BrowserCrawlResponse, + BrowserCreateTabRequest, + BrowserDeleteCookiesQuery, + BrowserDialogRequest, + BrowserExecuteRequest, + BrowserExecuteResponse, + BrowserHoverRequest, + BrowserLinkInfo, + BrowserLinksResponse, + BrowserMarkdownResponse, + BrowserMouseButton, + BrowserNavigateRequest, + BrowserNavigateWaitUntil, + BrowserNetworkQuery, + BrowserNetworkRequest, + BrowserNetworkResponse, + BrowserPageInfo, + BrowserPdfFormat, + BrowserPdfQuery, + BrowserReloadRequest, + BrowserScrapeRequest, + BrowserScrapeResponse, + 
BrowserScreenshotFormat, + BrowserScreenshotQuery, + BrowserScrollRequest, + BrowserSelectRequest, + BrowserSetCookiesRequest, + BrowserSnapshotResponse, + BrowserStartRequest, + BrowserState, + BrowserStatusResponse, + BrowserTabInfo, + BrowserTabListResponse, + BrowserTypeRequest, + BrowserUploadRequest, + BrowserWaitRequest, + BrowserWaitResponse, + BrowserWaitState, FsActionResponse, FsDeleteQuery, FsEntriesQuery, diff --git a/sdks/typescript/src/types.ts b/sdks/typescript/src/types.ts index d56a8291..78967f42 100644 --- a/sdks/typescript/src/types.ts +++ b/sdks/typescript/src/types.ts @@ -46,6 +46,71 @@ export type DesktopOpenRequest = JsonRequestBody; export type DesktopWindowMoveRequest = JsonRequestBody; export type DesktopWindowResizeRequest = JsonRequestBody; + +// Browser types +export type BrowserState = components["schemas"]["BrowserState"]; +export type BrowserStartRequest = JsonRequestBody; +export type BrowserStatusResponse = JsonResponse; +export type BrowserNavigateRequest = JsonRequestBody; +export type BrowserNavigateWaitUntil = components["schemas"]["BrowserNavigateWaitUntil"]; +export type BrowserPageInfo = JsonResponse; +export type BrowserReloadRequest = JsonRequestBody; +export type BrowserWaitRequest = JsonRequestBody; +export type BrowserWaitState = components["schemas"]["BrowserWaitState"]; +export type BrowserWaitResponse = JsonResponse; +export type BrowserTabInfo = components["schemas"]["BrowserTabInfo"]; +export type BrowserTabListResponse = JsonResponse; +export type BrowserCreateTabRequest = JsonRequestBody; +export type BrowserScreenshotFormat = components["schemas"]["BrowserScreenshotFormat"]; +export type BrowserScreenshotQuery = + QueryParams extends never ? Record : QueryParams; +export type BrowserPdfFormat = components["schemas"]["BrowserPdfFormat"]; +export type BrowserPdfQuery = + QueryParams extends never ? Record : QueryParams; +export type BrowserContentQuery = + QueryParams extends never ? 
Record : QueryParams; +export type BrowserContentResponse = JsonResponse; +export type BrowserMarkdownResponse = JsonResponse; +export type BrowserLinkInfo = components["schemas"]["BrowserLinkInfo"]; +export type BrowserLinksResponse = JsonResponse; +export type BrowserSnapshotResponse = JsonResponse; +export type BrowserScrapeRequest = JsonRequestBody; +export type BrowserScrapeResponse = JsonResponse; +export type BrowserExecuteRequest = JsonRequestBody; +export type BrowserExecuteResponse = JsonResponse; +export type BrowserMouseButton = components["schemas"]["BrowserMouseButton"]; +export type BrowserClickRequest = JsonRequestBody; +export type BrowserTypeRequest = JsonRequestBody; +export type BrowserSelectRequest = JsonRequestBody; +export type BrowserHoverRequest = JsonRequestBody; +export type BrowserScrollRequest = JsonRequestBody; +export type BrowserUploadRequest = JsonRequestBody; +export type BrowserDialogRequest = JsonRequestBody; +export type BrowserActionResponse = JsonResponse; +export type BrowserConsoleQuery = + QueryParams extends never ? Record : QueryParams; +export type BrowserConsoleMessage = components["schemas"]["BrowserConsoleMessage"]; +export type BrowserConsoleResponse = JsonResponse; +export type BrowserNetworkQuery = + QueryParams extends never ? 
Record : QueryParams; +export type BrowserNetworkRequest = components["schemas"]["BrowserNetworkRequest"]; +export type BrowserNetworkResponse = JsonResponse; +export type BrowserCrawlExtract = components["schemas"]["BrowserCrawlExtract"]; +export type BrowserCrawlRequest = JsonRequestBody; +export type BrowserCrawlPage = components["schemas"]["BrowserCrawlPage"]; +export type BrowserCrawlResponse = JsonResponse; +export type BrowserContextInfo = components["schemas"]["BrowserContextInfo"]; +export type BrowserContextListResponse = JsonResponse; +export type BrowserContextCreateRequest = JsonRequestBody; +export type BrowserCookieSameSite = components["schemas"]["BrowserCookieSameSite"]; +export type BrowserCookie = components["schemas"]["BrowserCookie"]; +export type BrowserCookiesQuery = + QueryParams extends never ? Record : QueryParams; +export type BrowserCookiesResponse = JsonResponse; +export type BrowserSetCookiesRequest = JsonRequestBody; +export type BrowserDeleteCookiesQuery = + QueryParams extends never ? 
Record : QueryParams; + export type AgentListResponse = JsonResponse; export type AgentInfo = components["schemas"]["AgentInfo"]; export type AgentQuery = QueryParams; From 8ade8bba9dbedec3057828858aefcd7bb9dd5d1e Mon Sep 17 00:00:00 2001 From: Nathan Flurry Date: Tue, 17 Mar 2026 06:19:12 -0700 Subject: [PATCH 20/51] feat: [US-020] - Add TypeScript SDK browser lifecycle and CDP methods Co-Authored-By: Claude Opus 4.6 (1M context) --- sdks/typescript/src/client.ts | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/sdks/typescript/src/client.ts b/sdks/typescript/src/client.ts index df664001..acff2437 100644 --- a/sdks/typescript/src/client.ts +++ b/sdks/typescript/src/client.ts @@ -30,6 +30,8 @@ import { type AgentInstallRequest, type AgentInstallResponse, type AgentListResponse, + type BrowserStartRequest, + type BrowserStatusResponse, type DesktopActionResponse, type DesktopClipboardQuery, type DesktopClipboardResponse, @@ -2008,6 +2010,28 @@ export class SandboxAgent { return new DesktopStreamSession(this.connectDesktopStreamWebSocket(options)); } + async startBrowser(request: BrowserStartRequest = {}): Promise { + return this.requestJson("POST", `${API_PREFIX}/browser/start`, { + body: request, + }); + } + + async stopBrowser(): Promise { + return this.requestJson("POST", `${API_PREFIX}/browser/stop`); + } + + async getBrowserStatus(): Promise { + return this.requestJson("GET", `${API_PREFIX}/browser/status`); + } + + getBrowserCdpUrl(options: ProcessTerminalWebSocketUrlOptions = {}): string { + return toWebSocketUrl( + this.buildUrl(`${API_PREFIX}/browser/cdp`, { + access_token: options.accessToken ?? 
this.token, + }), + ); + } + private async getLiveConnection(agent: string): Promise { await this.awaitHealthy(); From 9776e0f4cb8b01f87b8e1c3043bfc4a16a416eee Mon Sep 17 00:00:00 2001 From: Nathan Flurry Date: Tue, 17 Mar 2026 06:21:32 -0700 Subject: [PATCH 21/51] feat: [US-021] - Add TypeScript SDK browser navigation and tab methods Co-Authored-By: Claude Opus 4.6 (1M context) --- sdks/typescript/src/client.ts | 53 +++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/sdks/typescript/src/client.ts b/sdks/typescript/src/client.ts index acff2437..adaa58ee 100644 --- a/sdks/typescript/src/client.ts +++ b/sdks/typescript/src/client.ts @@ -30,8 +30,17 @@ import { type AgentInstallRequest, type AgentInstallResponse, type AgentListResponse, + type BrowserActionResponse, + type BrowserCreateTabRequest, + type BrowserNavigateRequest, + type BrowserPageInfo, + type BrowserReloadRequest, type BrowserStartRequest, type BrowserStatusResponse, + type BrowserTabInfo, + type BrowserTabListResponse, + type BrowserWaitRequest, + type BrowserWaitResponse, type DesktopActionResponse, type DesktopClipboardQuery, type DesktopClipboardResponse, @@ -2032,6 +2041,50 @@ export class SandboxAgent { ); } + async browserNavigate(request: BrowserNavigateRequest): Promise { + return this.requestJson("POST", `${API_PREFIX}/browser/navigate`, { + body: request, + }); + } + + async browserBack(): Promise { + return this.requestJson("POST", `${API_PREFIX}/browser/back`); + } + + async browserForward(): Promise { + return this.requestJson("POST", `${API_PREFIX}/browser/forward`); + } + + async browserReload(request: BrowserReloadRequest = {}): Promise { + return this.requestJson("POST", `${API_PREFIX}/browser/reload`, { + body: request, + }); + } + + async browserWait(request: BrowserWaitRequest): Promise { + return this.requestJson("POST", `${API_PREFIX}/browser/wait`, { + body: request, + }); + } + + async getBrowserTabs(): Promise { + return this.requestJson("GET", 
`${API_PREFIX}/browser/tabs`); + } + + async createBrowserTab(request: BrowserCreateTabRequest = {}): Promise { + return this.requestJson("POST", `${API_PREFIX}/browser/tabs`, { + body: request, + }); + } + + async activateBrowserTab(tabId: string): Promise { // tabId is percent-encoded so it always stays a single path segment + return this.requestJson("POST", `${API_PREFIX}/browser/tabs/${encodeURIComponent(tabId)}/activate`); + } + + async closeBrowserTab(tabId: string): Promise { // tabId is percent-encoded so it always stays a single path segment + return this.requestJson("DELETE", `${API_PREFIX}/browser/tabs/${encodeURIComponent(tabId)}`); + } + private async getLiveConnection(agent: string): Promise { await this.awaitHealthy(); From f4c43a5ac5294db20259d337fe0d498db8ccc988 Mon Sep 17 00:00:00 2001 From: Nathan Flurry Date: Tue, 17 Mar 2026 06:23:14 -0700 Subject: [PATCH 22/51] feat: [US-022] - Add TypeScript SDK browser content extraction methods Co-Authored-By: Claude Opus 4.6 (1M context) --- sdks/typescript/src/client.ts | 53 +++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/sdks/typescript/src/client.ts b/sdks/typescript/src/client.ts index adaa58ee..2f0e7e45 100644 --- a/sdks/typescript/src/client.ts +++ b/sdks/typescript/src/client.ts @@ -31,10 +31,21 @@ import { type AgentInstallResponse, type AgentListResponse, type BrowserActionResponse, + type BrowserContentQuery, + type BrowserContentResponse, type BrowserCreateTabRequest, + type BrowserExecuteRequest, + type BrowserExecuteResponse, + type BrowserLinksResponse, + type BrowserMarkdownResponse, type BrowserNavigateRequest, type BrowserPageInfo, + type BrowserPdfQuery, type BrowserReloadRequest, + type BrowserScreenshotQuery, + type BrowserScrapeRequest, + type BrowserScrapeResponse, + type BrowserSnapshotResponse, type BrowserStartRequest, type BrowserStatusResponse, type BrowserTabInfo, @@ -2085,6 +2096,48 @@ export class SandboxAgent { return this.requestJson("DELETE", `${API_PREFIX}/browser/tabs/${encodeURIComponent(tabId)}`); } + async takeBrowserScreenshot(query: BrowserScreenshotQuery = {}): Promise { + const response = await this.requestRaw("GET", 
`${API_PREFIX}/browser/screenshot`, { + query, + accept: "image/*", + }); + const buffer = await response.arrayBuffer(); + return new Uint8Array(buffer); + } + + async getBrowserPdf(query: BrowserPdfQuery = {}): Promise { + const response = await this.requestRaw("GET", `${API_PREFIX}/browser/pdf`, { + query, + accept: "application/pdf", + }); + const buffer = await response.arrayBuffer(); + return new Uint8Array(buffer); + } + + async getBrowserContent(query: BrowserContentQuery = {}): Promise { + return this.requestJson("GET", `${API_PREFIX}/browser/content`, { query }); + } + + async getBrowserMarkdown(): Promise { + return this.requestJson("GET", `${API_PREFIX}/browser/markdown`); + } + + async scrapeBrowser(request: BrowserScrapeRequest): Promise { + return this.requestJson("POST", `${API_PREFIX}/browser/scrape`, { body: request }); + } + + async getBrowserLinks(): Promise { + return this.requestJson("GET", `${API_PREFIX}/browser/links`); + } + + async executeBrowserScript(request: BrowserExecuteRequest): Promise { + return this.requestJson("POST", `${API_PREFIX}/browser/execute`, { body: request }); + } + + async getBrowserSnapshot(): Promise { + return this.requestJson("GET", `${API_PREFIX}/browser/snapshot`); + } + private async getLiveConnection(agent: string): Promise { await this.awaitHealthy(); From 264ec25578a4a62906e6b41c2c1b39d4f45c3e77 Mon Sep 17 00:00:00 2001 From: Nathan Flurry Date: Tue, 17 Mar 2026 06:25:27 -0700 Subject: [PATCH 23/51] feat: [US-023] - Add TypeScript SDK browser interaction methods Co-Authored-By: Claude Opus 4.6 (1M context) --- sdks/typescript/src/client.ts | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/sdks/typescript/src/client.ts b/sdks/typescript/src/client.ts index 2f0e7e45..4322655f 100644 --- a/sdks/typescript/src/client.ts +++ b/sdks/typescript/src/client.ts @@ -31,11 +31,14 @@ import { type AgentInstallResponse, type AgentListResponse, type BrowserActionResponse, + type 
BrowserClickRequest, type BrowserContentQuery, type BrowserContentResponse, type BrowserCreateTabRequest, + type BrowserDialogRequest, type BrowserExecuteRequest, type BrowserExecuteResponse, + type BrowserHoverRequest, type BrowserLinksResponse, type BrowserMarkdownResponse, type BrowserNavigateRequest, @@ -43,13 +46,17 @@ import { type BrowserPdfQuery, type BrowserReloadRequest, type BrowserScreenshotQuery, + type BrowserScrollRequest, type BrowserScrapeRequest, type BrowserScrapeResponse, + type BrowserSelectRequest, type BrowserSnapshotResponse, type BrowserStartRequest, type BrowserStatusResponse, type BrowserTabInfo, type BrowserTabListResponse, + type BrowserTypeRequest, + type BrowserUploadRequest, type BrowserWaitRequest, type BrowserWaitResponse, type DesktopActionResponse, @@ -2138,6 +2145,34 @@ export class SandboxAgent { return this.requestJson("GET", `${API_PREFIX}/browser/snapshot`); } + async browserClick(request: BrowserClickRequest): Promise { + return this.requestJson("POST", `${API_PREFIX}/browser/click`, { body: request }); + } + + async browserType(request: BrowserTypeRequest): Promise { + return this.requestJson("POST", `${API_PREFIX}/browser/type`, { body: request }); + } + + async browserSelect(request: BrowserSelectRequest): Promise { + return this.requestJson("POST", `${API_PREFIX}/browser/select`, { body: request }); + } + + async browserHover(request: BrowserHoverRequest): Promise { + return this.requestJson("POST", `${API_PREFIX}/browser/hover`, { body: request }); + } + + async browserScroll(request: BrowserScrollRequest): Promise { + return this.requestJson("POST", `${API_PREFIX}/browser/scroll`, { body: request }); + } + + async browserUpload(request: BrowserUploadRequest): Promise { + return this.requestJson("POST", `${API_PREFIX}/browser/upload`, { body: request }); + } + + async browserDialog(request: BrowserDialogRequest): Promise { + return this.requestJson("POST", `${API_PREFIX}/browser/dialog`, { body: request }); + } + 
private async getLiveConnection(agent: string): Promise { await this.awaitHealthy(); From 11efecfda9785d483854b47c7438ad7904ada83c Mon Sep 17 00:00:00 2001 From: Nathan Flurry Date: Tue, 17 Mar 2026 06:27:12 -0700 Subject: [PATCH 24/51] feat: [US-024] - Add TypeScript SDK browser monitoring, crawl, context, and cookie methods Co-Authored-By: Claude Opus 4.6 (1M context) --- sdks/typescript/src/client.ts | 49 +++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/sdks/typescript/src/client.ts b/sdks/typescript/src/client.ts index 4322655f..5b5e7b62 100644 --- a/sdks/typescript/src/client.ts +++ b/sdks/typescript/src/client.ts @@ -32,9 +32,19 @@ import { type AgentListResponse, type BrowserActionResponse, type BrowserClickRequest, + type BrowserConsoleQuery, + type BrowserConsoleResponse, type BrowserContentQuery, type BrowserContentResponse, + type BrowserContextCreateRequest, + type BrowserContextInfo, + type BrowserContextListResponse, + type BrowserCookiesQuery, + type BrowserCookiesResponse, + type BrowserCrawlRequest, + type BrowserCrawlResponse, type BrowserCreateTabRequest, + type BrowserDeleteCookiesQuery, type BrowserDialogRequest, type BrowserExecuteRequest, type BrowserExecuteResponse, @@ -42,6 +52,8 @@ import { type BrowserLinksResponse, type BrowserMarkdownResponse, type BrowserNavigateRequest, + type BrowserNetworkQuery, + type BrowserNetworkResponse, type BrowserPageInfo, type BrowserPdfQuery, type BrowserReloadRequest, @@ -50,6 +62,7 @@ import { type BrowserScrapeRequest, type BrowserScrapeResponse, type BrowserSelectRequest, + type BrowserSetCookiesRequest, type BrowserSnapshotResponse, type BrowserStartRequest, type BrowserStatusResponse, @@ -2173,6 +2186,42 @@ export class SandboxAgent { return this.requestJson("POST", `${API_PREFIX}/browser/dialog`, { body: request }); } + async getBrowserConsole(query?: BrowserConsoleQuery): Promise { + return this.requestJson("GET", `${API_PREFIX}/browser/console`, { query }); + } + + 
async getBrowserNetwork(query?: BrowserNetworkQuery): Promise { + return this.requestJson("GET", `${API_PREFIX}/browser/network`, { query }); + } + + async crawlBrowser(request: BrowserCrawlRequest): Promise { + return this.requestJson("POST", `${API_PREFIX}/browser/crawl`, { body: request }); + } + + async getBrowserContexts(): Promise { + return this.requestJson("GET", `${API_PREFIX}/browser/contexts`); + } + + async createBrowserContext(request: BrowserContextCreateRequest): Promise { + return this.requestJson("POST", `${API_PREFIX}/browser/contexts`, { body: request }); + } + + async deleteBrowserContext(contextId: string): Promise { // contextId is percent-encoded so it always stays a single path segment + return this.requestJson("DELETE", `${API_PREFIX}/browser/contexts/${encodeURIComponent(contextId)}`); + } + + async getBrowserCookies(query?: BrowserCookiesQuery): Promise { + return this.requestJson("GET", `${API_PREFIX}/browser/cookies`, { query }); + } + + async setBrowserCookies(request: BrowserSetCookiesRequest): Promise { + return this.requestJson("POST", `${API_PREFIX}/browser/cookies`, { body: request }); + } + + async deleteBrowserCookies(query?: BrowserDeleteCookiesQuery): Promise { + return this.requestJson("DELETE", `${API_PREFIX}/browser/cookies`, { query }); + } + private async getLiveConnection(agent: string): Promise { await this.awaitHealthy(); From 189dfc0cc8ad7f657b3cf0a8f60fb721bbf25033 Mon Sep 17 00:00:00 2001 From: Nathan Flurry Date: Tue, 17 Mar 2026 06:31:29 -0700 Subject: [PATCH 25/51] feat: [US-025] - Add BrowserViewer React component Co-Authored-By: Claude Opus 4.6 (1M context) --- sdks/react/src/BrowserViewer.tsx | 257 +++++++++++++++++++++++++++++++ sdks/react/src/index.ts | 6 + 2 files changed, 263 insertions(+) create mode 100644 sdks/react/src/BrowserViewer.tsx diff --git a/sdks/react/src/BrowserViewer.tsx b/sdks/react/src/BrowserViewer.tsx new file mode 100644 index 00000000..d7762747 --- /dev/null +++ b/sdks/react/src/BrowserViewer.tsx @@ -0,0 +1,257 @@ +"use client"; + +import type { CSSProperties, KeyboardEvent } 
from "react"; +import { useCallback, useEffect, useRef, useState } from "react"; +import type { + BrowserNavigateRequest, + BrowserPageInfo, + BrowserStatusResponse, + DesktopStreamErrorStatus, + DesktopStreamReadyStatus, + SandboxAgent, +} from "sandbox-agent"; +import { DesktopViewer } from "./DesktopViewer.tsx"; +import type { DesktopViewerProps } from "./DesktopViewer.tsx"; + +export type BrowserViewerClient = Pick< + SandboxAgent, + "connectDesktopStream" | "browserNavigate" | "browserBack" | "browserForward" | "browserReload" | "getBrowserStatus" +>; + +export interface BrowserViewerProps { + client: BrowserViewerClient; + className?: string; + style?: CSSProperties; + height?: number | string; + showNavigationBar?: boolean; + showStatusBar?: boolean; + onNavigate?: (page: BrowserPageInfo) => void; + onConnect?: (status: DesktopStreamReadyStatus) => void; + onDisconnect?: () => void; + onError?: (error: DesktopStreamErrorStatus | Error) => void; +} + +const navBarStyle: CSSProperties = { + display: "flex", + alignItems: "center", + gap: 4, + padding: "6px 8px", + borderBottom: "1px solid rgba(15, 23, 42, 0.08)", + background: "rgba(255, 255, 255, 0.78)", +}; + +const navButtonStyle: CSSProperties = { + display: "inline-flex", + alignItems: "center", + justifyContent: "center", + width: 28, + height: 28, + padding: 0, + border: "1px solid rgba(15, 23, 42, 0.12)", + borderRadius: 6, + background: "rgba(255, 255, 255, 0.9)", + color: "#334155", + fontSize: 14, + lineHeight: 1, + cursor: "pointer", + flexShrink: 0, +}; + +const navButtonDisabledStyle: CSSProperties = { + ...navButtonStyle, + opacity: 0.4, + cursor: "default", +}; + +const urlInputStyle: CSSProperties = { + flex: 1, + height: 28, + padding: "0 8px", + border: "1px solid rgba(15, 23, 42, 0.12)", + borderRadius: 6, + background: "rgba(248, 250, 252, 0.9)", + color: "#0f172a", + fontSize: 12, + lineHeight: "28px", + outline: "none", + minWidth: 0, +}; + +const shellStyle: CSSProperties = { + display: 
"flex", + flexDirection: "column", + overflow: "hidden", + border: "1px solid rgba(15, 23, 42, 0.14)", + borderRadius: 14, + background: "linear-gradient(180deg, rgba(248, 250, 252, 0.96) 0%, rgba(226, 232, 240, 0.92) 100%)", + boxShadow: "0 20px 40px rgba(15, 23, 42, 0.08)", +}; + +export const BrowserViewer = ({ + client, + className, + style, + height = 480, + showNavigationBar = true, + showStatusBar = true, + onNavigate, + onConnect, + onDisconnect, + onError, +}: BrowserViewerProps) => { + const [urlInput, setUrlInput] = useState(""); + const [isNavigating, setIsNavigating] = useState(false); + const urlInputRef = useRef(null); + + // Sync URL from browser status on connect + const handleConnect = useCallback( + (status: DesktopStreamReadyStatus) => { + client + .getBrowserStatus() + .then((browserStatus: BrowserStatusResponse) => { + if (browserStatus.url) { + setUrlInput(browserStatus.url); + } + }) + .catch(() => undefined); + onConnect?.(status); + }, + [client, onConnect], + ); + + const navigate = useCallback( + async (request: BrowserNavigateRequest) => { + setIsNavigating(true); + try { + const page = await client.browserNavigate(request); + setUrlInput(page.url ?? ""); + onNavigate?.(page); + } catch { + // navigation error handled by caller or silently ignored + } finally { + setIsNavigating(false); + } + }, + [client, onNavigate], + ); + + const handleBack = useCallback(async () => { + setIsNavigating(true); + try { + const page = await client.browserBack(); + setUrlInput(page.url ?? ""); + onNavigate?.(page); + } catch { + // ignore + } finally { + setIsNavigating(false); + } + }, [client, onNavigate]); + + const handleForward = useCallback(async () => { + setIsNavigating(true); + try { + const page = await client.browserForward(); + setUrlInput(page.url ?? 
""); + onNavigate?.(page); + } catch { + // ignore + } finally { + setIsNavigating(false); + } + }, [client, onNavigate]); + + const handleReload = useCallback(async () => { + setIsNavigating(true); + try { + const page = await client.browserReload(); + setUrlInput(page.url ?? ""); + onNavigate?.(page); + } catch { + // ignore + } finally { + setIsNavigating(false); + } + }, [client, onNavigate]); + + const handleUrlKeyDown = useCallback( + (event: KeyboardEvent) => { + if (event.key === "Enter" && urlInput.trim()) { + event.preventDefault(); + let url = urlInput.trim(); + if (!/^https?:\/\//i.test(url)) { + url = `https://${url}`; + } + void navigate({ url }); + } + }, + [urlInput, navigate], + ); + + // Inner DesktopViewer props: no shell styling (we provide our own), no status bar + // duplication (BrowserViewer wraps it) + const desktopViewerProps: DesktopViewerProps = { + client, + height, + showStatusBar, + onConnect: handleConnect, + onDisconnect, + onError, + style: { + border: "none", + borderRadius: 0, + background: "transparent", + boxShadow: "none", + }, + }; + + return ( +
+ {showNavigationBar ? ( +
+ + + + setUrlInput(e.target.value)} + onKeyDown={handleUrlKeyDown} + placeholder="Enter URL..." + aria-label="URL" + /> +
+ ) : null} + +
+ ); +}; diff --git a/sdks/react/src/index.ts b/sdks/react/src/index.ts index 1d8d1e16..89f10f58 100644 --- a/sdks/react/src/index.ts +++ b/sdks/react/src/index.ts @@ -1,5 +1,6 @@ export { AgentConversation } from "./AgentConversation.tsx"; export { AgentTranscript } from "./AgentTranscript.tsx"; +export { BrowserViewer } from "./BrowserViewer.tsx"; export { ChatComposer } from "./ChatComposer.tsx"; export { DesktopViewer } from "./DesktopViewer.tsx"; export { ProcessTerminal } from "./ProcessTerminal.tsx"; @@ -24,6 +25,11 @@ export type { ChatComposerProps, } from "./ChatComposer.tsx"; +export type { + BrowserViewerClient, + BrowserViewerProps, +} from "./BrowserViewer.tsx"; + export type { DesktopViewerClient, DesktopViewerProps, From 4c00d71f5dee29b4b1efacfe58053cf5150ca875 Mon Sep 17 00:00:00 2001 From: Nathan Flurry Date: Tue, 17 Mar 2026 06:37:12 -0700 Subject: [PATCH 26/51] feat: [US-026] - Add Browser tab to Inspector UI - runtime control and live view sections Co-Authored-By: Claude Opus 4.6 (1M context) --- .../src/components/debug/BrowserTab.tsx | 445 ++++++++++++++++++ .../src/components/debug/DebugPanel.tsx | 11 +- 2 files changed, 454 insertions(+), 2 deletions(-) create mode 100644 frontend/packages/inspector/src/components/debug/BrowserTab.tsx diff --git a/frontend/packages/inspector/src/components/debug/BrowserTab.tsx b/frontend/packages/inspector/src/components/debug/BrowserTab.tsx new file mode 100644 index 00000000..b6495c34 --- /dev/null +++ b/frontend/packages/inspector/src/components/debug/BrowserTab.tsx @@ -0,0 +1,445 @@ +import { ArrowLeft, ArrowRight, Globe, Loader2, Play, RefreshCw, Square } from "lucide-react"; +import { useCallback, useEffect, useMemo, useState } from "react"; +import { SandboxAgentError } from "sandbox-agent"; +import type { BrowserContextInfo, BrowserStatusResponse, SandboxAgent } from "sandbox-agent"; +import { DesktopViewer } from "@sandbox-agent/react"; +import type { BrowserViewerClient } from 
"@sandbox-agent/react"; + +const MIN_SPIN_MS = 350; + +const extractErrorMessage = (error: unknown, fallback: string): string => { + if (error instanceof SandboxAgentError && error.problem?.detail) return error.problem.detail; + if (error instanceof Error) return error.message; + return fallback; +}; + +const formatStartedAt = (value: string | null | undefined): string => { + if (!value) return "Not started"; + const parsed = new Date(value); + return Number.isNaN(parsed.getTime()) ? value : parsed.toLocaleString(); +}; + +const BrowserTab = ({ getClient }: { getClient: () => SandboxAgent }) => { + // Status + const [status, setStatus] = useState(null); + const [loading, setLoading] = useState(false); + const [refreshing, setRefreshing] = useState(false); + const [acting, setActing] = useState<"start" | "stop" | null>(null); + const [error, setError] = useState(null); + + // Config inputs + const [width, setWidth] = useState("1280"); + const [height, setHeight] = useState("720"); + const [startUrl, setStartUrl] = useState(""); + const [contextId, setContextId] = useState(""); + const [contexts, setContexts] = useState([]); + + // Live view + const [liveViewActive, setLiveViewActive] = useState(false); + const [liveViewError, setLiveViewError] = useState(null); + const [navUrl, setNavUrl] = useState(""); + const [isNavigating, setIsNavigating] = useState(false); + + const isActive = status?.state === "active"; + + const resolutionLabel = useMemo(() => { + const resolution = status?.resolution; + if (!resolution) return "Unknown"; + return `${resolution.width} x ${resolution.height}`; + }, [status?.resolution]); + + const viewerClient = useMemo(() => { + const c = getClient(); + return { + connectDesktopStream: (opts?: Parameters[0]) => c.connectDesktopStream(opts), + browserNavigate: (req) => c.browserNavigate(req), + browserBack: () => c.browserBack(), + browserForward: () => c.browserForward(), + browserReload: (req?) 
=> c.browserReload(req), + getBrowserStatus: () => c.getBrowserStatus(), + }; + }, [getClient]); + + const loadStatus = useCallback( + async (mode: "initial" | "refresh" = "initial") => { + if (mode === "initial") setLoading(true); + else setRefreshing(true); + setError(null); + try { + const next = await getClient().getBrowserStatus(); + setStatus(next); + if (next.url) setNavUrl(next.url); + return next; + } catch (loadError) { + setError(extractErrorMessage(loadError, "Unable to load browser status.")); + return null; + } finally { + setLoading(false); + setRefreshing(false); + } + }, + [getClient], + ); + + const loadContexts = useCallback(async () => { + try { + const result = await getClient().getBrowserContexts(); + setContexts(result.contexts); + } catch { + // non-critical + } + }, [getClient]); + + // Initial load + useEffect(() => { + void loadStatus(); + void loadContexts(); + }, [loadStatus, loadContexts]); + + // Auto-refresh status every 5s when active + useEffect(() => { + if (status?.state !== "active") return; + const interval = setInterval(() => void loadStatus("refresh"), 5000); + return () => clearInterval(interval); + }, [status?.state, loadStatus]); + + // Reset live view when browser becomes inactive + useEffect(() => { + if (status?.state !== "active") { + setLiveViewActive(false); + } + }, [status?.state]); + + const handleStart = async () => { + const parsedWidth = Number.parseInt(width, 10); + const parsedHeight = Number.parseInt(height, 10); + setActing("start"); + setError(null); + const startedAt = Date.now(); + try { + const request: Parameters[0] = { + width: Number.isFinite(parsedWidth) ? parsedWidth : undefined, + height: Number.isFinite(parsedHeight) ? 
parsedHeight : undefined, + url: startUrl.trim() || undefined, + contextId: contextId || undefined, + }; + const next = await getClient().startBrowser(request); + setStatus(next); + if (next.url) setNavUrl(next.url); + } catch (startError) { + setError(extractErrorMessage(startError, "Unable to start browser.")); + await loadStatus("refresh"); + } finally { + const elapsedMs = Date.now() - startedAt; + if (elapsedMs < MIN_SPIN_MS) { + await new Promise((resolve) => window.setTimeout(resolve, MIN_SPIN_MS - elapsedMs)); + } + setActing(null); + } + }; + + const handleStop = async () => { + setActing("stop"); + setError(null); + const startedAt = Date.now(); + try { + const next = await getClient().stopBrowser(); + setStatus(next); + setLiveViewActive(false); + } catch (stopError) { + setError(extractErrorMessage(stopError, "Unable to stop browser.")); + await loadStatus("refresh"); + } finally { + const elapsedMs = Date.now() - startedAt; + if (elapsedMs < MIN_SPIN_MS) { + await new Promise((resolve) => window.setTimeout(resolve, MIN_SPIN_MS - elapsedMs)); + } + setActing(null); + } + }; + + const handleNavigate = async (url: string) => { + if (!url.trim()) return; + setIsNavigating(true); + try { + let normalizedUrl = url.trim(); + if (!/^https?:\/\//i.test(normalizedUrl)) { + normalizedUrl = `https://${normalizedUrl}`; + } + const page = await getClient().browserNavigate({ url: normalizedUrl }); + setNavUrl(page.url ?? ""); + } catch { + // navigation error silently ignored + } finally { + setIsNavigating(false); + } + }; + + const handleBack = async () => { + setIsNavigating(true); + try { + const page = await getClient().browserBack(); + setNavUrl(page.url ?? ""); + } catch { + // ignore + } finally { + setIsNavigating(false); + } + }; + + const handleForward = async () => { + setIsNavigating(true); + try { + const page = await getClient().browserForward(); + setNavUrl(page.url ?? 
""); + } catch { + // ignore + } finally { + setIsNavigating(false); + } + }; + + const handleReload = async () => { + setIsNavigating(true); + try { + const page = await getClient().browserReload(); + setNavUrl(page.url ?? ""); + } catch { + // ignore + } finally { + setIsNavigating(false); + } + }; + + return ( +
+
+ +
+ + {error &&
{error}
} + + {/* ========== Runtime Control Section ========== */} +
+
+ + + Browser Runtime + + + {status?.state ?? "unknown"} + +
+ +
+
+
URL
+
+ {status?.url ?? "None"} +
+
+
+
Resolution
+
{resolutionLabel}
+
+
+
Started
+
{formatStartedAt(status?.startedAt)}
+
+
+ +
+
+ + setWidth(e.target.value)} inputMode="numeric" /> +
+
+ + setHeight(e.target.value)} inputMode="numeric" /> +
+
+ + setStartUrl(e.target.value)} placeholder="https://example.com" /> +
+
+ + +
+
+ +
+ {isActive ? ( + + ) : ( + + )} +
+
+ + {/* ========== Missing Dependencies ========== */} + {status?.missingDependencies && status.missingDependencies.length > 0 && ( +
+
+ Missing Dependencies +
+
+ {status.missingDependencies.map((dep) => ( + + {dep} + + ))} +
+ {status.installCommand && ( + <> +
+ Install command +
+
{status.installCommand}
+ + )} +
+ )} + + {/* ========== Live View Section ========== */} +
+
+ + + Live View + + {isActive && ( + + )} +
+ + {liveViewError && ( +
+ {liveViewError} +
+ )} + + {!isActive &&
Start the browser runtime to enable live view.
} + + {isActive && liveViewActive && ( + <> + {/* Navigation Bar */} +
+ + + + setNavUrl(e.target.value)} + onKeyDown={(e) => { + if (e.key === "Enter") { + void handleNavigate(navUrl); + } + }} + placeholder="Enter URL..." + style={{ flex: 1, fontSize: 11 }} + /> +
+ + + + {status?.url && ( +
+ {status.url} +
+ )} + + )} + + {isActive && !liveViewActive &&
Click "Start Stream" for live browser view.
} +
+
+ ); +}; + +export default BrowserTab; diff --git a/frontend/packages/inspector/src/components/debug/DebugPanel.tsx b/frontend/packages/inspector/src/components/debug/DebugPanel.tsx index 9855d38a..163caaa9 100644 --- a/frontend/packages/inspector/src/components/debug/DebugPanel.tsx +++ b/frontend/packages/inspector/src/components/debug/DebugPanel.tsx @@ -1,4 +1,4 @@ -import { ChevronLeft, ChevronRight, Cloud, Monitor, Play, PlayCircle, Server, Terminal, Wrench } from "lucide-react"; +import { ChevronLeft, ChevronRight, Cloud, Globe, Monitor, Play, PlayCircle, Server, Terminal, Wrench } from "lucide-react"; import type { AgentInfo, SandboxAgent, SessionEvent } from "sandbox-agent"; type AgentModeInfo = { id: string; name: string; description: string }; @@ -9,10 +9,11 @@ import ProcessesTab from "./ProcessesTab"; import ProcessRunTab from "./ProcessRunTab"; import SkillsTab from "./SkillsTab"; import RequestLogTab from "./RequestLogTab"; +import BrowserTab from "./BrowserTab"; import DesktopTab from "./DesktopTab"; import type { RequestLog } from "../../types/requestLog"; -export type DebugTab = "log" | "events" | "agents" | "desktop" | "mcp" | "skills" | "processes" | "run-process"; +export type DebugTab = "log" | "events" | "agents" | "desktop" | "browser" | "mcp" | "skills" | "processes" | "run-process"; const DebugPanel = ({ debugTab, @@ -80,6 +81,10 @@ const DebugPanel = ({ Desktop + + + +
+
+ + +
+ {screenshotFormat !== "png" && ( +
+ + setScreenshotQuality(e.target.value)} + inputMode="numeric" + style={{ maxWidth: 60 }} + /> +
+ )} + +
+ + setScreenshotSelector(e.target.value)} + placeholder="e.g. #main" + style={{ maxWidth: 140 }} + /> +
+
+ + {screenshotError && ( +
+ {screenshotError} +
+ )} + + {screenshotUrl ? ( +
+ Browser screenshot +
+ ) : ( +
Click "Capture" to take a browser screenshot.
+ )} + + )} + + {/* ========== Tabs Section ========== */} + {isActive && ( +
+
+ + + Tabs + + +
+ + {tabsError && ( +
+ {tabsError} +
+ )} + + {tabs.length > 0 ? ( +
+ {tabs.map((tab) => ( +
+
+
+ {tab.title || "(untitled)"} + {tab.active && ( + + active + + )} +
+ {tab.url} +
+
+
+ {!tab.active && ( + + )} + +
+
+
+ ))} +
+ ) : ( +
No tabs open.
+ )} + +
+ setNewTabUrl(e.target.value)} + placeholder="https://example.com" + onKeyDown={(e) => { + if (e.key === "Enter") void handleCreateTab(); + }} + style={{ flex: 1, fontSize: 11 }} + /> + +
+
+ )} + + {/* ========== Console Section ========== */} + {isActive && ( +
+
+ + + Console + +
+ +
+
+ + {/* Level filter pills */} +
+ {CONSOLE_LEVELS.map((level) => ( + + ))} +
+ + {consoleError && ( +
+ {consoleError} +
+ )} + + {consoleMessages.length > 0 ? ( +
+ {consoleMessages.map((msg, idx) => ( +
+ + {msg.level} + {msg.text} + {new Date(msg.timestamp).toLocaleTimeString()} +
+ ))} +
+
+ ) : ( +
No console messages.
+ )} +
+ )}
); }; From 65df2735f33658fa7adc7b3c44f6a363e0e61376 Mon Sep 17 00:00:00 2001 From: Nathan Flurry Date: Tue, 17 Mar 2026 06:49:18 -0700 Subject: [PATCH 28/51] feat: [US-028] - Add Browser tab - network, content tools, recording, contexts, diagnostics sections Co-Authored-By: Claude Opus 4.6 (1M context) --- .../src/components/debug/BrowserTab.tsx | 630 +++++++++++++++++- 1 file changed, 626 insertions(+), 4 deletions(-) diff --git a/frontend/packages/inspector/src/components/debug/BrowserTab.tsx b/frontend/packages/inspector/src/components/debug/BrowserTab.tsx index 8f75954b..e2158738 100644 --- a/frontend/packages/inspector/src/components/debug/BrowserTab.tsx +++ b/frontend/packages/inspector/src/components/debug/BrowserTab.tsx @@ -1,7 +1,34 @@ -import { ArrowLeft, ArrowRight, Camera, Globe, Layers, Loader2, Play, Plus, RefreshCw, Square, Terminal, X } from "lucide-react"; +import { + ArrowLeft, + ArrowRight, + Camera, + Circle, + Code, + Database, + Download, + Globe, + Layers, + Loader2, + Play, + Plus, + RefreshCw, + Square, + Terminal, + Trash2, + Video, + X, +} from "lucide-react"; import { useCallback, useEffect, useMemo, useRef, useState } from "react"; import { SandboxAgentError } from "sandbox-agent"; -import type { BrowserConsoleMessage, BrowserContextInfo, BrowserStatusResponse, BrowserTabInfo, SandboxAgent } from "sandbox-agent"; +import type { + BrowserConsoleMessage, + BrowserContextInfo, + BrowserNetworkRequest, + BrowserStatusResponse, + BrowserTabInfo, + DesktopRecordingInfo, + SandboxAgent, +} from "sandbox-agent"; import { DesktopViewer } from "@sandbox-agent/react"; import type { BrowserViewerClient } from "@sandbox-agent/react"; @@ -40,6 +67,20 @@ const createScreenshotUrl = async (bytes: Uint8Array, mimeType = "image/png"): P }); }; +const formatBytes = (bytes: number): string => { + if (bytes === 0) return "0 B"; + const units = ["B", "KB", "MB", "GB"]; + const i = Math.floor(Math.log(bytes) / Math.log(1024)); + return `${(bytes / 1024 ** 
i).toFixed(i > 0 ? 1 : 0)} ${units[i]}`; +}; + +const formatDuration = (start: string, end?: string | null): string => { + if (!end) return "in progress"; + const ms = new Date(end).getTime() - new Date(start).getTime(); + if (ms < 1000) return `${ms}ms`; + return `${(ms / 1000).toFixed(1)}s`; +}; + const CONSOLE_LEVELS = ["all", "log", "warn", "error", "info"] as const; const consoleLevelColor = (level: string): string => { @@ -93,6 +134,31 @@ const BrowserTab = ({ getClient }: { getClient: () => SandboxAgent }) => { const [consoleLevel, setConsoleLevel] = useState("all"); const consoleEndRef = useRef(null); + // Network + const [networkRequests, setNetworkRequests] = useState([]); + const [networkLoading, setNetworkLoading] = useState(false); + const [networkError, setNetworkError] = useState(null); + const [networkUrlPattern, setNetworkUrlPattern] = useState(""); + + // Content Tools + const [contentOutput, setContentOutput] = useState(""); + const [contentLoading, setContentLoading] = useState(null); + const [contentError, setContentError] = useState(null); + + // Recording + const [recordings, setRecordings] = useState([]); + const [recordingLoading, setRecordingLoading] = useState(false); + const [recordingActing, setRecordingActing] = useState<"start" | "stop" | null>(null); + const [recordingError, setRecordingError] = useState(null); + const [recordingFps, setRecordingFps] = useState("30"); + const [deletingRecordingId, setDeletingRecordingId] = useState(null); + const [downloadingRecordingId, setDownloadingRecordingId] = useState(null); + + // Context management + const [contextName, setContextName] = useState(""); + const [contextActing, setContextActing] = useState(null); + const [contextError, setContextError] = useState(null); + // Live view const [liveViewActive, setLiveViewActive] = useState(false); const [liveViewError, setLiveViewError] = useState(null); @@ -251,6 +317,164 @@ const BrowserTab = ({ getClient }: { getClient: () => SandboxAgent }) => 
{ } }, [getClient, consoleLevel]); + // Network + const loadNetwork = useCallback(async () => { + setNetworkLoading(true); + setNetworkError(null); + try { + const query = networkUrlPattern.trim() ? { urlPattern: networkUrlPattern.trim() } : {}; + const result = await getClient().getBrowserNetwork(query); + setNetworkRequests(result.requests); + } catch (err) { + setNetworkError(extractErrorMessage(err, "Unable to load network requests.")); + } finally { + setNetworkLoading(false); + } + }, [getClient, networkUrlPattern]); + + // Recording + const activeRecording = useMemo(() => recordings.find((r) => r.status === "recording"), [recordings]); + + const loadRecordings = useCallback(async () => { + setRecordingLoading(true); + setRecordingError(null); + try { + const result = await getClient().listDesktopRecordings(); + setRecordings(result.recordings); + } catch (loadError) { + setRecordingError(extractErrorMessage(loadError, "Unable to load recordings.")); + } finally { + setRecordingLoading(false); + } + }, [getClient]); + + const handleStartRecording = async () => { + const fps = Number.parseInt(recordingFps, 10); + setRecordingActing("start"); + setRecordingError(null); + try { + await getClient().startDesktopRecording({ + fps: Number.isFinite(fps) ? 
fps : undefined, + }); + await loadRecordings(); + } catch (err) { + setRecordingError(extractErrorMessage(err, "Unable to start recording.")); + } finally { + setRecordingActing(null); + } + }; + + const handleStopRecording = async () => { + setRecordingActing("stop"); + setRecordingError(null); + try { + await getClient().stopDesktopRecording(); + await loadRecordings(); + } catch (err) { + setRecordingError(extractErrorMessage(err, "Unable to stop recording.")); + } finally { + setRecordingActing(null); + } + }; + + const handleDeleteRecording = async (id: string) => { + setDeletingRecordingId(id); + try { + await getClient().deleteDesktopRecording(id); + setRecordings((prev) => prev.filter((r) => r.id !== id)); + } catch (err) { + setRecordingError(extractErrorMessage(err, "Unable to delete recording.")); + } finally { + setDeletingRecordingId(null); + } + }; + + const handleDownloadRecording = async (id: string, fileName: string) => { + setDownloadingRecordingId(id); + try { + const bytes = await getClient().downloadDesktopRecording(id); + const payload = new Uint8Array(bytes.byteLength); + payload.set(bytes); + const blob = new Blob([payload.buffer], { type: "video/webm" }); + const url = URL.createObjectURL(blob); + const a = document.createElement("a"); + a.href = url; + a.download = fileName; + a.click(); + URL.revokeObjectURL(url); + } catch (err) { + setRecordingError(extractErrorMessage(err, "Unable to download recording.")); + } finally { + setDownloadingRecordingId(null); + } + }; + + // Context management + const handleCreateContext = async () => { + if (!contextName.trim()) return; + setContextActing("create"); + setContextError(null); + try { + await getClient().createBrowserContext({ name: contextName.trim() }); + setContextName(""); + await loadContexts(); + } catch (err) { + setContextError(extractErrorMessage(err, "Unable to create context.")); + } finally { + setContextActing(null); + } + }; + + const handleDeleteContext = async (id: string) 
=> { + setContextActing(id); + setContextError(null); + try { + await getClient().deleteBrowserContext(id); + if (contextId === id) setContextId(""); + await loadContexts(); + } catch (err) { + setContextError(extractErrorMessage(err, "Unable to delete context.")); + } finally { + setContextActing(null); + } + }; + + // Content tools + const handleGetContent = async (type: "html" | "markdown" | "links" | "snapshot") => { + setContentLoading(type); + setContentError(null); + try { + let output = ""; + switch (type) { + case "html": { + const result = await getClient().getBrowserContent(); + output = result.html; + break; + } + case "markdown": { + const result = await getClient().getBrowserMarkdown(); + output = result.markdown; + break; + } + case "links": { + const result = await getClient().getBrowserLinks(); + output = result.links.map((l) => `${l.text} -> ${l.href}`).join("\n"); + break; + } + case "snapshot": { + const result = await getClient().getBrowserSnapshot(); + output = result.snapshot; + break; + } + } + setContentOutput(output); + } catch (err) { + setContentError(extractErrorMessage(err, `Unable to get ${type}.`)); + } finally { + setContentLoading(null); + } + }; + // Initial load useEffect(() => { void loadStatus(); @@ -264,13 +488,15 @@ const BrowserTab = ({ getClient }: { getClient: () => SandboxAgent }) => { return () => clearInterval(interval); }, [status?.state, loadStatus]); - // Load tabs and console when browser becomes active + // Load tabs, console, network, and recordings when browser becomes active useEffect(() => { if (status?.state === "active") { void loadTabs(); void loadConsole(); + void loadNetwork(); + void loadRecordings(); } - }, [status?.state, loadTabs, loadConsole]); + }, [status?.state, loadTabs, loadConsole, loadNetwork, loadRecordings]); // Auto-refresh console every 3s when active useEffect(() => { @@ -279,6 +505,20 @@ const BrowserTab = ({ getClient }: { getClient: () => SandboxAgent }) => { return () => 
clearInterval(interval); }, [status?.state, loadConsole]); + // Auto-refresh network every 3s when active + useEffect(() => { + if (status?.state !== "active") return; + const interval = setInterval(() => void loadNetwork(), 3000); + return () => clearInterval(interval); + }, [status?.state, loadNetwork]); + + // Poll recording list while a recording is active + useEffect(() => { + if (!activeRecording) return; + const interval = setInterval(() => void loadRecordings(), 3000); + return () => clearInterval(interval); + }, [activeRecording, loadRecordings]); + // Cleanup screenshot URL on unmount useEffect(() => { return () => revokeScreenshotUrl(); @@ -866,6 +1106,388 @@ const BrowserTab = ({ getClient }: { getClient: () => SandboxAgent }) => { )} )} + + {/* ========== Network Section ========== */} + {isActive && ( +
+
+ + + +
+ +
+ setNetworkUrlPattern(e.target.value)} + placeholder="Filter by URL pattern..." + style={{ width: "100%", fontSize: 11 }} + /> +
+ + {networkError && ( +
+ {networkError} +
+ )} + + {networkRequests.length > 0 ? ( +
+ {networkRequests.map((req, idx) => ( +
+ + {req.method} + + = 400 + ? "var(--danger, #ef4444)" + : req.status && req.status >= 300 + ? "var(--warning, #f59e0b)" + : "var(--success, #22c55e)", + }} + > + {req.status ?? "..."} + + {req.url} + {req.responseSize != null ? formatBytes(req.responseSize) : ""} + + {req.duration != null ? `${req.duration}ms` : ""} + +
+ ))} +
+ ) : ( +
No network requests captured.
+ )} +
+ )} + + {/* ========== Content Tools Section ========== */} + {isActive && ( +
+
+ + + Content Tools + +
+ +
+ {(["html", "markdown", "links", "snapshot"] as const).map((type) => ( + + ))} +
+ + {contentError && ( +
+ {contentError} +
+ )} + + {contentOutput ? ( +