diff --git a/CHANGELOG.md b/CHANGELOG.md index a317764e28..223e13a162 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,7 @@ Docs: https://docs.openclaw.ai - Docker: make `docker-setup.sh` compatible with macOS Bash 3.2 and empty extra mounts. (#9441) Thanks @mateusz-michalik. - Auth: strip embedded line breaks from pasted API keys and tokens before storing/resolving credentials. - Agents: strip reasoning tags and downgraded tool markers from messaging tool and streaming output to prevent leakage. (#11053, #13453) Thanks @liebertar, @meaadore1221-afk, @gumadeiras. +- Browser: prevent stuck `act:evaluate` from wedging the browser tool, and make cancellation stop waiting promptly. (#13498) Thanks @onutc. - Web UI: make chat refresh smoothly scroll to the latest messages and suppress new-messages badge flash during manual refresh. - Tools/web_search: include provider-specific settings in the web search cache key, and pass `inlineCitations` for Grok. (#12419) Thanks @tmchow. - Tools/web_search: fix Grok response parsing for xAI Responses API output blocks. (#13049) Thanks @ereid7. diff --git a/docs/experiments/plans/browser-evaluate-cdp-refactor.md b/docs/experiments/plans/browser-evaluate-cdp-refactor.md new file mode 100644 index 0000000000..553437d62e --- /dev/null +++ b/docs/experiments/plans/browser-evaluate-cdp-refactor.md @@ -0,0 +1,229 @@ +--- +summary: "Plan: isolate browser act:evaluate from Playwright queue using CDP, with end-to-end deadlines and safer ref resolution" +owner: "openclaw" +status: "draft" +last_updated: "2026-02-10" +title: "Browser Evaluate CDP Refactor" +--- + +# Browser Evaluate CDP Refactor Plan + +## Context + +`act:evaluate` executes user provided JavaScript in the page. Today it runs via Playwright +(`page.evaluate` or `locator.evaluate`). Playwright serializes CDP commands per page, so a +stuck or long running evaluate can block the page command queue and make every later action +on that tab look "stuck". + +PR #13498 adds a pragmatic safety net (bounded evaluate, abort propagation, and best-effort +recovery). This document describes a larger refactor that makes `act:evaluate` inherently +isolated from Playwright so a stuck evaluate cannot wedge normal Playwright operations. + +## Goals + +- `act:evaluate` cannot permanently block later browser actions on the same tab. +- Timeouts are single source of truth end to end so a caller can rely on a budget. +- Abort and timeout are treated the same way across HTTP and in-process dispatch. +- Element targeting for evaluate is supported without switching everything off Playwright. +- Maintain backward compatibility for existing callers and payloads. + +## Non-goals + +- Replace all browser actions (click, type, wait, etc.) with CDP implementations. +- Remove the existing safety net introduced in PR #13498 (it remains a useful fallback). +- Introduce new unsafe capabilities beyond the existing `browser.evaluateEnabled` gate. +- Add process isolation (worker process/thread) for evaluate. If we still see hard to recover + stuck states after this refactor, that is a follow-up idea. + +## Current Architecture (Why It Gets Stuck) + +At a high level: + +- Callers send `act:evaluate` to the browser control service. +- The route handler calls into Playwright to execute the JavaScript. +- Playwright serializes page commands, so an evaluate that never finishes blocks the queue. +- A stuck queue means later click/type/wait operations on the tab can appear to hang. + +## Proposed Architecture + +### 1. Deadline Propagation + +Introduce a single budget concept and derive everything from it: + +- Caller sets `timeoutMs` (or a deadline in the future). +- The outer request timeout, route handler logic, and the execution budget inside the page + all use the same budget, with small headroom where needed for serialization overhead. +- Abort is propagated as an `AbortSignal` everywhere so cancellation is consistent. + +Implementation direction: + +- Add a small helper (for example `createBudget({ timeoutMs, signal })`) that returns: + - `signal`: the linked AbortSignal + - `deadlineAtMs`: absolute deadline + - `remainingMs()`: remaining budget for child operations +- Use this helper in: + - `src/browser/client-fetch.ts` (HTTP and in-process dispatch) + - `src/node-host/runner.ts` (proxy path) + - browser action implementations (Playwright and CDP) + +### 2. Separate Evaluate Engine (CDP Path) + +Add a CDP based evaluate implementation that does not share Playwright's per page command +queue. The key property is that the evaluate transport is a separate WebSocket connection +and a separate CDP session attached to the target. + +Implementation direction: + +- New module, for example `src/browser/cdp-evaluate.ts`, that: + - Connects to the configured CDP endpoint (browser level socket). + - Uses `Target.attachToTarget({ targetId, flatten: true })` to get a `sessionId`. + - Runs either: + - `Runtime.evaluate` for page level evaluate, or + - `DOM.resolveNode` plus `Runtime.callFunctionOn` for element evaluate. + - On timeout or abort: + - Sends `Runtime.terminateExecution` best-effort for the session. + - Closes the WebSocket and returns a clear error. + +Notes: + +- This still executes JavaScript in the page, so termination can have side effects. The win + is that it does not wedge the Playwright queue, and it is cancelable at the transport + layer by killing the CDP session. + +### 3. Ref Story (Element Targeting Without A Full Rewrite) + +The hard part is element targeting. CDP needs a DOM handle or `backendDOMNodeId`, while +today most browser actions use Playwright locators based on refs from snapshots. + +Recommended approach: keep existing refs, but attach an optional CDP resolvable id. + +#### 3.1 Extend Stored Ref Info + +Extend the stored role ref metadata to optionally include a CDP id: + +- Today: `{ role, name, nth }` +- Proposed: `{ role, name, nth, backendDOMNodeId?: number }` + +This keeps all existing Playwright based actions working and allows CDP evaluate to accept +the same `ref` value when the `backendDOMNodeId` is available. + +#### 3.2 Populate backendDOMNodeId At Snapshot Time + +When producing a role snapshot: + +1. Generate the existing role ref map as today (role, name, nth). +2. Fetch the AX tree via CDP (`Accessibility.getFullAXTree`) and compute a parallel map of + `(role, name, nth) -> backendDOMNodeId` using the same duplicate handling rules. +3. Merge the id back into the stored ref info for the current tab. + +If mapping fails for a ref, leave `backendDOMNodeId` undefined. This makes the feature +best-effort and safe to roll out. + +#### 3.3 Evaluate Behavior With Ref + +In `act:evaluate`: + +- If `ref` is present and has `backendDOMNodeId`, run element evaluate via CDP. +- If `ref` is present but has no `backendDOMNodeId`, fall back to the Playwright path (with + the safety net). + +Optional escape hatch: + +- Extend the request shape to accept `backendDOMNodeId` directly for advanced callers (and + for debugging), while keeping `ref` as the primary interface. + +### 4. Keep A Last Resort Recovery Path + +Even with CDP evaluate, there are other ways to wedge a tab or a connection. Keep the +existing recovery mechanisms (terminate execution + disconnect Playwright) as a last resort +for: + +- legacy callers +- environments where CDP attach is blocked +- unexpected Playwright edge cases + +## Implementation Plan (Single Iteration) + +### Deliverables + +- A CDP based evaluate engine that runs outside the Playwright per-page command queue. +- A single end-to-end timeout/abort budget used consistently by callers and handlers. +- Ref metadata that can optionally carry `backendDOMNodeId` for element evaluate. +- `act:evaluate` prefers the CDP engine when possible and falls back to Playwright when not. +- Tests that prove a stuck evaluate does not wedge later actions. +- Logs/metrics that make failures and fallbacks visible. + +### Implementation Checklist + +1. Add a shared "budget" helper to link `timeoutMs` + upstream `AbortSignal` into: + - a single `AbortSignal` + - an absolute deadline + - a `remainingMs()` helper for downstream operations +2. Update all caller paths to use that helper so `timeoutMs` means the same thing everywhere: + - `src/browser/client-fetch.ts` (HTTP and in-process dispatch) + - `src/node-host/runner.ts` (node proxy path) + - CLI wrappers that call `/act` (add `--timeout-ms` to `browser evaluate`) +3. Implement `src/browser/cdp-evaluate.ts`: + - connect to the browser-level CDP socket + - `Target.attachToTarget` to get a `sessionId` + - run `Runtime.evaluate` for page evaluate + - run `DOM.resolveNode` + `Runtime.callFunctionOn` for element evaluate + - on timeout/abort: best-effort `Runtime.terminateExecution` then close the socket +4. Extend stored role ref metadata to optionally include `backendDOMNodeId`: + - keep existing `{ role, name, nth }` behavior for Playwright actions + - add `backendDOMNodeId?: number` for CDP element targeting +5. Populate `backendDOMNodeId` during snapshot creation (best-effort): + - fetch AX tree via CDP (`Accessibility.getFullAXTree`) + - compute `(role, name, nth) -> backendDOMNodeId` and merge into the stored ref map + - if mapping is ambiguous or missing, leave the id undefined +6. Update `act:evaluate` routing: + - if no `ref`: always use CDP evaluate + - if `ref` resolves to a `backendDOMNodeId`: use CDP element evaluate + - otherwise: fall back to Playwright evaluate (still bounded and abortable) +7. Keep the existing "last resort" recovery path as a fallback, not the default path. +8. Add tests: + - stuck evaluate times out within budget and the next click/type succeeds + - abort cancels evaluate (client disconnect or timeout) and unblocks subsequent actions + - mapping failures cleanly fall back to Playwright +9. Add observability: + - evaluate duration and timeout counters + - terminateExecution usage + - fallback rate (CDP -> Playwright) and reasons + +### Acceptance Criteria + +- A deliberately hung `act:evaluate` returns within the caller budget and does not wedge the + tab for later actions. +- `timeoutMs` behaves consistently across CLI, agent tool, node proxy, and in-process calls. +- If `ref` can be mapped to `backendDOMNodeId`, element evaluate uses CDP; otherwise the + fallback path is still bounded and recoverable. + +## Testing Plan + +- Unit tests: + - `(role, name, nth)` matching logic between role refs and AX tree nodes. + - Budget helper behavior (headroom, remaining time math). +- Integration tests: + - CDP evaluate timeout returns within budget and does not block the next action. + - Abort cancels evaluate and triggers termination best-effort. +- Contract tests: + - Ensure `BrowserActRequest` and `BrowserActResponse` remain compatible. + +## Risks And Mitigations + +- Mapping is imperfect: + - Mitigation: best-effort mapping, fallback to Playwright evaluate, and add debug tooling. +- `Runtime.terminateExecution` has side effects: + - Mitigation: only use on timeout/abort and document the behavior in errors. +- Extra overhead: + - Mitigation: only fetch AX tree when snapshots are requested, cache per target, and keep + CDP session short lived. +- Extension relay limitations: + - Mitigation: use browser level attach APIs when per page sockets are not available, and + keep the current Playwright path as fallback. + +## Open Questions + +- Should the new engine be configurable as `playwright`, `cdp`, or `auto`? +- Do we want to expose a new "nodeRef" format for advanced users, or keep `ref` only? +- How should frame snapshots and selector scoped snapshots participate in AX mapping? diff --git a/src/browser/bridge-server.ts b/src/browser/bridge-server.ts index 513258406c..a1802493fe 100644 --- a/src/browser/bridge-server.ts +++ b/src/browser/bridge-server.ts @@ -28,6 +28,19 @@ export async function startBrowserBridgeServer(params: { const port = params.port ?? 0; const app = express(); + app.use((req, res, next) => { + const ctrl = new AbortController(); + const abort = () => ctrl.abort(new Error("request aborted")); + req.once("aborted", abort); + res.once("close", () => { + if (!res.writableEnded) { + abort(); + } + }); + // Make the signal available to browser route handlers (best-effort). + (req as unknown as { signal?: AbortSignal }).signal = ctrl.signal; + next(); + }); app.use(express.json({ limit: "1mb" })); const authToken = params.authToken?.trim(); diff --git a/src/browser/cdp.helpers.ts b/src/browser/cdp.helpers.ts index 78f73fc857..2c3f4c0af0 100644 --- a/src/browser/cdp.helpers.ts +++ b/src/browser/cdp.helpers.ts @@ -16,7 +16,11 @@ type Pending = { reject: (err: Error) => void; }; -export type CdpSendFn = (method: string, params?: Record) => Promise; +export type CdpSendFn = ( + method: string, + params?: Record, + sessionId?: string, +) => Promise; export function getHeadersWithAuth(url: string, headers: Record = {}) { const relayHeaders = getChromeExtensionRelayAuthHeaders(url); @@ -51,9 +55,13 @@ function createCdpSender(ws: WebSocket) { let nextId = 1; const pending = new Map(); - const send: CdpSendFn = (method: string, params?: Record) => { + const send: CdpSendFn = ( + method: string, + params?: Record, + sessionId?: string, + ) => { const id = nextId++; - const msg = { id, method, params }; + const msg = { id, method, params, sessionId }; ws.send(JSON.stringify(msg)); return new Promise((resolve, reject) => { pending.set(id, { resolve, reject }); @@ -72,6 +80,10 @@ function createCdpSender(ws: WebSocket) { } }; + ws.on("error", (err) => { + closeWithError(err instanceof Error ? err : new Error(String(err))); + }); + ws.on("message", (data) => { try { const parsed = JSON.parse(rawDataToString(data)) as CdpResponse; @@ -132,11 +144,15 @@ export async function fetchOk(url: string, timeoutMs = 1500, init?: RequestInit) export async function withCdpSocket( wsUrl: string, fn: (send: CdpSendFn) => Promise, - opts?: { headers?: Record }, + opts?: { headers?: Record; handshakeTimeoutMs?: number }, ): Promise { const headers = getHeadersWithAuth(wsUrl, opts?.headers ?? {}); + const handshakeTimeoutMs = + typeof opts?.handshakeTimeoutMs === "number" && Number.isFinite(opts.handshakeTimeoutMs) + ? Math.max(1, Math.floor(opts.handshakeTimeoutMs)) + : 5000; const ws = new WebSocket(wsUrl, { - handshakeTimeout: 5000, + handshakeTimeout: handshakeTimeoutMs, ...(Object.keys(headers).length ? { headers } : {}), }); const { send, closeWithError } = createCdpSender(ws); @@ -144,9 +160,15 @@ export async function withCdpSocket( const openPromise = new Promise((resolve, reject) => { ws.once("open", () => resolve()); ws.once("error", (err) => reject(err)); + ws.once("close", () => reject(new Error("CDP socket closed"))); }); - await openPromise; + try { + await openPromise; + } catch (err) { + closeWithError(err instanceof Error ? err : new Error(String(err))); + throw err; + } try { return await fn(send); diff --git a/src/browser/client-actions-core.ts b/src/browser/client-actions-core.ts index 7c90d2a7c5..c3d17922c6 100644 --- a/src/browser/client-actions-core.ts +++ b/src/browser/client-actions-core.ts @@ -83,7 +83,7 @@ export type BrowserActRequest = targetId?: string; timeoutMs?: number; } - | { kind: "evaluate"; fn: string; ref?: string; targetId?: string } + | { kind: "evaluate"; fn: string; ref?: string; targetId?: string; timeoutMs?: number } | { kind: "close"; targetId?: string }; export type BrowserActResponse = { diff --git a/src/browser/client-fetch.ts b/src/browser/client-fetch.ts index d9530892f3..1a5a835d1b 100644 --- a/src/browser/client-fetch.ts +++ b/src/browser/client-fetch.ts @@ -35,7 +35,18 @@ async function fetchHttpJson( ): Promise { const timeoutMs = init.timeoutMs ?? 5000; const ctrl = new AbortController(); - const t = setTimeout(() => ctrl.abort(), timeoutMs); + const upstreamSignal = init.signal; + let upstreamAbortListener: (() => void) | undefined; + if (upstreamSignal) { + if (upstreamSignal.aborted) { + ctrl.abort(upstreamSignal.reason); + } else { + upstreamAbortListener = () => ctrl.abort(upstreamSignal.reason); + upstreamSignal.addEventListener("abort", upstreamAbortListener, { once: true }); + } + } + + const t = setTimeout(() => ctrl.abort(new Error("timed out")), timeoutMs); try { const res = await fetch(url, { ...init, signal: ctrl.signal }); if (!res.ok) { @@ -45,6 +56,9 @@ async function fetchHttpJson( return (await res.json()) as T; } finally { clearTimeout(t); + if (upstreamSignal && upstreamAbortListener) { + upstreamSignal.removeEventListener("abort", upstreamAbortListener); + } } } @@ -75,6 +89,32 @@ export async function fetchBrowserJson( // keep as string } } + + const abortCtrl = new AbortController(); + const upstreamSignal = init?.signal; + let upstreamAbortListener: (() => void) | undefined; + if (upstreamSignal) { + if (upstreamSignal.aborted) { + abortCtrl.abort(upstreamSignal.reason); + } else { + upstreamAbortListener = () => abortCtrl.abort(upstreamSignal.reason); + upstreamSignal.addEventListener("abort", upstreamAbortListener, { once: true }); + } + } + + let abortListener: (() => void) | undefined; + const abortPromise: Promise = abortCtrl.signal.aborted + ? Promise.reject(abortCtrl.signal.reason ?? new Error("aborted")) + : new Promise((_, reject) => { + abortListener = () => reject(abortCtrl.signal.reason ?? new Error("aborted")); + abortCtrl.signal.addEventListener("abort", abortListener, { once: true }); + }); + + let timer: ReturnType | undefined; + if (timeoutMs) { + timer = setTimeout(() => abortCtrl.abort(new Error("timed out")), timeoutMs); + } + const dispatchPromise = dispatcher.dispatch({ method: init?.method?.toUpperCase() === "DELETE" @@ -85,16 +125,20 @@ export async function fetchBrowserJson( path: parsed.pathname, query, body, + signal: abortCtrl.signal, }); - const result = await (timeoutMs - ? Promise.race([ - dispatchPromise, - new Promise((_, reject) => - setTimeout(() => reject(new Error("timed out")), timeoutMs), - ), - ]) - : dispatchPromise); + const result = await Promise.race([dispatchPromise, abortPromise]).finally(() => { + if (timer) { + clearTimeout(timer); + } + if (abortListener) { + abortCtrl.signal.removeEventListener("abort", abortListener); + } + if (upstreamSignal && upstreamAbortListener) { + upstreamSignal.removeEventListener("abort", upstreamAbortListener); + } + }); if (result.status >= 400) { const message = diff --git a/src/browser/pw-ai.ts b/src/browser/pw-ai.ts index 446ecd0a46..72ba680c43 100644 --- a/src/browser/pw-ai.ts +++ b/src/browser/pw-ai.ts @@ -4,6 +4,7 @@ export { closePlaywrightBrowserConnection, createPageViaPlaywright, ensurePageState, + forceDisconnectPlaywrightForTarget, focusPageByTargetIdViaPlaywright, getPageForTargetId, listPagesViaPlaywright, diff --git a/src/browser/pw-session.ts b/src/browser/pw-session.ts index 7d72b3b13a..5cbe25a5c1 100644 --- a/src/browser/pw-session.ts +++ b/src/browser/pw-session.ts @@ -8,7 +8,8 @@ import type { } from "playwright-core"; import { chromium } from "playwright-core"; import { formatErrorMessage } from "../infra/errors.js"; -import { getHeadersWithAuth } from "./cdp.helpers.js"; +import { appendCdpPath, fetchJson, getHeadersWithAuth, withCdpSocket } from "./cdp.helpers.js"; +import { normalizeCdpWsUrl } from "./cdp.js"; import { getChromeWebSocketUrl } from "./chrome.js"; export type BrowserConsoleMessage = { @@ -52,6 +53,7 @@ type TargetInfoResponse = { type ConnectedBrowser = { browser: Browser; cdpUrl: string; + onDisconnected?: () => void; }; type PageState = { @@ -333,14 +335,15 @@ async function connectBrowser(cdpUrl: string): Promise { const endpoint = wsUrl ?? normalized; const headers = getHeadersWithAuth(endpoint); const browser = await chromium.connectOverCDP(endpoint, { timeout, headers }); - const connected: ConnectedBrowser = { browser, cdpUrl: normalized }; - cached = connected; - observeBrowser(browser); - browser.on("disconnected", () => { + const onDisconnected = () => { if (cached?.browser === browser) { cached = null; } - }); + }; + const connected: ConnectedBrowser = { browser, cdpUrl: normalized, onDisconnected }; + cached = connected; + browser.on("disconnected", onDisconnected); + observeBrowser(browser); return connected; } catch (err) { lastErr = err; @@ -503,12 +506,168 @@ export function refLocator(page: Page, ref: string) { export async function closePlaywrightBrowserConnection(): Promise { const cur = cached; cached = null; + connecting = null; if (!cur) { return; } + if (cur.onDisconnected && typeof cur.browser.off === "function") { + cur.browser.off("disconnected", cur.onDisconnected); + } await cur.browser.close().catch(() => {}); } +function normalizeCdpHttpBaseForJsonEndpoints(cdpUrl: string): string { + try { + const url = new URL(cdpUrl); + if (url.protocol === "ws:") { + url.protocol = "http:"; + } else if (url.protocol === "wss:") { + url.protocol = "https:"; + } + url.pathname = url.pathname.replace(/\/devtools\/browser\/.*$/, ""); + url.pathname = url.pathname.replace(/\/cdp$/, ""); + return url.toString().replace(/\/$/, ""); + } catch { + // Best-effort fallback for non-URL-ish inputs. + return cdpUrl + .replace(/^ws:/, "http:") + .replace(/^wss:/, "https:") + .replace(/\/devtools\/browser\/.*$/, "") + .replace(/\/cdp$/, "") + .replace(/\/$/, ""); + } +} + +function cdpSocketNeedsAttach(wsUrl: string): boolean { + try { + const pathname = new URL(wsUrl).pathname; + return ( + pathname === "/cdp" || pathname.endsWith("/cdp") || pathname.includes("/devtools/browser/") + ); + } catch { + return false; + } +} + +async function tryTerminateExecutionViaCdp(opts: { + cdpUrl: string; + targetId: string; +}): Promise { + const cdpHttpBase = normalizeCdpHttpBaseForJsonEndpoints(opts.cdpUrl); + const listUrl = appendCdpPath(cdpHttpBase, "/json/list"); + + const pages = await fetchJson< + Array<{ + id?: string; + webSocketDebuggerUrl?: string; + }> + >(listUrl, 2000).catch(() => null); + if (!pages || pages.length === 0) { + return; + } + + const target = pages.find((p) => String(p.id ?? "").trim() === opts.targetId); + const wsUrlRaw = String(target?.webSocketDebuggerUrl ?? "").trim(); + if (!wsUrlRaw) { + return; + } + const wsUrl = normalizeCdpWsUrl(wsUrlRaw, cdpHttpBase); + const needsAttach = cdpSocketNeedsAttach(wsUrl); + + const runWithTimeout = async (work: Promise, ms: number): Promise => { + let timer: ReturnType | undefined; + const timeoutPromise = new Promise((_, reject) => { + timer = setTimeout(() => reject(new Error("CDP command timed out")), ms); + }); + try { + return await Promise.race([work, timeoutPromise]); + } finally { + if (timer) { + clearTimeout(timer); + } + } + }; + + await withCdpSocket( + wsUrl, + async (send) => { + let sessionId: string | undefined; + try { + if (needsAttach) { + const attached = (await runWithTimeout( + send("Target.attachToTarget", { targetId: opts.targetId, flatten: true }), + 1500, + )) as { sessionId?: unknown }; + if (typeof attached?.sessionId === "string" && attached.sessionId.trim()) { + sessionId = attached.sessionId; + } + } + await runWithTimeout(send("Runtime.terminateExecution", undefined, sessionId), 1500); + if (sessionId) { + // Best-effort cleanup; not required for termination to take effect. + void send("Target.detachFromTarget", { sessionId }).catch(() => {}); + } + } catch { + // Best-effort; ignore + } + }, + { handshakeTimeoutMs: 2000 }, + ).catch(() => {}); +} + +/** + * Best-effort cancellation for stuck page operations. + * + * Playwright serializes CDP commands per page; a long-running or stuck operation (notably evaluate) + * can block all subsequent commands. We cannot safely "cancel" an individual command, and we do + * not want to close the actual Chromium tab. Instead, we disconnect Playwright's CDP connection + * so in-flight commands fail fast and the next request reconnects transparently. + * + * IMPORTANT: We CANNOT call Connection.close() because Playwright shares a single Connection + * across all objects (BrowserType, Browser, etc.). Closing it corrupts the entire Playwright + * instance, preventing reconnection. + * + * Instead we: + * 1. Null out `cached` so the next call triggers a fresh connectOverCDP + * 2. Fire-and-forget browser.close() — it may hang but won't block us + * 3. The next connectBrowser() creates a completely new CDP WebSocket connection + * + * The old browser.close() eventually resolves when the in-browser evaluate timeout fires, + * or the old connection gets GC'd. Either way, it doesn't affect the fresh connection. + */ +export async function forceDisconnectPlaywrightForTarget(opts: { + cdpUrl: string; + targetId?: string; + reason?: string; +}): Promise { + const normalized = normalizeCdpUrl(opts.cdpUrl); + if (cached?.cdpUrl !== normalized) { + return; + } + const cur = cached; + cached = null; + // Also clear `connecting` so the next call does a fresh connectOverCDP + // rather than awaiting a stale promise. + connecting = null; + if (cur) { + // Remove the "disconnected" listener to prevent the old browser's teardown + // from racing with a fresh connection and nulling the new `cached`. + if (cur.onDisconnected && typeof cur.browser.off === "function") { + cur.browser.off("disconnected", cur.onDisconnected); + } + + // Best-effort: kill any stuck JS to unblock the target's execution context before we + // disconnect Playwright's CDP connection. + const targetId = opts.targetId?.trim() || ""; + if (targetId) { + await tryTerminateExecutionViaCdp({ cdpUrl: normalized, targetId }).catch(() => {}); + } + + // Fire-and-forget: don't await because browser.close() may hang on the stuck CDP pipe. + cur.browser.close().catch(() => {}); + } +} + /** * List all pages/tabs from the persistent Playwright connection. * Used for remote profiles where HTTP-based /json/list is ephemeral. diff --git a/src/browser/pw-tools-core.interactions.evaluate.abort.test.ts b/src/browser/pw-tools-core.interactions.evaluate.abort.test.ts new file mode 100644 index 0000000000..dcada002db --- /dev/null +++ b/src/browser/pw-tools-core.interactions.evaluate.abort.test.ts @@ -0,0 +1,95 @@ +import { describe, expect, it, vi } from "vitest"; + +let page: { evaluate: ReturnType } | null = null; +let locator: { evaluate: ReturnType } | null = null; + +const forceDisconnectPlaywrightForTarget = vi.fn(async () => {}); +const getPageForTargetId = vi.fn(async () => { + if (!page) { + throw new Error("test: page not set"); + } + return page; +}); +const ensurePageState = vi.fn(() => {}); +const restoreRoleRefsForTarget = vi.fn(() => {}); +const refLocator = vi.fn(() => { + if (!locator) { + throw new Error("test: locator not set"); + } + return locator; +}); + +vi.mock("./pw-session.js", () => { + return { + ensurePageState, + forceDisconnectPlaywrightForTarget, + getPageForTargetId, + refLocator, + restoreRoleRefsForTarget, + }; +}); + +describe("evaluateViaPlaywright (abort)", () => { + it("rejects when aborted after page.evaluate starts", async () => { + vi.clearAllMocks(); + const ctrl = new AbortController(); + + let evalCalled!: () => void; + const evalCalledPromise = new Promise((resolve) => { + evalCalled = resolve; + }); + + page = { + evaluate: vi.fn(() => { + evalCalled(); + return new Promise(() => {}); + }), + }; + locator = { evaluate: vi.fn() }; + + const { evaluateViaPlaywright } = await import("./pw-tools-core.interactions.js"); + const p = evaluateViaPlaywright({ + cdpUrl: "http://127.0.0.1:9222", + fn: "() => 1", + signal: ctrl.signal, + }); + + await evalCalledPromise; + ctrl.abort(new Error("aborted by test")); + + await expect(p).rejects.toThrow("aborted by test"); + expect(forceDisconnectPlaywrightForTarget).toHaveBeenCalled(); + }); + + it("rejects when aborted after locator.evaluate starts", async () => { + vi.clearAllMocks(); + const ctrl = new AbortController(); + + let evalCalled!: () => void; + const evalCalledPromise = new Promise((resolve) => { + evalCalled = resolve; + }); + + page = { evaluate: vi.fn() }; + locator = { + evaluate: vi.fn(() => { + evalCalled(); + return new Promise(() => {}); + }), + }; + + const { evaluateViaPlaywright } = await import("./pw-tools-core.interactions.js"); + const p = evaluateViaPlaywright({ + cdpUrl: "http://127.0.0.1:9222", + fn: "(el) => el.textContent", + ref: "e1", + signal: ctrl.signal, + }); + + await evalCalledPromise; + ctrl.abort(new Error("aborted by test")); + + await expect(p).rejects.toThrow("aborted by test"); + expect(forceDisconnectPlaywrightForTarget).toHaveBeenCalled(); + }); +}); diff --git a/src/browser/pw-tools-core.interactions.ts b/src/browser/pw-tools-core.interactions.ts index 0c673ec1fa..55e130c580 100644 --- a/src/browser/pw-tools-core.interactions.ts +++ b/src/browser/pw-tools-core.interactions.ts @@ -1,6 +1,7 @@ import type { BrowserFormField } from "./client-actions-core.js"; import { ensurePageState, + forceDisconnectPlaywrightForTarget, getPageForTargetId, refLocator, restoreRoleRefsForTarget, @@ -221,6 +222,8 @@ export async function evaluateViaPlaywright(opts: { targetId?: string; fn: string; ref?: string; + timeoutMs?: number; + signal?: AbortSignal; }): Promise { const fnText = String(opts.fn ?? "").trim(); if (!fnText) { @@ -229,42 +232,139 @@ export async function evaluateViaPlaywright(opts: { const page = await getPageForTargetId(opts); ensurePageState(page); restoreRoleRefsForTarget({ cdpUrl: opts.cdpUrl, targetId: opts.targetId, page }); - if (opts.ref) { - const locator = refLocator(page, opts.ref); - // Use Function constructor at runtime to avoid esbuild adding __name helper - // which doesn't exist in the browser context - // eslint-disable-next-line @typescript-eslint/no-implied-eval -- required for browser-context eval - const elementEvaluator = new Function( - "el", - "fnBody", - ` - "use strict"; - try { - var candidate = eval("(" + fnBody + ")"); - return typeof candidate === "function" ? candidate(el) : candidate; - } catch (err) { - throw new Error("Invalid evaluate function: " + (err && err.message ? err.message : String(err))); - } - `, - ) as (el: Element, fnBody: string) => unknown; - return await locator.evaluate(elementEvaluator, fnText); + // Clamp evaluate timeout to prevent permanently blocking Playwright's command queue. + // Without this, a long-running async evaluate blocks all subsequent page operations + // because Playwright serializes CDP commands per page. + // + // NOTE: Playwright's { timeout } on evaluate only applies to installing the function, + // NOT to its execution time. We must inject a Promise.race timeout into the browser + // context itself so async functions are bounded. + const outerTimeout = normalizeTimeoutMs(opts.timeoutMs, 20_000); + // Leave headroom for routing/serialization overhead so the outer request timeout + // doesn't fire first and strand a long-running evaluate. + let evaluateTimeout = Math.max(1000, Math.min(120_000, outerTimeout - 500)); + evaluateTimeout = Math.min(evaluateTimeout, outerTimeout); + + const signal = opts.signal; + let abortListener: (() => void) | undefined; + let abortReject: ((reason: unknown) => void) | undefined; + let abortPromise: Promise | undefined; + if (signal) { + abortPromise = new Promise((_, reject) => { + abortReject = reject; + }); + // Ensure the abort promise never becomes an unhandled rejection if we throw early. + void abortPromise.catch(() => {}); } - // Use Function constructor at runtime to avoid esbuild adding __name helper - // which doesn't exist in the browser context - // eslint-disable-next-line @typescript-eslint/no-implied-eval -- required for browser-context eval - const browserEvaluator = new Function( - "fnBody", - ` - "use strict"; - try { - var candidate = eval("(" + fnBody + ")"); - return typeof candidate === "function" ? candidate() : candidate; - } catch (err) { - throw new Error("Invalid evaluate function: " + (err && err.message ? err.message : String(err))); + if (signal) { + const disconnect = () => { + void forceDisconnectPlaywrightForTarget({ + cdpUrl: opts.cdpUrl, + targetId: opts.targetId, + reason: "evaluate aborted", + }).catch(() => {}); + }; + if (signal.aborted) { + disconnect(); + throw signal.reason ?? new Error("aborted"); } - `, - ) as (fnBody: string) => unknown; - return await page.evaluate(browserEvaluator, fnText); + abortListener = () => { + disconnect(); + abortReject?.(signal.reason ?? new Error("aborted")); + }; + signal.addEventListener("abort", abortListener, { once: true }); + // If the signal aborted between the initial check and listener registration, handle it. + if (signal.aborted) { + abortListener(); + throw signal.reason ?? new Error("aborted"); + } + } + + try { + if (opts.ref) { + const locator = refLocator(page, opts.ref); + // eslint-disable-next-line @typescript-eslint/no-implied-eval -- required for browser-context eval + const elementEvaluator = new Function( + "el", + "args", + ` + "use strict"; + var fnBody = args.fnBody, timeoutMs = args.timeoutMs; + try { + var candidate = eval("(" + fnBody + ")"); + var result = typeof candidate === "function" ? candidate(el) : candidate; + if (result && typeof result.then === "function") { + return Promise.race([ + result, + new Promise(function(_, reject) { + setTimeout(function() { reject(new Error("evaluate timed out after " + timeoutMs + "ms")); }, timeoutMs); + }) + ]); + } + return result; + } catch (err) { + throw new Error("Invalid evaluate function: " + (err && err.message ? err.message : String(err))); + } + `, + ) as (el: Element, args: { fnBody: string; timeoutMs: number }) => unknown; + const evalPromise = locator.evaluate(elementEvaluator, { + fnBody: fnText, + timeoutMs: evaluateTimeout, + }); + if (!abortPromise) { + return await evalPromise; + } + try { + return await Promise.race([evalPromise, abortPromise]); + } catch (err) { + // If abort wins the race, the underlying evaluate may reject later; ensure we don't + // surface it as an unhandled rejection. + void evalPromise.catch(() => {}); + throw err; + } + } + + // eslint-disable-next-line @typescript-eslint/no-implied-eval -- required for browser-context eval + const browserEvaluator = new Function( + "args", + ` + "use strict"; + var fnBody = args.fnBody, timeoutMs = args.timeoutMs; + try { + var candidate = eval("(" + fnBody + ")"); + var result = typeof candidate === "function" ? candidate() : candidate; + if (result && typeof result.then === "function") { + return Promise.race([ + result, + new Promise(function(_, reject) { + setTimeout(function() { reject(new Error("evaluate timed out after " + timeoutMs + "ms")); }, timeoutMs); + }) + ]); + } + return result; + } catch (err) { + throw new Error("Invalid evaluate function: " + (err && err.message ? err.message : String(err))); + } + `, + ) as (args: { fnBody: string; timeoutMs: number }) => unknown; + const evalPromise = page.evaluate(browserEvaluator, { + fnBody: fnText, + timeoutMs: evaluateTimeout, + }); + if (!abortPromise) { + return await evalPromise; + } + try { + return await Promise.race([evalPromise, abortPromise]); + } catch (err) { + void evalPromise.catch(() => {}); + throw err; + } + } finally { + if (signal && abortListener) { + signal.removeEventListener("abort", abortListener); + } + } } export async function scrollIntoViewViaPlaywright(opts: { diff --git a/src/browser/routes/agent.act.ts b/src/browser/routes/agent.act.ts index b3e97ccba8..da692997c7 100644 --- a/src/browser/routes/agent.act.ts +++ b/src/browser/routes/agent.act.ts @@ -306,12 +306,18 @@ export function registerBrowserAgentActRoutes( return jsonError(res, 400, "fn is required"); } const ref = toStringOrEmpty(body.ref) || undefined; - const result = await pw.evaluateViaPlaywright({ + const evalTimeoutMs = toNumber(body.timeoutMs); + const evalRequest: Parameters[0] = { cdpUrl, targetId: tab.targetId, fn, ref, - }); + signal: req.signal, + }; + if (evalTimeoutMs !== undefined) { + evalRequest.timeoutMs = evalTimeoutMs; + } + const result = await pw.evaluateViaPlaywright(evalRequest); return res.json({ ok: true, targetId: tab.targetId, diff --git a/src/browser/routes/dispatcher.abort.test.ts b/src/browser/routes/dispatcher.abort.test.ts new file mode 100644 index 0000000000..42859bb26e --- /dev/null +++ b/src/browser/routes/dispatcher.abort.test.ts @@ -0,0 +1,46 @@ +import { describe, expect, it, vi } from "vitest"; +import type { BrowserRouteContext } from "../server-context.js"; + +vi.mock("./index.js", () => { + return { + registerBrowserRoutes(app: { get: (path: string, handler: unknown) => void }) { + app.get( + "/slow", + async (req: { signal?: AbortSignal }, res: { json: (body: unknown) => void }) => { + const signal = req.signal; + await new Promise((resolve, reject) => { + if (signal?.aborted) { + reject(signal.reason ?? new Error("aborted")); + return; + } + const onAbort = () => reject(signal?.reason ?? new Error("aborted")); + signal?.addEventListener("abort", onAbort, { once: true }); + setTimeout(resolve, 50); + }); + res.json({ ok: true }); + }, + ); + }, + }; +}); + +describe("browser route dispatcher (abort)", () => { + it("propagates AbortSignal and lets handlers observe abort", async () => { + const { createBrowserRouteDispatcher } = await import("./dispatcher.js"); + const dispatcher = createBrowserRouteDispatcher({} as BrowserRouteContext); + + const ctrl = new AbortController(); + const promise = dispatcher.dispatch({ + method: "GET", + path: "/slow", + signal: ctrl.signal, + }); + + ctrl.abort(new Error("timed out")); + + await expect(promise).resolves.toMatchObject({ + status: 500, + body: { error: expect.stringContaining("timed out") }, + }); + }); +}); diff --git a/src/browser/routes/dispatcher.ts b/src/browser/routes/dispatcher.ts index 39a6535014..6395cd192a 100644 --- a/src/browser/routes/dispatcher.ts +++ b/src/browser/routes/dispatcher.ts @@ -8,6 +8,7 @@ type BrowserDispatchRequest = { path: string; query?: Record; body?: unknown; + signal?: AbortSignal; }; type BrowserDispatchResponse = { @@ -68,6 +69,7 @@ export function createBrowserRouteDispatcher(ctx: BrowserRouteContext) { const path = normalizePath(req.path); const query = req.query ?? {}; const body = req.body; + const signal = req.signal; const match = registry.routes.find((route) => { if (route.method !== method) { @@ -108,6 +110,7 @@ export function createBrowserRouteDispatcher(ctx: BrowserRouteContext) { params, query, body, + signal, }, res, ); diff --git a/src/browser/routes/types.ts b/src/browser/routes/types.ts index 76e6051c95..97d5ff470a 100644 --- a/src/browser/routes/types.ts +++ b/src/browser/routes/types.ts @@ -2,6 +2,11 @@ export type BrowserRequest = { params: Record; query: Record; body?: unknown; + /** + * Optional abort signal for in-process dispatch. This lets callers enforce + * timeouts and (where supported) cancel long-running operations. + */ + signal?: AbortSignal; }; export type BrowserResponse = { diff --git a/src/browser/server.agent-contract-form-layout-act-commands.test.ts b/src/browser/server.agent-contract-form-layout-act-commands.test.ts index a8b8a38744..d1ea49b9f8 100644 --- a/src/browser/server.agent-contract-form-layout-act-commands.test.ts +++ b/src/browser/server.agent-contract-form-layout-act-commands.test.ts @@ -357,12 +357,15 @@ describe("browser control server", () => { }); expect(evalRes.ok).toBe(true); expect(evalRes.result).toBe("ok"); - expect(pwMocks.evaluateViaPlaywright).toHaveBeenCalledWith({ - cdpUrl: cdpBaseUrl, - targetId: "abcd1234", - fn: "() => 1", - ref: undefined, - }); + expect(pwMocks.evaluateViaPlaywright).toHaveBeenCalledWith( + expect.objectContaining({ + cdpUrl: cdpBaseUrl, + targetId: "abcd1234", + fn: "() => 1", + ref: undefined, + signal: expect.any(AbortSignal), + }), + ); }, slowTimeoutMs, ); diff --git a/src/browser/server.ts b/src/browser/server.ts index 8be214654b..345f044973 100644 --- a/src/browser/server.ts +++ b/src/browser/server.ts @@ -24,6 +24,19 @@ export async function startBrowserControlServerFromConfig(): Promise { + const ctrl = new AbortController(); + const abort = () => ctrl.abort(new Error("request aborted")); + req.once("aborted", abort); + res.once("close", () => { + if (!res.writableEnded) { + abort(); + } + }); + // Make the signal available to browser route handlers (best-effort). + (req as unknown as { signal?: AbortSignal }).signal = ctrl.signal; + next(); + }); app.use(express.json({ limit: "1mb" })); const ctx = createBrowserRouteContext({ diff --git a/src/node-host/runner.ts b/src/node-host/runner.ts index 15e9fdde79..be16a1ff55 100644 --- a/src/node-host/runner.ts +++ b/src/node-host/runner.ts @@ -45,6 +45,7 @@ import { detectMime } from "../media/mime.js"; import { GATEWAY_CLIENT_MODES, GATEWAY_CLIENT_NAMES } from "../utils/message-channel.js"; import { VERSION } from "../version.js"; import { ensureNodeHostConfig, saveNodeHostConfig, type NodeHostGatewayConfig } from "./config.js"; +import { withTimeout } from "./with-timeout.js"; type NodeHostRunOptions = { gatewayHost: string; @@ -275,29 +276,6 @@ async function ensureBrowserControlService(): Promise { return browserControlReady; } -async function withTimeout(promise: Promise, timeoutMs?: number, label?: string): Promise { - const resolved = - typeof timeoutMs === "number" && Number.isFinite(timeoutMs) - ? Math.max(1, Math.floor(timeoutMs)) - : undefined; - if (!resolved) { - return await promise; - } - let timer: ReturnType | undefined; - const timeoutPromise = new Promise((_, reject) => { - timer = setTimeout(() => { - reject(new Error(`${label ?? "request"} timed out`)); - }, resolved); - }); - try { - return await Promise.race([promise, timeoutPromise]); - } finally { - if (timer) { - clearTimeout(timer); - } - } -} - function isProfileAllowed(params: { allowProfiles: string[]; profile?: string | null }) { const { allowProfiles, profile } = params; if (!allowProfiles.length) { @@ -790,12 +768,14 @@ async function handleInvoke( } const dispatcher = createBrowserRouteDispatcher(createBrowserControlContext()); const response = await withTimeout( - dispatcher.dispatch({ - method: method === "DELETE" ? "DELETE" : method === "POST" ? "POST" : "GET", - path, - query, - body, - }), + (signal) => + dispatcher.dispatch({ + method: method === "DELETE" ? "DELETE" : method === "POST" ? "POST" : "GET", + path, + query, + body, + signal, + }), params.timeoutMs, "browser proxy request", ); diff --git a/src/node-host/with-timeout.ts b/src/node-host/with-timeout.ts new file mode 100644 index 0000000000..07ea141549 --- /dev/null +++ b/src/node-host/with-timeout.ts @@ -0,0 +1,34 @@ +export async function withTimeout( + work: (signal: AbortSignal | undefined) => Promise, + timeoutMs?: number, + label?: string, +): Promise { + const resolved = + typeof timeoutMs === "number" && Number.isFinite(timeoutMs) + ? Math.max(1, Math.floor(timeoutMs)) + : undefined; + if (!resolved) { + return await work(undefined); + } + + const abortCtrl = new AbortController(); + const timeoutError = new Error(`${label ?? "request"} timed out`); + const timer = setTimeout(() => abortCtrl.abort(timeoutError), resolved); + + let abortListener: (() => void) | undefined; + const abortPromise: Promise = abortCtrl.signal.aborted + ? Promise.reject(abortCtrl.signal.reason ?? timeoutError) + : new Promise((_, reject) => { + abortListener = () => reject(abortCtrl.signal.reason ?? timeoutError); + abortCtrl.signal.addEventListener("abort", abortListener, { once: true }); + }); + + try { + return await Promise.race([work(abortCtrl.signal), abortPromise]); + } finally { + clearTimeout(timer); + if (abortListener) { + abortCtrl.signal.removeEventListener("abort", abortListener); + } + } +}