refactor: centralize exec approval timeout

This commit is contained in:
Peter Steinberger
2026-02-15 01:10:18 +01:00
parent 27eef96380
commit ea0ef18704
5 changed files with 69 additions and 42 deletions

View File

@@ -1,4 +1,5 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
import { DEFAULT_EXEC_APPROVAL_TIMEOUT_MS } from "../../infra/exec-approvals.js";
import { parseTimeoutMs } from "../nodes-run.js";
/**
@@ -14,7 +15,7 @@ import { parseTimeoutMs } from "../nodes-run.js";
* without overriding opts.timeout, so the 35s CLI default raced against the
* 120s approval wait on the gateway side. The CLI always lost.
*
* The fix: override opts.timeout for the exec.approval.request call to be at
* The fix: override the transport timeout for exec.approval.request to be at
* least approvalTimeoutMs + 10_000.
*/
@@ -48,17 +49,20 @@ describe("nodes run: approval transport timeout (#12098)", () => {
expect(callOpts.timeoutMs).toBe(35_000);
});
it("fix: overriding opts.timeout gives the approval enough transport time", async () => {
it("fix: overriding transportTimeoutMs gives the approval enough transport time", async () => {
const { callGatewayCli } = await import("./rpc.js");
const approvalTimeoutMs = 120_000;
// Mirror the production code: parseTimeoutMs(opts.timeout) ?? 0
const fixedTimeout = String(Math.max(parseTimeoutMs("35000") ?? 0, approvalTimeoutMs + 10_000));
expect(Number(fixedTimeout)).toBe(130_000);
const transportTimeoutMs = Math.max(parseTimeoutMs("35000") ?? 0, approvalTimeoutMs + 10_000);
expect(transportTimeoutMs).toBe(130_000);
await callGatewayCli("exec.approval.request", { timeout: fixedTimeout } as never, {
timeoutMs: approvalTimeoutMs,
});
await callGatewayCli(
"exec.approval.request",
{ timeout: "35000" } as never,
{ timeoutMs: approvalTimeoutMs },
{ transportTimeoutMs },
);
expect(callGatewaySpy).toHaveBeenCalledTimes(1);
const callOpts = callGatewaySpy.mock.calls[0][0] as Record<string, unknown>;
@@ -72,14 +76,18 @@ describe("nodes run: approval transport timeout (#12098)", () => {
const approvalTimeoutMs = 120_000;
const userTimeout = 200_000;
// Mirror the production code: parseTimeoutMs preserves valid large values
const fixedTimeout = String(
Math.max(parseTimeoutMs(String(userTimeout)) ?? 0, approvalTimeoutMs + 10_000),
const transportTimeoutMs = Math.max(
parseTimeoutMs(String(userTimeout)) ?? 0,
approvalTimeoutMs + 10_000,
);
expect(Number(fixedTimeout)).toBe(200_000);
expect(transportTimeoutMs).toBe(200_000);
await callGatewayCli("exec.approval.request", { timeout: fixedTimeout } as never, {
timeoutMs: approvalTimeoutMs,
});
await callGatewayCli(
"exec.approval.request",
{ timeout: String(userTimeout) } as never,
{ timeoutMs: approvalTimeoutMs },
{ transportTimeoutMs },
);
const callOpts = callGatewaySpy.mock.calls[0][0] as Record<string, unknown>;
expect(callOpts.timeoutMs).toBe(200_000);
@@ -88,15 +96,18 @@ describe("nodes run: approval transport timeout (#12098)", () => {
it("fix: non-numeric timeout falls back to approval floor", async () => {
const { callGatewayCli } = await import("./rpc.js");
const approvalTimeoutMs = 120_000;
const approvalTimeoutMs = DEFAULT_EXEC_APPROVAL_TIMEOUT_MS;
// parseTimeoutMs returns undefined for garbage input, ?? 0 ensures
// Math.max picks the approval floor instead of producing NaN
const fixedTimeout = String(Math.max(parseTimeoutMs("foo") ?? 0, approvalTimeoutMs + 10_000));
expect(Number(fixedTimeout)).toBe(130_000);
const transportTimeoutMs = Math.max(parseTimeoutMs("foo") ?? 0, approvalTimeoutMs + 10_000);
expect(transportTimeoutMs).toBe(130_000);
await callGatewayCli("exec.approval.request", { timeout: fixedTimeout } as never, {
timeoutMs: approvalTimeoutMs,
});
await callGatewayCli(
"exec.approval.request",
{ timeout: "foo" } as never,
{ timeoutMs: approvalTimeoutMs },
{ transportTimeoutMs },
);
const callOpts = callGatewaySpy.mock.calls[0][0] as Record<string, unknown>;
expect(callOpts.timeoutMs).toBe(130_000);

View File

@@ -5,6 +5,7 @@ import { resolveAgentConfig, resolveDefaultAgentId } from "../../agents/agent-sc
import { loadConfig } from "../../config/config.js";
import { randomIdempotencyKey } from "../../gateway/call.js";
import {
DEFAULT_EXEC_APPROVAL_TIMEOUT_MS,
type ExecApprovalsFile,
type ExecAsk,
type ExecSecurity,
@@ -272,30 +273,33 @@ export function registerNodesInvokeCommands(nodes: Command) {
let approvalId: string | null = null;
if (requiresAsk) {
approvalId = crypto.randomUUID();
const approvalTimeoutMs = 120_000;
const approvalTimeoutMs = DEFAULT_EXEC_APPROVAL_TIMEOUT_MS;
// The CLI transport timeout (opts.timeout) must be longer than the
// gateway-side approval wait so the connection stays alive while the
// user decides. Without this override the default 35 s transport
// timeout races — and always loses — against the 120 s approval
// timeout, causing "gateway timeout after 35000ms" (#12098).
const approvalOpts = {
...opts,
timeout: String(
Math.max(parseTimeoutMs(opts.timeout) ?? 0, approvalTimeoutMs + 10_000),
),
};
const decisionResult = (await callGatewayCli("exec.approval.request", approvalOpts, {
id: approvalId,
command: rawCommand ?? argv.join(" "),
cwd: opts.cwd,
host: "node",
security: hostSecurity,
ask: hostAsk,
agentId,
resolvedPath: undefined,
sessionKey: undefined,
timeoutMs: approvalTimeoutMs,
})) as { decision?: string } | null;
const transportTimeoutMs = Math.max(
parseTimeoutMs(opts.timeout) ?? 0,
approvalTimeoutMs + 10_000,
);
const decisionResult = (await callGatewayCli(
"exec.approval.request",
opts,
{
id: approvalId,
command: rawCommand ?? argv.join(" "),
cwd: opts.cwd,
host: "node",
security: hostSecurity,
ask: hostAsk,
agentId,
resolvedPath: undefined,
sessionKey: undefined,
timeoutMs: approvalTimeoutMs,
},
{ transportTimeoutMs },
)) as { decision?: string } | null;
const decision =
decisionResult && typeof decisionResult === "object"
? (decisionResult.decision ?? null)

View File

@@ -13,7 +13,12 @@ export const nodesCallOpts = (cmd: Command, defaults?: { timeoutMs?: number }) =
.option("--timeout <ms>", "Timeout in ms", String(defaults?.timeoutMs ?? 10_000))
.option("--json", "Output JSON", false);
export const callGatewayCli = async (method: string, opts: NodesRpcOpts, params?: unknown) =>
export const callGatewayCli = async (
method: string,
opts: NodesRpcOpts,
params?: unknown,
callOpts?: { transportTimeoutMs?: number },
) =>
withProgress(
{
label: `Nodes ${method}`,
@@ -26,7 +31,7 @@ export const callGatewayCli = async (method: string, opts: NodesRpcOpts, params?
token: opts.token,
method,
params,
timeoutMs: Number(opts.timeout ?? 10_000),
timeoutMs: callOpts?.transportTimeoutMs ?? Number(opts.timeout ?? 10_000),
clientName: GATEWAY_CLIENT_NAMES.CLI,
mode: GATEWAY_CLIENT_MODES.CLI,
}),

View File

@@ -1,7 +1,10 @@
import type { ExecApprovalForwarder } from "../../infra/exec-approval-forwarder.js";
import type { ExecApprovalDecision } from "../../infra/exec-approvals.js";
import type { ExecApprovalManager } from "../exec-approval-manager.js";
import type { GatewayRequestHandlers } from "./types.js";
import {
DEFAULT_EXEC_APPROVAL_TIMEOUT_MS,
type ExecApprovalDecision,
} from "../../infra/exec-approvals.js";
import {
ErrorCodes,
errorShape,
@@ -43,7 +46,8 @@ export function createExecApprovalHandlers(
twoPhase?: boolean;
};
const twoPhase = p.twoPhase === true;
const timeoutMs = typeof p.timeoutMs === "number" ? p.timeoutMs : 120_000;
const timeoutMs =
typeof p.timeoutMs === "number" ? p.timeoutMs : DEFAULT_EXEC_APPROVAL_TIMEOUT_MS;
const explicitId = typeof p.id === "string" && p.id.trim().length > 0 ? p.id.trim() : null;
if (explicitId && manager.getSnapshot(explicitId)) {
respond(

View File

@@ -81,6 +81,9 @@ export type ExecApprovalsResolved = {
file: ExecApprovalsFile;
};
// Keep CLI + gateway defaults in sync.
export const DEFAULT_EXEC_APPROVAL_TIMEOUT_MS = 120_000;
const DEFAULT_SECURITY: ExecSecurity = "deny";
const DEFAULT_ASK: ExecAsk = "on-miss";
const DEFAULT_ASK_FALLBACK: ExecSecurity = "deny";