diff --git a/src/process/kill-tree.test.ts b/src/process/kill-tree.test.ts new file mode 100644 index 0000000000..48f081f19e --- /dev/null +++ b/src/process/kill-tree.test.ts @@ -0,0 +1,135 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; + +const { spawnMock } = vi.hoisted(() => ({ + spawnMock: vi.fn(), +})); + +vi.mock("node:child_process", () => ({ + spawn: (...args: unknown[]) => spawnMock(...args), +})); + +async function withPlatform(platform: NodeJS.Platform, run: () => Promise | T): Promise { + const originalPlatform = Object.getOwnPropertyDescriptor(process, "platform"); + Object.defineProperty(process, "platform", { value: platform, configurable: true }); + try { + return await run(); + } finally { + if (originalPlatform) { + Object.defineProperty(process, "platform", originalPlatform); + } + } +} + +describe("killProcessTree", () => { + let killSpy: ReturnType; + + beforeEach(() => { + spawnMock.mockReset(); + killSpy = vi.spyOn(process, "kill"); + vi.useFakeTimers(); + }); + + afterEach(() => { + killSpy.mockRestore(); + vi.useRealTimers(); + vi.resetModules(); + vi.clearAllMocks(); + }); + + it("on Windows skips delayed force-kill when PID is already gone", async () => { + killSpy.mockImplementation(((pid: number, signal?: NodeJS.Signals | number) => { + if (pid === 4242 && signal === 0) { + throw new Error("ESRCH"); + } + return true; + }) as typeof process.kill); + + await withPlatform("win32", async () => { + const { killProcessTree } = await import("./kill-tree.js"); + killProcessTree(4242, { graceMs: 25 }); + + expect(spawnMock).toHaveBeenCalledTimes(1); + expect(spawnMock).toHaveBeenNthCalledWith( + 1, + "taskkill", + ["/T", "/PID", "4242"], + expect.objectContaining({ detached: true, stdio: "ignore" }), + ); + + await vi.advanceTimersByTimeAsync(25); + expect(spawnMock).toHaveBeenCalledTimes(1); + }); + }); + + it("on Windows force-kills after grace period only when PID still exists", async () => { + killSpy.mockImplementation(((pid: number, signal?: NodeJS.Signals | number) => { + if (pid === 5252 && signal === 0) { + return true; + } + return true; + }) as typeof process.kill); + + await withPlatform("win32", async () => { + const { killProcessTree } = await import("./kill-tree.js"); + killProcessTree(5252, { graceMs: 10 }); + + await vi.advanceTimersByTimeAsync(10); + + expect(spawnMock).toHaveBeenCalledTimes(2); + expect(spawnMock).toHaveBeenNthCalledWith( + 1, + "taskkill", + ["/T", "/PID", "5252"], + expect.objectContaining({ detached: true, stdio: "ignore" }), + ); + expect(spawnMock).toHaveBeenNthCalledWith( + 2, + "taskkill", + ["/F", "/T", "/PID", "5252"], + expect.objectContaining({ detached: true, stdio: "ignore" }), + ); + }); + }); + + it("on Unix sends SIGTERM first and skips SIGKILL when process exits", async () => { + killSpy.mockImplementation(((pid: number, signal?: NodeJS.Signals | number) => { + if (pid === -3333 && signal === 0) { + throw new Error("ESRCH"); + } + if (pid === 3333 && signal === 0) { + throw new Error("ESRCH"); + } + return true; + }) as typeof process.kill); + + await withPlatform("linux", async () => { + const { killProcessTree } = await import("./kill-tree.js"); + killProcessTree(3333, { graceMs: 10 }); + + await vi.advanceTimersByTimeAsync(10); + + expect(killSpy).toHaveBeenCalledWith(-3333, "SIGTERM"); + expect(killSpy).not.toHaveBeenCalledWith(-3333, "SIGKILL"); + expect(killSpy).not.toHaveBeenCalledWith(3333, "SIGKILL"); + }); + }); + + it("on Unix sends SIGKILL after grace period when process is still alive", async () => { + killSpy.mockImplementation(((pid: number, signal?: NodeJS.Signals | number) => { + if (pid === -4444 && signal === 0) { + return true; + } + return true; + }) as typeof process.kill); + + await withPlatform("linux", async () => { + const { killProcessTree } = await import("./kill-tree.js"); + killProcessTree(4444, { graceMs: 5 }); + + await vi.advanceTimersByTimeAsync(5); + + expect(killSpy).toHaveBeenCalledWith(-4444, "SIGTERM"); + expect(killSpy).toHaveBeenCalledWith(-4444, "SIGKILL"); + }); + }); +}); diff --git a/src/process/kill-tree.ts b/src/process/kill-tree.ts index ce12ca9c5b..e3f83f63a0 100644 --- a/src/process/kill-tree.ts +++ b/src/process/kill-tree.ts @@ -1,5 +1,8 @@ import { spawn } from "node:child_process"; +const DEFAULT_GRACE_MS = 3000; +const MAX_GRACE_MS = 60_000; + /** * Best-effort process-tree termination with graceful shutdown. * - Windows: use taskkill /T to include descendants. Sends SIGTERM-equivalent @@ -14,7 +17,7 @@ export function killProcessTree(pid: number, opts?: { graceMs?: number }): void return; } - const graceMs = opts?.graceMs ?? 3000; + const graceMs = normalizeGraceMs(opts?.graceMs); if (process.platform === "win32") { killProcessTreeWindows(pid, graceMs); @@ -24,6 +27,22 @@ export function killProcessTree(pid: number, opts?: { graceMs?: number }): void killProcessTreeUnix(pid, graceMs); } +function normalizeGraceMs(value?: number): number { + if (typeof value !== "number" || !Number.isFinite(value)) { + return DEFAULT_GRACE_MS; + } + return Math.max(0, Math.min(MAX_GRACE_MS, Math.floor(value))); +} + +function isProcessAlive(pid: number): boolean { + try { + process.kill(pid, 0); + return true; + } catch { + return false; + } +} + function killProcessTreeUnix(pid: number, graceMs: number): void { // Step 1: Try graceful SIGTERM to process group try { @@ -40,55 +59,46 @@ function killProcessTreeUnix(pid: number, graceMs: number): void { // Step 2: Wait grace period, then SIGKILL if still alive setTimeout(() => { - try { - // Check if still alive by sending signal 0 - process.kill(-pid, 0); - // Still alive - hard kill + if (isProcessAlive(-pid)) { try { process.kill(-pid, "SIGKILL"); + return; } catch { - try { - process.kill(pid, "SIGKILL"); - } catch { - // Gone now - } + // Fall through to direct pid kill } + } + if (!isProcessAlive(pid)) { + return; + } + try { + process.kill(pid, "SIGKILL"); } catch { - // Process group gone - check direct - try { - process.kill(pid, 0); - try { - process.kill(pid, "SIGKILL"); - } catch { - // Gone - } - } catch { - // Already terminated - } + // Process exited between liveness check and kill } }, graceMs).unref(); // Don't block event loop exit } -function killProcessTreeWindows(pid: number, graceMs: number): void { - // Step 1: Try graceful termination (taskkill without /F) +function runTaskkill(args: string[]): void { try { - spawn("taskkill", ["/T", "/PID", String(pid)], { + spawn("taskkill", args, { stdio: "ignore", detached: true, }); } catch { - // Ignore spawn failures + // Ignore taskkill spawn failures } +} - // Step 2: Wait grace period, then force kill if still alive +function killProcessTreeWindows(pid: number, graceMs: number): void { + // Step 1: Try graceful termination (taskkill without /F) + runTaskkill(["/T", "/PID", String(pid)]); + + // Step 2: Wait grace period, then force kill only if pid still exists. + // This avoids unconditional delayed /F kills after graceful shutdown. setTimeout(() => { - try { - spawn("taskkill", ["/F", "/T", "/PID", String(pid)], { - stdio: "ignore", - detached: true, - }); - } catch { - // Ignore taskkill failures + if (!isProcessAlive(pid)) { + return; } + runTaskkill(["/F", "/T", "/PID", String(pid)]); }, graceMs).unref(); // Don't block event loop exit } diff --git a/src/process/supervisor/adapters/pty.test.ts b/src/process/supervisor/adapters/pty.test.ts index c6753251be..4b45fd17e6 100644 --- a/src/process/supervisor/adapters/pty.test.ts +++ b/src/process/supervisor/adapters/pty.test.ts @@ -36,9 +36,11 @@ describe("createPtyAdapter", () => { spawnMock.mockReset(); ptyKillMock.mockReset(); killProcessTreeMock.mockReset(); + vi.useRealTimers(); }); afterEach(() => { + vi.useRealTimers(); vi.resetModules(); vi.clearAllMocks(); }); @@ -79,6 +81,53 @@ describe("createPtyAdapter", () => { expect(ptyKillMock).not.toHaveBeenCalled(); }); + it("wait does not settle immediately on SIGKILL", async () => { + vi.useFakeTimers(); + spawnMock.mockReturnValue(createStubPty()); + const { createPtyAdapter } = await import("./pty.js"); + + const adapter = await createPtyAdapter({ + shell: "bash", + args: ["-lc", "sleep 10"], + }); + + const waitPromise = adapter.wait(); + const settled = vi.fn(); + void waitPromise.then(() => settled()); + + adapter.kill(); + + await Promise.resolve(); + expect(settled).not.toHaveBeenCalled(); + + await vi.advanceTimersByTimeAsync(3999); + expect(settled).not.toHaveBeenCalled(); + + await vi.advanceTimersByTimeAsync(1); + await expect(waitPromise).resolves.toEqual({ code: null, signal: "SIGKILL" }); + }); + + it("prefers real PTY exit over SIGKILL fallback settle", async () => { + vi.useFakeTimers(); + const stub = createStubPty(); + spawnMock.mockReturnValue(stub); + const { createPtyAdapter } = await import("./pty.js"); + + const adapter = await createPtyAdapter({ + shell: "bash", + args: ["-lc", "sleep 10"], + }); + + const waitPromise = adapter.wait(); + adapter.kill(); + stub.emitExit({ exitCode: 0, signal: 9 }); + + await expect(waitPromise).resolves.toEqual({ code: 0, signal: 9 }); + + await vi.advanceTimersByTimeAsync(10_000); + await expect(adapter.wait()).resolves.toEqual({ code: 0, signal: 9 }); + }); + it("resolves wait when exit fires before wait is called", async () => { const stub = createStubPty(); spawnMock.mockReturnValue(stub); diff --git a/src/process/supervisor/adapters/pty.ts b/src/process/supervisor/adapters/pty.ts index 966b534243..e93c42a00b 100644 --- a/src/process/supervisor/adapters/pty.ts +++ b/src/process/supervisor/adapters/pty.ts @@ -1,7 +1,9 @@ -import { killProcessTree } from "../../kill-tree.js"; import type { ManagedRunStdin } from "../types.js"; +import { killProcessTree } from "../../kill-tree.js"; import { toStringEnv } from "./env.js"; +const FORCE_KILL_WAIT_FALLBACK_MS = 4000; + type PtyExitEvent = { exitCode: number; signal?: number }; type PtyDisposable = { dispose: () => void }; type PtySpawnHandle = { @@ -70,11 +72,21 @@ export async function createPtyAdapter(params: { | null = null; let waitPromise: Promise<{ code: number | null; signal: NodeJS.Signals | number | null }> | null = null; + let forceKillWaitFallbackTimer: NodeJS.Timeout | null = null; + + const clearForceKillWaitFallback = () => { + if (!forceKillWaitFallbackTimer) { + return; + } + clearTimeout(forceKillWaitFallbackTimer); + forceKillWaitFallbackTimer = null; + }; const settleWait = (value: { code: number | null; signal: NodeJS.Signals | number | null }) => { if (waitResult) { return; } + clearForceKillWaitFallback(); waitResult = value; if (resolveWait) { const resolve = resolveWait; @@ -83,6 +95,16 @@ export async function createPtyAdapter(params: { } }; + const scheduleForceKillWaitFallback = (signal: NodeJS.Signals) => { + clearForceKillWaitFallback(); + // Some PTY hosts fail to emit onExit after kill; use a delayed fallback + // so callers can still unblock without marking termination immediately. + forceKillWaitFallbackTimer = setTimeout(() => { + settleWait({ code: null, signal }); + }, FORCE_KILL_WAIT_FALLBACK_MS); + forceKillWaitFallbackTimer.unref(); + }; + exitListener = pty.onExit((event) => { const signal = event.signal && event.signal !== 0 ? event.signal : null; @@ -151,9 +173,10 @@ export async function createPtyAdapter(params: { } catch { // ignore kill errors } - // Some PTY hosts do not emit `onExit` reliably after kill. - // Ensure waiters can progress on forced termination. - settleWait({ code: null, signal }); + + if (signal === "SIGKILL") { + scheduleForceKillWaitFallback(signal); + } }; const dispose = () => { @@ -167,6 +190,7 @@ export async function createPtyAdapter(params: { } catch { // ignore disposal errors } + clearForceKillWaitFallback(); dataListener = null; exitListener = null; settleWait({ code: null, signal: null });