fix(process): harden graceful kill-tree cancellation semantics

This commit is contained in:
Sebastian
2026-02-16 20:35:00 -05:00
parent 7b172d61cd
commit fb996031bc
4 changed files with 255 additions and 37 deletions

View File

@@ -0,0 +1,135 @@
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
const { spawnMock } = vi.hoisted(() => ({
spawnMock: vi.fn(),
}));
vi.mock("node:child_process", () => ({
spawn: (...args: unknown[]) => spawnMock(...args),
}));
async function withPlatform<T>(platform: NodeJS.Platform, run: () => Promise<T> | T): Promise<T> {
const originalPlatform = Object.getOwnPropertyDescriptor(process, "platform");
Object.defineProperty(process, "platform", { value: platform, configurable: true });
try {
return await run();
} finally {
if (originalPlatform) {
Object.defineProperty(process, "platform", originalPlatform);
}
}
}
describe("killProcessTree", () => {
let killSpy: ReturnType<typeof vi.spyOn>;
beforeEach(() => {
spawnMock.mockReset();
killSpy = vi.spyOn(process, "kill");
vi.useFakeTimers();
});
afterEach(() => {
killSpy.mockRestore();
vi.useRealTimers();
vi.resetModules();
vi.clearAllMocks();
});
it("on Windows skips delayed force-kill when PID is already gone", async () => {
killSpy.mockImplementation(((pid: number, signal?: NodeJS.Signals | number) => {
if (pid === 4242 && signal === 0) {
throw new Error("ESRCH");
}
return true;
}) as typeof process.kill);
await withPlatform("win32", async () => {
const { killProcessTree } = await import("./kill-tree.js");
killProcessTree(4242, { graceMs: 25 });
expect(spawnMock).toHaveBeenCalledTimes(1);
expect(spawnMock).toHaveBeenNthCalledWith(
1,
"taskkill",
["/T", "/PID", "4242"],
expect.objectContaining({ detached: true, stdio: "ignore" }),
);
await vi.advanceTimersByTimeAsync(25);
expect(spawnMock).toHaveBeenCalledTimes(1);
});
});
it("on Windows force-kills after grace period only when PID still exists", async () => {
killSpy.mockImplementation(((pid: number, signal?: NodeJS.Signals | number) => {
if (pid === 5252 && signal === 0) {
return true;
}
return true;
}) as typeof process.kill);
await withPlatform("win32", async () => {
const { killProcessTree } = await import("./kill-tree.js");
killProcessTree(5252, { graceMs: 10 });
await vi.advanceTimersByTimeAsync(10);
expect(spawnMock).toHaveBeenCalledTimes(2);
expect(spawnMock).toHaveBeenNthCalledWith(
1,
"taskkill",
["/T", "/PID", "5252"],
expect.objectContaining({ detached: true, stdio: "ignore" }),
);
expect(spawnMock).toHaveBeenNthCalledWith(
2,
"taskkill",
["/F", "/T", "/PID", "5252"],
expect.objectContaining({ detached: true, stdio: "ignore" }),
);
});
});
it("on Unix sends SIGTERM first and skips SIGKILL when process exits", async () => {
killSpy.mockImplementation(((pid: number, signal?: NodeJS.Signals | number) => {
if (pid === -3333 && signal === 0) {
throw new Error("ESRCH");
}
if (pid === 3333 && signal === 0) {
throw new Error("ESRCH");
}
return true;
}) as typeof process.kill);
await withPlatform("linux", async () => {
const { killProcessTree } = await import("./kill-tree.js");
killProcessTree(3333, { graceMs: 10 });
await vi.advanceTimersByTimeAsync(10);
expect(killSpy).toHaveBeenCalledWith(-3333, "SIGTERM");
expect(killSpy).not.toHaveBeenCalledWith(-3333, "SIGKILL");
expect(killSpy).not.toHaveBeenCalledWith(3333, "SIGKILL");
});
});
it("on Unix sends SIGKILL after grace period when process is still alive", async () => {
killSpy.mockImplementation(((pid: number, signal?: NodeJS.Signals | number) => {
if (pid === -4444 && signal === 0) {
return true;
}
return true;
}) as typeof process.kill);
await withPlatform("linux", async () => {
const { killProcessTree } = await import("./kill-tree.js");
killProcessTree(4444, { graceMs: 5 });
await vi.advanceTimersByTimeAsync(5);
expect(killSpy).toHaveBeenCalledWith(-4444, "SIGTERM");
expect(killSpy).toHaveBeenCalledWith(-4444, "SIGKILL");
});
});
});

View File

@@ -1,5 +1,8 @@
import { spawn } from "node:child_process";
const DEFAULT_GRACE_MS = 3000;
const MAX_GRACE_MS = 60_000;
/**
* Best-effort process-tree termination with graceful shutdown.
* - Windows: use taskkill /T to include descendants. Sends SIGTERM-equivalent
@@ -14,7 +17,7 @@ export function killProcessTree(pid: number, opts?: { graceMs?: number }): void
return;
}
const graceMs = opts?.graceMs ?? 3000;
const graceMs = normalizeGraceMs(opts?.graceMs);
if (process.platform === "win32") {
killProcessTreeWindows(pid, graceMs);
@@ -24,6 +27,22 @@ export function killProcessTree(pid: number, opts?: { graceMs?: number }): void
killProcessTreeUnix(pid, graceMs);
}
function normalizeGraceMs(value?: number): number {
if (typeof value !== "number" || !Number.isFinite(value)) {
return DEFAULT_GRACE_MS;
}
return Math.max(0, Math.min(MAX_GRACE_MS, Math.floor(value)));
}
function isProcessAlive(pid: number): boolean {
try {
process.kill(pid, 0);
return true;
} catch {
return false;
}
}
function killProcessTreeUnix(pid: number, graceMs: number): void {
// Step 1: Try graceful SIGTERM to process group
try {
@@ -40,55 +59,46 @@ function killProcessTreeUnix(pid: number, graceMs: number): void {
// Step 2: Wait grace period, then SIGKILL if still alive
setTimeout(() => {
try {
// Check if still alive by sending signal 0
process.kill(-pid, 0);
// Still alive - hard kill
if (isProcessAlive(-pid)) {
try {
process.kill(-pid, "SIGKILL");
return;
} catch {
try {
process.kill(pid, "SIGKILL");
} catch {
// Gone now
}
// Fall through to direct pid kill
}
}
if (!isProcessAlive(pid)) {
return;
}
try {
process.kill(pid, "SIGKILL");
} catch {
// Process group gone - check direct
try {
process.kill(pid, 0);
try {
process.kill(pid, "SIGKILL");
} catch {
// Gone
}
} catch {
// Already terminated
}
// Process exited between liveness check and kill
}
}, graceMs).unref(); // Don't block event loop exit
}
function killProcessTreeWindows(pid: number, graceMs: number): void {
// Step 1: Try graceful termination (taskkill without /F)
function runTaskkill(args: string[]): void {
try {
spawn("taskkill", ["/T", "/PID", String(pid)], {
spawn("taskkill", args, {
stdio: "ignore",
detached: true,
});
} catch {
// Ignore spawn failures
// Ignore taskkill spawn failures
}
}
// Step 2: Wait grace period, then force kill if still alive
function killProcessTreeWindows(pid: number, graceMs: number): void {
// Step 1: Try graceful termination (taskkill without /F)
runTaskkill(["/T", "/PID", String(pid)]);
// Step 2: Wait grace period, then force kill only if pid still exists.
// This avoids unconditional delayed /F kills after graceful shutdown.
setTimeout(() => {
try {
spawn("taskkill", ["/F", "/T", "/PID", String(pid)], {
stdio: "ignore",
detached: true,
});
} catch {
// Ignore taskkill failures
if (!isProcessAlive(pid)) {
return;
}
runTaskkill(["/F", "/T", "/PID", String(pid)]);
}, graceMs).unref(); // Don't block event loop exit
}

View File

@@ -36,9 +36,11 @@ describe("createPtyAdapter", () => {
spawnMock.mockReset();
ptyKillMock.mockReset();
killProcessTreeMock.mockReset();
vi.useRealTimers();
});
afterEach(() => {
vi.useRealTimers();
vi.resetModules();
vi.clearAllMocks();
});
@@ -79,6 +81,53 @@ describe("createPtyAdapter", () => {
expect(ptyKillMock).not.toHaveBeenCalled();
});
it("wait does not settle immediately on SIGKILL", async () => {
vi.useFakeTimers();
spawnMock.mockReturnValue(createStubPty());
const { createPtyAdapter } = await import("./pty.js");
const adapter = await createPtyAdapter({
shell: "bash",
args: ["-lc", "sleep 10"],
});
const waitPromise = adapter.wait();
const settled = vi.fn();
void waitPromise.then(() => settled());
adapter.kill();
await Promise.resolve();
expect(settled).not.toHaveBeenCalled();
await vi.advanceTimersByTimeAsync(3999);
expect(settled).not.toHaveBeenCalled();
await vi.advanceTimersByTimeAsync(1);
await expect(waitPromise).resolves.toEqual({ code: null, signal: "SIGKILL" });
});
it("prefers real PTY exit over SIGKILL fallback settle", async () => {
vi.useFakeTimers();
const stub = createStubPty();
spawnMock.mockReturnValue(stub);
const { createPtyAdapter } = await import("./pty.js");
const adapter = await createPtyAdapter({
shell: "bash",
args: ["-lc", "sleep 10"],
});
const waitPromise = adapter.wait();
adapter.kill();
stub.emitExit({ exitCode: 0, signal: 9 });
await expect(waitPromise).resolves.toEqual({ code: 0, signal: 9 });
await vi.advanceTimersByTimeAsync(10_000);
await expect(adapter.wait()).resolves.toEqual({ code: 0, signal: 9 });
});
it("resolves wait when exit fires before wait is called", async () => {
const stub = createStubPty();
spawnMock.mockReturnValue(stub);

View File

@@ -1,7 +1,9 @@
import { killProcessTree } from "../../kill-tree.js";
import type { ManagedRunStdin } from "../types.js";
import { killProcessTree } from "../../kill-tree.js";
import { toStringEnv } from "./env.js";
const FORCE_KILL_WAIT_FALLBACK_MS = 4000;
type PtyExitEvent = { exitCode: number; signal?: number };
type PtyDisposable = { dispose: () => void };
type PtySpawnHandle = {
@@ -70,11 +72,21 @@ export async function createPtyAdapter(params: {
| null = null;
let waitPromise: Promise<{ code: number | null; signal: NodeJS.Signals | number | null }> | null =
null;
let forceKillWaitFallbackTimer: NodeJS.Timeout | null = null;
const clearForceKillWaitFallback = () => {
if (!forceKillWaitFallbackTimer) {
return;
}
clearTimeout(forceKillWaitFallbackTimer);
forceKillWaitFallbackTimer = null;
};
const settleWait = (value: { code: number | null; signal: NodeJS.Signals | number | null }) => {
if (waitResult) {
return;
}
clearForceKillWaitFallback();
waitResult = value;
if (resolveWait) {
const resolve = resolveWait;
@@ -83,6 +95,16 @@ export async function createPtyAdapter(params: {
}
};
const scheduleForceKillWaitFallback = (signal: NodeJS.Signals) => {
clearForceKillWaitFallback();
// Some PTY hosts fail to emit onExit after kill; use a delayed fallback
// so callers can still unblock without marking termination immediately.
forceKillWaitFallbackTimer = setTimeout(() => {
settleWait({ code: null, signal });
}, FORCE_KILL_WAIT_FALLBACK_MS);
forceKillWaitFallbackTimer.unref();
};
exitListener =
pty.onExit((event) => {
const signal = event.signal && event.signal !== 0 ? event.signal : null;
@@ -151,9 +173,10 @@ export async function createPtyAdapter(params: {
} catch {
// ignore kill errors
}
// Some PTY hosts do not emit `onExit` reliably after kill.
// Ensure waiters can progress on forced termination.
settleWait({ code: null, signal });
if (signal === "SIGKILL") {
scheduleForceKillWaitFallback(signal);
}
};
const dispose = () => {
@@ -167,6 +190,7 @@ export async function createPtyAdapter(params: {
} catch {
// ignore disposal errors
}
clearForceKillWaitFallback();
dataListener = null;
exitListener = null;
settleWait({ code: null, signal: null });