mirror of
https://github.com/openclaw/openclaw.git
synced 2026-02-19 18:39:20 -05:00
fix(process): harden graceful kill-tree cancellation semantics
This commit is contained in:
135
src/process/kill-tree.test.ts
Normal file
135
src/process/kill-tree.test.ts
Normal file
@@ -0,0 +1,135 @@
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
const { spawnMock } = vi.hoisted(() => ({
|
||||
spawnMock: vi.fn(),
|
||||
}));
|
||||
|
||||
vi.mock("node:child_process", () => ({
|
||||
spawn: (...args: unknown[]) => spawnMock(...args),
|
||||
}));
|
||||
|
||||
async function withPlatform<T>(platform: NodeJS.Platform, run: () => Promise<T> | T): Promise<T> {
|
||||
const originalPlatform = Object.getOwnPropertyDescriptor(process, "platform");
|
||||
Object.defineProperty(process, "platform", { value: platform, configurable: true });
|
||||
try {
|
||||
return await run();
|
||||
} finally {
|
||||
if (originalPlatform) {
|
||||
Object.defineProperty(process, "platform", originalPlatform);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
describe("killProcessTree", () => {
|
||||
let killSpy: ReturnType<typeof vi.spyOn>;
|
||||
|
||||
beforeEach(() => {
|
||||
spawnMock.mockReset();
|
||||
killSpy = vi.spyOn(process, "kill");
|
||||
vi.useFakeTimers();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
killSpy.mockRestore();
|
||||
vi.useRealTimers();
|
||||
vi.resetModules();
|
||||
vi.clearAllMocks();
|
||||
});
|
||||
|
||||
it("on Windows skips delayed force-kill when PID is already gone", async () => {
|
||||
killSpy.mockImplementation(((pid: number, signal?: NodeJS.Signals | number) => {
|
||||
if (pid === 4242 && signal === 0) {
|
||||
throw new Error("ESRCH");
|
||||
}
|
||||
return true;
|
||||
}) as typeof process.kill);
|
||||
|
||||
await withPlatform("win32", async () => {
|
||||
const { killProcessTree } = await import("./kill-tree.js");
|
||||
killProcessTree(4242, { graceMs: 25 });
|
||||
|
||||
expect(spawnMock).toHaveBeenCalledTimes(1);
|
||||
expect(spawnMock).toHaveBeenNthCalledWith(
|
||||
1,
|
||||
"taskkill",
|
||||
["/T", "/PID", "4242"],
|
||||
expect.objectContaining({ detached: true, stdio: "ignore" }),
|
||||
);
|
||||
|
||||
await vi.advanceTimersByTimeAsync(25);
|
||||
expect(spawnMock).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
});
|
||||
|
||||
it("on Windows force-kills after grace period only when PID still exists", async () => {
|
||||
killSpy.mockImplementation(((pid: number, signal?: NodeJS.Signals | number) => {
|
||||
if (pid === 5252 && signal === 0) {
|
||||
return true;
|
||||
}
|
||||
return true;
|
||||
}) as typeof process.kill);
|
||||
|
||||
await withPlatform("win32", async () => {
|
||||
const { killProcessTree } = await import("./kill-tree.js");
|
||||
killProcessTree(5252, { graceMs: 10 });
|
||||
|
||||
await vi.advanceTimersByTimeAsync(10);
|
||||
|
||||
expect(spawnMock).toHaveBeenCalledTimes(2);
|
||||
expect(spawnMock).toHaveBeenNthCalledWith(
|
||||
1,
|
||||
"taskkill",
|
||||
["/T", "/PID", "5252"],
|
||||
expect.objectContaining({ detached: true, stdio: "ignore" }),
|
||||
);
|
||||
expect(spawnMock).toHaveBeenNthCalledWith(
|
||||
2,
|
||||
"taskkill",
|
||||
["/F", "/T", "/PID", "5252"],
|
||||
expect.objectContaining({ detached: true, stdio: "ignore" }),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
it("on Unix sends SIGTERM first and skips SIGKILL when process exits", async () => {
|
||||
killSpy.mockImplementation(((pid: number, signal?: NodeJS.Signals | number) => {
|
||||
if (pid === -3333 && signal === 0) {
|
||||
throw new Error("ESRCH");
|
||||
}
|
||||
if (pid === 3333 && signal === 0) {
|
||||
throw new Error("ESRCH");
|
||||
}
|
||||
return true;
|
||||
}) as typeof process.kill);
|
||||
|
||||
await withPlatform("linux", async () => {
|
||||
const { killProcessTree } = await import("./kill-tree.js");
|
||||
killProcessTree(3333, { graceMs: 10 });
|
||||
|
||||
await vi.advanceTimersByTimeAsync(10);
|
||||
|
||||
expect(killSpy).toHaveBeenCalledWith(-3333, "SIGTERM");
|
||||
expect(killSpy).not.toHaveBeenCalledWith(-3333, "SIGKILL");
|
||||
expect(killSpy).not.toHaveBeenCalledWith(3333, "SIGKILL");
|
||||
});
|
||||
});
|
||||
|
||||
it("on Unix sends SIGKILL after grace period when process is still alive", async () => {
|
||||
killSpy.mockImplementation(((pid: number, signal?: NodeJS.Signals | number) => {
|
||||
if (pid === -4444 && signal === 0) {
|
||||
return true;
|
||||
}
|
||||
return true;
|
||||
}) as typeof process.kill);
|
||||
|
||||
await withPlatform("linux", async () => {
|
||||
const { killProcessTree } = await import("./kill-tree.js");
|
||||
killProcessTree(4444, { graceMs: 5 });
|
||||
|
||||
await vi.advanceTimersByTimeAsync(5);
|
||||
|
||||
expect(killSpy).toHaveBeenCalledWith(-4444, "SIGTERM");
|
||||
expect(killSpy).toHaveBeenCalledWith(-4444, "SIGKILL");
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -1,5 +1,8 @@
|
||||
import { spawn } from "node:child_process";
|
||||
|
||||
const DEFAULT_GRACE_MS = 3000;
|
||||
const MAX_GRACE_MS = 60_000;
|
||||
|
||||
/**
|
||||
* Best-effort process-tree termination with graceful shutdown.
|
||||
* - Windows: use taskkill /T to include descendants. Sends SIGTERM-equivalent
|
||||
@@ -14,7 +17,7 @@ export function killProcessTree(pid: number, opts?: { graceMs?: number }): void
|
||||
return;
|
||||
}
|
||||
|
||||
const graceMs = opts?.graceMs ?? 3000;
|
||||
const graceMs = normalizeGraceMs(opts?.graceMs);
|
||||
|
||||
if (process.platform === "win32") {
|
||||
killProcessTreeWindows(pid, graceMs);
|
||||
@@ -24,6 +27,22 @@ export function killProcessTree(pid: number, opts?: { graceMs?: number }): void
|
||||
killProcessTreeUnix(pid, graceMs);
|
||||
}
|
||||
|
||||
function normalizeGraceMs(value?: number): number {
|
||||
if (typeof value !== "number" || !Number.isFinite(value)) {
|
||||
return DEFAULT_GRACE_MS;
|
||||
}
|
||||
return Math.max(0, Math.min(MAX_GRACE_MS, Math.floor(value)));
|
||||
}
|
||||
|
||||
function isProcessAlive(pid: number): boolean {
|
||||
try {
|
||||
process.kill(pid, 0);
|
||||
return true;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
function killProcessTreeUnix(pid: number, graceMs: number): void {
|
||||
// Step 1: Try graceful SIGTERM to process group
|
||||
try {
|
||||
@@ -40,55 +59,46 @@ function killProcessTreeUnix(pid: number, graceMs: number): void {
|
||||
|
||||
// Step 2: Wait grace period, then SIGKILL if still alive
|
||||
setTimeout(() => {
|
||||
try {
|
||||
// Check if still alive by sending signal 0
|
||||
process.kill(-pid, 0);
|
||||
// Still alive - hard kill
|
||||
if (isProcessAlive(-pid)) {
|
||||
try {
|
||||
process.kill(-pid, "SIGKILL");
|
||||
return;
|
||||
} catch {
|
||||
try {
|
||||
process.kill(pid, "SIGKILL");
|
||||
} catch {
|
||||
// Gone now
|
||||
}
|
||||
// Fall through to direct pid kill
|
||||
}
|
||||
}
|
||||
if (!isProcessAlive(pid)) {
|
||||
return;
|
||||
}
|
||||
try {
|
||||
process.kill(pid, "SIGKILL");
|
||||
} catch {
|
||||
// Process group gone - check direct
|
||||
try {
|
||||
process.kill(pid, 0);
|
||||
try {
|
||||
process.kill(pid, "SIGKILL");
|
||||
} catch {
|
||||
// Gone
|
||||
}
|
||||
} catch {
|
||||
// Already terminated
|
||||
}
|
||||
// Process exited between liveness check and kill
|
||||
}
|
||||
}, graceMs).unref(); // Don't block event loop exit
|
||||
}
|
||||
|
||||
function killProcessTreeWindows(pid: number, graceMs: number): void {
|
||||
// Step 1: Try graceful termination (taskkill without /F)
|
||||
function runTaskkill(args: string[]): void {
|
||||
try {
|
||||
spawn("taskkill", ["/T", "/PID", String(pid)], {
|
||||
spawn("taskkill", args, {
|
||||
stdio: "ignore",
|
||||
detached: true,
|
||||
});
|
||||
} catch {
|
||||
// Ignore spawn failures
|
||||
// Ignore taskkill spawn failures
|
||||
}
|
||||
}
|
||||
|
||||
// Step 2: Wait grace period, then force kill if still alive
|
||||
function killProcessTreeWindows(pid: number, graceMs: number): void {
|
||||
// Step 1: Try graceful termination (taskkill without /F)
|
||||
runTaskkill(["/T", "/PID", String(pid)]);
|
||||
|
||||
// Step 2: Wait grace period, then force kill only if pid still exists.
|
||||
// This avoids unconditional delayed /F kills after graceful shutdown.
|
||||
setTimeout(() => {
|
||||
try {
|
||||
spawn("taskkill", ["/F", "/T", "/PID", String(pid)], {
|
||||
stdio: "ignore",
|
||||
detached: true,
|
||||
});
|
||||
} catch {
|
||||
// Ignore taskkill failures
|
||||
if (!isProcessAlive(pid)) {
|
||||
return;
|
||||
}
|
||||
runTaskkill(["/F", "/T", "/PID", String(pid)]);
|
||||
}, graceMs).unref(); // Don't block event loop exit
|
||||
}
|
||||
|
||||
@@ -36,9 +36,11 @@ describe("createPtyAdapter", () => {
|
||||
spawnMock.mockReset();
|
||||
ptyKillMock.mockReset();
|
||||
killProcessTreeMock.mockReset();
|
||||
vi.useRealTimers();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
vi.useRealTimers();
|
||||
vi.resetModules();
|
||||
vi.clearAllMocks();
|
||||
});
|
||||
@@ -79,6 +81,53 @@ describe("createPtyAdapter", () => {
|
||||
expect(ptyKillMock).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("wait does not settle immediately on SIGKILL", async () => {
|
||||
vi.useFakeTimers();
|
||||
spawnMock.mockReturnValue(createStubPty());
|
||||
const { createPtyAdapter } = await import("./pty.js");
|
||||
|
||||
const adapter = await createPtyAdapter({
|
||||
shell: "bash",
|
||||
args: ["-lc", "sleep 10"],
|
||||
});
|
||||
|
||||
const waitPromise = adapter.wait();
|
||||
const settled = vi.fn();
|
||||
void waitPromise.then(() => settled());
|
||||
|
||||
adapter.kill();
|
||||
|
||||
await Promise.resolve();
|
||||
expect(settled).not.toHaveBeenCalled();
|
||||
|
||||
await vi.advanceTimersByTimeAsync(3999);
|
||||
expect(settled).not.toHaveBeenCalled();
|
||||
|
||||
await vi.advanceTimersByTimeAsync(1);
|
||||
await expect(waitPromise).resolves.toEqual({ code: null, signal: "SIGKILL" });
|
||||
});
|
||||
|
||||
it("prefers real PTY exit over SIGKILL fallback settle", async () => {
|
||||
vi.useFakeTimers();
|
||||
const stub = createStubPty();
|
||||
spawnMock.mockReturnValue(stub);
|
||||
const { createPtyAdapter } = await import("./pty.js");
|
||||
|
||||
const adapter = await createPtyAdapter({
|
||||
shell: "bash",
|
||||
args: ["-lc", "sleep 10"],
|
||||
});
|
||||
|
||||
const waitPromise = adapter.wait();
|
||||
adapter.kill();
|
||||
stub.emitExit({ exitCode: 0, signal: 9 });
|
||||
|
||||
await expect(waitPromise).resolves.toEqual({ code: 0, signal: 9 });
|
||||
|
||||
await vi.advanceTimersByTimeAsync(10_000);
|
||||
await expect(adapter.wait()).resolves.toEqual({ code: 0, signal: 9 });
|
||||
});
|
||||
|
||||
it("resolves wait when exit fires before wait is called", async () => {
|
||||
const stub = createStubPty();
|
||||
spawnMock.mockReturnValue(stub);
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
import { killProcessTree } from "../../kill-tree.js";
|
||||
import type { ManagedRunStdin } from "../types.js";
|
||||
import { killProcessTree } from "../../kill-tree.js";
|
||||
import { toStringEnv } from "./env.js";
|
||||
|
||||
const FORCE_KILL_WAIT_FALLBACK_MS = 4000;
|
||||
|
||||
type PtyExitEvent = { exitCode: number; signal?: number };
|
||||
type PtyDisposable = { dispose: () => void };
|
||||
type PtySpawnHandle = {
|
||||
@@ -70,11 +72,21 @@ export async function createPtyAdapter(params: {
|
||||
| null = null;
|
||||
let waitPromise: Promise<{ code: number | null; signal: NodeJS.Signals | number | null }> | null =
|
||||
null;
|
||||
let forceKillWaitFallbackTimer: NodeJS.Timeout | null = null;
|
||||
|
||||
const clearForceKillWaitFallback = () => {
|
||||
if (!forceKillWaitFallbackTimer) {
|
||||
return;
|
||||
}
|
||||
clearTimeout(forceKillWaitFallbackTimer);
|
||||
forceKillWaitFallbackTimer = null;
|
||||
};
|
||||
|
||||
const settleWait = (value: { code: number | null; signal: NodeJS.Signals | number | null }) => {
|
||||
if (waitResult) {
|
||||
return;
|
||||
}
|
||||
clearForceKillWaitFallback();
|
||||
waitResult = value;
|
||||
if (resolveWait) {
|
||||
const resolve = resolveWait;
|
||||
@@ -83,6 +95,16 @@ export async function createPtyAdapter(params: {
|
||||
}
|
||||
};
|
||||
|
||||
const scheduleForceKillWaitFallback = (signal: NodeJS.Signals) => {
|
||||
clearForceKillWaitFallback();
|
||||
// Some PTY hosts fail to emit onExit after kill; use a delayed fallback
|
||||
// so callers can still unblock without marking termination immediately.
|
||||
forceKillWaitFallbackTimer = setTimeout(() => {
|
||||
settleWait({ code: null, signal });
|
||||
}, FORCE_KILL_WAIT_FALLBACK_MS);
|
||||
forceKillWaitFallbackTimer.unref();
|
||||
};
|
||||
|
||||
exitListener =
|
||||
pty.onExit((event) => {
|
||||
const signal = event.signal && event.signal !== 0 ? event.signal : null;
|
||||
@@ -151,9 +173,10 @@ export async function createPtyAdapter(params: {
|
||||
} catch {
|
||||
// ignore kill errors
|
||||
}
|
||||
// Some PTY hosts do not emit `onExit` reliably after kill.
|
||||
// Ensure waiters can progress on forced termination.
|
||||
settleWait({ code: null, signal });
|
||||
|
||||
if (signal === "SIGKILL") {
|
||||
scheduleForceKillWaitFallback(signal);
|
||||
}
|
||||
};
|
||||
|
||||
const dispose = () => {
|
||||
@@ -167,6 +190,7 @@ export async function createPtyAdapter(params: {
|
||||
} catch {
|
||||
// ignore disposal errors
|
||||
}
|
||||
clearForceKillWaitFallback();
|
||||
dataListener = null;
|
||||
exitListener = null;
|
||||
settleWait({ code: null, signal: null });
|
||||
|
||||
Reference in New Issue
Block a user