fix(agents): stabilize sessions_spawn e2e suite

This commit is contained in:
Peter Steinberger
2026-02-15 22:39:40 +00:00
parent a948212ca7
commit 6b4590be06
4 changed files with 188 additions and 204 deletions

View File

@@ -1,6 +1,6 @@
import { beforeEach, describe, expect, it } from "vitest";
import { createOpenClawTools } from "./openclaw-tools.js";
import "./test-helpers/fast-core-tools.js";
import { createOpenClawTools } from "./openclaw-tools.js";
import {
getCallGatewayMock,
resetSessionsSpawnConfigOverride,
@@ -9,6 +9,7 @@ import {
import { resetSubagentRegistryForTests } from "./subagent-registry.js";
const callGatewayMock = getCallGatewayMock();
const setConfigOverride = setSessionsSpawnConfigOverride;
describe("openclaw-tools: subagents (sessions_spawn allowlist)", () => {
beforeEach(() => {

View File

@@ -1,16 +1,117 @@
import { beforeEach, describe, expect, it } from "vitest";
import { beforeEach, describe, expect, it, vi } from "vitest";
import { emitAgentEvent } from "../infra/agent-events.js";
import { sleep } from "../utils.js";
import { createOpenClawTools } from "./openclaw-tools.js";
import "./test-helpers/fast-core-tools.js";
import { createOpenClawTools } from "./openclaw-tools.js";
import {
getCallGatewayMock,
resetSessionsSpawnConfigOverride,
} from "./openclaw-tools.subagents.sessions-spawn.test-harness.js";
import { resetSubagentRegistryForTests } from "./subagent-registry.js";
vi.mock("./pi-embedded.js", () => ({
isEmbeddedPiRunActive: () => false,
isEmbeddedPiRunStreaming: () => false,
queueEmbeddedPiMessage: () => false,
waitForEmbeddedPiRunEnd: async () => true,
}));
const callGatewayMock = getCallGatewayMock();
type GatewayRequest = { method?: string; params?: unknown };
type AgentWaitCall = { runId?: string; timeoutMs?: number };
function setupSessionsSpawnGatewayMock(opts: {
includeSessionsList?: boolean;
includeChatHistory?: boolean;
onAgentSubagentSpawn?: (params: unknown) => void;
onSessionsPatch?: (params: unknown) => void;
onSessionsDelete?: (params: unknown) => void;
agentWaitResult?: { status: "ok" | "timeout"; startedAt: number; endedAt: number };
}): {
calls: Array<GatewayRequest>;
waitCalls: Array<AgentWaitCall>;
getChild: () => { runId?: string; sessionKey?: string };
} {
const calls: Array<GatewayRequest> = [];
const waitCalls: Array<AgentWaitCall> = [];
let agentCallCount = 0;
let childRunId: string | undefined;
let childSessionKey: string | undefined;
callGatewayMock.mockImplementation(async (optsUnknown: unknown) => {
const request = optsUnknown as GatewayRequest;
calls.push(request);
if (request.method === "sessions.list" && opts.includeSessionsList) {
return {
sessions: [
{
key: "main",
lastChannel: "whatsapp",
lastTo: "+123",
},
],
};
}
if (request.method === "agent") {
agentCallCount += 1;
const runId = `run-${agentCallCount}`;
const params = request.params as { lane?: string; sessionKey?: string } | undefined;
// Only capture the first agent call (subagent spawn, not main agent trigger)
if (params?.lane === "subagent") {
childRunId = runId;
childSessionKey = params?.sessionKey ?? "";
opts.onAgentSubagentSpawn?.(params);
}
return {
runId,
status: "accepted",
acceptedAt: 1000 + agentCallCount,
};
}
if (request.method === "agent.wait") {
const params = request.params as AgentWaitCall | undefined;
waitCalls.push(params ?? {});
const res = opts.agentWaitResult ?? { status: "ok", startedAt: 1000, endedAt: 2000 };
return {
runId: params?.runId ?? "run-1",
...res,
};
}
if (request.method === "sessions.patch") {
opts.onSessionsPatch?.(request.params);
return { ok: true };
}
if (request.method === "sessions.delete") {
opts.onSessionsDelete?.(request.params);
return { ok: true };
}
if (request.method === "chat.history" && opts.includeChatHistory) {
return {
messages: [
{
role: "assistant",
content: [{ type: "text", text: "done" }],
},
],
};
}
return {};
});
return {
calls,
waitCalls,
getChild: () => ({ runId: childRunId, sessionKey: childSessionKey }),
};
}
describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
beforeEach(() => {
resetSessionsSpawnConfigOverride();
@@ -19,75 +120,17 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
it("sessions_spawn runs cleanup flow after subagent completion", async () => {
resetSubagentRegistryForTests();
callGatewayMock.mockReset();
const calls: Array<{ method?: string; params?: unknown }> = [];
let agentCallCount = 0;
let childRunId: string | undefined;
let childSessionKey: string | undefined;
const waitCalls: Array<{ runId?: string; timeoutMs?: number }> = [];
let patchParams: { key?: string; label?: string } = {};
callGatewayMock.mockImplementation(async (opts: unknown) => {
const request = opts as { method?: string; params?: unknown };
calls.push(request);
if (request.method === "sessions.list") {
return {
sessions: [
{
key: "main",
lastChannel: "whatsapp",
lastTo: "+123",
},
],
};
}
if (request.method === "agent") {
agentCallCount += 1;
const runId = `run-${agentCallCount}`;
const params = request.params as {
message?: string;
sessionKey?: string;
lane?: string;
};
// Only capture the first agent call (subagent spawn, not main agent trigger)
if (params?.lane === "subagent") {
childRunId = runId;
childSessionKey = params?.sessionKey ?? "";
const ctx = setupSessionsSpawnGatewayMock({
includeSessionsList: true,
includeChatHistory: true,
onSessionsPatch: (params) => {
const rec = params as { key?: string; label?: string } | undefined;
if (typeof rec?.label === "string" && rec.label.trim()) {
patchParams = { key: rec.key, label: rec.label };
}
return {
runId,
status: "accepted",
acceptedAt: 2000 + agentCallCount,
};
}
if (request.method === "agent.wait") {
const params = request.params as { runId?: string; timeoutMs?: number } | undefined;
waitCalls.push(params ?? {});
return {
runId: params?.runId ?? "run-1",
status: "ok",
startedAt: 1000,
endedAt: 2000,
};
}
if (request.method === "sessions.patch") {
const params = request.params as { key?: string; label?: string } | undefined;
patchParams = { key: params?.key, label: params?.label };
return { ok: true };
}
if (request.method === "chat.history") {
return {
messages: [
{
role: "assistant",
content: [{ type: "text", text: "done" }],
},
],
};
}
if (request.method === "sessions.delete") {
return { ok: true };
}
return {};
},
});
const tool = createOpenClawTools({
@@ -108,11 +151,12 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
runId: "run-1",
});
if (!childRunId) {
const child = ctx.getChild();
if (!child.runId) {
throw new Error("missing child runId");
}
emitAgentEvent({
runId: childRunId,
runId: child.runId,
stream: "lifecycle",
data: {
phase: "end",
@@ -121,18 +165,21 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
},
});
await sleep(0);
await sleep(0);
await sleep(0);
vi.useFakeTimers();
try {
await vi.advanceTimersByTimeAsync(500);
} finally {
vi.useRealTimers();
}
const childWait = waitCalls.find((call) => call.runId === childRunId);
const childWait = ctx.waitCalls.find((call) => call.runId === child.runId);
expect(childWait?.timeoutMs).toBe(1000);
// Cleanup should patch the label
expect(patchParams.key).toBe(childSessionKey);
expect(patchParams.key).toBe(child.sessionKey);
expect(patchParams.label).toBe("my-task");
// Two agent calls: subagent spawn + main agent trigger
const agentCalls = calls.filter((c) => c.method === "agent");
const agentCalls = ctx.calls.filter((c) => c.method === "agent");
expect(agentCalls).toHaveLength(2);
// First call: subagent spawn
@@ -145,62 +192,25 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
expect(second?.message).toContain("subagent task");
// No direct send to external channel (main agent handles delivery)
const sendCalls = calls.filter((c) => c.method === "send");
const sendCalls = ctx.calls.filter((c) => c.method === "send");
expect(sendCalls.length).toBe(0);
expect(childSessionKey?.startsWith("agent:main:subagent:")).toBe(true);
expect(child.sessionKey?.startsWith("agent:main:subagent:")).toBe(true);
});
it("sessions_spawn runs cleanup via lifecycle events", async () => {
resetSubagentRegistryForTests();
callGatewayMock.mockReset();
const calls: Array<{ method?: string; params?: unknown }> = [];
let agentCallCount = 0;
let deletedKey: string | undefined;
let childRunId: string | undefined;
let childSessionKey: string | undefined;
const waitCalls: Array<{ runId?: string; timeoutMs?: number }> = [];
callGatewayMock.mockImplementation(async (opts: unknown) => {
const request = opts as { method?: string; params?: unknown };
calls.push(request);
if (request.method === "agent") {
agentCallCount += 1;
const runId = `run-${agentCallCount}`;
const params = request.params as {
message?: string;
sessionKey?: string;
channel?: string;
timeout?: number;
lane?: string;
};
if (params?.lane === "subagent") {
childRunId = runId;
childSessionKey = params?.sessionKey ?? "";
expect(params?.channel).toBe("discord");
expect(params?.timeout).toBe(1);
}
return {
runId,
status: "accepted",
acceptedAt: 1000 + agentCallCount,
};
}
if (request.method === "agent.wait") {
const params = request.params as { runId?: string; timeoutMs?: number } | undefined;
waitCalls.push(params ?? {});
return {
runId: params?.runId ?? "run-1",
status: "ok",
startedAt: 1000,
endedAt: 2000,
};
}
if (request.method === "sessions.delete") {
const params = request.params as { key?: string } | undefined;
deletedKey = params?.key;
return { ok: true };
}
return {};
const ctx = setupSessionsSpawnGatewayMock({
onAgentSubagentSpawn: (params) => {
const rec = params as { channel?: string; timeout?: number } | undefined;
expect(rec?.channel).toBe("discord");
expect(rec?.timeout).toBe(1);
},
onSessionsDelete: (params) => {
const rec = params as { key?: string } | undefined;
deletedKey = rec?.key;
},
});
const tool = createOpenClawTools({
@@ -221,13 +231,14 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
runId: "run-1",
});
if (!childRunId) {
const child = ctx.getChild();
if (!child.runId) {
throw new Error("missing child runId");
}
vi.useFakeTimers();
try {
emitAgentEvent({
runId: childRunId,
runId: child.runId,
stream: "lifecycle",
data: {
phase: "end",
@@ -241,10 +252,10 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
vi.useRealTimers();
}
const childWait = waitCalls.find((call) => call.runId === childRunId);
const childWait = ctx.waitCalls.find((call) => call.runId === child.runId);
expect(childWait?.timeoutMs).toBe(1000);
const agentCalls = calls.filter((call) => call.method === "agent");
const agentCalls = ctx.calls.filter((call) => call.method === "agent");
expect(agentCalls).toHaveLength(2);
const first = agentCalls[0]?.params as
@@ -259,7 +270,7 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
expect(first?.deliver).toBe(false);
expect(first?.channel).toBe("discord");
expect(first?.sessionKey?.startsWith("agent:main:subagent:")).toBe(true);
expect(childSessionKey?.startsWith("agent:main:subagent:")).toBe(true);
expect(child.sessionKey?.startsWith("agent:main:subagent:")).toBe(true);
const second = agentCalls[1]?.params as
| {
@@ -272,7 +283,7 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
expect(second?.deliver).toBe(true);
expect(second?.message).toContain("subagent task");
const sendCalls = calls.filter((c) => c.method === "send");
const sendCalls = ctx.calls.filter((c) => c.method === "send");
expect(sendCalls.length).toBe(0);
expect(deletedKey?.startsWith("agent:main:subagent:")).toBe(true);
@@ -281,65 +292,19 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
it("sessions_spawn deletes session when cleanup=delete via agent.wait", async () => {
resetSubagentRegistryForTests();
callGatewayMock.mockReset();
const calls: Array<{ method?: string; params?: unknown }> = [];
let agentCallCount = 0;
let deletedKey: string | undefined;
let childRunId: string | undefined;
let childSessionKey: string | undefined;
const waitCalls: Array<{ runId?: string; timeoutMs?: number }> = [];
callGatewayMock.mockImplementation(async (opts: unknown) => {
const request = opts as { method?: string; params?: unknown };
calls.push(request);
if (request.method === "agent") {
agentCallCount += 1;
const runId = `run-${agentCallCount}`;
const params = request.params as {
message?: string;
sessionKey?: string;
channel?: string;
timeout?: number;
lane?: string;
};
// Only capture the first agent call (subagent spawn, not main agent trigger)
if (params?.lane === "subagent") {
childRunId = runId;
childSessionKey = params?.sessionKey ?? "";
expect(params?.channel).toBe("discord");
expect(params?.timeout).toBe(1);
}
return {
runId,
status: "accepted",
acceptedAt: 2000 + agentCallCount,
};
}
if (request.method === "agent.wait") {
const params = request.params as { runId?: string; timeoutMs?: number } | undefined;
waitCalls.push(params ?? {});
return {
runId: params?.runId ?? "run-1",
status: "ok",
startedAt: 3000,
endedAt: 4000,
};
}
if (request.method === "chat.history") {
return {
messages: [
{
role: "assistant",
content: [{ type: "text", text: "done" }],
},
],
};
}
if (request.method === "sessions.delete") {
const params = request.params as { key?: string } | undefined;
deletedKey = params?.key;
return { ok: true };
}
return {};
const ctx = setupSessionsSpawnGatewayMock({
includeChatHistory: true,
onAgentSubagentSpawn: (params) => {
const rec = params as { channel?: string; timeout?: number } | undefined;
expect(rec?.channel).toBe("discord");
expect(rec?.timeout).toBe(1);
},
onSessionsDelete: (params) => {
const rec = params as { key?: string } | undefined;
deletedKey = rec?.key;
},
agentWaitResult: { status: "ok", startedAt: 3000, endedAt: 4000 },
});
const tool = createOpenClawTools({
@@ -360,16 +325,20 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
runId: "run-1",
});
await sleep(0);
await sleep(0);
await sleep(0);
vi.useFakeTimers();
try {
await vi.advanceTimersByTimeAsync(500);
} finally {
vi.useRealTimers();
}
const childWait = waitCalls.find((call) => call.runId === childRunId);
const child = ctx.getChild();
const childWait = ctx.waitCalls.find((call) => call.runId === child.runId);
expect(childWait?.timeoutMs).toBe(1000);
expect(childSessionKey?.startsWith("agent:main:subagent:")).toBe(true);
expect(child.sessionKey?.startsWith("agent:main:subagent:")).toBe(true);
// Two agent calls: subagent spawn + main agent trigger
const agentCalls = calls.filter((call) => call.method === "agent");
const agentCalls = ctx.calls.filter((call) => call.method === "agent");
expect(agentCalls).toHaveLength(2);
// First call: subagent spawn
@@ -382,7 +351,7 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
expect(second?.deliver).toBe(true);
// No direct send to external channel (main agent handles delivery)
const sendCalls = calls.filter((c) => c.method === "send");
const sendCalls = ctx.calls.filter((c) => c.method === "send");
expect(sendCalls.length).toBe(0);
// Session should be deleted
@@ -446,9 +415,12 @@ describe("openclaw-tools: subagents (sessions_spawn lifecycle)", () => {
runId: "run-1",
});
await sleep(0);
await sleep(0);
await sleep(0);
vi.useFakeTimers();
try {
await vi.advanceTimersByTimeAsync(500);
} finally {
vi.useRealTimers();
}
const mainAgentCall = calls
.filter((call) => call.method === "agent")

View File

@@ -271,7 +271,9 @@ describe("openclaw-tools: subagents (sessions_spawn model + thinking)", () => {
modelApplied: true,
});
const patchCall = calls.find((call) => call.method === "sessions.patch");
const patchCall = calls.find(
(call) => call.method === "sessions.patch" && (call.params as { model?: string })?.model,
);
expect(patchCall?.params).toMatchObject({
model: "opencode/claude",
});
@@ -287,7 +289,11 @@ describe("openclaw-tools: subagents (sessions_spawn model + thinking)", () => {
const request = opts as { method?: string; params?: unknown };
calls.push(request);
if (request.method === "sessions.patch") {
throw new Error("invalid model: bad-model");
const params = request.params as { model?: unknown } | undefined;
if (typeof params?.model === "string" && params.model.trim()) {
throw new Error("invalid model: bad-model");
}
return { ok: true };
}
if (request.method === "agent") {
agentCallCount += 1;

View File

@@ -28,6 +28,7 @@ const SessionsSpawnToolSchema = Type.Object({
model: Type.Optional(Type.String()),
thinking: Type.Optional(Type.String()),
runTimeoutSeconds: Type.Optional(Type.Number({ minimum: 0 })),
timeoutSeconds: Type.Optional(Type.Number({ minimum: 0 })),
cleanup: optionalStringEnum(["delete", "keep"] as const),
});
@@ -97,10 +98,14 @@ export function createSessionsSpawnTool(opts?: {
});
// Default to 0 (no timeout) when omitted. Sub-agent runs are long-lived
// by default and should not inherit the main agent 600s timeout.
const legacyTimeoutSeconds =
typeof params.timeoutSeconds === "number" && Number.isFinite(params.timeoutSeconds)
? Math.max(0, Math.floor(params.timeoutSeconds))
: undefined;
const runTimeoutSeconds =
typeof params.runTimeoutSeconds === "number" && Number.isFinite(params.runTimeoutSeconds)
? Math.max(0, Math.floor(params.runTimeoutSeconds))
: 0;
: (legacyTimeoutSeconds ?? 0);
let modelWarning: string | undefined;
let modelApplied = false;