test(agents): dedupe ping-pong loop test scaffolding

This commit is contained in:
Peter Steinberger
2026-02-19 07:44:30 +00:00
parent 3cb0c96740
commit d7b2efc2e7

View File

@@ -45,6 +45,50 @@ function recordSuccessfulCall(
});
}
/**
 * Records `params.count` successful tool calls on `params.state`, alternating
 * between the "read" tool (even indexes) and the "list" tool (odd indexes).
 *
 * Every call goes through `recordSuccessfulCall` with a result made of a
 * single text content item plus `details: { ok: true }`. The text for each
 * call is produced by `params.textAtIndex(toolName, index)`, and the loop
 * index is forwarded as the final argument.
 */
function recordSuccessfulPingPongCalls(params: {
  state: SessionState;
  readParams: { path: string };
  listParams: { dir: string };
  count: number;
  textAtIndex: (toolName: "read" | "list", index: number) => string;
}) {
  for (let callIndex = 0; callIndex < params.count; callIndex += 1) {
    // Even slots are the "read" half of the ping-pong, odd slots the "list" half.
    const isReadTurn = callIndex % 2 === 0;
    const toolName: "read" | "list" = isReadTurn ? "read" : "list";
    const toolParams = isReadTurn ? params.readParams : params.listParams;

    recordSuccessfulCall(
      params.state,
      toolName,
      toolParams,
      {
        content: [{ type: "text", text: params.textAtIndex(toolName, callIndex) }],
        details: { ok: true },
      },
      callIndex,
    );
  }
}
/**
 * Asserts that `loopResult` reports a stuck loop attributed to the
 * "ping_pong" detector with the expected level and count.
 *
 * When `expected.expectCriticalText` is truthy, the result message must also
 * contain the text "CRITICAL".
 */
function expectPingPongLoop(
  loopResult: ReturnType<typeof detectToolCallLoop>,
  expected: { level: "warning" | "critical"; count: number; expectCriticalText?: boolean },
) {
  expect(loopResult.stuck).toBe(true);

  // The assertion above already fails the test for a non-stuck result; this
  // branch only narrows the type so the stuck-only fields are accessible.
  if (loopResult.stuck) {
    const { level: wantLevel, count: wantCount, expectCriticalText } = expected;

    expect(loopResult.level).toBe(wantLevel);
    expect(loopResult.detector).toBe("ping_pong");
    expect(loopResult.count).toBe(wantCount);

    if (expectCriticalText) {
      expect(loopResult.message).toContain("CRITICAL");
    }
  }
}
describe("tool-loop-detection", () => {
describe("hashToolCall", () => {
it("creates consistent hash for same tool and params", () => {
@@ -356,11 +400,8 @@ describe("tool-loop-detection", () => {
}
const loopResult = detectToolCallLoop(state, "list", listParams, enabledLoopDetectionConfig);
expect(loopResult.stuck).toBe(true);
expectPingPongLoop(loopResult, { level: "warning", count: WARNING_THRESHOLD });
if (loopResult.stuck) {
expect(loopResult.level).toBe("warning");
expect(loopResult.detector).toBe("ping_pong");
expect(loopResult.count).toBe(WARNING_THRESHOLD);
expect(loopResult.message).toContain("ping-pong loop");
}
});
@@ -370,33 +411,21 @@ describe("tool-loop-detection", () => {
const readParams = { path: "/a.txt" };
const listParams = { dir: "/workspace" };
for (let i = 0; i < CRITICAL_THRESHOLD - 1; i += 1) {
if (i % 2 === 0) {
recordSuccessfulCall(
state,
"read",
readParams,
{ content: [{ type: "text", text: "read stable" }], details: { ok: true } },
i,
);
} else {
recordSuccessfulCall(
state,
"list",
listParams,
{ content: [{ type: "text", text: "list stable" }], details: { ok: true } },
i,
);
}
}
recordSuccessfulPingPongCalls({
state,
readParams,
listParams,
count: CRITICAL_THRESHOLD - 1,
textAtIndex: (toolName) => (toolName === "read" ? "read stable" : "list stable"),
});
const loopResult = detectToolCallLoop(state, "list", listParams, enabledLoopDetectionConfig);
expect(loopResult.stuck).toBe(true);
expectPingPongLoop(loopResult, {
level: "critical",
count: CRITICAL_THRESHOLD,
expectCriticalText: true,
});
if (loopResult.stuck) {
expect(loopResult.level).toBe("critical");
expect(loopResult.detector).toBe("ping_pong");
expect(loopResult.count).toBe(CRITICAL_THRESHOLD);
expect(loopResult.message).toContain("CRITICAL");
expect(loopResult.message).toContain("ping-pong loop");
}
});
@@ -406,33 +435,16 @@ describe("tool-loop-detection", () => {
const readParams = { path: "/a.txt" };
const listParams = { dir: "/workspace" };
for (let i = 0; i < CRITICAL_THRESHOLD - 1; i += 1) {
if (i % 2 === 0) {
recordSuccessfulCall(
state,
"read",
readParams,
{ content: [{ type: "text", text: `read ${i}` }], details: { ok: true } },
i,
);
} else {
recordSuccessfulCall(
state,
"list",
listParams,
{ content: [{ type: "text", text: `list ${i}` }], details: { ok: true } },
i,
);
}
}
recordSuccessfulPingPongCalls({
state,
readParams,
listParams,
count: CRITICAL_THRESHOLD - 1,
textAtIndex: (toolName, index) => `${toolName} ${index}`,
});
const loopResult = detectToolCallLoop(state, "list", listParams, enabledLoopDetectionConfig);
expect(loopResult.stuck).toBe(true);
if (loopResult.stuck) {
expect(loopResult.level).toBe("warning");
expect(loopResult.detector).toBe("ping_pong");
expect(loopResult.count).toBe(CRITICAL_THRESHOLD);
}
expectPingPongLoop(loopResult, { level: "warning", count: CRITICAL_THRESHOLD });
});
it("does not flag ping-pong when alternation is broken", () => {