mirror of
https://github.com/openclaw/openclaw.git
synced 2026-02-19 18:39:20 -05:00
feat: add stuck loop detection and exponential backoff infrastructure for agent polling (#17118)
Merged via /review-pr -> /prepare-pr -> /merge-pr.
Prepared head SHA: eebabf679b
Co-authored-by: akramcodez <179671552+akramcodez@users.noreply.github.com>
Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com>
Reviewed-by: @gumadeiras
This commit is contained in:
@@ -20,6 +20,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Gateway/Config: prevent `config.patch` object-array merges from falling back to full-array replacement when some patch entries lack `id`, so partial `agents.list` updates no longer drop unrelated agents. (#17989) Thanks @stakeswky.
|
||||
- Config/Discord: require string IDs in Discord allowlists, keep onboarding inputs string-only, and add doctor repair for numeric entries. (#18220) Thanks @thewilloftheshadow.
|
||||
- Agents/Models: probe the primary model when its auth-profile cooldown is near expiry (with per-provider throttling), so runs recover from temporary rate limits without staying on fallback models until restart. (#17478) Thanks @PlayerGhost.
|
||||
- Agents/Tools: make loop detection progress-aware and phased by hard-blocking known `process(action=poll|log)` no-progress loops, warning on generic identical-call repeats, warning + no-progress-blocking ping-pong alternation loops (10/20), coalescing repeated warning spam into threshold buckets (including canonical ping-pong pairs), adding a global circuit breaker at 30 no-progress repeats, and emitting structured diagnostic `tool.loop` warning/error events for loop actions. (#16808) Thanks @akramcodez and @beca-oc.
|
||||
- Agents/Tools: scope the `message` tool schema to the active channel so Telegram uses `buttons` and Discord uses `components`. (#18215) Thanks @obviyus.
|
||||
- Discord: optimize reaction notification handling to skip unnecessary message fetches in `off`/`all`/`allowlist` modes, streamline reaction routing, and improve reaction emoji formatting. (#18248) Thanks @thewilloftheshadow and @victorGPT.
|
||||
- Telegram: keep draft-stream preview replies attached to the user message for `replyToMode: "all"` in groups and DMs, preserving threaded reply context from preview through finalization. (#17880) Thanks @yinghaosang.
|
||||
|
||||
173
src/agents/command-poll-backoff.test.ts
Normal file
173
src/agents/command-poll-backoff.test.ts
Normal file
@@ -0,0 +1,173 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import type { SessionState } from "../logging/diagnostic-session-state.js";
|
||||
import {
|
||||
calculateBackoffMs,
|
||||
getCommandPollSuggestion,
|
||||
pruneStaleCommandPolls,
|
||||
recordCommandPoll,
|
||||
resetCommandPollCount,
|
||||
} from "./command-poll-backoff.js";
|
||||
|
||||
describe("command-poll-backoff", () => {
|
||||
describe("calculateBackoffMs", () => {
|
||||
it("returns 5s for first poll", () => {
|
||||
expect(calculateBackoffMs(0)).toBe(5000);
|
||||
});
|
||||
|
||||
it("returns 10s for second poll", () => {
|
||||
expect(calculateBackoffMs(1)).toBe(10000);
|
||||
});
|
||||
|
||||
it("returns 30s for third poll", () => {
|
||||
expect(calculateBackoffMs(2)).toBe(30000);
|
||||
});
|
||||
|
||||
it("returns 60s for fourth and subsequent polls (capped)", () => {
|
||||
expect(calculateBackoffMs(3)).toBe(60000);
|
||||
expect(calculateBackoffMs(4)).toBe(60000);
|
||||
expect(calculateBackoffMs(10)).toBe(60000);
|
||||
expect(calculateBackoffMs(100)).toBe(60000);
|
||||
});
|
||||
});
|
||||
|
||||
describe("recordCommandPoll", () => {
|
||||
it("returns 5s on first no-output poll", () => {
|
||||
const state: SessionState = {
|
||||
lastActivity: Date.now(),
|
||||
state: "processing",
|
||||
queueDepth: 0,
|
||||
};
|
||||
const retryMs = recordCommandPoll(state, "cmd-123", false);
|
||||
expect(retryMs).toBe(5000);
|
||||
expect(state.commandPollCounts?.get("cmd-123")?.count).toBe(0); // First poll = index 0
|
||||
});
|
||||
|
||||
it("increments count and increases backoff on consecutive no-output polls", () => {
|
||||
const state: SessionState = {
|
||||
lastActivity: Date.now(),
|
||||
state: "processing",
|
||||
queueDepth: 0,
|
||||
};
|
||||
|
||||
expect(recordCommandPoll(state, "cmd-123", false)).toBe(5000); // count=0 -> 5s
|
||||
expect(recordCommandPoll(state, "cmd-123", false)).toBe(10000); // count=1 -> 10s
|
||||
expect(recordCommandPoll(state, "cmd-123", false)).toBe(30000); // count=2 -> 30s
|
||||
expect(recordCommandPoll(state, "cmd-123", false)).toBe(60000); // count=3 -> 60s
|
||||
expect(recordCommandPoll(state, "cmd-123", false)).toBe(60000); // count=4 -> 60s (capped)
|
||||
|
||||
expect(state.commandPollCounts?.get("cmd-123")?.count).toBe(4); // 5 polls = index 4
|
||||
});
|
||||
|
||||
it("resets count when poll returns new output", () => {
|
||||
const state: SessionState = {
|
||||
lastActivity: Date.now(),
|
||||
state: "processing",
|
||||
queueDepth: 0,
|
||||
};
|
||||
|
||||
recordCommandPoll(state, "cmd-123", false);
|
||||
recordCommandPoll(state, "cmd-123", false);
|
||||
recordCommandPoll(state, "cmd-123", false);
|
||||
expect(state.commandPollCounts?.get("cmd-123")?.count).toBe(2); // 3 polls = index 2
|
||||
|
||||
// New output resets count
|
||||
const retryMs = recordCommandPoll(state, "cmd-123", true);
|
||||
expect(retryMs).toBe(5000); // Back to first poll delay
|
||||
expect(state.commandPollCounts?.get("cmd-123")?.count).toBe(0);
|
||||
});
|
||||
|
||||
it("tracks different commands independently", () => {
|
||||
const state: SessionState = {
|
||||
lastActivity: Date.now(),
|
||||
state: "processing",
|
||||
queueDepth: 0,
|
||||
};
|
||||
|
||||
recordCommandPoll(state, "cmd-1", false);
|
||||
recordCommandPoll(state, "cmd-1", false);
|
||||
recordCommandPoll(state, "cmd-2", false);
|
||||
|
||||
expect(state.commandPollCounts?.get("cmd-1")?.count).toBe(1); // 2 polls = index 1
|
||||
expect(state.commandPollCounts?.get("cmd-2")?.count).toBe(0); // 1 poll = index 0
|
||||
});
|
||||
});
|
||||
|
||||
describe("getCommandPollSuggestion", () => {
|
||||
it("returns undefined for untracked command", () => {
|
||||
const state: SessionState = {
|
||||
lastActivity: Date.now(),
|
||||
state: "processing",
|
||||
queueDepth: 0,
|
||||
};
|
||||
expect(getCommandPollSuggestion(state, "unknown")).toBeUndefined();
|
||||
});
|
||||
|
||||
it("returns current backoff for tracked command", () => {
|
||||
const state: SessionState = {
|
||||
lastActivity: Date.now(),
|
||||
state: "processing",
|
||||
queueDepth: 0,
|
||||
};
|
||||
|
||||
recordCommandPoll(state, "cmd-123", false);
|
||||
recordCommandPoll(state, "cmd-123", false);
|
||||
|
||||
expect(getCommandPollSuggestion(state, "cmd-123")).toBe(10000);
|
||||
});
|
||||
});
|
||||
|
||||
describe("resetCommandPollCount", () => {
|
||||
it("removes command from tracking", () => {
|
||||
const state: SessionState = {
|
||||
lastActivity: Date.now(),
|
||||
state: "processing",
|
||||
queueDepth: 0,
|
||||
};
|
||||
|
||||
recordCommandPoll(state, "cmd-123", false);
|
||||
expect(state.commandPollCounts?.has("cmd-123")).toBe(true);
|
||||
|
||||
resetCommandPollCount(state, "cmd-123");
|
||||
expect(state.commandPollCounts?.has("cmd-123")).toBe(false);
|
||||
});
|
||||
|
||||
it("is safe to call on untracked command", () => {
|
||||
const state: SessionState = {
|
||||
lastActivity: Date.now(),
|
||||
state: "processing",
|
||||
queueDepth: 0,
|
||||
};
|
||||
|
||||
expect(() => resetCommandPollCount(state, "unknown")).not.toThrow();
|
||||
});
|
||||
});
|
||||
|
||||
describe("pruneStaleCommandPolls", () => {
|
||||
it("removes polls older than maxAge", () => {
|
||||
const state: SessionState = {
|
||||
lastActivity: Date.now(),
|
||||
state: "processing",
|
||||
queueDepth: 0,
|
||||
commandPollCounts: new Map([
|
||||
["cmd-old", { count: 5, lastPollAt: Date.now() - 7200000 }], // 2 hours ago
|
||||
["cmd-new", { count: 3, lastPollAt: Date.now() - 1000 }], // 1 second ago
|
||||
]),
|
||||
};
|
||||
|
||||
pruneStaleCommandPolls(state, 3600000); // 1 hour max age
|
||||
|
||||
expect(state.commandPollCounts?.has("cmd-old")).toBe(false);
|
||||
expect(state.commandPollCounts?.has("cmd-new")).toBe(true);
|
||||
});
|
||||
|
||||
it("handles empty state gracefully", () => {
|
||||
const state: SessionState = {
|
||||
lastActivity: Date.now(),
|
||||
state: "idle",
|
||||
queueDepth: 0,
|
||||
};
|
||||
|
||||
expect(() => pruneStaleCommandPolls(state)).not.toThrow();
|
||||
});
|
||||
});
|
||||
});
|
||||
82
src/agents/command-poll-backoff.ts
Normal file
82
src/agents/command-poll-backoff.ts
Normal file
@@ -0,0 +1,82 @@
|
||||
import type { SessionState } from "../logging/diagnostic-session-state.js";
|
||||
|
||||
// Exponential backoff schedule for command polling
|
||||
const BACKOFF_SCHEDULE_MS = [5000, 10000, 30000, 60000];
|
||||
|
||||
/**
|
||||
* Calculate suggested retry delay based on consecutive no-output poll count.
|
||||
* Implements exponential backoff schedule: 5s → 10s → 30s → 60s (capped).
|
||||
*/
|
||||
export function calculateBackoffMs(consecutiveNoOutputPolls: number): number {
|
||||
const index = Math.min(consecutiveNoOutputPolls, BACKOFF_SCHEDULE_MS.length - 1);
|
||||
return BACKOFF_SCHEDULE_MS[index] ?? 60000;
|
||||
}
|
||||
|
||||
/**
|
||||
* Record a command poll and return suggested retry delay.
|
||||
* @param state Session state to track polling in
|
||||
* @param commandId Unique identifier for the command being polled
|
||||
* @param hasNewOutput Whether this poll returned new output
|
||||
* @returns Suggested delay in milliseconds before next poll
|
||||
*/
|
||||
export function recordCommandPoll(
|
||||
state: SessionState,
|
||||
commandId: string,
|
||||
hasNewOutput: boolean,
|
||||
): number {
|
||||
if (!state.commandPollCounts) {
|
||||
state.commandPollCounts = new Map();
|
||||
}
|
||||
|
||||
const existing = state.commandPollCounts.get(commandId);
|
||||
const now = Date.now();
|
||||
|
||||
if (hasNewOutput) {
|
||||
state.commandPollCounts.set(commandId, { count: 0, lastPollAt: now });
|
||||
return BACKOFF_SCHEDULE_MS[0] ?? 5000;
|
||||
}
|
||||
|
||||
const newCount = (existing?.count ?? -1) + 1;
|
||||
state.commandPollCounts.set(commandId, { count: newCount, lastPollAt: now });
|
||||
|
||||
return calculateBackoffMs(newCount);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get current suggested backoff for a command without modifying state.
|
||||
* Useful for checking current backoff level.
|
||||
*/
|
||||
export function getCommandPollSuggestion(
|
||||
state: SessionState,
|
||||
commandId: string,
|
||||
): number | undefined {
|
||||
const pollData = state.commandPollCounts?.get(commandId);
|
||||
if (!pollData) {
|
||||
return undefined;
|
||||
}
|
||||
return calculateBackoffMs(pollData.count);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reset poll count for a command (e.g., when command completes).
|
||||
*/
|
||||
export function resetCommandPollCount(state: SessionState, commandId: string): void {
|
||||
state.commandPollCounts?.delete(commandId);
|
||||
}
|
||||
|
||||
/**
|
||||
* Prune stale command poll records (older than 1 hour).
|
||||
* Call periodically to prevent memory bloat.
|
||||
*/
|
||||
export function pruneStaleCommandPolls(state: SessionState, maxAgeMs = 3600000): void {
|
||||
if (!state.commandPollCounts) {
|
||||
return;
|
||||
}
|
||||
|
||||
const now = Date.now();
|
||||
for (const [commandId, data] of state.commandPollCounts.entries()) {
|
||||
if (now - data.lastPollAt > maxAgeMs) {
|
||||
state.commandPollCounts.delete(commandId);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,5 @@
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import { resetDiagnosticSessionStateForTest } from "../logging/diagnostic-session-state.js";
|
||||
import { getGlobalHookRunner } from "../plugins/hook-runner-global.js";
|
||||
import { toClientToolDefinitions, toToolDefinitions } from "./pi-tool-definition-adapter.js";
|
||||
import { wrapToolWithBeforeToolCallHook } from "./pi-tools.before-tool-call.js";
|
||||
@@ -14,6 +15,7 @@ describe("before_tool_call hook integration", () => {
|
||||
};
|
||||
|
||||
beforeEach(() => {
|
||||
resetDiagnosticSessionStateForTest();
|
||||
hookRunner = {
|
||||
hasHooks: vi.fn(),
|
||||
runBeforeToolCall: vi.fn(),
|
||||
@@ -115,6 +117,7 @@ describe("before_tool_call hook deduplication (#15502)", () => {
|
||||
};
|
||||
|
||||
beforeEach(() => {
|
||||
resetDiagnosticSessionStateForTest();
|
||||
hookRunner = {
|
||||
hasHooks: vi.fn(() => true),
|
||||
runBeforeToolCall: vi.fn(async () => undefined),
|
||||
@@ -153,6 +156,7 @@ describe("before_tool_call hook integration for client tools", () => {
|
||||
};
|
||||
|
||||
beforeEach(() => {
|
||||
resetDiagnosticSessionStateForTest();
|
||||
hookRunner = {
|
||||
hasHooks: vi.fn(),
|
||||
runBeforeToolCall: vi.fn(),
|
||||
|
||||
363
src/agents/pi-tools.before-tool-call.test.ts
Normal file
363
src/agents/pi-tools.before-tool-call.test.ts
Normal file
@@ -0,0 +1,363 @@
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import type { AnyAgentTool } from "./tools/common.js";
|
||||
import {
|
||||
onDiagnosticEvent,
|
||||
resetDiagnosticEventsForTest,
|
||||
type DiagnosticToolLoopEvent,
|
||||
} from "../infra/diagnostic-events.js";
|
||||
import { resetDiagnosticSessionStateForTest } from "../logging/diagnostic-session-state.js";
|
||||
import { getGlobalHookRunner } from "../plugins/hook-runner-global.js";
|
||||
import { wrapToolWithBeforeToolCallHook } from "./pi-tools.before-tool-call.js";
|
||||
import { CRITICAL_THRESHOLD, GLOBAL_CIRCUIT_BREAKER_THRESHOLD } from "./tool-loop-detection.js";
|
||||
|
||||
vi.mock("../plugins/hook-runner-global.js");
|
||||
|
||||
const mockGetGlobalHookRunner = vi.mocked(getGlobalHookRunner);
|
||||
|
||||
describe("before_tool_call loop detection behavior", () => {
|
||||
let hookRunner: {
|
||||
hasHooks: ReturnType<typeof vi.fn>;
|
||||
runBeforeToolCall: ReturnType<typeof vi.fn>;
|
||||
};
|
||||
|
||||
beforeEach(() => {
|
||||
resetDiagnosticSessionStateForTest();
|
||||
resetDiagnosticEventsForTest();
|
||||
hookRunner = {
|
||||
hasHooks: vi.fn(),
|
||||
runBeforeToolCall: vi.fn(),
|
||||
};
|
||||
// oxlint-disable-next-line typescript/no-explicit-any
|
||||
mockGetGlobalHookRunner.mockReturnValue(hookRunner as any);
|
||||
hookRunner.hasHooks.mockReturnValue(false);
|
||||
});
|
||||
|
||||
it("blocks known poll loops when no progress repeats", async () => {
|
||||
const execute = vi.fn().mockResolvedValue({
|
||||
content: [{ type: "text", text: "(no new output)\n\nProcess still running." }],
|
||||
details: { status: "running", aggregated: "steady" },
|
||||
});
|
||||
// oxlint-disable-next-line typescript/no-explicit-any
|
||||
const tool = wrapToolWithBeforeToolCallHook({ name: "process", execute } as any, {
|
||||
agentId: "main",
|
||||
sessionKey: "main",
|
||||
});
|
||||
const params = { action: "poll", sessionId: "sess-1" };
|
||||
|
||||
for (let i = 0; i < CRITICAL_THRESHOLD; i += 1) {
|
||||
await expect(tool.execute(`poll-${i}`, params, undefined, undefined)).resolves.toBeDefined();
|
||||
}
|
||||
|
||||
await expect(
|
||||
tool.execute(`poll-${CRITICAL_THRESHOLD}`, params, undefined, undefined),
|
||||
).rejects.toThrow("CRITICAL");
|
||||
});
|
||||
|
||||
it("does not block known poll loops when output progresses", async () => {
|
||||
const execute = vi.fn().mockImplementation(async (toolCallId: string) => {
|
||||
return {
|
||||
content: [{ type: "text", text: `output ${toolCallId}` }],
|
||||
details: { status: "running", aggregated: `output ${toolCallId}` },
|
||||
};
|
||||
});
|
||||
// oxlint-disable-next-line typescript/no-explicit-any
|
||||
const tool = wrapToolWithBeforeToolCallHook({ name: "process", execute } as any, {
|
||||
agentId: "main",
|
||||
sessionKey: "main",
|
||||
});
|
||||
const params = { action: "poll", sessionId: "sess-2" };
|
||||
|
||||
for (let i = 0; i < CRITICAL_THRESHOLD + 5; i += 1) {
|
||||
await expect(
|
||||
tool.execute(`poll-progress-${i}`, params, undefined, undefined),
|
||||
).resolves.toBeDefined();
|
||||
}
|
||||
});
|
||||
|
||||
it("keeps generic repeated calls warn-only below global breaker", async () => {
|
||||
const execute = vi.fn().mockResolvedValue({
|
||||
content: [{ type: "text", text: "same output" }],
|
||||
details: { ok: true },
|
||||
});
|
||||
// oxlint-disable-next-line typescript/no-explicit-any
|
||||
const tool = wrapToolWithBeforeToolCallHook({ name: "read", execute } as any, {
|
||||
agentId: "main",
|
||||
sessionKey: "main",
|
||||
});
|
||||
const params = { path: "/tmp/file" };
|
||||
|
||||
for (let i = 0; i < CRITICAL_THRESHOLD + 5; i += 1) {
|
||||
await expect(tool.execute(`read-${i}`, params, undefined, undefined)).resolves.toBeDefined();
|
||||
}
|
||||
});
|
||||
|
||||
it("blocks generic repeated no-progress calls at global breaker threshold", async () => {
|
||||
const execute = vi.fn().mockResolvedValue({
|
||||
content: [{ type: "text", text: "same output" }],
|
||||
details: { ok: true },
|
||||
});
|
||||
// oxlint-disable-next-line typescript/no-explicit-any
|
||||
const tool = wrapToolWithBeforeToolCallHook({ name: "read", execute } as any, {
|
||||
agentId: "main",
|
||||
sessionKey: "main",
|
||||
});
|
||||
const params = { path: "/tmp/file" };
|
||||
|
||||
for (let i = 0; i < GLOBAL_CIRCUIT_BREAKER_THRESHOLD; i += 1) {
|
||||
await expect(tool.execute(`read-${i}`, params, undefined, undefined)).resolves.toBeDefined();
|
||||
}
|
||||
|
||||
await expect(
|
||||
tool.execute(`read-${GLOBAL_CIRCUIT_BREAKER_THRESHOLD}`, params, undefined, undefined),
|
||||
).rejects.toThrow("global circuit breaker");
|
||||
});
|
||||
|
||||
it("coalesces repeated generic warning events into threshold buckets", async () => {
|
||||
const emitted: DiagnosticToolLoopEvent[] = [];
|
||||
const stop = onDiagnosticEvent((evt) => {
|
||||
if (evt.type === "tool.loop" && evt.level === "warning") {
|
||||
emitted.push(evt);
|
||||
}
|
||||
});
|
||||
try {
|
||||
const execute = vi.fn().mockResolvedValue({
|
||||
content: [{ type: "text", text: "same output" }],
|
||||
details: { ok: true },
|
||||
});
|
||||
// oxlint-disable-next-line typescript/no-explicit-any
|
||||
const tool = wrapToolWithBeforeToolCallHook({ name: "read", execute } as any, {
|
||||
agentId: "main",
|
||||
sessionKey: "main",
|
||||
});
|
||||
const params = { path: "/tmp/file" };
|
||||
|
||||
for (let i = 0; i < 21; i += 1) {
|
||||
await tool.execute(`read-bucket-${i}`, params, undefined, undefined);
|
||||
}
|
||||
|
||||
const genericWarns = emitted.filter((evt) => evt.detector === "generic_repeat");
|
||||
expect(genericWarns.map((evt) => evt.count)).toEqual([10, 20]);
|
||||
} finally {
|
||||
stop();
|
||||
}
|
||||
});
|
||||
|
||||
it("emits structured warning diagnostic events for ping-pong loops", async () => {
|
||||
const emitted: DiagnosticToolLoopEvent[] = [];
|
||||
const stop = onDiagnosticEvent((evt) => {
|
||||
if (evt.type === "tool.loop") {
|
||||
emitted.push(evt);
|
||||
}
|
||||
});
|
||||
try {
|
||||
const readExecute = vi.fn().mockResolvedValue({
|
||||
content: [{ type: "text", text: "read ok" }],
|
||||
details: { ok: true },
|
||||
});
|
||||
const listExecute = vi.fn().mockResolvedValue({
|
||||
content: [{ type: "text", text: "list ok" }],
|
||||
details: { ok: true },
|
||||
});
|
||||
const readTool = wrapToolWithBeforeToolCallHook(
|
||||
{ name: "read", execute: readExecute } as unknown as AnyAgentTool,
|
||||
{
|
||||
agentId: "main",
|
||||
sessionKey: "main",
|
||||
},
|
||||
);
|
||||
const listTool = wrapToolWithBeforeToolCallHook(
|
||||
{ name: "list", execute: listExecute } as unknown as AnyAgentTool,
|
||||
{
|
||||
agentId: "main",
|
||||
sessionKey: "main",
|
||||
},
|
||||
);
|
||||
|
||||
for (let i = 0; i < 9; i += 1) {
|
||||
if (i % 2 === 0) {
|
||||
await readTool.execute(`read-${i}`, { path: "/a.txt" }, undefined, undefined);
|
||||
} else {
|
||||
await listTool.execute(`list-${i}`, { dir: "/workspace" }, undefined, undefined);
|
||||
}
|
||||
}
|
||||
|
||||
await listTool.execute("list-9", { dir: "/workspace" }, undefined, undefined);
|
||||
await readTool.execute("read-10", { path: "/a.txt" }, undefined, undefined);
|
||||
await listTool.execute("list-11", { dir: "/workspace" }, undefined, undefined);
|
||||
|
||||
const pingPongWarns = emitted.filter(
|
||||
(evt) => evt.level === "warning" && evt.detector === "ping_pong",
|
||||
);
|
||||
expect(pingPongWarns).toHaveLength(1);
|
||||
const loopEvent = pingPongWarns[0];
|
||||
expect(loopEvent?.type).toBe("tool.loop");
|
||||
expect(loopEvent?.level).toBe("warning");
|
||||
expect(loopEvent?.action).toBe("warn");
|
||||
expect(loopEvent?.detector).toBe("ping_pong");
|
||||
expect(loopEvent?.count).toBe(10);
|
||||
expect(loopEvent?.toolName).toBe("list");
|
||||
} finally {
|
||||
stop();
|
||||
}
|
||||
});
|
||||
|
||||
it("blocks ping-pong loops at critical threshold and emits critical diagnostic events", async () => {
|
||||
const emitted: DiagnosticToolLoopEvent[] = [];
|
||||
const stop = onDiagnosticEvent((evt) => {
|
||||
if (evt.type === "tool.loop") {
|
||||
emitted.push(evt);
|
||||
}
|
||||
});
|
||||
try {
|
||||
const readExecute = vi.fn().mockResolvedValue({
|
||||
content: [{ type: "text", text: "read ok" }],
|
||||
details: { ok: true },
|
||||
});
|
||||
const listExecute = vi.fn().mockResolvedValue({
|
||||
content: [{ type: "text", text: "list ok" }],
|
||||
details: { ok: true },
|
||||
});
|
||||
const readTool = wrapToolWithBeforeToolCallHook(
|
||||
{ name: "read", execute: readExecute } as unknown as AnyAgentTool,
|
||||
{
|
||||
agentId: "main",
|
||||
sessionKey: "main",
|
||||
},
|
||||
);
|
||||
const listTool = wrapToolWithBeforeToolCallHook(
|
||||
{ name: "list", execute: listExecute } as unknown as AnyAgentTool,
|
||||
{
|
||||
agentId: "main",
|
||||
sessionKey: "main",
|
||||
},
|
||||
);
|
||||
|
||||
for (let i = 0; i < CRITICAL_THRESHOLD - 1; i += 1) {
|
||||
if (i % 2 === 0) {
|
||||
await readTool.execute(`read-${i}`, { path: "/a.txt" }, undefined, undefined);
|
||||
} else {
|
||||
await listTool.execute(`list-${i}`, { dir: "/workspace" }, undefined, undefined);
|
||||
}
|
||||
}
|
||||
|
||||
await expect(
|
||||
listTool.execute(
|
||||
`list-${CRITICAL_THRESHOLD - 1}`,
|
||||
{ dir: "/workspace" },
|
||||
undefined,
|
||||
undefined,
|
||||
),
|
||||
).rejects.toThrow("CRITICAL");
|
||||
|
||||
const loopEvent = emitted.at(-1);
|
||||
expect(loopEvent?.type).toBe("tool.loop");
|
||||
expect(loopEvent?.level).toBe("critical");
|
||||
expect(loopEvent?.action).toBe("block");
|
||||
expect(loopEvent?.detector).toBe("ping_pong");
|
||||
expect(loopEvent?.count).toBe(CRITICAL_THRESHOLD);
|
||||
expect(loopEvent?.toolName).toBe("list");
|
||||
} finally {
|
||||
stop();
|
||||
}
|
||||
});
|
||||
|
||||
it("does not block ping-pong at critical threshold when outcomes are progressing", async () => {
|
||||
const emitted: DiagnosticToolLoopEvent[] = [];
|
||||
const stop = onDiagnosticEvent((evt) => {
|
||||
if (evt.type === "tool.loop") {
|
||||
emitted.push(evt);
|
||||
}
|
||||
});
|
||||
try {
|
||||
const readExecute = vi.fn().mockImplementation(async (toolCallId: string) => ({
|
||||
content: [{ type: "text", text: `read ${toolCallId}` }],
|
||||
details: { ok: true },
|
||||
}));
|
||||
const listExecute = vi.fn().mockImplementation(async (toolCallId: string) => ({
|
||||
content: [{ type: "text", text: `list ${toolCallId}` }],
|
||||
details: { ok: true },
|
||||
}));
|
||||
const readTool = wrapToolWithBeforeToolCallHook(
|
||||
{ name: "read", execute: readExecute } as unknown as AnyAgentTool,
|
||||
{
|
||||
agentId: "main",
|
||||
sessionKey: "main",
|
||||
},
|
||||
);
|
||||
const listTool = wrapToolWithBeforeToolCallHook(
|
||||
{ name: "list", execute: listExecute } as unknown as AnyAgentTool,
|
||||
{
|
||||
agentId: "main",
|
||||
sessionKey: "main",
|
||||
},
|
||||
);
|
||||
|
||||
for (let i = 0; i < CRITICAL_THRESHOLD - 1; i += 1) {
|
||||
if (i % 2 === 0) {
|
||||
await readTool.execute(`read-${i}`, { path: "/a.txt" }, undefined, undefined);
|
||||
} else {
|
||||
await listTool.execute(`list-${i}`, { dir: "/workspace" }, undefined, undefined);
|
||||
}
|
||||
}
|
||||
|
||||
await expect(
|
||||
listTool.execute(
|
||||
`list-${CRITICAL_THRESHOLD - 1}`,
|
||||
{ dir: "/workspace" },
|
||||
undefined,
|
||||
undefined,
|
||||
),
|
||||
).resolves.toBeDefined();
|
||||
|
||||
const criticalPingPong = emitted.find(
|
||||
(evt) => evt.level === "critical" && evt.detector === "ping_pong",
|
||||
);
|
||||
expect(criticalPingPong).toBeUndefined();
|
||||
const warningPingPong = emitted.find(
|
||||
(evt) => evt.level === "warning" && evt.detector === "ping_pong",
|
||||
);
|
||||
expect(warningPingPong).toBeTruthy();
|
||||
} finally {
|
||||
stop();
|
||||
}
|
||||
});
|
||||
|
||||
it("emits structured critical diagnostic events when blocking loops", async () => {
|
||||
const emitted: DiagnosticToolLoopEvent[] = [];
|
||||
const stop = onDiagnosticEvent((evt) => {
|
||||
if (evt.type === "tool.loop") {
|
||||
emitted.push(evt);
|
||||
}
|
||||
});
|
||||
try {
|
||||
const execute = vi.fn().mockResolvedValue({
|
||||
content: [{ type: "text", text: "(no new output)\n\nProcess still running." }],
|
||||
details: { status: "running", aggregated: "steady" },
|
||||
});
|
||||
// oxlint-disable-next-line typescript/no-explicit-any
|
||||
const tool = wrapToolWithBeforeToolCallHook({ name: "process", execute } as any, {
|
||||
agentId: "main",
|
||||
sessionKey: "main",
|
||||
});
|
||||
const params = { action: "poll", sessionId: "sess-crit" };
|
||||
|
||||
for (let i = 0; i < CRITICAL_THRESHOLD; i += 1) {
|
||||
await tool.execute(`poll-${i}`, params, undefined, undefined);
|
||||
}
|
||||
|
||||
await expect(
|
||||
tool.execute(`poll-${CRITICAL_THRESHOLD}`, params, undefined, undefined),
|
||||
).rejects.toThrow("CRITICAL");
|
||||
|
||||
const loopEvent = emitted.at(-1);
|
||||
expect(loopEvent?.type).toBe("tool.loop");
|
||||
expect(loopEvent?.level).toBe("critical");
|
||||
expect(loopEvent?.action).toBe("block");
|
||||
expect(loopEvent?.detector).toBe("known_poll_no_progress");
|
||||
expect(loopEvent?.count).toBe(CRITICAL_THRESHOLD);
|
||||
expect(loopEvent?.toolName).toBe("process");
|
||||
} finally {
|
||||
stop();
|
||||
}
|
||||
});
|
||||
});
|
||||
@@ -1,3 +1,4 @@
|
||||
import type { SessionState } from "../logging/diagnostic-session-state.js";
|
||||
import type { AnyAgentTool } from "./tools/common.js";
|
||||
import { createSubsystemLogger } from "../logging/subsystem.js";
|
||||
import { getGlobalHookRunner } from "../plugins/hook-runner-global.js";
|
||||
@@ -15,6 +16,57 @@ const log = createSubsystemLogger("agents/tools");
|
||||
const BEFORE_TOOL_CALL_WRAPPED = Symbol("beforeToolCallWrapped");
|
||||
const adjustedParamsByToolCallId = new Map<string, unknown>();
|
||||
const MAX_TRACKED_ADJUSTED_PARAMS = 1024;
|
||||
const LOOP_WARNING_BUCKET_SIZE = 10;
|
||||
const MAX_LOOP_WARNING_KEYS = 256;
|
||||
|
||||
function shouldEmitLoopWarning(state: SessionState, warningKey: string, count: number): boolean {
|
||||
if (!state.toolLoopWarningBuckets) {
|
||||
state.toolLoopWarningBuckets = new Map();
|
||||
}
|
||||
const bucket = Math.floor(count / LOOP_WARNING_BUCKET_SIZE);
|
||||
const lastBucket = state.toolLoopWarningBuckets.get(warningKey) ?? 0;
|
||||
if (bucket <= lastBucket) {
|
||||
return false;
|
||||
}
|
||||
state.toolLoopWarningBuckets.set(warningKey, bucket);
|
||||
if (state.toolLoopWarningBuckets.size > MAX_LOOP_WARNING_KEYS) {
|
||||
const oldest = state.toolLoopWarningBuckets.keys().next().value;
|
||||
if (oldest) {
|
||||
state.toolLoopWarningBuckets.delete(oldest);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
async function recordLoopOutcome(args: {
|
||||
ctx?: HookContext;
|
||||
toolName: string;
|
||||
toolParams: unknown;
|
||||
toolCallId?: string;
|
||||
result?: unknown;
|
||||
error?: unknown;
|
||||
}): Promise<void> {
|
||||
if (!args.ctx?.sessionKey) {
|
||||
return;
|
||||
}
|
||||
try {
|
||||
const { getDiagnosticSessionState } = await import("../logging/diagnostic-session-state.js");
|
||||
const { recordToolCallOutcome } = await import("./tool-loop-detection.js");
|
||||
const sessionState = getDiagnosticSessionState({
|
||||
sessionKey: args.ctx.sessionKey,
|
||||
sessionId: args.ctx?.agentId,
|
||||
});
|
||||
recordToolCallOutcome(sessionState, {
|
||||
toolName: args.toolName,
|
||||
toolParams: args.toolParams,
|
||||
toolCallId: args.toolCallId,
|
||||
result: args.result,
|
||||
error: args.error,
|
||||
});
|
||||
} catch (err) {
|
||||
log.warn(`tool loop outcome tracking failed: tool=${args.toolName} error=${String(err)}`);
|
||||
}
|
||||
}
|
||||
|
||||
export async function runBeforeToolCallHook(args: {
|
||||
toolName: string;
|
||||
@@ -25,6 +77,58 @@ export async function runBeforeToolCallHook(args: {
|
||||
const toolName = normalizeToolName(args.toolName || "tool");
|
||||
const params = args.params;
|
||||
|
||||
if (args.ctx?.sessionKey) {
|
||||
const { getDiagnosticSessionState } = await import("../logging/diagnostic-session-state.js");
|
||||
const { logToolLoopAction } = await import("../logging/diagnostic.js");
|
||||
const { detectToolCallLoop, recordToolCall } = await import("./tool-loop-detection.js");
|
||||
|
||||
const sessionState = getDiagnosticSessionState({
|
||||
sessionKey: args.ctx.sessionKey,
|
||||
sessionId: args.ctx?.agentId,
|
||||
});
|
||||
|
||||
const loopResult = detectToolCallLoop(sessionState, toolName, params);
|
||||
|
||||
if (loopResult.stuck) {
|
||||
if (loopResult.level === "critical") {
|
||||
log.error(`Blocking ${toolName} due to critical loop: ${loopResult.message}`);
|
||||
logToolLoopAction({
|
||||
sessionKey: args.ctx.sessionKey,
|
||||
sessionId: args.ctx?.agentId,
|
||||
toolName,
|
||||
level: "critical",
|
||||
action: "block",
|
||||
detector: loopResult.detector,
|
||||
count: loopResult.count,
|
||||
message: loopResult.message,
|
||||
pairedToolName: loopResult.pairedToolName,
|
||||
});
|
||||
return {
|
||||
blocked: true,
|
||||
reason: loopResult.message,
|
||||
};
|
||||
} else {
|
||||
const warningKey = loopResult.warningKey ?? `${loopResult.detector}:${toolName}`;
|
||||
if (shouldEmitLoopWarning(sessionState, warningKey, loopResult.count)) {
|
||||
log.warn(`Loop warning for ${toolName}: ${loopResult.message}`);
|
||||
logToolLoopAction({
|
||||
sessionKey: args.ctx.sessionKey,
|
||||
sessionId: args.ctx?.agentId,
|
||||
toolName,
|
||||
level: "warning",
|
||||
action: "warn",
|
||||
detector: loopResult.detector,
|
||||
count: loopResult.count,
|
||||
message: loopResult.message,
|
||||
pairedToolName: loopResult.pairedToolName,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
recordToolCall(sessionState, toolName, params, args.toolCallId);
|
||||
}
|
||||
|
||||
const hookRunner = getGlobalHookRunner();
|
||||
if (!hookRunner?.hasHooks("before_tool_call")) {
|
||||
return { blocked: false, params: args.params };
|
||||
@@ -95,7 +199,27 @@ export function wrapToolWithBeforeToolCallHook(
|
||||
}
|
||||
}
|
||||
}
|
||||
return await execute(toolCallId, outcome.params, signal, onUpdate);
|
||||
const normalizedToolName = normalizeToolName(toolName || "tool");
|
||||
try {
|
||||
const result = await execute(toolCallId, outcome.params, signal, onUpdate);
|
||||
await recordLoopOutcome({
|
||||
ctx,
|
||||
toolName: normalizedToolName,
|
||||
toolParams: outcome.params,
|
||||
toolCallId,
|
||||
result,
|
||||
});
|
||||
return result;
|
||||
} catch (err) {
|
||||
await recordLoopOutcome({
|
||||
ctx,
|
||||
toolName: normalizedToolName,
|
||||
toolParams: outcome.params,
|
||||
toolCallId,
|
||||
error: err,
|
||||
});
|
||||
throw err;
|
||||
}
|
||||
},
|
||||
};
|
||||
Object.defineProperty(wrappedTool, BEFORE_TOOL_CALL_WRAPPED, {
|
||||
|
||||
422
src/agents/tool-loop-detection.test.ts
Normal file
422
src/agents/tool-loop-detection.test.ts
Normal file
@@ -0,0 +1,422 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import type { SessionState } from "../logging/diagnostic-session-state.js";
|
||||
import {
|
||||
CRITICAL_THRESHOLD,
|
||||
GLOBAL_CIRCUIT_BREAKER_THRESHOLD,
|
||||
TOOL_CALL_HISTORY_SIZE,
|
||||
WARNING_THRESHOLD,
|
||||
detectToolCallLoop,
|
||||
getToolCallStats,
|
||||
hashToolCall,
|
||||
recordToolCall,
|
||||
recordToolCallOutcome,
|
||||
} from "./tool-loop-detection.js";
|
||||
|
||||
function createState(): SessionState {
|
||||
return {
|
||||
lastActivity: Date.now(),
|
||||
state: "processing",
|
||||
queueDepth: 0,
|
||||
};
|
||||
}
|
||||
|
||||
function recordSuccessfulCall(
|
||||
state: SessionState,
|
||||
toolName: string,
|
||||
params: unknown,
|
||||
result: unknown,
|
||||
index: number,
|
||||
): void {
|
||||
const toolCallId = `${toolName}-${index}`;
|
||||
recordToolCall(state, toolName, params, toolCallId);
|
||||
recordToolCallOutcome(state, {
|
||||
toolName,
|
||||
toolParams: params,
|
||||
toolCallId,
|
||||
result,
|
||||
});
|
||||
}
|
||||
|
||||
describe("tool-loop-detection", () => {
|
||||
describe("hashToolCall", () => {
|
||||
it("creates consistent hash for same tool and params", () => {
|
||||
const hash1 = hashToolCall("read", { path: "/file.txt" });
|
||||
const hash2 = hashToolCall("read", { path: "/file.txt" });
|
||||
expect(hash1).toBe(hash2);
|
||||
});
|
||||
|
||||
it("creates different hashes for different params", () => {
|
||||
const hash1 = hashToolCall("read", { path: "/file1.txt" });
|
||||
const hash2 = hashToolCall("read", { path: "/file2.txt" });
|
||||
expect(hash1).not.toBe(hash2);
|
||||
});
|
||||
|
||||
it("creates different hashes for different tools", () => {
|
||||
const hash1 = hashToolCall("read", { path: "/file.txt" });
|
||||
const hash2 = hashToolCall("write", { path: "/file.txt" });
|
||||
expect(hash1).not.toBe(hash2);
|
||||
});
|
||||
|
||||
it("handles non-object params", () => {
|
||||
expect(() => hashToolCall("tool", "string-param")).not.toThrow();
|
||||
expect(() => hashToolCall("tool", 123)).not.toThrow();
|
||||
expect(() => hashToolCall("tool", null)).not.toThrow();
|
||||
});
|
||||
|
||||
it("produces deterministic hashes regardless of key order", () => {
|
||||
const hash1 = hashToolCall("tool", { a: 1, b: 2 });
|
||||
const hash2 = hashToolCall("tool", { b: 2, a: 1 });
|
||||
expect(hash1).toBe(hash2);
|
||||
});
|
||||
|
||||
it("keeps hashes fixed-size even for large params", () => {
|
||||
const payload = { data: "x".repeat(20_000) };
|
||||
const hash = hashToolCall("read", payload);
|
||||
expect(hash.startsWith("read:")).toBe(true);
|
||||
expect(hash.length).toBe("read:".length + 64);
|
||||
});
|
||||
});
|
||||
|
||||
describe("recordToolCall", () => {
|
||||
it("adds tool call to empty history", () => {
|
||||
const state = createState();
|
||||
|
||||
recordToolCall(state, "read", { path: "/file.txt" }, "call-1");
|
||||
|
||||
expect(state.toolCallHistory).toHaveLength(1);
|
||||
expect(state.toolCallHistory?.[0]?.toolName).toBe("read");
|
||||
expect(state.toolCallHistory?.[0]?.toolCallId).toBe("call-1");
|
||||
});
|
||||
|
||||
it("maintains sliding window of last N calls", () => {
|
||||
const state = createState();
|
||||
|
||||
for (let i = 0; i < TOOL_CALL_HISTORY_SIZE + 10; i += 1) {
|
||||
recordToolCall(state, "tool", { iteration: i }, `call-${i}`);
|
||||
}
|
||||
|
||||
expect(state.toolCallHistory).toHaveLength(TOOL_CALL_HISTORY_SIZE);
|
||||
|
||||
const oldestCall = state.toolCallHistory?.[0];
|
||||
expect(oldestCall?.argsHash).toBe(hashToolCall("tool", { iteration: 10 }));
|
||||
});
|
||||
|
||||
it("records timestamp for each call", () => {
|
||||
const state = createState();
|
||||
const before = Date.now();
|
||||
recordToolCall(state, "tool", { arg: 1 }, "call-ts");
|
||||
const after = Date.now();
|
||||
|
||||
const timestamp = state.toolCallHistory?.[0]?.timestamp ?? 0;
|
||||
expect(timestamp).toBeGreaterThanOrEqual(before);
|
||||
expect(timestamp).toBeLessThanOrEqual(after);
|
||||
});
|
||||
});
|
||||
|
||||
describe("detectToolCallLoop", () => {
|
||||
it("does not flag unique tool calls", () => {
|
||||
const state = createState();
|
||||
|
||||
for (let i = 0; i < 15; i += 1) {
|
||||
recordToolCall(state, "read", { path: `/file${i}.txt` }, `call-${i}`);
|
||||
}
|
||||
|
||||
const result = detectToolCallLoop(state, "read", { path: "/new-file.txt" });
|
||||
expect(result.stuck).toBe(false);
|
||||
});
|
||||
|
||||
it("warns on generic repeated tool+args calls", () => {
|
||||
const state = createState();
|
||||
for (let i = 0; i < WARNING_THRESHOLD; i += 1) {
|
||||
recordToolCall(state, "read", { path: "/same.txt" }, `warn-${i}`);
|
||||
}
|
||||
|
||||
const result = detectToolCallLoop(state, "read", { path: "/same.txt" });
|
||||
|
||||
expect(result.stuck).toBe(true);
|
||||
if (result.stuck) {
|
||||
expect(result.level).toBe("warning");
|
||||
expect(result.detector).toBe("generic_repeat");
|
||||
expect(result.count).toBe(WARNING_THRESHOLD);
|
||||
expect(result.message).toContain("WARNING");
|
||||
expect(result.message).toContain(`${WARNING_THRESHOLD} times`);
|
||||
}
|
||||
});
|
||||
|
||||
it("keeps generic loops warn-only below global breaker threshold", () => {
|
||||
const state = createState();
|
||||
const params = { path: "/same.txt" };
|
||||
const result = {
|
||||
content: [{ type: "text", text: "same output" }],
|
||||
details: { ok: true },
|
||||
};
|
||||
|
||||
for (let i = 0; i < CRITICAL_THRESHOLD; i += 1) {
|
||||
recordSuccessfulCall(state, "read", params, result, i);
|
||||
}
|
||||
|
||||
const loopResult = detectToolCallLoop(state, "read", params);
|
||||
expect(loopResult.stuck).toBe(true);
|
||||
if (loopResult.stuck) {
|
||||
expect(loopResult.level).toBe("warning");
|
||||
}
|
||||
});
|
||||
|
||||
it("warns for known polling no-progress loops", () => {
|
||||
const state = createState();
|
||||
const params = { action: "poll", sessionId: "sess-1" };
|
||||
const result = {
|
||||
content: [{ type: "text", text: "(no new output)\n\nProcess still running." }],
|
||||
details: { status: "running", aggregated: "steady" },
|
||||
};
|
||||
|
||||
for (let i = 0; i < WARNING_THRESHOLD; i += 1) {
|
||||
recordSuccessfulCall(state, "process", params, result, i);
|
||||
}
|
||||
|
||||
const loopResult = detectToolCallLoop(state, "process", params);
|
||||
expect(loopResult.stuck).toBe(true);
|
||||
if (loopResult.stuck) {
|
||||
expect(loopResult.level).toBe("warning");
|
||||
expect(loopResult.detector).toBe("known_poll_no_progress");
|
||||
expect(loopResult.message).toContain("no progress");
|
||||
}
|
||||
});
|
||||
|
||||
it("blocks known polling no-progress loops at critical threshold", () => {
|
||||
const state = createState();
|
||||
const params = { action: "poll", sessionId: "sess-1" };
|
||||
const result = {
|
||||
content: [{ type: "text", text: "(no new output)\n\nProcess still running." }],
|
||||
details: { status: "running", aggregated: "steady" },
|
||||
};
|
||||
|
||||
for (let i = 0; i < CRITICAL_THRESHOLD; i += 1) {
|
||||
recordSuccessfulCall(state, "process", params, result, i);
|
||||
}
|
||||
|
||||
const loopResult = detectToolCallLoop(state, "process", params);
|
||||
expect(loopResult.stuck).toBe(true);
|
||||
if (loopResult.stuck) {
|
||||
expect(loopResult.level).toBe("critical");
|
||||
expect(loopResult.detector).toBe("known_poll_no_progress");
|
||||
expect(loopResult.message).toContain("CRITICAL");
|
||||
}
|
||||
});
|
||||
|
||||
it("does not block known polling when output progresses", () => {
|
||||
const state = createState();
|
||||
const params = { action: "poll", sessionId: "sess-1" };
|
||||
|
||||
for (let i = 0; i < CRITICAL_THRESHOLD + 5; i += 1) {
|
||||
const result = {
|
||||
content: [{ type: "text", text: `line ${i}` }],
|
||||
details: { status: "running", aggregated: `line ${i}` },
|
||||
};
|
||||
recordSuccessfulCall(state, "process", params, result, i);
|
||||
}
|
||||
|
||||
const loopResult = detectToolCallLoop(state, "process", params);
|
||||
expect(loopResult.stuck).toBe(false);
|
||||
});
|
||||
|
||||
it("blocks any tool with global no-progress breaker at 30", () => {
|
||||
const state = createState();
|
||||
const params = { path: "/same.txt" };
|
||||
const result = {
|
||||
content: [{ type: "text", text: "same output" }],
|
||||
details: { ok: true },
|
||||
};
|
||||
|
||||
for (let i = 0; i < GLOBAL_CIRCUIT_BREAKER_THRESHOLD; i += 1) {
|
||||
recordSuccessfulCall(state, "read", params, result, i);
|
||||
}
|
||||
|
||||
const loopResult = detectToolCallLoop(state, "read", params);
|
||||
expect(loopResult.stuck).toBe(true);
|
||||
if (loopResult.stuck) {
|
||||
expect(loopResult.level).toBe("critical");
|
||||
expect(loopResult.detector).toBe("global_circuit_breaker");
|
||||
expect(loopResult.message).toContain("global circuit breaker");
|
||||
}
|
||||
});
|
||||
|
||||
it("warns on ping-pong alternating patterns", () => {
|
||||
const state = createState();
|
||||
const readParams = { path: "/a.txt" };
|
||||
const listParams = { dir: "/workspace" };
|
||||
|
||||
for (let i = 0; i < WARNING_THRESHOLD - 1; i += 1) {
|
||||
if (i % 2 === 0) {
|
||||
recordToolCall(state, "read", readParams, `read-${i}`);
|
||||
} else {
|
||||
recordToolCall(state, "list", listParams, `list-${i}`);
|
||||
}
|
||||
}
|
||||
|
||||
const loopResult = detectToolCallLoop(state, "list", listParams);
|
||||
expect(loopResult.stuck).toBe(true);
|
||||
if (loopResult.stuck) {
|
||||
expect(loopResult.level).toBe("warning");
|
||||
expect(loopResult.detector).toBe("ping_pong");
|
||||
expect(loopResult.count).toBe(WARNING_THRESHOLD);
|
||||
expect(loopResult.message).toContain("ping-pong loop");
|
||||
}
|
||||
});
|
||||
|
||||
it("blocks ping-pong alternating patterns at critical threshold", () => {
|
||||
const state = createState();
|
||||
const readParams = { path: "/a.txt" };
|
||||
const listParams = { dir: "/workspace" };
|
||||
|
||||
for (let i = 0; i < CRITICAL_THRESHOLD - 1; i += 1) {
|
||||
if (i % 2 === 0) {
|
||||
recordSuccessfulCall(
|
||||
state,
|
||||
"read",
|
||||
readParams,
|
||||
{ content: [{ type: "text", text: "read stable" }], details: { ok: true } },
|
||||
i,
|
||||
);
|
||||
} else {
|
||||
recordSuccessfulCall(
|
||||
state,
|
||||
"list",
|
||||
listParams,
|
||||
{ content: [{ type: "text", text: "list stable" }], details: { ok: true } },
|
||||
i,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
const loopResult = detectToolCallLoop(state, "list", listParams);
|
||||
expect(loopResult.stuck).toBe(true);
|
||||
if (loopResult.stuck) {
|
||||
expect(loopResult.level).toBe("critical");
|
||||
expect(loopResult.detector).toBe("ping_pong");
|
||||
expect(loopResult.count).toBe(CRITICAL_THRESHOLD);
|
||||
expect(loopResult.message).toContain("CRITICAL");
|
||||
expect(loopResult.message).toContain("ping-pong loop");
|
||||
}
|
||||
});
|
||||
|
||||
it("does not block ping-pong at critical threshold when outcomes are progressing", () => {
|
||||
const state = createState();
|
||||
const readParams = { path: "/a.txt" };
|
||||
const listParams = { dir: "/workspace" };
|
||||
|
||||
for (let i = 0; i < CRITICAL_THRESHOLD - 1; i += 1) {
|
||||
if (i % 2 === 0) {
|
||||
recordSuccessfulCall(
|
||||
state,
|
||||
"read",
|
||||
readParams,
|
||||
{ content: [{ type: "text", text: `read ${i}` }], details: { ok: true } },
|
||||
i,
|
||||
);
|
||||
} else {
|
||||
recordSuccessfulCall(
|
||||
state,
|
||||
"list",
|
||||
listParams,
|
||||
{ content: [{ type: "text", text: `list ${i}` }], details: { ok: true } },
|
||||
i,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
const loopResult = detectToolCallLoop(state, "list", listParams);
|
||||
expect(loopResult.stuck).toBe(true);
|
||||
if (loopResult.stuck) {
|
||||
expect(loopResult.level).toBe("warning");
|
||||
expect(loopResult.detector).toBe("ping_pong");
|
||||
expect(loopResult.count).toBe(CRITICAL_THRESHOLD);
|
||||
}
|
||||
});
|
||||
|
||||
it("does not flag ping-pong when alternation is broken", () => {
|
||||
const state = createState();
|
||||
recordToolCall(state, "read", { path: "/a.txt" }, "a1");
|
||||
recordToolCall(state, "list", { dir: "/workspace" }, "b1");
|
||||
recordToolCall(state, "read", { path: "/a.txt" }, "a2");
|
||||
recordToolCall(state, "write", { path: "/tmp/out.txt" }, "c1"); // breaks alternation
|
||||
|
||||
const loopResult = detectToolCallLoop(state, "list", { dir: "/workspace" });
|
||||
expect(loopResult.stuck).toBe(false);
|
||||
});
|
||||
|
||||
it("records fixed-size result hashes for large tool outputs", () => {
|
||||
const state = createState();
|
||||
const params = { action: "log", sessionId: "sess-big" };
|
||||
const toolCallId = "log-big";
|
||||
recordToolCall(state, "process", params, toolCallId);
|
||||
recordToolCallOutcome(state, {
|
||||
toolName: "process",
|
||||
toolParams: params,
|
||||
toolCallId,
|
||||
result: {
|
||||
content: [{ type: "text", text: "y".repeat(40_000) }],
|
||||
details: { status: "running", totalLines: 1, totalChars: 40_000 },
|
||||
},
|
||||
});
|
||||
|
||||
const entry = state.toolCallHistory?.find((call) => call.toolCallId === toolCallId);
|
||||
expect(typeof entry?.resultHash).toBe("string");
|
||||
expect(entry?.resultHash?.length).toBe(64);
|
||||
});
|
||||
|
||||
it("handles empty history", () => {
|
||||
const state = createState();
|
||||
|
||||
const result = detectToolCallLoop(state, "tool", { arg: 1 });
|
||||
expect(result.stuck).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe("getToolCallStats", () => {
|
||||
it("returns zero stats for empty history", () => {
|
||||
const state = createState();
|
||||
|
||||
const stats = getToolCallStats(state);
|
||||
expect(stats.totalCalls).toBe(0);
|
||||
expect(stats.uniquePatterns).toBe(0);
|
||||
expect(stats.mostFrequent).toBeNull();
|
||||
});
|
||||
|
||||
it("counts total calls and unique patterns", () => {
|
||||
const state = createState();
|
||||
|
||||
for (let i = 0; i < 5; i += 1) {
|
||||
recordToolCall(state, "read", { path: "/file.txt" }, `same-${i}`);
|
||||
}
|
||||
|
||||
recordToolCall(state, "write", { path: "/output.txt" }, "write-1");
|
||||
recordToolCall(state, "list", { dir: "/home" }, "list-1");
|
||||
recordToolCall(state, "read", { path: "/other.txt" }, "read-other");
|
||||
|
||||
const stats = getToolCallStats(state);
|
||||
expect(stats.totalCalls).toBe(8);
|
||||
expect(stats.uniquePatterns).toBe(4);
|
||||
});
|
||||
|
||||
it("identifies most frequent pattern", () => {
|
||||
const state = createState();
|
||||
|
||||
for (let i = 0; i < 3; i += 1) {
|
||||
recordToolCall(state, "read", { path: "/file1.txt" }, `p1-${i}`);
|
||||
}
|
||||
|
||||
for (let i = 0; i < 7; i += 1) {
|
||||
recordToolCall(state, "read", { path: "/file2.txt" }, `p2-${i}`);
|
||||
}
|
||||
|
||||
for (let i = 0; i < 2; i += 1) {
|
||||
recordToolCall(state, "write", { path: "/output.txt" }, `p3-${i}`);
|
||||
}
|
||||
|
||||
const stats = getToolCallStats(state);
|
||||
expect(stats.mostFrequent?.toolName).toBe("read");
|
||||
expect(stats.mostFrequent?.count).toBe(7);
|
||||
});
|
||||
});
|
||||
});
|
||||
527
src/agents/tool-loop-detection.ts
Normal file
527
src/agents/tool-loop-detection.ts
Normal file
@@ -0,0 +1,527 @@
|
||||
import { createHash } from "node:crypto";
|
||||
import type { SessionState } from "../logging/diagnostic-session-state.js";
|
||||
import { createSubsystemLogger } from "../logging/subsystem.js";
|
||||
import { isPlainObject } from "../utils.js";
|
||||
|
||||
const log = createSubsystemLogger("agents/loop-detection");
|
||||
|
||||
export type LoopDetectorKind =
|
||||
| "generic_repeat"
|
||||
| "known_poll_no_progress"
|
||||
| "global_circuit_breaker"
|
||||
| "ping_pong";
|
||||
|
||||
export type LoopDetectionResult =
|
||||
| { stuck: false }
|
||||
| {
|
||||
stuck: true;
|
||||
level: "warning" | "critical";
|
||||
detector: LoopDetectorKind;
|
||||
count: number;
|
||||
message: string;
|
||||
pairedToolName?: string;
|
||||
warningKey?: string;
|
||||
};
|
||||
|
||||
export const TOOL_CALL_HISTORY_SIZE = 30;
|
||||
export const WARNING_THRESHOLD = 10;
|
||||
export const CRITICAL_THRESHOLD = 20;
|
||||
export const GLOBAL_CIRCUIT_BREAKER_THRESHOLD = 30;
|
||||
|
||||
/**
|
||||
* Hash a tool call for pattern matching.
|
||||
* Uses tool name + deterministic JSON serialization digest of params.
|
||||
*/
|
||||
export function hashToolCall(toolName: string, params: unknown): string {
|
||||
return `${toolName}:${digestStable(params)}`;
|
||||
}
|
||||
|
||||
function stableStringify(value: unknown): string {
|
||||
if (value === null || typeof value !== "object") {
|
||||
return JSON.stringify(value);
|
||||
}
|
||||
if (Array.isArray(value)) {
|
||||
return `[${value.map(stableStringify).join(",")}]`;
|
||||
}
|
||||
const obj = value as Record<string, unknown>;
|
||||
const keys = Object.keys(obj).toSorted();
|
||||
return `{${keys.map((k) => `${JSON.stringify(k)}:${stableStringify(obj[k])}`).join(",")}}`;
|
||||
}
|
||||
|
||||
function digestStable(value: unknown): string {
|
||||
const serialized = stableStringifyFallback(value);
|
||||
return createHash("sha256").update(serialized).digest("hex");
|
||||
}
|
||||
|
||||
function stableStringifyFallback(value: unknown): string {
|
||||
try {
|
||||
return stableStringify(value);
|
||||
} catch {
|
||||
if (value === null || value === undefined) {
|
||||
return `${value}`;
|
||||
}
|
||||
if (typeof value === "string") {
|
||||
return value;
|
||||
}
|
||||
if (typeof value === "number" || typeof value === "boolean" || typeof value === "bigint") {
|
||||
return `${value}`;
|
||||
}
|
||||
if (value instanceof Error) {
|
||||
return `${value.name}:${value.message}`;
|
||||
}
|
||||
return Object.prototype.toString.call(value);
|
||||
}
|
||||
}
|
||||
|
||||
function isKnownPollToolCall(toolName: string, params: unknown): boolean {
|
||||
if (toolName === "command_status") {
|
||||
return true;
|
||||
}
|
||||
if (toolName !== "process" || !isPlainObject(params)) {
|
||||
return false;
|
||||
}
|
||||
const action = params.action;
|
||||
return action === "poll" || action === "log";
|
||||
}
|
||||
|
||||
function extractTextContent(result: unknown): string {
|
||||
if (!isPlainObject(result) || !Array.isArray(result.content)) {
|
||||
return "";
|
||||
}
|
||||
return result.content
|
||||
.filter(
|
||||
(entry): entry is { type: string; text: string } =>
|
||||
isPlainObject(entry) && typeof entry.type === "string" && typeof entry.text === "string",
|
||||
)
|
||||
.map((entry) => entry.text)
|
||||
.join("\n")
|
||||
.trim();
|
||||
}
|
||||
|
||||
function formatErrorForHash(error: unknown): string {
|
||||
if (error instanceof Error) {
|
||||
return error.message || error.name;
|
||||
}
|
||||
if (typeof error === "string") {
|
||||
return error;
|
||||
}
|
||||
if (typeof error === "number" || typeof error === "boolean" || typeof error === "bigint") {
|
||||
return `${error}`;
|
||||
}
|
||||
return stableStringify(error);
|
||||
}
|
||||
|
||||
function hashToolOutcome(
|
||||
toolName: string,
|
||||
params: unknown,
|
||||
result: unknown,
|
||||
error: unknown,
|
||||
): string | undefined {
|
||||
if (error !== undefined) {
|
||||
return `error:${digestStable(formatErrorForHash(error))}`;
|
||||
}
|
||||
if (!isPlainObject(result)) {
|
||||
return result === undefined ? undefined : digestStable(result);
|
||||
}
|
||||
|
||||
const details = isPlainObject(result.details) ? result.details : {};
|
||||
const text = extractTextContent(result);
|
||||
if (isKnownPollToolCall(toolName, params) && toolName === "process" && isPlainObject(params)) {
|
||||
const action = params.action;
|
||||
if (action === "poll") {
|
||||
return digestStable({
|
||||
action,
|
||||
status: details.status,
|
||||
exitCode: details.exitCode ?? null,
|
||||
exitSignal: details.exitSignal ?? null,
|
||||
aggregated: details.aggregated ?? null,
|
||||
text,
|
||||
});
|
||||
}
|
||||
if (action === "log") {
|
||||
return digestStable({
|
||||
action,
|
||||
status: details.status,
|
||||
totalLines: details.totalLines ?? null,
|
||||
totalChars: details.totalChars ?? null,
|
||||
truncated: details.truncated ?? null,
|
||||
exitCode: details.exitCode ?? null,
|
||||
exitSignal: details.exitSignal ?? null,
|
||||
text,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return digestStable({
|
||||
details,
|
||||
text,
|
||||
});
|
||||
}
|
||||
|
||||
function getNoProgressStreak(
|
||||
history: Array<{ toolName: string; argsHash: string; resultHash?: string }>,
|
||||
toolName: string,
|
||||
argsHash: string,
|
||||
): { count: number; latestResultHash?: string } {
|
||||
let streak = 0;
|
||||
let latestResultHash: string | undefined;
|
||||
|
||||
for (let i = history.length - 1; i >= 0; i -= 1) {
|
||||
const record = history[i];
|
||||
if (!record || record.toolName !== toolName || record.argsHash !== argsHash) {
|
||||
continue;
|
||||
}
|
||||
if (typeof record.resultHash !== "string" || !record.resultHash) {
|
||||
continue;
|
||||
}
|
||||
if (!latestResultHash) {
|
||||
latestResultHash = record.resultHash;
|
||||
streak = 1;
|
||||
continue;
|
||||
}
|
||||
if (record.resultHash !== latestResultHash) {
|
||||
break;
|
||||
}
|
||||
streak += 1;
|
||||
}
|
||||
|
||||
return { count: streak, latestResultHash };
|
||||
}
|
||||
|
||||
function getPingPongStreak(
|
||||
history: Array<{ toolName: string; argsHash: string; resultHash?: string }>,
|
||||
currentSignature: string,
|
||||
): {
|
||||
count: number;
|
||||
pairedToolName?: string;
|
||||
pairedSignature?: string;
|
||||
noProgressEvidence: boolean;
|
||||
} {
|
||||
const last = history.at(-1);
|
||||
if (!last) {
|
||||
return { count: 0, noProgressEvidence: false };
|
||||
}
|
||||
|
||||
let otherSignature: string | undefined;
|
||||
let otherToolName: string | undefined;
|
||||
for (let i = history.length - 2; i >= 0; i -= 1) {
|
||||
const call = history[i];
|
||||
if (!call) {
|
||||
continue;
|
||||
}
|
||||
if (call.argsHash !== last.argsHash) {
|
||||
otherSignature = call.argsHash;
|
||||
otherToolName = call.toolName;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!otherSignature || !otherToolName) {
|
||||
return { count: 0, noProgressEvidence: false };
|
||||
}
|
||||
|
||||
let alternatingTailCount = 0;
|
||||
for (let i = history.length - 1; i >= 0; i -= 1) {
|
||||
const call = history[i];
|
||||
if (!call) {
|
||||
continue;
|
||||
}
|
||||
const expected = alternatingTailCount % 2 === 0 ? last.argsHash : otherSignature;
|
||||
if (call.argsHash !== expected) {
|
||||
break;
|
||||
}
|
||||
alternatingTailCount += 1;
|
||||
}
|
||||
|
||||
if (alternatingTailCount < 2) {
|
||||
return { count: 0, noProgressEvidence: false };
|
||||
}
|
||||
|
||||
const expectedCurrentSignature = otherSignature;
|
||||
if (currentSignature !== expectedCurrentSignature) {
|
||||
return { count: 0, noProgressEvidence: false };
|
||||
}
|
||||
|
||||
const tailStart = Math.max(0, history.length - alternatingTailCount);
|
||||
let firstHashA: string | undefined;
|
||||
let firstHashB: string | undefined;
|
||||
let noProgressEvidence = true;
|
||||
for (let i = tailStart; i < history.length; i += 1) {
|
||||
const call = history[i];
|
||||
if (!call) {
|
||||
continue;
|
||||
}
|
||||
if (!call.resultHash) {
|
||||
noProgressEvidence = false;
|
||||
break;
|
||||
}
|
||||
if (call.argsHash === last.argsHash) {
|
||||
if (!firstHashA) {
|
||||
firstHashA = call.resultHash;
|
||||
} else if (firstHashA !== call.resultHash) {
|
||||
noProgressEvidence = false;
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (call.argsHash === otherSignature) {
|
||||
if (!firstHashB) {
|
||||
firstHashB = call.resultHash;
|
||||
} else if (firstHashB !== call.resultHash) {
|
||||
noProgressEvidence = false;
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
noProgressEvidence = false;
|
||||
break;
|
||||
}
|
||||
|
||||
// Need repeated stable outcomes on both sides before treating ping-pong as no-progress.
|
||||
if (!firstHashA || !firstHashB) {
|
||||
noProgressEvidence = false;
|
||||
}
|
||||
|
||||
return {
|
||||
count: alternatingTailCount + 1,
|
||||
pairedToolName: last.toolName,
|
||||
pairedSignature: last.argsHash,
|
||||
noProgressEvidence,
|
||||
};
|
||||
}
|
||||
|
||||
function canonicalPairKey(signatureA: string, signatureB: string): string {
|
||||
return [signatureA, signatureB].toSorted().join("|");
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect if an agent is stuck in a repetitive tool call loop.
|
||||
* Checks if the same tool+params combination has been called excessively.
|
||||
*/
|
||||
export function detectToolCallLoop(
|
||||
state: SessionState,
|
||||
toolName: string,
|
||||
params: unknown,
|
||||
): LoopDetectionResult {
|
||||
const history = state.toolCallHistory ?? [];
|
||||
const currentHash = hashToolCall(toolName, params);
|
||||
const noProgress = getNoProgressStreak(history, toolName, currentHash);
|
||||
const noProgressStreak = noProgress.count;
|
||||
const knownPollTool = isKnownPollToolCall(toolName, params);
|
||||
const pingPong = getPingPongStreak(history, currentHash);
|
||||
|
||||
if (noProgressStreak >= GLOBAL_CIRCUIT_BREAKER_THRESHOLD) {
|
||||
log.error(
|
||||
`Global circuit breaker triggered: ${toolName} repeated ${noProgressStreak} times with no progress`,
|
||||
);
|
||||
return {
|
||||
stuck: true,
|
||||
level: "critical",
|
||||
detector: "global_circuit_breaker",
|
||||
count: noProgressStreak,
|
||||
message: `CRITICAL: ${toolName} has repeated identical no-progress outcomes ${noProgressStreak} times. Session execution blocked by global circuit breaker to prevent runaway loops.`,
|
||||
warningKey: `global:${toolName}:${currentHash}:${noProgress.latestResultHash ?? "none"}`,
|
||||
};
|
||||
}
|
||||
|
||||
if (knownPollTool && noProgressStreak >= CRITICAL_THRESHOLD) {
|
||||
log.error(`Critical polling loop detected: ${toolName} repeated ${noProgressStreak} times`);
|
||||
return {
|
||||
stuck: true,
|
||||
level: "critical",
|
||||
detector: "known_poll_no_progress",
|
||||
count: noProgressStreak,
|
||||
message: `CRITICAL: Called ${toolName} with identical arguments and no progress ${noProgressStreak} times. This appears to be a stuck polling loop. Session execution blocked to prevent resource waste.`,
|
||||
warningKey: `poll:${toolName}:${currentHash}:${noProgress.latestResultHash ?? "none"}`,
|
||||
};
|
||||
}
|
||||
|
||||
if (knownPollTool && noProgressStreak >= WARNING_THRESHOLD) {
|
||||
log.warn(`Polling loop warning: ${toolName} repeated ${noProgressStreak} times`);
|
||||
return {
|
||||
stuck: true,
|
||||
level: "warning",
|
||||
detector: "known_poll_no_progress",
|
||||
count: noProgressStreak,
|
||||
message: `WARNING: You have called ${toolName} ${noProgressStreak} times with identical arguments and no progress. Stop polling and either (1) increase wait time between checks, or (2) report the task as failed if the process is stuck.`,
|
||||
warningKey: `poll:${toolName}:${currentHash}:${noProgress.latestResultHash ?? "none"}`,
|
||||
};
|
||||
}
|
||||
|
||||
const pingPongWarningKey = pingPong.pairedSignature
|
||||
? `pingpong:${canonicalPairKey(currentHash, pingPong.pairedSignature)}`
|
||||
: `pingpong:${toolName}:${currentHash}`;
|
||||
|
||||
if (pingPong.count >= CRITICAL_THRESHOLD && pingPong.noProgressEvidence) {
|
||||
log.error(
|
||||
`Critical ping-pong loop detected: alternating calls count=${pingPong.count} currentTool=${toolName}`,
|
||||
);
|
||||
return {
|
||||
stuck: true,
|
||||
level: "critical",
|
||||
detector: "ping_pong",
|
||||
count: pingPong.count,
|
||||
message: `CRITICAL: You are alternating between repeated tool-call patterns (${pingPong.count} consecutive calls) with no progress. This appears to be a stuck ping-pong loop. Session execution blocked to prevent resource waste.`,
|
||||
pairedToolName: pingPong.pairedToolName,
|
||||
warningKey: pingPongWarningKey,
|
||||
};
|
||||
}
|
||||
|
||||
if (pingPong.count >= WARNING_THRESHOLD) {
|
||||
log.warn(
|
||||
`Ping-pong loop warning: alternating calls count=${pingPong.count} currentTool=${toolName}`,
|
||||
);
|
||||
return {
|
||||
stuck: true,
|
||||
level: "warning",
|
||||
detector: "ping_pong",
|
||||
count: pingPong.count,
|
||||
message: `WARNING: You are alternating between repeated tool-call patterns (${pingPong.count} consecutive calls). This looks like a ping-pong loop; stop retrying and report the task as failed.`,
|
||||
pairedToolName: pingPong.pairedToolName,
|
||||
warningKey: pingPongWarningKey,
|
||||
};
|
||||
}
|
||||
|
||||
// Generic detector: warn-only for repeated identical calls.
|
||||
const recentCount = history.filter(
|
||||
(h) => h.toolName === toolName && h.argsHash === currentHash,
|
||||
).length;
|
||||
|
||||
if (!knownPollTool && recentCount >= WARNING_THRESHOLD) {
|
||||
log.warn(`Loop warning: ${toolName} called ${recentCount} times with identical arguments`);
|
||||
return {
|
||||
stuck: true,
|
||||
level: "warning",
|
||||
detector: "generic_repeat",
|
||||
count: recentCount,
|
||||
message: `WARNING: You have called ${toolName} ${recentCount} times with identical arguments. If this is not making progress, stop retrying and report the task as failed.`,
|
||||
warningKey: `generic:${toolName}:${currentHash}`,
|
||||
};
|
||||
}
|
||||
|
||||
return { stuck: false };
|
||||
}
|
||||
|
||||
/**
|
||||
* Record a tool call in the session's history for loop detection.
|
||||
* Maintains sliding window of last N calls.
|
||||
*/
|
||||
export function recordToolCall(
|
||||
state: SessionState,
|
||||
toolName: string,
|
||||
params: unknown,
|
||||
toolCallId?: string,
|
||||
): void {
|
||||
if (!state.toolCallHistory) {
|
||||
state.toolCallHistory = [];
|
||||
}
|
||||
|
||||
state.toolCallHistory.push({
|
||||
toolName,
|
||||
argsHash: hashToolCall(toolName, params),
|
||||
toolCallId,
|
||||
timestamp: Date.now(),
|
||||
});
|
||||
|
||||
if (state.toolCallHistory.length > TOOL_CALL_HISTORY_SIZE) {
|
||||
state.toolCallHistory.shift();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Record a completed tool call outcome so loop detection can identify no-progress repeats.
|
||||
*/
|
||||
export function recordToolCallOutcome(
|
||||
state: SessionState,
|
||||
params: {
|
||||
toolName: string;
|
||||
toolParams: unknown;
|
||||
toolCallId?: string;
|
||||
result?: unknown;
|
||||
error?: unknown;
|
||||
},
|
||||
): void {
|
||||
const resultHash = hashToolOutcome(
|
||||
params.toolName,
|
||||
params.toolParams,
|
||||
params.result,
|
||||
params.error,
|
||||
);
|
||||
if (!resultHash) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!state.toolCallHistory) {
|
||||
state.toolCallHistory = [];
|
||||
}
|
||||
|
||||
const argsHash = hashToolCall(params.toolName, params.toolParams);
|
||||
let matched = false;
|
||||
for (let i = state.toolCallHistory.length - 1; i >= 0; i -= 1) {
|
||||
const call = state.toolCallHistory[i];
|
||||
if (!call) {
|
||||
continue;
|
||||
}
|
||||
if (params.toolCallId && call.toolCallId !== params.toolCallId) {
|
||||
continue;
|
||||
}
|
||||
if (call.toolName !== params.toolName || call.argsHash !== argsHash) {
|
||||
continue;
|
||||
}
|
||||
if (call.resultHash !== undefined) {
|
||||
continue;
|
||||
}
|
||||
call.resultHash = resultHash;
|
||||
matched = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!matched) {
|
||||
state.toolCallHistory.push({
|
||||
toolName: params.toolName,
|
||||
argsHash,
|
||||
toolCallId: params.toolCallId,
|
||||
resultHash,
|
||||
timestamp: Date.now(),
|
||||
});
|
||||
}
|
||||
|
||||
if (state.toolCallHistory.length > TOOL_CALL_HISTORY_SIZE) {
|
||||
state.toolCallHistory.splice(0, state.toolCallHistory.length - TOOL_CALL_HISTORY_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get current tool call statistics for a session (for debugging/monitoring).
|
||||
*/
|
||||
export function getToolCallStats(state: SessionState): {
|
||||
totalCalls: number;
|
||||
uniquePatterns: number;
|
||||
mostFrequent: { toolName: string; count: number } | null;
|
||||
} {
|
||||
const history = state.toolCallHistory ?? [];
|
||||
const patterns = new Map<string, { toolName: string; count: number }>();
|
||||
|
||||
for (const call of history) {
|
||||
const key = call.argsHash;
|
||||
const existing = patterns.get(key);
|
||||
if (existing) {
|
||||
existing.count += 1;
|
||||
} else {
|
||||
patterns.set(key, { toolName: call.toolName, count: 1 });
|
||||
}
|
||||
}
|
||||
|
||||
let mostFrequent: { toolName: string; count: number } | null = null;
|
||||
for (const pattern of patterns.values()) {
|
||||
if (!mostFrequent || pattern.count > mostFrequent.count) {
|
||||
mostFrequent = pattern;
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
totalCalls: history.length,
|
||||
uniquePatterns: patterns.size,
|
||||
mostFrequent,
|
||||
};
|
||||
}
|
||||
@@ -134,6 +134,19 @@ export type DiagnosticHeartbeatEvent = DiagnosticBaseEvent & {
|
||||
queued: number;
|
||||
};
|
||||
|
||||
export type DiagnosticToolLoopEvent = DiagnosticBaseEvent & {
|
||||
type: "tool.loop";
|
||||
sessionKey?: string;
|
||||
sessionId?: string;
|
||||
toolName: string;
|
||||
level: "warning" | "critical";
|
||||
action: "warn" | "block";
|
||||
detector: "generic_repeat" | "known_poll_no_progress" | "global_circuit_breaker" | "ping_pong";
|
||||
count: number;
|
||||
message: string;
|
||||
pairedToolName?: string;
|
||||
};
|
||||
|
||||
export type DiagnosticEventPayload =
|
||||
| DiagnosticUsageEvent
|
||||
| DiagnosticWebhookReceivedEvent
|
||||
@@ -146,7 +159,8 @@ export type DiagnosticEventPayload =
|
||||
| DiagnosticLaneEnqueueEvent
|
||||
| DiagnosticLaneDequeueEvent
|
||||
| DiagnosticRunAttemptEvent
|
||||
| DiagnosticHeartbeatEvent;
|
||||
| DiagnosticHeartbeatEvent
|
||||
| DiagnosticToolLoopEvent;
|
||||
|
||||
export type DiagnosticEventInput = DiagnosticEventPayload extends infer Event
|
||||
? Event extends DiagnosticEventPayload
|
||||
|
||||
@@ -6,6 +6,17 @@ export type SessionState = {
|
||||
lastActivity: number;
|
||||
state: SessionStateValue;
|
||||
queueDepth: number;
|
||||
toolCallHistory?: ToolCallRecord[];
|
||||
toolLoopWarningBuckets?: Map<string, number>;
|
||||
commandPollCounts?: Map<string, { count: number; lastPollAt: number }>;
|
||||
};
|
||||
|
||||
export type ToolCallRecord = {
|
||||
toolName: string;
|
||||
argsHash: string;
|
||||
toolCallId?: string;
|
||||
resultHash?: string;
|
||||
timestamp: number;
|
||||
};
|
||||
|
||||
export type SessionRef = {
|
||||
|
||||
@@ -256,6 +256,42 @@ export function logRunAttempt(params: SessionRef & { runId: string; attempt: num
|
||||
markActivity();
|
||||
}
|
||||
|
||||
export function logToolLoopAction(
|
||||
params: SessionRef & {
|
||||
toolName: string;
|
||||
level: "warning" | "critical";
|
||||
action: "warn" | "block";
|
||||
detector: "generic_repeat" | "known_poll_no_progress" | "global_circuit_breaker" | "ping_pong";
|
||||
count: number;
|
||||
message: string;
|
||||
pairedToolName?: string;
|
||||
},
|
||||
) {
|
||||
const payload = `tool loop: sessionId=${params.sessionId ?? "unknown"} sessionKey=${
|
||||
params.sessionKey ?? "unknown"
|
||||
} tool=${params.toolName} level=${params.level} action=${params.action} detector=${
|
||||
params.detector
|
||||
} count=${params.count}${params.pairedToolName ? ` pairedTool=${params.pairedToolName}` : ""} message="${params.message}"`;
|
||||
if (params.level === "critical") {
|
||||
diag.error(payload);
|
||||
} else {
|
||||
diag.warn(payload);
|
||||
}
|
||||
emitDiagnosticEvent({
|
||||
type: "tool.loop",
|
||||
sessionId: params.sessionId,
|
||||
sessionKey: params.sessionKey,
|
||||
toolName: params.toolName,
|
||||
level: params.level,
|
||||
action: params.action,
|
||||
detector: params.detector,
|
||||
count: params.count,
|
||||
message: params.message,
|
||||
pairedToolName: params.pairedToolName,
|
||||
});
|
||||
markActivity();
|
||||
}
|
||||
|
||||
export function logActiveRuns() {
|
||||
const activeSessions = Array.from(diagnosticSessionStates.entries())
|
||||
.filter(([, s]) => s.state === "processing")
|
||||
@@ -314,6 +350,16 @@ export function startDiagnosticHeartbeat() {
|
||||
queued: totalQueued,
|
||||
});
|
||||
|
||||
import("../agents/command-poll-backoff.js")
|
||||
.then(({ pruneStaleCommandPolls }) => {
|
||||
for (const [, state] of diagnosticSessionStates) {
|
||||
pruneStaleCommandPolls(state);
|
||||
}
|
||||
})
|
||||
.catch((err) => {
|
||||
diag.debug(`command-poll-backoff prune failed: ${String(err)}`);
|
||||
});
|
||||
|
||||
for (const [, state] of diagnosticSessionStates) {
|
||||
const ageMs = now - state.lastActivity;
|
||||
if (state.state === "processing" && ageMs > 120_000) {
|
||||
|
||||
Reference in New Issue
Block a user