From dbdcbe03e71127575b475ce83e922649259d15bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bruno=20=C5=A0kvorc?= Date: Sat, 14 Feb 2026 23:01:16 +0100 Subject: [PATCH] fix: preserve bootstrap paths and expose failed mutations (#16131) Merged via /review-pr -> /prepare-pr -> /merge-pr. Prepared head SHA: 385dcbd8a9d3fd1bd67b5cb439b699a98728a679 Co-authored-by: Swader <1430603+Swader@users.noreply.github.com> Co-authored-by: gumadeiras <5599352+gumadeiras@users.noreply.github.com> Reviewed-by: @gumadeiras --- CHANGELOG.md | 1 + src/agents/bootstrap-files.e2e.test.ts | 4 +- ...ers.buildbootstrapcontextfiles.e2e.test.ts | 2 +- src/agents/pi-embedded-helpers/bootstrap.ts | 4 +- .../run/payloads.e2e.test.ts | 123 +++++++++-- src/agents/pi-embedded-runner/run/payloads.ts | 42 +++- src/agents/pi-embedded-runner/run/types.ts | 8 +- .../pi-embedded-subscribe.handlers.tools.ts | 38 +++- .../pi-embedded-subscribe.handlers.types.ts | 10 +- ...ion.subscribeembeddedpisession.e2e.test.ts | 186 ++++++++++++++++ src/agents/system-prompt-report.test.ts | 47 ++++ src/agents/system-prompt-report.ts | 16 +- src/agents/tool-mutation.test.ts | 70 ++++++ src/agents/tool-mutation.ts | 201 ++++++++++++++++++ 14 files changed, 718 insertions(+), 34 deletions(-) create mode 100644 src/agents/system-prompt-report.test.ts create mode 100644 src/agents/tool-mutation.test.ts create mode 100644 src/agents/tool-mutation.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 316d67ec62..7828bc333e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- Agents: keep unresolved mutating tool failures visible until the same action retry succeeds, scope mutation-error surfacing to mutating calls (including `session_status` model changes), and dedupe duplicate failure warnings in outbound replies. (#16131) Thanks @Swader. - Agents: classify external timeout aborts during compaction the same as internal timeouts, preventing unnecessary auth-profile rotation and preserving compaction-timeout snapshot fallback behavior. (#9855) Thanks @mverrilli. - Sessions/Agents: harden transcript path resolution for mismatched agent context by preserving explicit store roots and adding safe absolute-path fallback to the correct agent sessions directory. (#16288) Thanks @robbyczgw-cla. - BlueBubbles: include sender identity in group chat envelopes and pass clean message text to the agent prompt, aligning with iMessage/Signal formatting. (#16210) Thanks @zerone0x. diff --git a/src/agents/bootstrap-files.e2e.test.ts b/src/agents/bootstrap-files.e2e.test.ts index 4cf0941e6a..eee80fadc1 100644 --- a/src/agents/bootstrap-files.e2e.test.ts +++ b/src/agents/bootstrap-files.e2e.test.ts @@ -53,7 +53,9 @@ describe("resolveBootstrapContextForRun", () => { const workspaceDir = await makeTempWorkspace("openclaw-bootstrap-"); const result = await resolveBootstrapContextForRun({ workspaceDir }); - const extra = result.contextFiles.find((file) => file.path === "EXTRA.md"); + const extra = result.contextFiles.find( + (file) => file.path === path.join(workspaceDir, "EXTRA.md"), + ); expect(extra?.content).toBe("extra"); }); diff --git a/src/agents/pi-embedded-helpers.buildbootstrapcontextfiles.e2e.test.ts b/src/agents/pi-embedded-helpers.buildbootstrapcontextfiles.e2e.test.ts index 4139bf3198..a646098c25 100644 --- a/src/agents/pi-embedded-helpers.buildbootstrapcontextfiles.e2e.test.ts +++ b/src/agents/pi-embedded-helpers.buildbootstrapcontextfiles.e2e.test.ts @@ -14,7 +14,7 @@ describe("buildBootstrapContextFiles", () => { const files = [makeFile({ missing: true, content: undefined })]; expect(buildBootstrapContextFiles(files)).toEqual([ { - path: DEFAULT_AGENTS_FILENAME, + path: "/tmp/AGENTS.md", content: "[MISSING] Expected at: /tmp/AGENTS.md", }, ]); diff --git a/src/agents/pi-embedded-helpers/bootstrap.ts b/src/agents/pi-embedded-helpers/bootstrap.ts index 725324be9f..7c141a09b7 100644 --- a/src/agents/pi-embedded-helpers/bootstrap.ts +++ b/src/agents/pi-embedded-helpers/bootstrap.ts @@ -168,7 +168,7 @@ export function buildBootstrapContextFiles( for (const file of files) { if (file.missing) { result.push({ - path: file.name, + path: file.path, content: `[MISSING] Expected at: ${file.path}`, }); continue; @@ -183,7 +183,7 @@ export function buildBootstrapContextFiles( ); } result.push({ - path: file.name, + path: file.path, content: trimmed.content, }); } diff --git a/src/agents/pi-embedded-runner/run/payloads.e2e.test.ts b/src/agents/pi-embedded-runner/run/payloads.e2e.test.ts index bac074e018..f9fd438e05 100644 --- a/src/agents/pi-embedded-runner/run/payloads.e2e.test.ts +++ b/src/agents/pi-embedded-runner/run/payloads.e2e.test.ts @@ -229,7 +229,56 @@ describe("buildEmbeddedRunPayloads", () => { expect(payloads[0]?.text).toContain("code 1"); }); - it("suppresses recoverable tool errors containing 'required'", () => { + it("suppresses recoverable tool errors containing 'required' for non-mutating tools", () => { + const payloads = buildEmbeddedRunPayloads({ + assistantTexts: [], + toolMetas: [], + lastAssistant: undefined, + lastToolError: { toolName: "browser", error: "url required" }, + sessionKey: "session:telegram", + inlineToolResultsAllowed: false, + verboseLevel: "off", + reasoningLevel: "off", + toolResultFormat: "plain", + }); + + // Recoverable errors should not be sent to the user + expect(payloads).toHaveLength(0); + }); + + it("suppresses recoverable tool errors containing 'missing' for non-mutating tools", () => { + const payloads = buildEmbeddedRunPayloads({ + assistantTexts: [], + toolMetas: [], + lastAssistant: undefined, + lastToolError: { toolName: "browser", error: "url missing" }, + sessionKey: "session:telegram", + inlineToolResultsAllowed: false, + verboseLevel: "off", + reasoningLevel: "off", + toolResultFormat: "plain", + }); + + expect(payloads).toHaveLength(0); + }); + + it("suppresses recoverable tool errors containing 'invalid' for non-mutating tools", () => { + const payloads = buildEmbeddedRunPayloads({ + assistantTexts: [], + toolMetas: [], + lastAssistant: undefined, + lastToolError: { toolName: "browser", error: "invalid parameter: url" }, + sessionKey: "session:telegram", + inlineToolResultsAllowed: false, + verboseLevel: "off", + reasoningLevel: "off", + toolResultFormat: "plain", + }); + + expect(payloads).toHaveLength(0); + }); + + it("shows recoverable tool errors for mutating tools", () => { const payloads = buildEmbeddedRunPayloads({ assistantTexts: [], toolMetas: [], @@ -242,16 +291,17 @@ describe("buildEmbeddedRunPayloads", () => { toolResultFormat: "plain", }); - // Recoverable errors should not be sent to the user - expect(payloads).toHaveLength(0); + expect(payloads).toHaveLength(1); + expect(payloads[0]?.isError).toBe(true); + expect(payloads[0]?.text).toContain("required"); }); - it("suppresses recoverable tool errors containing 'missing'", () => { + it("shows mutating tool errors even when assistant output exists", () => { const payloads = buildEmbeddedRunPayloads({ - assistantTexts: [], + assistantTexts: ["Done."], toolMetas: [], - lastAssistant: undefined, - lastToolError: { toolName: "message", error: "messageId missing" }, + lastAssistant: { stopReason: "end_turn" } as AssistantMessage, + lastToolError: { toolName: "write", error: "file missing" }, sessionKey: "session:telegram", inlineToolResultsAllowed: false, verboseLevel: "off", @@ -259,15 +309,22 @@ describe("buildEmbeddedRunPayloads", () => { toolResultFormat: "plain", }); - expect(payloads).toHaveLength(0); + expect(payloads).toHaveLength(2); + expect(payloads[0]?.text).toBe("Done."); + expect(payloads[1]?.isError).toBe(true); + expect(payloads[1]?.text).toContain("missing"); }); - it("suppresses recoverable tool errors containing 'invalid'", () => { + it("does not treat session_status read failures as mutating when explicitly flagged", () => { const payloads = buildEmbeddedRunPayloads({ - assistantTexts: [], + assistantTexts: ["Status loaded."], toolMetas: [], - lastAssistant: undefined, - lastToolError: { toolName: "message", error: "invalid parameter: to" }, + lastAssistant: { stopReason: "end_turn" } as AssistantMessage, + lastToolError: { + toolName: "session_status", + error: "model required", + mutatingAction: false, + }, sessionKey: "session:telegram", inlineToolResultsAllowed: false, verboseLevel: "off", @@ -275,7 +332,47 @@ describe("buildEmbeddedRunPayloads", () => { toolResultFormat: "plain", }); - expect(payloads).toHaveLength(0); + expect(payloads).toHaveLength(1); + expect(payloads[0]?.text).toBe("Status loaded."); + }); + + it("dedupes identical tool warning text already present in assistant output", () => { + const seed = buildEmbeddedRunPayloads({ + assistantTexts: [], + toolMetas: [], + lastAssistant: undefined, + lastToolError: { + toolName: "write", + error: "file missing", + mutatingAction: true, + }, + sessionKey: "session:telegram", + inlineToolResultsAllowed: false, + verboseLevel: "off", + reasoningLevel: "off", + toolResultFormat: "plain", + }); + const warningText = seed[0]?.text; + expect(warningText).toBeTruthy(); + + const payloads = buildEmbeddedRunPayloads({ + assistantTexts: [warningText ?? ""], + toolMetas: [], + lastAssistant: { stopReason: "end_turn" } as AssistantMessage, + lastToolError: { + toolName: "write", + error: "file missing", + mutatingAction: true, + }, + sessionKey: "session:telegram", + inlineToolResultsAllowed: false, + verboseLevel: "off", + reasoningLevel: "off", + toolResultFormat: "plain", + }); + + expect(payloads).toHaveLength(1); + expect(payloads[0]?.text).toBe(warningText); }); it("shows non-recoverable tool errors to the user", () => { diff --git a/src/agents/pi-embedded-runner/run/payloads.ts b/src/agents/pi-embedded-runner/run/payloads.ts index 440f7eaed4..ad5d99d0a8 100644 --- a/src/agents/pi-embedded-runner/run/payloads.ts +++ b/src/agents/pi-embedded-runner/run/payloads.ts @@ -18,6 +18,7 @@ import { extractAssistantThinking, formatReasoningMessage, } from "../../pi-embedded-utils.js"; +import { isLikelyMutatingToolName } from "../../tool-mutation.js"; type ToolMetaEntry = { toolName: string; meta?: string }; @@ -25,7 +26,13 @@ export function buildEmbeddedRunPayloads(params: { assistantTexts: string[]; toolMetas: ToolMetaEntry[]; lastAssistant: AssistantMessage | undefined; - lastToolError?: { toolName: string; meta?: string; error?: string }; + lastToolError?: { + toolName: string; + meta?: string; + error?: string; + mutatingAction?: boolean; + actionFingerprint?: string; + }; config?: OpenClawConfig; sessionKey: string; provider?: string; @@ -223,22 +230,37 @@ export function buildEmbeddedRunPayloads(params: { errorLower.includes("must have") || errorLower.includes("needs") || errorLower.includes("requires"); + const isMutatingToolError = + params.lastToolError.mutatingAction ?? + isLikelyMutatingToolName(params.lastToolError.toolName); + const shouldShowToolError = isMutatingToolError || (!hasUserFacingReply && !isRecoverableError); - // Show tool errors only when: - // 1. There's no user-facing reply AND the error is not recoverable - // Recoverable errors (validation, missing params) are already in the model's context - // and shouldn't be surfaced to users since the model should retry. - if (!hasUserFacingReply && !isRecoverableError) { + // Always surface mutating tool failures so we do not silently confirm actions that did not happen. + // Otherwise, keep the previous behavior and only surface non-recoverable failures when no reply exists. + if (shouldShowToolError) { const toolSummary = formatToolAggregate( params.lastToolError.toolName, params.lastToolError.meta ? [params.lastToolError.meta] : undefined, { markdown: useMarkdown }, ); const errorSuffix = params.lastToolError.error ? `: ${params.lastToolError.error}` : ""; - replyItems.push({ - text: `⚠️ ${toolSummary} failed${errorSuffix}`, - isError: true, - }); + const warningText = `⚠️ ${toolSummary} failed${errorSuffix}`; + const normalizedWarning = normalizeTextForComparison(warningText); + const duplicateWarning = normalizedWarning + ? replyItems.some((item) => { + if (!item.text) { + return false; + } + const normalizedExisting = normalizeTextForComparison(item.text); + return normalizedExisting.length > 0 && normalizedExisting === normalizedWarning; + }) + : false; + if (!duplicateWarning) { + replyItems.push({ + text: warningText, + isError: true, + }); + } } } diff --git a/src/agents/pi-embedded-runner/run/types.ts b/src/agents/pi-embedded-runner/run/types.ts index dc05d0a128..2d22e0a953 100644 --- a/src/agents/pi-embedded-runner/run/types.ts +++ b/src/agents/pi-embedded-runner/run/types.ts @@ -33,7 +33,13 @@ export type EmbeddedRunAttemptResult = { assistantTexts: string[]; toolMetas: Array<{ toolName: string; meta?: string }>; lastAssistant: AssistantMessage | undefined; - lastToolError?: { toolName: string; meta?: string; error?: string }; + lastToolError?: { + toolName: string; + meta?: string; + error?: string; + mutatingAction?: boolean; + actionFingerprint?: string; + }; didSendViaMessagingTool: boolean; messagingToolSentTexts: string[]; messagingToolSentTargets: MessagingToolSend[]; diff --git a/src/agents/pi-embedded-subscribe.handlers.tools.ts b/src/agents/pi-embedded-subscribe.handlers.tools.ts index ba2151be8d..a733e4f147 100644 --- a/src/agents/pi-embedded-subscribe.handlers.tools.ts +++ b/src/agents/pi-embedded-subscribe.handlers.tools.ts @@ -1,6 +1,9 @@ import type { AgentEvent } from "@mariozechner/pi-agent-core"; import type { PluginHookAfterToolCallEvent } from "../plugins/types.js"; -import type { EmbeddedPiSubscribeContext } from "./pi-embedded-subscribe.handlers.types.js"; +import type { + EmbeddedPiSubscribeContext, + ToolCallSummary, +} from "./pi-embedded-subscribe.handlers.types.js"; import { emitAgentEvent } from "../infra/agent-events.js"; import { getGlobalHookRunner } from "../plugins/hook-runner-global.js"; import { normalizeTextForComparison } from "./pi-embedded-helpers.js"; @@ -13,10 +16,21 @@ import { sanitizeToolResult, } from "./pi-embedded-subscribe.tools.js"; import { inferToolMetaFromArgs } from "./pi-embedded-utils.js"; +import { buildToolMutationState, isSameToolMutationAction } from "./tool-mutation.js"; import { normalizeToolName } from "./tool-policy.js"; /** Track tool execution start times and args for after_tool_call hook */ const toolStartData = new Map(); + +function buildToolCallSummary(toolName: string, args: unknown, meta?: string): ToolCallSummary { + const mutation = buildToolMutationState(toolName, args, meta); + return { + meta, + mutatingAction: mutation.mutatingAction, + actionFingerprint: mutation.actionFingerprint, + }; +} + function extendExecMeta(toolName: string, args: unknown, meta?: string): string | undefined { const normalized = toolName.trim().toLowerCase(); if (normalized !== "exec" && normalized !== "bash") { @@ -70,7 +84,7 @@ export async function handleToolExecutionStart( } const meta = extendExecMeta(toolName, args, inferToolMetaFromArgs(toolName, args)); - ctx.state.toolMetaById.set(toolCallId, meta); + ctx.state.toolMetaById.set(toolCallId, buildToolCallSummary(toolName, args, meta)); ctx.log.debug( `embedded run tool start: runId=${ctx.params.runId} tool=${toolName} toolCallId=${toolCallId}`, ); @@ -167,7 +181,8 @@ export async function handleToolExecutionEnd( const result = evt.result; const isToolError = isError || isToolResultError(result); const sanitizedResult = sanitizeToolResult(result); - const meta = ctx.state.toolMetaById.get(toolCallId); + const callSummary = ctx.state.toolMetaById.get(toolCallId); + const meta = callSummary?.meta; ctx.state.toolMetas.push({ toolName, meta }); ctx.state.toolMetaById.delete(toolCallId); ctx.state.toolSummaryById.delete(toolCallId); @@ -177,7 +192,24 @@ export async function handleToolExecutionEnd( toolName, meta, error: errorMessage, + mutatingAction: callSummary?.mutatingAction, + actionFingerprint: callSummary?.actionFingerprint, }; + } else if (ctx.state.lastToolError) { + // Keep unresolved mutating failures until the same action succeeds. + if (ctx.state.lastToolError.mutatingAction) { + if ( + isSameToolMutationAction(ctx.state.lastToolError, { + toolName, + meta, + actionFingerprint: callSummary?.actionFingerprint, + }) + ) { + ctx.state.lastToolError = undefined; + } + } else { + ctx.state.lastToolError = undefined; + } } // Commit messaging tool text on success, discard on error. diff --git a/src/agents/pi-embedded-subscribe.handlers.types.ts b/src/agents/pi-embedded-subscribe.handlers.types.ts index 4dd091ab55..aa70dc4e91 100644 --- a/src/agents/pi-embedded-subscribe.handlers.types.ts +++ b/src/agents/pi-embedded-subscribe.handlers.types.ts @@ -20,12 +20,20 @@ export type ToolErrorSummary = { toolName: string; meta?: string; error?: string; + mutatingAction?: boolean; + actionFingerprint?: string; +}; + +export type ToolCallSummary = { + meta?: string; + mutatingAction: boolean; + actionFingerprint?: string; }; export type EmbeddedPiSubscribeState = { assistantTexts: string[]; toolMetas: Array<{ toolName?: string; meta?: string }>; - toolMetaById: Map; + toolMetaById: Map; toolSummaryById: Set; lastToolError?: ToolErrorSummary; diff --git a/src/agents/pi-embedded-subscribe.subscribe-embedded-pi-session.subscribeembeddedpisession.e2e.test.ts b/src/agents/pi-embedded-subscribe.subscribe-embedded-pi-session.subscribeembeddedpisession.e2e.test.ts index 42c0158af4..b53ffa62e5 100644 --- a/src/agents/pi-embedded-subscribe.subscribe-embedded-pi-session.subscribeembeddedpisession.e2e.test.ts +++ b/src/agents/pi-embedded-subscribe.subscribe-embedded-pi-session.subscribeembeddedpisession.e2e.test.ts @@ -318,6 +318,192 @@ describe("subscribeEmbeddedPiSession", () => { expect(payloads[0]?.mediaUrls).toEqual(["https://example.com/a.png"]); }); + it("keeps unresolved mutating failure when an unrelated tool succeeds", () => { + let handler: ((evt: unknown) => void) | undefined; + const session: StubSession = { + subscribe: (fn) => { + handler = fn; + return () => {}; + }, + }; + + const subscription = subscribeEmbeddedPiSession({ + session: session as unknown as Parameters[0]["session"], + runId: "run-tools-1", + sessionKey: "test-session", + }); + + handler?.({ + type: "tool_execution_start", + toolName: "write", + toolCallId: "w1", + args: { path: "/tmp/demo.txt", content: "next" }, + }); + handler?.({ + type: "tool_execution_end", + toolName: "write", + toolCallId: "w1", + isError: true, + result: { error: "disk full" }, + }); + expect(subscription.getLastToolError()?.toolName).toBe("write"); + + handler?.({ + type: "tool_execution_start", + toolName: "read", + toolCallId: "r1", + args: { path: "/tmp/demo.txt" }, + }); + handler?.({ + type: "tool_execution_end", + toolName: "read", + toolCallId: "r1", + isError: false, + result: { text: "ok" }, + }); + + expect(subscription.getLastToolError()?.toolName).toBe("write"); + }); + + it("clears unresolved mutating failure when the same action succeeds", () => { + let handler: ((evt: unknown) => void) | undefined; + const session: StubSession = { + subscribe: (fn) => { + handler = fn; + return () => {}; + }, + }; + + const subscription = subscribeEmbeddedPiSession({ + session: session as unknown as Parameters[0]["session"], + runId: "run-tools-2", + sessionKey: "test-session", + }); + + handler?.({ + type: "tool_execution_start", + toolName: "write", + toolCallId: "w1", + args: { path: "/tmp/demo.txt", content: "next" }, + }); + handler?.({ + type: "tool_execution_end", + toolName: "write", + toolCallId: "w1", + isError: true, + result: { error: "disk full" }, + }); + expect(subscription.getLastToolError()?.toolName).toBe("write"); + + handler?.({ + type: "tool_execution_start", + toolName: "write", + toolCallId: "w2", + args: { path: "/tmp/demo.txt", content: "retry" }, + }); + handler?.({ + type: "tool_execution_end", + toolName: "write", + toolCallId: "w2", + isError: false, + result: { ok: true }, + }); + + expect(subscription.getLastToolError()).toBeUndefined(); + }); + + it("keeps unresolved mutating failure when same tool succeeds on a different target", () => { + let handler: ((evt: unknown) => void) | undefined; + const session: StubSession = { + subscribe: (fn) => { + handler = fn; + return () => {}; + }, + }; + + const subscription = subscribeEmbeddedPiSession({ + session: session as unknown as Parameters[0]["session"], + runId: "run-tools-3", + sessionKey: "test-session", + }); + + handler?.({ + type: "tool_execution_start", + toolName: "write", + toolCallId: "w1", + args: { path: "/tmp/a.txt", content: "first" }, + }); + handler?.({ + type: "tool_execution_end", + toolName: "write", + toolCallId: "w1", + isError: true, + result: { error: "disk full" }, + }); + + handler?.({ + type: "tool_execution_start", + toolName: "write", + toolCallId: "w2", + args: { path: "/tmp/b.txt", content: "second" }, + }); + handler?.({ + type: "tool_execution_end", + toolName: "write", + toolCallId: "w2", + isError: false, + result: { ok: true }, + }); + + expect(subscription.getLastToolError()?.toolName).toBe("write"); + }); + + it("keeps unresolved session_status model-mutation failure on later read-only status success", () => { + let handler: ((evt: unknown) => void) | undefined; + const session: StubSession = { + subscribe: (fn) => { + handler = fn; + return () => {}; + }, + }; + + const subscription = subscribeEmbeddedPiSession({ + session: session as unknown as Parameters[0]["session"], + runId: "run-tools-4", + sessionKey: "test-session", + }); + + handler?.({ + type: "tool_execution_start", + toolName: "session_status", + toolCallId: "s1", + args: { sessionKey: "agent:main:main", model: "openai/gpt-4o" }, + }); + handler?.({ + type: "tool_execution_end", + toolName: "session_status", + toolCallId: "s1", + isError: true, + result: { error: "Model not allowed." }, + }); + + handler?.({ + type: "tool_execution_start", + toolName: "session_status", + toolCallId: "s2", + args: { sessionKey: "agent:main:main" }, + }); + handler?.({ + type: "tool_execution_end", + toolName: "session_status", + toolCallId: "s2", + isError: false, + result: { ok: true }, + }); + + expect(subscription.getLastToolError()?.toolName).toBe("session_status"); + }); + it("emits lifecycle:error event on agent_end when last assistant message was an error", async () => { let handler: ((evt: unknown) => void) | undefined; const session: StubSession = { diff --git a/src/agents/system-prompt-report.test.ts b/src/agents/system-prompt-report.test.ts new file mode 100644 index 0000000000..c2737865b6 --- /dev/null +++ b/src/agents/system-prompt-report.test.ts @@ -0,0 +1,47 @@ +import { describe, expect, it } from "vitest"; +import type { WorkspaceBootstrapFile } from "./workspace.js"; +import { buildSystemPromptReport } from "./system-prompt-report.js"; + +function makeBootstrapFile(overrides: Partial): WorkspaceBootstrapFile { + return { + name: "AGENTS.md", + path: "/tmp/workspace/AGENTS.md", + content: "alpha", + missing: false, + ...overrides, + }; +} + +describe("buildSystemPromptReport", () => { + it("counts injected chars when injected file paths are absolute", () => { + const file = makeBootstrapFile({ path: "/tmp/workspace/policies/AGENTS.md" }); + const report = buildSystemPromptReport({ + source: "run", + generatedAt: 0, + bootstrapMaxChars: 20_000, + systemPrompt: "system", + bootstrapFiles: [file], + injectedFiles: [{ path: "/tmp/workspace/policies/AGENTS.md", content: "trimmed" }], + skillsPrompt: "", + tools: [], + }); + + expect(report.injectedWorkspaceFiles[0]?.injectedChars).toBe("trimmed".length); + }); + + it("keeps legacy basename matching for injected files", () => { + const file = makeBootstrapFile({ path: "/tmp/workspace/policies/AGENTS.md" }); + const report = buildSystemPromptReport({ + source: "run", + generatedAt: 0, + bootstrapMaxChars: 20_000, + systemPrompt: "system", + bootstrapFiles: [file], + injectedFiles: [{ path: "AGENTS.md", content: "trimmed" }], + skillsPrompt: "", + tools: [], + }); + + expect(report.injectedWorkspaceFiles[0]?.injectedChars).toBe("trimmed".length); + }); +}); diff --git a/src/agents/system-prompt-report.ts b/src/agents/system-prompt-report.ts index 4f4b43fb06..5783202e10 100644 --- a/src/agents/system-prompt-report.ts +++ b/src/agents/system-prompt-report.ts @@ -1,4 +1,5 @@ import type { AgentTool } from "@mariozechner/pi-agent-core"; +import path from "node:path"; import type { SessionSystemPromptReport } from "../config/sessions/types.js"; import type { EmbeddedContextFile } from "./pi-embedded-helpers.js"; import type { WorkspaceBootstrapFile } from "./workspace.js"; @@ -40,10 +41,21 @@ function buildInjectedWorkspaceFiles(params: { injectedFiles: EmbeddedContextFile[]; bootstrapMaxChars: number; }): SessionSystemPromptReport["injectedWorkspaceFiles"] { - const injectedByName = new Map(params.injectedFiles.map((f) => [f.path, f.content])); + const injectedByPath = new Map(params.injectedFiles.map((f) => [f.path, f.content])); + const injectedByBaseName = new Map(); + for (const file of params.injectedFiles) { + const normalizedPath = file.path.replace(/\\/g, "/"); + const baseName = path.posix.basename(normalizedPath); + if (!injectedByBaseName.has(baseName)) { + injectedByBaseName.set(baseName, file.content); + } + } return params.bootstrapFiles.map((file) => { const rawChars = file.missing ? 0 : (file.content ?? "").trimEnd().length; - const injected = injectedByName.get(file.name); + const injected = + injectedByPath.get(file.path) ?? + injectedByPath.get(file.name) ?? + injectedByBaseName.get(file.name); const injectedChars = injected ? injected.length : 0; const truncated = !file.missing && rawChars > params.bootstrapMaxChars; return { diff --git a/src/agents/tool-mutation.test.ts b/src/agents/tool-mutation.test.ts new file mode 100644 index 0000000000..3eb417a71b --- /dev/null +++ b/src/agents/tool-mutation.test.ts @@ -0,0 +1,70 @@ +import { describe, expect, it } from "vitest"; +import { + buildToolActionFingerprint, + buildToolMutationState, + isLikelyMutatingToolName, + isMutatingToolCall, + isSameToolMutationAction, +} from "./tool-mutation.js"; + +describe("tool mutation helpers", () => { + it("treats session_status as mutating only when model override is provided", () => { + expect(isMutatingToolCall("session_status", { sessionKey: "agent:main:main" })).toBe(false); + expect( + isMutatingToolCall("session_status", { + sessionKey: "agent:main:main", + model: "openai/gpt-4o", + }), + ).toBe(true); + }); + + it("builds stable fingerprints for mutating calls and omits read-only calls", () => { + const writeFingerprint = buildToolActionFingerprint( + "write", + { path: "/tmp/demo.txt", id: 42 }, + "write /tmp/demo.txt", + ); + expect(writeFingerprint).toContain("tool=write"); + expect(writeFingerprint).toContain("path=/tmp/demo.txt"); + expect(writeFingerprint).toContain("id=42"); + expect(writeFingerprint).toContain("meta=write /tmp/demo.txt"); + + const readFingerprint = buildToolActionFingerprint("read", { path: "/tmp/demo.txt" }); + expect(readFingerprint).toBeUndefined(); + }); + + it("exposes mutation state for downstream payload rendering", () => { + expect( + buildToolMutationState("message", { action: "send", to: "telegram:1" }).mutatingAction, + ).toBe(true); + expect(buildToolMutationState("browser", { action: "list" }).mutatingAction).toBe(false); + }); + + it("matches tool actions by fingerprint and fails closed on asymmetric data", () => { + expect( + isSameToolMutationAction( + { toolName: "write", actionFingerprint: "tool=write|path=/tmp/a" }, + { toolName: "write", actionFingerprint: "tool=write|path=/tmp/a" }, + ), + ).toBe(true); + expect( + isSameToolMutationAction( + { toolName: "write", actionFingerprint: "tool=write|path=/tmp/a" }, + { toolName: "write", actionFingerprint: "tool=write|path=/tmp/b" }, + ), + ).toBe(false); + expect( + isSameToolMutationAction( + { toolName: "write", actionFingerprint: "tool=write|path=/tmp/a" }, + { toolName: "write" }, + ), + ).toBe(false); + }); + + it("keeps legacy name-only mutating heuristics for payload fallback", () => { + expect(isLikelyMutatingToolName("sessions_send")).toBe(true); + expect(isLikelyMutatingToolName("browser_actions")).toBe(true); + expect(isLikelyMutatingToolName("message_slack")).toBe(true); + expect(isLikelyMutatingToolName("browser")).toBe(false); + }); +}); diff --git a/src/agents/tool-mutation.ts b/src/agents/tool-mutation.ts new file mode 100644 index 0000000000..22b0e7af9d --- /dev/null +++ b/src/agents/tool-mutation.ts @@ -0,0 +1,201 @@ +const MUTATING_TOOL_NAMES = new Set([ + "write", + "edit", + "apply_patch", + "exec", + "bash", + "process", + "message", + "sessions_send", + "cron", + "gateway", + "canvas", + "nodes", + "session_status", +]); + +const READ_ONLY_ACTIONS = new Set([ + "get", + "list", + "read", + "status", + "show", + "fetch", + "search", + "query", + "view", + "poll", + "log", + "inspect", + "check", + "probe", +]); + +const PROCESS_MUTATING_ACTIONS = new Set(["write", "send_keys", "submit", "paste", "kill"]); + +const MESSAGE_MUTATING_ACTIONS = new Set([ + "send", + "reply", + "thread_reply", + "threadreply", + "edit", + "delete", + "react", + "pin", + "unpin", +]); + +export type ToolMutationState = { + mutatingAction: boolean; + actionFingerprint?: string; +}; + +export type ToolActionRef = { + toolName: string; + meta?: string; + actionFingerprint?: string; +}; + +function asRecord(value: unknown): Record | undefined { + return value && typeof value === "object" ? (value as Record) : undefined; +} + +function normalizeActionName(value: unknown): string | undefined { + if (typeof value !== "string") { + return undefined; + } + const normalized = value + .trim() + .toLowerCase() + .replace(/[\s-]+/g, "_"); + return normalized || undefined; +} + +function normalizeFingerprintValue(value: unknown): string | undefined { + if (typeof value === "string") { + const normalized = value.trim(); + return normalized ? normalized.toLowerCase() : undefined; + } + if (typeof value === "number" || typeof value === "bigint" || typeof value === "boolean") { + return String(value).toLowerCase(); + } + return undefined; +} + +export function isLikelyMutatingToolName(toolName: string): boolean { + const normalized = toolName.trim().toLowerCase(); + if (!normalized) { + return false; + } + return ( + MUTATING_TOOL_NAMES.has(normalized) || + normalized.endsWith("_actions") || + normalized.startsWith("message_") || + normalized.includes("send") + ); +} + +export function isMutatingToolCall(toolName: string, args: unknown): boolean { + const normalized = toolName.trim().toLowerCase(); + const record = asRecord(args); + const action = normalizeActionName(record?.action); + + switch (normalized) { + case "write": + case "edit": + case "apply_patch": + case "exec": + case "bash": + case "sessions_send": + return true; + case "process": + return action != null && PROCESS_MUTATING_ACTIONS.has(action); + case "message": + return ( + (action != null && MESSAGE_MUTATING_ACTIONS.has(action)) || + typeof record?.content === "string" || + typeof record?.message === "string" + ); + case "session_status": + return typeof record?.model === "string" && record.model.trim().length > 0; + default: { + if (normalized === "cron" || normalized === "gateway" || normalized === "canvas") { + return action == null || !READ_ONLY_ACTIONS.has(action); + } + if (normalized === "nodes") { + return action == null || action !== "list"; + } + if (normalized.endsWith("_actions")) { + return action == null || !READ_ONLY_ACTIONS.has(action); + } + if (normalized.startsWith("message_") || normalized.includes("send")) { + return true; + } + return false; + } + } +} + +export function buildToolActionFingerprint( + toolName: string, + args: unknown, + meta?: string, +): string | undefined { + if (!isMutatingToolCall(toolName, args)) { + return undefined; + } + const normalizedTool = toolName.trim().toLowerCase(); + const record = asRecord(args); + const action = normalizeActionName(record?.action); + const parts = [`tool=${normalizedTool}`]; + if (action) { + parts.push(`action=${action}`); + } + for (const key of [ + "path", + "filePath", + "oldPath", + "newPath", + "to", + "target", + "messageId", + "sessionKey", + "jobId", + "id", + "model", + ]) { + const value = normalizeFingerprintValue(record?.[key]); + if (value) { + parts.push(`${key.toLowerCase()}=${value}`); + } + } + const normalizedMeta = meta?.trim().replace(/\s+/g, " ").toLowerCase(); + if (normalizedMeta) { + parts.push(`meta=${normalizedMeta}`); + } + return parts.join("|"); +} + +export function buildToolMutationState( + toolName: string, + args: unknown, + meta?: string, +): ToolMutationState { + const actionFingerprint = buildToolActionFingerprint(toolName, args, meta); + return { + mutatingAction: actionFingerprint != null, + actionFingerprint, + }; +} + +export function isSameToolMutationAction(existing: ToolActionRef, next: ToolActionRef): boolean { + if (existing.actionFingerprint != null || next.actionFingerprint != null) { + // For mutating flows, fail closed: only clear when both fingerprints exist and match. + return ( + existing.actionFingerprint != null && + next.actionFingerprint != null && + existing.actionFingerprint === next.actionFingerprint + ); + } + return existing.toolName === next.toolName && (existing.meta ?? "") === (next.meta ?? ""); +}