diff --git a/docs/gateway/configuration.md b/docs/gateway/configuration.md index 46ba7af67b..bfce441c30 100644 --- a/docs/gateway/configuration.md +++ b/docs/gateway/configuration.md @@ -126,6 +126,7 @@ When validation fails: - `agents.defaults.models` defines the model catalog and acts as the allowlist for `/model`. - Model refs use `provider/model` format (e.g. `anthropic/claude-opus-4-6`). + - `agents.defaults.imageMaxDimensionPx` controls transcript/tool image downscaling (default `1200`). - See [Models CLI](/concepts/models) for switching models in chat and [Model Failover](/concepts/model-failover) for auth rotation and fallback behavior. - For custom/self-hosted providers, see [Custom providers](/gateway/configuration-reference#custom-providers-and-base-urls) in the reference. diff --git a/docs/reference/transcript-hygiene.md b/docs/reference/transcript-hygiene.md index 5155f2f297..95e029aec7 100644 --- a/docs/reference/transcript-hygiene.md +++ b/docs/reference/transcript-hygiene.md @@ -57,6 +57,7 @@ Implementation: - `sanitizeSessionMessagesImages` in `src/agents/pi-embedded-helpers/images.ts` - `sanitizeContentBlocksImages` in `src/agents/tool-images.ts` +- Max image side is configurable via `agents.defaults.imageMaxDimensionPx` (default: `1200`). 
--- diff --git a/src/agents/image-sanitization.test.ts b/src/agents/image-sanitization.test.ts new file mode 100644 index 0000000000..f6eb34e4b2 --- /dev/null +++ b/src/agents/image-sanitization.test.ts @@ -0,0 +1,20 @@ +import { describe, expect, it } from "vitest"; +import type { OpenClawConfig } from "../config/config.js"; +import { resolveImageSanitizationLimits } from "./image-sanitization.js"; + +describe("image sanitization config", () => { /* Covers resolveImageSanitizationLimits: no configured value yields no overrides; a configured value is normalized to an integer. */ + it("defaults when no config value exists", () => { /* Both an undefined config and an empty agents.defaults must resolve to an empty limits object. */ + expect(resolveImageSanitizationLimits(undefined)).toEqual({}); + expect( + resolveImageSanitizationLimits({ agents: { defaults: {} } } as unknown as OpenClawConfig), + ).toEqual({}); + }); + + it("reads and normalizes agents.defaults.imageMaxDimensionPx", () => { /* A fractional value (1600.9) is expected to be floored to 1600. */ + expect( + resolveImageSanitizationLimits({ + agents: { defaults: { imageMaxDimensionPx: 1600.9 } }, + } as unknown as OpenClawConfig), + ).toEqual({ maxDimensionPx: 1600 }); + }); +}); diff --git a/src/agents/image-sanitization.ts b/src/agents/image-sanitization.ts new file mode 100644 index 0000000000..0c40972671 --- /dev/null +++ b/src/agents/image-sanitization.ts @@ -0,0 +1,17 @@ +import type { OpenClawConfig } from "../config/config.js"; + +/** Optional caps applied when sanitizing image payloads. An omitted field leaves the consumer's own default in effect (tool-images.ts falls back via `??`). */ export type ImageSanitizationLimits = { + /** Longest allowed image side, in pixels. */ maxDimensionPx?: number; + /** Upper bound on image size, in bytes. */ maxBytes?: number; +}; + +/** Fallback max image side in pixels, used when no `maxDimensionPx` is supplied. */ export const DEFAULT_IMAGE_MAX_DIMENSION_PX = 1200; +/** Fallback max image size in bytes (5 MiB), used when no `maxBytes` is supplied. */ export const DEFAULT_IMAGE_MAX_BYTES = 5 * 1024 * 1024; + +/** Derives image sanitization limits from `agents.defaults.imageMaxDimensionPx`. Returns `{}` when the value is missing, not a number, or not finite, so callers keep their defaults; otherwise returns the value floored to an integer and clamped to at least 1. `maxBytes` is never set by this function. @param cfg Loaded config, if any. @returns Limits containing at most `maxDimensionPx`. */ export function resolveImageSanitizationLimits(cfg?: OpenClawConfig): ImageSanitizationLimits { + const configured = cfg?.agents?.defaults?.imageMaxDimensionPx; + if (typeof configured !== "number" || !Number.isFinite(configured)) { /* rejects undefined, non-numbers, NaN and +/-Infinity */ + return {}; + } + return { maxDimensionPx: Math.max(1, Math.floor(configured)) }; /* floor fractional input (e.g. 1600.9 -> 1600); never below 1px */ +} diff --git a/src/agents/openclaw-tools.ts b/src/agents/openclaw-tools.ts index 83590d3bf8..cad2b88d94 100644 --- a/src/agents/openclaw-tools.ts +++ b/src/agents/openclaw-tools.ts @@ -1,12 +1,12 @@ import type { OpenClawConfig } from 
"../config/config.js"; -import { resolvePluginTools } from "../plugins/tools.js"; import type { GatewayMessageChannel } from "../utils/message-channel.js"; -import { resolveSessionAgentId } from "./agent-scope.js"; import type { SandboxFsBridge } from "./sandbox/fs-bridge.js"; +import type { AnyAgentTool } from "./tools/common.js"; +import { resolvePluginTools } from "../plugins/tools.js"; +import { resolveSessionAgentId } from "./agent-scope.js"; import { createAgentsListTool } from "./tools/agents-list-tool.js"; import { createBrowserTool } from "./tools/browser-tool.js"; import { createCanvasTool } from "./tools/canvas-tool.js"; -import type { AnyAgentTool } from "./tools/common.js"; import { createCronTool } from "./tools/cron-tool.js"; import { createGatewayTool } from "./tools/gateway-tool.js"; import { createImageTool } from "./tools/image-tool.js"; @@ -102,7 +102,7 @@ export function createOpenClawTools(options?: { sandboxBridgeUrl: options?.sandboxBrowserBridgeUrl, allowHostControl: options?.allowHostBrowserControl, }), - createCanvasTool(), + createCanvasTool({ config: options?.config }), createNodesTool({ agentSessionKey: options?.agentSessionKey, config: options?.config, diff --git a/src/agents/pi-embedded-helpers/images.ts b/src/agents/pi-embedded-helpers/images.ts index 9162bb812b..c3b4d0a371 100644 --- a/src/agents/pi-embedded-helpers/images.ts +++ b/src/agents/pi-embedded-helpers/images.ts @@ -1,4 +1,5 @@ import type { AgentMessage, AgentToolResult } from "@mariozechner/pi-agent-core"; +import type { ImageSanitizationLimits } from "../image-sanitization.js"; import type { ToolCallIdMode } from "../tool-call-id.js"; import { sanitizeToolCallIdsForCloudCodeAssist } from "../tool-call-id.js"; import { sanitizeContentBlocksImages } from "../tool-images.js"; @@ -45,12 +46,16 @@ export async function sanitizeSessionMessagesImages( allowBase64Only?: boolean; includeCamelCase?: boolean; }; - }, + } & ImageSanitizationLimits, ): Promise { const sanitizeMode 
= options?.sanitizeMode ?? "full"; const allowNonImageSanitization = sanitizeMode === "full"; + const imageSanitization = { + maxDimensionPx: options?.maxDimensionPx, + maxBytes: options?.maxBytes, + }; // We sanitize historical session messages because Anthropic can reject a request - // if the transcript contains oversized base64 images (see MAX_IMAGE_DIMENSION_PX). + // if the transcript contains oversized base64 images (default max side 1200px). const sanitizedIds = allowNonImageSanitization && options?.sanitizeToolCallIds ? sanitizeToolCallIdsForCloudCodeAssist(messages, options.toolCallIdMode) @@ -69,6 +74,7 @@ export async function sanitizeSessionMessagesImages( const nextContent = (await sanitizeContentBlocksImages( content, label, + imageSanitization, )) as unknown as typeof toolMsg.content; out.push({ ...toolMsg, content: nextContent }); continue; @@ -81,6 +87,7 @@ export async function sanitizeSessionMessagesImages( const nextContent = (await sanitizeContentBlocksImages( content as unknown as ContentBlock[], label, + imageSanitization, )) as unknown as typeof userMsg.content; out.push({ ...userMsg, content: nextContent }); continue; @@ -95,6 +102,7 @@ export async function sanitizeSessionMessagesImages( const nextContent = (await sanitizeContentBlocksImages( content as unknown as ContentBlock[], label, + imageSanitization, )) as unknown as typeof assistantMsg.content; out.push({ ...assistantMsg, content: nextContent }); } else { @@ -108,6 +116,7 @@ export async function sanitizeSessionMessagesImages( const nextContent = (await sanitizeContentBlocksImages( content as unknown as ContentBlock[], label, + imageSanitization, )) as unknown as typeof assistantMsg.content; out.push({ ...assistantMsg, content: nextContent }); continue; @@ -129,6 +138,7 @@ export async function sanitizeSessionMessagesImages( const finalContent = (await sanitizeContentBlocksImages( filteredContent as unknown as ContentBlock[], label, + imageSanitization, )) as unknown as typeof 
assistantMsg.content; if (finalContent.length === 0) { continue; diff --git a/src/agents/pi-embedded-runner/compact.ts b/src/agents/pi-embedded-runner/compact.ts index 4197da59bf..94e48576e6 100644 --- a/src/agents/pi-embedded-runner/compact.ts +++ b/src/agents/pi-embedded-runner/compact.ts @@ -1,5 +1,3 @@ -import fs from "node:fs/promises"; -import os from "node:os"; import type { AgentMessage } from "@mariozechner/pi-agent-core"; import { createAgentSession, @@ -7,10 +5,14 @@ import { SessionManager, SettingsManager, } from "@mariozechner/pi-coding-agent"; -import { resolveHeartbeatPrompt } from "../../auto-reply/heartbeat.js"; +import fs from "node:fs/promises"; +import os from "node:os"; import type { ReasoningLevel, ThinkLevel } from "../../auto-reply/thinking.js"; -import { resolveChannelCapabilities } from "../../config/channel-capabilities.js"; import type { OpenClawConfig } from "../../config/config.js"; +import type { ExecElevatedDefaults } from "../bash-tools.js"; +import type { EmbeddedPiCompactResult } from "./types.js"; +import { resolveHeartbeatPrompt } from "../../auto-reply/heartbeat.js"; +import { resolveChannelCapabilities } from "../../config/channel-capabilities.js"; import { getMachineDisplayName } from "../../infra/machine-name.js"; import { getGlobalHookRunner } from "../../plugins/hook-runner-global.js"; import { type enqueueCommand, enqueueCommandInLane } from "../../process/command-queue.js"; @@ -24,7 +26,6 @@ import { normalizeMessageChannel } from "../../utils/message-channel.js"; import { isReasoningTagProvider } from "../../utils/provider-utils.js"; import { resolveOpenClawAgentDir } from "../agent-paths.js"; import { resolveSessionAgentIds } from "../agent-scope.js"; -import type { ExecElevatedDefaults } from "../bash-tools.js"; import { makeBootstrapWarn, resolveBootstrapContextForRun } from "../bootstrap-files.js"; import { listChannelSupportedActions, resolveChannelMessageToolHints } from "../channel-tools.js"; import { 
formatUserTime, resolveUserTimeFormat, resolveUserTimezone } from "../date-time.js"; @@ -81,7 +82,6 @@ import { createSystemPromptOverride, } from "./system-prompt.js"; import { splitSdkTools } from "./tool-split.js"; -import type { EmbeddedPiCompactResult } from "./types.js"; import { describeUnknownError, mapThinkingLevel } from "./utils.js"; import { flushPendingToolResultsAfterIdle } from "./wait-for-idle-before-flush.js"; @@ -570,6 +570,7 @@ export async function compactEmbeddedPiSessionDirect( modelApi: model.api, modelId, provider, + config: params.config, sessionManager, sessionId: params.sessionId, policy: transcriptPolicy, diff --git a/src/agents/pi-embedded-runner/google.ts b/src/agents/pi-embedded-runner/google.ts index 6cd261e4f9..7c3767fbc1 100644 --- a/src/agents/pi-embedded-runner/google.ts +++ b/src/agents/pi-embedded-runner/google.ts @@ -1,12 +1,15 @@ -import { EventEmitter } from "node:events"; import type { AgentMessage, AgentTool } from "@mariozechner/pi-agent-core"; import type { SessionManager } from "@mariozechner/pi-coding-agent"; import type { TSchema } from "@sinclair/typebox"; +import { EventEmitter } from "node:events"; +import type { OpenClawConfig } from "../../config/config.js"; +import type { TranscriptPolicy } from "../transcript-policy.js"; import { registerUnhandledRejectionHandler } from "../../infra/unhandled-rejections.js"; import { hasInterSessionUserProvenance, normalizeInputProvenance, } from "../../sessions/input-provenance.js"; +import { resolveImageSanitizationLimits } from "../image-sanitization.js"; import { downgradeOpenAIReasoningBlocks, isCompactionFailureError, @@ -20,7 +23,6 @@ import { stripToolResultDetails, sanitizeToolUseResultPairing, } from "../session-transcript-repair.js"; -import type { TranscriptPolicy } from "../transcript-policy.js"; import { resolveTranscriptPolicy } from "../transcript-policy.js"; import { log } from "./logger.js"; import { describeUnknownError } from "./utils.js"; @@ -416,6 +418,7 
@@ export async function sanitizeSessionHistory(params: { modelApi?: string | null; modelId?: string; provider?: string; + config?: OpenClawConfig; sessionManager: SessionManager; sessionId: string; policy?: TranscriptPolicy; @@ -438,6 +441,7 @@ export async function sanitizeSessionHistory(params: { toolCallIdMode: policy.toolCallIdMode, preserveSignatures: policy.preserveSignatures, sanitizeThoughtSignatures: policy.sanitizeThoughtSignatures, + ...resolveImageSanitizationLimits(params.config), }, ); const sanitizedThinking = policy.normalizeAntigravityThinkingBlocks diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index 3c41489896..e9901636ca 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -1,9 +1,10 @@ -import fs from "node:fs/promises"; -import os from "node:os"; import type { AgentMessage } from "@mariozechner/pi-agent-core"; import type { ImageContent } from "@mariozechner/pi-ai"; import { streamSimple } from "@mariozechner/pi-ai"; import { createAgentSession, SessionManager, SettingsManager } from "@mariozechner/pi-coding-agent"; +import fs from "node:fs/promises"; +import os from "node:os"; +import type { EmbeddedRunAttemptParams, EmbeddedRunAttemptResult } from "./types.js"; import { resolveHeartbeatPrompt } from "../../../auto-reply/heartbeat.js"; import { resolveChannelCapabilities } from "../../../config/channel-capabilities.js"; import { getMachineDisplayName } from "../../../infra/machine-name.js"; @@ -33,6 +34,7 @@ import { import { DEFAULT_CONTEXT_TOKENS } from "../../defaults.js"; import { resolveOpenClawDocsPath } from "../../docs-path.js"; import { isTimeoutError } from "../../failover-error.js"; +import { resolveImageSanitizationLimits } from "../../image-sanitization.js"; import { resolveModelAuthMode } from "../../model-auth.js"; import { resolveDefaultModelForAgent } from "../../model-selection.js"; import { 
createOllamaStreamFn, OLLAMA_NATIVE_BASE_URL } from "../../ollama-stream.js"; @@ -105,7 +107,6 @@ import { shouldFlagCompactionTimeout, } from "./compaction-timeout.js"; import { detectAndLoadPromptImages } from "./images.js"; -import type { EmbeddedRunAttemptParams, EmbeddedRunAttemptResult } from "./types.js"; export function injectHistoryImagesIntoMessages( messages: AgentMessage[], @@ -666,6 +667,7 @@ export async function runEmbeddedAttempt( modelApi: params.model.api, modelId: params.modelId, provider: params.provider, + config: params.config, sessionManager, sessionId: params.sessionId, policy: transcriptPolicy, @@ -968,6 +970,7 @@ export async function runEmbeddedAttempt( existingImages: params.images, historyMessages: activeSession.messages, maxBytes: MAX_IMAGE_BYTES, + maxDimensionPx: resolveImageSanitizationLimits(params.config).maxDimensionPx, // Enforce sandbox path restrictions when sandbox is enabled sandbox: sandbox?.enabled && sandbox?.fsBridge diff --git a/src/agents/pi-embedded-runner/run/images.ts b/src/agents/pi-embedded-runner/run/images.ts index be6f8d0373..c328fd53f7 100644 --- a/src/agents/pi-embedded-runner/run/images.ts +++ b/src/agents/pi-embedded-runner/run/images.ts @@ -1,9 +1,10 @@ +import type { ImageContent } from "@mariozechner/pi-ai"; import path from "node:path"; import { fileURLToPath } from "node:url"; -import type { ImageContent } from "@mariozechner/pi-ai"; +import type { ImageSanitizationLimits } from "../../image-sanitization.js"; +import type { SandboxFsBridge } from "../../sandbox/fs-bridge.js"; import { resolveUserPath } from "../../../utils.js"; import { loadWebMedia } from "../../../web/media.js"; -import type { SandboxFsBridge } from "../../sandbox/fs-bridge.js"; import { sanitizeImageBlocks } from "../../tool-images.js"; import { log } from "../logger.js"; @@ -48,8 +49,13 @@ function isImageExtension(filePath: string): boolean { async function sanitizeImagesWithLog( images: ImageContent[], label: string, + 
imageSanitization?: ImageSanitizationLimits, ): Promise { - const { images: sanitized, dropped } = await sanitizeImageBlocks(images, label); + const { images: sanitized, dropped } = await sanitizeImageBlocks( + images, + label, + imageSanitization, + ); if (dropped > 0) { log.warn(`Native image: dropped ${dropped} image(s) after sanitization (${label}).`); } @@ -354,6 +360,7 @@ export async function detectAndLoadPromptImages(params: { existingImages?: ImageContent[]; historyMessages?: unknown[]; maxBytes?: number; + maxDimensionPx?: number; sandbox?: { root: string; bridge: SandboxFsBridge }; }): Promise<{ /** Images for the current prompt (existingImages + detected in current prompt) */ @@ -437,10 +444,21 @@ export async function detectAndLoadPromptImages(params: { } } - const sanitizedPromptImages = await sanitizeImagesWithLog(promptImages, "prompt:images"); + const imageSanitization: ImageSanitizationLimits = { + maxDimensionPx: params.maxDimensionPx, + }; + const sanitizedPromptImages = await sanitizeImagesWithLog( + promptImages, + "prompt:images", + imageSanitization, + ); const sanitizedHistoryImagesByIndex = new Map(); for (const [index, images] of historyImagesByIndex) { - const sanitized = await sanitizeImagesWithLog(images, `history:images:${index}`); + const sanitized = await sanitizeImagesWithLog( + images, + `history:images:${index}`, + imageSanitization, + ); if (sanitized.length > 0) { sanitizedHistoryImagesByIndex.set(index, sanitized); } diff --git a/src/agents/pi-tools.read.ts b/src/agents/pi-tools.read.ts index f621ba52b4..1fac1190ba 100644 --- a/src/agents/pi-tools.read.ts +++ b/src/agents/pi-tools.read.ts @@ -1,10 +1,11 @@ import type { AgentToolResult } from "@mariozechner/pi-agent-core"; import { createEditTool, createReadTool, createWriteTool } from "@mariozechner/pi-coding-agent"; +import type { ImageSanitizationLimits } from "./image-sanitization.js"; +import type { AnyAgentTool } from "./pi-tools.types.js"; +import type { SandboxFsBridge 
} from "./sandbox/fs-bridge.js"; import { detectMime } from "../media/mime.js"; import { sniffMimeFromBase64 } from "../media/sniff-mime-from-base64.js"; -import type { AnyAgentTool } from "./pi-tools.types.js"; import { assertSandboxPath } from "./sandbox-paths.js"; -import type { SandboxFsBridge } from "./sandbox/fs-bridge.js"; import { sanitizeToolResultImages } from "./tool-images.js"; // NOTE(steipete): Upstream read now does file-magic MIME detection; we keep the wrapper @@ -21,6 +22,7 @@ const MAX_ADAPTIVE_READ_PAGES = 8; type OpenClawReadToolOptions = { modelContextWindowTokens?: number; + imageSanitization?: ImageSanitizationLimits; }; type ReadTruncationDetails = { @@ -566,6 +568,7 @@ type SandboxToolParams = { root: string; bridge: SandboxFsBridge; modelContextWindowTokens?: number; + imageSanitization?: ImageSanitizationLimits; }; export function createSandboxedReadTool(params: SandboxToolParams) { @@ -574,6 +577,7 @@ export function createSandboxedReadTool(params: SandboxToolParams) { }) as unknown as AnyAgentTool; return createOpenClawReadTool(base, { modelContextWindowTokens: params.modelContextWindowTokens, + imageSanitization: params.imageSanitization, }); } @@ -614,7 +618,11 @@ export function createOpenClawReadTool( const filePath = typeof record?.path === "string" ? 
String(record.path) : ""; const strippedDetailsResult = stripReadTruncationContentDetails(result); const normalizedResult = await normalizeReadImageResult(strippedDetailsResult, filePath); - return sanitizeToolResultImages(normalizedResult, `read:${filePath}`); + return sanitizeToolResultImages( + normalizedResult, + `read:${filePath}`, + options?.imageSanitization, + ); }, }; } diff --git a/src/agents/pi-tools.ts b/src/agents/pi-tools.ts index 66f7902695..f4c4eec6cc 100644 --- a/src/agents/pi-tools.ts +++ b/src/agents/pi-tools.ts @@ -7,6 +7,9 @@ import { } from "@mariozechner/pi-coding-agent"; import type { OpenClawConfig } from "../config/config.js"; import type { ToolLoopDetectionConfig } from "../config/types.tools.js"; +import type { ModelAuthMode } from "./model-auth.js"; +import type { AnyAgentTool } from "./pi-tools.types.js"; +import type { SandboxContext } from "./sandbox.js"; import { logWarn } from "../logger.js"; import { getPluginToolMeta } from "../plugins/tools.js"; import { isSubagentSessionKey } from "../routing/session-key.js"; @@ -20,7 +23,7 @@ import { type ProcessToolDefaults, } from "./bash-tools.js"; import { listChannelAgentTools } from "./channel-tools.js"; -import type { ModelAuthMode } from "./model-auth.js"; +import { resolveImageSanitizationLimits } from "./image-sanitization.js"; import { createOpenClawTools } from "./openclaw-tools.js"; import { wrapToolWithAbortSignal } from "./pi-tools.abort.js"; import { wrapToolWithBeforeToolCallHook } from "./pi-tools.before-tool-call.js"; @@ -43,8 +46,6 @@ import { wrapToolParamNormalization, } from "./pi-tools.read.js"; import { cleanToolSchemaForGemini, normalizeToolParameters } from "./pi-tools.schema.js"; -import type { AnyAgentTool } from "./pi-tools.types.js"; -import type { SandboxContext } from "./sandbox.js"; import { getSubagentDepthFromSessionStore } from "./subagent-depth.js"; import { applyToolPolicyPipeline, @@ -300,6 +301,7 @@ export function createOpenClawCodingTools(options?: { 
if (sandboxRoot && !sandboxFsBridge) { throw new Error("Sandbox filesystem bridge is unavailable."); } + const imageSanitization = resolveImageSanitizationLimits(options?.config); const base = (codingTools as unknown as AnyAgentTool[]).flatMap((tool) => { if (tool.name === readTool.name) { @@ -308,12 +310,14 @@ export function createOpenClawCodingTools(options?: { root: sandboxRoot, bridge: sandboxFsBridge!, modelContextWindowTokens: options?.modelContextWindowTokens, + imageSanitization, }); return [workspaceOnly ? wrapToolWorkspaceRootGuard(sandboxed, sandboxRoot) : sandboxed]; } const freshReadTool = createReadTool(workspaceRoot); const wrapped = createOpenClawReadTool(freshReadTool, { modelContextWindowTokens: options?.modelContextWindowTokens, + imageSanitization, }); return [workspaceOnly ? wrapToolWorkspaceRootGuard(wrapped, workspaceRoot) : wrapped]; } diff --git a/src/agents/tool-images.e2e.test.ts b/src/agents/tool-images.e2e.test.ts index e5dff0a9e9..e51f9bc699 100644 --- a/src/agents/tool-images.e2e.test.ts +++ b/src/agents/tool-images.e2e.test.ts @@ -49,8 +49,8 @@ describe("tool image sanitizing", () => { expect(dropped).toBe(0); expect(out.length).toBe(1); const meta = await sharp(Buffer.from(out[0].data, "base64")).metadata(); - expect(meta.width).toBeLessThanOrEqual(2000); - expect(meta.height).toBeLessThanOrEqual(2000); + expect(meta.width).toBeLessThanOrEqual(1200); + expect(meta.height).toBeLessThanOrEqual(1200); }, 20_000); it("shrinks images that exceed max dimension even if size is small", async () => { @@ -77,8 +77,8 @@ describe("tool image sanitizing", () => { throw new Error("expected image block"); } const meta = await sharp(Buffer.from(image.data, "base64")).metadata(); - expect(meta.width).toBeLessThanOrEqual(2000); - expect(meta.height).toBeLessThanOrEqual(2000); + expect(meta.width).toBeLessThanOrEqual(1200); + expect(meta.height).toBeLessThanOrEqual(1200); expect(image.mimeType).toBe("image/jpeg"); }, 20_000); diff --git 
a/src/agents/tool-images.ts b/src/agents/tool-images.ts index e66ff29df2..4851d99ad6 100644 --- a/src/agents/tool-images.ts +++ b/src/agents/tool-images.ts @@ -2,6 +2,11 @@ import type { AgentToolResult } from "@mariozechner/pi-agent-core"; import type { ImageContent } from "@mariozechner/pi-ai"; import { createSubsystemLogger } from "../logging/subsystem.js"; import { getImageMetadata, resizeToJpeg } from "../media/image-ops.js"; +import { + DEFAULT_IMAGE_MAX_BYTES, + DEFAULT_IMAGE_MAX_DIMENSION_PX, + type ImageSanitizationLimits, +} from "./image-sanitization.js"; type ToolContentBlock = AgentToolResult["content"][number]; type ImageContentBlock = Extract; @@ -13,8 +18,8 @@ type TextContentBlock = Extract; // // To keep sessions resilient (and avoid "silent" WhatsApp non-replies), we auto-downscale // and recompress base64 image blocks when they exceed these limits. -const MAX_IMAGE_DIMENSION_PX = 1200; -const MAX_IMAGE_BYTES = 5 * 1024 * 1024; +const MAX_IMAGE_DIMENSION_PX = DEFAULT_IMAGE_MAX_DIMENSION_PX; +const MAX_IMAGE_BYTES = DEFAULT_IMAGE_MAX_BYTES; const log = createSubsystemLogger("agents/tool-images"); function isImageBlock(block: unknown): block is ImageContentBlock { @@ -100,7 +105,7 @@ async function resizeImageBase64IfNeeded(params: { const maxDim = hasDimensions ? Math.max(width ?? 0, height ?? 0) : params.maxDimensionPx; const sideStart = maxDim > 0 ? 
Math.min(params.maxDimensionPx, maxDim) : params.maxDimensionPx; const sideGrid = [sideStart, 1800, 1600, 1400, 1200, 1000, 800] - .map((v) => Math.min(params.maxDimensionPx, v)) + .filter((v) => v > 0 && v <= params.maxDimensionPx) .filter((v, i, arr) => v > 0 && arr.indexOf(v) === i) .toSorted((a, b) => b - a); @@ -148,7 +153,7 @@ async function resizeImageBase64IfNeeded(params: { export async function sanitizeContentBlocksImages( blocks: ToolContentBlock[], label: string, - opts: { maxDimensionPx?: number; maxBytes?: number } = {}, + opts: ImageSanitizationLimits = {}, ): Promise { const maxDimensionPx = Math.max(opts.maxDimensionPx ?? MAX_IMAGE_DIMENSION_PX, 1); const maxBytes = Math.max(opts.maxBytes ?? MAX_IMAGE_BYTES, 1); @@ -198,7 +203,7 @@ export async function sanitizeContentBlocksImages( export async function sanitizeImageBlocks( images: ImageContent[], label: string, - opts: { maxDimensionPx?: number; maxBytes?: number } = {}, + opts: ImageSanitizationLimits = {}, ): Promise<{ images: ImageContent[]; dropped: number }> { if (images.length === 0) { return { images, dropped: 0 }; @@ -211,7 +216,7 @@ export async function sanitizeImageBlocks( export async function sanitizeToolResultImages( result: AgentToolResult, label: string, - opts: { maxDimensionPx?: number; maxBytes?: number } = {}, + opts: ImageSanitizationLimits = {}, ): Promise> { const content = Array.isArray(result.content) ? 
result.content : []; if (!content.some((b) => isImageBlock(b) || isTextBlock(b))) { diff --git a/src/agents/tools/canvas-tool.ts b/src/agents/tools/canvas-tool.ts index 77ddb56db4..41475f668b 100644 --- a/src/agents/tools/canvas-tool.ts +++ b/src/agents/tools/canvas-tool.ts @@ -1,9 +1,11 @@ +import { Type } from "@sinclair/typebox"; import crypto from "node:crypto"; import fs from "node:fs/promises"; -import { Type } from "@sinclair/typebox"; +import type { OpenClawConfig } from "../../config/config.js"; import { writeBase64ToFile } from "../../cli/nodes-camera.js"; import { canvasSnapshotTempPath, parseCanvasSnapshotPayload } from "../../cli/nodes-canvas.js"; import { imageMimeFromFormat } from "../../media/mime.js"; +import { resolveImageSanitizationLimits } from "../image-sanitization.js"; import { optionalStringEnum, stringEnum } from "../schema/typebox.js"; import { type AnyAgentTool, imageResult, jsonResult, readStringParam } from "./common.js"; import { callGatewayTool, readGatewayCallOptions } from "./gateway.js"; @@ -48,7 +50,8 @@ const CanvasToolSchema = Type.Object({ jsonlPath: Type.Optional(Type.String()), }); -export function createCanvasTool(): AnyAgentTool { +export function createCanvasTool(options?: { config?: OpenClawConfig }): AnyAgentTool { + const imageSanitization = resolveImageSanitizationLimits(options?.config); return { label: "Canvas", name: "canvas", @@ -158,6 +161,7 @@ export function createCanvasTool(): AnyAgentTool { base64: payload.base64, mimeType, details: { format: payload.format }, + imageSanitization, }); } case "a2ui_push": { diff --git a/src/agents/tools/common.ts b/src/agents/tools/common.ts index a1358b08b7..5dbfd9e6b6 100644 --- a/src/agents/tools/common.ts +++ b/src/agents/tools/common.ts @@ -1,5 +1,6 @@ -import fs from "node:fs/promises"; import type { AgentTool, AgentToolResult } from "@mariozechner/pi-agent-core"; +import fs from "node:fs/promises"; +import type { ImageSanitizationLimits } from 
"../image-sanitization.js"; import { detectMime } from "../../media/mime.js"; import { sanitizeToolResultImages } from "../tool-images.js"; @@ -214,6 +215,7 @@ export async function imageResult(params: { mimeType: string; extraText?: string; details?: Record; + imageSanitization?: ImageSanitizationLimits; }): Promise> { const content: AgentToolResult["content"] = [ { @@ -230,7 +232,7 @@ export async function imageResult(params: { content, details: { path: params.path, ...params.details }, }; - return await sanitizeToolResultImages(result, params.label); + return await sanitizeToolResultImages(result, params.label, params.imageSanitization); } export async function imageResultFromFile(params: { @@ -238,6 +240,7 @@ export async function imageResultFromFile(params: { path: string; extraText?: string; details?: Record; + imageSanitization?: ImageSanitizationLimits; }): Promise> { const buf = await fs.readFile(params.path); const mimeType = (await detectMime({ buffer: buf.slice(0, 256) })) ?? 
"image/png"; @@ -248,5 +251,6 @@ export async function imageResultFromFile(params: { mimeType, extraText: params.extraText, details: params.details, + imageSanitization: params.imageSanitization, }); } diff --git a/src/agents/tools/nodes-tool.ts b/src/agents/tools/nodes-tool.ts index 7add129efa..902a453288 100644 --- a/src/agents/tools/nodes-tool.ts +++ b/src/agents/tools/nodes-tool.ts @@ -1,6 +1,7 @@ -import crypto from "node:crypto"; import type { AgentToolResult } from "@mariozechner/pi-agent-core"; import { Type } from "@sinclair/typebox"; +import crypto from "node:crypto"; +import type { OpenClawConfig } from "../../config/config.js"; import { type CameraFacing, cameraTempPath, @@ -16,9 +17,9 @@ import { writeScreenRecordToFile, } from "../../cli/nodes-screen.js"; import { parseDurationMs } from "../../cli/parse-duration.js"; -import type { OpenClawConfig } from "../../config/config.js"; import { imageMimeFromFormat } from "../../media/mime.js"; import { resolveSessionAgentId } from "../agent-scope.js"; +import { resolveImageSanitizationLimits } from "../image-sanitization.js"; import { optionalStringEnum, stringEnum } from "../schema/typebox.js"; import { sanitizeToolResultImages } from "../tool-images.js"; import { type AnyAgentTool, jsonResult, readStringParam } from "./common.js"; @@ -100,6 +101,7 @@ export function createNodesTool(options?: { sessionKey: options?.agentSessionKey, config: options?.config, }); + const imageSanitization = resolveImageSanitizationLimits(options?.config); return { label: "Nodes", name: "nodes", @@ -250,7 +252,7 @@ export function createNodesTool(options?: { } const result: AgentToolResult = { content, details }; - return await sanitizeToolResultImages(result, "nodes:camera_snap"); + return await sanitizeToolResultImages(result, "nodes:camera_snap", imageSanitization); } case "camera_list": { const node = readStringParam(params, "node", { required: true }); diff --git a/src/config/schema.help.ts b/src/config/schema.help.ts 
index 5f9445d47f..81c194016f 100644 --- a/src/config/schema.help.ts +++ b/src/config/schema.help.ts @@ -290,6 +290,8 @@ export const FIELD_HELP: Record = { "agents.defaults.imageModel.primary": "Optional image model (provider/model) used when the primary model lacks image input.", "agents.defaults.imageModel.fallbacks": "Ordered fallback image models (provider/model).", + "agents.defaults.imageMaxDimensionPx": + "Max image side length in pixels when sanitizing transcript/tool-result image payloads (default: 1200).", "agents.defaults.cliBackends": "Optional CLI backends for text-only fallback (claude-cli, etc.).", "agents.defaults.humanDelay.mode": 'Delay style for block replies ("off", "natural", "custom").', "agents.defaults.humanDelay.minMs": "Minimum delay in ms for custom humanDelay (default: 800).", diff --git a/src/config/schema.labels.ts b/src/config/schema.labels.ts index 1f351c8efd..95e75b8fb5 100644 --- a/src/config/schema.labels.ts +++ b/src/config/schema.labels.ts @@ -204,6 +204,7 @@ export const FIELD_LABELS: Record = { "agents.defaults.model.fallbacks": "Model Fallbacks", "agents.defaults.imageModel.primary": "Image Model", "agents.defaults.imageModel.fallbacks": "Image Model Fallbacks", + "agents.defaults.imageMaxDimensionPx": "Image Max Dimension (px)", "agents.defaults.humanDelay.mode": "Human Delay Mode", "agents.defaults.humanDelay.minMs": "Human Delay Min (ms)", "agents.defaults.humanDelay.maxMs": "Human Delay Max (ms)", diff --git a/src/config/types.agent-defaults.ts b/src/config/types.agent-defaults.ts index 4c9dba0a23..1829c71839 100644 --- a/src/config/types.agent-defaults.ts +++ b/src/config/types.agent-defaults.ts @@ -190,6 +190,11 @@ export type AgentDefaultsConfig = { timeoutSeconds?: number; /** Max inbound media size in MB for agent-visible attachments (text note or future image attach). */ mediaMaxMb?: number; + /** + * Max image side length (pixels) when sanitizing base64 image payloads in transcripts/tool results. + * Default: 1200. 
+ */ + imageMaxDimensionPx?: number; typingIntervalSeconds?: number; /** Typing indicator start mode (never|instant|thinking|message). */ typingMode?: TypingMode; diff --git a/src/config/zod-schema.agent-defaults.ts b/src/config/zod-schema.agent-defaults.ts index 2508179707..d99af6dc2b 100644 --- a/src/config/zod-schema.agent-defaults.ts +++ b/src/config/zod-schema.agent-defaults.ts @@ -126,6 +126,7 @@ export const AgentDefaultsSchema = z humanDelay: HumanDelaySchema.optional(), timeoutSeconds: z.number().int().positive().optional(), mediaMaxMb: z.number().positive().optional(), + imageMaxDimensionPx: z.number().int().positive().optional(), typingIntervalSeconds: z.number().int().positive().optional(), typingMode: z .union([