fix(agents): make image sanitization dimension configurable

This commit is contained in:
Peter Steinberger
2026-02-18 00:43:31 +01:00
parent 5ee79f80eb
commit b05e89e5e6
21 changed files with 156 additions and 45 deletions

View File

@@ -126,6 +126,7 @@ When validation fails:
- `agents.defaults.models` defines the model catalog and acts as the allowlist for `/model`.
- Model refs use `provider/model` format (e.g. `anthropic/claude-opus-4-6`).
- `agents.defaults.imageMaxDimensionPx` sets the max image side length, in pixels, used when downscaling transcript/tool-result images (default `1200`).
- See [Models CLI](/concepts/models) for switching models in chat and [Model Failover](/concepts/model-failover) for auth rotation and fallback behavior.
- For custom/self-hosted providers, see [Custom providers](/gateway/configuration-reference#custom-providers-and-base-urls) in the reference.

View File

@@ -57,6 +57,7 @@ Implementation:
- `sanitizeSessionMessagesImages` in `src/agents/pi-embedded-helpers/images.ts`
- `sanitizeContentBlocksImages` in `src/agents/tool-images.ts`
- Max image side is configurable via `agents.defaults.imageMaxDimensionPx` (default: `1200`).
---

View File

@@ -0,0 +1,20 @@
import { describe, expect, it } from "vitest";
import type { OpenClawConfig } from "../config/config.js";
import { resolveImageSanitizationLimits } from "./image-sanitization.js";
describe("image sanitization config", () => {
  // Builds a minimal config object that carries only the agent defaults under test.
  const configWithDefaults = (defaults: Record<string, unknown>): OpenClawConfig =>
    ({ agents: { defaults } }) as unknown as OpenClawConfig;

  it("defaults when no config value exists", () => {
    // No config at all: nothing is resolved.
    const withoutConfig = resolveImageSanitizationLimits(undefined);
    expect(withoutConfig).toEqual({});

    // Config present but the option is not set: still nothing is resolved.
    const withEmptyDefaults = resolveImageSanitizationLimits(configWithDefaults({}));
    expect(withEmptyDefaults).toEqual({});
  });

  it("reads and normalizes agents.defaults.imageMaxDimensionPx", () => {
    // A fractional value is rounded down to a whole pixel count.
    const limits = resolveImageSanitizationLimits(
      configWithDefaults({ imageMaxDimensionPx: 1600.9 }),
    );
    expect(limits).toEqual({ maxDimensionPx: 1600 });
  });
});

View File

@@ -0,0 +1,17 @@
import type { OpenClawConfig } from "../config/config.js";
/**
 * Optional limits applied when sanitizing image payloads.
 * Both fields are optional; consumers substitute their own defaults for omitted ones.
 */
export type ImageSanitizationLimits = {
/** Max image side length in pixels. */
maxDimensionPx?: number;
/** Max image size in bytes. */
maxBytes?: number;
};
/** Default max image side length, in pixels, used when no limit is configured. */
export const DEFAULT_IMAGE_MAX_DIMENSION_PX = 1200;
/** Default max image size in bytes (5 MiB). */
export const DEFAULT_IMAGE_MAX_BYTES = 5 * 1024 * 1024;
/**
 * Resolves image sanitization limits from `agents.defaults.imageMaxDimensionPx`.
 *
 * @param cfg - Optional application config; may be omitted entirely.
 * @returns An empty object when the option is missing, not a number, or not finite.
 *   Otherwise `{ maxDimensionPx }` with the value rounded down to a whole number and
 *   raised to at least 1. `maxBytes` is never set by this function.
 */
export function resolveImageSanitizationLimits(cfg?: OpenClawConfig): ImageSanitizationLimits {
  const rawMaxSide = cfg?.agents?.defaults?.imageMaxDimensionPx;
  if (typeof rawMaxSide === "number" && Number.isFinite(rawMaxSide)) {
    // Drop any fractional part, then keep the result at 1 or above.
    const wholePixels = Math.floor(rawMaxSide);
    return { maxDimensionPx: wholePixels < 1 ? 1 : wholePixels };
  }
  return {};
}

View File

@@ -1,12 +1,12 @@
import type { OpenClawConfig } from "../config/config.js";
import { resolvePluginTools } from "../plugins/tools.js";
import type { GatewayMessageChannel } from "../utils/message-channel.js";
import { resolveSessionAgentId } from "./agent-scope.js";
import type { SandboxFsBridge } from "./sandbox/fs-bridge.js";
import type { AnyAgentTool } from "./tools/common.js";
import { resolvePluginTools } from "../plugins/tools.js";
import { resolveSessionAgentId } from "./agent-scope.js";
import { createAgentsListTool } from "./tools/agents-list-tool.js";
import { createBrowserTool } from "./tools/browser-tool.js";
import { createCanvasTool } from "./tools/canvas-tool.js";
import type { AnyAgentTool } from "./tools/common.js";
import { createCronTool } from "./tools/cron-tool.js";
import { createGatewayTool } from "./tools/gateway-tool.js";
import { createImageTool } from "./tools/image-tool.js";
@@ -102,7 +102,7 @@ export function createOpenClawTools(options?: {
sandboxBridgeUrl: options?.sandboxBrowserBridgeUrl,
allowHostControl: options?.allowHostBrowserControl,
}),
createCanvasTool(),
createCanvasTool({ config: options?.config }),
createNodesTool({
agentSessionKey: options?.agentSessionKey,
config: options?.config,

View File

@@ -1,4 +1,5 @@
import type { AgentMessage, AgentToolResult } from "@mariozechner/pi-agent-core";
import type { ImageSanitizationLimits } from "../image-sanitization.js";
import type { ToolCallIdMode } from "../tool-call-id.js";
import { sanitizeToolCallIdsForCloudCodeAssist } from "../tool-call-id.js";
import { sanitizeContentBlocksImages } from "../tool-images.js";
@@ -45,12 +46,16 @@ export async function sanitizeSessionMessagesImages(
allowBase64Only?: boolean;
includeCamelCase?: boolean;
};
},
} & ImageSanitizationLimits,
): Promise<AgentMessage[]> {
const sanitizeMode = options?.sanitizeMode ?? "full";
const allowNonImageSanitization = sanitizeMode === "full";
const imageSanitization = {
maxDimensionPx: options?.maxDimensionPx,
maxBytes: options?.maxBytes,
};
// We sanitize historical session messages because Anthropic can reject a request
// if the transcript contains oversized base64 images (see MAX_IMAGE_DIMENSION_PX).
// if the transcript contains oversized base64 images (default max side 1200px).
const sanitizedIds =
allowNonImageSanitization && options?.sanitizeToolCallIds
? sanitizeToolCallIdsForCloudCodeAssist(messages, options.toolCallIdMode)
@@ -69,6 +74,7 @@ export async function sanitizeSessionMessagesImages(
const nextContent = (await sanitizeContentBlocksImages(
content,
label,
imageSanitization,
)) as unknown as typeof toolMsg.content;
out.push({ ...toolMsg, content: nextContent });
continue;
@@ -81,6 +87,7 @@ export async function sanitizeSessionMessagesImages(
const nextContent = (await sanitizeContentBlocksImages(
content as unknown as ContentBlock[],
label,
imageSanitization,
)) as unknown as typeof userMsg.content;
out.push({ ...userMsg, content: nextContent });
continue;
@@ -95,6 +102,7 @@ export async function sanitizeSessionMessagesImages(
const nextContent = (await sanitizeContentBlocksImages(
content as unknown as ContentBlock[],
label,
imageSanitization,
)) as unknown as typeof assistantMsg.content;
out.push({ ...assistantMsg, content: nextContent });
} else {
@@ -108,6 +116,7 @@ export async function sanitizeSessionMessagesImages(
const nextContent = (await sanitizeContentBlocksImages(
content as unknown as ContentBlock[],
label,
imageSanitization,
)) as unknown as typeof assistantMsg.content;
out.push({ ...assistantMsg, content: nextContent });
continue;
@@ -129,6 +138,7 @@ export async function sanitizeSessionMessagesImages(
const finalContent = (await sanitizeContentBlocksImages(
filteredContent as unknown as ContentBlock[],
label,
imageSanitization,
)) as unknown as typeof assistantMsg.content;
if (finalContent.length === 0) {
continue;

View File

@@ -1,5 +1,3 @@
import fs from "node:fs/promises";
import os from "node:os";
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import {
createAgentSession,
@@ -7,10 +5,14 @@ import {
SessionManager,
SettingsManager,
} from "@mariozechner/pi-coding-agent";
import { resolveHeartbeatPrompt } from "../../auto-reply/heartbeat.js";
import fs from "node:fs/promises";
import os from "node:os";
import type { ReasoningLevel, ThinkLevel } from "../../auto-reply/thinking.js";
import { resolveChannelCapabilities } from "../../config/channel-capabilities.js";
import type { OpenClawConfig } from "../../config/config.js";
import type { ExecElevatedDefaults } from "../bash-tools.js";
import type { EmbeddedPiCompactResult } from "./types.js";
import { resolveHeartbeatPrompt } from "../../auto-reply/heartbeat.js";
import { resolveChannelCapabilities } from "../../config/channel-capabilities.js";
import { getMachineDisplayName } from "../../infra/machine-name.js";
import { getGlobalHookRunner } from "../../plugins/hook-runner-global.js";
import { type enqueueCommand, enqueueCommandInLane } from "../../process/command-queue.js";
@@ -24,7 +26,6 @@ import { normalizeMessageChannel } from "../../utils/message-channel.js";
import { isReasoningTagProvider } from "../../utils/provider-utils.js";
import { resolveOpenClawAgentDir } from "../agent-paths.js";
import { resolveSessionAgentIds } from "../agent-scope.js";
import type { ExecElevatedDefaults } from "../bash-tools.js";
import { makeBootstrapWarn, resolveBootstrapContextForRun } from "../bootstrap-files.js";
import { listChannelSupportedActions, resolveChannelMessageToolHints } from "../channel-tools.js";
import { formatUserTime, resolveUserTimeFormat, resolveUserTimezone } from "../date-time.js";
@@ -81,7 +82,6 @@ import {
createSystemPromptOverride,
} from "./system-prompt.js";
import { splitSdkTools } from "./tool-split.js";
import type { EmbeddedPiCompactResult } from "./types.js";
import { describeUnknownError, mapThinkingLevel } from "./utils.js";
import { flushPendingToolResultsAfterIdle } from "./wait-for-idle-before-flush.js";
@@ -570,6 +570,7 @@ export async function compactEmbeddedPiSessionDirect(
modelApi: model.api,
modelId,
provider,
config: params.config,
sessionManager,
sessionId: params.sessionId,
policy: transcriptPolicy,

View File

@@ -1,12 +1,15 @@
import { EventEmitter } from "node:events";
import type { AgentMessage, AgentTool } from "@mariozechner/pi-agent-core";
import type { SessionManager } from "@mariozechner/pi-coding-agent";
import type { TSchema } from "@sinclair/typebox";
import { EventEmitter } from "node:events";
import type { OpenClawConfig } from "../../config/config.js";
import type { TranscriptPolicy } from "../transcript-policy.js";
import { registerUnhandledRejectionHandler } from "../../infra/unhandled-rejections.js";
import {
hasInterSessionUserProvenance,
normalizeInputProvenance,
} from "../../sessions/input-provenance.js";
import { resolveImageSanitizationLimits } from "../image-sanitization.js";
import {
downgradeOpenAIReasoningBlocks,
isCompactionFailureError,
@@ -20,7 +23,6 @@ import {
stripToolResultDetails,
sanitizeToolUseResultPairing,
} from "../session-transcript-repair.js";
import type { TranscriptPolicy } from "../transcript-policy.js";
import { resolveTranscriptPolicy } from "../transcript-policy.js";
import { log } from "./logger.js";
import { describeUnknownError } from "./utils.js";
@@ -416,6 +418,7 @@ export async function sanitizeSessionHistory(params: {
modelApi?: string | null;
modelId?: string;
provider?: string;
config?: OpenClawConfig;
sessionManager: SessionManager;
sessionId: string;
policy?: TranscriptPolicy;
@@ -438,6 +441,7 @@ export async function sanitizeSessionHistory(params: {
toolCallIdMode: policy.toolCallIdMode,
preserveSignatures: policy.preserveSignatures,
sanitizeThoughtSignatures: policy.sanitizeThoughtSignatures,
...resolveImageSanitizationLimits(params.config),
},
);
const sanitizedThinking = policy.normalizeAntigravityThinkingBlocks

View File

@@ -1,9 +1,10 @@
import fs from "node:fs/promises";
import os from "node:os";
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { ImageContent } from "@mariozechner/pi-ai";
import { streamSimple } from "@mariozechner/pi-ai";
import { createAgentSession, SessionManager, SettingsManager } from "@mariozechner/pi-coding-agent";
import fs from "node:fs/promises";
import os from "node:os";
import type { EmbeddedRunAttemptParams, EmbeddedRunAttemptResult } from "./types.js";
import { resolveHeartbeatPrompt } from "../../../auto-reply/heartbeat.js";
import { resolveChannelCapabilities } from "../../../config/channel-capabilities.js";
import { getMachineDisplayName } from "../../../infra/machine-name.js";
@@ -33,6 +34,7 @@ import {
import { DEFAULT_CONTEXT_TOKENS } from "../../defaults.js";
import { resolveOpenClawDocsPath } from "../../docs-path.js";
import { isTimeoutError } from "../../failover-error.js";
import { resolveImageSanitizationLimits } from "../../image-sanitization.js";
import { resolveModelAuthMode } from "../../model-auth.js";
import { resolveDefaultModelForAgent } from "../../model-selection.js";
import { createOllamaStreamFn, OLLAMA_NATIVE_BASE_URL } from "../../ollama-stream.js";
@@ -105,7 +107,6 @@ import {
shouldFlagCompactionTimeout,
} from "./compaction-timeout.js";
import { detectAndLoadPromptImages } from "./images.js";
import type { EmbeddedRunAttemptParams, EmbeddedRunAttemptResult } from "./types.js";
export function injectHistoryImagesIntoMessages(
messages: AgentMessage[],
@@ -666,6 +667,7 @@ export async function runEmbeddedAttempt(
modelApi: params.model.api,
modelId: params.modelId,
provider: params.provider,
config: params.config,
sessionManager,
sessionId: params.sessionId,
policy: transcriptPolicy,
@@ -968,6 +970,7 @@ export async function runEmbeddedAttempt(
existingImages: params.images,
historyMessages: activeSession.messages,
maxBytes: MAX_IMAGE_BYTES,
maxDimensionPx: resolveImageSanitizationLimits(params.config).maxDimensionPx,
// Enforce sandbox path restrictions when sandbox is enabled
sandbox:
sandbox?.enabled && sandbox?.fsBridge

View File

@@ -1,9 +1,10 @@
import type { ImageContent } from "@mariozechner/pi-ai";
import path from "node:path";
import { fileURLToPath } from "node:url";
import type { ImageContent } from "@mariozechner/pi-ai";
import type { ImageSanitizationLimits } from "../../image-sanitization.js";
import type { SandboxFsBridge } from "../../sandbox/fs-bridge.js";
import { resolveUserPath } from "../../../utils.js";
import { loadWebMedia } from "../../../web/media.js";
import type { SandboxFsBridge } from "../../sandbox/fs-bridge.js";
import { sanitizeImageBlocks } from "../../tool-images.js";
import { log } from "../logger.js";
@@ -48,8 +49,13 @@ function isImageExtension(filePath: string): boolean {
async function sanitizeImagesWithLog(
images: ImageContent[],
label: string,
imageSanitization?: ImageSanitizationLimits,
): Promise<ImageContent[]> {
const { images: sanitized, dropped } = await sanitizeImageBlocks(images, label);
const { images: sanitized, dropped } = await sanitizeImageBlocks(
images,
label,
imageSanitization,
);
if (dropped > 0) {
log.warn(`Native image: dropped ${dropped} image(s) after sanitization (${label}).`);
}
@@ -354,6 +360,7 @@ export async function detectAndLoadPromptImages(params: {
existingImages?: ImageContent[];
historyMessages?: unknown[];
maxBytes?: number;
maxDimensionPx?: number;
sandbox?: { root: string; bridge: SandboxFsBridge };
}): Promise<{
/** Images for the current prompt (existingImages + detected in current prompt) */
@@ -437,10 +444,21 @@ export async function detectAndLoadPromptImages(params: {
}
}
const sanitizedPromptImages = await sanitizeImagesWithLog(promptImages, "prompt:images");
const imageSanitization: ImageSanitizationLimits = {
maxDimensionPx: params.maxDimensionPx,
};
const sanitizedPromptImages = await sanitizeImagesWithLog(
promptImages,
"prompt:images",
imageSanitization,
);
const sanitizedHistoryImagesByIndex = new Map<number, ImageContent[]>();
for (const [index, images] of historyImagesByIndex) {
const sanitized = await sanitizeImagesWithLog(images, `history:images:${index}`);
const sanitized = await sanitizeImagesWithLog(
images,
`history:images:${index}`,
imageSanitization,
);
if (sanitized.length > 0) {
sanitizedHistoryImagesByIndex.set(index, sanitized);
}

View File

@@ -1,10 +1,11 @@
import type { AgentToolResult } from "@mariozechner/pi-agent-core";
import { createEditTool, createReadTool, createWriteTool } from "@mariozechner/pi-coding-agent";
import type { ImageSanitizationLimits } from "./image-sanitization.js";
import type { AnyAgentTool } from "./pi-tools.types.js";
import type { SandboxFsBridge } from "./sandbox/fs-bridge.js";
import { detectMime } from "../media/mime.js";
import { sniffMimeFromBase64 } from "../media/sniff-mime-from-base64.js";
import type { AnyAgentTool } from "./pi-tools.types.js";
import { assertSandboxPath } from "./sandbox-paths.js";
import type { SandboxFsBridge } from "./sandbox/fs-bridge.js";
import { sanitizeToolResultImages } from "./tool-images.js";
// NOTE(steipete): Upstream read now does file-magic MIME detection; we keep the wrapper
@@ -21,6 +22,7 @@ const MAX_ADAPTIVE_READ_PAGES = 8;
type OpenClawReadToolOptions = {
modelContextWindowTokens?: number;
imageSanitization?: ImageSanitizationLimits;
};
type ReadTruncationDetails = {
@@ -566,6 +568,7 @@ type SandboxToolParams = {
root: string;
bridge: SandboxFsBridge;
modelContextWindowTokens?: number;
imageSanitization?: ImageSanitizationLimits;
};
export function createSandboxedReadTool(params: SandboxToolParams) {
@@ -574,6 +577,7 @@ export function createSandboxedReadTool(params: SandboxToolParams) {
}) as unknown as AnyAgentTool;
return createOpenClawReadTool(base, {
modelContextWindowTokens: params.modelContextWindowTokens,
imageSanitization: params.imageSanitization,
});
}
@@ -614,7 +618,11 @@ export function createOpenClawReadTool(
const filePath = typeof record?.path === "string" ? String(record.path) : "<unknown>";
const strippedDetailsResult = stripReadTruncationContentDetails(result);
const normalizedResult = await normalizeReadImageResult(strippedDetailsResult, filePath);
return sanitizeToolResultImages(normalizedResult, `read:${filePath}`);
return sanitizeToolResultImages(
normalizedResult,
`read:${filePath}`,
options?.imageSanitization,
);
},
};
}

View File

@@ -7,6 +7,9 @@ import {
} from "@mariozechner/pi-coding-agent";
import type { OpenClawConfig } from "../config/config.js";
import type { ToolLoopDetectionConfig } from "../config/types.tools.js";
import type { ModelAuthMode } from "./model-auth.js";
import type { AnyAgentTool } from "./pi-tools.types.js";
import type { SandboxContext } from "./sandbox.js";
import { logWarn } from "../logger.js";
import { getPluginToolMeta } from "../plugins/tools.js";
import { isSubagentSessionKey } from "../routing/session-key.js";
@@ -20,7 +23,7 @@ import {
type ProcessToolDefaults,
} from "./bash-tools.js";
import { listChannelAgentTools } from "./channel-tools.js";
import type { ModelAuthMode } from "./model-auth.js";
import { resolveImageSanitizationLimits } from "./image-sanitization.js";
import { createOpenClawTools } from "./openclaw-tools.js";
import { wrapToolWithAbortSignal } from "./pi-tools.abort.js";
import { wrapToolWithBeforeToolCallHook } from "./pi-tools.before-tool-call.js";
@@ -43,8 +46,6 @@ import {
wrapToolParamNormalization,
} from "./pi-tools.read.js";
import { cleanToolSchemaForGemini, normalizeToolParameters } from "./pi-tools.schema.js";
import type { AnyAgentTool } from "./pi-tools.types.js";
import type { SandboxContext } from "./sandbox.js";
import { getSubagentDepthFromSessionStore } from "./subagent-depth.js";
import {
applyToolPolicyPipeline,
@@ -300,6 +301,7 @@ export function createOpenClawCodingTools(options?: {
if (sandboxRoot && !sandboxFsBridge) {
throw new Error("Sandbox filesystem bridge is unavailable.");
}
const imageSanitization = resolveImageSanitizationLimits(options?.config);
const base = (codingTools as unknown as AnyAgentTool[]).flatMap((tool) => {
if (tool.name === readTool.name) {
@@ -308,12 +310,14 @@ export function createOpenClawCodingTools(options?: {
root: sandboxRoot,
bridge: sandboxFsBridge!,
modelContextWindowTokens: options?.modelContextWindowTokens,
imageSanitization,
});
return [workspaceOnly ? wrapToolWorkspaceRootGuard(sandboxed, sandboxRoot) : sandboxed];
}
const freshReadTool = createReadTool(workspaceRoot);
const wrapped = createOpenClawReadTool(freshReadTool, {
modelContextWindowTokens: options?.modelContextWindowTokens,
imageSanitization,
});
return [workspaceOnly ? wrapToolWorkspaceRootGuard(wrapped, workspaceRoot) : wrapped];
}

View File

@@ -49,8 +49,8 @@ describe("tool image sanitizing", () => {
expect(dropped).toBe(0);
expect(out.length).toBe(1);
const meta = await sharp(Buffer.from(out[0].data, "base64")).metadata();
expect(meta.width).toBeLessThanOrEqual(2000);
expect(meta.height).toBeLessThanOrEqual(2000);
expect(meta.width).toBeLessThanOrEqual(1200);
expect(meta.height).toBeLessThanOrEqual(1200);
}, 20_000);
it("shrinks images that exceed max dimension even if size is small", async () => {
@@ -77,8 +77,8 @@ describe("tool image sanitizing", () => {
throw new Error("expected image block");
}
const meta = await sharp(Buffer.from(image.data, "base64")).metadata();
expect(meta.width).toBeLessThanOrEqual(2000);
expect(meta.height).toBeLessThanOrEqual(2000);
expect(meta.width).toBeLessThanOrEqual(1200);
expect(meta.height).toBeLessThanOrEqual(1200);
expect(image.mimeType).toBe("image/jpeg");
}, 20_000);

View File

@@ -2,6 +2,11 @@ import type { AgentToolResult } from "@mariozechner/pi-agent-core";
import type { ImageContent } from "@mariozechner/pi-ai";
import { createSubsystemLogger } from "../logging/subsystem.js";
import { getImageMetadata, resizeToJpeg } from "../media/image-ops.js";
import {
DEFAULT_IMAGE_MAX_BYTES,
DEFAULT_IMAGE_MAX_DIMENSION_PX,
type ImageSanitizationLimits,
} from "./image-sanitization.js";
type ToolContentBlock = AgentToolResult<unknown>["content"][number];
type ImageContentBlock = Extract<ToolContentBlock, { type: "image" }>;
@@ -13,8 +18,8 @@ type TextContentBlock = Extract<ToolContentBlock, { type: "text" }>;
//
// To keep sessions resilient (and avoid "silent" WhatsApp non-replies), we auto-downscale
// and recompress base64 image blocks when they exceed these limits.
const MAX_IMAGE_DIMENSION_PX = 1200;
const MAX_IMAGE_BYTES = 5 * 1024 * 1024;
const MAX_IMAGE_DIMENSION_PX = DEFAULT_IMAGE_MAX_DIMENSION_PX;
const MAX_IMAGE_BYTES = DEFAULT_IMAGE_MAX_BYTES;
const log = createSubsystemLogger("agents/tool-images");
function isImageBlock(block: unknown): block is ImageContentBlock {
@@ -100,7 +105,7 @@ async function resizeImageBase64IfNeeded(params: {
const maxDim = hasDimensions ? Math.max(width ?? 0, height ?? 0) : params.maxDimensionPx;
const sideStart = maxDim > 0 ? Math.min(params.maxDimensionPx, maxDim) : params.maxDimensionPx;
const sideGrid = [sideStart, 1800, 1600, 1400, 1200, 1000, 800]
.map((v) => Math.min(params.maxDimensionPx, v))
.filter((v) => v > 0 && v <= params.maxDimensionPx)
.filter((v, i, arr) => v > 0 && arr.indexOf(v) === i)
.toSorted((a, b) => b - a);
@@ -148,7 +153,7 @@ async function resizeImageBase64IfNeeded(params: {
export async function sanitizeContentBlocksImages(
blocks: ToolContentBlock[],
label: string,
opts: { maxDimensionPx?: number; maxBytes?: number } = {},
opts: ImageSanitizationLimits = {},
): Promise<ToolContentBlock[]> {
const maxDimensionPx = Math.max(opts.maxDimensionPx ?? MAX_IMAGE_DIMENSION_PX, 1);
const maxBytes = Math.max(opts.maxBytes ?? MAX_IMAGE_BYTES, 1);
@@ -198,7 +203,7 @@ export async function sanitizeContentBlocksImages(
export async function sanitizeImageBlocks(
images: ImageContent[],
label: string,
opts: { maxDimensionPx?: number; maxBytes?: number } = {},
opts: ImageSanitizationLimits = {},
): Promise<{ images: ImageContent[]; dropped: number }> {
if (images.length === 0) {
return { images, dropped: 0 };
@@ -211,7 +216,7 @@ export async function sanitizeImageBlocks(
export async function sanitizeToolResultImages(
result: AgentToolResult<unknown>,
label: string,
opts: { maxDimensionPx?: number; maxBytes?: number } = {},
opts: ImageSanitizationLimits = {},
): Promise<AgentToolResult<unknown>> {
const content = Array.isArray(result.content) ? result.content : [];
if (!content.some((b) => isImageBlock(b) || isTextBlock(b))) {

View File

@@ -1,9 +1,11 @@
import { Type } from "@sinclair/typebox";
import crypto from "node:crypto";
import fs from "node:fs/promises";
import { Type } from "@sinclair/typebox";
import type { OpenClawConfig } from "../../config/config.js";
import { writeBase64ToFile } from "../../cli/nodes-camera.js";
import { canvasSnapshotTempPath, parseCanvasSnapshotPayload } from "../../cli/nodes-canvas.js";
import { imageMimeFromFormat } from "../../media/mime.js";
import { resolveImageSanitizationLimits } from "../image-sanitization.js";
import { optionalStringEnum, stringEnum } from "../schema/typebox.js";
import { type AnyAgentTool, imageResult, jsonResult, readStringParam } from "./common.js";
import { callGatewayTool, readGatewayCallOptions } from "./gateway.js";
@@ -48,7 +50,8 @@ const CanvasToolSchema = Type.Object({
jsonlPath: Type.Optional(Type.String()),
});
export function createCanvasTool(): AnyAgentTool {
export function createCanvasTool(options?: { config?: OpenClawConfig }): AnyAgentTool {
const imageSanitization = resolveImageSanitizationLimits(options?.config);
return {
label: "Canvas",
name: "canvas",
@@ -158,6 +161,7 @@ export function createCanvasTool(): AnyAgentTool {
base64: payload.base64,
mimeType,
details: { format: payload.format },
imageSanitization,
});
}
case "a2ui_push": {

View File

@@ -1,5 +1,6 @@
import fs from "node:fs/promises";
import type { AgentTool, AgentToolResult } from "@mariozechner/pi-agent-core";
import fs from "node:fs/promises";
import type { ImageSanitizationLimits } from "../image-sanitization.js";
import { detectMime } from "../../media/mime.js";
import { sanitizeToolResultImages } from "../tool-images.js";
@@ -214,6 +215,7 @@ export async function imageResult(params: {
mimeType: string;
extraText?: string;
details?: Record<string, unknown>;
imageSanitization?: ImageSanitizationLimits;
}): Promise<AgentToolResult<unknown>> {
const content: AgentToolResult<unknown>["content"] = [
{
@@ -230,7 +232,7 @@ export async function imageResult(params: {
content,
details: { path: params.path, ...params.details },
};
return await sanitizeToolResultImages(result, params.label);
return await sanitizeToolResultImages(result, params.label, params.imageSanitization);
}
export async function imageResultFromFile(params: {
@@ -238,6 +240,7 @@ export async function imageResultFromFile(params: {
path: string;
extraText?: string;
details?: Record<string, unknown>;
imageSanitization?: ImageSanitizationLimits;
}): Promise<AgentToolResult<unknown>> {
const buf = await fs.readFile(params.path);
const mimeType = (await detectMime({ buffer: buf.slice(0, 256) })) ?? "image/png";
@@ -248,5 +251,6 @@ export async function imageResultFromFile(params: {
mimeType,
extraText: params.extraText,
details: params.details,
imageSanitization: params.imageSanitization,
});
}

View File

@@ -1,6 +1,7 @@
import crypto from "node:crypto";
import type { AgentToolResult } from "@mariozechner/pi-agent-core";
import { Type } from "@sinclair/typebox";
import crypto from "node:crypto";
import type { OpenClawConfig } from "../../config/config.js";
import {
type CameraFacing,
cameraTempPath,
@@ -16,9 +17,9 @@ import {
writeScreenRecordToFile,
} from "../../cli/nodes-screen.js";
import { parseDurationMs } from "../../cli/parse-duration.js";
import type { OpenClawConfig } from "../../config/config.js";
import { imageMimeFromFormat } from "../../media/mime.js";
import { resolveSessionAgentId } from "../agent-scope.js";
import { resolveImageSanitizationLimits } from "../image-sanitization.js";
import { optionalStringEnum, stringEnum } from "../schema/typebox.js";
import { sanitizeToolResultImages } from "../tool-images.js";
import { type AnyAgentTool, jsonResult, readStringParam } from "./common.js";
@@ -100,6 +101,7 @@ export function createNodesTool(options?: {
sessionKey: options?.agentSessionKey,
config: options?.config,
});
const imageSanitization = resolveImageSanitizationLimits(options?.config);
return {
label: "Nodes",
name: "nodes",
@@ -250,7 +252,7 @@ export function createNodesTool(options?: {
}
const result: AgentToolResult<unknown> = { content, details };
return await sanitizeToolResultImages(result, "nodes:camera_snap");
return await sanitizeToolResultImages(result, "nodes:camera_snap", imageSanitization);
}
case "camera_list": {
const node = readStringParam(params, "node", { required: true });

View File

@@ -290,6 +290,8 @@ export const FIELD_HELP: Record<string, string> = {
"agents.defaults.imageModel.primary":
"Optional image model (provider/model) used when the primary model lacks image input.",
"agents.defaults.imageModel.fallbacks": "Ordered fallback image models (provider/model).",
"agents.defaults.imageMaxDimensionPx":
"Max image side length in pixels when sanitizing transcript/tool-result image payloads (default: 1200).",
"agents.defaults.cliBackends": "Optional CLI backends for text-only fallback (claude-cli, etc.).",
"agents.defaults.humanDelay.mode": 'Delay style for block replies ("off", "natural", "custom").',
"agents.defaults.humanDelay.minMs": "Minimum delay in ms for custom humanDelay (default: 800).",

View File

@@ -204,6 +204,7 @@ export const FIELD_LABELS: Record<string, string> = {
"agents.defaults.model.fallbacks": "Model Fallbacks",
"agents.defaults.imageModel.primary": "Image Model",
"agents.defaults.imageModel.fallbacks": "Image Model Fallbacks",
"agents.defaults.imageMaxDimensionPx": "Image Max Dimension (px)",
"agents.defaults.humanDelay.mode": "Human Delay Mode",
"agents.defaults.humanDelay.minMs": "Human Delay Min (ms)",
"agents.defaults.humanDelay.maxMs": "Human Delay Max (ms)",

View File

@@ -190,6 +190,11 @@ export type AgentDefaultsConfig = {
timeoutSeconds?: number;
/** Max inbound media size in MB for agent-visible attachments (text note or future image attach). */
mediaMaxMb?: number;
/**
* Max image side length (pixels) when sanitizing base64 image payloads in transcripts/tool results.
* Default: 1200.
*/
imageMaxDimensionPx?: number;
typingIntervalSeconds?: number;
/** Typing indicator start mode (never|instant|thinking|message). */
typingMode?: TypingMode;

View File

@@ -126,6 +126,7 @@ export const AgentDefaultsSchema = z
humanDelay: HumanDelaySchema.optional(),
timeoutSeconds: z.number().int().positive().optional(),
mediaMaxMb: z.number().positive().optional(),
imageMaxDimensionPx: z.number().int().positive().optional(),
typingIntervalSeconds: z.number().int().positive().optional(),
typingMode: z
.union([