mirror of
https://github.com/openclaw/openclaw.git
synced 2026-02-19 18:39:20 -05:00
fix(agents): make image sanitization dimension configurable
This commit is contained in:
@@ -126,6 +126,7 @@ When validation fails:
|
||||
|
||||
- `agents.defaults.models` defines the model catalog and acts as the allowlist for `/model`.
|
||||
- Model refs use `provider/model` format (e.g. `anthropic/claude-opus-4-6`).
|
||||
- `agents.defaults.imageMaxDimensionPx` controls transcript/tool image downscaling (default `1200`).
|
||||
- See [Models CLI](/concepts/models) for switching models in chat and [Model Failover](/concepts/model-failover) for auth rotation and fallback behavior.
|
||||
- For custom/self-hosted providers, see [Custom providers](/gateway/configuration-reference#custom-providers-and-base-urls) in the reference.
|
||||
|
||||
|
||||
@@ -57,6 +57,7 @@ Implementation:
|
||||
|
||||
- `sanitizeSessionMessagesImages` in `src/agents/pi-embedded-helpers/images.ts`
|
||||
- `sanitizeContentBlocksImages` in `src/agents/tool-images.ts`
|
||||
- Max image side is configurable via `agents.defaults.imageMaxDimensionPx` (default: `1200`).
|
||||
|
||||
---
|
||||
|
||||
|
||||
20
src/agents/image-sanitization.test.ts
Normal file
20
src/agents/image-sanitization.test.ts
Normal file
@@ -0,0 +1,20 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import { resolveImageSanitizationLimits } from "./image-sanitization.js";
|
||||
|
||||
// Exercises resolveImageSanitizationLimits: absent config, normalization of a
// fractional value, rejection of unusable values, and the lower clamp.
describe("image sanitization config", () => {
  // Builds a minimal config carrying only the field under test. The double cast
  // lets the tests feed values the typed config would normally forbid.
  const withMaxDimension = (value: unknown): OpenClawConfig =>
    ({ agents: { defaults: { imageMaxDimensionPx: value } } }) as unknown as OpenClawConfig;

  it("defaults when no config value exists", () => {
    expect(resolveImageSanitizationLimits(undefined)).toEqual({});
    expect(
      resolveImageSanitizationLimits({ agents: { defaults: {} } } as unknown as OpenClawConfig),
    ).toEqual({});
  });

  it("reads and normalizes agents.defaults.imageMaxDimensionPx", () => {
    expect(resolveImageSanitizationLimits(withMaxDimension(1600.9))).toEqual({
      maxDimensionPx: 1600,
    });
  });

  it("ignores non-number and non-finite values", () => {
    // The resolver returns an empty object for these, so no limit is configured.
    expect(resolveImageSanitizationLimits(withMaxDimension("1600"))).toEqual({});
    expect(resolveImageSanitizationLimits(withMaxDimension(null))).toEqual({});
    expect(resolveImageSanitizationLimits(withMaxDimension(Number.NaN))).toEqual({});
    expect(resolveImageSanitizationLimits(withMaxDimension(Number.POSITIVE_INFINITY))).toEqual({});
  });

  it("clamps values below one pixel up to 1", () => {
    expect(resolveImageSanitizationLimits(withMaxDimension(0))).toEqual({ maxDimensionPx: 1 });
    expect(resolveImageSanitizationLimits(withMaxDimension(0.4))).toEqual({ maxDimensionPx: 1 });
    expect(resolveImageSanitizationLimits(withMaxDimension(-50))).toEqual({ maxDimensionPx: 1 });
  });
});
|
||||
17
src/agents/image-sanitization.ts
Normal file
17
src/agents/image-sanitization.ts
Normal file
@@ -0,0 +1,17 @@
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
|
||||
/**
 * Optional limits applied when sanitizing image payloads.
 * Both fields may be omitted; an empty object is a valid value.
 */
export type ImageSanitizationLimits = {
  /** Maximum image side length, in pixels. */
  maxDimensionPx?: number;
  /** Maximum image payload size, in bytes. */
  maxBytes?: number;
};
|
||||
|
||||
/** Default maximum image side length, in pixels. */
export const DEFAULT_IMAGE_MAX_DIMENSION_PX = 1200;

/** Default maximum image payload size: 5 MiB, expressed in bytes. */
export const DEFAULT_IMAGE_MAX_BYTES = 5 * 1024 ** 2;
|
||||
|
||||
/**
 * Derives image sanitization limits from the loaded configuration.
 *
 * Reads `agents.defaults.imageMaxDimensionPx`. Only `maxDimensionPx` is ever
 * populated here; `maxBytes` is never set by this function.
 *
 * @param cfg - Loaded configuration, or `undefined` when none is available.
 * @returns An empty object when the setting is missing, not a number, or not
 *   finite. Otherwise `{ maxDimensionPx }` with the value floored to a whole
 *   number and raised to at least 1.
 */
export function resolveImageSanitizationLimits(cfg?: OpenClawConfig): ImageSanitizationLimits {
  const rawMaxDimension = cfg?.agents?.defaults?.imageMaxDimensionPx;

  // Anything that is not a real, finite number means "nothing configured".
  if (typeof rawMaxDimension !== "number") {
    return {};
  }
  if (!Number.isFinite(rawMaxDimension)) {
    return {};
  }

  // Drop any fractional part, then keep the result at a minimum of one pixel.
  const wholePixels = Math.floor(rawMaxDimension);
  return { maxDimensionPx: wholePixels < 1 ? 1 : wholePixels };
}
|
||||
@@ -1,12 +1,12 @@
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import { resolvePluginTools } from "../plugins/tools.js";
|
||||
import type { GatewayMessageChannel } from "../utils/message-channel.js";
|
||||
import { resolveSessionAgentId } from "./agent-scope.js";
|
||||
import type { SandboxFsBridge } from "./sandbox/fs-bridge.js";
|
||||
import type { AnyAgentTool } from "./tools/common.js";
|
||||
import { resolvePluginTools } from "../plugins/tools.js";
|
||||
import { resolveSessionAgentId } from "./agent-scope.js";
|
||||
import { createAgentsListTool } from "./tools/agents-list-tool.js";
|
||||
import { createBrowserTool } from "./tools/browser-tool.js";
|
||||
import { createCanvasTool } from "./tools/canvas-tool.js";
|
||||
import type { AnyAgentTool } from "./tools/common.js";
|
||||
import { createCronTool } from "./tools/cron-tool.js";
|
||||
import { createGatewayTool } from "./tools/gateway-tool.js";
|
||||
import { createImageTool } from "./tools/image-tool.js";
|
||||
@@ -102,7 +102,7 @@ export function createOpenClawTools(options?: {
|
||||
sandboxBridgeUrl: options?.sandboxBrowserBridgeUrl,
|
||||
allowHostControl: options?.allowHostBrowserControl,
|
||||
}),
|
||||
createCanvasTool(),
|
||||
createCanvasTool({ config: options?.config }),
|
||||
createNodesTool({
|
||||
agentSessionKey: options?.agentSessionKey,
|
||||
config: options?.config,
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import type { AgentMessage, AgentToolResult } from "@mariozechner/pi-agent-core";
|
||||
import type { ImageSanitizationLimits } from "../image-sanitization.js";
|
||||
import type { ToolCallIdMode } from "../tool-call-id.js";
|
||||
import { sanitizeToolCallIdsForCloudCodeAssist } from "../tool-call-id.js";
|
||||
import { sanitizeContentBlocksImages } from "../tool-images.js";
|
||||
@@ -45,12 +46,16 @@ export async function sanitizeSessionMessagesImages(
|
||||
allowBase64Only?: boolean;
|
||||
includeCamelCase?: boolean;
|
||||
};
|
||||
},
|
||||
} & ImageSanitizationLimits,
|
||||
): Promise<AgentMessage[]> {
|
||||
const sanitizeMode = options?.sanitizeMode ?? "full";
|
||||
const allowNonImageSanitization = sanitizeMode === "full";
|
||||
const imageSanitization = {
|
||||
maxDimensionPx: options?.maxDimensionPx,
|
||||
maxBytes: options?.maxBytes,
|
||||
};
|
||||
// We sanitize historical session messages because Anthropic can reject a request
|
||||
// if the transcript contains oversized base64 images (see MAX_IMAGE_DIMENSION_PX).
|
||||
// if the transcript contains oversized base64 images (default max side 1200px).
|
||||
const sanitizedIds =
|
||||
allowNonImageSanitization && options?.sanitizeToolCallIds
|
||||
? sanitizeToolCallIdsForCloudCodeAssist(messages, options.toolCallIdMode)
|
||||
@@ -69,6 +74,7 @@ export async function sanitizeSessionMessagesImages(
|
||||
const nextContent = (await sanitizeContentBlocksImages(
|
||||
content,
|
||||
label,
|
||||
imageSanitization,
|
||||
)) as unknown as typeof toolMsg.content;
|
||||
out.push({ ...toolMsg, content: nextContent });
|
||||
continue;
|
||||
@@ -81,6 +87,7 @@ export async function sanitizeSessionMessagesImages(
|
||||
const nextContent = (await sanitizeContentBlocksImages(
|
||||
content as unknown as ContentBlock[],
|
||||
label,
|
||||
imageSanitization,
|
||||
)) as unknown as typeof userMsg.content;
|
||||
out.push({ ...userMsg, content: nextContent });
|
||||
continue;
|
||||
@@ -95,6 +102,7 @@ export async function sanitizeSessionMessagesImages(
|
||||
const nextContent = (await sanitizeContentBlocksImages(
|
||||
content as unknown as ContentBlock[],
|
||||
label,
|
||||
imageSanitization,
|
||||
)) as unknown as typeof assistantMsg.content;
|
||||
out.push({ ...assistantMsg, content: nextContent });
|
||||
} else {
|
||||
@@ -108,6 +116,7 @@ export async function sanitizeSessionMessagesImages(
|
||||
const nextContent = (await sanitizeContentBlocksImages(
|
||||
content as unknown as ContentBlock[],
|
||||
label,
|
||||
imageSanitization,
|
||||
)) as unknown as typeof assistantMsg.content;
|
||||
out.push({ ...assistantMsg, content: nextContent });
|
||||
continue;
|
||||
@@ -129,6 +138,7 @@ export async function sanitizeSessionMessagesImages(
|
||||
const finalContent = (await sanitizeContentBlocksImages(
|
||||
filteredContent as unknown as ContentBlock[],
|
||||
label,
|
||||
imageSanitization,
|
||||
)) as unknown as typeof assistantMsg.content;
|
||||
if (finalContent.length === 0) {
|
||||
continue;
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||
import {
|
||||
createAgentSession,
|
||||
@@ -7,10 +5,14 @@ import {
|
||||
SessionManager,
|
||||
SettingsManager,
|
||||
} from "@mariozechner/pi-coding-agent";
|
||||
import { resolveHeartbeatPrompt } from "../../auto-reply/heartbeat.js";
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import type { ReasoningLevel, ThinkLevel } from "../../auto-reply/thinking.js";
|
||||
import { resolveChannelCapabilities } from "../../config/channel-capabilities.js";
|
||||
import type { OpenClawConfig } from "../../config/config.js";
|
||||
import type { ExecElevatedDefaults } from "../bash-tools.js";
|
||||
import type { EmbeddedPiCompactResult } from "./types.js";
|
||||
import { resolveHeartbeatPrompt } from "../../auto-reply/heartbeat.js";
|
||||
import { resolveChannelCapabilities } from "../../config/channel-capabilities.js";
|
||||
import { getMachineDisplayName } from "../../infra/machine-name.js";
|
||||
import { getGlobalHookRunner } from "../../plugins/hook-runner-global.js";
|
||||
import { type enqueueCommand, enqueueCommandInLane } from "../../process/command-queue.js";
|
||||
@@ -24,7 +26,6 @@ import { normalizeMessageChannel } from "../../utils/message-channel.js";
|
||||
import { isReasoningTagProvider } from "../../utils/provider-utils.js";
|
||||
import { resolveOpenClawAgentDir } from "../agent-paths.js";
|
||||
import { resolveSessionAgentIds } from "../agent-scope.js";
|
||||
import type { ExecElevatedDefaults } from "../bash-tools.js";
|
||||
import { makeBootstrapWarn, resolveBootstrapContextForRun } from "../bootstrap-files.js";
|
||||
import { listChannelSupportedActions, resolveChannelMessageToolHints } from "../channel-tools.js";
|
||||
import { formatUserTime, resolveUserTimeFormat, resolveUserTimezone } from "../date-time.js";
|
||||
@@ -81,7 +82,6 @@ import {
|
||||
createSystemPromptOverride,
|
||||
} from "./system-prompt.js";
|
||||
import { splitSdkTools } from "./tool-split.js";
|
||||
import type { EmbeddedPiCompactResult } from "./types.js";
|
||||
import { describeUnknownError, mapThinkingLevel } from "./utils.js";
|
||||
import { flushPendingToolResultsAfterIdle } from "./wait-for-idle-before-flush.js";
|
||||
|
||||
@@ -570,6 +570,7 @@ export async function compactEmbeddedPiSessionDirect(
|
||||
modelApi: model.api,
|
||||
modelId,
|
||||
provider,
|
||||
config: params.config,
|
||||
sessionManager,
|
||||
sessionId: params.sessionId,
|
||||
policy: transcriptPolicy,
|
||||
|
||||
@@ -1,12 +1,15 @@
|
||||
import { EventEmitter } from "node:events";
|
||||
import type { AgentMessage, AgentTool } from "@mariozechner/pi-agent-core";
|
||||
import type { SessionManager } from "@mariozechner/pi-coding-agent";
|
||||
import type { TSchema } from "@sinclair/typebox";
|
||||
import { EventEmitter } from "node:events";
|
||||
import type { OpenClawConfig } from "../../config/config.js";
|
||||
import type { TranscriptPolicy } from "../transcript-policy.js";
|
||||
import { registerUnhandledRejectionHandler } from "../../infra/unhandled-rejections.js";
|
||||
import {
|
||||
hasInterSessionUserProvenance,
|
||||
normalizeInputProvenance,
|
||||
} from "../../sessions/input-provenance.js";
|
||||
import { resolveImageSanitizationLimits } from "../image-sanitization.js";
|
||||
import {
|
||||
downgradeOpenAIReasoningBlocks,
|
||||
isCompactionFailureError,
|
||||
@@ -20,7 +23,6 @@ import {
|
||||
stripToolResultDetails,
|
||||
sanitizeToolUseResultPairing,
|
||||
} from "../session-transcript-repair.js";
|
||||
import type { TranscriptPolicy } from "../transcript-policy.js";
|
||||
import { resolveTranscriptPolicy } from "../transcript-policy.js";
|
||||
import { log } from "./logger.js";
|
||||
import { describeUnknownError } from "./utils.js";
|
||||
@@ -416,6 +418,7 @@ export async function sanitizeSessionHistory(params: {
|
||||
modelApi?: string | null;
|
||||
modelId?: string;
|
||||
provider?: string;
|
||||
config?: OpenClawConfig;
|
||||
sessionManager: SessionManager;
|
||||
sessionId: string;
|
||||
policy?: TranscriptPolicy;
|
||||
@@ -438,6 +441,7 @@ export async function sanitizeSessionHistory(params: {
|
||||
toolCallIdMode: policy.toolCallIdMode,
|
||||
preserveSignatures: policy.preserveSignatures,
|
||||
sanitizeThoughtSignatures: policy.sanitizeThoughtSignatures,
|
||||
...resolveImageSanitizationLimits(params.config),
|
||||
},
|
||||
);
|
||||
const sanitizedThinking = policy.normalizeAntigravityThinkingBlocks
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||
import type { ImageContent } from "@mariozechner/pi-ai";
|
||||
import { streamSimple } from "@mariozechner/pi-ai";
|
||||
import { createAgentSession, SessionManager, SettingsManager } from "@mariozechner/pi-coding-agent";
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import type { EmbeddedRunAttemptParams, EmbeddedRunAttemptResult } from "./types.js";
|
||||
import { resolveHeartbeatPrompt } from "../../../auto-reply/heartbeat.js";
|
||||
import { resolveChannelCapabilities } from "../../../config/channel-capabilities.js";
|
||||
import { getMachineDisplayName } from "../../../infra/machine-name.js";
|
||||
@@ -33,6 +34,7 @@ import {
|
||||
import { DEFAULT_CONTEXT_TOKENS } from "../../defaults.js";
|
||||
import { resolveOpenClawDocsPath } from "../../docs-path.js";
|
||||
import { isTimeoutError } from "../../failover-error.js";
|
||||
import { resolveImageSanitizationLimits } from "../../image-sanitization.js";
|
||||
import { resolveModelAuthMode } from "../../model-auth.js";
|
||||
import { resolveDefaultModelForAgent } from "../../model-selection.js";
|
||||
import { createOllamaStreamFn, OLLAMA_NATIVE_BASE_URL } from "../../ollama-stream.js";
|
||||
@@ -105,7 +107,6 @@ import {
|
||||
shouldFlagCompactionTimeout,
|
||||
} from "./compaction-timeout.js";
|
||||
import { detectAndLoadPromptImages } from "./images.js";
|
||||
import type { EmbeddedRunAttemptParams, EmbeddedRunAttemptResult } from "./types.js";
|
||||
|
||||
export function injectHistoryImagesIntoMessages(
|
||||
messages: AgentMessage[],
|
||||
@@ -666,6 +667,7 @@ export async function runEmbeddedAttempt(
|
||||
modelApi: params.model.api,
|
||||
modelId: params.modelId,
|
||||
provider: params.provider,
|
||||
config: params.config,
|
||||
sessionManager,
|
||||
sessionId: params.sessionId,
|
||||
policy: transcriptPolicy,
|
||||
@@ -968,6 +970,7 @@ export async function runEmbeddedAttempt(
|
||||
existingImages: params.images,
|
||||
historyMessages: activeSession.messages,
|
||||
maxBytes: MAX_IMAGE_BYTES,
|
||||
maxDimensionPx: resolveImageSanitizationLimits(params.config).maxDimensionPx,
|
||||
// Enforce sandbox path restrictions when sandbox is enabled
|
||||
sandbox:
|
||||
sandbox?.enabled && sandbox?.fsBridge
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
import type { ImageContent } from "@mariozechner/pi-ai";
|
||||
import path from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import type { ImageContent } from "@mariozechner/pi-ai";
|
||||
import type { ImageSanitizationLimits } from "../../image-sanitization.js";
|
||||
import type { SandboxFsBridge } from "../../sandbox/fs-bridge.js";
|
||||
import { resolveUserPath } from "../../../utils.js";
|
||||
import { loadWebMedia } from "../../../web/media.js";
|
||||
import type { SandboxFsBridge } from "../../sandbox/fs-bridge.js";
|
||||
import { sanitizeImageBlocks } from "../../tool-images.js";
|
||||
import { log } from "../logger.js";
|
||||
|
||||
@@ -48,8 +49,13 @@ function isImageExtension(filePath: string): boolean {
|
||||
async function sanitizeImagesWithLog(
|
||||
images: ImageContent[],
|
||||
label: string,
|
||||
imageSanitization?: ImageSanitizationLimits,
|
||||
): Promise<ImageContent[]> {
|
||||
const { images: sanitized, dropped } = await sanitizeImageBlocks(images, label);
|
||||
const { images: sanitized, dropped } = await sanitizeImageBlocks(
|
||||
images,
|
||||
label,
|
||||
imageSanitization,
|
||||
);
|
||||
if (dropped > 0) {
|
||||
log.warn(`Native image: dropped ${dropped} image(s) after sanitization (${label}).`);
|
||||
}
|
||||
@@ -354,6 +360,7 @@ export async function detectAndLoadPromptImages(params: {
|
||||
existingImages?: ImageContent[];
|
||||
historyMessages?: unknown[];
|
||||
maxBytes?: number;
|
||||
maxDimensionPx?: number;
|
||||
sandbox?: { root: string; bridge: SandboxFsBridge };
|
||||
}): Promise<{
|
||||
/** Images for the current prompt (existingImages + detected in current prompt) */
|
||||
@@ -437,10 +444,21 @@ export async function detectAndLoadPromptImages(params: {
|
||||
}
|
||||
}
|
||||
|
||||
const sanitizedPromptImages = await sanitizeImagesWithLog(promptImages, "prompt:images");
|
||||
const imageSanitization: ImageSanitizationLimits = {
|
||||
maxDimensionPx: params.maxDimensionPx,
|
||||
};
|
||||
const sanitizedPromptImages = await sanitizeImagesWithLog(
|
||||
promptImages,
|
||||
"prompt:images",
|
||||
imageSanitization,
|
||||
);
|
||||
const sanitizedHistoryImagesByIndex = new Map<number, ImageContent[]>();
|
||||
for (const [index, images] of historyImagesByIndex) {
|
||||
const sanitized = await sanitizeImagesWithLog(images, `history:images:${index}`);
|
||||
const sanitized = await sanitizeImagesWithLog(
|
||||
images,
|
||||
`history:images:${index}`,
|
||||
imageSanitization,
|
||||
);
|
||||
if (sanitized.length > 0) {
|
||||
sanitizedHistoryImagesByIndex.set(index, sanitized);
|
||||
}
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
import type { AgentToolResult } from "@mariozechner/pi-agent-core";
|
||||
import { createEditTool, createReadTool, createWriteTool } from "@mariozechner/pi-coding-agent";
|
||||
import type { ImageSanitizationLimits } from "./image-sanitization.js";
|
||||
import type { AnyAgentTool } from "./pi-tools.types.js";
|
||||
import type { SandboxFsBridge } from "./sandbox/fs-bridge.js";
|
||||
import { detectMime } from "../media/mime.js";
|
||||
import { sniffMimeFromBase64 } from "../media/sniff-mime-from-base64.js";
|
||||
import type { AnyAgentTool } from "./pi-tools.types.js";
|
||||
import { assertSandboxPath } from "./sandbox-paths.js";
|
||||
import type { SandboxFsBridge } from "./sandbox/fs-bridge.js";
|
||||
import { sanitizeToolResultImages } from "./tool-images.js";
|
||||
|
||||
// NOTE(steipete): Upstream read now does file-magic MIME detection; we keep the wrapper
|
||||
@@ -21,6 +22,7 @@ const MAX_ADAPTIVE_READ_PAGES = 8;
|
||||
|
||||
type OpenClawReadToolOptions = {
|
||||
modelContextWindowTokens?: number;
|
||||
imageSanitization?: ImageSanitizationLimits;
|
||||
};
|
||||
|
||||
type ReadTruncationDetails = {
|
||||
@@ -566,6 +568,7 @@ type SandboxToolParams = {
|
||||
root: string;
|
||||
bridge: SandboxFsBridge;
|
||||
modelContextWindowTokens?: number;
|
||||
imageSanitization?: ImageSanitizationLimits;
|
||||
};
|
||||
|
||||
export function createSandboxedReadTool(params: SandboxToolParams) {
|
||||
@@ -574,6 +577,7 @@ export function createSandboxedReadTool(params: SandboxToolParams) {
|
||||
}) as unknown as AnyAgentTool;
|
||||
return createOpenClawReadTool(base, {
|
||||
modelContextWindowTokens: params.modelContextWindowTokens,
|
||||
imageSanitization: params.imageSanitization,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -614,7 +618,11 @@ export function createOpenClawReadTool(
|
||||
const filePath = typeof record?.path === "string" ? String(record.path) : "<unknown>";
|
||||
const strippedDetailsResult = stripReadTruncationContentDetails(result);
|
||||
const normalizedResult = await normalizeReadImageResult(strippedDetailsResult, filePath);
|
||||
return sanitizeToolResultImages(normalizedResult, `read:${filePath}`);
|
||||
return sanitizeToolResultImages(
|
||||
normalizedResult,
|
||||
`read:${filePath}`,
|
||||
options?.imageSanitization,
|
||||
);
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
@@ -7,6 +7,9 @@ import {
|
||||
} from "@mariozechner/pi-coding-agent";
|
||||
import type { OpenClawConfig } from "../config/config.js";
|
||||
import type { ToolLoopDetectionConfig } from "../config/types.tools.js";
|
||||
import type { ModelAuthMode } from "./model-auth.js";
|
||||
import type { AnyAgentTool } from "./pi-tools.types.js";
|
||||
import type { SandboxContext } from "./sandbox.js";
|
||||
import { logWarn } from "../logger.js";
|
||||
import { getPluginToolMeta } from "../plugins/tools.js";
|
||||
import { isSubagentSessionKey } from "../routing/session-key.js";
|
||||
@@ -20,7 +23,7 @@ import {
|
||||
type ProcessToolDefaults,
|
||||
} from "./bash-tools.js";
|
||||
import { listChannelAgentTools } from "./channel-tools.js";
|
||||
import type { ModelAuthMode } from "./model-auth.js";
|
||||
import { resolveImageSanitizationLimits } from "./image-sanitization.js";
|
||||
import { createOpenClawTools } from "./openclaw-tools.js";
|
||||
import { wrapToolWithAbortSignal } from "./pi-tools.abort.js";
|
||||
import { wrapToolWithBeforeToolCallHook } from "./pi-tools.before-tool-call.js";
|
||||
@@ -43,8 +46,6 @@ import {
|
||||
wrapToolParamNormalization,
|
||||
} from "./pi-tools.read.js";
|
||||
import { cleanToolSchemaForGemini, normalizeToolParameters } from "./pi-tools.schema.js";
|
||||
import type { AnyAgentTool } from "./pi-tools.types.js";
|
||||
import type { SandboxContext } from "./sandbox.js";
|
||||
import { getSubagentDepthFromSessionStore } from "./subagent-depth.js";
|
||||
import {
|
||||
applyToolPolicyPipeline,
|
||||
@@ -300,6 +301,7 @@ export function createOpenClawCodingTools(options?: {
|
||||
if (sandboxRoot && !sandboxFsBridge) {
|
||||
throw new Error("Sandbox filesystem bridge is unavailable.");
|
||||
}
|
||||
const imageSanitization = resolveImageSanitizationLimits(options?.config);
|
||||
|
||||
const base = (codingTools as unknown as AnyAgentTool[]).flatMap((tool) => {
|
||||
if (tool.name === readTool.name) {
|
||||
@@ -308,12 +310,14 @@ export function createOpenClawCodingTools(options?: {
|
||||
root: sandboxRoot,
|
||||
bridge: sandboxFsBridge!,
|
||||
modelContextWindowTokens: options?.modelContextWindowTokens,
|
||||
imageSanitization,
|
||||
});
|
||||
return [workspaceOnly ? wrapToolWorkspaceRootGuard(sandboxed, sandboxRoot) : sandboxed];
|
||||
}
|
||||
const freshReadTool = createReadTool(workspaceRoot);
|
||||
const wrapped = createOpenClawReadTool(freshReadTool, {
|
||||
modelContextWindowTokens: options?.modelContextWindowTokens,
|
||||
imageSanitization,
|
||||
});
|
||||
return [workspaceOnly ? wrapToolWorkspaceRootGuard(wrapped, workspaceRoot) : wrapped];
|
||||
}
|
||||
|
||||
@@ -49,8 +49,8 @@ describe("tool image sanitizing", () => {
|
||||
expect(dropped).toBe(0);
|
||||
expect(out.length).toBe(1);
|
||||
const meta = await sharp(Buffer.from(out[0].data, "base64")).metadata();
|
||||
expect(meta.width).toBeLessThanOrEqual(2000);
|
||||
expect(meta.height).toBeLessThanOrEqual(2000);
|
||||
expect(meta.width).toBeLessThanOrEqual(1200);
|
||||
expect(meta.height).toBeLessThanOrEqual(1200);
|
||||
}, 20_000);
|
||||
|
||||
it("shrinks images that exceed max dimension even if size is small", async () => {
|
||||
@@ -77,8 +77,8 @@ describe("tool image sanitizing", () => {
|
||||
throw new Error("expected image block");
|
||||
}
|
||||
const meta = await sharp(Buffer.from(image.data, "base64")).metadata();
|
||||
expect(meta.width).toBeLessThanOrEqual(2000);
|
||||
expect(meta.height).toBeLessThanOrEqual(2000);
|
||||
expect(meta.width).toBeLessThanOrEqual(1200);
|
||||
expect(meta.height).toBeLessThanOrEqual(1200);
|
||||
expect(image.mimeType).toBe("image/jpeg");
|
||||
}, 20_000);
|
||||
|
||||
|
||||
@@ -2,6 +2,11 @@ import type { AgentToolResult } from "@mariozechner/pi-agent-core";
|
||||
import type { ImageContent } from "@mariozechner/pi-ai";
|
||||
import { createSubsystemLogger } from "../logging/subsystem.js";
|
||||
import { getImageMetadata, resizeToJpeg } from "../media/image-ops.js";
|
||||
import {
|
||||
DEFAULT_IMAGE_MAX_BYTES,
|
||||
DEFAULT_IMAGE_MAX_DIMENSION_PX,
|
||||
type ImageSanitizationLimits,
|
||||
} from "./image-sanitization.js";
|
||||
|
||||
type ToolContentBlock = AgentToolResult<unknown>["content"][number];
|
||||
type ImageContentBlock = Extract<ToolContentBlock, { type: "image" }>;
|
||||
@@ -13,8 +18,8 @@ type TextContentBlock = Extract<ToolContentBlock, { type: "text" }>;
|
||||
//
|
||||
// To keep sessions resilient (and avoid "silent" WhatsApp non-replies), we auto-downscale
|
||||
// and recompress base64 image blocks when they exceed these limits.
|
||||
const MAX_IMAGE_DIMENSION_PX = 1200;
|
||||
const MAX_IMAGE_BYTES = 5 * 1024 * 1024;
|
||||
const MAX_IMAGE_DIMENSION_PX = DEFAULT_IMAGE_MAX_DIMENSION_PX;
|
||||
const MAX_IMAGE_BYTES = DEFAULT_IMAGE_MAX_BYTES;
|
||||
const log = createSubsystemLogger("agents/tool-images");
|
||||
|
||||
function isImageBlock(block: unknown): block is ImageContentBlock {
|
||||
@@ -100,7 +105,7 @@ async function resizeImageBase64IfNeeded(params: {
|
||||
const maxDim = hasDimensions ? Math.max(width ?? 0, height ?? 0) : params.maxDimensionPx;
|
||||
const sideStart = maxDim > 0 ? Math.min(params.maxDimensionPx, maxDim) : params.maxDimensionPx;
|
||||
const sideGrid = [sideStart, 1800, 1600, 1400, 1200, 1000, 800]
|
||||
.map((v) => Math.min(params.maxDimensionPx, v))
|
||||
.filter((v) => v > 0 && v <= params.maxDimensionPx)
|
||||
.filter((v, i, arr) => v > 0 && arr.indexOf(v) === i)
|
||||
.toSorted((a, b) => b - a);
|
||||
|
||||
@@ -148,7 +153,7 @@ async function resizeImageBase64IfNeeded(params: {
|
||||
export async function sanitizeContentBlocksImages(
|
||||
blocks: ToolContentBlock[],
|
||||
label: string,
|
||||
opts: { maxDimensionPx?: number; maxBytes?: number } = {},
|
||||
opts: ImageSanitizationLimits = {},
|
||||
): Promise<ToolContentBlock[]> {
|
||||
const maxDimensionPx = Math.max(opts.maxDimensionPx ?? MAX_IMAGE_DIMENSION_PX, 1);
|
||||
const maxBytes = Math.max(opts.maxBytes ?? MAX_IMAGE_BYTES, 1);
|
||||
@@ -198,7 +203,7 @@ export async function sanitizeContentBlocksImages(
|
||||
export async function sanitizeImageBlocks(
|
||||
images: ImageContent[],
|
||||
label: string,
|
||||
opts: { maxDimensionPx?: number; maxBytes?: number } = {},
|
||||
opts: ImageSanitizationLimits = {},
|
||||
): Promise<{ images: ImageContent[]; dropped: number }> {
|
||||
if (images.length === 0) {
|
||||
return { images, dropped: 0 };
|
||||
@@ -211,7 +216,7 @@ export async function sanitizeImageBlocks(
|
||||
export async function sanitizeToolResultImages(
|
||||
result: AgentToolResult<unknown>,
|
||||
label: string,
|
||||
opts: { maxDimensionPx?: number; maxBytes?: number } = {},
|
||||
opts: ImageSanitizationLimits = {},
|
||||
): Promise<AgentToolResult<unknown>> {
|
||||
const content = Array.isArray(result.content) ? result.content : [];
|
||||
if (!content.some((b) => isImageBlock(b) || isTextBlock(b))) {
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
import { Type } from "@sinclair/typebox";
|
||||
import crypto from "node:crypto";
|
||||
import fs from "node:fs/promises";
|
||||
import { Type } from "@sinclair/typebox";
|
||||
import type { OpenClawConfig } from "../../config/config.js";
|
||||
import { writeBase64ToFile } from "../../cli/nodes-camera.js";
|
||||
import { canvasSnapshotTempPath, parseCanvasSnapshotPayload } from "../../cli/nodes-canvas.js";
|
||||
import { imageMimeFromFormat } from "../../media/mime.js";
|
||||
import { resolveImageSanitizationLimits } from "../image-sanitization.js";
|
||||
import { optionalStringEnum, stringEnum } from "../schema/typebox.js";
|
||||
import { type AnyAgentTool, imageResult, jsonResult, readStringParam } from "./common.js";
|
||||
import { callGatewayTool, readGatewayCallOptions } from "./gateway.js";
|
||||
@@ -48,7 +50,8 @@ const CanvasToolSchema = Type.Object({
|
||||
jsonlPath: Type.Optional(Type.String()),
|
||||
});
|
||||
|
||||
export function createCanvasTool(): AnyAgentTool {
|
||||
export function createCanvasTool(options?: { config?: OpenClawConfig }): AnyAgentTool {
|
||||
const imageSanitization = resolveImageSanitizationLimits(options?.config);
|
||||
return {
|
||||
label: "Canvas",
|
||||
name: "canvas",
|
||||
@@ -158,6 +161,7 @@ export function createCanvasTool(): AnyAgentTool {
|
||||
base64: payload.base64,
|
||||
mimeType,
|
||||
details: { format: payload.format },
|
||||
imageSanitization,
|
||||
});
|
||||
}
|
||||
case "a2ui_push": {
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import fs from "node:fs/promises";
|
||||
import type { AgentTool, AgentToolResult } from "@mariozechner/pi-agent-core";
|
||||
import fs from "node:fs/promises";
|
||||
import type { ImageSanitizationLimits } from "../image-sanitization.js";
|
||||
import { detectMime } from "../../media/mime.js";
|
||||
import { sanitizeToolResultImages } from "../tool-images.js";
|
||||
|
||||
@@ -214,6 +215,7 @@ export async function imageResult(params: {
|
||||
mimeType: string;
|
||||
extraText?: string;
|
||||
details?: Record<string, unknown>;
|
||||
imageSanitization?: ImageSanitizationLimits;
|
||||
}): Promise<AgentToolResult<unknown>> {
|
||||
const content: AgentToolResult<unknown>["content"] = [
|
||||
{
|
||||
@@ -230,7 +232,7 @@ export async function imageResult(params: {
|
||||
content,
|
||||
details: { path: params.path, ...params.details },
|
||||
};
|
||||
return await sanitizeToolResultImages(result, params.label);
|
||||
return await sanitizeToolResultImages(result, params.label, params.imageSanitization);
|
||||
}
|
||||
|
||||
export async function imageResultFromFile(params: {
|
||||
@@ -238,6 +240,7 @@ export async function imageResultFromFile(params: {
|
||||
path: string;
|
||||
extraText?: string;
|
||||
details?: Record<string, unknown>;
|
||||
imageSanitization?: ImageSanitizationLimits;
|
||||
}): Promise<AgentToolResult<unknown>> {
|
||||
const buf = await fs.readFile(params.path);
|
||||
const mimeType = (await detectMime({ buffer: buf.slice(0, 256) })) ?? "image/png";
|
||||
@@ -248,5 +251,6 @@ export async function imageResultFromFile(params: {
|
||||
mimeType,
|
||||
extraText: params.extraText,
|
||||
details: params.details,
|
||||
imageSanitization: params.imageSanitization,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import crypto from "node:crypto";
|
||||
import type { AgentToolResult } from "@mariozechner/pi-agent-core";
|
||||
import { Type } from "@sinclair/typebox";
|
||||
import crypto from "node:crypto";
|
||||
import type { OpenClawConfig } from "../../config/config.js";
|
||||
import {
|
||||
type CameraFacing,
|
||||
cameraTempPath,
|
||||
@@ -16,9 +17,9 @@ import {
|
||||
writeScreenRecordToFile,
|
||||
} from "../../cli/nodes-screen.js";
|
||||
import { parseDurationMs } from "../../cli/parse-duration.js";
|
||||
import type { OpenClawConfig } from "../../config/config.js";
|
||||
import { imageMimeFromFormat } from "../../media/mime.js";
|
||||
import { resolveSessionAgentId } from "../agent-scope.js";
|
||||
import { resolveImageSanitizationLimits } from "../image-sanitization.js";
|
||||
import { optionalStringEnum, stringEnum } from "../schema/typebox.js";
|
||||
import { sanitizeToolResultImages } from "../tool-images.js";
|
||||
import { type AnyAgentTool, jsonResult, readStringParam } from "./common.js";
|
||||
@@ -100,6 +101,7 @@ export function createNodesTool(options?: {
|
||||
sessionKey: options?.agentSessionKey,
|
||||
config: options?.config,
|
||||
});
|
||||
const imageSanitization = resolveImageSanitizationLimits(options?.config);
|
||||
return {
|
||||
label: "Nodes",
|
||||
name: "nodes",
|
||||
@@ -250,7 +252,7 @@ export function createNodesTool(options?: {
|
||||
}
|
||||
|
||||
const result: AgentToolResult<unknown> = { content, details };
|
||||
return await sanitizeToolResultImages(result, "nodes:camera_snap");
|
||||
return await sanitizeToolResultImages(result, "nodes:camera_snap", imageSanitization);
|
||||
}
|
||||
case "camera_list": {
|
||||
const node = readStringParam(params, "node", { required: true });
|
||||
|
||||
@@ -290,6 +290,8 @@ export const FIELD_HELP: Record<string, string> = {
|
||||
"agents.defaults.imageModel.primary":
|
||||
"Optional image model (provider/model) used when the primary model lacks image input.",
|
||||
"agents.defaults.imageModel.fallbacks": "Ordered fallback image models (provider/model).",
|
||||
"agents.defaults.imageMaxDimensionPx":
|
||||
"Max image side length in pixels when sanitizing transcript/tool-result image payloads (default: 1200).",
|
||||
"agents.defaults.cliBackends": "Optional CLI backends for text-only fallback (claude-cli, etc.).",
|
||||
"agents.defaults.humanDelay.mode": 'Delay style for block replies ("off", "natural", "custom").',
|
||||
"agents.defaults.humanDelay.minMs": "Minimum delay in ms for custom humanDelay (default: 800).",
|
||||
|
||||
@@ -204,6 +204,7 @@ export const FIELD_LABELS: Record<string, string> = {
|
||||
"agents.defaults.model.fallbacks": "Model Fallbacks",
|
||||
"agents.defaults.imageModel.primary": "Image Model",
|
||||
"agents.defaults.imageModel.fallbacks": "Image Model Fallbacks",
|
||||
"agents.defaults.imageMaxDimensionPx": "Image Max Dimension (px)",
|
||||
"agents.defaults.humanDelay.mode": "Human Delay Mode",
|
||||
"agents.defaults.humanDelay.minMs": "Human Delay Min (ms)",
|
||||
"agents.defaults.humanDelay.maxMs": "Human Delay Max (ms)",
|
||||
|
||||
@@ -190,6 +190,11 @@ export type AgentDefaultsConfig = {
|
||||
timeoutSeconds?: number;
|
||||
/** Max inbound media size in MB for agent-visible attachments (text note or future image attach). */
|
||||
mediaMaxMb?: number;
|
||||
/**
|
||||
* Max image side length (pixels) when sanitizing base64 image payloads in transcripts/tool results.
|
||||
* Default: 1200.
|
||||
*/
|
||||
imageMaxDimensionPx?: number;
|
||||
typingIntervalSeconds?: number;
|
||||
/** Typing indicator start mode (never|instant|thinking|message). */
|
||||
typingMode?: TypingMode;
|
||||
|
||||
@@ -126,6 +126,7 @@ export const AgentDefaultsSchema = z
|
||||
humanDelay: HumanDelaySchema.optional(),
|
||||
timeoutSeconds: z.number().int().positive().optional(),
|
||||
mediaMaxMb: z.number().positive().optional(),
|
||||
imageMaxDimensionPx: z.number().int().positive().optional(),
|
||||
typingIntervalSeconds: z.number().int().positive().optional(),
|
||||
typingMode: z
|
||||
.union([
|
||||
|
||||
Reference in New Issue
Block a user