fix(image): allow workspace and sandbox media paths (#15541)
@@ -24,6 +24,7 @@ Docs: https://docs.openclaw.ai
- BlueBubbles: include sender identity in group chat envelopes and pass clean message text to the agent prompt, aligning with iMessage/Signal formatting. (#16210) Thanks @zerone0x.
- WhatsApp: honor per-account `dmPolicy` overrides (account-level settings now take precedence over channel defaults for inbound DMs). (#10082) Thanks @mcaxtr.
- Media: accept `MEDIA:`-prefixed paths (lenient whitespace) when loading outbound media to prevent `ENOENT` for tool-returned local media paths. (#13107) Thanks @mcaxtr.
- Agents/Image tool: allow workspace-local image paths by including the active workspace directory in local media allowlists, and trust sandbox-validated paths in image loaders to prevent false "not under an allowed directory" rejections. (#15541)
- Cron/Slack: preserve agent identity (name and icon) when cron jobs deliver outbound messages. (#16242) Thanks @robbyczgw-cla.
- Cron: prevent `cron list`/`cron status` from silently skipping past-due recurring jobs by using maintenance recompute semantics. (#16156) Thanks @zerone0x.
- Cron: repair missing/corrupt `nextRunAtMs` for the updated job without globally recomputing unrelated due jobs during `cron update`. (#15750)

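A minimal sketch of the allowlist idea behind the #15541 entry above: local image loading only accepts paths that resolve under an approved root, so the active workspace directory must be part of that set or workspace-local images are rejected with a "not under an allowed directory" error. The helper below is an illustration, not the project's actual API; names and example paths are assumptions.

// Hypothetical allowlist check, for illustration only.
import os from "node:os";
import path from "node:path";

function isUnderAllowedRoot(filePath: string, roots: string[]): boolean {
  const resolved = path.resolve(filePath);
  return roots.some((root) => {
    const rel = path.relative(path.resolve(root), resolved);
    return rel === "" || (!rel.startsWith("..") && !path.isAbsolute(rel));
  });
}

const workspaceDir = "/home/user/workspace"; // assumed example path
const defaultRoots = [os.tmpdir()]; // before the fix: defaults only
const withWorkspace = [...defaultRoots, workspaceDir]; // after the fix: workspace appended

isUnderAllowedRoot(path.join(workspaceDir, "photo.png"), defaultRoots); // false -> rejected
isUnderAllowedRoot(path.join(workspaceDir, "photo.png"), withWorkspace); // true  -> loaded
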
@@ -64,6 +64,7 @@ export function createOpenClawTools(options?: {
        ? createImageTool({
            config: options?.config,
            agentDir: options.agentDir,
            workspaceDir: options?.workspaceDir,
            sandbox:
              options?.sandboxRoot && options?.sandboxFsBridge
                ? { root: options.sandboxRoot, bridge: options.sandboxFsBridge }

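For orientation, a hedged sketch of how a host might wire these options when building the tool set. The option names match the hunk above; the import paths, placeholder directories, and the use of the host-sandbox test helper as a stand-in bridge are assumptions, not the project's documented setup.

// Illustrative wiring only; import paths and directories are placeholders.
import { createOpenClawTools } from "./agents/tools/index.js";
import { createHostSandboxFsBridge } from "./test-helpers/host-sandbox-fs-bridge.js";

const sandboxRoot = "/home/user/workspace/sandbox";
const tools = createOpenClawTools({
  config: {
    agents: {
      defaults: {
        model: { primary: "minimax/MiniMax-M2.1" },
        imageModel: { primary: "minimax/MiniMax-VL-01" },
      },
    },
  },
  agentDir: "/var/lib/openclaw/agent",
  // New in this commit: forwarded into the image tool's local media allowlist.
  workspaceDir: "/home/user/workspace",
  // When both are present, the image tool receives sandbox: { root, bridge }
  // and trusts bridge-validated reads instead of re-checking default roots.
  sandboxRoot,
  sandboxFsBridge: createHostSandboxFsBridge(sandboxRoot),
});
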
@@ -1,5 +1,14 @@
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { describe, expect, it } from "vitest";
import { detectAndLoadPromptImages, detectImageReferences, modelSupportsImages } from "./images.js";
import { createHostSandboxFsBridge } from "../../test-helpers/host-sandbox-fs-bridge.js";
import {
  detectAndLoadPromptImages,
  detectImageReferences,
  loadImageFromRef,
  modelSupportsImages,
} from "./images.js";

describe("detectImageReferences", () => {
  it("detects absolute file paths with common extensions", () => {

@@ -196,6 +205,41 @@ describe("modelSupportsImages", () => {
    });
  });

describe("loadImageFromRef", () => {
  it("allows sandbox-validated host paths outside default media roots", async () => {
    const sandboxParent = await fs.mkdtemp(path.join(os.homedir(), "openclaw-sandbox-image-"));
    try {
      const sandboxRoot = path.join(sandboxParent, "sandbox");
      await fs.mkdir(sandboxRoot, { recursive: true });
      const imagePath = path.join(sandboxRoot, "photo.png");
      const pngB64 =
        "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/woAAn8B9FD5fHAAAAAASUVORK5CYII=";
      await fs.writeFile(imagePath, Buffer.from(pngB64, "base64"));

      const image = await loadImageFromRef(
        {
          raw: "./photo.png",
          type: "path",
          resolved: "./photo.png",
        },
        sandboxRoot,
        {
          sandbox: {
            root: sandboxRoot,
            bridge: createHostSandboxFsBridge(sandboxRoot),
          },
        },
      );

      expect(image).not.toBeNull();
      expect(image?.type).toBe("image");
      expect(image?.data.length).toBeGreaterThan(0);
    } finally {
      await fs.rm(sandboxParent, { recursive: true, force: true });
    }
  });
});

describe("detectAndLoadPromptImages", () => {
  it("returns no images for non-vision models even when existing images are provided", async () => {
    const result = await detectAndLoadPromptImages({

@@ -211,6 +211,7 @@ export async function loadImageFromRef(
  const media = options?.sandbox
    ? await loadWebMedia(targetPath, {
        maxBytes: options.maxBytes,
        localRoots: "any",
        readFile: (filePath) =>
          options.sandbox!.bridge.readFile({ filePath, cwd: options.sandbox!.root }),
      })

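A hedged reading of the hunk above: when a sandbox is configured, loadImageFromRef delegates file reads to the sandbox fs bridge and passes localRoots: "any", because the bridge has already constrained the path to the sandbox root. Below is a toy bridge showing the shape implied by that call site; it is a sketch, not the project's SandboxFsBridge implementation, and the root path is an assumed example.

// Sketch only: a minimal bridge whose readFile resolves paths against the sandbox
// root and refuses to escape it, matching how it is invoked in the hunk above.
import fs from "node:fs/promises";
import path from "node:path";

const sandbox = {
  root: "/tmp/openclaw-sandbox",
  bridge: {
    async readFile({ filePath, cwd }: { filePath: string; cwd: string }): Promise<Buffer> {
      const resolved = path.resolve(cwd, filePath);
      const rel = path.relative(cwd, resolved);
      if (rel.startsWith("..") || path.isAbsolute(rel)) {
        throw new Error(`path escapes sandbox: ${filePath}`);
      }
      return fs.readFile(resolved);
    },
  },
};

// The loader then reads through the bridge rather than checking default media roots:
// await loadWebMedia(targetPath, {
//   maxBytes,
//   localRoots: "any",
//   readFile: (filePath) => sandbox.bridge.readFile({ filePath, cwd: sandbox.root }),
// });
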
@@ -150,6 +150,75 @@ describe("image tool implicit imageModel config", () => {
    );
  });

  it("allows workspace images outside default local media roots", async () => {
    const workspaceParent = await fs.mkdtemp(
      path.join(process.cwd(), ".openclaw-workspace-image-"),
    );
    try {
      const workspaceDir = path.join(workspaceParent, "workspace");
      await fs.mkdir(workspaceDir, { recursive: true });
      const imagePath = path.join(workspaceDir, "photo.png");
      const pngB64 =
        "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/woAAn8B9FD5fHAAAAAASUVORK5CYII=";
      await fs.writeFile(imagePath, Buffer.from(pngB64, "base64"));

      const fetch = vi.fn().mockResolvedValue({
        ok: true,
        status: 200,
        statusText: "OK",
        headers: new Headers(),
        json: async () => ({
          content: "ok",
          base_resp: { status_code: 0, status_msg: "" },
        }),
      });
      // @ts-expect-error partial global
      global.fetch = fetch;
      vi.stubEnv("MINIMAX_API_KEY", "minimax-test");

      const agentDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-image-"));
      const cfg: OpenClawConfig = {
        agents: {
          defaults: {
            model: { primary: "minimax/MiniMax-M2.1" },
            imageModel: { primary: "minimax/MiniMax-VL-01" },
          },
        },
      };

      const withoutWorkspace = createImageTool({ config: cfg, agentDir });
      expect(withoutWorkspace).not.toBeNull();
      if (!withoutWorkspace) {
        throw new Error("expected image tool");
      }
      await expect(
        withoutWorkspace.execute("t0", {
          prompt: "Describe the image.",
          image: imagePath,
        }),
      ).rejects.toThrow(/Local media path is not under an allowed directory/i);

      const withWorkspace = createImageTool({ config: cfg, agentDir, workspaceDir });
      expect(withWorkspace).not.toBeNull();
      if (!withWorkspace) {
        throw new Error("expected image tool");
      }

      await expect(
        withWorkspace.execute("t1", {
          prompt: "Describe the image.",
          image: imagePath,
        }),
      ).resolves.toMatchObject({
        content: [{ type: "text", text: "ok" }],
      });

      expect(fetch).toHaveBeenCalledTimes(1);
    } finally {
      await fs.rm(workspaceParent, { recursive: true, force: true });
    }
  });

  it("sandboxes image paths like the read tool", async () => {
    const stateDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-image-sandbox-"));
    const agentDir = path.join(stateDir, "agent");

@@ -5,7 +5,7 @@ import type { OpenClawConfig } from "../../config/config.js";
import type { SandboxFsBridge } from "../sandbox/fs-bridge.js";
import type { AnyAgentTool } from "./common.js";
import { resolveUserPath } from "../../utils.js";
import { loadWebMedia } from "../../web/media.js";
import { getDefaultLocalRoots, loadWebMedia } from "../../web/media.js";
import { ensureAuthProfileStore, listProfilesForProvider } from "../auth-profiles.js";
import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "../defaults.js";
import { minimaxUnderstandImage } from "../minimax-vlm.js";

@@ -325,6 +325,7 @@ async function runImagePrompt(params: {
export function createImageTool(options?: {
  config?: OpenClawConfig;
  agentDir?: string;
  workspaceDir?: string;
  sandbox?: ImageSandboxConfig;
  /** If true, the model has native vision capability and images in the prompt are auto-injected */
  modelHasVision?: boolean;

@@ -351,6 +352,19 @@ export function createImageTool(options?: {
    ? "Analyze an image with a vision model. Only use this tool when the image was NOT already provided in the user's message. Images mentioned in the prompt are automatically visible to you."
    : "Analyze an image with the configured image model (agents.defaults.imageModel). Provide a prompt and image path or URL.";

  const localRoots = (() => {
    const roots = getDefaultLocalRoots();
    const workspaceDir = options?.workspaceDir?.trim();
    if (!workspaceDir) {
      return roots;
    }
    const normalized = workspaceDir.startsWith("~") ? resolveUserPath(workspaceDir) : workspaceDir;
    if (!roots.includes(normalized)) {
      roots.push(normalized);
    }
    return roots;
  })();

  return {
    label: "Image",
    name: "image",

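A short note on the localRoots block above: the workspace entry is appended to, not substituted for, the defaults, and a leading "~" is expanded through resolveUserPath so the stored root is absolute and comparable against resolved image paths. The sketch below assumes resolveUserPath does homedir-based tilde expansion; the real util lives in ../../utils.js and is not shown in this diff.

// Assumed behaviour of the normalization step, for illustration only.
import os from "node:os";
import path from "node:path";

function resolveUserPathSketch(p: string): string {
  return p === "~" || p.startsWith("~/")
    ? path.join(os.homedir(), p.slice(2)) // "~/projects/app" -> "<home>/projects/app"
    : p;
}

// e.g. with HOME=/home/user:
// resolveUserPathSketch("~/projects/app") === "/home/user/projects/app"
// so localRoots becomes [...getDefaultLocalRoots(), "/home/user/projects/app"],
// with duplicates skipped via roots.includes(normalized).
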
@@ -441,10 +455,14 @@ export function createImageTool(options?: {
          : sandboxConfig
            ? await loadWebMedia(resolvedPath ?? resolvedImage, {
                maxBytes,
                localRoots: "any",
                readFile: (filePath) =>
                  sandboxConfig.bridge.readFile({ filePath, cwd: sandboxConfig.root }),
              })
            : await loadWebMedia(resolvedPath ?? resolvedImage, maxBytes);
            : await loadWebMedia(resolvedPath ?? resolvedImage, {
                maxBytes,
                localRoots,
              });
        if (media.kind !== "image") {
          throw new Error(`Unsupported media type: ${media.kind}`);
        }

@@ -32,7 +32,8 @@ type WebMediaOptions = {
  readFile?: (filePath: string) => Promise<Buffer>;
};

function getDefaultLocalRoots(): string[] {
export function getDefaultLocalRoots(): string[] {
  return [
    os.tmpdir(),
    path.join(STATE_DIR, "media"),