fix(image): allow workspace and sandbox media paths (#15541)
@@ -24,6 +24,7 @@ Docs: https://docs.openclaw.ai
- BlueBubbles: include sender identity in group chat envelopes and pass clean message text to the agent prompt, aligning with iMessage/Signal formatting. (#16210) Thanks @zerone0x.
- WhatsApp: honor per-account `dmPolicy` overrides (account-level settings now take precedence over channel defaults for inbound DMs). (#10082) Thanks @mcaxtr.
- Media: accept `MEDIA:`-prefixed paths (lenient whitespace) when loading outbound media to prevent `ENOENT` for tool-returned local media paths. (#13107) Thanks @mcaxtr.
- Agents/Image tool: allow workspace-local image paths by including the active workspace directory in local media allowlists, and trust sandbox-validated paths in image loaders to prevent false "not under an allowed directory" rejections. (#15541)
- Cron/Slack: preserve agent identity (name and icon) when cron jobs deliver outbound messages. (#16242) Thanks @robbyczgw-cla.
- Cron: prevent `cron list`/`cron status` from silently skipping past-due recurring jobs by using maintenance recompute semantics. (#16156) Thanks @zerone0x.
- Cron: repair missing/corrupt `nextRunAtMs` for the updated job without globally recomputing unrelated due jobs during `cron update`. (#15750)

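A minimal sketch of the allowlist idea behind the #15541 entry above: local image loading only accepts paths that resolve under an approved root, so the active workspace directory must be part of that set or workspace-local images are rejected with a "not under an allowed directory" error. The helper below is an illustration, not the project's actual API; names and example paths are assumptions.

// Hypothetical allowlist check, for illustration only.
import os from "node:os";
import path from "node:path";

function isUnderAllowedRoot(filePath: string, roots: string[]): boolean {
  const resolved = path.resolve(filePath);
  return roots.some((root) => {
    const rel = path.relative(path.resolve(root), resolved);
    return rel === "" || (!rel.startsWith("..") && !path.isAbsolute(rel));
  });
}

const workspaceDir = "/home/user/workspace"; // assumed example path
const defaultRoots = [os.tmpdir()]; // before the fix: defaults only
const withWorkspace = [...defaultRoots, workspaceDir]; // after the fix: workspace appended

isUnderAllowedRoot(path.join(workspaceDir, "photo.png"), defaultRoots); // false -> rejected
isUnderAllowedRoot(path.join(workspaceDir, "photo.png"), withWorkspace); // true  -> loaded
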
@@ -64,6 +64,7 @@ export function createOpenClawTools(options?: {
        ? createImageTool({
            config: options?.config,
            agentDir: options.agentDir,
            workspaceDir: options?.workspaceDir,
            sandbox:
              options?.sandboxRoot && options?.sandboxFsBridge
                ? { root: options.sandboxRoot, bridge: options.sandboxFsBridge }

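For orientation, a hedged sketch of how a host might wire these options when building the tool set. The option names match the hunk above; the import paths, placeholder directories, and the use of the host-sandbox test helper as a stand-in bridge are assumptions, not the project's documented setup.

// Illustrative wiring only; import paths and directories are placeholders.
import { createOpenClawTools } from "./agents/tools/index.js";
import { createHostSandboxFsBridge } from "./test-helpers/host-sandbox-fs-bridge.js";

const sandboxRoot = "/home/user/workspace/sandbox";
const tools = createOpenClawTools({
  config: {
    agents: {
      defaults: {
        model: { primary: "minimax/MiniMax-M2.1" },
        imageModel: { primary: "minimax/MiniMax-VL-01" },
      },
    },
  },
  agentDir: "/var/lib/openclaw/agent",
  // New in this commit: forwarded into the image tool's local media allowlist.
  workspaceDir: "/home/user/workspace",
  // When both are present, the image tool receives sandbox: { root, bridge }
  // and trusts bridge-validated reads instead of re-checking default roots.
  sandboxRoot,
  sandboxFsBridge: createHostSandboxFsBridge(sandboxRoot),
});
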
@@ -1,5 +1,14 @@
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { describe, expect, it } from "vitest";
import { detectAndLoadPromptImages, detectImageReferences, modelSupportsImages } from "./images.js";
import { createHostSandboxFsBridge } from "../../test-helpers/host-sandbox-fs-bridge.js";
import {
  detectAndLoadPromptImages,
  detectImageReferences,
  loadImageFromRef,
  modelSupportsImages,
} from "./images.js";

describe("detectImageReferences", () => {
  it("detects absolute file paths with common extensions", () => {

@@ -196,6 +205,41 @@ describe("modelSupportsImages", () => {
    });
  });

describe("loadImageFromRef", () => {
  it("allows sandbox-validated host paths outside default media roots", async () => {
    const sandboxParent = await fs.mkdtemp(path.join(os.homedir(), "openclaw-sandbox-image-"));
    try {
      const sandboxRoot = path.join(sandboxParent, "sandbox");
      await fs.mkdir(sandboxRoot, { recursive: true });
      const imagePath = path.join(sandboxRoot, "photo.png");
      const pngB64 =
        "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/woAAn8B9FD5fHAAAAAASUVORK5CYII=";
      await fs.writeFile(imagePath, Buffer.from(pngB64, "base64"));

      const image = await loadImageFromRef(
        {
          raw: "./photo.png",
          type: "path",
          resolved: "./photo.png",
        },
        sandboxRoot,
        {
          sandbox: {
            root: sandboxRoot,
            bridge: createHostSandboxFsBridge(sandboxRoot),
          },
        },
      );

      expect(image).not.toBeNull();
      expect(image?.type).toBe("image");
      expect(image?.data.length).toBeGreaterThan(0);
    } finally {
      await fs.rm(sandboxParent, { recursive: true, force: true });
    }
  });
});

describe("detectAndLoadPromptImages", () => {
  it("returns no images for non-vision models even when existing images are provided", async () => {
    const result = await detectAndLoadPromptImages({

@@ -211,6 +211,7 @@ export async function loadImageFromRef(
  const media = options?.sandbox
    ? await loadWebMedia(targetPath, {
        maxBytes: options.maxBytes,
        localRoots: "any",
        readFile: (filePath) =>
          options.sandbox!.bridge.readFile({ filePath, cwd: options.sandbox!.root }),
      })

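A hedged reading of the hunk above: when a sandbox is configured, loadImageFromRef delegates file reads to the sandbox fs bridge and passes localRoots: "any", because the bridge has already constrained the path to the sandbox root. Below is a toy bridge showing the shape implied by that call site; it is a sketch, not the project's SandboxFsBridge implementation, and the root path is an assumed example.

// Sketch only: a minimal bridge whose readFile resolves paths against the sandbox
// root and refuses to escape it, matching how it is invoked in the hunk above.
import fs from "node:fs/promises";
import path from "node:path";

const sandbox = {
  root: "/tmp/openclaw-sandbox",
  bridge: {
    async readFile({ filePath, cwd }: { filePath: string; cwd: string }): Promise<Buffer> {
      const resolved = path.resolve(cwd, filePath);
      const rel = path.relative(cwd, resolved);
      if (rel.startsWith("..") || path.isAbsolute(rel)) {
        throw new Error(`path escapes sandbox: ${filePath}`);
      }
      return fs.readFile(resolved);
    },
  },
};

// The loader then reads through the bridge rather than checking default media roots:
// await loadWebMedia(targetPath, {
//   maxBytes,
//   localRoots: "any",
//   readFile: (filePath) => sandbox.bridge.readFile({ filePath, cwd: sandbox.root }),
// });
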
@@ -150,6 +150,75 @@ describe("image tool implicit imageModel config", () => {
    );
  });

  it("allows workspace images outside default local media roots", async () => {
    const workspaceParent = await fs.mkdtemp(
      path.join(process.cwd(), ".openclaw-workspace-image-"),
    );
    try {
      const workspaceDir = path.join(workspaceParent, "workspace");
      await fs.mkdir(workspaceDir, { recursive: true });
      const imagePath = path.join(workspaceDir, "photo.png");
      const pngB64 =
        "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/woAAn8B9FD5fHAAAAAASUVORK5CYII=";
      await fs.writeFile(imagePath, Buffer.from(pngB64, "base64"));

      const fetch = vi.fn().mockResolvedValue({
        ok: true,
        status: 200,
        statusText: "OK",
        headers: new Headers(),
        json: async () => ({
          content: "ok",
          base_resp: { status_code: 0, status_msg: "" },
        }),
      });
      // @ts-expect-error partial global
      global.fetch = fetch;
      vi.stubEnv("MINIMAX_API_KEY", "minimax-test");

      const agentDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-image-"));
      const cfg: OpenClawConfig = {
        agents: {
          defaults: {
            model: { primary: "minimax/MiniMax-M2.1" },
            imageModel: { primary: "minimax/MiniMax-VL-01" },
          },
        },
      };

      const withoutWorkspace = createImageTool({ config: cfg, agentDir });
      expect(withoutWorkspace).not.toBeNull();
      if (!withoutWorkspace) {
        throw new Error("expected image tool");
      }
      await expect(
        withoutWorkspace.execute("t0", {
          prompt: "Describe the image.",
          image: imagePath,
        }),
      ).rejects.toThrow(/Local media path is not under an allowed directory/i);

      const withWorkspace = createImageTool({ config: cfg, agentDir, workspaceDir });
      expect(withWorkspace).not.toBeNull();
      if (!withWorkspace) {
        throw new Error("expected image tool");
      }

      await expect(
        withWorkspace.execute("t1", {
          prompt: "Describe the image.",
          image: imagePath,
        }),
      ).resolves.toMatchObject({
        content: [{ type: "text", text: "ok" }],
      });

      expect(fetch).toHaveBeenCalledTimes(1);
    } finally {
      await fs.rm(workspaceParent, { recursive: true, force: true });
    }
  });

  it("sandboxes image paths like the read tool", async () => {
    const stateDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-image-sandbox-"));
    const agentDir = path.join(stateDir, "agent");

@@ -5,7 +5,7 @@ import type { OpenClawConfig } from "../../config/config.js";
import type { SandboxFsBridge } from "../sandbox/fs-bridge.js";
import type { AnyAgentTool } from "./common.js";
import { resolveUserPath } from "../../utils.js";
import { loadWebMedia } from "../../web/media.js";
import { getDefaultLocalRoots, loadWebMedia } from "../../web/media.js";
import { ensureAuthProfileStore, listProfilesForProvider } from "../auth-profiles.js";
import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "../defaults.js";
import { minimaxUnderstandImage } from "../minimax-vlm.js";

@@ -325,6 +325,7 @@ async function runImagePrompt(params: {
export function createImageTool(options?: {
  config?: OpenClawConfig;
  agentDir?: string;
  workspaceDir?: string;
  sandbox?: ImageSandboxConfig;
  /** If true, the model has native vision capability and images in the prompt are auto-injected */
  modelHasVision?: boolean;

@@ -351,6 +352,19 @@ export function createImageTool(options?: {
    ? "Analyze an image with a vision model. Only use this tool when the image was NOT already provided in the user's message. Images mentioned in the prompt are automatically visible to you."
    : "Analyze an image with the configured image model (agents.defaults.imageModel). Provide a prompt and image path or URL.";

  const localRoots = (() => {
    const roots = getDefaultLocalRoots();
    const workspaceDir = options?.workspaceDir?.trim();
    if (!workspaceDir) {
      return roots;
    }
    const normalized = workspaceDir.startsWith("~") ? resolveUserPath(workspaceDir) : workspaceDir;
    if (!roots.includes(normalized)) {
      roots.push(normalized);
    }
    return roots;
  })();

  return {
    label: "Image",
    name: "image",

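A short note on the localRoots block above: the workspace entry is appended to, not substituted for, the defaults, and a leading "~" is expanded through resolveUserPath so the stored root is absolute and comparable against resolved image paths. The sketch below assumes resolveUserPath does homedir-based tilde expansion; the real util lives in ../../utils.js and is not shown in this diff.

// Assumed behaviour of the normalization step, for illustration only.
import os from "node:os";
import path from "node:path";

function resolveUserPathSketch(p: string): string {
  return p === "~" || p.startsWith("~/")
    ? path.join(os.homedir(), p.slice(2)) // "~/projects/app" -> "<home>/projects/app"
    : p;
}

// e.g. with HOME=/home/user:
// resolveUserPathSketch("~/projects/app") === "/home/user/projects/app"
// so localRoots becomes [...getDefaultLocalRoots(), "/home/user/projects/app"],
// with duplicates skipped via roots.includes(normalized).
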
@@ -441,10 +455,14 @@ export function createImageTool(options?: {
          : sandboxConfig
            ? await loadWebMedia(resolvedPath ?? resolvedImage, {
                maxBytes,
                localRoots: "any",
                readFile: (filePath) =>
                  sandboxConfig.bridge.readFile({ filePath, cwd: sandboxConfig.root }),
              })
            : await loadWebMedia(resolvedPath ?? resolvedImage, maxBytes);
            : await loadWebMedia(resolvedPath ?? resolvedImage, {
                maxBytes,
                localRoots,
              });
        if (media.kind !== "image") {
          throw new Error(`Unsupported media type: ${media.kind}`);
        }

@@ -32,7 +32,8 @@ type WebMediaOptions = {
  readFile?: (filePath: string) => Promise<Buffer>;
};

function getDefaultLocalRoots(): string[] {
export function getDefaultLocalRoots(): string[] {
  return [
    os.tmpdir(),
    path.join(STATE_DIR, "media"),