diff --git a/src/infra/fs-safe.test.ts b/src/infra/fs-safe.test.ts new file mode 100644 index 0000000000..e15a953ece --- /dev/null +++ b/src/infra/fs-safe.test.ts @@ -0,0 +1,99 @@ +import fs from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import { afterEach, describe, expect, it } from "vitest"; +import { SafeOpenError, openFileWithinRoot, readLocalFileSafely } from "./fs-safe.js"; + +const tempDirs: string[] = []; + +async function makeTempDir(prefix: string): Promise { + const dir = await fs.mkdtemp(path.join(os.tmpdir(), prefix)); + tempDirs.push(dir); + return dir; +} + +afterEach(async () => { + await Promise.all(tempDirs.splice(0).map((dir) => fs.rm(dir, { recursive: true, force: true }))); +}); + +describe("fs-safe", () => { + it("reads a local file safely", async () => { + const dir = await makeTempDir("openclaw-fs-safe-"); + const file = path.join(dir, "payload.txt"); + await fs.writeFile(file, "hello"); + + const result = await readLocalFileSafely({ filePath: file }); + expect(result.buffer.toString("utf8")).toBe("hello"); + expect(result.stat.size).toBe(5); + expect(result.realPath).toContain("payload.txt"); + }); + + it("rejects directories", async () => { + const dir = await makeTempDir("openclaw-fs-safe-"); + await expect(readLocalFileSafely({ filePath: dir })).rejects.toMatchObject({ + code: "not-file", + }); + }); + + it("enforces maxBytes", async () => { + const dir = await makeTempDir("openclaw-fs-safe-"); + const file = path.join(dir, "big.bin"); + await fs.writeFile(file, Buffer.alloc(8)); + + await expect(readLocalFileSafely({ filePath: file, maxBytes: 4 })).rejects.toMatchObject({ + code: "too-large", + }); + }); + + it.runIf(process.platform !== "win32")("rejects symlinks", async () => { + const dir = await makeTempDir("openclaw-fs-safe-"); + const target = path.join(dir, "target.txt"); + const link = path.join(dir, "link.txt"); + await fs.writeFile(target, "target"); + await fs.symlink(target, link); + + await expect(readLocalFileSafely({ filePath: link })).rejects.toMatchObject({ + code: "symlink", + }); + }); + + it("blocks traversal outside root", async () => { + const root = await makeTempDir("openclaw-fs-safe-root-"); + const outside = await makeTempDir("openclaw-fs-safe-outside-"); + const file = path.join(outside, "outside.txt"); + await fs.writeFile(file, "outside"); + + await expect( + openFileWithinRoot({ + rootDir: root, + relativePath: path.join("..", path.basename(outside), "outside.txt"), + }), + ).rejects.toMatchObject({ code: "invalid-path" }); + }); + + it.runIf(process.platform !== "win32")("blocks symlink escapes under root", async () => { + const root = await makeTempDir("openclaw-fs-safe-root-"); + const outside = await makeTempDir("openclaw-fs-safe-outside-"); + const target = path.join(outside, "outside.txt"); + const link = path.join(root, "link.txt"); + await fs.writeFile(target, "outside"); + await fs.symlink(target, link); + + await expect( + openFileWithinRoot({ + rootDir: root, + relativePath: "link.txt", + }), + ).rejects.toMatchObject({ code: "invalid-path" }); + }); + + it("returns not-found for missing files", async () => { + const dir = await makeTempDir("openclaw-fs-safe-"); + const missing = path.join(dir, "missing.txt"); + + await expect(readLocalFileSafely({ filePath: missing })).rejects.toBeInstanceOf(SafeOpenError); + await expect(readLocalFileSafely({ filePath: missing })).rejects.toMatchObject({ + code: "not-found", + }); + }); +}); diff --git a/src/infra/fs-safe.ts b/src/infra/fs-safe.ts index 64c0288002..1549787561 100644 --- a/src/infra/fs-safe.ts +++ b/src/infra/fs-safe.ts @@ -1,16 +1,22 @@ import type { Stats } from "node:fs"; -import { constants as fsConstants } from "node:fs"; import type { FileHandle } from "node:fs/promises"; +import { constants as fsConstants } from "node:fs"; import fs from "node:fs/promises"; import path from "node:path"; -export type SafeOpenErrorCode = "invalid-path" | "not-found"; +export type SafeOpenErrorCode = + | "invalid-path" + | "not-found" + | "symlink" + | "not-file" + | "path-mismatch" + | "too-large"; export class SafeOpenError extends Error { code: SafeOpenErrorCode; - constructor(code: SafeOpenErrorCode, message: string) { - super(message); + constructor(code: SafeOpenErrorCode, message: string, options?: ErrorOptions) { + super(message, options); this.code = code; this.name = "SafeOpenError"; } @@ -22,7 +28,15 @@ export type SafeOpenResult = { stat: Stats; }; +export type SafeLocalReadResult = { + buffer: Buffer; + realPath: string; + stat: Stats; +}; + const NOT_FOUND_CODES = new Set(["ENOENT", "ENOTDIR"]); +const SUPPORTS_NOFOLLOW = process.platform !== "win32" && "O_NOFOLLOW" in fsConstants; +const OPEN_READ_FLAGS = fsConstants.O_RDONLY | (SUPPORTS_NOFOLLOW ? fsConstants.O_NOFOLLOW : 0); const ensureTrailingSep = (value: string) => (value.endsWith(path.sep) ? value : value + path.sep); @@ -35,6 +49,51 @@ const isNotFoundError = (err: unknown) => const isSymlinkOpenError = (err: unknown) => isNodeError(err) && (err.code === "ELOOP" || err.code === "EINVAL" || err.code === "ENOTSUP"); +async function openVerifiedLocalFile(filePath: string): Promise { + let handle: FileHandle; + try { + handle = await fs.open(filePath, OPEN_READ_FLAGS); + } catch (err) { + if (isNotFoundError(err)) { + throw new SafeOpenError("not-found", "file not found"); + } + if (isSymlinkOpenError(err)) { + throw new SafeOpenError("symlink", "symlink open blocked", { cause: err }); + } + throw err; + } + + try { + const [stat, lstat] = await Promise.all([handle.stat(), fs.lstat(filePath)]); + if (lstat.isSymbolicLink()) { + throw new SafeOpenError("symlink", "symlink not allowed"); + } + if (!stat.isFile()) { + throw new SafeOpenError("not-file", "not a file"); + } + if (stat.ino !== lstat.ino || stat.dev !== lstat.dev) { + throw new SafeOpenError("path-mismatch", "path changed during read"); + } + + const realPath = await fs.realpath(filePath); + const realStat = await fs.stat(realPath); + if (stat.ino !== realStat.ino || stat.dev !== realStat.dev) { + throw new SafeOpenError("path-mismatch", "path mismatch"); + } + + return { handle, realPath, stat }; + } catch (err) { + await handle.close().catch(() => {}); + if (err instanceof SafeOpenError) { + throw err; + } + if (isNotFoundError(err)) { + throw new SafeOpenError("not-found", "file not found"); + } + throw err; + } +} + export async function openFileWithinRoot(params: { rootDir: string; relativePath: string; @@ -54,52 +113,44 @@ export async function openFileWithinRoot(params: { throw new SafeOpenError("invalid-path", "path escapes root"); } - const supportsNoFollow = process.platform !== "win32" && "O_NOFOLLOW" in fsConstants; - const flags = fsConstants.O_RDONLY | (supportsNoFollow ? fsConstants.O_NOFOLLOW : 0); - - let handle: FileHandle; + let opened: SafeOpenResult; try { - handle = await fs.open(resolved, flags); + opened = await openVerifiedLocalFile(resolved); } catch (err) { - if (isNotFoundError(err)) { - throw new SafeOpenError("not-found", "file not found"); - } - if (isSymlinkOpenError(err)) { - throw new SafeOpenError("invalid-path", "symlink open blocked"); + if (err instanceof SafeOpenError) { + if (err.code === "not-found") { + throw err; + } + throw new SafeOpenError("invalid-path", "path is not a regular file under root", { + cause: err, + }); } throw err; } + if (!opened.realPath.startsWith(rootWithSep)) { + await opened.handle.close().catch(() => {}); + throw new SafeOpenError("invalid-path", "path escapes root"); + } + + return opened; +} + +export async function readLocalFileSafely(params: { + filePath: string; + maxBytes?: number; +}): Promise { + const opened = await openVerifiedLocalFile(params.filePath); try { - const lstat = await fs.lstat(resolved).catch(() => null); - if (lstat?.isSymbolicLink()) { - throw new SafeOpenError("invalid-path", "symlink not allowed"); + if (params.maxBytes !== undefined && opened.stat.size > params.maxBytes) { + throw new SafeOpenError( + "too-large", + `file exceeds limit of ${params.maxBytes} bytes (got ${opened.stat.size})`, + ); } - - const realPath = await fs.realpath(resolved); - if (!realPath.startsWith(rootWithSep)) { - throw new SafeOpenError("invalid-path", "path escapes root"); - } - - const stat = await handle.stat(); - if (!stat.isFile()) { - throw new SafeOpenError("invalid-path", "not a file"); - } - - const realStat = await fs.stat(realPath); - if (stat.ino !== realStat.ino || stat.dev !== realStat.dev) { - throw new SafeOpenError("invalid-path", "path mismatch"); - } - - return { handle, realPath, stat }; - } catch (err) { - await handle.close().catch(() => {}); - if (err instanceof SafeOpenError) { - throw err; - } - if (isNotFoundError(err)) { - throw new SafeOpenError("not-found", "file not found"); - } - throw err; + const buffer = await opened.handle.readFile(); + return { buffer, realPath: opened.realPath, stat: opened.stat }; + } finally { + await opened.handle.close().catch(() => {}); } } diff --git a/src/media/store.test.ts b/src/media/store.test.ts index 59529ea956..59cffb7ff8 100644 --- a/src/media/store.test.ts +++ b/src/media/store.test.ts @@ -110,6 +110,13 @@ describe("media store", () => { await fs.symlink(target, source); await expect(store.saveMediaSource(source)).rejects.toThrow("symlink"); + await expect(store.saveMediaSource(source)).rejects.toMatchObject({ code: "invalid-path" }); + }); + }); + + it("rejects directory sources with typed error code", async () => { + await withTempStore(async (store, home) => { + await expect(store.saveMediaSource(home)).rejects.toMatchObject({ code: "not-file" }); }); }); diff --git a/src/media/store.ts b/src/media/store.ts index b4c4ead54d..7c2477afac 100644 --- a/src/media/store.ts +++ b/src/media/store.ts @@ -1,11 +1,11 @@ import crypto from "node:crypto"; -import { constants as fsConstants } from "node:fs"; import { createWriteStream } from "node:fs"; import fs from "node:fs/promises"; import { request as httpRequest } from "node:http"; import { request as httpsRequest } from "node:https"; import path from "node:path"; import { pipeline } from "node:stream/promises"; +import { SafeOpenError, readLocalFileSafely } from "../infra/fs-safe.js"; import { resolvePinnedHostname } from "../infra/net/ssrf.js"; import { resolveConfigDir } from "../utils.js"; import { detectMime, extensionForMime } from "./mime.js"; @@ -21,12 +21,6 @@ const defaultHttpRequestImpl: RequestImpl = httpRequest; const defaultHttpsRequestImpl: RequestImpl = httpsRequest; const defaultResolvePinnedHostnameImpl: ResolvePinnedHostnameImpl = resolvePinnedHostname; -const isNodeError = (err: unknown): err is NodeJS.ErrnoException => - Boolean(err && typeof err === "object" && "code" in (err as Record)); - -const isSymlinkOpenError = (err: unknown) => - isNodeError(err) && (err.code === "ELOOP" || err.code === "EINVAL" || err.code === "ENOTSUP"); - let httpRequestImpl: RequestImpl = defaultHttpRequestImpl; let httpsRequestImpl: RequestImpl = defaultHttpsRequestImpl; let resolvePinnedHostnameImpl: ResolvePinnedHostnameImpl = defaultResolvePinnedHostnameImpl; @@ -214,6 +208,47 @@ export type SavedMedia = { contentType?: string; }; +export type SaveMediaSourceErrorCode = + | "invalid-path" + | "not-found" + | "not-file" + | "path-mismatch" + | "too-large"; + +export class SaveMediaSourceError extends Error { + code: SaveMediaSourceErrorCode; + + constructor(code: SaveMediaSourceErrorCode, message: string, options?: ErrorOptions) { + super(message, options); + this.code = code; + this.name = "SaveMediaSourceError"; + } +} + +function toSaveMediaSourceError(err: SafeOpenError): SaveMediaSourceError { + switch (err.code) { + case "symlink": + return new SaveMediaSourceError("invalid-path", "Media path must not be a symlink", { + cause: err, + }); + case "not-file": + return new SaveMediaSourceError("not-file", "Media path is not a file", { cause: err }); + case "path-mismatch": + return new SaveMediaSourceError("path-mismatch", "Media path changed during read", { + cause: err, + }); + case "too-large": + return new SaveMediaSourceError("too-large", "Media exceeds 5MB limit", { cause: err }); + case "not-found": + return new SaveMediaSourceError("not-found", "Media path does not exist", { cause: err }); + case "invalid-path": + default: + return new SaveMediaSourceError("invalid-path", "Media path is not safe to read", { + cause: err, + }); + } +} + export async function saveMediaSource( source: string, headers?: Record, @@ -239,40 +274,19 @@ export async function saveMediaSource( return { id, path: finalDest, size, contentType: mime }; } // local path - const supportsNoFollow = process.platform !== "win32" && "O_NOFOLLOW" in fsConstants; - const flags = fsConstants.O_RDONLY | (supportsNoFollow ? fsConstants.O_NOFOLLOW : 0); - let handle: Awaited>; try { - handle = await fs.open(source, flags); - } catch (err) { - if (isSymlinkOpenError(err)) { - throw new Error("Media path must not be a symlink", { cause: err }); - } - throw err; - } - try { - const [stat, lstat] = await Promise.all([handle.stat(), fs.lstat(source)]); - if (lstat.isSymbolicLink()) { - throw new Error("Media path must not be a symlink"); - } - if (!stat.isFile()) { - throw new Error("Media path is not a file"); - } - if (stat.ino !== lstat.ino || stat.dev !== lstat.dev) { - throw new Error("Media path changed during read"); - } - if (stat.size > MAX_BYTES) { - throw new Error("Media exceeds 5MB limit"); - } - const buffer = await handle.readFile(); + const { buffer, stat } = await readLocalFileSafely({ filePath: source, maxBytes: MAX_BYTES }); const mime = await detectMime({ buffer, filePath: source }); const ext = extensionForMime(mime) ?? path.extname(source); const id = ext ? `${baseId}${ext}` : baseId; const dest = path.join(dir, id); await fs.writeFile(dest, buffer, { mode: 0o600 }); return { id, path: dest, size: stat.size, contentType: mime }; - } finally { - await handle.close().catch(() => {}); + } catch (err) { + if (err instanceof SafeOpenError) { + throw toSaveMediaSourceError(err); + } + throw err; } } diff --git a/src/web/media.test.ts b/src/web/media.test.ts index d484cdc8f3..ff606dae38 100644 --- a/src/web/media.test.ts +++ b/src/web/media.test.ts @@ -7,7 +7,12 @@ import { sendVoiceMessageDiscord } from "../discord/send.js"; import * as ssrf from "../infra/net/ssrf.js"; import { optimizeImageToPng } from "../media/image-ops.js"; import { captureEnv } from "../test-utils/env.js"; -import { loadWebMedia, loadWebMediaRaw, optimizeImageToJpeg } from "./media.js"; +import { + LocalMediaAccessError, + loadWebMedia, + loadWebMediaRaw, + optimizeImageToJpeg, +} from "./media.js"; let fixtureRoot = ""; let fixtureFileCount = 0; @@ -329,7 +334,7 @@ describe("local media root guard", () => { // Explicit roots that don't contain the temp file. await expect( loadWebMedia(tinyPngFile, 1024 * 1024, { localRoots: ["/nonexistent-root"] }), - ).rejects.toThrow(/not under an allowed directory/i); + ).rejects.toMatchObject({ code: "path-not-allowed" }); }); it("allows local paths under an explicit root", async () => { @@ -337,6 +342,21 @@ describe("local media root guard", () => { expect(result.kind).toBe("image"); }); + it("requires readFile override for localRoots bypass", async () => { + await expect( + loadWebMedia(tinyPngFile, { + maxBytes: 1024 * 1024, + localRoots: "any", + }), + ).rejects.toBeInstanceOf(LocalMediaAccessError); + await expect( + loadWebMedia(tinyPngFile, { + maxBytes: 1024 * 1024, + localRoots: "any", + }), + ).rejects.toMatchObject({ code: "unsafe-bypass" }); + }); + it("allows any path when localRoots is 'any'", async () => { const result = await loadWebMedia(tinyPngFile, { maxBytes: 1024 * 1024, @@ -351,7 +371,7 @@ describe("local media root guard", () => { loadWebMedia(tinyPngFile, 1024 * 1024, { localRoots: [path.parse(tinyPngFile).root], }), - ).rejects.toThrow(/refuses filesystem root/i); + ).rejects.toMatchObject({ code: "invalid-root" }); }); it("allows default OpenClaw state workspace and sandbox roots", async () => { @@ -392,7 +412,7 @@ describe("local media root guard", () => { maxBytes: 1024 * 1024, readFile, }), - ).rejects.toThrow(/not under an allowed directory/i); + ).rejects.toMatchObject({ code: "path-not-allowed" }); }); it("allows per-agent workspace-* paths with explicit local roots", async () => { diff --git a/src/web/media.ts b/src/web/media.ts index cc63814170..3e9a864f03 100644 --- a/src/web/media.ts +++ b/src/web/media.ts @@ -1,8 +1,9 @@ import fs from "node:fs/promises"; import path from "node:path"; import { fileURLToPath } from "node:url"; -import { logVerbose, shouldLogVerbose } from "../globals.js"; import type { SsrFPolicy } from "../infra/net/ssrf.js"; +import { logVerbose, shouldLogVerbose } from "../globals.js"; +import { SafeOpenError, readLocalFileSafely } from "../infra/fs-safe.js"; import { type MediaKind, maxBytesForKind, mediaKindFromMime } from "../media/constants.js"; import { fetchRemoteMedia } from "../media/fetch.js"; import { @@ -33,6 +34,25 @@ type WebMediaOptions = { readFile?: (filePath: string) => Promise; }; +export type LocalMediaAccessErrorCode = + | "path-not-allowed" + | "invalid-root" + | "invalid-file-url" + | "unsafe-bypass" + | "not-found" + | "invalid-path" + | "not-file"; + +export class LocalMediaAccessError extends Error { + code: LocalMediaAccessErrorCode; + + constructor(code: LocalMediaAccessErrorCode, message: string, options?: ErrorOptions) { + super(message, options); + this.code = code; + this.name = "LocalMediaAccessError"; + } +} + export function getDefaultLocalRoots(): readonly string[] { return getDefaultMediaLocalRoots(); } @@ -65,7 +85,10 @@ async function assertLocalMediaAllowed( if (rel && !rel.startsWith("..") && !path.isAbsolute(rel)) { const firstSegment = rel.split(path.sep)[0] ?? ""; if (firstSegment.startsWith("workspace-")) { - throw new Error(`Local media path is not under an allowed directory: ${mediaPath}`); + throw new LocalMediaAccessError( + "path-not-allowed", + `Local media path is not under an allowed directory: ${mediaPath}`, + ); } } } @@ -78,7 +101,8 @@ async function assertLocalMediaAllowed( resolvedRoot = path.resolve(root); } if (resolvedRoot === path.parse(resolvedRoot).root) { - throw new Error( + throw new LocalMediaAccessError( + "invalid-root", `Invalid localRoots entry (refuses filesystem root): ${root}. Pass a narrower directory.`, ); } @@ -86,7 +110,10 @@ async function assertLocalMediaAllowed( return; } } - throw new Error(`Local media path is not under an allowed directory: ${mediaPath}`); + throw new LocalMediaAccessError( + "path-not-allowed", + `Local media path is not under an allowed directory: ${mediaPath}`, + ); } const HEIC_MIME_RE = /^image\/hei[cf]$/i; @@ -202,7 +229,7 @@ async function loadWebMediaInternal( try { mediaUrl = fileURLToPath(mediaUrl); } catch { - throw new Error(`Invalid file:// URL: ${mediaUrl}`); + throw new LocalMediaAccessError("invalid-file-url", `Invalid file:// URL: ${mediaUrl}`); } } @@ -295,7 +322,8 @@ async function loadWebMediaInternal( } if ((sandboxValidated || localRoots === "any") && !readFileOverride) { - throw new Error( + throw new LocalMediaAccessError( + "unsafe-bypass", "Refusing localRoots bypass without readFile override. Use sandboxValidated with readFile, or pass explicit localRoots.", ); } @@ -306,7 +334,35 @@ async function loadWebMediaInternal( } // Local path - const data = readFileOverride ? await readFileOverride(mediaUrl) : await fs.readFile(mediaUrl); + let data: Buffer; + if (readFileOverride) { + data = await readFileOverride(mediaUrl); + } else { + try { + data = (await readLocalFileSafely({ filePath: mediaUrl })).buffer; + } catch (err) { + if (err instanceof SafeOpenError) { + if (err.code === "not-found") { + throw new LocalMediaAccessError("not-found", `Local media file not found: ${mediaUrl}`, { + cause: err, + }); + } + if (err.code === "not-file") { + throw new LocalMediaAccessError( + "not-file", + `Local media path is not a file: ${mediaUrl}`, + { cause: err }, + ); + } + throw new LocalMediaAccessError( + "invalid-path", + `Local media path is not safe to read: ${mediaUrl}`, + { cause: err }, + ); + } + throw err; + } + } const mime = await detectMime({ buffer: data, filePath: mediaUrl }); const kind = mediaKindFromMime(mime); let fileName = path.basename(mediaUrl) || undefined;