mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-03 03:03:24 -04:00
refactor(test): share memory embedding fixture
This commit is contained in:
105
src/memory/embedding-manager.test-harness.ts
Normal file
105
src/memory/embedding-manager.test-harness.ts
Normal file
@@ -0,0 +1,105 @@
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterAll, beforeAll, beforeEach, expect } from "vitest";
|
||||
import { getEmbedBatchMock, resetEmbeddingMocks } from "./embedding.test-mocks.js";
|
||||
import { getMemorySearchManager, type MemoryIndexManager } from "./index.js";
|
||||
|
||||
/**
 * Registers shared vitest lifecycle hooks for memory-embedding test suites and
 * returns accessors for the state those hooks create.
 *
 * Hooks installed in the calling scope:
 * - `beforeAll`: creates a temp fixture root (prefix `opts.fixturePrefix`) with a
 *   `workspace/memory` directory, then builds two managers via
 *   `getMemorySearchManager` — one configured with `opts.largeTokens`, one with
 *   `opts.smallTokens` — each pointing at its own sqlite index path.
 * - `beforeEach`: resets the embedding mocks, recreates the memory directory, and
 *   (unless `opts.resetIndexEachTest` is `false`) resets both managers.
 * - `afterAll`: closes both managers and removes the fixture root.
 *
 * @param opts.fixturePrefix Prefix passed to `fs.mkdtemp` under the OS temp dir.
 * @param opts.largeTokens Token value handed to `createCfg` for the "large" manager.
 * @param opts.smallTokens Token value handed to `createCfg` for the "small" manager.
 * @param opts.createCfg Builds the config object passed as `cfg` to `getMemorySearchManager`.
 * @param opts.resetIndexEachTest Whether `beforeEach` resets both managers; defaults to `true`.
 * @returns The embed-batch mock, lazy getters for the fixture paths and managers
 *   (each throws `"<name> missing"` if read before `beforeAll` has populated it),
 *   and the `resetManager` helper.
 */
export function installEmbeddingManagerFixture(opts: {
  fixturePrefix: string;
  largeTokens: number;
  smallTokens: number;
  createCfg: (params: { workspaceDir: string; indexPath: string; tokens: number }) => unknown;
  resetIndexEachTest?: boolean;
}) {
  const embedBatch = getEmbedBatchMock();
  const resetIndexEachTest = opts.resetIndexEachTest ?? true;

  // Populated by beforeAll; undefined until then and again after afterAll.
  let fixtureRoot: string | undefined;
  let workspaceDir: string | undefined;
  let memoryDir: string | undefined;
  let managerLarge: MemoryIndexManager | undefined;
  let managerSmall: MemoryIndexManager | undefined;

  // Calls `resetIndex()` and sets `dirty = true` on the manager. Both members are
  // reached through a cast, so they are presumably not part of the public
  // MemoryIndexManager type — NOTE(review): confirm against ./index.js.
  const resetManager = (manager: MemoryIndexManager) => {
    (manager as unknown as { resetIndex: () => void }).resetIndex();
    (manager as unknown as { dirty: boolean }).dirty = true;
  };

  // Returns `value`, or throws `"<name> missing"` when it has not been set.
  // Uses a nullish check (not truthiness) so valid falsy values are not rejected.
  const requireValue = <T>(value: T | undefined, name: string): T => {
    if (value == null) {
      throw new Error(`${name} missing`);
    }
    return value;
  };

  beforeAll(async () => {
    fixtureRoot = await fs.mkdtemp(path.join(os.tmpdir(), opts.fixturePrefix));
    workspaceDir = path.join(fixtureRoot, "workspace");
    memoryDir = path.join(workspaceDir, "memory");
    await fs.mkdir(memoryDir, { recursive: true });

    const indexPathLarge = path.join(fixtureRoot, "index.large.sqlite");
    const indexPathSmall = path.join(fixtureRoot, "index.small.sqlite");

    const large = await getMemorySearchManager({
      cfg: opts.createCfg({
        workspaceDir,
        indexPath: indexPathLarge,
        tokens: opts.largeTokens,
      }),
      agentId: "main",
    });
    expect(large.manager).not.toBeNull();
    managerLarge = large.manager ?? undefined;

    const small = await getMemorySearchManager({
      cfg: opts.createCfg({
        workspaceDir,
        indexPath: indexPathSmall,
        tokens: opts.smallTokens,
      }),
      agentId: "main",
    });
    expect(small.manager).not.toBeNull();
    managerSmall = small.manager ?? undefined;
  });

  afterAll(async () => {
    // Nested try/finally: a failing close() must not prevent the other manager
    // from being closed or the temp directory from being removed.
    try {
      if (managerLarge) {
        await managerLarge.close();
      }
    } finally {
      managerLarge = undefined;
      try {
        if (managerSmall) {
          await managerSmall.close();
        }
      } finally {
        managerSmall = undefined;
        if (fixtureRoot) {
          await fs.rm(fixtureRoot, { recursive: true, force: true });
          fixtureRoot = undefined;
        }
      }
    }
  });

  beforeEach(async () => {
    resetEmbeddingMocks();

    // Start every test from an empty memory directory.
    const dir = requireValue(memoryDir, "memoryDir");
    await fs.rm(dir, { recursive: true, force: true });
    await fs.mkdir(dir, { recursive: true });

    if (resetIndexEachTest) {
      resetManager(requireValue(managerLarge, "managerLarge"));
      resetManager(requireValue(managerSmall, "managerSmall"));
    }
  });

  return {
    embedBatch,
    getFixtureRoot: () => requireValue(fixtureRoot, "fixtureRoot"),
    getWorkspaceDir: () => requireValue(workspaceDir, "workspaceDir"),
    getMemoryDir: () => requireValue(memoryDir, "memoryDir"),
    getManagerLarge: () => requireValue(managerLarge, "managerLarge"),
    getManagerSmall: () => requireValue(managerSmall, "managerSmall"),
    resetManager,
  };
}
|
||||
@@ -1,106 +1,40 @@
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterAll, beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import { getEmbedBatchMock, resetEmbeddingMocks } from "./embedding.test-mocks.js";
|
||||
import { getMemorySearchManager, type MemoryIndexManager } from "./index.js";
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import { installEmbeddingManagerFixture } from "./embedding-manager.test-harness.js";
|
||||
|
||||
const embedBatch = getEmbedBatchMock();
|
||||
const fx = installEmbeddingManagerFixture({
|
||||
fixturePrefix: "openclaw-mem-",
|
||||
largeTokens: 1250,
|
||||
smallTokens: 200,
|
||||
createCfg: ({ workspaceDir, indexPath, tokens }) => ({
|
||||
agents: {
|
||||
defaults: {
|
||||
workspace: workspaceDir,
|
||||
memorySearch: {
|
||||
provider: "openai",
|
||||
model: "mock-embed",
|
||||
store: { path: indexPath, vector: { enabled: false } },
|
||||
chunking: { tokens, overlap: 0 },
|
||||
sync: { watch: false, onSessionStart: false, onSearch: false },
|
||||
query: { minScore: 0, hybrid: { enabled: false } },
|
||||
},
|
||||
},
|
||||
list: [{ id: "main", default: true }],
|
||||
},
|
||||
}),
|
||||
});
|
||||
const { embedBatch } = fx;
|
||||
|
||||
describe("memory embedding batches", () => {
|
||||
let fixtureRoot: string;
|
||||
let workspaceDir: string;
|
||||
let memoryDir: string;
|
||||
let indexPathLarge: string;
|
||||
let indexPathSmall: string;
|
||||
let managerLarge: MemoryIndexManager | null = null;
|
||||
let managerSmall: MemoryIndexManager | null = null;
|
||||
|
||||
function resetManagerForTest(manager: MemoryIndexManager | null) {
|
||||
if (!manager) {
|
||||
throw new Error("manager missing");
|
||||
}
|
||||
(manager as unknown as { resetIndex: () => void }).resetIndex();
|
||||
(manager as unknown as { dirty: boolean }).dirty = true;
|
||||
}
|
||||
|
||||
function createCfg(params: { indexPath: string; tokens: number }) {
|
||||
return {
|
||||
agents: {
|
||||
defaults: {
|
||||
workspace: workspaceDir,
|
||||
memorySearch: {
|
||||
provider: "openai",
|
||||
model: "mock-embed",
|
||||
store: { path: params.indexPath, vector: { enabled: false } },
|
||||
chunking: { tokens: params.tokens, overlap: 0 },
|
||||
sync: { watch: false, onSessionStart: false, onSearch: false },
|
||||
query: { minScore: 0, hybrid: { enabled: false } },
|
||||
},
|
||||
},
|
||||
list: [{ id: "main", default: true }],
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
beforeAll(async () => {
|
||||
fixtureRoot = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-mem-"));
|
||||
workspaceDir = path.join(fixtureRoot, "workspace");
|
||||
memoryDir = path.join(workspaceDir, "memory");
|
||||
indexPathLarge = path.join(fixtureRoot, "index.large.sqlite");
|
||||
indexPathSmall = path.join(fixtureRoot, "index.small.sqlite");
|
||||
await fs.mkdir(memoryDir, { recursive: true });
|
||||
|
||||
const large = await getMemorySearchManager({
|
||||
cfg: createCfg({ indexPath: indexPathLarge, tokens: 1250 }),
|
||||
agentId: "main",
|
||||
});
|
||||
expect(large.manager).not.toBeNull();
|
||||
if (!large.manager) {
|
||||
throw new Error("manager missing");
|
||||
}
|
||||
managerLarge = large.manager;
|
||||
|
||||
const small = await getMemorySearchManager({
|
||||
cfg: createCfg({ indexPath: indexPathSmall, tokens: 200 }),
|
||||
agentId: "main",
|
||||
});
|
||||
expect(small.manager).not.toBeNull();
|
||||
if (!small.manager) {
|
||||
throw new Error("manager missing");
|
||||
}
|
||||
managerSmall = small.manager;
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
if (managerLarge) {
|
||||
await managerLarge.close();
|
||||
managerLarge = null;
|
||||
}
|
||||
if (managerSmall) {
|
||||
await managerSmall.close();
|
||||
managerSmall = null;
|
||||
}
|
||||
await fs.rm(fixtureRoot, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
beforeEach(async () => {
|
||||
resetEmbeddingMocks();
|
||||
|
||||
await fs.rm(memoryDir, { recursive: true, force: true });
|
||||
await fs.mkdir(memoryDir, { recursive: true });
|
||||
});
|
||||
|
||||
it("splits large files across multiple embedding batches", async () => {
|
||||
const memoryDir = fx.getMemoryDir();
|
||||
const managerLarge = fx.getManagerLarge();
|
||||
// Keep this small but above the embedding batch byte threshold (8k) so we
|
||||
// exercise multi-batch behavior without generating lots of chunks/DB rows.
|
||||
const line = "a".repeat(4200);
|
||||
const content = [line, line].join("\n");
|
||||
await fs.writeFile(path.join(memoryDir, "2026-01-03.md"), content);
|
||||
resetManagerForTest(managerLarge);
|
||||
if (!managerLarge) {
|
||||
throw new Error("manager missing");
|
||||
}
|
||||
const updates: Array<{ completed: number; total: number; label?: string }> = [];
|
||||
await managerLarge.sync({
|
||||
progress: (update) => {
|
||||
@@ -120,19 +54,19 @@ describe("memory embedding batches", () => {
|
||||
});
|
||||
|
||||
it("keeps small files in a single embedding batch", async () => {
|
||||
const memoryDir = fx.getMemoryDir();
|
||||
const managerSmall = fx.getManagerSmall();
|
||||
const line = "b".repeat(120);
|
||||
const content = Array.from({ length: 4 }, () => line).join("\n");
|
||||
await fs.writeFile(path.join(memoryDir, "2026-01-04.md"), content);
|
||||
resetManagerForTest(managerSmall);
|
||||
if (!managerSmall) {
|
||||
throw new Error("manager missing");
|
||||
}
|
||||
await managerSmall.sync({ reason: "test" });
|
||||
|
||||
expect(embedBatch.mock.calls.length).toBe(1);
|
||||
});
|
||||
|
||||
it("retries embeddings on transient rate limit and 5xx errors", async () => {
|
||||
const memoryDir = fx.getMemoryDir();
|
||||
const managerSmall = fx.getManagerSmall();
|
||||
const line = "d".repeat(120);
|
||||
const content = Array.from({ length: 4 }, () => line).join("\n");
|
||||
await fs.writeFile(path.join(memoryDir, "2026-01-06.md"), content);
|
||||
@@ -163,10 +97,6 @@ describe("memory embedding batches", () => {
|
||||
}
|
||||
return realSetTimeout(handler, delay, ...args);
|
||||
}) as typeof setTimeout);
|
||||
resetManagerForTest(managerSmall);
|
||||
if (!managerSmall) {
|
||||
throw new Error("manager missing");
|
||||
}
|
||||
try {
|
||||
await managerSmall.sync({ reason: "test" });
|
||||
} finally {
|
||||
@@ -177,11 +107,9 @@ describe("memory embedding batches", () => {
|
||||
}, 10000);
|
||||
|
||||
it("skips empty chunks so embeddings input stays valid", async () => {
|
||||
const memoryDir = fx.getMemoryDir();
|
||||
const managerSmall = fx.getManagerSmall();
|
||||
await fs.writeFile(path.join(memoryDir, "2026-01-07.md"), "\n\n\n");
|
||||
resetManagerForTest(managerSmall);
|
||||
if (!managerSmall) {
|
||||
throw new Error("manager missing");
|
||||
}
|
||||
await managerSmall.sync({ reason: "test" });
|
||||
|
||||
const inputs = embedBatch.mock.calls.flatMap((call) => call[0] ?? []);
|
||||
|
||||
@@ -1,104 +1,37 @@
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterAll, beforeAll, beforeEach, describe, expect, it } from "vitest";
|
||||
import { getEmbedBatchMock, resetEmbeddingMocks } from "./embedding.test-mocks.js";
|
||||
import { getMemorySearchManager, type MemoryIndexManager } from "./index.js";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { installEmbeddingManagerFixture } from "./embedding-manager.test-harness.js";
|
||||
|
||||
const embedBatch = getEmbedBatchMock();
|
||||
const fx = installEmbeddingManagerFixture({
|
||||
fixturePrefix: "openclaw-mem-token-",
|
||||
largeTokens: 10_000,
|
||||
smallTokens: 1000,
|
||||
createCfg: ({ workspaceDir, indexPath, tokens }) => ({
|
||||
agents: {
|
||||
defaults: {
|
||||
workspace: workspaceDir,
|
||||
memorySearch: {
|
||||
provider: "openai",
|
||||
model: "mock-embed",
|
||||
store: { path: indexPath, vector: { enabled: false } },
|
||||
chunking: { tokens, overlap: 0 },
|
||||
sync: { watch: false, onSessionStart: false, onSearch: false },
|
||||
query: { minScore: 0 },
|
||||
},
|
||||
},
|
||||
list: [{ id: "main", default: true }],
|
||||
},
|
||||
}),
|
||||
});
|
||||
const { embedBatch } = fx;
|
||||
|
||||
describe("memory embedding token limits", () => {
|
||||
let fixtureRoot: string;
|
||||
let workspaceDir: string;
|
||||
let memoryDir: string;
|
||||
let indexPathLarge: string;
|
||||
let indexPathSmall: string;
|
||||
let managerLarge: MemoryIndexManager | null = null;
|
||||
let managerSmall: MemoryIndexManager | null = null;
|
||||
|
||||
function createCfg(params: { indexPath: string; tokens: number }) {
|
||||
return {
|
||||
agents: {
|
||||
defaults: {
|
||||
workspace: workspaceDir,
|
||||
memorySearch: {
|
||||
provider: "openai",
|
||||
model: "mock-embed",
|
||||
store: { path: params.indexPath, vector: { enabled: false } },
|
||||
chunking: { tokens: params.tokens, overlap: 0 },
|
||||
sync: { watch: false, onSessionStart: false, onSearch: false },
|
||||
query: { minScore: 0 },
|
||||
},
|
||||
},
|
||||
list: [{ id: "main", default: true }],
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
beforeAll(async () => {
|
||||
fixtureRoot = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-mem-token-"));
|
||||
workspaceDir = path.join(fixtureRoot, "workspace");
|
||||
memoryDir = path.join(workspaceDir, "memory");
|
||||
indexPathLarge = path.join(fixtureRoot, "index.large.sqlite");
|
||||
indexPathSmall = path.join(fixtureRoot, "index.small.sqlite");
|
||||
await fs.mkdir(memoryDir, { recursive: true });
|
||||
|
||||
const large = await getMemorySearchManager({
|
||||
cfg: createCfg({ indexPath: indexPathLarge, tokens: 10_000 }),
|
||||
agentId: "main",
|
||||
});
|
||||
expect(large.manager).not.toBeNull();
|
||||
if (!large.manager) {
|
||||
throw new Error("manager missing");
|
||||
}
|
||||
managerLarge = large.manager;
|
||||
|
||||
const small = await getMemorySearchManager({
|
||||
cfg: createCfg({ indexPath: indexPathSmall, tokens: 1000 }),
|
||||
agentId: "main",
|
||||
});
|
||||
expect(small.manager).not.toBeNull();
|
||||
if (!small.manager) {
|
||||
throw new Error("manager missing");
|
||||
}
|
||||
managerSmall = small.manager;
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
if (managerLarge) {
|
||||
await managerLarge.close();
|
||||
managerLarge = null;
|
||||
}
|
||||
if (managerSmall) {
|
||||
await managerSmall.close();
|
||||
managerSmall = null;
|
||||
}
|
||||
await fs.rm(fixtureRoot, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
beforeEach(async () => {
|
||||
resetEmbeddingMocks();
|
||||
|
||||
await fs.rm(memoryDir, { recursive: true, force: true });
|
||||
await fs.mkdir(memoryDir, { recursive: true });
|
||||
|
||||
const reset = (manager: MemoryIndexManager | null) => {
|
||||
if (!manager) {
|
||||
throw new Error("manager missing");
|
||||
}
|
||||
(manager as unknown as { resetIndex: () => void }).resetIndex();
|
||||
(manager as unknown as { dirty: boolean }).dirty = true;
|
||||
};
|
||||
reset(managerLarge);
|
||||
reset(managerSmall);
|
||||
});
|
||||
|
||||
it("splits oversized chunks so each embedding input stays <= 8192 UTF-8 bytes", async () => {
|
||||
const memoryDir = fx.getMemoryDir();
|
||||
const managerLarge = fx.getManagerLarge();
|
||||
const content = "x".repeat(9500);
|
||||
await fs.writeFile(path.join(memoryDir, "2026-01-09.md"), content);
|
||||
if (!managerLarge) {
|
||||
throw new Error("manager missing");
|
||||
}
|
||||
await managerLarge.sync({ reason: "test" });
|
||||
|
||||
const inputs = embedBatch.mock.calls.flatMap((call) => call[0] ?? []);
|
||||
@@ -109,12 +42,11 @@ describe("memory embedding token limits", () => {
|
||||
});
|
||||
|
||||
it("uses UTF-8 byte estimates when batching multibyte chunks", async () => {
|
||||
const memoryDir = fx.getMemoryDir();
|
||||
const managerSmall = fx.getManagerSmall();
|
||||
const line = "😀".repeat(1800);
|
||||
const content = `${line}\n${line}\n${line}`;
|
||||
await fs.writeFile(path.join(memoryDir, "2026-01-10.md"), content);
|
||||
if (!managerSmall) {
|
||||
throw new Error("manager missing");
|
||||
}
|
||||
await managerSmall.sync({ reason: "test" });
|
||||
|
||||
const batchSizes = embedBatch.mock.calls.map(
|
||||
|
||||
Reference in New Issue
Block a user