memory-neo4j: add 'openclaw memory neo4j index' reindex command

Adds a CLI command to re-embed all Memory and Entity nodes after
changing the embedding model or provider. Drops old vector indexes,
re-embeds in batches via the configured provider, and recreates
indexes with the correct dimensions.
This commit is contained in:
Tarun Sukhani
2026-02-05 18:43:11 +00:00
parent bf5a7a05dd
commit 370adb0f4b
2 changed files with 189 additions and 0 deletions

View File

@@ -659,6 +659,57 @@ const memoryNeo4jPlugin = {
process.exitCode = 1;
}
});
memory
.command("index")
.description(
"Re-embed all memories and entities — use after changing embedding model/provider",
)
.option("--batch-size <n>", "Embedding batch size (default: 50)")
.action(async (opts: { batchSize?: string }) => {
const batchSize = opts.batchSize ? parseInt(opts.batchSize, 10) : 50;
if (Number.isNaN(batchSize) || batchSize <= 0) {
console.error("Error: --batch-size must be greater than 0");
process.exitCode = 1;
return;
}
console.log("\nMemory Neo4j — Reindex Embeddings");
console.log("═════════════════════════════════════════════════════════════");
console.log(`Model: ${cfg.embedding.provider}/${cfg.embedding.model}`);
console.log(`Dimensions: ${vectorDim}`);
console.log(`Batch size: ${batchSize}\n`);
try {
const startedAt = Date.now();
const result = await db.reindex((texts) => embeddings.embedBatch(texts), {
batchSize,
onProgress: (phase, done, total) => {
if (phase === "drop-indexes" && done === 0) {
console.log("▶ Dropping old vector indexes…");
} else if (phase === "memories") {
console.log(` Memories: ${done}/${total}`);
} else if (phase === "entities") {
console.log(` Entities: ${done}/${total}`);
} else if (phase === "create-indexes" && done === 0) {
console.log("▶ Recreating vector indexes…");
}
},
});
const elapsed = ((Date.now() - startedAt) / 1000).toFixed(1);
console.log("\n═════════════════════════════════════════════════════════════");
console.log(
`✅ Reindex complete in ${elapsed}s — ${result.memories} memories, ${result.entities} entities`,
);
console.log("");
} catch (err) {
console.error(
`\n❌ Reindex failed: ${err instanceof Error ? err.message : String(err)}`,
);
process.exitCode = 1;
}
});
},
{ commands: [] }, // Adds subcommands to existing "memory" command, no conflict
);

View File

@@ -1665,6 +1665,144 @@ export class Neo4jMemoryClient {
}
}
// --------------------------------------------------------------------------
// Reindex: re-embed all Memory and Entity nodes
// --------------------------------------------------------------------------
/**
* Re-embed all Memory and Entity nodes with a new embedding model.
*
* Steps:
* 1. Drop old vector indexes (dimensions may have changed)
* 2. Fetch all Memory nodes and re-embed their text
* 3. Fetch all Entity nodes and re-embed their name
* 4. Recreate vector indexes with current dimensions
*
* Used after changing the embedding model/provider in config.
*/
async reindex(
embedFn: (texts: string[]) => Promise<number[][]>,
options?: {
batchSize?: number;
onProgress?: (phase: string, done: number, total: number) => void;
},
): Promise<{ memories: number; entities: number }> {
const batchSize = options?.batchSize ?? 50;
const progress = options?.onProgress ?? (() => {});
await this.ensureInitialized();
// Step 1: Drop old vector indexes
progress("drop-indexes", 0, 2);
const dropSession = this.driver!.session();
try {
await this.runSafe(dropSession, "DROP INDEX memory_embedding_index IF EXISTS");
await this.runSafe(dropSession, "DROP INDEX entity_embedding_index IF EXISTS");
} finally {
await dropSession.close();
}
progress("drop-indexes", 2, 2);
// Step 2: Fetch and re-embed memories
const fetchSession = this.driver!.session();
let memories: Array<{ id: string; text: string }>;
try {
const result = await fetchSession.run(
"MATCH (m:Memory) RETURN m.id AS id, m.text AS text ORDER BY m.createdAt ASC",
);
memories = result.records.map((r) => ({
id: r.get("id") as string,
text: r.get("text") as string,
}));
} finally {
await fetchSession.close();
}
progress("memories", 0, memories.length);
for (let i = 0; i < memories.length; i += batchSize) {
const batch = memories.slice(i, i + batchSize);
const vectors = await embedFn(batch.map((m) => m.text));
const session = this.driver!.session();
try {
for (let j = 0; j < batch.length; j++) {
if (vectors[j] && vectors[j].length > 0) {
await session.run("MATCH (m:Memory {id: $id}) SET m.embedding = $embedding", {
id: batch[j].id,
embedding: vectors[j],
});
}
}
} finally {
await session.close();
}
progress("memories", Math.min(i + batchSize, memories.length), memories.length);
}
// Step 3: Fetch and re-embed entities
const entitySession = this.driver!.session();
let entities: Array<{ id: string; name: string }>;
try {
const result = await entitySession.run("MATCH (e:Entity) RETURN e.id AS id, e.name AS name");
entities = result.records.map((r) => ({
id: r.get("id") as string,
name: r.get("name") as string,
}));
} finally {
await entitySession.close();
}
progress("entities", 0, entities.length);
for (let i = 0; i < entities.length; i += batchSize) {
const batch = entities.slice(i, i + batchSize);
const vectors = await embedFn(batch.map((e) => e.name));
const session = this.driver!.session();
try {
for (let j = 0; j < batch.length; j++) {
if (vectors[j] && vectors[j].length > 0) {
await session.run("MATCH (e:Entity {id: $id}) SET e.embedding = $embedding", {
id: batch[j].id,
embedding: vectors[j],
});
}
}
} finally {
await session.close();
}
progress("entities", Math.min(i + batchSize, entities.length), entities.length);
}
// Step 4: Recreate vector indexes with current dimensions
progress("create-indexes", 0, 2);
const indexSession = this.driver!.session();
try {
await this.runSafe(
indexSession,
`CREATE VECTOR INDEX memory_embedding_index IF NOT EXISTS
FOR (m:Memory) ON m.embedding
OPTIONS {indexConfig: {
\`vector.dimensions\`: ${this.dimensions},
\`vector.similarity_function\`: 'cosine'
}}`,
);
await this.runSafe(
indexSession,
`CREATE VECTOR INDEX entity_embedding_index IF NOT EXISTS
FOR (e:Entity) ON e.embedding
OPTIONS {indexConfig: {
\`vector.dimensions\`: ${this.dimensions},
\`vector.similarity_function\`: 'cosine'
}}`,
);
} finally {
await indexSession.close();
}
progress("create-indexes", 2, 2);
return { memories: memories.length, entities: entities.length };
}
// --------------------------------------------------------------------------
// Retry Logic
// --------------------------------------------------------------------------