memory-neo4j: drop entity vector embeddings (use fulltext search)

This commit is contained in:
Tarun Sukhani
2026-02-06 23:20:47 +08:00
parent f1f32d5723
commit b0a9eb9407
4 changed files with 19 additions and 94 deletions

View File

@@ -313,19 +313,7 @@ export async function runBackgroundExtraction(
return;
}
// Generate embeddings for entity names (for entity vector search)
let entityEmbeddings: Map<string, number[]> | undefined;
if (result.entities.length > 0) {
try {
const names = result.entities.map((e) => e.name);
const vectors = await embeddings.embedBatch(names);
entityEmbeddings = new Map(names.map((n, i) => [n, vectors[i]]));
} catch (err) {
logger.debug?.(`memory-neo4j: entity embedding generation failed: ${String(err)}`);
}
}
// MERGE Entity nodes
// MERGE Entity nodes (entities use fulltext search, not vector embeddings)
for (const entity of result.entities) {
try {
await db.mergeEntity({
@@ -334,7 +322,6 @@ export async function runBackgroundExtraction(
type: entity.type,
aliases: entity.aliases,
description: entity.description,
embedding: entityEmbeddings?.get(entity.name),
});
// Create MENTIONS relationship

View File

@@ -686,22 +686,18 @@ const memoryNeo4jPlugin = {
batchSize,
onProgress: (phase, done, total) => {
if (phase === "drop-indexes" && done === 0) {
console.log("▶ Dropping old vector indexes…");
console.log("▶ Dropping old vector index…");
} else if (phase === "memories") {
console.log(` Memories: ${done}/${total}`);
} else if (phase === "entities") {
console.log(` Entities: ${done}/${total}`);
} else if (phase === "create-indexes" && done === 0) {
console.log("▶ Recreating vector indexes…");
console.log("▶ Recreating vector index…");
}
},
});
const elapsed = ((Date.now() - startedAt) / 1000).toFixed(1);
console.log("\n═════════════════════════════════════════════════════════════");
console.log(
`✅ Reindex complete in ${elapsed}s — ${result.memories} memories, ${result.entities} entities`,
);
console.log(`✅ Reindex complete in ${elapsed}s — ${result.memories} memories`);
console.log("");
} catch (err) {
console.error(

View File

@@ -113,18 +113,6 @@ export class Neo4jMemoryClient {
}}
`,
);
await this.runSafe(
session,
`
CREATE VECTOR INDEX entity_embedding_index IF NOT EXISTS
FOR (e:Entity) ON e.embedding
OPTIONS {indexConfig: {
\`vector.dimensions\`: ${this.dimensions},
\`vector.similarity_function\`: 'cosine'
}}
`,
);
// Full-text indexes (Lucene BM25)
await this.runSafe(
session,
@@ -719,12 +707,11 @@ export class Neo4jMemoryClient {
`MERGE (e:Entity {name: $name})
ON CREATE SET
e.id = $id, e.type = $type, e.aliases = $aliases,
e.description = $description, e.embedding = $embedding,
e.description = $description,
e.firstSeen = $now, e.lastSeen = $now, e.mentionCount = 1
ON MATCH SET
e.type = COALESCE($type, e.type),
e.description = COALESCE($description, e.description),
e.embedding = COALESCE($embedding, e.embedding),
e.lastSeen = $now,
e.mentionCount = e.mentionCount + 1
RETURN e.id AS id, e.name AS name`,
@@ -734,7 +721,6 @@ export class Neo4jMemoryClient {
type: input.type,
aliases: input.aliases ?? [],
description: input.description ?? null,
embedding: input.embedding ?? null,
now: new Date().toISOString(),
},
);
@@ -1670,13 +1656,15 @@ export class Neo4jMemoryClient {
// --------------------------------------------------------------------------
/**
* Re-embed all Memory and Entity nodes with a new embedding model.
* Re-embed all Memory nodes with a new embedding model.
*
* Steps:
* 1. Drop old vector indexes (dimensions may have changed)
* 1. Drop old vector index (dimensions may have changed)
* 2. Fetch all Memory nodes and re-embed their text
* 3. Fetch all Entity nodes and re-embed their name
* 4. Recreate vector indexes with current dimensions
* 3. Recreate vector index with current dimensions
*
* Entities and tags are not affected — they use fulltext search
* and graph traversal, not vector embeddings.
*
* Used after changing the embedding model/provider in config.
*/
@@ -1686,22 +1674,21 @@ export class Neo4jMemoryClient {
batchSize?: number;
onProgress?: (phase: string, done: number, total: number) => void;
},
): Promise<{ memories: number; entities: number }> {
): Promise<{ memories: number }> {
const batchSize = options?.batchSize ?? 50;
const progress = options?.onProgress ?? (() => {});
await this.ensureInitialized();
// Step 1: Drop old vector indexes
progress("drop-indexes", 0, 2);
// Step 1: Drop old vector index
progress("drop-indexes", 0, 1);
const dropSession = this.driver!.session();
try {
await this.runSafe(dropSession, "DROP INDEX memory_embedding_index IF EXISTS");
await this.runSafe(dropSession, "DROP INDEX entity_embedding_index IF EXISTS");
} finally {
await dropSession.close();
}
progress("drop-indexes", 2, 2);
progress("drop-indexes", 1, 1);
// Step 2: Fetch and re-embed memories
const fetchSession = this.driver!.session();
@@ -1739,42 +1726,8 @@ export class Neo4jMemoryClient {
progress("memories", Math.min(i + batchSize, memories.length), memories.length);
}
// Step 3: Fetch and re-embed entities
const entitySession = this.driver!.session();
let entities: Array<{ id: string; name: string }>;
try {
const result = await entitySession.run("MATCH (e:Entity) RETURN e.id AS id, e.name AS name");
entities = result.records.map((r) => ({
id: r.get("id") as string,
name: r.get("name") as string,
}));
} finally {
await entitySession.close();
}
progress("entities", 0, entities.length);
for (let i = 0; i < entities.length; i += batchSize) {
const batch = entities.slice(i, i + batchSize);
const vectors = await embedFn(batch.map((e) => e.name));
const session = this.driver!.session();
try {
for (let j = 0; j < batch.length; j++) {
if (vectors[j] && vectors[j].length > 0) {
await session.run("MATCH (e:Entity {id: $id}) SET e.embedding = $embedding", {
id: batch[j].id,
embedding: vectors[j],
});
}
}
} finally {
await session.close();
}
progress("entities", Math.min(i + batchSize, entities.length), entities.length);
}
// Step 4: Recreate vector indexes with current dimensions
progress("create-indexes", 0, 2);
// Step 3: Recreate vector index with current dimensions
progress("create-indexes", 0, 1);
const indexSession = this.driver!.session();
try {
await this.runSafe(
@@ -1786,21 +1739,12 @@ export class Neo4jMemoryClient {
\`vector.similarity_function\`: 'cosine'
}}`,
);
await this.runSafe(
indexSession,
`CREATE VECTOR INDEX entity_embedding_index IF NOT EXISTS
FOR (e:Entity) ON e.embedding
OPTIONS {indexConfig: {
\`vector.dimensions\`: ${this.dimensions},
\`vector.similarity_function\`: 'cosine'
}}`,
);
} finally {
await indexSession.close();
}
progress("create-indexes", 2, 2);
progress("create-indexes", 1, 1);
return { memories: memories.length, entities: entities.length };
return { memories: memories.length };
}
// --------------------------------------------------------------------------

View File

@@ -30,7 +30,6 @@ export type EntityNode = {
name: string;
type: EntityType;
aliases: string[];
embedding?: number[];
description?: string;
firstSeen: string;
lastSeen: string;
@@ -118,7 +117,6 @@ export type MergeEntityInput = {
type: EntityType;
aliases?: string[];
description?: string;
embedding?: number[];
};
// ============================================================================