mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-03 03:03:24 -04:00
memory-neo4j: single-use tag pruning, alias-based entity dedup, tag normalization

- Add findSingleUseTags() to prune tags with only 1 reference after 14 days
- Enhance findDuplicateEntityPairs() to match on entity aliases
- Add normalizeTagName() to collapse hyphens/underscores to spaces
- Monitor "other" category accumulation in sleep cycle Phase 2
- Tighten extraction prompt with an explicit generic-entity blocklist
- Raise auto-capture threshold from 0.5 to 0.65
- Fix tests for entity dedup phase and skipPromotion default
This commit is contained in:
@@ -1770,6 +1770,8 @@ describe("runSleepCycle", () => {
|
||||
findOrphanTags: vi.fn().mockResolvedValue([]),
|
||||
deleteOrphanTags: vi.fn().mockResolvedValue(0),
|
||||
updateExtractionStatus: vi.fn().mockResolvedValue(undefined),
|
||||
findDuplicateEntityPairs: vi.fn().mockResolvedValue([]),
|
||||
mergeEntityPair: vi.fn().mockResolvedValue(true),
|
||||
};
|
||||
});
|
||||
|
||||
@@ -2193,6 +2195,7 @@ describe("runSleepCycle", () => {
|
||||
mockDb.promoteToCore.mockResolvedValue(1);
|
||||
|
||||
const result = await runSleepCycle(mockDb, mockEmbeddings, mockConfig, mockLogger, {
|
||||
skipPromotion: false,
|
||||
paretoPercentile: 0.2,
|
||||
promotionMinAgeDays: 7,
|
||||
});
|
||||
@@ -2219,6 +2222,7 @@ describe("runSleepCycle", () => {
|
||||
mockDb.calculateParetoThreshold.mockReturnValue(0.5);
|
||||
|
||||
const result = await runSleepCycle(mockDb, mockEmbeddings, mockConfig, mockLogger, {
|
||||
skipPromotion: false,
|
||||
promotionMinAgeDays: 7,
|
||||
});
|
||||
|
||||
@@ -2544,8 +2548,8 @@ describe("runSleepCycle", () => {
|
||||
expect(onPhaseStart).toHaveBeenCalledWith("dedup");
|
||||
expect(onPhaseStart).toHaveBeenCalledWith("conflict");
|
||||
expect(onPhaseStart).toHaveBeenCalledWith("semanticDedup");
|
||||
expect(onPhaseStart).toHaveBeenCalledWith("entityDedup");
|
||||
expect(onPhaseStart).toHaveBeenCalledWith("pareto");
|
||||
expect(onPhaseStart).toHaveBeenCalledWith("promotion");
|
||||
expect(onPhaseStart).toHaveBeenCalledWith("extraction");
|
||||
expect(onPhaseStart).toHaveBeenCalledWith("decay");
|
||||
expect(onPhaseStart).toHaveBeenCalledWith("cleanup");
|
||||
@@ -2579,6 +2583,7 @@ describe("runSleepCycle", () => {
|
||||
expect(result).toHaveProperty("dedup");
|
||||
expect(result).toHaveProperty("conflict");
|
||||
expect(result).toHaveProperty("semanticDedup");
|
||||
expect(result).toHaveProperty("entityDedup");
|
||||
expect(result).toHaveProperty("pareto");
|
||||
expect(result).toHaveProperty("promotion");
|
||||
expect(result).toHaveProperty("decay");
|
||||
|
||||
@@ -46,8 +46,14 @@ Rules:
|
||||
- Entity types: person, organization, location, event, concept
|
||||
- Relationship types: WORKS_AT, LIVES_AT, KNOWS, MARRIED_TO, PREFERS, DECIDED, RELATED_TO
|
||||
- Confidence: 0.0-1.0
|
||||
- Only extract what's explicitly stated or strongly implied
|
||||
- Return empty arrays if nothing to extract
|
||||
- Only extract SPECIFIC named entities: real people, companies, products, tools, places, events
|
||||
- Do NOT extract generic technology terms (python, javascript, docker, linux, api, sql, html, css, json, etc.)
|
||||
- Do NOT extract generic concepts (meeting, project, training, email, code, data, server, file, script, etc.)
|
||||
- Do NOT extract programming abstractions (function, class, module, async, sync, process, etc.)
|
||||
- Good entities: "Tarun", "Abundent Academy", "Tioman Island", "LiveKit", "Neo4j", "Fish Speech S1 Mini"
|
||||
- Bad entities: "python", "ai", "automation", "email", "docker", "machine learning", "api"
|
||||
- When in doubt, do NOT extract — fewer high-quality entities beat many generic ones
|
||||
- Return empty arrays if nothing specific to extract
|
||||
- Keep entity descriptions brief (1 sentence max)
|
||||
- Category: "preference" for opinions/preferences, "fact" for factual info, "decision" for choices made, "entity" for entity-focused, "other" for miscellaneous`;
|
||||
|
||||
@@ -112,6 +118,165 @@ export async function extractEntities(
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Normalize a tag name: lowercase, collapse hyphens/underscores to spaces,
|
||||
* collapse multiple spaces, trim. Ensures "machine-learning", "machine_learning",
|
||||
* and "machine learning" all resolve to the same tag node.
|
||||
*/
|
||||
function normalizeTagName(name: string): string {
|
||||
return name.trim().toLowerCase().replace(/[-_]+/g, " ").replace(/\s+/g, " ").trim();
|
||||
}
|
||||
|
||||
/**
|
||||
* Generic terms that should never be extracted as entities.
|
||||
* These are common technology/concept words that the LLM tends to
|
||||
* extract despite prompt instructions. Post-filter is more reliable
|
||||
* than prompt engineering alone.
|
||||
*/
|
||||
const GENERIC_ENTITY_BLOCKLIST = new Set([
|
||||
// Programming languages & frameworks
|
||||
"python",
|
||||
"javascript",
|
||||
"typescript",
|
||||
"java",
|
||||
"go",
|
||||
"rust",
|
||||
"ruby",
|
||||
"php",
|
||||
"c",
|
||||
"c++",
|
||||
"c#",
|
||||
"swift",
|
||||
"kotlin",
|
||||
"bash",
|
||||
"shell",
|
||||
"html",
|
||||
"css",
|
||||
"sql",
|
||||
"nosql",
|
||||
"json",
|
||||
"xml",
|
||||
"yaml",
|
||||
"react",
|
||||
"vue",
|
||||
"angular",
|
||||
"svelte",
|
||||
"next.js",
|
||||
"express",
|
||||
"fastapi",
|
||||
"django",
|
||||
"flask",
|
||||
// Generic tech concepts
|
||||
"ai",
|
||||
"artificial intelligence",
|
||||
"machine learning",
|
||||
"deep learning",
|
||||
"neural network",
|
||||
"automation",
|
||||
"api",
|
||||
"rest api",
|
||||
"graphql",
|
||||
"webhook",
|
||||
"websocket",
|
||||
"database",
|
||||
"server",
|
||||
"client",
|
||||
"cloud",
|
||||
"microservice",
|
||||
"monolith",
|
||||
"frontend",
|
||||
"backend",
|
||||
"fullstack",
|
||||
"devops",
|
||||
"ci/cd",
|
||||
"deployment",
|
||||
// Generic tools/infra
|
||||
"docker",
|
||||
"kubernetes",
|
||||
"linux",
|
||||
"windows",
|
||||
"macos",
|
||||
"nginx",
|
||||
"apache",
|
||||
"git",
|
||||
"npm",
|
||||
"pnpm",
|
||||
"yarn",
|
||||
"pip",
|
||||
"node",
|
||||
"nodejs",
|
||||
"node.js",
|
||||
// Generic work concepts
|
||||
"meeting",
|
||||
"project",
|
||||
"training",
|
||||
"email",
|
||||
"calendar",
|
||||
"task",
|
||||
"ticket",
|
||||
"code",
|
||||
"data",
|
||||
"file",
|
||||
"folder",
|
||||
"directory",
|
||||
"script",
|
||||
"module",
|
||||
"debug",
|
||||
"deploy",
|
||||
"build",
|
||||
"release",
|
||||
"update",
|
||||
"upgrade",
|
||||
"user",
|
||||
"admin",
|
||||
"system",
|
||||
"service",
|
||||
"process",
|
||||
"job",
|
||||
"worker",
|
||||
// Programming abstractions
|
||||
"function",
|
||||
"class",
|
||||
"method",
|
||||
"variable",
|
||||
"object",
|
||||
"array",
|
||||
"string",
|
||||
"async",
|
||||
"sync",
|
||||
"promise",
|
||||
"callback",
|
||||
"event",
|
||||
"hook",
|
||||
"middleware",
|
||||
"component",
|
||||
"plugin",
|
||||
"extension",
|
||||
"library",
|
||||
"package",
|
||||
"dependency",
|
||||
// Generic descriptors
|
||||
"app",
|
||||
"application",
|
||||
"web",
|
||||
"mobile",
|
||||
"desktop",
|
||||
"browser",
|
||||
"config",
|
||||
"configuration",
|
||||
"settings",
|
||||
"environment",
|
||||
"production",
|
||||
"staging",
|
||||
"error",
|
||||
"bug",
|
||||
"issue",
|
||||
"fix",
|
||||
"patch",
|
||||
"feature",
|
||||
"improvement",
|
||||
]);
|
||||
|
||||
/**
|
||||
* Validate and sanitize LLM extraction output.
|
||||
*/
|
||||
@@ -146,7 +311,7 @@ function validateExtractionResult(raw: Record<string, unknown>): ExtractionResul
|
||||
: undefined,
|
||||
description: typeof e.description === "string" ? e.description : undefined,
|
||||
}))
|
||||
.filter((e) => e.name.length > 0),
|
||||
.filter((e) => e.name.length > 0 && !GENERIC_ENTITY_BLOCKLIST.has(e.name)),
|
||||
|
||||
relationships: relationships
|
||||
.filter(
|
||||
@@ -173,7 +338,7 @@ function validateExtractionResult(raw: Record<string, unknown>): ExtractionResul
|
||||
typeof (t as Record<string, unknown>).name === "string",
|
||||
)
|
||||
.map((t) => ({
|
||||
name: String(t.name).trim().toLowerCase(),
|
||||
name: normalizeTagName(String(t.name)),
|
||||
category: typeof t.category === "string" ? t.category : "topic",
|
||||
}))
|
||||
.filter((t) => t.name.length > 0),
|
||||
|
||||
@@ -875,7 +875,7 @@ async function runAutoCapture(
|
||||
|
||||
for (const text of retained) {
|
||||
allTexts.push(text);
|
||||
allMeta.push({ text, source: "auto-capture", threshold: 0.5, discount: 1.0 });
|
||||
allMeta.push({ text, source: "auto-capture", threshold: 0.65, discount: 1.0 });
|
||||
}
|
||||
for (const text of retainedAssistant) {
|
||||
allTexts.push(text);
|
||||
|
||||
@@ -1526,6 +1526,39 @@ export class Neo4jMemoryClient {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Find tags with exactly 1 TAGGED relationship, older than minAgeDays.
|
||||
* Single-use tags add noise without providing useful cross-memory connections.
|
||||
* Only prunes tags that have had enough time to accrue additional references.
|
||||
*/
|
||||
async findSingleUseTags(
|
||||
minAgeDays: number = 14,
|
||||
limit: number = 500,
|
||||
): Promise<Array<{ id: string; name: string }>> {
|
||||
await this.ensureInitialized();
|
||||
const session = this.driver!.session();
|
||||
try {
|
||||
const cutoffDate = new Date(Date.now() - minAgeDays * 24 * 60 * 60 * 1000).toISOString();
|
||||
const result = await session.run(
|
||||
`MATCH (t:Tag)
|
||||
WHERE t.createdAt < $cutoffDate
|
||||
WITH t
|
||||
MATCH (t)<-[:TAGGED]-(m:Memory)
|
||||
WITH t, count(m) AS usageCount
|
||||
WHERE usageCount = 1
|
||||
RETURN t.id AS id, t.name AS name
|
||||
LIMIT $limit`,
|
||||
{ cutoffDate, limit: neo4j.int(limit) },
|
||||
);
|
||||
return result.records.map((r) => ({
|
||||
id: r.get("id") as string,
|
||||
name: r.get("name") as string,
|
||||
}));
|
||||
} finally {
|
||||
await session.close();
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------
|
||||
// Sleep Cycle: Conflict Detection
|
||||
// --------------------------------------------------------------------------
|
||||
@@ -1864,4 +1897,129 @@ export class Neo4jMemoryClient {
|
||||
}
|
||||
throw lastError;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------
|
||||
// Sleep Cycle: Entity Deduplication
|
||||
// --------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Find entity pairs that are likely duplicates based on name containment.
|
||||
* Returns pairs where one entity name is a substring of another (same type),
|
||||
* which catches the most common dedup patterns:
|
||||
* - "fish speech" → "fish speech s1 mini"
|
||||
* - "aaditya" → "aaditya sukhani"
|
||||
* - "abundent" → "abundent academy"
|
||||
*/
|
||||
async findDuplicateEntityPairs(
|
||||
agentId?: string,
|
||||
limit: number = 200,
|
||||
): Promise<
|
||||
Array<{
|
||||
keepId: string;
|
||||
keepName: string;
|
||||
removeId: string;
|
||||
removeName: string;
|
||||
keepMentions: number;
|
||||
removeMentions: number;
|
||||
}>
|
||||
> {
|
||||
await this.ensureInitialized();
|
||||
const session = this.driver!.session();
|
||||
try {
|
||||
// Find pairs where one name contains the other (same type),
|
||||
// OR one entity's alias matches the other's name.
|
||||
// Keep the entity with more mentions, or the shorter/more canonical name
|
||||
// if mention counts are equal.
|
||||
const result = await session.run(
|
||||
`MATCH (e1:Entity), (e2:Entity)
|
||||
WHERE e1.name < e2.name
|
||||
AND e1.type = e2.type
|
||||
AND size(e1.name) > 2
|
||||
AND size(e2.name) > 2
|
||||
AND (
|
||||
e1.name CONTAINS e2.name
|
||||
OR e2.name CONTAINS e1.name
|
||||
OR ANY(alias IN coalesce(e1.aliases, []) WHERE toLower(alias) = e2.name)
|
||||
OR ANY(alias IN coalesce(e2.aliases, []) WHERE toLower(alias) = e1.name)
|
||||
)
|
||||
WITH e1, e2,
|
||||
coalesce(e1.mentionCount, 0) AS mc1,
|
||||
coalesce(e2.mentionCount, 0) AS mc2
|
||||
RETURN e1.id AS id1, e1.name AS name1, mc1,
|
||||
e2.id AS id2, e2.name AS name2, mc2
|
||||
LIMIT $limit`,
|
||||
{ limit: neo4j.int(limit) },
|
||||
);
|
||||
|
||||
return result.records.map((r) => {
|
||||
const name1 = r.get("name1") as string;
|
||||
const name2 = r.get("name2") as string;
|
||||
const mc1 = (r.get("mc1") as number) ?? 0;
|
||||
const mc2 = (r.get("mc2") as number) ?? 0;
|
||||
const id1 = r.get("id1") as string;
|
||||
const id2 = r.get("id2") as string;
|
||||
|
||||
// Keep the entity with more mentions; if tied, keep the shorter (more canonical) name
|
||||
const keepFirst = mc1 > mc2 || (mc1 === mc2 && name1.length <= name2.length);
|
||||
return {
|
||||
keepId: keepFirst ? id1 : id2,
|
||||
keepName: keepFirst ? name1 : name2,
|
||||
removeId: keepFirst ? id2 : id1,
|
||||
removeName: keepFirst ? name2 : name1,
|
||||
keepMentions: keepFirst ? mc1 : mc2,
|
||||
removeMentions: keepFirst ? mc2 : mc1,
|
||||
};
|
||||
});
|
||||
} finally {
|
||||
await session.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Merge two entities: transfer MENTIONS relationships from source to target,
|
||||
* update mention count, then delete the source entity.
|
||||
* Inter-entity relationships on the source are dropped (they'll be
|
||||
* re-created by future extractions against the canonical entity).
|
||||
*/
|
||||
async mergeEntityPair(keepId: string, removeId: string): Promise<boolean> {
|
||||
await this.ensureInitialized();
|
||||
return this.retryOnTransient(async () => {
|
||||
const session = this.driver!.session();
|
||||
try {
|
||||
const result = await session.executeWrite(async (tx) => {
|
||||
// Transfer MENTIONS relationships from removed entity to kept entity
|
||||
const transferred = await tx.run(
|
||||
`MATCH (remove:Entity {id: $removeId})<-[r:MENTIONS]-(m:Memory)
|
||||
MATCH (keep:Entity {id: $keepId})
|
||||
MERGE (m)-[:MENTIONS]->(keep)
|
||||
DELETE r
|
||||
RETURN count(*) AS transferred`,
|
||||
{ removeId, keepId },
|
||||
);
|
||||
const transferCount = (transferred.records[0]?.get("transferred") as number) ?? 0;
|
||||
|
||||
// Update kept entity's mention count
|
||||
if (transferCount > 0) {
|
||||
await tx.run(
|
||||
`MATCH (e:Entity {id: $keepId})
|
||||
SET e.mentionCount = coalesce(e.mentionCount, 0) + $count,
|
||||
e.lastSeen = $now`,
|
||||
{ keepId, count: neo4j.int(transferCount), now: new Date().toISOString() },
|
||||
);
|
||||
}
|
||||
|
||||
// Delete the removed entity (DETACH removes all remaining relationships)
|
||||
await tx.run(`MATCH (e:Entity {id: $removeId}) DETACH DELETE e`, { removeId });
|
||||
|
||||
return transferCount;
|
||||
});
|
||||
|
||||
return true;
|
||||
} catch {
|
||||
return false;
|
||||
} finally {
|
||||
await session.close();
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,17 +1,18 @@
|
||||
/**
 * Eight-phase sleep cycle for memory consolidation.
|
||||
*
|
||||
* Implements a Pareto-based memory ecosystem where core memory
|
||||
* is bounded to the top 20% of memories by effective score.
|
||||
*
|
||||
* Phases:
|
||||
* 1. DEDUPLICATION - Merge near-duplicate memories (reduce redundancy)
|
||||
* 1d. ENTITY DEDUP - Merge near-duplicate entities (reduce entity bloat)
|
||||
* 2. PARETO SCORING - Calculate effective scores for all memories
|
||||
* 3. CORE PROMOTION - Regular memories above threshold -> core
|
||||
* 4. CORE DEMOTION - Core memories below threshold -> regular
|
||||
* 4. EXTRACTION - Form entity relationships (strengthen connections)
|
||||
* 5. DECAY/PRUNING - Remove old, low-importance memories (forgetting curve)
|
||||
* 6. EXTRACTION - Form entity relationships (strengthen connections)
|
||||
* 7. CLEANUP - Remove orphaned entities/tags (garbage collection)
|
||||
* 6. CLEANUP - Remove orphaned entities/tags (garbage collection)
|
||||
* 7. NOISE CLEANUP - Remove dangerous pattern memories
|
||||
*
|
||||
* Research basis:
|
||||
* - Pareto principle (20/80 rule) for memory tiering
|
||||
@@ -47,6 +48,11 @@ export type SleepCycleResult = {
|
||||
pairsChecked: number;
|
||||
duplicatesMerged: number;
|
||||
};
|
||||
// Phase 1d: Entity Deduplication
|
||||
entityDedup: {
|
||||
pairsFound: number;
|
||||
merged: number;
|
||||
};
|
||||
// Phase 2: Pareto Scoring & Threshold
|
||||
pareto: {
|
||||
totalMemories: number;
|
||||
@@ -74,6 +80,7 @@ export type SleepCycleResult = {
|
||||
cleanup: {
|
||||
entitiesRemoved: number;
|
||||
tagsRemoved: number;
|
||||
singleUseTagsRemoved: number;
|
||||
};
|
||||
// Overall
|
||||
durationMs: number;
|
||||
@@ -104,6 +111,9 @@ export type SleepCycleOptions = {
|
||||
extractionBatchSize?: number; // Memories per batch (default: 50)
|
||||
extractionDelayMs?: number; // Delay between batches (default: 1000)
|
||||
|
||||
// Phase 5: Cleanup
|
||||
singleUseTagMinAgeDays?: number; // Min age before single-use tag pruning (default: 14)
|
||||
|
||||
// Phase 4: Decay
|
||||
decayRetentionThreshold?: number; // Below this, memory is pruned (default: 0.1)
|
||||
decayBaseHalfLifeDays?: number; // Base half-life in days (default: 30)
|
||||
@@ -116,6 +126,7 @@ export type SleepCycleOptions = {
|
||||
| "dedup"
|
||||
| "conflict"
|
||||
| "semanticDedup"
|
||||
| "entityDedup"
|
||||
| "pareto"
|
||||
| "promotion"
|
||||
| "decay"
|
||||
@@ -168,6 +179,7 @@ export async function runSleepCycle(
|
||||
decayCurves,
|
||||
extractionBatchSize = 50,
|
||||
extractionDelayMs = 1000,
|
||||
singleUseTagMinAgeDays = 14,
|
||||
onPhaseStart,
|
||||
onProgress,
|
||||
} = options;
|
||||
@@ -176,6 +188,7 @@ export async function runSleepCycle(
|
||||
dedup: { clustersFound: 0, memoriesMerged: 0 },
|
||||
conflict: { pairsFound: 0, resolved: 0, invalidated: 0 },
|
||||
semanticDedup: { pairsChecked: 0, duplicatesMerged: 0 },
|
||||
entityDedup: { pairsFound: 0, merged: 0 },
|
||||
pareto: {
|
||||
totalMemories: 0,
|
||||
coreMemories: 0,
|
||||
@@ -185,7 +198,7 @@ export async function runSleepCycle(
|
||||
promotion: { candidatesFound: 0, promoted: 0 },
|
||||
decay: { memoriesPruned: 0 },
|
||||
extraction: { total: 0, processed: 0, succeeded: 0, failed: 0 },
|
||||
cleanup: { entitiesRemoved: 0, tagsRemoved: 0 },
|
||||
cleanup: { entitiesRemoved: 0, tagsRemoved: 0, singleUseTagsRemoved: 0 },
|
||||
durationMs: 0,
|
||||
aborted: false,
|
||||
};
|
||||
@@ -419,6 +432,51 @@ export async function runSleepCycle(
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------
|
||||
// Phase 1d: Entity Deduplication
|
||||
// Merge entities where one name is a substring of another (same type).
|
||||
// Catches: "fish speech" → "fish speech s1 mini", "aaditya" → "aaditya sukhani"
|
||||
// Transfers MENTIONS relationships to the canonical entity, then deletes the duplicate.
|
||||
// --------------------------------------------------------------------------
|
||||
if (!abortSignal?.aborted) {
|
||||
onPhaseStart?.("entityDedup");
|
||||
logger.info("memory-neo4j: [sleep] Phase 1d: Entity Deduplication");
|
||||
|
||||
try {
|
||||
const pairs = await db.findDuplicateEntityPairs(agentId);
|
||||
result.entityDedup.pairsFound = pairs.length;
|
||||
|
||||
// Track removed entity IDs to skip cascading merges on already-deleted entities
|
||||
const removedIds = new Set<string>();
|
||||
|
||||
for (const pair of pairs) {
|
||||
if (abortSignal?.aborted) {
|
||||
break;
|
||||
}
|
||||
// Skip if either entity was already removed in a previous merge
|
||||
if (removedIds.has(pair.keepId) || removedIds.has(pair.removeId)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const merged = await db.mergeEntityPair(pair.keepId, pair.removeId);
|
||||
if (merged) {
|
||||
removedIds.add(pair.removeId);
|
||||
result.entityDedup.merged++;
|
||||
onProgress?.(
|
||||
"entityDedup",
|
||||
`Merged "${pair.removeName}" → "${pair.keepName}" (${pair.removeMentions} mentions transferred)`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
logger.info(
|
||||
`memory-neo4j: [sleep] Phase 1d complete — ${result.entityDedup.pairsFound} pairs found, ${result.entityDedup.merged} merged`,
|
||||
);
|
||||
} catch (err) {
|
||||
logger.warn(`memory-neo4j: [sleep] Phase 1d error: ${String(err)}`);
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------
|
||||
// Phase 2: Pareto Scoring & Threshold Calculation
|
||||
// --------------------------------------------------------------------------
|
||||
@@ -438,6 +496,8 @@ export async function runSleepCycle(
|
||||
paretoThreshold = db.calculateParetoThreshold(allScores, 1 - paretoPercentile);
|
||||
result.pareto.threshold = paretoThreshold;
|
||||
|
||||
const otherCount = allScores.filter((s) => s.category === "other").length;
|
||||
|
||||
onProgress?.(
|
||||
"pareto",
|
||||
`Scored ${allScores.length} memories (${result.pareto.coreMemories} core, ${result.pareto.regularMemories} regular)`,
|
||||
@@ -447,6 +507,17 @@ export async function runSleepCycle(
|
||||
`Pareto threshold (top ${paretoPercentile * 100}%): ${paretoThreshold.toFixed(4)}`,
|
||||
);
|
||||
|
||||
if (otherCount > 0) {
|
||||
const otherPct = ((otherCount / allScores.length) * 100).toFixed(1);
|
||||
onProgress?.(
|
||||
"pareto",
|
||||
`⚠️ "other" category: ${otherCount} memories (${otherPct}%) — monitor for conversational noise`,
|
||||
);
|
||||
logger.info(
|
||||
`memory-neo4j: [sleep] "other" category monitor: ${otherCount}/${allScores.length} (${otherPct}%)`,
|
||||
);
|
||||
}
|
||||
|
||||
logger.info(
|
||||
`memory-neo4j: [sleep] Phase 2 complete — threshold=${paretoThreshold.toFixed(4)} for top ${paretoPercentile * 100}%`,
|
||||
);
|
||||
@@ -649,8 +720,23 @@ export async function runSleepCycle(
|
||||
}
|
||||
}
|
||||
|
||||
// Prune single-use tags (only 1 memory reference, older than threshold)
|
||||
// These add noise without providing useful cross-memory connections.
|
||||
if (!abortSignal?.aborted) {
|
||||
const singleUseTags = await db.findSingleUseTags(singleUseTagMinAgeDays);
|
||||
if (singleUseTags.length > 0) {
|
||||
result.cleanup.singleUseTagsRemoved = await db.deleteOrphanTags(
|
||||
singleUseTags.map((t) => t.id),
|
||||
);
|
||||
onProgress?.(
|
||||
"cleanup",
|
||||
`Removed ${result.cleanup.singleUseTagsRemoved} single-use tags (>${singleUseTagMinAgeDays}d old)`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
logger.info(
|
||||
`memory-neo4j: [sleep] Phase 6 complete — ${result.cleanup.entitiesRemoved} entities, ${result.cleanup.tagsRemoved} tags removed`,
|
||||
`memory-neo4j: [sleep] Phase 6 complete — ${result.cleanup.entitiesRemoved} entities, ${result.cleanup.tagsRemoved} orphan tags, ${result.cleanup.singleUseTagsRemoved} single-use tags removed`,
|
||||
);
|
||||
} catch (err) {
|
||||
logger.warn(`memory-neo4j: [sleep] Phase 6 error: ${String(err)}`);
|
||||
|
||||
Reference in New Issue
Block a user