memory-neo4j: make semantic dedup cap and LLM concurrency configurable

The hardcoded MAX_SEMANTIC_DEDUP_PAIRS (50) and LLM_CONCURRENCY (8) were
designed for expensive cloud LLM calls. For local Ollama inference these
caps are unnecessarily restrictive, especially during long sleep windows.

- Add maxSemanticDedupPairs to SleepCycleOptions (default: 500)
- Add llmConcurrency to SleepCycleOptions (default: 8)
- Add --max-semantic-pairs and --concurrency CLI flags
- Raise the default semantic dedup cap from 50 to 500 pairs

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Tarun Sukhani
2026-02-10 19:19:24 +08:00
parent e0e98c2c0d
commit a5ebbe4b55
2 changed files with 42 additions and 7 deletions

View File

@@ -601,6 +601,12 @@ export type SleepCycleOptions = {
paretoPercentile?: number; // Top N% for core (default: 0.2 = top 20%)
promotionMinAgeDays?: number; // Min age before promotion (default: 7)
// Phase 1b: Semantic Dedup
maxSemanticDedupPairs?: number; // Max LLM-checked pairs (default: 500)
// Concurrency
llmConcurrency?: number; // Parallel LLM calls (default: 8, match OLLAMA_NUM_PARALLEL)
// Phase 5: Extraction
extractionBatchSize?: number; // Memories per batch (default: 50)
extractionDelayMs?: number; // Delay between batches (default: 1000)
@@ -675,6 +681,8 @@ export async function runSleepCycle(
abortSignal,
dedupThreshold = 0.95,
skipSemanticDedup = false,
maxSemanticDedupPairs = 500,
llmConcurrency = 8,
paretoPercentile = 0.2,
promotionMinAgeDays = 7,
decayRetentionThreshold = 0.1,
@@ -701,7 +709,7 @@ export async function runSleepCycle(
aborted: false,
};
const LLM_CONCURRENCY = 8;
const LLM_CONCURRENCY = llmConcurrency;
// --------------------------------------------------------------------------
// Phase 1: Deduplication (Optimized - combined vector + semantic dedup)
@@ -809,17 +817,16 @@ export async function runSleepCycle(
// Cap the number of LLM-checked pairs to prevent sleep cycle timeouts.
// Sort by similarity descending so higher-similarity pairs (more likely
// to be duplicates) are checked first.
const MAX_SEMANTIC_DEDUP_PAIRS = 50;
if (allPairs.length > MAX_SEMANTIC_DEDUP_PAIRS) {
if (allPairs.length > maxSemanticDedupPairs) {
allPairs.sort((a, b) => (b.similarity ?? 0) - (a.similarity ?? 0));
const skipped = allPairs.length - MAX_SEMANTIC_DEDUP_PAIRS;
allPairs.length = MAX_SEMANTIC_DEDUP_PAIRS;
const skipped = allPairs.length - maxSemanticDedupPairs;
allPairs.length = maxSemanticDedupPairs;
onProgress?.(
"semanticDedup",
`Capped at ${MAX_SEMANTIC_DEDUP_PAIRS} pairs (${skipped} lower-similarity pairs skipped)`,
`Capped at ${maxSemanticDedupPairs} pairs (${skipped} lower-similarity pairs skipped)`,
);
logger.info(
`memory-neo4j: [sleep] Phase 1b capped to ${MAX_SEMANTIC_DEDUP_PAIRS} pairs (${skipped} skipped)`,
`memory-neo4j: [sleep] Phase 1b capped to ${maxSemanticDedupPairs} pairs (${skipped} skipped)`,
);
}

View File

@@ -510,6 +510,11 @@ const memoryNeo4jPlugin = {
.option("--decay-half-life <days>", "Base half-life in days (default: 30)")
.option("--batch-size <n>", "Extraction batch size (default: 50)")
.option("--delay <ms>", "Delay between extraction batches in ms (default: 1000)")
.option("--max-semantic-pairs <n>", "Max LLM-checked semantic dedup pairs (default: 500)")
.option(
"--concurrency <n>",
"Parallel LLM calls — match OLLAMA_NUM_PARALLEL (default: 8)",
)
.option(
"--skip-semantic",
"Skip LLM-based semantic dedup (Phase 1b) and conflict detection (Phase 1c)",
@@ -524,6 +529,8 @@ const memoryNeo4jPlugin = {
decayHalfLife?: string;
batchSize?: string;
delay?: string;
maxSemanticPairs?: string;
concurrency?: string;
skipSemantic?: boolean;
}) => {
console.log("\n🌙 Memory Sleep Cycle");
@@ -595,12 +602,33 @@ const memoryNeo4jPlugin = {
return;
}
const maxSemanticPairs = opts.maxSemanticPairs
? parseInt(opts.maxSemanticPairs, 10)
: undefined;
if (
maxSemanticPairs != null &&
(Number.isNaN(maxSemanticPairs) || maxSemanticPairs <= 0)
) {
console.error("Error: --max-semantic-pairs must be greater than 0");
process.exitCode = 1;
return;
}
const concurrency = opts.concurrency ? parseInt(opts.concurrency, 10) : undefined;
if (concurrency != null && (Number.isNaN(concurrency) || concurrency <= 0)) {
console.error("Error: --concurrency must be greater than 0");
process.exitCode = 1;
return;
}
await db.ensureInitialized();
const result = await runSleepCycle(db, embeddings, extractionConfig, api.logger, {
agentId: opts.agent,
dedupThreshold: opts.dedupThreshold ? parseFloat(opts.dedupThreshold) : undefined,
skipSemanticDedup: opts.skipSemantic === true,
maxSemanticDedupPairs: maxSemanticPairs,
llmConcurrency: concurrency,
paretoPercentile: pareto,
promotionMinAgeDays: promotionMinAge,
decayRetentionThreshold: decayThreshold,