memory-neo4j: make semantic dedup cap and LLM concurrency configurable

The hardcoded MAX_SEMANTIC_DEDUP_PAIRS (50) and LLM_CONCURRENCY (8) were designed for expensive cloud LLM calls. For local Ollama inference these caps are unnecessarily restrictive, especially during long sleep windows.

- Add maxSemanticDedupPairs to SleepCycleOptions (default: 500)
- Add llmConcurrency to SleepCycleOptions (default: 8)
- Add --max-semantic-pairs and --concurrency CLI flags
- Raise the semantic dedup default from 50 to 500 pairs

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-03 03:03:24 -04:00 · 2026-02-10 19:19:24 +08:00
parent e0e98c2c0d
commit a5ebbe4b55
2 changed files with 42 additions and 7 deletions
--- a/extensions/memory-neo4j/extractor.ts
+++ b/extensions/memory-neo4j/extractor.ts
@@ -601,6 +601,12 @@ export type SleepCycleOptions = {
  paretoPercentile?: number; // Top N% for core (default: 0.2 = top 20%)
  promotionMinAgeDays?: number; // Min age before promotion (default: 7)

+  // Phase 1b: Semantic Dedup
+  maxSemanticDedupPairs?: number; // Max LLM-checked pairs (default: 500)
+
+  // Concurrency
+  llmConcurrency?: number; // Parallel LLM calls (default: 8, match OLLAMA_NUM_PARALLEL)
+
  // Phase 5: Extraction
  extractionBatchSize?: number; // Memories per batch (default: 50)
  extractionDelayMs?: number; // Delay between batches (default: 1000)
@@ -675,6 +681,8 @@ export async function runSleepCycle(
    abortSignal,
    dedupThreshold = 0.95,
    skipSemanticDedup = false,
+    maxSemanticDedupPairs = 500,
+    llmConcurrency = 8,
    paretoPercentile = 0.2,
    promotionMinAgeDays = 7,
    decayRetentionThreshold = 0.1,
@@ -701,7 +709,7 @@ export async function runSleepCycle(
    aborted: false,
  };

-  const LLM_CONCURRENCY = 8;
+  const LLM_CONCURRENCY = llmConcurrency;

  // --------------------------------------------------------------------------
  // Phase 1: Deduplication (Optimized - combined vector + semantic dedup)
@@ -809,17 +817,16 @@ export async function runSleepCycle(
        // Cap the number of LLM-checked pairs to prevent sleep cycle timeouts.
        // Sort by similarity descending so higher-similarity pairs (more likely
        // to be duplicates) are checked first.
-        const MAX_SEMANTIC_DEDUP_PAIRS = 50;
-        if (allPairs.length > MAX_SEMANTIC_DEDUP_PAIRS) {
+        if (allPairs.length > maxSemanticDedupPairs) {
          allPairs.sort((a, b) => (b.similarity ?? 0) - (a.similarity ?? 0));
-          const skipped = allPairs.length - MAX_SEMANTIC_DEDUP_PAIRS;
-          allPairs.length = MAX_SEMANTIC_DEDUP_PAIRS;
+          const skipped = allPairs.length - maxSemanticDedupPairs;
+          allPairs.length = maxSemanticDedupPairs;
          onProgress?.(
            "semanticDedup",
-            `Capped at ${MAX_SEMANTIC_DEDUP_PAIRS} pairs (${skipped} lower-similarity pairs skipped)`,
+            `Capped at ${maxSemanticDedupPairs} pairs (${skipped} lower-similarity pairs skipped)`,
          );
          logger.info(
-            `memory-neo4j: [sleep] Phase 1b capped to ${MAX_SEMANTIC_DEDUP_PAIRS} pairs (${skipped} skipped)`,
+            `memory-neo4j: [sleep] Phase 1b capped to ${maxSemanticDedupPairs} pairs (${skipped} skipped)`,
          );
        }

--- a/extensions/memory-neo4j/index.ts
+++ b/extensions/memory-neo4j/index.ts
@@ -510,6 +510,11 @@ const memoryNeo4jPlugin = {
          .option("--decay-half-life <days>", "Base half-life in days (default: 30)")
          .option("--batch-size <n>", "Extraction batch size (default: 50)")
          .option("--delay <ms>", "Delay between extraction batches in ms (default: 1000)")
+          .option("--max-semantic-pairs <n>", "Max LLM-checked semantic dedup pairs (default: 500)")
+          .option(
+            "--concurrency <n>",
+            "Parallel LLM calls — match OLLAMA_NUM_PARALLEL (default: 8)",
+          )
          .option(
            "--skip-semantic",
            "Skip LLM-based semantic dedup (Phase 1b) and conflict detection (Phase 1c)",
@@ -524,6 +529,8 @@ const memoryNeo4jPlugin = {
              decayHalfLife?: string;
              batchSize?: string;
              delay?: string;
+              maxSemanticPairs?: string;
+              concurrency?: string;
              skipSemantic?: boolean;
            }) => {
              console.log("\n🌙 Memory Sleep Cycle");
@@ -595,12 +602,33 @@ const memoryNeo4jPlugin = {
                  return;
                }

+                const maxSemanticPairs = opts.maxSemanticPairs
+                  ? parseInt(opts.maxSemanticPairs, 10)
+                  : undefined;
+                if (
+                  maxSemanticPairs != null &&
+                  (Number.isNaN(maxSemanticPairs) || maxSemanticPairs <= 0)
+                ) {
+                  console.error("Error: --max-semantic-pairs must be greater than 0");
+                  process.exitCode = 1;
+                  return;
+                }
+
+                const concurrency = opts.concurrency ? parseInt(opts.concurrency, 10) : undefined;
+                if (concurrency != null && (Number.isNaN(concurrency) || concurrency <= 0)) {
+                  console.error("Error: --concurrency must be greater than 0");
+                  process.exitCode = 1;
+                  return;
+                }
+
                await db.ensureInitialized();

                const result = await runSleepCycle(db, embeddings, extractionConfig, api.logger, {
                  agentId: opts.agent,
                  dedupThreshold: opts.dedupThreshold ? parseFloat(opts.dedupThreshold) : undefined,
                  skipSemanticDedup: opts.skipSemantic === true,
+                  maxSemanticDedupPairs: maxSemanticPairs,
+                  llmConcurrency: concurrency,
                  paretoPercentile: pareto,
                  promotionMinAgeDays: promotionMinAge,
                  decayRetentionThreshold: decayThreshold,