memory-neo4j: tighten attention gate, add gate to memory_store, add cleanup command

2026-02-19 18:39:20 -05:00 · 2026-02-07 01:25:19 +08:00
parent ae1d35aab3
commit 91efe2e432
3 changed files with 213 additions and 3 deletions
--- a/extensions/memory-neo4j/extractor.test.ts
+++ b/extensions/memory-neo4j/extractor.test.ts
@@ -1,14 +1,110 @@
 /**
- * Tests for extractor.ts — Extraction Logic.
+ * Tests for extractor.ts and attention gate — Extraction Logic + Auto-capture Filtering.
 *
 * Tests exported functions: extractEntities(), extractUserMessages(), runBackgroundExtraction().
+ * Tests passesAttentionGate() from index.ts.
 * Note: validateExtractionResult() is not exported; it is tested indirectly through extractEntities().
- * Note: passesAttentionGate() is defined in index.ts and not exported; cannot be tested directly.
 */

 import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
 import type { ExtractionConfig } from "./config.js";
 import { extractUserMessages, extractEntities, runBackgroundExtraction } from "./extractor.js";
+import { passesAttentionGate } from "./index.js";
+
+// ============================================================================
+// passesAttentionGate()
+// ============================================================================
+
+describe("passesAttentionGate", () => {
+  // --- Should REJECT (gate returns false) ---
+
+  it("should reject short messages below MIN_CAPTURE_CHARS", () => {
+    expect(passesAttentionGate("Hi")).toBe(false);
+    expect(passesAttentionGate("Yup")).toBe(false);
+    expect(passesAttentionGate("yes")).toBe(false);
+    expect(passesAttentionGate("ok")).toBe(false);
+    expect(passesAttentionGate("")).toBe(false); // empty input
+  });
+
+  it("should reject noise greetings/acknowledgments", () => {
+    expect(passesAttentionGate("sounds good")).toBe(false); // note: every input in this test also has fewer than MIN_WORD_COUNT (5) words
+    expect(passesAttentionGate("Got it")).toBe(false);
+    expect(passesAttentionGate("thanks!")).toBe(false);
+    expect(passesAttentionGate("thank you!")).toBe(false);
+    expect(passesAttentionGate("perfect.")).toBe(false);
+  });
+
+  it("should reject messages with fewer than MIN_WORD_COUNT words", () => {
+    expect(passesAttentionGate("I need those")).toBe(false); // 3 words, below MIN_WORD_COUNT (5)
+    expect(passesAttentionGate("yes please do")).toBe(false); // 3 words
+    expect(passesAttentionGate("that works fine")).toBe(false); // 3 words
+  });
+
+  it("should reject short contextual/deictic phrases", () => {
+    expect(passesAttentionGate("Ok, let me test it out")).toBe(false); // 6 words, so the word-count floor is cleared; presumably caught by the short "ok, ..." noise pattern
+    expect(passesAttentionGate("ok great")).toBe(false); // also asserted in the two-word affirmations test
+    expect(passesAttentionGate("yes please")).toBe(false); // also asserted in the two-word affirmations test
+    expect(passesAttentionGate("ok sure thanks")).toBe(false); // 3 words
+  });
+
+  it("should reject two-word affirmations", () => {
+    expect(passesAttentionGate("ok great")).toBe(false);
+    expect(passesAttentionGate("yes please")).toBe(false);
+    expect(passesAttentionGate("sure thanks")).toBe(false);
+    expect(passesAttentionGate("cool noted")).toBe(false);
+    expect(passesAttentionGate("alright fine")).toBe(false);
+  });
+
+  it("should reject pure emoji messages", () => {
+    expect(passesAttentionGate("🎉🎉🎉🎉🎉")).toBe(false);
+  });
+
+  it("should reject messages exceeding MAX_CAPTURE_CHARS", () => {
+    expect(passesAttentionGate("a ".repeat(1500))).toBe(false); // 3000 chars, above MAX_CAPTURE_CHARS (2000)
+  });
+
+  it("should reject messages with injected memory context tags", () => {
+    expect(
+      passesAttentionGate(
+        "<relevant-memories>some context here for the agent</relevant-memories> and more text after that",
+      ),
+    ).toBe(false);
+    expect(
+      passesAttentionGate(
+        "<core-memory-refresh>refreshed data here for the agent</core-memory-refresh> and more text",
+      ),
+    ).toBe(false);
+  });
+
+  it("should reject XML/system markup", () => {
+    expect(passesAttentionGate("<system>You are a helpful assistant with context</system>")).toBe(
+      false,
+    );
+  });
+
+  // --- Should ACCEPT (gate returns true) ---
+
+  it("should accept substantive messages with enough words", () => {
+    expect(passesAttentionGate("I noticed the LinkedIn posts are not auto-liking")).toBe(true);
+    expect(passesAttentionGate("Please update the deployment script for the new server")).toBe(
+      true,
+    );
+    expect(passesAttentionGate("The database migration failed on the staging environment")).toBe(
+      true,
+    );
+  });
+
+  it("should accept messages with specific information/preferences", () => {
+    expect(passesAttentionGate("I prefer using TypeScript over JavaScript")).toBe(true); // 6 words
+    expect(passesAttentionGate("My meeting with John is on Thursday")).toBe(true);
+    expect(passesAttentionGate("The project deadline was moved to March")).toBe(true);
+  });
+
+  it("should accept actionable requests with context", () => {
+    expect(passesAttentionGate("Let's limit the wa-group-monitoring to business hours")).toBe(true);
+    expect(passesAttentionGate("Can you check the error logs on the production server")).toBe(true);
+  });
+});

 // ============================================================================
 // extractUserMessages()
--- a/extensions/memory-neo4j/index.ts
+++ b/extensions/memory-neo4j/index.ts
@@ -184,6 +184,19 @@ const memoryNeo4jPlugin = {
              category?: MemoryCategory;
            };

+            // Attention gate — reject noise even when the agent explicitly stores
+            if (!passesAttentionGate(text)) {
+              return {
+                content: [
+                  {
+                    type: "text",
+                    text: `Skipped: "${text.slice(0, 60)}" — too short or low-substance for long-term memory.`,
+                  },
+                ],
+                details: { action: "rejected", reason: "attention_gate" },
+              };
+            }
+
            // 1. Generate embedding
            const vector = await embeddings.embed(text);

@@ -706,6 +719,64 @@ const memoryNeo4jPlugin = {
              process.exitCode = 1;
            }
          });
+
+        memory
+          .command("cleanup")
+          .description(
+            "Retroactively apply the attention gate — find and remove low-substance memories",
+          )
+          .option("--execute", "Actually delete (default: dry-run preview)")
+          .option("--agent <id>", "Only clean up memories for a specific agent")
+          .action(async (opts: { execute?: boolean; agent?: string }) => {
+            try {
+              await db.ensureInitialized();
+
+              // Fetch id, text and source of every memory (optionally one agent's), oldest first
+              const agentFilter = opts.agent ? "WHERE m.agentId = $agentId" : ""; // fixed clause text; the agent id itself is bound as the $agentId parameter
+              const allMemories = await db.runQuery<{ id: string; text: string; source: string }>(
+                `MATCH (m:Memory) ${agentFilter}
+                 RETURN m.id AS id, m.text AS text, COALESCE(m.source, 'unknown') AS source
+                 ORDER BY m.createdAt ASC`,
+                opts.agent ? { agentId: opts.agent } : {},
+              );
+
+              // Collect the memories that fail the attention gate
+              const noise: Array<{ id: string; text: string; source: string }> = [];
+              for (const mem of allMemories) {
+                if (!passesAttentionGate(mem.text)) {
+                  noise.push(mem);
+                }
+              }
+
+              if (noise.length === 0) {
+                console.log("\nNo low-substance memories found. Everything passes the gate.");
+                return;
+              }
+
+              console.log(
+                `\nFound ${noise.length}/${allMemories.length} memories that fail the attention gate:\n`,
+              );
+
+              for (const mem of noise) {
+                const preview = mem.text.length > 80 ? `${mem.text.slice(0, 77)}...` : mem.text; // cap the preview at 80 chars including the ellipsis
+                console.log(`  [${mem.source}] "${preview}"`);
+              }
+
+              if (!opts.execute) {
+                console.log(
+                  `\nDry run — ${noise.length} memories would be removed. Re-run with --execute to delete.\n`,
+                );
+                return;
+              }
+
+              // Delete every flagged memory with a single pruneMemories() call
+              const deleted = await db.pruneMemories(noise.map((m) => m.id));
+              console.log(`\nDeleted ${deleted} low-substance memories.\n`);
+            } catch (err) {
+              console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
+              process.exitCode = 1;
+            }
+          });
      },
      { commands: [] }, // Adds subcommands to existing "memory" command, no conflict
    );
@@ -1103,8 +1174,15 @@ const memoryNeo4jPlugin = {
 // ============================================================================

 const NOISE_PATTERNS = [
-  // Greetings / acknowledgments
+  // Greetings / acknowledgments (exact match, with optional punctuation)
  /^(hi|hey|hello|yo|sup|ok|okay|sure|thanks|thank you|thx|ty|yep|yup|nope|no|yes|yeah|cool|nice|great|got it|sounds good|perfect|alright|fine|noted|ack|kk|k)\s*[.!?]*$/i,
+  // Two-word affirmations: "ok great", "sure thanks", "yes please", etc. ("sounds good" is handled by the exact-match pattern above)
+  /^(ok|okay|yes|yeah|yep|sure|no|nope|alright|right|fine|cool|nice|great)\s+(great|good|sure|thanks|please|ok|fine|cool|yeah|perfect|noted|absolutely|definitely|exactly)\s*[.!?]*$/i,
+  // Deictic: messages that are only pronouns/articles/common verbs — no standalone meaning
+  // e.g. "I need those", "I got it", "give me that", "ok, I just need this now"
+  /^(ok[,.]?\s+)?(i('ll|'m|'d|'ve)?\s+)?(just\s+)?(need|want|got|have|let|let's|let me|give me|send|do|did|try|check|see|look at|test|take|get|go|use)\s+(it|that|this|those|these|them|some|one|the|a|an|me|him|her|us)\s*(out|up|now|then|too|again|later|first|here|there|please)?\s*[.!?]*$/i,
+  // Short acknowledgments with trailing context: "ok, ..." / "yes, ..." when total is brief
+  /^(ok|okay|yes|yeah|yep|sure|no|nope|right|alright|fine|cool|nice|great|perfect)[,.]?\s+.{0,20}$/i,
  // Single-word or near-empty
  /^\S{0,3}$/,
  // Pure emoji
@@ -1119,6 +1197,9 @@ const MAX_CAPTURE_CHARS = 2000;
 /** Minimum message length — too short to be meaningful. */
 const MIN_CAPTURE_CHARS = 10;

+/** Minimum whitespace-delimited word count — shorter phrases lack standalone meaning (text without spaces counts as one word). */
+const MIN_WORD_COUNT = 5;
+
 function passesAttentionGate(text: string): boolean {
  const trimmed = text.trim();

@@ -1127,6 +1208,12 @@ function passesAttentionGate(text: string): boolean {
    return false;
  }

+  // Word count — short phrases ("I need those") lack context for recall
+  const wordCount = trimmed.split(/\s+/).length;
+  if (wordCount < MIN_WORD_COUNT) {
+    return false;
+  }
+
  // Injected context from the memory system itself
  if (trimmed.includes("<relevant-memories>") || trimmed.includes("<core-memory-refresh>")) {
    return false;
@@ -1147,6 +1234,9 @@ function passesAttentionGate(text: string): boolean {
  return true;
 }

+// Exported for testing
+export { passesAttentionGate };
+
 // ============================================================================
 // Export
 // ============================================================================
--- a/extensions/memory-neo4j/neo4j-client.ts
+++ b/extensions/memory-neo4j/neo4j-client.ts
@@ -179,6 +179,30 @@ export class Neo4jMemoryClient {
    }
  }

+  /**
+   * Run a raw Cypher query in its own session (always closed) and return one plain object per record.
+   * Each key in the RETURN clause becomes a property holding the value from `record.get(key)`, unconverted.
+   */
+  async runQuery<T extends Record<string, unknown>>(
+    cypher: string,
+    params: Record<string, unknown> = {},
+  ): Promise<T[]> {
+    await this.ensureInitialized();
+    const session = this.driver!.session(); // non-null assertion: presumably ensureInitialized() has set the driver — TODO confirm
+    try {
+      const result = await session.run(cypher, params);
+      return result.records.map((r) => {
+        const obj: Record<string, unknown> = {};
+        for (const key of r.keys) {
+          obj[key as string] = r.get(key as string);
+        }
+        return obj as T; // cast only — the record shape is not validated against T
+      });
+    } finally {
+      await session.close();
+    }
+  }
+
  async verifyConnection(): Promise<boolean> {
    if (!this.driver) {
      return false;