From 91efe2e43269bb126b5fe46d9e6b075bc49f1871 Mon Sep 17 00:00:00 2001 From: Tarun Sukhani Date: Sat, 7 Feb 2026 01:25:19 +0800 Subject: [PATCH] memory-neo4j: tighten attention gate, add gate to memory_store, add cleanup command --- extensions/memory-neo4j/extractor.test.ts | 100 +++++++++++++++++++++- extensions/memory-neo4j/index.ts | 92 +++++++++++++++++++- extensions/memory-neo4j/neo4j-client.ts | 24 ++++++ 3 files changed, 213 insertions(+), 3 deletions(-) diff --git a/extensions/memory-neo4j/extractor.test.ts b/extensions/memory-neo4j/extractor.test.ts index a87140cd79..801eec6856 100644 --- a/extensions/memory-neo4j/extractor.test.ts +++ b/extensions/memory-neo4j/extractor.test.ts @@ -1,14 +1,110 @@ /** - * Tests for extractor.ts — Extraction Logic. + * Tests for extractor.ts and attention gate — Extraction Logic + Auto-capture Filtering. * * Tests exported functions: extractEntities(), extractUserMessages(), runBackgroundExtraction(). + * Tests passesAttentionGate() from index.ts. * Note: validateExtractionResult() is not exported; it is tested indirectly through extractEntities(). - * Note: passesAttentionGate() is defined in index.ts and not exported; cannot be tested directly. 
*/ import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"; import type { ExtractionConfig } from "./config.js"; import { extractUserMessages, extractEntities, runBackgroundExtraction } from "./extractor.js"; +import { passesAttentionGate } from "./index.js"; + +// ============================================================================ +// passesAttentionGate() +// ============================================================================ + +describe("passesAttentionGate", () => { + // --- Should REJECT --- + + it("should reject short messages below MIN_CAPTURE_CHARS", () => { + expect(passesAttentionGate("Hi")).toBe(false); + expect(passesAttentionGate("Yup")).toBe(false); + expect(passesAttentionGate("yes")).toBe(false); + expect(passesAttentionGate("ok")).toBe(false); + expect(passesAttentionGate("")).toBe(false); + }); + + it("should reject noise greetings/acknowledgments", () => { + expect(passesAttentionGate("sounds good")).toBe(false); + expect(passesAttentionGate("Got it")).toBe(false); + expect(passesAttentionGate("thanks!")).toBe(false); + expect(passesAttentionGate("thank you!")).toBe(false); + expect(passesAttentionGate("perfect.")).toBe(false); + }); + + it("should reject messages with fewer than MIN_WORD_COUNT words", () => { + expect(passesAttentionGate("I need those")).toBe(false); // 3 words + expect(passesAttentionGate("yes please do")).toBe(false); // 3 words + expect(passesAttentionGate("that works fine")).toBe(false); // 3 words + }); + + it("should reject short contextual/deictic phrases", () => { + expect(passesAttentionGate("Ok, let me test it out")).toBe(false); + expect(passesAttentionGate("ok great")).toBe(false); + expect(passesAttentionGate("yes please")).toBe(false); + expect(passesAttentionGate("ok sure thanks")).toBe(false); + }); + + it("should reject two-word affirmations", () => { + expect(passesAttentionGate("ok great")).toBe(false); + expect(passesAttentionGate("yes please")).toBe(false); + 
expect(passesAttentionGate("sure thanks")).toBe(false); + expect(passesAttentionGate("cool noted")).toBe(false); + expect(passesAttentionGate("alright fine")).toBe(false); + }); + + it("should reject pure emoji messages", () => { + expect(passesAttentionGate("🎉🎉🎉🎉🎉")).toBe(false); + }); + + it("should reject messages exceeding MAX_CAPTURE_CHARS", () => { + expect(passesAttentionGate("a ".repeat(1500))).toBe(false); + }); + + it("should reject messages with injected memory context tags", () => { + expect( + passesAttentionGate( + "<relevant-memories>some context here for the agent</relevant-memories> and more text after that", + ), + ).toBe(false); + expect( + passesAttentionGate( + "<core-memory-refresh>refreshed data here for the agent</core-memory-refresh> and more text", + ), + ).toBe(false); + }); + + it("should reject XML/system markup", () => { + expect(passesAttentionGate("<system>You are a helpful assistant with context</system>")).toBe( + false, + ); + }); + + // --- Should ACCEPT --- + + it("should accept substantive messages with enough words", () => { + expect(passesAttentionGate("I noticed the LinkedIn posts are not auto-liking")).toBe(true); + expect(passesAttentionGate("Please update the deployment script for the new server")).toBe( + true, + ); + expect(passesAttentionGate("The database migration failed on the staging environment")).toBe( + true, + ); + }); + + it("should accept messages with specific information/preferences", () => { + expect(passesAttentionGate("I prefer using TypeScript over JavaScript")).toBe(true); + expect(passesAttentionGate("My meeting with John is on Thursday")).toBe(true); + expect(passesAttentionGate("The project deadline was moved to March")).toBe(true); + }); + + it("should accept actionable requests with context", () => { + expect(passesAttentionGate("Let's limit the wa-group-monitoring to business hours")).toBe(true); + expect(passesAttentionGate("Can you check the error logs on the production server")).toBe(true); + }); +}); // ============================================================================ // 
extractUserMessages() diff --git a/extensions/memory-neo4j/index.ts b/extensions/memory-neo4j/index.ts index adcd267ebc..66b7b96c66 100644 --- a/extensions/memory-neo4j/index.ts +++ b/extensions/memory-neo4j/index.ts @@ -184,6 +184,19 @@ const memoryNeo4jPlugin = { category?: MemoryCategory; }; + // Attention gate — reject noise even when the agent explicitly stores + if (!passesAttentionGate(text)) { + return { + content: [ + { + type: "text", + text: `Skipped: "${text.slice(0, 60)}" — too short or low-substance for long-term memory.`, + }, + ], + details: { action: "rejected", reason: "attention_gate" }, + }; + } + // 1. Generate embedding const vector = await embeddings.embed(text); @@ -706,6 +719,64 @@ const memoryNeo4jPlugin = { process.exitCode = 1; } }); + + memory + .command("cleanup") + .description( + "Retroactively apply the attention gate — find and remove low-substance memories", + ) + .option("--execute", "Actually delete (default: dry-run preview)") + .option("--agent <id>", "Only clean up memories for a specific agent") + .action(async (opts: { execute?: boolean; agent?: string }) => { + try { + await db.ensureInitialized(); + + // Fetch all memories (id + text) + const agentFilter = opts.agent ? "WHERE m.agentId = $agentId" : ""; + const allMemories = await db.runQuery<{ id: string; text: string; source: string }>( + `MATCH (m:Memory) ${agentFilter} + RETURN m.id AS id, m.text AS text, COALESCE(m.source, 'unknown') AS source + ORDER BY m.createdAt ASC`, + opts.agent ? { agentId: opts.agent } : {}, + ); + + // Run each through the attention gate + const noise: Array<{ id: string; text: string; source: string }> = []; + for (const mem of allMemories) { + if (!passesAttentionGate(mem.text)) { + noise.push(mem); + } + } + + if (noise.length === 0) { + console.log("\nNo low-substance memories found. 
Everything passes the gate."); + return; + } + + console.log( + `\nFound ${noise.length}/${allMemories.length} memories that fail the attention gate:\n`, + ); + + for (const mem of noise) { + const preview = mem.text.length > 80 ? `${mem.text.slice(0, 77)}...` : mem.text; + console.log(` [${mem.source}] "${preview}"`); + } + + if (!opts.execute) { + console.log( + `\nDry run — ${noise.length} memories would be removed. Re-run with --execute to delete.\n`, + ); + return; + } + + // Delete in batch + const deleted = await db.pruneMemories(noise.map((m) => m.id)); + console.log(`\nDeleted ${deleted} low-substance memories.\n`); + } catch (err) { + console.error(`Error: ${err instanceof Error ? err.message : String(err)}`); + process.exitCode = 1; + } + }); }, { commands: [] }, // Adds subcommands to existing "memory" command, no conflict ); @@ -1103,8 +1174,15 @@ const memoryNeo4jPlugin = { // ============================================================================ const NOISE_PATTERNS = [ - // Greetings / acknowledgments + // Greetings / acknowledgments (exact match, with optional punctuation) /^(hi|hey|hello|yo|sup|ok|okay|sure|thanks|thank you|thx|ty|yep|yup|nope|no|yes|yeah|cool|nice|great|got it|sounds good|perfect|alright|fine|noted|ack|kk|k)\s*[.!?]*$/i, + // Two-word affirmations: "ok great", "sounds good", "yes please", etc. + /^(ok|okay|yes|yeah|yep|sure|no|nope|alright|right|fine|cool|nice|great)\s+(great|good|sure|thanks|please|ok|fine|cool|yeah|perfect|noted|absolutely|definitely|exactly)\s*[.!?]*$/i, + // Deictic: messages that are only pronouns/articles/common verbs — no standalone meaning + // e.g. 
"I need those", "let me do it", "ok let me test it out", "I got it" + /^(ok[,.]?\s+)?(i('ll|'m|'d|'ve)?\s+)?(just\s+)?(need|want|got|have|let|let's|let me|give me|send|do|did|try|check|see|look at|test|take|get|go|use)\s+(it|that|this|those|these|them|some|one|the|a|an|me|him|her|us)\s*(out|up|now|then|too|again|later|first|here|there|please)?\s*[.!?]*$/i, + // Short acknowledgments with trailing context: "ok, ..." / "yes, ..." when total is brief + /^(ok|okay|yes|yeah|yep|sure|no|nope|right|alright|fine|cool|nice|great|perfect)[,.]?\s+.{0,20}$/i, // Single-word or near-empty /^\S{0,3}$/, // Pure emoji @@ -1119,6 +1197,9 @@ const MAX_CAPTURE_CHARS = 2000; /** Minimum message length — too short to be meaningful. */ const MIN_CAPTURE_CHARS = 10; +/** Minimum word count — short contextual phrases lack standalone meaning. */ +const MIN_WORD_COUNT = 5; + function passesAttentionGate(text: string): boolean { const trimmed = text.trim(); @@ -1127,6 +1208,12 @@ function passesAttentionGate(text: string): boolean { return false; } + // Word count — short phrases ("I need those") lack context for recall + const wordCount = trimmed.split(/\s+/).length; + if (wordCount < MIN_WORD_COUNT) { + return false; + } + // Injected context from the memory system itself if (trimmed.includes("<relevant-memories>") || trimmed.includes("<core-memory-refresh>")) { return false; @@ -1147,6 +1234,9 @@ function passesAttentionGate(text: string): boolean { return true; } +// Exported for testing +export { passesAttentionGate }; + // ============================================================================ // Export // ============================================================================ diff --git a/extensions/memory-neo4j/neo4j-client.ts b/extensions/memory-neo4j/neo4j-client.ts index a4c492917a..c64c7cea89 100644 --- a/extensions/memory-neo4j/neo4j-client.ts +++ b/extensions/memory-neo4j/neo4j-client.ts @@ -179,6 +179,30 @@ export class Neo4jMemoryClient { } } + /** + * Run a raw Cypher query and return records as plain 
objects. + * Keys in the RETURN clause become object properties. + */ + async runQuery<T extends Record<string, unknown>>( + cypher: string, + params: Record<string, unknown> = {}, + ): Promise<T[]> { + await this.ensureInitialized(); + const session = this.driver!.session(); + try { + const result = await session.run(cypher, params); + return result.records.map((r) => { + const obj: Record<string, unknown> = {}; + for (const key of r.keys) { + obj[key as string] = r.get(key as string); + } + return obj as T; + }); + } finally { + await session.close(); + } + } + async verifyConnection(): Promise<boolean> { if (!this.driver) { return false;