memory-neo4j: tighten attention gate, add gate to memory_store, add cleanup command

This commit is contained in:
Tarun Sukhani
2026-02-07 01:25:19 +08:00
parent ae1d35aab3
commit 91efe2e432
3 changed files with 213 additions and 3 deletions

View File

@@ -1,14 +1,110 @@
/**
* Tests for extractor.ts — Extraction Logic.
* Tests for extractor.ts and attention gate — Extraction Logic + Auto-capture Filtering.
*
* Tests exported functions: extractEntities(), extractUserMessages(), runBackgroundExtraction().
* Tests passesAttentionGate() from index.ts.
* Note: validateExtractionResult() is not exported; it is tested indirectly through extractEntities().
* Note: passesAttentionGate() is defined in index.ts and exported from there so it can be tested directly.
*/
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
import type { ExtractionConfig } from "./config.js";
import { extractUserMessages, extractEntities, runBackgroundExtraction } from "./extractor.js";
import { passesAttentionGate } from "./index.js";
// ============================================================================
// passesAttentionGate()
// ============================================================================
describe("passesAttentionGate", () => {
  /** Assert that the gate returns `expected` for every message in `messages`, in order. */
  const expectGate = (messages: readonly string[], expected: boolean): void => {
    for (const message of messages) {
      expect(passesAttentionGate(message)).toBe(expected);
    }
  };
  /** Every listed message must be filtered out by the gate. */
  const expectRejected = (...messages: string[]): void => expectGate(messages, false);
  /** Every listed message must be let through by the gate. */
  const expectAccepted = (...messages: string[]): void => expectGate(messages, true);

  // --- Should REJECT ---
  it("should reject short messages below MIN_CAPTURE_CHARS", () => {
    expectRejected("Hi", "Yup", "yes", "ok", "");
  });

  it("should reject noise greetings/acknowledgments", () => {
    expectRejected("sounds good", "Got it", "thanks!", "thank you!", "perfect.");
  });

  it("should reject messages with fewer than MIN_WORD_COUNT words", () => {
    // Each of these is only 3 words long.
    expectRejected("I need those", "yes please do", "that works fine");
  });

  it("should reject short contextual/deictic phrases", () => {
    expectRejected("Ok, let me test it out", "ok great", "yes please", "ok sure thanks");
  });

  it("should reject two-word affirmations", () => {
    expectRejected("ok great", "yes please", "sure thanks", "cool noted", "alright fine");
  });

  it("should reject pure emoji messages", () => {
    expectRejected("🎉🎉🎉🎉🎉");
  });

  it("should reject messages exceeding MAX_CAPTURE_CHARS", () => {
    const oversized = "a ".repeat(1500);
    expectRejected(oversized);
  });

  it("should reject messages with injected memory context tags", () => {
    expectRejected(
      "<relevant-memories>some context here for the agent</relevant-memories> and more text after that",
      "<core-memory-refresh>refreshed data here for the agent</core-memory-refresh> and more text",
    );
  });

  it("should reject XML/system markup", () => {
    expectRejected("<system>You are a helpful assistant with context</system>");
  });

  // --- Should ACCEPT ---
  it("should accept substantive messages with enough words", () => {
    expectAccepted(
      "I noticed the LinkedIn posts are not auto-liking",
      "Please update the deployment script for the new server",
      "The database migration failed on the staging environment",
    );
  });

  it("should accept messages with specific information/preferences", () => {
    expectAccepted(
      "I prefer using TypeScript over JavaScript",
      "My meeting with John is on Thursday",
      "The project deadline was moved to March",
    );
  });

  it("should accept actionable requests with context", () => {
    expectAccepted(
      "Let's limit the wa-group-monitoring to business hours",
      "Can you check the error logs on the production server",
    );
  });
});
// ============================================================================
// extractUserMessages()

View File

@@ -184,6 +184,19 @@ const memoryNeo4jPlugin = {
category?: MemoryCategory;
};
// Attention gate — reject noise even when the agent explicitly stores
if (!passesAttentionGate(text)) {
return {
content: [
{
type: "text",
text: `Skipped: "${text.slice(0, 60)}" — too short or low-substance for long-term memory.`,
},
],
details: { action: "rejected", reason: "attention_gate" },
};
}
// 1. Generate embedding
const vector = await embeddings.embed(text);
@@ -706,6 +719,64 @@ const memoryNeo4jPlugin = {
process.exitCode = 1;
}
});
memory
  .command("cleanup")
  .description(
    "Retroactively apply the attention gate — find and remove low-substance memories",
  )
  .option("--execute", "Actually delete (default: dry-run preview)")
  .option("--agent <id>", "Only clean up memories for a specific agent")
  .action(async (opts: { execute?: boolean; agent?: string }) => {
    /** Row shape produced by the listing query below. */
    type MemoryRow = { id: string; text: string; source: string };

    try {
      await db.ensureInitialized();

      // List id/text/source of every Memory node (optionally restricted to one agent),
      // ordered by createdAt ascending. The agent id is passed as a query parameter.
      const whereClause = opts.agent ? "WHERE m.agentId = $agentId" : "";
      const cypher = [
        `MATCH (m:Memory) ${whereClause}`,
        "RETURN m.id AS id, m.text AS text, COALESCE(m.source, 'unknown') AS source",
        "ORDER BY m.createdAt ASC",
      ].join("\n");
      const queryParams = opts.agent ? { agentId: opts.agent } : {};
      const allMemories = await db.runQuery<MemoryRow>(cypher, queryParams);

      // Keep only the memories that the attention gate rejects.
      const noise: MemoryRow[] = allMemories.filter((mem) => !passesAttentionGate(mem.text));

      if (noise.length === 0) {
        console.log("\nNo low-substance memories found. Everything passes the gate.");
        return;
      }

      console.log(
        `\nFound ${noise.length}/${allMemories.length} memories that fail the attention gate:\n`,
      );
      for (const { source, text } of noise) {
        // Cap the preview at 80 characters (77 characters of text plus an ellipsis).
        let preview = text;
        if (text.length > 80) {
          preview = `${text.slice(0, 77)}...`;
        }
        console.log(` [${source}] "${preview}"`);
      }

      // Without --execute this is a preview only; nothing is deleted.
      if (!opts.execute) {
        console.log(
          `\nDry run — ${noise.length} memories would be removed. Re-run with --execute to delete.\n`,
        );
        return;
      }

      // Remove all flagged memories in a single call and report the value it returns.
      const noiseIds = noise.map((m) => m.id);
      const deleted = await db.pruneMemories(noiseIds);
      console.log(`\nDeleted ${deleted} low-substance memories.\n`);
    } catch (err) {
      // Report the failure and signal a non-zero exit status without throwing further.
      console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
      process.exitCode = 1;
    }
  });
},
{ commands: [] }, // Adds subcommands to existing "memory" command, no conflict
);
@@ -1103,8 +1174,15 @@ const memoryNeo4jPlugin = {
// ============================================================================
const NOISE_PATTERNS = [
// Greetings / acknowledgments
// Greetings / acknowledgments (exact match, with optional punctuation)
/^(hi|hey|hello|yo|sup|ok|okay|sure|thanks|thank you|thx|ty|yep|yup|nope|no|yes|yeah|cool|nice|great|got it|sounds good|perfect|alright|fine|noted|ack|kk|k)\s*[.!?]*$/i,
// Two-word affirmations: "ok great", "sounds good", "yes please", etc.
/^(ok|okay|yes|yeah|yep|sure|no|nope|alright|right|fine|cool|nice|great)\s+(great|good|sure|thanks|please|ok|fine|cool|yeah|perfect|noted|absolutely|definitely|exactly)\s*[.!?]*$/i,
// Deictic: messages that are only pronouns/articles/common verbs — no standalone meaning
// e.g. "I need those", "let me do it", "ok let me test it out", "I got it"
/^(ok[,.]?\s+)?(i('ll|'m|'d|'ve)?\s+)?(just\s+)?(need|want|got|have|let|let's|let me|give me|send|do|did|try|check|see|look at|test|take|get|go|use)\s+(it|that|this|those|these|them|some|one|the|a|an|me|him|her|us)\s*(out|up|now|then|too|again|later|first|here|there|please)?\s*[.!?]*$/i,
// Short acknowledgments with trailing context: "ok, ..." / "yes, ..." when total is brief
/^(ok|okay|yes|yeah|yep|sure|no|nope|right|alright|fine|cool|nice|great|perfect)[,.]?\s+.{0,20}$/i,
// Single-word or near-empty
/^\S{0,3}$/,
// Pure emoji
@@ -1119,6 +1197,9 @@ const MAX_CAPTURE_CHARS = 2000;
/**
 * Minimum message length — too short to be meaningful.
 * Used by passesAttentionGate() as a lower bound on message length.
 */
const MIN_CAPTURE_CHARS = 10;
/**
 * Minimum word count — short contextual phrases lack standalone meaning.
 * passesAttentionGate() splits the trimmed text on whitespace and rejects the
 * message when it yields fewer words than this (e.g. "I need those").
 */
const MIN_WORD_COUNT = 5;
function passesAttentionGate(text: string): boolean {
const trimmed = text.trim();
@@ -1127,6 +1208,12 @@ function passesAttentionGate(text: string): boolean {
return false;
}
// Word count — short phrases ("I need those") lack context for recall
const wordCount = trimmed.split(/\s+/).length;
if (wordCount < MIN_WORD_COUNT) {
return false;
}
// Injected context from the memory system itself
if (trimmed.includes("<relevant-memories>") || trimmed.includes("<core-memory-refresh>")) {
return false;
@@ -1147,6 +1234,9 @@ function passesAttentionGate(text: string): boolean {
return true;
}
// Exported for testing
export { passesAttentionGate };
// ============================================================================
// Export
// ============================================================================

View File

@@ -179,6 +179,30 @@ export class Neo4jMemoryClient {
}
}
/**
 * Execute a raw Cypher statement and convert each result record into a plain object.
 *
 * Each key of a record (an alias from the RETURN clause) becomes a property of the
 * resulting object, holding whatever `record.get(key)` yields for it.
 *
 * @param cypher - Cypher text to execute.
 * @param params - Parameters passed to the session together with the Cypher text; empty by default.
 * @returns One object per returned record, cast to `T` without any runtime validation.
 */
async runQuery<T extends Record<string, unknown>>(
  cypher: string,
  params: Record<string, unknown> = {},
): Promise<T[]> {
  await this.ensureInitialized();
  const session = this.driver!.session();
  try {
    const { records } = await session.run(cypher, params);
    const rows: T[] = [];
    for (const record of records) {
      const row: Record<string, unknown> = {};
      for (const key of record.keys) {
        const column = key as string;
        row[column] = record.get(column);
      }
      rows.push(row as T);
    }
    return rows;
  } finally {
    // Close the session whether the query succeeded or threw.
    await session.close();
  }
}
async verifyConnection(): Promise<boolean> {
if (!this.driver) {
return false;