memory-neo4j: strip channel metadata wrappers, reject system infra messages in attention gate

This commit is contained in:
Tarun Sukhani
2026-02-07 07:16:42 +08:00
parent 7674fa8c15
commit 6747967b83
3 changed files with 121 additions and 9 deletions

View File

@@ -82,6 +82,46 @@ describe("passesAttentionGate", () => {
);
});
it("should reject system infrastructure messages", () => {
  // One sample per category of machine-generated traffic. The gate is
  // expected to turn every one of them away, checked in the listed order.
  const infrastructureMessages = [
    // Heartbeat prompt
    "Read HEARTBEAT.md if it exists (workspace context). Follow it strictly.",
    // Pre-compaction flush prompt
    "Pre-compaction memory flush. Store durable memories now.",
    // System cron/exec message
    "System: [2026-02-06 10:25:00 UTC] Reminder: Check if wa-group-monitor updated",
    // Cron job wrapper
    "[cron:720b01aa-03d1-4888-a2d4-0f0a9e0d7b6c Memory Sleep Cycle] Run the sleep cycle",
    // Gateway restart payload
    'GatewayRestart:\n{ "kind": "restart", "status": "ok" }',
    // Background task completion report
    "[Sat 2026-02-07 01:02 GMT+8] A background task just completed successfully.",
  ];

  for (const text of infrastructureMessages) {
    expect(passesAttentionGate(text)).toBe(false);
  }
});
// --- Should ACCEPT ---
it("should accept substantive messages with enough words", () => {
@@ -248,6 +288,55 @@ describe("extractUserMessages", () => {
const result = extractUserMessages(messages as unknown[]);
expect(result).toEqual([]);
});
it("should strip Telegram channel metadata and extract raw user text", () => {
  // User turn wrapped in a bracketed Telegram header and a trailing
  // "[message_id: N]" line.
  const wrappedTurn = {
    role: "user",
    content:
      "[Telegram Tarun (@ts1974_001) id:878224171 +1m 2026-02-06 23:18 GMT+8] I restarted the gateway but it still shows UTC time\n[message_id: 6363]",
  };

  const extracted = extractUserMessages([wrappedTurn]);

  // Only the text between the header and the trailer should remain.
  expect(extracted).toEqual(["I restarted the gateway but it still shows UTC time"]);
});
it("should strip Telegram wrapper and filter if remaining text is too short", () => {
  // Same Telegram wrapping as a normal turn, but the body is just "Hi".
  const wrappedTurn = {
    role: "user",
    content:
      "[Telegram Tarun (@ts1974_001) id:878224171 +1m 2026-02-06 13:32 UTC] Hi\n[message_id: 6302]",
  };

  const extracted = extractUserMessages([wrappedTurn]);

  // Once the wrapper is gone only "Hi" is left, which is shorter than
  // 10 characters, so nothing should come back.
  expect(extracted).toEqual([]);
});
it("should strip media attachment preamble and keep user text", () => {
  // The "[media attached: ...]" line and its "To send an image back" hint
  // precede the Telegram header in this turn.
  const turnWithAttachment = {
    role: "user",
    content:
      "[media attached: /path/to/file.jpg (image/jpeg) | /path/to/file.jpg]\nTo send an image back, prefer the message tool.\n[Telegram Tarun (@ts1974_001) id:878224171 +5m 2026-02-06 14:01 UTC] My claim for the business expense\n[message_id: 6334]",
  };

  const extracted = extractUserMessages([turnWithAttachment]);

  // Preamble, Telegram header and message_id trailer should all be removed.
  expect(extracted).toEqual(["My claim for the business expense"]);
});
it("should strip System exec output prefixes", () => {
  // A "System: [timestamp] ..." exec report, a blank line, then the
  // Telegram-wrapped user text.
  const turnWithExecReport = {
    role: "user",
    content:
      "System: [2026-01-31 05:44:57 UTC] Exec completed (gentle-s, code 0)\n\n[Telegram User id:123 +1m 2026-01-31 05:46 UTC] I want 4k imax copy of Interstellar\n[message_id: 2098]",
  };

  const extracted = extractUserMessages([turnWithExecReport]);

  // Only the user's own sentence should be left.
  expect(extracted).toEqual(["I want 4k imax copy of Interstellar"]);
});
});
// ============================================================================

View File

@@ -872,15 +872,24 @@ export function extractUserMessages(messages: unknown[]): string[] {
}
}
// Strip injected context blocks (auto-recall prepends these into user messages)
// then filter out noise
// Strip injected context, channel metadata wrappers, and system prefixes
// so the attention gate sees only the raw user text.
return texts
.map((t) =>
t
.replace(/<relevant-memories>[\s\S]*?<\/relevant-memories>\s*/g, "")
.replace(/<core-memory-refresh>[\s\S]*?<\/core-memory-refresh>\s*/g, "")
.replace(/<system>[\s\S]*?<\/system>\s*/g, "")
.trim(),
)
.map((t) => {
let s = t;
// Injected context from memory system
s = s.replace(/<relevant-memories>[\s\S]*?<\/relevant-memories>\s*/g, "");
s = s.replace(/<core-memory-refresh>[\s\S]*?<\/core-memory-refresh>\s*/g, "");
s = s.replace(/<system>[\s\S]*?<\/system>\s*/g, "");
// Media attachment preamble (appears before Telegram wrapper)
s = s.replace(/^\[media attached:[^\]]*\]\s*(?:To send an image[^\n]*\n?)*/i, "");
// System exec output blocks (may appear before Telegram wrapper)
s = s.replace(/^(?:System:\s*\[[^\]]*\][^\n]*\n?)+/gi, "");
// Telegram wrapper — may now be at start after previous strips
s = s.replace(/^\s*\[Telegram\s[^\]]+\]\s*/i, "");
// "[message_id: NNN]" suffix
s = s.replace(/\n?\[message_id:\s*\d+\]\s*$/i, "");
return s.trim();
})
.filter((t) => t.length >= 10);
}

View File

@@ -1184,6 +1184,20 @@ const NOISE_PATTERNS = [
/^[\p{Emoji}\s]+$/u,
// System/XML markup
/^<[a-z-]+>[\s\S]*<\/[a-z-]+>$/i,
// --- System infrastructure messages (never user-generated) ---
// Heartbeat prompts
/Read HEARTBEAT\.md if it exists/i,
// Pre-compaction flush prompts
/^Pre-compaction memory flush/i,
// System timestamp messages (cron outputs, reminders, exec reports)
/^System:\s*\[/i,
// Cron job wrappers
/^\[cron:[0-9a-f-]+/i,
// Gateway restart JSON payloads
/^GatewayRestart:\s*\{/i,
// Background task completion reports
/^\[\w{3}\s+\d{4}-\d{2}-\d{2}\s.*\]\s*A background task/i,
];
/** Maximum message length — code dumps, logs, etc. are not memories. */