mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-03 03:03:24 -04:00
memory-neo4j: strip channel metadata wrappers, reject system infra messages in attention gate
This commit is contained in:
@@ -82,6 +82,46 @@ describe("passesAttentionGate", () => {
|
||||
);
|
||||
});
|
||||
|
||||
// Every sample below is a system infrastructure message; the attention gate
// is expected to return false for each of them.
it("should reject system infrastructure messages", () => {
  // One representative message per category, checked in this order.
  const infrastructureMessages = [
    // Heartbeat prompts
    "Read HEARTBEAT.md if it exists (workspace context). Follow it strictly.",
    // Pre-compaction flush
    "Pre-compaction memory flush. Store durable memories now.",
    // System cron/exec messages
    "System: [2026-02-06 10:25:00 UTC] Reminder: Check if wa-group-monitor updated",
    // Cron job wrappers
    "[cron:720b01aa-03d1-4888-a2d4-0f0a9e0d7b6c Memory Sleep Cycle] Run the sleep cycle",
    // Gateway restart payloads
    'GatewayRestart:\n{ "kind": "restart", "status": "ok" }',
    // Background task completion
    "[Sat 2026-02-07 01:02 GMT+8] A background task just completed successfully.",
  ];

  for (const message of infrastructureMessages) {
    expect(passesAttentionGate(message)).toBe(false);
  }
});
|
||||
|
||||
// --- Should ACCEPT ---
|
||||
|
||||
it("should accept substantive messages with enough words", () => {
|
||||
@@ -248,6 +288,55 @@ describe("extractUserMessages", () => {
|
||||
const result = extractUserMessages(messages as unknown[]);
|
||||
expect(result).toEqual([]);
|
||||
});
|
||||
|
||||
// A user message wrapped in a Telegram header and a "[message_id: N]" trailer
// should come back as just the text the user typed.
it("should strip Telegram channel metadata and extract raw user text", () => {
  const wrappedContent =
    "[Telegram Tarun (@ts1974_001) id:878224171 +1m 2026-02-06 23:18 GMT+8] I restarted the gateway but it still shows UTC time\n[message_id: 6363]";

  const extracted = extractUserMessages([{ role: "user", content: wrappedContent }]);

  expect(extracted).toEqual(["I restarted the gateway but it still shows UTC time"]);
});
|
||||
|
||||
// Once the Telegram wrapper is removed only "Hi" is left, which is expected
// to be dropped from the result.
it("should strip Telegram wrapper and filter if remaining text is too short", () => {
  const wrappedContent =
    "[Telegram Tarun (@ts1974_001) id:878224171 +1m 2026-02-06 13:32 UTC] Hi\n[message_id: 6302]";

  const extracted = extractUserMessages([{ role: "user", content: wrappedContent }]);

  // "Hi" is < 10 chars after stripping — should be filtered out
  expect(extracted).toEqual([]);
});
|
||||
|
||||
// The content starts with a "[media attached: ...]" line and a reply hint,
// followed by the Telegram-wrapped user text; only the user text should remain.
it("should strip media attachment preamble and keep user text", () => {
  const wrappedContent =
    "[media attached: /path/to/file.jpg (image/jpeg) | /path/to/file.jpg]\nTo send an image back, prefer the message tool.\n[Telegram Tarun (@ts1974_001) id:878224171 +5m 2026-02-06 14:01 UTC] My claim for the business expense\n[message_id: 6334]";

  const extracted = extractUserMessages([{ role: "user", content: wrappedContent }]);

  expect(extracted).toEqual(["My claim for the business expense"]);
});
|
||||
|
||||
// A "System: [timestamp] ..." exec report precedes the Telegram-wrapped user
// text; the expected result contains the user text alone.
it("should strip System exec output prefixes", () => {
  const wrappedContent =
    "System: [2026-01-31 05:44:57 UTC] Exec completed (gentle-s, code 0)\n\n[Telegram User id:123 +1m 2026-01-31 05:46 UTC] I want 4k imax copy of Interstellar\n[message_id: 2098]";

  const extracted = extractUserMessages([{ role: "user", content: wrappedContent }]);

  expect(extracted).toEqual(["I want 4k imax copy of Interstellar"]);
});
|
||||
});
|
||||
|
||||
// ============================================================================
|
||||
|
||||
@@ -872,15 +872,24 @@ export function extractUserMessages(messages: unknown[]): string[] {
|
||||
}
|
||||
}
|
||||
|
||||
// Strip injected context blocks (auto-recall prepends these into user messages)
|
||||
// then filter out noise
|
||||
// Strip injected context, channel metadata wrappers, and system prefixes
|
||||
// so the attention gate sees only the raw user text.
|
||||
return texts
|
||||
.map((t) =>
|
||||
t
|
||||
.replace(/<relevant-memories>[\s\S]*?<\/relevant-memories>\s*/g, "")
|
||||
.replace(/<core-memory-refresh>[\s\S]*?<\/core-memory-refresh>\s*/g, "")
|
||||
.replace(/<system>[\s\S]*?<\/system>\s*/g, "")
|
||||
.trim(),
|
||||
)
|
||||
.map((t) => {
|
||||
let s = t;
|
||||
// Injected context from memory system
|
||||
s = s.replace(/<relevant-memories>[\s\S]*?<\/relevant-memories>\s*/g, "");
|
||||
s = s.replace(/<core-memory-refresh>[\s\S]*?<\/core-memory-refresh>\s*/g, "");
|
||||
s = s.replace(/<system>[\s\S]*?<\/system>\s*/g, "");
|
||||
// Media attachment preamble (appears before Telegram wrapper)
|
||||
s = s.replace(/^\[media attached:[^\]]*\]\s*(?:To send an image[^\n]*\n?)*/i, "");
|
||||
// System exec output blocks (may appear before Telegram wrapper)
|
||||
s = s.replace(/^(?:System:\s*\[[^\]]*\][^\n]*\n?)+/gi, "");
|
||||
// Telegram wrapper — may now be at start after previous strips
|
||||
s = s.replace(/^\s*\[Telegram\s[^\]]+\]\s*/i, "");
|
||||
// "[message_id: NNN]" suffix
|
||||
s = s.replace(/\n?\[message_id:\s*\d+\]\s*$/i, "");
|
||||
return s.trim();
|
||||
})
|
||||
.filter((t) => t.length >= 10);
|
||||
}
|
||||
|
||||
@@ -1184,6 +1184,20 @@ const NOISE_PATTERNS = [
|
||||
/^[\p{Emoji}\s]+$/u,
|
||||
// System/XML markup
|
||||
/^<[a-z-]+>[\s\S]*<\/[a-z-]+>$/i,
|
||||
|
||||
// --- System infrastructure messages (never user-generated) ---
|
||||
// Heartbeat prompts
|
||||
/Read HEARTBEAT\.md if it exists/i,
|
||||
// Pre-compaction flush prompts
|
||||
/^Pre-compaction memory flush/i,
|
||||
// System timestamp messages (cron outputs, reminders, exec reports)
|
||||
/^System:\s*\[/i,
|
||||
// Cron job wrappers
|
||||
/^\[cron:[0-9a-f-]+/i,
|
||||
// Gateway restart JSON payloads
|
||||
/^GatewayRestart:\s*\{/i,
|
||||
// Background task completion reports
|
||||
/^\[\w{3}\s+\d{4}-\d{2}-\d{2}\s.*\]\s*A background task/i,
|
||||
];
|
||||
|
||||
/** Maximum message length — code dumps, logs, etc. are not memories. */
|
||||
|
||||
Reference in New Issue
Block a user