mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-03 03:03:24 -04:00
- Add composite index on (agentId, category) for faster filtered queries - Combine graph search into single UNION Cypher query (was 2 sequential) - Parallelize conflict resolution with LLM_CONCURRENCY chunks - Batch entity operations (merge, mentions, relationships, tags, category, extraction status) into a single managed transaction - Make auto-capture fire-and-forget with shared captureMessage helper - Extract attention-gate.ts and message-utils.ts modules from index.ts and extractor.ts for better separation of concerns - Update tests to match new batched/combined APIs Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
148 lines
4.9 KiB
TypeScript
148 lines
4.9 KiB
TypeScript
/**
 * Message extraction utilities for the memory pipeline.
 *
 * Extracts and cleans user/assistant messages from the raw event.messages
 * array, stripping channel wrappers, injected context, tool output, and
 * other noise so downstream consumers (attention gate, memory store) see
 * only the substantive text.
 */
|
|
|
|
// ============================================================================
// User Message Extraction
// ============================================================================
|
|
|
|
/**
 * Collect the text of every user-role entry in the event.messages array.
 *
 * Entries that are not objects, or whose `role` is not "user", are skipped.
 * `content` may be a plain string or an array of content blocks; from an
 * array only blocks with `type === "text"` and a string `text` are used.
 *
 * @param messages - Raw message entries; each is inspected defensively.
 * @returns Each collected text passed through `stripMessageWrappers`, keeping
 *          only results that are at least 10 characters long.
 */
export function extractUserMessages(messages: unknown[]): string[] {
  const collected: string[] = [];

  for (const entry of messages) {
    if (typeof entry !== "object" || entry === null) {
      continue;
    }

    // Only user messages are relevant for auto-capture.
    const record = entry as Record<string, unknown>;
    if (record.role !== "user") {
      continue;
    }

    const body = record.content;
    if (typeof body === "string") {
      collected.push(body);
    } else if (Array.isArray(body)) {
      for (const part of body as unknown[]) {
        if (typeof part !== "object" || part === null) {
          continue;
        }
        if (!("type" in part) || (part as Record<string, unknown>).type !== "text") {
          continue;
        }
        if (!("text" in part)) {
          continue;
        }
        const value = (part as Record<string, unknown>).text;
        if (typeof value === "string") {
          collected.push(value);
        }
      }
    }
  }

  // Strip wrappers first; the length check applies to the cleaned text.
  const cleaned: string[] = [];
  for (const raw of collected) {
    const stripped = stripMessageWrappers(raw);
    if (stripped.length >= 10) {
      cleaned.push(stripped);
    }
  }
  return cleaned;
}
|
|
|
|
/**
 * Remove injected context, channel metadata wrappers, and system prefixes
 * from a message so only the raw user text remains.
 * Exported for use by the cleanup command.
 *
 * @param text - The message text as received.
 * @returns The text with every matching wrapper removed, then trimmed.
 */
export function stripMessageWrappers(text: string): string {
  // Applied strictly in this order: several patterns are anchored to the
  // start or end of the string and only match once earlier strips have run.
  const wrapperPatterns: readonly RegExp[] = [
    // Injected context from memory system
    /<relevant-memories>[\s\S]*?<\/relevant-memories>\s*/g,
    /<core-memory-refresh>[\s\S]*?<\/core-memory-refresh>\s*/g,
    /<system>[\s\S]*?<\/system>\s*/g,
    // File attachments (PDFs, images, etc. forwarded inline by channels)
    /<file\b[^>]*>[\s\S]*?<\/file>\s*/g,
    // Media attachment preamble (appears before Telegram wrapper)
    /^\[media attached:[^\]]*\]\s*(?:To send an image[^\n]*\n?)*/i,
    // System exec output blocks (may appear before Telegram wrapper)
    /^(?:System:\s*\[[^\]]*\][^\n]*\n?)+/gi,
    // Telegram wrapper — may now be at start after previous strips
    /^\s*\[Telegram\s[^\]]+\]\s*/i,
    // "[message_id: NNN]" suffix (Telegram)
    /\n?\[message_id:\s*\d+\]\s*$/i,
    // Slack wrapper — "[Slack <workspace> #channel @user] MESSAGE [slack message id: ...]"
    /^\s*\[Slack\s[^\]]+\]\s*/i,
    /\n?\[slack message id:\s*[^\]]*\]\s*$/i,
  ];

  let remaining = text;
  for (const pattern of wrapperPatterns) {
    remaining = remaining.replace(pattern, "");
  }
  return remaining.trim();
}
|
|
|
|
// ============================================================================
// Assistant Message Extraction
// ============================================================================
|
|
|
|
/**
 * Remove tool-use, thinking, and code-output blocks from assistant text,
 * leaving only the substantive assistant prose.
 *
 * Only fully closed `<tag>…</tag>` pairs are removed, together with any
 * whitespace that directly follows the closing tag.
 *
 * @param text - The assistant message text as received.
 * @returns The text with all matching blocks removed, then trimmed.
 */
export function stripAssistantWrappers(text: string): string {
  const noisePatterns: readonly RegExp[] = [
    // Tool-use / tool-result / function_call blocks
    /<tool_use>[\s\S]*?<\/tool_use>\s*/g,
    /<tool_result>[\s\S]*?<\/tool_result>\s*/g,
    /<function_call>[\s\S]*?<\/function_call>\s*/g,
    // Thinking tags
    /<thinking>[\s\S]*?<\/thinking>\s*/g,
    /<antThinking>[\s\S]*?<\/antThinking>\s*/g,
    // Code execution output
    /<code_output>[\s\S]*?<\/code_output>\s*/g,
  ];

  const withoutNoise = noisePatterns.reduce(
    (current, pattern) => current.replace(pattern, ""),
    text,
  );
  return withoutNoise.trim();
}
|
|
|
|
/**
 * Collect the text of every assistant-role entry in the event.messages array.
 *
 * Entries that are not objects, or whose `role` is not "assistant", are
 * skipped. `content` may be a plain string or an array of content blocks;
 * from an array only blocks with `type === "text"` and a string `text` are used.
 *
 * @param messages - Raw message entries; each is inspected defensively.
 * @returns Each collected text passed through `stripAssistantWrappers`,
 *          keeping only results that are at least 10 characters long.
 */
export function extractAssistantMessages(messages: unknown[]): string[] {
  // A content block counts only if it is tagged "text" and carries a string.
  const isTextBlock = (part: unknown): part is { text: string } =>
    typeof part === "object" &&
    part !== null &&
    "type" in part &&
    (part as Record<string, unknown>).type === "text" &&
    "text" in part &&
    typeof (part as Record<string, unknown>).text === "string";

  const rawTexts = messages.flatMap((candidate): string[] => {
    if (candidate === null || typeof candidate !== "object") {
      return [];
    }

    const message = candidate as Record<string, unknown>;
    if (message.role !== "assistant") {
      return [];
    }

    const payload = message.content;
    if (typeof payload === "string") {
      return [payload];
    }
    if (!Array.isArray(payload)) {
      return [];
    }
    return (payload as unknown[]).filter(isTextBlock).map((part) => part.text);
  });

  return rawTexts
    .map((raw) => stripAssistantWrappers(raw))
    .filter((clean) => clean.length >= 10);
}
|