memory-neo4j: strip injected context blocks, add core category, widen embeddings context

This commit is contained in:
Tarun Sukhani
2026-02-07 00:46:08 +08:00
parent c002574371
commit bcbeba400e
6 changed files with 69 additions and 18 deletions

View File

@@ -195,6 +195,7 @@ export const memoryNeo4jConfigSchema = {
if (typeof neo4jRaw.uri !== "string" || !neo4jRaw.uri) {
throw new Error("neo4j.uri is required");
}
const neo4jUri = neo4jRaw.uri as string;
// Validate URI scheme — must be a valid Neo4j connection protocol
const VALID_NEO4J_SCHEMES = [
"bolt://",
@@ -204,9 +205,9 @@ export const memoryNeo4jConfigSchema = {
"neo4j+s://",
"neo4j+ssc://",
];
if (!VALID_NEO4J_SCHEMES.some((scheme) => neo4jRaw.uri.startsWith(scheme))) {
if (!VALID_NEO4J_SCHEMES.some((scheme) => neo4jUri.startsWith(scheme))) {
throw new Error(
`neo4j.uri must start with a valid scheme (${VALID_NEO4J_SCHEMES.join(", ")}), got: "${neo4jRaw.uri}"`,
`neo4j.uri must start with a valid scheme (${VALID_NEO4J_SCHEMES.join(", ")}), got: "${neo4jUri}"`,
);
}

View File

@@ -44,11 +44,11 @@ export class Embeddings {
/**
* Truncate text to fit within the model's context length.
* Uses a conservative ~3 chars/token estimate to leave headroom.
* Uses a rough ~4 chars/token estimate; this is a heuristic rather than a strict bound, so text averaging fewer than 4 chars/token can still exceed the limit.
* Truncates at a word boundary when possible.
*/
private truncateToContext(text: string): string {
const maxChars = this.contextLength * 3;
const maxChars = this.contextLength * 4;
if (text.length <= maxChars) {
return text;
}

View File

@@ -70,9 +70,24 @@ describe("extractUserMessages", () => {
expect(result).toEqual(["1234567890", "This is longer than ten characters"]);
});
it("should filter out messages containing <relevant-memories>", () => {
it("should strip <relevant-memories> blocks and keep user content", () => {
const messages = [
{ role: "user", content: "Normal user message that is long enough here" },
{
role: "user",
content:
"<relevant-memories>Some injected context</relevant-memories>\n\nWhat does Tarun prefer for meetings?",
},
];
const result = extractUserMessages(messages);
expect(result).toEqual([
"Normal user message that is long enough here",
"What does Tarun prefer for meetings?",
]);
});
it("should drop message if only injected context remains after stripping", () => {
const messages = [
{
role: "user",
content:
@@ -80,18 +95,32 @@ describe("extractUserMessages", () => {
},
];
const result = extractUserMessages(messages);
expect(result).toEqual(["Normal user message that is long enough here"]);
expect(result).toEqual([]);
});
it("should filter out messages containing <system>", () => {
it("should strip <system> blocks and keep user content", () => {
const messages = [
{ role: "user", content: "<system>System markup that should be filtered</system>" },
{ role: "user", content: "Normal user message that is long enough here" },
{
role: "user",
content: "<system>System markup</system>\n\nNormal user message that is long enough here",
},
];
const result = extractUserMessages(messages);
expect(result).toEqual(["Normal user message that is long enough here"]);
});
it("should strip <core-memory-refresh> blocks and keep user content", () => {
  // The refresh block sits in front of the real question; only the question should survive.
  const content =
    "<core-memory-refresh>refreshed memories</core-memory-refresh>\n\nTell me about the project status";
  const extracted = extractUserMessages([{ role: "user", content }]);
  expect(extracted).toEqual(["Tell me about the project status"]);
});
it("should handle null and non-object messages gracefully", () => {
const messages = [
null,

View File

@@ -382,7 +382,7 @@ export async function runBackgroundExtraction(
}
// ============================================================================
// Sleep Cycle - Five Phase Memory Consolidation
// Sleep Cycle - Seven Phase Memory Consolidation
// ============================================================================
/**
@@ -872,8 +872,15 @@ export function extractUserMessages(messages: unknown[]): string[] {
}
}
// Filter out noise
return texts.filter(
(t) => t.length >= 10 && !t.includes("<relevant-memories>") && !t.includes("<system>"),
);
// Strip injected context blocks (auto-recall prepends these into user messages)
// then filter out noise
return texts
.map((t) =>
t
.replace(/<relevant-memories>[\s\S]*?<\/relevant-memories>\s*/g, "")
.replace(/<core-memory-refresh>[\s\S]*?<\/core-memory-refresh>\s*/g, "")
.replace(/<system>[\s\S]*?<\/system>\s*/g, "")
.trim(),
)
.filter((t) => t.length >= 10);
}

View File

@@ -16,7 +16,9 @@ import type {
SearchSignalResult,
StoreMemoryInput,
} from "./schema.js";
import { escapeLucene, validateRelationshipType } from "./schema.js";
import { ALLOWED_RELATIONSHIP_TYPES, escapeLucene, validateRelationshipType } from "./schema.js";
// Relationship types joined with "|" so the result can be dropped into a Cypher
// relationship pattern such as `-[r:${RELATIONSHIP_TYPE_PATTERN}]-`.
// NOTE(review): this string is interpolated directly into query text (not passed as a
// parameter), so ALLOWED_RELATIONSHIP_TYPES must only ever contain trusted, hard-coded
// identifiers — confirm against schema.js.
const RELATIONSHIP_TYPE_PATTERN = [...ALLOWED_RELATIONSHIP_TYPES].join("|");
// ============================================================================
// Types
@@ -526,7 +528,7 @@ export class Neo4jMemoryClient {
UNWIND $entityIds AS eid
// 1-hop: Entity → relationship → Entity ← MENTIONS ← Memory
OPTIONAL MATCH (e:Entity {id: eid})-[r1:RELATED_TO|KNOWS|WORKS_AT|LIVES_AT|MARRIED_TO|PREFERS|DECIDED]-(e2:Entity)
OPTIONAL MATCH (e:Entity {id: eid})-[r1:${RELATIONSHIP_TYPE_PATTERN}]-(e2:Entity)
WHERE coalesce(r1.confidence, 0.7) >= $firingThreshold
OPTIONAL MATCH (e2)<-[rm:MENTIONS]-(m:Memory)
WHERE m IS NOT NULL ${agentFilter}

View File

@@ -6,7 +6,7 @@
// Node Types
// ============================================================================
export type MemoryCategory = "preference" | "fact" | "decision" | "entity" | "other";
export type MemoryCategory = "core" | "preference" | "fact" | "decision" | "entity" | "other";
export type EntityType = "person" | "organization" | "location" | "event" | "concept";
export type ExtractionStatus = "pending" | "complete" | "failed" | "skipped";
export type MemorySource = "user" | "auto-capture" | "memory-watcher" | "import";
@@ -21,8 +21,13 @@ export type MemoryNode = {
createdAt: string;
updatedAt: string;
extractionStatus: ExtractionStatus;
extractionRetries: number;
agentId: string;
sessionKey?: string;
retrievalCount: number;
lastRetrievedAt?: string;
promotedAt?: string;
demotedAt?: string;
};
export type EntityNode = {
@@ -123,7 +128,14 @@ export type MergeEntityInput = {
// Constants
// ============================================================================
export const MEMORY_CATEGORIES = ["preference", "fact", "decision", "entity", "other"] as const;
/**
 * Runtime list of memory category values.
 * Mirrors the members of the `MemoryCategory` union type declared in this file;
 * keep the two in sync when adding or removing a category.
 */
export const MEMORY_CATEGORIES = [
"core",
"preference",
"fact",
"decision",
"entity",
"other",
] as const;
export const ENTITY_TYPES = ["person", "organization", "location", "event", "concept"] as const;