mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-03 03:03:24 -04:00
Sync adabot changes on top of origin/main
Includes: - memory-neo4j: four-phase sleep cycle (dedup, decay, extraction, cleanup) - memory-neo4j: full plugin implementation with hybrid search - memory-lancedb: updates and benchmarks - OpenSpec workflow skills and commands - Session memory hooks - Various CLI and config improvements Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
85
extensions/memory-lancedb/benchmark.mjs
Normal file
85
extensions/memory-lancedb/benchmark.mjs
Normal file
@@ -0,0 +1,85 @@
|
||||
#!/usr/bin/env node
|
||||
/**
|
||||
* LanceDB performance benchmark
|
||||
*/
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
import OpenAI from "openai";
|
||||
|
||||
const LANCEDB_PATH = "/home/tsukhani/.openclaw/memory/lancedb";
|
||||
const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
|
||||
|
||||
const openai = new OpenAI({ apiKey: OPENAI_API_KEY });
|
||||
|
||||
async function embed(text) {
|
||||
const start = Date.now();
|
||||
const response = await openai.embeddings.create({
|
||||
model: "text-embedding-3-small",
|
||||
input: text,
|
||||
});
|
||||
const embedTime = Date.now() - start;
|
||||
return { vector: response.data[0].embedding, embedTime };
|
||||
}
|
||||
|
||||
async function main() {
|
||||
console.log("📊 LanceDB Performance Benchmark");
|
||||
console.log("================================\n");
|
||||
|
||||
// Connect
|
||||
const connectStart = Date.now();
|
||||
const db = await lancedb.connect(LANCEDB_PATH);
|
||||
const table = await db.openTable("memories");
|
||||
const connectTime = Date.now() - connectStart;
|
||||
console.log(`Connection time: ${connectTime}ms`);
|
||||
|
||||
const count = await table.countRows();
|
||||
console.log(`Total memories: ${count}\n`);
|
||||
|
||||
// Test queries
|
||||
const queries = [
|
||||
"Tarun's preferences",
|
||||
"What is the OpenRouter API key location?",
|
||||
"meeting schedule",
|
||||
"Abundent Academy training",
|
||||
"slate blue",
|
||||
];
|
||||
|
||||
console.log("Search benchmarks (5 runs each, limit=5):\n");
|
||||
|
||||
for (const query of queries) {
|
||||
const times = [];
|
||||
let embedTime = 0;
|
||||
|
||||
for (let i = 0; i < 5; i++) {
|
||||
const { vector, embedTime: et } = await embed(query);
|
||||
embedTime = et; // Last one
|
||||
|
||||
const searchStart = Date.now();
|
||||
const _results = await table.vectorSearch(vector).limit(5).toArray();
|
||||
const searchTime = Date.now() - searchStart;
|
||||
times.push(searchTime);
|
||||
}
|
||||
|
||||
const avg = Math.round(times.reduce((a, b) => a + b, 0) / times.length);
|
||||
const min = Math.min(...times);
|
||||
const max = Math.max(...times);
|
||||
|
||||
console.log(`"${query}"`);
|
||||
console.log(` Embedding: ${embedTime}ms`);
|
||||
console.log(` Search: avg=${avg}ms, min=${min}ms, max=${max}ms`);
|
||||
console.log("");
|
||||
}
|
||||
|
||||
// Raw vector search (no embedding)
|
||||
console.log("\nRaw vector search (pre-computed embedding):");
|
||||
const { vector } = await embed("test query");
|
||||
const rawTimes = [];
|
||||
for (let i = 0; i < 10; i++) {
|
||||
const start = Date.now();
|
||||
await table.vectorSearch(vector).limit(5).toArray();
|
||||
rawTimes.push(Date.now() - start);
|
||||
}
|
||||
const avgRaw = Math.round(rawTimes.reduce((a, b) => a + b, 0) / rawTimes.length);
|
||||
console.log(` avg=${avgRaw}ms, min=${Math.min(...rawTimes)}ms, max=${Math.max(...rawTimes)}ms`);
|
||||
}
|
||||
|
||||
main().catch(console.error);
|
||||
@@ -2,6 +2,20 @@ import fs from "node:fs";
|
||||
import { homedir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
|
||||
export type AutoCaptureConfig = {
|
||||
enabled: boolean;
|
||||
/** LLM provider for memory extraction: "openrouter" (default) or "openai" */
|
||||
provider?: "openrouter" | "openai";
|
||||
/** LLM model for memory extraction (default: google/gemini-2.0-flash-001) */
|
||||
model?: string;
|
||||
/** API key for the LLM provider (supports ${ENV_VAR} syntax) */
|
||||
apiKey?: string;
|
||||
/** Base URL for the LLM provider (default: https://openrouter.ai/api/v1) */
|
||||
baseUrl?: string;
|
||||
/** Maximum messages to send for extraction (default: 10) */
|
||||
maxMessages?: number;
|
||||
};
|
||||
|
||||
export type MemoryConfig = {
|
||||
embedding: {
|
||||
provider: "openai";
|
||||
@@ -9,16 +23,29 @@ export type MemoryConfig = {
|
||||
apiKey: string;
|
||||
};
|
||||
dbPath?: string;
|
||||
autoCapture?: boolean;
|
||||
/** @deprecated Use autoCapture object instead. Boolean true enables with defaults. */
|
||||
autoCapture?: boolean | AutoCaptureConfig;
|
||||
autoRecall?: boolean;
|
||||
captureMaxChars?: number;
|
||||
coreMemory?: {
|
||||
enabled?: boolean;
|
||||
/** Maximum number of core memories to load */
|
||||
maxEntries?: number;
|
||||
/** Minimum importance threshold for core memories */
|
||||
minImportance?: number;
|
||||
};
|
||||
};
|
||||
|
||||
export const MEMORY_CATEGORIES = ["preference", "fact", "decision", "entity", "other"] as const;
|
||||
export const MEMORY_CATEGORIES = [
|
||||
"preference",
|
||||
"fact",
|
||||
"decision",
|
||||
"entity",
|
||||
"other",
|
||||
"core",
|
||||
] as const;
|
||||
export type MemoryCategory = (typeof MEMORY_CATEGORIES)[number];
|
||||
|
||||
const DEFAULT_MODEL = "text-embedding-3-small";
|
||||
export const DEFAULT_CAPTURE_MAX_CHARS = 500;
|
||||
const LEGACY_STATE_DIRS: string[] = [];
|
||||
|
||||
function resolveDefaultDbPath(): string {
|
||||
@@ -93,7 +120,7 @@ export const memoryConfigSchema = {
|
||||
const cfg = value as Record<string, unknown>;
|
||||
assertAllowedKeys(
|
||||
cfg,
|
||||
["embedding", "dbPath", "autoCapture", "autoRecall", "captureMaxChars"],
|
||||
["embedding", "dbPath", "autoCapture", "autoRecall", "coreMemory"],
|
||||
"memory config",
|
||||
);
|
||||
|
||||
@@ -105,13 +132,41 @@ export const memoryConfigSchema = {
|
||||
|
||||
const model = resolveEmbeddingModel(embedding);
|
||||
|
||||
const captureMaxChars =
|
||||
typeof cfg.captureMaxChars === "number" ? Math.floor(cfg.captureMaxChars) : undefined;
|
||||
if (
|
||||
typeof captureMaxChars === "number" &&
|
||||
(captureMaxChars < 100 || captureMaxChars > 10_000)
|
||||
) {
|
||||
throw new Error("captureMaxChars must be between 100 and 10000");
|
||||
// Parse autoCapture (supports boolean for backward compat, or object for LLM config)
|
||||
let autoCapture: MemoryConfig["autoCapture"];
|
||||
if (cfg.autoCapture === false) {
|
||||
autoCapture = false;
|
||||
} else if (cfg.autoCapture === true || cfg.autoCapture === undefined) {
|
||||
// Legacy boolean or default — enable with defaults
|
||||
autoCapture = { enabled: true };
|
||||
} else if (typeof cfg.autoCapture === "object" && !Array.isArray(cfg.autoCapture)) {
|
||||
const ac = cfg.autoCapture as Record<string, unknown>;
|
||||
assertAllowedKeys(
|
||||
ac,
|
||||
["enabled", "provider", "model", "apiKey", "baseUrl", "maxMessages"],
|
||||
"autoCapture config",
|
||||
);
|
||||
autoCapture = {
|
||||
enabled: ac.enabled !== false,
|
||||
provider:
|
||||
ac.provider === "openai" || ac.provider === "openrouter" ? ac.provider : "openrouter",
|
||||
model: typeof ac.model === "string" ? ac.model : undefined,
|
||||
apiKey: typeof ac.apiKey === "string" ? resolveEnvVars(ac.apiKey) : undefined,
|
||||
baseUrl: typeof ac.baseUrl === "string" ? ac.baseUrl : undefined,
|
||||
maxMessages: typeof ac.maxMessages === "number" ? ac.maxMessages : undefined,
|
||||
};
|
||||
}
|
||||
|
||||
// Parse coreMemory
|
||||
let coreMemory: MemoryConfig["coreMemory"];
|
||||
if (cfg.coreMemory && typeof cfg.coreMemory === "object" && !Array.isArray(cfg.coreMemory)) {
|
||||
const bc = cfg.coreMemory as Record<string, unknown>;
|
||||
assertAllowedKeys(bc, ["enabled", "maxEntries", "minImportance"], "coreMemory config");
|
||||
coreMemory = {
|
||||
enabled: bc.enabled === true,
|
||||
maxEntries: typeof bc.maxEntries === "number" ? bc.maxEntries : 50,
|
||||
minImportance: typeof bc.minImportance === "number" ? bc.minImportance : 0.5,
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
@@ -121,9 +176,10 @@ export const memoryConfigSchema = {
|
||||
apiKey: resolveEnvVars(embedding.apiKey),
|
||||
},
|
||||
dbPath: typeof cfg.dbPath === "string" ? cfg.dbPath : DEFAULT_DB_PATH,
|
||||
autoCapture: cfg.autoCapture === true,
|
||||
autoCapture: autoCapture ?? { enabled: true },
|
||||
autoRecall: cfg.autoRecall !== false,
|
||||
captureMaxChars: captureMaxChars ?? DEFAULT_CAPTURE_MAX_CHARS,
|
||||
// Default coreMemory to enabled for consistency with autoCapture/autoRecall
|
||||
coreMemory: coreMemory ?? { enabled: true, maxEntries: 50, minImportance: 0.5 },
|
||||
};
|
||||
},
|
||||
uiHints: {
|
||||
@@ -143,19 +199,47 @@ export const memoryConfigSchema = {
|
||||
placeholder: "~/.openclaw/memory/lancedb",
|
||||
advanced: true,
|
||||
},
|
||||
autoCapture: {
|
||||
"autoCapture.enabled": {
|
||||
label: "Auto-Capture",
|
||||
help: "Automatically capture important information from conversations",
|
||||
help: "Automatically capture important information from conversations using LLM extraction",
|
||||
},
|
||||
"autoCapture.provider": {
|
||||
label: "Capture LLM Provider",
|
||||
placeholder: "openrouter",
|
||||
advanced: true,
|
||||
help: "LLM provider for memory extraction (openrouter or openai)",
|
||||
},
|
||||
"autoCapture.model": {
|
||||
label: "Capture Model",
|
||||
placeholder: "google/gemini-2.0-flash-001",
|
||||
advanced: true,
|
||||
help: "LLM model for memory extraction (use a fast/cheap model)",
|
||||
},
|
||||
"autoCapture.apiKey": {
|
||||
label: "Capture API Key",
|
||||
sensitive: true,
|
||||
advanced: true,
|
||||
help: "API key for capture LLM (defaults to OpenRouter key from provider config)",
|
||||
},
|
||||
autoRecall: {
|
||||
label: "Auto-Recall",
|
||||
help: "Automatically inject relevant memories into context",
|
||||
},
|
||||
captureMaxChars: {
|
||||
label: "Capture Max Chars",
|
||||
help: "Maximum message length eligible for auto-capture",
|
||||
"coreMemory.enabled": {
|
||||
label: "Core Memory",
|
||||
help: "Inject core memories as virtual MEMORY.md at session start (replaces MEMORY.md file)",
|
||||
},
|
||||
"coreMemory.maxEntries": {
|
||||
label: "Max Core Entries",
|
||||
placeholder: "50",
|
||||
advanced: true,
|
||||
placeholder: String(DEFAULT_CAPTURE_MAX_CHARS),
|
||||
help: "Maximum number of core memories to load",
|
||||
},
|
||||
"coreMemory.minImportance": {
|
||||
label: "Min Core Importance",
|
||||
placeholder: "0.5",
|
||||
advanced: true,
|
||||
help: "Minimum importance threshold for core memories (0-1)",
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
102
extensions/memory-lancedb/export-memories.mjs
Normal file
102
extensions/memory-lancedb/export-memories.mjs
Normal file
@@ -0,0 +1,102 @@
|
||||
#!/usr/bin/env node
|
||||
/**
|
||||
* Export memories from LanceDB for migration to memory-neo4j
|
||||
*
|
||||
* Usage:
|
||||
* pnpm exec node export-memories.mjs [output-file.json]
|
||||
*
|
||||
* Default output: memories-export.json
|
||||
*/
|
||||
|
||||
import * as lancedb from "@lancedb/lancedb";
|
||||
import { writeFileSync } from "fs";
|
||||
|
||||
const LANCEDB_PATH = process.env.LANCEDB_PATH || "/home/tsukhani/.openclaw/memory/lancedb";
|
||||
const AGENT_ID = process.env.AGENT_ID || "main";
|
||||
const outputFile = process.argv[2] || "memories-export.json";
|
||||
|
||||
console.log("📦 Memory Export Tool (LanceDB)");
|
||||
console.log(` LanceDB path: ${LANCEDB_PATH}`);
|
||||
console.log(` Output: ${outputFile}`);
|
||||
console.log("");
|
||||
|
||||
// Transform for neo4j format
|
||||
function transformMemory(lanceEntry) {
|
||||
const createdAtISO = new Date(lanceEntry.createdAt).toISOString();
|
||||
|
||||
return {
|
||||
id: lanceEntry.id,
|
||||
text: lanceEntry.text,
|
||||
embedding: lanceEntry.vector,
|
||||
importance: lanceEntry.importance,
|
||||
category: lanceEntry.category,
|
||||
createdAt: createdAtISO,
|
||||
updatedAt: createdAtISO,
|
||||
source: "import",
|
||||
extractionStatus: "skipped",
|
||||
agentId: AGENT_ID,
|
||||
};
|
||||
}
|
||||
|
||||
async function main() {
|
||||
// Load from LanceDB
|
||||
console.log("📥 Loading from LanceDB...");
|
||||
const db = await lancedb.connect(LANCEDB_PATH);
|
||||
const table = await db.openTable("memories");
|
||||
const count = await table.countRows();
|
||||
console.log(` Found ${count} memories`);
|
||||
|
||||
const memories = await table
|
||||
.query()
|
||||
.limit(count + 100)
|
||||
.toArray();
|
||||
console.log(` Loaded ${memories.length} memories`);
|
||||
|
||||
// Transform
|
||||
console.log("🔄 Transforming...");
|
||||
const transformed = memories.map(transformMemory);
|
||||
|
||||
// Stats
|
||||
const stats = {};
|
||||
transformed.forEach((m) => {
|
||||
stats[m.category] = (stats[m.category] || 0) + 1;
|
||||
});
|
||||
console.log(" Categories:", stats);
|
||||
|
||||
// Export
|
||||
console.log(`📤 Exporting to ${outputFile}...`);
|
||||
const exportData = {
|
||||
exportedAt: new Date().toISOString(),
|
||||
sourcePlugin: "memory-lancedb",
|
||||
targetPlugin: "memory-neo4j",
|
||||
agentId: AGENT_ID,
|
||||
vectorDim: transformed[0]?.embedding?.length || 1536,
|
||||
count: transformed.length,
|
||||
stats,
|
||||
memories: transformed,
|
||||
};
|
||||
|
||||
writeFileSync(outputFile, JSON.stringify(exportData, null, 2));
|
||||
|
||||
// Also write a preview without embeddings
|
||||
const previewFile = outputFile.replace(".json", "-preview.json");
|
||||
const preview = {
|
||||
...exportData,
|
||||
memories: transformed.map((m) => ({
|
||||
...m,
|
||||
embedding: `[${m.embedding?.length} dims]`,
|
||||
})),
|
||||
};
|
||||
writeFileSync(previewFile, JSON.stringify(preview, null, 2));
|
||||
|
||||
console.log(`✅ Exported ${transformed.length} memories`);
|
||||
console.log(
|
||||
` Full export: ${outputFile} (${(JSON.stringify(exportData).length / 1024 / 1024).toFixed(2)} MB)`,
|
||||
);
|
||||
console.log(` Preview: ${previewFile}`);
|
||||
}
|
||||
|
||||
main().catch((err) => {
|
||||
console.error("❌ Error:", err.message);
|
||||
process.exit(1);
|
||||
});
|
||||
File diff suppressed because it is too large
Load Diff
26
extensions/memory-lancedb/inspect.mjs
Normal file
26
extensions/memory-lancedb/inspect.mjs
Normal file
@@ -0,0 +1,26 @@
|
||||
/**
 * Quick inspection script for the LanceDB "memories" table: prints table
 * names, row count, per-category breakdown, and a few sample entries.
 */
import * as lancedb from "@lancedb/lancedb";

// Allow overriding the DB location via env (consistent with export-memories.mjs);
// the default preserves the original hard-coded path.
const DB_PATH = process.env.LANCEDB_PATH || "/home/tsukhani/.openclaw/memory/lancedb";

const db = await lancedb.connect(DB_PATH);
const tables = await db.tableNames();
console.log("Tables:", tables);

if (tables.includes("memories")) {
  const table = await db.openTable("memories");
  const count = await table.countRows();
  console.log("Memory count:", count);

  const all = await table.query().limit(200).toArray();

  // Seed all known categories so absent ones still show as 0.
  const stats = { preference: 0, fact: 0, decision: 0, entity: 0, other: 0, core: 0 };

  all.forEach((e) => {
    stats[e.category] = (stats[e.category] || 0) + 1;
  });

  console.log("\nCategory breakdown:", stats);
  console.log("\nSample entries:");
  all.slice(0, 5).forEach((e, i) => {
    console.log(`${i + 1}. [${e.category}] ${(e.text || "").substring(0, 100)}...`);
    console.log(`   id: ${e.id}, importance: ${e.importance}, vectorDim: ${e.vector?.length}`);
  });
}
|
||||
@@ -26,11 +26,21 @@
|
||||
"label": "Auto-Recall",
|
||||
"help": "Automatically inject relevant memories into context"
|
||||
},
|
||||
"captureMaxChars": {
|
||||
"label": "Capture Max Chars",
|
||||
"help": "Maximum message length eligible for auto-capture",
|
||||
"coreMemory.enabled": {
|
||||
"label": "Core Memory",
|
||||
"help": "Inject core memories as virtual MEMORY.md at session start (replaces MEMORY.md file)"
|
||||
},
|
||||
"coreMemory.maxEntries": {
|
||||
"label": "Max Core Entries",
|
||||
"placeholder": "50",
|
||||
"advanced": true,
|
||||
"placeholder": "500"
|
||||
"help": "Maximum number of core memories to load"
|
||||
},
|
||||
"coreMemory.minImportance": {
|
||||
"label": "Min Core Importance",
|
||||
"placeholder": "0.5",
|
||||
"advanced": true,
|
||||
"help": "Minimum importance threshold for core memories (0-1)"
|
||||
}
|
||||
},
|
||||
"configSchema": {
|
||||
@@ -60,10 +70,20 @@
|
||||
"autoRecall": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"captureMaxChars": {
|
||||
"type": "number",
|
||||
"minimum": 100,
|
||||
"maximum": 10000
|
||||
"coreMemory": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"enabled": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"maxEntries": {
|
||||
"type": "number"
|
||||
},
|
||||
"minImportance": {
|
||||
"type": "number"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": ["embedding"]
|
||||
|
||||
209
extensions/memory-neo4j/config.ts
Normal file
209
extensions/memory-neo4j/config.ts
Normal file
@@ -0,0 +1,209 @@
|
||||
/**
 * Configuration schema for memory-neo4j plugin.
 *
 * Matches the JSON Schema in openclaw.plugin.json.
 * Provides runtime parsing with env var resolution and defaults.
 */

export type EmbeddingProvider = "openai" | "ollama";

// Fully-resolved plugin configuration (output of memoryNeo4jConfigSchema.parse).
export type MemoryNeo4jConfig = {
  neo4j: {
    uri: string; // Neo4j connection URI (required by the parser)
    username: string; // defaults to "neo4j" when neither 'user' nor 'username' is given
    password: string; // ${ENV_VAR} references are resolved at parse time
  };
  embedding: {
    provider: EmbeddingProvider;
    apiKey?: string; // required for "openai"; optional for "ollama"
    model: string;
    baseUrl?: string; // custom endpoint, e.g. a local Ollama server
  };
  autoCapture: boolean;
  autoRecall: boolean;
  coreMemory: {
    enabled: boolean;
    maxEntries: number;
  };
};

/**
 * Extraction configuration resolved from environment variables.
 * Entity extraction auto-enables when OPENROUTER_API_KEY is set.
 */
export type ExtractionConfig = {
  enabled: boolean; // true only when an API key is available
  apiKey: string;
  model: string;
  baseUrl: string;
  temperature: number;
  maxRetries: number; // retries beyond the initial attempt
};

// Valid memory categories; "core" denotes foundational identity facts.
export const MEMORY_CATEGORIES = [
  "core",
  "preference",
  "fact",
  "decision",
  "entity",
  "other",
] as const;

export type MemoryCategory = (typeof MEMORY_CATEGORIES)[number];
|
||||
|
||||
const EMBEDDING_DIMENSIONS: Record<string, number> = {
|
||||
// OpenAI models
|
||||
"text-embedding-3-small": 1536,
|
||||
"text-embedding-3-large": 3072,
|
||||
// Ollama models (common ones)
|
||||
"mxbai-embed-large": 1024,
|
||||
"mxbai-embed-large-2k:latest": 1024,
|
||||
"nomic-embed-text": 768,
|
||||
"all-minilm": 384,
|
||||
};
|
||||
|
||||
// Default dimension for unknown models (Ollama models vary)
|
||||
const DEFAULT_EMBEDDING_DIMS = 1024;
|
||||
|
||||
export function vectorDimsForModel(model: string): number {
|
||||
// Check exact match first
|
||||
if (EMBEDDING_DIMENSIONS[model]) {
|
||||
return EMBEDDING_DIMENSIONS[model];
|
||||
}
|
||||
// Check prefix match (for versioned models like mxbai-embed-large:latest)
|
||||
for (const [known, dims] of Object.entries(EMBEDDING_DIMENSIONS)) {
|
||||
if (model.startsWith(known)) {
|
||||
return dims;
|
||||
}
|
||||
}
|
||||
// Return default for unknown models
|
||||
return DEFAULT_EMBEDDING_DIMS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve ${ENV_VAR} references in string values.
|
||||
*/
|
||||
function resolveEnvVars(value: string): string {
|
||||
return value.replace(/\$\{([^}]+)\}/g, (_, envVar) => {
|
||||
const envValue = process.env[envVar];
|
||||
if (!envValue) {
|
||||
throw new Error(`Environment variable ${envVar} is not set`);
|
||||
}
|
||||
return envValue;
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve extraction config from environment variables.
|
||||
* Returns enabled: false if OPENROUTER_API_KEY is not set.
|
||||
*/
|
||||
export function resolveExtractionConfig(): ExtractionConfig {
|
||||
const apiKey = process.env.OPENROUTER_API_KEY ?? "";
|
||||
return {
|
||||
enabled: apiKey.length > 0,
|
||||
apiKey,
|
||||
model: process.env.EXTRACTION_MODEL ?? "google/gemini-2.0-flash-001",
|
||||
baseUrl: process.env.EXTRACTION_BASE_URL ?? "https://openrouter.ai/api/v1",
|
||||
temperature: 0.0,
|
||||
maxRetries: 2,
|
||||
};
|
||||
}
|
||||
|
||||
function assertAllowedKeys(value: Record<string, unknown>, allowed: string[], label: string) {
|
||||
const unknown = Object.keys(value).filter((key) => !allowed.includes(key));
|
||||
if (unknown.length > 0) {
|
||||
throw new Error(`${label} has unknown keys: ${unknown.join(", ")}`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Config schema with parse method for runtime validation & transformation.
 * JSON Schema validation is handled by openclaw.plugin.json; this handles
 * env var resolution and defaults.
 */
export const memoryNeo4jConfigSchema = {
  /**
   * Parse and validate the raw plugin config.
   *
   * Throws on: non-object input, unknown keys at any level, missing neo4j
   * section or neo4j.uri, and a missing apiKey for the OpenAI provider.
   * Validation order matters for error precedence — keep it as is.
   */
  parse(value: unknown): MemoryNeo4jConfig {
    if (!value || typeof value !== "object" || Array.isArray(value)) {
      throw new Error("memory-neo4j config required");
    }
    const cfg = value as Record<string, unknown>;
    assertAllowedKeys(
      cfg,
      ["embedding", "neo4j", "autoCapture", "autoRecall", "coreMemory"],
      "memory-neo4j config",
    );

    // Parse neo4j section
    const neo4jRaw = cfg.neo4j as Record<string, unknown> | undefined;
    if (!neo4jRaw || typeof neo4jRaw !== "object") {
      throw new Error("neo4j config section is required");
    }
    assertAllowedKeys(neo4jRaw, ["uri", "user", "username", "password"], "neo4j config");
    if (typeof neo4jRaw.uri !== "string" || !neo4jRaw.uri) {
      throw new Error("neo4j.uri is required");
    }

    // Password may reference ${ENV_VAR}; missing password becomes "".
    const neo4jPassword =
      typeof neo4jRaw.password === "string" ? resolveEnvVars(neo4jRaw.password) : "";
    // Support both 'user' and 'username' for neo4j config
    const neo4jUsername =
      typeof neo4jRaw.user === "string"
        ? neo4jRaw.user
        : typeof neo4jRaw.username === "string"
          ? neo4jRaw.username
          : "neo4j";

    // Parse embedding section (optional for ollama without apiKey)
    const embeddingRaw = cfg.embedding as Record<string, unknown> | undefined;
    assertAllowedKeys(
      embeddingRaw ?? {},
      ["provider", "apiKey", "model", "baseUrl"],
      "embedding config",
    );

    // Anything other than the literal "ollama" falls back to "openai".
    const provider: EmbeddingProvider = embeddingRaw?.provider === "ollama" ? "ollama" : "openai";

    // apiKey is required for openai, optional for ollama
    let apiKey: string | undefined;
    if (typeof embeddingRaw?.apiKey === "string" && embeddingRaw.apiKey) {
      apiKey = resolveEnvVars(embeddingRaw.apiKey);
    } else if (provider === "openai") {
      throw new Error("embedding.apiKey is required for OpenAI provider");
    }

    // Per-provider default model when none is configured.
    const embeddingModel =
      typeof embeddingRaw?.model === "string"
        ? embeddingRaw.model
        : provider === "ollama"
          ? "mxbai-embed-large"
          : "text-embedding-3-small";

    const baseUrl = typeof embeddingRaw?.baseUrl === "string" ? embeddingRaw.baseUrl : undefined;

    // Parse coreMemory section (optional with defaults)
    const coreMemoryRaw = cfg.coreMemory as Record<string, unknown> | undefined;
    const coreMemoryEnabled = coreMemoryRaw?.enabled !== false; // enabled by default
    const coreMemoryMaxEntries =
      typeof coreMemoryRaw?.maxEntries === "number" ? coreMemoryRaw.maxEntries : 50;

    return {
      neo4j: {
        uri: neo4jRaw.uri,
        username: neo4jUsername,
        password: neo4jPassword,
      },
      embedding: {
        provider,
        apiKey,
        model: embeddingModel,
        baseUrl,
      },
      // Both toggles default to true; only an explicit false disables them.
      autoCapture: cfg.autoCapture !== false,
      autoRecall: cfg.autoRecall !== false,
      coreMemory: {
        enabled: coreMemoryEnabled,
        maxEntries: coreMemoryMaxEntries,
      },
    };
  },
};
|
||||
104
extensions/memory-neo4j/embeddings.ts
Normal file
104
extensions/memory-neo4j/embeddings.ts
Normal file
@@ -0,0 +1,104 @@
|
||||
/**
|
||||
* Embedding generation for memory-neo4j.
|
||||
*
|
||||
* Supports both OpenAI and Ollama providers.
|
||||
*/
|
||||
|
||||
import OpenAI from "openai";
|
||||
import type { EmbeddingProvider } from "./config.js";
|
||||
|
||||
export class Embeddings {
|
||||
private client: OpenAI | null = null;
|
||||
private readonly provider: EmbeddingProvider;
|
||||
private readonly baseUrl: string;
|
||||
|
||||
constructor(
|
||||
private readonly apiKey: string | undefined,
|
||||
private readonly model: string = "text-embedding-3-small",
|
||||
provider: EmbeddingProvider = "openai",
|
||||
baseUrl?: string,
|
||||
) {
|
||||
this.provider = provider;
|
||||
this.baseUrl = baseUrl ?? (provider === "ollama" ? "http://localhost:11434" : "");
|
||||
|
||||
if (provider === "openai") {
|
||||
if (!apiKey) {
|
||||
throw new Error("API key required for OpenAI embeddings");
|
||||
}
|
||||
this.client = new OpenAI({ apiKey });
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate an embedding vector for a single text.
|
||||
*/
|
||||
async embed(text: string): Promise<number[]> {
|
||||
if (this.provider === "ollama") {
|
||||
return this.embedOllama(text);
|
||||
}
|
||||
return this.embedOpenAI(text);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate embeddings for multiple texts.
|
||||
* Returns array of embeddings in the same order as input.
|
||||
*/
|
||||
async embedBatch(texts: string[]): Promise<number[][]> {
|
||||
if (texts.length === 0) {
|
||||
return [];
|
||||
}
|
||||
|
||||
if (this.provider === "ollama") {
|
||||
// Ollama doesn't support batch, so we do sequential
|
||||
return Promise.all(texts.map((t) => this.embedOllama(t)));
|
||||
}
|
||||
|
||||
return this.embedBatchOpenAI(texts);
|
||||
}
|
||||
|
||||
private async embedOpenAI(text: string): Promise<number[]> {
|
||||
if (!this.client) {
|
||||
throw new Error("OpenAI client not initialized");
|
||||
}
|
||||
const response = await this.client.embeddings.create({
|
||||
model: this.model,
|
||||
input: text,
|
||||
});
|
||||
return response.data[0].embedding;
|
||||
}
|
||||
|
||||
private async embedBatchOpenAI(texts: string[]): Promise<number[][]> {
|
||||
if (!this.client) {
|
||||
throw new Error("OpenAI client not initialized");
|
||||
}
|
||||
const response = await this.client.embeddings.create({
|
||||
model: this.model,
|
||||
input: texts,
|
||||
});
|
||||
// Sort by index to ensure correct order
|
||||
return response.data.toSorted((a, b) => a.index - b.index).map((d) => d.embedding);
|
||||
}
|
||||
|
||||
private async embedOllama(text: string): Promise<number[]> {
|
||||
const url = `${this.baseUrl}/api/embed`;
|
||||
const response = await fetch(url, {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({
|
||||
model: this.model,
|
||||
input: text,
|
||||
}),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const error = await response.text();
|
||||
throw new Error(`Ollama embedding failed: ${response.status} ${error}`);
|
||||
}
|
||||
|
||||
const data = (await response.json()) as { embeddings?: number[][] };
|
||||
if (!data.embeddings?.[0]) {
|
||||
throw new Error("No embedding returned from Ollama");
|
||||
}
|
||||
return data.embeddings[0];
|
||||
}
|
||||
}
|
||||
774
extensions/memory-neo4j/extractor.ts
Normal file
774
extensions/memory-neo4j/extractor.ts
Normal file
@@ -0,0 +1,774 @@
|
||||
/**
|
||||
* LLM-based entity extraction and auto-capture decision for memory-neo4j.
|
||||
*
|
||||
* Uses Gemini Flash via OpenRouter for:
|
||||
* 1. Entity extraction: Extract entities and relationships from stored memories
|
||||
* 2. Auto-capture decision: Decide what's worth remembering from conversations
|
||||
*
|
||||
* Both run as background fire-and-forget operations with graceful degradation.
|
||||
*/
|
||||
|
||||
import { randomUUID } from "node:crypto";
|
||||
import type { ExtractionConfig } from "./config.js";
|
||||
import type { Embeddings } from "./embeddings.js";
|
||||
import type { Neo4jMemoryClient } from "./neo4j-client.js";
|
||||
import type { CaptureItem, EntityType, ExtractionResult, MemoryCategory } from "./schema.js";
|
||||
import { ALLOWED_RELATIONSHIP_TYPES, ENTITY_TYPES } from "./schema.js";
|
||||
|
||||
// ============================================================================
|
||||
// Types
|
||||
// ============================================================================
|
||||
|
||||
// Minimal logging surface required by the extractor; a structural subset of
// most host loggers. `debug` is optional — callers must guard before use.
type Logger = {
  info: (msg: string) => void;
  warn: (msg: string) => void;
  error: (msg: string) => void;
  debug?: (msg: string) => void;
};
|
||||
|
||||
// ============================================================================
|
||||
// Extraction Prompt
|
||||
// ============================================================================
|
||||
|
||||
const ENTITY_EXTRACTION_PROMPT = `You are an entity extraction system for a personal memory store.
|
||||
Extract entities and relationships from this memory text.
|
||||
|
||||
Memory: "{text}"
|
||||
|
||||
Return JSON:
|
||||
{
|
||||
"entities": [
|
||||
{"name": "tarun", "type": "person", "aliases": ["boss"], "description": "brief description"}
|
||||
],
|
||||
"relationships": [
|
||||
{"source": "tarun", "target": "abundent", "type": "WORKS_AT", "confidence": 0.95}
|
||||
],
|
||||
"tags": [
|
||||
{"name": "neo4j", "category": "technology"}
|
||||
]
|
||||
}
|
||||
|
||||
Rules:
|
||||
- Normalize entity names to lowercase
|
||||
- Entity types: person, organization, location, event, concept
|
||||
- Relationship types: WORKS_AT, LIVES_AT, KNOWS, MARRIED_TO, PREFERS, DECIDED, RELATED_TO
|
||||
- Confidence: 0.0-1.0
|
||||
- Only extract what's explicitly stated or strongly implied
|
||||
- Return empty arrays if nothing to extract
|
||||
- Keep entity descriptions brief (1 sentence max)`;
|
||||
|
||||
// ============================================================================
|
||||
// Auto-Capture Decision Prompt
|
||||
// ============================================================================
|
||||
|
||||
const AUTO_CAPTURE_PROMPT = `You are an AI memory curator. Given these user messages from a conversation, identify information worth storing as long-term memories.
|
||||
|
||||
Only extract:
|
||||
- Personal preferences and opinions ("I prefer dark mode", "I like TypeScript")
|
||||
- Important facts about people, places, organizations
|
||||
- Decisions made ("We decided to use Neo4j", "Going with plan A")
|
||||
- Contact information (emails, phone numbers, usernames)
|
||||
- Important events or dates
|
||||
- Technical decisions and configurations
|
||||
|
||||
Do NOT extract:
|
||||
- General questions or instructions to the AI
|
||||
- Routine greetings or acknowledgments
|
||||
- Information that is too vague or contextual
|
||||
- Information already in system prompts or documentation
|
||||
|
||||
Categories:
|
||||
- "core": Foundational identity info that should ALWAYS be remembered (user's name, role, company, key relationships, critical preferences that define who they are). Use sparingly - only for truly foundational facts.
|
||||
- "preference": User preferences and opinions
|
||||
- "fact": Facts about people, places, things
|
||||
- "decision": Decisions made
|
||||
- "entity": Entity-focused memories
|
||||
- "other": Miscellaneous
|
||||
|
||||
Messages:
|
||||
"""
|
||||
{messages}
|
||||
"""
|
||||
|
||||
Return JSON:
|
||||
{
|
||||
"memories": [
|
||||
{"text": "concise memory text", "category": "core|preference|fact|decision|entity|other", "importance": 0.7}
|
||||
]
|
||||
}
|
||||
|
||||
If nothing is worth remembering, return: {"memories": []}`;
|
||||
|
||||
// ============================================================================
|
||||
// OpenRouter API Client
|
||||
// ============================================================================
|
||||
|
||||
async function callOpenRouter(config: ExtractionConfig, prompt: string): Promise<string | null> {
|
||||
for (let attempt = 0; attempt <= config.maxRetries; attempt++) {
|
||||
try {
|
||||
const response = await fetch(`${config.baseUrl}/chat/completions`, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
Authorization: `Bearer ${config.apiKey}`,
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model: config.model,
|
||||
messages: [{ role: "user", content: prompt }],
|
||||
temperature: config.temperature,
|
||||
response_format: { type: "json_object" },
|
||||
}),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const body = await response.text().catch(() => "");
|
||||
throw new Error(`OpenRouter API error ${response.status}: ${body}`);
|
||||
}
|
||||
|
||||
const data = (await response.json()) as {
|
||||
choices?: Array<{ message?: { content?: string } }>;
|
||||
};
|
||||
return data.choices?.[0]?.message?.content ?? null;
|
||||
} catch (err) {
|
||||
if (attempt >= config.maxRetries) {
|
||||
throw err;
|
||||
}
|
||||
// Exponential backoff
|
||||
await new Promise((resolve) => setTimeout(resolve, 500 * Math.pow(2, attempt)));
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Entity Extraction
|
||||
// ============================================================================
|
||||
|
||||
/**
|
||||
* Extract entities and relationships from a memory text using LLM.
|
||||
*/
|
||||
export async function extractEntities(
|
||||
text: string,
|
||||
config: ExtractionConfig,
|
||||
): Promise<ExtractionResult | null> {
|
||||
if (!config.enabled) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const prompt = ENTITY_EXTRACTION_PROMPT.replace("{text}", text);
|
||||
|
||||
try {
|
||||
const content = await callOpenRouter(config, prompt);
|
||||
if (!content) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const parsed = JSON.parse(content) as Record<string, unknown>;
|
||||
return validateExtractionResult(parsed);
|
||||
} catch {
|
||||
// Will be handled by caller; don't throw for parse errors
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate and sanitize LLM extraction output.
|
||||
*/
|
||||
function validateExtractionResult(raw: Record<string, unknown>): ExtractionResult {
|
||||
const entities = Array.isArray(raw.entities) ? raw.entities : [];
|
||||
const relationships = Array.isArray(raw.relationships) ? raw.relationships : [];
|
||||
const tags = Array.isArray(raw.tags) ? raw.tags : [];
|
||||
|
||||
const validEntityTypes = new Set<string>(ENTITY_TYPES);
|
||||
|
||||
return {
|
||||
entities: entities
|
||||
.filter(
|
||||
(e: unknown): e is Record<string, unknown> =>
|
||||
e !== null &&
|
||||
typeof e === "object" &&
|
||||
typeof (e as Record<string, unknown>).name === "string" &&
|
||||
typeof (e as Record<string, unknown>).type === "string",
|
||||
)
|
||||
.map((e) => ({
|
||||
name: String(e.name).trim().toLowerCase(),
|
||||
type: validEntityTypes.has(String(e.type)) ? (String(e.type) as EntityType) : "concept",
|
||||
aliases: Array.isArray(e.aliases)
|
||||
? (e.aliases as unknown[])
|
||||
.filter((a): a is string => typeof a === "string")
|
||||
.map((a) => a.trim().toLowerCase())
|
||||
: undefined,
|
||||
description: typeof e.description === "string" ? e.description : undefined,
|
||||
}))
|
||||
.filter((e) => e.name.length > 0),
|
||||
|
||||
relationships: relationships
|
||||
.filter(
|
||||
(r: unknown): r is Record<string, unknown> =>
|
||||
r !== null &&
|
||||
typeof r === "object" &&
|
||||
typeof (r as Record<string, unknown>).source === "string" &&
|
||||
typeof (r as Record<string, unknown>).target === "string" &&
|
||||
typeof (r as Record<string, unknown>).type === "string" &&
|
||||
ALLOWED_RELATIONSHIP_TYPES.has(String((r as Record<string, unknown>).type)),
|
||||
)
|
||||
.map((r) => ({
|
||||
source: String(r.source).trim().toLowerCase(),
|
||||
target: String(r.target).trim().toLowerCase(),
|
||||
type: String(r.type),
|
||||
confidence: typeof r.confidence === "number" ? Math.min(1, Math.max(0, r.confidence)) : 0.7,
|
||||
})),
|
||||
|
||||
tags: tags
|
||||
.filter(
|
||||
(t: unknown): t is Record<string, unknown> =>
|
||||
t !== null &&
|
||||
typeof t === "object" &&
|
||||
typeof (t as Record<string, unknown>).name === "string",
|
||||
)
|
||||
.map((t) => ({
|
||||
name: String(t.name).trim().toLowerCase(),
|
||||
category: typeof t.category === "string" ? t.category : "topic",
|
||||
}))
|
||||
.filter((t) => t.name.length > 0),
|
||||
};
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Background Extraction Pipeline
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Run entity extraction in the background for a stored memory.
 * Fire-and-forget: errors are logged but never propagated — this function
 * never rejects. Failures are recorded by setting extractionStatus="failed".
 *
 * Flow:
 * 1. Call LLM to extract entities and relationships
 * 2. MERGE Entity nodes (idempotent)
 * 3. Create MENTIONS relationships from Memory → Entity
 * 4. Create inter-Entity relationships (WORKS_AT, KNOWS, etc.)
 * 5. Tag the memory
 * 6. Update extractionStatus to "complete" or "failed"
 *
 * Per-item failures (one entity, relationship, or tag) are caught
 * individually so a single bad item does not abort the rest of the batch.
 */
export async function runBackgroundExtraction(
  memoryId: string,
  text: string,
  db: Neo4jMemoryClient,
  embeddings: Embeddings,
  config: ExtractionConfig,
  logger: Logger,
): Promise<void> {
  // Extraction disabled: mark the memory so it is not picked up as "pending".
  if (!config.enabled) {
    await db.updateExtractionStatus(memoryId, "skipped").catch(() => {});
    return;
  }

  try {
    const result = await extractEntities(text, config);

    // null means the LLM call or JSON parse failed (extractEntities swallows both).
    if (!result) {
      await db.updateExtractionStatus(memoryId, "failed");
      return;
    }

    // Empty extraction is valid — not all memories have extractable entities
    if (
      result.entities.length === 0 &&
      result.relationships.length === 0 &&
      result.tags.length === 0
    ) {
      await db.updateExtractionStatus(memoryId, "complete");
      return;
    }

    // Generate embeddings for entity names (for entity vector search).
    // Best-effort: on failure we proceed without embeddings rather than abort.
    let entityEmbeddings: Map<string, number[]> | undefined;
    if (result.entities.length > 0) {
      try {
        const names = result.entities.map((e) => e.name);
        const vectors = await embeddings.embedBatch(names);
        entityEmbeddings = new Map(names.map((n, i) => [n, vectors[i]]));
      } catch (err) {
        logger.debug?.(`memory-neo4j: entity embedding generation failed: ${String(err)}`);
      }
    }

    // MERGE Entity nodes (each with its own try/catch so one failure
    // does not stop the remaining entities from being merged)
    for (const entity of result.entities) {
      try {
        await db.mergeEntity({
          id: randomUUID(),
          name: entity.name,
          type: entity.type,
          aliases: entity.aliases,
          description: entity.description,
          embedding: entityEmbeddings?.get(entity.name),
        });

        // Create MENTIONS relationship
        await db.createMentions(memoryId, entity.name, "context", 1.0);
      } catch (err) {
        logger.warn(`memory-neo4j: entity merge failed for "${entity.name}": ${String(err)}`);
      }
    }

    // Create inter-Entity relationships
    for (const rel of result.relationships) {
      try {
        await db.createEntityRelationship(rel.source, rel.target, rel.type, rel.confidence);
      } catch (err) {
        logger.debug?.(
          `memory-neo4j: relationship creation failed: ${rel.source}->${rel.target}: ${String(err)}`,
        );
      }
    }

    // Tag the memory
    for (const tag of result.tags) {
      try {
        await db.tagMemory(memoryId, tag.name, tag.category);
      } catch (err) {
        logger.debug?.(`memory-neo4j: tagging failed for "${tag.name}": ${String(err)}`);
      }
    }

    await db.updateExtractionStatus(memoryId, "complete");
    logger.info(
      `memory-neo4j: extraction complete for ${memoryId.slice(0, 8)} — ` +
        `${result.entities.length} entities, ${result.relationships.length} rels, ${result.tags.length} tags`,
    );
  } catch (err) {
    logger.warn(`memory-neo4j: extraction failed for ${memoryId.slice(0, 8)}: ${String(err)}`);
    // NOTE(review): if this status update also fails, the memory stays
    // "pending" and will be retried by the next sleep cycle — confirm that
    // is the intended behavior.
    await db.updateExtractionStatus(memoryId, "failed").catch(() => {});
  }
}
|
||||
|
||||
// ============================================================================
|
||||
// Sleep Cycle - Five Phase Memory Consolidation
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Sleep Cycle Result - aggregated stats from all five phases.
 * Every counter is zero-initialized; a phase that errors or is skipped
 * leaves its section at zero.
 */
export type SleepCycleResult = {
  // Phase 1: Deduplication
  dedup: {
    clustersFound: number; // near-duplicate clusters detected this run
    memoriesMerged: number; // memories removed by merging into a survivor
  };
  // Phase 2: Core Promotion
  promotion: {
    candidatesFound: number; // memories meeting importance + age criteria
    promoted: number; // memories actually promoted to core
  };
  // Phase 3: Decay & Pruning
  decay: {
    memoriesPruned: number; // memories deleted by the forgetting curve
  };
  // Phase 4: Entity Extraction
  extraction: {
    total: number; // memories pending extraction at phase start
    processed: number; // memories attempted this run
    succeeded: number; // extractions that completed without throwing
    failed: number; // extractions that threw
  };
  // Phase 5: Orphan Cleanup
  cleanup: {
    entitiesRemoved: number; // entities with no remaining MENTIONS
    tagsRemoved: number; // tags attached to no memory
  };
  // Overall
  durationMs: number; // wall-clock duration of the whole cycle
  aborted: boolean; // true when the abort signal fired mid-cycle
};

/**
 * Tuning knobs for runSleepCycle. All fields are optional; defaults are
 * applied inside runSleepCycle (noted per field below).
 */
export type SleepCycleOptions = {
  // Common
  agentId?: string; // scope phases 1-4 to a single agent when set
  abortSignal?: AbortSignal; // checked before each phase and between items

  // Phase 1: Deduplication
  dedupThreshold?: number; // Vector similarity threshold (default: 0.95)

  // Phase 2: Core Promotion
  promotionImportanceThreshold?: number; // Min importance to auto-promote (default: 0.9)
  promotionMinAgeDays?: number; // Min age before promotion (default: 7)

  // Phase 3: Decay
  decayRetentionThreshold?: number; // Below this, memory is pruned (default: 0.1)
  decayBaseHalfLifeDays?: number; // Base half-life in days (default: 30)
  decayImportanceMultiplier?: number; // How much importance extends half-life (default: 2)

  // Phase 4: Extraction
  extractionBatchSize?: number; // Memories per batch (default: 50)
  extractionDelayMs?: number; // Delay between batches (default: 1000)

  // Progress callbacks (both optional; invoked synchronously)
  onPhaseStart?: (phase: "dedup" | "promotion" | "decay" | "extraction" | "cleanup") => void;
  onProgress?: (phase: string, message: string) => void;
};
|
||||
|
||||
/**
 * Run the full sleep cycle - five phases of memory consolidation.
 *
 * This mimics how human memory consolidation works during sleep:
 * 1. DEDUPLICATION - Merge near-duplicate memories (reduce redundancy)
 * 2. CORE PROMOTION - Promote high-importance memories to core status
 * 3. DECAY/PRUNING - Remove old, low-importance memories (forgetting curve)
 * 4. EXTRACTION - Form entity relationships (strengthen connections)
 * 5. CLEANUP - Remove orphaned entities/tags (garbage collection)
 *
 * Benefits:
 * - Reduces latency during active conversations
 * - Prevents memory bloat and "self-degradation"
 * - Cleaner separation between capture and consolidation
 *
 * Research basis:
 * - Ebbinghaus forgetting curve for decay
 * - FadeMem importance-weighted retention
 * - Graphiti/Zep edge deduplication patterns
 *
 * Error handling: each phase has its own try/catch, so a failure in one
 * phase is logged and the cycle continues with the next. The abort signal
 * is checked before each phase and between per-item steps; an abort leaves
 * partial results with `aborted: true`.
 */
export async function runSleepCycle(
  db: Neo4jMemoryClient,
  embeddings: Embeddings,
  config: ExtractionConfig,
  logger: Logger,
  options: SleepCycleOptions = {},
): Promise<SleepCycleResult> {
  const startTime = Date.now();
  const {
    agentId,
    abortSignal,
    dedupThreshold = 0.95,
    promotionImportanceThreshold = 0.9,
    promotionMinAgeDays = 7,
    decayRetentionThreshold = 0.1,
    decayBaseHalfLifeDays = 30,
    decayImportanceMultiplier = 2,
    extractionBatchSize = 50,
    extractionDelayMs = 1000,
    onPhaseStart,
    onProgress,
  } = options;

  // Zero-initialized result; each phase fills in its own section below.
  const result: SleepCycleResult = {
    dedup: { clustersFound: 0, memoriesMerged: 0 },
    promotion: { candidatesFound: 0, promoted: 0 },
    decay: { memoriesPruned: 0 },
    extraction: { total: 0, processed: 0, succeeded: 0, failed: 0 },
    cleanup: { entitiesRemoved: 0, tagsRemoved: 0 },
    durationMs: 0,
    aborted: false,
  };

  // --------------------------------------------------------------------------
  // Phase 1: Deduplication
  // --------------------------------------------------------------------------
  if (!abortSignal?.aborted) {
    onPhaseStart?.("dedup");
    logger.info("memory-neo4j: [sleep] Phase 1: Deduplication");

    try {
      const clusters = await db.findDuplicateClusters(dedupThreshold, agentId);
      result.dedup.clustersFound = clusters.length;

      for (const cluster of clusters) {
        if (abortSignal?.aborted) {
          break;
        }

        // Each cluster collapses into one surviving memory.
        const { deletedCount } = await db.mergeMemoryCluster(
          cluster.memoryIds,
          cluster.importances,
        );
        result.dedup.memoriesMerged += deletedCount;
        onProgress?.("dedup", `Merged cluster of ${cluster.memoryIds.length} → 1`);
      }

      logger.info(
        `memory-neo4j: [sleep] Phase 1 complete — ${result.dedup.clustersFound} clusters, ${result.dedup.memoriesMerged} merged`,
      );
    } catch (err) {
      logger.warn(`memory-neo4j: [sleep] Phase 1 error: ${String(err)}`);
    }
  }

  // --------------------------------------------------------------------------
  // Phase 2: Core Promotion
  // --------------------------------------------------------------------------
  if (!abortSignal?.aborted) {
    onPhaseStart?.("promotion");
    logger.info("memory-neo4j: [sleep] Phase 2: Core Promotion");

    try {
      // High-importance memories that have survived the minimum age qualify.
      const candidates = await db.findPromotionCandidates({
        importanceThreshold: promotionImportanceThreshold,
        minAgeDays: promotionMinAgeDays,
        agentId,
      });
      result.promotion.candidatesFound = candidates.length;

      if (candidates.length > 0) {
        const ids = candidates.map((m) => m.id);
        result.promotion.promoted = await db.promoteToCore(ids);
        for (const c of candidates) {
          onProgress?.("promotion", `Promoted "${c.text.slice(0, 50)}..." to core`);
        }
      }

      logger.info(
        `memory-neo4j: [sleep] Phase 2 complete — ${result.promotion.promoted} memories promoted to core`,
      );
    } catch (err) {
      logger.warn(`memory-neo4j: [sleep] Phase 2 error: ${String(err)}`);
    }
  }

  // --------------------------------------------------------------------------
  // Phase 3: Decay & Pruning
  // --------------------------------------------------------------------------
  if (!abortSignal?.aborted) {
    onPhaseStart?.("decay");
    logger.info("memory-neo4j: [sleep] Phase 3: Decay & Pruning");

    try {
      // Forgetting curve: importance extends a memory's half-life; memories
      // whose retention falls below the threshold are pruned.
      const decayed = await db.findDecayedMemories({
        retentionThreshold: decayRetentionThreshold,
        baseHalfLifeDays: decayBaseHalfLifeDays,
        importanceMultiplier: decayImportanceMultiplier,
        agentId,
      });

      if (decayed.length > 0) {
        const ids = decayed.map((m) => m.id);
        result.decay.memoriesPruned = await db.pruneMemories(ids);
        onProgress?.("decay", `Pruned ${result.decay.memoriesPruned} decayed memories`);
      }

      logger.info(
        `memory-neo4j: [sleep] Phase 3 complete — ${result.decay.memoriesPruned} memories pruned`,
      );
    } catch (err) {
      logger.warn(`memory-neo4j: [sleep] Phase 3 error: ${String(err)}`);
    }
  }

  // --------------------------------------------------------------------------
  // Phase 4: Entity Extraction
  // --------------------------------------------------------------------------
  if (!abortSignal?.aborted && config.enabled) {
    onPhaseStart?.("extraction");
    logger.info("memory-neo4j: [sleep] Phase 4: Entity Extraction");

    try {
      // Get initial count
      const counts = await db.countByExtractionStatus(agentId);
      result.extraction.total = counts.pending;

      if (result.extraction.total > 0) {
        let hasMore = true;
        // Drain the "pending" queue batch by batch. This relies on
        // runBackgroundExtraction moving every memory out of "pending".
        // NOTE(review): if that status update itself fails, the same memory
        // could be returned again next iteration — confirm this loop cannot
        // spin forever.
        while (hasMore && !abortSignal?.aborted) {
          const pending = await db.listPendingExtractions(extractionBatchSize, agentId);

          if (pending.length === 0) {
            hasMore = false;
            break;
          }

          for (const memory of pending) {
            if (abortSignal?.aborted) {
              break;
            }

            try {
              await runBackgroundExtraction(memory.id, memory.text, db, embeddings, config, logger);
              result.extraction.succeeded++;
            } catch (err) {
              // NOTE(review): runBackgroundExtraction catches its own errors
              // and never rejects, so this branch is effectively dead and
              // `failed` stays 0 even when an extraction fails (it is counted
              // as succeeded above) — verify this is intended.
              logger.warn(
                `memory-neo4j: extraction failed for ${memory.id.slice(0, 8)}: ${String(err)}`,
              );
              result.extraction.failed++;
            }

            result.extraction.processed++;

            // Report progress every 10 memories to keep callbacks cheap.
            if (result.extraction.processed % 10 === 0) {
              onProgress?.(
                "extraction",
                `${result.extraction.processed}/${result.extraction.total} processed`,
              );
            }
          }

          // Delay between batches
          if (hasMore && !abortSignal?.aborted) {
            await new Promise((resolve) => setTimeout(resolve, extractionDelayMs));
          }
        }
      }

      logger.info(
        `memory-neo4j: [sleep] Phase 4 complete — ${result.extraction.succeeded} extracted, ${result.extraction.failed} failed`,
      );
    } catch (err) {
      logger.warn(`memory-neo4j: [sleep] Phase 4 error: ${String(err)}`);
    }
  } else if (!config.enabled) {
    logger.info("memory-neo4j: [sleep] Phase 4 skipped — extraction not enabled");
  }

  // --------------------------------------------------------------------------
  // Phase 5: Orphan Cleanup
  // Note: unlike phases 1-4, cleanup is not scoped by agentId — the orphan
  // queries below take no agent argument.
  // --------------------------------------------------------------------------
  if (!abortSignal?.aborted) {
    onPhaseStart?.("cleanup");
    logger.info("memory-neo4j: [sleep] Phase 5: Orphan Cleanup");

    try {
      // Clean up orphan entities
      const orphanEntities = await db.findOrphanEntities();
      if (orphanEntities.length > 0) {
        result.cleanup.entitiesRemoved = await db.deleteOrphanEntities(
          orphanEntities.map((e) => e.id),
        );
        onProgress?.("cleanup", `Removed ${result.cleanup.entitiesRemoved} orphan entities`);
      }

      // Clean up orphan tags
      const orphanTags = await db.findOrphanTags();
      if (orphanTags.length > 0) {
        result.cleanup.tagsRemoved = await db.deleteOrphanTags(orphanTags.map((t) => t.id));
        onProgress?.("cleanup", `Removed ${result.cleanup.tagsRemoved} orphan tags`);
      }

      logger.info(
        `memory-neo4j: [sleep] Phase 5 complete — ${result.cleanup.entitiesRemoved} entities, ${result.cleanup.tagsRemoved} tags removed`,
      );
    } catch (err) {
      logger.warn(`memory-neo4j: [sleep] Phase 5 error: ${String(err)}`);
    }
  }

  result.durationMs = Date.now() - startTime;
  result.aborted = abortSignal?.aborted ?? false;

  logger.info(
    `memory-neo4j: [sleep] Sleep cycle complete in ${(result.durationMs / 1000).toFixed(1)}s` +
      (result.aborted ? " (aborted)" : ""),
  );

  return result;
}
|
||||
|
||||
// ============================================================================
|
||||
// Auto-Capture Decision
|
||||
// ============================================================================
|
||||
|
||||
/**
|
||||
* Evaluate user messages and decide what's worth storing as long-term memory.
|
||||
* Returns a list of memory items to store, or empty if nothing worth keeping.
|
||||
*/
|
||||
export async function evaluateAutoCapture(
|
||||
userMessages: string[],
|
||||
config: ExtractionConfig,
|
||||
): Promise<CaptureItem[]> {
|
||||
if (!config.enabled || userMessages.length === 0) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const combined = userMessages.join("\n\n");
|
||||
if (combined.length < 10) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const prompt = AUTO_CAPTURE_PROMPT.replace("{messages}", combined);
|
||||
|
||||
try {
|
||||
const content = await callOpenRouter(config, prompt);
|
||||
if (!content) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const parsed = JSON.parse(content) as Record<string, unknown>;
|
||||
return validateCaptureDecision(parsed);
|
||||
} catch {
|
||||
// Silently fail — auto-capture is best-effort
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate and sanitize the auto-capture LLM output.
|
||||
*/
|
||||
function validateCaptureDecision(raw: Record<string, unknown>): CaptureItem[] {
|
||||
const memories = Array.isArray(raw.memories) ? raw.memories : [];
|
||||
|
||||
const validCategories = new Set<string>(["preference", "fact", "decision", "entity", "other"]);
|
||||
|
||||
return memories
|
||||
.filter(
|
||||
(m: unknown): m is Record<string, unknown> =>
|
||||
m !== null &&
|
||||
typeof m === "object" &&
|
||||
typeof (m as Record<string, unknown>).text === "string" &&
|
||||
(m as Record<string, unknown>).text !== "",
|
||||
)
|
||||
.map((m) => ({
|
||||
text: String(m.text).slice(0, 2000), // cap length
|
||||
category: validCategories.has(String(m.category))
|
||||
? (String(m.category) as MemoryCategory)
|
||||
: "other",
|
||||
importance: typeof m.importance === "number" ? Math.min(1, Math.max(0, m.importance)) : 0.7,
|
||||
}))
|
||||
.slice(0, 5); // Max 5 captures per conversation
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Message Extraction Helper
|
||||
// ============================================================================
|
||||
|
||||
/**
|
||||
* Extract user message texts from the event.messages array.
|
||||
* Handles both string content and content block arrays.
|
||||
*/
|
||||
export function extractUserMessages(messages: unknown[]): string[] {
|
||||
const texts: string[] = [];
|
||||
|
||||
for (const msg of messages) {
|
||||
if (!msg || typeof msg !== "object") {
|
||||
continue;
|
||||
}
|
||||
const msgObj = msg as Record<string, unknown>;
|
||||
|
||||
// Only process user messages for auto-capture
|
||||
if (msgObj.role !== "user") {
|
||||
continue;
|
||||
}
|
||||
|
||||
const content = msgObj.content;
|
||||
if (typeof content === "string") {
|
||||
texts.push(content);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (Array.isArray(content)) {
|
||||
for (const block of content) {
|
||||
if (
|
||||
block &&
|
||||
typeof block === "object" &&
|
||||
"type" in block &&
|
||||
(block as Record<string, unknown>).type === "text" &&
|
||||
"text" in block &&
|
||||
typeof (block as Record<string, unknown>).text === "string"
|
||||
) {
|
||||
texts.push((block as Record<string, unknown>).text as string);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Filter out noise
|
||||
return texts.filter(
|
||||
(t) => t.length >= 10 && !t.includes("<relevant-memories>") && !t.includes("<system>"),
|
||||
);
|
||||
}
|
||||
942
extensions/memory-neo4j/index.ts
Normal file
942
extensions/memory-neo4j/index.ts
Normal file
@@ -0,0 +1,942 @@
|
||||
/**
|
||||
* OpenClaw Memory (Neo4j) Plugin
|
||||
*
|
||||
* Drop-in replacement for memory-lancedb with three-signal hybrid search,
|
||||
* entity extraction, and knowledge graph capabilities.
|
||||
*
|
||||
* Provides:
|
||||
* - memory_recall: Hybrid search (vector + BM25 + graph traversal)
|
||||
* - memory_store: Store memories with background entity extraction
|
||||
* - memory_forget: Delete memories with cascade cleanup
|
||||
*
|
||||
* Architecture decisions: see docs/memory-neo4j/ARCHITECTURE.md
|
||||
*/
|
||||
|
||||
import type { OpenClawPluginApi } from "openclaw/plugin-sdk";
|
||||
import { Type } from "@sinclair/typebox";
|
||||
import { randomUUID } from "node:crypto";
|
||||
import { stringEnum } from "openclaw/plugin-sdk";
|
||||
import type { MemoryCategory, MemorySource } from "./schema.js";
|
||||
import {
|
||||
MEMORY_CATEGORIES,
|
||||
memoryNeo4jConfigSchema,
|
||||
resolveExtractionConfig,
|
||||
vectorDimsForModel,
|
||||
} from "./config.js";
|
||||
import { Embeddings } from "./embeddings.js";
|
||||
import { evaluateAutoCapture, extractUserMessages, runSleepCycle } from "./extractor.js";
|
||||
import { Neo4jMemoryClient } from "./neo4j-client.js";
|
||||
import { hybridSearch } from "./search.js";
|
||||
|
||||
// ============================================================================
|
||||
// Plugin Definition
|
||||
// ============================================================================
|
||||
|
||||
const memoryNeo4jPlugin = {
|
||||
id: "memory-neo4j",
|
||||
name: "Memory (Neo4j)",
|
||||
description:
|
||||
"Neo4j-backed long-term memory with three-signal hybrid search, entity extraction, and knowledge graph",
|
||||
kind: "memory" as const,
|
||||
configSchema: memoryNeo4jConfigSchema,
|
||||
|
||||
register(api: OpenClawPluginApi) {
|
||||
// Parse and validate plugin configuration; extraction settings are resolved
// separately (resolveExtractionConfig takes no plugin config arguments).
const cfg = memoryNeo4jConfigSchema.parse(api.pluginConfig);
const extractionConfig = resolveExtractionConfig();
// Embedding dimensionality is derived from the model name; presumably it
// must match the Neo4j vector index definition — confirm in Neo4jMemoryClient.
const vectorDim = vectorDimsForModel(cfg.embedding.model);

// Create shared resources: one Neo4j client and one embeddings client per
// plugin instance, reused by all tools registered below.
const db = new Neo4jMemoryClient(
  cfg.neo4j.uri,
  cfg.neo4j.username,
  cfg.neo4j.password,
  vectorDim,
  api.logger,
);
const embeddings = new Embeddings(
  cfg.embedding.apiKey,
  cfg.embedding.model,
  cfg.embedding.provider,
  cfg.embedding.baseUrl,
);

api.logger.debug?.(
  `memory-neo4j: registered (uri: ${cfg.neo4j.uri}, provider: ${cfg.embedding.provider}, model: ${cfg.embedding.model}, ` +
    `extraction: ${extractionConfig.enabled ? extractionConfig.model : "disabled"})`,
);
|
||||
|
||||
// ========================================================================
|
||||
// Tools (using factory pattern for agentId)
|
||||
// ========================================================================
|
||||
|
||||
// memory_recall — Three-signal hybrid search
|
||||
// memory_recall — Three-signal hybrid search.
// Registered via a factory so each tool instance is bound to its agentId.
api.registerTool(
  (ctx) => {
    const agentId = ctx.agentId || "default";
    return {
      name: "memory_recall",
      label: "Memory Recall",
      description:
        "Search through long-term memories. Use when you need context about user preferences, past decisions, or previously discussed topics.",
      parameters: Type.Object({
        query: Type.String({ description: "Search query" }),
        limit: Type.Optional(Type.Number({ description: "Max results (default: 5)" })),
      }),
      async execute(_toolCallId: string, params: unknown) {
        const { query, limit = 5 } = params as {
          query: string;
          limit?: number;
        };

        // Graph-signal expansion is only used when entity extraction is on
        // (last argument), since without it the graph has no entities.
        const results = await hybridSearch(
          db,
          embeddings,
          query,
          limit,
          agentId,
          extractionConfig.enabled,
        );

        if (results.length === 0) {
          return {
            content: [{ type: "text", text: "No relevant memories found." }],
            details: { count: 0 },
          };
        }

        // Human-readable ranked list; score rendered as a percentage.
        const text = results
          .map((r, i) => `${i + 1}. [${r.category}] ${r.text} (${(r.score * 100).toFixed(0)}%)`)
          .join("\n");

        // Expose only the fields callers need in `details` (no embeddings
        // or other internal data).
        const sanitizedResults = results.map((r) => ({
          id: r.id,
          text: r.text,
          category: r.category,
          importance: r.importance,
          score: r.score,
        }));

        return {
          content: [
            {
              type: "text",
              text: `Found ${results.length} memories:\n\n${text}`,
            },
          ],
          details: { count: results.length, memories: sanitizedResults },
        };
      },
    };
  },
  { name: "memory_recall" },
);
|
||||
|
||||
// memory_store — Store with background entity extraction
|
||||
// memory_store — Store a memory immediately; entity extraction is deferred.
// Registered via a factory so each instance is bound to its agentId/sessionKey.
api.registerTool(
  (ctx) => {
    const agentId = ctx.agentId || "default";
    const sessionKey = ctx.sessionKey;
    return {
      name: "memory_store",
      label: "Memory Store",
      description:
        "Save important information in long-term memory. Use for preferences, facts, decisions.",
      parameters: Type.Object({
        text: Type.String({ description: "Information to remember" }),
        importance: Type.Optional(
          Type.Number({
            description: "Importance 0-1 (default: 0.7)",
          }),
        ),
        category: Type.Optional(stringEnum(MEMORY_CATEGORIES)),
      }),
      async execute(_toolCallId: string, params: unknown) {
        const {
          text,
          importance = 0.7,
          category = "other",
        } = params as {
          text: string;
          importance?: number;
          category?: MemoryCategory;
        };

        // 1. Generate embedding
        const vector = await embeddings.embed(text);

        // 2. Check for duplicates (vector similarity > 0.95) — near-identical
        // memories are rejected and the existing one is surfaced instead.
        const existing = await db.findSimilar(vector, 0.95, 1);
        if (existing.length > 0) {
          return {
            content: [
              {
                type: "text",
                text: `Similar memory already exists: "${existing[0].text}"`,
              },
            ],
            details: {
              action: "duplicate",
              existingId: existing[0].id,
              existingText: existing[0].text,
            },
          };
        }

        // 3. Store memory immediately (fast path). extractionStatus "pending"
        // queues the memory for the sleep cycle's extraction phase.
        const memoryId = randomUUID();
        await db.storeMemory({
          id: memoryId,
          text,
          embedding: vector,
          importance: Math.min(1, Math.max(0, importance)), // clamp to [0, 1]
          category,
          source: "user" as MemorySource,
          extractionStatus: extractionConfig.enabled ? "pending" : "skipped",
          agentId,
          sessionKey,
        });

        // 4. Extraction is deferred to sleep cycle (like human memory consolidation)
        // See: runSleepCycle() and `openclaw memory sleep` command

        return {
          content: [
            {
              type: "text",
              text: `Stored: "${text.slice(0, 100)}${text.length > 100 ? "..." : ""}"`,
            },
          ],
          details: { action: "created", id: memoryId },
        };
      },
    };
  },
  { name: "memory_store" },
);
|
||||
|
||||
// memory_forget — Delete with cascade
|
||||
api.registerTool(
|
||||
(_ctx) => {
|
||||
return {
|
||||
name: "memory_forget",
|
||||
label: "Memory Forget",
|
||||
description: "Delete specific memories. GDPR-compliant.",
|
||||
parameters: Type.Object({
|
||||
query: Type.Optional(Type.String({ description: "Search to find memory" })),
|
||||
memoryId: Type.Optional(Type.String({ description: "Specific memory ID" })),
|
||||
}),
|
||||
async execute(_toolCallId: string, params: unknown) {
|
||||
const { query, memoryId } = params as {
|
||||
query?: string;
|
||||
memoryId?: string;
|
||||
};
|
||||
|
||||
// Direct delete by ID
|
||||
if (memoryId) {
|
||||
const deleted = await db.deleteMemory(memoryId);
|
||||
if (!deleted) {
|
||||
return {
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: `Memory ${memoryId} not found.`,
|
||||
},
|
||||
],
|
||||
details: { action: "not_found", id: memoryId },
|
||||
};
|
||||
}
|
||||
return {
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: `Memory ${memoryId} forgotten.`,
|
||||
},
|
||||
],
|
||||
details: { action: "deleted", id: memoryId },
|
||||
};
|
||||
}
|
||||
|
||||
// Search-based delete
|
||||
if (query) {
|
||||
const vector = await embeddings.embed(query);
|
||||
const results = await db.vectorSearch(vector, 5, 0.7);
|
||||
|
||||
if (results.length === 0) {
|
||||
return {
|
||||
content: [{ type: "text", text: "No matching memories found." }],
|
||||
details: { found: 0 },
|
||||
};
|
||||
}
|
||||
|
||||
// Auto-delete if single high-confidence match
|
||||
if (results.length === 1 && results[0].score > 0.9) {
|
||||
await db.deleteMemory(results[0].id);
|
||||
return {
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: `Forgotten: "${results[0].text}"`,
|
||||
},
|
||||
],
|
||||
details: { action: "deleted", id: results[0].id },
|
||||
};
|
||||
}
|
||||
|
||||
// Multiple candidates — ask user to specify
|
||||
const list = results.map((r) => `- [${r.id}] ${r.text.slice(0, 60)}...`).join("\n");
|
||||
|
||||
const sanitizedCandidates = results.map((r) => ({
|
||||
id: r.id,
|
||||
text: r.text,
|
||||
category: r.category,
|
||||
score: r.score,
|
||||
}));
|
||||
|
||||
return {
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: `Found ${results.length} candidates. Specify memoryId:\n${list}`,
|
||||
},
|
||||
],
|
||||
details: {
|
||||
action: "candidates",
|
||||
candidates: sanitizedCandidates,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
content: [{ type: "text", text: "Provide query or memoryId." }],
|
||||
details: { error: "missing_param" },
|
||||
};
|
||||
},
|
||||
};
|
||||
},
|
||||
{ name: "memory_forget" },
|
||||
);
|
||||
|
||||
// ========================================================================
|
||||
// CLI Commands
|
||||
// ========================================================================
|
||||
|
||||
api.registerCli(
|
||||
({ program }) => {
|
||||
// Find existing memory command or create fallback
|
||||
let memoryCmd = program.commands.find((cmd) => cmd.name() === "memory");
|
||||
if (!memoryCmd) {
|
||||
// Fallback if core memory CLI not registered yet
|
||||
memoryCmd = program.command("memory").description("Memory commands");
|
||||
}
|
||||
|
||||
// Add neo4j memory subcommand group
|
||||
const memory = memoryCmd.command("neo4j").description("Neo4j graph memory commands");
|
||||
|
||||
memory
|
||||
.command("list")
|
||||
.description("List memory counts by agent and category")
|
||||
.option("--json", "Output as JSON")
|
||||
.action(async (opts: { json?: boolean }) => {
|
||||
try {
|
||||
await db.ensureInitialized();
|
||||
const stats = await db.getMemoryStats();
|
||||
|
||||
if (opts.json) {
|
||||
console.log(JSON.stringify(stats, null, 2));
|
||||
return;
|
||||
}
|
||||
|
||||
if (stats.length === 0) {
|
||||
console.log("No memories stored.");
|
||||
return;
|
||||
}
|
||||
|
||||
// Group by agentId
|
||||
const byAgent = new Map<
|
||||
string,
|
||||
Array<{ category: string; count: number; avgImportance: number }>
|
||||
>();
|
||||
for (const row of stats) {
|
||||
const list = byAgent.get(row.agentId) || [];
|
||||
list.push({
|
||||
category: row.category,
|
||||
count: row.count,
|
||||
avgImportance: row.avgImportance,
|
||||
});
|
||||
byAgent.set(row.agentId, list);
|
||||
}
|
||||
|
||||
// Print table for each agent
|
||||
for (const [agentId, categories] of byAgent) {
|
||||
const total = categories.reduce((sum, c) => sum + c.count, 0);
|
||||
console.log(`\n┌─ ${agentId} (${total} total)`);
|
||||
console.log("│");
|
||||
console.log("│ Category Count Avg Importance");
|
||||
console.log("│ ─────────────────────────────────────");
|
||||
for (const { category, count, avgImportance } of categories) {
|
||||
const cat = category.padEnd(12);
|
||||
const cnt = String(count).padStart(5);
|
||||
const imp = (avgImportance * 100).toFixed(0).padStart(3) + "%";
|
||||
console.log(`│ ${cat} ${cnt} ${imp}`);
|
||||
}
|
||||
console.log("└");
|
||||
}
|
||||
console.log("");
|
||||
} catch (err) {
|
||||
console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
|
||||
process.exitCode = 1;
|
||||
}
|
||||
});
|
||||
|
||||
memory
|
||||
.command("search")
|
||||
.description("Search memories")
|
||||
.argument("<query>", "Search query")
|
||||
.option("--limit <n>", "Max results", "5")
|
||||
.action(async (query: string, opts: { limit: string }) => {
|
||||
try {
|
||||
const results = await hybridSearch(
|
||||
db,
|
||||
embeddings,
|
||||
query,
|
||||
parseInt(opts.limit, 10),
|
||||
"default",
|
||||
extractionConfig.enabled,
|
||||
);
|
||||
const output = results.map((r) => ({
|
||||
id: r.id,
|
||||
text: r.text,
|
||||
category: r.category,
|
||||
importance: r.importance,
|
||||
score: r.score,
|
||||
}));
|
||||
console.log(JSON.stringify(output, null, 2));
|
||||
} catch (err) {
|
||||
console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
|
||||
process.exitCode = 1;
|
||||
}
|
||||
});
|
||||
|
||||
memory
|
||||
.command("stats")
|
||||
.description("Show memory statistics and configuration")
|
||||
.action(async () => {
|
||||
try {
|
||||
await db.ensureInitialized();
|
||||
const stats = await db.getMemoryStats();
|
||||
const total = stats.reduce((sum, s) => sum + s.count, 0);
|
||||
|
||||
console.log("\nMemory (Neo4j) Statistics");
|
||||
console.log("─────────────────────────");
|
||||
console.log(`Total memories: ${total}`);
|
||||
console.log(`Neo4j URI: ${cfg.neo4j.uri}`);
|
||||
console.log(`Embedding: ${cfg.embedding.provider}/${cfg.embedding.model}`);
|
||||
console.log(
|
||||
`Extraction: ${extractionConfig.enabled ? extractionConfig.model : "disabled"}`,
|
||||
);
|
||||
console.log(`Auto-capture: ${cfg.autoCapture ? "enabled" : "disabled"}`);
|
||||
console.log(`Auto-recall: ${cfg.autoRecall ? "enabled" : "disabled"}`);
|
||||
console.log(`Core memory: ${cfg.coreMemory.enabled ? "enabled" : "disabled"}`);
|
||||
|
||||
if (stats.length > 0) {
|
||||
// Group by category across all agents
|
||||
const byCategory = new Map<string, number>();
|
||||
for (const row of stats) {
|
||||
byCategory.set(row.category, (byCategory.get(row.category) ?? 0) + row.count);
|
||||
}
|
||||
console.log("\nBy Category:");
|
||||
for (const [category, count] of byCategory) {
|
||||
console.log(` ${category.padEnd(12)} ${count}`);
|
||||
}
|
||||
|
||||
// Show agent count
|
||||
const agents = new Set(stats.map((s) => s.agentId));
|
||||
console.log(`\nAgents: ${agents.size} (${[...agents].join(", ")})`);
|
||||
}
|
||||
console.log("");
|
||||
} catch (err) {
|
||||
console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
|
||||
process.exitCode = 1;
|
||||
}
|
||||
});
|
||||
|
||||
memory
|
||||
.command("sleep")
|
||||
.description(
|
||||
"Run sleep cycle — consolidate memories (dedup → promote → decay → extract → cleanup)",
|
||||
)
|
||||
.option("--agent <id>", "Agent id (default: all agents)")
|
||||
.option("--dedup-threshold <n>", "Vector similarity threshold for dedup (default: 0.95)")
|
||||
.option(
|
||||
"--promotion-threshold <n>",
|
||||
"Min importance for auto-promotion to core (default: 0.9)",
|
||||
)
|
||||
.option("--promotion-min-age <days>", "Min age in days before promotion (default: 7)")
|
||||
.option("--decay-threshold <n>", "Decay score threshold for pruning (default: 0.1)")
|
||||
.option("--decay-half-life <days>", "Base half-life in days (default: 30)")
|
||||
.option("--batch-size <n>", "Extraction batch size (default: 50)")
|
||||
.option("--delay <ms>", "Delay between extraction batches in ms (default: 1000)")
|
||||
.action(
|
||||
async (opts: {
|
||||
agent?: string;
|
||||
dedupThreshold?: string;
|
||||
promotionThreshold?: string;
|
||||
promotionMinAge?: string;
|
||||
decayThreshold?: string;
|
||||
decayHalfLife?: string;
|
||||
batchSize?: string;
|
||||
delay?: string;
|
||||
}) => {
|
||||
console.log("\n🌙 Memory Sleep Cycle");
|
||||
console.log("═════════════════════════════════════════════════════════════");
|
||||
console.log("Five-phase memory consolidation (like human sleep):\n");
|
||||
console.log(" Phase 1: Deduplication — Merge near-duplicate memories");
|
||||
console.log(" Phase 2: Core Promotion — Promote high-importance to core");
|
||||
console.log(" Phase 3: Decay & Pruning — Remove stale low-importance memories");
|
||||
console.log(" Phase 4: Extraction — Form entity relationships");
|
||||
console.log(" Phase 5: Orphan Cleanup — Remove disconnected nodes\n");
|
||||
|
||||
try {
|
||||
await db.ensureInitialized();
|
||||
|
||||
const result = await runSleepCycle(db, embeddings, extractionConfig, api.logger, {
|
||||
agentId: opts.agent,
|
||||
dedupThreshold: opts.dedupThreshold ? parseFloat(opts.dedupThreshold) : undefined,
|
||||
promotionImportanceThreshold: opts.promotionThreshold
|
||||
? parseFloat(opts.promotionThreshold)
|
||||
: undefined,
|
||||
promotionMinAgeDays: opts.promotionMinAge
|
||||
? parseInt(opts.promotionMinAge, 10)
|
||||
: undefined,
|
||||
decayRetentionThreshold: opts.decayThreshold
|
||||
? parseFloat(opts.decayThreshold)
|
||||
: undefined,
|
||||
decayBaseHalfLifeDays: opts.decayHalfLife
|
||||
? parseInt(opts.decayHalfLife, 10)
|
||||
: undefined,
|
||||
extractionBatchSize: opts.batchSize ? parseInt(opts.batchSize, 10) : undefined,
|
||||
extractionDelayMs: opts.delay ? parseInt(opts.delay, 10) : undefined,
|
||||
onPhaseStart: (phase) => {
|
||||
const phaseNames = {
|
||||
dedup: "Phase 1: Deduplication",
|
||||
promotion: "Phase 2: Core Promotion",
|
||||
decay: "Phase 3: Decay & Pruning",
|
||||
extraction: "Phase 4: Extraction",
|
||||
cleanup: "Phase 5: Orphan Cleanup",
|
||||
};
|
||||
console.log(`\n▶ ${phaseNames[phase]}`);
|
||||
console.log("─────────────────────────────────────────────────────────────");
|
||||
},
|
||||
onProgress: (_phase, message) => {
|
||||
console.log(` ${message}`);
|
||||
},
|
||||
});
|
||||
|
||||
console.log("\n═════════════════════════════════════════════════════════════");
|
||||
console.log(`✅ Sleep cycle complete in ${(result.durationMs / 1000).toFixed(1)}s`);
|
||||
console.log("─────────────────────────────────────────────────────────────");
|
||||
console.log(
|
||||
` Deduplication: ${result.dedup.clustersFound} clusters → ${result.dedup.memoriesMerged} merged`,
|
||||
);
|
||||
console.log(
|
||||
` Promotion: ${result.promotion.promoted}/${result.promotion.candidatesFound} promoted to core`,
|
||||
);
|
||||
console.log(` Decay/Pruning: ${result.decay.memoriesPruned} memories pruned`);
|
||||
console.log(
|
||||
` Extraction: ${result.extraction.succeeded}/${result.extraction.total} extracted` +
|
||||
(result.extraction.failed > 0 ? ` (${result.extraction.failed} failed)` : ""),
|
||||
);
|
||||
console.log(
|
||||
` Cleanup: ${result.cleanup.entitiesRemoved} entities, ${result.cleanup.tagsRemoved} tags removed`,
|
||||
);
|
||||
if (result.aborted) {
|
||||
console.log("\n⚠️ Sleep cycle was aborted before completion.");
|
||||
}
|
||||
console.log("");
|
||||
} catch (err) {
|
||||
console.error(
|
||||
`\n❌ Sleep cycle failed: ${err instanceof Error ? err.message : String(err)}`,
|
||||
);
|
||||
process.exitCode = 1;
|
||||
}
|
||||
},
|
||||
);
|
||||
|
||||
memory
|
||||
.command("promote")
|
||||
.description("Manually promote a memory to core status")
|
||||
.argument("<id>", "Memory ID to promote")
|
||||
.action(async (id: string) => {
|
||||
try {
|
||||
await db.ensureInitialized();
|
||||
const promoted = await db.promoteToCore([id]);
|
||||
if (promoted > 0) {
|
||||
console.log(`✅ Memory ${id} promoted to core.`);
|
||||
} else {
|
||||
console.log(`❌ Memory ${id} not found.`);
|
||||
process.exitCode = 1;
|
||||
}
|
||||
} catch (err) {
|
||||
console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
|
||||
process.exitCode = 1;
|
||||
}
|
||||
});
|
||||
},
|
||||
{ commands: [] }, // Adds subcommands to existing "memory" command, no conflict
|
||||
);
|
||||
|
||||
// ========================================================================
|
||||
// Lifecycle Hooks
|
||||
// ========================================================================
|
||||
|
||||
// Track sessions where core memories have already been loaded (skip on subsequent turns).
|
||||
// NOTE: This is in-memory and will be cleared on gateway restart. The agent_bootstrap
|
||||
// hook below also checks for existing conversation history to avoid re-injecting core
|
||||
// memories after restarts.
|
||||
const bootstrappedSessions = new Set<string>();
|
||||
|
||||
// After compaction: clear bootstrap flag so core memories get re-injected
|
||||
if (cfg.coreMemory.enabled) {
|
||||
api.on("after_compaction", async (_event, ctx) => {
|
||||
if (ctx.sessionKey) {
|
||||
bootstrappedSessions.delete(ctx.sessionKey);
|
||||
api.logger.info?.(
|
||||
`memory-neo4j: cleared bootstrap flag for session ${ctx.sessionKey} after compaction`,
|
||||
);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Auto-recall: inject relevant memories before agent starts
|
||||
api.logger.debug?.(`memory-neo4j: autoRecall=${cfg.autoRecall}`);
|
||||
if (cfg.autoRecall) {
|
||||
api.logger.debug?.("memory-neo4j: registering before_agent_start hook for auto-recall");
|
||||
api.on("before_agent_start", async (event, ctx) => {
|
||||
if (!event.prompt || event.prompt.length < 5) {
|
||||
return;
|
||||
}
|
||||
|
||||
const agentId = ctx.agentId || "default";
|
||||
|
||||
// Truncate prompt to avoid exceeding embedding model context length
|
||||
// ~6000 chars is safe for most embedding models (leaves headroom for 2k tokens)
|
||||
const MAX_QUERY_CHARS = 6000;
|
||||
const query =
|
||||
event.prompt.length > MAX_QUERY_CHARS
|
||||
? event.prompt.slice(0, MAX_QUERY_CHARS)
|
||||
: event.prompt;
|
||||
|
||||
try {
|
||||
const results = await hybridSearch(
|
||||
db,
|
||||
embeddings,
|
||||
query,
|
||||
3,
|
||||
agentId,
|
||||
extractionConfig.enabled,
|
||||
);
|
||||
|
||||
if (results.length === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
const memoryContext = results.map((r) => `- [${r.category}] ${r.text}`).join("\n");
|
||||
|
||||
api.logger.info?.(`memory-neo4j: injecting ${results.length} memories into context`);
|
||||
api.logger.debug?.(
|
||||
`memory-neo4j: auto-recall memories: ${JSON.stringify(results.map((r) => ({ id: r.id, text: r.text.slice(0, 80), category: r.category, score: r.score })))}`,
|
||||
);
|
||||
|
||||
return {
|
||||
prependContext: `<relevant-memories>\nThe following memories may be relevant to this conversation:\n${memoryContext}\n</relevant-memories>`,
|
||||
};
|
||||
} catch (err) {
|
||||
api.logger.warn(`memory-neo4j: auto-recall failed: ${String(err)}`);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Core memories: inject as virtual MEMORY.md at bootstrap time (scoped by agentId).
|
||||
// Only runs on new sessions and after compaction (not every turn).
|
||||
api.logger.debug?.(`memory-neo4j: coreMemory.enabled=${cfg.coreMemory.enabled}`);
|
||||
if (cfg.coreMemory.enabled) {
|
||||
api.logger.debug?.("memory-neo4j: registering agent_bootstrap hook for core memories");
|
||||
api.on("agent_bootstrap", async (event, ctx) => {
|
||||
const sessionKey = ctx.sessionKey;
|
||||
|
||||
// Skip if this session was already bootstrapped (avoid re-loading every turn).
|
||||
// The after_compaction hook clears the flag so we re-inject after compaction.
|
||||
if (sessionKey && bootstrappedSessions.has(sessionKey)) {
|
||||
api.logger.debug?.(
|
||||
`memory-neo4j: skipping core memory injection for already-bootstrapped session=${sessionKey}`,
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
// Log when we're about to inject core memories for a session that wasn't tracked
|
||||
// This helps diagnose cases where context might be lost after gateway restarts
|
||||
if (sessionKey) {
|
||||
api.logger.debug?.(
|
||||
`memory-neo4j: session=${sessionKey} not in bootstrappedSessions (size=${bootstrappedSessions.size}), will check for core memories`,
|
||||
);
|
||||
}
|
||||
|
||||
try {
|
||||
const agentId = ctx.agentId || "default";
|
||||
const maxEntries = cfg.coreMemory.maxEntries;
|
||||
|
||||
api.logger.debug?.(
|
||||
`memory-neo4j: loading core memories for agent=${agentId} session=${sessionKey ?? "unknown"}`,
|
||||
);
|
||||
// Core memories are always included (no importance filter) - if marked as core, it's important
|
||||
// Results are ordered by importance desc, so most important come first up to maxEntries
|
||||
const coreMemories = await db.listByCategory("core", maxEntries, 0, agentId);
|
||||
|
||||
if (coreMemories.length === 0) {
|
||||
if (sessionKey) {
|
||||
bootstrappedSessions.add(sessionKey);
|
||||
}
|
||||
api.logger.debug?.(
|
||||
`memory-neo4j: no core memories found for agent=${agentId}, marking session as bootstrapped`,
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
// Format core memories into a MEMORY.md-style document
|
||||
let content = "# Core Memory\n\n";
|
||||
content += "*Persistent context loaded from long-term memory*\n\n";
|
||||
for (const mem of coreMemories) {
|
||||
content += `- ${mem.text}\n`;
|
||||
}
|
||||
|
||||
// Find and replace MEMORY.md in the files list, or add it
|
||||
const files = [...event.files];
|
||||
const memoryIndex = files.findIndex(
|
||||
(f) => f.name === "MEMORY.md" || f.name === "memory.md",
|
||||
);
|
||||
|
||||
const virtualFile = {
|
||||
name: "MEMORY.md" as const,
|
||||
path: "memory://neo4j/core-memory",
|
||||
content,
|
||||
missing: false,
|
||||
};
|
||||
|
||||
const action = memoryIndex >= 0 ? "replaced" : "added";
|
||||
if (memoryIndex >= 0) {
|
||||
files[memoryIndex] = virtualFile;
|
||||
} else {
|
||||
files.push(virtualFile);
|
||||
}
|
||||
|
||||
if (sessionKey) {
|
||||
bootstrappedSessions.add(sessionKey);
|
||||
}
|
||||
// Log at info level when actually injecting, debug for skips
|
||||
api.logger.info?.(
|
||||
`memory-neo4j: ${action} MEMORY.md with ${coreMemories.length} core memories for agent=${agentId} session=${sessionKey ?? "unknown"}`,
|
||||
);
|
||||
|
||||
return { files };
|
||||
} catch (err) {
|
||||
api.logger.warn(`memory-neo4j: core memory injection failed: ${String(err)}`);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Auto-capture: LLM-based decision on what to store from conversations
|
||||
api.logger.debug?.(
|
||||
`memory-neo4j: autoCapture=${cfg.autoCapture}, extraction.enabled=${extractionConfig.enabled}`,
|
||||
);
|
||||
if (cfg.autoCapture) {
|
||||
api.logger.debug?.("memory-neo4j: registering agent_end hook for auto-capture");
|
||||
api.on("agent_end", async (event, ctx) => {
|
||||
api.logger.debug?.(
|
||||
`memory-neo4j: agent_end fired (success=${event.success}, messages=${event.messages?.length ?? 0})`,
|
||||
);
|
||||
if (!event.success || !event.messages || event.messages.length === 0) {
|
||||
api.logger.debug?.("memory-neo4j: skipping - no success or empty messages");
|
||||
return;
|
||||
}
|
||||
|
||||
const agentId = ctx.agentId || "default";
|
||||
const sessionKey = ctx.sessionKey;
|
||||
|
||||
try {
|
||||
if (extractionConfig.enabled) {
|
||||
// LLM-based auto-capture (Decision Q8)
|
||||
const userMessages = extractUserMessages(event.messages);
|
||||
if (userMessages.length === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
const items = await evaluateAutoCapture(userMessages, extractionConfig);
|
||||
if (items.length === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
let stored = 0;
|
||||
for (const item of items) {
|
||||
try {
|
||||
const vector = await embeddings.embed(item.text);
|
||||
|
||||
// Check for duplicates
|
||||
const existing = await db.findSimilar(vector, 0.95, 1);
|
||||
if (existing.length > 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const memoryId = randomUUID();
|
||||
await db.storeMemory({
|
||||
id: memoryId,
|
||||
text: item.text,
|
||||
embedding: vector,
|
||||
importance: item.importance,
|
||||
category: item.category,
|
||||
source: "auto-capture",
|
||||
extractionStatus: "pending",
|
||||
agentId,
|
||||
sessionKey,
|
||||
});
|
||||
|
||||
// Extraction deferred to sleep cycle (like human memory consolidation)
|
||||
stored++;
|
||||
} catch (err) {
|
||||
api.logger.debug?.(`memory-neo4j: auto-capture item failed: ${String(err)}`);
|
||||
}
|
||||
}
|
||||
|
||||
if (stored > 0) {
|
||||
api.logger.info(`memory-neo4j: auto-captured ${stored} memories (LLM-based)`);
|
||||
}
|
||||
} else {
|
||||
// Fallback: rule-based capture (no extraction API key)
|
||||
const userMessages = extractUserMessages(event.messages);
|
||||
if (userMessages.length === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
const toCapture = userMessages.filter(
|
||||
(text) => text.length >= 10 && text.length <= 500 && shouldCaptureRuleBased(text),
|
||||
);
|
||||
if (toCapture.length === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
let stored = 0;
|
||||
for (const text of toCapture.slice(0, 3)) {
|
||||
const category = detectCategory(text);
|
||||
const vector = await embeddings.embed(text);
|
||||
|
||||
const existing = await db.findSimilar(vector, 0.95, 1);
|
||||
if (existing.length > 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
await db.storeMemory({
|
||||
id: randomUUID(),
|
||||
text,
|
||||
embedding: vector,
|
||||
importance: 0.7,
|
||||
category,
|
||||
source: "auto-capture",
|
||||
extractionStatus: "skipped",
|
||||
agentId,
|
||||
sessionKey,
|
||||
});
|
||||
stored++;
|
||||
}
|
||||
|
||||
if (stored > 0) {
|
||||
api.logger.info(`memory-neo4j: auto-captured ${stored} memories (rule-based)`);
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
api.logger.warn(`memory-neo4j: auto-capture failed: ${String(err)}`);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// ========================================================================
|
||||
// Service
|
||||
// ========================================================================
|
||||
|
||||
api.registerService({
|
||||
id: "memory-neo4j",
|
||||
start: async () => {
|
||||
try {
|
||||
await db.ensureInitialized();
|
||||
api.logger.info(
|
||||
`memory-neo4j: service started (uri: ${cfg.neo4j.uri}, model: ${cfg.embedding.model})`,
|
||||
);
|
||||
} catch (err) {
|
||||
api.logger.error(
|
||||
`memory-neo4j: failed to start — ${String(err)}. Memory tools will attempt lazy initialization.`,
|
||||
);
|
||||
// Don't throw — allow graceful degradation.
|
||||
// Tools will retry initialization on first use.
|
||||
}
|
||||
},
|
||||
stop: async () => {
|
||||
await db.close();
|
||||
api.logger.info("memory-neo4j: service stopped");
|
||||
},
|
||||
});
|
||||
},
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// Rule-based capture filter (fallback when no extraction API key)
|
||||
// ============================================================================
|
||||
|
||||
const MEMORY_TRIGGERS = [
|
||||
/remember|zapamatuj|pamatuj/i,
|
||||
/prefer|radši|nechci|preferuji/i,
|
||||
/decided|rozhodli|budeme používat/i,
|
||||
/\+\d{10,}/,
|
||||
/[\w.-]+@[\w.-]+\.\w+/,
|
||||
/my\s+\w+\s+is|is\s+my/i,
|
||||
/i (like|prefer|hate|love|want|need)/i,
|
||||
/always|never|important/i,
|
||||
];
|
||||
|
||||
function shouldCaptureRuleBased(text: string): boolean {
|
||||
if (text.includes("<relevant-memories>")) {
|
||||
return false;
|
||||
}
|
||||
if (text.startsWith("<") && text.includes("</")) {
|
||||
return false;
|
||||
}
|
||||
if (text.includes("**") && text.includes("\n-")) {
|
||||
return false;
|
||||
}
|
||||
const emojiCount = (text.match(/[\u{1F300}-\u{1F9FF}]/gu) || []).length;
|
||||
if (emojiCount > 3) {
|
||||
return false;
|
||||
}
|
||||
return MEMORY_TRIGGERS.some((r) => r.test(text));
|
||||
}
|
||||
|
||||
function detectCategory(text: string): MemoryCategory {
|
||||
const lower = text.toLowerCase();
|
||||
if (/prefer|radši|like|love|hate|want/i.test(lower)) {
|
||||
return "preference";
|
||||
}
|
||||
if (/decided|rozhodli|will use|budeme/i.test(lower)) {
|
||||
return "decision";
|
||||
}
|
||||
if (/\+\d{10,}|@[\w.-]+\.\w+|is called|jmenuje se/i.test(lower)) {
|
||||
return "entity";
|
||||
}
|
||||
if (/is|are|has|have|je|má|jsou/i.test(lower)) {
|
||||
return "fact";
|
||||
}
|
||||
return "other";
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Export
|
||||
// ============================================================================
|
||||
|
||||
export default memoryNeo4jPlugin;
|
||||
1292
extensions/memory-neo4j/neo4j-client.ts
Normal file
1292
extensions/memory-neo4j/neo4j-client.ts
Normal file
File diff suppressed because it is too large
Load Diff
99
extensions/memory-neo4j/openclaw.plugin.json
Normal file
99
extensions/memory-neo4j/openclaw.plugin.json
Normal file
@@ -0,0 +1,99 @@
|
||||
{
|
||||
"id": "memory-neo4j",
|
||||
"kind": "memory",
|
||||
"uiHints": {
|
||||
"embedding.provider": {
|
||||
"label": "Embedding Provider",
|
||||
"placeholder": "openai",
|
||||
"help": "Provider for embeddings: 'openai' or 'ollama'"
|
||||
},
|
||||
"embedding.apiKey": {
|
||||
"label": "API Key",
|
||||
"sensitive": true,
|
||||
"placeholder": "sk-proj-...",
|
||||
"help": "API key for OpenAI embeddings (not needed for Ollama)"
|
||||
},
|
||||
"embedding.model": {
|
||||
"label": "Embedding Model",
|
||||
"placeholder": "text-embedding-3-small",
|
||||
"help": "Embedding model to use (e.g., text-embedding-3-small for OpenAI, mxbai-embed-large for Ollama)"
|
||||
},
|
||||
"embedding.baseUrl": {
|
||||
"label": "Base URL",
|
||||
"placeholder": "http://localhost:11434",
|
||||
"help": "Base URL for Ollama API (optional)"
|
||||
},
|
||||
"neo4j.uri": {
|
||||
"label": "Neo4j URI",
|
||||
"placeholder": "bolt://localhost:7687",
|
||||
"help": "Bolt connection URI for your Neo4j instance"
|
||||
},
|
||||
"neo4j.user": {
|
||||
"label": "Neo4j Username",
|
||||
"placeholder": "neo4j"
|
||||
},
|
||||
"neo4j.password": {
|
||||
"label": "Neo4j Password",
|
||||
"sensitive": true
|
||||
},
|
||||
"autoCapture": {
|
||||
"label": "Auto-Capture",
|
||||
"help": "Automatically capture important information from conversations"
|
||||
},
|
||||
"autoRecall": {
|
||||
"label": "Auto-Recall",
|
||||
"help": "Automatically inject relevant memories into context"
|
||||
}
|
||||
},
|
||||
"configSchema": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"embedding": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"provider": {
|
||||
"type": "string",
|
||||
"enum": ["openai", "ollama"]
|
||||
},
|
||||
"apiKey": {
|
||||
"type": "string"
|
||||
},
|
||||
"model": {
|
||||
"type": "string"
|
||||
},
|
||||
"baseUrl": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"neo4j": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"uri": {
|
||||
"type": "string"
|
||||
},
|
||||
"user": {
|
||||
"type": "string"
|
||||
},
|
||||
"username": {
|
||||
"type": "string"
|
||||
},
|
||||
"password": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": ["uri"]
|
||||
},
|
||||
"autoCapture": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"autoRecall": {
|
||||
"type": "boolean"
|
||||
}
|
||||
},
|
||||
"required": ["neo4j"]
|
||||
}
|
||||
}
|
||||
19
extensions/memory-neo4j/package.json
Normal file
19
extensions/memory-neo4j/package.json
Normal file
@@ -0,0 +1,19 @@
|
||||
{
|
||||
"name": "@openclaw/memory-neo4j",
|
||||
"version": "2026.2.2",
|
||||
"description": "OpenClaw Neo4j-backed long-term memory plugin with three-signal hybrid search, entity extraction, and knowledge graph",
|
||||
"type": "module",
|
||||
"dependencies": {
|
||||
"@sinclair/typebox": "0.34.48",
|
||||
"neo4j-driver": "^5.27.0",
|
||||
"openai": "^6.17.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"openclaw": "workspace:*"
|
||||
},
|
||||
"openclaw": {
|
||||
"extensions": [
|
||||
"./index.ts"
|
||||
]
|
||||
}
|
||||
}
|
||||
174
extensions/memory-neo4j/schema.ts
Normal file
174
extensions/memory-neo4j/schema.ts
Normal file
@@ -0,0 +1,174 @@
|
||||
/**
|
||||
* Graph schema types, Cypher query templates, and constants for memory-neo4j.
|
||||
*/
|
||||
|
||||
// ============================================================================
|
||||
// Node Types
|
||||
// ============================================================================
|
||||
|
||||
export type MemoryCategory = "preference" | "fact" | "decision" | "entity" | "other";
|
||||
export type EntityType = "person" | "organization" | "location" | "event" | "concept";
|
||||
export type ExtractionStatus = "pending" | "complete" | "failed" | "skipped";
|
||||
export type MemorySource = "user" | "auto-capture" | "memory-watcher" | "import";
|
||||
|
||||
export type MemoryNode = {
|
||||
id: string;
|
||||
text: string;
|
||||
embedding: number[];
|
||||
importance: number;
|
||||
category: MemoryCategory;
|
||||
source: MemorySource;
|
||||
createdAt: string;
|
||||
updatedAt: string;
|
||||
extractionStatus: ExtractionStatus;
|
||||
agentId: string;
|
||||
sessionKey?: string;
|
||||
};
|
||||
|
||||
export type EntityNode = {
|
||||
id: string;
|
||||
name: string;
|
||||
type: EntityType;
|
||||
aliases: string[];
|
||||
embedding?: number[];
|
||||
description?: string;
|
||||
firstSeen: string;
|
||||
lastSeen: string;
|
||||
mentionCount: number;
|
||||
};
|
||||
|
||||
export type TagNode = {
|
||||
id: string;
|
||||
name: string;
|
||||
category: string;
|
||||
createdAt: string;
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// Extraction Types
|
||||
// ============================================================================
|
||||
|
||||
export type ExtractedEntity = {
|
||||
name: string;
|
||||
type: EntityType;
|
||||
aliases?: string[];
|
||||
description?: string;
|
||||
};
|
||||
|
||||
export type ExtractedRelationship = {
|
||||
source: string;
|
||||
target: string;
|
||||
type: string;
|
||||
confidence: number;
|
||||
};
|
||||
|
||||
export type ExtractedTag = {
|
||||
name: string;
|
||||
category: string;
|
||||
};
|
||||
|
||||
export type ExtractionResult = {
|
||||
entities: ExtractedEntity[];
|
||||
relationships: ExtractedRelationship[];
|
||||
tags: ExtractedTag[];
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// Auto-Capture Types
|
||||
// ============================================================================
|
||||
|
||||
export type CaptureItem = {
|
||||
text: string;
|
||||
category: MemoryCategory;
|
||||
importance: number;
|
||||
};
|
||||
|
||||
export type CaptureDecision = {
|
||||
memories: CaptureItem[];
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// Search Types
|
||||
// ============================================================================
|
||||
|
||||
export type SearchSignalResult = {
|
||||
id: string;
|
||||
text: string;
|
||||
category: string;
|
||||
importance: number;
|
||||
createdAt: string;
|
||||
score: number;
|
||||
};
|
||||
|
||||
export type HybridSearchResult = {
|
||||
id: string;
|
||||
text: string;
|
||||
category: string;
|
||||
importance: number;
|
||||
createdAt: string;
|
||||
score: number;
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// Input Types
|
||||
// ============================================================================
|
||||
|
||||
/** Input for persisting a new memory node. */
export type StoreMemoryInput = {
  /** Unique memory id. */
  id: string;
  /** Memory text. */
  text: string;
  /** Pre-computed embedding vector for the text. */
  embedding: number[];
  /** Importance weight assigned at capture time. */
  importance: number;
  category: MemoryCategory;
  source: MemorySource;
  /** Tracks whether entity/relationship extraction has run for this memory. */
  extractionStatus: ExtractionStatus;
  /** Owning agent; searches are scoped by this id (see hybridSearch's agentId). */
  agentId: string;
  /** Optional session the memory was captured in. */
  sessionKey?: string;
};
|
||||
|
||||
/** Input for creating or merging an entity node. */
export type MergeEntityInput = {
  /** Unique entity id. */
  id: string;
  /** Canonical entity name. */
  name: string;
  type: EntityType;
  /** Alternate names, if any. */
  aliases?: string[];
  /** Optional free-text description. */
  description?: string;
  /** Optional embedding for the entity (e.g. of its name/description). */
  embedding?: number[];
};
|
||||
|
||||
// ============================================================================
|
||||
// Constants
|
||||
// ============================================================================
|
||||
|
||||
/** Allowed memory category labels — runtime counterpart of MemoryCategory (confirm against the type declaration). */
export const MEMORY_CATEGORIES = ["preference", "fact", "decision", "entity", "other"] as const;
|
||||
|
||||
/** Allowed entity type labels — runtime counterpart of EntityType (confirm against the type declaration). */
export const ENTITY_TYPES = ["person", "organization", "location", "event", "concept"] as const;
|
||||
|
||||
export const ALLOWED_RELATIONSHIP_TYPES = new Set([
|
||||
"WORKS_AT",
|
||||
"LIVES_AT",
|
||||
"KNOWS",
|
||||
"MARRIED_TO",
|
||||
"PREFERS",
|
||||
"DECIDED",
|
||||
"RELATED_TO",
|
||||
]);
|
||||
|
||||
// ============================================================================
|
||||
// Lucene Helpers
|
||||
// ============================================================================
|
||||
|
||||
const LUCENE_SPECIAL_CHARS = /[+\-&|!(){}[\]^"~*?:\\/]/g;
|
||||
|
||||
/**
|
||||
* Escape special characters for Lucene fulltext search queries.
|
||||
*/
|
||||
export function escapeLucene(query: string): string {
|
||||
return query.replace(LUCENE_SPECIAL_CHARS, "\\$&");
|
||||
}
|
||||
|
||||
/**
 * Validate that a relationship type is in the allowed set.
 * Prevents Cypher injection via dynamic relationship type.
 *
 * @param type Candidate relationship type (e.g. "WORKS_AT"); match is case-sensitive.
 * @returns true when the type is present in ALLOWED_RELATIONSHIP_TYPES.
 */
export function validateRelationshipType(type: string): boolean {
  return ALLOWED_RELATIONSHIP_TYPES.has(type);
}
|
||||
257
extensions/memory-neo4j/search.ts
Normal file
257
extensions/memory-neo4j/search.ts
Normal file
@@ -0,0 +1,257 @@
|
||||
/**
|
||||
* Three-signal hybrid search with query-adaptive RRF fusion.
|
||||
*
|
||||
* Combines:
|
||||
* Signal 1: Vector similarity (HNSW cosine)
|
||||
* Signal 2: BM25 full-text keyword matching
|
||||
* Signal 3: Graph traversal (entity → MENTIONS ← memory)
|
||||
*
|
||||
* Fused using confidence-weighted Reciprocal Rank Fusion (RRF)
|
||||
* with query-adaptive signal weights.
|
||||
*
|
||||
 * Adapted from a prior confidence-weighted RRF implementation (ontology project, rrf.py).
|
||||
*/
|
||||
|
||||
import type { Embeddings } from "./embeddings.js";
|
||||
import type { Neo4jMemoryClient } from "./neo4j-client.js";
|
||||
import type { HybridSearchResult, SearchSignalResult } from "./schema.js";
|
||||
|
||||
// ============================================================================
|
||||
// Query Classification
|
||||
// ============================================================================
|
||||
|
||||
/** Query classes used to pick adaptive RRF signal weights (see classifyQuery). */
export type QueryType = "short" | "entity" | "long" | "default";
|
||||
|
||||
/**
|
||||
* Classify a query to determine adaptive signal weights.
|
||||
*
|
||||
* - short (1-2 words): BM25 excels at exact keyword matching
|
||||
* - entity (proper nouns detected): Graph traversal finds connected memories
|
||||
* - long (5+ words): Vector captures semantic intent better
|
||||
* - default: balanced weights
|
||||
*/
|
||||
export function classifyQuery(query: string): QueryType {
|
||||
const words = query.trim().split(/\s+/);
|
||||
const wordCount = words.length;
|
||||
|
||||
// Short queries: 1-2 words → boost BM25
|
||||
if (wordCount <= 2) {
|
||||
return "short";
|
||||
}
|
||||
|
||||
// Long queries: 5+ words → boost vector
|
||||
if (wordCount >= 5) {
|
||||
return "long";
|
||||
}
|
||||
|
||||
// Entity detection: check for capitalized words (proper nouns)
|
||||
// Heuristic: if more than half of non-first words are capitalized
|
||||
const capitalizedWords = words
|
||||
.slice(1) // skip first word (often capitalized anyway)
|
||||
.filter(
|
||||
(w) =>
|
||||
/^[A-Z]/.test(w) &&
|
||||
!/^(I|A|An|The|Is|Are|Was|Were|What|Who|Where|When|How|Why|Do|Does|Did)$/.test(w),
|
||||
);
|
||||
|
||||
if (capitalizedWords.length > 0) {
|
||||
return "entity";
|
||||
}
|
||||
|
||||
// Check for question patterns targeting entities
|
||||
if (/^(who|where|what)\s+(is|does|did|was|were)\s/i.test(query)) {
|
||||
return "entity";
|
||||
}
|
||||
|
||||
return "default";
|
||||
}
|
||||
|
||||
/**
|
||||
* Get adaptive signal weights based on query type.
|
||||
* Returns [vectorWeight, bm25Weight, graphWeight].
|
||||
*
|
||||
* Decision Q7: Query-adaptive RRF weights
|
||||
* - Short → boost BM25 (keyword matching)
|
||||
* - Entity → boost graph (relationship traversal)
|
||||
* - Long → boost vector (semantic similarity)
|
||||
*/
|
||||
export function getAdaptiveWeights(
|
||||
queryType: QueryType,
|
||||
graphEnabled: boolean,
|
||||
): [number, number, number] {
|
||||
const graphBase = graphEnabled ? 1.0 : 0.0;
|
||||
|
||||
switch (queryType) {
|
||||
case "short":
|
||||
return [0.8, 1.2, graphBase * 1.0];
|
||||
case "entity":
|
||||
return [0.8, 1.0, graphBase * 1.3];
|
||||
case "long":
|
||||
return [1.2, 0.7, graphBase * 0.8];
|
||||
case "default":
|
||||
default:
|
||||
return [1.0, 1.0, graphBase * 1.0];
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Confidence-Weighted RRF Fusion
|
||||
// ============================================================================
|
||||
|
||||
/** Position and score of one candidate within a single signal's ranked list. */
type SignalEntry = {
  rank: number; // 1-indexed
  score: number; // 0-1 normalized
};
|
||||
|
||||
/** A candidate after RRF fusion, carrying its combined score and display metadata. */
type FusedCandidate = {
  id: string;
  text: string;
  category: string;
  importance: number;
  createdAt: string;
  /** Confidence-weighted RRF score; larger is better. Not normalized. */
  rrfScore: number;
};
|
||||
|
||||
/**
|
||||
* Fuse multiple search signals using confidence-weighted RRF.
|
||||
*
|
||||
* Formula: RRF_conf(d) = Σ w_i × score_i(d) / (k + rank_i(d))
|
||||
*
|
||||
* Unlike standard RRF which only uses ranks, this variant preserves
|
||||
* score magnitude: rank-1 with score 0.99 contributes more than
|
||||
* rank-1 with score 0.55.
|
||||
*
|
||||
* Reference: Cormack et al. (2009), extended with confidence weighting
|
||||
* from ~/Downloads/ontology/app/services/rrf.py
|
||||
*/
|
||||
function fuseWithConfidenceRRF(
|
||||
signals: SearchSignalResult[][],
|
||||
k: number,
|
||||
weights: number[],
|
||||
): FusedCandidate[] {
|
||||
// Build per-signal rank/score lookups
|
||||
const signalMaps: Map<string, SignalEntry>[] = signals.map((signal) => {
|
||||
const map = new Map<string, SignalEntry>();
|
||||
for (let i = 0; i < signal.length; i++) {
|
||||
const entry = signal[i];
|
||||
// If duplicate in same signal, keep first (higher ranked)
|
||||
if (!map.has(entry.id)) {
|
||||
map.set(entry.id, { rank: i + 1, score: entry.score });
|
||||
}
|
||||
}
|
||||
return map;
|
||||
});
|
||||
|
||||
// Collect all unique candidate IDs with their metadata
|
||||
const candidateMetadata = new Map<
|
||||
string,
|
||||
{ text: string; category: string; importance: number; createdAt: string }
|
||||
>();
|
||||
|
||||
for (const signal of signals) {
|
||||
for (const entry of signal) {
|
||||
if (!candidateMetadata.has(entry.id)) {
|
||||
candidateMetadata.set(entry.id, {
|
||||
text: entry.text,
|
||||
category: entry.category,
|
||||
importance: entry.importance,
|
||||
createdAt: entry.createdAt,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate confidence-weighted RRF score for each candidate
|
||||
const results: FusedCandidate[] = [];
|
||||
|
||||
for (const [id, meta] of candidateMetadata) {
|
||||
let rrfScore = 0;
|
||||
|
||||
for (let i = 0; i < signalMaps.length; i++) {
|
||||
const entry = signalMaps[i].get(id);
|
||||
if (entry && entry.rank > 0) {
|
||||
// Confidence-weighted: multiply by original score
|
||||
rrfScore += weights[i] * entry.score * (1 / (k + entry.rank));
|
||||
}
|
||||
}
|
||||
|
||||
results.push({
|
||||
id,
|
||||
text: meta.text,
|
||||
category: meta.category,
|
||||
importance: meta.importance,
|
||||
createdAt: meta.createdAt,
|
||||
rrfScore,
|
||||
});
|
||||
}
|
||||
|
||||
// Sort by RRF score descending
|
||||
results.sort((a, b) => b.rrfScore - a.rrfScore);
|
||||
return results;
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Hybrid Search Orchestrator
|
||||
// ============================================================================
|
||||
|
||||
/**
|
||||
* Perform a three-signal hybrid search with query-adaptive RRF fusion.
|
||||
*
|
||||
* 1. Embed the query
|
||||
* 2. Classify query for adaptive weights
|
||||
* 3. Run three signals in parallel
|
||||
* 4. Fuse with confidence-weighted RRF
|
||||
* 5. Return top results
|
||||
*
|
||||
* Graceful degradation: if any signal fails, RRF works with remaining signals.
|
||||
* If graph search is not enabled (no extraction API key), uses 2-signal fusion.
|
||||
*/
|
||||
export async function hybridSearch(
|
||||
db: Neo4jMemoryClient,
|
||||
embeddings: Embeddings,
|
||||
query: string,
|
||||
limit: number = 5,
|
||||
agentId: string = "default",
|
||||
graphEnabled: boolean = false,
|
||||
options: {
|
||||
rrfK?: number;
|
||||
candidateMultiplier?: number;
|
||||
graphFiringThreshold?: number;
|
||||
} = {},
|
||||
): Promise<HybridSearchResult[]> {
|
||||
const { rrfK = 60, candidateMultiplier = 4, graphFiringThreshold = 0.3 } = options;
|
||||
|
||||
const candidateLimit = Math.floor(Math.min(200, Math.max(1, limit * candidateMultiplier)));
|
||||
|
||||
// 1. Generate query embedding
|
||||
const queryEmbedding = await embeddings.embed(query);
|
||||
|
||||
// 2. Classify query and get adaptive weights
|
||||
const queryType = classifyQuery(query);
|
||||
const weights = getAdaptiveWeights(queryType, graphEnabled);
|
||||
|
||||
// 3. Run signals in parallel
|
||||
const [vectorResults, bm25Results, graphResults] = await Promise.all([
|
||||
db.vectorSearch(queryEmbedding, candidateLimit, 0.1, agentId),
|
||||
db.bm25Search(query, candidateLimit, agentId),
|
||||
graphEnabled
|
||||
? db.graphSearch(query, candidateLimit, graphFiringThreshold, agentId)
|
||||
: Promise.resolve([] as SearchSignalResult[]),
|
||||
]);
|
||||
|
||||
// 4. Fuse with confidence-weighted RRF
|
||||
const fused = fuseWithConfidenceRRF([vectorResults, bm25Results, graphResults], rrfK, weights);
|
||||
|
||||
// 5. Return top results, normalized to 0-100% display scores
|
||||
const maxRrf = fused.length > 0 ? fused[0].rrfScore : 1;
|
||||
const normalizer = maxRrf > 0 ? 1 / maxRrf : 1;
|
||||
|
||||
return fused.slice(0, limit).map((r) => ({
|
||||
id: r.id,
|
||||
text: r.text,
|
||||
category: r.category,
|
||||
importance: r.importance,
|
||||
createdAt: r.createdAt,
|
||||
score: Math.min(1, r.rrfScore * normalizer), // Normalize to 0-1
|
||||
}));
|
||||
}
|
||||
19
extensions/memory-neo4j/tsconfig.json
Normal file
19
extensions/memory-neo4j/tsconfig.json
Normal file
@@ -0,0 +1,19 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2023",
|
||||
"lib": ["ES2023"],
|
||||
"module": "ESNext",
|
||||
"moduleResolution": "bundler",
|
||||
"strict": true,
|
||||
"esModuleInterop": true,
|
||||
"skipLibCheck": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"declaration": true,
|
||||
"declarationMap": true,
|
||||
"sourceMap": true,
|
||||
"outDir": "./dist",
|
||||
"rootDir": "."
|
||||
},
|
||||
"include": ["*.ts"],
|
||||
"exclude": ["node_modules", "dist", "*.test.ts"]
|
||||
}
|
||||
Reference in New Issue
Block a user