Sync adabot changes on top of origin/main

Includes:
- memory-neo4j: four-phase sleep cycle (dedup, decay, extraction, cleanup)
- memory-neo4j: full plugin implementation with hybrid search
- memory-lancedb: updates and benchmarks
- OpenSpec workflow skills and commands
- Session memory hooks
- Various CLI and config improvements

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Tarun Sukhani
2026-02-04 15:14:46 +00:00
parent 7cfd0aed5f
commit e65d1deedd
59 changed files with 7326 additions and 310 deletions

View File

@@ -0,0 +1,85 @@
#!/usr/bin/env node
/**
* LanceDB performance benchmark
*/
import * as lancedb from "@lancedb/lancedb";
import OpenAI from "openai";
const LANCEDB_PATH = "/home/tsukhani/.openclaw/memory/lancedb";
const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
const openai = new OpenAI({ apiKey: OPENAI_API_KEY });
/**
 * Embed a single text with OpenAI and time the API round-trip.
 * Returns { vector, embedTime } where embedTime is milliseconds spent
 * in the embeddings.create call.
 */
async function embed(text) {
  const startedAt = Date.now();
  const res = await openai.embeddings.create({
    model: "text-embedding-3-small",
    input: text,
  });
  return { vector: res.data[0].embedding, embedTime: Date.now() - startedAt };
}
/**
 * Benchmark driver: times connection/open, runs each query 5x through
 * embed + vector search, then times 10 raw searches with one cached vector.
 */
async function main() {
  console.log("📊 LanceDB Performance Benchmark");
  console.log("================================\n");

  // Connect and open the table, timing the full handshake.
  const connectStart = Date.now();
  const db = await lancedb.connect(LANCEDB_PATH);
  const table = await db.openTable("memories");
  console.log(`Connection time: ${Date.now() - connectStart}ms`);
  console.log(`Total memories: ${await table.countRows()}\n`);

  // Representative queries against the personal memory store.
  const queries = [
    "Tarun's preferences",
    "What is the OpenRouter API key location?",
    "meeting schedule",
    "Abundent Academy training",
    "slate blue",
  ];
  console.log("Search benchmarks (5 runs each, limit=5):\n");
  for (const query of queries) {
    const searchTimes = [];
    let lastEmbedTime = 0;
    for (let run = 0; run < 5; run++) {
      const { vector, embedTime } = await embed(query);
      lastEmbedTime = embedTime; // report the latency observed on the final run
      const searchStart = Date.now();
      await table.vectorSearch(vector).limit(5).toArray();
      searchTimes.push(Date.now() - searchStart);
    }
    const avg = Math.round(searchTimes.reduce((sum, t) => sum + t, 0) / searchTimes.length);
    console.log(`"${query}"`);
    console.log(` Embedding: ${lastEmbedTime}ms`);
    console.log(` Search: avg=${avg}ms, min=${Math.min(...searchTimes)}ms, max=${Math.max(...searchTimes)}ms`);
    console.log("");
  }

  // Isolate search latency by reusing a single pre-computed embedding.
  console.log("\nRaw vector search (pre-computed embedding):");
  const { vector } = await embed("test query");
  const rawTimes = [];
  for (let run = 0; run < 10; run++) {
    const start = Date.now();
    await table.vectorSearch(vector).limit(5).toArray();
    rawTimes.push(Date.now() - start);
  }
  const avgRaw = Math.round(rawTimes.reduce((sum, t) => sum + t, 0) / rawTimes.length);
  console.log(` avg=${avgRaw}ms, min=${Math.min(...rawTimes)}ms, max=${Math.max(...rawTimes)}ms`);
}

main().catch(console.error);

View File

@@ -2,6 +2,20 @@ import fs from "node:fs";
import { homedir } from "node:os";
import { join } from "node:path";
export type AutoCaptureConfig = {
enabled: boolean;
/** LLM provider for memory extraction: "openrouter" (default) or "openai" */
provider?: "openrouter" | "openai";
/** LLM model for memory extraction (default: google/gemini-2.0-flash-001) */
model?: string;
/** API key for the LLM provider (supports ${ENV_VAR} syntax) */
apiKey?: string;
/** Base URL for the LLM provider (default: https://openrouter.ai/api/v1) */
baseUrl?: string;
/** Maximum messages to send for extraction (default: 10) */
maxMessages?: number;
};
export type MemoryConfig = {
embedding: {
provider: "openai";
@@ -9,16 +23,29 @@ export type MemoryConfig = {
apiKey: string;
};
dbPath?: string;
autoCapture?: boolean;
/** @deprecated Use autoCapture object instead. Boolean true enables with defaults. */
autoCapture?: boolean | AutoCaptureConfig;
autoRecall?: boolean;
captureMaxChars?: number;
coreMemory?: {
enabled?: boolean;
/** Maximum number of core memories to load */
maxEntries?: number;
/** Minimum importance threshold for core memories */
minImportance?: number;
};
};
export const MEMORY_CATEGORIES = ["preference", "fact", "decision", "entity", "other"] as const;
export const MEMORY_CATEGORIES = [
"preference",
"fact",
"decision",
"entity",
"other",
"core",
] as const;
export type MemoryCategory = (typeof MEMORY_CATEGORIES)[number];
const DEFAULT_MODEL = "text-embedding-3-small";
export const DEFAULT_CAPTURE_MAX_CHARS = 500;
const LEGACY_STATE_DIRS: string[] = [];
function resolveDefaultDbPath(): string {
@@ -93,7 +120,7 @@ export const memoryConfigSchema = {
const cfg = value as Record<string, unknown>;
assertAllowedKeys(
cfg,
["embedding", "dbPath", "autoCapture", "autoRecall", "captureMaxChars"],
["embedding", "dbPath", "autoCapture", "autoRecall", "coreMemory"],
"memory config",
);
@@ -105,13 +132,41 @@ export const memoryConfigSchema = {
const model = resolveEmbeddingModel(embedding);
const captureMaxChars =
typeof cfg.captureMaxChars === "number" ? Math.floor(cfg.captureMaxChars) : undefined;
if (
typeof captureMaxChars === "number" &&
(captureMaxChars < 100 || captureMaxChars > 10_000)
) {
throw new Error("captureMaxChars must be between 100 and 10000");
// Parse autoCapture (supports boolean for backward compat, or object for LLM config)
let autoCapture: MemoryConfig["autoCapture"];
if (cfg.autoCapture === false) {
autoCapture = false;
} else if (cfg.autoCapture === true || cfg.autoCapture === undefined) {
// Legacy boolean or default — enable with defaults
autoCapture = { enabled: true };
} else if (typeof cfg.autoCapture === "object" && !Array.isArray(cfg.autoCapture)) {
const ac = cfg.autoCapture as Record<string, unknown>;
assertAllowedKeys(
ac,
["enabled", "provider", "model", "apiKey", "baseUrl", "maxMessages"],
"autoCapture config",
);
autoCapture = {
enabled: ac.enabled !== false,
provider:
ac.provider === "openai" || ac.provider === "openrouter" ? ac.provider : "openrouter",
model: typeof ac.model === "string" ? ac.model : undefined,
apiKey: typeof ac.apiKey === "string" ? resolveEnvVars(ac.apiKey) : undefined,
baseUrl: typeof ac.baseUrl === "string" ? ac.baseUrl : undefined,
maxMessages: typeof ac.maxMessages === "number" ? ac.maxMessages : undefined,
};
}
// Parse coreMemory
let coreMemory: MemoryConfig["coreMemory"];
if (cfg.coreMemory && typeof cfg.coreMemory === "object" && !Array.isArray(cfg.coreMemory)) {
const bc = cfg.coreMemory as Record<string, unknown>;
assertAllowedKeys(bc, ["enabled", "maxEntries", "minImportance"], "coreMemory config");
coreMemory = {
enabled: bc.enabled === true,
maxEntries: typeof bc.maxEntries === "number" ? bc.maxEntries : 50,
minImportance: typeof bc.minImportance === "number" ? bc.minImportance : 0.5,
};
}
return {
@@ -121,9 +176,10 @@ export const memoryConfigSchema = {
apiKey: resolveEnvVars(embedding.apiKey),
},
dbPath: typeof cfg.dbPath === "string" ? cfg.dbPath : DEFAULT_DB_PATH,
autoCapture: cfg.autoCapture === true,
autoCapture: autoCapture ?? { enabled: true },
autoRecall: cfg.autoRecall !== false,
captureMaxChars: captureMaxChars ?? DEFAULT_CAPTURE_MAX_CHARS,
// Default coreMemory to enabled for consistency with autoCapture/autoRecall
coreMemory: coreMemory ?? { enabled: true, maxEntries: 50, minImportance: 0.5 },
};
},
uiHints: {
@@ -143,19 +199,47 @@ export const memoryConfigSchema = {
placeholder: "~/.openclaw/memory/lancedb",
advanced: true,
},
autoCapture: {
"autoCapture.enabled": {
label: "Auto-Capture",
help: "Automatically capture important information from conversations",
help: "Automatically capture important information from conversations using LLM extraction",
},
"autoCapture.provider": {
label: "Capture LLM Provider",
placeholder: "openrouter",
advanced: true,
help: "LLM provider for memory extraction (openrouter or openai)",
},
"autoCapture.model": {
label: "Capture Model",
placeholder: "google/gemini-2.0-flash-001",
advanced: true,
help: "LLM model for memory extraction (use a fast/cheap model)",
},
"autoCapture.apiKey": {
label: "Capture API Key",
sensitive: true,
advanced: true,
help: "API key for capture LLM (defaults to OpenRouter key from provider config)",
},
autoRecall: {
label: "Auto-Recall",
help: "Automatically inject relevant memories into context",
},
captureMaxChars: {
label: "Capture Max Chars",
help: "Maximum message length eligible for auto-capture",
"coreMemory.enabled": {
label: "Core Memory",
help: "Inject core memories as virtual MEMORY.md at session start (replaces MEMORY.md file)",
},
"coreMemory.maxEntries": {
label: "Max Core Entries",
placeholder: "50",
advanced: true,
placeholder: String(DEFAULT_CAPTURE_MAX_CHARS),
help: "Maximum number of core memories to load",
},
"coreMemory.minImportance": {
label: "Min Core Importance",
placeholder: "0.5",
advanced: true,
help: "Minimum importance threshold for core memories (0-1)",
},
},
};

View File

@@ -0,0 +1,102 @@
#!/usr/bin/env node
/**
* Export memories from LanceDB for migration to memory-neo4j
*
* Usage:
* pnpm exec node export-memories.mjs [output-file.json]
*
* Default output: memories-export.json
*/
import * as lancedb from "@lancedb/lancedb";
import { writeFileSync } from "fs";
// Runtime configuration, overridable via environment variables / argv.
const LANCEDB_PATH = process.env.LANCEDB_PATH || "/home/tsukhani/.openclaw/memory/lancedb";
const AGENT_ID = process.env.AGENT_ID || "main";
const outputFile = process.argv[2] || "memories-export.json";

console.log("📦 Memory Export Tool (LanceDB)");
console.log(` LanceDB path: ${LANCEDB_PATH}`);
console.log(` Output: ${outputFile}`);
console.log("");

/**
 * Convert one LanceDB row into the memory-neo4j import shape.
 * createdAt/updatedAt are both set to the row's creation time (ISO-8601);
 * imported rows are marked source="import" with extraction skipped.
 */
function transformMemory(lanceEntry) {
  const timestamp = new Date(lanceEntry.createdAt).toISOString();
  return {
    id: lanceEntry.id,
    text: lanceEntry.text,
    embedding: lanceEntry.vector,
    importance: lanceEntry.importance,
    category: lanceEntry.category,
    createdAt: timestamp,
    updatedAt: timestamp,
    source: "import",
    extractionStatus: "skipped",
    agentId: AGENT_ID,
  };
}
/**
 * Derive the preview filename from the export filename.
 * Fix: the previous `outputFile.replace(".json", "-preview.json")` replaced
 * the FIRST ".json" occurrence anywhere in the name and, when the name had
 * no ".json" at all, returned it unchanged — silently overwriting the main
 * export. Now "-preview" is inserted before a trailing ".json", or
 * "-preview.json" is appended when there is no such suffix.
 */
function previewFileName(file) {
  return file.endsWith(".json")
    ? `${file.slice(0, -".json".length)}-preview.json`
    : `${file}-preview.json`;
}
/**
 * Export all LanceDB memories to JSON for import into memory-neo4j.
 *
 * Writes two files:
 *  - outputFile: full export including embedding vectors
 *  - <outputFile>-preview.json: same metadata with each embedding replaced
 *    by a "[N dims]" summary, for quick human inspection
 */
async function main() {
  // Load from LanceDB
  console.log("📥 Loading from LanceDB...");
  const db = await lancedb.connect(LANCEDB_PATH);
  const table = await db.openTable("memories");
  const count = await table.countRows();
  console.log(` Found ${count} memories`);
  const memories = await table
    .query()
    // +100 slack so rows inserted between countRows() and query() are kept
    .limit(count + 100)
    .toArray();
  console.log(` Loaded ${memories.length} memories`);
  // Transform to the neo4j import shape
  console.log("🔄 Transforming...");
  const transformed = memories.map(transformMemory);
  // Per-category counts for the export header
  const stats = {};
  transformed.forEach((m) => {
    stats[m.category] = (stats[m.category] || 0) + 1;
  });
  console.log(" Categories:", stats);
  // Export
  console.log(`📤 Exporting to ${outputFile}...`);
  const exportData = {
    exportedAt: new Date().toISOString(),
    sourcePlugin: "memory-lancedb",
    targetPlugin: "memory-neo4j",
    agentId: AGENT_ID,
    vectorDim: transformed[0]?.embedding?.length || 1536,
    count: transformed.length,
    stats,
    memories: transformed,
  };
  writeFileSync(outputFile, JSON.stringify(exportData, null, 2));
  // Also write a preview without embeddings
  const previewFile = previewFileName(outputFile);
  const preview = {
    ...exportData,
    memories: transformed.map((m) => ({
      ...m,
      embedding: `[${m.embedding?.length} dims]`,
    })),
  };
  writeFileSync(previewFile, JSON.stringify(preview, null, 2));
  console.log(`✅ Exported ${transformed.length} memories`);
  console.log(
    ` Full export: ${outputFile} (${(JSON.stringify(exportData).length / 1024 / 1024).toFixed(2)} MB)`,
  );
  console.log(` Preview: ${previewFile}`);
}
// Entry point: print a concise error and exit non-zero on failure so
// calling shell scripts can detect a failed export.
main().catch((err) => {
console.error("❌ Error:", err.message);
process.exit(1);
});

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,26 @@
// Quick inspection script for the LanceDB memory store: prints the table
// names, the row count, a per-category tally, and a few sample entries.
import * as lancedb from "@lancedb/lancedb";

const db = await lancedb.connect("/home/tsukhani/.openclaw/memory/lancedb");
const tables = await db.tableNames();
console.log("Tables:", tables);

if (tables.includes("memories")) {
  const table = await db.openTable("memories");
  console.log("Memory count:", await table.countRows());

  const all = await table.query().limit(200).toArray();

  // Tally entries per category (categories outside the seeded set are
  // counted too, starting from 0).
  const stats = { preference: 0, fact: 0, decision: 0, entity: 0, other: 0, core: 0 };
  for (const entry of all) {
    stats[entry.category] = (stats[entry.category] || 0) + 1;
  }
  console.log("\nCategory breakdown:", stats);

  console.log("\nSample entries:");
  all.slice(0, 5).forEach((entry, idx) => {
    console.log(`${idx + 1}. [${entry.category}] ${(entry.text || "").substring(0, 100)}...`);
    console.log(` id: ${entry.id}, importance: ${entry.importance}, vectorDim: ${entry.vector?.length}`);
  });
}

View File

@@ -26,11 +26,21 @@
"label": "Auto-Recall",
"help": "Automatically inject relevant memories into context"
},
"captureMaxChars": {
"label": "Capture Max Chars",
"help": "Maximum message length eligible for auto-capture",
"coreMemory.enabled": {
"label": "Core Memory",
"help": "Inject core memories as virtual MEMORY.md at session start (replaces MEMORY.md file)"
},
"coreMemory.maxEntries": {
"label": "Max Core Entries",
"placeholder": "50",
"advanced": true,
"placeholder": "500"
"help": "Maximum number of core memories to load"
},
"coreMemory.minImportance": {
"label": "Min Core Importance",
"placeholder": "0.5",
"advanced": true,
"help": "Minimum importance threshold for core memories (0-1)"
}
},
"configSchema": {
@@ -60,10 +70,20 @@
"autoRecall": {
"type": "boolean"
},
"captureMaxChars": {
"type": "number",
"minimum": 100,
"maximum": 10000
"coreMemory": {
"type": "object",
"additionalProperties": false,
"properties": {
"enabled": {
"type": "boolean"
},
"maxEntries": {
"type": "number"
},
"minImportance": {
"type": "number"
}
}
}
},
"required": ["embedding"]

View File

@@ -0,0 +1,209 @@
/**
* Configuration schema for memory-neo4j plugin.
*
* Matches the JSON Schema in openclaw.plugin.json.
* Provides runtime parsing with env var resolution and defaults.
*/
// Embedding backends supported by this plugin.
export type EmbeddingProvider = "openai" | "ollama";
/** Fully-resolved plugin configuration produced by memoryNeo4jConfigSchema.parse(). */
export type MemoryNeo4jConfig = {
  neo4j: {
    uri: string;
    username: string;
    // Empty string when no password is configured.
    password: string;
  };
  embedding: {
    provider: EmbeddingProvider;
    // Required for "openai"; optional for "ollama" (parse enforces this).
    apiKey?: string;
    model: string;
    baseUrl?: string;
  };
  autoCapture: boolean;
  autoRecall: boolean;
  coreMemory: {
    enabled: boolean;
    maxEntries: number;
  };
};
/**
 * Extraction configuration resolved from environment variables.
 * Entity extraction auto-enables when OPENROUTER_API_KEY is set.
 */
export type ExtractionConfig = {
  enabled: boolean;
  apiKey: string;
  model: string;
  baseUrl: string;
  temperature: number;
  maxRetries: number;
};
// Allowed values for a memory's category field.
export const MEMORY_CATEGORIES = [
  "core",
  "preference",
  "fact",
  "decision",
  "entity",
  "other",
] as const;
export type MemoryCategory = (typeof MEMORY_CATEGORIES)[number];
/**
 * Known embedding models and their vector dimensions. Unknown models fall
 * back to DEFAULT_EMBEDDING_DIMS.
 */
const EMBEDDING_DIMENSIONS: Record<string, number> = {
  // OpenAI models
  "text-embedding-3-small": 1536,
  "text-embedding-3-large": 3072,
  // Ollama models (common ones)
  "mxbai-embed-large": 1024,
  "mxbai-embed-large-2k:latest": 1024,
  "nomic-embed-text": 768,
  "all-minilm": 384,
};
// Default dimension for unknown models (Ollama models vary)
const DEFAULT_EMBEDDING_DIMS = 1024;

/**
 * Look up the vector dimension for an embedding model name.
 * Exact matches win; otherwise the first prefix match handles versioned
 * names such as "mxbai-embed-large:latest"; unknown models get the default.
 */
export function vectorDimsForModel(model: string): number {
  const exact = EMBEDDING_DIMENSIONS[model];
  if (exact) {
    return exact;
  }
  const prefixed = Object.entries(EMBEDDING_DIMENSIONS).find(([known]) =>
    model.startsWith(known),
  );
  return prefixed ? prefixed[1] : DEFAULT_EMBEDDING_DIMS;
}
/**
 * Expand ${ENV_VAR} placeholders inside a config string.
 * Throws if any referenced environment variable is unset or empty.
 */
function resolveEnvVars(value: string): string {
  return value.replace(/\$\{([^}]+)\}/g, (_match, name: string) => {
    const resolved = process.env[name];
    if (!resolved) {
      throw new Error(`Environment variable ${name} is not set`);
    }
    return resolved;
  });
}
/**
 * Build the LLM extraction settings from the environment.
 * Extraction is enabled only when OPENROUTER_API_KEY is set; the model and
 * base URL can be overridden via EXTRACTION_MODEL / EXTRACTION_BASE_URL.
 */
export function resolveExtractionConfig(): ExtractionConfig {
  const apiKey = process.env.OPENROUTER_API_KEY ?? "";
  const model = process.env.EXTRACTION_MODEL ?? "google/gemini-2.0-flash-001";
  const baseUrl = process.env.EXTRACTION_BASE_URL ?? "https://openrouter.ai/api/v1";
  return {
    enabled: apiKey.length > 0,
    apiKey,
    model,
    baseUrl,
    temperature: 0.0,
    maxRetries: 2,
  };
}
/** Throw when `value` contains keys outside `allowed` (guards config typos). */
function assertAllowedKeys(value: Record<string, unknown>, allowed: string[], label: string) {
  const unknown = Object.keys(value).filter((key) => !allowed.includes(key));
  if (unknown.length > 0) {
    throw new Error(`${label} has unknown keys: ${unknown.join(", ")}`);
  }
}
/**
 * Config schema with parse method for runtime validation & transformation.
 * JSON Schema validation is handled by openclaw.plugin.json; this handles
 * env var resolution and defaults.
 *
 * Defaults applied: neo4j.username="neo4j", embedding.provider="openai",
 * autoCapture/autoRecall=true, coreMemory={ enabled: true, maxEntries: 50 }.
 * Throws on unknown keys, missing neo4j.uri, or a missing OpenAI apiKey.
 */
export const memoryNeo4jConfigSchema = {
  parse(value: unknown): MemoryNeo4jConfig {
    if (!value || typeof value !== "object" || Array.isArray(value)) {
      throw new Error("memory-neo4j config required");
    }
    const cfg = value as Record<string, unknown>;
    assertAllowedKeys(
      cfg,
      ["embedding", "neo4j", "autoCapture", "autoRecall", "coreMemory"],
      "memory-neo4j config",
    );
    // Parse neo4j section
    const neo4jRaw = cfg.neo4j as Record<string, unknown> | undefined;
    if (!neo4jRaw || typeof neo4jRaw !== "object") {
      throw new Error("neo4j config section is required");
    }
    assertAllowedKeys(neo4jRaw, ["uri", "user", "username", "password"], "neo4j config");
    if (typeof neo4jRaw.uri !== "string" || !neo4jRaw.uri) {
      throw new Error("neo4j.uri is required");
    }
    const neo4jPassword =
      typeof neo4jRaw.password === "string" ? resolveEnvVars(neo4jRaw.password) : "";
    // Support both 'user' and 'username' for neo4j config ('user' wins)
    const neo4jUsername =
      typeof neo4jRaw.user === "string"
        ? neo4jRaw.user
        : typeof neo4jRaw.username === "string"
          ? neo4jRaw.username
          : "neo4j";
    // Parse embedding section (optional for ollama without apiKey)
    const embeddingRaw = cfg.embedding as Record<string, unknown> | undefined;
    assertAllowedKeys(
      embeddingRaw ?? {},
      ["provider", "apiKey", "model", "baseUrl"],
      "embedding config",
    );
    const provider: EmbeddingProvider = embeddingRaw?.provider === "ollama" ? "ollama" : "openai";
    // apiKey is required for openai, optional for ollama
    let apiKey: string | undefined;
    if (typeof embeddingRaw?.apiKey === "string" && embeddingRaw.apiKey) {
      apiKey = resolveEnvVars(embeddingRaw.apiKey);
    } else if (provider === "openai") {
      throw new Error("embedding.apiKey is required for OpenAI provider");
    }
    const embeddingModel =
      typeof embeddingRaw?.model === "string"
        ? embeddingRaw.model
        : provider === "ollama"
          ? "mxbai-embed-large"
          : "text-embedding-3-small";
    const baseUrl = typeof embeddingRaw?.baseUrl === "string" ? embeddingRaw.baseUrl : undefined;
    // Parse coreMemory section (optional with defaults).
    // Fix: validate its keys like every other section — previously a typo
    // such as "maxEntrys" was silently ignored and the default used instead.
    const coreMemoryRaw = cfg.coreMemory as Record<string, unknown> | undefined;
    assertAllowedKeys(coreMemoryRaw ?? {}, ["enabled", "maxEntries"], "coreMemory config");
    const coreMemoryEnabled = coreMemoryRaw?.enabled !== false; // enabled by default
    const coreMemoryMaxEntries =
      typeof coreMemoryRaw?.maxEntries === "number" ? coreMemoryRaw.maxEntries : 50;
    return {
      neo4j: {
        uri: neo4jRaw.uri,
        username: neo4jUsername,
        password: neo4jPassword,
      },
      embedding: {
        provider,
        apiKey,
        model: embeddingModel,
        baseUrl,
      },
      autoCapture: cfg.autoCapture !== false,
      autoRecall: cfg.autoRecall !== false,
      coreMemory: {
        enabled: coreMemoryEnabled,
        maxEntries: coreMemoryMaxEntries,
      },
    };
  },
};

View File

@@ -0,0 +1,104 @@
/**
* Embedding generation for memory-neo4j.
*
* Supports both OpenAI and Ollama providers.
*/
import OpenAI from "openai";
import type { EmbeddingProvider } from "./config.js";
/**
 * Provider-agnostic embedding client: OpenAI via the official SDK, or
 * Ollama via its local HTTP /api/embed endpoint.
 */
export class Embeddings {
  // Null when the provider is Ollama (which is called through fetch).
  private client: OpenAI | null = null;
  private readonly provider: EmbeddingProvider;
  private readonly baseUrl: string;

  /**
   * @param apiKey   API key; required when provider is "openai".
   * @param model    Embedding model name.
   * @param provider "openai" (default) or "ollama".
   * @param baseUrl  Optional endpoint override; defaults to
   *                 http://localhost:11434 for Ollama and to the SDK's
   *                 default endpoint for OpenAI.
   * @throws Error when provider is "openai" and no apiKey is given.
   */
  constructor(
    private readonly apiKey: string | undefined,
    private readonly model: string = "text-embedding-3-small",
    provider: EmbeddingProvider = "openai",
    baseUrl?: string,
  ) {
    this.provider = provider;
    this.baseUrl = baseUrl ?? (provider === "ollama" ? "http://localhost:11434" : "");
    if (provider === "openai") {
      if (!apiKey) {
        throw new Error("API key required for OpenAI embeddings");
      }
      // Fix: honor a configured baseUrl (e.g. an OpenAI-compatible proxy);
      // it was previously stored but ignored for the OpenAI provider.
      this.client = baseUrl ? new OpenAI({ apiKey, baseURL: baseUrl }) : new OpenAI({ apiKey });
    }
  }

  /**
   * Generate an embedding vector for a single text.
   */
  async embed(text: string): Promise<number[]> {
    if (this.provider === "ollama") {
      return this.embedOllama(text);
    }
    return this.embedOpenAI(text);
  }

  /**
   * Generate embeddings for multiple texts.
   * Returns array of embeddings in the same order as input.
   */
  async embedBatch(texts: string[]): Promise<number[][]> {
    if (texts.length === 0) {
      return [];
    }
    if (this.provider === "ollama") {
      // One request per text, issued concurrently (each call sends a single
      // input to /api/embed). NOTE: the previous comment said "sequential",
      // which contradicted the Promise.all the code actually performs.
      return Promise.all(texts.map((t) => this.embedOllama(t)));
    }
    return this.embedBatchOpenAI(texts);
  }

  private async embedOpenAI(text: string): Promise<number[]> {
    if (!this.client) {
      throw new Error("OpenAI client not initialized");
    }
    const response = await this.client.embeddings.create({
      model: this.model,
      input: text,
    });
    return response.data[0].embedding;
  }

  private async embedBatchOpenAI(texts: string[]): Promise<number[][]> {
    if (!this.client) {
      throw new Error("OpenAI client not initialized");
    }
    const response = await this.client.embeddings.create({
      model: this.model,
      input: texts,
    });
    // Sort by index to ensure correct order
    return response.data.toSorted((a, b) => a.index - b.index).map((d) => d.embedding);
  }

  private async embedOllama(text: string): Promise<number[]> {
    const url = `${this.baseUrl}/api/embed`;
    const response = await fetch(url, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        model: this.model,
        input: text,
      }),
    });
    if (!response.ok) {
      const error = await response.text();
      throw new Error(`Ollama embedding failed: ${response.status} ${error}`);
    }
    const data = (await response.json()) as { embeddings?: number[][] };
    if (!data.embeddings?.[0]) {
      throw new Error("No embedding returned from Ollama");
    }
    return data.embeddings[0];
  }
}

View File

@@ -0,0 +1,774 @@
/**
* LLM-based entity extraction and auto-capture decision for memory-neo4j.
*
* Uses Gemini Flash via OpenRouter for:
* 1. Entity extraction: Extract entities and relationships from stored memories
* 2. Auto-capture decision: Decide what's worth remembering from conversations
*
* Both run as background fire-and-forget operations with graceful degradation.
*/
import { randomUUID } from "node:crypto";
import type { ExtractionConfig } from "./config.js";
import type { Embeddings } from "./embeddings.js";
import type { Neo4jMemoryClient } from "./neo4j-client.js";
import type { CaptureItem, EntityType, ExtractionResult, MemoryCategory } from "./schema.js";
import { ALLOWED_RELATIONSHIP_TYPES, ENTITY_TYPES } from "./schema.js";
// ============================================================================
// Types
// ============================================================================
// Minimal logging surface accepted from the host plugin; debug is optional
// and only called when present.
type Logger = {
  info: (msg: string) => void;
  warn: (msg: string) => void;
  error: (msg: string) => void;
  debug?: (msg: string) => void;
};
// ============================================================================
// Extraction Prompt
// ============================================================================
// {text} is substituted with the memory text by extractEntities(). The
// allowed entity/relationship types listed here mirror ENTITY_TYPES and
// ALLOWED_RELATIONSHIP_TYPES from schema.js; validateExtractionResult()
// re-checks the model output against those sets.
const ENTITY_EXTRACTION_PROMPT = `You are an entity extraction system for a personal memory store.
Extract entities and relationships from this memory text.
Memory: "{text}"
Return JSON:
{
"entities": [
{"name": "tarun", "type": "person", "aliases": ["boss"], "description": "brief description"}
],
"relationships": [
{"source": "tarun", "target": "abundent", "type": "WORKS_AT", "confidence": 0.95}
],
"tags": [
{"name": "neo4j", "category": "technology"}
]
}
Rules:
- Normalize entity names to lowercase
- Entity types: person, organization, location, event, concept
- Relationship types: WORKS_AT, LIVES_AT, KNOWS, MARRIED_TO, PREFERS, DECIDED, RELATED_TO
- Confidence: 0.0-1.0
- Only extract what's explicitly stated or strongly implied
- Return empty arrays if nothing to extract
- Keep entity descriptions brief (1 sentence max)`;
// ============================================================================
// Auto-Capture Decision Prompt
// ============================================================================
// {messages} is presumably filled with recent user messages by the
// auto-capture path (the substitution site is not in this view — verify).
const AUTO_CAPTURE_PROMPT = `You are an AI memory curator. Given these user messages from a conversation, identify information worth storing as long-term memories.
Only extract:
- Personal preferences and opinions ("I prefer dark mode", "I like TypeScript")
- Important facts about people, places, organizations
- Decisions made ("We decided to use Neo4j", "Going with plan A")
- Contact information (emails, phone numbers, usernames)
- Important events or dates
- Technical decisions and configurations
Do NOT extract:
- General questions or instructions to the AI
- Routine greetings or acknowledgments
- Information that is too vague or contextual
- Information already in system prompts or documentation
Categories:
- "core": Foundational identity info that should ALWAYS be remembered (user's name, role, company, key relationships, critical preferences that define who they are). Use sparingly - only for truly foundational facts.
- "preference": User preferences and opinions
- "fact": Facts about people, places, things
- "decision": Decisions made
- "entity": Entity-focused memories
- "other": Miscellaneous
Messages:
"""
{messages}
"""
Return JSON:
{
"memories": [
{"text": "concise memory text", "category": "core|preference|fact|decision|entity|other", "importance": 0.7}
]
}
If nothing is worth remembering, return: {"memories": []}`;
// ============================================================================
// OpenRouter API Client
// ============================================================================
/**
 * POST a single-prompt chat completion to OpenRouter, requesting a JSON
 * object response. Failed attempts are retried with exponential backoff
 * (500ms, 1s, 2s, ...); the last error is rethrown when retries run out.
 * Returns the message content, or null when the response carries none.
 */
async function callOpenRouter(config: ExtractionConfig, prompt: string): Promise<string | null> {
  // The request body is identical across attempts, so build it once.
  const request = {
    model: config.model,
    messages: [{ role: "user", content: prompt }],
    temperature: config.temperature,
    response_format: { type: "json_object" },
  };
  for (let attempt = 0; attempt <= config.maxRetries; attempt++) {
    try {
      const response = await fetch(`${config.baseUrl}/chat/completions`, {
        method: "POST",
        headers: {
          Authorization: `Bearer ${config.apiKey}`,
          "Content-Type": "application/json",
        },
        body: JSON.stringify(request),
      });
      if (!response.ok) {
        const detail = await response.text().catch(() => "");
        throw new Error(`OpenRouter API error ${response.status}: ${detail}`);
      }
      const payload = (await response.json()) as {
        choices?: Array<{ message?: { content?: string } }>;
      };
      return payload.choices?.[0]?.message?.content ?? null;
    } catch (err) {
      // Out of retries: propagate the last failure to the caller.
      if (attempt >= config.maxRetries) {
        throw err;
      }
      // Exponential backoff before the next attempt.
      await new Promise((resolve) => setTimeout(resolve, 500 * 2 ** attempt));
    }
  }
  return null; // Unreachable for maxRetries >= 0; satisfies the return type.
}
// ============================================================================
// Entity Extraction
// ============================================================================
/**
 * Run LLM entity extraction over a memory text.
 * Returns null when extraction is disabled, the LLM returns no content,
 * or the response cannot be parsed — callers treat null as a soft failure.
 */
export async function extractEntities(
  text: string,
  config: ExtractionConfig,
): Promise<ExtractionResult | null> {
  if (!config.enabled) {
    return null;
  }
  try {
    const content = await callOpenRouter(config, ENTITY_EXTRACTION_PROMPT.replace("{text}", text));
    if (content === null) {
      return null;
    }
    return validateExtractionResult(JSON.parse(content) as Record<string, unknown>);
  } catch {
    // Network/parse failures degrade gracefully; the caller decides how to
    // record the failed status.
    return null;
  }
}
/**
 * Validate and sanitize LLM extraction output.
 *
 * Defensive against malformed model responses: missing or non-array
 * top-level fields become empty arrays, entries missing required string
 * fields are dropped, and free-form values are normalized:
 *  - entity names/aliases are trimmed and lowercased; empty names dropped
 *  - unknown entity types are coerced to "concept"
 *  - relationships must use a type in ALLOWED_RELATIONSHIP_TYPES;
 *    confidence is clamped to [0, 1] and defaults to 0.7
 *  - tag categories default to "topic"
 */
function validateExtractionResult(raw: Record<string, unknown>): ExtractionResult {
  // Treat absent/malformed sections as empty rather than failing.
  const entities = Array.isArray(raw.entities) ? raw.entities : [];
  const relationships = Array.isArray(raw.relationships) ? raw.relationships : [];
  const tags = Array.isArray(raw.tags) ? raw.tags : [];
  const validEntityTypes = new Set<string>(ENTITY_TYPES);
  return {
    entities: entities
      // Keep only objects with string name and type.
      .filter(
        (e: unknown): e is Record<string, unknown> =>
          e !== null &&
          typeof e === "object" &&
          typeof (e as Record<string, unknown>).name === "string" &&
          typeof (e as Record<string, unknown>).type === "string",
      )
      .map((e) => ({
        name: String(e.name).trim().toLowerCase(),
        // Unknown types fall back to the broadest bucket, "concept".
        type: validEntityTypes.has(String(e.type)) ? (String(e.type) as EntityType) : "concept",
        aliases: Array.isArray(e.aliases)
          ? (e.aliases as unknown[])
              .filter((a): a is string => typeof a === "string")
              .map((a) => a.trim().toLowerCase())
          : undefined,
        description: typeof e.description === "string" ? e.description : undefined,
      }))
      // Drop entities whose name trimmed down to nothing.
      .filter((e) => e.name.length > 0),
    relationships: relationships
      // Require string source/target and an allow-listed relationship type.
      .filter(
        (r: unknown): r is Record<string, unknown> =>
          r !== null &&
          typeof r === "object" &&
          typeof (r as Record<string, unknown>).source === "string" &&
          typeof (r as Record<string, unknown>).target === "string" &&
          typeof (r as Record<string, unknown>).type === "string" &&
          ALLOWED_RELATIONSHIP_TYPES.has(String((r as Record<string, unknown>).type)),
      )
      .map((r) => ({
        source: String(r.source).trim().toLowerCase(),
        target: String(r.target).trim().toLowerCase(),
        type: String(r.type),
        // Clamp to [0, 1]; non-numeric confidence defaults to 0.7.
        confidence: typeof r.confidence === "number" ? Math.min(1, Math.max(0, r.confidence)) : 0.7,
      })),
    tags: tags
      .filter(
        (t: unknown): t is Record<string, unknown> =>
          t !== null &&
          typeof t === "object" &&
          typeof (t as Record<string, unknown>).name === "string",
      )
      .map((t) => ({
        name: String(t.name).trim().toLowerCase(),
        // Missing tag category defaults to "topic".
        category: typeof t.category === "string" ? t.category : "topic",
      }))
      .filter((t) => t.name.length > 0),
  };
}
// ============================================================================
// Background Extraction Pipeline
// ============================================================================
/**
 * Run entity extraction in the background for a stored memory.
 * Fire-and-forget: errors are logged but never propagated to the caller.
 *
 * Flow:
 * 1. Call LLM to extract entities and relationships
 * 2. MERGE Entity nodes (idempotent)
 * 3. Create MENTIONS relationships from Memory → Entity
 * 4. Create inter-Entity relationships (WORKS_AT, KNOWS, etc.)
 * 5. Tag the memory
 * 6. Update extractionStatus to "complete" or "failed"
 *
 * Per-entity/relationship/tag failures are logged and skipped so one bad
 * item does not abort the rest of the pipeline.
 */
export async function runBackgroundExtraction(
  memoryId: string,
  text: string,
  db: Neo4jMemoryClient,
  embeddings: Embeddings,
  config: ExtractionConfig,
  logger: Logger,
): Promise<void> {
  if (!config.enabled) {
    await db.updateExtractionStatus(memoryId, "skipped").catch(() => {});
    return;
  }
  try {
    const result = await extractEntities(text, config);
    if (!result) {
      await db.updateExtractionStatus(memoryId, "failed");
      return;
    }
    // Empty extraction is valid — not all memories have extractable entities
    if (
      result.entities.length === 0 &&
      result.relationships.length === 0 &&
      result.tags.length === 0
    ) {
      await db.updateExtractionStatus(memoryId, "complete");
      return;
    }
    // Generate embeddings for entity names (for entity vector search)
    let entityEmbeddings: Map<string, number[]> | undefined;
    if (result.entities.length > 0) {
      try {
        const names = result.entities.map((e) => e.name);
        const vectors = await embeddings.embedBatch(names);
        entityEmbeddings = new Map(names.map((n, i) => [n, vectors[i]]));
      } catch (err) {
        // Non-fatal: entities are still merged, just without vectors.
        logger.debug?.(`memory-neo4j: entity embedding generation failed: ${String(err)}`);
      }
    }
    // MERGE Entity nodes
    for (const entity of result.entities) {
      try {
        await db.mergeEntity({
          id: randomUUID(),
          name: entity.name,
          type: entity.type,
          aliases: entity.aliases,
          description: entity.description,
          embedding: entityEmbeddings?.get(entity.name),
        });
        // Create MENTIONS relationship
        await db.createMentions(memoryId, entity.name, "context", 1.0);
      } catch (err) {
        logger.warn(`memory-neo4j: entity merge failed for "${entity.name}": ${String(err)}`);
      }
    }
    // Create inter-Entity relationships
    for (const rel of result.relationships) {
      try {
        await db.createEntityRelationship(rel.source, rel.target, rel.type, rel.confidence);
      } catch (err) {
        logger.debug?.(
          `memory-neo4j: relationship creation failed: ${rel.source}->${rel.target}: ${String(err)}`,
        );
      }
    }
    // Tag the memory
    for (const tag of result.tags) {
      try {
        await db.tagMemory(memoryId, tag.name, tag.category);
      } catch (err) {
        logger.debug?.(`memory-neo4j: tagging failed for "${tag.name}": ${String(err)}`);
      }
    }
    await db.updateExtractionStatus(memoryId, "complete");
    // Fix: add the ": " separator between the two concatenated halves —
    // previously the truncated id and the entity count fused together
    // (e.g. "extraction complete for abc12343 entities, ...").
    logger.info(
      `memory-neo4j: extraction complete for ${memoryId.slice(0, 8)}: ` +
        `${result.entities.length} entities, ${result.relationships.length} rels, ${result.tags.length} tags`,
    );
  } catch (err) {
    logger.warn(`memory-neo4j: extraction failed for ${memoryId.slice(0, 8)}: ${String(err)}`);
    await db.updateExtractionStatus(memoryId, "failed").catch(() => {});
  }
}
// ============================================================================
// Sleep Cycle - Five Phase Memory Consolidation
// ============================================================================
/**
 * Sleep Cycle Result - aggregated stats from all five phases.
 *
 * Every counter starts at zero and is only incremented by the phase that
 * owns it, so a phase that errored or was skipped leaves its section zeroed.
 */
export type SleepCycleResult = {
  // Phase 1: Deduplication
  dedup: {
    clustersFound: number;
    memoriesMerged: number;
  };
  // Phase 2: Core Promotion
  promotion: {
    candidatesFound: number;
    promoted: number;
  };
  // Phase 3: Decay & Pruning
  decay: {
    memoriesPruned: number;
  };
  // Phase 4: Entity Extraction
  extraction: {
    total: number; // pending count at phase start (snapshot)
    processed: number;
    succeeded: number;
    failed: number;
  };
  // Phase 5: Orphan Cleanup
  cleanup: {
    entitiesRemoved: number;
    tagsRemoved: number;
  };
  // Overall
  durationMs: number; // wall-clock duration of the whole cycle
  aborted: boolean; // true when the abort signal fired before completion
};
/**
 * Tuning knobs for runSleepCycle. Every field is optional; the defaults
 * noted per field are applied inside runSleepCycle itself.
 */
export type SleepCycleOptions = {
  // Common
  agentId?: string; // restrict the cycle to one agent's memories (default: all)
  abortSignal?: AbortSignal; // checked between phases/items for cooperative cancellation
  // Phase 1: Deduplication
  dedupThreshold?: number; // Vector similarity threshold (default: 0.95)
  // Phase 2: Core Promotion
  promotionImportanceThreshold?: number; // Min importance to auto-promote (default: 0.9)
  promotionMinAgeDays?: number; // Min age before promotion (default: 7)
  // Phase 3: Decay
  decayRetentionThreshold?: number; // Below this, memory is pruned (default: 0.1)
  decayBaseHalfLifeDays?: number; // Base half-life in days (default: 30)
  decayImportanceMultiplier?: number; // How much importance extends half-life (default: 2)
  // Phase 4: Extraction
  extractionBatchSize?: number; // Memories per batch (default: 50)
  extractionDelayMs?: number; // Delay between batches (default: 1000)
  // Progress callback
  onPhaseStart?: (phase: "dedup" | "promotion" | "decay" | "extraction" | "cleanup") => void;
  onProgress?: (phase: string, message: string) => void;
};
/**
* Run the full sleep cycle - five phases of memory consolidation.
*
* This mimics how human memory consolidation works during sleep:
* 1. DEDUPLICATION - Merge near-duplicate memories (reduce redundancy)
* 2. CORE PROMOTION - Promote high-importance memories to core status
* 3. DECAY/PRUNING - Remove old, low-importance memories (forgetting curve)
* 4. EXTRACTION - Form entity relationships (strengthen connections)
* 5. CLEANUP - Remove orphaned entities/tags (garbage collection)
*
* Benefits:
* - Reduces latency during active conversations
* - Prevents memory bloat and "self-degradation"
* - Cleaner separation between capture and consolidation
*
* Research basis:
* - Ebbinghaus forgetting curve for decay
* - FadeMem importance-weighted retention
* - Graphiti/Zep edge deduplication patterns
*/
export async function runSleepCycle(
  db: Neo4jMemoryClient,
  embeddings: Embeddings,
  config: ExtractionConfig,
  logger: Logger,
  options: SleepCycleOptions = {},
): Promise<SleepCycleResult> {
  const startTime = Date.now();
  // Defaults below must stay in sync with the doc comments on SleepCycleOptions.
  const {
    agentId,
    abortSignal,
    dedupThreshold = 0.95,
    promotionImportanceThreshold = 0.9,
    promotionMinAgeDays = 7,
    decayRetentionThreshold = 0.1,
    decayBaseHalfLifeDays = 30,
    decayImportanceMultiplier = 2,
    extractionBatchSize = 50,
    extractionDelayMs = 1000,
    onPhaseStart,
    onProgress,
  } = options;
  const result: SleepCycleResult = {
    dedup: { clustersFound: 0, memoriesMerged: 0 },
    promotion: { candidatesFound: 0, promoted: 0 },
    decay: { memoriesPruned: 0 },
    extraction: { total: 0, processed: 0, succeeded: 0, failed: 0 },
    cleanup: { entitiesRemoved: 0, tagsRemoved: 0 },
    durationMs: 0,
    aborted: false,
  };
  // Each phase has its own try/catch: an error in one phase is logged as a
  // warning and the cycle continues with the next phase. The abort signal is
  // checked before every phase and between per-item iterations.
  // --------------------------------------------------------------------------
  // Phase 1: Deduplication
  // --------------------------------------------------------------------------
  if (!abortSignal?.aborted) {
    onPhaseStart?.("dedup");
    logger.info("memory-neo4j: [sleep] Phase 1: Deduplication");
    try {
      const clusters = await db.findDuplicateClusters(dedupThreshold, agentId);
      result.dedup.clustersFound = clusters.length;
      for (const cluster of clusters) {
        if (abortSignal?.aborted) {
          break;
        }
        const { deletedCount } = await db.mergeMemoryCluster(
          cluster.memoryIds,
          cluster.importances,
        );
        result.dedup.memoriesMerged += deletedCount;
        onProgress?.("dedup", `Merged cluster of ${cluster.memoryIds.length} → 1`);
      }
      logger.info(
        `memory-neo4j: [sleep] Phase 1 complete — ${result.dedup.clustersFound} clusters, ${result.dedup.memoriesMerged} merged`,
      );
    } catch (err) {
      logger.warn(`memory-neo4j: [sleep] Phase 1 error: ${String(err)}`);
    }
  }
  // --------------------------------------------------------------------------
  // Phase 2: Core Promotion
  // --------------------------------------------------------------------------
  if (!abortSignal?.aborted) {
    onPhaseStart?.("promotion");
    logger.info("memory-neo4j: [sleep] Phase 2: Core Promotion");
    try {
      const candidates = await db.findPromotionCandidates({
        importanceThreshold: promotionImportanceThreshold,
        minAgeDays: promotionMinAgeDays,
        agentId,
      });
      result.promotion.candidatesFound = candidates.length;
      if (candidates.length > 0) {
        const ids = candidates.map((m) => m.id);
        // Promotion happens in one bulk call; progress is reported per candidate.
        result.promotion.promoted = await db.promoteToCore(ids);
        for (const c of candidates) {
          onProgress?.("promotion", `Promoted "${c.text.slice(0, 50)}..." to core`);
        }
      }
      logger.info(
        `memory-neo4j: [sleep] Phase 2 complete — ${result.promotion.promoted} memories promoted to core`,
      );
    } catch (err) {
      logger.warn(`memory-neo4j: [sleep] Phase 2 error: ${String(err)}`);
    }
  }
  // --------------------------------------------------------------------------
  // Phase 3: Decay & Pruning
  // --------------------------------------------------------------------------
  if (!abortSignal?.aborted) {
    onPhaseStart?.("decay");
    logger.info("memory-neo4j: [sleep] Phase 3: Decay & Pruning");
    try {
      const decayed = await db.findDecayedMemories({
        retentionThreshold: decayRetentionThreshold,
        baseHalfLifeDays: decayBaseHalfLifeDays,
        importanceMultiplier: decayImportanceMultiplier,
        agentId,
      });
      if (decayed.length > 0) {
        const ids = decayed.map((m) => m.id);
        result.decay.memoriesPruned = await db.pruneMemories(ids);
        onProgress?.("decay", `Pruned ${result.decay.memoriesPruned} decayed memories`);
      }
      logger.info(
        `memory-neo4j: [sleep] Phase 3 complete — ${result.decay.memoriesPruned} memories pruned`,
      );
    } catch (err) {
      logger.warn(`memory-neo4j: [sleep] Phase 3 error: ${String(err)}`);
    }
  }
  // --------------------------------------------------------------------------
  // Phase 4: Entity Extraction
  // --------------------------------------------------------------------------
  if (!abortSignal?.aborted && config.enabled) {
    onPhaseStart?.("extraction");
    logger.info("memory-neo4j: [sleep] Phase 4: Entity Extraction");
    try {
      // Get initial count (snapshot for progress reporting only)
      const counts = await db.countByExtractionStatus(agentId);
      result.extraction.total = counts.pending;
      if (result.extraction.total > 0) {
        // Loop batch-by-batch until listPendingExtractions comes back empty.
        let hasMore = true;
        while (hasMore && !abortSignal?.aborted) {
          const pending = await db.listPendingExtractions(extractionBatchSize, agentId);
          if (pending.length === 0) {
            hasMore = false;
            break;
          }
          for (const memory of pending) {
            if (abortSignal?.aborted) {
              break;
            }
            try {
              // NOTE(review): runBackgroundExtraction traps its own errors and
              // records the "failed" status itself, so `succeeded` here can
              // over-count; the catch below only fires on unexpected throws
              // (e.g. from the DB client before extraction starts).
              await runBackgroundExtraction(memory.id, memory.text, db, embeddings, config, logger);
              result.extraction.succeeded++;
            } catch (err) {
              logger.warn(
                `memory-neo4j: extraction failed for ${memory.id.slice(0, 8)}: ${String(err)}`,
              );
              result.extraction.failed++;
            }
            result.extraction.processed++;
            // Progress every 10 items to keep callbacks cheap.
            if (result.extraction.processed % 10 === 0) {
              onProgress?.(
                "extraction",
                `${result.extraction.processed}/${result.extraction.total} processed`,
              );
            }
          }
          // Delay between batches (rate-limits the extraction LLM)
          if (hasMore && !abortSignal?.aborted) {
            await new Promise((resolve) => setTimeout(resolve, extractionDelayMs));
          }
        }
      }
      logger.info(
        `memory-neo4j: [sleep] Phase 4 complete — ${result.extraction.succeeded} extracted, ${result.extraction.failed} failed`,
      );
    } catch (err) {
      logger.warn(`memory-neo4j: [sleep] Phase 4 error: ${String(err)}`);
    }
  } else if (!config.enabled) {
    logger.info("memory-neo4j: [sleep] Phase 4 skipped — extraction not enabled");
  }
  // --------------------------------------------------------------------------
  // Phase 5: Orphan Cleanup
  // --------------------------------------------------------------------------
  if (!abortSignal?.aborted) {
    onPhaseStart?.("cleanup");
    logger.info("memory-neo4j: [sleep] Phase 5: Orphan Cleanup");
    try {
      // Clean up orphan entities
      const orphanEntities = await db.findOrphanEntities();
      if (orphanEntities.length > 0) {
        result.cleanup.entitiesRemoved = await db.deleteOrphanEntities(
          orphanEntities.map((e) => e.id),
        );
        onProgress?.("cleanup", `Removed ${result.cleanup.entitiesRemoved} orphan entities`);
      }
      // Clean up orphan tags
      const orphanTags = await db.findOrphanTags();
      if (orphanTags.length > 0) {
        result.cleanup.tagsRemoved = await db.deleteOrphanTags(orphanTags.map((t) => t.id));
        onProgress?.("cleanup", `Removed ${result.cleanup.tagsRemoved} orphan tags`);
      }
      logger.info(
        `memory-neo4j: [sleep] Phase 5 complete — ${result.cleanup.entitiesRemoved} entities, ${result.cleanup.tagsRemoved} tags removed`,
      );
    } catch (err) {
      logger.warn(`memory-neo4j: [sleep] Phase 5 error: ${String(err)}`);
    }
  }
  result.durationMs = Date.now() - startTime;
  // Reflect whether the signal fired at any point; earlier phases already
  // short-circuited their loops when it did.
  result.aborted = abortSignal?.aborted ?? false;
  logger.info(
    `memory-neo4j: [sleep] Sleep cycle complete in ${(result.durationMs / 1000).toFixed(1)}s` +
      (result.aborted ? " (aborted)" : ""),
  );
  return result;
}
// ============================================================================
// Auto-Capture Decision
// ============================================================================
/**
 * Decide which (if any) of the user's messages deserve long-term storage.
 * Sends the combined messages to the LLM and validates its JSON reply.
 * Best-effort: any LLM or parse failure yields an empty list.
 */
export async function evaluateAutoCapture(
  userMessages: string[],
  config: ExtractionConfig,
): Promise<CaptureItem[]> {
  // Guard: extraction disabled or nothing to evaluate.
  if (!config.enabled || userMessages.length === 0) {
    return [];
  }
  const joined = userMessages.join("\n\n");
  // Too little text to contain anything memorable.
  if (joined.length < 10) {
    return [];
  }
  const prompt = AUTO_CAPTURE_PROMPT.replace("{messages}", joined);
  try {
    const reply = await callOpenRouter(config, prompt);
    if (!reply) {
      return [];
    }
    return validateCaptureDecision(JSON.parse(reply) as Record<string, unknown>);
  } catch {
    // Silently fail — auto-capture is best-effort
    return [];
  }
}
/**
* Validate and sanitize the auto-capture LLM output.
*/
/**
 * Validate and sanitize the auto-capture LLM output.
 *
 * Returns at most 5 well-formed capture items. Entries without a non-empty
 * string `text` are dropped; unknown categories become "other"; importance is
 * clamped to [0, 1], with non-finite values (NaN/Infinity) falling back to
 * the 0.7 default; text is capped at 2000 chars.
 */
function validateCaptureDecision(raw: Record<string, unknown>): CaptureItem[] {
  const memories = Array.isArray(raw.memories) ? raw.memories : [];
  const validCategories = new Set<string>(["preference", "fact", "decision", "entity", "other"]);
  return memories
    .filter(
      (m: unknown): m is Record<string, unknown> =>
        m !== null &&
        typeof m === "object" &&
        typeof (m as Record<string, unknown>).text === "string" &&
        (m as Record<string, unknown>).text !== "",
    )
    .map((m) => ({
      text: String(m.text).slice(0, 2000), // cap length
      category: validCategories.has(String(m.category))
        ? (String(m.category) as MemoryCategory)
        : "other",
      // Guard with Number.isFinite: a bare typeof check admits NaN, which
      // would survive the min/max clamp and be stored as a NaN importance.
      importance:
        typeof m.importance === "number" && Number.isFinite(m.importance)
          ? Math.min(1, Math.max(0, m.importance))
          : 0.7,
    }))
    .slice(0, 5); // Max 5 captures per conversation
}
// ============================================================================
// Message Extraction Helper
// ============================================================================
/**
 * Pull the plain-text content of every user-role message out of an
 * event.messages array. String bodies are taken as-is; content-block arrays
 * contribute each `{ type: "text" }` block. Short snippets (< 10 chars) and
 * injected `<relevant-memories>` / `<system>` payloads are filtered out.
 */
export function extractUserMessages(messages: unknown[]): string[] {
  const collected: string[] = [];
  for (const raw of messages) {
    if (raw === null || typeof raw !== "object") {
      continue;
    }
    const record = raw as Record<string, unknown>;
    // Auto-capture only ever looks at what the user said.
    if (record.role !== "user") {
      continue;
    }
    const body = record.content;
    if (typeof body === "string") {
      collected.push(body);
    } else if (Array.isArray(body)) {
      for (const part of body) {
        if (part === null || typeof part !== "object") {
          continue;
        }
        const blockRec = part as Record<string, unknown>;
        if (blockRec.type === "text" && typeof blockRec.text === "string") {
          collected.push(blockRec.text);
        }
      }
    }
  }
  // Drop noise: tiny fragments and our own injected context markers.
  return collected.filter(
    (t) => t.length >= 10 && !t.includes("<relevant-memories>") && !t.includes("<system>"),
  );
}

View File

@@ -0,0 +1,942 @@
/**
* OpenClaw Memory (Neo4j) Plugin
*
* Drop-in replacement for memory-lancedb with three-signal hybrid search,
* entity extraction, and knowledge graph capabilities.
*
* Provides:
* - memory_recall: Hybrid search (vector + BM25 + graph traversal)
* - memory_store: Store memories with background entity extraction
* - memory_forget: Delete memories with cascade cleanup
*
* Architecture decisions: see docs/memory-neo4j/ARCHITECTURE.md
*/
import type { OpenClawPluginApi } from "openclaw/plugin-sdk";
import { Type } from "@sinclair/typebox";
import { randomUUID } from "node:crypto";
import { stringEnum } from "openclaw/plugin-sdk";
import type { MemoryCategory, MemorySource } from "./schema.js";
import {
MEMORY_CATEGORIES,
memoryNeo4jConfigSchema,
resolveExtractionConfig,
vectorDimsForModel,
} from "./config.js";
import { Embeddings } from "./embeddings.js";
import { evaluateAutoCapture, extractUserMessages, runSleepCycle } from "./extractor.js";
import { Neo4jMemoryClient } from "./neo4j-client.js";
import { hybridSearch } from "./search.js";
// ============================================================================
// Plugin Definition
// ============================================================================
const memoryNeo4jPlugin = {
id: "memory-neo4j",
name: "Memory (Neo4j)",
description:
"Neo4j-backed long-term memory with three-signal hybrid search, entity extraction, and knowledge graph",
kind: "memory" as const,
configSchema: memoryNeo4jConfigSchema,
register(api: OpenClawPluginApi) {
// Parse configuration
const cfg = memoryNeo4jConfigSchema.parse(api.pluginConfig);
const extractionConfig = resolveExtractionConfig();
const vectorDim = vectorDimsForModel(cfg.embedding.model);
// Create shared resources
const db = new Neo4jMemoryClient(
cfg.neo4j.uri,
cfg.neo4j.username,
cfg.neo4j.password,
vectorDim,
api.logger,
);
const embeddings = new Embeddings(
cfg.embedding.apiKey,
cfg.embedding.model,
cfg.embedding.provider,
cfg.embedding.baseUrl,
);
api.logger.debug?.(
`memory-neo4j: registered (uri: ${cfg.neo4j.uri}, provider: ${cfg.embedding.provider}, model: ${cfg.embedding.model}, ` +
`extraction: ${extractionConfig.enabled ? extractionConfig.model : "disabled"})`,
);
// ========================================================================
// Tools (using factory pattern for agentId)
// ========================================================================
// memory_recall — Three-signal hybrid search
api.registerTool(
(ctx) => {
const agentId = ctx.agentId || "default";
return {
name: "memory_recall",
label: "Memory Recall",
description:
"Search through long-term memories. Use when you need context about user preferences, past decisions, or previously discussed topics.",
parameters: Type.Object({
query: Type.String({ description: "Search query" }),
limit: Type.Optional(Type.Number({ description: "Max results (default: 5)" })),
}),
async execute(_toolCallId: string, params: unknown) {
const { query, limit = 5 } = params as {
query: string;
limit?: number;
};
const results = await hybridSearch(
db,
embeddings,
query,
limit,
agentId,
extractionConfig.enabled,
);
if (results.length === 0) {
return {
content: [{ type: "text", text: "No relevant memories found." }],
details: { count: 0 },
};
}
const text = results
.map((r, i) => `${i + 1}. [${r.category}] ${r.text} (${(r.score * 100).toFixed(0)}%)`)
.join("\n");
const sanitizedResults = results.map((r) => ({
id: r.id,
text: r.text,
category: r.category,
importance: r.importance,
score: r.score,
}));
return {
content: [
{
type: "text",
text: `Found ${results.length} memories:\n\n${text}`,
},
],
details: { count: results.length, memories: sanitizedResults },
};
},
};
},
{ name: "memory_recall" },
);
// memory_store — Store with background entity extraction
api.registerTool(
(ctx) => {
const agentId = ctx.agentId || "default";
const sessionKey = ctx.sessionKey;
return {
name: "memory_store",
label: "Memory Store",
description:
"Save important information in long-term memory. Use for preferences, facts, decisions.",
parameters: Type.Object({
text: Type.String({ description: "Information to remember" }),
importance: Type.Optional(
Type.Number({
description: "Importance 0-1 (default: 0.7)",
}),
),
category: Type.Optional(stringEnum(MEMORY_CATEGORIES)),
}),
async execute(_toolCallId: string, params: unknown) {
const {
text,
importance = 0.7,
category = "other",
} = params as {
text: string;
importance?: number;
category?: MemoryCategory;
};
// 1. Generate embedding
const vector = await embeddings.embed(text);
// 2. Check for duplicates (vector similarity > 0.95)
const existing = await db.findSimilar(vector, 0.95, 1);
if (existing.length > 0) {
return {
content: [
{
type: "text",
text: `Similar memory already exists: "${existing[0].text}"`,
},
],
details: {
action: "duplicate",
existingId: existing[0].id,
existingText: existing[0].text,
},
};
}
// 3. Store memory immediately (fast path)
const memoryId = randomUUID();
await db.storeMemory({
id: memoryId,
text,
embedding: vector,
importance: Math.min(1, Math.max(0, importance)),
category,
source: "user" as MemorySource,
extractionStatus: extractionConfig.enabled ? "pending" : "skipped",
agentId,
sessionKey,
});
// 4. Extraction is deferred to sleep cycle (like human memory consolidation)
// See: runSleepCycleExtraction() and `openclaw memory sleep` command
return {
content: [
{
type: "text",
text: `Stored: "${text.slice(0, 100)}${text.length > 100 ? "..." : ""}"`,
},
],
details: { action: "created", id: memoryId },
};
},
};
},
{ name: "memory_store" },
);
// memory_forget — Delete with cascade
api.registerTool(
(_ctx) => {
return {
name: "memory_forget",
label: "Memory Forget",
description: "Delete specific memories. GDPR-compliant.",
parameters: Type.Object({
query: Type.Optional(Type.String({ description: "Search to find memory" })),
memoryId: Type.Optional(Type.String({ description: "Specific memory ID" })),
}),
async execute(_toolCallId: string, params: unknown) {
const { query, memoryId } = params as {
query?: string;
memoryId?: string;
};
// Direct delete by ID
if (memoryId) {
const deleted = await db.deleteMemory(memoryId);
if (!deleted) {
return {
content: [
{
type: "text",
text: `Memory ${memoryId} not found.`,
},
],
details: { action: "not_found", id: memoryId },
};
}
return {
content: [
{
type: "text",
text: `Memory ${memoryId} forgotten.`,
},
],
details: { action: "deleted", id: memoryId },
};
}
// Search-based delete
if (query) {
const vector = await embeddings.embed(query);
const results = await db.vectorSearch(vector, 5, 0.7);
if (results.length === 0) {
return {
content: [{ type: "text", text: "No matching memories found." }],
details: { found: 0 },
};
}
// Auto-delete if single high-confidence match
if (results.length === 1 && results[0].score > 0.9) {
await db.deleteMemory(results[0].id);
return {
content: [
{
type: "text",
text: `Forgotten: "${results[0].text}"`,
},
],
details: { action: "deleted", id: results[0].id },
};
}
// Multiple candidates — ask user to specify
const list = results.map((r) => `- [${r.id}] ${r.text.slice(0, 60)}...`).join("\n");
const sanitizedCandidates = results.map((r) => ({
id: r.id,
text: r.text,
category: r.category,
score: r.score,
}));
return {
content: [
{
type: "text",
text: `Found ${results.length} candidates. Specify memoryId:\n${list}`,
},
],
details: {
action: "candidates",
candidates: sanitizedCandidates,
},
};
}
return {
content: [{ type: "text", text: "Provide query or memoryId." }],
details: { error: "missing_param" },
};
},
};
},
{ name: "memory_forget" },
);
// ========================================================================
// CLI Commands
// ========================================================================
api.registerCli(
({ program }) => {
// Find existing memory command or create fallback
let memoryCmd = program.commands.find((cmd) => cmd.name() === "memory");
if (!memoryCmd) {
// Fallback if core memory CLI not registered yet
memoryCmd = program.command("memory").description("Memory commands");
}
// Add neo4j memory subcommand group
const memory = memoryCmd.command("neo4j").description("Neo4j graph memory commands");
memory
.command("list")
.description("List memory counts by agent and category")
.option("--json", "Output as JSON")
.action(async (opts: { json?: boolean }) => {
try {
await db.ensureInitialized();
const stats = await db.getMemoryStats();
if (opts.json) {
console.log(JSON.stringify(stats, null, 2));
return;
}
if (stats.length === 0) {
console.log("No memories stored.");
return;
}
// Group by agentId
const byAgent = new Map<
string,
Array<{ category: string; count: number; avgImportance: number }>
>();
for (const row of stats) {
const list = byAgent.get(row.agentId) || [];
list.push({
category: row.category,
count: row.count,
avgImportance: row.avgImportance,
});
byAgent.set(row.agentId, list);
}
// Print table for each agent
for (const [agentId, categories] of byAgent) {
const total = categories.reduce((sum, c) => sum + c.count, 0);
console.log(`\n┌─ ${agentId} (${total} total)`);
console.log("│");
console.log("│ Category Count Avg Importance");
console.log("│ ─────────────────────────────────────");
for (const { category, count, avgImportance } of categories) {
const cat = category.padEnd(12);
const cnt = String(count).padStart(5);
const imp = (avgImportance * 100).toFixed(0).padStart(3) + "%";
console.log(`${cat} ${cnt} ${imp}`);
}
console.log("└");
}
console.log("");
} catch (err) {
console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
process.exitCode = 1;
}
});
memory
.command("search")
.description("Search memories")
.argument("<query>", "Search query")
.option("--limit <n>", "Max results", "5")
.action(async (query: string, opts: { limit: string }) => {
try {
const results = await hybridSearch(
db,
embeddings,
query,
parseInt(opts.limit, 10),
"default",
extractionConfig.enabled,
);
const output = results.map((r) => ({
id: r.id,
text: r.text,
category: r.category,
importance: r.importance,
score: r.score,
}));
console.log(JSON.stringify(output, null, 2));
} catch (err) {
console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
process.exitCode = 1;
}
});
memory
.command("stats")
.description("Show memory statistics and configuration")
.action(async () => {
try {
await db.ensureInitialized();
const stats = await db.getMemoryStats();
const total = stats.reduce((sum, s) => sum + s.count, 0);
console.log("\nMemory (Neo4j) Statistics");
console.log("─────────────────────────");
console.log(`Total memories: ${total}`);
console.log(`Neo4j URI: ${cfg.neo4j.uri}`);
console.log(`Embedding: ${cfg.embedding.provider}/${cfg.embedding.model}`);
console.log(
`Extraction: ${extractionConfig.enabled ? extractionConfig.model : "disabled"}`,
);
console.log(`Auto-capture: ${cfg.autoCapture ? "enabled" : "disabled"}`);
console.log(`Auto-recall: ${cfg.autoRecall ? "enabled" : "disabled"}`);
console.log(`Core memory: ${cfg.coreMemory.enabled ? "enabled" : "disabled"}`);
if (stats.length > 0) {
// Group by category across all agents
const byCategory = new Map<string, number>();
for (const row of stats) {
byCategory.set(row.category, (byCategory.get(row.category) ?? 0) + row.count);
}
console.log("\nBy Category:");
for (const [category, count] of byCategory) {
console.log(` ${category.padEnd(12)} ${count}`);
}
// Show agent count
const agents = new Set(stats.map((s) => s.agentId));
console.log(`\nAgents: ${agents.size} (${[...agents].join(", ")})`);
}
console.log("");
} catch (err) {
console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
process.exitCode = 1;
}
});
memory
.command("sleep")
.description(
"Run sleep cycle — consolidate memories (dedup → promote → decay → extract → cleanup)",
)
.option("--agent <id>", "Agent id (default: all agents)")
.option("--dedup-threshold <n>", "Vector similarity threshold for dedup (default: 0.95)")
.option(
"--promotion-threshold <n>",
"Min importance for auto-promotion to core (default: 0.9)",
)
.option("--promotion-min-age <days>", "Min age in days before promotion (default: 7)")
.option("--decay-threshold <n>", "Decay score threshold for pruning (default: 0.1)")
.option("--decay-half-life <days>", "Base half-life in days (default: 30)")
.option("--batch-size <n>", "Extraction batch size (default: 50)")
.option("--delay <ms>", "Delay between extraction batches in ms (default: 1000)")
.action(
async (opts: {
agent?: string;
dedupThreshold?: string;
promotionThreshold?: string;
promotionMinAge?: string;
decayThreshold?: string;
decayHalfLife?: string;
batchSize?: string;
delay?: string;
}) => {
console.log("\n🌙 Memory Sleep Cycle");
console.log("═════════════════════════════════════════════════════════════");
console.log("Five-phase memory consolidation (like human sleep):\n");
console.log(" Phase 1: Deduplication — Merge near-duplicate memories");
console.log(" Phase 2: Core Promotion — Promote high-importance to core");
console.log(" Phase 3: Decay & Pruning — Remove stale low-importance memories");
console.log(" Phase 4: Extraction — Form entity relationships");
console.log(" Phase 5: Orphan Cleanup — Remove disconnected nodes\n");
try {
await db.ensureInitialized();
const result = await runSleepCycle(db, embeddings, extractionConfig, api.logger, {
agentId: opts.agent,
dedupThreshold: opts.dedupThreshold ? parseFloat(opts.dedupThreshold) : undefined,
promotionImportanceThreshold: opts.promotionThreshold
? parseFloat(opts.promotionThreshold)
: undefined,
promotionMinAgeDays: opts.promotionMinAge
? parseInt(opts.promotionMinAge, 10)
: undefined,
decayRetentionThreshold: opts.decayThreshold
? parseFloat(opts.decayThreshold)
: undefined,
decayBaseHalfLifeDays: opts.decayHalfLife
? parseInt(opts.decayHalfLife, 10)
: undefined,
extractionBatchSize: opts.batchSize ? parseInt(opts.batchSize, 10) : undefined,
extractionDelayMs: opts.delay ? parseInt(opts.delay, 10) : undefined,
onPhaseStart: (phase) => {
const phaseNames = {
dedup: "Phase 1: Deduplication",
promotion: "Phase 2: Core Promotion",
decay: "Phase 3: Decay & Pruning",
extraction: "Phase 4: Extraction",
cleanup: "Phase 5: Orphan Cleanup",
};
console.log(`\n▶ ${phaseNames[phase]}`);
console.log("─────────────────────────────────────────────────────────────");
},
onProgress: (_phase, message) => {
console.log(` ${message}`);
},
});
console.log("\n═════════════════════════════════════════════════════════════");
console.log(`✅ Sleep cycle complete in ${(result.durationMs / 1000).toFixed(1)}s`);
console.log("─────────────────────────────────────────────────────────────");
console.log(
` Deduplication: ${result.dedup.clustersFound} clusters → ${result.dedup.memoriesMerged} merged`,
);
console.log(
` Promotion: ${result.promotion.promoted}/${result.promotion.candidatesFound} promoted to core`,
);
console.log(` Decay/Pruning: ${result.decay.memoriesPruned} memories pruned`);
console.log(
` Extraction: ${result.extraction.succeeded}/${result.extraction.total} extracted` +
(result.extraction.failed > 0 ? ` (${result.extraction.failed} failed)` : ""),
);
console.log(
` Cleanup: ${result.cleanup.entitiesRemoved} entities, ${result.cleanup.tagsRemoved} tags removed`,
);
if (result.aborted) {
console.log("\n⚠ Sleep cycle was aborted before completion.");
}
console.log("");
} catch (err) {
console.error(
`\n❌ Sleep cycle failed: ${err instanceof Error ? err.message : String(err)}`,
);
process.exitCode = 1;
}
},
);
memory
.command("promote")
.description("Manually promote a memory to core status")
.argument("<id>", "Memory ID to promote")
.action(async (id: string) => {
try {
await db.ensureInitialized();
const promoted = await db.promoteToCore([id]);
if (promoted > 0) {
console.log(`✅ Memory ${id} promoted to core.`);
} else {
console.log(`❌ Memory ${id} not found.`);
process.exitCode = 1;
}
} catch (err) {
console.error(`Error: ${err instanceof Error ? err.message : String(err)}`);
process.exitCode = 1;
}
});
},
{ commands: [] }, // Adds subcommands to existing "memory" command, no conflict
);
// ========================================================================
// Lifecycle Hooks
// ========================================================================
// Track sessions where core memories have already been loaded (skip on subsequent turns).
// NOTE: This is in-memory and will be cleared on gateway restart. The agent_bootstrap
// hook below also checks for existing conversation history to avoid re-injecting core
// memories after restarts.
const bootstrappedSessions = new Set<string>();
// After compaction: clear bootstrap flag so core memories get re-injected
if (cfg.coreMemory.enabled) {
api.on("after_compaction", async (_event, ctx) => {
if (ctx.sessionKey) {
bootstrappedSessions.delete(ctx.sessionKey);
api.logger.info?.(
`memory-neo4j: cleared bootstrap flag for session ${ctx.sessionKey} after compaction`,
);
}
});
}
// Auto-recall: inject relevant memories before agent starts
api.logger.debug?.(`memory-neo4j: autoRecall=${cfg.autoRecall}`);
if (cfg.autoRecall) {
api.logger.debug?.("memory-neo4j: registering before_agent_start hook for auto-recall");
api.on("before_agent_start", async (event, ctx) => {
if (!event.prompt || event.prompt.length < 5) {
return;
}
const agentId = ctx.agentId || "default";
// Truncate prompt to avoid exceeding embedding model context length
// ~6000 chars is safe for most embedding models (leaves headroom for 2k tokens)
const MAX_QUERY_CHARS = 6000;
const query =
event.prompt.length > MAX_QUERY_CHARS
? event.prompt.slice(0, MAX_QUERY_CHARS)
: event.prompt;
try {
const results = await hybridSearch(
db,
embeddings,
query,
3,
agentId,
extractionConfig.enabled,
);
if (results.length === 0) {
return;
}
const memoryContext = results.map((r) => `- [${r.category}] ${r.text}`).join("\n");
api.logger.info?.(`memory-neo4j: injecting ${results.length} memories into context`);
api.logger.debug?.(
`memory-neo4j: auto-recall memories: ${JSON.stringify(results.map((r) => ({ id: r.id, text: r.text.slice(0, 80), category: r.category, score: r.score })))}`,
);
return {
prependContext: `<relevant-memories>\nThe following memories may be relevant to this conversation:\n${memoryContext}\n</relevant-memories>`,
};
} catch (err) {
api.logger.warn(`memory-neo4j: auto-recall failed: ${String(err)}`);
}
});
}
// Core memories: inject as virtual MEMORY.md at bootstrap time (scoped by agentId).
// Only runs on new sessions and after compaction (not every turn).
api.logger.debug?.(`memory-neo4j: coreMemory.enabled=${cfg.coreMemory.enabled}`);
if (cfg.coreMemory.enabled) {
  api.logger.debug?.("memory-neo4j: registering agent_bootstrap hook for core memories");
  api.on("agent_bootstrap", async (event, ctx) => {
    const sessionKey = ctx.sessionKey;
    // Skip if this session was already bootstrapped (avoid re-loading every turn).
    // The after_compaction hook clears the flag so we re-inject after compaction.
    if (sessionKey && bootstrappedSessions.has(sessionKey)) {
      api.logger.debug?.(
        `memory-neo4j: skipping core memory injection for already-bootstrapped session=${sessionKey}`,
      );
      return;
    }
    // Log when we're about to inject core memories for a session that wasn't tracked
    // This helps diagnose cases where context might be lost after gateway restarts
    if (sessionKey) {
      api.logger.debug?.(
        `memory-neo4j: session=${sessionKey} not in bootstrappedSessions (size=${bootstrappedSessions.size}), will check for core memories`,
      );
    }
    try {
      const agentId = ctx.agentId || "default";
      const maxEntries = cfg.coreMemory.maxEntries;
      api.logger.debug?.(
        `memory-neo4j: loading core memories for agent=${agentId} session=${sessionKey ?? "unknown"}`,
      );
      // Core memories are always included (no importance filter) - if marked as core, it's important
      // Results are ordered by importance desc, so most important come first up to maxEntries
      const coreMemories = await db.listByCategory("core", maxEntries, 0, agentId);
      if (coreMemories.length === 0) {
        // Nothing to inject — still mark the session so we don't re-query every turn.
        if (sessionKey) {
          bootstrappedSessions.add(sessionKey);
        }
        api.logger.debug?.(
          `memory-neo4j: no core memories found for agent=${agentId}, marking session as bootstrapped`,
        );
        return;
      }
      // Format core memories into a MEMORY.md-style document
      let content = "# Core Memory\n\n";
      content += "*Persistent context loaded from long-term memory*\n\n";
      for (const mem of coreMemories) {
        content += `- ${mem.text}\n`;
      }
      // Find and replace MEMORY.md in the files list, or add it
      const files = [...event.files];
      const memoryIndex = files.findIndex(
        (f) => f.name === "MEMORY.md" || f.name === "memory.md",
      );
      const virtualFile = {
        name: "MEMORY.md" as const,
        path: "memory://neo4j/core-memory",
        content,
        missing: false,
      };
      const action = memoryIndex >= 0 ? "replaced" : "added";
      if (memoryIndex >= 0) {
        files[memoryIndex] = virtualFile;
      } else {
        files.push(virtualFile);
      }
      if (sessionKey) {
        bootstrappedSessions.add(sessionKey);
      }
      // Log at info level when actually injecting, debug for skips
      api.logger.info?.(
        `memory-neo4j: ${action} MEMORY.md with ${coreMemories.length} core memories for agent=${agentId} session=${sessionKey ?? "unknown"}`,
      );
      return { files };
    } catch (err) {
      // Best-effort: agent bootstrap must not fail because memory injection failed.
      api.logger.warn(`memory-neo4j: core memory injection failed: ${String(err)}`);
    }
  });
}
// Auto-capture: LLM-based decision on what to store from conversations
api.logger.debug?.(
  `memory-neo4j: autoCapture=${cfg.autoCapture}, extraction.enabled=${extractionConfig.enabled}`,
);
if (cfg.autoCapture) {
  api.logger.debug?.("memory-neo4j: registering agent_end hook for auto-capture");
  api.on("agent_end", async (event, ctx) => {
    api.logger.debug?.(
      `memory-neo4j: agent_end fired (success=${event.success}, messages=${event.messages?.length ?? 0})`,
    );
    // Only capture from successful runs that produced messages.
    if (!event.success || !event.messages || event.messages.length === 0) {
      api.logger.debug?.("memory-neo4j: skipping - no success or empty messages");
      return;
    }
    const agentId = ctx.agentId || "default";
    const sessionKey = ctx.sessionKey;
    try {
      if (extractionConfig.enabled) {
        // LLM-based auto-capture (Decision Q8)
        const userMessages = extractUserMessages(event.messages);
        if (userMessages.length === 0) {
          return;
        }
        const items = await evaluateAutoCapture(userMessages, extractionConfig);
        if (items.length === 0) {
          return;
        }
        let stored = 0;
        for (const item of items) {
          try {
            const vector = await embeddings.embed(item.text);
            // Check for duplicates
            const existing = await db.findSimilar(vector, 0.95, 1);
            if (existing.length > 0) {
              continue;
            }
            const memoryId = randomUUID();
            await db.storeMemory({
              id: memoryId,
              text: item.text,
              embedding: vector,
              importance: item.importance,
              category: item.category,
              source: "auto-capture",
              extractionStatus: "pending",
              agentId,
              sessionKey,
            });
            // Extraction deferred to sleep cycle (like human memory consolidation)
            stored++;
          } catch (err) {
            // One bad item shouldn't abort the rest of the batch.
            api.logger.debug?.(`memory-neo4j: auto-capture item failed: ${String(err)}`);
          }
        }
        if (stored > 0) {
          api.logger.info(`memory-neo4j: auto-captured ${stored} memories (LLM-based)`);
        }
      } else {
        // Fallback: rule-based capture (no extraction API key)
        const userMessages = extractUserMessages(event.messages);
        if (userMessages.length === 0) {
          return;
        }
        // Length bounds filter out trivial fragments and long pasted payloads.
        const toCapture = userMessages.filter(
          (text) => text.length >= 10 && text.length <= 500 && shouldCaptureRuleBased(text),
        );
        if (toCapture.length === 0) {
          return;
        }
        let stored = 0;
        // At most 3 rule-based captures per run.
        for (const text of toCapture.slice(0, 3)) {
          const category = detectCategory(text);
          const vector = await embeddings.embed(text);
          // Skip near-duplicates already in the store (cosine >= 0.95).
          const existing = await db.findSimilar(vector, 0.95, 1);
          if (existing.length > 0) {
            continue;
          }
          await db.storeMemory({
            id: randomUUID(),
            text,
            embedding: vector,
            importance: 0.7,
            category,
            source: "auto-capture",
            extractionStatus: "skipped",
            agentId,
            sessionKey,
          });
          stored++;
        }
        if (stored > 0) {
          api.logger.info(`memory-neo4j: auto-captured ${stored} memories (rule-based)`);
        }
      }
    } catch (err) {
      // Capture is best-effort: never fail the agent_end pipeline.
      api.logger.warn(`memory-neo4j: auto-capture failed: ${String(err)}`);
    }
  });
}
// ========================================================================
// Service
// ========================================================================
// Lifecycle wrapper: initialize the Neo4j-backed store on start, close it on stop.
api.registerService({
  id: "memory-neo4j",
  start: async () => {
    try {
      await db.ensureInitialized();
      api.logger.info(
        `memory-neo4j: service started (uri: ${cfg.neo4j.uri}, model: ${cfg.embedding.model})`,
      );
    } catch (err) {
      api.logger.error(
        `memory-neo4j: failed to start — ${String(err)}. Memory tools will attempt lazy initialization.`,
      );
      // Don't throw — allow graceful degradation.
      // Tools will retry initialization on first use.
    }
  },
  stop: async () => {
    // Releases the underlying driver/connection.
    await db.close();
    api.logger.info("memory-neo4j: service stopped");
  },
});
},
};
// ============================================================================
// Rule-based capture filter (fallback when no extraction API key)
// ============================================================================
// Heuristic triggers: explicit "remember" requests (English/Czech), stated
// preferences and decisions, phone numbers, email addresses, and
// "my X is" / "X is my" style statements.
const MEMORY_TRIGGERS = [
  /remember|zapamatuj|pamatuj/i,
  /prefer|radši|nechci|preferuji/i,
  /decided|rozhodli|budeme používat/i,
  /\+\d{10,}/,
  /[\w.-]+@[\w.-]+\.\w+/,
  /my\s+\w+\s+is|is\s+my/i,
  /i (like|prefer|hate|love|want|need)/i,
  /always|never|important/i,
];
/**
 * Cheap filter deciding whether a user message is worth auto-capturing when
 * LLM-based capture is unavailable.
 *
 * Rejects injected memory context, markup-like payloads, markdown-heavy
 * text, and emoji spam; otherwise captures iff any trigger pattern matches.
 */
function shouldCaptureRuleBased(text: string): boolean {
  const isInjectedMemoryBlock = text.includes("<relevant-memories>");
  const looksLikeMarkup = text.startsWith("<") && text.includes("</");
  const looksLikeMarkdownList = text.includes("**") && text.includes("\n-");
  if (isInjectedMemoryBlock || looksLikeMarkup || looksLikeMarkdownList) {
    return false;
  }
  const emojiMatches = text.match(/[\u{1F300}-\u{1F9FF}]/gu) ?? [];
  if (emojiMatches.length > 3) {
    return false;
  }
  return MEMORY_TRIGGERS.some((trigger) => trigger.test(text));
}
/**
 * Heuristically assign a MemoryCategory to captured text.
 *
 * Checks run in priority order (preference > decision > entity > fact) and
 * fall through to "other". Patterns cover English and Czech keywords.
 *
 * Note: all regexes carry the /i flag, so the previous explicit
 * `toLowerCase()` pass was redundant and has been removed (behavior is
 * unchanged — /i canonicalization already covers it).
 */
function detectCategory(text: string): MemoryCategory {
  if (/prefer|radši|like|love|hate|want/i.test(text)) {
    return "preference";
  }
  if (/decided|rozhodli|will use|budeme/i.test(text)) {
    return "decision";
  }
  // Phone numbers, email addresses, or "is called"-style naming → entity.
  if (/\+\d{10,}|@[\w.-]+\.\w+|is called|jmenuje se/i.test(text)) {
    return "entity";
  }
  // Copula/possession verbs suggest a plain fact. NOTE(review): matches
  // substrings too (e.g. "this" contains "is") — kept for compatibility.
  if (/is|are|has|have|je|má|jsou/i.test(text)) {
    return "fact";
  }
  return "other";
}
// ============================================================================
// Export
// ============================================================================
export default memoryNeo4jPlugin;

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,99 @@
{
"id": "memory-neo4j",
"kind": "memory",
"uiHints": {
"embedding.provider": {
"label": "Embedding Provider",
"placeholder": "openai",
"help": "Provider for embeddings: 'openai' or 'ollama'"
},
"embedding.apiKey": {
"label": "API Key",
"sensitive": true,
"placeholder": "sk-proj-...",
"help": "API key for OpenAI embeddings (not needed for Ollama)"
},
"embedding.model": {
"label": "Embedding Model",
"placeholder": "text-embedding-3-small",
"help": "Embedding model to use (e.g., text-embedding-3-small for OpenAI, mxbai-embed-large for Ollama)"
},
"embedding.baseUrl": {
"label": "Base URL",
"placeholder": "http://localhost:11434",
"help": "Base URL for Ollama API (optional)"
},
"neo4j.uri": {
"label": "Neo4j URI",
"placeholder": "bolt://localhost:7687",
"help": "Bolt connection URI for your Neo4j instance"
},
"neo4j.user": {
"label": "Neo4j Username",
"placeholder": "neo4j"
},
"neo4j.password": {
"label": "Neo4j Password",
"sensitive": true
},
"autoCapture": {
"label": "Auto-Capture",
"help": "Automatically capture important information from conversations"
},
"autoRecall": {
"label": "Auto-Recall",
"help": "Automatically inject relevant memories into context"
}
},
"configSchema": {
"type": "object",
"additionalProperties": false,
"properties": {
"embedding": {
"type": "object",
"additionalProperties": false,
"properties": {
"provider": {
"type": "string",
"enum": ["openai", "ollama"]
},
"apiKey": {
"type": "string"
},
"model": {
"type": "string"
},
"baseUrl": {
"type": "string"
}
}
},
"neo4j": {
"type": "object",
"additionalProperties": false,
"properties": {
"uri": {
"type": "string"
},
"user": {
"type": "string"
},
"username": {
"type": "string"
},
"password": {
"type": "string"
}
},
"required": ["uri"]
},
"autoCapture": {
"type": "boolean"
},
"autoRecall": {
"type": "boolean"
}
},
"required": ["neo4j"]
}
}

View File

@@ -0,0 +1,19 @@
{
"name": "@openclaw/memory-neo4j",
"version": "2026.2.2",
"description": "OpenClaw Neo4j-backed long-term memory plugin with three-signal hybrid search, entity extraction, and knowledge graph",
"type": "module",
"dependencies": {
"@sinclair/typebox": "0.34.48",
"neo4j-driver": "^5.27.0",
"openai": "^6.17.0"
},
"devDependencies": {
"openclaw": "workspace:*"
},
"openclaw": {
"extensions": [
"./index.ts"
]
}
}

View File

@@ -0,0 +1,174 @@
/**
* Graph schema types, Cypher query templates, and constants for memory-neo4j.
*/
// ============================================================================
// Node Types
// ============================================================================
/** Broad classification of a memory, used for filtering and recall grouping. */
export type MemoryCategory = "preference" | "fact" | "decision" | "entity" | "other";
/** Kind of real-world entity extracted from memory text. */
export type EntityType = "person" | "organization" | "location" | "event" | "concept";
/** Lifecycle state of LLM entity/relationship extraction for a memory. */
export type ExtractionStatus = "pending" | "complete" | "failed" | "skipped";
/** Origin of a memory (explicit user save, auto-capture hook, watcher, import). */
export type MemorySource = "user" | "auto-capture" | "memory-watcher" | "import";
/** A memory as stored in the graph database. */
export type MemoryNode = {
  id: string; // unique id (UUID at the call sites in this plugin)
  text: string; // raw memory content
  embedding: number[]; // embedding vector used for similarity search
  importance: number; // relevance weight (call sites use 0-1 values — TODO confirm range is enforced)
  category: MemoryCategory;
  source: MemorySource;
  createdAt: string; // timestamp string — presumably ISO-8601; verify against the client
  updatedAt: string; // timestamp string — presumably ISO-8601; verify against the client
  extractionStatus: ExtractionStatus; // whether graph extraction has run for this memory
  agentId: string; // memories are scoped per agent
  sessionKey?: string; // session that produced the memory, when known
};
/** An entity (person, organization, …) referenced by one or more memories. */
export type EntityNode = {
  id: string;
  name: string; // canonical display name
  type: EntityType;
  aliases: string[]; // alternative names for matching
  embedding?: number[]; // optional vector for entity-level similarity
  description?: string;
  firstSeen: string; // timestamp of first mention
  lastSeen: string; // timestamp of most recent mention
  mentionCount: number; // number of mentions observed
};
/** A tag node for lightweight thematic grouping of memories. */
export type TagNode = {
  id: string;
  name: string;
  category: string; // free-form tag grouping (distinct from MemoryCategory)
  createdAt: string;
};
// ============================================================================
// Extraction Types
// ============================================================================
/** An entity proposed by the extraction pass. */
export type ExtractedEntity = {
  name: string;
  type: EntityType;
  aliases?: string[];
  description?: string;
};
/** A relationship proposed by extraction between two named entities. */
export type ExtractedRelationship = {
  source: string; // source entity name
  target: string; // target entity name
  type: string; // relationship label — presumably checked via validateRelationshipType; confirm at call sites
  confidence: number; // extraction confidence
};
/** A tag proposed by extraction. */
export type ExtractedTag = {
  name: string;
  category: string;
};
/** Combined output of one extraction pass over a memory. */
export type ExtractionResult = {
  entities: ExtractedEntity[];
  relationships: ExtractedRelationship[];
  tags: ExtractedTag[];
};
// ============================================================================
// Auto-Capture Types
// ============================================================================
/** A single snippet the auto-capture evaluator decided is worth storing. */
export type CaptureItem = {
  text: string;
  category: MemoryCategory;
  importance: number;
};
/** Full auto-capture decision for one conversation. */
export type CaptureDecision = {
  memories: CaptureItem[];
};
// ============================================================================
// Search Types
// ============================================================================
/** A single hit from one search signal (vector, BM25, or graph). */
export type SearchSignalResult = {
  id: string;
  text: string;
  category: string;
  importance: number;
  createdAt: string;
  score: number; // signal-native relevance score
};
/**
 * A fused hit returned by hybrid search.
 * NOTE: structurally identical to SearchSignalResult, but kept as a separate
 * alias so the fused-score semantics can diverge without breaking signal callers.
 */
export type HybridSearchResult = {
  id: string;
  text: string;
  category: string;
  importance: number;
  createdAt: string;
  score: number; // fused RRF score (hybridSearch normalizes the best hit toward 1.0)
};
// ============================================================================
// Input Types
// ============================================================================
/**
 * Payload for persisting a new memory. createdAt/updatedAt are not part of
 * the input — presumably assigned by the client on write; verify there.
 */
export type StoreMemoryInput = {
  id: string;
  text: string;
  embedding: number[];
  importance: number;
  category: MemoryCategory;
  source: MemorySource;
  extractionStatus: ExtractionStatus;
  agentId: string;
  sessionKey?: string;
};
/** Payload for creating-or-updating an entity node. */
export type MergeEntityInput = {
  id: string;
  name: string;
  type: EntityType;
  aliases?: string[];
  description?: string;
  embedding?: number[];
};
// ============================================================================
// Constants
// ============================================================================
// NOTE: must stay in sync with the MemoryCategory union above.
export const MEMORY_CATEGORIES = ["preference", "fact", "decision", "entity", "other"] as const;
// NOTE: must stay in sync with the EntityType union above.
export const ENTITY_TYPES = ["person", "organization", "location", "event", "concept"] as const;
// Whitelist of relationship labels that may appear in Cypher. Anything
// outside this set is rejected by validateRelationshipType below to prevent
// Cypher injection via dynamic relationship types.
export const ALLOWED_RELATIONSHIP_TYPES = new Set([
  "WORKS_AT",
  "LIVES_AT",
  "KNOWS",
  "MARRIED_TO",
  "PREFERS",
  "DECIDED",
  "RELATED_TO",
]);
// ============================================================================
// Lucene Helpers
// ============================================================================
const LUCENE_SPECIAL_CHARS = /[+\-&|!(){}[\]^"~*?:\\/]/g;
/**
 * Escape Lucene query-syntax characters so user-supplied text is treated
 * literally in fulltext index lookups.
 */
export function escapeLucene(query: string): string {
  return query.replace(LUCENE_SPECIAL_CHARS, (match) => `\\${match}`);
}
/**
 * Validate that a relationship type is in the allowed set.
 * Prevents Cypher injection via dynamic relationship type.
 *
 * @param type - candidate relationship label; matched case-sensitively
 *   against ALLOWED_RELATIONSHIP_TYPES (e.g. "WORKS_AT")
 * @returns true when the label is allowed
 */
export function validateRelationshipType(type: string): boolean {
  return ALLOWED_RELATIONSHIP_TYPES.has(type);
}

View File

@@ -0,0 +1,257 @@
/**
* Three-signal hybrid search with query-adaptive RRF fusion.
*
* Combines:
* Signal 1: Vector similarity (HNSW cosine)
* Signal 2: BM25 full-text keyword matching
* Signal 3: Graph traversal (entity → MENTIONS ← memory)
*
* Fused using confidence-weighted Reciprocal Rank Fusion (RRF)
* with query-adaptive signal weights.
*
* Adapted from ~/Downloads/ontology/app/services/rrf.py
*/
import type { Embeddings } from "./embeddings.js";
import type { Neo4jMemoryClient } from "./neo4j-client.js";
import type { HybridSearchResult, SearchSignalResult } from "./schema.js";
// ============================================================================
// Query Classification
// ============================================================================
export type QueryType = "short" | "entity" | "long" | "default";
/**
 * Classify a query so the caller can pick adaptive signal weights.
 *
 * - "short"   (1-2 words): BM25 exact keyword matching excels
 * - "long"    (5+ words): vector search captures semantic intent better
 * - "entity"  (3-4 words containing a proper noun, or an entity-style
 *   question): graph traversal over mentioned entities helps most
 * - "default": everything else — balanced weights
 */
export function classifyQuery(query: string): QueryType {
  const tokens = query.trim().split(/\s+/);
  if (tokens.length <= 2) {
    return "short";
  }
  if (tokens.length >= 5) {
    return "long";
  }
  // 3-4 words: treat any capitalized non-stopword after the first word as a
  // proper noun (the first word is often capitalized regardless).
  const hasProperNoun = tokens
    .slice(1)
    .some(
      (token) =>
        /^[A-Z]/.test(token) &&
        !/^(I|A|An|The|Is|Are|Was|Were|What|Who|Where|When|How|Why|Do|Does|Did)$/.test(token),
    );
  if (hasProperNoun) {
    return "entity";
  }
  // Entity-style questions ("who is …", "where does …") also favor the graph.
  if (/^(who|where|what)\s+(is|does|did|was|were)\s/i.test(query)) {
    return "entity";
  }
  return "default";
}
/**
 * Map a query classification to [vectorWeight, bm25Weight, graphWeight]
 * for RRF fusion (Decision Q7: query-adaptive weights).
 *
 * - short  → boost BM25 (exact keyword matching)
 * - entity → boost graph (relationship traversal)
 * - long   → boost vector (semantic similarity)
 *
 * When graph search is disabled its weight is forced to zero so the missing
 * signal cannot influence fusion.
 */
export function getAdaptiveWeights(
  queryType: QueryType,
  graphEnabled: boolean,
): [number, number, number] {
  const graphBase = graphEnabled ? 1.0 : 0.0;
  // Base weights per query type: [vector, bm25, graph-multiplier].
  const table: Record<QueryType, [number, number, number]> = {
    short: [0.8, 1.2, 1.0],
    entity: [0.8, 1.0, 1.3],
    long: [1.2, 0.7, 0.8],
    default: [1.0, 1.0, 1.0],
  };
  const [vector, bm25, graph] = table[queryType] ?? table.default;
  return [vector, bm25, graphBase * graph];
}
// ============================================================================
// Confidence-Weighted RRF Fusion
// ============================================================================
/** Position and score of a candidate within one signal's result list. */
type SignalEntry = {
  rank: number; // 1-indexed
  score: number; // 0-1 normalized
};
/** A candidate after fusion, carrying its combined RRF score. */
type FusedCandidate = {
  id: string;
  text: string;
  category: string;
  importance: number;
  createdAt: string;
  rrfScore: number; // confidence-weighted RRF sum across signals (unnormalized)
};
/**
 * Fuse ranked result lists from multiple signals using confidence-weighted
 * Reciprocal Rank Fusion:
 *
 *   RRF_conf(d) = Σ_i  w_i × score_i(d) / (k + rank_i(d))
 *
 * Standard RRF (Cormack et al., 2009) uses ranks only; multiplying by each
 * signal's own score preserves magnitude, so a rank-1 hit scored 0.99
 * contributes more than a rank-1 hit scored 0.55.
 */
function fuseWithConfidenceRRF(
  signals: SearchSignalResult[][],
  k: number,
  weights: number[],
): FusedCandidate[] {
  // Per-signal lookup of 1-indexed rank and score; when a signal contains a
  // duplicate id, the first (better-ranked) occurrence wins.
  const lookups = signals.map((signal) => {
    const byId = new Map<string, SignalEntry>();
    signal.forEach((entry, index) => {
      if (!byId.has(entry.id)) {
        byId.set(entry.id, { rank: index + 1, score: entry.score });
      }
    });
    return byId;
  });
  // Union of every candidate id with its display metadata (first seen wins).
  const metadata = new Map<
    string,
    { text: string; category: string; importance: number; createdAt: string }
  >();
  for (const signal of signals) {
    for (const { id, text, category, importance, createdAt } of signal) {
      if (!metadata.has(id)) {
        metadata.set(id, { text, category, importance, createdAt });
      }
    }
  }
  // Sum each candidate's weighted contribution across the signals it appears in.
  const fused: FusedCandidate[] = [];
  for (const [id, meta] of metadata) {
    let rrfScore = 0;
    lookups.forEach((lookup, signalIndex) => {
      const hit = lookup.get(id);
      if (hit) {
        rrfScore += weights[signalIndex] * hit.score * (1 / (k + hit.rank));
      }
    });
    fused.push({ id, ...meta, rrfScore });
  }
  // Highest fused score first (stable sort keeps insertion order on ties).
  fused.sort((a, b) => b.rrfScore - a.rrfScore);
  return fused;
}
// ============================================================================
// Hybrid Search Orchestrator
// ============================================================================
/**
 * Three-signal hybrid search with query-adaptive RRF fusion.
 *
 * Pipeline: embed the query → classify it for adaptive weights → run the
 * vector, BM25, and (optionally) graph signals in parallel → fuse with
 * confidence-weighted RRF → normalize the top `limit` scores into 0-1.
 *
 * Graceful degradation: when graph search is not enabled (no extraction API
 * key) it contributes an empty list and fusion works with the remaining
 * two signals.
 */
export async function hybridSearch(
  db: Neo4jMemoryClient,
  embeddings: Embeddings,
  query: string,
  limit: number = 5,
  agentId: string = "default",
  graphEnabled: boolean = false,
  options: {
    rrfK?: number;
    candidateMultiplier?: number;
    graphFiringThreshold?: number;
  } = {},
): Promise<HybridSearchResult[]> {
  const { rrfK = 60, candidateMultiplier = 4, graphFiringThreshold = 0.3 } = options;
  // Over-fetch candidates per signal (clamped to [1, 200]) so fusion has
  // enough overlap to work with.
  const candidateLimit = Math.floor(Math.min(200, Math.max(1, limit * candidateMultiplier)));
  const queryVector = await embeddings.embed(query);
  const weights = getAdaptiveWeights(classifyQuery(query), graphEnabled);
  const [vectorHits, bm25Hits, graphHits] = await Promise.all([
    db.vectorSearch(queryVector, candidateLimit, 0.1, agentId),
    db.bm25Search(query, candidateLimit, agentId),
    graphEnabled
      ? db.graphSearch(query, candidateLimit, graphFiringThreshold, agentId)
      : Promise.resolve([] as SearchSignalResult[]),
  ]);
  const fused = fuseWithConfidenceRRF([vectorHits, bm25Hits, graphHits], rrfK, weights);
  // Scale so the best candidate scores 1.0 (raw RRF magnitudes are tiny).
  const topScore = fused.length > 0 ? fused[0].rrfScore : 1;
  const scale = topScore > 0 ? 1 / topScore : 1;
  return fused.slice(0, limit).map(({ id, text, category, importance, createdAt, rrfScore }) => ({
    id,
    text,
    category,
    importance,
    createdAt,
    score: Math.min(1, rrfScore * scale),
  }));
}

View File

@@ -0,0 +1,19 @@
{
"compilerOptions": {
"target": "ES2023",
"lib": ["ES2023"],
"module": "ESNext",
"moduleResolution": "bundler",
"strict": true,
"esModuleInterop": true,
"skipLibCheck": true,
"forceConsistentCasingInFileNames": true,
"declaration": true,
"declarationMap": true,
"sourceMap": true,
"outDir": "./dist",
"rootDir": "."
},
"include": ["*.ts"],
"exclude": ["node_modules", "dist", "*.test.ts"]
}