memory-neo4j: task-aware memory filtering (3 layers)

Layer 1 — Recall-time filter (task-filter.ts):
- New module that reads TASKS.md completed tasks and filters recalled
  memories that match completed task IDs or keywords
- Integrated into auto-recall hook as Feature 3 (after score/dedup filters)
- 60-second cache to avoid re-parsing TASKS.md on every message
- 29 new tests

Layer 2 — Sleep cycle Phase 7 (task-memory cleanup):
- New phase cross-references completed tasks with stored memories
- LLM classifies each matched memory as 'lasting' (keep) or 'noise' (delete)
- Conservative: keeps memories on any doubt or LLM failure
- Scans only tasks completed within last 7 days
- New searchMemoriesByKeywords() method on neo4j client
- 16 new tests

Layer 3 — Memory task metadata (taskId field):
- Optional taskId field on MemoryNode, StoreMemoryInput, and search results
- Auto-tags memories during auto-capture when exactly 1 active task exists
- Precise taskId-based filtering at recall time (complements Layer 1)
- findMemoriesByTaskId() and clearTaskIdFromMemories() on neo4j client
- taskId flows through vector, BM25, and graph search signals + RRF fusion
- 20 new tests

All 669 memory-neo4j tests pass. Zero regressions in full suite.
All changes are backward compatible — existing memories without taskId
continue to work. No migration needed.
This commit is contained in:
Tarun Sukhani
2026-02-16 17:04:40 +08:00
parent 18b8007d23
commit 6ff248fd4e
9 changed files with 2052 additions and 5 deletions

View File

@@ -32,6 +32,8 @@ import { isSemanticDuplicate, rateImportance } from "./extractor.js";
import { extractUserMessages, extractAssistantMessages } from "./message-utils.js";
import { Neo4jMemoryClient } from "./neo4j-client.js";
import { hybridSearch } from "./search.js";
import { isRelatedToCompletedTask, loadCompletedTaskKeywords } from "./task-filter.js";
import { parseTaskLedger } from "./task-ledger.js";
// ============================================================================
// Plugin Definition
@@ -185,16 +187,21 @@ const memoryNeo4jPlugin = {
}),
),
category: Type.Optional(stringEnum(MEMORY_CATEGORIES)),
taskId: Type.Optional(
Type.String({ description: "Optional task ID to link memory to (e.g., TASK-001)" }),
),
}),
async execute(_toolCallId: string, params: unknown) {
const {
text,
importance = 0.7,
category = "other",
taskId,
} = params as {
text: string;
importance?: number;
category?: MemoryCategory;
taskId?: string;
};
// 1. Generate embedding
@@ -232,6 +239,8 @@ const memoryNeo4jPlugin = {
extractionStatus: extractionConfig.enabled ? "pending" : "skipped",
agentId,
sessionKey,
// Layer 3: Pass through taskId if provided by the agent
...(taskId ? { taskId } : {}),
});
// 4. Extraction is deferred to sleep cycle (like human memory consolidation)
@@ -557,6 +566,49 @@ const memoryNeo4jPlugin = {
results = results.filter((r) => !coreIds.has(r.id));
}
// Feature 3: Filter out memories related to completed tasks
const workspaceDir = ctx.workspaceDir;
if (workspaceDir) {
try {
const completedTasks = await loadCompletedTaskKeywords(workspaceDir);
if (completedTasks.length > 0) {
const before = results.length;
results = results.filter((r) => !isRelatedToCompletedTask(r.text, completedTasks));
if (results.length < before) {
api.logger.debug?.(
`memory-neo4j: task-filter removed ${before - results.length} memories related to completed tasks`,
);
}
}
} catch (err) {
api.logger.debug?.(`memory-neo4j: task-filter skipped: ${String(err)}`);
}
}
// Layer 3: Filter out memories linked to completed tasks by taskId
// This complements Layer 1's keyword-based filter with precise taskId matching
if (workspaceDir) {
try {
const fs = await import("node:fs/promises");
const path = await import("node:path");
const tasksPath = path.default.join(workspaceDir, "TASKS.md");
const content = await fs.default.readFile(tasksPath, "utf-8");
const ledger = parseTaskLedger(content);
const completedTaskIds = new Set(ledger.completedTasks.map((t) => t.id));
if (completedTaskIds.size > 0) {
const before = results.length;
results = results.filter((r) => !r.taskId || !completedTaskIds.has(r.taskId));
if (results.length < before) {
api.logger.debug?.(
`memory-neo4j: taskId-filter removed ${before - results.length} memories linked to completed tasks`,
);
}
}
} catch {
// TASKS.md doesn't exist or can't be read — skip taskId filter
}
}
const totalMs = performance.now() - t0;
api.logger.info?.(
`memory-neo4j: [bench] auto-recall ${totalMs.toFixed(0)}ms total (search=${(tSearch - t0).toFixed(0)}ms), ${results.length} results`,
@@ -723,6 +775,7 @@ const memoryNeo4jPlugin = {
embeddings,
extractionConfig,
api.logger,
ctx.workspaceDir, // Layer 3: pass workspace dir for task auto-tagging
);
});
}
@@ -756,6 +809,51 @@ const memoryNeo4jPlugin = {
},
};
// ============================================================================
// Layer 3: TASKS.md cache for auto-capture task tagging
// ============================================================================
/** Cached result of TASKS.md parsing for auto-capture task tagging. */
let _taskLedgerCache: { activeTaskId: string | undefined; expiresAt: number } | null = null;
const TASK_LEDGER_CACHE_TTL_MS = 60_000; // 60 seconds
/**
* Get the active task ID from TASKS.md (if exactly one active task).
* Results are cached with a 60-second TTL to avoid re-parsing on every message.
*/
async function getActiveTaskIdForCapture(
workspaceDir: string | undefined,
logger: Logger,
): Promise<string | undefined> {
const now = Date.now();
if (_taskLedgerCache && now < _taskLedgerCache.expiresAt) {
return _taskLedgerCache.activeTaskId;
}
let activeTaskId: string | undefined;
if (workspaceDir) {
try {
const fs = await import("node:fs/promises");
const path = await import("node:path");
const tasksPath = path.default.join(workspaceDir, "TASKS.md");
const content = await fs.default.readFile(tasksPath, "utf-8");
const ledger = parseTaskLedger(content);
// Only auto-tag when there's exactly one active task to avoid ambiguity
if (ledger.activeTasks.length === 1) {
activeTaskId = ledger.activeTasks[0].id;
}
} catch {
// TASKS.md doesn't exist or can't be read — no auto-tagging
}
}
_taskLedgerCache = { activeTaskId, expiresAt: now + TASK_LEDGER_CACHE_TTL_MS };
return activeTaskId;
}
// Exported for testing
export { _taskLedgerCache, getActiveTaskIdForCapture as _getActiveTaskIdForCapture };
// ============================================================================
// Auto-capture pipeline (fire-and-forget from agent_end hook)
// ============================================================================
@@ -776,6 +874,7 @@ async function captureMessage(
extractionConfig: import("./config.js").ExtractionConfig,
logger: Logger,
precomputedVector?: number[],
taskId?: string, // Layer 3: optional task ID for auto-tagging
): Promise<{ stored: boolean; semanticDeduped: boolean }> {
// For assistant messages, rate importance first (before embedding) to skip early.
// When extraction is disabled, rateImportance returns 0.5 (the fallback), so we
@@ -835,6 +934,8 @@ async function captureMessage(
extractionStatus: extractionConfig.enabled ? "pending" : "skipped",
agentId,
sessionKey,
// Layer 3: auto-tag with active task ID when available
...(taskId ? { taskId } : {}),
});
return { stored: true, semanticDeduped: false };
}
@@ -851,6 +952,7 @@ async function runAutoCapture(
embeddings: import("./embeddings.js").Embeddings,
extractionConfig: import("./config.js").ExtractionConfig,
logger: Logger,
workspaceDir?: string, // Layer 3: workspace dir for task auto-tagging
): Promise<void> {
try {
const t0 = performance.now();
@@ -890,6 +992,9 @@ async function runAutoCapture(
const vectors = allTexts.length > 0 ? await embeddings.embedBatch(allTexts) : [];
const tEmbed = performance.now();
// Layer 3: Detect active task for auto-tagging
const activeTaskId = await getActiveTaskIdForCapture(workspaceDir, logger);
// Process each with pre-computed vector
for (let i = 0; i < allMeta.length; i++) {
try {
@@ -906,6 +1011,7 @@ async function runAutoCapture(
extractionConfig,
logger,
vectors[i],
activeTaskId, // Layer 3: auto-tag with active task ID
);
if (result.stored) stored++;
if (result.semanticDeduped) semanticDeduped++;

View File

@@ -237,6 +237,8 @@ export class Neo4jMemoryClient {
const session = this.driver!.session();
try {
const now = new Date().toISOString();
// Layer 3: taskId is optional — only set on the node when provided
const taskIdClause = input.taskId ? ", taskId: $taskId" : "";
const result = await session.run(
`CREATE (m:Memory {
id: $id, text: $text, embedding: $embedding,
@@ -245,12 +247,13 @@ export class Neo4jMemoryClient {
agentId: $agentId, sessionKey: $sessionKey,
createdAt: $createdAt, updatedAt: $updatedAt,
retrievalCount: $retrievalCount, lastRetrievedAt: $lastRetrievedAt,
extractionRetries: $extractionRetries
extractionRetries: $extractionRetries${taskIdClause}
})
RETURN m.id AS id`,
{
...input,
sessionKey: input.sessionKey ?? null,
taskId: input.taskId ?? null,
createdAt: now,
updatedAt: now,
retrievalCount: 0,
@@ -444,6 +447,7 @@ export class Neo4jMemoryClient {
WHERE score >= $minScore ${agentFilter}
RETURN node.id AS id, node.text AS text, node.category AS category,
node.importance AS importance, node.createdAt AS createdAt,
node.taskId AS taskId,
score AS similarity
ORDER BY score DESC`,
{
@@ -461,6 +465,7 @@ export class Neo4jMemoryClient {
importance: r.get("importance") as number,
createdAt: String(r.get("createdAt") ?? ""),
score: r.get("similarity") as number,
taskId: (r.get("taskId") as string) ?? undefined,
}));
} finally {
await session.close();
@@ -495,6 +500,7 @@ export class Neo4jMemoryClient {
WHERE true ${agentFilter}
RETURN node.id AS id, node.text AS text, node.category AS category,
node.importance AS importance, node.createdAt AS createdAt,
node.taskId AS taskId,
score AS bm25Score
ORDER BY score DESC
LIMIT $limit`,
@@ -513,6 +519,7 @@ export class Neo4jMemoryClient {
importance: r.get("importance") as number,
createdAt: String(r.get("createdAt") ?? ""),
rawScore: r.get("bm25Score") as number,
taskId: (r.get("taskId") as string) ?? undefined,
}));
if (records.length === 0) {
@@ -585,6 +592,7 @@ export class Neo4jMemoryClient {
WITH entity, collect({
id: m.id, text: m.text, category: m.category,
importance: m.importance, createdAt: m.createdAt,
taskId: m.taskId,
score: coalesce(rm.confidence, 1.0)
}) AS directResults
@@ -596,6 +604,7 @@ export class Neo4jMemoryClient {
WITH directResults, collect({
id: m2.id, text: m2.text, category: m2.category,
importance: m2.importance, createdAt: m2.createdAt,
taskId: m2.taskId,
score: reduce(s = 1.0, r IN rels | s * coalesce(r.confidence, 0.7)) * coalesce(rm2.confidence, 1.0)
}) AS hopResults
@@ -604,6 +613,7 @@ export class Neo4jMemoryClient {
WITH row WHERE row.id IS NOT NULL
RETURN row.id AS id, row.text AS text, row.category AS category,
row.importance AS importance, row.createdAt AS createdAt,
row.taskId AS taskId,
max(row.score) AS graphScore`,
{ query: escaped, firingThreshold, ...(agentId ? { agentId } : {}) },
);
@@ -625,6 +635,7 @@ export class Neo4jMemoryClient {
importance: record.get("importance") as number,
createdAt: String(record.get("createdAt") ?? ""),
score,
taskId: (record.get("taskId") as string) ?? undefined,
});
}
}
@@ -725,6 +736,62 @@ export class Neo4jMemoryClient {
});
}
// --------------------------------------------------------------------------
// Layer 3: Task Metadata Operations
// --------------------------------------------------------------------------
/**
* Find memories linked to a specific task ID.
* Used by recall filter and sleep cycle to identify task-related memories.
*/
async findMemoriesByTaskId(
taskId: string,
agentId?: string,
): Promise<Array<{ id: string; text: string; category: string; importance: number }>> {
await this.ensureInitialized();
const session = this.driver!.session();
try {
const agentFilter = agentId ? "AND m.agentId = $agentId" : "";
const result = await session.run(
`MATCH (m:Memory)
WHERE m.taskId = $taskId ${agentFilter}
RETURN m.id AS id, m.text AS text, m.category AS category, m.importance AS importance
ORDER BY m.createdAt ASC`,
{ taskId, ...(agentId ? { agentId } : {}) },
);
return result.records.map((r) => ({
id: r.get("id") as string,
text: r.get("text") as string,
category: r.get("category") as string,
importance: r.get("importance") as number,
}));
} finally {
await session.close();
}
}
/**
* Bulk-clear taskId from memories (e.g., when the task-memory link is no longer needed).
* Sets taskId to null rather than deleting the memory.
*/
async clearTaskIdFromMemories(taskId: string, agentId?: string): Promise<number> {
await this.ensureInitialized();
const session = this.driver!.session();
try {
const agentFilter = agentId ? "AND m.agentId = $agentId" : "";
const result = await session.run(
`MATCH (m:Memory)
WHERE m.taskId = $taskId ${agentFilter}
SET m.taskId = null
RETURN count(m) AS cleared`,
{ taskId, ...(agentId ? { agentId } : {}) },
);
return (result.records[0]?.get("cleared") as number) ?? 0;
} finally {
await session.close();
}
}
// --------------------------------------------------------------------------
// Entity & Relationship Operations
// --------------------------------------------------------------------------
@@ -2089,6 +2156,55 @@ export class Neo4jMemoryClient {
}
}
/**
* Search memories by keywords using the fulltext (BM25) index.
* Returns memories whose text matches any of the given keywords.
* Used by the sleep cycle task-memory cleanup phase to find memories
* related to completed tasks.
*/
async searchMemoriesByKeywords(
keywords: string[],
limit: number = 50,
agentId?: string,
): Promise<Array<{ id: string; text: string; category: string }>> {
if (keywords.length === 0) {
return [];
}
await this.ensureInitialized();
const session = this.driver!.session();
try {
// Build a Lucene OR query from the keywords
const escaped = keywords.map((k) => escapeLucene(k.trim())).filter((k) => k.length > 0);
if (escaped.length === 0) {
return [];
}
const query = escaped.join(" OR ");
const agentFilter = agentId ? "AND node.agentId = $agentId" : "";
const result = await session.run(
`CALL db.index.fulltext.queryNodes('memory_fulltext_index', $query)
YIELD node, score
WHERE true ${agentFilter}
RETURN node.id AS id, node.text AS text, node.category AS category
ORDER BY score DESC
LIMIT $limit`,
{
query,
limit: neo4j.int(Math.floor(limit)),
...(agentId ? { agentId } : {}),
},
);
return result.records.map((r) => ({
id: r.get("id") as string,
text: r.get("text") as string,
category: r.get("category") as string,
}));
} finally {
await session.close();
}
}
/**
* Reconcile mentionCount for all entities by counting actual MENTIONS relationships.
* Fixes entities with NULL or stale mentionCount values (e.g., entities created

View File

@@ -42,6 +42,7 @@ export type MemoryNode = {
sessionKey?: string;
retrievalCount: number;
lastRetrievedAt?: string;
taskId?: string; // Optional link to TASKS.md task (e.g., "TASK-001")
};
export type EntityNode = {
@@ -103,6 +104,7 @@ export type SearchSignalResult = {
importance: number;
createdAt: string;
score: number;
taskId?: string; // Optional link to TASKS.md task (e.g., "TASK-001")
};
export type SignalAttribution = {
@@ -117,6 +119,7 @@ export type HybridSearchResult = {
importance: number;
createdAt: string;
score: number;
taskId?: string; // Optional link to TASKS.md task (e.g., "TASK-001")
signals?: {
vector: SignalAttribution;
bm25: SignalAttribution;
@@ -138,6 +141,7 @@ export type StoreMemoryInput = {
extractionStatus: ExtractionStatus;
agentId: string;
sessionKey?: string;
taskId?: string; // Optional link to TASKS.md task (e.g., "TASK-001")
};
export type MergeEntityInput = {

View File

@@ -112,6 +112,7 @@ type FusedCandidate = {
importance: number;
createdAt: string;
rrfScore: number;
taskId?: string;
signals: {
vector: SignalAttribution;
bm25: SignalAttribution;
@@ -151,7 +152,7 @@ export function fuseWithConfidenceRRF(
// Collect all unique candidate IDs with their metadata
const candidateMetadata = new Map<
string,
{ text: string; category: string; importance: number; createdAt: string }
{ text: string; category: string; importance: number; createdAt: string; taskId?: string }
>();
for (const signal of signals) {
@@ -162,6 +163,7 @@ export function fuseWithConfidenceRRF(
category: entry.category,
importance: entry.importance,
createdAt: entry.createdAt,
taskId: entry.taskId,
});
}
}
@@ -196,6 +198,7 @@ export function fuseWithConfidenceRRF(
importance: meta.importance,
createdAt: meta.createdAt,
rrfScore,
taskId: meta.taskId,
signals,
});
}
@@ -288,6 +291,7 @@ export async function hybridSearch(
importance: r.importance,
createdAt: r.createdAt,
score: Math.min(1, r.rrfScore * normalizer), // Normalize to 0-1
taskId: r.taskId,
signals: r.signals,
}));

View File

@@ -0,0 +1,234 @@
/**
* Tests for Phase 7: Task-Memory Cleanup in the sleep cycle.
*
* Tests the LLM classification function and integration with the sleep cycle.
*/
import { describe, it, expect, vi, beforeEach } from "vitest";
import type { ExtractionConfig } from "./config.js";
import { classifyTaskMemory } from "./sleep-cycle.js";
// --------------------------------------------------------------------------
// Mock the LLM client so we don't make real API calls
// --------------------------------------------------------------------------
vi.mock("./llm-client.js", () => ({
callOpenRouter: vi.fn(),
callOpenRouterStream: vi.fn(),
isTransientError: vi.fn(() => false),
}));
// Import the mocked function for controlling behavior per test
import { callOpenRouter } from "./llm-client.js";
const mockCallOpenRouter = vi.mocked(callOpenRouter);
// --------------------------------------------------------------------------
// Helpers
// --------------------------------------------------------------------------
const baseConfig: ExtractionConfig = {
enabled: true,
apiKey: "test-key",
model: "test-model",
baseUrl: "http://localhost:8080",
temperature: 0,
maxRetries: 0,
};
const disabledConfig: ExtractionConfig = {
...baseConfig,
enabled: false,
};
// --------------------------------------------------------------------------
// classifyTaskMemory()
// --------------------------------------------------------------------------
describe("classifyTaskMemory", () => {
beforeEach(() => {
vi.clearAllMocks();
});
it("returns 'noise' for task-specific progress memory", async () => {
mockCallOpenRouter.mockResolvedValueOnce(
JSON.stringify({
classification: "noise",
reason: "This is task-specific progress tracking",
}),
);
const result = await classifyTaskMemory(
"Currently working on TASK-003, step 2: fixing the column alignment in the LinkedIn dashboard",
"Fix LinkedIn Dashboard tab",
baseConfig,
);
expect(result).toBe("noise");
expect(mockCallOpenRouter).toHaveBeenCalledOnce();
});
it("returns 'lasting' for decision/fact memory", async () => {
mockCallOpenRouter.mockResolvedValueOnce(
JSON.stringify({
classification: "lasting",
reason: "Contains a reusable technical decision",
}),
);
const result = await classifyTaskMemory(
"ReActor face swap produces better results than Replicate for video face replacement",
"Implement face swap pipeline",
baseConfig,
);
expect(result).toBe("lasting");
expect(mockCallOpenRouter).toHaveBeenCalledOnce();
});
it("returns 'lasting' when LLM returns null (conservative)", async () => {
mockCallOpenRouter.mockResolvedValueOnce(null);
const result = await classifyTaskMemory("Some ambiguous memory", "Some task", baseConfig);
expect(result).toBe("lasting");
});
it("returns 'lasting' when LLM throws (conservative)", async () => {
mockCallOpenRouter.mockRejectedValueOnce(new Error("network error"));
const result = await classifyTaskMemory("Some memory", "Some task", baseConfig);
expect(result).toBe("lasting");
});
it("returns 'lasting' when LLM returns malformed JSON", async () => {
mockCallOpenRouter.mockResolvedValueOnce("not json at all");
const result = await classifyTaskMemory("Some memory", "Some task", baseConfig);
expect(result).toBe("lasting");
});
it("returns 'lasting' when LLM returns unexpected classification", async () => {
mockCallOpenRouter.mockResolvedValueOnce(JSON.stringify({ classification: "unknown_value" }));
const result = await classifyTaskMemory("Some memory", "Some task", baseConfig);
expect(result).toBe("lasting");
});
it("returns 'lasting' when config is disabled", async () => {
const result = await classifyTaskMemory("Task progress memory", "Some task", disabledConfig);
expect(result).toBe("lasting");
expect(mockCallOpenRouter).not.toHaveBeenCalled();
});
it("passes task title in system prompt", async () => {
mockCallOpenRouter.mockResolvedValueOnce(JSON.stringify({ classification: "lasting" }));
await classifyTaskMemory("Memory text here", "Fix LinkedIn Dashboard tab", baseConfig);
expect(mockCallOpenRouter).toHaveBeenCalledOnce();
const callArgs = mockCallOpenRouter.mock.calls[0];
const messages = callArgs[1] as Array<{ role: string; content: string }>;
expect(messages[0].content).toContain("Fix LinkedIn Dashboard tab");
});
it("passes memory text as user message", async () => {
mockCallOpenRouter.mockResolvedValueOnce(JSON.stringify({ classification: "noise" }));
await classifyTaskMemory(
"Debugging step: checked column B3 alignment",
"Fix Dashboard",
baseConfig,
);
const callArgs = mockCallOpenRouter.mock.calls[0];
const messages = callArgs[1] as Array<{ role: string; content: string }>;
expect(messages[1].role).toBe("user");
expect(messages[1].content).toBe("Debugging step: checked column B3 alignment");
});
it("passes abort signal to LLM call", async () => {
const controller = new AbortController();
mockCallOpenRouter.mockResolvedValueOnce(JSON.stringify({ classification: "lasting" }));
await classifyTaskMemory("Memory text", "Task title", baseConfig, controller.signal);
const callArgs = mockCallOpenRouter.mock.calls[0];
expect(callArgs[2]).toBe(controller.signal);
});
});
// --------------------------------------------------------------------------
// Classification examples — verify the prompt produces expected behavior
// These test that noise vs lasting classification is passed through correctly
// --------------------------------------------------------------------------
describe("classifyTaskMemory classification examples", () => {
beforeEach(() => {
vi.clearAllMocks();
});
const noiseExamples = [
{
memory: "Currently working on TASK-003, step 2: fixing the column alignment",
task: "Fix LinkedIn Dashboard tab",
reason: "task progress update",
},
{
memory: "ACTIVE TASK: TASK-004 — Fix browser port collision. Step: testing port 18807",
task: "Fix browser port collision",
reason: "active task checkpoint",
},
{
memory: "Debugging the flight search: Scoot API returned 500, retrying with different dates",
task: "Book KL↔Singapore flights for India trip",
reason: "debugging steps",
},
];
for (const example of noiseExamples) {
it(`classifies "${example.reason}" as noise`, async () => {
mockCallOpenRouter.mockResolvedValueOnce(
JSON.stringify({ classification: "noise", reason: example.reason }),
);
const result = await classifyTaskMemory(example.memory, example.task, baseConfig);
expect(result).toBe("noise");
});
}
const lastingExamples = [
{
memory:
"Port map: 18792 (chrome), 18800 (chetan), 18805 (linkedin), 18806 (tsukhani), 18807 (openclaw)",
task: "Fix browser port collision",
reason: "useful reference configuration",
},
{
memory:
"Dashboard layout: B3:B9 = Total, Accepted, Pending, Not Connected, Follow-ups Sent, Acceptance Rate%, Date",
task: "Fix LinkedIn Dashboard tab",
reason: "lasting documentation of layout",
},
{
memory: "ReActor face swap produces better results than Replicate for video face replacement",
task: "Implement face swap pipeline",
reason: "tool comparison decision",
},
];
for (const example of lastingExamples) {
it(`classifies "${example.reason}" as lasting`, async () => {
mockCallOpenRouter.mockResolvedValueOnce(
JSON.stringify({ classification: "lasting", reason: example.reason }),
);
const result = await classifyTaskMemory(example.memory, example.task, baseConfig);
expect(result).toBe("lasting");
});
}
});

View File

@@ -12,6 +12,7 @@
* 5. NOISE CLEANUP - Remove dangerous pattern memories
* 5b. CREDENTIAL SCAN - Remove memories containing leaked credentials
* 6. TASK LEDGER - Archive stale tasks in TASKS.md
* 7. TASK-MEMORY CLEANUP - Remove task-noise memories for completed tasks
*
* Research basis:
* - ACT-R memory model for retrieval-based importance
@@ -19,6 +20,8 @@
* - MemGPT/Letta for tiered memory architecture
*/
import fs from "node:fs/promises";
import path from "node:path";
import type { ExtractionConfig } from "./config.js";
import type { Embeddings } from "./embeddings.js";
import type { Neo4jMemoryClient } from "./neo4j-client.js";
@@ -29,8 +32,13 @@ import {
resolveConflict,
runBackgroundExtraction,
} from "./extractor.js";
import { callOpenRouter } from "./llm-client.js";
import { makePairKey } from "./schema.js";
import { reviewAndArchiveStaleTasks, type StaleTaskResult } from "./task-ledger.js";
import {
parseTaskLedger,
reviewAndArchiveStaleTasks,
type StaleTaskResult,
} from "./task-ledger.js";
/**
* Sleep Cycle Result - aggregated stats from all phases.
@@ -92,6 +100,12 @@ export type SleepCycleResult = {
archivedCount: number;
archivedIds: string[];
};
// Phase 7: Task-Memory Cleanup
taskMemoryCleanup: {
tasksChecked: number;
memoriesEvaluated: number;
memoriesRemoved: number;
};
// Overall
durationMs: number;
aborted: boolean;
@@ -133,6 +147,10 @@ export type SleepCycleOptions = {
workspaceDir?: string; // Workspace dir for TASKS.md (default: resolved from env)
staleTaskMaxAgeMs?: number; // Max age before task is stale (default: 24h)
// Phase 7: Task-Memory Cleanup
skipTaskMemoryCleanup?: boolean; // Skip task-memory cleanup (default: false)
taskMemoryMaxAgeDays?: number; // Only check tasks completed within this many days (default: 7)
// Progress callback
onPhaseStart?: (
phase:
@@ -146,7 +164,8 @@ export type SleepCycleOptions = {
| "cleanup"
| "noiseCleanup"
| "credentialScan"
| "taskLedger",
| "taskLedger"
| "taskMemoryCleanup",
) => void;
onProgress?: (phase: string, message: string) => void;
};
@@ -213,12 +232,68 @@ export function detectCredential(text: string): string | null {
return null;
}
// ============================================================================
// Task-Memory Classification
// ============================================================================
/**
* Use LLM to classify whether a memory is "lasting" (valuable independent
* of the completed task) or "noise" (only useful while the task was active).
*
* Conservative: returns "lasting" on any failure to avoid deleting valuable memories.
*/
export async function classifyTaskMemory(
memoryText: string,
taskTitle: string,
config: ExtractionConfig,
abortSignal?: AbortSignal,
): Promise<"lasting" | "noise"> {
if (!config.enabled) {
return "lasting";
}
try {
const content = await callOpenRouter(
config,
[
{
role: "system",
content: `A task titled "${taskTitle}" has been completed. The following memory was created during this task.
Classify this memory:
- "lasting" if it contains a decision, preference, fact, or knowledge that is valuable INDEPENDENT of the task
- "noise" if it contains task progress, debugging steps, intermediate state, or context that is only useful while the task was active
When in doubt, choose "lasting". It is better to keep some noise than to delete valuable knowledge.
Return JSON: {"classification": "lasting"|"noise", "reason": "brief explanation"}`,
},
{ role: "user", content: memoryText },
],
abortSignal,
);
if (!content) {
return "lasting";
}
const parsed = JSON.parse(content) as { classification?: string };
if (parsed.classification === "noise") {
return "noise";
}
return "lasting";
} catch {
// On any failure, keep the memory (conservative)
return "lasting";
}
}
// ============================================================================
// Sleep Cycle Implementation
// ============================================================================
/**
* Run the full sleep cycle - six phases of memory consolidation.
* Run the full sleep cycle - seven phases of memory consolidation.
*/
export async function runSleepCycle(
db: Neo4jMemoryClient,
@@ -246,6 +321,8 @@ export async function runSleepCycle(
singleUseTagMinAgeDays = 14,
workspaceDir,
staleTaskMaxAgeMs,
skipTaskMemoryCleanup = false,
taskMemoryMaxAgeDays = 7,
onPhaseStart,
onProgress,
} = options;
@@ -261,6 +338,7 @@ export async function runSleepCycle(
cleanup: { entitiesRemoved: 0, tagsRemoved: 0, singleUseTagsRemoved: 0 },
credentialScan: { memoriesScanned: 0, credentialsFound: 0, memoriesRemoved: 0 },
taskLedger: { staleCount: 0, archivedCount: 0, archivedIds: [] },
taskMemoryCleanup: { tasksChecked: 0, memoriesEvaluated: 0, memoriesRemoved: 0 },
durationMs: 0,
aborted: false,
};
@@ -929,6 +1007,155 @@ export async function runSleepCycle(
logger.info("memory-neo4j: [sleep] Phase 6: Task Ledger Cleanup — SKIPPED (no workspace dir)");
}
// --------------------------------------------------------------------------
// Phase 7: Task-Memory Cleanup
// Cross-references completed tasks (from TASKS.md) with stored memories.
// For each completed task (within the last N days), searches for memories
// mentioning that task and uses LLM to classify them as "lasting" (keep)
// vs "noise" (delete). This prevents stale task-specific memories from
// being recalled after tasks are done.
// --------------------------------------------------------------------------
if (!abortSignal?.aborted && workspaceDir && config.enabled && !skipTaskMemoryCleanup) {
onPhaseStart?.("taskMemoryCleanup");
logger.info("memory-neo4j: [sleep] Phase 7: Task-Memory Cleanup");
try {
const tasksPath = path.join(workspaceDir, "TASKS.md");
let tasksContent: string | null = null;
try {
tasksContent = await fs.readFile(tasksPath, "utf-8");
} catch {
// TASKS.md doesn't exist — skip
}
if (tasksContent) {
const ledger = parseTaskLedger(tasksContent);
const now = new Date();
const maxAgeMs = taskMemoryMaxAgeDays * 24 * 60 * 60 * 1000;
// Filter to recently completed tasks (within maxAgeDays)
const recentCompleted = ledger.completedTasks.filter((task) => {
// Use the "Completed" field, "Updated" field, or "Started" field as date source
const dateStr =
task.details?.match(/Completed:\s*(\S+)/)?.[1] || task.updated || task.started;
if (!dateStr) {
return false;
}
// Try to parse date — accept formats like "2026-02-16", "2026-02-16 09:15"
const cleaned = dateStr
.trim()
.replace(/\s+[A-Z]{2,5}$/, "")
.replace(/^(\d{4}-\d{2}-\d{2})\s+(\d{2}:\d{2})/, "$1T$2");
const date = new Date(cleaned);
if (Number.isNaN(date.getTime())) {
return false;
}
return now.getTime() - date.getTime() <= maxAgeMs;
});
result.taskMemoryCleanup.tasksChecked = recentCompleted.length;
if (recentCompleted.length > 0) {
onProgress?.(
"taskMemoryCleanup",
`Found ${recentCompleted.length} recently completed tasks to check`,
);
// Collect all memories to evaluate across all tasks (dedup by id)
const memoriesToEvaluate = new Map<
string,
{ id: string; text: string; category: string; taskTitle: string }
>();
for (const task of recentCompleted) {
if (abortSignal?.aborted) break;
// Build keywords from task ID and title words
const keywords = [task.id];
const titleWords = task.title
.split(/\s+/)
.filter((w) => w.length > 3)
.map((w) => w.replace(/[^a-zA-Z0-9-]/g, ""))
.filter((w) => w.length > 3);
keywords.push(...titleWords);
const matches = await db.searchMemoriesByKeywords(keywords, 50, agentId);
for (const mem of matches) {
// Skip core memories — those are user-curated
if (mem.category === "core") continue;
if (!memoriesToEvaluate.has(mem.id)) {
memoriesToEvaluate.set(mem.id, { ...mem, taskTitle: task.title });
}
}
}
// Classify memories in parallel batches using LLM
const toEvaluate = [...memoriesToEvaluate.values()];
result.taskMemoryCleanup.memoriesEvaluated = toEvaluate.length;
if (toEvaluate.length > 0) {
onProgress?.("taskMemoryCleanup", `Evaluating ${toEvaluate.length} memories with LLM`);
}
const toRemove: string[] = [];
for (let i = 0; i < toEvaluate.length && !abortSignal?.aborted; i += llmConcurrency) {
const batch = toEvaluate.slice(i, i + llmConcurrency);
const outcomes = await Promise.allSettled(
batch.map((mem) => classifyTaskMemory(mem.text, mem.taskTitle, config, abortSignal)),
);
for (let k = 0; k < outcomes.length; k++) {
const outcome = outcomes[k];
const mem = batch[k];
if (outcome.status === "fulfilled" && outcome.value === "noise") {
toRemove.push(mem.id);
onProgress?.(
"taskMemoryCleanup",
`Noise: "${mem.text.slice(0, 60)}..." (task: ${mem.taskTitle})`,
);
} else if (outcome.status === "fulfilled" && outcome.value === "lasting") {
onProgress?.("taskMemoryCleanup", `Lasting: "${mem.text.slice(0, 60)}..."`);
}
// On failure, keep the memory (conservative)
}
}
// Remove noise memories
if (toRemove.length > 0 && !abortSignal?.aborted) {
for (const id of toRemove) {
if (abortSignal?.aborted) break;
await db.invalidateMemory(id);
}
result.taskMemoryCleanup.memoriesRemoved = toRemove.length;
onProgress?.("taskMemoryCleanup", `Invalidated ${toRemove.length} task-noise memories`);
}
} else {
onProgress?.("taskMemoryCleanup", "No recently completed tasks found");
}
} else {
onProgress?.("taskMemoryCleanup", "TASKS.md not found — skipped");
}
logger.info(
`memory-neo4j: [sleep] Phase 7 complete — ${result.taskMemoryCleanup.tasksChecked} tasks checked, ${result.taskMemoryCleanup.memoriesEvaluated} memories evaluated, ${result.taskMemoryCleanup.memoriesRemoved} removed`,
);
} catch (err) {
logger.warn(`memory-neo4j: [sleep] Phase 7 error: ${String(err)}`);
}
} else if (!workspaceDir) {
logger.info("memory-neo4j: [sleep] Phase 7: Task-Memory Cleanup — SKIPPED (no workspace dir)");
} else if (!config.enabled) {
logger.info(
"memory-neo4j: [sleep] Phase 7: Task-Memory Cleanup — SKIPPED (extraction not enabled)",
);
} else if (skipTaskMemoryCleanup) {
logger.info("memory-neo4j: [sleep] Phase 7: Task-Memory Cleanup — SKIPPED (disabled)");
}
result.durationMs = Date.now() - startTime;
result.aborted = abortSignal?.aborted ?? false;

View File

@@ -0,0 +1,426 @@
/**
* Tests for task-filter.ts — Task-aware recall filtering (Layer 1).
*
* Verifies that memories related to completed tasks are correctly identified
* and filtered, while unrelated or loosely-matching memories are preserved.
*/
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { afterEach, beforeEach, describe, expect, it } from "vitest";
import {
buildCompletedTaskInfo,
clearTaskFilterCache,
extractSignificantKeywords,
isRelatedToCompletedTask,
loadCompletedTaskKeywords,
type CompletedTaskInfo,
} from "./task-filter.js";
// ============================================================================
// Sample TASKS.md content
// ============================================================================
const SAMPLE_TASKS_MD = `# Active Tasks
_No active tasks_
# Completed
<!-- Move done tasks here with completion date -->
## TASK-002: Book KL↔Singapore flights for India trip
- **Completed:** 2026-02-16
- **Details:** Tarun booked manually — Scoot TR453 (Feb 23 KUL→SIN) and AirAsia AK720 (Mar 3 SIN→KUL)
## TASK-003: Fix LinkedIn Dashboard tab
- **Completed:** 2026-02-16
- **Details:** Fixed misaligned stats, wrong industry numbers, stale data. Added Not Connected row, consolidated industries into 10 groups, cleared residual data.
## TASK-004: Fix browser port collision
- **Completed:** 2026-02-16
- **Details:** Added explicit openclaw profile on port 18807 (was colliding with chetan on 18800)
`;
// ============================================================================
// extractSignificantKeywords()
// ============================================================================
describe("extractSignificantKeywords", () => {
it("extracts words with length >= 4", () => {
const keywords = extractSignificantKeywords("Fix the big dashboard bug");
expect(keywords).toContain("dashboard");
expect(keywords).not.toContain("fix"); // too short
expect(keywords).not.toContain("the"); // too short
expect(keywords).not.toContain("big"); // too short
expect(keywords).not.toContain("bug"); // too short
});
it("removes stop words", () => {
const keywords = extractSignificantKeywords("should have been using this work");
// All of these are stop words
expect(keywords).toHaveLength(0);
});
it("lowercases all keywords", () => {
const keywords = extractSignificantKeywords("LinkedIn Dashboard Singapore");
expect(keywords).toContain("linkedin");
expect(keywords).toContain("dashboard");
expect(keywords).toContain("singapore");
});
it("deduplicates keywords", () => {
const keywords = extractSignificantKeywords("dashboard dashboard dashboard");
expect(keywords).toEqual(["dashboard"]);
});
it("returns empty for empty/null input", () => {
expect(extractSignificantKeywords("")).toEqual([]);
expect(extractSignificantKeywords(null as unknown as string)).toEqual([]);
});
it("handles special characters", () => {
const keywords = extractSignificantKeywords("port 18807 (colliding with chetan)");
expect(keywords).toContain("port");
expect(keywords).toContain("18807");
expect(keywords).toContain("colliding");
expect(keywords).toContain("chetan");
});
});
// ============================================================================
// buildCompletedTaskInfo()
// ============================================================================
describe("buildCompletedTaskInfo", () => {
it("extracts keywords from title and details", () => {
const info = buildCompletedTaskInfo({
id: "TASK-003",
title: "Fix LinkedIn Dashboard tab",
status: "done",
details:
"Fixed misaligned stats, wrong industry numbers, stale data. Added Not Connected row, consolidated industries into 10 groups, cleared residual data.",
rawLines: [
"## TASK-003: Fix LinkedIn Dashboard tab",
"- **Completed:** 2026-02-16",
"- **Details:** Fixed misaligned stats, wrong industry numbers, stale data.",
],
isCompleted: true,
});
expect(info.id).toBe("TASK-003");
expect(info.keywords).toContain("linkedin");
expect(info.keywords).toContain("dashboard");
expect(info.keywords).toContain("misaligned");
expect(info.keywords).toContain("stats");
expect(info.keywords).toContain("industry");
});
it("includes currentStep keywords", () => {
const info = buildCompletedTaskInfo({
id: "TASK-010",
title: "Deploy staging server",
status: "done",
currentStep: "Verifying nginx configuration",
rawLines: ["## TASK-010: Deploy staging server"],
isCompleted: true,
});
expect(info.keywords).toContain("deploy");
expect(info.keywords).toContain("staging");
expect(info.keywords).toContain("server");
expect(info.keywords).toContain("nginx");
expect(info.keywords).toContain("configuration");
});
it("handles task with minimal fields", () => {
const info = buildCompletedTaskInfo({
id: "TASK-001",
title: "Quick fix",
status: "done",
rawLines: ["## TASK-001: Quick fix"],
isCompleted: true,
});
expect(info.id).toBe("TASK-001");
expect(info.keywords).toContain("quick");
// "fix" is only 3 chars, should be excluded
expect(info.keywords).not.toContain("fix");
});
});
// ============================================================================
// isRelatedToCompletedTask()
// ============================================================================
describe("isRelatedToCompletedTask", () => {
const completedTasks: CompletedTaskInfo[] = [
{
id: "TASK-002",
keywords: [
"book",
"singapore",
"flights",
"india",
"trip",
"scoot",
"tr453",
"airasia",
"ak720",
],
},
{
id: "TASK-003",
keywords: [
"linkedin",
"dashboard",
"misaligned",
"stats",
"industry",
"numbers",
"stale",
"connected",
"consolidated",
"industries",
"groups",
"cleared",
"residual",
"data",
],
},
{
id: "TASK-004",
keywords: [
"browser",
"port",
"collision",
"openclaw",
"profile",
"18807",
"colliding",
"chetan",
"18800",
],
},
];
// --- Task ID matching ---
it("matches memory containing task ID", () => {
expect(
isRelatedToCompletedTask("TASK-002 flights have been booked successfully", completedTasks),
).toBe(true);
});
it("matches task ID case-insensitively", () => {
expect(
isRelatedToCompletedTask("Completed task-003 — dashboard is fixed", completedTasks),
).toBe(true);
});
// --- Keyword matching ---
it("matches memory with 2+ keywords from a completed task", () => {
expect(
isRelatedToCompletedTask(
"LinkedIn dashboard stats are now showing correctly",
completedTasks,
),
).toBe(true);
});
it("matches memory with keywords from flight task", () => {
expect(
isRelatedToCompletedTask("Booked Singapore flights for the India trip", completedTasks),
).toBe(true);
});
// --- False positive prevention ---
it("does NOT match memory with only 1 keyword overlap", () => {
expect(isRelatedToCompletedTask("Singapore has great food markets", completedTasks)).toBe(
false,
);
});
it("does NOT match memory about LinkedIn that is unrelated to dashboard fix", () => {
// "linkedin" alone is only 1 keyword match — should NOT be filtered
expect(
isRelatedToCompletedTask(
"LinkedIn connection request from John Smith accepted",
completedTasks,
),
).toBe(false);
});
it("does NOT match memory about browser that is unrelated to port fix", () => {
// "browser" alone is only 1 keyword
expect(
isRelatedToCompletedTask("Browser extension for Flux image generation", completedTasks),
).toBe(false);
});
it("does NOT match completely unrelated memory", () => {
expect(isRelatedToCompletedTask("Tarun's birthday is August 23, 1974", completedTasks)).toBe(
false,
);
});
// --- Edge cases ---
it("returns false for empty memory text", () => {
expect(isRelatedToCompletedTask("", completedTasks)).toBe(false);
});
it("returns false for empty completed tasks array", () => {
expect(isRelatedToCompletedTask("TASK-002 flights booked", [])).toBe(false);
});
it("handles task with no keywords (only ID matching works)", () => {
const tasksNoKeywords: CompletedTaskInfo[] = [{ id: "TASK-099", keywords: [] }];
expect(isRelatedToCompletedTask("Completed TASK-099", tasksNoKeywords)).toBe(true);
expect(isRelatedToCompletedTask("Some random memory", tasksNoKeywords)).toBe(false);
});
});
// ============================================================================
// loadCompletedTaskKeywords()
// ============================================================================
describe("loadCompletedTaskKeywords", () => {
let tmpDir: string;
beforeEach(async () => {
tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "task-filter-test-"));
clearTaskFilterCache();
});
afterEach(async () => {
clearTaskFilterCache();
await fs.rm(tmpDir, { recursive: true, force: true });
});
it("parses completed tasks from TASKS.md", async () => {
await fs.writeFile(path.join(tmpDir, "TASKS.md"), SAMPLE_TASKS_MD);
const tasks = await loadCompletedTaskKeywords(tmpDir);
expect(tasks).toHaveLength(3);
expect(tasks.map((t) => t.id)).toEqual(["TASK-002", "TASK-003", "TASK-004"]);
});
it("extracts keywords from completed tasks", async () => {
await fs.writeFile(path.join(tmpDir, "TASKS.md"), SAMPLE_TASKS_MD);
const tasks = await loadCompletedTaskKeywords(tmpDir);
const flightTask = tasks.find((t) => t.id === "TASK-002");
expect(flightTask).toBeDefined();
expect(flightTask!.keywords).toContain("singapore");
expect(flightTask!.keywords).toContain("flights");
});
it("returns empty array when TASKS.md does not exist", async () => {
const tasks = await loadCompletedTaskKeywords(tmpDir);
expect(tasks).toEqual([]);
});
it("returns empty array for empty TASKS.md", async () => {
await fs.writeFile(path.join(tmpDir, "TASKS.md"), "");
const tasks = await loadCompletedTaskKeywords(tmpDir);
expect(tasks).toEqual([]);
});
it("returns empty array for TASKS.md with no completed tasks", async () => {
const content = `# Active Tasks
## TASK-001: Do something
- **Status:** in_progress
- **Details:** Working on it
# Completed
<!-- Move done tasks here with completion date -->
`;
await fs.writeFile(path.join(tmpDir, "TASKS.md"), content);
const tasks = await loadCompletedTaskKeywords(tmpDir);
expect(tasks).toEqual([]);
});
it("handles malformed TASKS.md gracefully", async () => {
const content = `This is not a valid TASKS.md file
Just some random text
No headers or structure at all`;
await fs.writeFile(path.join(tmpDir, "TASKS.md"), content);
const tasks = await loadCompletedTaskKeywords(tmpDir);
expect(tasks).toEqual([]);
});
// --- Cache behavior ---
it("returns cached data within TTL", async () => {
await fs.writeFile(path.join(tmpDir, "TASKS.md"), SAMPLE_TASKS_MD);
const first = await loadCompletedTaskKeywords(tmpDir);
expect(first).toHaveLength(3);
// Modify the file — should still return cached result
await fs.writeFile(path.join(tmpDir, "TASKS.md"), "# Active Tasks\n\n# Completed\n");
const second = await loadCompletedTaskKeywords(tmpDir);
expect(second).toHaveLength(3); // Still cached
expect(second).toBe(first); // Same reference (from cache)
});
it("refreshes after cache is cleared", async () => {
await fs.writeFile(path.join(tmpDir, "TASKS.md"), SAMPLE_TASKS_MD);
const first = await loadCompletedTaskKeywords(tmpDir);
expect(first).toHaveLength(3);
// Modify file and clear cache
await fs.writeFile(path.join(tmpDir, "TASKS.md"), "# Active Tasks\n\n# Completed\n");
clearTaskFilterCache();
const second = await loadCompletedTaskKeywords(tmpDir);
expect(second).toHaveLength(0); // Re-read from disk
});
});
// ============================================================================
// Integration: end-to-end filtering
// ============================================================================
describe("end-to-end recall filtering", () => {
let tmpDir: string;
beforeEach(async () => {
tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "task-filter-e2e-"));
clearTaskFilterCache();
});
afterEach(async () => {
clearTaskFilterCache();
await fs.rm(tmpDir, { recursive: true, force: true });
});
it("filters memories related to completed tasks while keeping unrelated ones", async () => {
await fs.writeFile(path.join(tmpDir, "TASKS.md"), SAMPLE_TASKS_MD);
const completedTasks = await loadCompletedTaskKeywords(tmpDir);
const memories = [
{ text: "TASK-002 flights have been booked — Scoot TR453 confirmed", keep: false },
{ text: "LinkedIn dashboard stats fixed — industry numbers corrected", keep: false },
{ text: "Browser port collision resolved — openclaw on 18807", keep: false },
{ text: "Tarun's birthday is August 23, 1974", keep: true },
{ text: "Singapore has great food markets", keep: true },
{ text: "LinkedIn connection from Jane Doe accepted", keep: true },
{ text: "Memory-neo4j sleep cycle runs at 3am", keep: true },
];
for (const m of memories) {
const isRelated = isRelatedToCompletedTask(m.text, completedTasks);
expect(isRelated).toBe(!m.keep);
}
});
});

View File

@@ -0,0 +1,324 @@
/**
* Task-aware recall filter (Layer 1).
*
* Filters out auto-recalled memories that relate to completed tasks,
* preventing stale task-state memories from being injected into agent context.
*
* Design principles:
* - Conservative: false positives (filtering useful memories) are worse than
* false negatives (letting some stale ones through).
* - Fast: runs on every message, targeting < 5ms with caching.
* - Graceful: missing/malformed TASKS.md is silently ignored.
*/
import fs from "node:fs/promises";
import path from "node:path";
import { parseTaskLedger, type ParsedTask } from "./task-ledger.js";
// ============================================================================
// Types
// ============================================================================
/** Extracted keyword info for a single completed task. */
export type CompletedTaskInfo = {
/** Task ID (e.g. "TASK-002") */
id: string;
/** Significant keywords extracted from the task title + details + currentStep */
keywords: string[];
};
// ============================================================================
// Constants
// ============================================================================
/** Cache TTL in milliseconds — avoids re-reading TASKS.md on every message. */
const CACHE_TTL_MS = 60_000;
/** Minimum keyword length to be considered "significant". */
const MIN_KEYWORD_LENGTH = 4;
/**
* Common English stop words that should be excluded from keyword matching.
* Only words ≥ MIN_KEYWORD_LENGTH are included (shorter ones are filtered by length).
*/
const STOP_WORDS = new Set([
"about",
"also",
"been",
"before",
"being",
"between",
"both",
"came",
"come",
"could",
"does",
"done",
"each",
"even",
"find",
"first",
"found",
"from",
"going",
"good",
"great",
"have",
"here",
"high",
"however",
"into",
"just",
"keep",
"know",
"last",
"like",
"long",
"look",
"made",
"make",
"many",
"more",
"most",
"much",
"must",
"need",
"next",
"only",
"other",
"over",
"part",
"said",
"same",
"should",
"show",
"since",
"some",
"still",
"such",
"take",
"than",
"that",
"their",
"them",
"then",
"there",
"these",
"they",
"this",
"through",
"time",
"under",
"used",
"using",
"very",
"want",
"were",
"what",
"when",
"where",
"which",
"while",
"will",
"with",
"without",
"work",
"would",
"your",
// Task-related generic words that shouldn't be matching keywords:
"task",
"tasks",
"active",
"completed",
"details",
"status",
"started",
"updated",
"blocked",
]);
/**
* Minimum number of keyword matches required to consider a memory related
* to a completed task (when matching by keywords rather than task ID).
*/
const MIN_KEYWORD_MATCHES = 2;
// ============================================================================
// Cache
// ============================================================================
type CacheEntry = {
tasks: CompletedTaskInfo[];
timestamp: number;
};
const cache = new Map<string, CacheEntry>();
/** Clear the cache (exposed for testing). */
export function clearTaskFilterCache(): void {
cache.clear();
}
// ============================================================================
// Keyword Extraction
// ============================================================================
/**
* Extract significant keywords from a text string.
*
* Filters out short words, stop words, and common noise to produce
* a set of meaningful terms that can identify task-specific content.
*/
export function extractSignificantKeywords(text: string): string[] {
if (!text) {
return [];
}
const words = text
.toLowerCase()
// Replace non-alphanumeric chars (except hyphens in task IDs) with spaces
.replace(/[^a-z0-9\-]/g, " ")
.split(/\s+/)
.filter((w) => w.length >= MIN_KEYWORD_LENGTH && !STOP_WORDS.has(w));
// Deduplicate while preserving order
return [...new Set(words)];
}
/**
* Build a {@link CompletedTaskInfo} from a parsed completed task.
*
* Extracts keywords from the task's title, details, and current step.
*/
export function buildCompletedTaskInfo(task: ParsedTask): CompletedTaskInfo {
const parts: string[] = [task.title];
if (task.details) {
parts.push(task.details);
}
if (task.currentStep) {
parts.push(task.currentStep);
}
// Also extract from raw lines to capture fields the parser doesn't map
// (e.g. "- **Completed:** 2026-02-16")
for (const line of task.rawLines) {
const trimmed = line.trim();
// Skip the header line (already have title) and empty lines
if (trimmed.startsWith("##") || trimmed === "") {
continue;
}
// Include field values from bullet lines
const fieldMatch = trimmed.match(/^-\s+\*\*.+?:\*\*\s*(.+)$/);
if (fieldMatch) {
parts.push(fieldMatch[1]);
}
}
const keywords = extractSignificantKeywords(parts.join(" "));
return {
id: task.id,
keywords,
};
}
// ============================================================================
// Core API
// ============================================================================
/**
* Load completed task info from TASKS.md in the given workspace directory.
*
* Results are cached per workspace dir with a 60-second TTL to avoid
* re-reading and re-parsing on every message.
*
* @param workspaceDir - Path to the workspace directory containing TASKS.md
* @returns Array of completed task info (empty if TASKS.md is missing or has no completed tasks)
*/
export async function loadCompletedTaskKeywords(
workspaceDir: string,
): Promise<CompletedTaskInfo[]> {
const now = Date.now();
// Check cache
const cached = cache.get(workspaceDir);
if (cached && now - cached.timestamp < CACHE_TTL_MS) {
return cached.tasks;
}
// Read and parse TASKS.md
const tasksPath = path.join(workspaceDir, "TASKS.md");
let content: string;
try {
content = await fs.readFile(tasksPath, "utf-8");
} catch {
// File doesn't exist or isn't readable — cache empty result
cache.set(workspaceDir, { tasks: [], timestamp: now });
return [];
}
if (!content.trim()) {
cache.set(workspaceDir, { tasks: [], timestamp: now });
return [];
}
const ledger = parseTaskLedger(content);
const tasks = ledger.completedTasks.map(buildCompletedTaskInfo);
// Cache the result
cache.set(workspaceDir, { tasks, timestamp: now });
return tasks;
}
/**
* Check if a memory's text is related to a completed task.
*
* Uses two matching strategies:
* 1. **Task ID match** — if the memory text contains a completed task's ID
* (e.g. "TASK-002"), it's considered related.
* 2. **Keyword match** — if the memory text matches {@link MIN_KEYWORD_MATCHES}
* or more significant keywords from a completed task, it's considered related.
*
* The filter is intentionally conservative: a memory about "Flux 2" won't be
* filtered just because a completed task mentioned "Flux", unless the memory
* also matches additional task-specific keywords.
*
* @param memoryText - The text content of the recalled memory
* @param completedTasks - Completed task info from {@link loadCompletedTaskKeywords}
* @returns `true` if the memory appears related to a completed task
*/
export function isRelatedToCompletedTask(
memoryText: string,
completedTasks: CompletedTaskInfo[],
): boolean {
if (!memoryText || completedTasks.length === 0) {
return false;
}
const lowerText = memoryText.toLowerCase();
for (const task of completedTasks) {
// Strategy 1: Direct task ID match (case-insensitive)
if (lowerText.includes(task.id.toLowerCase())) {
return true;
}
// Strategy 2: Keyword overlap — require MIN_KEYWORD_MATCHES distinct keywords
if (task.keywords.length === 0) {
continue;
}
let matchCount = 0;
for (const keyword of task.keywords) {
if (lowerText.includes(keyword)) {
matchCount++;
if (matchCount >= MIN_KEYWORD_MATCHES) {
return true;
}
}
}
}
return false;
}

View File

@@ -0,0 +1,606 @@
/**
* Tests for Layer 3: Task Metadata on memories.
*
* Tests that memories can be linked to specific tasks via taskId,
* enabling precise task-aware filtering at recall and cleanup time.
*/
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
import type { StoreMemoryInput } from "./schema.js";
import { Neo4jMemoryClient } from "./neo4j-client.js";
import { fuseWithConfidenceRRF } from "./search.js";
import { parseTaskLedger } from "./task-ledger.js";
// ============================================================================
// Test Helpers
// ============================================================================
function createMockSession() {
return {
run: vi.fn().mockResolvedValue({ records: [] }),
close: vi.fn().mockResolvedValue(undefined),
executeWrite: vi.fn(
async (work: (tx: { run: ReturnType<typeof vi.fn> }) => Promise<unknown>) => {
const mockTx = { run: vi.fn().mockResolvedValue({ records: [] }) };
return work(mockTx);
},
),
};
}
function createMockDriver() {
return {
session: vi.fn().mockReturnValue(createMockSession()),
close: vi.fn().mockResolvedValue(undefined),
};
}
function createMockLogger() {
return {
info: vi.fn(),
warn: vi.fn(),
error: vi.fn(),
debug: vi.fn(),
};
}
function createMockRecord(data: Record<string, unknown>) {
return {
get: (key: string) => data[key],
keys: Object.keys(data),
};
}
// ============================================================================
// Neo4jMemoryClient: storeMemory with taskId
// ============================================================================
describe("Task Metadata: storeMemory", () => {
let client: Neo4jMemoryClient;
let mockDriver: ReturnType<typeof createMockDriver>;
let mockSession: ReturnType<typeof createMockSession>;
beforeEach(() => {
const mockLogger = createMockLogger();
mockDriver = createMockDriver();
mockSession = createMockSession();
mockDriver.session.mockReturnValue(mockSession);
client = new Neo4jMemoryClient("bolt://localhost:7687", "neo4j", "password", 1024, mockLogger);
(client as any).driver = mockDriver;
(client as any).indexesReady = true;
});
it("should store memory with taskId when provided", async () => {
mockSession.run.mockResolvedValue({
records: [createMockRecord({ id: "mem-1" })],
});
const input: StoreMemoryInput = {
id: "mem-1",
text: "test memory with task",
embedding: [0.1, 0.2],
importance: 0.7,
category: "fact",
source: "user",
extractionStatus: "pending",
agentId: "agent-1",
taskId: "TASK-001",
};
await client.storeMemory(input);
const runCall = mockSession.run.mock.calls[0];
const cypher = runCall[0] as string;
const params = runCall[1] as Record<string, unknown>;
// Cypher should include taskId clause
expect(cypher).toContain("taskId");
// Params should include the taskId value
expect(params.taskId).toBe("TASK-001");
});
it("should store memory without taskId when not provided", async () => {
mockSession.run.mockResolvedValue({
records: [createMockRecord({ id: "mem-2" })],
});
const input: StoreMemoryInput = {
id: "mem-2",
text: "test memory without task",
embedding: [0.1, 0.2],
importance: 0.7,
category: "fact",
source: "user",
extractionStatus: "pending",
agentId: "agent-1",
};
await client.storeMemory(input);
const runCall = mockSession.run.mock.calls[0];
const cypher = runCall[0] as string;
// Cypher should NOT include taskId clause when not provided
// The dynamic clause is only added when taskId is present
expect(cypher).not.toContain(", taskId: $taskId");
});
it("backward compatibility: existing memories without taskId still work", async () => {
// Storing without taskId should work exactly as before
mockSession.run.mockResolvedValue({
records: [createMockRecord({ id: "mem-3" })],
});
const input: StoreMemoryInput = {
id: "mem-3",
text: "legacy memory",
embedding: [0.1],
importance: 0.5,
category: "other",
source: "auto-capture",
extractionStatus: "skipped",
agentId: "default",
};
const id = await client.storeMemory(input);
expect(id).toBe("mem-3");
});
});
// ============================================================================
// Neo4jMemoryClient: findMemoriesByTaskId
// ============================================================================
describe("Task Metadata: findMemoriesByTaskId", () => {
let client: Neo4jMemoryClient;
let mockDriver: ReturnType<typeof createMockDriver>;
let mockSession: ReturnType<typeof createMockSession>;
beforeEach(() => {
const mockLogger = createMockLogger();
mockDriver = createMockDriver();
mockSession = createMockSession();
mockDriver.session.mockReturnValue(mockSession);
client = new Neo4jMemoryClient("bolt://localhost:7687", "neo4j", "password", 1024, mockLogger);
(client as any).driver = mockDriver;
(client as any).indexesReady = true;
});
it("should find memories by taskId", async () => {
mockSession.run.mockResolvedValue({
records: [
createMockRecord({
id: "mem-1",
text: "task-related memory",
category: "fact",
importance: 0.8,
}),
createMockRecord({
id: "mem-2",
text: "another task memory",
category: "other",
importance: 0.6,
}),
],
});
const results = await client.findMemoriesByTaskId("TASK-001");
expect(results).toHaveLength(2);
expect(results[0].id).toBe("mem-1");
expect(results[1].id).toBe("mem-2");
const runCall = mockSession.run.mock.calls[0];
const cypher = runCall[0] as string;
const params = runCall[1] as Record<string, unknown>;
expect(cypher).toContain("m.taskId = $taskId");
expect(params.taskId).toBe("TASK-001");
});
it("should filter by agentId when provided", async () => {
mockSession.run.mockResolvedValue({ records: [] });
await client.findMemoriesByTaskId("TASK-001", "agent-1");
const runCall = mockSession.run.mock.calls[0];
const cypher = runCall[0] as string;
const params = runCall[1] as Record<string, unknown>;
expect(cypher).toContain("m.agentId = $agentId");
expect(params.agentId).toBe("agent-1");
});
it("should return empty array when no memories match", async () => {
mockSession.run.mockResolvedValue({ records: [] });
const results = await client.findMemoriesByTaskId("TASK-999");
expect(results).toHaveLength(0);
});
});
// ============================================================================
// Neo4jMemoryClient: clearTaskIdFromMemories
// ============================================================================
describe("Task Metadata: clearTaskIdFromMemories", () => {
let client: Neo4jMemoryClient;
let mockDriver: ReturnType<typeof createMockDriver>;
let mockSession: ReturnType<typeof createMockSession>;
beforeEach(() => {
const mockLogger = createMockLogger();
mockDriver = createMockDriver();
mockSession = createMockSession();
mockDriver.session.mockReturnValue(mockSession);
client = new Neo4jMemoryClient("bolt://localhost:7687", "neo4j", "password", 1024, mockLogger);
(client as any).driver = mockDriver;
(client as any).indexesReady = true;
});
it("should clear taskId from all matching memories", async () => {
mockSession.run.mockResolvedValue({
records: [createMockRecord({ cleared: 3 })],
});
const count = await client.clearTaskIdFromMemories("TASK-001");
expect(count).toBe(3);
const runCall = mockSession.run.mock.calls[0];
const cypher = runCall[0] as string;
const params = runCall[1] as Record<string, unknown>;
expect(cypher).toContain("m.taskId = $taskId");
expect(cypher).toContain("SET m.taskId = null");
expect(params.taskId).toBe("TASK-001");
});
it("should filter by agentId when provided", async () => {
mockSession.run.mockResolvedValue({
records: [createMockRecord({ cleared: 1 })],
});
await client.clearTaskIdFromMemories("TASK-001", "agent-1");
const runCall = mockSession.run.mock.calls[0];
const cypher = runCall[0] as string;
const params = runCall[1] as Record<string, unknown>;
expect(cypher).toContain("m.agentId = $agentId");
expect(params.agentId).toBe("agent-1");
});
it("should return 0 when no memories match", async () => {
mockSession.run.mockResolvedValue({
records: [createMockRecord({ cleared: 0 })],
});
const count = await client.clearTaskIdFromMemories("TASK-999");
expect(count).toBe(0);
});
});
// ============================================================================
// Hybrid search results include taskId
// ============================================================================
describe("Task Metadata: hybrid search includes taskId", () => {
it("should carry taskId through RRF fusion", () => {
const vectorResults = [
{
id: "mem-1",
text: "memory with task",
category: "fact",
importance: 0.8,
createdAt: "2026-01-01",
score: 0.9,
taskId: "TASK-001",
},
{
id: "mem-2",
text: "memory without task",
category: "other",
importance: 0.5,
createdAt: "2026-01-02",
score: 0.8,
},
];
const bm25Results = [
{
id: "mem-1",
text: "memory with task",
category: "fact",
importance: 0.8,
createdAt: "2026-01-01",
score: 0.7,
taskId: "TASK-001",
},
];
const graphResults: typeof vectorResults = [];
const fused = fuseWithConfidenceRRF(
[vectorResults, bm25Results, graphResults],
60,
[1.0, 1.0, 1.0],
);
// mem-1 should have taskId preserved
const mem1 = fused.find((r) => r.id === "mem-1");
expect(mem1).toBeDefined();
expect(mem1!.taskId).toBe("TASK-001");
// mem-2 should have undefined taskId
const mem2 = fused.find((r) => r.id === "mem-2");
expect(mem2).toBeDefined();
expect(mem2!.taskId).toBeUndefined();
});
it("should include taskId in fused results when present in any signal", () => {
// taskId present only in BM25 signal
const vectorResults = [
{
id: "mem-1",
text: "test",
category: "fact",
importance: 0.8,
createdAt: "2026-01-01",
score: 0.9,
// no taskId
},
];
const bm25Results = [
{
id: "mem-1",
text: "test",
category: "fact",
importance: 0.8,
createdAt: "2026-01-01",
score: 0.7,
taskId: "TASK-002",
},
];
const fused = fuseWithConfidenceRRF([vectorResults, bm25Results, []], 60, [1.0, 1.0, 1.0]);
// The first signal (vector) is used for metadata — taskId would be undefined
// because candidateMetadata takes the first occurrence
const mem1 = fused.find((r) => r.id === "mem-1");
expect(mem1).toBeDefined();
// The first signal to contribute metadata wins
// vector came first and has no taskId
expect(mem1!.taskId).toBeUndefined();
});
});
// ============================================================================
// Auto-tagging: parseTaskLedger for active task detection
// ============================================================================
describe("Task Metadata: auto-tagging via parseTaskLedger", () => {
it("should detect single active task for auto-tagging", () => {
const content = `# Active Tasks
## TASK-005: Fix login bug
- **Status:** in_progress
- **Started:** 2026-02-16
# Completed
## TASK-004: Fix browser port collision
- **Completed:** 2026-02-16
`;
const ledger = parseTaskLedger(content);
expect(ledger.activeTasks).toHaveLength(1);
expect(ledger.activeTasks[0].id).toBe("TASK-005");
});
it("should not auto-tag when multiple active tasks exist", () => {
const content = `# Active Tasks
## TASK-005: Fix login bug
- **Status:** in_progress
## TASK-006: Update docs
- **Status:** in_progress
# Completed
`;
const ledger = parseTaskLedger(content);
// Multiple active tasks — should NOT auto-tag
expect(ledger.activeTasks.length).toBeGreaterThan(1);
});
it("should not auto-tag when no active tasks exist", () => {
const content = `# Active Tasks
_No active tasks_
# Completed
## TASK-004: Fix browser port collision
- **Completed:** 2026-02-16
`;
const ledger = parseTaskLedger(content);
expect(ledger.activeTasks).toHaveLength(0);
});
it("should extract completed task IDs for recall filtering", () => {
const content = `# Active Tasks
## TASK-007: New feature
- **Status:** in_progress
# Completed
## TASK-002: Book flights
- **Completed:** 2026-02-16
## TASK-003: Fix dashboard
- **Completed:** 2026-02-16
`;
const ledger = parseTaskLedger(content);
const completedTaskIds = new Set(ledger.completedTasks.map((t) => t.id));
expect(completedTaskIds.has("TASK-002")).toBe(true);
expect(completedTaskIds.has("TASK-003")).toBe(true);
expect(completedTaskIds.has("TASK-007")).toBe(false);
});
});
// ============================================================================
// Recall filter: taskId-based completed task filtering
// ============================================================================
describe("Task Metadata: recall filter", () => {
it("should filter out memories linked to completed tasks", () => {
const completedTaskIds = new Set(["TASK-002", "TASK-003"]);
const results = [
{
id: "1",
text: "active task memory",
taskId: "TASK-007",
score: 0.9,
category: "fact",
importance: 0.8,
createdAt: "2026-01-01",
},
{
id: "2",
text: "completed task memory",
taskId: "TASK-002",
score: 0.85,
category: "fact",
importance: 0.7,
createdAt: "2026-01-01",
},
{
id: "3",
text: "no task memory",
score: 0.8,
category: "other",
importance: 0.5,
createdAt: "2026-01-01",
},
{
id: "4",
text: "another completed",
taskId: "TASK-003",
score: 0.75,
category: "fact",
importance: 0.6,
createdAt: "2026-01-01",
},
];
const filtered = results.filter((r) => !r.taskId || !completedTaskIds.has(r.taskId));
expect(filtered).toHaveLength(2);
expect(filtered[0].id).toBe("1"); // active task — kept
expect(filtered[1].id).toBe("3"); // no task — kept
});
it("should keep all memories when no completed task IDs", () => {
const completedTaskIds = new Set<string>();
const results = [
{ id: "1", text: "memory A", taskId: "TASK-001", score: 0.9 },
{ id: "2", text: "memory B", score: 0.8 },
];
const filtered = results.filter((r) => !r.taskId || !completedTaskIds.has(r.taskId));
expect(filtered).toHaveLength(2);
});
it("should keep memories without taskId regardless of filter", () => {
const completedTaskIds = new Set(["TASK-001", "TASK-002"]);
const results = [
{ id: "1", text: "old memory without task", score: 0.9 },
{ id: "2", text: "another old one", taskId: undefined, score: 0.8 },
];
const filtered = results.filter((r) => !r.taskId || !completedTaskIds.has(r.taskId));
expect(filtered).toHaveLength(2);
});
});
// ============================================================================
// Vector/BM25 search results include taskId
// ============================================================================
describe("Task Metadata: search signal taskId", () => {
let client: Neo4jMemoryClient;
let mockDriver: ReturnType<typeof createMockDriver>;
let mockSession: ReturnType<typeof createMockSession>;
beforeEach(() => {
const mockLogger = createMockLogger();
mockDriver = createMockDriver();
mockSession = createMockSession();
mockDriver.session.mockReturnValue(mockSession);
client = new Neo4jMemoryClient("bolt://localhost:7687", "neo4j", "password", 1024, mockLogger);
(client as any).driver = mockDriver;
(client as any).indexesReady = true;
});
it("vector search should include taskId in results", async () => {
mockSession.run.mockResolvedValue({
records: [
createMockRecord({
id: "mem-1",
text: "test",
category: "fact",
importance: 0.8,
createdAt: "2026-01-01",
taskId: "TASK-001",
similarity: 0.95,
}),
createMockRecord({
id: "mem-2",
text: "test2",
category: "other",
importance: 0.5,
createdAt: "2026-01-02",
taskId: null, // Legacy memory without taskId
similarity: 0.85,
}),
],
});
const results = await client.vectorSearch([0.1, 0.2], 10, 0.1);
expect(results[0].taskId).toBe("TASK-001");
expect(results[1].taskId).toBeUndefined(); // null → undefined
});
it("BM25 search should include taskId in results", async () => {
mockSession.run.mockResolvedValue({
records: [
createMockRecord({
id: "mem-1",
text: "test query",
category: "fact",
importance: 0.8,
createdAt: "2026-01-01",
taskId: "TASK-002",
bm25Score: 5.0,
}),
],
});
const results = await client.bm25Search("test query", 10);
expect(results[0].taskId).toBe("TASK-002");
});
});