feat(memory): Add opt-in temporal decay for hybrid search scoring

Apply exponential decay (configurable half-life, default 30 days) to
hybrid search scores before MMR re-ranking. Dated daily files
(memory/YYYY-MM-DD.md) are aged by the date in their filename; evergreen
files (MEMORY.md, topic files) are not decayed; all other sources fall
back to the file's mtime.

Config: memorySearch.query.hybrid.temporalDecay.{enabled, halfLifeDays}
Default: disabled (backwards compatible, opt-in).
This commit is contained in:
Rodrigo Uroz
2026-02-10 08:42:22 -03:00
committed by Peter Steinberger
parent fa9420069a
commit 6b3e0710f4
13 changed files with 1372 additions and 35 deletions

View File

@@ -17,8 +17,8 @@ describe("memory hybrid helpers", () => {
expect(bm25RankToScore(-100)).toBeCloseTo(1);
});
it("mergeHybridResults unions by id and combines weighted scores", () => {
const merged = mergeHybridResults({
it("mergeHybridResults unions by id and combines weighted scores", async () => {
const merged = await mergeHybridResults({
vectorWeight: 0.7,
textWeight: 0.3,
vector: [
@@ -52,8 +52,8 @@ describe("memory hybrid helpers", () => {
expect(b?.score).toBeCloseTo(0.3 * 1.0);
});
it("mergeHybridResults prefers keyword snippet when ids overlap", () => {
const merged = mergeHybridResults({
it("mergeHybridResults prefers keyword snippet when ids overlap", async () => {
const merged = await mergeHybridResults({
vectorWeight: 0.5,
textWeight: 0.5,
vector: [

View File

@@ -1,8 +1,14 @@
import { applyMMRToHybridResults, type MMRConfig, DEFAULT_MMR_CONFIG } from "./mmr.js";
import {
applyTemporalDecayToHybridResults,
type TemporalDecayConfig,
DEFAULT_TEMPORAL_DECAY_CONFIG,
} from "./temporal-decay.js";
export type HybridSource = string;
export { type MMRConfig, DEFAULT_MMR_CONFIG };
export { type TemporalDecayConfig, DEFAULT_TEMPORAL_DECAY_CONFIG };
export type HybridVectorResult = {
id: string;
@@ -42,21 +48,28 @@ export function bm25RankToScore(rank: number): number {
return 1 / (1 + normalized);
}
export function mergeHybridResults(params: {
export async function mergeHybridResults(params: {
vector: HybridVectorResult[];
keyword: HybridKeywordResult[];
vectorWeight: number;
textWeight: number;
workspaceDir?: string;
/** MMR configuration for diversity-aware re-ranking */
mmr?: Partial<MMRConfig>;
}): Array<{
path: string;
startLine: number;
endLine: number;
score: number;
snippet: string;
source: HybridSource;
}> {
/** Temporal decay configuration for recency-aware scoring */
temporalDecay?: Partial<TemporalDecayConfig>;
/** Test seam for deterministic time-dependent behavior */
nowMs?: number;
}): Promise<
Array<{
path: string;
startLine: number;
endLine: number;
score: number;
snippet: string;
source: HybridSource;
}>
> {
const byId = new Map<
string,
{
@@ -117,7 +130,14 @@ export function mergeHybridResults(params: {
};
});
const sorted = merged.toSorted((a, b) => b.score - a.score);
const temporalDecayConfig = { ...DEFAULT_TEMPORAL_DECAY_CONFIG, ...params.temporalDecay };
const decayed = await applyTemporalDecayToHybridResults({
results: merged,
temporalDecay: temporalDecayConfig,
workspaceDir: params.workspaceDir,
nowMs: params.nowMs,
});
const sorted = decayed.toSorted((a, b) => b.score - a.score);
// Apply MMR re-ranking if enabled
const mmrConfig = { ...DEFAULT_MMR_CONFIG, ...params.mmr };

View File

@@ -278,11 +278,13 @@ export class MemoryIndexManager implements MemorySearchManager {
return vectorResults.filter((entry) => entry.score >= minScore).slice(0, maxResults);
}
const merged = this.mergeHybridResults({
const merged = await this.mergeHybridResults({
vector: vectorResults,
keyword: keywordResults,
vectorWeight: hybrid.vectorWeight,
textWeight: hybrid.textWeight,
mmr: hybrid.mmr,
temporalDecay: hybrid.temporalDecay,
});
return merged.filter((entry) => entry.score >= minScore).slice(0, maxResults);
@@ -343,8 +345,10 @@ export class MemoryIndexManager implements MemorySearchManager {
keyword: Array<MemorySearchResult & { id: string; textScore: number }>;
vectorWeight: number;
textWeight: number;
}): MemorySearchResult[] {
const merged = mergeHybridResults({
mmr?: { enabled: boolean; lambda: number };
temporalDecay?: { enabled: boolean; halfLifeDays: number };
}): Promise<MemorySearchResult[]> {
return mergeHybridResults({
vector: params.vector.map((r) => ({
id: r.id,
path: r.path,
@@ -365,8 +369,10 @@ export class MemoryIndexManager implements MemorySearchManager {
})),
vectorWeight: params.vectorWeight,
textWeight: params.textWeight,
});
return merged.map((entry) => entry as MemorySearchResult);
mmr: params.mmr,
temporalDecay: params.temporalDecay,
workspaceDir: this.workspaceDir,
}).then((entries) => entries.map((entry) => entry as MemorySearchResult));
}
async sync(params?: {

View File

@@ -8,7 +8,7 @@ import {
applyMMRToHybridResults,
DEFAULT_MMR_CONFIG,
type MMRItem,
} from "../mmr.js";
} from "./mmr.js";
describe("tokenize", () => {
it("extracts alphanumeric tokens and lowercases", () => {

View File

@@ -39,15 +39,21 @@ export function tokenize(text: string): Set<string> {
* Returns a value in [0, 1] where 1 means identical sets.
*/
export function jaccardSimilarity(setA: Set<string>, setB: Set<string>): number {
if (setA.size === 0 && setB.size === 0) return 1;
if (setA.size === 0 || setB.size === 0) return 0;
if (setA.size === 0 && setB.size === 0) {
return 1;
}
if (setA.size === 0 || setB.size === 0) {
return 0;
}
let intersectionSize = 0;
const smaller = setA.size <= setB.size ? setA : setB;
const larger = setA.size <= setB.size ? setB : setA;
for (const token of smaller) {
if (larger.has(token)) intersectionSize++;
if (larger.has(token)) {
intersectionSize++;
}
}
const unionSize = setA.size + setB.size - intersectionSize;
@@ -69,7 +75,9 @@ function maxSimilarityToSelected(
selectedItems: MMRItem[],
tokenCache: Map<string, Set<string>>,
): number {
if (selectedItems.length === 0) return 0;
if (selectedItems.length === 0) {
return 0;
}
let maxSim = 0;
const itemTokens = tokenCache.get(item.id) ?? tokenize(item.content);
@@ -77,7 +85,9 @@ function maxSimilarityToSelected(
for (const selected of selectedItems) {
const selectedTokens = tokenCache.get(selected.id) ?? tokenize(selected.content);
const sim = jaccardSimilarity(itemTokens, selectedTokens);
if (sim > maxSim) maxSim = sim;
if (sim > maxSim) {
maxSim = sim;
}
}
return maxSim;
@@ -107,14 +117,16 @@ export function mmrRerank<T extends MMRItem>(items: T[], config: Partial<MMRConf
const { enabled = DEFAULT_MMR_CONFIG.enabled, lambda = DEFAULT_MMR_CONFIG.lambda } = config;
// Early exits
if (!enabled || items.length <= 1) return [...items];
if (!enabled || items.length <= 1) {
return [...items];
}
// Clamp lambda to valid range
const clampedLambda = Math.max(0, Math.min(1, lambda));
// If lambda is 1, just return sorted by relevance (no diversity penalty)
if (clampedLambda === 1) {
return [...items].sort((a, b) => b.score - a.score);
return [...items].toSorted((a, b) => b.score - a.score);
}
// Pre-tokenize all items for efficiency
@@ -129,7 +141,9 @@ export function mmrRerank<T extends MMRItem>(items: T[], config: Partial<MMRConf
const scoreRange = maxScore - minScore;
const normalizeScore = (score: number): number => {
if (scoreRange === 0) return 1; // All scores equal
if (scoreRange === 0) {
return 1; // All scores equal
}
return (score - minScore) / scoreRange;
};
@@ -175,7 +189,9 @@ export function mmrRerank<T extends MMRItem>(items: T[], config: Partial<MMRConf
export function applyMMRToHybridResults<
T extends { score: number; snippet: string; path: string; startLine: number },
>(results: T[], config: Partial<MMRConfig> = {}): T[] {
if (results.length === 0) return results;
if (results.length === 0) {
return results;
}
// Create a map from ID to original item for type-safe retrieval
const itemById = new Map<string, T>();

View File

@@ -0,0 +1,173 @@
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { afterEach, describe, expect, it } from "vitest";
import { mergeHybridResults } from "./hybrid.js";
import {
applyTemporalDecayToHybridResults,
applyTemporalDecayToScore,
calculateTemporalDecayMultiplier,
} from "./temporal-decay.js";
// Milliseconds in one day.
const DAY_MS = 24 * 60 * 60 * 1000;
// Fixed "now" (2026-02-10T00:00:00Z; Date.UTC months are zero-based) so that
// age-dependent expectations are deterministic.
const NOW_MS = Date.UTC(2026, 1, 10, 0, 0, 0);
// Temp directories created by makeTempDir; emptied and removed in afterEach.
const tempDirs: string[] = [];
/**
 * Creates a unique temporary directory for a test and registers it in
 * `tempDirs` so it can be cleaned up after the test finishes.
 *
 * @returns The path of the newly created directory.
 */
async function makeTempDir(): Promise<string> {
  const prefix = path.join(os.tmpdir(), "openclaw-temporal-decay-");
  const created = await fs.mkdtemp(prefix);
  tempDirs.push(created);
  return created;
}
// Drain the registry of temp directories and delete them all in parallel.
afterEach(async () => {
  const pending = tempDirs.splice(0);
  await Promise.all(pending.map((dir) => fs.rm(dir, { recursive: true, force: true })));
});
// Test suite for the temporal-decay helpers and their integration into
// mergeHybridResults. All time-dependent cases pin "now" via NOW_MS.
describe("temporal decay", () => {
// The multiplier must equal exp(-(ln 2 / halfLife) * age), and the score
// helper must be the raw score times that multiplier.
it("matches exponential decay formula", () => {
const halfLifeDays = 30;
const ageInDays = 10;
const lambda = Math.LN2 / halfLifeDays;
const expectedMultiplier = Math.exp(-lambda * ageInDays);
expect(calculateTemporalDecayMultiplier({ ageInDays, halfLifeDays })).toBeCloseTo(
expectedMultiplier,
);
expect(applyTemporalDecayToScore({ score: 0.8, ageInDays, halfLifeDays })).toBeCloseTo(
0.8 * expectedMultiplier,
);
});
// Sanity check of the half-life definition: age == halfLife halves the score.
it("is 0.5 exactly at half-life", () => {
expect(calculateTemporalDecayMultiplier({ ageInDays: 30, halfLifeDays: 30 })).toBeCloseTo(0.5);
});
// MEMORY.md and non-dated files under memory/ must keep their score.
it("does not decay evergreen memory files", async () => {
const dir = await makeTempDir();
const rootMemoryPath = path.join(dir, "MEMORY.md");
const topicPath = path.join(dir, "memory", "projects.md");
await fs.mkdir(path.dirname(topicPath), { recursive: true });
await fs.writeFile(rootMemoryPath, "evergreen");
await fs.writeFile(topicPath, "topic evergreen");
// Backdate the mtimes to 2010 so that an accidental fallback to file mtime
// would produce an obvious decay and fail the assertions below.
const veryOld = new Date(Date.UTC(2010, 0, 1));
await fs.utimes(rootMemoryPath, veryOld, veryOld);
await fs.utimes(topicPath, veryOld, veryOld);
const decayed = await applyTemporalDecayToHybridResults({
results: [
{ path: "MEMORY.md", score: 1, source: "memory" },
{ path: "memory/projects.md", score: 0.75, source: "memory" },
],
workspaceDir: dir,
temporalDecay: { enabled: true, halfLifeDays: 30 },
nowMs: NOW_MS,
});
expect(decayed[0]?.score).toBeCloseTo(1);
expect(decayed[1]?.score).toBeCloseTo(0.75);
});
// An older entry with a higher raw score must rank below a fresh entry once
// decay is applied. No workspaceDir is passed: ages come from the file names.
it("applies decay in hybrid merging before ranking", async () => {
const merged = await mergeHybridResults({
vectorWeight: 1,
textWeight: 0,
temporalDecay: { enabled: true, halfLifeDays: 30 },
mmr: { enabled: false },
nowMs: NOW_MS,
vector: [
{
id: "old",
path: "memory/2025-01-01.md",
startLine: 1,
endLine: 1,
source: "memory",
snippet: "old but high",
vectorScore: 0.95,
},
{
id: "new",
path: "memory/2026-02-10.md",
startLine: 1,
endLine: 1,
source: "memory",
snippet: "new and relevant",
vectorScore: 0.8,
},
],
keyword: [],
});
expect(merged[0]?.path).toBe("memory/2026-02-10.md");
expect(merged[0]?.score ?? 0).toBeGreaterThan(merged[1]?.score ?? 0);
});
// Boundary cases: a future-dated file and a file dated "today" keep their
// score (age is clamped to zero), while a decades-old file decays to ~0.
it("handles future dates, zero age, and very old memories", async () => {
const merged = await mergeHybridResults({
vectorWeight: 1,
textWeight: 0,
temporalDecay: { enabled: true, halfLifeDays: 30 },
mmr: { enabled: false },
nowMs: NOW_MS,
vector: [
{
id: "future",
path: "memory/2099-01-01.md",
startLine: 1,
endLine: 1,
source: "memory",
snippet: "future",
vectorScore: 0.9,
},
{
id: "today",
path: "memory/2026-02-10.md",
startLine: 1,
endLine: 1,
source: "memory",
snippet: "today",
vectorScore: 0.8,
},
{
id: "very-old",
path: "memory/2000-01-01.md",
startLine: 1,
endLine: 1,
source: "memory",
snippet: "ancient",
vectorScore: 1,
},
],
keyword: [],
});
// Look results up by path so the assertions do not depend on ranking order.
const byPath = new Map(merged.map((entry) => [entry.path, entry]));
expect(byPath.get("memory/2099-01-01.md")?.score).toBeCloseTo(0.9);
expect(byPath.get("memory/2026-02-10.md")?.score).toBeCloseTo(0.8);
expect(byPath.get("memory/2000-01-01.md")?.score ?? 1).toBeLessThan(0.001);
});
// Paths that are neither dated memory files nor evergreen memory files are
// aged by the file's mtime, resolved relative to workspaceDir.
it("uses file mtime fallback for non-memory sources", async () => {
const dir = await makeTempDir();
const sessionPath = path.join(dir, "sessions", "thread.jsonl");
await fs.mkdir(path.dirname(sessionPath), { recursive: true });
await fs.writeFile(sessionPath, "{}\n");
// Exactly one half-life (30 days) before NOW_MS, so the score should halve.
const oldMtime = new Date(NOW_MS - 30 * DAY_MS);
await fs.utimes(sessionPath, oldMtime, oldMtime);
const decayed = await applyTemporalDecayToHybridResults({
results: [{ path: "sessions/thread.jsonl", score: 1, source: "sessions" }],
workspaceDir: dir,
temporalDecay: { enabled: true, halfLifeDays: 30 },
nowMs: NOW_MS,
});
expect(decayed[0]?.score).toBeCloseTo(0.5, 2);
});
});

View File

@@ -0,0 +1,166 @@
import fs from "node:fs/promises";
import path from "node:path";
/** Settings that control recency-based score decay of hybrid search results. */
export type TemporalDecayConfig = {
/** When false, applyTemporalDecayToHybridResults leaves all scores unchanged. */
enabled: boolean;
/** Age, in days, at which a result's score is multiplied by 0.5. */
halfLifeDays: number;
};
/** Defaults used when a field is not supplied: decay disabled, 30-day half-life. */
export const DEFAULT_TEMPORAL_DECAY_CONFIG: TemporalDecayConfig = {
enabled: false,
halfLifeDays: 30,
};
// Milliseconds in one day.
const DAY_MS = 24 * 60 * 60 * 1000;
// Matches a dated daily memory file ("memory/YYYY-MM-DD.md") where "memory/"
// is at the start of the path or follows a "/". Captures year, month and day.
// Expects forward slashes; callers normalize backslashes before matching.
const DATED_MEMORY_PATH_RE = /(?:^|\/)memory\/(\d{4})-(\d{2})-(\d{2})\.md$/;
/**
 * Converts a half-life into the exponential decay rate `ln(2) / halfLifeDays`.
 *
 * @param halfLifeDays Half-life in days.
 * @returns The decay rate per day, or 0 when the half-life is not a finite
 *   positive number (NaN, infinite, zero or negative).
 */
export function toDecayLambda(halfLifeDays: number): number {
  const isUsableHalfLife = Number.isFinite(halfLifeDays) && halfLifeDays > 0;
  return isUsableHalfLife ? Math.LN2 / halfLifeDays : 0;
}
/**
 * Computes the exponential decay factor `exp(-lambda * age)` for an item of
 * the given age, where `lambda` comes from `toDecayLambda(halfLifeDays)`.
 *
 * Negative ages are treated as zero. The factor is 1 (no decay) when the
 * decay rate is not positive or the clamped age is not finite.
 *
 * @param params.ageInDays Age of the item in days.
 * @param params.halfLifeDays Half-life in days.
 * @returns The factor to multiply a score by.
 */
export function calculateTemporalDecayMultiplier(params: {
  ageInDays: number;
  halfLifeDays: number;
}): number {
  const rate = toDecayLambda(params.halfLifeDays);
  const age = Math.max(0, params.ageInDays);
  const skipDecay = rate <= 0 || !Number.isFinite(age);
  return skipDecay ? 1 : Math.exp(-rate * age);
}
/**
 * Scales a score by the temporal decay factor for the given age.
 *
 * @param params.score Score before decay.
 * @param params.ageInDays Age of the item in days.
 * @param params.halfLifeDays Half-life in days.
 * @returns `score` multiplied by `calculateTemporalDecayMultiplier(...)`.
 */
export function applyTemporalDecayToScore(params: {
  score: number;
  ageInDays: number;
  halfLifeDays: number;
}): number {
  const { score, ageInDays, halfLifeDays } = params;
  const multiplier = calculateTemporalDecayMultiplier({ ageInDays, halfLifeDays });
  return score * multiplier;
}
/**
 * Extracts the calendar date encoded in a dated memory file path
 * ("memory/YYYY-MM-DD.md"), interpreted as midnight UTC.
 *
 * Backslashes are converted to forward slashes and one leading "./" is
 * dropped before matching.
 *
 * @returns The parsed date, or null when the path does not match the pattern
 *   or the digits do not form a real calendar date (e.g. month 13, Feb 30).
 */
function parseMemoryDateFromPath(filePath: string): Date | null {
  const posixPath = filePath.replaceAll("\\", "/").replace(/^\.\//, "");
  const parts = DATED_MEMORY_PATH_RE.exec(posixPath);
  if (parts === null) {
    return null;
  }

  const year = Number(parts[1]);
  const month = Number(parts[2]);
  const day = Number(parts[3]);
  const allIntegers = [year, month, day].every((value) => Number.isInteger(value));
  if (!allIntegers) {
    return null;
  }

  // Date.UTC silently rolls over out-of-range fields, so confirm the
  // constructed date still has the same year, month and day.
  const candidate = new Date(Date.UTC(year, month - 1, day));
  const roundTrips =
    candidate.getUTCFullYear() === year &&
    candidate.getUTCMonth() === month - 1 &&
    candidate.getUTCDate() === day;
  return roundTrips ? candidate : null;
}
/**
 * Reports whether a path names an "evergreen" memory file: the root
 * "MEMORY.md" / "memory.md", or any file under "memory/" that does not match
 * the dated "memory/YYYY-MM-DD.md" pattern.
 *
 * Backslashes are converted to forward slashes and one leading "./" is
 * dropped before the checks.
 */
function isEvergreenMemoryPath(filePath: string): boolean {
  const posixPath = filePath.replaceAll("\\", "/").replace(/^\.\//, "");
  const isRootMemoryFile = posixPath === "MEMORY.md" || posixPath === "memory.md";
  if (isRootMemoryFile) {
    return true;
  }
  return posixPath.startsWith("memory/") && !DATED_MEMORY_PATH_RE.test(posixPath);
}
/**
 * Determines the timestamp used to age a search result.
 *
 * Resolution order:
 *  1. A date encoded in the path ("memory/YYYY-MM-DD.md").
 *  2. null for evergreen memory files (source "memory"), which must not decay.
 *  3. The file's mtime, resolved against `workspaceDir` for relative paths.
 *
 * @returns The timestamp, or null when none applies: evergreen file, no
 *   `workspaceDir`, non-finite mtime, or the file cannot be stat'ed.
 */
async function extractTimestamp(params: {
  filePath: string;
  source?: string;
  workspaceDir?: string;
}): Promise<Date | null> {
  const { filePath, source, workspaceDir } = params;

  const datedFromName = parseMemoryDateFromPath(filePath);
  if (datedFromName) {
    return datedFromName;
  }

  // Memory root/topic files are evergreen knowledge and should not decay.
  const isEvergreen = source === "memory" && isEvergreenMemoryPath(filePath);
  if (isEvergreen || !workspaceDir) {
    return null;
  }

  const target = path.isAbsolute(filePath) ? filePath : path.resolve(workspaceDir, filePath);
  try {
    const { mtimeMs } = await fs.stat(target);
    return Number.isFinite(mtimeMs) ? new Date(mtimeMs) : null;
  } catch {
    // Best effort: a missing or unreadable file simply gets no decay.
    return null;
  }
}
/**
 * Returns how many (fractional) days lie between `timestamp` and `nowMs`.
 * Timestamps in the future yield an age of 0.
 */
function ageInDaysFromTimestamp(timestamp: Date, nowMs: number): number {
  const elapsedMs = nowMs - timestamp.getTime();
  return Math.max(0, elapsedMs) / DAY_MS;
}
/**
 * Applies exponential temporal decay to the scores of hybrid search results.
 *
 * Each result's timestamp is resolved via `extractTimestamp`. Results with no
 * timestamp are returned unchanged (same object); the others are returned as
 * shallow copies with `score` replaced by the decayed score. The input array
 * and its entries are never mutated, and result order is preserved.
 *
 * @param params.results Results to process.
 * @param params.temporalDecay Overrides merged over
 *   `DEFAULT_TEMPORAL_DECAY_CONFIG`. When the merged config is not enabled, a
 *   shallow copy of `results` is returned without any timestamp lookups.
 * @param params.workspaceDir Passed through to `extractTimestamp`.
 * @param params.nowMs Reference "now" in epoch milliseconds; defaults to
 *   `Date.now()`.
 * @returns A new array with the same length and order as `results`.
 */
export async function applyTemporalDecayToHybridResults<
  T extends { path: string; score: number; source: string },
>(params: {
  results: T[];
  temporalDecay?: Partial<TemporalDecayConfig>;
  workspaceDir?: string;
  nowMs?: number;
}): Promise<T[]> {
  const config = { ...DEFAULT_TEMPORAL_DECAY_CONFIG, ...params.temporalDecay };
  if (!config.enabled) {
    return [...params.results];
  }

  const nowMs = params.nowMs ?? Date.now();

  // Cache the in-flight lookup (the promise), not the resolved value. All map
  // callbacks below run synchronously up to their first `await`, so a cache
  // of resolved values would still be empty when every entry checks it, and
  // each chunk of the same file would trigger its own lookup.
  const timestampCache = new Map<string, Promise<Date | null>>();
  const lookupTimestamp = (entry: T): Promise<Date | null> => {
    const cacheKey = `${entry.source}:${entry.path}`;
    let pending = timestampCache.get(cacheKey);
    if (pending === undefined) {
      pending = extractTimestamp({
        filePath: entry.path,
        source: entry.source,
        workspaceDir: params.workspaceDir,
      });
      timestampCache.set(cacheKey, pending);
    }
    return pending;
  };

  return Promise.all(
    params.results.map(async (entry) => {
      const timestamp = await lookupTimestamp(entry);
      if (!timestamp) {
        // No usable timestamp: leave the score untouched.
        return entry;
      }
      const decayedScore = applyTemporalDecayToScore({
        score: entry.score,
        ageInDays: ageInDaysFromTimestamp(timestamp, nowMs),
        halfLifeDays: config.halfLifeDays,
      });
      return {
        ...entry,
        score: decayedScore,
      };
    }),
  );
}