diff --git a/CHANGELOG.md b/CHANGELOG.md index 23592dbd82..a263eba6b9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -57,6 +57,7 @@ Docs: https://docs.openclaw.ai - Slack/Discord/Telegram: add per-channel ack reaction overrides (account/channel-level) to support platform-specific emoji formats. (#17092) Thanks @zerone0x. - Cron/Gateway: add finished-run webhook delivery toggle (`notify`) and dedicated webhook auth token support (`cron.webhookToken`) for outbound cron webhook posts. (#14535) Thanks @advaitpaliwal. - Channels: deduplicate probe/token resolution base types across core + extensions while preserving per-channel error typing. (#16986) Thanks @iyoda and @thewilloftheshadow. +- Memory: add MMR (Maximal Marginal Relevance) re-ranking for hybrid search diversity. Configurable via `memorySearch.query.hybrid.mmr`. Thanks @rodrigouroz. ### Fixes diff --git a/docs/concepts/memory.md b/docs/concepts/memory.md index 699e6659ca..cc9c76e761 100644 --- a/docs/concepts/memory.md +++ b/docs/concepts/memory.md @@ -353,7 +353,6 @@ agents: { ``` Tools: - - `memory_search` — returns snippets with file + line ranges. - `memory_get` — read memory file content by path. @@ -396,11 +395,11 @@ But it can be weak at exact, high-signal tokens: - IDs (`a828e60`, `b3b9895a…`) - code symbols (`memorySearch.query.hybrid`) -- error strings (“sqlite-vec unavailable”) +- error strings ("sqlite-vec unavailable") BM25 (full-text) is the opposite: strong at exact tokens, weaker at paraphrases. Hybrid search is the pragmatic middle ground: **use both retrieval signals** so you get -good results for both “natural language” queries and “needle in a haystack” queries. +good results for both "natural language" queries and "needle in a haystack" queries. #### How we merge results (the current design) @@ -423,12 +422,28 @@ Notes: - `vectorWeight` + `textWeight` is normalized to 1.0 in config resolution, so weights behave as percentages. 
- If embeddings are unavailable (or the provider returns a zero-vector), we still run BM25 and return keyword matches. -- If FTS5 can’t be created, we keep vector-only search (no hard failure). +- If FTS5 can't be created, we keep vector-only search (no hard failure). -This isn’t “IR-theory perfect”, but it’s simple, fast, and tends to improve recall/precision on real notes. +This isn't "IR-theory perfect", but it's simple, fast, and tends to improve recall/precision on real notes. If we want to get fancier later, common next steps are Reciprocal Rank Fusion (RRF) or score normalization (min/max or z-score) before mixing. +#### MMR re-ranking (diversity) + +When hybrid search returns results, multiple chunks may contain similar or overlapping content. +**MMR (Maximal Marginal Relevance)** re-ranks the results to balance relevance with diversity, +ensuring the top results aren't all saying the same thing. + +How it works: +1. Results are scored by their original relevance (vector + BM25 weighted score). +2. MMR iteratively selects the result that maximizes `λ × relevance − (1−λ) × max_similarity_to_selected`, where relevance is the score min-max normalized to [0, 1]. +3. Similarity is the Jaccard overlap between tokenized snippets, so candidates that resemble an already-selected result are penalized. 
+ +The `lambda` parameter controls the trade-off: +- `lambda = 1.0` → pure relevance (no diversity penalty) +- `lambda = 0.0` → maximum diversity (ignores relevance) +- Default: `0.7` (balanced, slight relevance bias) + Config: ```json5 @@ -440,7 +455,11 @@ agents: { enabled: true, vectorWeight: 0.7, textWeight: 0.3, - candidateMultiplier: 4 + candidateMultiplier: 4, + mmr: { + enabled: true, + lambda: 0.7 + } } } } diff --git a/src/memory/__tests__/mmr.test.ts b/src/memory/__tests__/mmr.test.ts new file mode 100644 index 0000000000..5931eb6610 --- /dev/null +++ b/src/memory/__tests__/mmr.test.ts @@ -0,0 +1,371 @@ +import { describe, it, expect } from "vitest"; +import { + tokenize, + jaccardSimilarity, + textSimilarity, + computeMMRScore, + mmrRerank, + applyMMRToHybridResults, + DEFAULT_MMR_CONFIG, + type MMRItem, +} from "../mmr.js"; + +describe("tokenize", () => { + it("extracts alphanumeric tokens and lowercases", () => { + const result = tokenize("Hello World 123"); + expect(result).toEqual(new Set(["hello", "world", "123"])); + }); + + it("handles empty string", () => { + expect(tokenize("")).toEqual(new Set()); + }); + + it("handles special characters only", () => { + expect(tokenize("!@#$%^&*()")).toEqual(new Set()); + }); + + it("handles underscores in tokens", () => { + const result = tokenize("hello_world test_case"); + expect(result).toEqual(new Set(["hello_world", "test_case"])); + }); + + it("deduplicates repeated tokens", () => { + const result = tokenize("hello hello world world"); + expect(result).toEqual(new Set(["hello", "world"])); + }); +}); + +describe("jaccardSimilarity", () => { + it("returns 1 for identical sets", () => { + const set = new Set(["a", "b", "c"]); + expect(jaccardSimilarity(set, set)).toBe(1); + }); + + it("returns 0 for disjoint sets", () => { + const setA = new Set(["a", "b"]); + const setB = new Set(["c", "d"]); + expect(jaccardSimilarity(setA, setB)).toBe(0); + }); + + it("returns 1 for two empty sets", () => { + 
expect(jaccardSimilarity(new Set(), new Set())).toBe(1); + }); + + it("returns 0 when one set is empty", () => { + expect(jaccardSimilarity(new Set(["a"]), new Set())).toBe(0); + expect(jaccardSimilarity(new Set(), new Set(["a"]))).toBe(0); + }); + + it("computes correct similarity for partial overlap", () => { + const setA = new Set(["a", "b", "c"]); + const setB = new Set(["b", "c", "d"]); + // Intersection: {b, c} = 2, Union: {a, b, c, d} = 4 + expect(jaccardSimilarity(setA, setB)).toBe(0.5); + }); + + it("is symmetric", () => { + const setA = new Set(["a", "b"]); + const setB = new Set(["b", "c"]); + expect(jaccardSimilarity(setA, setB)).toBe(jaccardSimilarity(setB, setA)); + }); +}); + +describe("textSimilarity", () => { + it("returns 1 for identical text", () => { + expect(textSimilarity("hello world", "hello world")).toBe(1); + }); + + it("returns 1 for same words different order", () => { + expect(textSimilarity("hello world", "world hello")).toBe(1); + }); + + it("returns 0 for completely different text", () => { + expect(textSimilarity("hello world", "foo bar")).toBe(0); + }); + + it("handles case insensitivity", () => { + expect(textSimilarity("Hello World", "hello world")).toBe(1); + }); +}); + +describe("computeMMRScore", () => { + it("returns pure relevance when lambda=1", () => { + expect(computeMMRScore(0.8, 0.5, 1)).toBe(0.8); + }); + + it("returns negative similarity when lambda=0", () => { + expect(computeMMRScore(0.8, 0.5, 0)).toBe(-0.5); + }); + + it("balances relevance and diversity at lambda=0.5", () => { + // 0.5 * 0.8 - 0.5 * 0.6 = 0.4 - 0.3 = 0.1 + expect(computeMMRScore(0.8, 0.6, 0.5)).toBeCloseTo(0.1); + }); + + it("computes correctly with default lambda=0.7", () => { + // 0.7 * 1.0 - 0.3 * 0.5 = 0.7 - 0.15 = 0.55 + expect(computeMMRScore(1.0, 0.5, 0.7)).toBeCloseTo(0.55); + }); +}); + +describe("mmrRerank", () => { + describe("edge cases", () => { + it("returns empty array for empty input", () => { + expect(mmrRerank([])).toEqual([]); + 
}); + + it("returns single item unchanged", () => { + const items: MMRItem[] = [{ id: "1", score: 0.9, content: "hello" }]; + expect(mmrRerank(items)).toEqual(items); + }); + + it("returns copy, not original array", () => { + const items: MMRItem[] = [{ id: "1", score: 0.9, content: "hello" }]; + const result = mmrRerank(items); + expect(result).not.toBe(items); + }); + + it("returns items unchanged when disabled", () => { + const items: MMRItem[] = [ + { id: "1", score: 0.9, content: "hello" }, + { id: "2", score: 0.8, content: "hello" }, + ]; + const result = mmrRerank(items, { enabled: false }); + expect(result).toEqual(items); + }); + }); + + describe("lambda edge cases", () => { + const diverseItems: MMRItem[] = [ + { id: "1", score: 1.0, content: "apple banana cherry" }, + { id: "2", score: 0.9, content: "apple banana date" }, + { id: "3", score: 0.8, content: "elderberry fig grape" }, + ]; + + it("lambda=1 returns pure relevance order", () => { + const result = mmrRerank(diverseItems, { lambda: 1 }); + expect(result.map((i) => i.id)).toEqual(["1", "2", "3"]); + }); + + it("lambda=0 maximizes diversity", () => { + const result = mmrRerank(diverseItems, { lambda: 0 }); + // First item is still highest score (no penalty yet) + expect(result[0].id).toBe("1"); + // Second should be most different from first + expect(result[1].id).toBe("3"); // elderberry... 
is most different + }); + + it("clamps lambda > 1 to 1", () => { + const result = mmrRerank(diverseItems, { lambda: 1.5 }); + expect(result.map((i) => i.id)).toEqual(["1", "2", "3"]); + }); + + it("clamps lambda < 0 to 0", () => { + const result = mmrRerank(diverseItems, { lambda: -0.5 }); + expect(result[0].id).toBe("1"); + expect(result[1].id).toBe("3"); + }); + }); + + describe("diversity behavior", () => { + it("promotes diverse results over similar high-scoring ones", () => { + const items: MMRItem[] = [ + { id: "1", score: 1.0, content: "machine learning neural networks" }, + { id: "2", score: 0.95, content: "machine learning deep learning" }, + { id: "3", score: 0.9, content: "database systems sql queries" }, + { id: "4", score: 0.85, content: "machine learning algorithms" }, + ]; + + const result = mmrRerank(items, { lambda: 0.5 }); + + // First is always highest score + expect(result[0].id).toBe("1"); + // Second should be the diverse database item, not another ML item + expect(result[1].id).toBe("3"); + }); + + it("handles items with identical content", () => { + const items: MMRItem[] = [ + { id: "1", score: 1.0, content: "identical content" }, + { id: "2", score: 0.9, content: "identical content" }, + { id: "3", score: 0.8, content: "different stuff" }, + ]; + + const result = mmrRerank(items, { lambda: 0.5 }); + expect(result[0].id).toBe("1"); + // Second should be different, not identical duplicate + expect(result[1].id).toBe("3"); + }); + + it("handles all identical content gracefully", () => { + const items: MMRItem[] = [ + { id: "1", score: 1.0, content: "same" }, + { id: "2", score: 0.9, content: "same" }, + { id: "3", score: 0.8, content: "same" }, + ]; + + const result = mmrRerank(items, { lambda: 0.7 }); + // Should still complete without error, order by score as tiebreaker + expect(result).toHaveLength(3); + }); + }); + + describe("tie-breaking", () => { + it("uses original score as tiebreaker", () => { + const items: MMRItem[] = [ + { id: 
"1", score: 1.0, content: "unique content one" }, + { id: "2", score: 0.9, content: "unique content two" }, + { id: "3", score: 0.8, content: "unique content three" }, + ]; + + // With very different content and lambda=1, should be pure score order + const result = mmrRerank(items, { lambda: 1 }); + expect(result.map((i) => i.id)).toEqual(["1", "2", "3"]); + }); + + it("preserves all items even with same MMR scores", () => { + const items: MMRItem[] = [ + { id: "1", score: 0.5, content: "a" }, + { id: "2", score: 0.5, content: "b" }, + { id: "3", score: 0.5, content: "c" }, + ]; + + const result = mmrRerank(items, { lambda: 0.7 }); + expect(result).toHaveLength(3); + expect(new Set(result.map((i) => i.id))).toEqual(new Set(["1", "2", "3"])); + }); + }); + + describe("score normalization", () => { + it("handles items with same scores", () => { + const items: MMRItem[] = [ + { id: "1", score: 0.5, content: "hello world" }, + { id: "2", score: 0.5, content: "foo bar" }, + ]; + + const result = mmrRerank(items, { lambda: 0.7 }); + expect(result).toHaveLength(2); + }); + + it("handles negative scores", () => { + const items: MMRItem[] = [ + { id: "1", score: -0.5, content: "hello world" }, + { id: "2", score: -1.0, content: "foo bar" }, + ]; + + const result = mmrRerank(items, { lambda: 0.7 }); + expect(result).toHaveLength(2); + // Higher score (less negative) should come first + expect(result[0].id).toBe("1"); + }); + }); +}); + +describe("applyMMRToHybridResults", () => { + type HybridResult = { + path: string; + startLine: number; + endLine: number; + score: number; + snippet: string; + source: string; + }; + + it("returns empty array for empty input", () => { + expect(applyMMRToHybridResults([])).toEqual([]); + }); + + it("preserves all original fields", () => { + const results: HybridResult[] = [ + { + path: "/test/file.ts", + startLine: 1, + endLine: 10, + score: 0.9, + snippet: "hello world", + source: "memory", + }, + ]; + + const reranked = 
applyMMRToHybridResults(results); + expect(reranked[0]).toEqual(results[0]); + }); + + it("creates unique IDs from path and startLine", () => { + const results: HybridResult[] = [ + { + path: "/test/a.ts", + startLine: 1, + endLine: 10, + score: 0.9, + snippet: "same content here", + source: "memory", + }, + { + path: "/test/a.ts", + startLine: 20, + endLine: 30, + score: 0.8, + snippet: "same content here", + source: "memory", + }, + ]; + + // Should work without ID collision + const reranked = applyMMRToHybridResults(results); + expect(reranked).toHaveLength(2); + }); + + it("re-ranks results for diversity", () => { + const results: HybridResult[] = [ + { + path: "/a.ts", + startLine: 1, + endLine: 10, + score: 1.0, + snippet: "function add numbers together", + source: "memory", + }, + { + path: "/b.ts", + startLine: 1, + endLine: 10, + score: 0.95, + snippet: "function add values together", + source: "memory", + }, + { + path: "/c.ts", + startLine: 1, + endLine: 10, + score: 0.9, + snippet: "database connection pool", + source: "memory", + }, + ]; + + const reranked = applyMMRToHybridResults(results, { lambda: 0.5 }); + + // First stays the same (highest score) + expect(reranked[0].path).toBe("/a.ts"); + // Second should be the diverse one + expect(reranked[1].path).toBe("/c.ts"); + }); + + it("respects disabled config", () => { + const results: HybridResult[] = [ + { path: "/a.ts", startLine: 1, endLine: 10, score: 0.9, snippet: "test", source: "memory" }, + { path: "/b.ts", startLine: 1, endLine: 10, score: 0.8, snippet: "test", source: "memory" }, + ]; + + const reranked = applyMMRToHybridResults(results, { enabled: false }); + expect(reranked).toEqual(results); + }); +}); + +describe("DEFAULT_MMR_CONFIG", () => { + it("has expected default values", () => { + expect(DEFAULT_MMR_CONFIG.enabled).toBe(true); + expect(DEFAULT_MMR_CONFIG.lambda).toBe(0.7); + }); +}); diff --git a/src/memory/hybrid.ts b/src/memory/hybrid.ts index cea9f26041..1ea1c5e6f7 100644 --- 
a/src/memory/hybrid.ts +++ b/src/memory/hybrid.ts @@ -1,5 +1,9 @@ +import { applyMMRToHybridResults, type MMRConfig, DEFAULT_MMR_CONFIG } from "./mmr.js"; + export type HybridSource = string; +export { type MMRConfig, DEFAULT_MMR_CONFIG }; + export type HybridVectorResult = { id: string; path: string; @@ -43,6 +47,8 @@ export function mergeHybridResults(params: { keyword: HybridKeywordResult[]; vectorWeight: number; textWeight: number; + /** MMR configuration for diversity-aware re-ranking */ + mmr?: Partial; }): Array<{ path: string; startLine: number; @@ -111,5 +117,13 @@ export function mergeHybridResults(params: { }; }); - return merged.toSorted((a, b) => b.score - a.score); + const sorted = merged.toSorted((a, b) => b.score - a.score); + + // Apply MMR re-ranking if enabled + const mmrConfig = { ...DEFAULT_MMR_CONFIG, ...params.mmr }; + if (mmrConfig.enabled) { + return applyMMRToHybridResults(sorted, mmrConfig); + } + + return sorted; } diff --git a/src/memory/mmr.ts b/src/memory/mmr.ts new file mode 100644 index 0000000000..3d6397184e --- /dev/null +++ b/src/memory/mmr.ts @@ -0,0 +1,198 @@ +/** + * Maximal Marginal Relevance (MMR) re-ranking algorithm. + * + * MMR balances relevance with diversity by iteratively selecting results + * that maximize: λ * relevance - (1-λ) * max_similarity_to_selected + * + * @see Carbonell & Goldstein, "The Use of MMR, Diversity-Based Reranking" (1998) + */ + +export type MMRItem = { + id: string; + score: number; + content: string; +}; + +export type MMRConfig = { + /** Enable/disable MMR re-ranking. Default: true */ + enabled: boolean; + /** Lambda parameter: 0 = max diversity, 1 = max relevance. Default: 0.7 */ + lambda: number; +}; + +export const DEFAULT_MMR_CONFIG: MMRConfig = { + enabled: true, + lambda: 0.7, +}; + +/** + * Tokenize text for Jaccard similarity computation. + * Extracts alphanumeric tokens and normalizes to lowercase. 
/**
 * Tokenize text for Jaccard similarity computation.
 *
 * Lowercases the input and extracts every run of letters, combining marks,
 * digits and underscores. Unicode-aware on purpose: with an ASCII-only
 * pattern, text written in other scripts produces an empty token set, and two
 * empty sets compare as identical in {@link jaccardSimilarity}.
 *
 * @param text - Raw text to tokenize.
 * @returns The set of distinct lowercase tokens (empty when nothing matches).
 */
export function tokenize(text: string): Set<string> {
  const tokens = text.toLowerCase().match(/[\p{L}\p{M}\p{N}_]+/gu) ?? [];
  return new Set(tokens);
}

/**
 * Compute Jaccard similarity (|A ∩ B| / |A ∪ B|) between two token sets.
 *
 * @param setA - First token set.
 * @param setB - Second token set.
 * @returns A value in [0, 1]; 1 for identical sets (including two empty sets),
 *   0 when exactly one set is empty or the sets are disjoint.
 */
export function jaccardSimilarity(setA: Set<string>, setB: Set<string>): number {
  if (setA.size === 0 && setB.size === 0) return 1;
  if (setA.size === 0 || setB.size === 0) return 0;

  // Iterate the smaller set and probe the larger one.
  const [smaller, larger] = setA.size <= setB.size ? [setA, setB] : [setB, setA];

  let intersectionSize = 0;
  for (const token of smaller) {
    if (larger.has(token)) intersectionSize++;
  }

  // Both sets are non-empty here, so the union is never empty.
  const unionSize = setA.size + setB.size - intersectionSize;
  return intersectionSize / unionSize;
}

/**
 * Compute text similarity between two content strings using Jaccard on tokens.
 *
 * @param contentA - First text.
 * @param contentB - Second text.
 * @returns Jaccard similarity of the two token sets, in [0, 1].
 */
export function textSimilarity(contentA: string, contentB: string): number {
  return jaccardSimilarity(tokenize(contentA), tokenize(contentB));
}

/**
 * Compute the maximum similarity between an item and all selected items.
 *
 * @param item - Candidate being scored.
 * @param selectedItems - Items already picked by the re-ranker.
 * @param tokenCache - Token sets keyed by item id; items missing from the
 *   cache are tokenized on the fly.
 * @returns The highest Jaccard similarity found, or 0 when nothing is selected.
 */
function maxSimilarityToSelected(
  item: MMRItem,
  selectedItems: MMRItem[],
  tokenCache: Map<string, Set<string>>,
): number {
  if (selectedItems.length === 0) return 0;

  let maxSim = 0;
  const itemTokens = tokenCache.get(item.id) ?? tokenize(item.content);

  for (const selected of selectedItems) {
    const selectedTokens = tokenCache.get(selected.id) ?? tokenize(selected.content);
    const sim = jaccardSimilarity(itemTokens, selectedTokens);
    if (sim > maxSim) maxSim = sim;
  }

  return maxSim;
}

/**
 * Compute MMR score for a candidate item.
 * MMR = λ * relevance - (1-λ) * max_similarity_to_selected
 *
 * @param relevance - Relevance of the candidate.
 * @param maxSimilarity - Highest similarity to any already-selected item.
 * @param lambda - Trade-off weight: 1 = relevance only, 0 = diversity only.
 * @returns The MMR score; higher is better.
 */
export function computeMMRScore(relevance: number, maxSimilarity: number, lambda: number): number {
  return lambda * relevance - (1 - lambda) * maxSimilarity;
}

/**
 * Re-rank items using Maximal Marginal Relevance (MMR).
 *
 * The algorithm iteratively selects items that balance relevance with diversity:
 * 1. Start with the highest-scoring item
 * 2. For each remaining slot, select the item that maximizes the MMR score
 * 3. MMR score = λ * relevance - (1-λ) * max_similarity_to_already_selected
 *
 * Relevance is the item's score min-max normalized to [0, 1] over the finite
 * scores in `items`. Ties on MMR score go to the higher original score, then
 * to the earlier item in `items`.
 *
 * Re-ranking never drops items: a NaN `lambda` falls back to the default, and
 * non-finite scores are mapped to a fixed relevance so they do not poison the
 * normalization.
 *
 * @param items - Items to re-rank, must have score and content
 * @param config - MMR configuration (lambda, enabled); lambda is clamped to [0, 1]
 * @returns A new array with the items in MMR order (input order when disabled
 *   or when there is at most one item)
 */
export function mmrRerank<T extends MMRItem>(items: T[], config: Partial<MMRConfig> = {}): T[] {
  const { enabled = DEFAULT_MMR_CONFIG.enabled, lambda = DEFAULT_MMR_CONFIG.lambda } = config;

  // Early exits
  if (!enabled || items.length <= 1) return [...items];

  // Clamp lambda to the valid range. NaN would survive Math.min/Math.max and
  // turn every MMR score into NaN, so replace it with the default first.
  const effectiveLambda = Number.isNaN(lambda) ? DEFAULT_MMR_CONFIG.lambda : lambda;
  const clampedLambda = Math.max(0, Math.min(1, effectiveLambda));

  // If lambda is 1, just return sorted by relevance (no diversity penalty)
  if (clampedLambda === 1) {
    return [...items].sort((a, b) => b.score - a.score);
  }

  // Pre-tokenize all items for efficiency
  const tokenCache = new Map<string, Set<string>>();
  for (const item of items) {
    tokenCache.set(item.id, tokenize(item.content));
  }

  // Score bounds over finite scores only. A single loop avoids spreading a
  // potentially large array into Math.max/Math.min arguments.
  let minScore = Infinity;
  let maxScore = -Infinity;
  for (const { score } of items) {
    if (!Number.isFinite(score)) continue;
    if (score < minScore) minScore = score;
    if (score > maxScore) maxScore = score;
  }
  const scoreRange = maxScore > minScore ? maxScore - minScore : 0;

  // Normalize scores to [0, 1] for fair comparison with similarity
  const normalizeScore = (score: number): number => {
    // +Infinity ranks as most relevant; NaN and -Infinity as least relevant.
    if (!Number.isFinite(score)) return score === Infinity ? 1 : 0;
    if (scoreRange === 0) return 1; // All finite scores equal
    return (score - minScore) / scoreRange;
  };

  const selected: T[] = [];
  const remaining = new Set(items);

  // Select items iteratively
  while (remaining.size > 0) {
    let bestItem: T | null = null;
    let bestMMRScore = -Infinity;

    for (const candidate of remaining) {
      const normalizedRelevance = normalizeScore(candidate.score);
      const maxSim = maxSimilarityToSelected(candidate, selected, tokenCache);
      const mmrScore = computeMMRScore(normalizedRelevance, maxSim, clampedLambda);

      // Use original score as tiebreaker (higher is better)
      if (
        mmrScore > bestMMRScore ||
        (mmrScore === bestMMRScore && bestItem !== null && candidate.score > bestItem.score)
      ) {
        bestMMRScore = mmrScore;
        bestItem = candidate;
      }
    }

    if (bestItem === null) {
      // Only reachable if the arithmetic degenerates (e.g. the score range
      // overflows). Keep the leftovers in input order instead of dropping them.
      selected.push(...remaining);
      break;
    }

    selected.push(bestItem);
    remaining.delete(bestItem);
  }

  return selected;
}

/**
 * Apply MMR re-ranking to hybrid search results.
 * Adapts the generic MMR function to work with the hybrid search result format:
 * `snippet` is used as the content to compare and `score` as the relevance.
 *
 * @param results - Hybrid search results to re-rank.
 * @param config - MMR configuration forwarded to {@link mmrRerank}.
 * @returns The same result objects in MMR order; an empty input is returned as-is.
 */
export function applyMMRToHybridResults<
  T extends { score: number; snippet: string; path: string; startLine: number },
>(results: T[], config: Partial<MMRConfig> = {}): T[] {
  if (results.length === 0) return results;

  // Carry the original result on each candidate so no id lookup is needed
  // afterwards. The index keeps ids unique even when path and startLine repeat.
  const candidates = results.map((result, index) => ({
    id: `${result.path}:${result.startLine}:${index}`,
    score: result.score,
    content: result.snippet,
    result,
  }));

  return mmrRerank(candidates, config).map((candidate) => candidate.result);
}