feat(memory): Add MMR re-ranking for search result diversity

Adds Maximal Marginal Relevance (MMR) re-ranking to hybrid search results.

- New mmr.ts with tokenization, Jaccard similarity, and MMR algorithm
- Integrated into mergeHybridResults() with optional mmr config
- 40 comprehensive tests covering edge cases and diversity behavior
- Configurable lambda parameter (default 0.7) to balance relevance vs diversity
- Updated CHANGELOG.md and memory docs

This helps avoid redundant results when multiple chunks contain similar content.
2026-02-19 18:39:20 -05:00 · 2026-01-26 15:23:22 -03:00
parent a0ab301dc3
commit fa9420069a
5 changed files with 610 additions and 7 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -57,6 +57,7 @@ Docs: https://docs.openclaw.ai
 - Slack/Discord/Telegram: add per-channel ack reaction overrides (account/channel-level) to support platform-specific emoji formats. (#17092) Thanks @zerone0x.
 - Cron/Gateway: add finished-run webhook delivery toggle (`notify`) and dedicated webhook auth token support (`cron.webhookToken`) for outbound cron webhook posts. (#14535) Thanks @advaitpaliwal.
 - Channels: deduplicate probe/token resolution base types across core + extensions while preserving per-channel error typing. (#16986) Thanks @iyoda and @thewilloftheshadow.
+- Memory: add MMR (Maximal Marginal Relevance) re-ranking for hybrid search diversity. Configurable via `memorySearch.query.hybrid.mmr`. Thanks @rodrigouroz.

 ### Fixes

--- a/docs/concepts/memory.md
+++ b/docs/concepts/memory.md
@@ -353,7 +353,6 @@ agents: {
 ```

 Tools:
-
 - `memory_search` — returns snippets with file + line ranges.
 - `memory_get` — read memory file content by path.

@@ -396,11 +395,11 @@ But it can be weak at exact, high-signal tokens:

 - IDs (`a828e60`, `b3b9895a…`)
 - code symbols (`memorySearch.query.hybrid`)
-- error strings (“sqlite-vec unavailable”)
+- error strings ("sqlite-vec unavailable")

 BM25 (full-text) is the opposite: strong at exact tokens, weaker at paraphrases.
 Hybrid search is the pragmatic middle ground: **use both retrieval signals** so you get
-good results for both “natural language” queries and “needle in a haystack” queries.
+good results for both "natural language" queries and "needle in a haystack" queries.

 #### How we merge results (the current design)

@@ -423,12 +422,28 @@ Notes:

 - `vectorWeight` + `textWeight` is normalized to 1.0 in config resolution, so weights behave as percentages.
 - If embeddings are unavailable (or the provider returns a zero-vector), we still run BM25 and return keyword matches.
-- If FTS5 can’t be created, we keep vector-only search (no hard failure).
+- If FTS5 can't be created, we keep vector-only search (no hard failure).

-This isn’t “IR-theory perfect”, but it’s simple, fast, and tends to improve recall/precision on real notes.
+This isn't "IR-theory perfect", but it's simple, fast, and tends to improve recall/precision on real notes.
 If we want to get fancier later, common next steps are Reciprocal Rank Fusion (RRF) or score normalization
 (min/max or z-score) before mixing.

+#### MMR re-ranking (diversity)
+
+When hybrid search returns results, multiple chunks may contain similar or overlapping content.
+**MMR (Maximal Marginal Relevance)** re-ranks the results to balance relevance with diversity,
+ensuring the top results aren't all saying the same thing.
+
+How it works:
+1. Results are scored by their original relevance (vector + BM25 weighted score).
+2. MMR iteratively selects results that maximize: `λ × relevance − (1−λ) × max_similarity_to_selected`.
+3. Similarity between results is measured using Jaccard text similarity on tokenized content.
+
+The `lambda` parameter controls the trade-off:
+- `lambda = 1.0` → pure relevance (no diversity penalty)
+- `lambda = 0.0` → maximum diversity (ignores relevance)
+- Default: `0.7` (balanced, slight relevance bias)
+
 Config:

 ```json5
@@ -440,7 +455,11 @@ agents: {
          enabled: true,
          vectorWeight: 0.7,
          textWeight: 0.3,
-          candidateMultiplier: 4
+          candidateMultiplier: 4,
+          mmr: {
+            enabled: true,
+            lambda: 0.7
+          }
        }
      }
    }
--- a/src/memory/tests/mmr.test.ts
+++ b/src/memory/tests/mmr.test.ts
@@ -0,0 +1,371 @@
+import { describe, it, expect } from "vitest";
+import {
+  tokenize,
+  jaccardSimilarity,
+  textSimilarity,
+  computeMMRScore,
+  mmrRerank,
+  applyMMRToHybridResults,
+  DEFAULT_MMR_CONFIG,
+  type MMRItem,
+} from "../mmr.js";
+
+describe("tokenize", () => {
+  it("extracts alphanumeric tokens and lowercases", () => {
+    const result = tokenize("Hello World 123");
+    expect(result).toEqual(new Set(["hello", "world", "123"]));
+  });
+
+  it("handles empty string", () => {
+    expect(tokenize("")).toEqual(new Set());
+  });
+
+  it("handles special characters only", () => {
+    expect(tokenize("!@#$%^&*()")).toEqual(new Set());
+  });
+
+  it("handles underscores in tokens", () => {
+    const result = tokenize("hello_world test_case");
+    expect(result).toEqual(new Set(["hello_world", "test_case"]));
+  });
+
+  it("deduplicates repeated tokens", () => {
+    const result = tokenize("hello hello world world");
+    expect(result).toEqual(new Set(["hello", "world"]));
+  });
+});
+
+describe("jaccardSimilarity", () => {
+  it("returns 1 for identical sets", () => {
+    const set = new Set(["a", "b", "c"]);
+    expect(jaccardSimilarity(set, set)).toBe(1);
+  });
+
+  it("returns 0 for disjoint sets", () => {
+    const setA = new Set(["a", "b"]);
+    const setB = new Set(["c", "d"]);
+    expect(jaccardSimilarity(setA, setB)).toBe(0);
+  });
+
+  it("returns 1 for two empty sets", () => {
+    expect(jaccardSimilarity(new Set(), new Set())).toBe(1);
+  });
+
+  it("returns 0 when one set is empty", () => {
+    expect(jaccardSimilarity(new Set(["a"]), new Set())).toBe(0);
+    expect(jaccardSimilarity(new Set(), new Set(["a"]))).toBe(0);
+  });
+
+  it("computes correct similarity for partial overlap", () => {
+    const setA = new Set(["a", "b", "c"]);
+    const setB = new Set(["b", "c", "d"]);
+    // Intersection: {b, c} = 2, Union: {a, b, c, d} = 4
+    expect(jaccardSimilarity(setA, setB)).toBe(0.5);
+  });
+
+  it("is symmetric", () => {
+    const setA = new Set(["a", "b"]);
+    const setB = new Set(["b", "c"]);
+    expect(jaccardSimilarity(setA, setB)).toBe(jaccardSimilarity(setB, setA));
+  });
+});
+
+describe("textSimilarity", () => {
+  it("returns 1 for identical text", () => {
+    expect(textSimilarity("hello world", "hello world")).toBe(1);
+  });
+
+  it("returns 1 for same words different order", () => {
+    expect(textSimilarity("hello world", "world hello")).toBe(1);
+  });
+
+  it("returns 0 for completely different text", () => {
+    expect(textSimilarity("hello world", "foo bar")).toBe(0);
+  });
+
+  it("handles case insensitivity", () => {
+    expect(textSimilarity("Hello World", "hello world")).toBe(1);
+  });
+});
+
+describe("computeMMRScore", () => {
+  it("returns pure relevance when lambda=1", () => {
+    expect(computeMMRScore(0.8, 0.5, 1)).toBe(0.8);
+  });
+
+  it("returns negative similarity when lambda=0", () => {
+    expect(computeMMRScore(0.8, 0.5, 0)).toBe(-0.5);
+  });
+
+  it("balances relevance and diversity at lambda=0.5", () => {
+    // 0.5 * 0.8 - 0.5 * 0.6 = 0.4 - 0.3 = 0.1
+    expect(computeMMRScore(0.8, 0.6, 0.5)).toBeCloseTo(0.1);
+  });
+
+  it("computes correctly with default lambda=0.7", () => {
+    // 0.7 * 1.0 - 0.3 * 0.5 = 0.7 - 0.15 = 0.55
+    expect(computeMMRScore(1.0, 0.5, 0.7)).toBeCloseTo(0.55);
+  });
+});
+
+describe("mmrRerank", () => {
+  describe("edge cases", () => {
+    it("returns empty array for empty input", () => {
+      expect(mmrRerank([])).toEqual([]);
+    });
+
+    it("returns single item unchanged", () => {
+      const items: MMRItem[] = [{ id: "1", score: 0.9, content: "hello" }];
+      expect(mmrRerank(items)).toEqual(items);
+    });
+
+    it("returns copy, not original array", () => {
+      const items: MMRItem[] = [{ id: "1", score: 0.9, content: "hello" }];
+      const result = mmrRerank(items);
+      expect(result).not.toBe(items);
+    });
+
+    it("returns items unchanged when disabled", () => {
+      const items: MMRItem[] = [
+        { id: "1", score: 0.9, content: "hello" },
+        { id: "2", score: 0.8, content: "hello" },
+      ];
+      const result = mmrRerank(items, { enabled: false });
+      expect(result).toEqual(items);
+    });
+  });
+
+  describe("lambda edge cases", () => {
+    const diverseItems: MMRItem[] = [
+      { id: "1", score: 1.0, content: "apple banana cherry" },
+      { id: "2", score: 0.9, content: "apple banana date" },
+      { id: "3", score: 0.8, content: "elderberry fig grape" },
+    ];
+
+    it("lambda=1 returns pure relevance order", () => {
+      const result = mmrRerank(diverseItems, { lambda: 1 });
+      expect(result.map((i) => i.id)).toEqual(["1", "2", "3"]);
+    });
+
+    it("lambda=0 maximizes diversity", () => {
+      const result = mmrRerank(diverseItems, { lambda: 0 });
+      // First item is still highest score (no penalty yet)
+      expect(result[0].id).toBe("1");
+      // Second should be most different from first
+      expect(result[1].id).toBe("3"); // elderberry... is most different
+    });
+
+    it("clamps lambda > 1 to 1", () => {
+      const result = mmrRerank(diverseItems, { lambda: 1.5 });
+      expect(result.map((i) => i.id)).toEqual(["1", "2", "3"]);
+    });
+
+    it("clamps lambda < 0 to 0", () => {
+      const result = mmrRerank(diverseItems, { lambda: -0.5 });
+      expect(result[0].id).toBe("1");
+      expect(result[1].id).toBe("3");
+    });
+  });
+
+  describe("diversity behavior", () => {
+    it("promotes diverse results over similar high-scoring ones", () => {
+      const items: MMRItem[] = [
+        { id: "1", score: 1.0, content: "machine learning neural networks" },
+        { id: "2", score: 0.95, content: "machine learning deep learning" },
+        { id: "3", score: 0.9, content: "database systems sql queries" },
+        { id: "4", score: 0.85, content: "machine learning algorithms" },
+      ];
+
+      const result = mmrRerank(items, { lambda: 0.5 });
+
+      // First is always highest score
+      expect(result[0].id).toBe("1");
+      // Second should be the diverse database item, not another ML item
+      expect(result[1].id).toBe("3");
+    });
+
+    it("handles items with identical content", () => {
+      const items: MMRItem[] = [
+        { id: "1", score: 1.0, content: "identical content" },
+        { id: "2", score: 0.9, content: "identical content" },
+        { id: "3", score: 0.8, content: "different stuff" },
+      ];
+
+      const result = mmrRerank(items, { lambda: 0.5 });
+      expect(result[0].id).toBe("1");
+      // Second should be different, not identical duplicate
+      expect(result[1].id).toBe("3");
+    });
+
+    it("handles all identical content gracefully", () => {
+      const items: MMRItem[] = [
+        { id: "1", score: 1.0, content: "same" },
+        { id: "2", score: 0.9, content: "same" },
+        { id: "3", score: 0.8, content: "same" },
+      ];
+
+      const result = mmrRerank(items, { lambda: 0.7 });
+      // Should still complete without error, order by score as tiebreaker
+      expect(result).toHaveLength(3);
+    });
+  });
+
+  describe("tie-breaking", () => {
+    it("uses original score as tiebreaker", () => {
+      const items: MMRItem[] = [
+        { id: "1", score: 1.0, content: "unique content one" },
+        { id: "2", score: 0.9, content: "unique content two" },
+        { id: "3", score: 0.8, content: "unique content three" },
+      ];
+
+      // With very different content and lambda=1, should be pure score order
+      const result = mmrRerank(items, { lambda: 1 });
+      expect(result.map((i) => i.id)).toEqual(["1", "2", "3"]);
+    });
+
+    it("preserves all items even with same MMR scores", () => {
+      const items: MMRItem[] = [
+        { id: "1", score: 0.5, content: "a" },
+        { id: "2", score: 0.5, content: "b" },
+        { id: "3", score: 0.5, content: "c" },
+      ];
+
+      const result = mmrRerank(items, { lambda: 0.7 });
+      expect(result).toHaveLength(3);
+      expect(new Set(result.map((i) => i.id))).toEqual(new Set(["1", "2", "3"]));
+    });
+  });
+
+  describe("score normalization", () => {
+    it("handles items with same scores", () => {
+      const items: MMRItem[] = [
+        { id: "1", score: 0.5, content: "hello world" },
+        { id: "2", score: 0.5, content: "foo bar" },
+      ];
+
+      const result = mmrRerank(items, { lambda: 0.7 });
+      expect(result).toHaveLength(2);
+    });
+
+    it("handles negative scores", () => {
+      const items: MMRItem[] = [
+        { id: "1", score: -0.5, content: "hello world" },
+        { id: "2", score: -1.0, content: "foo bar" },
+      ];
+
+      const result = mmrRerank(items, { lambda: 0.7 });
+      expect(result).toHaveLength(2);
+      // Higher score (less negative) should come first
+      expect(result[0].id).toBe("1");
+    });
+  });
+});
+
+describe("applyMMRToHybridResults", () => {
+  type HybridResult = {
+    path: string;
+    startLine: number;
+    endLine: number;
+    score: number;
+    snippet: string;
+    source: string;
+  };
+
+  it("returns empty array for empty input", () => {
+    expect(applyMMRToHybridResults([])).toEqual([]);
+  });
+
+  it("preserves all original fields", () => {
+    const results: HybridResult[] = [
+      {
+        path: "/test/file.ts",
+        startLine: 1,
+        endLine: 10,
+        score: 0.9,
+        snippet: "hello world",
+        source: "memory",
+      },
+    ];
+
+    const reranked = applyMMRToHybridResults(results);
+    expect(reranked[0]).toEqual(results[0]);
+  });
+
+  it("creates unique IDs from path and startLine", () => {
+    const results: HybridResult[] = [
+      {
+        path: "/test/a.ts",
+        startLine: 1,
+        endLine: 10,
+        score: 0.9,
+        snippet: "same content here",
+        source: "memory",
+      },
+      {
+        path: "/test/a.ts",
+        startLine: 20,
+        endLine: 30,
+        score: 0.8,
+        snippet: "same content here",
+        source: "memory",
+      },
+    ];
+
+    // Should work without ID collision
+    const reranked = applyMMRToHybridResults(results);
+    expect(reranked).toHaveLength(2);
+  });
+
+  it("re-ranks results for diversity", () => {
+    const results: HybridResult[] = [
+      {
+        path: "/a.ts",
+        startLine: 1,
+        endLine: 10,
+        score: 1.0,
+        snippet: "function add numbers together",
+        source: "memory",
+      },
+      {
+        path: "/b.ts",
+        startLine: 1,
+        endLine: 10,
+        score: 0.95,
+        snippet: "function add values together",
+        source: "memory",
+      },
+      {
+        path: "/c.ts",
+        startLine: 1,
+        endLine: 10,
+        score: 0.9,
+        snippet: "database connection pool",
+        source: "memory",
+      },
+    ];
+
+    const reranked = applyMMRToHybridResults(results, { lambda: 0.5 });
+
+    // First stays the same (highest score)
+    expect(reranked[0].path).toBe("/a.ts");
+    // Second should be the diverse one
+    expect(reranked[1].path).toBe("/c.ts");
+  });
+
+  it("respects disabled config", () => {
+    const results: HybridResult[] = [
+      { path: "/a.ts", startLine: 1, endLine: 10, score: 0.9, snippet: "test", source: "memory" },
+      { path: "/b.ts", startLine: 1, endLine: 10, score: 0.8, snippet: "test", source: "memory" },
+    ];
+
+    const reranked = applyMMRToHybridResults(results, { enabled: false });
+    expect(reranked).toEqual(results);
+  });
+});
+
+describe("DEFAULT_MMR_CONFIG", () => {
+  it("has expected default values", () => {
+    expect(DEFAULT_MMR_CONFIG.enabled).toBe(true);
+    expect(DEFAULT_MMR_CONFIG.lambda).toBe(0.7);
+  });
+});
--- a/src/memory/hybrid.ts
+++ b/src/memory/hybrid.ts
@@ -1,5 +1,9 @@
+import { applyMMRToHybridResults, type MMRConfig, DEFAULT_MMR_CONFIG } from "./mmr.js";
+
 export type HybridSource = string;

+export { type MMRConfig, DEFAULT_MMR_CONFIG };
+
 export type HybridVectorResult = {
  id: string;
  path: string;
@@ -43,6 +47,8 @@ export function mergeHybridResults(params: {
  keyword: HybridKeywordResult[];
  vectorWeight: number;
  textWeight: number;
+  /** MMR configuration for diversity-aware re-ranking */
+  mmr?: Partial<MMRConfig>;
 }): Array<{
  path: string;
  startLine: number;
@@ -111,5 +117,13 @@ export function mergeHybridResults(params: {
    };
  });

-  return merged.toSorted((a, b) => b.score - a.score);
+  const sorted = merged.toSorted((a, b) => b.score - a.score);
+
+  // Apply MMR re-ranking if enabled
+  const mmrConfig = { ...DEFAULT_MMR_CONFIG, ...params.mmr };
+  if (mmrConfig.enabled) {
+    return applyMMRToHybridResults(sorted, mmrConfig);
+  }
+
+  return sorted;
 }
--- a/src/memory/mmr.ts
+++ b/src/memory/mmr.ts
@@ -0,0 +1,198 @@
+/**
+ * Maximal Marginal Relevance (MMR) re-ranking algorithm.
+ *
+ * MMR balances relevance with diversity by iteratively selecting results
+ * that maximize: λ * relevance - (1-λ) * max_similarity_to_selected
+ *
+ * @see Carbonell & Goldstein, "The Use of MMR, Diversity-Based Reranking" (1998)
+ */
+
+export type MMRItem = {
+  id: string;
+  score: number;
+  content: string;
+};
+
+export type MMRConfig = {
+  /** Enable/disable MMR re-ranking. Default: true */
+  enabled: boolean;
+  /** Lambda parameter: 0 = max diversity, 1 = max relevance. Default: 0.7 */
+  lambda: number;
+};
+
+export const DEFAULT_MMR_CONFIG: MMRConfig = {
+  enabled: true,
+  lambda: 0.7,
+};
+
+/**
+ * Tokenize text for Jaccard similarity computation.
+ * Extracts lowercased runs of Unicode letters, marks, digits and "_", so non-ASCII text keeps its tokens.
+ */
+export function tokenize(text: string): Set<string> {
+  const tokens = text.toLowerCase().match(/[\p{L}\p{M}\p{N}_]+/gu) ?? [];
+  return new Set(tokens);
+}
+
+/**
+ * Jaccard index |A ∩ B| / |A ∪ B| of two token sets, in [0, 1].
+ * Two empty sets count as identical (1); exactly one empty set yields 0.
+ */
+export function jaccardSimilarity(setA: Set<string>, setB: Set<string>): number {
+  if (setA.size === 0 || setB.size === 0) {
+    return setA.size === setB.size ? 1 : 0;
+  }
+
+  const aIsSmaller = setA.size <= setB.size;
+  const probe = aIsSmaller ? setA : setB;
+  const target = aIsSmaller ? setB : setA;
+  let shared = 0;
+  probe.forEach((token) => {
+    if (target.has(token)) shared += 1;
+  });
+  const unionSize = setA.size + setB.size - shared;
+  return unionSize === 0 ? 0 : shared / unionSize;
+}
+
+/** Jaccard similarity of the token sets extracted from two content strings. */
+export function textSimilarity(contentA: string, contentB: string): number {
+  const tokensA = tokenize(contentA);
+  const tokensB = tokenize(contentB);
+  return jaccardSimilarity(tokensA, tokensB);
+}
+
+/**
+ * Highest Jaccard similarity between `item` and any already-selected item.
+ * Returns 0 when nothing has been selected yet.
+ */
+function maxSimilarityToSelected(
+  item: MMRItem,
+  selectedItems: MMRItem[],
+  tokenCache: Map<string, Set<string>>,
+): number {
+  // Cached token sets are preferred; uncached content is tokenized on demand.
+  const tokensOf = (entry: MMRItem): Set<string> =>
+    tokenCache.get(entry.id) ?? tokenize(entry.content);
+
+  if (selectedItems.length === 0) return 0;
+  const candidateTokens = tokensOf(item);
+
+  return selectedItems.reduce((highest, chosen) => {
+    const similarity = jaccardSimilarity(candidateTokens, tokensOf(chosen));
+    return similarity > highest ? similarity : highest;
+  }, 0);
+}
+
+/**
+ * MMR score of a candidate: λ * relevance - (1-λ) * max_similarity_to_selected.
+ */
+export function computeMMRScore(relevance: number, maxSimilarity: number, lambda: number): number {
+  const diversityPenalty = (1 - lambda) * maxSimilarity;
+  return lambda * relevance - diversityPenalty;
+}
+
+/**
+ * Re-rank items using Maximal Marginal Relevance (MMR).
+ *
+ * Greedy selection: the first pick is the highest-scoring item; each later pick
+ * maximizes λ * normalized_relevance - (1-λ) * max_similarity_to_already_selected,
+ * with ties broken by the higher original score. Non-finite scores and a NaN
+ * lambda are tolerated so a single bad value never causes results to be dropped.
+ *
+ * @param items - Items to re-rank, must have score and content
+ * @param config - MMR configuration (lambda, enabled)
+ * @returns A new array with the input items in MMR order
+ */
+export function mmrRerank<T extends MMRItem>(items: T[], config: Partial<MMRConfig> = {}): T[] {
+  const { enabled = DEFAULT_MMR_CONFIG.enabled, lambda = DEFAULT_MMR_CONFIG.lambda } = config;
+
+  // Early exits
+  if (!enabled || items.length <= 1) return [...items];
+
+  // Clamp lambda to [0, 1]; NaN would poison every MMR score, so fall back to the default
+  const safeLambda = Number.isNaN(lambda) ? DEFAULT_MMR_CONFIG.lambda : lambda;
+  const clampedLambda = Math.max(0, Math.min(1, safeLambda));
+
+  // If lambda is 1, just return sorted by relevance (no diversity penalty)
+  if (clampedLambda === 1) {
+    return [...items].sort((a, b) => b.score - a.score);
+  }
+
+  // Pre-tokenize all items for efficiency
+  const tokenCache = new Map<string, Set<string>>();
+  for (const item of items) {
+    tokenCache.set(item.id, tokenize(item.content));
+  }
+
+  // Normalize finite scores to [0, 1]; reduce() sidesteps the Math.max(...spread) argument limit
+  const finiteScores = items.map((i) => i.score).filter((s) => Number.isFinite(s));
+  const maxScore = finiteScores.reduce((m, s) => Math.max(m, s), -Infinity);
+  const minScore = finiteScores.reduce((m, s) => Math.min(m, s), Infinity);
+  const scoreRange = maxScore - minScore;
+
+  const normalizeScore = (score: number): number => {
+    if (!Number.isFinite(score)) return score === Infinity ? 1 : 0; // NaN/-Infinity rank last
+    if (scoreRange === 0) return 1; // All scores equal
+    return (score - minScore) / scoreRange;
+  };
+
+  const selected: T[] = [];
+  const remaining = new Set(items);
+
+  // Select items iteratively
+  while (remaining.size > 0) {
+    let bestItem: T | null = null;
+    let bestMMRScore = -Infinity;
+
+    for (const candidate of remaining) {
+      const normalizedRelevance = normalizeScore(candidate.score);
+      const maxSim = maxSimilarityToSelected(candidate, selected, tokenCache);
+      const mmrScore = computeMMRScore(normalizedRelevance, maxSim, clampedLambda);
+
+      // Use original score as tiebreaker (higher is better)
+      if (
+        mmrScore > bestMMRScore ||
+        (mmrScore === bestMMRScore && bestItem && candidate.score > bestItem.score)
+      ) {
+        bestMMRScore = mmrScore;
+        bestItem = candidate;
+      }
+    }
+
+    // Defensive: MMR scores are always finite here, but never drop results if nothing was picked
+    if (bestItem === null) return [...selected, ...remaining];
+    selected.push(bestItem);
+    remaining.delete(bestItem);
+  }
+
+  return selected;
+}
+
+/**
+ * Apply MMR re-ranking to hybrid search results.
+ * Adapts the generic MMR function to work with the hybrid search result format:
+ * the snippet is used as the content compared for similarity, and every result
+ * object is passed through untouched (only the order changes).
+ *
+ * @param results - Hybrid search results; the array itself is never mutated or returned
+ * @param config - MMR configuration (lambda, enabled)
+ * @returns A new array holding the same result objects in MMR order
+ */
+export function applyMMRToHybridResults<
+  T extends { score: number; snippet: string; path: string; startLine: number },
+>(results: T[], config: Partial<MMRConfig> = {}): T[] {
+  // Always hand back a fresh array, including for empty input
+  if (results.length === 0) return [];
+
+  // Wrap each result in an MMR item; the index keeps IDs unique even when two
+  // results share the same path and start line.
+  const mmrItems = results.map((result, index) => ({
+    id: `${result.path}:${result.startLine}:${index}`,
+    score: result.score,
+    content: result.snippet,
+    original: result,
+  }));
+
+  // mmrRerank is generic over the item type, so the wrapped result survives re-ranking
+  return mmrRerank(mmrItems, config).map((item) => item.original);
+}