feat: LLM-based query expansion for FTS mode

When searching in FTS-only mode (no embedding provider), extract meaningful
keywords from conversational queries using stop-word filtering — with an
optional LLM-based expander hook — to improve search results.

Changes:
- New query-expansion module with keyword extraction
- Supports English and Chinese stop word filtering
- Null safety guards for FTS-only mode (provider can be null)
- Lint compliance fixes for string iteration

This helps users find relevant memory entries even with vague queries.
This commit is contained in:
康熙
2026-02-16 14:46:02 +08:00
committed by Peter Steinberger
parent 65aedac20e
commit bcab2469de
5 changed files with 506 additions and 13 deletions

View File

@@ -72,7 +72,7 @@ class MemoryManagerEmbeddingOps {
}
private loadEmbeddingCache(hashes: string[]): Map<string, number[]> {
if (!this.cache.enabled) {
if (!this.cache.enabled || !this.provider) {
return new Map();
}
if (hashes.length === 0) {
@@ -114,7 +114,7 @@ class MemoryManagerEmbeddingOps {
}
private upsertEmbeddingCache(entries: Array<{ hash: string; embedding: number[] }>): void {
if (!this.cache.enabled) {
if (!this.cache.enabled || !this.provider) {
return;
}
if (entries.length === 0) {
@@ -245,6 +245,9 @@ class MemoryManagerEmbeddingOps {
entry: MemoryFileEntry | SessionFileEntry,
source: MemorySource,
): Promise<number[][]> {
if (!this.provider) {
return this.embedChunksInBatches(chunks);
}
if (this.provider.id === "openai" && this.openAi) {
return this.embedChunksWithOpenAiBatch(chunks, entry, source);
}
@@ -423,7 +426,7 @@ class MemoryManagerEmbeddingOps {
method: "POST",
url: OPENAI_BATCH_ENDPOINT,
body: {
model: this.openAi?.model ?? this.provider.model,
model: this.openAi?.model ?? this.provider?.model ?? "text-embedding-3-small",
input: chunk.text,
},
}),
@@ -493,6 +496,9 @@ class MemoryManagerEmbeddingOps {
if (texts.length === 0) {
return [];
}
if (!this.provider) {
throw new Error("Cannot embed batch in FTS-only mode (no embedding provider)");
}
let attempt = 0;
let delayMs = EMBEDDING_RETRY_BASE_DELAY_MS;
while (true) {
@@ -532,7 +538,7 @@ class MemoryManagerEmbeddingOps {
}
private resolveEmbeddingTimeout(kind: "query" | "batch"): number {
const isLocal = this.provider.id === "local";
const isLocal = this.provider?.id === "local";
if (kind === "query") {
return isLocal ? EMBEDDING_QUERY_TIMEOUT_LOCAL_MS : EMBEDDING_QUERY_TIMEOUT_REMOTE_MS;
}
@@ -540,6 +546,9 @@ class MemoryManagerEmbeddingOps {
}
private async embedQueryWithTimeout(text: string): Promise<number[]> {
if (!this.provider) {
throw new Error("Cannot embed query in FTS-only mode (no embedding provider)");
}
const timeoutMs = this.resolveEmbeddingTimeout("query");
log.debug("memory embeddings: query start", { provider: this.provider.id, timeoutMs });
return await this.withTimeout(
@@ -685,6 +694,15 @@ class MemoryManagerEmbeddingOps {
entry: MemoryFileEntry | SessionFileEntry,
options: { source: MemorySource; content?: string },
) {
// FTS-only mode: skip indexing if no provider
if (!this.provider) {
log.debug("Skipping embedding indexing in FTS-only mode", {
path: entry.path,
source: options.source,
});
return;
}
const content = options.content ?? (await fs.readFile(entry.absPath, "utf-8"));
const chunks = enforceEmbeddingMaxInputTokens(
this.provider,

View File

@@ -544,6 +544,12 @@ class MemoryManagerSyncOps {
needsFullReindex: boolean;
progress?: MemorySyncProgressState;
}) {
// FTS-only mode: skip embedding sync (no provider)
if (!this.provider) {
log.debug("Skipping memory file sync in FTS-only mode (no embedding provider)");
return;
}
const files = await listMemoryFiles(this.workspaceDir, this.settings.extraPaths);
const fileEntries = await Promise.all(
files.map(async (file) => buildFileEntry(file, this.workspaceDir)),
@@ -619,6 +625,12 @@ class MemoryManagerSyncOps {
needsFullReindex: boolean;
progress?: MemorySyncProgressState;
}) {
// FTS-only mode: skip embedding sync (no provider)
if (!this.provider) {
log.debug("Skipping session file sync in FTS-only mode (no embedding provider)");
return;
}
const files = await listSessionFilesForAgent(this.agentId);
const activePaths = new Set(files.map((file) => sessionPathForFile(file)));
const indexAll = params.needsFullReindex || this.sessionsDirtyFiles.size === 0;
@@ -759,8 +771,8 @@ class MemoryManagerSyncOps {
const needsFullReindex =
params?.force ||
!meta ||
meta.model !== this.provider.model ||
meta.provider !== this.provider.id ||
(this.provider && meta.model !== this.provider.model) ||
(this.provider && meta.provider !== this.provider.id) ||
meta.providerKey !== this.providerKey ||
meta.chunkTokens !== this.settings.chunking.tokens ||
meta.chunkOverlap !== this.settings.chunking.overlap ||
@@ -834,6 +846,7 @@ class MemoryManagerSyncOps {
const batch = this.settings.remote?.batch;
const enabled = Boolean(
batch?.enabled &&
this.provider &&
((this.openAi && this.provider.id === "openai") ||
(this.gemini && this.provider.id === "gemini") ||
(this.voyage && this.provider.id === "voyage")),
@@ -849,7 +862,7 @@ class MemoryManagerSyncOps {
private async activateFallbackProvider(reason: string): Promise<boolean> {
const fallback = this.settings.fallback;
if (!fallback || fallback === "none" || fallback === this.provider.id) {
if (!fallback || fallback === "none" || !this.provider || fallback === this.provider.id) {
return false;
}
if (this.fallbackFrom) {
@@ -957,8 +970,8 @@ class MemoryManagerSyncOps {
}
nextMeta = {
model: this.provider.model,
provider: this.provider.id,
model: this.provider?.model ?? "fts-only",
provider: this.provider?.id ?? "none",
providerKey: this.providerKey,
chunkTokens: this.settings.chunking.tokens,
chunkOverlap: this.settings.chunking.overlap,
@@ -1023,8 +1036,8 @@ class MemoryManagerSyncOps {
}
const nextMeta: MemoryIndexMeta = {
model: this.provider.model,
provider: this.provider.id,
model: this.provider?.model ?? "fts-only",
provider: this.provider?.id ?? "none",
providerKey: this.providerKey,
chunkTokens: this.settings.chunking.tokens,
chunkOverlap: this.settings.chunking.overlap,

View File

@@ -28,6 +28,7 @@ import { isMemoryPath, normalizeExtraMemoryPaths } from "./internal.js";
import { memoryManagerEmbeddingOps } from "./manager-embedding-ops.js";
import { searchKeyword, searchVector } from "./manager-search.js";
import { memoryManagerSyncOps } from "./manager-sync-ops.js";
import { extractKeywords } from "./query-expansion.js";
const SNIPPET_MAX_CHARS = 700;
const VECTOR_TABLE = "chunks_vec";
const FTS_TABLE = "chunks_fts";
@@ -233,8 +234,34 @@ export class MemoryIndexManager implements MemorySearchManager {
log.warn("memory search: no provider and FTS unavailable");
return [];
}
const ftsResults = await this.searchKeyword(cleaned, candidates).catch(() => []);
return ftsResults.filter((entry) => entry.score >= minScore).slice(0, maxResults);
// Extract keywords for better FTS matching on conversational queries
// e.g., "that thing we discussed about the API" → ["discussed", "API"]
const keywords = extractKeywords(cleaned);
const searchTerms = keywords.length > 0 ? keywords : [cleaned];
// Search with each keyword and merge results
const resultSets = await Promise.all(
searchTerms.map((term) => this.searchKeyword(term, candidates).catch(() => [])),
);
// Merge and deduplicate results, keeping highest score for each chunk
const seenIds = new Map<string, (typeof resultSets)[0][0]>();
for (const results of resultSets) {
for (const result of results) {
const existing = seenIds.get(result.id);
if (!existing || result.score > existing.score) {
seenIds.set(result.id, result);
}
}
}
const merged = [...seenIds.values()]
.toSorted((a, b) => b.score - a.score)
.filter((entry) => entry.score >= minScore)
.slice(0, maxResults);
return merged;
}
const keywordResults = hybrid.enabled

View File

@@ -0,0 +1,78 @@
import { describe, expect, it } from "vitest";
import { expandQueryForFts, extractKeywords } from "./query-expansion.js";
// Unit tests for the local keyword extractor: English and Chinese stop-word
// filtering, mixed-language queries, empty/stop-word-only input, and dedup.
describe("extractKeywords", () => {
it("extracts keywords from English conversational query", () => {
const keywords = extractKeywords("that thing we discussed about the API");
// Keywords are lowercased by the tokenizer ("API" → "api").
expect(keywords).toContain("discussed");
expect(keywords).toContain("api");
// Should not include stop words
expect(keywords).not.toContain("that");
expect(keywords).not.toContain("thing");
expect(keywords).not.toContain("we");
expect(keywords).not.toContain("about");
expect(keywords).not.toContain("the");
});
it("extracts keywords from Chinese conversational query", () => {
const keywords = extractKeywords("之前讨论的那个方案");
// "讨论"/"方案" survive as character bigrams produced by the tokenizer.
expect(keywords).toContain("讨论");
expect(keywords).toContain("方案");
// Should not include stop words
expect(keywords).not.toContain("之前");
expect(keywords).not.toContain("的");
expect(keywords).not.toContain("那个");
});
it("extracts keywords from mixed language query", () => {
const keywords = extractKeywords("昨天讨论的 API design");
expect(keywords).toContain("讨论");
expect(keywords).toContain("api");
expect(keywords).toContain("design");
});
it("returns specific technical terms", () => {
const keywords = extractKeywords("what was the solution for the CFR bug");
expect(keywords).toContain("solution");
expect(keywords).toContain("cfr");
expect(keywords).toContain("bug");
});
it("handles empty query", () => {
// Both the empty string and whitespace-only input yield no keywords.
expect(extractKeywords("")).toEqual([]);
expect(extractKeywords(" ")).toEqual([]);
});
it("handles query with only stop words", () => {
const keywords = extractKeywords("the a an is are");
expect(keywords.length).toBe(0);
});
it("removes duplicate keywords", () => {
const keywords = extractKeywords("test test testing");
const testCount = keywords.filter((k) => k === "test").length;
expect(testCount).toBe(1);
});
});
// Unit tests for query expansion: the returned object carries the trimmed
// original query, the extracted keywords, and an OR-joined FTS query string.
describe("expandQueryForFts", () => {
it("returns original query and extracted keywords", () => {
const result = expandQueryForFts("that API we discussed");
expect(result.original).toBe("that API we discussed");
expect(result.keywords).toContain("api");
expect(result.keywords).toContain("discussed");
});
it("builds expanded OR query for FTS", () => {
const result = expandQueryForFts("the solution for bugs");
expect(result.expanded).toContain("OR");
expect(result.expanded).toContain("solution");
expect(result.expanded).toContain("bugs");
});
it("returns original query when no keywords extracted", () => {
// With no extractable keywords, `expanded` falls back to the original.
const result = expandQueryForFts("the");
expect(result.keywords.length).toBe(0);
expect(result.expanded).toBe("the");
});
});

View File

@@ -0,0 +1,357 @@
/**
* Query expansion for FTS-only search mode.
*
* When no embedding provider is available, we fall back to FTS (full-text search).
* FTS works best with specific keywords, but users often ask conversational queries
* like "that thing we discussed yesterday" or "之前讨论的那个方案".
*
* This module extracts meaningful keywords from such queries to improve FTS results.
*/
// Common stop words that don't add search value
// English stop words that add no search value in FTS queries.
// All entries are lowercase because the tokenizer lowercases input before
// lookup. Fix: "before" was previously listed twice (prepositions AND time
// references); the duplicate is removed — Set membership is unchanged.
const STOP_WORDS_EN = new Set([
  // Articles and determiners
  "a", "an", "the", "this", "that", "these", "those",
  // Pronouns
  "i", "me", "my", "we", "our", "you", "your",
  "he", "she", "it", "they", "them",
  // Common auxiliary and modal verbs
  "is", "are", "was", "were", "be", "been", "being",
  "have", "has", "had", "do", "does", "did",
  "will", "would", "could", "should", "can", "may", "might",
  // Prepositions ("before" lives here; it also covers the time sense)
  "in", "on", "at", "to", "for", "of", "with", "by", "from", "about",
  "into", "through", "during", "before", "after", "above", "below",
  "between", "under", "over",
  // Conjunctions and question words
  "and", "or", "but", "if", "then", "because", "as", "while",
  "when", "where", "what", "which", "who", "how", "why",
  // Time references (vague, not useful for FTS)
  "yesterday", "today", "tomorrow", "earlier", "later", "recently",
  "ago", "just", "now",
  // Vague noun references
  "thing", "things", "stuff",
  "something", "anything", "everything", "nothing",
  // Politeness / request verbs common in conversational queries
  "please", "help", "find", "show", "get", "tell", "give",
]);
// Chinese stop words: pronouns, particles, semantically weak verbs,
// connectives, fuzzy time words, and filler nouns with no FTS search value.
const STOP_WORDS_ZH = new Set([
  // Pronouns and demonstratives
  "我", "我们", "你", "你们", "他", "她", "它", "他们",
  "这", "那", "这个", "那个", "这些", "那些",
  // Particles and sentence-final auxiliaries
  "的", "了", "着", "过", "得", "地",
  "吗", "呢", "吧", "啊", "呀", "嘛", "啦",
  // Common, semantically weak verbs
  "是", "有", "在", "被", "把", "给", "让", "用", "到", "去", "来",
  "做", "说", "看", "找", "想", "要", "能", "会", "可以",
  // Connectives and adverbs
  "和", "与", "或", "但", "但是", "因为", "所以", "如果", "虽然",
  "而", "也", "都", "就", "还", "又", "再", "才", "只",
  // Vague time references
  "之前", "以前", "之后", "以后", "刚才", "现在",
  "昨天", "今天", "明天", "最近",
  // Filler nouns and question words
  "东西", "事情", "事", "什么", "哪个", "哪些", "怎么", "为什么", "多少",
  // Politeness / request words
  "请", "帮", "帮忙", "告诉",
]);
/**
 * Check whether a token is worth keeping as a search keyword.
 *
 * Rejects empty tokens, short pure-ASCII-letter words (< 3 chars, likely
 * stop words or fragments), pure numbers, and punctuation/symbol-only
 * tokens. Single CJK characters pass: one character can carry a full
 * word's meaning.
 *
 * Fix: the original `!token || token.length === 0` was redundant — an
 * empty string is already falsy.
 */
function isValidKeyword(token: string): boolean {
  if (!token) {
    return false;
  }
  // Very short pure-ASCII-letter tokens are almost always noise.
  if (token.length < 3 && /^[a-zA-Z]+$/.test(token)) {
    return false;
  }
  // Bare numbers are rarely useful search terms.
  if (/^\d+$/.test(token)) {
    return false;
  }
  // Punctuation- or symbol-only tokens (Unicode-aware classes).
  if (/^[\p{P}\p{S}]+$/u.test(token)) {
    return false;
  }
  return true;
}
/**
 * Tokenize mixed English/Chinese text for keyword extraction.
 *
 * Input is lowercased, then split on whitespace and punctuation. CJK runs
 * are expanded into character unigrams plus adjacent bigrams (no proper
 * segmenter is available; bigrams approximate Chinese words). Non-CJK runs
 * are kept whole.
 *
 * Fix: the original dropped latin text glued to CJK (e.g. "讨论api" lost
 * "api") and formed bigrams across non-adjacent CJK characters. Each
 * segment is now partitioned into maximal CJK / non-CJK runs first.
 */
function tokenize(text: string): string[] {
  const tokens: string[] = [];
  const normalized = text.toLowerCase().trim();
  // Unicode-aware split on whitespace and punctuation.
  const segments = normalized.split(/[\s\p{P}]+/u).filter(Boolean);
  for (const segment of segments) {
    // Partition into maximal CJK and non-CJK runs so neither half of a
    // mixed token is lost.
    const runs = segment.match(/[\u4e00-\u9fff]+|[^\u4e00-\u9fff]+/gu) ?? [];
    for (const run of runs) {
      if (/^[\u4e00-\u9fff]/.test(run)) {
        const chars = [...run];
        // Individual characters…
        tokens.push(...chars);
        // …plus adjacent-character bigrams for better phrase matching.
        for (let i = 0; i + 1 < chars.length; i++) {
          tokens.push(chars[i] + chars[i + 1]);
        }
      } else {
        tokens.push(run);
      }
    }
  }
  return tokens;
}
/**
 * Extract keywords from a conversational query for FTS search.
 *
 * Tokens are kept in first-seen order; English/Chinese stop words,
 * duplicates, and tokens rejected by isValidKeyword are dropped.
 *
 * Examples:
 * - "that thing we discussed about the API" → ["discussed", "api"]
 * - "之前讨论的那个方案" → keywords including "讨论", "方案"
 * - "what was the solution for the bug" → ["solution", "bug"]
 */
export function extractKeywords(query: string): string[] {
  const kept = new Set<string>();
  for (const token of tokenize(query)) {
    const isStopWord = STOP_WORDS_EN.has(token) || STOP_WORDS_ZH.has(token);
    if (!isStopWord && isValidKeyword(token)) {
      // Set insertion ignores repeats while preserving first-seen order.
      kept.add(token);
    }
  }
  return [...kept];
}
/**
 * Expand a query for FTS search.
 *
 * Returns the trimmed original query, the extracted keywords, and an
 * OR-joined query combining both, so exact matches and per-keyword
 * matches are found.
 *
 * NOTE(review): the query is passed through unescaped — assumes the FTS
 * layer tolerates raw user input; confirm against the search call site.
 *
 * @param query - User's original query
 * @returns Object with original query, extracted keywords, and expanded query
 */
export function expandQueryForFts(query: string): {
  original: string;
  keywords: string[];
  expanded: string;
} {
  const original = query.trim();
  const keywords = extractKeywords(original);
  const expanded =
    keywords.length === 0 ? original : [original, ...keywords].join(" OR ");
  return { original, keywords, expanded };
}
/**
 * Type for an optional LLM-based query expander.
 * Can be provided to enhance keyword extraction with semantic understanding.
 */
export type LlmQueryExpander = (query: string) => Promise<string[]>;
/**
 * Expand a query, preferring the LLM expander when one is supplied.
 *
 * The LLM path is strictly best-effort: a thrown error or an empty result
 * falls back to local stop-word-based keyword extraction.
 */
export async function expandQueryWithLlm(
  query: string,
  llmExpander?: LlmQueryExpander,
): Promise<string[]> {
  if (llmExpander) {
    try {
      const fromLlm = await llmExpander(query);
      if (fromLlm.length > 0) {
        return fromLlm;
      }
    } catch {
      // Swallow LLM failures; local extraction below is the fallback.
    }
  }
  return extractKeywords(query);
}