feat: LLM-based query expansion for FTS mode

When searching in FTS-only mode (no embedding provider), extract meaningful
keywords from conversational queries using stop-word filtering — with an
optional LLM-based expander hook — to improve search results.

Changes:
- New query-expansion module with keyword extraction
- Supports English and Chinese stop word filtering
- Null safety guards for FTS-only mode (provider can be null)
- Lint compliance fixes for string iteration

This helps users find relevant memory entries even with vague queries.
This commit is contained in:
康熙
2026-02-16 14:46:02 +08:00
committed by Peter Steinberger
parent 65aedac20e
commit bcab2469de
5 changed files with 506 additions and 13 deletions

View File

@@ -72,7 +72,7 @@ class MemoryManagerEmbeddingOps {
}
private loadEmbeddingCache(hashes: string[]): Map<string, number[]> {
if (!this.cache.enabled) {
if (!this.cache.enabled || !this.provider) {
return new Map();
}
if (hashes.length === 0) {
@@ -114,7 +114,7 @@ class MemoryManagerEmbeddingOps {
}
private upsertEmbeddingCache(entries: Array<{ hash: string; embedding: number[] }>): void {
if (!this.cache.enabled) {
if (!this.cache.enabled || !this.provider) {
return;
}
if (entries.length === 0) {
@@ -245,6 +245,9 @@ class MemoryManagerEmbeddingOps {
entry: MemoryFileEntry | SessionFileEntry,
source: MemorySource,
): Promise<number[][]> {
if (!this.provider) {
return this.embedChunksInBatches(chunks);
}
if (this.provider.id === "openai" && this.openAi) {
return this.embedChunksWithOpenAiBatch(chunks, entry, source);
}
@@ -423,7 +426,7 @@ class MemoryManagerEmbeddingOps {
method: "POST",
url: OPENAI_BATCH_ENDPOINT,
body: {
model: this.openAi?.model ?? this.provider.model,
model: this.openAi?.model ?? this.provider?.model ?? "text-embedding-3-small",
input: chunk.text,
},
}),
@@ -493,6 +496,9 @@ class MemoryManagerEmbeddingOps {
if (texts.length === 0) {
return [];
}
if (!this.provider) {
throw new Error("Cannot embed batch in FTS-only mode (no embedding provider)");
}
let attempt = 0;
let delayMs = EMBEDDING_RETRY_BASE_DELAY_MS;
while (true) {
@@ -532,7 +538,7 @@ class MemoryManagerEmbeddingOps {
}
private resolveEmbeddingTimeout(kind: "query" | "batch"): number {
const isLocal = this.provider.id === "local";
const isLocal = this.provider?.id === "local";
if (kind === "query") {
return isLocal ? EMBEDDING_QUERY_TIMEOUT_LOCAL_MS : EMBEDDING_QUERY_TIMEOUT_REMOTE_MS;
}
@@ -540,6 +546,9 @@ class MemoryManagerEmbeddingOps {
}
private async embedQueryWithTimeout(text: string): Promise<number[]> {
if (!this.provider) {
throw new Error("Cannot embed query in FTS-only mode (no embedding provider)");
}
const timeoutMs = this.resolveEmbeddingTimeout("query");
log.debug("memory embeddings: query start", { provider: this.provider.id, timeoutMs });
return await this.withTimeout(
@@ -685,6 +694,15 @@ class MemoryManagerEmbeddingOps {
entry: MemoryFileEntry | SessionFileEntry,
options: { source: MemorySource; content?: string },
) {
// FTS-only mode: skip indexing if no provider
if (!this.provider) {
log.debug("Skipping embedding indexing in FTS-only mode", {
path: entry.path,
source: options.source,
});
return;
}
const content = options.content ?? (await fs.readFile(entry.absPath, "utf-8"));
const chunks = enforceEmbeddingMaxInputTokens(
this.provider,

View File

@@ -544,6 +544,12 @@ class MemoryManagerSyncOps {
needsFullReindex: boolean;
progress?: MemorySyncProgressState;
}) {
// FTS-only mode: skip embedding sync (no provider)
if (!this.provider) {
log.debug("Skipping memory file sync in FTS-only mode (no embedding provider)");
return;
}
const files = await listMemoryFiles(this.workspaceDir, this.settings.extraPaths);
const fileEntries = await Promise.all(
files.map(async (file) => buildFileEntry(file, this.workspaceDir)),
@@ -619,6 +625,12 @@ class MemoryManagerSyncOps {
needsFullReindex: boolean;
progress?: MemorySyncProgressState;
}) {
// FTS-only mode: skip embedding sync (no provider)
if (!this.provider) {
log.debug("Skipping session file sync in FTS-only mode (no embedding provider)");
return;
}
const files = await listSessionFilesForAgent(this.agentId);
const activePaths = new Set(files.map((file) => sessionPathForFile(file)));
const indexAll = params.needsFullReindex || this.sessionsDirtyFiles.size === 0;
@@ -759,8 +771,8 @@ class MemoryManagerSyncOps {
const needsFullReindex =
params?.force ||
!meta ||
meta.model !== this.provider.model ||
meta.provider !== this.provider.id ||
(this.provider && meta.model !== this.provider.model) ||
(this.provider && meta.provider !== this.provider.id) ||
meta.providerKey !== this.providerKey ||
meta.chunkTokens !== this.settings.chunking.tokens ||
meta.chunkOverlap !== this.settings.chunking.overlap ||
@@ -834,6 +846,7 @@ class MemoryManagerSyncOps {
const batch = this.settings.remote?.batch;
const enabled = Boolean(
batch?.enabled &&
this.provider &&
((this.openAi && this.provider.id === "openai") ||
(this.gemini && this.provider.id === "gemini") ||
(this.voyage && this.provider.id === "voyage")),
@@ -849,7 +862,7 @@ class MemoryManagerSyncOps {
private async activateFallbackProvider(reason: string): Promise<boolean> {
const fallback = this.settings.fallback;
if (!fallback || fallback === "none" || fallback === this.provider.id) {
if (!fallback || fallback === "none" || !this.provider || fallback === this.provider.id) {
return false;
}
if (this.fallbackFrom) {
@@ -957,8 +970,8 @@ class MemoryManagerSyncOps {
}
nextMeta = {
model: this.provider.model,
provider: this.provider.id,
model: this.provider?.model ?? "fts-only",
provider: this.provider?.id ?? "none",
providerKey: this.providerKey,
chunkTokens: this.settings.chunking.tokens,
chunkOverlap: this.settings.chunking.overlap,
@@ -1023,8 +1036,8 @@ class MemoryManagerSyncOps {
}
const nextMeta: MemoryIndexMeta = {
model: this.provider.model,
provider: this.provider.id,
model: this.provider?.model ?? "fts-only",
provider: this.provider?.id ?? "none",
providerKey: this.providerKey,
chunkTokens: this.settings.chunking.tokens,
chunkOverlap: this.settings.chunking.overlap,

View File

@@ -28,6 +28,7 @@ import { isMemoryPath, normalizeExtraMemoryPaths } from "./internal.js";
import { memoryManagerEmbeddingOps } from "./manager-embedding-ops.js";
import { searchKeyword, searchVector } from "./manager-search.js";
import { memoryManagerSyncOps } from "./manager-sync-ops.js";
import { extractKeywords } from "./query-expansion.js";
const SNIPPET_MAX_CHARS = 700;
const VECTOR_TABLE = "chunks_vec";
const FTS_TABLE = "chunks_fts";
@@ -233,8 +234,34 @@ export class MemoryIndexManager implements MemorySearchManager {
log.warn("memory search: no provider and FTS unavailable");
return [];
}
const ftsResults = await this.searchKeyword(cleaned, candidates).catch(() => []);
return ftsResults.filter((entry) => entry.score >= minScore).slice(0, maxResults);
// Extract keywords for better FTS matching on conversational queries
// e.g., "that thing we discussed about the API" → ["discussed", "API"]
const keywords = extractKeywords(cleaned);
const searchTerms = keywords.length > 0 ? keywords : [cleaned];
// Search with each keyword and merge results
const resultSets = await Promise.all(
searchTerms.map((term) => this.searchKeyword(term, candidates).catch(() => [])),
);
// Merge and deduplicate results, keeping highest score for each chunk
const seenIds = new Map<string, (typeof resultSets)[0][0]>();
for (const results of resultSets) {
for (const result of results) {
const existing = seenIds.get(result.id);
if (!existing || result.score > existing.score) {
seenIds.set(result.id, result);
}
}
}
const merged = [...seenIds.values()]
.toSorted((a, b) => b.score - a.score)
.filter((entry) => entry.score >= minScore)
.slice(0, maxResults);
return merged;
}
const keywordResults = hybrid.enabled

View File

@@ -0,0 +1,78 @@
import { describe, expect, it } from "vitest";
import { expandQueryForFts, extractKeywords } from "./query-expansion.js";
// Unit tests for the local keyword extractor: English and Chinese stop-word
// filtering, mixed-language queries, empty/stop-word-only input, and dedup.
describe("extractKeywords", () => {
it("extracts keywords from English conversational query", () => {
const keywords = extractKeywords("that thing we discussed about the API");
// Keywords are lowercased by the tokenizer ("API" → "api").
expect(keywords).toContain("discussed");
expect(keywords).toContain("api");
// Should not include stop words
expect(keywords).not.toContain("that");
expect(keywords).not.toContain("thing");
expect(keywords).not.toContain("we");
expect(keywords).not.toContain("about");
expect(keywords).not.toContain("the");
});
it("extracts keywords from Chinese conversational query", () => {
const keywords = extractKeywords("之前讨论的那个方案");
// "讨论"/"方案" survive as character bigrams produced by the tokenizer.
expect(keywords).toContain("讨论");
expect(keywords).toContain("方案");
// Should not include stop words
expect(keywords).not.toContain("之前");
expect(keywords).not.toContain("的");
expect(keywords).not.toContain("那个");
});
it("extracts keywords from mixed language query", () => {
const keywords = extractKeywords("昨天讨论的 API design");
expect(keywords).toContain("讨论");
expect(keywords).toContain("api");
expect(keywords).toContain("design");
});
it("returns specific technical terms", () => {
const keywords = extractKeywords("what was the solution for the CFR bug");
expect(keywords).toContain("solution");
expect(keywords).toContain("cfr");
expect(keywords).toContain("bug");
});
it("handles empty query", () => {
// Both the empty string and whitespace-only input yield no keywords.
expect(extractKeywords("")).toEqual([]);
expect(extractKeywords(" ")).toEqual([]);
});
it("handles query with only stop words", () => {
const keywords = extractKeywords("the a an is are");
expect(keywords.length).toBe(0);
});
it("removes duplicate keywords", () => {
const keywords = extractKeywords("test test testing");
const testCount = keywords.filter((k) => k === "test").length;
expect(testCount).toBe(1);
});
});
// Unit tests for query expansion: the returned object carries the trimmed
// original query, the extracted keywords, and an OR-joined FTS query string.
describe("expandQueryForFts", () => {
it("returns original query and extracted keywords", () => {
const result = expandQueryForFts("that API we discussed");
expect(result.original).toBe("that API we discussed");
expect(result.keywords).toContain("api");
expect(result.keywords).toContain("discussed");
});
it("builds expanded OR query for FTS", () => {
const result = expandQueryForFts("the solution for bugs");
expect(result.expanded).toContain("OR");
expect(result.expanded).toContain("solution");
expect(result.expanded).toContain("bugs");
});
it("returns original query when no keywords extracted", () => {
// With no extractable keywords, `expanded` falls back to the original.
const result = expandQueryForFts("the");
expect(result.keywords.length).toBe(0);
expect(result.expanded).toBe("the");
});
});

View File

@@ -0,0 +1,357 @@
/**
* Query expansion for FTS-only search mode.
*
* When no embedding provider is available, we fall back to FTS (full-text search).
* FTS works best with specific keywords, but users often ask conversational queries
* like "that thing we discussed yesterday" or "之前讨论的那个方案".
*
* This module extracts meaningful keywords from such queries to improve FTS results.
*/
// Common stop words that don't add search value
// English stop words that add no search value in FTS queries.
// All entries are lowercase because the tokenizer lowercases input before
// lookup. Fix: "before" was previously listed twice (prepositions AND time
// references); the duplicate is removed — Set membership is unchanged.
const STOP_WORDS_EN = new Set([
  // Articles and determiners
  "a", "an", "the", "this", "that", "these", "those",
  // Pronouns
  "i", "me", "my", "we", "our", "you", "your",
  "he", "she", "it", "they", "them",
  // Common auxiliary and modal verbs
  "is", "are", "was", "were", "be", "been", "being",
  "have", "has", "had", "do", "does", "did",
  "will", "would", "could", "should", "can", "may", "might",
  // Prepositions ("before" lives here; it also covers the time sense)
  "in", "on", "at", "to", "for", "of", "with", "by", "from", "about",
  "into", "through", "during", "before", "after", "above", "below",
  "between", "under", "over",
  // Conjunctions and question words
  "and", "or", "but", "if", "then", "because", "as", "while",
  "when", "where", "what", "which", "who", "how", "why",
  // Time references (vague, not useful for FTS)
  "yesterday", "today", "tomorrow", "earlier", "later", "recently",
  "ago", "just", "now",
  // Vague noun references
  "thing", "things", "stuff",
  "something", "anything", "everything", "nothing",
  // Politeness / request verbs common in conversational queries
  "please", "help", "find", "show", "get", "tell", "give",
]);
// Chinese stop words: pronouns, particles, semantically weak verbs,
// connectives, fuzzy time words, and filler nouns with no FTS search value.
const STOP_WORDS_ZH = new Set([
  // Pronouns and demonstratives
  "我", "我们", "你", "你们", "他", "她", "它", "他们",
  "这", "那", "这个", "那个", "这些", "那些",
  // Particles and sentence-final auxiliaries
  "的", "了", "着", "过", "得", "地",
  "吗", "呢", "吧", "啊", "呀", "嘛", "啦",
  // Common, semantically weak verbs
  "是", "有", "在", "被", "把", "给", "让", "用", "到", "去", "来",
  "做", "说", "看", "找", "想", "要", "能", "会", "可以",
  // Connectives and adverbs
  "和", "与", "或", "但", "但是", "因为", "所以", "如果", "虽然",
  "而", "也", "都", "就", "还", "又", "再", "才", "只",
  // Vague time references
  "之前", "以前", "之后", "以后", "刚才", "现在",
  "昨天", "今天", "明天", "最近",
  // Filler nouns and question words
  "东西", "事情", "事", "什么", "哪个", "哪些", "怎么", "为什么", "多少",
  // Politeness / request words
  "请", "帮", "帮忙", "告诉",
]);
/**
 * Check whether a token is worth keeping as a search keyword.
 *
 * Rejects empty tokens, short pure-ASCII-letter words (< 3 chars, likely
 * stop words or fragments), pure numbers, and punctuation/symbol-only
 * tokens. Single CJK characters pass: one character can carry a full
 * word's meaning.
 *
 * Fix: the original `!token || token.length === 0` was redundant — an
 * empty string is already falsy.
 */
function isValidKeyword(token: string): boolean {
  if (!token) {
    return false;
  }
  // Very short pure-ASCII-letter tokens are almost always noise.
  if (token.length < 3 && /^[a-zA-Z]+$/.test(token)) {
    return false;
  }
  // Bare numbers are rarely useful search terms.
  if (/^\d+$/.test(token)) {
    return false;
  }
  // Punctuation- or symbol-only tokens (Unicode-aware classes).
  if (/^[\p{P}\p{S}]+$/u.test(token)) {
    return false;
  }
  return true;
}
/**
 * Tokenize mixed English/Chinese text for keyword extraction.
 *
 * Input is lowercased, then split on whitespace and punctuation. CJK runs
 * are expanded into character unigrams plus adjacent bigrams (no proper
 * segmenter is available; bigrams approximate Chinese words). Non-CJK runs
 * are kept whole.
 *
 * Fix: the original dropped latin text glued to CJK (e.g. "讨论api" lost
 * "api") and formed bigrams across non-adjacent CJK characters. Each
 * segment is now partitioned into maximal CJK / non-CJK runs first.
 */
function tokenize(text: string): string[] {
  const tokens: string[] = [];
  const normalized = text.toLowerCase().trim();
  // Unicode-aware split on whitespace and punctuation.
  const segments = normalized.split(/[\s\p{P}]+/u).filter(Boolean);
  for (const segment of segments) {
    // Partition into maximal CJK and non-CJK runs so neither half of a
    // mixed token is lost.
    const runs = segment.match(/[\u4e00-\u9fff]+|[^\u4e00-\u9fff]+/gu) ?? [];
    for (const run of runs) {
      if (/^[\u4e00-\u9fff]/.test(run)) {
        const chars = [...run];
        // Individual characters…
        tokens.push(...chars);
        // …plus adjacent-character bigrams for better phrase matching.
        for (let i = 0; i + 1 < chars.length; i++) {
          tokens.push(chars[i] + chars[i + 1]);
        }
      } else {
        tokens.push(run);
      }
    }
  }
  return tokens;
}
/**
 * Extract keywords from a conversational query for FTS search.
 *
 * Tokens are kept in first-seen order; English/Chinese stop words,
 * duplicates, and tokens rejected by isValidKeyword are dropped.
 *
 * Examples:
 * - "that thing we discussed about the API" → ["discussed", "api"]
 * - "之前讨论的那个方案" → keywords including "讨论", "方案"
 * - "what was the solution for the bug" → ["solution", "bug"]
 */
export function extractKeywords(query: string): string[] {
  const kept = new Set<string>();
  for (const token of tokenize(query)) {
    const isStopWord = STOP_WORDS_EN.has(token) || STOP_WORDS_ZH.has(token);
    if (!isStopWord && isValidKeyword(token)) {
      // Set insertion ignores repeats while preserving first-seen order.
      kept.add(token);
    }
  }
  return [...kept];
}
/**
 * Expand a query for FTS search.
 *
 * Returns the trimmed original query, the extracted keywords, and an
 * OR-joined query combining both, so exact matches and per-keyword
 * matches are found.
 *
 * NOTE(review): the query is passed through unescaped — assumes the FTS
 * layer tolerates raw user input; confirm against the search call site.
 *
 * @param query - User's original query
 * @returns Object with original query, extracted keywords, and expanded query
 */
export function expandQueryForFts(query: string): {
  original: string;
  keywords: string[];
  expanded: string;
} {
  const original = query.trim();
  const keywords = extractKeywords(original);
  const expanded =
    keywords.length === 0 ? original : [original, ...keywords].join(" OR ");
  return { original, keywords, expanded };
}
/**
 * Type for an optional LLM-based query expander.
 * Can be provided to enhance keyword extraction with semantic understanding.
 */
export type LlmQueryExpander = (query: string) => Promise<string[]>;
/**
 * Expand a query, preferring the LLM expander when one is supplied.
 *
 * The LLM path is strictly best-effort: a thrown error or an empty result
 * falls back to local stop-word-based keyword extraction.
 */
export async function expandQueryWithLlm(
  query: string,
  llmExpander?: LlmQueryExpander,
): Promise<string[]> {
  if (llmExpander) {
    try {
      const fromLlm = await llmExpander(query);
      if (fromLlm.length > 0) {
        return fromLlm;
      }
    } catch {
      // Swallow LLM failures; local extraction below is the fallback.
    }
  }
  return extractKeywords(query);
}