fix (memory/search): support Unicode tokens in FTS query builder

This commit is contained in:
Vignesh Natarajan
2026-02-15 19:17:03 -08:00
parent 82631d225c
commit 501e893676
2 changed files with 3 additions and 1 deletions

View File

@@ -5,6 +5,8 @@ describe("memory hybrid helpers", () => {
// Pins the output format of buildFtsQuery: each token is wrapped in double
// quotes and the tokens are joined with " AND ".
it("buildFtsQuery tokenizes and AND-joins", () => {
// Whitespace separates tokens.
expect(buildFtsQuery("hello world")).toBe('"hello" AND "world"');
// An underscore stays inside a token; a hyphen splits "baz-1" into two tokens.
expect(buildFtsQuery("FOO_bar baz-1")).toBe('"FOO_bar" AND "baz" AND "1"');
// A run of CJK characters with no separator is kept as one token.
expect(buildFtsQuery("金银价格")).toBe('"金银价格"');
// Digits directly followed by a non-ASCII letter stay in the same token.
expect(buildFtsQuery("価格 2026年")).toBe('"価格" AND "2026年"');
// Input with no token characters yields null rather than an empty query string.
expect(buildFtsQuery(" ")).toBeNull();
});

View File

@@ -23,7 +23,7 @@ export type HybridKeywordResult = {
export function buildFtsQuery(raw: string): string | null {
const tokens =
raw
.match(/[A-Za-z0-9_]+/g)
.match(/[\p{L}\p{N}_]+/gu)
?.map((t) => t.trim())
.filter(Boolean) ?? [];
if (tokens.length === 0) {