/**
 * Builds a full-text-search MATCH expression from free-form user input.
 *
 * The input is NFKC-normalized and trimmed, then turned into one of:
 * - `null` when nothing is left after trimming;
 * - a single quoted phrase when the text contains Han, Hiragana, Katakana or
 *   Hangul characters and no whitespace (unsegmented text cannot be split
 *   into words here, so it is passed through whole);
 * - a conjunction `"tok1" AND "tok2" ...` of the word-like tokens otherwise;
 * - a single quoted phrase of the whole text when it has no word-like tokens
 *   (for example punctuation only).
 *
 * Double quotes inside a phrase are escaped by doubling them.
 * NOTE(review): this is presumably the SQLite FTS5 string-quoting rule —
 * confirm against the table that consumes the query.
 *
 * @param raw - Untrusted search text as typed by the user.
 * @returns The match expression, or `null` for blank input.
 */
export function buildFtsQuery(raw: string): string | null {
  const q = raw.normalize("NFKC").trim();
  if (!q) return null;

  // Wraps text as one quoted phrase; embedded quotes are doubled so the
  // input cannot terminate the phrase early and inject query operators.
  const quotePhrase = (text: string): string => `"${text.replace(/"/g, '""')}"`;

  const hasCjk =
    /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}]/u.test(q);
  const hasWhitespace = /\s/u.test(q);
  if (hasCjk && !hasWhitespace) {
    return quotePhrase(q);
  }

  // \p{M} keeps combining marks attached to their base letters. Without it,
  // words in scripts that have no precomposed forms (Devanagari, Thai, ...)
  // would be cut apart at every vowel sign or diacritic.
  const tokens = q.match(/[\p{L}\p{M}\p{N}_]+/gu) ?? [];
  if (tokens.length === 0) {
    return quotePhrase(q);
  }

  // Tokens consist only of letters, marks, digits and "_", so they can never
  // contain a double quote and need no escaping.
  return tokens.map((token) => `"${token}"`).join(" AND ");
}