This commit is contained in:
TideFinder 2026-01-29 21:53:31 -05:00 committed by GitHub
commit 5f82a9164a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -21,16 +21,26 @@ export type HybridKeywordResult = {
}; };
/**
 * Wraps `text` in double quotes as a single full-text-search string literal,
 * escaping embedded double quotes by doubling them.
 */
function quoteFtsPhrase(text: string): string {
  return `"${text.replace(/"/g, '""')}"`;
}

/**
 * Builds a full-text-search query string from free-form user input.
 *
 * The input is NFKC-normalized and trimmed, then handled in one of three ways:
 * - A query containing Han, Hiragana, Katakana, or Hangul characters and no
 *   whitespace is returned as one quoted phrase (presumably because such text
 *   has no whitespace word boundaries to split on).
 * - Otherwise the query is split into runs of Unicode letters, digits, and
 *   underscores; each run is quoted and the runs are joined with ` AND `.
 * - If no such run exists (e.g. punctuation only), the whole normalized query
 *   is returned as one quoted phrase.
 *
 * @param raw - The raw query text.
 * @returns The query expression, or `null` when the input is empty after
 *   normalization and trimming.
 */
export function buildFtsQuery(raw: string): string | null {
  const q = raw.normalize("NFKC").trim();
  if (!q) return null;

  // Chinese / Japanese / Korean script detection.
  const hasCjk =
    /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}]/u.test(q);
  const hasWhitespace = /\s/u.test(q);
  if (hasCjk && !hasWhitespace) {
    return quoteFtsPhrase(q);
  }

  // Matches are non-empty and contain no whitespace, so no trim/filter pass is needed.
  const tokens: string[] = q.match(/[\p{L}\p{N}_]+/gu) ?? [];
  if (tokens.length === 0) {
    return quoteFtsPhrase(q);
  }

  return tokens.map(quoteFtsPhrase).join(" AND ");
}
export function bm25RankToScore(rank: number): number { export function bm25RankToScore(rank: number): number {
const normalized = Number.isFinite(rank) ? Math.max(0, rank) : 999; const normalized = Number.isFinite(rank) ? Math.max(0, rank) : 999;
return 1 / (1 + normalized); return 1 / (1 + normalized);