Merge fa29938e8f into 4583f88626
This commit is contained in:
commit
5f82a9164a
@ -21,16 +21,26 @@ export type HybridKeywordResult = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
/**
 * Builds a full-text-search MATCH expression from free-form user input.
 *
 * The input is NFKC-normalized and trimmed, then turned into one of:
 * - `null` when nothing is left after trimming;
 * - a single quoted phrase when the query contains CJK characters and no
 *   whitespace, so the whole query is kept together as one phrase;
 * - a single quoted phrase when the query contains no letters, digits or
 *   underscores at all (e.g. only punctuation);
 * - otherwise, every run of Unicode letters/digits/underscores quoted
 *   individually and joined with ` AND `.
 *
 * Embedded double quotes are escaped by doubling them.
 * NOTE(review): the doubling convention presumably targets SQLite FTS5
 * string syntax — confirm against the query consumer.
 *
 * @param raw - Unprocessed query text as typed by the user.
 * @returns The MATCH expression, or `null` for an empty/whitespace-only query.
 */
export function buildFtsQuery(raw: string): string | null {
  const q = raw.normalize("NFKC").trim();
  if (!q) return null;

  // Wraps text in double quotes, doubling any embedded quote so it cannot
  // terminate the phrase early.
  const quotePhrase = (text: string): string => `"${text.replace(/"/g, '""')}"`;

  // Chinese (Han), Japanese (Hiragana/Katakana) and Korean (Hangul) detection.
  const hasCjk =
    /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}]/u.test(q);
  const hasWhitespace = /\s/u.test(q);
  if (hasCjk && !hasWhitespace) {
    // No whitespace to split on: search for the whole query as one phrase.
    return quotePhrase(q);
  }

  // Unicode-aware tokens; matches are never empty and never contain
  // whitespace, so no further trimming or filtering is required.
  const tokens = q.match(/[\p{L}\p{N}_]+/gu) ?? [];
  if (tokens.length === 0) {
    // Nothing word-like (punctuation only): fall back to the literal phrase.
    return quotePhrase(q);
  }

  return tokens.map((token) => quotePhrase(token)).join(" AND ");
}
|
||||||
|
|
||||||
|
|
||||||
export function bm25RankToScore(rank: number): number {
|
export function bm25RankToScore(rank: number): number {
|
||||||
const normalized = Number.isFinite(rank) ? Math.max(0, rank) : 999;
|
const normalized = Number.isFinite(rank) ? Math.max(0, rank) : 999;
|
||||||
return 1 / (1 + normalized);
|
return 1 / (1 + normalized);
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user