fix: support Unicode characters in hybrid search FTS query
The previous regex /[A-Za-z0-9_]+/g matched only ASCII letters, digits,
and underscores, so CJK (Chinese/Japanese/Korean) and other non-ASCII
characters never became tokens and were ignored during hybrid search.
Changed to /[\p{L}\p{N}_]+/gu which uses Unicode property escapes:
- \p{L} matches all Unicode letters (including CJK)
- \p{N} matches all Unicode numeric characters (not just ASCII 0-9)
- the u flag enables Unicode mode, which \p{...} property escapes require
This ensures hybrid search works correctly with Chinese, Japanese,
Korean, and other non-Latin scripts.
This commit is contained in:
parent
9025da2296
commit
83d3f4bd21
@ -23,7 +23,7 @@ export type HybridKeywordResult = {
|
|||||||
export function buildFtsQuery(raw: string): string | null {
|
export function buildFtsQuery(raw: string): string | null {
|
||||||
const tokens =
|
const tokens =
|
||||||
raw
|
raw
|
||||||
.match(/[A-Za-z0-9_]+/g)
|
.match(/[\p{L}\p{N}_]+/gu)
|
||||||
?.map((t) => t.trim())
|
?.map((t) => t.trim())
|
||||||
.filter(Boolean) ?? [];
|
.filter(Boolean) ?? [];
|
||||||
if (tokens.length === 0) return null;
|
if (tokens.length === 0) return null;
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user