fix: support Unicode characters in hybrid search FTS query
The previous regex /[A-Za-z0-9_]+/g matched only ASCII letters, digits,
and underscores, causing CJK (Chinese/Japanese/Korean) and other non-ASCII
Unicode characters to be completely ignored during hybrid search.
Changed to /[\p{L}\p{N}_]+/gu which uses Unicode property escapes:
- \p{L} matches all Unicode letters (including CJK)
- \p{N} matches all Unicode numeric characters (not just ASCII 0-9)
- the u flag enables Unicode mode, which \p{...} property escapes require
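This lets hybrid search tokenize Chinese, Japanese, Korean, and other
non-Latin text instead of discarding it. Note that combining marks (\p{M})
are not part of the character class, so words in scripts that rely on them
(e.g. Devanagari, Thai) are still split at each mark.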
This commit is contained in:
parent
9025da2296
commit
83d3f4bd21
@ -23,7 +23,7 @@ export type HybridKeywordResult = {
|
||||
export function buildFtsQuery(raw: string): string | null {
|
||||
const tokens =
|
||||
raw
|
||||
.match(/[A-Za-z0-9_]+/g)
|
||||
.match(/[\p{L}\p{N}_]+/gu)
|
||||
?.map((t) => t.trim())
|
||||
.filter(Boolean) ?? [];
|
||||
if (tokens.length === 0) return null;
|
||||
|
||||
Loading…
Reference in New Issue
Block a user