fix: support Unicode characters in hybrid search FTS query

The previous regex /[A-Za-z0-9_]+/g matched only ASCII letters, digits,
and underscores, so CJK (Chinese/Japanese/Korean) and other non-ASCII
Unicode characters were dropped from the query during hybrid search.

Changed to /[\p{L}\p{N}_]+/gu which uses Unicode property escapes:
- \p{L} matches all Unicode letters (including CJK)
- \p{N} matches all Unicode numbers
- u flag enables Unicode mode, which is required for \p{...} escapes

This ensures hybrid search works correctly with Chinese, Japanese,
Korean, and other non-Latin scripts.
This commit is contained in:
r266-tech 2026-01-30 14:56:10 +08:00
parent 9025da2296
commit 83d3f4bd21

View File

@@ -23,7 +23,7 @@ export type HybridKeywordResult = {
export function buildFtsQuery(raw: string): string | null {
const tokens =
raw
.match(/[A-Za-z0-9_]+/g)
.match(/[\p{L}\p{N}_]+/gu)
?.map((t) => t.trim())
.filter(Boolean) ?? [];
if (tokens.length === 0) return null;