mirror of
https://github.com/sqlite/sqlite.git
synced 2025-06-23 22:01:42 +03:00
Add the "categories" option to the unicode61 tokenizer in fts5.
FossilOrigin-Name: 80d2b9e635e3100f90cffdcffa5b5038da6fbbfccc9f5777c59a4ae760d4cb62
This commit is contained in:
@ -143,4 +143,40 @@ proc tl_load_casefolding_txt {zName} {
|
||||
}
|
||||
}
|
||||
|
||||
proc cc_load_unicodedata_text {zName} {
|
||||
set fd [open $zName]
|
||||
set lField {
|
||||
code
|
||||
character_name
|
||||
general_category
|
||||
canonical_combining_classes
|
||||
bidirectional_category
|
||||
character_decomposition_mapping
|
||||
decimal_digit_value
|
||||
digit_value
|
||||
numeric_value
|
||||
mirrored
|
||||
unicode_1_name
|
||||
iso10646_comment_field
|
||||
uppercase_mapping
|
||||
lowercase_mapping
|
||||
titlecase_mapping
|
||||
}
|
||||
set lRet [list]
|
||||
|
||||
while { ![eof $fd] } {
|
||||
set line [gets $fd]
|
||||
if {$line == ""} continue
|
||||
|
||||
set fields [split $line ";"]
|
||||
if {[llength $fields] != [llength $lField]} { error "parse error: $line" }
|
||||
foreach $lField $fields {}
|
||||
|
||||
lappend lRet [list $code $general_category]
|
||||
}
|
||||
|
||||
close $fd
|
||||
set lRet
|
||||
}
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user