1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-06-23 22:01:42 +03:00

Add the "categories" option to the unicode61 tokenizer in fts5.

FossilOrigin-Name: 80d2b9e635e3100f90cffdcffa5b5038da6fbbfccc9f5777c59a4ae760d4cb62
This commit is contained in:
dan
2018-07-13 19:52:43 +00:00
parent e882551935
commit b80bb6ce88
9 changed files with 974 additions and 150 deletions

View File

@ -143,4 +143,40 @@ proc tl_load_casefolding_txt {zName} {
}
}
# Load a semicolon-separated UnicodeData-style text file and return the
# code / general-category pair for each record.
#
# Parameters:
#   zName - path of the file to read.
#
# Returns:
#   A list with one element per non-empty input line, each element being a
#   two-element list {code general_category} taken from the first and third
#   fields of that line.
#
# Errors:
#   Raises "parse error: <line>" if a non-empty line does not split into
#   exactly as many ";"-separated fields as there are names in lField.
#   Errors from [open] and [gets] propagate unchanged. The channel is closed
#   before any error raised while reading is re-thrown.
proc cc_load_unicodedata_text {zName} {
  set fd [open $zName]

  # One variable name per ";"-separated field, in file order.
  set lField {
    code
    character_name
    general_category
    canonical_combining_classes
    bidirectional_category
    character_decomposition_mapping
    decimal_digit_value
    digit_value
    numeric_value
    mirrored
    unicode_1_name
    iso10646_comment_field
    uppercase_mapping
    lowercase_mapping
    titlecase_mapping
  }
  set nField [llength $lField]

  set lRet [list]

  # Run the read loop under [catch] so the channel is closed even when a
  # malformed line (or an I/O failure) raises an error part-way through.
  set rc [catch {
    while { [gets $fd line] >= 0 } {
      if {$line eq ""} continue
      set fields [split $line ";"]
      if {[llength $fields] != $nField} { error "parse error: $line" }

      # Assign each field to the variable named by the matching lField entry.
      foreach $lField $fields {}
      lappend lRet [list $code $general_category]
    }
  } errMsg]

  # Capture error details before [close], which could overwrite them.
  if {$rc} {
    set errInfo $::errorInfo
    set errCode $::errorCode
  }
  close $fd
  if {$rc} { error $errMsg $errInfo $errCode }

  return $lRet
}