# 2024 Aug 10
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on the built-in fts5 tokenizers.
#
# The cases below register two Tcl-implemented tokenizers with
# sqlite3_fts5_create_tokenizer, "lowercase" and "split_on_dot", where
# split_on_dot is registered with "-parent lowercase". Each case uses a
# different combination of the -v2 switch on the two registrations and
# then checks that text indexed with tokenize=split_on_dot ends up both
# split on "." and folded to lower case.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5tokenizer3

# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

# Scripts handed to sqlite3_fts5_create_tokenizer as the final argument.
# Each ignores its arguments and returns the name of the Tcl proc that
# performs the tokenization.
# NOTE(review): presumably invoked when a tokenizer instance is created;
# confirm against the test harness.
proc get_sod {args} { return "split_on_dot" }
proc get_lowercase {args} { return "lowercase" }

# Tokenizer: report the whole input as a single token, folded to lower
# case, spanning offsets 0 through the length of the input.
#
#   flags - unused here
#   txt   - text to tokenize
proc lowercase {flags txt} {
  set n [string length $txt]
  sqlite3_fts5_token [string tolower $txt] 0 $n
  return 0
}

# Tokenizer: split the input on "." and report each piece as a token.
#
#   flags - unused here
#   txt   - text to tokenize
#
# For each piece the reported start offset is where the piece begins in
# txt and the end offset is start + length of the piece. The length must
# be taken from the piece itself and not from the whole input, otherwise
# every end offset, and every start offset after the first, points past
# the token.
# NOTE(review): [string length] counts characters; that equals the byte
# offset only for single-byte text such as the ASCII used in this file.
proc split_on_dot {flags txt} {
  set iOff 0
  foreach t [split $txt "."] {
    set n [string length $t]
    sqlite3_fts5_token $t $iOff [expr {$iOff + $n}]
    # Step over the piece and the "." separator that followed it.
    incr iOff [expr {$n + 1}]
  }
  return ""
}

foreach {tn script} {
  1 {
    sqlite3_fts5_create_tokenizer db lowercase get_lowercase
    sqlite3_fts5_create_tokenizer -parent lowercase db split_on_dot get_sod
  }
  2 {
    sqlite3_fts5_create_tokenizer -v2 db lowercase get_lowercase
    sqlite3_fts5_create_tokenizer -parent lowercase db split_on_dot get_sod
  }
  3 {
    sqlite3_fts5_create_tokenizer db lowercase get_lowercase
    sqlite3_fts5_create_tokenizer -v2 -parent lowercase db split_on_dot get_sod
  }
  4 {
    sqlite3_fts5_create_tokenizer -v2 db lowercase get_lowercase
    sqlite3_fts5_create_tokenizer -v2 -parent lowercase db split_on_dot get_sod
  }
} {
  # Start from a fresh database so each case registers its own tokenizers.
  reset_db
  eval $script

  do_execsql_test 1.$tn.0 {
    CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize=split_on_dot);
    CREATE VIRTUAL TABLE t1vocab USING fts5vocab(t1, instance);
    INSERT INTO t1 VALUES('ABC.Def.ghi');
  }

  # The indexed terms must be split on "." and lower-cased.
  do_execsql_test 1.$tn.1 {
    SELECT term FROM t1vocab ORDER BY 1
  } {abc def ghi}
}

finish_test