# 2020 September 30
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
#
# Tests for the fts5 "trigram" tokenizer.
#

source [file join [file dirname [info script]] fts5_common.tcl]
ifcapable !fts5 {
  finish_test
  return
}
set ::testprefix fts5trigram

#-------------------------------------------------------------------------
# Section 1: trigram tokenizer created with no options. One ASCII row
# (rowid 1) and one Thai row (rowid 2) are used throughout.
#
do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(y, tokenize=trigram);
  INSERT INTO t1 VALUES('abcdefghijklm');
  INSERT INTO t1 VALUES('กรุงเทพมหานคร');
}

# 1.1.*: full-text queries checked through highlight(). Cases 7-9 use
# mixed-case query text and expect the same output as the lower-case
# cases 1-3.
foreach {tn query expected} {
  1 abc               "(abc)defghijklm"
  2 defgh             "abc(defgh)ijklm"
  3 abcdefghijklm     "(abcdefghijklm)"
  4 กรุ               "(กรุ)งเทพมหานคร"
  5 งเทพมห            "กรุ(งเทพมห)านคร"
  6 กรุงเทพมหานคร     "(กรุงเทพมหานคร)"
  7 Abc               "(abc)defghijklm"
  8 deFgh             "abc(defgh)ijklm"
  9 aBcdefGhijKlm     "(abcdefghijklm)"
} {
  do_execsql_test 1.1.$tn {
    SELECT highlight(t1, 0, '(', ')') FROM t1($query)
  } $expected
}

# 1.2.0: fts5_expr() output for a four-character upper-case input.
do_execsql_test 1.2.0 {
  SELECT fts5_expr('ABCD', 'tokenize=trigram')
} {{"abc" + "bcd"}}

# 1.2.1: LIKE with an ESCAPE clause and an unbound parameter. No rows
# are expected.
do_execsql_test 1.2.1 {
  SELECT * FROM t1 WHERE y LIKE ? ESCAPE 'a'
}

# 1.3.*: LIKE patterns against the default tokenizer. The third list
# element is the expected list of rowids.
foreach {tn pattern expected} {
  1  {%cDef%}     1
  2  {cDef%}      {}
  3  {%f%}        1
  4  {%f_h%}      1
  5  {%f_g%}      {}
  6  {abc%klm}    1
  7  {ABCDEFG%}   1
  8  {%รุงเ%}     2
  9  {%งเ%}       2
  10 {%"งเ"%}     {}
} {
  do_execsql_test 1.3.$tn {
    SELECT rowid FROM t1 WHERE y LIKE $pattern
  } $expected
}

#-------------------------------------------------------------------------
# Section 2: the same data with "case_sensitive 1".
#
reset_db
do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(y, tokenize="trigram case_sensitive 1");
  INSERT INTO t1 VALUES('abcdefghijklm');
  INSERT INTO t1 VALUES('กรุงเทพมหานคร');
}

# 2.0.1: "case_sensitive" with no value following it is rejected.
do_catchsql_test 2.0.1 {
  CREATE VIRTUAL TABLE t2 USING fts5(z, tokenize='trigram case_sensitive');
} {1 {error in tokenizer constructor}}

# 2.1.*: same queries as 1.1.*, but the mixed-case queries 7-9 now
# expect an empty result.
foreach {tn query expected} {
  1 abc               "(abc)defghijklm"
  2 defgh             "abc(defgh)ijklm"
  3 abcdefghijklm     "(abcdefghijklm)"
  4 กรุ               "(กรุ)งเทพมหานคร"
  5 งเทพมห            "กรุ(งเทพมห)านคร"
  6 กรุงเทพมหานคร     "(กรุงเทพมหานคร)"
  7 Abc               ""
  8 deFgh             ""
  9 aBcdefGhijKlm     ""
} {
  do_execsql_test 2.1.$tn {
    SELECT highlight(t1, 0, '(', ')') FROM t1($query)
  } $expected
}

# 2.2.*: LIKE patterns against the case-sensitive table.
foreach {tn pattern expected} {
  1 {%cDef%}     1
  2 {cDef%}      {}
  3 {%f%}        1
  4 {%f_h%}      1
  5 {%f_g%}      {}
  6 {abc%klm}    1
  7 {ABCDEFG%}   1
  8 {%รุงเ%}     2
} {
  do_execsql_test 2.2.$tn {
    SELECT rowid FROM t1 WHERE y LIKE $pattern
  } $expected
}

# 2.3.*: GLOB patterns, including bracketed character classes
# (cases 9-12).
foreach {tn pattern expected} {
  1  {*cdef*}            1
  2  {cdef*}             {}
  3  {*f*}               1
  4  {*f?h*}             1
  5  {*f?g*}             {}
  6  {abc*klm}           1
  7  {abcdefg*}          1
  8  {*รุงเ*}            2
  9  {abc[d]efg*}        1
  10 {abc[]d]efg*}       1
  11 {abc[^]d]efg*}      {}
  12 {abc[^]XYZ]efg*}    1
} {
  do_execsql_test 2.3.$tn {
    SELECT rowid FROM t1 WHERE y GLOB $pattern
  } $expected
}

# 2.3.null.1: a NULL LIKE pattern returns no rows.
do_execsql_test 2.3.null.1 {
  SELECT rowid FROM t1 WHERE y LIKE NULL
}

#-------------------------------------------------------------------------
# Section 3: argument checking for case_sensitive. The values 2 and 11
# are rejected. 3.3 quotes the whole "tokenize=..." argument and
# succeeds.
#
reset_db
do_catchsql_test 3.1 {
  CREATE VIRTUAL TABLE ttt USING fts5(c, tokenize="trigram case_sensitive 2");
} {1 {error in tokenizer constructor}}

do_catchsql_test 3.2 {
  CREATE VIRTUAL TABLE ttt USING fts5(c, tokenize="trigram case_sensitive 11");
} {1 {error in tokenizer constructor}}

do_catchsql_test 3.3 {
  CREATE VIRTUAL TABLE ttt USING fts5(c, "tokenize=trigram case_sensitive 1");
} {0 {}}

#-------------------------------------------------------------------------
# Section 4: insert a blob whose first byte is 0x00, then run the fts5
# integrity-check command on the table.
#
reset_db
do_execsql_test 4.0 {
  CREATE VIRTUAL TABLE t0 USING fts5(b, tokenize = "trigram");
}
do_execsql_test 4.1 {
  INSERT INTO t0 VALUES (x'000b01');
}
do_execsql_test 4.2 {
  INSERT INTO t0(t0) VALUES('integrity-check');
}

#-------------------------------------------------------------------------
# Section 5: LIKE queries repeated for each detail mode supplied by
# foreach_detail_mode (substituted for %DETAIL%) and for case_sensitive
# set to 0 and then 1. The expected rowids are the same in every
# combination.
#
reset_db
foreach_detail_mode $::testprefix {
  foreach ci {0 1} {
    reset_db
    do_execsql_test 5.cs=$ci.0.1 "
      CREATE VIRTUAL TABLE t1 USING fts5(
        y, tokenize=\"trigram case_sensitive $ci\", detail=%DETAIL%
      );
    "
    do_execsql_test 5.cs=$ci.0.2 {
      INSERT INTO t1 VALUES('abcdefghijklm');
      INSERT INTO t1 VALUES('กรุงเทพมหานคร');
    }

    foreach {tn pattern expected} {
      1 {%cDef%}     1
      2 {cDef%}      {}
      3 {%f%}        1
      4 {%f_h%}      1
      5 {%f_g%}      {}
      6 {abc%klm}    1
      7 {ABCDEFG%}   1
      8 {%รุงเ%}     2
    } {
      do_execsql_test 5.cs=$ci.1.$tn {
        SELECT rowid FROM t1 WHERE y LIKE $pattern
      } $expected
    }
  }
}

# Section 6: query-plan checks on one case-insensitive table (ci0) and
# one case-sensitive table (ci1).
do_execsql_test 6.0 {
  CREATE VIRTUAL TABLE ci0 USING fts5(x, tokenize="trigram");
  CREATE VIRTUAL TABLE ci1 USING fts5(x, tokenize="trigram case_sensitive 1");
}

# LIKE and GLOB both work with case-insensitive tokenizers. Only GLOB works
# with case-sensitive.
do_eqp_test 6.1 {
  SELECT * FROM ci0 WHERE x LIKE ?
} {VIRTUAL TABLE INDEX 0:L0}
do_eqp_test 6.2 {
  SELECT * FROM ci0 WHERE x GLOB ?
} {VIRTUAL TABLE INDEX 0:G0}
do_eqp_test 6.3 {
  SELECT * FROM ci1 WHERE x LIKE ?
} {{SCAN ci1 VIRTUAL TABLE INDEX 0:}}
do_eqp_test 6.4 {
  SELECT * FROM ci1 WHERE x GLOB ?
} {VIRTUAL TABLE INDEX 0:G0}
# A "<" comparison gets the empty index string on both tables.
do_eqp_test 6.5 {
  SELECT * FROM ci1 WHERE x < ?
} {{SCAN ci1 VIRTUAL TABLE INDEX 0:}}
do_eqp_test 6.6 {
  SELECT * FROM ci0 WHERE x < ?
} {{SCAN ci0 VIRTUAL TABLE INDEX 0:}}

# Section 7: GLOB with a two-character Cyrillic pattern. 7.1 applies a
# unary "+" to the column and 7.2 does not; both must return rowid 20.
# NOTE(review): the "+" presumably keeps the constraint away from the
# fts5 index so the two evaluation paths are compared - confirm.
reset_db
do_execsql_test 7.0 {
  CREATE VIRTUAL TABLE f USING FTS5(filename, tokenize="trigram");
  INSERT INTO f (rowid, filename) VALUES
    (10, "giraffe.png"),
    (20, "жираф.png"),
    (30, "cat.png"),
    (40, "кот.png"),
    (50, "misic-🎵-.mp3");
}
do_execsql_test 7.1 {
  SELECT rowid FROM f WHERE +filename GLOB '*ир*';
} {20}
do_execsql_test 7.2 {
  SELECT rowid FROM f WHERE filename GLOB '*ир*';
} {20}

#-------------------------------------------------------------------------
# Section 8: highlight() output when a query has several phrases. The
# expected values show adjacent or overlapping matches reported as a
# single highlighted span.
#
reset_db
do_execsql_test 8.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(y, tokenize=trigram);
  INSERT INTO t1 VALUES('abcdefghijklm');
}

foreach {tn expr expected} {
  1 "abc ghi"          "(abc)def(ghi)jklm"
  2 "def ghi"          "abc(defghi)jklm"
  3 "efg ghi"          "abcd(efghi)jklm"
  4 "efghi"            "abcd(efghi)jklm"
  5 "abcd jklm"        "(abcd)efghi(jklm)"
  6 "ijkl jklm"        "abcdefgh(ijklm)"
  7 "ijk ijkl hijk"    "abcdefg(hijkl)m"
} {
  do_execsql_test 8.1.$tn {
    SELECT highlight(t1, 0, '(', ')') FROM t1($expr)
  } $expected
}

do_execsql_test 8.2 {
  CREATE VIRTUAL TABLE ft2 USING fts5(a, tokenize="trigram");
  INSERT INTO ft2 VALUES('abc x cde');
  INSERT INTO ft2 VALUES('abc cde');
  INSERT INTO ft2 VALUES('abcde');
}

do_execsql_test 8.3 {
  SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'abc AND cde';
} {
  {[abc] x [cde]}
  {[abc] [cde]}
  {[abcde]}
}

#-------------------------------------------------------------------------
# Section 9: LIKE constraints on the last of thirteen columns (a12),
# including two LIKE terms on the same column and a NULL pattern.
#
reset_db
do_execsql_test 9.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(
    a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12,
    tokenize=trigram
  );
  INSERT INTO t1(rowid, a12) VALUES(111, 'thats a tricky case though');
  INSERT INTO t1(rowid, a12) VALUES(222, 'the query planner cannot do');
}

do_execsql_test 9.1 {
  SELECT rowid FROM t1 WHERE a12 LIKE '%tricky%'
} {111}

do_execsql_test 9.2 {
  SELECT rowid FROM t1 WHERE a12 LIKE '%tricky%' AND a12 LIKE '%case%'
} {111}

do_execsql_test 9.3 {
  SELECT rowid FROM t1 WHERE a12 LIKE NULL
} {}

#-------------------------------------------------------------------------
reset_db
# Section 10: insert a range of unusual text and blob values. Each test
# only checks that the inserts finish and the script returns an empty
# result.
do_execsql_test 10.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(a, tokenize=trigram);
}

# 10.1: strings written with \U escapes of varying length, placed
# before and after plain ASCII text. Values are bound as Tcl variables.
do_test 10.1 {
  foreach text {
    "abc \UFFjkl\UFF"
    "abc \UFFFjkl\UFFF"
    "abc \UFFFFjkl\UFFFF"
    "abc \UFFFFFjkl\UFFFFF"
    "\UFFjkl\UFF abc"
    "\UFFFjkl\UFFF abc"
    "\UFFFFjkl\UFFFF abc"
    "\UFFFFFjkl\UFFFFF abc"
    "\U10001jkl\U10001 abc"
  } {
    execsql { INSERT INTO t1 VALUES( $text ) }
  }
} {}

# 10.2: blob literals spliced into the SQL text by Tcl substitution, as
# the execsql argument is double-quoted.
# NOTE(review): these byte sequences look like malformed or edge-case
# UTF-8 - confirm intent.
do_test 10.2 {
  foreach literal {
    X'E18000626320646566'
    X'61EDA0806320646566'
    X'61EDA0806320646566'
    X'61EFBFBE6320646566'
    X'76686920E18000626320646566'
    X'7668692061EDA0806320646566'
    X'7668692061EDA0806320646566'
    X'7668692061EFBFBE6320646566'
  } {
    execsql " INSERT INTO t1 VALUES( $literal ) "
  }
} {}

# 10.3: byte strings built with binary format: 0x61, then 0xF7 followed
# by three to six 0xBF bytes, then 0x62. Each is inserted on its own
# and again with an 'abcd' prefix.
do_test 10.3 {
  set a [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0x62}]
  set b [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0x62}]
  set c [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]
  set d [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]

  execsql {
    INSERT INTO t1 VALUES($a);
    INSERT INTO t1 VALUES($b);
    INSERT INTO t1 VALUES($c);
    INSERT INTO t1 VALUES($d);

    INSERT INTO t1 VALUES('abcd' || $a);
    INSERT INTO t1 VALUES('abcd' || $b);
    INSERT INTO t1 VALUES('abcd' || $c);
    INSERT INTO t1 VALUES('abcd' || $d);
  }
} {}

# Section 11: insert the result of str('') into a trigram table. The
# str() SQL function is made available by sqlite3_fts5_register_str.
do_execsql_test 11.0 {
  CREATE VIRTUAL TABLE t4 USING fts5(y, tokenize=trigram);
}
sqlite3_fts5_register_str db
do_execsql_test 11.1 {
  INSERT INTO t4 VALUES( str('') );
}

# Section 12: call the tokenizer directly. "abcd" yields two tokens,
# each followed by two integers. A one-character input and an empty
# input yield no tokens.
do_test 12.0 {
  sqlite3_fts5_tokenize db trigram "abcd"
} {abc 0 3 bcd 1 4}
do_test 12.1 {
  sqlite3_fts5_tokenize db trigram "a"
} {}
do_test 12.2 {
  sqlite3_fts5_tokenize db trigram ""
} {}

finish_test