1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-07-29 08:01:23 +03:00

Merge all the latest enhancements from trunk. This merge includes FTS5

and a number of notable performance enhancements.

FossilOrigin-Name: 39936b33b0668aad81aa574d4d74c92b0ddd218a
This commit is contained in:
drh
2015-06-30 16:29:59 +00:00
115 changed files with 40679 additions and 708 deletions

View File

@ -175,6 +175,7 @@ LIBOBJS0 = alter.lo analyze.lo attach.lo auth.lo \
fts3_porter.lo fts3_snippet.lo fts3_tokenizer.lo fts3_tokenizer1.lo \
fts3_tokenize_vtab.lo \
fts3_unicode.lo fts3_unicode2.lo fts3_write.lo \
fts5.lo \
func.lo global.lo hash.lo \
icu.lo insert.lo journal.lo legacy.lo loadext.lo \
main.lo malloc.lo mem0.lo mem1.lo mem2.lo mem3.lo mem5.lo \
@ -419,6 +420,7 @@ TESTSRC += \
$(TOP)/ext/misc/eval.c \
$(TOP)/ext/misc/fileio.c \
$(TOP)/ext/misc/fuzzer.c \
$(TOP)/ext/fts5/fts5_tcl.c \
$(TOP)/ext/misc/ieee754.c \
$(TOP)/ext/misc/nextchar.c \
$(TOP)/ext/misc/percentile.c \
@ -985,6 +987,43 @@ rtree.lo: $(TOP)/ext/rtree/rtree.c $(HDR) $(EXTHDR)
sqlite3session.lo: $(TOP)/ext/session/sqlite3session.c $(HDR) $(EXTHDR)
$(LTCOMPILE) -DSQLITE_CORE -c $(TOP)/ext/session/sqlite3session.c
# FTS5 things
#
# Prerequisites of the generated fts5.c (see the fts5.c rule). The final
# entry deliberately has no trailing backslash: a continuation there would
# splice whatever line follows the list into this variable definition.
FTS5_SRC = \
   $(TOP)/ext/fts5/fts5.h \
   $(TOP)/ext/fts5/fts5Int.h \
   $(TOP)/ext/fts5/fts5_aux.c \
   $(TOP)/ext/fts5/fts5_buffer.c \
   $(TOP)/ext/fts5/fts5_main.c \
   $(TOP)/ext/fts5/fts5_config.c \
   $(TOP)/ext/fts5/fts5_expr.c \
   $(TOP)/ext/fts5/fts5_hash.c \
   $(TOP)/ext/fts5/fts5_index.c \
   fts5parse.c fts5parse.h \
   $(TOP)/ext/fts5/fts5_storage.c \
   $(TOP)/ext/fts5/fts5_tokenize.c \
   $(TOP)/ext/fts5/fts5_unicode2.c \
   $(TOP)/ext/fts5/fts5_varint.c \
   $(TOP)/ext/fts5/fts5_vocab.c
# Build the FTS5 query parser from its lemon grammar. The grammar is copied
# into the build directory, any stale fts5parse.h is removed and lemon is run
# on the copy. The generated fts5parse.c is then rewritten with every "yy",
# "YY" and "TOKEN" replaced by "fts5yy", "fts5YY" and "FTS5TOKEN", and the
# result is wrapped in "#ifdef SQLITE_ENABLE_FTS5" ... "#endif".
fts5parse.c: $(TOP)/ext/fts5/fts5parse.y lemon
cp $(TOP)/ext/fts5/fts5parse.y .
rm -f fts5parse.h
./lemon $(OPTS) fts5parse.y
mv fts5parse.c fts5parse.c.orig
echo "#ifdef SQLITE_ENABLE_FTS5" > fts5parse.c
cat fts5parse.c.orig | sed 's/yy/fts5yy/g' | sed 's/YY/fts5YY/g' \
| sed 's/TOKEN/FTS5TOKEN/g' >> fts5parse.c
echo "#endif /* SQLITE_ENABLE_FTS5 */" >> fts5parse.c
# fts5parse.h has no recipe of its own; it only depends on fts5parse.c.
# NOTE(review): presumably lemon writes the header next to fts5parse.c - confirm.
fts5parse.h: fts5parse.c
# fts5.c is regenerated by mkfts5c.tcl whenever a file in FTS5_SRC changes.
fts5.c: $(FTS5_SRC)
$(TCLSH_CMD) $(TOP)/ext/fts5/tool/mkfts5c.tcl
# Compile the generated fts5.c with SQLITE_CORE defined.
fts5.lo: fts5.c $(HDR) $(EXTHDR)
$(LTCOMPILE) -DSQLITE_CORE -c fts5.c
# Rules to build the 'testfixture' application.
#

View File

@ -837,6 +837,7 @@ LIBOBJS0 = vdbe.lo parse.lo alter.lo analyze.lo attach.lo auth.lo \
fts3.lo fts3_aux.lo fts3_expr.lo fts3_hash.lo fts3_icu.lo \
fts3_porter.lo fts3_snippet.lo fts3_tokenizer.lo fts3_tokenizer1.lo \
fts3_tokenize_vtab.lo fts3_unicode.lo fts3_unicode2.lo fts3_write.lo \
fts5.lo \
func.lo global.lo hash.lo \
icu.lo insert.lo journal.lo legacy.lo loadext.lo \
main.lo malloc.lo mem0.lo mem1.lo mem2.lo mem3.lo mem5.lo \
@ -1092,6 +1093,8 @@ TESTEXT = \
$(TOP)\ext\misc\eval.c \
$(TOP)\ext\misc\fileio.c \
$(TOP)\ext\misc\fuzzer.c \
fts5.c \
$(TOP)\ext\fts5\fts5_tcl.c \
$(TOP)\ext\misc\ieee754.c \
$(TOP)\ext\misc\nextchar.c \
$(TOP)\ext\misc\percentile.c \
@ -1681,6 +1684,50 @@ rtree.lo: $(TOP)\ext\rtree\rtree.c $(HDR) $(EXTHDR)
# The prerequisite must name the same file the command compiles; it was
# previously misspelled "sqlite3sesion.c".
sqlite3session.lo: $(TOP)\ext\session\sqlite3session.c $(HDR) $(EXTHDR)
	$(LTCOMPILE) -DSQLITE_CORE -c $(TOP)\ext\session\sqlite3session.c
# FTS5 things
#
FTS5_SRC = \
$(TOP)\ext\fts5\fts5.h \
$(TOP)\ext\fts5\fts5Int.h \
$(TOP)\ext\fts5\fts5_aux.c \
$(TOP)\ext\fts5\fts5_buffer.c \
$(TOP)\ext\fts5\fts5_main.c \
$(TOP)\ext\fts5\fts5_config.c \
$(TOP)\ext\fts5\fts5_expr.c \
$(TOP)\ext\fts5\fts5_hash.c \
$(TOP)\ext\fts5\fts5_index.c \
fts5parse.c fts5parse.h \
$(TOP)\ext\fts5\fts5_storage.c \
$(TOP)\ext\fts5\fts5_tokenize.c \
$(TOP)\ext\fts5\fts5_unicode2.c \
$(TOP)\ext\fts5\fts5_varint.c \
$(TOP)\ext\fts5\fts5_vocab.c
# Build the FTS5 query parser from its lemon grammar. The grammar is copied
# into the build directory, any stale fts5parse.h is deleted and lemon.exe is
# run on the copy. The generated file is moved aside and rewritten into the
# target through three $(NAWK) passes that replace "yy", "YY" and "TOKEN" with
# "fts5yy", "fts5YY" and "FTS5TOKEN"; the output is wrapped in
# "#ifdef SQLITE_ENABLE_FTS5" ... "#endif".
fts5parse.c: $(TOP)\ext\fts5\fts5parse.y lemon.exe
copy $(TOP)\ext\fts5\fts5parse.y .
del /Q fts5parse.h 2>NUL
.\lemon.exe $(REQ_FEATURE_FLAGS) $(OPT_FEATURE_FLAGS) $(OPTS) fts5parse.y
move fts5parse.c fts5parse.c.orig
echo #ifdef SQLITE_ENABLE_FTS5 > $@
type fts5parse.c.orig \
| $(NAWK) "/.*/ { gsub(/yy/,\"fts5yy\");print }" \
| $(NAWK) "/.*/ { gsub(/YY/,\"fts5YY\");print }" \
| $(NAWK) "/.*/ { gsub(/TOKEN/,\"FTS5TOKEN\");print }" >> $@
echo #endif /* SQLITE_ENABLE_FTS5 */ >> $@
# fts5parse.h has no commands of its own; it only depends on fts5parse.c.
fts5parse.h: fts5parse.c
# fts5.c is regenerated by mkfts5c.tcl whenever a file in FTS5_SRC changes.
fts5.c: $(FTS5_SRC)
$(TCLSH_CMD) $(TOP)\ext\fts5\tool\mkfts5c.tcl
# Object built with SQLITE_CORE defined and the core compile options.
fts5.lo: fts5.c $(HDR) $(EXTHDR)
$(LTCOMPILE) $(CORE_COMPILE_OPTS) $(NO_WARN) -DSQLITE_CORE -c fts5.c
# Second object built from the same fts5.c, this time with
# SQLITE_ENABLE_FTS5 defined and without SQLITE_CORE; it is the only input
# of fts5.dll below.
fts5_ext.lo: fts5.c $(HDR) $(EXTHDR)
$(LTCOMPILE) $(NO_WARN) -DSQLITE_ENABLE_FTS5 -c fts5.c
# Link fts5_ext.lo into a DLL.
fts5.dll: fts5_ext.lo
$(LD) $(LDFLAGS) $(LTLINKOPTS) $(LTLIBPATHS) /DLL /OUT:$@ fts5_ext.lo
# Rules to build the 'testfixture' application.
#
@ -1825,6 +1872,7 @@ clean:
del /Q sqlite3_analyzer.exe sqlite3_analyzer.c 2>NUL
del /Q sqlite-*-output.vsix 2>NUL
del /Q fuzzershell.exe fuzzcheck.exe sqldiff.exe 2>NUL
del /Q fts5.c fts5parse.* 2>NUL
# Dynamic link library section.
#

View File

@ -1,77 +1,5 @@
#
# Parameter $zName must be a path to the file UnicodeData.txt. This command
# reads the file and returns a list of mappings required to remove all
# diacritical marks from a unicode string. Each mapping is itself a list
# consisting of two elements - the unicode codepoint and the single ASCII
# character that it should be replaced with, or an empty string if the
# codepoint should simply be removed from the input. Examples:
#
# { 224 a } (replace codepoint 224 to "a")
# { 769 "" } (remove codepoint 769 from input)
#
# Mappings are only returned for non-upper case codepoints. It is assumed
# that the input has already been folded to lower case.
#
proc rd_load_unicodedata_text {zName} {
global tl_lookup_table
set fd [open $zName]
set lField {
code
character_name
general_category
canonical_combining_classes
bidirectional_category
character_decomposition_mapping
decimal_digit_value
digit_value
numeric_value
mirrored
unicode_1_name
iso10646_comment_field
uppercase_mapping
lowercase_mapping
titlecase_mapping
}
set lRet [list]
while { ![eof $fd] } {
set line [gets $fd]
if {$line == ""} continue
set fields [split $line ";"]
if {[llength $fields] != [llength $lField]} { error "parse error: $line" }
foreach $lField $fields {}
if { [llength $character_decomposition_mapping]!=2
|| [string is xdigit [lindex $character_decomposition_mapping 0]]==0
} {
continue
}
set iCode [expr "0x$code"]
set iAscii [expr "0x[lindex $character_decomposition_mapping 0]"]
set iDia [expr "0x[lindex $character_decomposition_mapping 1]"]
if {[info exists tl_lookup_table($iCode)]} continue
if { ($iAscii >= 97 && $iAscii <= 122)
|| ($iAscii >= 65 && $iAscii <= 90)
} {
lappend lRet [list $iCode [string tolower [format %c $iAscii]]]
set dia($iDia) 1
}
}
foreach d [array names dia] {
lappend lRet [list $d ""]
}
set lRet [lsort -integer -index 0 $lRet]
close $fd
set lRet
}
source [file join [file dirname [info script]] parseunicode.tcl]
proc print_rd {map} {
global tl_lookup_table
@ -117,7 +45,7 @@ proc print_rd {map} {
puts "** E\"). The resuls of passing a codepoint that corresponds to an"
puts "** uppercase letter are undefined."
puts "*/"
puts "static int remove_diacritic(int c)\{"
puts "static int ${::remove_diacritic}(int c)\{"
puts " unsigned short aDia\[\] = \{"
puts -nonewline " 0, "
set i 1
@ -204,53 +132,6 @@ proc print_isdiacritic {zFunc map} {
#-------------------------------------------------------------------------
# Parameter $zName must be a path to the file UnicodeData.txt. This command
# reads the file and returns a list of codepoints (integers). The list
# contains all codepoints in the UnicodeData.txt assigned to any "General
# Category" that is not a "Letter" or "Number".
#
proc an_load_unicodedata_text {zName} {
set fd [open $zName]
set lField {
code
character_name
general_category
canonical_combining_classes
bidirectional_category
character_decomposition_mapping
decimal_digit_value
digit_value
numeric_value
mirrored
unicode_1_name
iso10646_comment_field
uppercase_mapping
lowercase_mapping
titlecase_mapping
}
set lRet [list]
while { ![eof $fd] } {
set line [gets $fd]
if {$line == ""} continue
set fields [split $line ";"]
if {[llength $fields] != [llength $lField]} { error "parse error: $line" }
foreach $lField $fields {}
set iCode [expr "0x$code"]
set bAlnum [expr {
[lsearch {L N} [string range $general_category 0 0]] >= 0
|| $general_category=="Co"
}]
if { !$bAlnum } { lappend lRet $iCode }
}
close $fd
set lRet
}
proc an_load_separator_ranges {} {
global unicodedata.txt
set lSep [an_load_unicodedata_text ${unicodedata.txt}]
@ -440,29 +321,6 @@ proc print_test_isalnum {zFunc lRange} {
#-------------------------------------------------------------------------
proc tl_load_casefolding_txt {zName} {
global tl_lookup_table
set fd [open $zName]
while { ![eof $fd] } {
set line [gets $fd]
if {[string range $line 0 0] == "#"} continue
if {$line == ""} continue
foreach x {a b c d} {unset -nocomplain $x}
foreach {a b c d} [split $line ";"] {}
set a2 [list]
set c2 [list]
foreach elem $a { lappend a2 [expr "0x[string trim $elem]"] }
foreach elem $c { lappend c2 [expr "0x[string trim $elem]"] }
set b [string trim $b]
set d [string trim $d]
if {$b=="C" || $b=="S"} { set tl_lookup_table($a2) $c2 }
}
}
proc tl_create_records {} {
global tl_lookup_table
@ -626,19 +484,20 @@ proc print_fold {zFunc} {
tl_print_table_footer toggle
tl_print_ioff_table $liOff
puts {
puts [subst -nocommands {
int ret = c;
assert( c>=0 );
assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );
if( c<128 ){
if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
}else if( c<65536 ){
const struct TableEntry *p;
int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
int iLo = 0;
int iRes = -1;
assert( c>aEntry[0].iCode );
while( iHi>=iLo ){
int iTest = (iHi + iLo) / 2;
int cmp = (c - aEntry[iTest].iCode);
@ -649,19 +508,17 @@ proc print_fold {zFunc} {
iHi = iTest-1;
}
}
assert( iRes<0 || c>=aEntry[iRes].iCode );
if( iRes>=0 ){
const struct TableEntry *p = &aEntry[iRes];
assert( iRes>=0 && c>=aEntry[iRes].iCode );
p = &aEntry[iRes];
if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
assert( ret>0 );
}
}
if( bRemoveDiacritic ) ret = remove_diacritic(ret);
}
if( bRemoveDiacritic ) ret = ${::remove_diacritic}(ret);
}
}]
foreach entry $lHigh {
tl_print_if_entry $entry
@ -732,8 +589,12 @@ proc print_fileheader {} {
*/
}]
puts ""
if {$::generate_fts5_code} {
puts "#if defined(SQLITE_ENABLE_FTS5)"
} else {
puts "#ifndef SQLITE_DISABLE_FTS3_UNICODE"
puts "#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)"
}
puts ""
puts "#include <assert.h>"
puts ""
@ -760,22 +621,40 @@ proc print_test_main {} {
# our liking.
#
proc usage {} {
puts -nonewline stderr "Usage: $::argv0 ?-test? "
puts -nonewline stderr "Usage: $::argv0 ?-test? ?-fts5? "
puts stderr "<CaseFolding.txt file> <UnicodeData.txt file>"
exit 1
}
if {[llength $argv]!=2 && [llength $argv]!=3} usage
if {[llength $argv]==3 && [lindex $argv 0]!="-test"} usage
if {[llength $argv]<2} usage
set unicodedata.txt [lindex $argv end]
set casefolding.txt [lindex $argv end-1]
set generate_test_code [expr {[llength $argv]==3}]
set remove_diacritic remove_diacritic
set generate_test_code 0
set generate_fts5_code 0
set function_prefix "sqlite3Fts"
for {set i 0} {$i < [llength $argv]-2} {incr i} {
switch -- [lindex $argv $i] {
-test {
set generate_test_code 1
}
-fts5 {
set function_prefix sqlite3Fts5
set generate_fts5_code 1
set remove_diacritic fts5_remove_diacritic
}
default {
usage
}
}
}
print_fileheader
# Print the isalnum() function to stdout.
#
set lRange [an_load_separator_ranges]
print_isalnum sqlite3FtsUnicodeIsalnum $lRange
print_isalnum ${function_prefix}UnicodeIsalnum $lRange
# Leave a gap between the two generated C functions.
#
@ -790,22 +669,26 @@ set mappings [rd_load_unicodedata_text ${unicodedata.txt}]
print_rd $mappings
puts ""
puts ""
print_isdiacritic sqlite3FtsUnicodeIsdiacritic $mappings
print_isdiacritic ${function_prefix}UnicodeIsdiacritic $mappings
puts ""
puts ""
# Print the fold() function to stdout.
#
print_fold sqlite3FtsUnicodeFold
print_fold ${function_prefix}UnicodeFold
# Print the test routines and main() function to stdout, if -test
# was specified.
#
if {$::generate_test_code} {
print_test_isalnum sqlite3FtsUnicodeIsalnum $lRange
print_fold_test sqlite3FtsUnicodeFold $mappings
print_test_isalnum ${function_prefix}UnicodeIsalnum $lRange
print_fold_test ${function_prefix}UnicodeFold $mappings
print_test_main
}
if {$generate_fts5_code} {
puts "#endif /* defined(SQLITE_ENABLE_FTS5) */"
} else {
puts "#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */"
puts "#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */"
}

View File

@ -0,0 +1,146 @@
#--------------------------------------------------------------------------
# Parameter $zName must be a path to the file UnicodeData.txt. This command
# reads the file and returns a list of mappings required to remove all
# diacritical marks from a unicode string. Each mapping is itself a list
# consisting of two elements - the unicode codepoint and the single ASCII
# character that it should be replaced with, or an empty string if the
# codepoint should simply be removed from the input. Examples:
#
# { 224 a } (replace codepoint 224 to "a")
# { 769 "" } (remove codepoint 769 from input)
#
# Mappings are only returned for non-upper case codepoints. It is assumed
# that the input has already been folded to lower case.
#
proc rd_load_unicodedata_text {zName} {
  global tl_lookup_table

  # Read the whole file and release the channel straight away. Parsing used
  # to happen while the channel was still open, so the "parse error" raised
  # below leaked it.
  set fd [open $zName]
  set zData [read $fd]
  close $fd

  # Names of the 15 semicolon-separated fields of a UnicodeData.txt record.
  # Each record is unpacked into local variables with these names.
  set lField {
    code
    character_name
    general_category
    canonical_combining_classes
    bidirectional_category
    character_decomposition_mapping
    decimal_digit_value
    digit_value
    numeric_value
    mirrored
    unicode_1_name
    iso10646_comment_field
    uppercase_mapping
    lowercase_mapping
    titlecase_mapping
  }

  set lRet [list]
  foreach line [split $zData "\n"] {
    if {$line eq ""} continue

    set fields [split $line ";"]
    if {[llength $fields] != [llength $lField]} { error "parse error: $line" }
    foreach $lField $fields {}

    # Only two-element decompositions whose first element is a hex number
    # are of interest.
    set lDecomp $character_decomposition_mapping
    if {[llength $lDecomp] != 2} continue
    if {![string is xdigit [lindex $lDecomp 0]]} continue

    # [format %d] converts the hex text to a decimal integer without
    # evaluating file content as an expression; malformed input still
    # raises an error.
    set iCode  [format %d 0x$code]
    set iAscii [format %d 0x[lindex $lDecomp 0]]
    set iDia   [format %d 0x[lindex $lDecomp 1]]

    # Codepoints that already have an entry in tl_lookup_table are skipped.
    if {[info exists tl_lookup_table($iCode)]} continue

    # Keep the mapping only when the base character is an ASCII letter. It
    # is recorded in lower case, and the second element of the decomposition
    # is remembered so that it can be mapped to "" below.
    if { ($iAscii >= 97 && $iAscii <= 122)
      || ($iAscii >= 65 && $iAscii <= 90)
    } {
      lappend lRet [list $iCode [string tolower [format %c $iAscii]]]
      set dia($iDia) 1
    }
  }

  # Every diacritic seen above maps to the empty string.
  foreach d [array names dia] {
    lappend lRet [list $d ""]
  }

  return [lsort -integer -index 0 $lRet]
}
#-------------------------------------------------------------------------
# Parameter $zName must be a path to the file UnicodeData.txt. This command
# reads the file and returns a list of codepoints (integers). The list
# contains all codepoints in the UnicodeData.txt assigned to any "General
# Category" that is not a "Letter" or "Number".
#
proc an_load_unicodedata_text {zName} {
  # Read the whole file and close the channel before parsing, so that the
  # "parse error" raised below cannot leak it.
  set fd [open $zName]
  set zData [read $fd]
  close $fd

  # Names of the 15 semicolon-separated fields of a UnicodeData.txt record.
  # Each record is unpacked into local variables with these names.
  set lField {
    code
    character_name
    general_category
    canonical_combining_classes
    bidirectional_category
    character_decomposition_mapping
    decimal_digit_value
    digit_value
    numeric_value
    mirrored
    unicode_1_name
    iso10646_comment_field
    uppercase_mapping
    lowercase_mapping
    titlecase_mapping
  }

  set lRet [list]
  foreach line [split $zData "\n"] {
    if {$line eq ""} continue

    set fields [split $line ";"]
    if {[llength $fields] != [llength $lField]} { error "parse error: $line" }
    foreach $lField $fields {}

    # Convert the hex codepoint without evaluating file content as an
    # expression.
    set iCode [format %d 0x$code]

    # A codepoint counts as alphanumeric when its general category starts
    # with "L" or "N", or is exactly "Co". Everything else is returned.
    set bAlnum [expr {
         [lsearch {L N} [string range $general_category 0 0]] >= 0
      || $general_category=="Co"
    }]
    if { !$bAlnum } { lappend lRet $iCode }
  }

  return $lRet
}
# Parameter $zName must be a path to a CaseFolding.txt style file: one record
# per line with ";"-separated fields, "#" starting a comment line. For every
# record whose second field is "C" or "S", global array tl_lookup_table is
# given an entry that maps the first field to the third field, both converted
# from hex to decimal.
#
proc tl_load_casefolding_txt {zName} {
  global tl_lookup_table

  # Read the whole file and close the channel; it was previously left open.
  set fd [open $zName]
  set zData [read $fd]
  close $fd

  foreach line [split $zData "\n"] {
    if {[string range $line 0 0] eq "#"} continue
    if {$line eq ""} continue

    foreach x {a b c d} {unset -nocomplain $x}
    foreach {a b c d} [split $line ";"] {}

    # Fields a and c are whitespace-separated lists of hex codepoints.
    # [format %d] converts them without evaluating file content as an
    # expression.
    set a2 [list]
    set c2 [list]
    foreach elem $a { lappend a2 [format %d 0x[string trim $elem]] }
    foreach elem $c { lappend c2 [format %d 0x[string trim $elem]] }
    set b [string trim $b]

    if {$b=="C" || $b=="S"} { set tl_lookup_table($a2) $c2 }
  }
}

View File

@ -0,0 +1,237 @@
#
# 2014 August 24
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#--------------------------------------------------------------------------
#
# This script extracts the documentation for the API used by fts5 auxiliary
# functions from header file fts5.h. It outputs html text on stdout that
# is included in the documentation on the web.
#
# Text collected by [output] when this script runs inside an environment
# that defines the hd_putsnl command.
set ::fts5_docs_output ""

if {[info commands hd_putsnl] ne ""} {
  # hd_putsnl exists: accumulate the generated text instead of printing it.
  proc output {text} {
    append ::fts5_docs_output "$text\n"
  }
} else {
  # No hd_putsnl command: print to stdout, and let the first command line
  # argument (if any) select the extraction mode.
  proc output {text} {
    puts $text
  }
  if {[llength $argv] > 0} {
    set ::extract_api_docs_mode [lindex $argv 0]
  }
}

# Fall back to mode "api" when nothing has selected a mode yet.
if {![info exists ::extract_api_docs_mode]} {
  set ::extract_api_docs_mode api
}

# Load the text of the fts5.h that sits next to this script into $data.
set input_file [file join [file dirname [info script]] fts5.h]
set fd [open $input_file]
set data [read $fd]
close $fd
# Argument $data is the entire text of the fts5.h file. This function
# extracts the definition of the Fts5ExtensionApi structure from it and
# returns a key/value list of structure member names and definitions. i.e.
#
# iVersion {int iVersion} xUserData {void *(*xUserData)(Fts5Context*)} ...
#
proc get_struct_members {data} {
  # Isolate the text between "struct Fts5ExtensionApi {" and the first "};".
  regexp "struct Fts5ExtensionApi {(.*?)};" $data -> zBody

  # Strip C comments so that only declarations remain.
  regsub -all {/[*].*?[*]/} $zBody {} zBody

  set lOut [list]
  foreach zDecl [split $zBody {;}] {
    set zDecl [string trim $zDecl]
    if {$zDecl eq ""} continue

    # The member name defaults to the last word of the declaration. For a
    # function pointer it is replaced by the identifier inside "(*...)".
    catch { set name [lindex $zDecl end] }
    regexp {.*?[(][*]([^)]*)[)]} $zDecl -> name

    lappend lOut $name $zDecl
  }

  return $lOut
}
# Argument $data is the entire text of the fts5.h file and $names is a list
# of structure member names. This function extracts the comment text that
# follows the "EXTENSION API FUNCTIONS" banner and returns it as a list that
# alternates section headers and section text. A header is a member name when
# the section's first word appears in $names, otherwise the trimmed header
# line itself.
#
proc get_struct_docs {data names} {
  # Extract the documentation comment from the fts5.h file.
  regexp {EXTENSION API FUNCTIONS(.*?)[*]/} $data -> docs

  # Initialise the result up front. It used to be created only by the first
  # lappend, so a comment without any section made the final read of $res
  # fail with "no such variable".
  set res [list]
  set current_doc ""
  set current_header ""

  foreach line [split $docs "\n"] {
    # Remove any asterisks at the very start of the line.
    regsub {[*]*} $line {} line

    # NOTE(review): as written this treats every line that begins with a
    # space as section text - confirm that it still separates section text
    # from section headers for the layout of the comment in fts5.h.
    if {[regexp {^ } $line]} {
      append current_doc "$line\n"
    } elseif {[string trim $line]==""} {
      # Blank lines are kept only once a section has started.
      if {$current_header!=""} { append current_doc "\n" }
    } else {
      # A new header line: flush the section collected so far.
      if {$current_doc != ""} {
        lappend res $current_header $current_doc
        set current_doc ""
      }
      set subject n/a
      regexp {^ *([[:alpha:]]*)} $line -> subject
      if {[lsearch $names $subject]>=0} {
        set current_header $subject
      } else {
        set current_header [string trim $line]
      }
    }
  }

  # Flush the final section.
  if {$current_doc != ""} {
    lappend res $current_header $current_doc
  }

  return $res
}
# Argument $data is the entire text of the fts5.h file. This function takes
# the comment text that starts at the first "xCreate:" and runs to the end of
# that comment, and returns it formatted as an HTML <dl> list: lines that look
# like "xName:" become <dt> entries, blank lines become <p>, and every other
# line is copied through.
#
proc get_tokenizer_docs {data} {
  regexp {(xCreate:.*?)[*]/} $data -> zDocs

  set lPiece [list "<dl>\n"]
  foreach zLine [split [string trim $zDocs] "\n"] {
    # Drop everything up to and including the "**" comment prefix, if any.
    regexp {[*][*](.*)} $zLine -> zLine

    if {[regexp {^ ?x.*:} $zLine]} {
      lappend lPiece "<dt><b>$zLine</b></dt><dd><p style=margin-top:0>\n"
    } elseif {[string trim $zLine] eq ""} {
      lappend lPiece "<p>\n"
    } else {
      lappend lPiece "$zLine\n"
    }
  }
  lappend lPiece "</dl>\n"

  return [join $lPiece ""]
}
# Argument $data is the entire text of the fts5.h file. This function writes,
# via [output], one HTML section for each entry returned by get_struct_docs:
# a rule, a heading and the section text split into <p> and <codeblock>
# elements.
#
proc get_api_docs {data} {
  # Local array M maps each Fts5ExtensionApi member name to its
  # declaration. i.e.
  #
  #   iVersion  -> {int iVersion}
  #   xUserData -> {void *(*xUserData)(Fts5Context*)}
  #   ...
  #
  array set M [get_struct_members $data]

  # Local list D alternates section names and documentation text. A section
  # name is a structure member name whenever get_struct_docs recognised one.
  #
  set D [get_struct_docs $data [array names M]]

  foreach {sub docs} $D {
    if {[info exists M($sub)]} {
      set hdr $M($sub)
      set link " id=$sub"
    } else {
      # Not a structure member: use the section title itself as the heading.
      # Previously hdr was left unset here (an error on the first section)
      # or still held the declaration of the previous section.
      set hdr $sub
      set link ""
    }

    # NOTE(review): "#eeeee" has only five hex digits - confirm whether
    # "#eeeeee" was intended.
    output "<hr color=#eeeee style=\"margin:1em 8.4ex 0 8.4ex;\"$link>"
    set style "padding-left:6ex;font-size:1.4em;display:block"
    output "<h style=\"$style\"><pre>$hdr</pre></h>"

    # $mode is the name of the element currently open ("p", "codeblock" or
    # "" for none). A blank line closes it; the next non-blank line opens a
    # codeblock if it starts with a space and a paragraph otherwise.
    set mode ""
    foreach line [split [string trim $docs] "\n"] {
      if {[string trim $line]==""} {
        if {$mode != ""} {output "</$mode>"}
        set mode ""
      } elseif {$mode == ""} {
        if {[regexp {^ } $line]} {
          set mode codeblock
        } else {
          set mode p
        }
        output "<$mode>"
      }
      output $line
    }
    if {$mode != ""} {output "</$mode>"}
  }
}
# Return the lines of $data that lie between a line matching regular
# expression $start and the next line matching $end (both inclusive). All
# such regions are concatenated, each line followed by a newline, and C
# comment delimiters are wrapped in <i>...</i>.
#
proc get_fts5_struct {data start end} {
  set zOut ""
  set bInside 0
  foreach zLine [split $data "\n"] {
    # Outside a region, only a line matching $start is of interest.
    if {!$bInside && ![regexp $start $zLine]} continue

    append zOut "$zLine\n"

    # The region stays open until a line matches $end. The opening line is
    # tested too, so a line matching both patterns forms a one-line region.
    set bInside [expr {![regexp $end $zLine]}]
  }

  return [string map [list /* <i>/* */ */</i>] $zOut]
}
# Produce the output selected by ::extract_api_docs_mode from $data, the
# text of fts5.h. Unrecognised modes produce nothing.
#
proc main {data} {
  set zMode $::extract_api_docs_mode
  switch $zMode {
    api {
      get_api_docs $data
    }

    tokenizer_api {
      output [get_tokenizer_docs $data]
    }

    Fts5ExtensionApi {
      # Emit the structure definition with every member whose name starts
      # with "x" turned into a link to the anchor of the same name.
      set zStruct [get_fts5_struct $data "^struct Fts5ExtensionApi" "^.;"]
      set lLink [list]
      foreach {zName zDecl} [get_struct_members $data] {
        if {[string match x* $zName]} {
          lappend lLink $zName "<a href=#$zName>$zName</a>"
        }
      }
      output [string map $lLink $zStruct]
    }

    fts5_extension {
      output [get_fts5_struct $data "typedef.*Fts5ExtensionApi" "^.;"]
    }

    fts5_tokenizer {
      output [get_fts5_struct $data "typedef struct Fts5Tokenizer" "^\};"]
    }

    fts5_api {
      output [get_fts5_struct $data "typedef struct fts5_api" "^\};"]
    }
  }
}
# Generate the output for the mode selected in ::extract_api_docs_mode.
main $data
# Make the accumulated text the result of the script's final command.
# NOTE(review): presumably read by whatever evaluates this script when the
# hd_putsnl command exists - confirm against the caller.
set ::fts5_docs_output

325
ext/fts5/fts5.h Normal file
View File

@ -0,0 +1,325 @@
/*
** 2014 May 31
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
** Interfaces to extend FTS5. Using the interfaces defined in this file,
** FTS5 may be extended with:
**
** * custom tokenizers, and
** * custom auxiliary functions.
*/
#ifndef _FTS5_H
#define _FTS5_H
#include "sqlite3.h"
/*************************************************************************
** CUSTOM AUXILIARY FUNCTIONS
**
** Virtual table implementations may overload SQL functions by implementing
** the sqlite3_module.xFindFunction() method.
*/
typedef struct Fts5ExtensionApi Fts5ExtensionApi;
typedef struct Fts5Context Fts5Context;
typedef void (*fts5_extension_function)(
const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
Fts5Context *pFts, /* First arg to pass to pApi functions */
sqlite3_context *pCtx, /* Context for returning result/error */
int nVal, /* Number of values in apVal[] array */
sqlite3_value **apVal /* Array of trailing arguments */
);
/*
** EXTENSION API FUNCTIONS
**
** xUserData(pFts):
** Return a copy of the context pointer the extension function was
** registered with.
**
** xColumnTotalSize(pFts, iCol, pnToken):
** If parameter iCol is less than zero, set output variable *pnToken
** to the total number of tokens in the FTS5 table. Or, if iCol is
** non-negative but less than the number of columns in the table, set
** *pnToken to the total number of tokens in column iCol, considering
** all rows in the FTS5 table.
**
** If parameter iCol is greater than or equal to the number of columns
** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g.
** an OOM condition or IO error), an appropriate SQLite error code is
** returned.
**
** xColumnCount:
** Returns the number of columns in the FTS5 table.
**
** xColumnSize:
** Reports the size in tokens of a column value from the current row.
**
** xColumnText:
** This function attempts to retrieve the text of column iCol of the
** current document. If successful, (*pz) is set to point to a buffer
** containing the text in utf-8 encoding, (*pn) is set to the size in bytes
** (not characters) of the buffer and SQLITE_OK is returned. Otherwise,
** if an error occurs, an SQLite error code is returned and the final values
** of (*pz) and (*pn) are undefined.
**
** xPhraseCount:
** Returns the number of phrases in the current query expression.
**
** xPhraseSize:
** Returns the number of tokens in phrase iPhrase of the query. Phrases
** are numbered starting from zero.
**
** xInstCount:
** Set *pnInst to the total number of occurrences of all phrases within
** the query within the current row. Return SQLITE_OK if successful, or
** an error code (i.e. SQLITE_NOMEM) if an error occurs.
**
** xInst:
** Query for the details of phrase match iIdx within the current row.
** Phrase matches are numbered starting from zero, so the iIdx argument
** should be greater than or equal to zero and smaller than the value
** output by xInstCount().
**
** Returns SQLITE_OK if successful, or an error code (i.e. SQLITE_NOMEM)
** if an error occurs.
**
** xRowid:
** Returns the rowid of the current row.
**
** xTokenize:
** Tokenize text using the tokenizer belonging to the FTS5 table.
**
** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback):
** This API function is used to query the FTS table for phrase iPhrase
** of the current query. Specifically, a query equivalent to:
**
** ... FROM ftstable WHERE ftstable MATCH $p ORDER BY rowid
**
** with $p set to a phrase equivalent to the phrase iPhrase of the
** current query is executed. For each row visited, the callback function
** passed as the fourth argument is invoked. The context and API objects
** passed to the callback function may be used to access the properties of
** each matched row. Invoking Api.xUserData() returns a copy of the pointer
** passed as the third argument to pUserData.
**
** If the callback function returns any value other than SQLITE_OK, the
** query is abandoned and the xQueryPhrase function returns immediately.
** If the returned value is SQLITE_DONE, xQueryPhrase returns SQLITE_OK.
** Otherwise, the error code is propagated upwards.
**
** If the query runs to completion without incident, SQLITE_OK is returned.
** Or, if some error occurs before the query completes or is aborted by
** the callback, an SQLite error code is returned.
**
**
** xSetAuxdata(pFts5, pAux, xDelete)
**
** Save the pointer passed as the second argument as the extension function's
** "auxiliary data". The pointer may then be retrieved by the current or any
** future invocation of the same fts5 extension function made as part of
** the same MATCH query using the xGetAuxdata() API.
**
** Each extension function is allocated a single auxiliary data slot for
** each FTS query (MATCH expression). If the extension function is invoked
** more than once for a single FTS query, then all invocations share a
** single auxiliary data context.
**
** If there is already an auxiliary data pointer when this function is
** invoked, then it is replaced by the new pointer. If an xDelete callback
** was specified along with the original pointer, it is invoked at this
** point.
**
** The xDelete callback, if one is specified, is also invoked on the
** auxiliary data pointer after the FTS5 query has finished.
**
** If an error (e.g. an OOM condition) occurs within this function,
** the auxiliary data is set to NULL and an error code returned. If the
** xDelete parameter was not NULL, it is invoked on the auxiliary data
** pointer before returning.
**
**
** xGetAuxdata(pFts5, bClear)
**
** Returns the current auxiliary data pointer for the fts5 extension
** function. See the xSetAuxdata() method for details.
**
** If the bClear argument is non-zero, then the auxiliary data is cleared
** (set to NULL) before this function returns. In this case the xDelete,
** if any, is not invoked.
**
**
** xRowCount(pFts5, pnRow)
**
** This function is used to retrieve the total number of rows in the table.
** In other words, the same value that would be returned by:
**
** SELECT count(*) FROM ftstable;
*/
/*
** Method table handed to auxiliary functions. A pointer to it is the first
** argument of every fts5_extension_function, and each method takes the
** Fts5Context pointer (the second argument of the auxiliary function) as
** its own first argument. The individual methods are described in the
** comment that precedes this structure.
*/
struct Fts5ExtensionApi {
int iVersion; /* Currently always set to 1 */
void *(*xUserData)(Fts5Context*);
int (*xColumnCount)(Fts5Context*);
int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow);
int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken);
int (*xTokenize)(Fts5Context*,
const char *pText, int nText, /* Text to tokenize */
void *pCtx, /* Context passed to xToken() */
int (*xToken)(void*, const char*, int, int, int) /* Callback */
);
int (*xPhraseCount)(Fts5Context*);
int (*xPhraseSize)(Fts5Context*, int iPhrase);
int (*xInstCount)(Fts5Context*, int *pnInst);
int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff);
sqlite3_int64 (*xRowid)(Fts5Context*);
int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn);
int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken);
int (*xQueryPhrase)(Fts5Context*, int iPhrase, void *pUserData,
/* Callback invoked for each row visited by the phrase query */
int(*)(const Fts5ExtensionApi*,Fts5Context*,void*)
);
int (*xSetAuxdata)(Fts5Context*, void *pAux, void(*xDelete)(void*));
void *(*xGetAuxdata)(Fts5Context*, int bClear);
};
/*
** END OF CUSTOM AUXILIARY FUNCTIONS
*************************************************************************/
/*************************************************************************
** CUSTOM TOKENIZERS
**
** Applications may also register custom tokenizer types. A tokenizer
** is registered by providing fts5 with a populated instance of the
** following structure. All structure methods must be defined, setting
** any member of the fts5_tokenizer struct to NULL leads to undefined
** behaviour. The structure methods are expected to function as follows:
**
** xCreate:
** This function is used to allocate and initialize a tokenizer instance.
** A tokenizer instance is required to actually tokenize text.
**
** The first argument passed to this function is a copy of the (void*)
** pointer provided by the application when the fts5_tokenizer object
** was registered with FTS5 (the third argument to xCreateTokenizer()).
** The second and third arguments are an array of nul-terminated strings
** containing the tokenizer arguments, if any, specified following the
** tokenizer name as part of the CREATE VIRTUAL TABLE statement used
** to create the FTS5 table.
**
** The final argument is an output variable. If successful, (*ppOut)
** should be set to point to the new tokenizer handle and SQLITE_OK
** returned. If an error occurs, some value other than SQLITE_OK should
** be returned. In this case, fts5 assumes that the final value of *ppOut
** is undefined.
**
** xDelete:
** This function is invoked to delete a tokenizer handle previously
** allocated using xCreate(). Fts5 guarantees that this function will
** be invoked exactly once for each successful call to xCreate().
**
** xTokenize:
** This function is expected to tokenize the nText byte string indicated
** by argument pText. pText may or may not be nul-terminated. The first argument
** passed to this function is a pointer to an Fts5Tokenizer object returned
** by an earlier call to xCreate().
**
** For each token in the input string, the supplied callback xToken() must
** be invoked. The first argument to it should be a copy of the pointer
** passed as the second argument to xTokenize(). The next two arguments
** are a pointer to a buffer containing the token text, and the size of
** the token in bytes. The 4th and 5th arguments are the byte offsets of
** the first byte of and first byte immediately following the text from
** which the token is derived within the input.
**
** FTS5 assumes the xToken() callback is invoked for each token in the
** order that they occur within the input text.
**
** If an xToken() callback returns any value other than SQLITE_OK, then
** the tokenization should be abandoned and the xTokenize() method should
** immediately return a copy of the xToken() return value. Or, if the
** input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally,
** if an error occurs with the xTokenize() implementation itself, it
** may abandon the tokenization and return any error code other than
** SQLITE_OK or SQLITE_DONE.
**
*/
typedef struct Fts5Tokenizer Fts5Tokenizer;
typedef struct fts5_tokenizer fts5_tokenizer;
/*
** Methods of a custom tokenizer implementation. The comment that precedes
** this structure describes what each method is expected to do; it also
** states that no member may be left NULL.
*/
struct fts5_tokenizer {
/* Allocate a tokenizer handle; azArg/nArg are the tokenizer arguments */
int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut);
/* Free a handle previously returned through xCreate() */
void (*xDelete)(Fts5Tokenizer*);
/* Tokenize the nText bytes at pText, invoking xToken() once per token */
int (*xTokenize)(Fts5Tokenizer*,
void *pCtx,
const char *pText, int nText,
int (*xToken)(
void *pCtx, /* Copy of 2nd argument to xTokenize() */
const char *pToken, /* Pointer to buffer containing token */
int nToken, /* Size of token in bytes */
int iStart, /* Byte offset of token within input text */
int iEnd /* Byte offset of end of token within input text */
)
);
};
/*
** END OF CUSTOM TOKENIZERS
*************************************************************************/
/*************************************************************************
** FTS5 EXTENSION REGISTRATION API
*/
/*
** Table of methods used to register extensions (tokenizers and auxiliary
** functions) with FTS5. Every method takes the fts5_api object itself as
** its first argument.
*/
typedef struct fts5_api fts5_api;
struct fts5_api {
int iVersion; /* Currently always set to 1 */
/* Create a new tokenizer */
int (*xCreateTokenizer)(
fts5_api *pApi,
const char *zName,
void *pContext,
fts5_tokenizer *pTokenizer,
void (*xDestroy)(void*)
);
/* Find an existing tokenizer */
/* NOTE(review): ppContext and pTokenizer look like output parameters
** that receive the registered context and method table - confirm. */
int (*xFindTokenizer)(
fts5_api *pApi,
const char *zName,
void **ppContext,
fts5_tokenizer *pTokenizer
);
/* Create a new auxiliary function */
int (*xCreateFunction)(
fts5_api *pApi,
const char *zName,
void *pContext,
fts5_extension_function xFunction,
void (*xDestroy)(void*)
);
};
/*
** END OF REGISTRATION API
*************************************************************************/
#endif /* _FTS5_H */

696
ext/fts5/fts5Int.h Normal file
View File

@ -0,0 +1,696 @@
/*
** 2014 May 31
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
*/
#ifndef _FTS5INT_H
#define _FTS5INT_H
#ifdef SQLITE_ENABLE_FTS5
#include "fts5.h"
#include "sqlite3ext.h"
SQLITE_EXTENSION_INIT1
#include <string.h>
#include <assert.h>
#ifndef SQLITE_AMALGAMATION
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned short u16;
typedef sqlite3_int64 i64;
typedef sqlite3_uint64 u64;
/*
** Fallback utility macros, compiled only when SQLITE_AMALGAMATION is not
** defined (see the enclosing #ifndef).
**
** ArraySize(x): number of elements in array x. The argument is
**               parenthesized so that expressions such as *pArray work.
** testcase(x):  expands to nothing.
** ALWAYS(x):    marks a condition believed to be always true.
** NEVER(x):     marks a condition believed to be always false.
** MIN(x,y):     the smaller of x and y (arguments may be evaluated twice).
**
** ALWAYS() and NEVER() evaluate to their argument rather than to a
** constant. Expanding them to 1 and 0 would discard the condition, and any
** side effect inside it, turning each defensive check into dead code.
*/
#define ArraySize(x) (sizeof(x) / sizeof((x)[0]))
#define testcase(x)
#define ALWAYS(x) (x)
#define NEVER(x) (x)
#define MIN(x,y) (((x) < (y)) ? (x) : (y))
#endif
/*
** Maximum number of prefix indexes on single FTS5 table. This must be
** less than 32. If it is set to anything larger than that, an #error
** directive in fts5_index.c will cause the build to fail.
*/
#define FTS5_MAX_PREFIX_INDEXES 31
#define FTS5_DEFAULT_NEARDIST 10
#define FTS5_DEFAULT_RANK "bm25"
/* Name of rank and rowid columns */
#define FTS5_RANK_NAME "rank"
#define FTS5_ROWID_NAME "rowid"
#ifdef SQLITE_DEBUG
# define FTS5_CORRUPT sqlite3Fts5Corrupt()
int sqlite3Fts5Corrupt(void);
#else
# define FTS5_CORRUPT SQLITE_CORRUPT_VTAB
#endif
/*
** The assert_nc() macro is similar to the assert() macro, except that it
** is used for assert() conditions that are true only if it can be
** guaranteed that the database is not corrupt.
*/
#ifdef SQLITE_DEBUG
extern int sqlite3_fts5_may_be_corrupt;
# define assert_nc(x) assert(sqlite3_fts5_may_be_corrupt || (x))
#else
# define assert_nc(x) assert(x)
#endif
typedef struct Fts5Global Fts5Global;
/**************************************************************************
** Interface to code in fts5_config.c. fts5_config.c contains code
** to parse the arguments passed to the CREATE VIRTUAL TABLE statement.
*/
typedef struct Fts5Config Fts5Config;
/*
** An instance of the following structure encodes all information that can
** be gleaned from the CREATE VIRTUAL TABLE statement.
**
** And all information loaded from the %_config table.
**
** nAutomerge:
** The minimum number of segments that an auto-merge operation should
** attempt to merge together. A value of 1 sets the object to use the
** compile time default. Zero disables auto-merge altogether.
**
** zContent:
**
** zContentRowid:
** The value of the content_rowid= option, if one was specified. Or
** the string "rowid" otherwise. This text is not quoted - if it is
** used as part of an SQL statement it needs to be quoted appropriately.
**
** zContentExprlist:
**
** pzErrmsg:
** This exists in order to allow the fts5_index.c module to return a
** decent error message if it encounters a file-format version it does
** not understand.
**
** bColumnsize:
** True if the %_docsize table is created.
**
*/
struct Fts5Config {
sqlite3 *db; /* Database handle */
char *zDb; /* Database holding FTS index (e.g. "main") */
char *zName; /* Name of FTS index */
int nCol; /* Number of columns */
char **azCol; /* Column names */
u8 *abUnindexed; /* True for unindexed columns */
int nPrefix; /* Number of prefix indexes */
int *aPrefix; /* Sizes in bytes of nPrefix prefix indexes */
int eContent; /* An FTS5_CONTENT value (NORMAL, NONE or EXTERNAL) */
char *zContent; /* content table */
char *zContentRowid; /* "content_rowid=" option value */
int bColumnsize; /* "columnsize=" option value (dflt==1) */
/* NOTE(review): undocumented. The name suggests an SQL expression list
** used when reading the content table - confirm in fts5_config.c. */
char *zContentExprlist;
/* Tokenizer handle and its method table. Presumably both are obtained
** via sqlite3Fts5GetTokenizer() - TODO confirm. */
Fts5Tokenizer *pTok;
fts5_tokenizer *pTokApi;
/* Values loaded from the %_config table */
int iCookie; /* Incremented when %_config is modified */
int pgsz; /* Approximate page size used in %_data */
int nAutomerge; /* 'automerge' setting */
int nCrisisMerge; /* Maximum allowed segments per level */
char *zRank; /* Name of rank function */
char *zRankArgs; /* Arguments to rank function */
/* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */
char **pzErrmsg;
};
/* Current expected value of %_config table 'version' field */
#define FTS5_CURRENT_VERSION 2
#define FTS5_CONTENT_NORMAL 0
#define FTS5_CONTENT_NONE 1
#define FTS5_CONTENT_EXTERNAL 2
int sqlite3Fts5ConfigParse(
Fts5Global*, sqlite3*, int, const char **, Fts5Config**, char**
);
void sqlite3Fts5ConfigFree(Fts5Config*);
int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig);
int sqlite3Fts5Tokenize(
Fts5Config *pConfig, /* FTS5 Configuration object */
const char *pText, int nText, /* Text to tokenize */
void *pCtx, /* Context passed to xToken() */
int (*xToken)(void*, const char*, int, int, int) /* Callback */
);
void sqlite3Fts5Dequote(char *z);
/* Load the contents of the %_config table */
int sqlite3Fts5ConfigLoad(Fts5Config*, int);
/* Set the value of a single config attribute */
int sqlite3Fts5ConfigSetValue(Fts5Config*, const char*, sqlite3_value*, int*);
int sqlite3Fts5ConfigParseRank(const char*, char**, char**);
/*
** End of interface to code in fts5_config.c.
**************************************************************************/
/**************************************************************************
** Interface to code in fts5_buffer.c.
*/
/*
** Buffer object for the incremental building of string data.
*/
typedef struct Fts5Buffer Fts5Buffer;
struct Fts5Buffer {
/* NOTE(review): the member roles below are inferred from their names
** only; confirm against fts5_buffer.c. */
u8 *p; /* presumably the buffer contents */
int n; /* presumably the number of bytes in use at p[] */
int nSpace; /* presumably the number of bytes allocated at p[] */
};
int sqlite3Fts5BufferGrow(int*, Fts5Buffer*, int);
void sqlite3Fts5BufferAppendVarint(int*, Fts5Buffer*, i64);
void sqlite3Fts5BufferAppendBlob(int*, Fts5Buffer*, int, const u8*);
void sqlite3Fts5BufferAppendString(int *, Fts5Buffer*, const char*);
void sqlite3Fts5BufferFree(Fts5Buffer*);
void sqlite3Fts5BufferZero(Fts5Buffer*);
void sqlite3Fts5BufferSet(int*, Fts5Buffer*, int, const u8*);
void sqlite3Fts5BufferAppendPrintf(int *, Fts5Buffer*, char *zFmt, ...);
void sqlite3Fts5BufferAppend32(int*, Fts5Buffer*, int);
char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...);
#define fts5BufferZero(x) sqlite3Fts5BufferZero(x)
#define fts5BufferGrow(a,b,c) sqlite3Fts5BufferGrow(a,b,c)
#define fts5BufferAppendVarint(a,b,c) sqlite3Fts5BufferAppendVarint(a,b,c)
#define fts5BufferFree(a) sqlite3Fts5BufferFree(a)
#define fts5BufferAppendBlob(a,b,c,d) sqlite3Fts5BufferAppendBlob(a,b,c,d)
#define fts5BufferSet(a,b,c,d) sqlite3Fts5BufferSet(a,b,c,d)
#define fts5BufferAppend32(a,b,c) sqlite3Fts5BufferAppend32(a,b,c)
/* Write and decode big-endian 32-bit integer values */
void sqlite3Fts5Put32(u8*, int);
int sqlite3Fts5Get32(const u8*);
/*
** Split a 64-bit position value into its column number (upper 32 bits) and
** its offset (lower 32 bits). The argument and the whole expansion are
** parenthesized so that the macros remain correct when passed a compound
** expression such as (a + b).
*/
#define FTS5_POS2COLUMN(iPos) ((int)((iPos) >> 32))
#define FTS5_POS2OFFSET(iPos) ((int)((iPos) & 0xFFFFFFFF))
/*
** Iterator over the entries of a position list. An object of this type is
** set up by sqlite3Fts5PoslistReaderInit() and then passed to
** sqlite3Fts5PoslistReaderNext().
*/
typedef struct Fts5PoslistReader Fts5PoslistReader;
struct Fts5PoslistReader {
/* Variables used only by sqlite3Fts5PoslistReaderXXX() functions. */
int iCol; /* If (iCol>=0), this column only */
const u8 *a; /* Position list to iterate through */
int n; /* Size of buffer at a[] in bytes */
int i; /* Current offset in a[] */
/* Output variables */
int bEof; /* Set to true at EOF */
i64 iPos; /* (iCol<<32) + iPos */
};
int sqlite3Fts5PoslistReaderInit(
int iCol, /* If (iCol>=0), this column only */
const u8 *a, int n, /* Poslist buffer to iterate through */
Fts5PoslistReader *pIter /* Iterator object to initialize */
);
int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader*);
/*
** State object passed to sqlite3Fts5PoslistWriterAppend().
*/
typedef struct Fts5PoslistWriter Fts5PoslistWriter;
struct Fts5PoslistWriter {
i64 iPrev; /* NOTE(review): presumably the previous position written - confirm */
};
int sqlite3Fts5PoslistWriterAppend(Fts5Buffer*, Fts5PoslistWriter*, i64);
int sqlite3Fts5PoslistNext64(
const u8 *a, int n, /* Buffer containing poslist */
int *pi, /* IN/OUT: Offset within a[] */
i64 *piOff /* IN/OUT: Current offset */
);
/* Malloc utility */
void *sqlite3Fts5MallocZero(int *pRc, int nByte);
char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn);
/* Character set tests (like isspace(), isalpha() etc.) */
int sqlite3Fts5IsBareword(char t);
/*
** End of interface to code in fts5_buffer.c.
**************************************************************************/
/**************************************************************************
** Interface to code in fts5_index.c. fts5_index.c contains code
** to access the data stored in the %_data table.
*/
typedef struct Fts5Index Fts5Index;
typedef struct Fts5IndexIter Fts5IndexIter;
/*
** Values used as part of the flags argument passed to IndexQuery().
*/
#define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */
#define FTS5INDEX_QUERY_DESC 0x0002 /* Docs in descending rowid order */
#define FTS5INDEX_QUERY_TEST_NOIDX 0x0004 /* Do not use prefix index */
#define FTS5INDEX_QUERY_SCAN 0x0008 /* Scan query (fts5vocab) */
/*
** Create/destroy an Fts5Index object.
*/
int sqlite3Fts5IndexOpen(Fts5Config *pConfig, int bCreate, Fts5Index**, char**);
int sqlite3Fts5IndexClose(Fts5Index *p);
/*
** for(
** rc = sqlite3Fts5IndexQuery(p, "token", 5, 0, &pIter);
** rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIter);
** rc = sqlite3Fts5IterNext(pIter)
** ){
** i64 iRowid = sqlite3Fts5IterRowid(pIter);
** }
*/
/*
** Open a new iterator to iterate through all docids that match the
** specified token or token prefix.
*/
int sqlite3Fts5IndexQuery(
Fts5Index *p, /* FTS index to query */
const char *pToken, int nToken, /* Token (or prefix) to query for */
int flags, /* Mask of FTS5INDEX_QUERY_X flags */
Fts5IndexIter **ppIter
);
/*
** The various operations on open token or token prefix iterators opened
** using sqlite3Fts5IndexQuery().
*/
int sqlite3Fts5IterEof(Fts5IndexIter*);
int sqlite3Fts5IterNext(Fts5IndexIter*);
int sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch);
i64 sqlite3Fts5IterRowid(Fts5IndexIter*);
int sqlite3Fts5IterPoslist(Fts5IndexIter*, const u8 **pp, int *pn, i64 *pi);
int sqlite3Fts5IterPoslistBuffer(Fts5IndexIter *pIter, Fts5Buffer *pBuf);
/*
** Close an iterator opened by sqlite3Fts5IndexQuery().
*/
void sqlite3Fts5IterClose(Fts5IndexIter*);
/*
** This interface is used by the fts5vocab module.
*/
const char *sqlite3Fts5IterTerm(Fts5IndexIter*, int*);
int sqlite3Fts5IterNextScan(Fts5IndexIter*);
/*
** Insert or remove data to or from the index. Each time a document is
** added to or removed from the index, this function is called one or more
** times.
**
** For an insert, it must be called once for each token in the new document.
** If the operation is a delete, it must be called (at least) once for each
** unique token in the document with an iCol value less than zero. The iPos
** argument is ignored for a delete.
*/
int sqlite3Fts5IndexWrite(
Fts5Index *p, /* Index to write to */
int iCol, /* Column token appears in (-ve -> delete) */
int iPos, /* Position of token within column */
const char *pToken, int nToken /* Token to add or remove to or from index */
);
/*
** Indicate that subsequent calls to sqlite3Fts5IndexWrite() pertain to
** document iDocid.
*/
int sqlite3Fts5IndexBeginWrite(
Fts5Index *p, /* Index to write to */
i64 iDocid /* Docid to add or remove data from */
);
/*
** Flush any data stored in the in-memory hash tables to the database.
** If the bCommit flag is true, also close any open blob handles.
*/
int sqlite3Fts5IndexSync(Fts5Index *p, int bCommit);
/*
** Discard any data stored in the in-memory hash tables. Do not write it
** to the database. Additionally, assume that the contents of the %_data
** table may have changed on disk. So any in-memory caches of %_data
** records must be invalidated.
*/
int sqlite3Fts5IndexRollback(Fts5Index *p);
/*
** Retrieve and clear the current error code, respectively.
*/
int sqlite3Fts5IndexErrcode(Fts5Index*);
void sqlite3Fts5IndexReset(Fts5Index*);
/*
** Get or set the "averages" record.
*/
int sqlite3Fts5IndexGetAverages(Fts5Index *p, Fts5Buffer *pBuf);
int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8*, int);
/*
** Functions called by the storage module as part of integrity-check.
*/
u64 sqlite3Fts5IndexCksum(Fts5Config*,i64,int,int,const char*,int);
int sqlite3Fts5IndexIntegrityCheck(Fts5Index*, u64 cksum);
/*
** Called during virtual module initialization to register UDF
** fts5_decode() with SQLite
*/
int sqlite3Fts5IndexInit(sqlite3*);
int sqlite3Fts5IndexSetCookie(Fts5Index*, int);
/*
** Return the total number of entries read from the %_data table by
** this connection since it was created.
*/
int sqlite3Fts5IndexReads(Fts5Index *p);
int sqlite3Fts5IndexReinit(Fts5Index *p);
int sqlite3Fts5IndexOptimize(Fts5Index *p);
int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge);
int sqlite3Fts5IndexLoadConfig(Fts5Index *p);
/*
** End of interface to code in fts5_index.c.
**************************************************************************/
/**************************************************************************
** Interface to code in fts5_varint.c.
*/
int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v);
int sqlite3Fts5GetVarintLen(u32 iVal);
u8 sqlite3Fts5GetVarint(const unsigned char*, u64*);
int sqlite3Fts5PutVarint(unsigned char *p, u64 v);
#define fts5GetVarint32(a,b) sqlite3Fts5GetVarint32(a,(u32*)&b)
#define fts5GetVarint sqlite3Fts5GetVarint
/*
** End of interface to code in fts5_varint.c.
**************************************************************************/
/**************************************************************************
** Interface to code in fts5.c.
*/
int sqlite3Fts5GetTokenizer(
Fts5Global*,
const char **azArg,
int nArg,
Fts5Tokenizer**,
fts5_tokenizer**,
char **pzErr
);
Fts5Index *sqlite3Fts5IndexFromCsrid(Fts5Global*, i64, int*);
/*
** End of interface to code in fts5.c.
**************************************************************************/
/**************************************************************************
** Interface to code in fts5_hash.c.
*/
typedef struct Fts5Hash Fts5Hash;
/*
** Create a hash table, free a hash table.
*/
int sqlite3Fts5HashNew(Fts5Hash**, int *pnSize);
void sqlite3Fts5HashFree(Fts5Hash*);
int sqlite3Fts5HashWrite(
Fts5Hash*,
i64 iRowid, /* Rowid for this entry */
int iCol, /* Column token appears in (-ve -> delete) */
int iPos, /* Position of token within column */
char bByte,
const char *pToken, int nToken /* Token to add or remove to or from index */
);
/*
** Empty (but do not delete) a hash table.
*/
void sqlite3Fts5HashClear(Fts5Hash*);
int sqlite3Fts5HashQuery(
Fts5Hash*, /* Hash table to query */
const char *pTerm, int nTerm, /* Query term */
const u8 **ppDoclist, /* OUT: Pointer to doclist for pTerm */
int *pnDoclist /* OUT: Size of doclist in bytes */
);
int sqlite3Fts5HashScanInit(
Fts5Hash*, /* Hash table to query */
const char *pTerm, int nTerm /* Query prefix */
);
void sqlite3Fts5HashScanNext(Fts5Hash*);
int sqlite3Fts5HashScanEof(Fts5Hash*);
void sqlite3Fts5HashScanEntry(Fts5Hash *,
const char **pzTerm, /* OUT: term (nul-terminated) */
const u8 **ppDoclist, /* OUT: pointer to doclist */
int *pnDoclist /* OUT: size of doclist in bytes */
);
/*
** End of interface to code in fts5_hash.c.
**************************************************************************/
/**************************************************************************
** Interface to code in fts5_storage.c. fts5_storage.c contains
** code to access the data stored in the %_content and %_docsize tables.
*/
#define FTS5_STMT_SCAN_ASC 0 /* SELECT rowid, * FROM ... ORDER BY 1 ASC */
#define FTS5_STMT_SCAN_DESC 1 /* SELECT rowid, * FROM ... ORDER BY 1 DESC */
#define FTS5_STMT_LOOKUP 2 /* SELECT rowid, * FROM ... WHERE rowid=? */
typedef struct Fts5Storage Fts5Storage;
int sqlite3Fts5StorageOpen(Fts5Config*, Fts5Index*, int, Fts5Storage**, char**);
int sqlite3Fts5StorageClose(Fts5Storage *p);
int sqlite3Fts5StorageRename(Fts5Storage*, const char *zName);
int sqlite3Fts5DropAll(Fts5Config*);
int sqlite3Fts5CreateTable(Fts5Config*, const char*, const char*, int, char **);
int sqlite3Fts5StorageDelete(Fts5Storage *p, i64);
int sqlite3Fts5StorageInsert(Fts5Storage *p, sqlite3_value **apVal, int, i64*);
int sqlite3Fts5StorageIntegrity(Fts5Storage *p);
int sqlite3Fts5StorageStmt(Fts5Storage *p, int eStmt, sqlite3_stmt**, char**);
void sqlite3Fts5StorageStmtRelease(Fts5Storage *p, int eStmt, sqlite3_stmt*);
int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol);
int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnAvg);
int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow);
int sqlite3Fts5StorageSync(Fts5Storage *p, int bCommit);
int sqlite3Fts5StorageRollback(Fts5Storage *p);
int sqlite3Fts5StorageConfigValue(
Fts5Storage *p, const char*, sqlite3_value*, int
);
int sqlite3Fts5StorageSpecialDelete(Fts5Storage *p, i64 iDel, sqlite3_value**);
int sqlite3Fts5StorageDeleteAll(Fts5Storage *p);
int sqlite3Fts5StorageRebuild(Fts5Storage *p);
int sqlite3Fts5StorageOptimize(Fts5Storage *p);
int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge);
/*
** End of interface to code in fts5_storage.c.
**************************************************************************/
/**************************************************************************
** Interface to code in fts5_expr.c.
*/
typedef struct Fts5Expr Fts5Expr;
typedef struct Fts5ExprNode Fts5ExprNode;
typedef struct Fts5Parse Fts5Parse;
typedef struct Fts5Token Fts5Token;
typedef struct Fts5ExprPhrase Fts5ExprPhrase;
typedef struct Fts5ExprNearset Fts5ExprNearset;
typedef struct Fts5ExprColset Fts5ExprColset;
/*
** A token handed to the sqlite3Fts5ParseXXX() functions: a pointer into a
** text buffer plus a byte count.
*/
struct Fts5Token {
const char *p; /* Token text (not nul-terminated) */
int n; /* Size of buffer p in bytes */
};
/* Parse a MATCH expression. */
int sqlite3Fts5ExprNew(
Fts5Config *pConfig,
const char *zExpr,
Fts5Expr **ppNew,
char **pzErr
);
/*
** for(rc = sqlite3Fts5ExprFirst(pExpr, pIdx, iMin, bDesc);
** rc==SQLITE_OK && 0==sqlite3Fts5ExprEof(pExpr);
** rc = sqlite3Fts5ExprNext(pExpr, iMax)
** ){
** // The document with rowid iRowid matches the expression!
** i64 iRowid = sqlite3Fts5ExprRowid(pExpr);
** }
*/
int sqlite3Fts5ExprFirst(Fts5Expr*, Fts5Index *pIdx, i64 iMin, int bDesc);
int sqlite3Fts5ExprNext(Fts5Expr*, i64 iMax);
int sqlite3Fts5ExprEof(Fts5Expr*);
i64 sqlite3Fts5ExprRowid(Fts5Expr*);
void sqlite3Fts5ExprFree(Fts5Expr*);
/* Called during startup to register a UDF with SQLite */
int sqlite3Fts5ExprInit(Fts5Global*, sqlite3*);
int sqlite3Fts5ExprPhraseCount(Fts5Expr*);
int sqlite3Fts5ExprPhraseSize(Fts5Expr*, int iPhrase);
int sqlite3Fts5ExprPoslist(Fts5Expr*, int, const u8 **);
int sqlite3Fts5ExprPhraseExpr(Fts5Config*, Fts5Expr*, int, Fts5Expr**);
/*******************************************
** The fts5_expr.c API above this point is used by the other hand-written
** C code in this module. The interfaces below this point are called by
** the parser code in fts5parse.y. */
void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...);
Fts5ExprNode *sqlite3Fts5ParseNode(
Fts5Parse *pParse,
int eType,
Fts5ExprNode *pLeft,
Fts5ExprNode *pRight,
Fts5ExprNearset *pNear
);
Fts5ExprPhrase *sqlite3Fts5ParseTerm(
Fts5Parse *pParse,
Fts5ExprPhrase *pPhrase,
Fts5Token *pToken,
int bPrefix
);
Fts5ExprNearset *sqlite3Fts5ParseNearset(
Fts5Parse*,
Fts5ExprNearset*,
Fts5ExprPhrase*
);
Fts5ExprColset *sqlite3Fts5ParseColset(
Fts5Parse*,
Fts5ExprColset*,
Fts5Token *
);
void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase*);
void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset*);
void sqlite3Fts5ParseNodeFree(Fts5ExprNode*);
void sqlite3Fts5ParseSetDistance(Fts5Parse*, Fts5ExprNearset*, Fts5Token*);
void sqlite3Fts5ParseSetColset(Fts5Parse*, Fts5ExprNearset*, Fts5ExprColset*);
void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p);
void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token*);
/*
** End of interface to code in fts5_expr.c.
**************************************************************************/
/**************************************************************************
** Interface to code in fts5_aux.c.
*/
int sqlite3Fts5AuxInit(fts5_api*);
/*
** End of interface to code in fts5_aux.c.
**************************************************************************/
/**************************************************************************
** Interface to code in fts5_tokenize.c.
*/
int sqlite3Fts5TokenizerInit(fts5_api*);
/*
** End of interface to code in fts5_tokenize.c.
**************************************************************************/
/**************************************************************************
** Interface to code in fts5_sorter.c.
*/
typedef struct Fts5Sorter Fts5Sorter;
int sqlite3Fts5SorterNew(Fts5Expr *pExpr, Fts5Sorter **pp);
/*
** End of interface to code in fts5_sorter.c.
**************************************************************************/
/**************************************************************************
** Interface to code in fts5_vocab.c.
*/
int sqlite3Fts5VocabInit(Fts5Global*, sqlite3*);
/*
** End of interface to code in fts5_vocab.c.
**************************************************************************/
/**************************************************************************
** Interface to automatically generated code in fts5_unicode2.c.
*/
int sqlite3Fts5UnicodeIsalnum(int c);
int sqlite3Fts5UnicodeIsdiacritic(int c);
int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic);
/*
** End of interface to code in fts5_unicode2.c.
**************************************************************************/
#endif
#endif

557
ext/fts5/fts5_aux.c Normal file
View File

@ -0,0 +1,557 @@
/*
** 2014 May 31
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
*/
#ifdef SQLITE_ENABLE_FTS5
#include "fts5Int.h"
#include <math.h>
/*
** Object used to iterate through all "coalesced phrase instances" in
** a single column of the current row. If the phrase instances in the
** column being considered do not overlap, this object simply iterates
** through them. Or, if they do overlap (share one or more tokens in
** common), each set of overlapping instances is treated as a single
** match. See documentation for the highlight() auxiliary function for
** details.
**
** Usage is:
**
** for(rc = fts5CInstIterInit(pApi, pFts, iCol, &iter);
** rc==SQLITE_OK && 0==fts5CInstIterEof(&iter);
** rc = fts5CInstIterNext(&iter)
** ){
** printf("instance starts at %d, ends at %d\n", iter.iStart, iter.iEnd);
** }
**
*/
/*
** Iterator state for coalesced phrase instances in one column of the
** current row. fts5CInstIterNext() resets iStart and iEnd to -1 before
** searching, so both remain -1 when no further instance exists in the
** column.
*/
typedef struct CInstIter CInstIter;
struct CInstIter {
const Fts5ExtensionApi *pApi; /* API offered by current FTS version */
Fts5Context *pFts; /* First arg to pass to pApi functions */
int iCol; /* Column to search */
int iInst; /* Next phrase instance index */
int nInst; /* Total number of phrase instances */
/* Output variables */
int iStart; /* First token in coalesced phrase instance */
int iEnd; /* Last token in coalesced phrase instance */
};
/*
** Advance the iterator to the next coalesced phrase instance. Return
** an SQLite error code if an error occurs, or SQLITE_OK otherwise.
*/
static int fts5CInstIterNext(CInstIter *pIter){
  const Fts5ExtensionApi *pApi = pIter->pApi;
  int rc = SQLITE_OK;

  /* Until an instance in the target column is found, the output range
  ** is (-1,-1). */
  pIter->iStart = -1;
  pIter->iEnd = -1;

  while( pIter->iInst<pIter->nInst ){
    int iPhrase;                  /* Phrase this instance belongs to */
    int iInstCol;                 /* Column containing the instance */
    int iFirst;                   /* First token of the instance */

    rc = pApi->xInst(pIter->pFts, pIter->iInst, &iPhrase, &iInstCol, &iFirst);
    if( rc!=SQLITE_OK ) break;

    if( iInstCol==pIter->iCol ){
      int iLast = iFirst - 1 + pApi->xPhraseSize(pIter->pFts, iPhrase);
      if( pIter->iStart<0 ){
        /* First instance found in the column: start a new range. */
        pIter->iStart = iFirst;
        pIter->iEnd = iLast;
      }else if( iFirst>pIter->iEnd ){
        /* This instance does not overlap the current range. Leave it
        ** unconsumed so that the next call starts from it. */
        break;
      }else if( iLast>pIter->iEnd ){
        /* Overlapping instance that extends the current range. */
        pIter->iEnd = iLast;
      }
    }
    pIter->iInst++;
  }

  return rc;
}
/*
** Initialize the iterator object indicated by the final parameter to
** iterate through coalesced phrase instances in column iCol.
*/
static int fts5CInstIterInit(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  int iCol,                       /* Column to iterate through */
  CInstIter *pIter                /* OUT: iterator to populate */
){
  int rc;

  /* Start from an all-zero iterator, then record the search parameters. */
  memset(pIter, 0, sizeof(*pIter));
  pIter->iCol = iCol;
  pIter->pFts = pFts;
  pIter->pApi = pApi;

  /* Fetch the instance count, then position on the first coalesced
  ** instance in the column (if any). */
  rc = pApi->xInstCount(pFts, &pIter->nInst);
  if( rc!=SQLITE_OK ) return rc;
  return fts5CInstIterNext(pIter);
}
/*************************************************************************
** Start of highlight() implementation.
*/
/*
** State shared between the highlight()/snippet() implementations and the
** tokenizer callback fts5HighlightCb(). Both functions zero the object
** with memset() before use. fts5HighlightCb() applies the token range
** only when iRangeEnd is greater than zero.
*/
typedef struct HighlightContext HighlightContext;
struct HighlightContext {
CInstIter iter; /* Coalesced Instance Iterator */
int iPos; /* Current token offset in zIn[] */
int iRangeStart; /* First token to include */
int iRangeEnd; /* If non-zero, last token to include */
const char *zOpen; /* Opening highlight */
const char *zClose; /* Closing highlight */
const char *zIn; /* Input text */
int nIn; /* Size of input text in bytes */
int iOff; /* Current offset within zIn[] */
char *zOut; /* Output value */
};
/*
** Append text to the HighlightContext output string - p->zOut. Argument
** z points to a buffer containing n bytes of text to append. If n is
** negative, everything up until the first '\0' is appended to the output.
**
** If *pRc is set to any value other than SQLITE_OK when this function is
** called, it is a no-op. If an error (i.e. an OOM condition) is encountered,
** *pRc is set to an error code before returning.
*/
/*
** Append n bytes of z to p->zOut, or all of z up to its nul-terminator
** when n is negative.
**
** This is a no-op if *pRc is not SQLITE_OK on entry, or if z is NULL. The
** open, close and ellipsis strings come from sqlite3_value_text(), which
** returns NULL for an SQL NULL argument; without the NULL check strlen()
** would be called on a NULL pointer.
**
** If the allocation fails, *pRc is set to SQLITE_NOMEM.
*/
static void fts5HighlightAppend(
  int *pRc,                       /* IN/OUT: error code */
  HighlightContext *p,            /* Context whose zOut is extended */
  const char *z, int n            /* Text to append (may be NULL) */
){
  if( *pRc==SQLITE_OK && z!=0 ){
    if( n<0 ) n = (int)strlen(z);
    /* "%z" substitutes the old p->zOut and then frees it, so the previous
    ** buffer must not be freed separately. */
    p->zOut = sqlite3_mprintf("%z%.*s", p->zOut, n, z);
    if( p->zOut==0 ) *pRc = SQLITE_NOMEM;
  }
}
/*
** Tokenizer callback used by implementation of highlight() function.
*/
/*
** Tokenizer callback used by the highlight() and snippet() functions. It
** is invoked once per token of the input text and copies input text to
** p->zOut, inserting p->zOpen and p->zClose around each coalesced phrase
** instance reported by p->iter.
**
** When a token range is in force (p->iRangeEnd>0), tokens outside
** [iRangeStart, iRangeEnd] are skipped, and a highlight still open at the
** end of the range is closed there.
**
** Returns SQLITE_OK, or an error code from the append or iterator calls.
*/
static int fts5HighlightCb(
  void *pContext,                 /* Pointer to HighlightContext object */
  const char *pToken,             /* Buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iStartOff,                  /* Start offset of token */
  int iEndOff                     /* End offset of token */
){
  HighlightContext *p = (HighlightContext*)pContext;
  int rc = SQLITE_OK;
  int iPos = p->iPos++;           /* Index of this token within the text */

  if( p->iRangeEnd>0 ){
    if( iPos<p->iRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK;
    /* On the first token of a range that does not begin at token 0,
    ** start copying input from this token. */
    if( p->iRangeStart && iPos==p->iRangeStart ) p->iOff = iStartOff;
  }

  if( iPos==p->iter.iStart ){
    /* First token of an instance: flush the text before it, then emit
    ** the opening marker. */
    fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iStartOff - p->iOff);
    fts5HighlightAppend(&rc, p, p->zOpen, -1);
    p->iOff = iStartOff;
  }

  if( iPos==p->iter.iEnd ){
    /* Last token of an instance. If the instance began before the range
    ** its opening marker was never written, so write it now. */
    if( p->iRangeEnd && p->iter.iStart<p->iRangeStart ){
      fts5HighlightAppend(&rc, p, p->zOpen, -1);
    }
    fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff);
    fts5HighlightAppend(&rc, p, p->zClose, -1);
    p->iOff = iEndOff;
    if( rc==SQLITE_OK ){
      rc = fts5CInstIterNext(&p->iter);
    }
  }

  if( p->iRangeEnd>0 && iPos==p->iRangeEnd ){
    /* Last token of the range: copy the text up to the end of the token. */
    fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff);
    p->iOff = iEndOff;
    /* Close the highlight only if this token lies inside the current
    ** instance. Testing iPos<iEnd alone would also fire when the next
    ** instance starts after the range, emitting a stray close marker. */
    if( iPos>=p->iter.iStart && iPos<p->iter.iEnd ){
      fts5HighlightAppend(&rc, p, p->zClose, -1);
    }
  }

  return rc;
}
/*
** Implementation of highlight() function.
*/
static void fts5HighlightFunction(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  sqlite3_context *pCtx,          /* Context for returning result/error */
  int nVal,                       /* Number of values in apVal[] array */
  sqlite3_value **apVal           /* Array of trailing arguments */
){
  HighlightContext hl;            /* State shared with fts5HighlightCb() */
  int iCol;                       /* Column to highlight (1st argument) */
  int rc;                         /* Return code */

  /* highlight() takes exactly three trailing arguments: column, open
  ** marker and close marker. */
  if( nVal!=3 ){
    sqlite3_result_error(
        pCtx, "wrong number of arguments to function highlight()", -1
    );
    return;
  }

  iCol = sqlite3_value_int(apVal[0]);
  memset(&hl, 0, sizeof(hl));
  hl.zOpen = (const char*)sqlite3_value_text(apVal[1]);
  hl.zClose = (const char*)sqlite3_value_text(apVal[2]);

  rc = pApi->xColumnText(pFts, iCol, &hl.zIn, &hl.nIn);
  if( hl.zIn!=0 ){
    if( rc==SQLITE_OK ){
      rc = fts5CInstIterInit(pApi, pFts, iCol, &hl.iter);
    }
    if( rc==SQLITE_OK ){
      rc = pApi->xTokenize(pFts, hl.zIn, hl.nIn, (void*)&hl, fts5HighlightCb);
    }

    /* Copy whatever input follows the last highlighted instance. */
    fts5HighlightAppend(&rc, &hl, &hl.zIn[hl.iOff], hl.nIn - hl.iOff);

    if( rc==SQLITE_OK ){
      sqlite3_result_text(pCtx, (const char*)hl.zOut, -1, SQLITE_TRANSIENT);
    }
    sqlite3_free(hl.zOut);
  }

  if( rc!=SQLITE_OK ){
    sqlite3_result_error_code(pCtx, rc);
  }
}
/*
** End of highlight() implementation.
**************************************************************************/
/*
** Implementation of snippet() function.
*/
/*
** Implementation of the snippet() auxiliary function. The five trailing
** arguments are: column number (negative means any column), opening
** marker, closing marker, ellipsis text and snippet size in tokens.
**
** Each phrase instance is scored as a candidate snippet start; later
** instances in the same column add to its score. The best-scoring window
** is then rendered through fts5HighlightCb().
**
** Any error is reported with sqlite3_result_error_code(), including when
** no column text was obtained. Reporting only inside the "column text is
** available" branch would silently drop OOM and API failures.
*/
static void fts5SnippetFunction(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  sqlite3_context *pCtx,          /* Context for returning result/error */
  int nVal,                       /* Number of values in apVal[] array */
  sqlite3_value **apVal           /* Array of trailing arguments */
){
  HighlightContext ctx;
  int rc = SQLITE_OK;             /* Return code */
  int iCol;                       /* 1st argument to snippet() */
  const char *zEllips;            /* 4th argument to snippet() */
  int nToken;                     /* 5th argument to snippet() */
  int nInst = 0;                  /* Number of instance matches this row */
  int i;                          /* Used to iterate through instances */
  int nPhrase;                    /* Number of phrases in query */
  unsigned char *aSeen;           /* Array of "seen instance" flags */
  int iBestCol;                   /* Column containing best snippet */
  int iBestStart = 0;             /* First token of best snippet */
  int iBestLast;                  /* Last token of best snippet */
  int nBestScore = 0;             /* Score of best snippet */
  int nColSize = 0;               /* Total size of iBestCol in tokens */

  if( nVal!=5 ){
    const char *zErr = "wrong number of arguments to function snippet()";
    sqlite3_result_error(pCtx, zErr, -1);
    return;
  }

  memset(&ctx, 0, sizeof(HighlightContext));
  iCol = sqlite3_value_int(apVal[0]);
  ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]);
  ctx.zClose = (const char*)sqlite3_value_text(apVal[2]);
  zEllips = (const char*)sqlite3_value_text(apVal[3]);
  nToken = sqlite3_value_int(apVal[4]);
  iBestLast = nToken-1;

  iBestCol = (iCol>=0 ? iCol : 0);
  nPhrase = pApi->xPhraseCount(pFts);
  if( nPhrase<=0 ){
    /* sqlite3_malloc() returns NULL for a request of zero bytes or less,
    ** which must not be mistaken for an out-of-memory condition. With no
    ** phrases there is nothing to score, so leave the result as SQL NULL.
    ** NOTE(review): callers may prefer an unhighlighted snippet here. */
    return;
  }
  aSeen = sqlite3_malloc(nPhrase);
  if( aSeen==0 ){
    rc = SQLITE_NOMEM;
  }

  if( rc==SQLITE_OK ){
    rc = pApi->xInstCount(pFts, &nInst);
  }

  /* Score each instance in a permitted column as a snippet start. */
  for(i=0; rc==SQLITE_OK && i<nInst; i++){
    int ip, iSnippetCol, iStart;
    memset(aSeen, 0, nPhrase);
    rc = pApi->xInst(pFts, i, &ip, &iSnippetCol, &iStart);
    if( rc==SQLITE_OK && (iCol<0 || iSnippetCol==iCol) ){
      int nScore = 1000;
      int iLast = iStart - 1 + pApi->xPhraseSize(pFts, ip);
      int j;
      aSeen[ip] = 1;

      for(j=i+1; rc==SQLITE_OK && j<nInst; j++){
        int ic; int io;
        rc = pApi->xInst(pFts, j, &ip, &ic, &io);
        /* Read ip and io only after a successful xInst() call, since a
        ** failed call may leave them unset. */
        if( rc==SQLITE_OK ){
          int iFinal = io + pApi->xPhraseSize(pFts, ip) - 1;
          if( ic==iSnippetCol && iLast<iStart+nToken ){
            nScore += aSeen[ip] ? 1000 : 1;
            aSeen[ip] = 1;
            if( iFinal>iLast ) iLast = iFinal;
          }
        }
      }

      if( rc==SQLITE_OK && nScore>nBestScore ){
        iBestCol = iSnippetCol;
        iBestStart = iStart;
        iBestLast = iLast;
        nBestScore = nScore;
      }
    }
  }

  if( rc==SQLITE_OK ){
    rc = pApi->xColumnSize(pFts, iBestCol, &nColSize);
  }
  if( rc==SQLITE_OK ){
    rc = pApi->xColumnText(pFts, iBestCol, &ctx.zIn, &ctx.nIn);
  }

  if( ctx.zIn ){
    if( rc==SQLITE_OK ){
      rc = fts5CInstIterInit(pApi, pFts, iBestCol, &ctx.iter);
    }

    /* If the matched tokens span less than nToken tokens, shift the start
    ** back by half the spare tokens. Then clamp the window to the column. */
    if( (iBestStart+nToken-1)>iBestLast ){
      iBestStart -= (iBestStart+nToken-1-iBestLast) / 2;
    }
    if( iBestStart+nToken>nColSize ){
      iBestStart = nColSize - nToken;
    }
    if( iBestStart<0 ) iBestStart = 0;

    ctx.iRangeStart = iBestStart;
    ctx.iRangeEnd = iBestStart + nToken - 1;

    /* A leading ellipsis marks text omitted before the snippet. */
    if( iBestStart>0 ){
      fts5HighlightAppend(&rc, &ctx, zEllips, -1);
    }
    if( rc==SQLITE_OK ){
      rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb);
    }
    /* Either copy the remaining input (the snippet reaches the end of the
    ** column) or mark the omitted tail with an ellipsis. */
    if( ctx.iRangeEnd>=(nColSize-1) ){
      fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff);
    }else{
      fts5HighlightAppend(&rc, &ctx, zEllips, -1);
    }

    if( rc==SQLITE_OK ){
      sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT);
    }
  }

  /* Report errors whether or not any column text was obtained. */
  if( rc!=SQLITE_OK ){
    sqlite3_result_error_code(pCtx, rc);
  }
  sqlite3_free(ctx.zOut);
  sqlite3_free(aSeen);
}
/************************************************************************/
/*
** Per-query state used by bm25(). An instance is built on the first call
** to bm25() within a query and then reused for subsequent rows.
*/
typedef struct Fts5Bm25Data {
  int nPhrase;                    /* Number of phrases in query */
  double avgdl;                   /* Average number of tokens in each row */
  double *aIDF;                   /* IDF for each phrase */
  double *aFreq;                  /* Array used to calculate phrase freq. */
} Fts5Bm25Data;
/*
** xQueryPhrase() callback used by fts5Bm25GetData(). Each invocation adds
** one to the sqlite3_int64 counter that pUserData points to, so that the
** caller ends up with the number of times the callback was invoked.
*/
static int fts5CountCb(
  const Fts5ExtensionApi *pApi,
  Fts5Context *pFts,
  void *pUserData                 /* Pointer to sqlite3_int64 variable */
){
  sqlite3_int64 *pnHit = (sqlite3_int64*)pUserData;
  *pnHit += 1;
  return SQLITE_OK;
}
/*
** Fetch the Fts5Bm25Data object attached to the current query, creating
** and populating it first if no such object exists yet. On success *ppData
** points to the object and SQLITE_OK is returned. On failure *ppData is
** set to NULL and an SQLite error code is returned.
*/
static int fts5Bm25GetData(
  const Fts5ExtensionApi *pApi,
  Fts5Context *pFts,
  Fts5Bm25Data **ppData           /* OUT: bm25-data object for this query */
){
  int rc = SQLITE_OK;
  Fts5Bm25Data *pBm = pApi->xGetAuxdata(pFts, 0);

  if( pBm==0 ){
    sqlite3_int64 nDoc;           /* Number of rows in table */
    sqlite3_int64 nTotalTok;      /* Number of tokens in table */
    int iPhrase;
    int nPhrase = pApi->xPhraseCount(pFts);
    int nAlloc = sizeof(Fts5Bm25Data) + nPhrase*2*sizeof(double);

    /* One allocation holds the header followed by the aIDF[] and aFreq[]
    ** arrays, nPhrase doubles each. */
    pBm = (Fts5Bm25Data*)sqlite3_malloc(nAlloc);
    if( pBm ){
      memset(pBm, 0, nAlloc);
      pBm->nPhrase = nPhrase;
      pBm->aIDF = (double*)&pBm[1];
      pBm->aFreq = &pBm->aIDF[nPhrase];
    }else{
      rc = SQLITE_NOMEM;
    }

    /* Average document length = total tokens / total rows */
    if( rc==SQLITE_OK ){
      rc = pApi->xRowCount(pFts, &nDoc);
    }
    if( rc==SQLITE_OK ){
      rc = pApi->xColumnTotalSize(pFts, -1, &nTotalTok);
    }
    if( rc==SQLITE_OK ){
      pBm->avgdl = (double)nTotalTok / (double)nDoc;
    }

    /* Inverse document frequency for each phrase, using the usual BM25
    ** formula:
    **
    **   IDF = log( (N - nHit + 0.5) / (nHit + 0.5) )
    **
    ** with N the number of rows and nHit the number of rows matched by the
    ** phrase. The formula goes non-positive once a phrase matches half or
    ** more of the rows, so such values are replaced by the floor 1e-6. */
    for(iPhrase=0; rc==SQLITE_OK && iPhrase<nPhrase; iPhrase++){
      sqlite3_int64 nHit = 0;
      rc = pApi->xQueryPhrase(pFts, iPhrase, (void*)&nHit, fts5CountCb);
      if( rc==SQLITE_OK ){
        double idf = log( (nDoc - nHit + 0.5) / (nHit + 0.5) );
        pBm->aIDF[iPhrase] = (idf<=0.0) ? 1e-6 : idf;
      }
    }

    if( rc==SQLITE_OK ){
      rc = pApi->xSetAuxdata(pFts, pBm, sqlite3_free);
    }else{
      sqlite3_free(pBm);
    }
    if( rc!=SQLITE_OK ) pBm = 0;
  }

  *ppData = pBm;
  return rc;
}
/*
** Implementation of the bm25() auxiliary function. Each trailing SQL
** argument, if present, is the weight given to phrase instances found in
** the column with the same index; columns with no argument weigh 1.0.
** The result is the BM25 score multiplied by -1.
*/
static void fts5Bm25Function(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  sqlite3_context *pCtx,          /* Context for returning result/error */
  int nVal,                       /* Number of values in apVal[] array */
  sqlite3_value **apVal           /* Array of trailing arguments */
){
  const double k1 = 1.2;          /* Constant "k1" from BM25 formula */
  const double b = 0.75;          /* Constant "b" from BM25 formula */
  Fts5Bm25Data *pBm;              /* Values allocated/calculated once only */
  double *aTf;                    /* Weighted phrase frequencies, this row */
  double D;                       /* Total number of tokens in row */
  double score = 0.0;             /* Accumulated (positive) score */
  int nInst;                      /* Value returned by xInstCount() */
  int iInst;                      /* Iterates over phrase instances */
  int iPhrase;                    /* Iterates over query phrases */
  int rc;                         /* Error code */

  /* Reset the per-row frequency array and find the instance count. */
  rc = fts5Bm25GetData(pApi, pFts, &pBm);
  if( rc==SQLITE_OK ){
    aTf = pBm->aFreq;
    memset(aTf, 0, sizeof(double) * pBm->nPhrase);
    rc = pApi->xInstCount(pFts, &nInst);
  }

  /* Accumulate f(qi,D): the weighted number of instances of each phrase. */
  for(iInst=0; rc==SQLITE_OK && iInst<nInst; iInst++){
    int ip; int ic; int io;
    rc = pApi->xInst(pFts, iInst, &ip, &ic, &io);
    if( rc==SQLITE_OK ){
      double w = 1.0;
      if( nVal > ic ){
        w = sqlite3_value_double(apVal[ic]);
      }
      aTf[ip] += w;
    }
  }

  /* Size of the current row in tokens, summed over all columns. */
  if( rc==SQLITE_OK ){
    int nTok;
    rc = pApi->xColumnSize(pFts, -1, &nTok);
    D = (double)nTok;
  }

  /* Sum the per-phrase BM25 terms. */
  for(iPhrase=0; rc==SQLITE_OK && iPhrase<pBm->nPhrase; iPhrase++){
    score += pBm->aIDF[iPhrase] * (
        ( aTf[iPhrase] * (k1 + 1.0) ) /
        ( aTf[iPhrase] + k1 * (1 - b + b * D / pBm->avgdl) )
    );
  }

  if( rc!=SQLITE_OK ){
    sqlite3_result_error_code(pCtx, rc);
  }else{
    sqlite3_result_double(pCtx, -1.0 * score);
  }
}
/*
** Register the built-in auxiliary functions (snippet, highlight and bm25)
** with the fts5_api object passed as the only argument. Registration
** stops at the first failure. Returns SQLITE_OK or the error code of the
** registration that failed.
*/
int sqlite3Fts5AuxInit(fts5_api *pApi){
  struct Builtin {
    const char *zFunc;            /* Function name (nul-terminated) */
    void *pUserData;              /* User-data pointer */
    fts5_extension_function xFunc;/* Callback function */
    void (*xDestroy)(void*);      /* Destructor function */
  } aBuiltin [] = {
    { "snippet",   0, fts5SnippetFunction, 0 },
    { "highlight", 0, fts5HighlightFunction, 0 },
    { "bm25",      0, fts5Bm25Function, 0 },
  };
  const int nBuiltin = (int)(sizeof(aBuiltin)/sizeof(aBuiltin[0]));
  int rc = SQLITE_OK;
  int iFunc = 0;

  while( rc==SQLITE_OK && iFunc<nBuiltin ){
    const struct Builtin *pDef = &aBuiltin[iFunc++];
    rc = pApi->xCreateFunction(
        pApi, pDef->zFunc, pDef->pUserData, pDef->xFunc, pDef->xDestroy
    );
  }

  return rc;
}
#endif /* SQLITE_ENABLE_FTS5 */

309
ext/fts5/fts5_buffer.c Normal file
View File

@ -0,0 +1,309 @@
/*
** 2014 May 31
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
*/
#ifdef SQLITE_ENABLE_FTS5
#include "fts5Int.h"
/*
** Ensure that buffer pBuf has room for at least nByte more bytes beyond
** the pBuf->n bytes it already holds, reallocating if necessary.
**
** Returns 0 on success. Returns 1, without touching the buffer, if *pRc
** is already set on entry or if the allocation fails; in the latter case
** *pRc is set to SQLITE_NOMEM.
**
** Sizes are computed in 64-bit arithmetic. With plain int arithmetic the
** doubling loop could overflow for very large buffers, which is undefined
** behaviour and in practice could loop forever or request a bogus size.
** A request that does not fit in a signed 32-bit size is treated as OOM.
*/
int sqlite3Fts5BufferGrow(int *pRc, Fts5Buffer *pBuf, int nByte){
  i64 nReq;                       /* Total bytes required */

  /* A no-op if an error has already occurred */
  if( *pRc ) return 1;

  nReq = (i64)pBuf->n + (i64)nByte;
  if( nReq>(i64)pBuf->nSpace ){
    u8 *pNew;
    i64 nNew = pBuf->nSpace ? ((i64)pBuf->nSpace)*2 : 64;
    while( nNew<nReq ){
      nNew = nNew * 2;
    }
    if( nNew>(i64)0x7fffffff ){
      *pRc = SQLITE_NOMEM;
      return 1;
    }
    pNew = sqlite3_realloc(pBuf->p, (int)nNew);
    if( pNew==0 ){
      *pRc = SQLITE_NOMEM;
      return 1;
    }else{
      pBuf->nSpace = (int)nNew;
      pBuf->p = pNew;
    }
  }
  return 0;
}
/*
** Append iVal to buffer pBuf, encoded as a varint. Space for 9 bytes is
** reserved first. If the buffer cannot be grown (OOM, or *pRc already set)
** nothing is appended; on OOM *pRc is set by sqlite3Fts5BufferGrow().
*/
void sqlite3Fts5BufferAppendVarint(int *pRc, Fts5Buffer *pBuf, i64 iVal){
  if( 0==sqlite3Fts5BufferGrow(pRc, pBuf, 9) ){
    u8 *aOut = &pBuf->p[pBuf->n];
    pBuf->n += sqlite3Fts5PutVarint(aOut, iVal);
  }
}
/*
** Write the low 32 bits of iVal to aBuf[0..3], most significant byte
** first.
*/
void sqlite3Fts5Put32(u8 *aBuf, int iVal){
  int iByte;
  for(iByte=0; iByte<4; iByte++){
    aBuf[iByte] = (iVal >> (24 - 8*iByte)) & 0x00FF;
  }
}
/*
** Read a 32-bit value stored most-significant-byte first at aBuf[0..3]
** (the format written by sqlite3Fts5Put32()) and return it as an int.
**
** The bytes are combined as unsigned values. Shifting a promoted (signed)
** int left by 24 is undefined behaviour whenever aBuf[0]>=0x80, because
** the result does not fit in an int.
*/
int sqlite3Fts5Get32(const u8 *aBuf){
  unsigned int v = ((unsigned int)aBuf[0] << 24)
                 | ((unsigned int)aBuf[1] << 16)
                 | ((unsigned int)aBuf[2] <<  8)
                 |  (unsigned int)aBuf[3];
  return (int)v;
}
/*
** Append iVal to buffer pBuf as a 4-byte big-endian integer. Nothing is
** appended if the buffer cannot be grown.
*/
void sqlite3Fts5BufferAppend32(int *pRc, Fts5Buffer *pBuf, int iVal){
  if( 0==sqlite3Fts5BufferGrow(pRc, pBuf, 4) ){
    u8 *aOut = &pBuf->p[pBuf->n];
    sqlite3Fts5Put32(aOut, iVal);
    pBuf->n += 4;
  }
}
/*
** Append the nData bytes at pData to buffer pBuf. If growing the buffer
** fails, *pRc is set to the error code and the buffer is left unchanged.
** If *pRc is already set when this function is called, it is a no-op.
*/
void sqlite3Fts5BufferAppendBlob(
  int *pRc,
  Fts5Buffer *pBuf,
  int nData,
  const u8 *pData
){
  assert( *pRc || nData>=0 );
  if( 0==sqlite3Fts5BufferGrow(pRc, pBuf, nData) ){
    u8 *pDest = pBuf->p + pBuf->n;
    memcpy(pDest, pData, nData);
    pBuf->n = pBuf->n + nData;
  }
}
/*
** Append the nul-terminated string zStr to the buffer pBuf. The
** terminating 0x00 byte is copied into the buffer as well, but is not
** included in the pBuf->n count.
**
** If an error has already occurred (*pRc is set on entry), or if the
** append itself fails, the buffer is left untouched. In particular
** pBuf->n is not decremented, because no terminator byte was added for
** the decrement to exclude.
*/
void sqlite3Fts5BufferAppendString(
  int *pRc,
  Fts5Buffer *pBuf,
  const char *zStr
){
  int nStr = (int)strlen(zStr);
  sqlite3Fts5BufferAppendBlob(pRc, pBuf, nStr+1, (const u8*)zStr);
  if( *pRc==SQLITE_OK ){
    /* Exclude the nul-terminator that was just copied from the count */
    pBuf->n--;
  }
}
/*
** Format the printf()-style arguments zFmt/... and append the resulting
** text to buffer pBuf via sqlite3Fts5BufferAppendString(), so the same
** convention about the trailing 0x00 byte applies.
**
** This is a no-op if *pRc is not SQLITE_OK on entry. If the formatted
** string cannot be allocated, *pRc is set to SQLITE_NOMEM.
*/
void sqlite3Fts5BufferAppendPrintf(
  int *pRc,
  Fts5Buffer *pBuf,
  char *zFmt, ...
){
  char *zText;
  va_list ap;

  if( *pRc!=SQLITE_OK ) return;

  va_start(ap, zFmt);
  zText = sqlite3_vmprintf(zFmt, ap);
  va_end(ap);

  if( zText ){
    sqlite3Fts5BufferAppendString(pRc, pBuf, zText);
    sqlite3_free(zText);
  }else{
    *pRc = SQLITE_NOMEM;
  }
}
/*
** Wrapper around sqlite3_vmprintf() that follows the *pRc convention. If
** *pRc is not SQLITE_OK on entry, NULL is returned and nothing else
** happens. If the allocation fails, *pRc is set to SQLITE_NOMEM and NULL
** is returned. Otherwise the formatted string is returned.
*/
char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...){
  char *zText;
  va_list ap;

  if( *pRc!=SQLITE_OK ) return 0;

  va_start(ap, zFmt);
  zText = sqlite3_vmprintf(zFmt, ap);
  va_end(ap);

  if( zText==0 ) *pRc = SQLITE_NOMEM;
  return zText;
}
/*
** Release the allocation owned by pBuf, if any, and reset every field of
** the structure to zero.
*/
void sqlite3Fts5BufferFree(Fts5Buffer *pBuf){
  void *pOld = pBuf->p;
  memset(pBuf, 0, sizeof(*pBuf));
  sqlite3_free(pOld);
}
/*
** Discard the contents of the buffer object by resetting its length
** (pBuf->n) to zero. The allocation itself (pBuf->p / pBuf->nSpace) is
** neither freed nor overwritten, so it can be reused by later appends.
*/
void sqlite3Fts5BufferZero(Fts5Buffer *pBuf){
pBuf->n = 0;
}
/*
** Replace the contents of buffer pBuf with the nData bytes at pData. The
** length is reset first, even if *pRc is already set; the append that
** follows is then a no-op in that case. If an OOM error occurs, the error
** code is left in *pRc.
*/
void sqlite3Fts5BufferSet(
  int *pRc,
  Fts5Buffer *pBuf,
  int nData,
  const u8 *pData
){
  sqlite3Fts5BufferZero(pBuf);
  sqlite3Fts5BufferAppendBlob(pRc, pBuf, nData, pData);
}
/*
** Decode the next entry of the position list a[0..n-1].
**
** *pi is the read offset within a[] and *piOff the previously decoded
** position. If *pi is already at or past n, *piOff is set to -1 and 1
** (EOF) is returned. Otherwise one entry is decoded, *pi and *piOff are
** updated and 0 is returned.
**
** An entry is a varint holding (delta+2). The value 1 is an escape: it is
** followed by a varint that becomes the upper 32 bits of the position and
** then by the (delta+2) varint itself.
*/
int sqlite3Fts5PoslistNext64(
  const u8 *a, int n,             /* Buffer containing poslist */
  int *pi,                        /* IN/OUT: Offset within a[] */
  i64 *piOff                      /* IN/OUT: Current offset */
){
  int iRead = *pi;
  i64 iBase;
  int iDelta;

  if( iRead>=n ){
    /* EOF */
    *piOff = -1;
    return 1;
  }

  iBase = *piOff;
  iRead += fts5GetVarint32(&a[iRead], iDelta);
  if( iDelta==1 ){
    iRead += fts5GetVarint32(&a[iRead], iDelta);
    iBase = ((i64)iDelta) << 32;
    iRead += fts5GetVarint32(&a[iRead], iDelta);
  }
  *piOff = iBase + (iDelta-2);
  *pi = iRead;
  return 0;
}
/*
** Step the reader to the next position. The reader is marked as being at
** EOF if the underlying list is exhausted, or if the reader is restricted
** to one column (iCol>=0) and the new position belongs to a later column.
** Returns the reader's EOF flag.
*/
int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader *pIter){
  int bDone = sqlite3Fts5PoslistNext64(
      pIter->a, pIter->n, &pIter->i, &pIter->iPos
  );
  if( bDone==0 && pIter->iCol>=0 ){
    bDone = ((pIter->iPos >> 32) > pIter->iCol);
  }
  if( bDone ){
    pIter->bEof = 1;
  }
  return pIter->bEof;
}
/*
** Initialise reader pIter to walk the position list a[0..n-1] and move it
** to its first position. The reader always advances at least once and
** keeps advancing while the position's column (its upper 32 bits) is less
** than iCol. Returns the reader's EOF flag.
*/
int sqlite3Fts5PoslistReaderInit(
  int iCol,                       /* If (iCol>=0), this column only */
  const u8 *a, int n,             /* Poslist buffer to iterate through */
  Fts5PoslistReader *pIter        /* Iterator object to initialize */
){
  memset(pIter, 0, sizeof(*pIter));
  pIter->a = a;
  pIter->n = n;
  pIter->iCol = iCol;

  for(;;){
    sqlite3Fts5PoslistReaderNext(pIter);
    if( pIter->bEof || (pIter->iPos >> 32)>=iCol ) break;
  }
  return pIter->bEof;
}
/*
** Append position iPos to the position list being built in pBuf. If the
** column part of iPos (bits 32 and up, under colmask) differs from that
** of the previous position, an escape entry (varint 1 followed by the
** column number) is written first. The position is then written as
** (delta from the previous position)+2. Returns SQLITE_OK or an error
** code.
*/
int sqlite3Fts5PoslistWriterAppend(
  Fts5Buffer *pBuf,
  Fts5PoslistWriter *pWriter,
  i64 iPos
){
  static const i64 colmask = ((i64)(0x7FFFFFFF)) << 32;
  int rc = SQLITE_OK;
  i64 iNewCol = (iPos & colmask);

  if( iNewCol != (pWriter->iPrev & colmask) ){
    fts5BufferAppendVarint(&rc, pBuf, 1);
    fts5BufferAppendVarint(&rc, pBuf, (iPos >> 32));
    pWriter->iPrev = iNewCol;
  }
  fts5BufferAppendVarint(&rc, pBuf, (iPos - pWriter->iPrev) + 2);
  pWriter->iPrev = iPos;

  return rc;
}
/*
** Allocate nByte bytes with sqlite3_malloc() and zero them.
**
** If *pRc is not SQLITE_OK on entry, this is a no-op that returns NULL.
** If the allocation fails for a request of one or more bytes, *pRc is
** set to SQLITE_NOMEM and NULL is returned. A request for zero (or fewer)
** bytes returns NULL without setting an error. In that case memset() is
** not called; previously it was invoked on the NULL pointer.
*/
void *sqlite3Fts5MallocZero(int *pRc, int nByte){
  void *pRet = 0;
  if( *pRc==SQLITE_OK ){
    pRet = sqlite3_malloc(nByte);
    if( pRet==0 ){
      if( nByte>0 ) *pRc = SQLITE_NOMEM;
    }else{
      memset(pRet, 0, nByte);
    }
  }
  return pRet;
}
/*
** Return a nul-terminated copy of the first nIn bytes of pIn. If nIn is
** negative, the length is taken from strlen(pIn) instead.
**
** The copy is obtained from sqlite3_malloc() and must eventually be
** released by the caller using sqlite3_free(). NULL is returned if *pRc
** is not SQLITE_OK on entry, or if the allocation fails (in which case
** *pRc is set to SQLITE_NOMEM).
*/
char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn){
  char *zCopy;
  int nCopy = nIn;

  if( *pRc!=SQLITE_OK ) return 0;

  if( nCopy<0 ){
    nCopy = strlen(pIn);
  }
  zCopy = (char*)sqlite3_malloc(nCopy+1);
  if( zCopy==0 ){
    *pRc = SQLITE_NOMEM;
    return 0;
  }
  memcpy(zCopy, pIn, nCopy);
  zCopy[nCopy] = '\0';
  return zCopy;
}
/*
** Return true (1) if character 't' may be part of an FTS5 bareword, or
** false (0) otherwise. Characters that may be part of barewords:
**
**   * All non-ASCII characters (any byte with the 0x80 bit set),
**   * The 52 upper and lower case ASCII characters,
**   * The 10 integer ASCII characters, and
**   * The underscore character "_" (0x5F).
**
** The test is written as range comparisons on ASCII code points rather
** than a lookup in a 128-entry automatic array, which had to be
** re-initialised on the stack on every call.
*/
int sqlite3Fts5IsBareword(char t){
  if( t & 0x80 ) return 1;                    /* non-ASCII */
  if( t>=0x30 && t<=0x39 ) return 1;          /* '0' .. '9' */
  if( t>=0x41 && t<=0x5A ) return 1;          /* 'A' .. 'Z' */
  if( t>=0x61 && t<=0x7A ) return 1;          /* 'a' .. 'z' */
  return t==0x5F;                             /* '_' */
}
#endif /* SQLITE_ENABLE_FTS5 */

864
ext/fts5/fts5_config.c Normal file
View File

@ -0,0 +1,864 @@
/*
** 2014 Jun 09
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
** This is an SQLite module implementing full-text search.
*/
#ifdef SQLITE_ENABLE_FTS5
#include "fts5Int.h"
#define FTS5_DEFAULT_PAGE_SIZE 1000
#define FTS5_DEFAULT_AUTOMERGE 4
#define FTS5_DEFAULT_CRISISMERGE 16
/* Maximum allowed page size */
#define FTS5_MAX_PAGE_SIZE (128*1024)
/*
** Return 1 if x is the space character, or 0 otherwise. No other
** character (tab, newline, ...) counts as white-space here.
*/
static int fts5_iswhitespace(char x){
  if( x==' ' ) return 1;
  return 0;
}
/*
** Return 1 if x is one of the four characters that may open a quoted
** string (double-quote, single-quote, '[' or back-tick), or 0 otherwise.
*/
static int fts5_isopenquote(char x){
  switch( x ){
    case '"':
    case '\'':
    case '[':
    case '`':
      return 1;
    default:
      return 0;
  }
}
/*
** Argument pIn is either NULL or a pointer into a nul-terminated string.
** If it is NULL, NULL is returned. Otherwise, return a pointer to the
** first character at or after *pIn that is not white-space according to
** fts5_iswhitespace().
*/
static const char *fts5ConfigSkipWhitespace(const char *pIn){
  if( pIn==0 ) return 0;
  for( ; fts5_iswhitespace(*pIn); pIn++){}
  return pIn;
}
/*
** Argument pIn points into a nul-terminated string. Return a pointer to
** the first character at or after *pIn that is not a "bareword"
** character, or NULL if *pIn itself is not a bareword character (i.e.
** the bareword would be empty).
*/
static const char *fts5ConfigSkipBareword(const char *pIn){
  const char *pEnd;
  for(pEnd=pIn; sqlite3Fts5IsBareword(*pEnd); pEnd++){}
  return (pEnd==pIn) ? 0 : pEnd;
}
/*
** Return 1 if a is one of the characters '0'..'9', or 0 otherwise.
*/
static int fts5_isdigit(char a){
  return !(a<'0' || a>'9');
}
/*
** Argument pIn points to the first character of what is expected to be an
** SQL literal within a nul-terminated string. If a literal is recognised,
** return a pointer to the character immediately following it. Otherwise
** return NULL. Recognised forms:
**
**   * NULL (any case),
**   * a blob literal: x'...' with an even number of hex digits,
**   * a string literal: '...' with '' as an escaped quote,
**   * a number: optional sign, digits, optional ".digits" fraction.
**
** NOTE(review): the number branch accepts a lone '+' or '-' (p has moved
** past pIn) and does not parse exponents; behaviour left as found.
*/
static const char *fts5ConfigSkipLiteral(const char *pIn){
  const char *p = pIn;
  switch( *p ){
    case 'n': case 'N':
      if( sqlite3_strnicmp("null", p, 4)==0 ){
        p = &p[4];
      }else{
        p = 0;
      }
      break;

    case 'x': case 'X':
      p++;
      if( *p=='\'' ){
        p++;
        while( (*p>='a' && *p<='f')
            || (*p>='A' && *p<='F')
            || (*p>='0' && *p<='9')
        ){
          p++;
        }
        /* (p-pIn) is 2 plus the number of hex digits, so it is even if
        ** and only if the number of hex digits is even. */
        if( *p=='\'' && 0==((p-pIn)%2) ){
          p++;
        }else{
          p = 0;
        }
      }else{
        p = 0;
      }
      break;

    case '\'':
      p++;
      while( p ){
        if( *p=='\0' ){
          /* Unterminated string. Checking for the terminator before
          ** advancing ensures the scan never steps past the end of the
          ** buffer, even when the input is a single quote character. */
          p = 0;
        }else if( *p=='\'' ){
          p++;
          if( *p!='\'' ) break;     /* That was the closing quote */
          p++;                      /* Escaped quote ('') - skip it */
        }else{
          p++;
        }
      }
      break;

    default:
      /* maybe a number */
      if( *p=='+' || *p=='-' ) p++;
      while( fts5_isdigit(*p) ) p++;

      /* At this point, if the literal was an integer, the parse is
      ** finished. Or, if it is a floating point value, it may continue
      ** with a decimal point followed by further digits. */
      if( *p=='.' && fts5_isdigit(p[1]) ){
        p += 2;
        while( fts5_isdigit(*p) ) p++;
      }
      if( p==pIn ) p = 0;

      break;
  }

  return p;
}
/*
** The first character of the string pointed to by argument z is guaranteed
** to be an open-quote character (see function fts5_isopenquote()).
**
** This function scans for the corresponding close-quote character within
** the string, dequotes the string in place (a doubled quote character
** inside the string becomes a single one) and writes a new nul-terminator
** after the dequoted text.
**
** The value returned is the byte offset, within the original contents of
** z, of the character immediately following the close-quote. If the end
** of the string is reached without finding a close-quote, the loop simply
** stops at the nul-terminator and its offset is returned - this
** implementation never returns -1.
**
** NOTE(review): the ALWAYS() around the loop condition suggests callers
** are expected to pass only properly terminated quoted strings - confirm.
*/
static int fts5Dequote(char *z){
char q;
int iIn = 1;
int iOut = 0;
q = z[0];
/* Set stack variable q to the close-quote character */
assert( q=='[' || q=='\'' || q=='"' || q=='`' );
if( q=='[' ) q = ']';
/* iIn is the read cursor and iOut the write cursor. iOut never overtakes
** iIn, so the rewrite can safely be done within the same buffer. */
while( ALWAYS(z[iIn]) ){
if( z[iIn]==q ){
if( z[iIn+1]!=q ){
/* Character iIn was the close quote. */
iIn++;
break;
}else{
/* Character iIn and iIn+1 form an escaped quote character. Skip
** the input cursor past both and copy a single quote character
** to the output buffer. */
iIn += 2;
z[iOut++] = q;
}
}else{
z[iOut++] = z[iIn++];
}
}
z[iOut] = '\0';
return iIn;
}
/*
** If the nul-terminated string z begins with one of the quote characters
** ", ', [ or `, remove the quoting in place. Otherwise leave z untouched.
** z must not begin with white-space.
**
** Examples:
**
**     "abc"   becomes   abc
**     'xyz'   becomes   xyz
**     [pqr]   becomes   pqr
**     `mno`   becomes   mno
*/
void sqlite3Fts5Dequote(char *z){
  assert( 0==fts5_iswhitespace(z[0]) );
  switch( z[0] ){
    case '[':
    case '\'':
    case '"':
    case '`':
      fts5Dequote(z);
      break;
    default:
      /* Not quoted - nothing to do */
      break;
  }
}
/*
** Parse a "special" CREATE VIRTUAL TABLE directive (one of the form
** "<cmd> = <arg>") and update configuration object pConfig accordingly.
** zCmd may be any leading abbreviation of a directive name; the
** directives are tried in the order prefix, tokenize, content,
** content_rowid, columnsize and the first match wins.
**
** If successful, object pConfig is updated and SQLITE_OK returned. If
** an error occurs, an SQLite error code is returned and an error message
** may be left in *pzErr. It is the responsibility of the caller to
** eventually free any such error message using sqlite3_free().
**
** The prefix= directive accepts at most FTS5_MAX_PREFIX_INDEXES values,
** which is the number of slots allocated for pConfig->aPrefix[]. Extra
** values are rejected with an error rather than written past the end of
** the array.
*/
static int fts5ConfigParseSpecial(
  Fts5Global *pGlobal,
  Fts5Config *pConfig,            /* Configuration object to update */
  const char *zCmd,               /* Special command to parse */
  const char *zArg,               /* Argument to parse */
  char **pzErr                    /* OUT: Error message */
){
  int rc = SQLITE_OK;
  int nCmd = strlen(zCmd);

  if( sqlite3_strnicmp("prefix", zCmd, nCmd)==0 ){
    const int nByte = sizeof(int) * FTS5_MAX_PREFIX_INDEXES;
    const char *p;
    if( pConfig->aPrefix ){
      *pzErr = sqlite3_mprintf("multiple prefix=... directives");
      rc = SQLITE_ERROR;
    }else{
      pConfig->aPrefix = sqlite3Fts5MallocZero(&rc, nByte);
    }
    p = zArg;
    while( rc==SQLITE_OK && p[0] ){
      int nPre = 0;
      while( p[0]==' ' ) p++;
      while( p[0]>='0' && p[0]<='9' && nPre<1000 ){
        nPre = nPre*10 + (p[0] - '0');
        p++;
      }
      while( p[0]==' ' ) p++;
      if( p[0]==',' ){
        p++;
      }else if( p[0] ){
        *pzErr = sqlite3_mprintf("malformed prefix=... directive");
        rc = SQLITE_ERROR;
      }
      if( rc==SQLITE_OK && (nPre==0 || nPre>=1000) ){
        *pzErr = sqlite3_mprintf("prefix length out of range: %d", nPre);
        rc = SQLITE_ERROR;
      }
      if( rc==SQLITE_OK && pConfig->nPrefix>=FTS5_MAX_PREFIX_INDEXES ){
        *pzErr = sqlite3_mprintf(
            "too many prefix indexes (max %d)", FTS5_MAX_PREFIX_INDEXES
        );
        rc = SQLITE_ERROR;
      }

      /* Only record values that passed every check above */
      if( rc==SQLITE_OK ){
        pConfig->aPrefix[pConfig->nPrefix] = nPre;
        pConfig->nPrefix++;
      }
    }
    return rc;
  }

  if( sqlite3_strnicmp("tokenize", zCmd, nCmd)==0 ){
    const char *p = (const char*)zArg;
    int nArg = strlen(zArg) + 1;
    char **azArg = sqlite3Fts5MallocZero(&rc, sizeof(char*) * nArg);
    char *pDel = sqlite3Fts5MallocZero(&rc, nArg * 2);
    char *pSpace = pDel;

    /* pDel is a zeroed scratch buffer into which each argument word is
    ** copied (and dequoted); azArg[] points into it. If either allocation
    ** failed, rc is already set and the block below is skipped. */
    if( azArg && pSpace ){
      if( pConfig->pTok ){
        *pzErr = sqlite3_mprintf("multiple tokenize=... directives");
        rc = SQLITE_ERROR;
      }else{
        for(nArg=0; p && *p; nArg++){
          const char *p2 = fts5ConfigSkipWhitespace(p);
          if( *p2=='\'' ){
            p = fts5ConfigSkipLiteral(p2);
          }else{
            p = fts5ConfigSkipBareword(p2);
          }
          if( p ){
            memcpy(pSpace, p2, p-p2);
            azArg[nArg] = pSpace;
            sqlite3Fts5Dequote(pSpace);
            pSpace += (p - p2) + 1;
            p = fts5ConfigSkipWhitespace(p);
          }
        }
        if( p==0 ){
          *pzErr = sqlite3_mprintf("parse error in tokenize directive");
          rc = SQLITE_ERROR;
        }else{
          rc = sqlite3Fts5GetTokenizer(pGlobal,
              (const char**)azArg, nArg, &pConfig->pTok, &pConfig->pTokApi,
              pzErr
          );
        }
      }
    }

    sqlite3_free(azArg);
    sqlite3_free(pDel);
    return rc;
  }

  if( sqlite3_strnicmp("content", zCmd, nCmd)==0 ){
    if( pConfig->eContent!=FTS5_CONTENT_NORMAL ){
      *pzErr = sqlite3_mprintf("multiple content=... directives");
      rc = SQLITE_ERROR;
    }else{
      if( zArg[0] ){
        pConfig->eContent = FTS5_CONTENT_EXTERNAL;
        pConfig->zContent = sqlite3Fts5Mprintf(&rc, "%Q.%Q", pConfig->zDb,zArg);
      }else{
        pConfig->eContent = FTS5_CONTENT_NONE;
      }
    }
    return rc;
  }

  if( sqlite3_strnicmp("content_rowid", zCmd, nCmd)==0 ){
    if( pConfig->zContentRowid ){
      *pzErr = sqlite3_mprintf("multiple content_rowid=... directives");
      rc = SQLITE_ERROR;
    }else{
      pConfig->zContentRowid = sqlite3Fts5Strndup(&rc, zArg, -1);
    }
    return rc;
  }

  if( sqlite3_strnicmp("columnsize", zCmd, nCmd)==0 ){
    if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){
      *pzErr = sqlite3_mprintf("malformed columnsize=... directive");
      rc = SQLITE_ERROR;
    }else{
      pConfig->bColumnsize = (zArg[0]=='1');
    }
    return rc;
  }

  *pzErr = sqlite3_mprintf("unrecognized option: \"%.*s\"", nCmd, zCmd);
  return SQLITE_ERROR;
}
/*
** Install the default tokenizer in pConfig->pTok/pTokApi by calling
** sqlite3Fts5GetTokenizer() with no tokenizer arguments. No tokenizer may
** have been configured already. Returns SQLITE_OK if successful, or an
** SQLite error code if an error occurs.
*/
static int fts5ConfigDefaultTokenizer(Fts5Global *pGlobal, Fts5Config *pConfig){
  int rc;
  assert( pConfig->pTok==0 && pConfig->pTokApi==0 );
  rc = sqlite3Fts5GetTokenizer(
      pGlobal, 0, 0, &pConfig->pTok, &pConfig->pTokApi, 0
  );
  return rc;
}
/*
** Gobble up the first bareword or quoted word from the input buffer zIn.
** Return a pointer to the character immediately following the last in
** the gobbled word if successful, or a NULL pointer otherwise (zIn starts
** with neither an open-quote nor a bareword character, or an OOM error
** occurred).
**
** Before returning, set *pzOut to point to a new buffer containing a
** nul-terminated, dequoted copy of the gobbled word. The caller is
** responsible for freeing it with sqlite3_free(). If the word was quoted,
** *pbQuoted is also set to 1 before returning. If NULL is returned,
** *pzOut is set to NULL.
**
** *pRc must be SQLITE_OK when this function is called. If an OOM occurs
** within this function, *pRc is set to SQLITE_NOMEM before returning.
** *pRc is *not* set if a parse error occurs.
*/
static const char *fts5ConfigGobbleWord(
  int *pRc,                       /* IN/OUT: Error code */
  const char *zIn,                /* Buffer to gobble string/bareword from */
  char **pzOut,                   /* OUT: malloc'd buffer containing str/bw */
  int *pbQuoted                   /* OUT: Set to true if dequoting required */
){
  const char *zRet = 0;

  int nIn = strlen(zIn);
  char *zOut = sqlite3_malloc(nIn+1);

  assert( *pRc==SQLITE_OK );
  *pbQuoted = 0;
  *pzOut = 0;

  if( zOut==0 ){
    *pRc = SQLITE_NOMEM;
  }else{
    memcpy(zOut, zIn, nIn+1);
    if( fts5_isopenquote(zOut[0]) ){
      int ii = fts5Dequote(zOut);
      zRet = &zIn[ii];
      *pbQuoted = 1;
    }else{
      zRet = fts5ConfigSkipBareword(zIn);
      /* fts5ConfigSkipBareword() returns NULL if zIn does not start with
      ** a bareword character. Only truncate the copy when a word was
      ** actually found - indexing with (NULL - zIn) would be a wild
      ** write. */
      if( zRet ){
        zOut[zRet-zIn] = '\0';
      }
    }
  }

  if( zRet==0 ){
    sqlite3_free(zOut);
  }else{
    *pzOut = zOut;
  }

  return zRet;
}
/*
** Add column zCol, with optional column option zArg, to configuration p.
**
** The only recognised option is "unindexed". An error is returned, and a
** message left in *pzErr, if zCol is one of the reserved names
** (FTS5_RANK_NAME or FTS5_ROWID_NAME) or if zArg is not recognised.
**
** zCol is stored in p->azCol[] and p->nCol is incremented whether or not
** an error occurs, so the string is always reachable from p afterwards.
*/
static int fts5ConfigParseColumn(
  Fts5Config *p,
  char *zCol,
  char *zArg,
  char **pzErr
){
  int rc = SQLITE_OK;
  int bReserved = (
      0==sqlite3_stricmp(zCol, FTS5_RANK_NAME)
   || 0==sqlite3_stricmp(zCol, FTS5_ROWID_NAME)
  );

  if( bReserved ){
    *pzErr = sqlite3_mprintf("reserved fts5 column name: %s", zCol);
    rc = SQLITE_ERROR;
  }else if( zArg ){
    if( sqlite3_stricmp(zArg, "unindexed")!=0 ){
      *pzErr = sqlite3_mprintf("unrecognized column option: %s", zArg);
      rc = SQLITE_ERROR;
    }else{
      p->abUnindexed[p->nCol] = 1;
    }
  }

  p->azCol[p->nCol] = zCol;
  p->nCol++;
  return rc;
}
/*
** Build the Fts5Config.zContentExprlist string: the quoted rowid column
** ("T.<rowid>") followed, unless the table is contentless, by one
** expression per column - "T.<column name>" for external content tables
** and "T.c<N>" otherwise. Whatever text was built is stored in
** p->zContentExprlist even if an error occurs. Returns SQLITE_OK or an
** SQLite error code.
*/
static int fts5ConfigMakeExprlist(Fts5Config *p){
  Fts5Buffer buf = {0, 0, 0};
  int rc = SQLITE_OK;

  sqlite3Fts5BufferAppendPrintf(&rc, &buf, "T.%Q", p->zContentRowid);
  if( p->eContent!=FTS5_CONTENT_NONE ){
    const int bExternal = (p->eContent==FTS5_CONTENT_EXTERNAL);
    int iCol;
    for(iCol=0; iCol<p->nCol; iCol++){
      if( bExternal ){
        sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.%Q", p->azCol[iCol]);
      }else{
        sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.c%d", iCol);
      }
    }
  }

  assert( p->zContentExprlist==0 );
  p->zContentExprlist = (char*)buf.p;
  return rc;
}
/*
** Arguments nArg/azArg contain the string arguments passed to the xCreate
** or xConnect method of the virtual table. This function attempts to
** allocate an instance of Fts5Config containing the results of parsing
** those arguments. azArg[1] is used as the database name and azArg[2] as
** the table name; azArg[3] onwards are column definitions or
** "option = value" directives.
**
** If successful, SQLITE_OK is returned and *ppOut is set to point to the
** new Fts5Config object. If an error occurs, an SQLite error code is
** returned, *ppOut is set to NULL and an error message may be left in
** *pzErr. It is the responsibility of the caller to eventually free any
** such error message using sqlite3_free().
*/
int sqlite3Fts5ConfigParse(
Fts5Global *pGlobal,
sqlite3 *db,
int nArg, /* Number of arguments */
const char **azArg, /* Array of nArg CREATE VIRTUAL TABLE args */
Fts5Config **ppOut, /* OUT: Results of parse */
char **pzErr /* OUT: Error message */
){
int rc = SQLITE_OK; /* Return code */
Fts5Config *pRet; /* New object to return */
int i;
int nByte;
*ppOut = pRet = (Fts5Config*)sqlite3_malloc(sizeof(Fts5Config));
if( pRet==0 ) return SQLITE_NOMEM;
memset(pRet, 0, sizeof(Fts5Config));
pRet->db = db;
pRet->iCookie = -1;
/* A single allocation holds both the azCol[] array of column names and,
** immediately after it, the abUnindexed[] array of per-column flags. Each
** is sized for nArg entries, an upper bound on the number of columns. */
nByte = nArg * (sizeof(char*) + sizeof(u8));
pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, nByte);
pRet->abUnindexed = (u8*)&pRet->azCol[nArg];
pRet->zDb = sqlite3Fts5Strndup(&rc, azArg[1], -1);
pRet->zName = sqlite3Fts5Strndup(&rc, azArg[2], -1);
pRet->bColumnsize = 1;
if( rc==SQLITE_OK && sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){
*pzErr = sqlite3_mprintf("reserved fts5 table name: %s", pRet->zName);
rc = SQLITE_ERROR;
}
/* Each remaining argument is either "<word>" / "<word> <word>" (a column
** definition with optional column option) or "<bareword> = <word>" (a
** special directive). z is set to NULL as soon as a parse error is
** detected. */
for(i=3; rc==SQLITE_OK && i<nArg; i++){
const char *zOrig = azArg[i];
const char *z;
char *zOne = 0;
char *zTwo = 0;
int bOption = 0;
int bMustBeCol = 0;
z = fts5ConfigGobbleWord(&rc, zOrig, &zOne, &bMustBeCol);
z = fts5ConfigSkipWhitespace(z);
if( z && *z=='=' ){
bOption = 1;
z++;
/* A quoted first word can only be a column name, so a quoted word
** followed by '=' is a parse error. */
if( bMustBeCol ) z = 0;
}
z = fts5ConfigSkipWhitespace(z);
if( z && z[0] ){
int bDummy;
z = fts5ConfigGobbleWord(&rc, z, &zTwo, &bDummy);
/* Anything following the second word is a parse error */
if( z && z[0] ) z = 0;
}
if( rc==SQLITE_OK ){
if( z==0 ){
*pzErr = sqlite3_mprintf("parse error in \"%s\"", zOrig);
rc = SQLITE_ERROR;
}else{
if( bOption ){
rc = fts5ConfigParseSpecial(pGlobal, pRet, zOne, zTwo?zTwo:"", pzErr);
}else{
rc = fts5ConfigParseColumn(pRet, zOne, zTwo, pzErr);
/* fts5ConfigParseColumn() stored zOne in pRet->azCol[], so it must
** not be freed below. */
zOne = 0;
}
}
}
sqlite3_free(zOne);
sqlite3_free(zTwo);
}
/* If a tokenize= option was successfully parsed, the tokenizer has
** already been allocated. Otherwise, allocate an instance of the default
** tokenizer now. */
if( rc==SQLITE_OK && pRet->pTok==0 ){
rc = fts5ConfigDefaultTokenizer(pGlobal, pRet);
}
/* If no zContent option was specified, fill in the default values: the
** "<name>_content" table for a normal table, or "<name>_docsize" for a
** contentless table that stores column sizes. */
if( rc==SQLITE_OK && pRet->zContent==0 ){
const char *zTail = 0;
assert( pRet->eContent==FTS5_CONTENT_NORMAL
|| pRet->eContent==FTS5_CONTENT_NONE
);
if( pRet->eContent==FTS5_CONTENT_NORMAL ){
zTail = "content";
}else if( pRet->bColumnsize ){
zTail = "docsize";
}
if( zTail ){
pRet->zContent = sqlite3Fts5Mprintf(
&rc, "%Q.'%q_%s'", pRet->zDb, pRet->zName, zTail
);
}
}
/* The rowid column of the content table defaults to "rowid" */
if( rc==SQLITE_OK && pRet->zContentRowid==0 ){
pRet->zContentRowid = sqlite3Fts5Strndup(&rc, "rowid", -1);
}
/* Formulate the zContentExprlist text */
if( rc==SQLITE_OK ){
rc = fts5ConfigMakeExprlist(pRet);
}
/* On any error, release the partially built object */
if( rc!=SQLITE_OK ){
sqlite3Fts5ConfigFree(pRet);
*ppOut = 0;
}
return rc;
}
/*
** Release configuration object pConfig together with everything it owns:
** the tokenizer instance (if any), the column names and all other
** strings and arrays. Passing NULL is a harmless no-op.
*/
void sqlite3Fts5ConfigFree(Fts5Config *pConfig){
  int iCol;

  if( pConfig==0 ) return;

  if( pConfig->pTok ){
    pConfig->pTokApi->xDelete(pConfig->pTok);
  }
  sqlite3_free(pConfig->zDb);
  sqlite3_free(pConfig->zName);
  for(iCol=0; iCol<pConfig->nCol; iCol++){
    sqlite3_free(pConfig->azCol[iCol]);
  }
  sqlite3_free(pConfig->azCol);
  sqlite3_free(pConfig->aPrefix);
  sqlite3_free(pConfig->zRank);
  sqlite3_free(pConfig->zRankArgs);
  sqlite3_free(pConfig->zContent);
  sqlite3_free(pConfig->zContentRowid);
  sqlite3_free(pConfig->zContentExprlist);
  sqlite3_free(pConfig);
}
/*
** Build a "CREATE TABLE x(...)" statement from the configuration - one
** column per user column, followed by two HIDDEN columns named after the
** table itself and FTS5_RANK_NAME - and pass it to
** sqlite3_declare_vtab(). Returns SQLITE_OK if successful, or an SQLite
** error code if an error occurs.
*/
int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig){
  int rc = SQLITE_OK;
  int iCol;
  char *zDecl = sqlite3Fts5Mprintf(&rc, "CREATE TABLE x(");

  /* Each step consumes the previous string through the %z conversion. */
  for(iCol=0; zDecl && iCol<pConfig->nCol; iCol++){
    zDecl = sqlite3Fts5Mprintf(
        &rc, "%z%s%Q", zDecl, (iCol==0?"":", "), pConfig->azCol[iCol]
    );
  }
  zDecl = sqlite3Fts5Mprintf(&rc, "%z, %Q HIDDEN, %s HIDDEN)",
      zDecl, pConfig->zName, FTS5_RANK_NAME
  );

  assert( zDecl || rc==SQLITE_NOMEM );
  if( zDecl ){
    rc = sqlite3_declare_vtab(pConfig->db, zDecl);
    sqlite3_free(zDecl);
  }

  return rc;
}
/*
** Tokenize the text passed via the second and third arguments using the
** tokenizer configured in pConfig. If pText is NULL, no tokenization takes
** place and SQLITE_OK is returned.
**
** Otherwise pCtx and xToken are handed to the tokenizer's xTokenize()
** method, and whatever that method returns is returned to the caller.
** The callback type declared here takes five arguments:
**
**     void *              // Copy of the pCtx argument
**     const char *        // Pointer to buffer containing token
**     int                 // Size of token in bytes
**     int                 // presumably byte offset of start of token
**     int                 // presumably byte offset of end of token
**
** NOTE(review): how the tokenizer treats non-zero callback return values
** (e.g. SQLITE_DONE to stop early) is decided by the xTokenize()
** implementation, which is not visible here - confirm against it.
*/
int sqlite3Fts5Tokenize(
  Fts5Config *pConfig,            /* FTS5 Configuration object */
  const char *pText, int nText,   /* Text to tokenize */
  void *pCtx,                     /* Context passed to xToken() */
  int (*xToken)(void*, const char*, int, int, int)    /* Callback */
){
  if( pText ){
    return pConfig->pTokApi->xTokenize(
        pConfig->pTok, pCtx, pText, nText, xToken
    );
  }
  return SQLITE_OK;
}
/*
** Argument pIn points to the first character in what is expected to be
** one or more SQL literals, separated by commas and optional spaces, and
** followed by a ')' character. If the input has that form, return a
** pointer to the ')'. Otherwise, return NULL to indicate a parse error.
*/
static const char *fts5ConfigSkipArgs(const char *pIn){
  const char *p = pIn;
  for(;;){
    /* Optional spaces, one literal, optional spaces */
    p = fts5ConfigSkipWhitespace(
        fts5ConfigSkipLiteral(fts5ConfigSkipWhitespace(p))
    );
    if( p==0 || *p==')' ) return p;
    if( *p!=',' ) return 0;
    p++;
  }
}
/*
** Parameter zIn contains a rank() function specification. The format of
** this is:
**
**   + Bareword (function name)
**   + Open parenthesis - "("
**   + Zero or more SQL literals in a comma separated list
**   + Close parenthesis - ")"
**
** On success SQLITE_OK is returned, *pzRank is set to a newly allocated
** copy of the function name and *pzRankArgs to a newly allocated copy of
** the text between the parentheses (or left NULL if there are no
** arguments). On failure an error code is returned and both outputs are
** NULL.
*/
int sqlite3Fts5ConfigParseRank(
  const char *zIn,                /* Input string */
  char **pzRank,                  /* OUT: Rank function name */
  char **pzRankArgs               /* OUT: Rank function arguments */
){
  const char *zName;              /* Start of the function name */
  const char *z;                  /* Parse cursor */
  char *zRank = 0;
  char *zRankArgs = 0;
  int rc = SQLITE_OK;

  *pzRank = 0;
  *pzRankArgs = 0;

  /* Function name */
  zName = fts5ConfigSkipWhitespace(zIn);
  z = fts5ConfigSkipBareword(zName);
  if( z==0 ){
    rc = SQLITE_ERROR;
  }else{
    zRank = sqlite3Fts5MallocZero(&rc, 1 + z - zName);
    if( zRank ) memcpy(zRank, zName, z-zName);
  }

  /* Open parenthesis */
  if( rc==SQLITE_OK ){
    z = fts5ConfigSkipWhitespace(z);
    if( *z!='(' ) rc = SQLITE_ERROR;
    z++;
  }

  /* Argument list, if it is not empty */
  if( rc==SQLITE_OK ){
    const char *zArgs = fts5ConfigSkipWhitespace(z);
    if( *zArgs!=')' ){
      const char *zEnd = fts5ConfigSkipArgs(zArgs);
      if( zEnd ){
        zRankArgs = sqlite3Fts5MallocZero(&rc, 1 + zEnd - zArgs);
        if( zRankArgs ) memcpy(zRankArgs, zArgs, zEnd-zArgs);
      }else{
        rc = SQLITE_ERROR;
      }
    }
  }

  if( rc==SQLITE_OK ){
    *pzRank = zRank;
    *pzRankArgs = zRankArgs;
  }else{
    sqlite3_free(zRank);
    assert( zRankArgs==0 );
  }
  return rc;
}
/*
** Apply a single configuration setting (key zKey, value pVal) to the
** in-memory configuration pConfig. Recognized keys are "pgsz",
** "automerge", "crisismerge" and "rank" (case-insensitive).
**
** If the key is not recognized, or the value is not acceptable for the
** key, (*pbBadkey) is set to 1 and the configuration is left unchanged.
** The return value is SQLITE_OK unless parsing a "rank" value fails with
** an error other than SQLITE_ERROR, in which case that code is returned.
*/
int sqlite3Fts5ConfigSetValue(
  Fts5Config *pConfig,
  const char *zKey,
  sqlite3_value *pVal,
  int *pbBadkey
){
  int rc = SQLITE_OK;

  if( sqlite3_stricmp(zKey, "pgsz")==0 ){
    int nPgsz = 0;
    if( sqlite3_value_numeric_type(pVal)==SQLITE_INTEGER ){
      nPgsz = sqlite3_value_int(pVal);
    }
    if( nPgsz>0 && nPgsz<=FTS5_MAX_PAGE_SIZE ){
      pConfig->pgsz = nPgsz;
    }else{
      *pbBadkey = 1;
    }
  }

  else if( sqlite3_stricmp(zKey, "automerge")==0 ){
    int nVal = -1;
    if( sqlite3_value_numeric_type(pVal)==SQLITE_INTEGER ){
      nVal = sqlite3_value_int(pVal);
    }
    if( nVal>=0 && nVal<=64 ){
      /* A value of 1 selects the built-in default */
      pConfig->nAutomerge = (nVal==1) ? FTS5_DEFAULT_AUTOMERGE : nVal;
    }else{
      *pbBadkey = 1;
    }
  }

  else if( sqlite3_stricmp(zKey, "crisismerge")==0 ){
    int nVal = -1;
    if( sqlite3_value_numeric_type(pVal)==SQLITE_INTEGER ){
      nVal = sqlite3_value_int(pVal);
    }
    if( nVal>=0 ){
      /* Values of 0 and 1 select the built-in default */
      pConfig->nCrisisMerge = (nVal<=1) ? FTS5_DEFAULT_CRISISMERGE : nVal;
    }else{
      *pbBadkey = 1;
    }
  }

  else if( sqlite3_stricmp(zKey, "rank")==0 ){
    const char *zSpec = (const char*)sqlite3_value_text(pVal);
    char *zName;
    char *zArgs;
    rc = sqlite3Fts5ConfigParseRank(zSpec, &zName, &zArgs);
    if( rc==SQLITE_ERROR ){
      /* A malformed specification is a bad value, not a hard error */
      rc = SQLITE_OK;
      *pbBadkey = 1;
    }else if( rc==SQLITE_OK ){
      /* Replace any previously configured rank function */
      sqlite3_free(pConfig->zRank);
      sqlite3_free(pConfig->zRankArgs);
      pConfig->zRank = zName;
      pConfig->zRankArgs = zArgs;
    }
  }

  else{
    *pbBadkey = 1;
  }

  return rc;
}
/*
** Load the contents of the %_config table into memory.
*/
int sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){
  int rc = SQLITE_OK;
  int iVersion = 0;               /* Value of "version" row, if any */
  sqlite3_stmt *pStmt = 0;        /* SELECT on the %_config table */
  char *zQuery;

  /* Start from the defaults. Rows read below override them. */
  pConfig->pgsz = FTS5_DEFAULT_PAGE_SIZE;
  pConfig->nAutomerge = FTS5_DEFAULT_AUTOMERGE;
  pConfig->nCrisisMerge = FTS5_DEFAULT_CRISISMERGE;

  zQuery = sqlite3Fts5Mprintf(
      &rc, "SELECT k, v FROM %Q.'%q_config'", pConfig->zDb, pConfig->zName
  );
  if( zQuery!=0 ){
    rc = sqlite3_prepare_v2(pConfig->db, zQuery, -1, &pStmt, 0);
    sqlite3_free(zQuery);
  }

  assert( rc==SQLITE_OK || pStmt==0 );
  if( rc==SQLITE_OK ){
    while( sqlite3_step(pStmt)==SQLITE_ROW ){
      const char *zKey = (const char*)sqlite3_column_text(pStmt, 0);
      sqlite3_value *pValue = sqlite3_column_value(pStmt, 1);
      if( sqlite3_stricmp(zKey, "version")==0 ){
        iVersion = sqlite3_value_int(pValue);
      }else{
        /* Unknown keys and bad values are silently ignored here */
        int bUnused = 0;
        sqlite3Fts5ConfigSetValue(pConfig, zKey, pValue, &bUnused);
      }
    }
    rc = sqlite3_finalize(pStmt);
  }

  if( rc==SQLITE_OK ){
    if( iVersion==FTS5_CURRENT_VERSION ){
      /* Only record the cookie once the config has loaded cleanly */
      pConfig->iCookie = iCookie;
    }else{
      rc = SQLITE_ERROR;
      if( pConfig->pzErrmsg ){
        assert( 0==*pConfig->pzErrmsg );
        *pConfig->pzErrmsg = sqlite3_mprintf(
            "invalid fts5 file format (found %d, expected %d) - run 'rebuild'",
            iVersion, FTS5_CURRENT_VERSION
        );
      }
    }
  }
  return rc;
}
#endif /* SQLITE_ENABLE_FTS5 */

2036
ext/fts5/fts5_expr.c Normal file

File diff suppressed because it is too large Load Diff

466
ext/fts5/fts5_hash.c Normal file
View File

@ -0,0 +1,466 @@
/*
** 2014 August 11
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
*/
#ifdef SQLITE_ENABLE_FTS5
#include "fts5Int.h"
typedef struct Fts5HashEntry Fts5HashEntry;
/*
** This file contains the implementation of an in-memory hash table used
** to accumulate "term -> doclist" content before it is flushed to a level-0
** segment.
*/
struct Fts5Hash {
int *pnByte; /* Caller-supplied counter; sqlite3Fts5HashWrite() adds to it */
int nEntry; /* Number of entries currently in hash */
int nSlot; /* Size of aSlot[] array (1024 initially, doubled on resize) */
Fts5HashEntry *pScan; /* Current ordered scan item (NULL at end of scan) */
Fts5HashEntry **aSlot; /* Array of hash slots (heads of pHashNext chains) */
};
/*
** Each entry in the hash table is represented by an object of the
** following type. Each object, its key (zKey[]) and its current data
** are stored in a single memory allocation. The doclist data
** immediately follows the nul-terminated key in memory.
**
** The data that follows the key is in a similar, but not identical format
** to the doclist data stored in the database. As written by
** sqlite3Fts5HashWrite() and fts5HashAddPoslistSize(), each rowid
** contributes:
**
** * The rowid, as a varint. The first rowid is stored as is, each
** subsequent rowid as the difference from the previous one.
** * The "poslist size" field. A single byte is reserved for this when
** the rowid is written. Once the position list is complete it is set
** to ((size-of-poslist-in-bytes * 2) + delete-flag), expanding to a
** multi-byte varint if that value is larger than 127.
** * Position list, without 0x00 terminator.
**
** iSzPoslist:
** Offset of the byte reserved for the poslist size field of the rowid
** currently being written. Relative to first byte of structure. Set to
** zero once the size field has been filled in.
**
** nData:
** Total bytes in use, measured from the first byte of the structure
** (i.e. including the structure itself and the key).
*/
struct Fts5HashEntry {
Fts5HashEntry *pHashNext; /* Next hash entry with same hash-key */
Fts5HashEntry *pScanNext; /* Next entry in sorted order */
int nAlloc; /* Total size of allocation */
int iSzPoslist; /* Offset of space for poslist size (0 if none) */
int nData; /* Total bytes of data (incl. structure) */
u8 bDel; /* Set delete-flag @ iSzPoslist */
int iCol; /* Column of last value written */
int iPos; /* Position of last value written */
i64 iRowid; /* Rowid of last value written */
char zKey[0]; /* Nul-terminated entry key */
};
/*
** Allocate a new hash table.
*/
int sqlite3Fts5HashNew(Fts5Hash **ppNew, int *pnByte){
  const int nInitSlot = 1024;     /* Initial number of hash slots */
  int nSlotByte = sizeof(Fts5HashEntry*) * nInitSlot;
  Fts5Hash *pHash;
  Fts5HashEntry **aSlot;

  /* Until both allocations have succeeded the output is NULL */
  *ppNew = 0;

  pHash = (Fts5Hash*)sqlite3_malloc(sizeof(Fts5Hash));
  if( pHash==0 ) return SQLITE_NOMEM;

  aSlot = (Fts5HashEntry**)sqlite3_malloc(nSlotByte);
  if( aSlot==0 ){
    sqlite3_free(pHash);
    return SQLITE_NOMEM;
  }

  memset(aSlot, 0, nSlotByte);
  memset(pHash, 0, sizeof(Fts5Hash));
  pHash->pnByte = pnByte;
  pHash->nSlot = nInitSlot;
  pHash->aSlot = aSlot;

  *ppNew = pHash;
  return SQLITE_OK;
}
/*
** Free a hash table object.
*/
void sqlite3Fts5HashFree(Fts5Hash *pHash){
  /* Passing NULL is a harmless no-op */
  if( pHash==0 ) return;

  sqlite3Fts5HashClear(pHash);    /* Release all entries */
  sqlite3_free(pHash->aSlot);     /* Then the slot array */
  sqlite3_free(pHash);            /* And finally the handle itself */
}
/*
** Empty (but do not delete) a hash table.
*/
void sqlite3Fts5HashClear(Fts5Hash *pHash){
  int iSlot = 0;
  while( iSlot<pHash->nSlot ){
    /* Free every entry on the collision chain of this slot */
    Fts5HashEntry *pEntry = pHash->aSlot[iSlot++];
    while( pEntry ){
      Fts5HashEntry *pDoomed = pEntry;
      pEntry = pEntry->pHashNext;
      sqlite3_free(pDoomed);
    }
  }

  /* Leave the (now empty) slot array in place for reuse */
  memset(pHash->aSlot, 0, pHash->nSlot * sizeof(Fts5HashEntry*));
  pHash->nEntry = 0;
}
/*
** Return the index of the hash slot, in a table of nSlot slots, for the
** n byte key at p. The bytes of the key are mixed in last-to-first.
*/
static unsigned int fts5HashKey(int nSlot, const char *p, int n){
  unsigned int hash = 13;
  int idx = n;
  while( idx-- > 0 ){
    hash = (hash << 3) ^ hash ^ p[idx];
  }
  return (hash % nSlot);
}
/*
** Like fts5HashKey(), but for a key made up of the single byte b followed
** by the n bytes at p. The result is the same as fts5HashKey() would
** return for the concatenated key.
*/
static unsigned int fts5HashKey2(int nSlot, char b, const char *p, int n){
  unsigned int hash = 13;
  int idx = n;
  while( idx-- > 0 ){
    hash = (hash << 3) ^ hash ^ p[idx];
  }
  /* The prefix byte is logically the first byte, so it is mixed in last */
  hash = (hash << 3) ^ hash ^ b;
  return (hash % nSlot);
}
/*
** Resize the hash table by doubling the number of slots.
*/
static int fts5HashResize(Fts5Hash *pHash){
  const int nSlotNew = pHash->nSlot*2;
  Fts5HashEntry **aOld = pHash->aSlot;
  Fts5HashEntry **aNew;
  int iOld;

  aNew = (Fts5HashEntry**)sqlite3_malloc(nSlotNew*sizeof(Fts5HashEntry*));
  if( aNew==0 ) return SQLITE_NOMEM;
  memset(aNew, 0, nSlotNew*sizeof(Fts5HashEntry*));

  /* Move each entry from its old chain to the head of its new chain */
  for(iOld=0; iOld<pHash->nSlot; iOld++){
    Fts5HashEntry *pEntry = aOld[iOld];
    while( pEntry ){
      Fts5HashEntry *pNext = pEntry->pHashNext;
      int iNew = fts5HashKey(nSlotNew, pEntry->zKey, strlen(pEntry->zKey));
      pEntry->pHashNext = aNew[iNew];
      aNew[iNew] = pEntry;
      pEntry = pNext;
    }
  }

  sqlite3_free(aOld);
  pHash->nSlot = nSlotNew;
  pHash->aSlot = aNew;
  return SQLITE_OK;
}
/*
** If entry p has a position list whose size field has not yet been
** written (p->iSzPoslist!=0), fill in that field now. The value stored is
** ((size of poslist in bytes * 2) + delete-flag). One byte was reserved
** for the field when the rowid was written; if the value does not fit in
** a single byte varint the poslist is shifted up to make room.
**
** After this call p->iSzPoslist and p->bDel are both zero, so calling
** the function again before another rowid is started is a no-op.
*/
static void fts5HashAddPoslistSize(Fts5HashEntry *p){
if( p->iSzPoslist ){
u8 *pPtr = (u8*)p;
int nSz = (p->nData - p->iSzPoslist - 1); /* Size in bytes */
int nPos = nSz*2 + p->bDel; /* Value of nPos field */
assert( p->bDel==0 || p->bDel==1 );
if( nPos<=127 ){
/* Fits in the single byte that was reserved */
pPtr[p->iSzPoslist] = nPos;
}else{
/* Multi-byte varint: move the poslist up by (nByte-1) bytes first */
int nByte = sqlite3Fts5GetVarintLen((u32)nPos);
memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz);
sqlite3Fts5PutVarint(&pPtr[p->iSzPoslist], nPos);
p->nData += (nByte-1);
}
p->bDel = 0;
p->iSzPoslist = 0;
}
}
/*
** Add a single token occurrence, or a delete marker, to the hash table.
** The hash key is the byte bByte followed by the nToken bytes at pToken.
**
** If iCol is zero or greater, position iPos of column iCol is appended to
** the position list for rowid iRowid. If iCol is negative, the delete
** flag is set for the rowid instead and no position is written.
**
** The counter that was passed to sqlite3Fts5HashNew() is increased by the
** number of bytes of data added by this call.
**
** Returns SQLITE_OK if successful, or SQLITE_NOMEM if an allocation
** fails.
*/
int sqlite3Fts5HashWrite(
Fts5Hash *pHash,
i64 iRowid, /* Rowid for this entry */
int iCol, /* Column token appears in (-ve -> delete) */
int iPos, /* Position of token within column */
char bByte, /* First byte of token */
const char *pToken, int nToken /* Token to add or remove to or from index */
){
unsigned int iHash = fts5HashKey2(pHash->nSlot, bByte, pToken, nToken);
Fts5HashEntry *p;
u8 *pPtr;
int nIncr = 0; /* Amount to increment (*pHash->pnByte) by */
/* Attempt to locate an existing hash entry */
for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){
if( p->zKey[0]==bByte
&& memcmp(&p->zKey[1], pToken, nToken)==0
&& p->zKey[nToken+1]==0
){
break;
}
}
/* If an existing hash entry cannot be found, create a new one. */
if( p==0 ){
int nByte = sizeof(Fts5HashEntry) + (nToken+1) + 1 + 64;
if( nByte<128 ) nByte = 128;
/* Grow the table once it is half full. This changes nSlot, so the
** slot index must be recomputed afterwards. */
if( (pHash->nEntry*2)>=pHash->nSlot ){
int rc = fts5HashResize(pHash);
if( rc!=SQLITE_OK ) return rc;
iHash = fts5HashKey2(pHash->nSlot, bByte, pToken, nToken);
}
p = (Fts5HashEntry*)sqlite3_malloc(nByte);
if( !p ) return SQLITE_NOMEM;
memset(p, 0, sizeof(Fts5HashEntry));
p->nAlloc = nByte;
p->zKey[0] = bByte;
memcpy(&p->zKey[1], pToken, nToken);
assert( iHash==fts5HashKey(pHash->nSlot, p->zKey, nToken+1) );
p->zKey[nToken+1] = '\0';
/* Data begins after the structure, the key and its nul-terminator.
** The first rowid is written in full, followed by one byte reserved
** for the poslist size. */
p->nData = nToken+1 + 1 + sizeof(Fts5HashEntry);
p->nData += sqlite3Fts5PutVarint(&((u8*)p)[p->nData], iRowid);
p->iSzPoslist = p->nData;
p->nData += 1;
p->iRowid = iRowid;
p->pHashNext = pHash->aSlot[iHash];
pHash->aSlot[iHash] = p;
pHash->nEntry++;
nIncr += p->nData;
}
/* Check there is enough space to append a new entry. Worst case scenario
** is:
**
** + 9 bytes for a new rowid,
** + 4 byte reserved for the "poslist size" varint.
** + 1 byte for a "new column" byte,
** + 3 bytes for a new column number (16-bit max) as a varint,
** + 5 bytes for the new position offset (32-bit max).
*/
if( (p->nAlloc - p->nData) < (9 + 4 + 1 + 3 + 5) ){
int nNew = p->nAlloc * 2;
Fts5HashEntry *pNew;
Fts5HashEntry **pp;
pNew = (Fts5HashEntry*)sqlite3_realloc(p, nNew);
if( pNew==0 ) return SQLITE_NOMEM;
pNew->nAlloc = nNew;
/* The entry may have moved. Fix the pointer to it in the slot chain. */
for(pp=&pHash->aSlot[iHash]; *pp!=p; pp=&(*pp)->pHashNext);
*pp = pNew;
p = pNew;
}
pPtr = (u8*)p;
/* nIncr ends up as (final nData - nData at this point), plus the initial
** size of the entry if it was created by this call. */
nIncr -= p->nData;
/* If this is a new rowid, fill in the size field for the previous
** poslist, then append the new rowid (as a delta) and reserve a byte
** for its poslist size. */
if( iRowid!=p->iRowid ){
fts5HashAddPoslistSize(p);
p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iRowid - p->iRowid);
p->iSzPoslist = p->nData;
p->nData += 1;
p->iCol = 0;
p->iPos = 0;
p->iRowid = iRowid;
}
if( iCol>=0 ){
/* Append a new column value, if necessary */
assert( iCol>=p->iCol );
if( iCol!=p->iCol ){
pPtr[p->nData++] = 0x01;
p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iCol);
p->iCol = iCol;
p->iPos = 0;
}
/* Append the new position offset (difference from previous, plus 2) */
p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iPos - p->iPos + 2);
p->iPos = iPos;
}else{
/* This is a delete. Set the delete flag. */
p->bDel = 1;
}
nIncr += p->nData;
*pHash->pnByte += nIncr;
return SQLITE_OK;
}
/*
** Arguments pLeft and pRight point to linked-lists of hash-entry objects,
** each sorted in key order. This function merges the two lists into a
** single list and returns a pointer to its first element.
*/
static Fts5HashEntry *fts5HashEntryMerge(
  Fts5HashEntry *pLeft,
  Fts5HashEntry *pRight
){
  Fts5HashEntry *pHead = 0;           /* First element of merged list */
  Fts5HashEntry **ppTail = &pHead;    /* Where to link the next element */

  /* While both lists have elements, move the smaller head to the output */
  while( pLeft && pRight ){
    Fts5HashEntry *pPick;
    int i = 0;

    /* Find the first byte at which the two keys differ. The keys are
    ** nul-terminated and never equal, so this always terminates. */
    while( pLeft->zKey[i]==pRight->zKey[i] ) i++;

    if( ((u8)pLeft->zKey[i])>((u8)pRight->zKey[i]) ){
      pPick = pRight;
      pRight = pRight->pScanNext;
    }else{
      pPick = pLeft;
      pLeft = pLeft->pScanNext;
    }
    *ppTail = pPick;
    ppTail = &pPick->pScanNext;
    *ppTail = 0;
  }

  /* At most one list still has elements. Append it as is. */
  if( pLeft ){
    *ppTail = pLeft;
  }else if( pRight ){
    *ppTail = pRight;
  }

  return pHead;
}
/*
** Link all entries in hash table pHash whose keys begin with the nTerm
** byte prefix pTerm (or all entries, if pTerm is NULL) into a list in
** sorted key order, connected by Fts5HashEntry.pScanNext. A pointer to
** the first element of the list is written to (*ppSorted).
**
** The entries are not removed from the hash slots - the pHashNext chains
** are left untouched, so the returned list must not be freed separately.
**
** The sort is a bottom-up merge sort: ap[i] is either NULL or a sorted
** list of 2^i elements. Each new entry is merged with successive
** occupied slots until an empty one is found.
**
** Returns SQLITE_OK if successful, or SQLITE_NOMEM if the temporary
** array cannot be allocated.
*/
static int fts5HashEntrySort(
Fts5Hash *pHash,
const char *pTerm, int nTerm, /* Query prefix, if any */
Fts5HashEntry **ppSorted
){
const int nMergeSlot = 32;
Fts5HashEntry **ap;
Fts5HashEntry *pList;
int iSlot;
int i;
*ppSorted = 0;
ap = sqlite3_malloc(sizeof(Fts5HashEntry*) * nMergeSlot);
if( !ap ) return SQLITE_NOMEM;
memset(ap, 0, sizeof(Fts5HashEntry*) * nMergeSlot);
for(iSlot=0; iSlot<pHash->nSlot; iSlot++){
Fts5HashEntry *pIter;
for(pIter=pHash->aSlot[iSlot]; pIter; pIter=pIter->pHashNext){
/* NOTE(review): the memcmp() reads nTerm bytes of zKey even if the key
** is shorter than that - confirm nTerm is always small enough. */
if( pTerm==0 || 0==memcmp(pIter->zKey, pTerm, nTerm) ){
Fts5HashEntry *pEntry = pIter;
pEntry->pScanNext = 0;
for(i=0; ap[i]; i++){
pEntry = fts5HashEntryMerge(pEntry, ap[i]);
ap[i] = 0;
}
ap[i] = pEntry;
}
}
}
/* Merge the partial lists together to produce the final result */
pList = 0;
for(i=0; i<nMergeSlot; i++){
pList = fts5HashEntryMerge(pList, ap[i]);
}
/* NOTE(review): nEntry is reset here even though the entries remain
** linked into aSlot[] - confirm that this is intended. */
pHash->nEntry = 0;
sqlite3_free(ap);
*ppSorted = pList;
return SQLITE_OK;
}
/*
** Query the hash table for a doclist associated with term pTerm/nTerm.
*/
int sqlite3Fts5HashQuery(
  Fts5Hash *pHash,                /* Hash table to query */
  const char *pTerm, int nTerm,   /* Query term */
  const u8 **ppDoclist,           /* OUT: Pointer to doclist for pTerm */
  int *pnDoclist                  /* OUT: Size of doclist in bytes */
){
  unsigned int iSlot = fts5HashKey(pHash->nSlot, pTerm, nTerm);
  Fts5HashEntry *pEntry = pHash->aSlot[iSlot];

  /* Walk the collision chain looking for an exact match on the key */
  while( pEntry
      && (memcmp(pEntry->zKey, pTerm, nTerm)!=0 || pEntry->zKey[nTerm]!=0)
  ){
    pEntry = pEntry->pHashNext;
  }

  if( pEntry==0 ){
    *ppDoclist = 0;
    *pnDoclist = 0;
  }else{
    /* Finish off the current poslist so that the doclist is well formed.
    ** The doclist starts immediately after the key's nul-terminator. */
    fts5HashAddPoslistSize(pEntry);
    *ppDoclist = (const u8*)&pEntry->zKey[nTerm+1];
    *pnDoclist = pEntry->nData - (sizeof(*pEntry) + nTerm + 1);
  }
  return SQLITE_OK;
}
/*
** Begin an ordered scan of the entries whose keys start with the nTerm
** byte prefix pTerm. The sorted list is stored in p->pScan. Returns the
** result of fts5HashEntrySort().
*/
int sqlite3Fts5HashScanInit(
  Fts5Hash *p,                    /* Hash table to query */
  const char *pTerm, int nTerm    /* Query prefix */
){
  int rc = fts5HashEntrySort(p, pTerm, nTerm, &p->pScan);
  return rc;
}
/*
** Advance the current scan to the next entry in sorted order. It is an
** error to call this when the scan is already at EOF.
*/
void sqlite3Fts5HashScanNext(Fts5Hash *p){
  Fts5HashEntry *pCur;
  assert( !sqlite3Fts5HashScanEof(p) );
  pCur = p->pScan;
  p->pScan = pCur->pScanNext;
}
/*
** Return 1 if the current scan has no more entries, or 0 otherwise.
*/
int sqlite3Fts5HashScanEof(Fts5Hash *p){
  return p->pScan ? 0 : 1;
}
/*
** Return the term and doclist of the entry the scan currently points to.
** If the scan is at EOF, all three outputs are set to zero.
*/
void sqlite3Fts5HashScanEntry(
  Fts5Hash *pHash,
  const char **pzTerm,            /* OUT: term (nul-terminated) */
  const u8 **ppDoclist,           /* OUT: pointer to doclist */
  int *pnDoclist                  /* OUT: size of doclist in bytes */
){
  Fts5HashEntry *pCur = pHash->pScan;

  if( pCur==0 ){
    *pzTerm = 0;
    *ppDoclist = 0;
    *pnDoclist = 0;
  }else{
    int nKey = strlen(pCur->zKey);

    /* Finish off the current poslist before exposing the doclist, which
    ** begins immediately after the key's nul-terminator. */
    fts5HashAddPoslistSize(pCur);
    *pzTerm = pCur->zKey;
    *ppDoclist = (const u8*)&pCur->zKey[nKey+1];
    *pnDoclist = pCur->nData - (sizeof(*pCur) + nKey + 1);
  }
}
#endif /* SQLITE_ENABLE_FTS5 */

5411
ext/fts5/fts5_index.c Normal file

File diff suppressed because it is too large Load Diff

2293
ext/fts5/fts5_main.c Normal file

File diff suppressed because it is too large Load Diff

1095
ext/fts5/fts5_storage.c Normal file

File diff suppressed because it is too large Load Diff

985
ext/fts5/fts5_tcl.c Normal file
View File

@ -0,0 +1,985 @@
/*
** 2014 Dec 01
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
*/
#ifdef SQLITE_TEST
#include <tcl.h>
#ifdef SQLITE_ENABLE_FTS5
#include "fts5.h"
#include <string.h>
#include <assert.h>
extern int sqlite3_fts5_may_be_corrupt;
/*************************************************************************
** This is a copy of the first part of the SqliteDb structure in
** tclsqlite.c. We need it here so that the get_sqlite_pointer routine
** can extract the sqlite3* pointer from an existing Tcl SQLite
** connection.
*/
extern const char *sqlite3ErrName(int);
struct SqliteDb {
sqlite3 *db; /* Database handle; read by f5tDbPointer() */
};
/*
** Decode a pointer to an sqlite3 object.
*/
static int f5tDbPointer(Tcl_Interp *interp, Tcl_Obj *pObj, sqlite3 **ppDb){
  Tcl_CmdInfo info;
  char *zHandle = Tcl_GetString(pObj);

  /* pObj must be the name of an existing Tcl command. Its client-data is
  ** interpreted as a pointer to a SqliteDb structure. */
  if( Tcl_GetCommandInfo(interp, zHandle, &info)==0 ){
    return TCL_ERROR;
  }
  *ppDb = ((struct SqliteDb*)info.objClientData)->db;
  return TCL_OK;
}
/* End of code that accesses the SqliteDb struct.
**************************************************************************/
/*
** Translate the string result of a Tcl callback script into an SQLite
** result code. "SQLITE_DONE", "SQLITE_ERROR" and "SQLITE_OK" (matched
** case-insensitively) map to the corresponding codes, the empty string
** maps to SQLITE_OK and anything else to SQLITE_ERROR.
*/
static int f5tResultToErrorCode(const char *zRes){
  if( 0==sqlite3_stricmp(zRes, "SQLITE_DONE") ) return SQLITE_DONE;
  if( 0==sqlite3_stricmp(zRes, "SQLITE_ERROR") ) return SQLITE_ERROR;
  if( 0==sqlite3_stricmp(zRes, "SQLITE_OK") ) return SQLITE_OK;
  if( 0==sqlite3_stricmp(zRes, "") ) return SQLITE_OK;
  return SQLITE_ERROR;
}
/*
** Argument pObj is the name of a Tcl database handle command. Extract the
** sqlite3* handle from it and obtain the fts5_api pointer for that
** connection by running "SELECT fts5()", which returns the pointer as a
** blob.
**
** If successful, TCL_OK is returned and both (*ppDb) and (*ppApi) are
** set to non-NULL values. Otherwise TCL_ERROR is returned and, except in
** the case where pObj is not a valid command name, an error message is
** left in the interpreter. The outputs are not modified on error.
*/
static int f5tDbAndApi(
  Tcl_Interp *interp,
  Tcl_Obj *pObj,
  sqlite3 **ppDb,
  fts5_api **ppApi
){
  sqlite3 *db = 0;
  sqlite3_stmt *pStmt = 0;
  fts5_api *pApi = 0;
  int rc;

  if( f5tDbPointer(interp, pObj, &db)!=TCL_OK ){
    return TCL_ERROR;
  }

  rc = sqlite3_prepare_v2(db, "SELECT fts5()", -1, &pStmt, 0);
  if( rc!=SQLITE_OK ){
    Tcl_AppendResult(interp, "error: ", sqlite3_errmsg(db), 0);
    return TCL_ERROR;
  }

  if( SQLITE_ROW==sqlite3_step(pStmt) ){
    /* Only copy the pointer out if the blob is really there and is large
    ** enough to hold one. sqlite3_column_blob() returns NULL for a NULL
    ** value, a zero-length blob or following an OOM. */
    const void *pPtr = sqlite3_column_blob(pStmt, 0);
    if( pPtr && sqlite3_column_bytes(pStmt, 0)>=(int)sizeof(pApi) ){
      memcpy((void*)&pApi, pPtr, sizeof(pApi));
    }
  }

  if( sqlite3_finalize(pStmt)!=SQLITE_OK ){
    Tcl_AppendResult(interp, "error: ", sqlite3_errmsg(db), 0);
    return TCL_ERROR;
  }

  /* Callers dereference the API pointer unconditionally, so never report
  ** success without one. */
  if( pApi==0 ){
    Tcl_AppendResult(interp, "error: fts5 API pointer is not available", 0);
    return TCL_ERROR;
  }

  *ppDb = db;
  *ppApi = pApi;
  return TCL_OK;
}
/*
** Context for a callback that is implemented as a Tcl script. The script
** is duplicated, has arguments appended to it and is then evaluated in
** interpreter interp.
*/
typedef struct F5tFunction F5tFunction;
struct F5tFunction {
Tcl_Interp *interp; /* Interpreter in which pScript is evaluated */
Tcl_Obj *pScript; /* Script (command prefix) to invoke */
};
/*
** Client-data for the temporary Tcl commands implemented by xF5tApi().
** Each sub-command invokes a method of pApi, passing pFts as the first
** argument.
*/
typedef struct F5tApi F5tApi;
struct F5tApi {
const Fts5ExtensionApi *pApi; /* Extension API methods */
Fts5Context *pFts; /* First argument to pass to pApi methods */
};
/*
** An object of this type is used with the xSetAuxdata() and xGetAuxdata()
** API test wrappers. The tcl interface allows a single tcl value to be
** saved using xSetAuxdata(). Instead of simply storing a pointer to the
** tcl object, the code in this file wraps it in an sqlite3_malloc'd
** instance of the following struct so that if the destructor is not
** correctly invoked it will be reported as an SQLite memory leak.
*/
typedef struct F5tAuxData F5tAuxData;
struct F5tAuxData {
Tcl_Obj *pObj; /* Saved value; a reference is held until destruction */
};
/*
** Token callback used by the "xTokenize" sub-command. For each token the
** script in the F5tFunction context is invoked with three arguments
** appended: the token text, the start offset and the end offset.
**
** If the script fails, the Tcl error code is returned. Otherwise the
** script result is translated by f5tResultToErrorCode().
*/
static int xTokenizeCb(
  void *pCtx,
  const char *zToken, int nToken,
  int iStart, int iEnd
){
  F5tFunction *pFunc = (F5tFunction*)pCtx;
  Tcl_Obj *pCmd = Tcl_DuplicateObj(pFunc->pScript);
  int rc;

  Tcl_IncrRefCount(pCmd);
  Tcl_ListObjAppendElement(
      pFunc->interp, pCmd, Tcl_NewStringObj(zToken, nToken)
  );
  Tcl_ListObjAppendElement(pFunc->interp, pCmd, Tcl_NewIntObj(iStart));
  Tcl_ListObjAppendElement(pFunc->interp, pCmd, Tcl_NewIntObj(iEnd));

  rc = Tcl_EvalObjEx(pFunc->interp, pCmd, 0);
  Tcl_DecrRefCount(pCmd);

  if( rc!=TCL_OK ) return rc;
  return f5tResultToErrorCode(Tcl_GetStringResult(pFunc->interp));
}
static int xF5tApi(void*, Tcl_Interp*, int, Tcl_Obj *CONST []);
/*
** Callback used by the "xQueryPhrase" sub-command. A temporary Tcl
** command that exposes the extension API for the visited row is created
** and its name is appended to the script as the only argument. The
** command is deleted again after the script has run.
**
** If the script fails, the Tcl error code is returned. Otherwise the
** script result is translated by f5tResultToErrorCode().
*/
static int xQueryPhraseCb(
  const Fts5ExtensionApi *pApi,
  Fts5Context *pFts,
  void *pCtx
){
  static sqlite3_int64 iCmd = 0;  /* Used to generate unique command names */
  F5tFunction *pFunc = (F5tFunction*)pCtx;
  F5tApi sApi;
  char zCmd[64];
  Tcl_Obj *pScript;
  int rc;

  sApi.pApi = pApi;
  sApi.pFts = pFts;

  sprintf(zCmd, "f5t_2_%lld", iCmd++);
  Tcl_CreateObjCommand(pFunc->interp, zCmd, xF5tApi, &sApi, 0);

  pScript = Tcl_DuplicateObj(pFunc->pScript);
  Tcl_IncrRefCount(pScript);
  Tcl_ListObjAppendElement(
      pFunc->interp, pScript, Tcl_NewStringObj(zCmd, -1)
  );
  rc = Tcl_EvalObjEx(pFunc->interp, pScript, 0);
  Tcl_DecrRefCount(pScript);

  /* sApi lives on the stack, so the command must not outlive this call */
  Tcl_DeleteCommand(pFunc->interp, zCmd);

  if( rc!=TCL_OK ) return rc;
  return f5tResultToErrorCode(Tcl_GetStringResult(pFunc->interp));
}
static void xSetAuxdataDestructor(void *p){
F5tAuxData *pData = (F5tAuxData*)p;
Tcl_DecrRefCount(pData->pObj);
sqlite3_free(pData);
}
/*
** Implementation of the temporary Tcl commands created by xF5tFunction()
** and xQueryPhraseCb():
**
** CMD SUB-COMMAND ARGS...
**
** The client-data is a pointer to an F5tApi structure. Each sub-command
** listed in aSub[] below invokes the Fts5ExtensionApi method of the same
** name and sets the interpreter result to the value(s) it returns.
**
** If the method returns an SQLite error code, the interpreter result is
** set to the symbolic name of the code and TCL_ERROR is returned. The
** exception is "xTokenize", which returns the code from the tokenizer
** directly (see below).
*/
static int xF5tApi(
void * clientData,
Tcl_Interp *interp,
int objc,
Tcl_Obj *CONST objv[]
){
/* Sub-command table: name, number of arguments required after the
** sub-command name, and usage message for those arguments. The index of
** each entry must match the CASE() label in the switch below. */
struct Sub {
const char *zName;
int nArg;
const char *zMsg;
} aSub[] = {
{ "xColumnCount", 0, "" }, /* 0 */
{ "xRowCount", 0, "" }, /* 1 */
{ "xColumnTotalSize", 1, "COL" }, /* 2 */
{ "xTokenize", 2, "TEXT SCRIPT" }, /* 3 */
{ "xPhraseCount", 0, "" }, /* 4 */
{ "xPhraseSize", 1, "PHRASE" }, /* 5 */
{ "xInstCount", 0, "" }, /* 6 */
{ "xInst", 1, "IDX" }, /* 7 */
{ "xRowid", 0, "" }, /* 8 */
{ "xColumnText", 1, "COL" }, /* 9 */
{ "xColumnSize", 1, "COL" }, /* 10 */
{ "xQueryPhrase", 2, "PHRASE SCRIPT" }, /* 11 */
{ "xSetAuxdata", 1, "VALUE" }, /* 12 */
{ "xGetAuxdata", 1, "CLEAR" }, /* 13 */
{ "xSetAuxdataInt", 1, "INTEGER" }, /* 14 */
{ "xGetAuxdataInt", 1, "CLEAR" }, /* 15 */
{ 0, 0, 0}
};
int rc;
int iSub = 0;
F5tApi *p = (F5tApi*)clientData;
if( objc<2 ){
Tcl_WrongNumArgs(interp, 1, objv, "SUB-COMMAND");
return TCL_ERROR;
}
rc = Tcl_GetIndexFromObjStruct(
interp, objv[1], aSub, sizeof(aSub[0]), "SUB-COMMAND", 0, &iSub
);
if( rc!=TCL_OK ) return rc;
if( aSub[iSub].nArg!=objc-2 ){
Tcl_WrongNumArgs(interp, 1, objv, aSub[iSub].zMsg);
return TCL_ERROR;
}
/* From this point on rc is TCL_OK (zero, the same value as SQLITE_OK).
** Cases that call a method with no error return leave it unchanged, so
** that their "rc==SQLITE_OK" tests are always true. */
#define CASE(i,str) case i: assert( strcmp(aSub[i].zName, str)==0 );
switch( iSub ){
CASE(0, "xColumnCount") {
int nCol;
nCol = p->pApi->xColumnCount(p->pFts);
if( rc==SQLITE_OK ){
Tcl_SetObjResult(interp, Tcl_NewIntObj(nCol));
}
break;
}
CASE(1, "xRowCount") {
sqlite3_int64 nRow;
rc = p->pApi->xRowCount(p->pFts, &nRow);
if( rc==SQLITE_OK ){
Tcl_SetObjResult(interp, Tcl_NewWideIntObj(nRow));
}
break;
}
CASE(2, "xColumnTotalSize") {
int iCol;
sqlite3_int64 nSize;
if( Tcl_GetIntFromObj(interp, objv[2], &iCol) ) return TCL_ERROR;
rc = p->pApi->xColumnTotalSize(p->pFts, iCol, &nSize);
if( rc==SQLITE_OK ){
Tcl_SetObjResult(interp, Tcl_NewWideIntObj(nSize));
}
break;
}
CASE(3, "xTokenize") {
int nText;
char *zText = Tcl_GetStringFromObj(objv[2], &nText);
F5tFunction ctx;
ctx.interp = interp;
ctx.pScript = objv[3];
rc = p->pApi->xTokenize(p->pFts, zText, nText, &ctx, xTokenizeCb);
if( rc==SQLITE_OK ){
Tcl_ResetResult(interp);
}
/* Unlike the other cases, the code is returned directly rather than
** being converted to a symbolic name. If the callback script failed,
** this leaves the script's own error message in the interpreter. */
return rc;
}
CASE(4, "xPhraseCount") {
int nPhrase;
nPhrase = p->pApi->xPhraseCount(p->pFts);
if( rc==SQLITE_OK ){
Tcl_SetObjResult(interp, Tcl_NewIntObj(nPhrase));
}
break;
}
CASE(5, "xPhraseSize") {
int iPhrase;
int sz;
if( Tcl_GetIntFromObj(interp, objv[2], &iPhrase) ){
return TCL_ERROR;
}
sz = p->pApi->xPhraseSize(p->pFts, iPhrase);
if( rc==SQLITE_OK ){
Tcl_SetObjResult(interp, Tcl_NewIntObj(sz));
}
break;
}
CASE(6, "xInstCount") {
int nInst;
rc = p->pApi->xInstCount(p->pFts, &nInst);
if( rc==SQLITE_OK ){
Tcl_SetObjResult(interp, Tcl_NewIntObj(nInst));
}
break;
}
CASE(7, "xInst") {
int iIdx, ip, ic, io;
if( Tcl_GetIntFromObj(interp, objv[2], &iIdx) ){
return TCL_ERROR;
}
rc = p->pApi->xInst(p->pFts, iIdx, &ip, &ic, &io);
if( rc==SQLITE_OK ){
/* The result is a list of the three values output by xInst() */
Tcl_Obj *pList = Tcl_NewObj();
Tcl_ListObjAppendElement(interp, pList, Tcl_NewIntObj(ip));
Tcl_ListObjAppendElement(interp, pList, Tcl_NewIntObj(ic));
Tcl_ListObjAppendElement(interp, pList, Tcl_NewIntObj(io));
Tcl_SetObjResult(interp, pList);
}
break;
}
CASE(8, "xRowid") {
sqlite3_int64 iRowid = p->pApi->xRowid(p->pFts);
Tcl_SetObjResult(interp, Tcl_NewWideIntObj(iRowid));
break;
}
CASE(9, "xColumnText") {
const char *z = 0;
int n = 0;
int iCol;
if( Tcl_GetIntFromObj(interp, objv[2], &iCol) ){
return TCL_ERROR;
}
rc = p->pApi->xColumnText(p->pFts, iCol, &z, &n);
if( rc==SQLITE_OK ){
Tcl_SetObjResult(interp, Tcl_NewStringObj(z, n));
}
break;
}
CASE(10, "xColumnSize") {
int n = 0;
int iCol;
if( Tcl_GetIntFromObj(interp, objv[2], &iCol) ){
return TCL_ERROR;
}
rc = p->pApi->xColumnSize(p->pFts, iCol, &n);
if( rc==SQLITE_OK ){
Tcl_SetObjResult(interp, Tcl_NewIntObj(n));
}
break;
}
CASE(11, "xQueryPhrase") {
int iPhrase;
F5tFunction ctx;
if( Tcl_GetIntFromObj(interp, objv[2], &iPhrase) ){
return TCL_ERROR;
}
ctx.interp = interp;
ctx.pScript = objv[3];
rc = p->pApi->xQueryPhrase(p->pFts, iPhrase, &ctx, xQueryPhraseCb);
if( rc==SQLITE_OK ){
Tcl_ResetResult(interp);
}
break;
}
CASE(12, "xSetAuxdata") {
F5tAuxData *pData = (F5tAuxData*)sqlite3_malloc(sizeof(F5tAuxData));
if( pData==0 ){
Tcl_AppendResult(interp, "out of memory", 0);
return TCL_ERROR;
}
/* The wrapper holds a reference to the value until it is destroyed by
** xSetAuxdataDestructor(). */
pData->pObj = objv[2];
Tcl_IncrRefCount(pData->pObj);
rc = p->pApi->xSetAuxdata(p->pFts, pData, xSetAuxdataDestructor);
break;
}
CASE(13, "xGetAuxdata") {
F5tAuxData *pData;
int bClear;
if( Tcl_GetBooleanFromObj(interp, objv[2], &bClear) ){
return TCL_ERROR;
}
pData = (F5tAuxData*)p->pApi->xGetAuxdata(p->pFts, bClear);
if( pData==0 ){
Tcl_ResetResult(interp);
}else{
Tcl_SetObjResult(interp, pData->pObj);
/* If the data was cleared it is now the responsibility of this code
** to destroy it. */
if( bClear ){
xSetAuxdataDestructor((void*)pData);
}
}
break;
}
/* These two - xSetAuxdataInt and xGetAuxdataInt - are similar to the
** xSetAuxdata and xGetAuxdata methods implemented above. The difference
** is that they may only save an integer value as auxiliary data, and
** do not specify a destructor function. */
CASE(14, "xSetAuxdataInt") {
int iVal;
if( Tcl_GetIntFromObj(interp, objv[2], &iVal) ) return TCL_ERROR;
/* The integer is stored in the pointer value itself */
rc = p->pApi->xSetAuxdata(p->pFts, (void*)iVal, 0);
break;
}
CASE(15, "xGetAuxdataInt") {
int iVal;
int bClear;
if( Tcl_GetBooleanFromObj(interp, objv[2], &bClear) ) return TCL_ERROR;
iVal = (int)p->pApi->xGetAuxdata(p->pFts, bClear);
Tcl_SetObjResult(interp, Tcl_NewIntObj(iVal));
break;
}
default:
assert( 0 );
break;
}
#undef CASE
/* Report an SQLite error using the symbolic name of the error code */
if( rc!=SQLITE_OK ){
Tcl_SetResult(interp, (char*)sqlite3ErrName(rc), TCL_VOLATILE);
return TCL_ERROR;
}
return TCL_OK;
}
/*
** Implementation of FTS5 auxiliary functions that are written in Tcl.
**
** A temporary Tcl command exposing the extension API is created, and the
** script registered with the function is invoked with the name of that
** command followed by the SQL arguments, each converted to a Tcl value
** of the corresponding type. If the script fails, its result is returned
** as an SQL error. Otherwise the script result is converted back to an
** SQL value according to its Tcl type.
*/
static void xF5tFunction(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  sqlite3_context *pCtx,          /* Context for returning result/error */
  int nVal,                       /* Number of values in apVal[] array */
  sqlite3_value **apVal           /* Array of trailing arguments */
){
  static sqlite3_int64 iCmd = 0;  /* Used to generate unique command names */
  F5tFunction *p = (F5tFunction*)pApi->xUserData(pFts);
  Tcl_Obj *pScript;               /* Script to evaluate */
  F5tApi sApi;
  char zCmd[64];
  int iArg;
  int rc;

  sApi.pApi = pApi;
  sApi.pFts = pFts;

  sprintf(zCmd, "f5t_%lld", iCmd++);
  Tcl_CreateObjCommand(p->interp, zCmd, xF5tApi, &sApi, 0);

  pScript = Tcl_DuplicateObj(p->pScript);
  Tcl_IncrRefCount(pScript);
  Tcl_ListObjAppendElement(p->interp, pScript, Tcl_NewStringObj(zCmd, -1));

  /* Append each SQL argument as a Tcl value of the matching type */
  for(iArg=0; iArg<nVal; iArg++){
    sqlite3_value *pVal = apVal[iArg];
    int eType = sqlite3_value_type(pVal);
    Tcl_Obj *pArg;

    if( eType==SQLITE_TEXT ){
      pArg = Tcl_NewStringObj((const char*)sqlite3_value_text(pVal), -1);
    }else if( eType==SQLITE_BLOB ){
      pArg = Tcl_NewByteArrayObj(
          sqlite3_value_blob(pVal), sqlite3_value_bytes(pVal)
      );
    }else if( eType==SQLITE_INTEGER ){
      pArg = Tcl_NewWideIntObj(sqlite3_value_int64(pVal));
    }else if( eType==SQLITE_FLOAT ){
      pArg = Tcl_NewDoubleObj(sqlite3_value_double(pVal));
    }else{
      pArg = Tcl_NewObj();
    }
    Tcl_ListObjAppendElement(p->interp, pScript, pArg);
  }

  rc = Tcl_EvalObjEx(p->interp, pScript, TCL_GLOBAL_ONLY);
  Tcl_DecrRefCount(pScript);

  /* sApi lives on the stack, so the command must not outlive this call */
  Tcl_DeleteCommand(p->interp, zCmd);

  if( rc!=TCL_OK ){
    sqlite3_result_error(pCtx, Tcl_GetStringResult(p->interp), -1);
  }else{
    Tcl_Obj *pRes = Tcl_GetObjResult(p->interp);
    const char *zType = (pRes->typePtr ? pRes->typePtr->name : "");
    int n;

    if( strcmp(zType, "bytearray")==0 && pRes->bytes==0 ){
      /* Only return a BLOB type if the Tcl variable is a bytearray and
      ** has no string representation. */
      unsigned char *aBlob = Tcl_GetByteArrayFromObj(pRes, &n);
      sqlite3_result_blob(pCtx, aBlob, n, SQLITE_TRANSIENT);
    }else if( strcmp(zType, "boolean")==0 ){
      Tcl_GetIntFromObj(0, pRes, &n);
      sqlite3_result_int(pCtx, n);
    }else if( strcmp(zType, "double")==0 ){
      double rVal;
      Tcl_GetDoubleFromObj(0, pRes, &rVal);
      sqlite3_result_double(pCtx, rVal);
    }else if( strcmp(zType, "wideInt")==0 || strcmp(zType, "int")==0 ){
      Tcl_WideInt iVal;
      Tcl_GetWideIntFromObj(0, pRes, &iVal);
      sqlite3_result_int64(pCtx, iVal);
    }else{
      /* Everything else is returned as text */
      unsigned char *zText = (unsigned char *)Tcl_GetStringFromObj(pRes, &n);
      sqlite3_result_text(pCtx, (char *)zText, n, SQLITE_TRANSIENT);
    }
  }
}
static void xF5tDestroy(void *pCtx){
F5tFunction *p = (F5tFunction*)pCtx;
Tcl_DecrRefCount(p->pScript);
ckfree((char *)p);
}
/*
** sqlite3_fts5_create_function DB NAME SCRIPT
**
** Description...
*/
static int f5tCreateFunction(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  sqlite3 *db = 0;
  fts5_api *pApi = 0;
  F5tFunction *pFunc;             /* Context for the new function */
  char *zName;
  int rc;

  if( objc!=4 ){
    Tcl_WrongNumArgs(interp, 1, objv, "DB NAME SCRIPT");
    return TCL_ERROR;
  }
  if( f5tDbAndApi(interp, objv[1], &db, &pApi)!=TCL_OK ){
    return TCL_ERROR;
  }
  zName = Tcl_GetString(objv[2]);

  /* The context holds a reference to the script until xF5tDestroy() */
  pFunc = (F5tFunction*)ckalloc(sizeof(F5tFunction));
  pFunc->interp = interp;
  pFunc->pScript = objv[3];
  Tcl_IncrRefCount(pFunc->pScript);

  rc = pApi->xCreateFunction(
      pApi, zName, (void*)pFunc, xF5tFunction, xF5tDestroy
  );
  if( rc==SQLITE_OK ){
    return TCL_OK;
  }
  Tcl_AppendResult(interp, "error: ", sqlite3_errmsg(db), 0);
  return TCL_ERROR;
}
/*
** Context passed to xTokenizeCb2() by f5tTokenize().
*/
typedef struct F5tTokenizeCtx F5tTokenizeCtx;
struct F5tTokenizeCtx {
Tcl_Obj *pRet; /* List object to which results are appended */
int bSubst; /* True to append input text instead of offsets */
const char *zInput; /* Text being tokenized (used when bSubst is set) */
};
static int xTokenizeCb2(
void *pCtx,
const char *zToken, int nToken,
int iStart, int iEnd
){
F5tTokenizeCtx *p = (F5tTokenizeCtx*)pCtx;
if( p->bSubst ){
Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewStringObj(zToken, nToken));
Tcl_ListObjAppendElement(
0, p->pRet, Tcl_NewStringObj(&p->zInput[iStart], iEnd-iStart)
);
}else{
Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewStringObj(zToken, nToken));
Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewIntObj(iStart));
Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewIntObj(iEnd));
}
return SQLITE_OK;
}
/*
** sqlite3_fts5_tokenize ?-subst? DB TOKENIZER TEXT
**
** Tokenize TEXT using the tokenizer specified by TOKENIZER, a list
** consisting of the tokenizer name followed by any arguments to pass to
** its xCreate() method. The result is a flat list. By default each token
** contributes three elements - the token text, its start offset and its
** end offset. If the -subst option is specified, each token contributes
** two elements - the token text and the part of TEXT between the start
** and end offsets.
*/
static int f5tTokenize(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  char *zText;
  int nText;
  sqlite3 *db = 0;
  fts5_api *pApi = 0;
  Fts5Tokenizer *pTok = 0;
  fts5_tokenizer tokenizer;
  Tcl_Obj *pRet = 0;
  void *pUserdata;
  int rc;

  int nArg;
  const char **azArg;
  F5tTokenizeCtx ctx;

  if( objc!=4 && objc!=5 ){
    Tcl_WrongNumArgs(interp, 1, objv, "?-subst? DB NAME TEXT");
    return TCL_ERROR;
  }
  if( objc==5 ){
    char *zOpt = Tcl_GetString(objv[1]);
    if( strcmp("-subst", zOpt) ){
      Tcl_AppendResult(interp, "unrecognized option: ", zOpt, 0);
      return TCL_ERROR;
    }
  }
  if( f5tDbAndApi(interp, objv[objc-3], &db, &pApi) ) return TCL_ERROR;

  /* From here on azArg[] is owned by this function and must be released
  ** with Tcl_Free() on every path out of it. */
  if( Tcl_SplitList(interp, Tcl_GetString(objv[objc-2]), &nArg, &azArg) ){
    return TCL_ERROR;
  }
  if( nArg==0 ){
    Tcl_AppendResult(interp, "no such tokenizer: ", 0);
    Tcl_Free((void*)azArg);
    return TCL_ERROR;
  }
  zText = Tcl_GetStringFromObj(objv[objc-1], &nText);

  rc = pApi->xFindTokenizer(pApi, azArg[0], &pUserdata, &tokenizer);
  if( rc!=SQLITE_OK ){
    /* The message copies azArg[0], so it must be built before the free */
    Tcl_AppendResult(interp, "no such tokenizer: ", azArg[0], 0);
    Tcl_Free((void*)azArg);
    return TCL_ERROR;
  }

  rc = tokenizer.xCreate(pUserdata, &azArg[1], nArg-1, &pTok);
  if( rc!=SQLITE_OK ){
    Tcl_AppendResult(interp, "error in tokenizer.xCreate()", 0);
    Tcl_Free((void*)azArg);
    return TCL_ERROR;
  }

  pRet = Tcl_NewObj();
  Tcl_IncrRefCount(pRet);
  ctx.bSubst = (objc==5);
  ctx.pRet = pRet;
  ctx.zInput = zText;
  rc = tokenizer.xTokenize(pTok, (void*)&ctx, zText, nText, xTokenizeCb2);
  tokenizer.xDelete(pTok);

  /* The tokenizer is gone, so its arguments are no longer required */
  Tcl_Free((void*)azArg);

  if( rc!=SQLITE_OK ){
    Tcl_AppendResult(interp, "error in tokenizer.xTokenize()", 0);
    Tcl_DecrRefCount(pRet);
    return TCL_ERROR;
  }

  Tcl_SetObjResult(interp, pRet);
  Tcl_DecrRefCount(pRet);
  return TCL_OK;
}
/*************************************************************************
** Start of tokenizer wrapper.
*/
typedef struct F5tTokenizerContext F5tTokenizerContext;
typedef struct F5tTokenizerCb F5tTokenizerCb;
typedef struct F5tTokenizerModule F5tTokenizerModule;
/* Tokenizer instances use the same structure as tokenizer modules */
typedef struct F5tTokenizerModule F5tTokenizerInstance;
/*
** The token callback, and its context argument, for the tokenize
** operation that is currently in progress. f5tTokenizerTokenize() saves
** and restores these values around the evaluation of the Tcl script.
*/
struct F5tTokenizerContext {
void *pCtx; /* First argument to pass to xToken() */
int (*xToken)(void*, const char*, int, int, int); /* Token callback */
};
/*
** A tokenizer implemented in Tcl. For a module, pScript is the script
** invoked to create an instance. For an instance, it is the script
** invoked to tokenize text (the result of the module script).
*/
struct F5tTokenizerModule {
Tcl_Interp *interp; /* Interpreter in which pScript is evaluated */
Tcl_Obj *pScript; /* Script (command prefix) to invoke */
F5tTokenizerContext *pContext; /* Context shared with instances */
};
/*
** xCreate method of the Tcl tokenizer wrapper. The script belonging to
** the module is invoked with the nArg strings in azArg[] appended as
** arguments. If it succeeds, its result becomes the script of the new
** tokenizer instance. Otherwise the Tcl error code is returned and
** (*ppOut) is left unmodified.
*/
static int f5tTokenizerCreate(
  void *pCtx,
  const char **azArg,
  int nArg,
  Fts5Tokenizer **ppOut
){
  F5tTokenizerModule *pModule = (F5tTokenizerModule*)pCtx;
  F5tTokenizerInstance *pNew;
  Tcl_Obj *pCmd;
  int iArg = 0;
  int rc = TCL_OK;

  pCmd = Tcl_DuplicateObj(pModule->pScript);
  Tcl_IncrRefCount(pCmd);
  while( rc==TCL_OK && iArg<nArg ){
    Tcl_Obj *pArg = Tcl_NewStringObj(azArg[iArg], -1);
    rc = Tcl_ListObjAppendElement(pModule->interp, pCmd, pArg);
    iArg++;
  }
  if( rc==TCL_OK ){
    rc = Tcl_EvalObjEx(pModule->interp, pCmd, TCL_GLOBAL_ONLY);
  }
  Tcl_DecrRefCount(pCmd);

  if( rc!=TCL_OK ) return rc;

  /* The instance keeps a reference to the script returned by the module */
  pNew = (F5tTokenizerInstance*)ckalloc(sizeof(F5tTokenizerInstance));
  memset(pNew, 0, sizeof(F5tTokenizerInstance));
  pNew->interp = pModule->interp;
  pNew->pScript = Tcl_GetObjResult(pModule->interp);
  pNew->pContext = pModule->pContext;
  Tcl_IncrRefCount(pNew->pScript);
  *ppOut = (Fts5Tokenizer*)pNew;

  return rc;
}
/*
** fts5_tokenizer.xDelete implementation: drop the reference held on the
** instance script and free the instance allocated by f5tTokenizerCreate().
*/
static void f5tTokenizerDelete(Fts5Tokenizer *p){
  F5tTokenizerInstance *pDoomed = (F5tTokenizerInstance*)p;
  Tcl_Obj *pScript = pDoomed->pScript;

  Tcl_DecrRefCount(pScript);
  ckfree((char *)pDoomed);
}
/*
** fts5_tokenizer.xTokenize implementation for script tokenizers.
**
** The text (pText, nText) is appended as a single list element to a copy
** of the instance script, which is then evaluated in the global scope. The
** script is expected to invoke [sqlite3_fts5_token] once per token; that
** command forwards each token to xToken(pCtx, ...) via the shared
** F5tTokenizerContext.
**
** The previous contents of the shared context are saved before the
** callback is installed and restored afterwards, so that nested tokenize
** calls leave the outer call's callback intact.
**
** Returns the Tcl result code of building/evaluating the script.
*/
static int f5tTokenizerTokenize(
  Fts5Tokenizer *p,
  void *pCtx,
  const char *pText, int nText,
  int (*xToken)(void*, const char*, int, int, int)
){
  F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p;
  void *pOldCtx;
  int (*xOldToken)(void*, const char*, int, int, int);
  Tcl_Obj *pEval;
  int rc;

  /* Save the callback currently installed in the shared context */
  pOldCtx = pInst->pContext->pCtx;
  xOldToken = pInst->pContext->xToken;

  /* Install the callback for this call. Without this, the script's calls
  ** to [sqlite3_fts5_token] would never reach xToken. */
  pInst->pContext->pCtx = pCtx;
  pInst->pContext->xToken = xToken;

  pEval = Tcl_DuplicateObj(pInst->pScript);
  Tcl_IncrRefCount(pEval);
  rc = Tcl_ListObjAppendElement(
      pInst->interp, pEval, Tcl_NewStringObj(pText, nText)
  );
  if( rc==TCL_OK ){
    rc = Tcl_EvalObjEx(pInst->interp, pEval, TCL_GLOBAL_ONLY);
  }
  Tcl_DecrRefCount(pEval);

  /* Restore whatever was installed before this call */
  pInst->pContext->pCtx = pOldCtx;
  pInst->pContext->xToken = xOldToken;
  return rc;
}
/*
** sqlite3_fts5_token TEXT START END
*/
/*
** Implementation of the [sqlite3_fts5_token] command. clientData is the
** shared F5tTokenizerContext. The command takes exactly three arguments
** (TEXT START END) and passes them to the token callback currently
** installed in the context. It is an error to invoke the command when no
** callback is installed.
**
** The interpreter result is set to the symbolic name of the SQLite result
** code returned by the callback; the command itself then returns TCL_OK.
*/
static int f5tTokenizerReturn(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  F5tTokenizerContext *pTokCtx = (F5tTokenizerContext*)clientData;
  char *zTok;                     /* Token text */
  int nTok;                       /* Size of zTok in bytes */
  int iFirst;                     /* START argument */
  int iLast;                      /* END argument */
  int rcToken;                    /* Value returned by the token callback */

  assert( pTokCtx );
  if( objc!=4 ){
    Tcl_WrongNumArgs(interp, 1, objv, "TEXT START END");
    return TCL_ERROR;
  }
  if( pTokCtx->xToken==0 ){
    Tcl_AppendResult(interp,
        "sqlite3_fts5_token may only be used by tokenizer callback", 0
    );
    return TCL_ERROR;
  }

  zTok = Tcl_GetStringFromObj(objv[1], &nTok);
  if( Tcl_GetIntFromObj(interp, objv[2], &iFirst)!=TCL_OK ) return TCL_ERROR;
  if( Tcl_GetIntFromObj(interp, objv[3], &iLast)!=TCL_OK ) return TCL_ERROR;

  rcToken = pTokCtx->xToken(pTokCtx->pCtx, zTok, nTok, iFirst, iLast);
  Tcl_SetResult(interp, (char*)sqlite3ErrName(rcToken), TCL_VOLATILE);
  return TCL_OK;
}
static void f5tDelTokenizer(void *pCtx){
F5tTokenizerModule *pMod = (F5tTokenizerModule*)pCtx;
Tcl_DecrRefCount(pMod->pScript);
ckfree((char *)pMod);
}
/*
** sqlite3_fts5_create_tokenizer DB NAME SCRIPT
**
** Register a tokenizer named NAME implemented by script SCRIPT. When
** a tokenizer instance is created (fts5_tokenizer.xCreate), any tokenizer
** arguments are appended to SCRIPT and the result executed.
**
** The value returned by (SCRIPT + args) is itself a tcl script. This
** script - call it SCRIPT2 - is executed to tokenize text using the
** tokenizer instance "returned" by SCRIPT. Specifically, to tokenize
** text SCRIPT2 is invoked with a single argument appended to it - the
** text to tokenize.
**
** SCRIPT2 should invoke the [sqlite3_fts5_token] command once for each
** token within the tokenized text.
*/
/*
** Implementation of [sqlite3_fts5_create_tokenizer DB NAME SCRIPT].
** clientData is the shared F5tTokenizerContext.
**
** Allocates a module object that records the interpreter, SCRIPT (with a
** reference held) and the shared context, then registers it under NAME
** with the fts5_api of database DB. f5tDelTokenizer() is registered as
** the module destructor.
*/
static int f5tCreateTokenizer(
  ClientData clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  sqlite3 *db;
  fts5_api *pApi;
  fts5_tokenizer sTok;            /* Method table handed to FTS5 */
  F5tTokenizerModule *pNew;       /* New module object */
  int rc;

  if( objc!=4 ){
    Tcl_WrongNumArgs(interp, 1, objv, "DB NAME SCRIPT");
    return TCL_ERROR;
  }
  if( f5tDbAndApi(interp, objv[1], &db, &pApi) ){
    return TCL_ERROR;
  }

  sTok.xCreate = f5tTokenizerCreate;
  sTok.xTokenize = f5tTokenizerTokenize;
  sTok.xDelete = f5tTokenizerDelete;

  pNew = (F5tTokenizerModule*)ckalloc(sizeof(F5tTokenizerModule));
  pNew->interp = interp;
  pNew->pContext = (F5tTokenizerContext*)clientData;
  pNew->pScript = objv[3];
  Tcl_IncrRefCount(pNew->pScript);

  rc = pApi->xCreateTokenizer(
      pApi, Tcl_GetString(objv[2]), (void*)pNew, &sTok, f5tDelTokenizer
  );
  if( rc!=SQLITE_OK ){
    Tcl_AppendResult(interp, "error in fts5_api.xCreateTokenizer()", 0);
    return TCL_ERROR;
  }
  return TCL_OK;
}
/*
** Tcl command delete callback: free the ckalloc()'d block passed as the
** command's clientData.
*/
static void xF5tFree(ClientData clientData){
  char *pBlock = (char*)clientData;
  ckfree(pBlock);
}
/*
** sqlite3_fts5_may_be_corrupt ?BOOLEAN?
**
** Set or clear the global "may-be-corrupt" flag. Return the old value.
*/
/*
** Implementation of [sqlite3_fts5_may_be_corrupt ?BOOLEAN?].
**
** With no argument the global sqlite3_fts5_may_be_corrupt flag is left
** unchanged; with one argument it is set to the supplied boolean. In both
** cases the interpreter result is the value the flag had on entry.
*/
static int f5tMayBeCorrupt(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  int bPrev = sqlite3_fts5_may_be_corrupt;

  switch( objc ){
    case 1:
      break;

    case 2: {
      int bNew;
      if( Tcl_GetBooleanFromObj(interp, objv[1], &bNew)!=TCL_OK ){
        return TCL_ERROR;
      }
      sqlite3_fts5_may_be_corrupt = bNew;
      break;
    }

    default:
      Tcl_WrongNumArgs(interp, 1, objv, "?BOOLEAN?");
      return TCL_ERROR;
  }

  Tcl_SetObjResult(interp, Tcl_NewIntObj(bPrev));
  return TCL_OK;
}
/*
** Hash the n bytes at p into a slot number in the range [0, nSlot).
** Bytes are folded in from the last to the first, starting from the
** seed value 13. nSlot must be non-zero.
*/
static unsigned int f5t_fts5HashKey(int nSlot, const char *p, int n){
  unsigned int uHash = 13;
  int iByte = n;

  while( iByte-- > 0 ){
    uHash = (uHash << 3) ^ uHash ^ p[iByte];
  }
  return (uHash % nSlot);
}
/*
** Implementation of [sqlite3_fts5_token_hash NSLOT TOKEN].
**
** Sets the interpreter result to the hash slot that f5t_fts5HashKey()
** assigns to TOKEN in a table of NSLOT slots. NSLOT must be greater than
** zero: the hash is reduced modulo NSLOT, so zero would divide by zero.
*/
static int f5tTokenHash(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  char *z;                        /* TOKEN text */
  int n;                          /* Size of z in bytes */
  unsigned int iVal;              /* Computed slot number */
  int nSlot;                      /* NSLOT argument */

  if( objc!=3 ){
    Tcl_WrongNumArgs(interp, 1, objv, "NSLOT TOKEN");
    return TCL_ERROR;
  }
  if( Tcl_GetIntFromObj(interp, objv[1], &nSlot) ){
    return TCL_ERROR;
  }
  if( nSlot<=0 ){
    Tcl_AppendResult(interp, "NSLOT must be greater than zero", 0);
    return TCL_ERROR;
  }
  z = Tcl_GetStringFromObj(objv[2], &n);

  iVal = f5t_fts5HashKey(nSlot, z, n);
  Tcl_SetObjResult(interp, Tcl_NewIntObj(iVal));
  return TCL_OK;
}
/*
** Entry point.
*/
/*
** Register the FTS5 test commands with interpreter interp.
**
** A single zeroed F5tTokenizerContext is allocated and passed as
** clientData to every command whose bTokenizeCtx flag is set. The first
** command in the table is registered with xF5tFree() as its delete
** callback, so the context is freed when that command is deleted.
*/
int Fts5tcl_Init(Tcl_Interp *interp){
  static struct Cmd {
    char *zName;
    Tcl_ObjCmdProc *xProc;
    int bTokenizeCtx;
  } aCmd[] = {
    { "sqlite3_fts5_create_tokenizer", f5tCreateTokenizer, 1 },
    { "sqlite3_fts5_token",            f5tTokenizerReturn, 1 },
    { "sqlite3_fts5_tokenize",         f5tTokenize, 0 },
    { "sqlite3_fts5_create_function",  f5tCreateFunction, 0 },
    { "sqlite3_fts5_may_be_corrupt",   f5tMayBeCorrupt, 0 },
    { "sqlite3_fts5_token_hash",       f5tTokenHash, 0 }
  };
  const int nCmd = (int)(sizeof(aCmd)/sizeof(aCmd[0]));
  F5tTokenizerContext *pShared;
  int iCmd;

  pShared = (F5tTokenizerContext*)ckalloc(sizeof(F5tTokenizerContext));
  memset(pShared, 0, sizeof(*pShared));

  for(iCmd=0; iCmd<nCmd; iCmd++){
    struct Cmd *pCmd = &aCmd[iCmd];
    void *pClientData = pCmd->bTokenizeCtx ? (void*)pShared : 0;
    Tcl_CmdDeleteProc *xDel = (iCmd==0) ? xF5tFree : 0;
    Tcl_CreateObjCommand(interp, pCmd->zName, pCmd->xProc, pClientData, xDel);
  }

  return TCL_OK;
}
#else /* SQLITE_ENABLE_FTS5 */
/* Stub used when SQLITE_ENABLE_FTS5 is not defined: registers no commands
** and reports success. */
int Fts5tcl_Init(Tcl_Interp *interp){
return TCL_OK;
}
#endif /* SQLITE_ENABLE_FTS5 */
#endif /* SQLITE_TEST */

1231
ext/fts5/fts5_tokenize.c Normal file

File diff suppressed because it is too large Load Diff

362
ext/fts5/fts5_unicode2.c Normal file
View File

@ -0,0 +1,362 @@
/*
** 2012 May 25
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
*/
/*
** DO NOT EDIT THIS MACHINE GENERATED FILE.
*/
#if defined(SQLITE_ENABLE_FTS5)
#include <assert.h>
/*
** Return true if the argument corresponds to a unicode codepoint
** classified as either a letter or a number. Otherwise false.
**
** The results are undefined if the value passed to this function
** is less than zero.
*/
int sqlite3Fts5UnicodeIsalnum(int c){
  /* Each unsigned integer in the following array corresponds to a contiguous
  ** range of unicode codepoints that are not either letters or numbers (i.e.
  ** codepoints for which this function should return 0).
  **
  ** The most significant 22 bits in each 32-bit value contain the first
  ** codepoint in the range. The least significant 10 bits are used to store
  ** the size of the range (always at least 1). In other words, the value
  ** ((C<<10) + N) represents a range of N codepoints starting with codepoint
  ** C. It is not possible to represent a range larger than 1023 codepoints
  ** using this format.
  */
  static const unsigned int aEntry[] = {
    0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07,
    0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01,
    0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401,
    0x000BBC81, 0x000DD401, 0x000DF801, 0x000E1002, 0x000E1C01,
    0x000FD801, 0x00120808, 0x00156806, 0x00162402, 0x00163C01,
    0x00164437, 0x0017CC02, 0x00180005, 0x00181816, 0x00187802,
    0x00192C15, 0x0019A804, 0x0019C001, 0x001B5001, 0x001B580F,
    0x001B9C07, 0x001BF402, 0x001C000E, 0x001C3C01, 0x001C4401,
    0x001CC01B, 0x001E980B, 0x001FAC09, 0x001FD804, 0x00205804,
    0x00206C09, 0x00209403, 0x0020A405, 0x0020C00F, 0x00216403,
    0x00217801, 0x0023901B, 0x00240004, 0x0024E803, 0x0024F812,
    0x00254407, 0x00258804, 0x0025C001, 0x00260403, 0x0026F001,
    0x0026F807, 0x00271C02, 0x00272C03, 0x00275C01, 0x00278802,
    0x0027C802, 0x0027E802, 0x00280403, 0x0028F001, 0x0028F805,
    0x00291C02, 0x00292C03, 0x00294401, 0x0029C002, 0x0029D401,
    0x002A0403, 0x002AF001, 0x002AF808, 0x002B1C03, 0x002B2C03,
    0x002B8802, 0x002BC002, 0x002C0403, 0x002CF001, 0x002CF807,
    0x002D1C02, 0x002D2C03, 0x002D5802, 0x002D8802, 0x002DC001,
    0x002E0801, 0x002EF805, 0x002F1803, 0x002F2804, 0x002F5C01,
    0x002FCC08, 0x00300403, 0x0030F807, 0x00311803, 0x00312804,
    0x00315402, 0x00318802, 0x0031FC01, 0x00320802, 0x0032F001,
    0x0032F807, 0x00331803, 0x00332804, 0x00335402, 0x00338802,
    0x00340802, 0x0034F807, 0x00351803, 0x00352804, 0x00355C01,
    0x00358802, 0x0035E401, 0x00360802, 0x00372801, 0x00373C06,
    0x00375801, 0x00376008, 0x0037C803, 0x0038C401, 0x0038D007,
    0x0038FC01, 0x00391C09, 0x00396802, 0x003AC401, 0x003AD006,
    0x003AEC02, 0x003B2006, 0x003C041F, 0x003CD00C, 0x003DC417,
    0x003E340B, 0x003E6424, 0x003EF80F, 0x003F380D, 0x0040AC14,
    0x00412806, 0x00415804, 0x00417803, 0x00418803, 0x00419C07,
    0x0041C404, 0x0042080C, 0x00423C01, 0x00426806, 0x0043EC01,
    0x004D740C, 0x004E400A, 0x00500001, 0x0059B402, 0x005A0001,
    0x005A6C02, 0x005BAC03, 0x005C4803, 0x005CC805, 0x005D4802,
    0x005DC802, 0x005ED023, 0x005F6004, 0x005F7401, 0x0060000F,
    0x0062A401, 0x0064800C, 0x0064C00C, 0x00650001, 0x00651002,
    0x0066C011, 0x00672002, 0x00677822, 0x00685C05, 0x00687802,
    0x0069540A, 0x0069801D, 0x0069FC01, 0x006A8007, 0x006AA006,
    0x006C0005, 0x006CD011, 0x006D6823, 0x006E0003, 0x006E840D,
    0x006F980E, 0x006FF004, 0x00709014, 0x0070EC05, 0x0071F802,
    0x00730008, 0x00734019, 0x0073B401, 0x0073C803, 0x00770027,
    0x0077F004, 0x007EF401, 0x007EFC03, 0x007F3403, 0x007F7403,
    0x007FB403, 0x007FF402, 0x00800065, 0x0081A806, 0x0081E805,
    0x00822805, 0x0082801A, 0x00834021, 0x00840002, 0x00840C04,
    0x00842002, 0x00845001, 0x00845803, 0x00847806, 0x00849401,
    0x00849C01, 0x0084A401, 0x0084B801, 0x0084E802, 0x00850005,
    0x00852804, 0x00853C01, 0x00864264, 0x00900027, 0x0091000B,
    0x0092704E, 0x00940200, 0x009C0475, 0x009E53B9, 0x00AD400A,
    0x00B39406, 0x00B3BC03, 0x00B3E404, 0x00B3F802, 0x00B5C001,
    0x00B5FC01, 0x00B7804F, 0x00B8C00C, 0x00BA001A, 0x00BA6C59,
    0x00BC00D6, 0x00BFC00C, 0x00C00005, 0x00C02019, 0x00C0A807,
    0x00C0D802, 0x00C0F403, 0x00C26404, 0x00C28001, 0x00C3EC01,
    0x00C64002, 0x00C6580A, 0x00C70024, 0x00C8001F, 0x00C8A81E,
    0x00C94001, 0x00C98020, 0x00CA2827, 0x00CB003F, 0x00CC0100,
    0x01370040, 0x02924037, 0x0293F802, 0x02983403, 0x0299BC10,
    0x029A7C01, 0x029BC008, 0x029C0017, 0x029C8002, 0x029E2402,
    0x02A00801, 0x02A01801, 0x02A02C01, 0x02A08C09, 0x02A0D804,
    0x02A1D004, 0x02A20002, 0x02A2D011, 0x02A33802, 0x02A38012,
    0x02A3E003, 0x02A4980A, 0x02A51C0D, 0x02A57C01, 0x02A60004,
    0x02A6CC1B, 0x02A77802, 0x02A8A40E, 0x02A90C01, 0x02A93002,
    0x02A97004, 0x02A9DC03, 0x02A9EC01, 0x02AAC001, 0x02AAC803,
    0x02AADC02, 0x02AAF802, 0x02AB0401, 0x02AB7802, 0x02ABAC07,
    0x02ABD402, 0x02AF8C0B, 0x03600001, 0x036DFC02, 0x036FFC02,
    0x037FFC01, 0x03EC7801, 0x03ECA401, 0x03EEC810, 0x03F4F802,
    0x03F7F002, 0x03F8001A, 0x03F88007, 0x03F8C023, 0x03F95013,
    0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807, 0x03FCEC06,
    0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405, 0x04040003,
    0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E, 0x040E7C01,
    0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01, 0x04280403,
    0x04281402, 0x04283004, 0x0428E003, 0x0428FC01, 0x04294009,
    0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016, 0x04420003,
    0x0442C012, 0x04440003, 0x04449C0E, 0x04450004, 0x04460003,
    0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004, 0x05BD442E,
    0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5, 0x07480046,
    0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01, 0x075C5401,
    0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401, 0x075EA401,
    0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064, 0x07C2800F,
    0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F, 0x07C4C03C,
    0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009, 0x07C94002,
    0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014, 0x07CE8025,
    0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001, 0x07D108B6,
    0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018, 0x07D7EC46,
    0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401, 0x38008060,
    0x380400F0,
  };

  /* One bit per ASCII codepoint. A set bit means "not a letter or digit". */
  static const unsigned int aAscii[4] = {
    0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001,
  };

  if( c<128 ){
    /* The shift is done on an unsigned value: bit 31 is tested for
    ** c = 31, 63, 95 and 127, and (1 << 31) on a signed int is undefined. */
    return ( (aAscii[c >> 5] & ((unsigned int)1 << (c & 0x001F)))==0 );
  }else if( c<(1<<22) ){
    /* Binary search for the last range whose first codepoint is <= c. The
    ** low 10 bits of the key are all set so that it compares >= any entry
    ** starting at c, whatever that entry's size field is. */
    unsigned int key = (((unsigned int)c)<<10) | 0x000003FF;
    int iRes = 0;
    int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
    int iLo = 0;
    while( iHi>=iLo ){
      int iTest = (iHi + iLo) / 2;
      if( key >= aEntry[iTest] ){
        iRes = iTest;
        iLo = iTest+1;
      }else{
        iHi = iTest-1;
      }
    }
    assert( aEntry[0]<key );
    assert( key>=aEntry[iRes] );
    /* c is a letter or number unless it lies inside range iRes */
    return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF)));
  }
  /* Codepoints that cannot be represented in the table are treated as
  ** letters/numbers. */
  return 1;
}
/*
** If the argument is a codepoint corresponding to a lowercase letter
** in the ASCII range with a diacritic added, return the codepoint
** of the ASCII letter only. For example, if passed 235 - "LATIN
** SMALL LETTER E WITH DIAERESIS" - return 101 ("LATIN SMALL LETTER
** E"). The results of passing a codepoint that corresponds to an
** uppercase letter are undefined.
*/
static int fts5_remove_diacritic(int c){
  /* Each aDia[] entry packs a range of codepoints: the upper 13 bits hold
  ** the first codepoint and the low 3 bits hold the offset of the last
  ** codepoint from the first. Every codepoint in range i is replaced by
  ** aChar[i]. Both tables are read-only, so they are static const rather
  ** than being rebuilt on the stack on every call. */
  static const unsigned short aDia[] = {
        0,  1797,  1848,  1859,  1891,  1928,  1940,  1995,
     2024,  2040,  2060,  2110,  2168,  2206,  2264,  2286,
     2344,  2383,  2472,  2488,  2516,  2596,  2668,  2732,
     2782,  2842,  2894,  2954,  2984,  3000,  3028,  3336,
     3456,  3696,  3712,  3728,  3744,  3896,  3912,  3928,
     3968,  4008,  4040,  4106,  4138,  4170,  4202,  4234,
     4266,  4296,  4312,  4344,  4408,  4424,  4472,  4504,
     6148,  6198,  6264,  6280,  6360,  6429,  6505,  6529,
    61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726,
    61784, 61800, 61836, 61880, 61914, 61948, 61998, 62122,
    62154, 62200, 62218, 62302, 62364, 62442, 62478, 62536,
    62554, 62584, 62604, 62640, 62648, 62656, 62664, 62730,
    62924, 63050, 63082, 63274, 63390,
  };
  static const char aChar[] = {
    '\0', 'a',  'c',  'e',  'i',  'n',  'o',  'u',  'y',  'y',  'a',  'c',
    'd',  'e',  'e',  'g',  'h',  'i',  'j',  'k',  'l',  'n',  'o',  'r',
    's',  't',  'u',  'u',  'w',  'y',  'z',  'o',  'u',  'a',  'i',  'o',
    'u',  'g',  'k',  'o',  'j',  'g',  'n',  'a',  'e',  'i',  'o',  'r',
    'u',  's',  't',  'h',  'a',  'e',  'o',  'y',  '\0', '\0', '\0', '\0',
    '\0', '\0', '\0', '\0', 'a',  'b',  'd',  'd',  'e',  'f',  'g',  'h',
    'h',  'i',  'k',  'l',  'l',  'm',  'n',  'p',  'r',  'r',  's',  't',
    'u',  'v',  'w',  'w',  'x',  'y',  'z',  'h',  't',  'w',  'y',  'a',
    'e',  'i',  'o',  'u',  'y',
  };

  /* Binary search for the last range that starts at or before c. The low
  ** 3 bits of the key are all set so that it compares >= any entry whose
  ** first codepoint is c. */
  unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
  int iRes = 0;
  int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
  int iLo = 0;
  while( iHi>=iLo ){
    int iTest = (iHi + iLo) / 2;
    if( key >= aDia[iTest] ){
      iRes = iTest;
      iLo = iTest+1;
    }else{
      iHi = iTest-1;
    }
  }
  assert( key>=aDia[iRes] );

  /* If c lies beyond the end of range iRes it is returned unchanged */
  return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]);
}
/*
** Return true if the argument interpreted as a unicode codepoint
** is a diacritical modifier character.
*/
int sqlite3Fts5UnicodeIsdiacritic(int c){
  /* Bit i of mask0 covers codepoint 768+i; bit i of mask1 covers codepoint
  ** 800+i. A set bit marks a diacritical modifier. */
  unsigned int mask0 = 0x08029FDF;
  unsigned int mask1 = 0x000361F8;
  if( c<768 || c>817 ) return 0;

  /* Shift an unsigned 1: for c==799 the shift count is 31, and (1 << 31)
  ** on a signed int is undefined. The result is non-zero (not necessarily
  ** 1) when c is a diacritic. */
  return (c < 768+32) ?
      (mask0 & ((unsigned int)1 << (c-768))) :
      (mask1 & ((unsigned int)1 << (c-768-32)));
}
/*
** Interpret the argument as a unicode codepoint. If the codepoint
** is an upper case character that has a lower case equivalent,
** return the codepoint corresponding to the lower case version.
** Otherwise, return a copy of the argument.
**
** The results are undefined if the value passed to this function
** is less than zero.
*/
int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic){
/* Each entry in the following array defines a rule for folding a range
** of codepoints to lower case. The rule applies to a range of nRange
** codepoints starting at codepoint iCode.
**
** If the least significant bit in flags is clear, then the rule applies
** to all nRange codepoints (i.e. all nRange codepoints are upper case and
** need to be folded). Or, if it is set, then the rule only applies to
** every second codepoint in the range, starting with codepoint C.
**
** The 7 most significant bits in flags are an index into the aiOff[]
** array. If a specific codepoint C does require folding, then its lower
** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF).
**
** The contents of this array are generated by parsing the CaseFolding.txt
** file distributed as part of the "Unicode Character Database". See
** http://www.unicode.org for details.
*/
static const struct TableEntry {
unsigned short iCode;
unsigned char flags;
unsigned char nRange;
} aEntry[] = {
{65, 14, 26}, {181, 64, 1}, {192, 14, 23},
{216, 14, 7}, {256, 1, 48}, {306, 1, 6},
{313, 1, 16}, {330, 1, 46}, {376, 116, 1},
{377, 1, 6}, {383, 104, 1}, {385, 50, 1},
{386, 1, 4}, {390, 44, 1}, {391, 0, 1},
{393, 42, 2}, {395, 0, 1}, {398, 32, 1},
{399, 38, 1}, {400, 40, 1}, {401, 0, 1},
{403, 42, 1}, {404, 46, 1}, {406, 52, 1},
{407, 48, 1}, {408, 0, 1}, {412, 52, 1},
{413, 54, 1}, {415, 56, 1}, {416, 1, 6},
{422, 60, 1}, {423, 0, 1}, {425, 60, 1},
{428, 0, 1}, {430, 60, 1}, {431, 0, 1},
{433, 58, 2}, {435, 1, 4}, {439, 62, 1},
{440, 0, 1}, {444, 0, 1}, {452, 2, 1},
{453, 0, 1}, {455, 2, 1}, {456, 0, 1},
{458, 2, 1}, {459, 1, 18}, {478, 1, 18},
{497, 2, 1}, {498, 1, 4}, {502, 122, 1},
{503, 134, 1}, {504, 1, 40}, {544, 110, 1},
{546, 1, 18}, {570, 70, 1}, {571, 0, 1},
{573, 108, 1}, {574, 68, 1}, {577, 0, 1},
{579, 106, 1}, {580, 28, 1}, {581, 30, 1},
{582, 1, 10}, {837, 36, 1}, {880, 1, 4},
{886, 0, 1}, {902, 18, 1}, {904, 16, 3},
{908, 26, 1}, {910, 24, 2}, {913, 14, 17},
{931, 14, 9}, {962, 0, 1}, {975, 4, 1},
{976, 140, 1}, {977, 142, 1}, {981, 146, 1},
{982, 144, 1}, {984, 1, 24}, {1008, 136, 1},
{1009, 138, 1}, {1012, 130, 1}, {1013, 128, 1},
{1015, 0, 1}, {1017, 152, 1}, {1018, 0, 1},
{1021, 110, 3}, {1024, 34, 16}, {1040, 14, 32},
{1120, 1, 34}, {1162, 1, 54}, {1216, 6, 1},
{1217, 1, 14}, {1232, 1, 88}, {1329, 22, 38},
{4256, 66, 38}, {4295, 66, 1}, {4301, 66, 1},
{7680, 1, 150}, {7835, 132, 1}, {7838, 96, 1},
{7840, 1, 96}, {7944, 150, 8}, {7960, 150, 6},
{7976, 150, 8}, {7992, 150, 8}, {8008, 150, 6},
{8025, 151, 8}, {8040, 150, 8}, {8072, 150, 8},
{8088, 150, 8}, {8104, 150, 8}, {8120, 150, 2},
{8122, 126, 2}, {8124, 148, 1}, {8126, 100, 1},
{8136, 124, 4}, {8140, 148, 1}, {8152, 150, 2},
{8154, 120, 2}, {8168, 150, 2}, {8170, 118, 2},
{8172, 152, 1}, {8184, 112, 2}, {8186, 114, 2},
{8188, 148, 1}, {8486, 98, 1}, {8490, 92, 1},
{8491, 94, 1}, {8498, 12, 1}, {8544, 8, 16},
{8579, 0, 1}, {9398, 10, 26}, {11264, 22, 47},
{11360, 0, 1}, {11362, 88, 1}, {11363, 102, 1},
{11364, 90, 1}, {11367, 1, 6}, {11373, 84, 1},
{11374, 86, 1}, {11375, 80, 1}, {11376, 82, 1},
{11378, 0, 1}, {11381, 0, 1}, {11390, 78, 2},
{11392, 1, 100}, {11499, 1, 4}, {11506, 0, 1},
{42560, 1, 46}, {42624, 1, 24}, {42786, 1, 14},
{42802, 1, 62}, {42873, 1, 4}, {42877, 76, 1},
{42878, 1, 10}, {42891, 0, 1}, {42893, 74, 1},
{42896, 1, 4}, {42912, 1, 10}, {42922, 72, 1},
{65313, 14, 26},
};
/* Offsets added (modulo 65536) to a codepoint to obtain its folded form.
** Indexed by (TableEntry.flags >> 1). */
static const unsigned short aiOff[] = {
1, 2, 8, 15, 16, 26, 28, 32,
37, 38, 40, 48, 63, 64, 69, 71,
79, 80, 116, 202, 203, 205, 206, 207,
209, 210, 211, 213, 214, 217, 218, 219,
775, 7264, 10792, 10795, 23228, 23256, 30204, 54721,
54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274,
57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406,
65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462,
65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511,
65514, 65521, 65527, 65528, 65529,
};
int ret = c;
assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );
if( c<128 ){
/* ASCII: only 'A'..'Z' change. bRemoveDiacritic is not consulted here. */
if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
}else if( c<65536 ){
/* Binary search aEntry[] for the last rule with iCode<=c */
const struct TableEntry *p;
int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
int iLo = 0;
int iRes = -1;
assert( c>aEntry[0].iCode );
while( iHi>=iLo ){
int iTest = (iHi + iLo) / 2;
int cmp = (c - aEntry[iTest].iCode);
if( cmp>=0 ){
iRes = iTest;
iLo = iTest+1;
}else{
iHi = iTest-1;
}
}
assert( iRes>=0 && c>=aEntry[iRes].iCode );
p = &aEntry[iRes];
/* The rule applies if c is inside the range and, for rules with the low
** flag bit set, c is an even number of codepoints from the range start. */
if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
assert( ret>0 );
}
/* Diacritic removal is applied to the (possibly folded) codepoint */
if( bRemoveDiacritic ) ret = fts5_remove_diacritic(ret);
}
/* Codepoints 66560..66599 are folded by adding 40; they are not covered
** by the 16-bit table above. */
else if( c>=66560 && c<66600 ){
ret = c + 40;
}
return ret;
}
#endif /* defined(SQLITE_ENABLE_FTS5) */

344
ext/fts5/fts5_varint.c Normal file
View File

@ -0,0 +1,344 @@
/*
** 2015 May 30
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
** Routines for varint serialization and deserialization.
*/
#ifdef SQLITE_ENABLE_FTS5
#include "fts5Int.h"
/*
** This is a copy of the sqlite3GetVarint32() routine from the SQLite core.
** Except, this version does handle the single byte case that the core
** version depends on being handled before its function is called.
*/
int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v){
/* Decode the varint starting at p[0] into *v and return the number of
** bytes consumed. The 1-, 2- and 3-byte encodings are decoded inline;
** anything longer is delegated to sqlite3Fts5GetVarint() and the 64-bit
** result is truncated to 32 bits. */
u32 a,b;
/* The 1-byte case. Overwhelmingly the most common. */
a = *p;
/* a: p0 (unmasked) */
if (!(a&0x80))
{
/* Values between 0 and 127 */
*v = a;
return 1;
}
/* The 2-byte case */
p++;
b = *p;
/* b: p1 (unmasked) */
if (!(b&0x80))
{
/* Values between 128 and 16383 */
a &= 0x7f;
a = a<<7;
*v = a | b;
return 2;
}
/* The 3-byte case */
p++;
a = a<<14;
a |= *p;
/* a: p0<<14 | p2 (unmasked) */
if (!(a&0x80))
{
/* Values between 16384 and 2097151 */
a &= (0x7f<<14)|(0x7f);
b &= 0x7f;
b = b<<7;
*v = a | b;
return 3;
}
/* A 32-bit varint is used to store size information in btrees.
** Objects are rarely larger than 2MiB limit of a 3-byte varint.
** A 3-byte varint is sufficient, for example, to record the size
** of a 1048569-byte BLOB or string.
**
** We only unroll the first 1-, 2-, and 3- byte cases. The very
** rare larger cases can be handled by the slower 64-bit varint
** routine.
*/
{
u64 v64;
u8 n;
/* p was advanced twice above; rewind to the first byte of the varint */
p -= 2;
n = sqlite3Fts5GetVarint(p, &v64);
*v = (u32)v64;
assert( n>3 && n<=9 );
return n;
}
}
/*
** Bitmasks used by sqlite3Fts5GetVarint(). These precomputed constants
** are defined here rather than simply putting the constant expressions
** inline in order to work around bugs in the RVT compiler.
**
** SLOT_2_0 A mask for (0x7f<<14) | 0x7f
**
** SLOT_4_2_0 A mask for (0x7f<<28) | SLOT_2_0
*/
#define SLOT_2_0 0x001fc07f
#define SLOT_4_2_0 0xf01fc07f
/*
** Read a 64-bit variable-length integer from memory starting at p[0].
** Return the number of bytes read. The value is stored in *v.
*/
u8 sqlite3Fts5GetVarint(const unsigned char *p, u64 *v){
/* The decoder is fully unrolled. Bytes are accumulated alternately into
** a (even-numbered bytes) and b (odd-numbered bytes), each shifted left
** by 14 bits as a new byte is OR-ed in, so the continuation bit of the
** newest byte is always bit 7 (0x80) of the accumulator just updated.
** s holds the bits that end up in the upper 32 bits of the result. */
u32 a,b,s;
a = *p;
/* a: p0 (unmasked) */
if (!(a&0x80))
{
*v = a;
return 1;
}
p++;
b = *p;
/* b: p1 (unmasked) */
if (!(b&0x80))
{
a &= 0x7f;
a = a<<7;
a |= b;
*v = a;
return 2;
}
/* Verify that constants are precomputed correctly */
assert( SLOT_2_0 == ((0x7f<<14) | (0x7f)) );
assert( SLOT_4_2_0 == ((0xfU<<28) | (0x7f<<14) | (0x7f)) );
p++;
a = a<<14;
a |= *p;
/* a: p0<<14 | p2 (unmasked) */
if (!(a&0x80))
{
a &= SLOT_2_0;
b &= 0x7f;
b = b<<7;
a |= b;
*v = a;
return 3;
}
/* CSE1 from below */
a &= SLOT_2_0;
p++;
b = b<<14;
b |= *p;
/* b: p1<<14 | p3 (unmasked) */
if (!(b&0x80))
{
b &= SLOT_2_0;
/* moved CSE1 up */
/* a &= (0x7f<<14)|(0x7f); */
a = a<<7;
a |= b;
*v = a;
return 4;
}
/* a: p0<<14 | p2 (masked) */
/* b: p1<<14 | p3 (unmasked) */
/* 1:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
/* moved CSE1 up */
/* a &= (0x7f<<14)|(0x7f); */
b &= SLOT_2_0;
s = a;
/* s: p0<<14 | p2 (masked) */
p++;
a = a<<14;
a |= *p;
/* a: p0<<28 | p2<<14 | p4 (unmasked) */
if (!(a&0x80))
{
/* we can skip these cause they were (effectively) done above in calc'ing s */
/* a &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
/* b &= (0x7f<<14)|(0x7f); */
b = b<<7;
a |= b;
s = s>>18;
*v = ((u64)s)<<32 | a;
return 5;
}
/* 2:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
s = s<<7;
s |= b;
/* s: p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
p++;
b = b<<14;
b |= *p;
/* b: p1<<28 | p3<<14 | p5 (unmasked) */
if (!(b&0x80))
{
/* we can skip this cause it was (effectively) done above in calc'ing s */
/* b &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
a &= SLOT_2_0;
a = a<<7;
a |= b;
s = s>>18;
*v = ((u64)s)<<32 | a;
return 6;
}
p++;
a = a<<14;
a |= *p;
/* a: p2<<28 | p4<<14 | p6 (unmasked) */
if (!(a&0x80))
{
a &= SLOT_4_2_0;
b &= SLOT_2_0;
b = b<<7;
a |= b;
s = s>>11;
*v = ((u64)s)<<32 | a;
return 7;
}
/* CSE2 from below */
a &= SLOT_2_0;
p++;
b = b<<14;
b |= *p;
/* b: p3<<28 | p5<<14 | p7 (unmasked) */
if (!(b&0x80))
{
b &= SLOT_4_2_0;
/* moved CSE2 up */
/* a &= (0x7f<<14)|(0x7f); */
a = a<<7;
a |= b;
s = s>>4;
*v = ((u64)s)<<32 | a;
return 8;
}
/* 9-byte case: the final byte contributes all 8 of its bits, so the
** shifts below are by 15 and 8 rather than 14 and 7. */
p++;
a = a<<15;
a |= *p;
/* a: p4<<29 | p6<<15 | p8 (unmasked) */
/* moved CSE2 up */
/* a &= (0x7f<<29)|(0x7f<<15)|(0xff); */
b &= SLOT_2_0;
b = b<<8;
a |= b;
s = s<<4;
/* p now points at p8, so p[-4] is p4; its upper data bits go into s */
b = p[-4];
b &= 0x7f;
b = b>>3;
s |= b;
*v = ((u64)s)<<32 | a;
return 9;
}
/*
** The variable-length integer encoding is as follows:
**
** KEY:
** A = 0xxxxxxx 7 bits of data and one flag bit
** B = 1xxxxxxx 7 bits of data and one flag bit
** C = xxxxxxxx 8 bits of data
**
** 7 bits - A
** 14 bits - BA
** 21 bits - BBA
** 28 bits - BBBA
** 35 bits - BBBBA
** 42 bits - BBBBBA
** 49 bits - BBBBBBA
** 56 bits - BBBBBBBA
** 64 bits - BBBBBBBBC
*/
#ifdef SQLITE_NOINLINE
# define FTS5_NOINLINE SQLITE_NOINLINE
#else
# define FTS5_NOINLINE
#endif
/*
** Write a 64-bit variable-length integer to memory starting at p[0].
** The length of the data written will be between 1 and 9 bytes. The number
** of bytes written is returned.
**
** A variable-length integer consists of the lower 7 bits of each byte
** for all bytes that have the 8th bit set and one byte with the 8th
** bit clear. Except, if we get to the 9th byte, it stores the full
** 8 bits and is the last byte.
*/
/*
** Encode v as a varint at p[0..] and return the number of bytes written
** (between 1 and 9). Values with any of the top 8 bits set always use the
** 9-byte form, in which the final byte carries a full 8 bits of data.
*/
static int FTS5_NOINLINE fts5PutVarint64(unsigned char *p, u64 v){
  u8 aRev[10];                    /* 7-bit groups, least significant first */
  int nByte = 0;                  /* Number of groups stored in aRev[] */
  int iOut;

  if( v & (((u64)0xff000000)<<32) ){
    /* 9-byte form: low 8 bits verbatim, then eight 7-bit groups written
    ** from p[7] down to p[0], each with the continuation bit set. */
    int k = 8;
    p[k] = (u8)v;
    v >>= 8;
    while( k-- > 0 ){
      p[k] = (u8)((v & 0x7f) | 0x80);
      v >>= 7;
    }
    return 9;
  }

  /* Collect the 7-bit groups in reverse order, continuation bit set on all */
  do{
    aRev[nByte] = (u8)((v & 0x7f) | 0x80);
    nByte++;
    v >>= 7;
  }while( v!=0 );

  /* The least significant group is emitted last: clear its flag bit */
  aRev[0] &= 0x7f;
  assert( nByte<=9 );

  for(iOut=0; iOut<nByte; iOut++){
    p[iOut] = aRev[nByte-1-iOut];
  }
  return nByte;
}
/*
** Write v as a varint starting at p[0] and return the number of bytes
** written. The 1- and 2-byte encodings are produced inline; larger values
** are handed to fts5PutVarint64().
*/
int sqlite3Fts5PutVarint(unsigned char *p, u64 v){
  if( v>0x3fff ){
    return fts5PutVarint64(p,v);
  }
  if( v>0x7f ){
    /* Two bytes: upper 7 bits with the continuation flag, then lower 7 */
    p[0] = ((v>>7)&0x7f)|0x80;
    p[1] = v&0x7f;
    return 2;
  }
  p[0] = v&0x7f;
  return 1;
}
/*
** Return the number of bytes (1 to 5) needed to store 32-bit value iVal
** as a varint: one byte per 7 bits of magnitude.
*/
int sqlite3Fts5GetVarintLen(u32 iVal){
  int nByte = 1;
  while( nByte<5 && iVal>=((u32)1 << (7*nByte)) ){
    nByte++;
  }
  return nByte;
}
#endif /* SQLITE_ENABLE_FTS5 */

491
ext/fts5/fts5_vocab.c Normal file
View File

@ -0,0 +1,491 @@
/*
** 2015 May 08
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
** This is an SQLite virtual table module implementing direct access to an
** existing FTS5 index. The module may create several different types of
** tables:
**
** col:
** CREATE TABLE vocab(term, col, doc, cnt, PRIMARY KEY(term, col));
**
** One row for each term/column combination. The value of $doc is set to
** the number of fts5 rows that contain at least one instance of term
** $term within column $col. Field $cnt is set to the total number of
** instances of term $term in column $col (in any row of the fts5 table).
**
** row:
** CREATE TABLE vocab(term, doc, cnt, PRIMARY KEY(term));
**
** One row for each term in the database. The value of $doc is set to
** the number of fts5 rows that contain at least one instance of term
** $term. Field $cnt is set to the total number of instances of term
** $term in the database.
*/
#if defined(SQLITE_ENABLE_FTS5)
#include "fts5Int.h"
typedef struct Fts5VocabTable Fts5VocabTable;
typedef struct Fts5VocabCursor Fts5VocabCursor;
/*
** An fts5vocab virtual table. The two name strings are stored in the same
** allocation as the structure itself, immediately after it (see
** fts5VocabInitVtab()).
*/
struct Fts5VocabTable {
sqlite3_vtab base;
char *zFts5Tbl; /* Name of fts5 table */
char *zFts5Db; /* Db containing fts5 table */
sqlite3 *db; /* Database handle */
Fts5Global *pGlobal; /* FTS5 global object for this database */
int eType; /* FTS5_VOCAB_COL or ROW */
};
/*
** A cursor open on an fts5vocab virtual table.
*/
struct Fts5VocabCursor {
sqlite3_vtab_cursor base;
sqlite3_stmt *pStmt; /* Statement holding lock on pIndex */
Fts5Index *pIndex; /* Associated FTS5 index */
int bEof; /* True if this cursor is at EOF */
Fts5IndexIter *pIter; /* Term/rowid iterator object */
/* These are used by 'col' tables only */
int nCol;
int iCol;
i64 *aCnt;
i64 *aDoc;
/* Output values */
i64 rowid; /* This table's current rowid value */
Fts5Buffer term; /* Current value of 'term' column */
i64 aVal[3]; /* Up to three columns left of 'term' */
};
/* Values for Fts5VocabTable.eType. Each value is also the index of the
** matching schema in the azSchema[] array of fts5VocabInitVtab(). */
#define FTS5_VOCAB_COL 0
#define FTS5_VOCAB_ROW 1
/* Column lists used to declare the two table types */
#define FTS5_VOCAB_COL_SCHEMA "term, col, doc, cnt"
#define FTS5_VOCAB_ROW_SCHEMA "term, doc, cnt"
/*
** Translate a string containing an fts5vocab table type to an
** FTS5_VOCAB_XXX constant. If successful, set *peType to the output
** value and return SQLITE_OK. Otherwise, set *pzErr to an error message
** and return SQLITE_ERROR.
*/
/*
** Map the (possibly quoted) table-type string zType to FTS5_VOCAB_COL or
** FTS5_VOCAB_ROW. The comparison is case-insensitive and is made against
** a dequoted copy of zType.
**
** On success *peType is set and SQLITE_OK returned. If the type is not
** recognized, *pzErr is set to an sqlite3_mprintf() message and
** SQLITE_ERROR is returned. If the copy cannot be made, the error code
** from sqlite3Fts5Strndup() is returned and *pzErr is left untouched.
*/
static int fts5VocabTableType(const char *zType, char **pzErr, int *peType){
  int rc = SQLITE_OK;
  char *zName = sqlite3Fts5Strndup(&rc, zType, -1);

  if( rc!=SQLITE_OK ) return rc;

  sqlite3Fts5Dequote(zName);
  if( 0==sqlite3_stricmp(zName, "col") ){
    *peType = FTS5_VOCAB_COL;
  }else if( 0==sqlite3_stricmp(zName, "row") ){
    *peType = FTS5_VOCAB_ROW;
  }else{
    *pzErr = sqlite3_mprintf("fts5vocab: unknown table type: %Q", zName);
    rc = SQLITE_ERROR;
  }
  sqlite3_free(zName);
  return rc;
}
/*
** The xDisconnect() virtual table method.
*/
/* Free the Fts5VocabTable. The name strings live in the same allocation
** (see fts5VocabInitVtab()), so a single free releases everything. */
static int fts5VocabDisconnectMethod(sqlite3_vtab *pVtab){
  sqlite3_free((Fts5VocabTable*)pVtab);
  return SQLITE_OK;
}
/*
** The xDestroy() virtual table method.
*/
/* Identical to xDisconnect: only the in-memory table object is freed. */
static int fts5VocabDestroyMethod(sqlite3_vtab *pVtab){
  sqlite3_free((Fts5VocabTable*)pVtab);
  return SQLITE_OK;
}
/*
** This function is the implementation of both the xConnect and xCreate
** methods of the fts5vocab virtual table.
**
** The argv[] array contains the following:
**
** argv[0] -> module name ("fts5vocab")
** argv[1] -> database name
** argv[2] -> table name
**
** then:
**
** argv[3] -> name of fts5 table
** argv[4] -> type of fts5vocab table
**
** or, for tables in the TEMP schema only.
**
** argv[3] -> name of fts5 tables database
** argv[4] -> name of fts5 table
** argv[5] -> type of fts5vocab table
*/
static int fts5VocabInitVtab(
sqlite3 *db, /* The SQLite database connection */
void *pAux, /* Pointer to Fts5Global object */
int argc, /* Number of elements in argv array */
const char * const *argv, /* xCreate/xConnect argument array */
sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */
char **pzErr /* Write any error message here */
){
/* Schemas passed to sqlite3_declare_vtab(), indexed by FTS5_VOCAB_XXX */
const char *azSchema[] = {
"CREATE TABlE vocab(" FTS5_VOCAB_COL_SCHEMA ")",
"CREATE TABlE vocab(" FTS5_VOCAB_ROW_SCHEMA ")"
};
Fts5VocabTable *pRet = 0;
int rc = SQLITE_OK; /* Return code */
int bDb;
/* bDb is true for the 6-argument form, which is accepted only when the
** table is being created in the "temp" database (exact, case-sensitive
** match on argv[1]). */
bDb = (argc==6 && strlen(argv[1])==4 && memcmp("temp", argv[1], 4)==0);
if( argc!=5 && bDb==0 ){
*pzErr = sqlite3_mprintf("wrong number of vtable arguments");
rc = SQLITE_ERROR;
}else{
int nByte; /* Bytes of space to allocate */
const char *zDb = bDb ? argv[3] : argv[1];
const char *zTab = bDb ? argv[4] : argv[3];
const char *zType = bDb ? argv[5] : argv[4];
int nDb = strlen(zDb)+1;
int nTab = strlen(zTab)+1;
int eType;
rc = fts5VocabTableType(zType, pzErr, &eType);
if( rc==SQLITE_OK ){
assert( eType>=0 && eType<sizeof(azSchema)/sizeof(azSchema[0]) );
rc = sqlite3_declare_vtab(db, azSchema[eType]);
}
/* NOTE(review): this relies on sqlite3Fts5MallocZero() returning NULL
** when rc is already an error on entry, so that pRet stays NULL (and the
** possibly unset eType is never read) if either step above failed -
** confirm against the helper's definition. */
nByte = sizeof(Fts5VocabTable) + nDb + nTab;
pRet = sqlite3Fts5MallocZero(&rc, nByte);
if( pRet ){
pRet->pGlobal = (Fts5Global*)pAux;
pRet->eType = eType;
pRet->db = db;
/* Both names are stored directly after the structure: table name first,
** then database name. Each copy includes its nul terminator. */
pRet->zFts5Tbl = (char*)&pRet[1];
pRet->zFts5Db = &pRet->zFts5Tbl[nTab];
memcpy(pRet->zFts5Tbl, zTab, nTab);
memcpy(pRet->zFts5Db, zDb, nDb);
sqlite3Fts5Dequote(pRet->zFts5Tbl);
sqlite3Fts5Dequote(pRet->zFts5Db);
}
}
/* *ppVTab is always written; it is NULL on the wrong-argument-count path */
*ppVTab = (sqlite3_vtab*)pRet;
return rc;
}
/*
** The xConnect() and xCreate() methods for the virtual table. All the
** work is done in function fts5VocabInitVtab().
**
** xConnect(): every argument is forwarded unchanged to fts5VocabInitVtab()
** and its return code is returned to the caller.
*/
static int fts5VocabConnectMethod(
sqlite3 *db, /* Database connection */
void *pAux, /* Module client data; cast to Fts5Global* by fts5VocabInitVtab() */
int argc, /* Number of elements in argv array */
const char * const *argv, /* xCreate/xConnect argument array */
sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
char **pzErr /* OUT: sqlite3_malloc'd error message */
){
return fts5VocabInitVtab(db, pAux, argc, argv, ppVtab, pzErr);
}
/*
** xCreate(): identical to xConnect(). Every argument is forwarded
** unchanged to fts5VocabInitVtab() and its return code is returned.
*/
static int fts5VocabCreateMethod(
sqlite3 *db, /* Database connection */
void *pAux, /* Module client data; cast to Fts5Global* by fts5VocabInitVtab() */
int argc, /* Number of elements in argv array */
const char * const *argv, /* xCreate/xConnect argument array */
sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
char **pzErr /* OUT: sqlite3_malloc'd error message */
){
return fts5VocabInitVtab(db, pAux, argc, argv, ppVtab, pzErr);
}
/*
** Implementation of the xBestIndex method.
**
** Neither argument is examined and nothing is written to pInfo: no
** constraint is marked as usable and no cost estimate is supplied. The
** function unconditionally returns SQLITE_OK.
*/
static int fts5VocabBestIndexMethod(
sqlite3_vtab *pVTab,
sqlite3_index_info *pInfo
){
return SQLITE_OK;
}
/*
** Implementation of xOpen method.
**
** Creates a cursor for the fts5vocab table pVTab and stores it in *ppCsr.
** To locate the index to read, a SELECT against the target fts5 table
** (pTab->zFts5Db / pTab->zFts5Tbl) is prepared and stepped; the integer in
** its first column is passed to sqlite3Fts5IndexFromCsrid(), which
** supplies the Fts5Index handle and the column count (nCol).
**
** If no index handle is obtained, "no such fts5 table: <db>.<tbl>" is
** stored in pVTab->zErrMsg and SQLITE_ERROR is returned. The cursor is
** allocated with room for two trailing arrays of nCol 64-bit counters
** (aCnt[] followed by aDoc[]).
*/
static int fts5VocabOpenMethod(
sqlite3_vtab *pVTab,
sqlite3_vtab_cursor **ppCsr
){
Fts5VocabTable *pTab = (Fts5VocabTable*)pVTab;
Fts5Index *pIndex = 0;
int nCol = 0;
Fts5VocabCursor *pCsr = 0;
int rc = SQLITE_OK;
sqlite3_stmt *pStmt = 0;
char *zSql = 0;
int nByte;
/* NOTE(review): the '*id' MATCH appears to be a special query that makes
** the fts5 table return an id usable by sqlite3Fts5IndexFromCsrid() -
** confirm against the fts5 main module. */
zSql = sqlite3Fts5Mprintf(&rc,
"SELECT t.%Q FROM %Q.%Q AS t WHERE t.%Q MATCH '*id'",
pTab->zFts5Tbl, pTab->zFts5Db, pTab->zFts5Tbl, pTab->zFts5Tbl
);
if( zSql ){
rc = sqlite3_prepare_v2(pTab->db, zSql, -1, &pStmt, 0);
}
sqlite3_free(zSql);
assert( rc==SQLITE_OK || pStmt==0 );
/* A plain SQLITE_ERROR from the prepare is discarded here. pIndex is
** still 0 in that case, so the block further down replaces it with the
** more specific "no such fts5 table" message. */
if( rc==SQLITE_ERROR ) rc = SQLITE_OK;
if( pStmt && sqlite3_step(pStmt)==SQLITE_ROW ){
i64 iId = sqlite3_column_int64(pStmt, 0);
pIndex = sqlite3Fts5IndexFromCsrid(pTab->pGlobal, iId, &nCol);
}
/* No index handle: finalize the statement (this also surfaces any error
** raised by the step) and, if that reports no error, set the message. */
if( rc==SQLITE_OK && pIndex==0 ){
rc = sqlite3_finalize(pStmt);
pStmt = 0;
if( rc==SQLITE_OK ){
pVTab->zErrMsg = sqlite3_mprintf(
"no such fts5 table: %s.%s", pTab->zFts5Db, pTab->zFts5Tbl
);
rc = SQLITE_ERROR;
}
}
/* One allocation holds the cursor followed by aCnt[nCol] and aDoc[nCol]. */
nByte = nCol * sizeof(i64) * 2 + sizeof(Fts5VocabCursor);
pCsr = (Fts5VocabCursor*)sqlite3Fts5MallocZero(&rc, nByte);
if( pCsr ){
pCsr->pIndex = pIndex;
/* The statement is stored in the cursor, un-finalized; it is finalized
** by fts5VocabCloseMethod(). */
pCsr->pStmt = pStmt;
pCsr->nCol = nCol;
pCsr->aCnt = (i64*)&pCsr[1];
pCsr->aDoc = &pCsr->aCnt[nCol];
}else{
/* No cursor to own the statement, so release it now. */
sqlite3_finalize(pStmt);
}
*ppCsr = (sqlite3_vtab_cursor*)pCsr;
return rc;
}
/*
** Return the scan state of cursor pCsr to what it was immediately after
** xOpen: the rowid counter is zeroed, any open index iterator is closed,
** the end-of-scan flag is cleared and the per-column state is reset.
**
** Clearing bEof matters because SQLite may call xFilter more than once on
** the same cursor (for example when the table is the inner loop of a
** join). Without it, every scan after the first that ran to completion
** would report EOF straight away. Resetting iCol and aCnt[] ensures that
** column counts left over from an abandoned scan are not emitted as the
** first row of the next one.
*/
static void fts5VocabResetCursor(Fts5VocabCursor *pCsr){
  pCsr->rowid = 0;
  sqlite3Fts5IterClose(pCsr->pIter);
  pCsr->pIter = 0;
  pCsr->bEof = 0;
  pCsr->iCol = 0;
  memset(pCsr->aCnt, 0, pCsr->nCol * sizeof(i64));
}
/*
** xClose method. Tears down everything the cursor owns, in this order:
** the scan state (including any open index iterator), the buffer holding
** the current term, the statement stored by xOpen, and finally the
** cursor allocation itself.
*/
static int fts5VocabCloseMethod(sqlite3_vtab_cursor *pCursor){
  Fts5VocabCursor *p = (Fts5VocabCursor*)pCursor;

  fts5VocabResetCursor(p);
  sqlite3Fts5BufferFree(&p->term);
  sqlite3_finalize(p->pStmt);
  sqlite3_free(p);

  return SQLITE_OK;
}
/*
** Advance the cursor to the next row in the table.
**
** For an FTS5_VOCAB_COL table one row is produced per (term, column) pair
** with a non-zero instance count, so the cursor first tries to move to the
** next such column of the current term. Only when the columns are
** exhausted (or for an FTS5_VOCAB_ROW table, on every call) is the next
** term loaded from the index iterator.
**
** Loading a term copies it into pCsr->term and then visits every
** consecutive iterator entry carrying that same term:
**
**   ROW:  aVal[0] is incremented once per entry and aVal[1] once per
**         position in the entry's position list.
**   COL:  aCnt[c] is incremented once per position in column c, and
**         aDoc[c] once per entry in which column c changes from the
**         previously seen column.
**
** When the iterator is already at EOF, pCsr->bEof is set instead.
**
** Returns SQLITE_OK, an error code from the index layer, or
** SQLITE_CORRUPT_VTAB if a position refers to a column outside
** 0..nCol-1 or a term turns out to have no counted instance in any
** column. Both conditions previously caused out-of-bounds array access.
*/
static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){
  Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
  Fts5VocabTable *pTab = (Fts5VocabTable*)pCursor->pVtab;
  int rc = SQLITE_OK;

  pCsr->rowid++;

  /* COL tables: look for another populated column of the current term. */
  if( pTab->eType==FTS5_VOCAB_COL ){
    for(pCsr->iCol++; pCsr->iCol<pCsr->nCol; pCsr->iCol++){
      if( pCsr->aCnt[pCsr->iCol] ) break;
    }
  }

  if( pTab->eType==FTS5_VOCAB_ROW || pCsr->iCol>=pCsr->nCol ){
    if( sqlite3Fts5IterEof(pCsr->pIter) ){
      pCsr->bEof = 1;
    }else{
      const char *zTerm;
      int nTerm;

      /* Remember the new term and clear all counters for it. */
      zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm);
      sqlite3Fts5BufferSet(&rc, &pCsr->term, nTerm, (const u8*)zTerm);
      memset(pCsr->aVal, 0, sizeof(pCsr->aVal));
      memset(pCsr->aCnt, 0, pCsr->nCol * sizeof(i64));
      memset(pCsr->aDoc, 0, pCsr->nCol * sizeof(i64));
      pCsr->iCol = 0;

      assert( pTab->eType==FTS5_VOCAB_COL || pTab->eType==FTS5_VOCAB_ROW );
      while( rc==SQLITE_OK ){
        i64 dummy;
        const u8 *pPos; int nPos;   /* Position list */
        i64 iPos = 0;               /* 64-bit position read from poslist */
        int iOff = 0;               /* Current offset within position list */

        rc = sqlite3Fts5IterPoslist(pCsr->pIter, &pPos, &nPos, &dummy);
        if( rc==SQLITE_OK ){
          if( pTab->eType==FTS5_VOCAB_ROW ){
            while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){
              pCsr->aVal[1]++;
            }
            pCsr->aVal[0]++;
          }else{
            int iCol = -1;          /* Column of the previous position */
            while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){
              int ii = FTS5_POS2COLUMN(iPos);
              /* The column number comes from stored data; never let it
              ** index outside the nCol-entry counter arrays. */
              if( ii<0 || ii>=pCsr->nCol ){
                rc = SQLITE_CORRUPT_VTAB;
                break;
              }
              pCsr->aCnt[ii]++;
              if( iCol!=ii ){
                pCsr->aDoc[ii]++;
                iCol = ii;
              }
            }
          }
          if( rc==SQLITE_OK ){
            rc = sqlite3Fts5IterNextScan(pCsr->pIter);
          }
        }

        /* Stop as soon as the iterator moves on to a different term or
        ** reaches the end of the index. */
        if( rc==SQLITE_OK ){
          zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm);
          if( nTerm!=pCsr->term.n || memcmp(zTerm, pCsr->term.p, nTerm) ) break;
          if( sqlite3Fts5IterEof(pCsr->pIter) ) break;
        }
      }
    }
  }

  /* COL tables: publish the values for the first populated column at or
  ** after iCol. Skipped on error, because the counters may then all be
  ** zero and an unbounded search would run off the end of aCnt[]. */
  if( rc==SQLITE_OK && pCsr->bEof==0 && pTab->eType==FTS5_VOCAB_COL ){
    while( pCsr->iCol<pCsr->nCol && pCsr->aCnt[pCsr->iCol]==0 ) pCsr->iCol++;
    if( pCsr->iCol>=pCsr->nCol ){
      rc = SQLITE_CORRUPT_VTAB;
    }else{
      pCsr->aVal[0] = pCsr->iCol;
      pCsr->aVal[1] = pCsr->aDoc[pCsr->iCol];
      pCsr->aVal[2] = pCsr->aCnt[pCsr->iCol];
    }
  }
  return rc;
}
/*
** xFilter implementation. The idxNum, idxStr, nVal and apVal arguments
** are not consulted. The cursor state is reset, a new scan query
** (FTS5INDEX_QUERY_SCAN, no term) is opened on the index and, if that
** succeeds, the cursor is advanced to its first row.
*/
static int fts5VocabFilterMethod(
  sqlite3_vtab_cursor *pCursor,   /* The cursor used for this query */
  int idxNum,                     /* Strategy index */
  const char *idxStr,             /* Unused */
  int nVal,                       /* Number of elements in apVal */
  sqlite3_value **apVal           /* Arguments for the indexing scheme */
){
  Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
  int rc;

  fts5VocabResetCursor(pCsr);
  rc = sqlite3Fts5IndexQuery(
      pCsr->pIndex, 0, 0, FTS5INDEX_QUERY_SCAN, &pCsr->pIter
  );
  if( rc!=SQLITE_OK ) return rc;

  return fts5VocabNextMethod(pCursor);
}
/*
** This is the xEof method of the virtual table. SQLite calls this
** routine to find out if it has reached the end of a result set.
**
** The value returned is the cursor's bEof flag, which
** fts5VocabNextMethod() sets to 1 when the index iterator is exhausted.
*/
static int fts5VocabEofMethod(sqlite3_vtab_cursor *pCursor){
Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
return pCsr->bEof;
}
/*
** xColumn method. Column 0 is the current term, returned as text copied
** from the cursor's term buffer. Columns 1..3 are the 64-bit integers
** stored in pCsr->aVal[0..2] by fts5VocabNextMethod().
*/
static int fts5VocabColumnMethod(
  sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */
  sqlite3_context *pCtx,          /* Context for sqlite3_result_xxx() calls */
  int iCol                        /* Index of column to read value from */
){
  Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;

  if( iCol==0 ){
    /* term */
    sqlite3_result_text(
        pCtx, (const char*)pCsr->term.p, pCsr->term.n, SQLITE_TRANSIENT
    );
  }else{
    assert( iCol<4 && iCol>0 );
    sqlite3_result_int64(pCtx, pCsr->aVal[iCol-1]);
  }

  return SQLITE_OK;
}
/*
** This is the xRowid method. The SQLite core calls this routine to
** retrieve the rowid for the current row of the result set. The
** rowid should be written to *pRowid.
**
** For fts5vocab the rowid is the cursor's running row counter: it is set
** to 0 by fts5VocabResetCursor() and incremented by one at the start of
** every fts5VocabNextMethod() call.
*/
static int fts5VocabRowidMethod(
sqlite3_vtab_cursor *pCursor,
sqlite_int64 *pRowid
){
Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
*pRowid = pCsr->rowid;
return SQLITE_OK;
}
/*
** Register the "fts5vocab" virtual table module with database handle db.
** pGlobal is registered as the module's client data, so it is what the
** xCreate/xConnect methods receive as their pAux argument. No destructor
** is registered for the client data (final argument is 0).
**
** The module supplies no xUpdate, transaction, xFindFunction, xRename or
** savepoint methods; those slots are all 0.
**
** Returns the result of sqlite3_create_module_v2().
*/
int sqlite3Fts5VocabInit(Fts5Global *pGlobal, sqlite3 *db){
static const sqlite3_module fts5Vocab = {
/* iVersion */ 2,
/* xCreate */ fts5VocabCreateMethod,
/* xConnect */ fts5VocabConnectMethod,
/* xBestIndex */ fts5VocabBestIndexMethod,
/* xDisconnect */ fts5VocabDisconnectMethod,
/* xDestroy */ fts5VocabDestroyMethod,
/* xOpen */ fts5VocabOpenMethod,
/* xClose */ fts5VocabCloseMethod,
/* xFilter */ fts5VocabFilterMethod,
/* xNext */ fts5VocabNextMethod,
/* xEof */ fts5VocabEofMethod,
/* xColumn */ fts5VocabColumnMethod,
/* xRowid */ fts5VocabRowidMethod,
/* xUpdate */ 0,
/* xBegin */ 0,
/* xSync */ 0,
/* xCommit */ 0,
/* xRollback */ 0,
/* xFindFunction */ 0,
/* xRename */ 0,
/* xSavepoint */ 0,
/* xRelease */ 0,
/* xRollbackTo */ 0,
};
void *p = (void*)pGlobal;
return sqlite3_create_module_v2(db, "fts5vocab", &fts5Vocab, p, 0);
}
#endif /* defined(SQLITE_ENABLE_FTS5) */

173
ext/fts5/fts5parse.y Normal file
View File

@ -0,0 +1,173 @@
/*
** 2014 May 31
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
*/
// All token codes are small integers with #defines that begin with "FTS5_"
%token_prefix FTS5_
// The type of the data attached to each token is Fts5Token. This is also
// the default type for non-terminals.
//
%token_type {Fts5Token}
%default_type {Fts5Token}
// The generated parser function takes a 4th argument as follows:
%extra_argument {Fts5Parse *pParse}
// This code runs whenever there is a syntax error
//
%syntax_error {
sqlite3Fts5ParseError(
pParse, "fts5: syntax error near \"%.*s\"",TOKEN.n,TOKEN.p
);
}
// This code runs if the parser stack overflows, which happens when the
// query expression is nested too deeply. Report it through the same error
// channel as a syntax error: an assert(0) would abort debug builds and
// compile to nothing in release builds, leaving the failure unreported.
//
%stack_overflow {
  sqlite3Fts5ParseError(pParse, "fts5: parser stack overflow");
}
// The name of the generated procedure that implements the parser
// is as follows:
%name sqlite3Fts5Parser
// The following text is included near the beginning of the C source
// code file that implements the parser.
//
%include {
#include "fts5Int.h"
#include "fts5parse.h"
/*
** Disable all error recovery processing in the parser push-down
** automaton.
*/
#define YYNOERRORRECOVERY 1
/*
** Make yytestcase() the same as testcase()
*/
#define yytestcase(X) testcase(X)
} // end %include
%left OR.
%left AND.
%left NOT.
%left TERM.
%left COLON.
input ::= expr(X). { sqlite3Fts5ParseFinished(pParse, X); }
%type cnearset {Fts5ExprNode*}
%type expr {Fts5ExprNode*}
%type exprlist {Fts5ExprNode*}
%destructor cnearset { sqlite3Fts5ParseNodeFree($$); }
%destructor expr { sqlite3Fts5ParseNodeFree($$); }
%destructor exprlist { sqlite3Fts5ParseNodeFree($$); }
expr(A) ::= expr(X) AND expr(Y). {
A = sqlite3Fts5ParseNode(pParse, FTS5_AND, X, Y, 0);
}
expr(A) ::= expr(X) OR expr(Y). {
A = sqlite3Fts5ParseNode(pParse, FTS5_OR, X, Y, 0);
}
expr(A) ::= expr(X) NOT expr(Y). {
A = sqlite3Fts5ParseNode(pParse, FTS5_NOT, X, Y, 0);
}
expr(A) ::= LP expr(X) RP. {A = X;}
expr(A) ::= exprlist(X). {A = X;}
exprlist(A) ::= cnearset(X). {A = X;}
exprlist(A) ::= exprlist(X) cnearset(Y). {
A = sqlite3Fts5ParseNode(pParse, FTS5_AND, X, Y, 0);
}
cnearset(A) ::= nearset(X). {
A = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, X);
}
cnearset(A) ::= colset(X) COLON nearset(Y). {
sqlite3Fts5ParseSetColset(pParse, Y, X);
A = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, Y);
}
%type colset {Fts5ExprColset*}
%destructor colset { sqlite3_free($$); }
%type colsetlist {Fts5ExprColset*}
%destructor colsetlist { sqlite3_free($$); }
colset(A) ::= LCP colsetlist(X) RCP. { A = X; }
colset(A) ::= STRING(X). {
A = sqlite3Fts5ParseColset(pParse, 0, &X);
}
colsetlist(A) ::= colsetlist(Y) STRING(X). {
A = sqlite3Fts5ParseColset(pParse, Y, &X); }
colsetlist(A) ::= STRING(X). {
A = sqlite3Fts5ParseColset(pParse, 0, &X);
}
%type nearset {Fts5ExprNearset*}
%type nearphrases {Fts5ExprNearset*}
%destructor nearset { sqlite3Fts5ParseNearsetFree($$); }
%destructor nearphrases { sqlite3Fts5ParseNearsetFree($$); }
nearset(A) ::= phrase(X). { A = sqlite3Fts5ParseNearset(pParse, 0, X); }
nearset(A) ::= STRING(X) LP nearphrases(Y) neardist_opt(Z) RP. {
sqlite3Fts5ParseNear(pParse, &X);
sqlite3Fts5ParseSetDistance(pParse, Y, &Z);
A = Y;
}
nearphrases(A) ::= phrase(X). {
A = sqlite3Fts5ParseNearset(pParse, 0, X);
}
nearphrases(A) ::= nearphrases(X) phrase(Y). {
A = sqlite3Fts5ParseNearset(pParse, X, Y);
}
/*
** The optional ", <integer>" at the end of the NEAR() arguments.
*/
neardist_opt(A) ::= . { A.p = 0; A.n = 0; }
neardist_opt(A) ::= COMMA STRING(X). { A = X; }
/*
** A phrase. A set of primitives connected by "+" operators. Examples:
**
** "the" + "quick brown" + fo *
** "the quick brown fo" *
** the+quick+brown+fo*
*/
%type phrase {Fts5ExprPhrase*}
%destructor phrase { sqlite3Fts5ParsePhraseFree($$); }
phrase(A) ::= phrase(X) PLUS STRING(Y) star_opt(Z). {
A = sqlite3Fts5ParseTerm(pParse, X, &Y, Z);
}
phrase(A) ::= STRING(Y) star_opt(Z). {
A = sqlite3Fts5ParseTerm(pParse, 0, &Y, Z);
}
/*
** Optional "*" character.
*/
%type star_opt {int}
star_opt(A) ::= STAR. { A = 1; }
star_opt(A) ::= . { A = 0; }

222
ext/fts5/mkportersteps.tcl Normal file
View File

@ -0,0 +1,222 @@
#
# 2014 Jun 09
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#-------------------------------------------------------------------------
#
# This script generates the implementations of the following C functions,
# which are part of the porter tokenizer implementation:
#
# static int fts5PorterStep1B(char *aBuf, int *pnBuf);
# static int fts5PorterStep1B2(char *aBuf, int *pnBuf);
# static int fts5PorterStep2(char *aBuf, int *pnBuf);
# static int fts5PorterStep3(char *aBuf, int *pnBuf);
# static int fts5PorterStep4(char *aBuf, int *pnBuf);
#
set O(Step1B2) {
{ at {} ate 1 }
{ bl {} ble 1 }
{ iz {} ize 1 }
}
set O(Step1B) {
{ "eed" fts5Porter_MGt0 "ee" 0 }
{ "ed" fts5Porter_Vowel "" 1 }
{ "ing" fts5Porter_Vowel "" 1 }
}
set O(Step2) {
{ "ational" fts5Porter_MGt0 "ate" }
{ "tional" fts5Porter_MGt0 "tion" }
{ "enci" fts5Porter_MGt0 "ence" }
{ "anci" fts5Porter_MGt0 "ance" }
{ "izer" fts5Porter_MGt0 "ize" }
{ "logi" fts5Porter_MGt0 "log" }
{ "bli" fts5Porter_MGt0 "ble" }
{ "alli" fts5Porter_MGt0 "al" }
{ "entli" fts5Porter_MGt0 "ent" }
{ "eli" fts5Porter_MGt0 "e" }
{ "ousli" fts5Porter_MGt0 "ous" }
{ "ization" fts5Porter_MGt0 "ize" }
{ "ation" fts5Porter_MGt0 "ate" }
{ "ator" fts5Porter_MGt0 "ate" }
{ "alism" fts5Porter_MGt0 "al" }
{ "iveness" fts5Porter_MGt0 "ive" }
{ "fulness" fts5Porter_MGt0 "ful" }
{ "ousness" fts5Porter_MGt0 "ous" }
{ "aliti" fts5Porter_MGt0 "al" }
{ "iviti" fts5Porter_MGt0 "ive" }
{ "biliti" fts5Porter_MGt0 "ble" }
}
set O(Step3) {
{ "icate" fts5Porter_MGt0 "ic" }
{ "ative" fts5Porter_MGt0 "" }
{ "alize" fts5Porter_MGt0 "al" }
{ "iciti" fts5Porter_MGt0 "ic" }
{ "ical" fts5Porter_MGt0 "ic" }
{ "ful" fts5Porter_MGt0 "" }
{ "ness" fts5Porter_MGt0 "" }
}
set O(Step4) {
{ "al" fts5Porter_MGt1 "" }
{ "ance" fts5Porter_MGt1 "" }
{ "ence" fts5Porter_MGt1 "" }
{ "er" fts5Porter_MGt1 "" }
{ "ic" fts5Porter_MGt1 "" }
{ "able" fts5Porter_MGt1 "" }
{ "ible" fts5Porter_MGt1 "" }
{ "ant" fts5Porter_MGt1 "" }
{ "ement" fts5Porter_MGt1 "" }
{ "ment" fts5Porter_MGt1 "" }
{ "ent" fts5Porter_MGt1 "" }
{ "ion" fts5Porter_MGt1_and_S_or_T "" }
{ "ou" fts5Porter_MGt1 "" }
{ "ism" fts5Porter_MGt1 "" }
{ "ate" fts5Porter_MGt1 "" }
{ "iti" fts5Porter_MGt1 "" }
{ "ous" fts5Porter_MGt1 "" }
{ "ive" fts5Porter_MGt1 "" }
{ "ize" fts5Porter_MGt1 "" }
}
# Comparison callback for two rule entries. Each entry is a list whose
# first element is the suffix text; entries are ordered by the
# second-to-last character of that suffix.
proc sort_cb {lhs rhs} {
  set lkey [string range [lindex $lhs 0] end-1 end-1]
  set rkey [string range [lindex $rhs 0] end-1 end-1]
  return [string compare $lkey $rkey]
}
# Write to stdout the C source of one function
#
#     static int fts5Porter<name>(char *aBuf, int *pnBuf)
#
# built from the rule list $data. Each rule is a list of the form
#
#     suffix  condition-function  replacement  ?return-flag?
#
# The generated function switches on the second-to-last character of the
# buffer and, within each case, tests the rules sharing that character as
# an if/else-if chain.
#
proc create_step_function {name data} {
# Template for the complete function. ${name} and ${switchbody} are filled
# in by [subst] at the end of this proc.
set T(function) {
static int fts5Porter${name}(char *aBuf, int *pnBuf){
int ret = 0;
int nBuf = *pnBuf;
switch( aBuf[nBuf-2] ){
${switchbody}
}
return ret;
}
}
# Template for one "case" of the switch; ${k} is the case character.
set T(case) {
case '${k}':
${ifstmts}
break;
}
# Rule templates are named if_<bCond>_<bMemcpy>_<bRet>:
#   bCond   - the rule has a condition function
#   bMemcpy - the rule has non-empty replacement text
#   bRet    - the generated function sets ret=1 when the rule fires
# There is no template for the 0_0_1 combination.
set T(if_0_0_0) {
if( ${match} ){
*pnBuf = nBuf - $n;
}
}
set T(if_1_0_0) {
if( ${match} ){
if( ${cond} ){
*pnBuf = nBuf - $n;
}
}
}
set T(if_0_1_0) {
if( ${match} ){
${memcpy}
*pnBuf = nBuf - $n + $nRep;
}
}
set T(if_1_1_0) {
if( ${match} ){
if( ${cond} ){
${memcpy}
*pnBuf = nBuf - $n + $nRep;
}
}
}
set T(if_1_0_1) {
if( ${match} ){
if( ${cond} ){
*pnBuf = nBuf - $n;
ret = 1;
}
}
}
set T(if_0_1_1) {
if( ${match} ){
${memcpy}
*pnBuf = nBuf - $n + $nRep;
ret = 1;
}
}
set T(if_1_1_1) {
if( ${match} ){
if( ${cond} ){
${memcpy}
*pnBuf = nBuf - $n + $nRep;
ret = 1;
}
}
}
set switchbody ""
# Group the rules by the second-to-last character of their suffix.
foreach I $data {
set k [string range [lindex $I 0] end-1 end-1]
lappend aCase($k) $I
}
# Emit the cases in sorted character order. Within a case the rules keep
# the order in which they appear in $data.
foreach k [lsort [array names aCase]] {
set ifstmts ""
foreach I $aCase($k) {
set zSuffix [lindex $I 0] ;# Suffix text for this rule
set zRep [lindex $I 2] ;# Replacement text for rule
set xCond [lindex $I 1] ;# Condition callback (or "")
set n [string length $zSuffix]
set nRep [string length $zRep]
# C fragments substituted into the chosen template below.
set match "nBuf>$n && 0==memcmp(\"$zSuffix\", &aBuf\[nBuf-$n\], $n)"
set memcpy "memcpy(&aBuf\[nBuf-$n\], \"$zRep\", $nRep);"
set cond "${xCond}(aBuf, nBuf-$n)"
# Pick the template from the three rule properties.
set bMemcpy [expr {$nRep>0}]
set bCond [expr {$xCond!=""}]
set bRet [expr {[llength $I]>3 && [lindex $I 3]}]
set t $T(if_${bCond}_${bMemcpy}_${bRet})
lappend ifstmts [string trim [subst -nocommands $t]]
}
# Chain the rules of this case together as if / else if / ...
set ifstmts [join $ifstmts "else "]
append switchbody [subst -nocommands $T(case)]
}
puts [subst -nocommands $T(function)]
}
puts [string trim {
/**************************************************************************
***************************************************************************
** GENERATED CODE STARTS HERE (mkportersteps.tcl)
*/
}]
# Emit one C function per entry of the O array. [array names] returns the
# keys in an unspecified order, so they are sorted to keep the generated
# code in the same order from run to run and across Tcl versions.
foreach step [lsort [array names O]] {
  create_step_function $step $O($step)
}
puts [string trim {
/*
** GENERATED CODE ENDS HERE (mkportersteps.tcl)
***************************************************************************
**************************************************************************/
}]

View File

@ -0,0 +1,292 @@
# 2014 Dec 19
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. .. test]
}
source $testdir/tester.tcl
catch {
sqlite3_fts5_may_be_corrupt 0
append G(perm:dbconfig) "; load_static_extension \$::dbhandle fts5"
reset_db
}
# Return a list with one entry per phrase instance reported by the API
# command $cmd. Each entry is the value returned by [$cmd xInst] with its
# spaces replaced by "." characters.
proc fts5_test_poslist {cmd} {
  set hits {}
  set idx 0
  while {$idx < [$cmd xInstCount]} {
    set inst [$cmd xInst $idx]
    lappend hits [string map {{ } .} $inst]
    incr idx
  }
  return $hits
}
# Return a list holding the result of [$cmd xColumnSize] for every column
# index from 0 up to, but not including, [$cmd xColumnCount].
proc fts5_test_columnsize {cmd} {
  set sizes {}
  set iCol 0
  while {$iCol < [$cmd xColumnCount]} {
    lappend sizes [$cmd xColumnSize $iCol]
    incr iCol
  }
  return $sizes
}
# Return a list holding the result of [$cmd xColumnText] for every column
# index from 0 up to, but not including, [$cmd xColumnCount].
proc fts5_test_columntext {cmd} {
  set texts {}
  set iCol 0
  while {$iCol < [$cmd xColumnCount]} {
    lappend texts [$cmd xColumnText $iCol]
    incr iCol
  }
  return $texts
}
# Return a list holding the result of [$cmd xColumnTotalSize] for every
# column index from 0 up to, but not including, [$cmd xColumnCount].
proc fts5_test_columntotalsize {cmd} {
  set totals {}
  set iCol 0
  while {$iCol < [$cmd xColumnCount]} {
    lappend totals [$cmd xColumnTotalSize $iCol]
    incr iCol
  }
  return $totals
}
# Tokenizer callback: append $token to the list variable named $varname
# in the caller's frame. The iStart and iEnd arguments are accepted but
# not used. Always returns the string SQLITE_OK.
proc test_append_token {varname token iStart iEnd} {
  upvar 1 $varname tokenlist
  lappend tokenlist $token
  return SQLITE_OK
}
# For every column reported by $cmd, run [$cmd xTokenize] over the column
# text with test_append_token as the callback, and return a list holding
# one token list per column.
proc fts5_test_tokenize {cmd} {
  set perColumn {}
  set iCol 0
  while {$iCol < [$cmd xColumnCount]} {
    set tokens {}
    set text [$cmd xColumnText $iCol]
    $cmd xTokenize $text [list test_append_token tokens]
    lappend perColumn $tokens
    incr iCol
  }
  return $perColumn
}
# Return the result of the xRowCount sub-command of API command $cmd.
proc fts5_test_rowcount {cmd} {
  return [$cmd xRowCount]
}
# xQueryPhrase callback. $cnt names a list variable in the caller's frame
# that holds one counter per column. Every column that appears as the
# second field of at least one [$cmd xInst] result has its counter
# incremented by exactly one.
proc test_queryphrase_cb {cnt cmd} {
  upvar $cnt L
  set idx 0
  while {$idx < [$cmd xInstCount]} {
    foreach {ip ic io} [$cmd xInst $idx] break
    set seen($ic) 1
    incr idx
  }
  foreach col [array names seen] {
    lset L $col [expr {[lindex $L $col] + 1}]
  }
}
# For each phrase reported by $cmd, build a list of zeroes with one entry
# per column, run [$cmd xQueryPhrase] with test_queryphrase_cb updating
# that list, and collect the lists. Returns one counter list per phrase.
proc fts5_test_queryphrase {cmd} {
  set perPhrase {}
  set iPhrase 0
  while {$iPhrase < [$cmd xPhraseCount]} {
    set cnt {}
    set iCol 0
    while {$iCol < [$cmd xColumnCount]} {
      lappend cnt 0
      incr iCol
    }
    $cmd xQueryPhrase $iPhrase [list test_queryphrase_cb cnt]
    lappend perPhrase $cnt
    incr iPhrase
  }
  return $perPhrase
}
# Return a flat list of label/value pairs. Each value is the result of
# the fts5_test_<label> helper invoked with $cmd, in the fixed order
# columnsize, columntext, columntotalsize, poslist, tokenize, rowcount.
proc fts5_test_all {cmd} {
  set report {}
  foreach label {
    columnsize columntext columntotalsize poslist tokenize rowcount
  } {
    lappend report $label [fts5_test_$label $cmd]
  }
  return $report
}
# Register each of the fts5_test_* helper procs with database handle $db
# by calling sqlite3_fts5_create_function, using the proc name as both
# the function name and the script to invoke.
proc fts5_aux_test_functions {db} {
  set helpers [list \
    fts5_test_columnsize      \
    fts5_test_columntext      \
    fts5_test_columntotalsize \
    fts5_test_poslist         \
    fts5_test_tokenize        \
    fts5_test_rowcount        \
    fts5_test_all             \
    fts5_test_queryphrase     \
  ]
  foreach helper $helpers {
    sqlite3_fts5_create_function $db $helper $helper
  }
}
# Decode the record with rowid 10 of table ${tbl}_data using fts5_decode()
# and return a list with one integer per element of the decoded value
# after the first: the length of that element minus two.
proc fts5_level_segs {tbl} {
  set sql "SELECT fts5_decode(rowid,block) aS r FROM ${tbl}_data WHERE rowid=10"
  set counts {}
  foreach level [lrange [db one $sql] 1 end] {
    lappend counts [expr {[llength $level] - 2}]
  }
  return $counts
}
# Decode the record with rowid 10 of table ${tbl}_data using fts5_decode()
# and return a list with one sub-list per element of the decoded value
# after the first. Each sub-list holds the number captured by the pattern
# "id=<digits>" from every entry of that element after its first two.
proc fts5_level_segids {tbl} {
  set sql "SELECT fts5_decode(rowid,block) aS r FROM ${tbl}_data WHERE rowid=10"
  set perLevel {}
  foreach level [lrange [db one $sql] 1 end] {
    set ids {}
    foreach seg [lrange $level 2 end] {
      regexp {id=([1234567890]*)} $seg -> segid
      lappend ids $segid
    }
    lappend perLevel $ids
  }
  return $perLevel
}
# Return a list of $n pseudo-random words. Each word is the letter "x"
# followed by a random three-digit number (000-999) whose digits 0..9 are
# replaced by the letters a..j.
proc fts5_rnddoc {n} {
  set digit2letter {0 a 1 b 2 c 3 d 4 e 5 f 6 g 7 h 8 i 9 j}
  set words {}
  for {set k 0} {$k < $n} {incr k} {
    set num [format %.3d [expr {int(rand()*1000)}]]
    lappend words "x[string map $digit2letter $num]"
  }
  return $words
}
#-------------------------------------------------------------------------
# Usage:
#
# nearset aCol ?-pc VARNAME? ?-near N? ?-col C? -- phrase1 phrase2...
#
# This command is used to test if a document (set of column values) matches
# the logical equivalent of a single FTS5 NEAR() clump and, if so, return
# the equivalent of an FTS5 position list.
#
# Parameter $aCol is passed a list of the column values for the document
# to test. Parameters $phrase1 and so on are the phrases.
#
# The result is a list of phrase hits. Each phrase hit is formatted as
# three integers separated by "." characters, in the following format:
#
# <phrase number> . <column number> . <token offset>
#
# Options:
#
# -near N (NEAR distance. Default 10)
# -col C (List of column indexes to match against)
# -pc VARNAME (variable in caller frame to use for phrase numbering)
#
# The "--" separator is mandatory; an error is raised if it is missing or
# if an option other than the three above is supplied. The result is
# passed through [sort_poslist] before being returned.
#
proc nearset {aCol args} {
# Option defaults, overridden by the key/value pairs that precede "--".
set O(-near) 10
set O(-col) {}
set O(-pc) ""
set nOpt [lsearch -exact $args --]
if {$nOpt<0} { error "no -- option" }
foreach {k v} [lrange $args 0 [expr $nOpt-1]] {
if {[info exists O($k)]==0} { error "unrecognized option $k" }
set O($k) $v
}
# Phrase numbering starts at 0 unless -pc names a caller variable, in
# which case numbering continues from, and updates, that variable.
if {$O(-pc) == ""} {
set counter 0
} else {
upvar $O(-pc) counter
}
# Set $phraselist to be a list of phrases. $nPhrase its length.
set phraselist [lrange $args [expr $nOpt+1] end]
set nPhrase [llength $phraselist]
# A(<column>,<phrase>) accumulates the token offsets of accepted hits.
for {set j 0} {$j < [llength $aCol]} {incr j} {
for {set i 0} {$i < $nPhrase} {incr i} {
set A($j,$i) [list]
}
}
set iCol -1
foreach col $aCol {
incr iCol
# Skip columns that are not listed in a non-empty -col option.
if {$O(-col)!="" && [lsearch $O(-col) $iCol]<0} continue
set nToken [llength $col]
# iFL walks over candidate window end positions, starting at the NEAR
# distance (or at the last token when the column is shorter than that).
set iFL [expr $O(-near) >= $nToken ? $nToken - 1 : $O(-near)]
for { } {$iFL < $nToken} {incr iFL} {
# B(<phrase>) collects the hits of each phrase inside this window.
for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} {
set B($iPhrase) [list]
}
for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} {
set p [lindex $phraselist $iPhrase]
set nPm1 [expr {[llength $p] - 1}]
set iFirst [expr $iFL - $O(-near) - [llength $p]]
for {set i $iFirst} {$i <= $iFL} {incr i} {
if {[lrange $col $i [expr $i+$nPm1]] == $p} { lappend B($iPhrase) $i }
}
# A phrase with no hit in this window means the window cannot match;
# leaving the loop early keeps iPhrase below nPhrase.
if {[llength $B($iPhrase)] == 0} break
}
# Every phrase had a hit in the window: merge the window hits into the
# per-column results, keeping them sorted and free of duplicates.
if {$iPhrase==$nPhrase} {
for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} {
set A($iCol,$iPhrase) [concat $A($iCol,$iPhrase) $B($iPhrase)]
set A($iCol,$iPhrase) [lsort -integer -uniq $A($iCol,$iPhrase)]
}
}
}
}
# Format the hits as <phrase>.<column>.<offset>; $counter supplies the
# phrase number and is advanced once per phrase.
set res [list]
#puts [array names A]
for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} {
for {set iCol 0} {$iCol < [llength $aCol]} {incr iCol} {
foreach a $A($iCol,$iPhrase) {
lappend res "$counter.$iCol.$a"
}
}
incr counter
}
#puts $res
sort_poslist $res
}
#-------------------------------------------------------------------------
# Usage:
#
#   sort_poslist LIST
#
# Return LIST, a position list of the type produced by [nearset], sorted
# with [instcompare] as the comparison command.
#
proc sort_poslist {L} {
  return [lsort -command instcompare $L]
}
# Comparison command for two phrase hits of the form
# <phrase>.<column>.<offset>. Hits are ordered by column first, then by
# offset, then by phrase number. Returns a negative, zero or positive
# integer (the difference of the first pair of fields that differ).
proc instcompare {lhs rhs} {
  foreach {lp lc lo} [split $lhs .] {}
  foreach {rp rc ro} [split $rhs .] {}
  set diff [expr {$lc - $rc}]
  if {$diff != 0} { return $diff }
  set diff [expr {$lo - $ro}]
  if {$diff != 0} { return $diff }
  return [expr {$lp - $rp}]
}
#-------------------------------------------------------------------------
# Logical operators used by the commands returned by fts5_tcl_expr().
#
# AND: if any argument is an empty list the result is an empty list.
# Otherwise the arguments are concatenated and sorted with sort_poslist.
#
proc AND {args} {
  foreach operand $args {
    if {[llength $operand]==0} { return [list] }
  }
  return [sort_poslist [concat {*}$args]]
}
# OR: concatenate all arguments into a single position list and return it
# sorted with sort_poslist.
proc OR {args} {
  set merged [concat {*}$args]
  return [sort_poslist $merged]
}
# NOT: return $a unchanged when $b is an empty list, otherwise an empty
# list.
proc NOT {a b} {
  if {[llength $b]==0} { return $a }
  return [list]
}

501
ext/fts5/test/fts5aa.test Normal file
View File

@ -0,0 +1,501 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS5 module.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5aa
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE t1 USING fts5(a, b, c);
SELECT name, sql FROM sqlite_master;
} {
t1 {CREATE VIRTUAL TABLE t1 USING fts5(a, b, c)}
t1_data {CREATE TABLE 't1_data'(id INTEGER PRIMARY KEY, block BLOB)}
t1_content {CREATE TABLE 't1_content'(id INTEGER PRIMARY KEY, c0, c1, c2)}
t1_docsize {CREATE TABLE 't1_docsize'(id INTEGER PRIMARY KEY, sz BLOB)}
t1_config {CREATE TABLE 't1_config'(k PRIMARY KEY, v) WITHOUT ROWID}
}
do_execsql_test 1.1 {
DROP TABLE t1;
SELECT name, sql FROM sqlite_master;
} {
}
#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x,y);
}
do_execsql_test 2.1 {
INSERT INTO t1 VALUES('a b c', 'd e f');
}
do_test 2.2 {
execsql { SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 }
} {/{\(structure\) {lvl=0 nMerge=0 {id=[0123456789]* h=1 leaves=1..1}}}/}
do_execsql_test 2.3 {
INSERT INTO t1(t1) VALUES('integrity-check');
}
#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 3.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x,y);
}
foreach {i x y} {
1 {g f d b f} {h h e i a}
2 {f i g j e} {i j c f f}
3 {e e i f a} {e h f d f}
4 {h j f j i} {h a c f j}
5 {d b j c g} {f e i b e}
6 {a j a e e} {j d f d e}
7 {g i j c h} {j d h c a}
8 {j j i d d} {e e d f b}
9 {c j j d c} {h j i f g}
10 {b f h i a} {c f b b j}
} {
do_execsql_test 3.$i.1 { INSERT INTO t1 VALUES($x, $y) }
do_execsql_test 3.$i.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
if {[set_test_counter errors]} break
}
#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 4.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x,y);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}
foreach {i x y} {
1 {g f d b f} {h h e i a}
2 {f i g j e} {i j c f f}
3 {e e i f a} {e h f d f}
4 {h j f j i} {h a c f j}
5 {d b j c g} {f e i b e}
6 {a j a e e} {j d f d e}
7 {g i j c h} {j d h c a}
8 {j j i d d} {e e d f b}
9 {c j j d c} {h j i f g}
10 {b f h i a} {c f b b j}
} {
do_execsql_test 4.$i.1 { INSERT INTO t1 VALUES($x, $y) }
do_execsql_test 4.$i.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
if {[set_test_counter errors]} break
}
#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 5.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x,y);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}
foreach {i x y} {
1 {dd abc abc abc abcde} {aaa dd ddd ddd aab}
2 {dd aab d aaa b} {abcde c aaa aaa aaa}
3 {abcde dd b b dd} {abc abc d abc ddddd}
4 {aaa abcde dddd dddd abcde} {abc b b abcde abc}
5 {aab dddd d dddd c} {ddd abcde dddd abcde c}
6 {ddd dd b aab abcde} {d ddddd dddd c abc}
7 {d ddddd ddd c abcde} {c aab d abcde ddd}
8 {abcde aaa aab c c} {ddd c dddd b aaa}
9 {abcde aab ddddd c aab} {dddd dddd b c dd}
10 {ddd abcde dddd dd c} {dddd c c d abcde}
} {
do_execsql_test 5.$i.1 { INSERT INTO t1 VALUES($x, $y) }
do_execsql_test 5.$i.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
if {[set_test_counter errors]} break
}
#-------------------------------------------------------------------------
#
breakpoint
reset_db
do_execsql_test 6.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x,y);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}
do_execsql_test 6.1 {
INSERT INTO t1(rowid, x, y) VALUES(22, 'a b c', 'c b a');
REPLACE INTO t1(rowid, x, y) VALUES(22, 'd e f', 'f e d');
}
do_execsql_test 6.2 {
INSERT INTO t1(t1) VALUES('integrity-check')
}
do_execsql_test 6.3 {
REPLACE INTO t1(rowid, x, y) VALUES('22', 'l l l', 'l l l');
}
do_execsql_test 6.4 {
INSERT INTO t1(t1) VALUES('integrity-check')
}
#-------------------------------------------------------------------------
#
reset_db
expr srand(0)
do_execsql_test 7.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x,y,z);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}
# Return a random document: a list of 20 words, each picked with rand()
# from a fixed 11-word vocabulary.
proc doc {} {
  set vocab [list aaa aab abc abcde b c d dd ddd dddd ddddd]
  set words {}
  set k 0
  while {$k < 20} {
    set pick [expr int(rand()*[llength $vocab])]
    lappend words [lindex $vocab $pick]
    incr k
  }
  return $words
}
# Debugging aid: decode the t1_data record with id 10 and print one line
# per element of the decoded value after the first. Each line shows the
# first two entries of the element followed by one "." for every further
# entry.
proc dump_structure {} {
  db eval {SELECT fts5_decode(id, block) AS t FROM t1_data WHERE id=10} {
    foreach level [lrange $t 1 end] {
      set nSeg [expr {[llength $level] - 2}]
      set dots [string repeat . $nSeg]
      puts "[lrange $level 0 1] $dots"
    }
  }
}
for {set i 1} {$i <= 10} {incr i} {
do_test 7.$i {
for {set j 0} {$j < 10} {incr j} {
set x [doc]
set y [doc]
set z [doc]
set rowid [expr int(rand() * 100)]
execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) }
}
execsql { INSERT INTO t1(t1) VALUES('integrity-check'); }
} {}
}
#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}
#exit
#-------------------------------------------------------------------------
#
# Tests 8.*: a table declared with prefix="1,2,3" must pass the
# integrity-check after a single row has been inserted.
reset_db
do_execsql_test 8.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x, prefix="1,2,3");
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}
do_execsql_test 8.1 {
INSERT INTO t1 VALUES('the quick brown fox');
INSERT INTO t1(t1) VALUES('integrity-check');
}
#-------------------------------------------------------------------------
#
# Tests 9.*: the randomized REPLACE workload of the 7.* tests, repeated on
# a table that also has prefix="1,2,3" configured.
reset_db
expr {srand(0)}
do_execsql_test 9.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x,y,z, prefix="1,2,3");
  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}
# The helper procs [doc] and [dump_structure] defined for the 7.* tests are
# still present in the interpreter (reset_db is only asked to reset the
# database), so the identical definitions are not repeated here.
# Ten rounds. Each round writes 100 random documents with REPLACE at a
# random rowid in the range 0..99 and then runs the integrity-check. The
# loop is abandoned as soon as an error has been counted.
for {set i 1} {$i <= 10} {incr i} {
  do_test 9.$i {
    for {set j 0} {$j < 100} {incr j} {
      set x [doc]
      set y [doc]
      set z [doc]
      set rowid [expr {int(rand() * 100)}]
      execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) }
    }
    execsql { INSERT INTO t1(t1) VALUES('integrity-check'); }
  } {}
  if {[set_test_counter errors]} break
}
#-------------------------------------------------------------------------
#
# Tests 10.*: insert ten rows, delete them one at a time in a scrambled
# order, insert them again and finally delete everything. The
# integrity-check is run after every single modification.
reset_db
do_execsql_test 10.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x,y);
}
# Triples of: label, value for column x, value for column y. The label is
# only used in test names; the INSERT statements below do not set rowid.
set d10 {
1 {g f d b f} {h h e i a}
2 {f i g j e} {i j c f f}
3 {e e i f a} {e h f d f}
4 {h j f j i} {h a c f j}
5 {d b j c g} {f e i b e}
6 {a j a e e} {j d f d e}
7 {g i j c h} {j d h c a}
8 {j j i d d} {e e d f b}
9 {c j j d c} {h j i f g}
10 {b f h i a} {c f b b j}
}
foreach {rowid x y} $d10 {
do_execsql_test 10.1.$rowid.1 { INSERT INTO t1 VALUES($x, $y) }
do_execsql_test 10.1.$rowid.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
}
# NOTE(review): rowid 5 appears twice in this list, so the test names
# 10.2.5.1 and 10.2.5.2 are used twice and the second DELETE targets a
# row that has already been removed - confirm whether this is intended.
foreach rowid {5 9 8 1 2 4 10 7 3 5 6} {
do_execsql_test 10.2.$rowid.1 { DELETE FROM t1 WHERE rowid = $rowid }
do_execsql_test 10.2.$rowid.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
}
foreach {rowid x y} $d10 {
do_execsql_test 10.3.$rowid.1 { INSERT INTO t1 VALUES($x, $y) }
do_execsql_test 10.3.$rowid.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
}
do_execsql_test 10.4.1 { DELETE FROM t1 }
do_execsql_test 10.4.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
#-------------------------------------------------------------------------
#
# Tests 11.*: "rank" is rejected both as a column name and as a table
# name, and "rowid" is rejected as a column name. Each CREATE statement
# is expected to fail with the error message shown.
do_catchsql_test 11.1 {
CREATE VIRTUAL TABLE t2 USING fts5(a, b, c, rank);
} {1 {reserved fts5 column name: rank}}
do_catchsql_test 11.2 {
CREATE VIRTUAL TABLE rank USING fts5(a, b, c);
} {1 {reserved fts5 table name: rank}}
do_catchsql_test 11.3 {
CREATE VIRTUAL TABLE t2 USING fts5(a, b, c, rowid);
} {1 {reserved fts5 column name: rowid}}
#-------------------------------------------------------------------------
#
# Tests 12.*: MATCH patterns that begin with "*" are handled as special
# queries. An unrecognised name ("stuff") is expected to produce an
# error, while "reads" is expected to produce a value that Tcl accepts as
# an integer.
do_execsql_test 12.1 {
CREATE VIRTUAL TABLE t2 USING fts5(x,y);
} {}
do_catchsql_test 12.2 {
SELECT t2 FROM t2 WHERE t2 MATCH '*stuff'
} {1 {unknown special query: stuff}}
do_test 12.3 {
set res [db eval { SELECT t2 FROM t2 WHERE t2 MATCH '* reads ' }]
string is integer $res
} {1}
#-------------------------------------------------------------------------
#
# Tests 13.*: a deleted row must no longer be returned by MATCH, and a
# pattern consisting only of "." is expected to match nothing. There is
# no test numbered 13.3.
reset_db
do_execsql_test 13.1 {
CREATE VIRTUAL TABLE t1 USING fts5(x);
INSERT INTO t1(rowid, x) VALUES(1, 'o n e'), (2, 't w o');
} {}
do_execsql_test 13.2 {
SELECT rowid FROM t1 WHERE t1 MATCH 'o';
} {1 2}
do_execsql_test 13.4 {
DELETE FROM t1 WHERE rowid=2;
} {}
do_execsql_test 13.5 {
SELECT rowid FROM t1 WHERE t1 MATCH 'o';
} {1}
do_execsql_test 13.6 {
SELECT rowid FROM t1 WHERE t1 MATCH '.';
} {}
#-------------------------------------------------------------------------
#
# Tests 14.*: a MATCH query must keep returning rows while schema changes
# are made and rolled back from inside the loop that iterates over it.
# The table holds 200 identical rows produced by a recursive CTE.
reset_db
do_execsql_test 14.1 {
CREATE VIRTUAL TABLE t1 USING fts5(x, y);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
WITH d(x,y) AS (
SELECT NULL, 'xyz xyz xyz xyz xyz xyz'
UNION ALL
SELECT NULL, 'xyz xyz xyz xyz xyz xyz' FROM d
)
INSERT INTO t1 SELECT * FROM d LIMIT 200;
}
# For every row visited: open a transaction, create a table, roll back.
# All 200 rows are expected to be visited.
do_test 14.2 {
set nRow 0
db eval { SELECT * FROM t1 WHERE t1 MATCH 'xyz' } {
db eval {
BEGIN;
CREATE TABLE t2(a, b);
ROLLBACK;
}
incr nRow
}
set nRow
} {200}
# The same check using a savepoint inside an enclosing transaction. The
# BEGIN issued here is not followed by a COMMIT or ROLLBACK in this test.
do_test 14.3 {
set nRow 0
db eval { BEGIN; }
db eval { SELECT * FROM t1 WHERE t1 MATCH 'xyz' } {
db eval {
SAVEPOINT aaa;
CREATE TABLE t2(a, b);
ROLLBACK TO aaa;
RELEASE aaa;
}
incr nRow
}
set nRow
} {200}
# Tests 15.*: the integrity-check passes on the untouched table, then is
# expected to fail after t1_content has been modified directly so that its
# text no longer agrees with what was indexed.
do_execsql_test 15.0 {
INSERT INTO t1(t1) VALUES('integrity-check');
}
do_execsql_test 15.1 {
UPDATE t1_content SET c1 = 'xyz xyz xyz xyz xyz abc' WHERE rowid = 1;
}
do_catchsql_test 15.2 {
INSERT INTO t1(t1) VALUES('integrity-check');
} {1 {database disk image is malformed}}
#-------------------------------------------------------------------------
#
# Tests 16.*: the query is expected to fail with an error when the
# table's backing data is modified from within a running MATCH query.
do_execsql_test 16.1 {
CREATE VIRTUAL TABLE n1 USING fts5(a);
INSERT INTO n1 VALUES('a b c d');
}
# Overwrite the first two bytes of the "block" column of the n1_data row
# with rowid 10 with the bytes 0x44 0x45, then set the 'version' entry of
# n1_config to 50.
proc funk {} {
set fd [db incrblob main n1_data block 10]
fconfigure $fd -encoding binary -translation binary
puts -nonewline $fd "\x44\x45"
close $fd
db eval { UPDATE n1_config SET v=50 WHERE k='version' }
}
# Expose the proc as the SQL function funk() so that it can be invoked
# from inside the query below.
db func funk funk
do_catchsql_test 16.2 {
SELECT funk(), bm25(n1), funk() FROM n1 WHERE n1 MATCH 'a+b+c+d'
} {1 {SQL logic error or missing database}}
#-------------------------------------------------------------------------
#
# Tests 17.1-17.2: a full scan of the table is run from inside the loop
# body of another full scan of the same table. Every inner scan is
# expected to return all three rows.
reset_db
do_execsql_test 17.1 {
CREATE VIRTUAL TABLE b2 USING fts5(x);
INSERT INTO b2 VALUES('a');
INSERT INTO b2 VALUES('b');
INSERT INTO b2 VALUES('c');
}
do_test 17.2 {
set res [list]
db eval { SELECT * FROM b2 ORDER BY rowid ASC } {
lappend res [execsql { SELECT * FROM b2 ORDER BY rowid ASC }]
}
set res
} {{a b c} {a b c} {a b c}}
# Test 18.1: a column-qualified query, where "y" is the column name and
# "x" is the token searched for.
reset_db
do_execsql_test 18.1 {
CREATE VIRTUAL TABLE c2 USING fts5(x, y);
INSERT INTO c2 VALUES('x x x', 'x x x');
SELECT rowid FROM c2 WHERE c2 MATCH 'y:x';
} {1}
#-------------------------------------------------------------------------
#
# Tests 19.*: rowid range constraints and extreme rowid values. These
# tests were previously numbered 17.*, which duplicated the names of the
# earlier 17.1 and 17.2 tests in this file.
reset_db
# Doubling the table eight times yields 256 rows.
do_execsql_test 19.1 {
  CREATE VIRTUAL TABLE uio USING fts5(ttt);
  INSERT INTO uio VALUES(NULL);
  INSERT INTO uio SELECT NULL FROM uio;
  INSERT INTO uio SELECT NULL FROM uio;
  INSERT INTO uio SELECT NULL FROM uio;
  INSERT INTO uio SELECT NULL FROM uio;
  INSERT INTO uio SELECT NULL FROM uio;
  INSERT INTO uio SELECT NULL FROM uio;
  INSERT INTO uio SELECT NULL FROM uio;
  INSERT INTO uio SELECT NULL FROM uio;
  SELECT count(*) FROM uio;
} {256}
do_execsql_test 19.2 {
  SELECT count(*) FROM uio WHERE rowid BETWEEN 8 AND 17
} {10}
do_execsql_test 19.3 {
  SELECT rowid FROM uio WHERE rowid BETWEEN 8 AND 17
} {8 9 10 11 12 13 14 15 16 17}
do_execsql_test 19.4 {
  SELECT rowid FROM uio WHERE rowid BETWEEN 8 AND 17 ORDER BY rowid DESC
} {17 16 15 14 13 12 11 10 9 8}
do_execsql_test 19.5 {
  SELECT count(*) FROM uio
} {256}
# Add rows at the largest and smallest 64-bit signed rowid values.
do_execsql_test 19.6 {
  INSERT INTO uio(rowid) VALUES(9223372036854775807);
  INSERT INTO uio(rowid) VALUES(-9223372036854775808);
  SELECT count(*) FROM uio;
} {258}
do_execsql_test 19.7 {
  SELECT min(rowid), max(rowid) FROM uio;
} {-9223372036854775808 9223372036854775807}
# An insert without an explicit rowid must still succeed while the
# largest possible rowid is in use, and must not change min() or max().
do_execsql_test 19.8 {
  INSERT INTO uio DEFAULT VALUES;
  SELECT min(rowid), max(rowid), count(*) FROM uio;
} {-9223372036854775808 9223372036854775807 259}
do_execsql_test 19.9 {
  SELECT min(rowid), max(rowid), count(*) FROM uio WHERE rowid < 10;
} {-9223372036854775808 9 10}
#--------------------------------------------------------------------
#
# Tests 20.*: the MATCH pattern is taken from a column of another table
# in a join. Both orderings of the FROM clause must give the same result.
# These tests were previously numbered 18.*, which duplicated the name of
# the earlier 18.1 test in this file.
do_execsql_test 20.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(a, b);
  CREATE VIRTUAL TABLE t2 USING fts5(c, d);
  INSERT INTO t1 VALUES('abc*', NULL);
  INSERT INTO t2 VALUES(1, 'abcdefg');
}
do_execsql_test 20.2 {
  SELECT t1.rowid, t2.rowid FROM t1, t2 WHERE t2 MATCH t1.a AND t1.rowid = t2.c
} {1 1}
do_execsql_test 20.3 {
  SELECT t1.rowid, t2.rowid FROM t2, t1 WHERE t2 MATCH t1.a AND t1.rowid = t2.c
} {1 1}
finish_test

289
ext/fts5/test/fts5ab.test Normal file
View File

@ -0,0 +1,289 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS5 module.
#
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ab
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
# Tests 1.*: plain (non-MATCH) access to an fts5 table: ordering by rowid
# in both directions and lookup by rowid. Rowid comparisons against the
# non-integer values 2.01 and 1.99 are expected to return no rows.
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE t1 USING fts5(a, b);
INSERT INTO t1 VALUES('hello', 'world');
INSERT INTO t1 VALUES('one two', 'three four');
INSERT INTO t1(rowid, a, b) VALUES(45, 'forty', 'five');
}
do_execsql_test 1.1 {
SELECT * FROM t1 ORDER BY rowid DESC;
} { forty five {one two} {three four} hello world }
do_execsql_test 1.2 {
SELECT rowid FROM t1 ORDER BY rowid DESC;
} {45 2 1}
do_execsql_test 1.3 {
SELECT rowid FROM t1 ORDER BY rowid ASC;
} {1 2 45}
do_execsql_test 1.4 {
SELECT * FROM t1 WHERE rowid=2;
} {{one two} {three four}}
do_execsql_test 1.5 {
SELECT * FROM t1 WHERE rowid=2.01;
} {}
do_execsql_test 1.6 {
SELECT * FROM t1 WHERE rowid=1.99;
} {}
#-------------------------------------------------------------------------
# Tests 2.*: single-token MATCH queries on a one-column table, plus one
# malformed expression that is expected to raise a syntax error.
reset_db
do_execsql_test 2.1 {
CREATE VIRTUAL TABLE t1 USING fts5(x);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
INSERT INTO t1 VALUES('one');
INSERT INTO t1 VALUES('two');
INSERT INTO t1 VALUES('three');
}
do_catchsql_test 2.2 {
SELECT rowid, * FROM t1 WHERE t1 MATCH 'AND AND'
} {1 {fts5: syntax error near "AND"}}
do_execsql_test 2.3 { SELECT rowid, * FROM t1 WHERE t1 MATCH 'two' } {2 two}
do_execsql_test 2.4 { SELECT rowid, * FROM t1 WHERE t1 MATCH 'three' } {3 three}
do_execsql_test 2.5 { SELECT rowid, * FROM t1 WHERE t1 MATCH 'one' } {1 one}
# These three rows receive rowids 4, 5 and 6.
do_execsql_test 2.6 {
INSERT INTO t1 VALUES('a b c d e f g');
INSERT INTO t1 VALUES('b d e a a a i');
INSERT INTO t1 VALUES('x y z b c c c');
}
# Each entry is: test number, token to search for, expected rowids in
# descending order. The ascending variant of each test sorts the same
# expected list with lsort.
foreach {tn expr res} {
1 a {5 4}
2 b {6 5 4}
3 c {6 4}
4 d {5 4}
5 e {5 4}
6 f {4}
7 g {4}
8 x {6}
9 y {6}
10 z {6}
} {
do_execsql_test 2.7.$tn.1 {
SELECT rowid FROM t1 WHERE t1 MATCH $expr ORDER BY rowid DESC
} $res
do_execsql_test 2.7.$tn.2 {
SELECT rowid FROM t1 WHERE t1 MATCH $expr ORDER BY rowid ASC
} [lsort -integer $res]
}
#-------------------------------------------------------------------------
#
# Tests 3.*: token, phrase ("+" and quoted string) and NEAR queries over
# a two-column table of similar-looking words.
reset_db
do_execsql_test 3.0 {
CREATE VIRTUAL TABLE t1 USING fts5(a,b);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}
# Load ten rows, running the integrity-check after each insert. The
# rowids are assigned automatically.
foreach {tn a b} {
1 {abashed abandons abase abash abaft} {abases abased}
2 {abasing abases abaft abated abandons} {abases abandoned}
3 {abatement abash abash abated abase} {abasements abashing}
4 {abaft abasements abase abasement abasing} {abasement abases}
5 {abaft abashing abatement abash abasements} {abandons abandoning}
6 {aback abate abasements abashes abandoned} {abasement abased}
7 {abandons abated abased aback abandoning} {abases abandoned}
8 {abashing abases abasement abaft abashing} {abashed abate}
9 {abash abase abate abashing abashed} {abandon abandoned}
10 {abate abandoning abandons abasement aback} {abandon abandoning}
} {
do_execsql_test 3.1.$tn.1 { INSERT INTO t1 VALUES($a, $b) }
do_execsql_test 3.1.$tn.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
}
# Expected rowids are listed in descending order to match the ORDER BY.
foreach {tn expr res} {
1 {abash} {9 5 3 1}
2 {abase} {9 4 3 1}
3 {abase + abash} {1}
4 {abash + abase} {9}
5 {abaft + abashing} {8 5}
6 {abandon + abandoning} {10}
7 {"abashing abases abasement abaft abashing"} {8}
} {
do_execsql_test 3.2.$tn {
SELECT rowid FROM t1 WHERE t1 MATCH $expr ORDER BY rowid DESC
} $res
}
do_execsql_test 3.3 {
SELECT rowid FROM t1 WHERE t1 MATCH 'NEAR(aback abate, 2)'
} {6}
# The same expressions without an ORDER BY clause. The expected rowids
# are listed in ascending order.
foreach {tn expr res} {
1 {abash} {1 3 5 9}
2 {abase} {1 3 4 9}
3 {abase + abash} {1}
4 {abash + abase} {9}
5 {abaft + abashing} {5 8}
6 {abandon + abandoning} {10}
7 {"abashing abases abasement abaft abashing"} {8}
} {
do_execsql_test 3.4.$tn {
SELECT rowid FROM t1 WHERE t1 MATCH $expr
} $res
}
#-------------------------------------------------------------------------
# Documents with more than 2M tokens.
#
# Each of the two documents is built from a two-token string repeated
# 1500000 times. Document 2 consists of the token "a" only, followed by a
# single trailing "x".
do_execsql_test 4.0 {
CREATE VIRTUAL TABLE s1 USING fts5(x);
}
foreach {tn doc} [list \
1 [string repeat {a x } 1500000] \
2 "[string repeat {a a } 1500000] x" \
] {
do_execsql_test 4.$tn { INSERT INTO s1 VALUES($doc) }
}
# Both documents contain the token "x" and the phrase "a x".
do_execsql_test 4.3 {
SELECT rowid FROM s1 WHERE s1 MATCH 'x'
} {1 2}
do_execsql_test 4.4 {
SELECT rowid FROM s1 WHERE s1 MATCH '"a x"'
} {1 2}
#-------------------------------------------------------------------------
# Check that a special case of segment promotion works. The case is where
# a new segment is written to level L, but the oldest segment within level
# (L-2) is larger than it.
#
# Create the table with 'pgsz' set to 32 and 'automerge' set to 0.
do_execsql_test 5.0 {
CREATE VIRTUAL TABLE s2 USING fts5(x);
INSERT INTO s2(s2, rank) VALUES('pgsz', 32);
INSERT INTO s2(s2, rank) VALUES('automerge', 0);
}
# Return a list of $n random three-letter tokens. Each token is a random
# integer in the range 0..999, formatted as three zero-padded digits, with
# the digits 0-9 mapped to the letters a-j.
proc rnddoc {n} {
  set map [list 0 a 1 b 2 c 3 d 4 e 5 f 6 g 7 h 8 i 9 j]
  set doc [list]
  for {set i 0} {$i < $n} {incr i} {
    # The expression is braced so that it is substituted only once and can
    # be byte-compiled.
    lappend doc [string map $map [format %.3d [expr {int(rand()*1000)}]]]
  }
  return $doc
}
# Make the proc available to SQL as rnddoc(N).
db func rnddoc rnddoc
# Insert 65 rows one statement at a time, then delete rows 1..63, and
# check the value reported by fts5_level_segs (a helper defined outside
# this file).
do_test 5.1 {
for {set i 1} {$i <= 65} {incr i} {
execsql { INSERT INTO s2 VALUES(rnddoc(10)) }
}
for {set i 1} {$i <= 63} {incr i} {
execsql { DELETE FROM s2 WHERE rowid = $i }
}
fts5_level_segs s2
} {0 8}
# Set 'automerge' to 8, then insert seven larger documents.
do_test 5.2 {
execsql {
INSERT INTO s2(s2, rank) VALUES('automerge', 8);
}
for {set i 0} {$i < 7} {incr i} {
execsql { INSERT INTO s2 VALUES(rnddoc(50)) }
}
fts5_level_segs s2
} {8 0 0}
# Test also the other type of segment promotion - when a new segment is written
# that is larger than segments immediately following it.
do_test 5.3 {
execsql {
DROP TABLE s2;
CREATE VIRTUAL TABLE s2 USING fts5(x);
INSERT INTO s2(s2, rank) VALUES('pgsz', 32);
INSERT INTO s2(s2, rank) VALUES('automerge', 0);
}
for {set i 1} {$i <= 16} {incr i} {
execsql { INSERT INTO s2 VALUES(rnddoc(5)) }
}
fts5_level_segs s2
} {0 1}
# A single 160-token document is then written on top of the 16 small ones.
do_test 5.4 {
execsql { INSERT INTO s2 VALUES(rnddoc(160)) }
fts5_level_segs s2
} {2 0}
#-------------------------------------------------------------------------
#
# Tests 6.*: rows written inside a transaction that is still open must be
# visible to MATCH queries, in both sort orders, and must still be visible
# after the COMMIT. The row 'A B C' is expected to match the query 'a'.
do_execsql_test 6.0 {
CREATE VIRTUAL TABLE s3 USING fts5(x);
BEGIN;
INSERT INTO s3 VALUES('a b c');
INSERT INTO s3 VALUES('A B C');
}
do_execsql_test 6.1.1 {
SELECT rowid FROM s3 WHERE s3 MATCH 'a'
} {1 2}
do_execsql_test 6.1.2 {
SELECT rowid FROM s3 WHERE s3 MATCH 'a' ORDER BY rowid DESC
} {2 1}
do_execsql_test 6.2 {
COMMIT;
}
do_execsql_test 6.3 {
SELECT rowid FROM s3 WHERE s3 MATCH 'a'
} {1 2}
# Reopen the database and run 'optimize' inside a transaction that is
# then rolled back. The script is expected to return nothing.
do_test 6.4 {
db close
sqlite3 db test.db
execsql {
BEGIN;
INSERT INTO s3(s3) VALUES('optimize');
ROLLBACK;
}
} {}
#-------------------------------------------------------------------------
#
# Test 7.0: insert a single document of 1500 tokens ("a b c " repeated
# 500 times) into a table whose page size is set to 32.
set doc [string repeat "a b c " 500]
do_execsql_test 7.0 {
  CREATE VIRTUAL TABLE x1 USING fts5(x);
  INSERT INTO x1(x1, rank) VALUES('pgsz', 32);
  INSERT INTO x1 VALUES($doc);
}
finish_test

358
ext/fts5/test/fts5ac.test Normal file
View File

@ -0,0 +1,358 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS5 module.
#
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ac
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
set data {
0 {p o q e z k z p n f y u z y n y} {l o o l v v k}
1 {p k h h p y l l h i p v n} {p p l u r i f a j g e r r x w}
2 {l s z j k i m p s} {l w e j t j e e i t w r o p o}
3 {x g y m y m h p} {k j j b r e y y a k y}
4 {q m a i y i z} {o w a g k x g j m w e u k}
5 {k o a w y b s z} {s g l m m l m g p}
6 {d a q i z h b l c p k j g k} {p x u j x t v c z}
7 {f d a g o c t i} {w f c x l d r k i j}
8 {y g w u b q p o m j y b p a e k} {r i d k y w o z q m a t p}
9 {r k o m c c j s x m x m x m q r} {y r c a q d z k n x n}
10 {k j q m g q a j d} {d d e z g w h c d o o g x d}
11 {j z u m o y q j f w e e w t r j w} {g m o r x n t n w i f g l z f}
12 {s y w a w d o h x m k} {c w k z b p o r a}
13 {u t h x e g s k n g i} {f j w g c s r}
14 {b f i c s u z t k} {c k q s j u i z o}
15 {n a f n u s w h y n s i q e w} {x g e g a s s h n}
16 {k s q e j n p} {t r j f t o e k k l m i}
17 {g d t u w r o p m n m n p h b o u} {h s w o s l j e}
18 {f l q y q q g e e x j r} {n b r r g e i r t x q k}
19 {f i r g o a w e p i l o a w} {e k r z t d g h g i b d i e m}
20 {l d u u f p y} {g o m m u x m g l j t t x x u}
21 {m c d k x i c z l} {m i a i e u h}
22 {w b f o c g x y j} {z d w x d f h i p}
23 {w u i u x t c h k i b} {b y k h b v r t g j}
24 {h f d j s w s b a p k} {a q y u z e y m m j q r}
25 {d i x y x x k i y f s d j h z p n} {l l q m e t c w g y h t s v g}
26 {g s q w t d k x g f m j p k y} {r m b x e l t d}
27 {j l s q u g y v e c l o} {m f l m m m h g x x l n c}
28 {c t j g v r s b z j} {l c f y d t q n}
29 {e x z y w i h l} {b n b x e y q e n u m}
30 {g y y h j b w r} {q b q f u s k c k g r}
31 {g u l x l b r c m z b u c} {k g t b x k x n t e z d h o}
32 {w g v l z f b z h p s c v h} {g e w v m h k r g w a r f q}
33 {c g n f u d o y o b} {e y o h x x y y i z s b h a j}
34 {v y h c q u u s q y x x k s q} {d n r m y k n t i r n w e}
35 {o u c x l e b t a} {y b a x y f z x r}
36 {x p h l j a a u u j h} {x o f s z m b c q p}
37 {k q t i c a q n m v v} {v r z e f m y o}
38 {r w t t t t r v v o e p g h} {l w x a g a u h y}
39 {o p v g v b a g o} {j t q c r b b g y z}
40 {f s o r o d t h q f x l} {r d b m k i f s t d l m y x j w}
41 {t m o t m f m f} {i p i q j v n v m b q}
42 {t x w a r l w d t b c o d o} {a h f h w z d n s}
43 {t u q c d g p q x j o l c x c} {m n t o z z j a y}
44 {v d i i k b f s z r v r z y} {g n q y s x x m b x c l w}
45 {p v v a c s z y e o l} {m v t u d k m k q b d c v z r}
46 {f y k l d r q w r s t r e} {h m v r r l r r t f q e x y}
47 {w l n l t y x} {n h s l a f c h u f l x x m v n o}
48 {t n v i k e b p z p d j j l i o} {i v z p g u e j s i k n h w d c}
49 {z v x p n l t a j c} {e j l e n c e t a d}
50 {w u b x u i v h a i y m m r p m s} {s r h d o g z y f f x e}
51 {d c c x b c a x g} {p r a j v u y}
52 {f w g r c o d l t u e z h i} {j l l s s b j m}
53 {p m t f k i x} {u v y a z g w v v m x h i}
54 {l c z g l o j i c d e b} {b f v y w u i b e i y}
55 {r h c x f x a d s} {z x y k f l r b q c v}
56 {v x x c y h z x b g m o q n c} {h n b i t g h a q b c o r u}
57 {d g l o h t b s b r} {n u e p t i m u}
58 {t d y e t d c w u o s w x f c h} {i o s v y b r d r}
59 {l b a p q n d r} {k d c c d n y q h g a o p e x}
60 {f r z v m p k r} {x x r i s b a g f c}
61 {s a z i e r f i w c n y v z t k s} {y y i r y n l s b w i e k n}
62 {n x p r e x q r m v i b y} {f o o z n b s r q j}
63 {y j s u j x o n r q t f} {f v k n v x u s o a d e f e}
64 {u s i l y c x q} {r k c h p c h b o s s u s p b}
65 {m p i o s h o} {s w h u n d m n q t y k b w c}
66 {l d f g m x x x o} {s w d d f b y j j h h t i y p j o}
67 {c b m h f n v w n h} {i r w i e x r w l z p x u g u l s}
68 {y a h u h i m a y q} {d d r x h e v q n z y c j}
69 {c x f d x o n p o b r t b l p l} {m i t k b x v f p t m l l y r o}
70 {u t l w w m s} {m f m o l t k o p e}
71 {f g q e l n d m z x q} {z s i i i m f w w f n g p e q}
72 {n l h a v u o d f j d e x} {v v s l f g d g r a j x i f z x}
73 {x v m v f i g q e w} {r y s j i k m j j e d g r n o i f}
74 {g d y n o h p s y q z j d w n h w} {x o d l t j i b r d o r y}
75 {p g b i u r b e q d v o a g w m k} {q y z s f q o h}
76 {u z a q u f i f f b} {b s p b a a d x r r i q f}
77 {w h h z t h p o a h h e e} {h w r p h k z v y f r x}
78 {c a r k i a p u x} {f w l p t e m l}
79 {q q u k o t r k z} {f b m c w p s s o z}
80 {t i g v y q s r x m r x z e f} {x o j w a u e y s j c b u p p r o}
81 {n j n h r l a r e o z w e} {v o r r j a v b}
82 {i f i d k w d n h} {o i d z i z l m w s b q v u}
83 {m d g q q b k b w f q q p p} {j m q f b y c i z k y q p l e a}
84 {m x o n y f g} {y c n x n q j i y c l h b r q z}
85 {v o z l n p c} {g n j n t b b x n c l d a g j v}
86 {z n a y f b t k k t d b z a v} {r p c n r u k u}
87 {b q t x z e c w} {q a o a l o a h i m j r}
88 {j f h o x x a z g b a f a m i b} {j z c z y x e x w t}
89 {t c t p r s u c q n} {z x l i k n f q l n t}
90 {w t d q j g m r f k n} {l e w f w w a l y q k i q t p c t}
91 {c b o k l i c b s j n m b l} {y f p q o w g}
92 {f y d j o q t c c q m f j s t} {f h e d y m o k}
93 {k x j r m a d o i z j} {r t t t f e b r x i v j v g o}
94 {s f e a e t i h h d q p z t q} {b k m k w h c}
95 {h b n j t k i h o q u} {w n g i t o k c a m y p f l x c p}
96 {f c x p y r b m o l m o a} {p c a q s u n n x d c f a o}
97 {u h h k m n k} {u b v n u a o c}
98 {s p e t c z d f n w f} {l s f j b l c e s h}
99 {r c v w i v h a t a c v c r e} {h h u m g o f b a e o}
}
# Argument $expr is an FTS5 match expression designed to be executed against
# an FTS5 table with the following schema:
#
# CREATE VIRTUAL TABLE xy USING fts5(x, y);
#
# Assuming the table contains the same records as stored in the global
# $::data list (see above), this function returns a list containing one
# element for each match in the dataset. The elements are themselves lists
# formatted as follows:
#
# <rowid> {<phrase 0 matches> <phrase 1 matches>...}
#
# where each <phrase X matches> element is a list of phrase matches in the
# same form as returned by auxiliary scalar function fts5_test().
#
proc matchdata {bPos expr {bAsc 1}} {
  # Have fts5_expr_tcl() translate the match expression into a Tcl script.
  # The script refers to the local variable $cols and the global ::pc.
  set script [db one {
    SELECT fts5_expr_tcl($expr, 'nearset $cols -pc ::pc', 'x', 'y')
  }]

  # Run the script once per record of $::data. A record with a non-empty
  # result contributes either "id hits" (bPos true) or just "id".
  set matches [list]
  foreach {id x y} $::data {
    set cols [list $x $y]
    set ::pc 0
    set hits [eval $script]
    if {[llength $hits] == 0} continue
    if {$bPos} {
      lappend matches [list $id $hits]
    } else {
      lappend matches $id
    }
  }

  # Order the entries by id, then flatten them into a single list.
  set direction [expr {$bAsc ? "-increasing" : "-decreasing"}]
  set matches [lsort -integer $direction -index 0 $matches]
  return [concat {*}$matches]
}
#
# End of test code
#-------------------------------------------------------------------------
# Auxiliary function body: return a list with one entry for each instance
# reported by the command $cmd. An entry is the value returned by
# "$cmd xInst N" with every space replaced by a "." character.
proc fts5_test_poslist {cmd} {
  set poslist [list]
  set iInst 0
  while {$iInst < [$cmd xInstCount]} {
    set inst [$cmd xInst $iInst]
    lappend poslist [string map {{ } .} $inst]
    incr iInst
  }
  return $poslist
}
foreach {tn2 sql} {
1 {}
2 {BEGIN}
} {
# Start every pass of the enclosing loop with an empty database and a
# freshly registered fts5_test_poslist auxiliary function.
reset_db
sqlite3_fts5_create_function db fts5_test_poslist fts5_test_poslist
# The test name carries the $tn2 prefix, like the other tests in this
# loop, so that the passes do not share one test name.
do_execsql_test $tn2.1.0 {
  CREATE VIRTUAL TABLE xx USING fts5(x,y);
  INSERT INTO xx(xx, rank) VALUES('pgsz', 32);
}
execsql $sql
do_test $tn2.1.1 {
foreach {id x y} $data {
execsql { INSERT INTO xx(rowid, x, y) VALUES($id, $x, $y) }
}
execsql { INSERT INTO xx(xx) VALUES('integrity-check') }
} {}
#-------------------------------------------------------------------------
# Test phrase queries.
#
foreach {tn phrase} {
1 "o"
2 "b q"
3 "e a e"
4 "m d g q q b k b w f q q p p"
5 "l o o l v v k"
6 "a"
7 "b"
8 "c"
9 "no"
10 "L O O L V V K"
} {
set expr "\"$phrase\""
set res [matchdata 1 $expr]
do_execsql_test $tn2.1.2.$tn.[llength $res] {
SELECT rowid, fts5_test_poslist(xx) FROM xx WHERE xx match $expr
} $res
}
#-------------------------------------------------------------------------
# Test some AND and OR queries.
#
foreach {tn expr} {
1.1 "a AND b"
1.2 "a+b AND c"
1.3 "d+c AND u"
1.4 "d+c AND u+d"
2.1 "a OR b"
2.2 "a+b OR c"
2.3 "d+c OR u"
2.4 "d+c OR u+d"
3.1 { a AND b AND c }
} {
set res [matchdata 1 $expr]
do_execsql_test $tn2.2.$tn.[llength $res] {
SELECT rowid, fts5_test_poslist(xx) FROM xx WHERE xx match $expr
} $res
}
#-------------------------------------------------------------------------
# Queries on a specific column.
#
foreach {tn expr} {
1.1 "x:a"
1.2 "y:a"
1.3 "x:b"
1.4 "y:b"
2.1 "{x}:a"
2.2 "{y}:a"
2.3 "{x}:b"
2.4 "{y}:b"
3.1 "{x y}:a"
3.2 "{y x}:a"
3.3 "{x x}:b"
3.4 "{y y}:b"
4.1 {{"x" "y"}:a}
4.2 {{"y" x}:a}
4.3 {{x "x"}:b}
4.4 {{"y" y}:b}
} {
set res [matchdata 1 $expr]
do_execsql_test $tn2.3.$tn.[llength $res] {
SELECT rowid, fts5_test_poslist(xx) FROM xx WHERE xx match $expr
} $res
}
#-------------------------------------------------------------------------
# Some NEAR queries.
#
# Each NEAR expression is evaluated twice: by matchdata, which computes
# the expected result in Tcl, and by FTS5 itself. Case numbers are unique
# (the number 2 was previously used for two different expressions).
foreach {tn expr} {
  1 "NEAR(a b)"
  2 "NEAR(r c)"
  3 { NEAR(r c, 5) }
  4 { NEAR(r c, 3) }
  5 { NEAR(r c, 2) }
  6 { NEAR(r c, 0) }
  7 { NEAR(a b c) }
  8 { NEAR(a b c, 8) }
  9 { x : NEAR(r c) }
  10 { y : NEAR(r c) }
} {
  set res [matchdata 1 $expr]
  do_execsql_test $tn2.4.1.$tn.[llength $res] {
    SELECT rowid, fts5_test_poslist(xx) FROM xx WHERE xx match $expr
  } $res
}
do_test $tn2.4.1 { nearset {{a b c}} -- a } {0.0.0}
do_test $tn2.4.2 { nearset {{a b c}} -- c } {0.0.2}
foreach {tn expr tclexpr} {
1 {a b} {AND [N $x -- {a}] [N $x -- {b}]}
} {
do_execsql_test $tn2.5.$tn {
SELECT fts5_expr_tcl($expr, 'N $x')
} [list $tclexpr]
}
#-------------------------------------------------------------------------
#
do_execsql_test $tn2.6.integrity {
INSERT INTO xx(xx) VALUES('integrity-check');
}
#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM xx_data} {puts $r}
foreach {bAsc sql} {
1 {SELECT rowid FROM xx WHERE xx MATCH $expr}
0 {SELECT rowid FROM xx WHERE xx MATCH $expr ORDER BY rowid DESC}
} {
foreach {tn expr} {
0.1 x
1 { NEAR(r c) }
2 { NEAR(r c, 5) }
3 { NEAR(r c, 3) }
4 { NEAR(r c, 2) }
5 { NEAR(r c, 0) }
6 { NEAR(a b c) }
7 { NEAR(a b c, 8) }
8 { x : NEAR(r c) }
9 { y : NEAR(r c) }
10 { x : "r c" }
11 { y : "r c" }
12 { a AND b }
13 { a AND b AND c }
14a { a }
14b { a OR b }
15 { a OR b AND c }
16 { c AND b OR a }
17 { c AND (b OR a) }
18 { c NOT (b OR a) }
19 { c NOT b OR a AND d }
} {
set res [matchdata 0 $expr $bAsc]
do_execsql_test $tn2.6.$bAsc.$tn.[llength $res] $sql $res
}
}
}
# When called with only an expression argument, fts5_expr_tcl() is
# expected to emit "nearset" as the command name for each phrase.
do_execsql_test 3.1 {
SELECT fts5_expr_tcl('a AND b');
} {{AND [nearset -- {a}] [nearset -- {b}]}}
finish_test

235
ext/fts5/test/fts5ad.test Normal file
View File

@ -0,0 +1,235 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS5 module.
#
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ad
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
# Tests 1.*: prefix queries ("c*", "i*", ...) against a small three-row
# table, first with results in descending rowid order and then without an
# ORDER BY clause.
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE yy USING fts5(x, y);
INSERT INTO yy VALUES('Changes the result to be', 'the list of all matching');
INSERT INTO yy VALUES('indices (or all matching', 'values if -inline is');
INSERT INTO yy VALUES('specified as well.) If', 'indices are returned, the');
} {}
# Each entry is: test number, prefix query, expected rowids (descending).
foreach {tn match res} {
1 {c*} {1}
2 {i*} {3 2}
3 {t*} {3 1}
4 {r*} {3 1}
} {
do_execsql_test 1.$tn {
SELECT rowid FROM yy WHERE yy MATCH $match ORDER BY rowid DESC
} $res
}
# The same queries without ORDER BY. The expected rowids are listed in
# ascending order.
foreach {tn match res} {
5 {c*} {1}
6 {i*} {2 3}
7 {t*} {1 3}
8 {r*} {1 3}
} {
do_execsql_test 1.$tn {
SELECT rowid FROM yy WHERE yy MATCH $match
} $res
}
foreach {T create} {
2 {
CREATE VIRTUAL TABLE t1 USING fts5(a, b);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}
3 {
CREATE VIRTUAL TABLE t1 USING fts5(a, b, prefix=1,2,3,4,5);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}
4 {
CREATE VIRTUAL TABLE t1 USING fts5(a, b);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
BEGIN;
}
5 {
CREATE VIRTUAL TABLE t1 USING fts5(a, b, prefix=1,2,3,4,5);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
BEGIN;
}
} {
# Drop any table left behind by the previous configuration and build the
# one under test. This test is named $T.0 because $T.1 is the name of the
# data-loading test that follows it.
do_test $T.0 {
  execsql { DROP TABLE IF EXISTS t1 }
  execsql $create
} {}
do_test $T.1 {
foreach {rowid a b} {
0 {fghij uvwxyz klmn pq uvwx} {klmn f fgh uv fghij klmno}
1 {uv f abcd abcd fghi} {pq klm uv uv fgh uv a}
2 {klmn klm pqrs fghij uv} {f k uvw ab abcd pqr uv}
3 {ab pqrst a fghi ab pqr fg} {k klmno a fg abcd}
4 {abcd pqrst uvwx a fgh} {f klmno fghij kl pqrst}
5 {uvwxyz k abcde u a} {uv k k kl klmn}
6 {uvwxyz k klmn pqrst uv} {fghi pqrs abcde u k}
7 {uvwxy klmn u p pqrst fgh} {p f fghi abcd uvw kl uv}
8 {f klmno pqrst uvwxy pqrst} {uv abcde klm pq pqr}
9 {f abcde a uvwxyz pqrst} {fghij abc k uvwx pqr fghij uvwxy}
10 {ab uv f fg pqrst uvwxy} {fgh p uv k abc klm uvw}
11 {pq klmno a uvw abcde uvwxyz} {fghij pq uvwxyz pqr fghi}
12 {fgh u pq fgh uvw} {uvw pqr f uvwxy uvwx}
13 {uvwx klmn f fgh abcd pqr} {uvw k fg uv klm abcd}
14 {ab uvwx pqrst pqr uvwxyz pqrs} {uvwxyz abcde ab ab uvw abcde}
15 {abc abcde uvwxyz abc kl k pqr} {klm k k klmno u fgh}
16 {fghi abcd fghij uv uvwxyz ab uv} {klmn pqr a uvw fghi}
17 {abc pqrst fghi uvwx uvw klmn fghi} {ab fg pqr pqrs p}
18 {pqr kl a fghij fgh fg kl} {pqr uvwxyz uvw abcd uvwxyz}
19 {fghi fghi pqr kl fghi f} {klmn u u klmno klmno}
20 {abc pqrst klmno kl pq uvwxy} {abc k fghi pqrs klm}
21 {a pqr uvwxyz uv fghi a fgh} {abc pqrs pqrst pq klm}
22 {klm abc uvwxyz klm pqrst} {fghij k pq pqr u klm fghij}
23 {p klm uv p a a} {uvwxy klmn uvw abcde pq}
24 {uv fgh fg pq uvwxy u uvwxy} {pqrs a uvw p uvwx uvwxyz fg}
25 {fghij fghi klmn abcd pq kl} {fghi abcde pqrs abcd fgh uvwxy}
26 {pq fgh a abc klmno klmn} {fgh p k p fg fghij}
27 {fg pq kl uvwx fghij pqrst klmn} {abcd uvw abcd fghij f fghij}
28 {uvw fghi p fghij pq fgh uvwx} {k fghij abcd uvwx pqr fghi}
29 {klm pq abcd pq f uvwxy} {pqrst p fghij pqr p}
30 {ab uvwx fg uvwx klmn klm} {klmn klmno fghij klmn klm}
31 {pq k pqr abcd a pqrs} {abcd abcd uvw a abcd klmno ab}
32 {pqrst u abc pq klm} {abc kl uvwxyz fghij u fghi p}
33 {f uvwxy u k f uvw uvwx} {pqrs uvw fghi fg pqrst klm}
34 {pqrs pq fghij uvwxyz pqr} {ab abc abc uvw f pq f}
35 {uvwxy ab uvwxy klmno kl pqrs} {abcde uvw pqrs uvwx k k}
36 {uvwxyz k ab abcde abc uvw} {uvw abcde uvw klmn uv klmn}
37 {k kl uv abcde uvwx fg u} {u abc uvwxy k fg abcd}
38 {fghi pqrst fghi pqr pqrst uvwx} {u uv uvwx fghi abcde}
39 {k pqrst k uvw fg pqrst fghij} {uvwxy ab kl klmn uvwxyz abcde}
40 {fg uvwxy pqrs klmn uvwxyz klm p} {k uv ab fghij fgh k pqrs}
41 {uvwx abc f pq uvwxy k} {ab uvwxyz abc f fghij}
42 {uvwxy klmno uvwxyz uvwxyz pqrst} {uv kl kl klmno k f abcde}
43 {abcde ab pqrs fg f fgh} {abc fghij fghi k k}
44 {uvw abcd a ab pqrst klmn fg} {pqrst u uvwx pqrst fghij f pqrst}
45 {uvwxy p kl uvwxyz ab pqrst fghi} {abc f pqr fg a k}
46 {u p f a fgh} {a kl pq uv f}
47 {pqrs abc fghij fg abcde ab a} {p ab uv pqrs kl fghi abcd}
48 {abcde uvwxy pqrst uv abc pqr uvwx} {uvwxy klm uvwxy uvwx k}
49 {fgh klm abcde klmno u} {a f fghij f uvwxyz abc u}
50 {uv uvw uvwxyz uvwxyz uv ab} {uvwx pq fg u k uvwxy}
51 {uvwxy pq p kl fghi} {pqrs fghi pqrs abcde uvwxyz ab}
52 {pqr p uvwxy kl pqrs klmno fghij} {ab abcde abc pqrst pqrs uv}
53 {fgh pqrst p a klmno} {ab ab pqrst pqr kl pqrst}
54 {abcd klm ab uvw a fg u} {f pqr f abcd uv}
55 {u fg uvwxyz k uvw} {abc pqrs f fghij fg pqrs uvwxy}
56 {klm fg p fghi fg a} {uv a fghi uvwxyz a fghi}
57 {uvwxy k abcde fgh f fghi} {f kl klmn f fghi klm}
58 {klm k fgh uvw fgh fghi} {klmno uvwx u pqrst u}
59 {fghi pqr pqrst p uvw fghij} {uv pqrst pqrs pq fghij klm}
60 {uvwx klm uvwxy uv klmn} {p a a abc klmn ab k}
61 {uvwxy uvwx klm uvwx klm} {pqrs ab ab uvwxyz fg}
62 {kl uv uv uvw fg kl k} {abcde uvw fgh uvwxy klm}
63 {a abc fgh u klm abcd} {fgh pqr uv klmn fghij}
64 {klmn k klmn klmno pqrs pqr} {fg kl abcde klmno uvwxy kl pq}
65 {uvwxyz klm fghi abc abcde kl} {uvwxy uvw uvwxyz uvwxyz pq pqrst}
66 {pq klm abc pqrst fgh f} {u abcde pqrst abcde fg}
67 {u pqrst kl u uvw klmno} {u pqr pqrs fgh u p}
68 {abc fghi uvwxy fgh k pq} {uv p uvwx uvwxyz ab}
69 {klmno f uvwxyz uvwxy klmn fg ab} {fgh kl a pqr abcd pqr}
70 {fghi pqrst pqrst uv a} {uvwxy k p uvw uvwx a}
71 {a fghij f p uvw} {klm fg abcd abcde klmno pqrs}
72 {uv uvwx uvwx uvw klm} {uv fghi klmno uvwxy uvw}
73 {kl uvwxy ab f pq klm u} {uvwxy klmn klm abcd pq fg k}
74 {uvw pqrst abcd uvwxyz ab} {fgh fgh klmn abc pq}
75 {uvwxyz klm pq abcd klmno pqr uvwxyz} {kl f a fg pqr klmn}
76 {uvw uvwxy pqr k pqrst kl} {uvwxy abc uvw uvw u}
77 {fgh klm u uvwxyz f uvwxy abcde} {uv abcde klmno u u ab}
78 {klmno abc pq pqr fgh} {p uv abcd fgh abc u k}
79 {fg pqr uvw pq uvwx} {uv uvw fghij pqrs fg p}
80 {abcd pqrs uvwx uvwxy uvwx} {u uvw pqrst pqr abcde pqrs kl}
81 {uvwxyz klm pq uvwxy fghij} {p pq klm fghij u a a}
82 {uvwx k uvwxyz klmno pqrst kl} {abcde p f pqrst abcd uvwxyz p}
83 {abcd abcde klm pqrst uvwxyz} {uvw pqrst u p uvwxyz a pqrs}
84 {k klm abc uv uvwxy klm klmn} {k abc pqr a abc p kl}
85 {klmn abcd pqrs p pq klm a} {klmn kl ab uvw pq}
86 {klmn a pqrs abc uvw pqrst} {a pqr kl klm a k f}
87 {pqrs ab uvwx uvwxy a pqr f} {fg klm uvwx pqr pqr}
88 {klmno ab k kl u uvwxyz} {uv kl uvw fghi uv uvw}
89 {pq fghi pqrst klmn uvwxy abc pqrs} {fg f f fg abc abcde klm}
90 {kl a k fghi uvwx fghi u} {ab uvw pqr fg a p abc}
91 {uvwx pqrs klmno ab fgh uvwx} {pqr uvwx abc kl f klmno kl}
92 {fghij pq pqrs fghij f pqrst} {u abcde fg pq pqr fgh k}
93 {fgh u pqrs abcde klmno abc} {abc fg pqrst pqr abcde}
94 {uvwx p abc f pqr p} {k pqrs kl klm abc fghi klm}
95 {kl p klmno uvwxyz klmn} {fghi ab a fghi pqrs kl}
96 {pqr fgh pq uvwx a} {uvw klm klmno fg uvwxy uvwx}
97 {fg abc uvwxyz fghi pqrst pq} {abc k a ab abcde f}
98 {uvwxy fghi uvwxy u abcde abcde uvw} {klmn uvwx pqrs uvw uvwxy abcde}
99 {pq fg fghi uvwx uvwx fghij uvwxy} {klmn klmn f abc fg a}
} {
execsql {
INSERT INTO t1(rowid, a, b) VALUES($rowid, $a, $b);
}
}
} {}
# Compute in Tcl the result expected from a prefix query. The rows of t1
# are visited in descending rowid order. A rowid is returned when every
# glob pattern in $prefixlist matches at least one word of column a or
# of column b.
proc prefix_query {prefixlist} {
  set hits [list]
  db eval {SELECT rowid, a, b FROM t1 ORDER BY rowid DESC} {
    set allFound 1
    foreach pattern $prefixlist {
      if {[lsearch -glob $a $pattern]>=0 || [lsearch -glob $b $pattern]>=0} {
        continue
      }
      set allFound 0
      break
    }
    if {$allFound} { lappend hits $rowid }
  }
  return $hits
}
# Run every prefix query twice: once with ORDER BY rowid DESC (bAsc=0)
# and once without an ORDER BY clause (bAsc=1). The expected rowids come
# from prefix_query, which returns them in descending order, so they are
# re-sorted for the bAsc=1 case.
foreach {bAsc sql} {
  0 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix ORDER BY rowid DESC}
  1 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix}
} {
  foreach {tn prefix} {
    1 {a*} 2 {ab*} 3 {abc*} 4 {abcd*} 5 {abcde*}
    6 {f*} 7 {fg*} 8 {fgh*} 9 {fghi*} 10 {fghij*}
    11 {k*} 12 {kl*} 13 {klm*} 14 {klmn*} 15 {klmno*}
    16 {p*} 17 {pq*} 18 {pqr*} 19 {pqrs*} 20 {pqrst*}
    21 {u*} 22 {uv*} 23 {uvw*} 24 {uvwx*} 25 {uvwxy*} 26 {uvwxyz*}
    27 {x*}
    28 {a f*} 29 {a* f*} 30 {a* fghij*}
  } {
    set res [prefix_query $prefix]
    if {$bAsc} {
      set res [lsort -integer -increasing $res]
    }
    set n [llength $res]
    do_execsql_test $T.$bAsc.$tn.$n $sql $res
  }
}
catchsql COMMIT
}
finish_test

281
ext/fts5/test/fts5ae.test Normal file
View File

@ -0,0 +1,281 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS5 module.
#
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ae
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE t1 USING fts5(a, b);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}
do_execsql_test 1.1 {
INSERT INTO t1 VALUES('hello', 'world');
SELECT rowid FROM t1 WHERE t1 MATCH 'hello' ORDER BY rowid ASC;
} {1}
do_execsql_test 1.2 {
INSERT INTO t1 VALUES('world', 'hello');
SELECT rowid FROM t1 WHERE t1 MATCH 'hello' ORDER BY rowid ASC;
} {1 2}
do_execsql_test 1.3 {
INSERT INTO t1 VALUES('world', 'world');
SELECT rowid FROM t1 WHERE t1 MATCH 'hello' ORDER BY rowid ASC;
} {1 2}
do_execsql_test 1.4.1 {
INSERT INTO t1 VALUES('hello', 'hello');
}
do_execsql_test 1.4.2 {
SELECT rowid FROM t1 WHERE t1 MATCH 'hello' ORDER BY rowid ASC;
} {1 2 4}
fts5_aux_test_functions db
#-------------------------------------------------------------------------
#
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE t2 USING fts5(x, y);
INSERT INTO t2 VALUES('u t l w w m s', 'm f m o l t k o p e');
INSERT INTO t2 VALUES('f g q e l n d m z x q', 'z s i i i m f w w f n g p');
}
do_execsql_test 2.1 {
SELECT rowid, fts5_test_poslist(t2) FROM t2
WHERE t2 MATCH 'm' ORDER BY rowid;
} {
1 {0.0.5 0.1.0 0.1.2}
2 {0.0.7 0.1.5}
}
do_execsql_test 2.2 {
SELECT rowid, fts5_test_poslist(t2) FROM t2
WHERE t2 MATCH 'u OR q' ORDER BY rowid;
} {
1 {0.0.0}
2 {1.0.2 1.0.10}
}
do_execsql_test 2.3 {
SELECT rowid, fts5_test_poslist(t2) FROM t2
WHERE t2 MATCH 'y:o' ORDER BY rowid;
} {
1 {0.1.3 0.1.7}
}
#-------------------------------------------------------------------------
#
do_execsql_test 3.0 {
CREATE VIRTUAL TABLE t3 USING fts5(x, y);
INSERT INTO t3 VALUES( 'j f h o x x a z g b a f a m i b', 'j z c z y x w t');
INSERT INTO t3 VALUES( 'r c', '');
}
do_execsql_test 3.1 {
SELECT rowid, fts5_test_poslist(t3) FROM t3 WHERE t3 MATCH 'NEAR(a b)';
} {
1 {0.0.6 1.0.9 0.0.10 0.0.12 1.0.15}
}
do_execsql_test 3.2 {
SELECT rowid, fts5_test_poslist(t3) FROM t3 WHERE t3 MATCH 'NEAR(r c)';
} {
2 {0.0.0 1.0.1}
}
do_execsql_test 3.3 {
INSERT INTO t3
VALUES('k x j r m a d o i z j', 'r t t t f e b r x i v j v g o');
SELECT rowid, fts5_test_poslist(t3)
FROM t3 WHERE t3 MATCH 'a OR b AND c';
} {
1 {0.0.6 1.0.9 0.0.10 0.0.12 1.0.15 2.1.2}
3 0.0.5
}
#-------------------------------------------------------------------------
#
do_execsql_test 4.0 {
CREATE VIRTUAL TABLE t4 USING fts5(x, y);
INSERT INTO t4
VALUES('k x j r m a d o i z j', 'r t t t f e b r x i v j v g o');
}
do_execsql_test 4.1 {
SELECT rowid, fts5_test_poslist(t4) FROM t4 WHERE t4 MATCH 'a OR b AND c';
} {
1 0.0.5
}
#-------------------------------------------------------------------------
# Test that the xColumnSize() and xColumnTotalSize() APIs work.
#
reset_db
fts5_aux_test_functions db
do_execsql_test 5.1 {
CREATE VIRTUAL TABLE t5 USING fts5(x, y);
INSERT INTO t5 VALUES('a b c d', 'e f g h i j');
INSERT INTO t5 VALUES('', 'a');
INSERT INTO t5 VALUES('a', '');
}
do_execsql_test 5.2 {
SELECT rowid, fts5_test_columnsize(t5) FROM t5 WHERE t5 MATCH 'a'
ORDER BY rowid DESC;
} {
3 {1 0}
2 {0 1}
1 {4 6}
}
do_execsql_test 5.3 {
SELECT rowid, fts5_test_columntext(t5) FROM t5 WHERE t5 MATCH 'a'
ORDER BY rowid DESC;
} {
3 {a {}}
2 {{} a}
1 {{a b c d} {e f g h i j}}
}
do_execsql_test 5.4 {
SELECT rowid, fts5_test_columntotalsize(t5) FROM t5 WHERE t5 MATCH 'a'
ORDER BY rowid DESC;
} {
3 {5 7}
2 {5 7}
1 {5 7}
}
do_execsql_test 5.5 {
INSERT INTO t5 VALUES('x y z', 'v w x y z');
SELECT rowid, fts5_test_columntotalsize(t5) FROM t5 WHERE t5 MATCH 'a'
ORDER BY rowid DESC;
} {
3 {8 12}
2 {8 12}
1 {8 12}
}
#-------------------------------------------------------------------------
# Test the xTokenize() API
#
reset_db
fts5_aux_test_functions db
do_execsql_test 6.1 {
CREATE VIRTUAL TABLE t6 USING fts5(x, y);
INSERT INTO t6 VALUES('There are more', 'things in heaven and earth');
INSERT INTO t6 VALUES(', Horatio, Than are', 'dreamt of in your philosophy.');
}
do_execsql_test 6.2 {
SELECT rowid, fts5_test_tokenize(t6) FROM t6 WHERE t6 MATCH 't*'
} {
1 {{there are more} {things in heaven and earth}}
2 {{horatio than are} {dreamt of in your philosophy}}
}
#-------------------------------------------------------------------------
# Test the xQueryPhrase() API
#
reset_db
fts5_aux_test_functions db
do_execsql_test 7.1 {
CREATE VIRTUAL TABLE t7 USING fts5(x, y);
}
do_test 7.2 {
foreach {x y} {
{q i b w s a a e l o} {i b z a l f p t e u}
{b a z t a l o x d i} {b p a d b f h d w y}
{z m h n p p u i e g} {v h d v b x j j c z}
{a g i m v a u c b i} {p k s o t l r t b m}
{v v c j o d a s c p} {f f v o k p o f o g}
} {
execsql {INSERT INTO t7 VALUES($x, $y)}
}
execsql { SELECT count(*) FROM t7 }
} {5}
foreach {tn q res} {
1 a {{4 2}}
2 b {{3 4}}
3 c {{2 1}}
4 d {{2 2}}
5 {a AND b} {{4 2} {3 4}}
6 {a OR b OR c OR d} {{4 2} {3 4} {2 1} {2 2}}
} {
do_execsql_test 7.3.$tn {
SELECT fts5_test_queryphrase(t7) FROM t7 WHERE t7 MATCH $q LIMIT 1
} [list $res]
}
do_execsql_test 7.4 {
SELECT fts5_test_rowcount(t7) FROM t7 WHERE t7 MATCH 'a';
} {5 5 5 5}
#do_execsql_test 7.4 {
# SELECT rowid, bm25debug(t7) FROM t7 WHERE t7 MATCH 'a';
#} {5 5 5 5}
#
#-------------------------------------------------------------------------
#
do_test 8.1 {
execsql { CREATE VIRTUAL TABLE t8 USING fts5(x, y) }
foreach {rowid x y} {
0 {A o} {o o o C o o o o o o o o}
1 {o o B} {o o o C C o o o o o o o}
2 {A o o} {o o o o D D o o o o o o}
3 {o B} {o o o o o D o o o o o o}
4 {E o G} {H o o o o o o o o o o o}
5 {F o G} {I o J o o o o o o o o o}
6 {E o o} {H o J o o o o o o o o o}
7 {o o o} {o o o o o o o o o o o o}
9 {o o o} {o o o o o o o o o o o o}
} {
execsql { INSERT INTO t8(rowid, x, y) VALUES($rowid, $x, $y) }
}
} {}
foreach {tn q res} {
1 {a} {0 2}
2 {b} {3 1}
3 {c} {1 0}
4 {d} {2 3}
5 {g AND (e OR f)} {5 4}
6 {j AND (h OR i)} {5 6}
} {
do_execsql_test 8.2.$tn.1 {
SELECT rowid FROM t8 WHERE t8 MATCH $q ORDER BY bm25(t8);
} $res
do_execsql_test 8.2.$tn.2 {
SELECT rowid FROM t8 WHERE t8 MATCH $q ORDER BY +rank;
} $res
do_execsql_test 8.2.$tn.3 {
SELECT rowid FROM t8 WHERE t8 MATCH $q ORDER BY rank;
} $res
}
finish_test

144
ext/fts5/test/fts5af.test Normal file
View File

@ -0,0 +1,144 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS5 module.
#
# More specifically, the tests in this file focus on the built-in
# snippet() function.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5af
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x, y);
}
# Check the output of the built-in snippet() function for a single document.
#
# DOC is stored as the only row of table t1 and queried with the FTS5
# expression MATCH.  Three sub-tests are run, each expecting snippet() to
# return RES:
#
#   $tn.1   document stored in column x
#   $tn.2   document stored in column y
#   $tn.3   document stored in column x, query uses "ORDER BY rank DESC"
#
# The document and the query are passed to the SQL through the global
# variables v1 and v2.
proc do_snippet_test {tn doc match res} {
  set ::v1 $doc
  set ::v2 $match
  set expected [list $res]

  do_execsql_test $tn.1 {
DELETE FROM t1;
INSERT INTO t1 VALUES($v1, NULL);
SELECT snippet(t1, -1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH $v2;
} $expected

  do_execsql_test $tn.2 {
DELETE FROM t1;
INSERT INTO t1 VALUES(NULL, $v1);
SELECT snippet(t1, -1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH $v2;
} $expected

  do_execsql_test $tn.3 {
DELETE FROM t1;
INSERT INTO t1 VALUES($v1, NULL);
SELECT snippet(t1, -1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH $v2
ORDER BY rank DESC;
} $expected
}
foreach {tn doc res} {
1.1 {X o o o o o o} {[X] o o o o o o}
1.2 {o X o o o o o} {o [X] o o o o o}
1.3 {o o X o o o o} {o o [X] o o o o}
1.4 {o o o X o o o} {o o o [X] o o o}
1.5 {o o o o X o o} {o o o o [X] o o}
1.6 {o o o o o X o} {o o o o o [X] o}
1.7 {o o o o o o X} {o o o o o o [X]}
2.1 {X o o o o o o o} {[X] o o o o o o...}
2.2 {o X o o o o o o} {o [X] o o o o o...}
2.3 {o o X o o o o o} {o o [X] o o o o...}
2.4 {o o o X o o o o} {o o o [X] o o o...}
2.5 {o o o o X o o o} {...o o o [X] o o o}
2.6 {o o o o o X o o} {...o o o o [X] o o}
2.7 {o o o o o o X o} {...o o o o o [X] o}
2.8 {o o o o o o o X} {...o o o o o o [X]}
3.1 {X o o o o o o o o} {[X] o o o o o o...}
3.2 {o X o o o o o o o} {o [X] o o o o o...}
3.3 {o o X o o o o o o} {o o [X] o o o o...}
3.4 {o o o X o o o o o} {o o o [X] o o o...}
3.5 {o o o o X o o o o} {...o o o [X] o o o...}
3.6 {o o o o o X o o o} {...o o o [X] o o o}
3.7 {o o o o o o X o o} {...o o o o [X] o o}
3.8 {o o o o o o o X o} {...o o o o o [X] o}
3.9 {o o o o o o o o X} {...o o o o o o [X]}
4.1 {X o o o o o X o o} {[X] o o o o o [X]...}
4.2 {o X o o o o o X o} {...[X] o o o o o [X]...}
4.3 {o o X o o o o o X} {...[X] o o o o o [X]}
5.1 {X o o o o X o o o} {[X] o o o o [X] o...}
5.2 {o X o o o o X o o} {...[X] o o o o [X] o...}
5.3 {o o X o o o o X o} {...[X] o o o o [X] o}
5.4 {o o o X o o o o X} {...o [X] o o o o [X]}
6.1 {X o o o X o o o} {[X] o o o [X] o o...}
6.2 {o X o o o X o o o} {o [X] o o o [X] o...}
6.3 {o o X o o o X o o} {...o [X] o o o [X] o...}
6.4 {o o o X o o o X o} {...o [X] o o o [X] o}
6.5 {o o o o X o o o X} {...o o [X] o o o [X]}
7.1 {X o o X o o o o o} {[X] o o [X] o o o...}
7.2 {o X o o X o o o o} {o [X] o o [X] o o...}
7.3 {o o X o o X o o o} {...o [X] o o [X] o o...}
7.4 {o o o X o o X o o} {...o [X] o o [X] o o}
7.5 {o o o o X o o X o} {...o o [X] o o [X] o}
7.6 {o o o o o X o o X} {...o o o [X] o o [X]}
} {
do_snippet_test 1.$tn $doc X $res
}
foreach {tn doc res} {
1.1 {X Y o o o o o} {[X Y] o o o o o}
1.2 {o X Y o o o o} {o [X Y] o o o o}
1.3 {o o X Y o o o} {o o [X Y] o o o}
1.4 {o o o X Y o o} {o o o [X Y] o o}
1.5 {o o o o X Y o} {o o o o [X Y] o}
1.6 {o o o o o X Y} {o o o o o [X Y]}
2.1 {X Y o o o o o o} {[X Y] o o o o o...}
2.2 {o X Y o o o o o} {o [X Y] o o o o...}
2.3 {o o X Y o o o o} {o o [X Y] o o o...}
2.4 {o o o X Y o o o} {...o o [X Y] o o o}
2.5 {o o o o X Y o o} {...o o o [X Y] o o}
2.6 {o o o o o X Y o} {...o o o o [X Y] o}
2.7 {o o o o o o X Y} {...o o o o o [X Y]}
3.1 {X Y o o o o o o o} {[X Y] o o o o o...}
3.2 {o X Y o o o o o o} {o [X Y] o o o o...}
3.3 {o o X Y o o o o o} {o o [X Y] o o o...}
3.4 {o o o X Y o o o o} {...o o [X Y] o o o...}
3.5 {o o o o X Y o o o} {...o o [X Y] o o o}
3.6 {o o o o o X Y o o} {...o o o [X Y] o o}
3.7 {o o o o o o X Y o} {...o o o o [X Y] o}
3.8 {o o o o o o o X Y} {...o o o o o [X Y]}
} {
do_snippet_test 2.$tn $doc "X + Y" $res
}
finish_test

138
ext/fts5/test/fts5ag.test Normal file
View File

@ -0,0 +1,138 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS5 module.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ag
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
#-------------------------------------------------------------------------
# This file attempts to verify that the extension APIs work with
# "ORDER BY rank" queries. This is done by comparing the results of
# the fts5_test() function when run with queries of the form:
#
# ... WHERE fts MATCH ? ORDER BY bm25(fts) [ASC|DESC]
#
# and
#
# ... WHERE fts MATCH ? ORDER BY rank [ASC|DESC]
#
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x, y, z);
}
do_test 1.1 {
foreach {x y z} {
{j s m y m r n l u k} {z k f u z g h s w g} {r n o s s b v n w w}
{m v g n d x q r r s} {q t d a q a v l h j} {s k l f s i n v q v}
{m f f d h h s o h a} {y e v r q i u m h d} {b c k q m z l z h n}
{j e m v k p e c j m} {m p v z d x l n i a} {v p u p m t p q i f}
{v r w l e e t d z p} {c s b w k m n k o u} {w g y f v w v w v p}
{k d g o u j p z n o} {t g e q l z i g b j} {f i q q j y h b g h}
{j s w x o t j b t m} {v a v v r t x c q a} {r t k x w u l h a g}
{j y b i u d e m d w} {y s o j h i n a u p} {n a g b u c w e b m}
{b c k s c w j p w b} {m o c o w o b d q q} {n t y o y z y r z e}
{p n q l e l h z q c} {n s e i h c v b b u} {m p d i t a o o f f}
{k c o n v e z l b m} {s m n i n s d e s u} {t a u e q d a o u c}
{h d t o i a g b b p} {k x c i g f g b b k} {x f i v n a n n j i}
{f z k r b u s k z e} {n z v z w l e r h t} {t i s v v a v p n s}
{k f e c t z r e f d} {f m g r c w q k b v} {v y s y f r b f e f}
{z r c t d q q h x b} {u c g z n z u v s s} {y t n f f x b f d x}
{u n p n u t i m e j} {p j j d m f k p m z} {d o l v c o e a h w}
{h o q w t f v i c y} {c q u n r z s l l q} {z x a q w s b w s y}
{y m s x k i m n x c} {b i a n v h z n k a} {w l q p b h h g d y}
{z v s j f p v l f w} {c s b i z e k i g c} {x b v d w j f e d z}
{r k k j e o m k g b} {h b d c h m y b t u} {u j s h k z c u d y}
{v h i v s y z i k l} {d t m w q w c a z p} {r s e s x v d w k b}
{u r e q j y h o o s} {x x z r x y t f j s} {k n h x i i u e c v}
{q l f d a p w l q o} {y z q w j o p b o v} {s u h z h f d f n l}
{q o e o x x l g q i} {j g m h q q w c d b} {o m d h w a g b f n}
{m x k t s s y l v a} {j x t c a u w b w g} {n f j b v x y p u t}
{u w k a q b u w k w} {a h j u o w f s k p} {j o f s h y t j h g}
{x v b l m t l m h l} {t p y i y i q b q a} {k o o z w a c h c f}
{j g c d k w b d t v} {a k v c m a v h v p} {i c a i j g h l j h}
{l m v l c z j b p b} {z p z f l n k i b a} {j v q k g i x g i b}
{m c i w u z m i s z} {i z r f n l q z k w} {x n b p b q r g i z}
{d g i o o x l f x d} {r t m f b n q y c b} {i u g k w x n m p o}
{t o s i q d z x d t} {v a k s q z j c o o} {z f n n r l y w v v}
{w k h d t l j g n n} {r z m v y b l n c u} {v b v s c l n k g v}
{m a g r a b u u n z} {u y l h v w v k b f} {x l p g i s j f x v}
{v s g x k z a k a r} {l t g v j q l k p l} {f h n a x t v s t y}
{z u v u x p s j y t} {g b q e e g l n w g} {e n p j i g j f u r}
{q z l t w o l m p e} {t s g h r p r o t z} {y b f a o n u m z g}
{d t w n y b o g f o} {d a j e r l g g s h} {d z e l w q l t h f}
{f l u w q v x j a h} {f n u l l d m h h w} {d x c c e r o d q j}
{b y f q s q f u l g} {u z w l f d b i a g} {m v q b g u o z e z}
{h z p t s e x i v m} {l h q m e o x x x j} {e e d n p r m g j f}
{k h s g o n s d a x} {u d t t s j o v h a} {z r b a e u v o e s}
{m b b g a f c p a t} {w c m j o d b l g e} {f p j p m o s y v j}
{c r n h d w c a b l} {s g e u s d n j b g} {b o n a x a b x y l}
{r h u x f c d z n o} {x y l g u m i i w d} {t f h b z v r s r g}
{t i o r b v g g p a} {d x l u q k m o s u} {j f h t u n z u k m}
{g j t y d c n j y g} {w e s k v c w i g t} {g a h r g v g h r o}
{e j l a q j g i n h} {d z k c u p n u p p} {t u e e v z v r r g}
{l j s g k j k h z l} {p v d a t x d e q u} {r l u z b m g k s j}
{i e y d u x d i n l} {p f z k m m w p u l} {z l p m r q w n d a}
} {
execsql { INSERT INTO t1 VALUES($x, $y, $z) }
}
set {} {}
} {}
fts5_aux_test_functions db
# Check that "ORDER BY rank" and "ORDER BY bm25(t1)" yield identical output.
#
# For the FTS5 expression E, fts5_test_all(t1) is evaluated over the matching
# rows of t1 sorted by rank and again sorted by bm25(t1); test $tn.1 requires
# the two result lists to be equal.  Test $tn.2 repeats the comparison with
# both queries sorted in DESC order.
proc do_fts5ag_test {tn E} {
  set byRank {SELECT fts5_test_all(t1) FROM t1 WHERE t1 MATCH $E ORDER BY rank}
  set byBm25 {SELECT fts5_test_all(t1) FROM t1 WHERE t1 MATCH $E ORDER BY bm25(t1)}

  foreach {iSub direction} {1 "" 2 " DESC"} {
    set got  [execsql "$byRank$direction"]
    set want [execsql "$byBm25$direction"]
    # The test body simply returns the pre-computed "rank" result.
    uplevel [list do_test $tn.$iSub [list set {} $got] $want]
  }
}
foreach {tn expr} {
2.1 a
2.2 b
2.3 c
2.4 d
2.5 {"m m"}
2.6 {e + s}
3.0 {a AND b}
3.1 {a OR b}
3.2 {b OR c AND d}
3.3 {NEAR(c d)}
} {
do_fts5ag_test $tn $expr
if {[set_test_counter errors]} break
}
finish_test

150
ext/fts5/test/fts5ah.test Normal file
View File

@ -0,0 +1,150 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS5 module.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ah
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
#-------------------------------------------------------------------------
# This file contains tests for very large doclists.
#
do_test 1.0 {
execsql { CREATE VIRTUAL TABLE t1 USING fts5(a) }
execsql { INSERT INTO t1(t1, rank) VALUES('pgsz', 128) }
set v {w w w w w w w w w w w w w w w w w w w w}
execsql { INSERT INTO t1(rowid, a) VALUES(0, $v) }
for {set i 1} {$i <= 10000} {incr i} {
set v {x x x x x x x x x x x x x x x x x x x x}
if {($i % 2139)==0} {lset v 3 Y ; lappend Y $i}
if {($i % 1577)==0} {lset v 5 W ; lappend W $i}
execsql { INSERT INTO t1 VALUES($v) }
}
set v {w w w w w w w w w w w w w w w w w w w w}
execsql { INSERT INTO t1 VALUES($v) }
} {}
do_execsql_test 1.1.1 {
SELECT rowid FROM t1 WHERE t1 MATCH 'x AND w'
} [lsort -integer -incr $W]
do_execsql_test 1.1.2 {
SELECT rowid FROM t1 WHERE t1 MATCH 'x* AND w*'
} [lsort -integer -incr $W]
do_execsql_test 1.2 {
SELECT rowid FROM t1 WHERE t1 MATCH 'y AND x'
} [lsort -integer -incr $Y]
do_execsql_test 1.3 {
INSERT INTO t1(t1) VALUES('integrity-check');
}
# Return the value produced by the special '*reads' query on table t1.
# NOTE(review): presumably a running count of index reads performed by
# FTS5 - confirm against the FTS5 implementation.
proc reads {} {
  return [db one {SELECT t1 FROM t1 WHERE t1 MATCH '*reads'}]
}
# Execute SQL and return by how much the value of [reads] grew while it ran.
#
#   sql   The SQL script to execute with [execsql].
#
# Returns the difference between the [reads] counter sampled after and
# before running the script.  The expression is braced so that its operands
# are substituted exactly once.
proc execsql_reads {sql} {
  set nRead [reads]
  execsql $sql
  expr {[reads] - $nRead}
}
# A query for the common token 'x' must advance the [reads] counter by more
# than 1000.  The measured delta is kept in nReadX and serves as the baseline
# for the tests that follow.
do_test 1.4 {
  set nRead [reads]
  execsql { SELECT rowid FROM t1 WHERE t1 MATCH 'x' }
  set nReadX [expr {[reads] - $nRead}]
  expr {$nReadX>1000}
} {1}
do_test 1.5 {
set fwd [execsql_reads {SELECT rowid FROM t1 WHERE t1 MATCH 'x' }]
set bwd [execsql_reads {
SELECT rowid FROM t1 WHERE t1 MATCH 'x' ORDER BY 1 ASC
}]
expr {$bwd < $fwd + 12}
} {1}
foreach {tn q res} "
1 { SELECT rowid FROM t1 WHERE t1 MATCH 'w + x' } [list $W]
2 { SELECT rowid FROM t1 WHERE t1 MATCH 'x + w' } [list $W]
3 { SELECT rowid FROM t1 WHERE t1 MATCH 'x AND w' } [list $W]
4 { SELECT rowid FROM t1 WHERE t1 MATCH 'y AND x' } [list $Y]
" {
do_test 1.6.$tn.1 {
set n [execsql_reads $q]
puts -nonewline "(n=$n nReadX=$nReadX)"
expr {$n < ($nReadX / 8)}
} {1}
do_test 1.6.$tn.2 {
set n [execsql_reads "$q ORDER BY rowid DESC"]
puts -nonewline "(n=$n nReadX=$nReadX)"
expr {$n < ($nReadX / 8)}
} {1}
do_execsql_test 1.6.$tn.3 $q [lsort -int -incr $res]
do_execsql_test 1.6.$tn.4 "$q ORDER BY rowid DESC" [lsort -int -decr $res]
}
#-------------------------------------------------------------------------
# Now test that adding range constraints on the rowid field reduces the
# number of pages loaded from disk.
#
foreach {tn fraction tail cnt} {
1 0.6 {rowid > 5000} 5000
2 0.2 {rowid > 9000} 1000
3 0.2 {rowid < 1000} 999
4 0.2 {rowid BETWEEN 4000 AND 5000} 1001
5 0.6 {rowid >= 5000} 5001
6 0.2 {rowid >= 9000} 1001
7 0.2 {rowid <= 1000} 1000
8 0.6 {rowid > '5000'} 5000
9 0.2 {rowid > '9000'} 1000
10 0.1 {rowid = 444} 1
} {
set q "SELECT rowid FROM t1 WHERE t1 MATCH 'x' AND $tail"
set n [execsql_reads $q]
set ret [llength [execsql $q]]
do_test "1.7.$tn.asc.(n=$n ret=$ret)" {
expr {$n < ($fraction*$nReadX) && $ret==$cnt}
} {1}
set q "SELECT rowid FROM t1 WHERE t1 MATCH 'x' AND $tail ORDER BY rowid DESC"
set n [execsql_reads $q]
set ret [llength [execsql $q]]
do_test "1.7.$tn.desc.(n=$n ret=$ret)" {
expr {$n < 2*$fraction*$nReadX && $ret==$cnt}
} {1}
}
do_execsql_test 1.8.1 {
SELECT count(*) FROM t1 WHERE t1 MATCH 'x' AND +rowid < 'text';
} {10000}
do_execsql_test 1.8.2 {
SELECT count(*) FROM t1 WHERE t1 MATCH 'x' AND rowid < 'text';
} {10000}
#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}
finish_test

55
ext/fts5/test/fts5ai.test Normal file
View File

@ -0,0 +1,55 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS5 module.
#
# Specifically, it tests transactions and savepoints
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ai
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE t1 USING fts5(a);
} {}
do_execsql_test 1.1 {
BEGIN;
INSERT INTO t1 VALUES('a b c');
INSERT INTO t1 VALUES('d e f');
SAVEPOINT one;
INSERT INTO t1 VALUES('g h i');
SAVEPOINT two;
INSERT INTO t1 VALUES('j k l');
ROLLBACK TO one;
INSERT INTO t1 VALUES('m n o');
SAVEPOINT two;
INSERT INTO t1 VALUES('p q r');
RELEASE one;
SAVEPOINT one;
INSERT INTO t1 VALUES('s t u');
ROLLBACK TO one;
COMMIT;
}
do_execsql_test 1.2 {
INSERT INTO t1(t1) VALUES('integrity-check');
}
finish_test

69
ext/fts5/test/fts5aj.test Normal file
View File

@ -0,0 +1,69 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS5 module.
#
# Specifically, this tests that, provided the amount of data remains
# constant, the FTS index does not grow indefinitely as rows are inserted
# and deleted.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5aj
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
# Return a pseudo-random document: a list of 20 tokens, each one a single
# lower-case letter picked with rand() from the 26-letter alphabet.
proc doc {} {
  set dict [list a b c d e f g h i j k l m n o p q r s t u v w x y z]
  set res [list]
  for {set i 0} {$i < 20} {incr i} {
    # Braced so the expression is compiled once rather than re-parsed.
    lappend res [lindex $dict [expr {int(rand() * 26)}]]
  }
  set res
}
# Summarize the t1_data record that has rowid=10.
#
# The record is decoded with fts5_decode() and, for every element of the
# decoded list after the first, the element's length minus two is reported.
# NOTE(review): presumably this is the number of segments on each level of
# the index, the first two items of a level entry being header fields -
# confirm against the fts5_decode() output format.
#
# Returns the list of per-element values.  The list is empty (rather than
# raising "no such variable") when the decoded record has no such elements.
proc structure {} {
  set res [list]
  set val [db one {SELECT fts5_decode(rowid,block) FROM t1_data WHERE rowid=10}]
  foreach lvl [lrange $val 1 end] {
    lappend res [expr {[llength $lvl]-2}]
  }
  set res
}
expr srand(0)
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x);
INSERT INTO t1(t1, rank) VALUES('pgsz', 64);
}
for {set iTest 0} {$iTest < 50000} {incr iTest} {
if {$iTest > 1000} { execsql { DELETE FROM t1 WHERE rowid=($iTest-1000) } }
set new [doc]
execsql { INSERT INTO t1 VALUES($new) }
if {$iTest==10000} { set sz1 [db one {SELECT count(*) FROM t1_data}] }
if {0==($iTest % 1000)} {
set sz [db one {SELECT count(*) FROM t1_data}]
set s [structure]
do_execsql_test 1.$iTest.$sz.{$s} {
INSERT INTO t1(t1) VALUES('integrity-check')
}
}
}
do_execsql_test 2.0 { INSERT INTO t1(t1) VALUES('integrity-check') }
finish_test

143
ext/fts5/test/fts5ak.test Normal file
View File

@ -0,0 +1,143 @@
# 2014 November 24
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS5 module.
#
# Specifically, the auxiliary function "highlight".
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ak
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
do_execsql_test 1.1 {
CREATE VIRTUAL TABLE ft1 USING fts5(x);
INSERT INTO ft1 VALUES('i d d a g i b g d d');
INSERT INTO ft1 VALUES('h d b j c c g a c a');
INSERT INTO ft1 VALUES('e j a e f h b f h h');
INSERT INTO ft1 VALUES('j f h d g h i b d f');
INSERT INTO ft1 VALUES('d c j d c j b c g e');
INSERT INTO ft1 VALUES('i a d e g j g d a a');
INSERT INTO ft1 VALUES('j f c e d a h j d b');
INSERT INTO ft1 VALUES('i c c f a d g h j e');
INSERT INTO ft1 VALUES('i d i g c d c h b f');
INSERT INTO ft1 VALUES('g d a e h a b c f j');
}
do_execsql_test 1.2 {
SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'e';
} {
{[e] j a [e] f h b f h h}
{d c j d c j b c g [e]}
{i a d [e] g j g d a a}
{j f c [e] d a h j d b}
{i c c f a d g h j [e]}
{g d a [e] h a b c f j}
}
do_execsql_test 1.3 {
SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'h + d';
} {
{[h d] b j c c g a c a}
{j f [h d] g h i b d f}
}
do_execsql_test 1.4 {
SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'd + d';
} {
{i [d d] a g i b g [d d]}
}
do_execsql_test 1.5 {
SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'e e e'
} {
{[e] j a [e] f h b f h h}
{d c j d c j b c g [e]}
{i a d [e] g j g d a a}
{j f c [e] d a h j d b}
{i c c f a d g h j [e]}
{g d a [e] h a b c f j}
}
do_execsql_test 1.6 {
SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'd + d d + d';
} {
{i [d d] a g i b g [d d]}
}
do_execsql_test 2.1 {
CREATE VIRTUAL TABLE ft2 USING fts5(x);
INSERT INTO ft2 VALUES('a b c d e f g h i j');
}
do_execsql_test 2.2 {
SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'b+c+d c+d+e'
} {{a [b c d e] f g h i j}}
do_execsql_test 2.3 {
SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'b+c+d e+f+g'
} {
{a [b c d] [e f g] h i j}
}
do_execsql_test 2.4 {
SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'b+c+d c'
} {
{a [b c d] e f g h i j}
}
do_execsql_test 2.5 {
SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'b+c c+d+e'
} {
{a [b c d e] f g h i j}
}
do_execsql_test 2.6.1 {
SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'f d'
} {
{a b c [d] e [f] g h i j}
}
do_execsql_test 2.6.2 {
SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'd f'
} {
{a b c [d] e [f] g h i j}
}
#-------------------------------------------------------------------------
# The example from the docs.
#
do_execsql_test 3.1 {
-- Assuming this:
CREATE VIRTUAL TABLE ft USING fts5(a);
INSERT INTO ft VALUES('a b c x c d e');
INSERT INTO ft VALUES('a b c c d e');
INSERT INTO ft VALUES('a b c d e');
-- The following SELECT statement returns these three rows:
-- '[a b c] x [c d e]'
-- '[a b c] [c d e]'
-- '[a b c d e]'
SELECT highlight(ft, 0, '[', ']') FROM ft WHERE ft MATCH 'a+b+c AND c+d+e';
} {
{[a b c] x [c d e]}
{[a b c] [c d e]}
{[a b c d e]}
}
finish_test

281
ext/fts5/test/fts5al.test Normal file
View File

@ -0,0 +1,281 @@
# 2014 November 24
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS5 module.
#
# Specifically, this file tests the %_config table.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5al
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
do_execsql_test 1.1 {
CREATE VIRTUAL TABLE ft1 USING fts5(x);
SELECT * FROM ft1_config;
} {version 2}
do_execsql_test 1.2 {
INSERT INTO ft1(ft1, rank) VALUES('pgsz', 32);
SELECT * FROM ft1_config;
} {pgsz 32 version 2}
do_execsql_test 1.3 {
INSERT INTO ft1(ft1, rank) VALUES('pgsz', 64);
SELECT * FROM ft1_config;
} {pgsz 64 version 2}
#--------------------------------------------------------------------------
# Test the logic for parsing the rank() function definition.
#
# Each definition below must be accepted as a value for the 'rank' option.
# Every case carries its own number so that no two cases share a test name
# (cases 10-12 repeat cases 7-9 with extra whitespace around the argument).
foreach {tn defn} {
  1 "fname()"
  2 "fname(1)"
  3 "fname(1,2)"
  4 "fname(null,NULL,nUlL)"
  5 " fname ( null , NULL , nUlL ) "
  6 "fname('abc')"
  7 "fname('a''bc')"
  8 "fname('''abc')"
  9 "fname('abc''')"
  10 "fname( 'a''bc' )"
  11 "fname('''abc' )"
  12 "fname( 'abc''' )"
  13 "fname(X'1234ab')"
  14 "myfunc(1.2)"
  15 "myfunc(-1.0)"
  16 "myfunc(.01,'abc')"
} {
  do_execsql_test 2.1.$tn {
INSERT INTO ft1(ft1, rank) VALUES('rank', $defn);
}
}
foreach {tn defn} {
1 ""
2 "fname"
3 "fname(X'234ab')"
4 "myfunc(-1.,'abc')"
} {
do_test 2.2.$tn {
catchsql { INSERT INTO ft1(ft1, rank) VALUES('rank', $defn) }
} {1 {SQL logic error or missing database}}
}
#-------------------------------------------------------------------------
# Assorted tests of the tcl interface for creating extension functions.
#
do_execsql_test 3.1 {
CREATE VIRTUAL TABLE t1 USING fts5(x);
INSERT INTO t1 VALUES('q w e r t y');
INSERT INTO t1 VALUES('y t r e w q');
}
# Auxiliary function body: ignore the API handle CMD and hand back, as a
# list, whatever extra arguments the function was invoked with.
proc argtest {cmd args} {
  set args
}
sqlite3_fts5_create_function db argtest argtest
do_execsql_test 3.2.1 {
SELECT argtest(t1, 123) FROM t1 WHERE t1 MATCH 'q'
} {123 123}
do_execsql_test 3.2.2 {
SELECT argtest(t1, 123, 456) FROM t1 WHERE t1 MATCH 'q'
} {{123 456} {123 456}}
# Auxiliary function body: return the result of the xRowid method of the
# API handle CMD.
proc rowidtest {cmd} {
  return [$cmd xRowid]
}
sqlite3_fts5_create_function db rowidtest rowidtest
do_execsql_test 3.3.1 {
SELECT rowidtest(t1) FROM t1 WHERE t1 MATCH 'q'
} {1 2}
# Auxiliary function body: collect the result of [$cmd xInst I] for every
# I from 0 up to, but not including, [$cmd xInstCount], and return the
# collected values as a list.
proc insttest {cmd} {
  set instances {}
  set iInst 0
  while {$iInst < [$cmd xInstCount]} {
    lappend instances [$cmd xInst $iInst]
    incr iInst
  }
  return $instances
}
sqlite3_fts5_create_function db insttest insttest
do_execsql_test 3.4.1 {
SELECT insttest(t1) FROM t1 WHERE t1 MATCH 'q'
} {
{{0 0 0}}
{{0 0 5}}
}
do_execsql_test 3.4.2 {
SELECT insttest(t1) FROM t1 WHERE t1 MATCH 'r+e OR w'
} {
{{1 0 1}}
{{0 0 2} {1 0 4}}
}
# Auxiliary function body: return a two-element list holding the results of
# the xColumnSize and xColumnText methods of CMD for column 0, in that order.
proc coltest {cmd} {
  set size [$cmd xColumnSize 0]
  set text [$cmd xColumnText 0]
  return [list $size $text]
}
sqlite3_fts5_create_function db coltest coltest
do_execsql_test 3.5.1 {
SELECT coltest(t1) FROM t1 WHERE t1 MATCH 'q'
} {
{6 {q w e r t y}}
{6 {y t r e w q}}
}
#-------------------------------------------------------------------------
# Tests for remapping the "rank" column.
#
# 4.1.*: Mapped to a function with no arguments.
# 4.2.*: Mapped to a function with one or more arguments.
#
do_execsql_test 4.0 {
CREATE VIRTUAL TABLE t2 USING fts5(a, b);
INSERT INTO t2 VALUES('a s h g s b j m r h', 's b p a d b b a o e');
INSERT INTO t2 VALUES('r h n t a g r d d i', 'l d n j r c f t o q');
INSERT INTO t2 VALUES('q k n i k c a a e m', 'c h n j p g s c i t');
INSERT INTO t2 VALUES('h j g t r e l s g s', 'k q k c i i c k n s');
INSERT INTO t2 VALUES('b l k h d n n n m i', 'p t i a r b t q o l');
INSERT INTO t2 VALUES('k r i l j b g i p a', 't q c h a i m g n l');
INSERT INTO t2 VALUES('a e c q n m o m d g', 'l c t g i s q g q e');
INSERT INTO t2 VALUES('b o j h f o g b p e', 'r t l h s b g i c p');
INSERT INTO t2 VALUES('s q k f q b j g h f', 'n m a o p e i e k t');
INSERT INTO t2 VALUES('o q g g q c o k a b', 'r t k p t f t h p c');
}
# Ranking function body: unpack the three values returned by [$cmd xInst 0]
# into p, c and o and return c*100 + o.
# NOTE(review): presumably c is the column and o the token offset of the
# first phrase instance - confirm against the FTS5 xInst documentation.
proc firstinst {cmd} {
  # [foreach] with an empty body is used purely to assign p, c and o.
  foreach {p c o} [$cmd xInst 0] {}
  # Braced so that the operands are substituted only once.
  expr {$c*100 + $o}
}
sqlite3_fts5_create_function db firstinst firstinst
do_execsql_test 4.1.1 {
SELECT rowid, firstinst(t2) FROM t2 WHERE t2 MATCH 'a' ORDER BY rowid ASC
} {
1 0 2 4 3 6 5 103
6 9 7 0 9 102 10 8
}
do_execsql_test 4.1.2 {
SELECT rowid, rank FROM t2
WHERE t2 MATCH 'a' AND rank MATCH 'firstinst()'
ORDER BY rowid ASC
} {
1 0 2 4 3 6 5 103
6 9 7 0 9 102 10 8
}
do_execsql_test 4.1.3 {
SELECT rowid, rank FROM t2
WHERE t2 MATCH 'a' AND rank MATCH 'firstinst()'
ORDER BY rank DESC
} {
5 103 9 102 6 9 10 8 3 6 2 4 1 0 7 0
}
do_execsql_test 4.1.4 {
INSERT INTO t2(t2, rank) VALUES('rank', 'firstinst()');
SELECT rowid, rank FROM t2 WHERE t2 MATCH 'a' ORDER BY rowid ASC
} {
1 0 2 4 3 6 5 103
6 9 7 0 9 102 10 8
}
do_execsql_test 4.1.5 {
SELECT rowid, rank FROM t2 WHERE t2 MATCH 'a' ORDER BY rank DESC
} {
5 103 9 102 6 9 10 8 3 6 2 4 1 0 7 0
}
do_execsql_test 4.1.6 {
INSERT INTO t2(t2, rank) VALUES('rank', 'firstinst ( ) ');
SELECT rowid, rank FROM t2 WHERE t2 MATCH 'a' ORDER BY rank DESC
} {
5 103 9 102 6 9 10 8 3 6 2 4 1 0 7 0
}
# Ranking function body: return the result of [$cmd xRowid] plus IVAL.
#
# IVAL comes from the argument list of the SQL rank definition, so the
# expression is braced to treat it strictly as a value instead of
# re-evaluating it as expression text.
proc rowidplus {cmd ival} {
  expr {[$cmd xRowid] + $ival}
}
sqlite3_fts5_create_function db rowidplus rowidplus
do_execsql_test 4.2.1 {
INSERT INTO t2(t2, rank) VALUES('rank', 'rowidplus(100) ');
SELECT rowid, rank FROM t2 WHERE t2 MATCH 'o + q + g'
} {
10 110
}
do_execsql_test 4.2.2 {
INSERT INTO t2(t2, rank) VALUES('rank', 'rowidplus(111) ');
SELECT rowid, rank FROM t2 WHERE t2 MATCH 'o + q + g'
} {
10 121
}
do_execsql_test 4.2.3 {
SELECT rowid, rank FROM t2
WHERE t2 MATCH 'o + q + g' AND rank MATCH 'rowidplus(112)'
} {
10 122
}
# Ranking function body: return the result of [$cmd xRowid] modulo IMOD.
#
# IMOD comes from the argument list of the SQL rank definition, so the
# expression is braced to treat it strictly as a value instead of
# re-evaluating it as expression text.
proc rowidmod {cmd imod} {
  expr {[$cmd xRowid] % $imod}
}
sqlite3_fts5_create_function db rowidmod rowidmod
do_execsql_test 4.3.1 {
CREATE VIRTUAL TABLE t3 USING fts5(x);
INSERT INTO t3 VALUES('a one');
INSERT INTO t3 VALUES('a two');
INSERT INTO t3 VALUES('a three');
INSERT INTO t3 VALUES('a four');
INSERT INTO t3 VALUES('a five');
INSERT INTO t3(t3, rank) VALUES('rank', 'bm25()');
}
breakpoint
do_execsql_test 4.3.2 {
SELECT * FROM t3
WHERE t3 MATCH 'a' AND rank MATCH 'rowidmod(4)'
ORDER BY rank ASC
} {
{a four} {a one} {a five} {a two} {a three}
}
do_execsql_test 4.3.3 {
SELECT *, rank FROM t3
WHERE t3 MATCH 'a' AND rank MATCH 'rowidmod(3)'
ORDER BY rank ASC
} {
{a three} 0 {a one} 1 {a four} 1 {a two} 2 {a five} 2
}
do_catchsql_test 4.4.3 {
SELECT *, rank FROM t3 WHERE t3 MATCH 'a' AND rank MATCH 'xyz(3)'
} {1 {no such function: xyz}}
do_catchsql_test 4.4.4 {
SELECT *, rank FROM t3 WHERE t3 MATCH 'a' AND rank MATCH NULL
} {1 {parse error in rank function: }}
finish_test

View File

@ -0,0 +1,86 @@
# 2015 Jun 10
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# The tests in this file focus on renaming FTS5 tables using the
# "ALTER TABLE ... RENAME TO ..." command
#
# Load the shared fts5 test helpers from the directory holding this script.
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5alter
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
#-------------------------------------------------------------------------
# Test renaming regular, contentless and columnsize=0 FTS5 tables.
#
# 1.1.*: a regular table whose old and new names both contain a space.
# After the rename the content and the full-text index must both be
# reachable through the new name.
do_execsql_test 1.1.0 {
CREATE VIRTUAL TABLE "a x" USING fts5(a, x);
INSERT INTO "a x" VALUES('a a a', 'x x x');
ALTER TABLE "a x" RENAME TO "x y";
}
do_execsql_test 1.1.1 {
SELECT * FROM "x y";
SELECT rowid FROM "x y" WHERE "x y" MATCH 'a'
} {{a a a} {x x x} 1}
# 1.2.*: a columnsize=0 table whose names contain a '/' character.
do_execsql_test 1.2.0 {
CREATE VIRTUAL TABLE "one/two" USING fts5(one, columnsize=0);
INSERT INTO "one/two"(rowid, one) VALUES(456, 'd d d');
ALTER TABLE "one/two" RENAME TO "three/four";
}
do_execsql_test 1.2.1 {
SELECT * FROM "three/four";
SELECT rowid FROM "three/four" WHERE "three/four" MATCH 'd'
} {{d d d} 456}
# 1.3.*: a contentless table (content=''). Column values read back empty,
# but rowids and MATCH results must survive the rename.
do_execsql_test 1.3.0 {
CREATE VIRTUAL TABLE t1 USING fts5(val, content='');
INSERT INTO t1(rowid, val) VALUES(-1, 'drop table');
INSERT INTO t1(rowid, val) VALUES(-2, 'drop view');
ALTER TABLE t1 RENAME TO t2;
}
do_execsql_test 1.3.1 {
SELECT rowid, * FROM t2;
SELECT rowid FROM t2 WHERE t2 MATCH 'table'
} {-2 {} -1 {} -1}
#-------------------------------------------------------------------------
# Test renaming an FTS5 table within a transaction.
#
# 2.1: a row inserted earlier in the same transaction must be visible
# through the new table name, both before and after COMMIT.
do_execsql_test 2.1 {
CREATE VIRTUAL TABLE zz USING fts5(a);
INSERT INTO zz(rowid, a) VALUES(-56, 'a b c');
BEGIN;
INSERT INTO zz(rowid, a) VALUES(-22, 'a b c');
ALTER TABLE zz RENAME TO yy;
SELECT rowid FROM yy WHERE yy MATCH 'a + b + c';
COMMIT;
} {-56 -22}
# 2.2: rename and insert inside a transaction that is deliberately left
# open at the end of this test.
do_execsql_test 2.2 {
BEGIN;
ALTER TABLE yy RENAME TO ww;
INSERT INTO ww(rowid, a) VALUES(-11, 'a b c');
SELECT rowid FROM ww WHERE ww MATCH 'a + b + c';
} {-56 -22 -11}
# 2.3: rolling back the transaction opened by 2.2 must undo both the rename
# and the insert, so the table is reachable as "yy" again with two rows.
do_execsql_test 2.3 {
ROLLBACK;
SELECT rowid FROM yy WHERE yy MATCH 'a + b + c';
} {-56 -22}
finish_test

379
ext/fts5/test/fts5auto.test Normal file
View File

@ -0,0 +1,379 @@
# 2015 May 30
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
#
# This file contains automatically generated tests for various types
# of MATCH expressions.
#
# Load the shared fts5 test helpers from the directory holding this script.
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5auto
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
set data {
-4026076
{n x w k b p x b n t t d s} {f j j s p j o}
{w v i y r} {i p y s}
{a o q v e n q r} {q v g u c y a z y}
3995120
{c} {e e w d t}
{x c p f w r s m l r b f d} {g g u e}
{s n u t d v p d} {b k v p m f}
-2913881
{k m} {a}
{w r j z n s l} {m j i w d t w e l}
{z n c} {v f b m}
174082
{j} {q l w u k e q v r i}
{j l} {u v w r s p e l}
{p i k j k q c t g u s} {g u y s m h q k g t e s o r}
3207399
{e t} {}
{p} {y v r b e k h d e v}
{t m w z b g q t s d d h} {o n v u i t o y k j}
182399
{} {m o s o x d y f a x j z}
{x n z r c d} {n r x i r}
{s v s} {a u}
768994
{e u t q v z q k j p u f j p} {y c b}
{p s d} {k n w p m p p}
{u o x s d} {f s g r d b d r m m m z y}
3931037
{c j p x e} {c n k t h z o i}
{} {r r p j k x w q}
{o r d z d} {x}
3105748
{p x r u} {x i s w o t o g x m z i w}
{q x m z} {h c j w b l y w x c o}
{m b k v} {t v q i s a d x}
-2501642
{o u d n w o m o o s n t r h} {k p e u y p e z d j r y g}
{v b b h d d q y j q j} {a m w d t}
{y e f n} {a k x i x}
-1745680
{z u w j f d b f} {j w i c g u d w e}
{m f p v m a s p v c o s} {s c r z o t w l b e a q}
{m k q} {k b a v o}
-932328
{r v i u m q d r} {f z u v h c m r f g}
{r x r} {k p i d h h w h z u a x}
{k m j p} {h l j a e u c i q x x f x g}
-3923818
{t t p b n u i h e c k} {m z}
{v u d c} {v y y j s g}
{o a f k k q p h g x e n z x} {h d w c o l}
-2145922
{z z l f a l g e d c d h} {j b j p k o o u b q}
{d i g q t f d r h k} {n w g j c x r p t y f l c t}
{d o c u k f o} {r y s x z s p p h g t p y c}
4552917
{j w j y h l k u} {n a}
{y h w c n k} {b}
{w} {z l r t s i m v c y}
2292008
{q v q j w y y x u t} {r q z n h a b o}
{d q y} {y v o e j}
{} {a b h c d l p d x}
1407892
{n j j u q d o a u c f} {r d b w o q n g}
{d e v w s} {v d v o u o x s l s j z y}
{j y w h i f g i h m} {v n z b n y}
-4412544
{g h h r s} {h e r e}
{n q s} {o p z r m l l t}
{p} {f s u o b j}
1209110
{o a a z t t u h j} {z z i r k r}
{i c x q w g v o x z i z p} {q o g k i n z x e d v w v}
{p f v b g f e d n p u c y k} {q z z a i p a a s r e z}
3448977
{i v} {l u x t b o k}
{f h u v p} {k a o y j}
{d m k c j} {v c e r u e f i t}
-4703774
{d h v w u z r e h x o l t} {p s f y w y r q d a m w}
{c h g c g j j f t b i c q} {s e}
{c t q j g f} {v n r w y r a g e j d}
2414151
{s o o s d s k q b f q v p e} {j r o b t o p d l o o x}
{d d k t v e} {}
{t v o d w} {w e q w h y c y y i j b a m}
-3342407
{m c h n e p d o c r w n t} {j d k s p q l}
{t g s r w x j l r z r} {h}
{r q v x i r a n h s} {m y p b v w r a u o g q r}
-993951
{l n p u o j d x t u u c o j} {k r n a r e k v i t o e}
{q f t t a a c z v f} {o n m p v f o e n}
{h z h i p s b j z h} {i t w m k c u g n i}
1575251
{} {z s i j d o x j a r t}
{h g j u j n v e n z} {p z j n n f}
{s q q f d w r l y i z d o m} {b a n d h t b y g h d}
4263668
{q g t h f s} {s g x p f q z i s o f l i}
{q k} {w v h a x n a r b}
{m j a h o b i x k r w z q u} {m t r g j o e q t m p u l}
2487819
{m w g x r n e u t s r} {b x a t u u j c r n}
{j} {w f j r e e y l p}
{o u h b} {o c a c a b v}
167966
{o d b s d o a u m o x y} {c}
{r w d o b v} {z e b}
{i n z a f g z o} {m u b a g}
1948599
{n r g q d j s} {n k}
{l b p d v t k h y y} {u m k e c}
{t b n y o t b} {j w c i r x x}
2941631
{l d p l b g f} {e k e}
{p j} {m c s w t b k n l d x}
{f o v y v l} {c w p s w j w c u t y}
3561104
{d r j j r j i g p} {u}
{g r j q} {z l p d s n f c h t d c v z}
{w r c f s x z y} {g f o k g g}
-2223281
{y e t j j z f p o m m z} {h k o g o}
{m x a t} {l q x l}
{r w k d l s y b} {q g k b}
-4502874
{k k b x k l f} {r}
{} {q m z b k h k u n e z}
{z q g y m y u} {}
1757599
{d p z j y u r} {z p l q w j t j}
{n i r x r y j} {}
{h} {w t d q c x z z x e e}
-4809589
{} {z p x u h i i n g}
{w q s u d b f x n} {l y k b b r x t i}
{n d v j q o t o d p z e} {u r y u v u c}
1068408
{y e} {e g s k e w t p v o b k}
{z c m s} {r u r u h n h b p q g b}
{j k b l} {m c d t s r s q a d b o f}
-1972554
{m s w} {d k v s a r k p a r i v}
{g j z k p} {y k c v r e u o q f i b a}
{i p i} {c z w c y b n z i v}
-2052385
{} {x e u f f g n c i x n e i e}
{} {p s w d x p g}
{} {s j a h n}
2805981
{m x g c w o e} {k g u y r y i u e g g}
{f k j v t x p h x k u} {w i}
{b l f z f v t n} {i u d o d p h s m u}
2507621
{} {u b n l x f n j t}
{u r x l h} {h r l m r}
{d y e n b s q v t k n q q} {x l t v w h a s k}
-3138375
{e o f j y x u w v e w z} {r d q g k n n v r c z n e w}
{l y i q z k j p u f q s k} {c i l l i m a a g a z r x f}
{a v k h m q z b y n z} {q g w c y r r o a}
-457971
{j x a w e c s h f l f} {q}
{j f v j u m d q r v v} {x n v a w}
{i e h d h f u w t t z} {v s u l s v o v i k n e}
2265221
{z t c y w n y r t} {n b a x s}
{q w a v} {a b s d x i g w t e z h}
{t l} {j k r w f f y j o k u}
-3941280
{r x t o z} {f j n z k}
{t x e b t d b k w i s} {j t y h i h}
{y q g n g s u v c z j z n g} {n n g t l p h}
2084745
{z d z d} {j}
{o e k t b k a z l w} {o p i h k c x}
{c r b t i j f} {z e n m}
1265843
{} {j s g j j x u y}
{u q t f} {g o g}
{w o j e d} {w q n a c t q x j}
-2941116
{i n c u o} {f b}
{o m s q d o z a q} {f s v o b b}
{o a z c h r} {j e w h b f z}
-1265441
{p g z q v a o a x a} {s t h}
{w i p o c} {s n d g f z w q o d v v l j}
{y f b i a s v} {u m o z k k s t s d p b l p}
-1989158
{r i c n} {r e w w i n z}
{q u s y b w u g y g f o} {y}
{d} {j x i b x u y d c p v a h}
2391989
{b n w x w f q h p i} {e u b b i n a i o c d g}
{v a z o i e n l x l r} {r u f o r k w m d w}
{k s} {r f e j q p w}
}
# 1.0: create the six-column table that the generated tests below populate
# from $data (each record there is a rowid followed by six column values).
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE tt USING fts5(a, b, c, d, e, f);
} {}
# Register the helper SQL functions from the shared fts5 test library on db.
# The procs below call fts5_expr_tcl() and fts5_test_poslist() -
# presumably both are provided by this call; confirm in fts5_common.tcl.
fts5_aux_test_functions db
# Compute, in Tcl, the result expected from running a MATCH query.
#
#   expr     - fts5 MATCH expression
#   tbl      - name of the table whose rows are examined
#   collist  - list of the column names of that table
#   order    - ASC (default) or DESC; rowid order in which rows are visited
#
# Returns a flat list of alternating rowid / row-data values, one pair for
# every row for which evaluating the translated expression produced a
# non-empty result.
proc matchdata {expr tbl collist {order ASC}} {
# Build ", 'c1', 'c2', ..." - the column names as trailing SQL arguments.
set cols ""
foreach e $collist {
append cols ", '$e'"
}
# With -novar only the [set cols] command substitution is expanded here.
# $expr stays in the SQL text (presumably bound as a Tcl variable by the db
# command) and the $cols inside the quoted string stays literal - presumably
# so that the script returned by fts5_expr_tcl can refer to the per-row cols
# list that is set in the loop below. TODO confirm.
set tclexpr [db one [subst -novar {
SELECT fts5_expr_tcl(
$expr, 'nearset $cols -pc ::pc' [set cols]
)
}]]
set res [list]
db eval "SELECT rowid, * FROM $tbl ORDER BY rowid $order" x {
# Collect the value of every result column except rowid. x(*) is iterated
# as the list of column names of the current row.
set cols [list]
foreach col $x(*) {
if {$col != "rowid"} { lappend cols $x($col) }
}
# set cols [list $a $b $c $d $e $f]
# Reset the ::pc variable named in the -pc option above before each row is
# evaluated.
set ::pc 0
set rowdata [eval $tclexpr]
if {$rowdata != ""} { lappend res $x(rowid) $rowdata }
}
set res
}
# Run MATCH expression $expr against table $tbl twice, once in ascending and
# once in descending rowid order, and compare the rowid and position-list
# output of SQLite with the result computed in Tcl by [matchdata].
#
#   tn    - test name prefix
#   tbl   - name of the fts5 table to query
#   cols  - list of the column names of $tbl
#   expr  - fts5 MATCH expression under test
#
# The generated test name records the sort order (a or d) and the number of
# rows that are expected to match.
proc do_auto_test {tn tbl cols expr} {
  foreach order {asc desc} {
    set res [matchdata $expr $tbl $cols $order]
    # $res holds rowid/data pairs, so half its length is the row count. The
    # expression is braced so that it is parsed only once.
    set nRow [expr {[llength $res]/2}]
    set testname "$tn.[string range $order 0 0].rows=$nRow"
    # The expression is handed to SQLite through a global variable; with
    # -novar the $::autotest_expr reference is left in the SQL text while
    # the [set ...] substitutions insert the table name and sort order.
    set ::autotest_expr $expr
    do_execsql_test $testname [subst -novar {
SELECT rowid, fts5_test_poslist([set tbl]) FROM [set tbl]
WHERE [set tbl] MATCH $::autotest_expr ORDER BY rowid [set order]
}] $res
  }
}
#-------------------------------------------------------------------------
#
# Run the same list of MATCH expressions three times. On each pass the
# sample data is rewritten with [string map] before it is loaded, so that
# later passes use a smaller vocabulary of distinct tokens.
#
for {set fold 0} {$fold < 3} {incr fold} {
# Choose the token mapping for this pass; pass 0 leaves the data unchanged.
# NOTE(review): maps 1 and 2 list the key "g" twice and have no entry for
# "n". [string map] uses the first matching key, so the second "g" pair looks
# as if it can never apply - presumably "n" was intended. Confirm.
switch $fold {
0 { set map {} }
1 { set map {
a a b a c b d b e c f c g d h d
i e j e k f l f m g g g o h p h
q i r i s j t j u k v k w l x l
y m z m
}}
2 { set map {
a a b a c a d a e a f a g a h a
i b j b k b l b m b g b o b p b
q c r c s c t c u c v c w c x c
}}
}
# Empty table tt; the reload below happens inside the same transaction.
execsql {
BEGIN;
DELETE FROM tt;
}
# NOTE(review): only the record with rowid -4703774 is inserted; every other
# record in $data is skipped. This looks like a leftover debugging filter -
# confirm whether it is intentional.
foreach {rowid a b c d e f} [string map $map $data] {
if {$rowid==-4703774} {
execsql {
INSERT INTO tt(rowid, a, b, c, d, e, f)
VALUES($rowid, $a, $b, $c, $d, $e, $f)
}
}
}
execsql COMMIT
# Each pair below is a test-name suffix and a MATCH expression. The expected
# result of every expression is computed by do_auto_test itself.
foreach {tn expr} {
A.1 { {a} : x }
A.2 { {a b} : x }
A.3 { {a b f} : x }
A.4 { {f a b} : x }
A.5 { {f a b} : x y }
A.6 { {f a b} : x + y }
A.7 { {c a b} : x + c }
A.8 { {c d} : "l m" }
A.9 { {c e} : "l m" }
A.10 { {a b c a b c a b c f f e} : "l m" }
B.1 { a NOT b }
B.2 { a NOT a:b }
B.3 { a OR (b AND c) }
B.4 { a OR (b AND {a b c}:c) }
B.5 { a OR "b c" }
B.6 { a OR b OR c }
C.1 { a OR (b AND "b c") }
C.2 { a OR (b AND "z c") }
} {
do_auto_test 3.$fold.$tn tt {a b c d e f} $expr
}
}
# Return a copy of $list in which selected elements have been replaced.
#
#   list  - the list to start from
#   args  - alternating index / value arguments; the element at each index
#           is replaced by the value that follows it
#
# Replacements are applied left to right.
proc replace_elems {list args} {
  foreach {position value} $args {
    set list [lreplace $list $position $position $value]
  }
  return $list
}
#-------------------------------------------------------------------------
#
# Tests using large documents. Each column value is a 1000-token document
# of "a" tokens in which a few positions have been replaced by x, y or z.
#
set bigdoc [string trim [string repeat "a " 1000]]
do_test 4.0 {
set a [replace_elems $bigdoc 50 x 950 x]
set b [replace_elems $bigdoc 20 y 21 x 887 x 888 y]
set c [replace_elems $bigdoc 1 z 444 z 789 z]
execsql {
CREATE VIRTUAL TABLE yy USING fts5(c1, c2, c3);
INSERT INTO yy(rowid, c1, c2, c3) VALUES(-56789, $a, $b, $c);
INSERT INTO yy(rowid, c1, c2, c3) VALUES(250, $a, $b, $c);
}
} {}
# Query each rare token, both without a column filter and restricted to
# each of the three columns in turn.
foreach {tn expr} {
1 x
2 y
3 z
4 {c1 : x} 5 {c2 : x} 6 {c3 : x}
7 {c1 : y} 8 {c2 : y} 9 {c3 : y}
10 {c1 : z} 11 {c2 : z} 12 {c3 : z}
} {
do_auto_test 4.$tn yy {c1 c2 c3} $expr
}
finish_test

250
ext/fts5/test/fts5aux.test Normal file
View File

@ -0,0 +1,250 @@
# 2014 Dec 20
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on the auxiliary function APIs.
#
# Load the shared fts5 test helpers from the directory holding this script.
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5aux
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
# Three thin wrappers, each exposing one fts5 extension API method as an SQL
# auxiliary function of the same name. In every case cmd is the command
# prefix of the API handle for the current row and i is the integer
# argument passed through to the method.

# inst: return the result of the xInst method for index $i.
proc inst {cmd i} {
$cmd xInst $i
}
sqlite3_fts5_create_function db inst inst
# colsize: return the result of the xColumnSize method for column $i.
proc colsize {cmd i} {
$cmd xColumnSize $i
}
sqlite3_fts5_create_function db colsize colsize
# totalsize: return the result of the xColumnTotalSize method for column $i.
proc totalsize {cmd i} {
$cmd xColumnTotalSize $i
}
sqlite3_fts5_create_function db totalsize totalsize
# 1.0: two rows. Only the first contains the token "two" (once in each
# column), so the query used by 1.1 to 1.4 has exactly two instances.
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE f1 USING fts5(a, b);
INSERT INTO f1 VALUES('one two', 'two one zero');
INSERT INTO f1 VALUES('one one', 'one one one');
}
# 1.1 to 1.4: instance indexes 0 and 1 are valid; -1 and 2 must fail with
# SQLITE_RANGE. NOTE(review): the three values returned for an instance
# look like phrase number, column number and token offset - confirm against
# the fts5 xInst documentation.
do_catchsql_test 1.1 {
SELECT inst(f1, -1) FROM f1 WHERE f1 MATCH 'two';
} {1 SQLITE_RANGE}
do_catchsql_test 1.2 {
SELECT inst(f1, 0) FROM f1 WHERE f1 MATCH 'two';
} {0 {{0 0 1}}}
do_catchsql_test 1.3 {
SELECT inst(f1, 1) FROM f1 WHERE f1 MATCH 'two';
} {0 {{0 1 0}}}
do_catchsql_test 1.4 {
SELECT inst(f1, 2) FROM f1 WHERE f1 MATCH 'two';
} {1 SQLITE_RANGE}
# 2.1 to 2.3: xColumnSize for the row matching 'zero' ('one two',
# 'two one zero'). Column 2 does not exist and must fail; columns 0 and 1
# hold 2 and 3 tokens; -1 yields 5, the sum over both columns.
do_catchsql_test 2.1 {
SELECT colsize(f1, 2) FROM f1 WHERE f1 MATCH 'two';
} {1 SQLITE_RANGE}
do_execsql_test 2.2 {
SELECT colsize(f1, 0), colsize(f1, 1) FROM f1 WHERE f1 MATCH 'zero';
} {2 3}
do_execsql_test 2.3 {
SELECT colsize(f1, -1) FROM f1 WHERE f1 MATCH 'zero';
} {5}
# 2.4.*: xColumnTotalSize counts tokens over all rows of the table: 4 in
# column 0, 6 in column 1 and 10 in total for -1. Column 2 must fail.
do_execsql_test 2.4.1 {
SELECT totalsize(f1, -1) FROM f1 WHERE f1 MATCH 'zero';
} {10}
do_execsql_test 2.4.2 {
SELECT totalsize(f1, 0) FROM f1 WHERE f1 MATCH 'zero';
} {4}
do_execsql_test 2.4.3 {
SELECT totalsize(f1, 1) FROM f1 WHERE f1 MATCH 'zero';
} {6}
do_catchsql_test 2.4.4 {
SELECT totalsize(f1, 2) FROM f1 WHERE f1 MATCH 'zero';
} {1 SQLITE_RANGE}
#-------------------------------------------------------------------------
# Test the xSet and xGetAuxdata APIs with a NULL destructor.
#
# Auxiliary function body that remembers the rowid of the row it was last
# invoked for.
#
#   add  - integer added to the returned value (fixed when the function is
#          registered, see the [list prevrowid N] registrations)
#   cmd  - command prefix of the fts5 API handle for the current row
#
# Reads the integer currently stored as auxiliary data, stores the rowid of
# the current row in its place, and returns the value read plus $add.
proc prevrowid {add cmd} {
  set res [$cmd xGetAuxdataInt 0]
  set r [$cmd xRowid]
  $cmd xSetAuxdataInt $r
  # Braced so that the expression is parsed once and its operands are not
  # substituted a second time by [expr].
  return [expr {$res + $add}]
}
# Register the proc twice: "prevrowid" adds 0 and "prevrowid1" adds 1 to the
# value it returns.
sqlite3_fts5_create_function db prevrowid [list prevrowid 0]
sqlite3_fts5_create_function db prevrowid1 [list prevrowid 1]
do_execsql_test 3.0 {
CREATE VIRTUAL TABLE e5 USING fts5(x);
INSERT INTO e5 VALUES('a b c');
INSERT INTO e5 VALUES('d e f');
INSERT INTO e5 VALUES('a b c');
INSERT INTO e5 VALUES('d e f');
INSERT INTO e5 VALUES('a b c');
}
# 3.1: for each matching row the function reports the rowid of the previous
# matching row, or 0 for the first row.
do_execsql_test 3.1 {
SELECT prevrowid(e5) || '+' || rowid FROM e5 WHERE e5 MATCH 'c'
} {0+1 1+3 3+5}
# 3.2: two functions used in the same query. The expected output (both see
# 0 on the first row) shows that each function has its own auxiliary data.
do_execsql_test 3.2 {
SELECT prevrowid(e5) || '+' || prevrowid1(e5) || '+' || rowid
FROM e5 WHERE e5 MATCH 'e'
} {0+1+2 2+3+4}
#-------------------------------------------------------------------------
# Test that if the xQueryPhrase callback returns other than SQLITE_OK,
# the query is abandoned. And that if it returns an error code other than
# SQLITE_DONE, the error is propagated back to the caller.
#
# 4.0: five rows that all contain the token "x"; each row additionally
# contains a letter that is unique to it.
do_execsql_test 4.0 {
CREATE VIRTUAL TABLE e7 USING fts5(x);
INSERT INTO e7 VALUES('a x a');
INSERT INTO e7 VALUES('b x b');
INSERT INTO e7 VALUES('c x c');
INSERT INTO e7 VALUES('d x d');
INSERT INTO e7 VALUES('e x e');
}
# Callback invoked by xQueryPhrase (see proc phrasequery) for each row
# visited.
#
#   rowid  - rowid at which the callback should return $code
#   code   - value to return once that rowid is reached
#   cmd    - command prefix of the fts5 API handle for the visited row
#
# Appends the rowid of the visited row to the global list ::cb. Returns
# $code when the visited rowid equals $rowid, otherwise an empty string.
proc xCallback {rowid code cmd} {
  set current [$cmd xRowid]
  lappend ::cb $current
  if {$current != $rowid} {
    return ""
  }
  return $code
}
# Auxiliary function body that runs xQueryPhrase on phrase 1 of the current
# query.
#
#   cmd   - command prefix of the fts5 API handle for the current row
#   code  - value the callback returns when it reaches the current rowid
#
# The callback is xCallback, bound to the rowid of the current row and to
# $code. Returns the global list ::cb, which is cleared before the phrase
# query is started.
proc phrasequery {cmd code} {
  set ::cb [list]
  set callback [list xCallback [$cmd xRowid] $code]
  $cmd xQueryPhrase 1 $callback
  return $::cb
}
sqlite3_fts5_create_function db phrasequery phrasequery
# The query 'c x' matches only rowid 3, and its phrase 1 ("x") occurs in all
# five rows. The callback therefore returns the supplied code at rowid 3.
#
# 4.1: SQLITE_OK - all five rows are visited.
do_execsql_test 4.1 {
SELECT phrasequery(e7, 'SQLITE_OK') FROM e7 WHERE e7 MATCH 'c x'
} {{1 2 3 4 5}}
# 4.2: SQLITE_DONE - the phrase query stops after rowid 3 without an error.
do_execsql_test 4.2 {
SELECT phrasequery(e7, 'SQLITE_DONE') FROM e7 WHERE e7 MATCH 'c x'
} {{1 2 3}}
# 4.3: SQLITE_ERROR - the error is reported by the SQL statement.
do_catchsql_test 4.3 {
SELECT phrasequery(e7, 'SQLITE_ERROR') FROM e7 WHERE e7 MATCH 'c x'
} {1 SQLITE_ERROR}
#-------------------------------------------------------------------------
# Auxiliary function calls with many cursors in the global cursor list.
#
do_execsql_test 5.0 {
CREATE VIRTUAL TABLE e9 USING fts5(y);
INSERT INTO e9(rowid, y) VALUES(1, 'i iii');
INSERT INTO e9(rowid, y) VALUES(2, 'ii iv');
INSERT INTO e9(rowid, y) VALUES(3, 'ii');
INSERT INTO e9(rowid, y) VALUES(4, 'i iv');
INSERT INTO e9(rowid, y) VALUES(5, 'iii');
}
# my_rowid: return the rowid of the current row through the xRowid method.
proc my_rowid {cmd} { $cmd xRowid }
sqlite3_fts5_create_function db my_rowid my_rowid
# Prepare four statements, one for each token, storing the statement
# handles in the variables s1 to s4.
foreach {var q} {
s1 i
s2 ii
s3 iii
s4 iv
} {
set sql "SELECT my_rowid(e9) FROM e9 WHERE e9 MATCH '$q'"
set $var [sqlite3_prepare db $sql -1 dummy]
}
# Step the four statements in an interleaved order, so that all of them are
# part-way through execution at the same time. Every call to my_rowid() must
# report the rowid of the statement it belongs to.
do_test 5.1.1 { sqlite3_step $s1 ; sqlite3_column_int $s1 0 } 1
do_test 5.1.2 { sqlite3_step $s2 ; sqlite3_column_int $s2 0 } 2
do_test 5.1.3 { sqlite3_step $s3 ; sqlite3_column_int $s3 0 } 1
do_test 5.1.4 { sqlite3_step $s4 ; sqlite3_column_int $s4 0 } 2
do_test 5.2.1 { sqlite3_step $s1 ; sqlite3_column_int $s1 0 } 4
do_test 5.2.2 { sqlite3_step $s2 ; sqlite3_column_int $s2 0 } 3
do_test 5.2.3 { sqlite3_step $s3 ; sqlite3_column_int $s3 0 } 5
do_test 5.2.4 { sqlite3_step $s4 ; sqlite3_column_int $s4 0 } 4
sqlite3_finalize $s1
sqlite3_finalize $s2
sqlite3_finalize $s3
sqlite3_finalize $s4
#-------------------------------------------------------------------------
# Passing an invalid first argument to an auxiliary function is detected.
#
do_execsql_test 6.0 {
CREATE VIRTUAL TABLE e11 USING fts5(y, z);
INSERT INTO e11(rowid, y, z) VALUES(1, 'a b', 45);
INSERT INTO e11(rowid, y, z) VALUES(2, 'b c', 46);
}
# 6.1: a column whose value is the number 45 is passed in place of the table
# name; the error message reports 45 as the cursor id.
do_catchsql_test 6.1 {
SELECT my_rowid(z) FROM e11 WHERE e11 MATCH 'b'
} {1 {no such cursor: 45}}
# 6.2: a column holding non-numeric text ('a b') is passed; the error
# message reports cursor id 0.
do_catchsql_test 6.2 {
SELECT my_rowid(y) FROM e11 WHERE e11 MATCH 'b'
} {1 {no such cursor: 0}}
#-------------------------------------------------------------------------
# Test passing an out-of-range phrase number to xPhraseSize (should
# return 0).
#
# my_phrasesize: return the result of xPhraseSize for phrase $iPhrase.
proc my_phrasesize {cmd iPhrase} { $cmd xPhraseSize $iPhrase }
sqlite3_fts5_create_function db my_phrasesize my_phrasesize
do_execsql_test 7.1 {
CREATE VIRTUAL TABLE t1 USING fts5(a);
INSERT INTO t1 VALUES('a b c');
}
# 7.2: the query has two phrases, "a" (one token) and "b+c" (two tokens).
# Phrase numbers -1 and 2 are out of range and must yield 0.
do_execsql_test 7.2 {
SELECT
my_phrasesize(t1, -1),
my_phrasesize(t1, 0),
my_phrasesize(t1, 1),
my_phrasesize(t1, 2)
FROM t1 WHERE t1 MATCH 'a OR b+c'
} {0 1 2 0}
#-------------------------------------------------------------------------
# Test highlight() with the query 'a OR (b AND d)'. Each case lists the rows
# to load (lRow) and the highlighted text expected for the matching rows
# (res). The expected values show that "b" and "d" are marked only in rows
# that contain both tokens.
#
do_execsql_test 8.0 {
CREATE VIRTUAL TABLE x1 USING fts5(a);
}
foreach {tn lRow res} {
4 {"a a a" "b" "a d"} {"[a] [a] [a]" "[a] d"}
1 {"b d" "a b"} {"[b] [d]" "[a] b"}
2 {"d b" "a d"} {"[d] [b]" "[a] d"}
3 {"a a d"} {"[a] [a] d"}
} {
# Start each case from an empty table.
execsql { DELETE FROM x1 }
foreach row $lRow { execsql { INSERT INTO x1 VALUES($row) } }
do_execsql_test 8.$tn {
SELECT highlight(x1, 0, '[', ']') FROM x1 WHERE x1 MATCH 'a OR (b AND d)';
} $res
}
finish_test

View File

@ -0,0 +1,115 @@
# 2014 Dec 20
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on the fts5 xSetAuxdata() and xGetAuxdata() APIs.
#
# Load the shared fts5 test helpers from the directory holding this script.
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5auxdata
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
# 1.0: five rows with rowids 1 to 5 that all contain the token "a", so a
# query for 'a' ordered by rowid visits them in a known sequence.
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE f1 USING fts5(a, b);
INSERT INTO f1(rowid, a, b) VALUES(1, 'a', 'b1');
INSERT INTO f1(rowid, a, b) VALUES(2, 'a', 'b2');
INSERT INTO f1(rowid, a, b) VALUES(3, 'a', 'b3');
INSERT INTO f1(rowid, a, b) VALUES(4, 'a', 'b4');
INSERT INTO f1(rowid, a, b) VALUES(5, 'a', 'b5');
}
# Auxiliary function that checks, row by row, that data stored with
# xSetAuxdata is returned by xGetAuxdata on later rows of the same query.
#
#   cmd  - command prefix of the fts5 API handle for the current row
#   tn   - test name prefix; the rowid is appended to it for each check
#
# Sequence by rowid: 1 expects nothing and stores "one"; 2 expects "one" and
# stores "two"; 3 expects "two"; 4 expects "two" but passes 1 instead of 0
# to xGetAuxdata; 5 expects nothing. NOTE(review): the argument to
# xGetAuxdata is presumably a "clear" flag, which would explain why row 5
# reads back nothing - confirm against the fts5 API documentation.
proc aux_function_1 {cmd tn} {
switch [$cmd xRowid] {
1 {
do_test $tn.1 [list $cmd xGetAuxdata 0 ] {}
$cmd xSetAuxdata "one"
}
2 {
do_test $tn.2 [list $cmd xGetAuxdata 0 ] {one}
$cmd xSetAuxdata "two"
}
3 {
do_test $tn.3 [list $cmd xGetAuxdata 0 ] {two}
}
4 {
do_test $tn.4 [list $cmd xGetAuxdata 1 ] {two}
}
5 {
do_test $tn.5 [list $cmd xGetAuxdata 0 ] {}
}
}
}
sqlite3_fts5_create_function db aux_function_1 aux_function_1
# Drive the checks: visit rowids 1 to 5 in ascending order. The tests are
# run from inside the auxiliary function; the query result is discarded.
db eval {
SELECT aux_function_1(f1, 1) FROM f1 WHERE f1 MATCH 'a'
ORDER BY rowid ASC
}
# Auxiliary function used to check how auxiliary data behaves when the same
# function is called twice in one query.
#
#   cmd   - command prefix of the fts5 API handle for the current row
#   tn    - test name prefix
#   inst  - "A" for the call that stores data; any other value for the call
#           that only reads
#
# Call "A" follows the same store/read sequence as aux_function_1. The
# other call never stores anything and expects to read the values stored by
# call "A" on the same row, which is invoked first in the query below.
proc aux_function_2 {cmd tn inst} {
if {$inst == "A"} {
switch [$cmd xRowid] {
1 {
do_test $tn.1.$inst [list $cmd xGetAuxdata 0 ] {}
$cmd xSetAuxdata "one $inst"
}
2 {
do_test $tn.2.$inst [list $cmd xGetAuxdata 0 ] "one $inst"
$cmd xSetAuxdata "two $inst"
}
3 {
do_test $tn.3.$inst [list $cmd xGetAuxdata 0 ] "two $inst"
}
4 {
do_test $tn.4.$inst [list $cmd xGetAuxdata 1 ] "two $inst"
}
5 {
do_test $tn.5.$inst [list $cmd xGetAuxdata 0 ] {}
}
}
} else {
switch [$cmd xRowid] {
1 {
do_test $tn.1.$inst [list $cmd xGetAuxdata 0 ] "one A"
}
2 {
do_test $tn.2.$inst [list $cmd xGetAuxdata 0 ] "two A"
}
3 {
do_test $tn.3.$inst [list $cmd xGetAuxdata 0 ] "two A"
}
4 {
do_test $tn.4.$inst [list $cmd xGetAuxdata 0 ] {}
}
5 {
do_test $tn.5.$inst [list $cmd xGetAuxdata 0 ] {}
}
}
}
}
sqlite3_fts5_create_function db aux_function_2 aux_function_2
# Drive the checks: both calls appear in the same result row, with the "A"
# call first. The query result itself is discarded.
db eval {
SELECT aux_function_2(f1, 2, 'A'), aux_function_2(f1, 2, 'B')
FROM f1 WHERE f1 MATCH 'a'
ORDER BY rowid ASC
}
finish_test

View File

@ -0,0 +1,64 @@
# 2015 April 21
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This test is focused on really large position lists. Those that require
# 4 or 5 byte position-list size varints. Because of the amount of memory
# required, these tests only run on 64-bit platforms.
#
# Load the shared fts5 test helpers from the directory holding this script.
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5bigpl
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
# Skip the whole file unless the Tcl word size is at least 8 bytes (see the
# note on memory use in the header above).
if { $tcl_platform(wordSize)<8 } {
finish_test
return
}
do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts5(x) }
# 1.1: ten documents, each consisting of one token repeated 1,200,000
# times, followed by an integrity-check of the index.
do_test 1.1 {
foreach t {a b c d e f g h i j} {
set doc [string repeat "$t " 1200000]
execsql { INSERT INTO t1 VALUES($doc) }
}
execsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {}
# 1.2: four documents, each a two-token sequence repeated 600,000 times, so
# every token again occurs 600,000 times in each document containing it.
do_test 1.2 {
execsql { DELETE FROM t1 }
foreach t {"a b" "b a" "c d" "d c"} {
set doc [string repeat "$t " 600000]
execsql { INSERT INTO t1 VALUES($doc) }
}
execsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {}
# 5-byte varint. This test takes 30 seconds or so on a 2014 workstation.
# The generated database is roughly 635MiB.
#
# A single document of one token repeated 150,000,000 times.
do_test 2.1...slow {
execsql { DELETE FROM t1 }
foreach t {a} {
set doc [string repeat "$t " 150000000]
execsql { INSERT INTO t1 VALUES($doc) }
}
execsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {}
finish_test

View File

@ -0,0 +1,138 @@
# 2015 Jun 10
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on fts5 tables with the columnsize=0 option.
#
# Load the shared fts5 test helpers from the directory holding this script.
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5columnsize
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
#-------------------------------------------------------------------------
# Check that the option can be parsed and that the %_docsize table is
# only created if it is set to true.
#
# Each case is: test number, expected outcome, CREATE statement. Outcome 0
# means the table is created without a t1_docsize table, 1 means it is
# created with one, and 2 means the statement must fail with a "malformed
# columnsize" error.
foreach {tn outcome stmt} {
1 0 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize=0) }
2 1 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize=1) }
3 0 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize='0') }
4 1 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize='1') }
5 2 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize='') }
6 2 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize=2) }
7 1 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize=0, columnsize=1) }
8 1 { CREATE VIRTUAL TABLE t1 USING fts5(x) }
9 2 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize=11) }
} {
# Every case starts without a table named t1.
execsql {
DROP TABLE IF EXISTS t1;
}
if {$outcome==2} {
do_catchsql_test 1.$tn.1 $stmt {1 {malformed columnsize=... directive}}
} else {
do_execsql_test 1.$tn.2 $stmt
# The number of sqlite_master entries named t1_docsize is 0 or 1, which
# is exactly the expected outcome value.
do_execsql_test 1.$tn.3 {
SELECT count(*) FROM sqlite_master WHERE name = 't1_docsize'
} $outcome
}
}
#-------------------------------------------------------------------------
# Run tests on a table with no %_content or %_docsize backing store.
#
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE t2 USING fts5(x, columnsize=0, content='');
}
# 2.1 and 2.2: an INSERT that does not supply a rowid is rejected with
# "datatype mismatch", while inserts with an explicit rowid succeed.
do_catchsql_test 2.1 {
INSERT INTO t2 VALUES('a b c d e f');
} {1 {datatype mismatch}}
do_execsql_test 2.2 {
INSERT INTO t2(rowid, x) VALUES(1, 'c d e f');
INSERT INTO t2(rowid, x) VALUES(2, 'c d e f g h');
INSERT INTO t2(rowid, x) VALUES(3, 'a b c d e f g h');
} {}
# 2.3: MATCH queries work. The '::' values only separate the results of
# the three queries in the expected output.
do_execsql_test 2.3 {
SELECT rowid FROM t2 WHERE t2 MATCH 'b'; SELECT '::';
SELECT rowid FROM t2 WHERE t2 MATCH 'e'; SELECT '::';
SELECT rowid FROM t2 WHERE t2 MATCH 'h';
} {3 :: 1 2 3 :: 2 3}
# 2.4: remove rowid 2 with the special 'delete' command, passing the
# original column text of that row.
do_execsql_test 2.4 {
INSERT INTO t2(t2, rowid, x) VALUES('delete', 2, 'c d e f g h');
SELECT rowid FROM t2 WHERE t2 MATCH 'b'; SELECT '::';
SELECT rowid FROM t2 WHERE t2 MATCH 'e'; SELECT '::';
SELECT rowid FROM t2 WHERE t2 MATCH 'h';
} {3 :: 1 3 :: 3}
# 2.5: 'delete-all' empties the index, so no query returns any row.
do_execsql_test 2.5 {
INSERT INTO t2(t2) VALUES('delete-all');
SELECT rowid FROM t2 WHERE t2 MATCH 'b'; SELECT '::';
SELECT rowid FROM t2 WHERE t2 MATCH 'e'; SELECT '::';
SELECT rowid FROM t2 WHERE t2 MATCH 'h';
} {:: ::}
do_execsql_test 2.6 {
INSERT INTO t2(rowid, x) VALUES(1, 'o t t f');
INSERT INTO t2(rowid, x) VALUES(2, 'f s s e');
INSERT INTO t2(rowid, x) VALUES(3, 'n t e t');
}
# 2.7.*: with neither content nor docsize storage, queries that do not use
# MATCH (full scan, rowid lookup, rowid range) are all refused.
do_catchsql_test 2.7.1 {
SELECT rowid FROM t2
} {1 {t2: table does not support scanning}}
do_catchsql_test 2.7.2 {
SELECT rowid FROM t2 WHERE rowid=2
} {1 {t2: table does not support scanning}}
do_catchsql_test 2.7.3 {
SELECT rowid FROM t2 WHERE rowid BETWEEN 1 AND 3
} {1 {t2: table does not support scanning}}
do_execsql_test 2.X {
DROP TABLE t2
}
#-------------------------------------------------------------------------
# Test the xColumnSize() API
#
# Register the helper SQL functions from the shared fts5 test library on db
# (presumably including the fts5_test_columnsize() used below).
fts5_aux_test_functions db
# 3.1.*: columnsize=0 on a table that does store its content. Token counts
# are still reported for the indexed columns x and z; the UNINDEXED column y
# is always reported as 0.
do_execsql_test 3.1.0 {
CREATE VIRTUAL TABLE t3 USING fts5(x, y UNINDEXED, z, columnsize=0);
INSERT INTO t3 VALUES('a a', 'b b b', 'c');
INSERT INTO t3 VALUES('x a x', 'b b b y', '');
}
do_execsql_test 3.1.1 {
SELECT rowid, fts5_test_columnsize(t3) FROM t3 WHERE t3 MATCH 'a'
} {
1 {2 0 1} 2 {3 0 0}
}
do_execsql_test 3.1.2 {
INSERT INTO t3 VALUES(NULL, NULL, 'a a a a');
DELETE FROM t3 WHERE rowid = 1;
SELECT rowid, fts5_test_columnsize(t3) FROM t3 WHERE t3 MATCH 'a'
} {
2 {3 0 0} 3 {0 0 4}
}
# 3.2.*: columnsize=0 combined with content=''. Here -1 is reported for the
# indexed columns and 0 for the UNINDEXED column.
do_execsql_test 3.2.0 {
CREATE VIRTUAL TABLE t4 USING fts5(x, y UNINDEXED, z, columnsize=0, content='');
INSERT INTO t4(rowid, x, y, z) VALUES(1, 'a a', 'b b b', 'c');
INSERT INTO t4(rowid, x, y, z) VALUES(2, 'x a x', 'b b b y', '');
}
do_execsql_test 3.2.1 {
SELECT rowid, fts5_test_columnsize(t4) FROM t4 WHERE t4 MATCH 'a'
} {
1 {-1 0 -1} 2 {-1 0 -1}
}
finish_test

View File

@ -0,0 +1,208 @@
# 2015 Jan 13
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file focuses on the code in fts5_config.c, which is largely concerned
# with parsing the various configuration and CREATE TABLE options.
#
# Load the shared fts5 test helpers from the directory holding this script.
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5config
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
#-------------------------------------------------------------------------
# Try different types of quote characters.
#
# 1.0: column names quoted with '', "", [] and `` must all be accepted and
# reported by table_info without their quotes.
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE t1 USING fts5('a', "b", [c], `d`);
PRAGMA table_info = t1;
} {
0 a {} 0 {} 0
1 b {} 0 {} 0
2 c {} 0 {} 0
3 d {} 0 {} 0
}
#-------------------------------------------------------------------------
# Syntax errors in the prefix= option.
#
# Each non-numeric value must be rejected with the same error message.
foreach {tn opt} {
1 {prefix=x}
2 {prefix='x'}
3 {prefix='$'}
} {
set res [list 1 {malformed prefix=... directive}]
do_catchsql_test 2.$tn "CREATE VIRTUAL TABLE f1 USING fts5(x, $opt)" $res
}
#-------------------------------------------------------------------------
# Syntax errors in the 'rank' option.
#
# Each value is a rank specification whose argument list is not a valid
# list of SQL literals (bare identifiers, malformed blob literals, or an
# unterminated string). All must be rejected with a generic error.
foreach {tn val} {
1 "f1(xyz)"
2 "f1(zyx)"
3 "f1(nzz)"
4 "f1(x'!!')"
5 "f1(x':;')"
6 "f1(x'[]')"
7 "f1(x'{}')"
8 "f1('abc)"
} {
do_catchsql_test 3.$tn {
INSERT INTO t1(t1, rank) VALUES('rank', $val);
} {1 {SQL logic error or missing database}}
}
#-------------------------------------------------------------------------
# The parsing of SQL literals specified as part of 'rank' options.
#
do_execsql_test 4.0 {
CREATE VIRTUAL TABLE zzz USING fts5(one);
INSERT INTO zzz VALUES('a b c');
}
# first: auxiliary function that returns its first SQL argument unchanged,
# so that the rank column exposes the literal parsed from the rank option.
proc first {cmd A} { return $A }
sqlite3_fts5_create_function db first first
# For each literal, configure rank as first(<literal>) and check that the
# rank column compares equal (IS) to the same literal written directly in
# the SQL statement.
foreach {tn arg} {
1 "123"
2 "'01234567890ABCDEF'"
3 "x'0123'"
4 "x'ABCD'"
5 "x'0123456789ABCDEF'"
6 "x'0123456789abcdef'"
7 "22.5"
8 "-91.5"
9 "-.5"
10 "''''"
11 "+.5"
} {
# Double every single quote so that the specification can be embedded in
# the single-quoted SQL string below.
set func [string map {' ''} "first($arg)"]
do_execsql_test 4.1.$tn "
INSERT INTO zzz(zzz, rank) VALUES('rank', '$func');
SELECT rank IS $arg FROM zzz WHERE zzz MATCH 'a + b + c'
" 1
}
# 4.2: setting the rank option to 'f1()' succeeds although this script does
# not register any function named f1 - presumably the name is only resolved
# when rank is next used. TODO confirm.
do_execsql_test 4.2 {
INSERT INTO zzz(zzz, rank) VALUES('rank', 'f1()');
} {}
#-------------------------------------------------------------------------
# Misquoting in tokenize= and other options.
#
# 5.1: an unterminated single quote inside the tokenize value is reported
# as a parse error.
do_catchsql_test 5.1 {
CREATE VIRTUAL TABLE xx USING fts5(x, tokenize="porter 'ascii");
} {1 {parse error in tokenize directive}}
# 5.2: a '[' inside a [..] quoted column name is accepted.
do_catchsql_test 5.2 {
CREATE VIRTUAL TABLE xx USING fts5(x, [y[]);
} {0 {}}
# 5.3: an extra ']' after a [..] quoted column name is rejected.
do_catchsql_test 5.3 {
CREATE VIRTUAL TABLE yy USING fts5(x, [y]]);
} {1 {unrecognized token: "]"}}
#-------------------------------------------------------------------------
# Errors in prefix= directives.
#
# 6.1: the option given twice. 6.2 and 6.3: lengths 1001 and 0 are reported
# as out of range. 6.4: a far larger value is reported as malformed.
do_catchsql_test 6.1 {
CREATE VIRTUAL TABLE abc USING fts5(a, prefix=1, prefix=2);
} {1 {multiple prefix=... directives}}
do_catchsql_test 6.2 {
CREATE VIRTUAL TABLE abc USING fts5(a, prefix='1, 2, 1001');
} {1 {prefix length out of range: 1001}}
do_catchsql_test 6.3 {
CREATE VIRTUAL TAbLE abc USING fts5(a, prefix='1, 2, 0000');
} {1 {prefix length out of range: 0}}
do_catchsql_test 6.4 {
CREATE VIRTUAL TABLE abc USING fts5(a, prefix='1 , 1000000');
} {1 {malformed prefix=... directive}}
#-------------------------------------------------------------------------
# Duplicate tokenize= and other options.
#
do_catchsql_test 7.1 {
CREATE VIRTUAL TABLE abc USING fts5(a, tokenize=porter, tokenize=ascii);
} {1 {multiple tokenize=... directives}}
do_catchsql_test 7.2 {
CREATE VIRTUAL TABLE abc USING fts5(a, content=porter, content=ascii);
} {1 {multiple content=... directives}}
do_catchsql_test 7.3 {
CREATE VIRTUAL TABLE abc USING fts5(a, content_rowid=porter, content_rowid=a);
} {1 {multiple content_rowid=... directives}}
#-------------------------------------------------------------------------
# Unrecognized option.
#
# 8.0: an unknown bare option name. 8.1: the same name in double quotes is
# reported as a parse error instead.
do_catchsql_test 8.0 {
CREATE VIRTUAL TABLE abc USING fts5(a, nosuchoption=123);
} {1 {unrecognized option: "nosuchoption"}}
do_catchsql_test 8.1 {
CREATE VIRTUAL TABLE abc USING fts5(a, "nosuchoption"=123);
} {1 {parse error in ""nosuchoption"=123"}}
#-------------------------------------------------------------------------
# Errors in:
#
# 9.1.* 'pgsz' options.
# 9.2.* 'automerge' options.
# 9.3.* 'crisismerge' options.
#
# Configuration values are written with INSERT INTO abc(abc, rank). Each
# option is tried with a negative value, a very large value and a
# non-integer value; the expected results below show which are accepted.
do_execsql_test 9.0 {
CREATE VIRTUAL TABLE abc USING fts5(a, b);
} {}
do_catchsql_test 9.1.1 {
INSERT INTO abc(abc, rank) VALUES('pgsz', -5);
} {1 {SQL logic error or missing database}}
do_catchsql_test 9.1.2 {
INSERT INTO abc(abc, rank) VALUES('pgsz', 50000000);
} {1 {SQL logic error or missing database}}
do_catchsql_test 9.1.3 {
INSERT INTO abc(abc, rank) VALUES('pgsz', 66.67);
} {1 {SQL logic error or missing database}}
do_catchsql_test 9.2.1 {
INSERT INTO abc(abc, rank) VALUES('automerge', -5);
} {1 {SQL logic error or missing database}}
do_catchsql_test 9.2.2 {
INSERT INTO abc(abc, rank) VALUES('automerge', 50000000);
} {1 {SQL logic error or missing database}}
do_catchsql_test 9.2.3 {
INSERT INTO abc(abc, rank) VALUES('automerge', 66.67);
} {1 {SQL logic error or missing database}}
do_execsql_test 9.2.4 {
INSERT INTO abc(abc, rank) VALUES('automerge', 1);
} {}
do_catchsql_test 9.3.1 {
INSERT INTO abc(abc, rank) VALUES('crisismerge', -5);
} {1 {SQL logic error or missing database}}
do_catchsql_test 9.3.2 {
INSERT INTO abc(abc, rank) VALUES('crisismerge', 66.67);
} {1 {SQL logic error or missing database}}
do_execsql_test 9.3.3 {
INSERT INTO abc(abc, rank) VALUES('crisismerge', 1);
} {}
# Unlike pgsz and automerge, a very large crisismerge value is accepted.
do_execsql_test 9.3.4 {
INSERT INTO abc(abc, rank) VALUES('crisismerge', 50000000);
} {}
# 9.4.1: an unknown configuration option name is rejected.
do_catchsql_test 9.4.1 {
INSERT INTO abc(abc, rank) VALUES('nosuchoption', 1);
} {1 {SQL logic error or missing database}}
finish_test

View File

@ -0,0 +1,258 @@
# 2014 Dec 20
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file contains tests for the content= and content_rowid= options.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5content
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
#-------------------------------------------------------------------------
# Contentless tables
#
do_execsql_test 1.1 {
CREATE VIRTUAL TABLE f1 USING fts5(a, b, content='');
INSERT INTO f1(rowid, a, b) VALUES(1, 'one', 'o n e');
INSERT INTO f1(rowid, a, b) VALUES(2, 'two', 't w o');
INSERT INTO f1(rowid, a, b) VALUES(3, 'three', 't h r e e');
}
do_execsql_test 1.2 {
SELECT rowid FROM f1 WHERE f1 MATCH 'o';
} {1 2}
do_execsql_test 1.3 {
INSERT INTO f1(a, b) VALUES('four', 'f o u r');
SELECT rowid FROM f1 WHERE f1 MATCH 'o';
} {1 2 4}
do_execsql_test 1.4 {
SELECT rowid, a, b FROM f1 WHERE f1 MATCH 'o';
} {1 {} {} 2 {} {} 4 {} {}}
do_execsql_test 1.5 {
SELECT rowid, highlight(f1, 0, '[', ']') FROM f1 WHERE f1 MATCH 'o';
} {1 {} 2 {} 4 {}}
do_execsql_test 1.6 {
SELECT rowid, highlight(f1, 0, '[', ']') IS NULL FROM f1 WHERE f1 MATCH 'o';
} {1 1 2 1 4 1}
do_execsql_test 1.7 {
SELECT rowid, snippet(f1, -1, '[', ']', '...', 5) IS NULL
FROM f1 WHERE f1 MATCH 'o';
} {1 1 2 1 4 1}
do_execsql_test 1.8 {
SELECT rowid, snippet(f1, 1, '[', ']', '...', 5) IS NULL
FROM f1 WHERE f1 MATCH 'o';
} {1 1 2 1 4 1}
do_execsql_test 1.9 {
SELECT rowid FROM f1;
} {1 2 3 4}
do_execsql_test 1.10 {
SELECT * FROM f1;
} {{} {} {} {} {} {} {} {}}
do_execsql_test 1.11 {
SELECT rowid, a, b FROM f1 ORDER BY rowid ASC;
} {1 {} {} 2 {} {} 3 {} {} 4 {} {}}
do_execsql_test 1.12 {
SELECT a IS NULL FROM f1;
} {1 1 1 1}
do_catchsql_test 1.13 {
DELETE FROM f1 WHERE rowid = 2;
} {1 {cannot DELETE from contentless fts5 table: f1}}
do_catchsql_test 1.14 {
UPDATE f1 SET a = 'a b c' WHERE rowid = 2;
} {1 {cannot UPDATE contentless fts5 table: f1}}
do_execsql_test 1.15 {
INSERT INTO f1(f1, rowid, a, b) VALUES('delete', 2, 'two', 't w o');
} {}
do_execsql_test 1.16 {
SELECT rowid FROM f1 WHERE f1 MATCH 'o';
} {1 4}
do_execsql_test 1.17 {
SELECT rowid FROM f1;
} {1 3 4}
#-------------------------------------------------------------------------
# External content tables
#
reset_db
do_execsql_test 2.1 {
-- Create a table. And an external content fts5 table to index it.
CREATE TABLE tbl(a INTEGER PRIMARY KEY, b, c);
CREATE VIRTUAL TABLE fts_idx USING fts5(b, c, content='tbl', content_rowid='a');
-- Triggers to keep the FTS index up to date.
CREATE TRIGGER tbl_ai AFTER INSERT ON tbl BEGIN
INSERT INTO fts_idx(rowid, b, c) VALUES (new.a, new.b, new.c);
END;
CREATE TRIGGER tbl_ad AFTER DELETE ON tbl BEGIN
INSERT INTO fts_idx(fts_idx, rowid, b, c)
VALUES('delete', old.a, old.b, old.c);
END;
CREATE TRIGGER tbl_au AFTER UPDATE ON tbl BEGIN
INSERT INTO fts_idx(fts_idx, rowid, b, c)
VALUES('delete', old.a, old.b, old.c);
INSERT INTO fts_idx(rowid, b, c) VALUES (new.a, new.b, new.c);
END;
}
do_execsql_test 2.2 {
INSERT INTO tbl VALUES(1, 'one', 'o n e');
INSERT INTO tbl VALUES(NULL, 'two', 't w o');
INSERT INTO tbl VALUES(3, 'three', 't h r e e');
}
do_execsql_test 2.3 {
INSERT INTO fts_idx(fts_idx) VALUES('integrity-check');
}
do_execsql_test 2.4 {
DELETE FROM tbl WHERE rowid=2;
INSERT INTO fts_idx(fts_idx) VALUES('integrity-check');
}
do_execsql_test 2.5 {
UPDATE tbl SET c = c || ' x y z';
INSERT INTO fts_idx(fts_idx) VALUES('integrity-check');
}
do_execsql_test 2.6 {
SELECT * FROM fts_idx WHERE fts_idx MATCH 't AND x';
} {three {t h r e e x y z}}
do_execsql_test 2.7 {
SELECT highlight(fts_idx, 1, '[', ']') FROM fts_idx
WHERE fts_idx MATCH 't AND x';
} {{[t] h r e e [x] y z}}
#-------------------------------------------------------------------------
# Quick tests of the 'delete-all' command.
#
do_execsql_test 3.1 {
CREATE VIRTUAL TABLE t3 USING fts5(x, content='');
INSERT INTO t3 VALUES('a b c');
INSERT INTO t3 VALUES('d e f');
}
do_execsql_test 3.2 {
SELECT count(*) FROM t3_docsize;
SELECT count(*) FROM t3_data;
} {2 4}
do_execsql_test 3.3 {
INSERT INTO t3(t3) VALUES('delete-all');
SELECT count(*) FROM t3_docsize;
SELECT count(*) FROM t3_data;
} {0 2}
do_execsql_test 3.4 {
INSERT INTO t3 VALUES('a b c');
INSERT INTO t3 VALUES('d e f');
SELECT rowid FROM t3 WHERE t3 MATCH 'e';
} {2}
do_execsql_test 3.5 {
SELECT rowid FROM t3 WHERE t3 MATCH 'c';
} {1}
do_execsql_test 3.6 {
SELECT count(*) FROM t3_docsize;
SELECT count(*) FROM t3_data;
} {2 4}
do_execsql_test 3.7 {
CREATE VIRTUAL TABLE t4 USING fts5(x);
} {}
do_catchsql_test 3.8 {
INSERT INTO t4(t4) VALUES('delete-all');
} {1 {'delete-all' may only be used with a contentless or external content fts5 table}}
#-------------------------------------------------------------------------
# Test an external content table with a more interesting schema.
#
do_execsql_test 4.1 {
CREATE TABLE x2(a, "key col" PRIMARY KEY, b, c) WITHOUT ROWID;
INSERT INTO x2 VALUES('a b', 1, 'c d' , 'e f');
INSERT INTO x2 VALUES('x y', -40, 'z z' , 'y x');
CREATE VIRTUAL TABLE t2 USING fts5(a, c, content=x2, content_rowid='key col');
INSERT INTO t2(t2) VALUES('rebuild');
}
do_execsql_test 4.2 { SELECT rowid FROM t2 } {-40 1}
do_execsql_test 4.3 { SELECT rowid FROM t2 WHERE t2 MATCH 'c'} {}
do_execsql_test 4.4 { SELECT rowid FROM t2 WHERE t2 MATCH 'a'} {1}
do_execsql_test 4.5 { SELECT rowid FROM t2 WHERE t2 MATCH 'x'} {-40}
do_execsql_test 4.6 { INSERT INTO t2(t2) VALUES('integrity-check') } {}
do_execsql_test 4.7 {
DELETE FROM x2 WHERE "key col" = 1;
INSERT INTO t2(t2, rowid, a, c) VALUES('delete', 1, 'a b', 'e f');
INSERT INTO t2(t2) VALUES('integrity-check');
}
do_execsql_test 4.8 { SELECT rowid FROM t2 WHERE t2 MATCH 'b'} {}
do_execsql_test 4.9 { SELECT rowid FROM t2 WHERE t2 MATCH 'y'} {-40}
#-------------------------------------------------------------------------
# Test that if the 'rowid' field of a 'delete' is not an integer, no
# changes are made to the FTS index.
#
do_execsql_test 5.0 {
CREATE VIRTUAL TABLE t5 USING fts5(a, b, content=);
INSERT INTO t5(rowid, a, b) VALUES(-1, 'one', 'two');
INSERT INTO t5(rowid, a, b) VALUES( 0, 'three', 'four');
INSERT INTO t5(rowid, a, b) VALUES( 1, 'five', 'six');
}
set ::checksum [execsql {SELECT md5sum(id, block) FROM t5_data}]
do_execsql_test 5.1 {
INSERT INTO t5(t5, rowid, a, b) VALUES('delete', NULL, 'three', 'four');
SELECT md5sum(id, block) FROM t5_data;
} $::checksum
#-------------------------------------------------------------------------
# Check that a contentless table can be dropped.
#
reset_db
do_execsql_test 6.1 {
CREATE VIRTUAL TABLE xx USING fts5(x, y, content="");
SELECT name FROM sqlite_master;
} {xx xx_data xx_docsize xx_config}
do_execsql_test 6.2 {
DROP TABLE xx;
SELECT name FROM sqlite_master;
} {}
finish_test

View File

@ -0,0 +1,99 @@
# 2014 Dec 20
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file tests that the FTS5 'integrity-check' command detects
# inconsistencies (corruption) in the on-disk backing tables.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5corrupt
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}
do_test 1.1 {
db transaction {
for {set i 1} {$i < 200} {incr i} {
set doc [list [string repeat x $i] [string repeat y $i]]
execsql { INSERT INTO t1(rowid, x) VALUES($i, $doc) }
}
}
fts5_level_segs t1
} {1}
db_save
do_execsql_test 1.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
set segid [lindex [fts5_level_segids t1] 0]
do_test 1.3 {
execsql {
DELETE FROM t1_data WHERE rowid = fts5_rowid('segment', $segid, 0, 4);
}
catchsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {1 {database disk image is malformed}}
do_test 1.4 {
db_restore_and_reopen
execsql {
UPDATE t1_data set block = X'00000000' || substr(block, 5) WHERE
rowid = fts5_rowid('segment', $segid, 0, 4);
}
catchsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {1 {database disk image is malformed}}
db_restore_and_reopen
#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}
#--------------------------------------------------------------------
#
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE t2 USING fts5(x);
INSERT INTO t2(t2, rank) VALUES('pgsz', 64);
}
db func rnddoc fts5_rnddoc
do_test 2.1 {
for {set i 0} {$i < 500} {incr i} {
execsql { INSERT INTO t2 VALUES(rnddoc(50)) }
}
execsql { INSERT INTO t2(t2) VALUES('integrity-check') }
} {}
#--------------------------------------------------------------------
# A mundane test - missing row in the %_content table.
#
do_execsql_test 3.0 {
CREATE VIRTUAL TABLE t3 USING fts5(x);
INSERT INTO t3 VALUES('one o');
INSERT INTO t3 VALUES('two e');
INSERT INTO t3 VALUES('three o');
INSERT INTO t3 VALUES('four e');
INSERT INTO t3 VALUES('five o');
}
do_execsql_test 3.1 {
SELECT * FROM t3 WHERE t3 MATCH 'o'
} {{one o} {three o} {five o}}
# Remove one row from the %_content backing table, then run a query whose
# result set would include that row. The query is expected to fail with a
# corruption error rather than return partial results.
# (Named 3.2: the name 3.1 is already used by the preceding test.)
do_catchsql_test 3.2 {
DELETE FROM t3_content WHERE rowid = 3;
SELECT * FROM t3 WHERE t3 MATCH 'o';
} {1 {database disk image is malformed}}
finish_test

View File

@ -0,0 +1,272 @@
# 2015 Apr 24
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file tests that FTS5 handles corrupt databases (i.e. internal
# inconsistencies in the backing tables) correctly. In this case
# "correctly" means without crashing.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5corrupt2
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
sqlite3_fts5_may_be_corrupt 1
# Create a simple FTS5 table containing 100 documents. Each document
# contains 10 terms, each of which start with the character "x".
#
expr srand(0)
db func rnddoc fts5_rnddoc
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
WITH ii(i) AS (SELECT 1 UNION SELECT i+1 FROM ii WHERE i<100)
INSERT INTO t1 SELECT rnddoc(10) FROM ii;
}
set mask [expr 31 << 31]
if 1 {
# Test 1:
#
# For each page in the t1_data table, open a transaction and DELETE
# the t1_data entry. Then run:
#
# * an integrity-check, and
# * unless the deleted block was a b-tree node, a query for "t1 MATCH 'x*'"
#
# and check that the corruption is detected in both cases. Then
# roll back the transaction.
#
# Test 2:
#
# Same thing, except instead of deleting a row from t1_data, replace its
# blob content with integer value 14.
#
foreach {tno stmt} {
1 { DELETE FROM t1_data WHERE rowid=$rowid }
2 { UPDATE t1_data SET block=14 WHERE rowid=$rowid }
} {
set tn 0
foreach rowid [db eval {SELECT rowid FROM t1_data WHERE rowid>10}] {
incr tn
#if {$tn!=224} continue
do_test 1.$tno.$tn.1.$rowid {
execsql { BEGIN }
execsql $stmt
catchsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {1 {database disk image is malformed}}
if {($rowid & $mask)==0} {
# Node is a leaf node, not a b-tree node.
do_catchsql_test 1.$tno.$tn.2.$rowid {
SELECT rowid FROM t1 WHERE t1 MATCH 'x*'
} {1 {database disk image is malformed}}
}
do_execsql_test 1.$tno.$tn.3.$rowid {
ROLLBACK;
INSERT INTO t1(t1) VALUES('integrity-check');
} {}
}
}
# Using the same database as the 1.* tests.
#
# Run N-1 tests, where N is the number of bytes in the rightmost leaf page
# of the fts index. For test $i, truncate the rightmost leafpage to $i
# bytes. Then test that the integrity-check detects the corruption.
#
# Also tested is that "MATCH 'x*'" does not crash and sometimes reports
# corruption. It may not report the db as corrupt because truncating the
# final leaf to some sizes may create a valid leaf page.
#
set lrowid [db one {SELECT max(rowid) FROM t1_data WHERE (rowid & $mask)=0}]
set nbyte [db one {SELECT length(block) FROM t1_data WHERE rowid=$lrowid}]
set all [db eval {SELECT rowid FROM t1}]
for {set i [expr $nbyte-2]} {$i>=0} {incr i -1} {
do_execsql_test 2.$i.1 {
BEGIN;
UPDATE t1_data SET block = substr(block, 1, $i) WHERE rowid=$lrowid;
}
do_catchsql_test 2.$i.2 {
INSERT INTO t1(t1) VALUES('integrity-check');
} {1 {database disk image is malformed}}
do_test 2.$i.3 {
set res [catchsql {SELECT rowid FROM t1 WHERE t1 MATCH 'x*'}]
expr {
$res=="1 {database disk image is malformed}"
|| $res=="0 {$all}"
}
} 1
do_execsql_test 2.$i.4 {
ROLLBACK;
INSERT INTO t1(t1) VALUES('integrity-check');
} {}
}
#-------------------------------------------------------------------------
# Test that corruption in leaf page headers is detected by queries that use
# doclist-indexes.
#
set doc "A B C D E F G H I J "
do_execsql_test 3.0 {
CREATE VIRTUAL TABLE x3 USING fts5(tt);
INSERT INTO x3(x3, rank) VALUES('pgsz', 32);
WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<1000)
INSERT INTO x3
SELECT ($doc || CASE WHEN (i%50)==0 THEN 'X' ELSE 'Y' END) FROM ii;
}
foreach {tn hdr} {
1 "\x00\x00\x00\x00"
2 "\xFF\xFF\xFF\xFF"
3 "\x44\x45"
} {
set tn2 0
set nCorrupt 0
set nCorrupt2 0
foreach rowid [db eval {SELECT rowid FROM x3_data WHERE rowid>10}] {
if {$rowid & $mask} continue
incr tn2
do_test 3.$tn.$tn2.1 {
execsql BEGIN
set fd [db incrblob main x3_data block $rowid]
fconfigure $fd -encoding binary -translation binary
set existing [read $fd [string length $hdr]]
seek $fd 0
puts -nonewline $fd $hdr
close $fd
set res [catchsql {SELECT rowid FROM x3 WHERE x3 MATCH 'x AND a'}]
if {$res == "1 {database disk image is malformed}"} {incr nCorrupt}
set {} 1
} {1}
if {($tn2 % 10)==0 && $existing != $hdr} {
do_test 3.$tn.$tn2.2 {
catchsql { INSERT INTO x3(x3) VALUES('integrity-check') }
} {1 {database disk image is malformed}}
}
execsql ROLLBACK
}
do_test 3.$tn.x { expr $nCorrupt>0 } 1
}
#--------------------------------------------------------------------
#
set doc "A B C D E F G H I J "
do_execsql_test 4.0 {
CREATE VIRTUAL TABLE x4 USING fts5(tt);
INSERT INTO x4(x4, rank) VALUES('pgsz', 32);
WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<10)
INSERT INTO x4
SELECT ($doc || CASE WHEN (i%50)==0 THEN 'X' ELSE 'Y' END) FROM ii;
}
foreach {tn nCut} {
1 1
2 10
} {
set tn2 0
set nCorrupt 0
foreach rowid [db eval {SELECT rowid FROM x4_data WHERE rowid>10}] {
if {$rowid & $mask} continue
incr tn2
do_test 4.$tn.$tn2 {
execsql {
BEGIN;
UPDATE x4_data SET block = substr(block, 1, length(block)-$nCut)
WHERE id = $rowid;
}
set res [catchsql {
SELECT rowid FROM x4 WHERE x4 MATCH 'a' ORDER BY 1 DESC
}]
if {$res == "1 {database disk image is malformed}"} {incr nCorrupt}
set {} 1
} {1}
execsql ROLLBACK
}
do_test 4.$tn.x { expr $nCorrupt>0 } 1
}
}
# Tests 5.*: build table x5 from ten copies of a large document, then, for
# each leaf page (rowids with none of the $mask bits set) of x5_data,
# overwrite the first bytes of the page with $hdr inside a transaction and
# run the integrity-check. The outcome of the integrity-check is discarded
# (the test body always evaluates to {}), so these tests only verify that
# the check completes without crashing. Each change is rolled back.
#
# These tests are numbered 5.0 and 5.$tn.$tn2 so that their names do not
# clash with the x4 tests, which already use 4.0 and 4.$tn.$tn2.
#
set doc [string repeat "A B C " 1000]
do_execsql_test 5.0 {
CREATE VIRTUAL TABLE x5 USING fts5(tt);
INSERT INTO x5(x5, rank) VALUES('pgsz', 32);
WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<10)
INSERT INTO x5 SELECT $doc FROM ii;
}
foreach {tn hdr} {
1 "\x00\x01"
} {
set tn2 0
foreach rowid [db eval {SELECT rowid FROM x5_data WHERE rowid>10}] {
# Skip b-tree nodes; only leaf pages are modified.
if {$rowid & $mask} continue
incr tn2
do_test 5.$tn.$tn2 {
execsql BEGIN
# Overwrite the start of the page blob in place with $hdr.
set fd [db incrblob main x5_data block $rowid]
fconfigure $fd -encoding binary -translation binary
puts -nonewline $fd $hdr
close $fd
# Result deliberately ignored: corruption may or may not be reported.
catchsql { INSERT INTO x5(x5) VALUES('integrity-check') }
set {} {}
} {}
execsql ROLLBACK
}
}
#--------------------------------------------------------------------
reset_db
do_execsql_test 5.1 {
CREATE VIRTUAL TABLE x5 USING fts5(tt);
INSERT INTO x5 VALUES('a');
INSERT INTO x5 VALUES('a a');
INSERT INTO x5 VALUES('a a a');
INSERT INTO x5 VALUES('a a a a');
UPDATE x5_docsize SET sz = X'' WHERE id=3;
}
# Auxiliary-function callback: invoke the xColumnSize method of the API
# command $cmd for column $i and hand back whatever it returns.
proc colsize {cmd i} {
  return [$cmd xColumnSize $i]
}
sqlite3_fts5_create_function db colsize colsize
do_catchsql_test 5.2 {
SELECT colsize(x5, 0) FROM x5 WHERE x5 MATCH 'a'
} {1 SQLITE_CORRUPT_VTAB}
sqlite3_fts5_may_be_corrupt 0
finish_test

View File

@ -0,0 +1,80 @@
# 2015 Apr 24
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file tests that FTS5 handles corrupt databases (i.e. internal
# inconsistencies in the backing tables) correctly. In this case
# "correctly" means without crashing.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5corrupt3
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
sqlite3_fts5_may_be_corrupt 1
# Create a simple FTS5 table containing 100 documents. Each document
# contains 10 terms, each of which start with the character "x".
#
expr srand(0)
db func rnddoc fts5_rnddoc
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x);
INSERT INTO t1(t1, rank) VALUES('pgsz', 64);
WITH ii(i) AS (SELECT 1 UNION SELECT i+1 FROM ii WHERE i<100)
INSERT INTO t1 SELECT rnddoc(10) FROM ii;
}
set mask [expr 31 << 31]
do_test 1.1 {
# Pick out the rowid of the right-most b-tree leaf in the new segment.
set rowid [db one {
SELECT max(rowid) FROM t1_data WHERE ((rowid>>31) & 0x0F)==1
}]
set L [db one {SELECT length(block) FROM t1_data WHERE rowid = $rowid}]
set {} {}
} {}
for {set i 0} {$i < $L} {incr i} {
do_test 1.2.$i {
catchsql {
BEGIN;
UPDATE t1_data SET block = substr(block, 1, $i) WHERE id = $rowid;
INSERT INTO t1(t1) VALUES('integrity-check');
}
} {1 {database disk image is malformed}}
catchsql ROLLBACK
}
#-------------------------------------------------------------------------
# Test that trailing bytes appended to the averages record are ignored.
#
do_execsql_test 2.1 {
CREATE VIRTUAL TABLE t2 USING fts5(x);
INSERT INTO t2 VALUES(rnddoc(10));
INSERT INTO t2 VALUES(rnddoc(10));
SELECT length(block) FROM t2_data WHERE id=1;
} {2}
do_execsql_test 2.2 {
UPDATE t2_data SET block = block || 'abcd' WHERE id=1;
SELECT length(block) FROM t2_data WHERE id=1;
} {6}
# After one more INSERT, the id=1 record is expected to be 2 bytes long
# again, i.e. the 4 bytes appended by the previous test do not persist.
# (Named 2.3: the name 2.2 is already used by the preceding test.)
do_execsql_test 2.3 {
INSERT INTO t2 VALUES(rnddoc(10));
SELECT length(block) FROM t2_data WHERE id=1;
} {2}
sqlite3_fts5_may_be_corrupt 0
finish_test

View File

@ -0,0 +1,132 @@
# 2015 April 21
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This test is focused on uses of doclist-index records.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5dlidx
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
if { $tcl_platform(wordSize)<8 } {
finish_test
return
}
# Run query $sql "forwards and backwards":
#
#   $tn.1  executes $sql as given and expects result $res.
#   $tn.2  executes $sql with " ORDER BY rowid DESC" appended and expects
#          $res sorted as integers in decreasing order.
#
# Both tests are issued in the caller's scope.
proc do_fb_test {tn sql res} {
  set descending [lsort -integer -decreasing $res]
  set reverseSql "$sql ORDER BY rowid DESC"
  uplevel [list do_execsql_test $tn.1 $sql $res]
  uplevel [list do_execsql_test $tn.2 $reverseSql $descending]
}
# Empty table t1, repopulate it with $nEntry rows, then run an
# integrity-check and a set of forward/backward queries against it. Rows
# are numbered from 0 to ($nEntry-1). The rowid for row $i is:
#
#   ($iFirst + $i*$nStep)
#
# Each document is of the form "a b c a b c a b c...". If the row number ($i)
# is an integer multiple of $spc1, then an "x" token is appended to the
# document. If it is *also* a multiple of $spc2, a "y" token is also appended.
#
# $tn is the prefix used for the names of all tests issued. Table t1 must
# already exist when this proc is called.
#
proc do_dlidx_test1 {tn spc1 spc2 nEntry iFirst nStep} {
  do_execsql_test $tn.0 { DELETE FROM t1 }

  # Rowids of the rows that contain "x" and "y" respectively, in ascending
  # order. These are the expected query results below.
  set xdoc [list]
  set ydoc [list]

  # The "a b c ..." part is identical for every row, so build it once.
  set base [string trim [string repeat "a b c " 100]]

  execsql BEGIN
  for {set i 0} {$i < $nEntry} {incr i} {
    # Honour $iFirst, as the header comment promises. The expression is
    # braced so it is parsed once and operands are not re-substituted.
    set rowid [expr {$iFirst + $i * $nStep}]
    set doc $base
    if {($i % $spc1)==0} {
      lappend xdoc $rowid
      append doc " x"
      if {($i % $spc2)==0} {
        lappend ydoc $rowid
        append doc " y"
      }
    }
    execsql { INSERT INTO t1(rowid, x) VALUES($rowid, $doc) }
  }
  execsql COMMIT

  breakpoint
  do_test $tn.1 {
    execsql { INSERT INTO t1(t1) VALUES('integrity-check') }
  } {}

  # Every row contains "a", so AND-ing with "x" or "y" must return exactly
  # the rows recorded in $xdoc / $ydoc, whichever term comes first.
  do_fb_test $tn.3.1 { SELECT rowid FROM t1 WHERE t1 MATCH 'a AND x' } $xdoc
  do_fb_test $tn.3.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'x AND a' } $xdoc
  do_fb_test $tn.4.1 { SELECT rowid FROM t1 WHERE t1 MATCH 'a AND y' } $ydoc
  do_fb_test $tn.4.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'y AND a' } $ydoc

  # Phrase queries ending in the appended tokens.
  do_fb_test $tn.5.1 {
SELECT rowid FROM t1 WHERE t1 MATCH 'a + b + c + x' } $xdoc
  do_fb_test $tn.5.2 {
SELECT rowid FROM t1 WHERE t1 MATCH 'b + c + x + y' } $ydoc
}
foreach {tn pgsz} {
1 32
2 200
} {
do_execsql_test $tn.0 {
DROP TABLE IF EXISTS t1;
CREATE VIRTUAL TABLE t1 USING fts5(x);
INSERT INTO t1(t1, rank) VALUES('pgsz', $pgsz);
}
do_dlidx_test1 1.$tn.1 10 100 10000 0 1000
do_dlidx_test1 1.$tn.2 10 10 10000 0 128
do_dlidx_test1 1.$tn.3 10 10 66 0 36028797018963970
do_dlidx_test1 1.$tn.4 10 10 50 0 150000000000000000
do_dlidx_test1 1.$tn.5 10 10 200 0 [expr 1<<55]
do_dlidx_test1 1.$tn.6 10 10 30 0 [expr 1<<58]
}
# Recreate table t1 (pgsz 64) holding one row 'b a' followed by $nEntry
# rows that each consist of 500 "a" tokens. The rowids of those $nEntry
# rows are $iFirst, $iFirst+$nStep, $iFirst+2*$nStep, ... The 'b a' row is
# inserted first, without an explicit rowid; the expected results below
# rely on it having rowid 1.
#
# Then check that 'b AND a' matches only that first row, both in the
# default order ($tn.1) and with ORDER BY rowid DESC ($tn.2).
#
proc do_dlidx_test2 {tn nEntry iFirst nStep} {
# Document text shared by all $nEntry generated rows.
set str [string repeat "a " 500]
# Build the whole table inside a single transaction.
execsql {
BEGIN;
DROP TABLE IF EXISTS t1;
CREATE VIRTUAL TABLE t1 USING fts5(x);
INSERT INTO t1(t1, rank) VALUES('pgsz', 64);
INSERT INTO t1 VALUES('b a');
WITH iii(ii, i) AS (
SELECT 1, $iFirst UNION ALL
SELECT ii+1, i+$nStep FROM iii WHERE ii<$nEntry
)
INSERT INTO t1(rowid,x) SELECT i, $str FROM iii;
COMMIT;
}
do_execsql_test $tn.1 {
SELECT rowid FROM t1 WHERE t1 MATCH 'b AND a'
} {1}
# NOTE(review): "breakpoint" looks like a debugging hook left in place;
# confirm it is still wanted.
breakpoint
do_execsql_test $tn.2 {
SELECT rowid FROM t1 WHERE t1 MATCH 'b AND a' ORDER BY rowid DESC
} {1}
}
do_dlidx_test2 2.1 [expr 20] [expr 1<<57] [expr (1<<57) + 128]
finish_test

View File

@ -0,0 +1,47 @@
# 2015 April 21
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This test is focused on edge cases in the doclist format.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5doclist
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
#-------------------------------------------------------------------------
# Create a table with 900 columns. Then add some large documents to it.
# All text is in the right most column of the table.
#
do_test 1.0 {
set cols [list]
for {set i 0} {$i < 900} {incr i} { lappend cols "x$i" }
execsql "CREATE VIRTUAL TABLE ccc USING fts5([join $cols ,])"
} {}
db func rnddoc fts5_rnddoc
do_execsql_test 1.1 {
WITH ii(i) AS (SELECT 1 UNION SELECT i+1 FROM ii WHERE i<100)
INSERT INTO ccc(x899) SELECT rnddoc(500) FROM ii;
}
do_execsql_test 1.2 {
INSERT INTO ccc(ccc) VALUES('integrity-check');
}
finish_test

93
ext/fts5/test/fts5ea.test Normal file
View File

@ -0,0 +1,93 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
#
# Test the fts5 expression parser directly using the fts5_expr() SQL
# test function.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ea
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
# Issue test $tn, which checks that fts5_expr() applied to expression
# $expr fails with error message $err. The expression is handed to the SQL
# statement through the global variable ::se_expr.
proc do_syntax_error_test {tn expr err} {
  set ::se_expr $expr
  set expected [list 1 $err]
  do_catchsql_test $tn {SELECT fts5_expr($se_expr)} $expected
}
# Issue test $tn, which checks that fts5_expr() applied to expression
# $expr succeeds and returns the single value $res. The expression is
# handed to the SQL statement through the global variable ::se_expr.
proc do_syntax_test {tn expr res} {
  set ::se_expr $expr
  set expected [list $res]
  do_execsql_test $tn {SELECT fts5_expr($se_expr)} $expected
}
foreach {tn expr res} {
1 {abc} {"abc"}
2 {abc def} {"abc" AND "def"}
3 {abc*} {"abc" *}
4 {"abc def ghi" *} {"abc" + "def" + "ghi" *}
5 {one AND two} {"one" AND "two"}
6 {one+two} {"one" + "two"}
7 {one AND two OR three} {("one" AND "two") OR "three"}
8 {one OR two AND three} {"one" OR ("two" AND "three")}
9 {NEAR(one two)} {NEAR("one" "two", 10)}
10 {NEAR("one three"* two, 5)} {NEAR("one" + "three" * "two", 5)}
11 {a OR b NOT c} {"a" OR ("b" NOT "c")}
12 "\x20one\x20two\x20three" {"one" AND "two" AND "three"}
13 "\x09one\x0Atwo\x0Dthree" {"one" AND "two" AND "three"}
14 {"abc""def"} {"abc" + "def"}
} {
do_execsql_test 1.$tn {SELECT fts5_expr($expr)} [list $res]
}
foreach {tn expr res} {
1 {c1:abc}
{c1 : "abc"}
2 {c2 : NEAR(one two) c1:"hello world"}
{c2 : NEAR("one" "two", 10) AND c1 : "hello" + "world"}
} {
do_execsql_test 2.$tn {SELECT fts5_expr($expr, 'c1', 'c2')} [list $res]
}
foreach {tn expr err} {
1 {AND} {fts5: syntax error near "AND"}
2 {abc def AND} {fts5: syntax error near ""}
3 {abc OR AND} {fts5: syntax error near "AND"}
4 {(a OR b) abc} {fts5: syntax error near "abc"}
5 {NEaR (a b)} {fts5: syntax error near "NEaR"}
6 {NEa (a b)} {fts5: syntax error near "NEa"}
7 {(a OR b) NOT c)} {fts5: syntax error near ")"}
8 {nosuch: a nosuch2: b} {no such column: nosuch}
9 {addr: a nosuch2: b} {no such column: nosuch2}
10 {NOT} {fts5: syntax error near "NOT"}
11 {a AND "abc} {unterminated string}
12 {NEAR(a b, xyz)} {expected integer, got "xyz"}
13 {NEAR(a b, // )} {fts5: syntax error near "/"}
14 {NEAR(a b, "xyz" )} {expected integer, got ""xyz""}
} {
do_catchsql_test 3.$tn {SELECT fts5_expr($expr, 'name', 'addr')} [list 1 $err]
}
#-------------------------------------------------------------------------
# Experiment with a tokenizer that considers " to be a token character.
#
do_execsql_test 4.0 {
SELECT fts5_expr('a AND """"', 'x', 'tokenize="unicode61 tokenchars ''""''"');
} {{"a" AND """"}}
finish_test

53
ext/fts5/test/fts5eb.test Normal file
View File

@ -0,0 +1,53 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5eb
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
# Issue test $tn, which checks that fts5_expr() applied to expression
# $expr fails with error message $err. The expression is handed to the SQL
# statement through the global variable ::se_expr.
proc do_syntax_error_test {tn expr err} {
  set ::se_expr $expr
  set expected [list 1 $err]
  do_catchsql_test $tn {SELECT fts5_expr($se_expr)} $expected
}
# Issue test $tn, which checks that fts5_expr() applied to expression
# $expr succeeds and returns the single value $res. The expression is
# handed to the SQL statement through the global variable ::se_expr.
proc do_syntax_test {tn expr res} {
  set ::se_expr $expr
  set expected [list $res]
  do_execsql_test $tn {SELECT fts5_expr($se_expr)} $expected
}
foreach {tn expr res} {
1 {abc} {"abc"}
2 {abc .} {"abc"}
3 {.} {}
4 {abc OR .} {"abc"}
5 {abc NOT .} {"abc"}
6 {abc AND .} {"abc"}
7 {. OR abc} {"abc"}
8 {. NOT abc} {"abc"}
9 {. AND abc} {"abc"}
10 {abc + . + def} {"abc" + "def"}
11 {abc . def} {"abc" AND "def"}
12 {r+e OR w} {"r" + "e" OR "w"}
} {
do_execsql_test 1.$tn {SELECT fts5_expr($expr)} [list $res]
}
finish_test

View File

@ -0,0 +1,353 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS5 module.
#
source [file join [file dirname [info script]] fts5_common.tcl]
source $testdir/malloc_common.tcl
set testprefix fts5fault1
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
# Simple tests:
#
# 1: CREATE VIRTUAL TABLE
# 2: INSERT statement
# 3: DELETE statement
# 4: MATCH expressions
#
#
# Test 1: faults during CREATE VIRTUAL TABLE. Only fault types whose name
# matches the pattern ioerr-t* are injected. The statement must either
# succeed or fail with "vtable constructor failed: t1".
# NOTE(review): faultsim_save_and_close and faultsim_restore_and_reopen are
# defined outside this file; presumably they snapshot and restore the
# database so every iteration starts from the same state - confirm.
faultsim_save_and_close
do_faultsim_test 1 -faults ioerr-t* -prep {
faultsim_restore_and_reopen
} -body {
execsql { CREATE VIRTUAL TABLE t1 USING fts5(a, b, prefix='1, 2, 3') }
} -test {
faultsim_test_result {0 {}} {1 {vtable constructor failed: t1}}
}
# Test 2: faults during a single INSERT into a table that has three prefix
# indexes. No -faults filter is given, so the harness default applies
# (presumably every fault type - confirm in malloc_common.tcl).
reset_db
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE t1 USING fts5(a, b, prefix='1, 2, 3');
}
faultsim_save_and_close
do_faultsim_test 2 -prep {
faultsim_restore_and_reopen
} -body {
execsql {
INSERT INTO t1 VALUES('a b c', 'a bc def ghij klmno');
}
} -test {
# The only listed outcome is success with no result rows.
faultsim_test_result {0 {}}
}
# Test 3: faults during DELETE of every row. The saved database holds one
# row, inserted by 3.0 before the snapshot is taken.
reset_db
do_execsql_test 3.0 {
CREATE VIRTUAL TABLE t1 USING fts5(a, b, prefix='1, 2, 3');
INSERT INTO t1 VALUES('a b c', 'a bc def ghij klmno');
}
faultsim_save_and_close
do_faultsim_test 3 -prep {
faultsim_restore_and_reopen
} -body {
execsql { DELETE FROM t1 }
} -test {
faultsim_test_result {0 {}}
}
# Test 4: faults during MATCH queries. Ten two-column rows are inserted and
# the database is saved; each triple below is then run as test 4.N.
reset_db
do_execsql_test 4.0 {
CREATE VIRTUAL TABLE t2 USING fts5(a, b);
INSERT INTO t2 VALUES('m f a jj th q jr ar', 'hj n h h sg j i m');
INSERT INTO t2 VALUES('nr s t g od j kf h', 'sb h aq rg op rb n nl');
INSERT INTO t2 VALUES('do h h pb p p q fr', 'c rj qs or cr a l i');
INSERT INTO t2 VALUES('lk gp t i lq mq qm p', 'h mr g f op ld aj h');
INSERT INTO t2 VALUES('ct d sq kc qi k f j', 'sn gh c of g s qt q');
INSERT INTO t2 VALUES('d ea d d om mp s ab', 'dm hg l df cm ft pa c');
INSERT INTO t2 VALUES('tc dk c jn n t sr ge', 'a a kn bc n i af h');
INSERT INTO t2 VALUES('ie ii d i b sa qo rf', 'a h m aq i b m fn');
INSERT INTO t2 VALUES('gs r fo a er m h li', 'tm c p gl eb ml q r');
INSERT INTO t2 VALUES('k fe fd rd a gi ho kk', 'ng m c r d ml rm r');
}
faultsim_save_and_close
# Columns: test number, MATCH expression, expected rowids.
foreach {tn expr res} {
1 { dk } 7
2 { m f } 1
3 { f* } {1 3 4 5 6 8 9 10}
4 { m OR f } {1 4 5 8 9 10}
5 { sn + gh } {5}
6 { "sn gh" } {5}
7 { NEAR(r a, 5) } {9}
8 { m* f* } {1 4 6 8 9 10}
9 { m* + f* } {1 8}
} {
# The -body and -test scripts are double-quoted so that $expr and $res are
# substituted here, in the loop, before the scripts reach the harness.
do_faultsim_test 4.$tn -prep {
faultsim_restore_and_reopen
} -body "
execsql { SELECT rowid FROM t2 WHERE t2 MATCH '$expr' }
" -test "
faultsim_test_result {[list 0 $res]}
"
}
#-------------------------------------------------------------------------
# The following tests use a larger database populated with random data.
#
# The database page size is set to 512 bytes and the FTS5 page size left
# at the default 1000 bytes. This means that reading a node may require
# pulling an overflow page from disk, which is an extra opportunity for
# an error to occur.
#
# Request a 512 byte page size before the first table is created, then read
# the pragma back; the expected result 512 confirms the setting took effect.
reset_db
do_execsql_test 5.0.1 {
PRAGMA main.page_size = 512;
CREATE VIRTUAL TABLE x1 USING fts5(a, b);
PRAGMA main.page_size;
} {512}
# Return a list of N pseudo-random words.
#
# Each word is a random integer in the range 0..999, zero-padded to three
# digits, with every digit replaced by a letter (0 becomes a, 1 becomes b,
# and so on up to 9 becoming j). Every word is therefore exactly three
# characters drawn from a..j.
proc rnddoc {n} {
  set digit2letter {0 a 1 b 2 c 3 d 4 e 5 f 6 g 7 h 8 i 9 j}
  set words [list]
  for {set k 0} {$k < $n} {incr k} {
    set number [expr {int(rand()*1000)}]
    lappend words [string map $digit2letter [format %.3d $number]]
  }
  return $words
}
# Make the Tcl proc rnddoc callable from SQL as rnddoc().
db func rnddoc rnddoc
# Populate x1 with 10000 rows of two 6-word random documents each. The
# recursive CTE has no termination condition of its own; the LIMIT on the
# INSERT's SELECT bounds it.
do_execsql_test 5.0.2 {
WITH r(a, b) AS (
SELECT rnddoc(6), rnddoc(6) UNION ALL
SELECT rnddoc(6), rnddoc(6) FROM r
)
INSERT INTO x1 SELECT * FROM r LIMIT 10000;
}
# Compute the expected number of rows containing "abc" without using FTS5.
set res [db one {
SELECT count(*) FROM x1 WHERE x1.a LIKE '%abc%' OR x1.b LIKE '%abc%'}
]
# 5.1: the MATCH count must agree with the LIKE-based count above.
do_faultsim_test 5.1 -faults oom* -body {
execsql { SELECT count(*) FROM x1 WHERE x1 MATCH 'abc' }
} -test {
faultsim_test_result [list 0 $::res]
}
# 5.2: every word rnddoc generates is three letters long, so the
# four-letter term 'abcd' is expected to match no rows.
do_faultsim_test 5.2 -faults oom* -body {
execsql { SELECT count(*) FROM x1 WHERE x1 MATCH 'abcd' }
} -test {
faultsim_test_result [list 0 0]
}
# Return 1 if either A or B contains the letter "a" immediately followed by
# two non-space characters, otherwise 0. B is only examined when A does not
# match.
proc test_astar {a b} {
  set pattern {a[^ ][^ ]}
  foreach text [list $a $b] {
    if {[regexp $pattern $text]} { return 1 }
  }
  return 0
}
# Expose test_astar to SQL and use it to compute, without FTS5, how many
# rows the prefix query in 5.3 is expected to match.
db func test_astar test_astar
set res [db one { SELECT count(*) FROM x1 WHERE test_astar(a, b) } ]
do_faultsim_test 5.3 -faults oom* -body {
execsql { SELECT count(*) FROM x1 WHERE x1 MATCH 'a*' }
} -test {
faultsim_test_result [list 0 $::res]
}
# 5.4: INSERT on a freshly opened connection. The -prep script closes and
# reopens test.db before every iteration; the database is not restored from
# a snapshot here.
do_faultsim_test 5.4 -faults oom* -prep {
db close
sqlite3 db test.db
} -body {
execsql { INSERT INTO x1 VALUES('a b c d', 'e f g h') }
} -test {
faultsim_test_result [list 0 {}]
}
# 5.5.*: faults while fts5_decode() processes one row of the x1_data shadow
# table. Each query counts the decoded values and expects exactly one.
# The rows examined are rowid 1, rowid 10, the smallest rowid above 20 and
# the largest rowid.
do_faultsim_test 5.5.1 -faults oom* -body {
execsql {
SELECT count(fts5_decode(rowid, block)) FROM x1_data WHERE rowid=1
}
} -test {
faultsim_test_result [list 0 1]
}
do_faultsim_test 5.5.2 -faults oom* -body {
execsql {
SELECT count(fts5_decode(rowid, block)) FROM x1_data WHERE rowid=10
}
} -test {
faultsim_test_result [list 0 1]
}
do_faultsim_test 5.5.3 -faults oom* -body {
execsql {
SELECT count(fts5_decode(rowid, block)) FROM x1_data WHERE rowid = (
SELECT min(rowid) FROM x1_data WHERE rowid>20
)
}
} -test {
faultsim_test_result [list 0 1]
}
do_faultsim_test 5.5.4 -faults oom* -body {
execsql {
SELECT count(fts5_decode(rowid, block)) FROM x1_data WHERE rowid = (
SELECT max(rowid) FROM x1_data
)
}
} -test {
faultsim_test_result [list 0 1]
}
#-------------------------------------------------------------------------
#
# 6.0: with automerge set to 0, fifteen separate single-row inserts leave
# 17 rows in the x1_data shadow table.
reset_db
do_execsql_test 6.0 {
CREATE VIRTUAL TABLE x1 USING fts5(x);
INSERT INTO x1(x1, rank) VALUES('automerge', 0);
INSERT INTO x1 VALUES('a b c'); -- 1
INSERT INTO x1 VALUES('a b c'); -- 2
INSERT INTO x1 VALUES('a b c'); -- 3
INSERT INTO x1 VALUES('a b c'); -- 4
INSERT INTO x1 VALUES('a b c'); -- 5
INSERT INTO x1 VALUES('a b c'); -- 6
INSERT INTO x1 VALUES('a b c'); -- 7
INSERT INTO x1 VALUES('a b c'); -- 8
INSERT INTO x1 VALUES('a b c'); -- 9
INSERT INTO x1 VALUES('a b c'); -- 10
INSERT INTO x1 VALUES('a b c'); -- 11
INSERT INTO x1 VALUES('a b c'); -- 12
INSERT INTO x1 VALUES('a b c'); -- 13
INSERT INTO x1 VALUES('a b c'); -- 14
INSERT INTO x1 VALUES('a b c'); -- 15
SELECT count(*) FROM x1_data;
} {17}
faultsim_save_and_close
# 6.1: the sixteenth insert, under OOM injection.
# NOTE(review): the drop from 17 rows to 3 presumably reflects a merge of
# the accumulated segments triggered by this insert - confirm.
do_faultsim_test 6.1 -faults oom* -prep {
faultsim_restore_and_reopen
} -body {
execsql { INSERT INTO x1 VALUES('d e f') }
} -test {
faultsim_test_result [list 0 {}]
# Only inspect the shadow table when the INSERT itself succeeded.
if {$testrc==0} {
set nCnt [db one {SELECT count(*) FROM x1_data}]
if {$nCnt!=3} { error "expected 3 entries but there are $nCnt" }
}
}
# 6.2 - 6.4: OOM injection while running three special commands against
# the database saved after test 6.0: setting 'pgsz', 'integrity-check' and
# 'optimize'. Each is expected to succeed with no result rows.
do_faultsim_test 6.2 -faults oom* -prep {
faultsim_restore_and_reopen
} -body {
execsql { INSERT INTO x1(x1, rank) VALUES('pgsz', 32) }
} -test {
faultsim_test_result [list 0 {}]
}
do_faultsim_test 6.3 -faults oom-* -prep {
faultsim_restore_and_reopen
} -body {
execsql { INSERT INTO x1(x1) VALUES('integrity-check') }
} -test {
faultsim_test_result [list 0 {}]
}
do_faultsim_test 6.4 -faults oom-* -prep {
faultsim_restore_and_reopen
} -body {
execsql { INSERT INTO x1(x1) VALUES('optimize') }
} -test {
faultsim_test_result [list 0 {}]
}
#-------------------------------------------------------------------------
#
# 7.0: OOM injection while opening a database connection. The connection
# is closed in -prep (errors ignored) and reopened in -body. Three outcomes
# are accepted: success, an error with an empty message, or the message
# "initialization of fts5 failed: ".
do_faultsim_test 7.0 -faults oom* -prep {
catch { db close }
} -body {
sqlite3 db test.db
} -test {
faultsim_test_result [list 0 {}] {1 {}} {1 {initialization of fts5 failed: }}
}
#-------------------------------------------------------------------------
# A prefix query against a large document set.
#
# Return a list of N pseudo-random words, each beginning with "x".
#
# After the leading "x" comes a random integer in the range 0..999,
# zero-padded to three digits, with every digit replaced by a letter
# (0 becomes a through 9 becomes j). Every word therefore matches the
# prefix query x*.
proc rnddoc {n} {
  set digit2letter {0 a 1 b 2 c 3 d 4 e 5 f 6 g 7 h 8 i 9 j}
  set words [list]
  for {set k 0} {$k < $n} {incr k} {
    set number [expr {int(rand()*1000)}]
    set letters [string map $digit2letter [format %.3d $number]]
    lappend words "x$letters"
  }
  return $words
}
# 8.0: insert 99 rows (i runs from 1 to 99) of 50 random words each, and
# record the loop counter of every insert in ::res as the expected rowids.
# The do_test result is the empty string returned by the for loop.
reset_db
db func rnddoc rnddoc
do_test 8.0 {
execsql { CREATE VIRTUAL TABLE x1 USING fts5(a) }
set ::res [list]
for {set i 1} {$i<100} {incr i 1} {
execsql { INSERT INTO x1 VALUES( rnddoc(50) ) }
lappend ::res $i
}
} {}
# 8.1: every word starts with "x", so the prefix query is expected to
# return every row.
do_faultsim_test 8.1 -faults oom* -prep {
} -body {
execsql {
SELECT rowid FROM x1 WHERE x1 MATCH 'x*'
}
} -test {
faultsim_test_result [list 0 $::res]
}
#-------------------------------------------------------------------------
# Segment promotion.
#
# 9.0: build a table with pgsz 32 and automerge disabled, insert sixteen
# 5-argument rnddoc() documents one statement at a time, and check that
# fts5_level_segs reports "0 1".
# NOTE(review): fts5_level_segs and fts5_rnddoc are defined outside this
# file; the meaning of the two numbers is not visible here - confirm.
do_test 9.0 {
reset_db
db func rnddoc fts5_rnddoc
execsql {
CREATE VIRTUAL TABLE s2 USING fts5(x);
INSERT INTO s2(s2, rank) VALUES('pgsz', 32);
INSERT INTO s2(s2, rank) VALUES('automerge', 0);
}
for {set i 1} {$i <= 16} {incr i} {
execsql { INSERT INTO s2 VALUES(rnddoc(5)) }
}
fts5_level_segs s2
} {0 1}
# Generate the large document once, before the snapshot, so that every
# fault-injection iteration inserts the same text.
set insert_doc [db one {SELECT rnddoc(160)}]
faultsim_save_and_close
do_faultsim_test 9.1 -faults oom-* -prep {
faultsim_restore_and_reopen
} -body {
execsql { INSERT INTO s2 VALUES($::insert_doc) }
} -test {
faultsim_test_result {0 {}}
# Only check the segment layout when the INSERT itself succeeded.
if {$testrc==0} {
set ls [fts5_level_segs s2]
if {$ls != "2 0"} { error "fts5_level_segs says {$ls}" }
}
}
finish_test

View File

@ -0,0 +1,140 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
#
# This file is focused on OOM errors.
#
source [file join [file dirname [info script]] fts5_common.tcl]
source $testdir/malloc_common.tcl
set testprefix fts5fault2
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
# The base document is "x y z" repeated 200 times, without a trailing space.
set doc [string trim [string repeat "x y z " 200]]
# 1.0: t1 holds rows a = 1..1000. Rows whose key is a multiple of 100 get
# " xyz" appended to the base document. x1 is declared with content='t1'
# and content_rowid='a', and its index is built by the 'rebuild' command.
do_execsql_test 1.0 {
CREATE TABLE t1(a INTEGER PRIMARY KEY, x);
CREATE VIRTUAL TABLE x1 USING fts5(x, content='t1', content_rowid='a');
INSERT INTO x1(x1, rank) VALUES('pgsz', 32);
WITH input(a,b) AS (
SELECT 1, $doc UNION ALL
SELECT a+1, ($doc || CASE WHEN (a+1)%100 THEN '' ELSE ' xyz' END)
FROM input WHERE a < 1000
)
INSERT INTO t1 SELECT * FROM input;
INSERT INTO x1(x1) VALUES('rebuild');
}
# 1.1: AND query; only the rows containing "xyz" are expected.
do_faultsim_test 1.1 -faults oom-* -prep {
} -body {
execsql { SELECT rowid FROM x1 WHERE x1 MATCH 'z AND xyz' }
} -test {
faultsim_test_result {0 {100 200 300 400 500 600 700 800 900 1000}}
}
# 1.2: phrase query "z + xyz" with the results in descending rowid order.
do_faultsim_test 1.2 -faults oom-* -prep {
} -body {
execsql { SELECT rowid FROM x1 WHERE x1 MATCH 'z + xyz' ORDER BY 1 DESC}
} -test {
faultsim_test_result {0 {1000 900 800 700 600 500 400 300 200 100}}
}
#-------------------------------------------------------------------------
# OOM within a query that accesses the in-memory hash table.
#
# The table name contains spaces, so it is double-quoted in every statement.
reset_db
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE "a b c" USING fts5(a, b, c);
INSERT INTO "a b c" VALUES('one two', 'x x x', 'three four');
INSERT INTO "a b c" VALUES('nine ten', 'y y y', 'two two');
}
# 2.1: -prep opens a transaction and inserts a third row without
# committing, so the query in -body must also see that uncommitted row
# (expected rowids 1 and 3).
do_faultsim_test 2.1 -faults oom-trans* -prep {
execsql {
BEGIN;
INSERT INTO "a b c" VALUES('one one', 'z z z', 'nine ten');
}
} -body {
execsql { SELECT rowid FROM "a b c" WHERE "a b c" MATCH 'one' }
} -test {
faultsim_test_result {0 {1 3}}
# Discard the uncommitted row so the next iteration starts from two rows.
# catchsql is used so that a failing ROLLBACK does not abort the test.
catchsql { ROLLBACK }
}
#-------------------------------------------------------------------------
# OOM within an 'optimize' operation that writes multiple pages to disk.
#
# 3.0: each INSERT ... SELECT adds one new row per existing row, so the row
# count doubles five times after the first insert (1 row becomes 32).
reset_db
do_execsql_test 3.0 {
CREATE VIRTUAL TABLE zzz USING fts5(z);
INSERT INTO zzz(zzz, rank) VALUES('pgsz', 32);
INSERT INTO zzz VALUES('a b c d');
INSERT INTO zzz SELECT 'c d e f' FROM zzz;
INSERT INTO zzz SELECT 'e f g h' FROM zzz;
INSERT INTO zzz SELECT 'i j k l' FROM zzz;
INSERT INTO zzz SELECT 'l k m n' FROM zzz;
INSERT INTO zzz SELECT 'o p q r' FROM zzz;
}
faultsim_save_and_close
# 3.1: -prep runs a query against zzz before faults are injected, so the
# 'optimize' command in -body is the statement under test.
do_faultsim_test 3.1 -faults oom-trans* -prep {
faultsim_restore_and_reopen
execsql { SELECT rowid FROM zzz }
} -body {
execsql { INSERT INTO zzz(zzz) VALUES('optimize') }
} -test {
faultsim_test_result {0 {}}
}
#-------------------------------------------------------------------------
# OOM within an 'integrity-check' operation.
#
# 4.0: ten rows, each the output of rnddoc(10) followed by the fixed token
# "xccc". rnddoc is bound to fts5_rnddoc, which is defined outside this file.
reset_db
db func rnddoc fts5_rnddoc
do_execsql_test 4.0 {
CREATE VIRTUAL TABLE zzz USING fts5(z);
INSERT INTO zzz(zzz, rank) VALUES('pgsz', 32);
WITH ii(i) AS (SELECT 1 UNION SELECT i+1 FROM ii WHERE i<10)
INSERT INTO zzz SELECT rnddoc(10) || ' xccc' FROM ii;
}
# 4.1: no -prep work is needed because 'integrity-check' is issued against
# the same unchanged database on every iteration.
do_faultsim_test 4.1 -faults oom-trans* -prep {
} -body {
execsql { INSERT INTO zzz(zzz) VALUES('integrity-check') }
} -test {
faultsim_test_result {0 {}}
}
#-------------------------------------------------------------------------
# OOM while parsing a tokenize=option
#
# 5.0: create a table whose tokenize, content and content_rowid options are
# all quoted strings. The snapshot is taken of an empty database, so the
# CREATE runs afresh on every iteration.
# NOTE(review): no table named "another table" is created in this file;
# presumably table creation does not require the content table to exist.
reset_db
faultsim_save_and_close
do_faultsim_test 5.0 -faults oom-* -prep {
faultsim_restore_and_reopen
} -body {
execsql {
CREATE VIRTUAL TABLE uio USING fts5(a, b,
tokenize="porter 'ascii'",
content="another table",
content_rowid="somecolumn"
);
}
} -test {
faultsim_test_result {0 {}}
}
finish_test

View File

@ -0,0 +1,113 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
#
# This file is focused on OOM errors.
#
source [file join [file dirname [info script]] fts5_common.tcl]
source $testdir/malloc_common.tcl
set testprefix fts5fault3
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
#-------------------------------------------------------------------------
# An OOM while resuming a partially completed segment merge.
#
# rnddoc is bound to fts5_rnddoc, which is defined outside this file.
db func rnddoc fts5_rnddoc
# 1.0: seed the Tcl random number generator, then run ten transactions of
# three inserts each with automerge set to 16. Afterwards automerge is
# lowered to 2 and a 'merge' command with argument 50 is issued.
# NOTE(review): per the section heading this is meant to leave a merge
# partially complete; that depends on the data volume - confirm.
do_test 1.0 {
expr srand(0)
execsql {
CREATE VIRTUAL TABLE xx USING fts5(x);
INSERT INTO xx(xx, rank) VALUES('pgsz', 32);
INSERT INTO xx(xx, rank) VALUES('automerge', 16);
}
for {set i 0} {$i < 10} {incr i} {
execsql {
BEGIN;
INSERT INTO xx(x) VALUES(rnddoc(20));
INSERT INTO xx(x) VALUES(rnddoc(20));
INSERT INTO xx(x) VALUES(rnddoc(20));
COMMIT
}
}
execsql {
INSERT INTO xx(xx, rank) VALUES('automerge', 2);
INSERT INTO xx(xx, rank) VALUES('merge', 50);
}
} {}
faultsim_save_and_close
# 1: issue a further 'merge' command, with argument 1, under OOM injection.
do_faultsim_test 1 -faults oom-* -prep {
faultsim_restore_and_reopen
} -body {
execsql { INSERT INTO xx(xx, rank) VALUES('merge', 1) }
} -test {
faultsim_test_result [list 0 {}]
}
#-------------------------------------------------------------------------
# An OOM while flushing an unusually large term to disk.
#
reset_db
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE xx USING fts5(x);
INSERT INTO xx(xx, rank) VALUES('pgsz', 32);
}
faultsim_save_and_close
# Build a document containing terms of 26, 52 and 260 characters. With
# pgsz set to 32 above, each of them is longer than the configured page
# size value.
set doc "a long term abcdefghijklmnopqrstuvwxyz "
append doc "and then abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz "
append doc [string repeat "abcdefghijklmnopqrstuvwxyz" 10]
do_faultsim_test 2 -faults oom-* -prep {
faultsim_restore_and_reopen
} -body {
execsql { INSERT INTO xx(x) VALUES ($::doc) }
} -test {
faultsim_test_result [list 0 {}]
}
#-------------------------------------------------------------------------
# An OOM while inserting large documents: one generated by
# [fts5_rnddoc 1000], and one that repeats the term "abc" 100 times.
#
# Unlike the preceding sections, no 'pgsz' value is set on this table.
reset_db
do_execsql_test 3.0 {
CREATE VIRTUAL TABLE xx USING fts5(x);
}
faultsim_save_and_close
# 3.1: insert the output of fts5_rnddoc called with argument 1000.
set doc [fts5_rnddoc 1000]
do_faultsim_test 3.1 -faults oom-* -prep {
faultsim_restore_and_reopen
} -body {
execsql { INSERT INTO xx(x) VALUES ($::doc) }
} -test {
faultsim_test_result [list 0 {}]
}
# 3.2: insert a document that consists of one term repeated 100 times.
set doc [string repeat "abc " 100]
do_faultsim_test 3.2 -faults oom-* -prep {
faultsim_restore_and_reopen
} -body {
execsql { INSERT INTO xx(x) VALUES ($::doc) }
} -test {
faultsim_test_result [list 0 {}]
}
finish_test

View File

@ -0,0 +1,419 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
#
# This file is focused on OOM errors.
#
source [file join [file dirname [info script]] fts5_common.tcl]
source $testdir/malloc_common.tcl
set testprefix fts5fault4
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
#-------------------------------------------------------------------------
# An OOM while dropping an fts5 table.
#
db func rnddoc fts5_rnddoc
do_test 1.0 {
execsql { CREATE VIRTUAL TABLE xx USING fts5(x) }
} {}
faultsim_save_and_close
# -prep reads from xx before faults are injected, so that DROP TABLE is the
# statement under test.
# NOTE(review): presumably the read also loads the schema and table object
# ahead of the fault window - confirm.
do_faultsim_test 1 -faults oom-* -prep {
faultsim_restore_and_reopen
execsql { SELECT * FROM xx }
} -body {
execsql { DROP TABLE xx }
} -test {
faultsim_test_result [list 0 {}]
}
#-------------------------------------------------------------------------
# An OOM within an "ORDER BY rank" query.
#
db func rnddoc fts5_rnddoc
# 2.0: three rows that begin with one, two and three "abc" tokens.
# NOTE(review): the second and third literals have no trailing space before
# the || operator, so their last "abc" is joined to the first word that
# rnddoc() returns; it is unclear whether that is intended.
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE xx USING fts5(x);
INSERT INTO xx VALUES ('abc ' || rnddoc(10));
INSERT INTO xx VALUES ('abc abc' || rnddoc(9));
INSERT INTO xx VALUES ('abc abc abc' || rnddoc(8));
} {}
faultsim_save_and_close
# 2: rows are expected back in the order 3, 2, 1 when sorted by rank.
do_faultsim_test 2 -faults oom-* -prep {
faultsim_restore_and_reopen
execsql { SELECT * FROM xx }
} -body {
execsql { SELECT rowid FROM xx WHERE xx MATCH 'abc' ORDER BY rank }
} -test {
faultsim_test_result [list 0 {3 2 1}]
}
#-------------------------------------------------------------------------
# An OOM while "reseeking" an FTS cursor.
#
# 3.0: four rows with explicit rowids; every row contains the token "t".
do_execsql_test 3.0 {
CREATE VIRTUAL TABLE jj USING fts5(j);
INSERT INTO jj(rowid, j) VALUES(101, 'm t w t f s s');
INSERT INTO jj(rowid, j) VALUES(202, 't w t f s');
INSERT INTO jj(rowid, j) VALUES(303, 'w t f');
INSERT INTO jj(rowid, j) VALUES(404, 't');
}
faultsim_save_and_close
# 3: while the MATCH query is still being stepped, row 404 is deleted from
# inside the loop as soon as row 303 has been visited. The expected result
# therefore lists 101, 202 and 303 only.
do_faultsim_test 3 -faults oom-* -prep {
faultsim_restore_and_reopen
execsql { SELECT * FROM jj }
} -body {
set res [list]
db eval { SELECT rowid FROM jj WHERE jj MATCH 't' } {
lappend res $rowid
if {$rowid==303} {
execsql { DELETE FROM jj WHERE rowid=404 }
}
}
set res
} -test {
faultsim_test_result [list 0 {101 202 303}]
}
#-------------------------------------------------------------------------
# An OOM within a special "*reads" query.
#
# 4.0: ten rows of rnddoc(5) output in a table with pgsz set to 32.
reset_db
db func rnddoc fts5_rnddoc
do_execsql_test 4.0 {
CREATE VIRTUAL TABLE x1 USING fts5(x);
INSERT INTO x1(x1, rank) VALUES('pgsz', 32);
WITH ii(i) AS ( SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<10 )
INSERT INTO x1 SELECT rnddoc(5) FROM ii;
}
# NOTE(review): ::res is assigned here but test 4 below compares against a
# literal and never reads it. This query also selects two columns whereas
# the tested query selects three.
set ::res [db eval {SELECT rowid, x1 FROM x1 WHERE x1 MATCH '*reads'}]
# 4: the special '*reads' query is expected to return a single row whose
# rowid is 0, whose x column is empty and whose x1 column is 3.
do_faultsim_test 4 -faults oom-* -body {
db eval {SELECT rowid, x, x1 FROM x1 WHERE x1 MATCH '*reads'}
} -test {
faultsim_test_result {0 {0 {} 3}}
}
#-------------------------------------------------------------------------
# An OOM within a query that uses a custom rank function.
#
# 5.0: the database uses UTF-16 text encoding. The trailing SQL comment on
# each INSERT is that row's rowid modulo 7, which is the value the rowidmod
# rank function in test 5.1 returns for it.
reset_db
do_execsql_test 5.0 {
PRAGMA encoding='utf16';
CREATE VIRTUAL TABLE x2 USING fts5(x);
INSERT INTO x2(rowid, x) VALUES(10, 'a b c'); -- 3
INSERT INTO x2(rowid, x) VALUES(20, 'a b c'); -- 6
INSERT INTO x2(rowid, x) VALUES(30, 'a b c'); -- 2
INSERT INTO x2(rowid, x) VALUES(40, 'a b c'); -- 5
INSERT INTO x2(rowid, x) VALUES(50, 'a b c'); -- 1
}
# Custom rank function: return the current row's rowid modulo MOD.
#
# CMD is the command passed in by the FTS5 Tcl wrapper; its xRowid
# subcommand is invoked to obtain the rowid.
proc rowidmod {cmd mod} {
  return [expr {[$cmd xRowid] % $mod}]
}
# Register the Tcl proc rowidmod as an FTS5 auxiliary function.
sqlite3_fts5_create_function db rowidmod rowidmod
# 5.1: use rowidmod('7') as the rank function and sort by it. Each expected
# value is "rowid-rank", listed in ascending order of rowid modulo 7.
do_faultsim_test 5.1 -faults oom-* -body {
db eval {
SELECT rowid || '-' || rank FROM x2 WHERE x2 MATCH 'b' AND
rank MATCH "rowidmod('7')" ORDER BY rank
}
} -test {
faultsim_test_result {0 {50-1 30-2 10-3 40-5 20-6}}
}
# Custom rank function: return the current row's rowid, a hyphen, and then
# PREFIX (so PREFIX actually ends up as the suffix of the result).
#
# CMD is the command passed in by the FTS5 Tcl wrapper; its xRowid
# subcommand is invoked to obtain the rowid.
proc rowidprefix {cmd prefix} {
  return "[$cmd xRowid]-$prefix"
}
sqlite3_fts5_create_function db rowidprefix rowidprefix
# A 260 character argument for the rank function.
set str [string repeat abcdefghijklmnopqrstuvwxyz 10]
# 5.2: the SQL is double-quoted so that $::str is pasted into the statement
# text; the doubled single quotes are SQL escapes inside the rank string.
# The first row returned is expected to be rowid 10.
do_faultsim_test 5.2 -faults oom-* -body {
db eval "
SELECT rank, x FROM x2 WHERE x2 MATCH 'b' AND
rank MATCH 'rowidprefix(''$::str'')'
LIMIT 1
"
} -test {
faultsim_test_result "0 {10-$::str {a b c}}"
}
#-------------------------------------------------------------------------
# OOM errors within auxiliary functions.
#
# 6.0: only the first row contains "c" (twice); the other two rows consist
# of seven "a" tokens each.
reset_db
do_execsql_test 6.0 {
CREATE VIRTUAL TABLE x3 USING fts5(xxx);
INSERT INTO x3 VALUES('a b c d c b a');
INSERT INTO x3 VALUES('a a a a a a a');
INSERT INTO x3 VALUES('a a a a a a a');
}
# 6.1: highlight() is expected to wrap both occurrences of "c" in asterisks.
do_faultsim_test 6.1 -faults oom-t* -body {
db eval { SELECT highlight(x3, 0, '*', '*') FROM x3 WHERE x3 MATCH 'c' }
} -test {
faultsim_test_result {0 {{a b *c* d *c* b a}}}
}
# Auxiliary function: encode the position of the first phrase instance in
# the current row as a single integer.
#
# CMD is the command passed in by the FTS5 Tcl wrapper. [$cmd xInst 0] is
# unpacked into three values p, c and o, and the result is c*100 + o.
# NOTE(review): c and o are presumably the column number and token offset
# of the instance - confirm against the xInst documentation.
proc firstinst {cmd} {
  foreach {p c o} [$cmd xInst 0] {}
  # The expression is braced so that the values of c and o are used as
  # operands rather than being substituted into the text and parsed again.
  expr {$c*100 + $o}
}
sqlite3_fts5_create_function db firstinst firstinst
# 6.2: for the only row matching "c" the expected value is 2. Besides
# success, an error whose message is SQLITE_NOMEM is also accepted.
do_faultsim_test 6.2 -faults oom-t* -body {
db eval { SELECT firstinst(x3) FROM x3 WHERE x3 MATCH 'c' }
} -test {
faultsim_test_result {0 2} {1 SQLITE_NOMEM}
}
# Auxiliary function: return the integer previously stored as auxiliary
# data, then store the current row's instance count in its place.
#
# CMD is the command passed in by the FTS5 Tcl wrapper. The stored value
# is read before it is overwritten, so each call returns what the preceding
# call saved.
proc previc {cmd} {
  set previous [$cmd xGetAuxdataInt 0]
  set current [$cmd xInstCount]
  $cmd xSetAuxdataInt $current
  return $previous
}
sqlite3_fts5_create_function db previc previc
# 6.3: previc() returns the instance count saved by the preceding row, so
# the expected values are 0 for the first row, then 2 and 7, the number of
# "a" tokens in rows one and two. Besides success, an error whose message
# is SQLITE_NOMEM is also accepted.
#
# This test is numbered 6.3 so that it does not share the identifier 6.2
# with the firstinst test that precedes it.
do_faultsim_test 6.3 -faults oom-t* -body {
db eval { SELECT previc(x3) FROM x3 WHERE x3 MATCH 'a' }
} -test {
faultsim_test_result {0 {0 2 7}} {1 SQLITE_NOMEM}
}
#-------------------------------------------------------------------------
# OOM error when querying for a phrase with many tokens.
#
# 7.0: tt holds eleven two-column rows. tt2 holds the same rows with both
# columns joined into one, plus a twelfth row of twelve distinct tokens.
reset_db
do_execsql_test 7.0 {
CREATE VIRTUAL TABLE tt USING fts5(x, y);
INSERT INTO tt VALUES('f b g b c b', 'f a d c c b'); -- 1
INSERT INTO tt VALUES('d a e f e d', 'f b b d e e'); -- 2
INSERT INTO tt VALUES('f b g a d c', 'e f c f a d'); -- 3
INSERT INTO tt VALUES('f f c d g f', 'f a e b g b'); -- 4
INSERT INTO tt VALUES('a g b d a g', 'e g a e a c'); -- 5
INSERT INTO tt VALUES('c d b d e f', 'f g e g e e'); -- 6
INSERT INTO tt VALUES('e g f f b c', 'f c e f g f'); -- 7
INSERT INTO tt VALUES('e g c f c e', 'f e e a f g'); -- 8
INSERT INTO tt VALUES('e a e b e e', 'd c c f f f'); -- 9
INSERT INTO tt VALUES('f a g g c c', 'e g d g c e'); -- 10
INSERT INTO tt VALUES('c d b a e f', 'f g e h e e'); -- 11
CREATE VIRTUAL TABLE tt2 USING fts5(o);
INSERT INTO tt2(rowid, o) SELECT rowid, x||' '||y FROM tt;
INSERT INTO tt2(rowid, o) VALUES(12, 'a b c d e f g h i j k l');
}
# 7.2 - 7.6: each query accepts either the listed rowids or an error whose
# message is SQLITE_NOMEM. There is no test numbered 7.1.
# 7.2: a six-token phrase written with "+".
do_faultsim_test 7.2 -faults oom-* -body {
db eval { SELECT rowid FROM tt WHERE tt MATCH 'f+g+e+g+e+e' }
} -test {
faultsim_test_result {0 6} {1 SQLITE_NOMEM}
}
# 7.3: NEAR with six single-token phrases.
do_faultsim_test 7.3 -faults oom-* -body {
db eval { SELECT rowid FROM tt WHERE tt MATCH 'NEAR(a b c d e f)' }
} -test {
faultsim_test_result {0 11} {1 SQLITE_NOMEM}
}
# 7.4: a ten-token quoted phrase that spans what were two columns in tt.
do_faultsim_test 7.4 -faults oom-t* -body {
db eval { SELECT rowid FROM tt2 WHERE tt2 MATCH '"g c f c e f e e a f"' }
} -test {
faultsim_test_result {0 8} {1 SQLITE_NOMEM}
}
# 7.5: NEAR with eleven single-token phrases.
do_faultsim_test 7.5 -faults oom-* -body {
db eval {SELECT rowid FROM tt2 WHERE tt2 MATCH 'NEAR(a b c d e f g h i j k)'}
} -test {
faultsim_test_result {0 12} {1 SQLITE_NOMEM}
}
# 7.6: a phrase restricted to column y.
do_faultsim_test 7.6 -faults oom-* -body {
db eval {SELECT rowid FROM tt WHERE tt MATCH 'y: "c c"'}
} -test {
faultsim_test_result {0 {1 9}} {1 SQLITE_NOMEM}
}
#-------------------------------------------------------------------------
#
# 8.0: one hundred rows inserted in a single transaction. Rows 1 and 100
# contain "a b c d x x"; rows 2 to 99 contain "a b c x x d", where "d" is
# separated from "c" by two other tokens.
reset_db
do_execsql_test 8.0 {
CREATE VIRTUAL TABLE tt USING fts5(x);
INSERT INTO tt(tt, rank) VALUES('pgsz', 32);
BEGIN;
INSERT INTO tt(rowid, x) VALUES(1, 'a b c d x x');
WITH ii(i) AS (SELECT 2 UNION ALL SELECT i+1 FROM ii WHERE i<99)
INSERT INTO tt(rowid, x) SELECT i, 'a b c x x d' FROM ii;
INSERT INTO tt(rowid, x) VALUES(100, 'a b c d x x');
COMMIT;
}
# 8.1: with a NEAR distance of 2 only the first and last rows are expected.
do_faultsim_test 8.1 -faults oom-t* -body {
db eval { SELECT rowid FROM tt WHERE tt MATCH 'NEAR(a b c d, 2)' }
} -test {
faultsim_test_result {0 {1 100}} {1 SQLITE_NOMEM}
}
# 8.2: every row contains both terms, so the OR query counts all 100 rows.
do_faultsim_test 8.2 -faults oom-t* -body {
db eval { SELECT count(*) FROM tt WHERE tt MATCH 'a OR d' }
} -test {
faultsim_test_result {0 100} {1 SQLITE_NOMEM}
}
#-------------------------------------------------------------------------
# Fault in NOT query.
#
# 9.0: two hundred rows. Rows whose rowid is a multiple of 50 contain only
# "a" tokens; every other row alternates "a" and "x".
reset_db
do_execsql_test 9.0 {
CREATE VIRTUAL TABLE tt USING fts5(x);
INSERT INTO tt(tt, rank) VALUES('pgsz', 32);
BEGIN;
WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<200)
INSERT INTO tt(rowid, x)
SELECT i, CASE WHEN (i%50)==0 THEN 'a a a a a a' ELSE 'a x a x a x' END
FROM ii;
COMMIT;
}
# 9.1: "a NOT x" is expected to return just the four rows without "x".
do_faultsim_test 9.1 -faults oom-* -body {
db eval { SELECT rowid FROM tt WHERE tt MATCH 'a NOT x' }
} -test {
faultsim_test_result {0 {50 100 150 200}} {1 SQLITE_NOMEM}
}
#-------------------------------------------------------------------------
# OOM in fts5_expr() SQL function.
#
# 10.1: fts5_expr() with no column arguments. The expected text shows the
# implicit AND before NEAR and a NEAR distance of 10 being filled in.
do_faultsim_test 10.1 -faults oom-t* -body {
db one { SELECT fts5_expr('a AND b NEAR(a b)') }
} -test {
faultsim_test_result {0 {"a" AND "b" AND NEAR("a" "b", 10)}}
}
# 10.2: fts5_expr_tcl() renders the same kind of expression as a Tcl
# script built from "ns" commands; column x appears as -col 0.
do_faultsim_test 10.2 -faults oom-t* -body {
db one { SELECT fts5_expr_tcl('x:"a b c" AND b NEAR(a b)', 'ns', 'x') }
} -test {
set res {AND [ns -col 0 -- {a b c}] [ns -- {b}] [ns -near 10 -- {a} {b}]}
faultsim_test_result [list 0 $res]
}
# 10.3: a column filter on a column that is declared in the second argument.
do_faultsim_test 10.3 -faults oom-t* -body {
db one { SELECT fts5_expr('x:a', 'x') }
} -test {
faultsim_test_result {0 {x : "a"}}
}
#-------------------------------------------------------------------------
# OOM while configuring 'rank' option.
#
reset_db
do_execsql_test 11.0 {
CREATE VIRTUAL TABLE ft USING fts5(x);
}
# 11.1: set the 'rank' option to a bm25() call with two arguments. Besides
# success, the error "disk I/O error" is accepted as an outcome of the
# injected fault.
do_faultsim_test 11.1 -faults oom-t* -body {
db eval { INSERT INTO ft(ft, rank) VALUES('rank', 'bm25(10.0, 5.0)') }
} -test {
faultsim_test_result {0 {}} {1 {disk I/O error}}
}
#-------------------------------------------------------------------------
# OOM while creating an fts5vocab table.
#
reset_db
do_execsql_test 12.0 {
CREATE VIRTUAL TABLE ft USING fts5(x);
}
faultsim_save_and_close
# 12.1: -prep reads sqlite_master before faults are injected, so that the
# CREATE of the fts5vocab table is the statement under test.
do_faultsim_test 12.1 -faults oom-t* -prep {
faultsim_restore_and_reopen
db eval { SELECT * FROM sqlite_master }
} -body {
db eval { CREATE VIRTUAL TABLE vv USING fts5vocab(ft, 'row') }
} -test {
faultsim_test_result {0 {}}
}
#-------------------------------------------------------------------------
# OOM while querying an fts5vocab table.
#
# 13.0: a one-row table with the terms "a" and "b", and a 'row' type
# fts5vocab table over it.
reset_db
do_execsql_test 13.0 {
CREATE VIRTUAL TABLE ft USING fts5(x);
INSERT INTO ft VALUES('a b');
CREATE VIRTUAL TABLE vv USING fts5vocab(ft, 'row');
}
faultsim_save_and_close
# 13.1: the same query is run once in -prep, without faults, and again in
# -body with faults. The expected output is one (term, 1, 1) triple per
# term.
do_faultsim_test 13.1 -faults oom-t* -prep {
faultsim_restore_and_reopen
db eval { SELECT * FROM vv }
} -body {
db eval { SELECT * FROM vv }
} -test {
faultsim_test_result {0 {a 1 1 b 1 1}}
}
#-------------------------------------------------------------------------
# OOM in multi-column token query.
#
# These tests are numbered 14.* because 13.0 and 13.1 are already used by
# the fts5vocab query tests that precede this section.
#
# 14.0: the first row is copied onto itself four times, giving 16 identical
# rows. Column x holds only "x" tokens, column y only "y" tokens, and
# column z holds "z" tokens followed by "x" tokens.
reset_db
do_execsql_test 14.0 {
CREATE VIRTUAL TABLE ft USING fts5(x, y, z);
INSERT INTO ft(ft, rank) VALUES('pgsz', 32);
INSERT INTO ft VALUES(
'x x x x x x x x x x x x x x x x',
'y y y y y y y y y y y y y y y y',
'z z z z z z z z x x x x x x x x'
);
INSERT INTO ft SELECT * FROM ft;
INSERT INTO ft SELECT * FROM ft;
INSERT INTO ft SELECT * FROM ft;
INSERT INTO ft SELECT * FROM ft;
}
faultsim_save_and_close
# 14.1: search for "x" in columns x and z only; all 16 rows are expected.
do_faultsim_test 14.1 -faults oom-t* -prep {
faultsim_restore_and_reopen
db eval { SELECT * FROM ft }
} -body {
db eval { SELECT rowid FROM ft WHERE ft MATCH '{x z}: x' }
} -test {
faultsim_test_result {0 {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16}}
}
#-------------------------------------------------------------------------
# OOM in an "ALTER TABLE RENAME TO"
#
# Both the old and the new table name contain a space, so they are
# double-quoted in every statement.
reset_db
do_execsql_test 15.0 {
CREATE VIRTUAL TABLE "tbl one" USING fts5(x, y, z);
}
faultsim_save_and_close
do_faultsim_test 15.1 -faults oom-t* -prep {
faultsim_restore_and_reopen
db eval { SELECT * FROM "tbl one" }
} -body {
db eval { ALTER TABLE "tbl one" RENAME TO "tbl two" }
} -test {
faultsim_test_result {0 {}}
}
finish_test

View File

@ -0,0 +1,96 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
#
# This file is focused on OOM errors.
#
source [file join [file dirname [info script]] fts5_common.tcl]
source $testdir/malloc_common.tcl
set testprefix fts5fault5
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
#-------------------------------------------------------------------------
# OOM while creating an FTS5 table.
#
# 1.1: no snapshot is used here. Instead -prep drops the table left behind
# by the previous iteration, if any, so that CREATE can run again.
do_faultsim_test 1.1 -faults oom-t* -prep {
db eval { DROP TABLE IF EXISTS abc }
} -body {
db eval { CREATE VIRTUAL TABLE abc USING fts5(x,y) }
} -test {
faultsim_test_result {0 {}}
}
#-------------------------------------------------------------------------
# OOM while writing a multi-tier doclist-index. And while running
# integrity-check on the same.
#
reset_db
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE tt USING fts5(x);
INSERT INTO tt(tt, rank) VALUES('pgsz', 32);
}
faultsim_save_and_close
# 2.1: insert 100 rows in one statement, each holding the term "abc" fifty
# times, into a table whose pgsz is 32.
do_faultsim_test 2.1 -faults oom-t* -prep {
faultsim_restore_and_reopen
db eval { SELECT * FROM tt }
} -body {
set str [string repeat "abc " 50]
db eval {
WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<100)
INSERT INTO tt(rowid, x) SELECT i, $str FROM ii;
}
} -test {
faultsim_test_result {0 {}}
}
# 2.2: has no -prep script, so it runs against whatever state the final
# iteration of 2.1 left behind.
do_faultsim_test 2.2 -faults oom-t* -body {
db eval { INSERT INTO tt(tt) VALUES('integrity-check') }
} -test {
faultsim_test_result {0 {}}
}
#-------------------------------------------------------------------------
# OOM while scanning an fts5vocab table.
#
# 3.0: inside one transaction, insert twenty rows; row i holds the decimal
# number i repeated fifty times. The transaction opened by the first
# execsql is committed by the last one.
reset_db
do_test 3.0 {
execsql {
CREATE VIRTUAL TABLE tt USING fts5(x);
CREATE VIRTUAL TABLE tv USING fts5vocab(tt, 'row');
INSERT INTO tt(tt, rank) VALUES('pgsz', 32);
BEGIN;
}
for {set i 0} {$i < 20} {incr i} {
set str [string repeat "$i " 50]
execsql { INSERT INTO tt VALUES($str) }
}
execsql COMMIT
} {}
# 3.1: the terms are expected back in text order, which is why 10 to 19
# come before 2.
do_faultsim_test 3.1 -faults oom-t* -body {
db eval {
SELECT term FROM tv;
}
} -test {
faultsim_test_result {0 {0 1 10 11 12 13 14 15 16 17 18 19 2 3 4 5 6 7 8 9}}
}
finish_test

View File

@ -0,0 +1,152 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
#
# This file is focused on OOM errors.
#
source [file join [file dirname [info script]] fts5_common.tcl]
source $testdir/malloc_common.tcl
set testprefix fts5fault6
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
#-------------------------------------------------------------------------
# OOM while rebuilding an FTS5 table.
#
# 1.0: ten two-column rows of six single-letter tokens per column.
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE tt USING fts5(a, b);
INSERT INTO tt VALUES('c d c g g f', 'a a a d g a');
INSERT INTO tt VALUES('c d g b f d', 'b g e c g c');
INSERT INTO tt VALUES('c c f d e d', 'c e g d b c');
INSERT INTO tt VALUES('e a f c e f', 'g b a c d g');
INSERT INTO tt VALUES('c g f b b d', 'g c d c f g');
INSERT INTO tt VALUES('d a g a b b', 'g c g g c e');
INSERT INTO tt VALUES('e f a b c e', 'f d c d c c');
INSERT INTO tt VALUES('e c a g c d', 'b b g f f b');
INSERT INTO tt VALUES('g b d d e b', 'f f b d a c');
INSERT INTO tt VALUES('e a d a e d', 'c e a e f g');
}
faultsim_save_and_close
# 1.1: the 'rebuild' command.
do_faultsim_test 1.1 -faults oom-t* -prep {
faultsim_restore_and_reopen
} -body {
db eval { INSERT INTO tt(tt) VALUES('rebuild') }
} -test {
faultsim_test_result {0 {}}
}
# 1.2: REPLACE of rowid 6.
# NOTE(review): presumably rowid 6 is the sixth row inserted above, so this
# replaces an existing row rather than adding one - confirm.
do_faultsim_test 1.2 -faults oom-t* -prep {
faultsim_restore_and_reopen
} -body {
db eval { REPLACE INTO tt(rowid, a, b) VALUES(6, 'x y z', 'l l l'); }
} -test {
faultsim_test_result {0 {}}
}
#-------------------------------------------------------------------------
# OOM within a special delete.
#
# 2.0: the table is declared with content="" (an empty content option).
reset_db
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE tt USING fts5(a, content="");
INSERT INTO tt VALUES('c d c g g f');
INSERT INTO tt VALUES('c d g b f d');
INSERT INTO tt VALUES('c c f d e d');
INSERT INTO tt VALUES('e a f c e f');
INSERT INTO tt VALUES('c g f b b d');
INSERT INTO tt VALUES('d a g a b b');
INSERT INTO tt VALUES('e f a b c e');
INSERT INTO tt VALUES('e c a g c d');
INSERT INTO tt VALUES('g b d d e b');
INSERT INTO tt VALUES('e a d a e d');
}
faultsim_save_and_close
# 2.1: the special 'delete' command, which names the rowid and supplies the
# text to remove.
# NOTE(review): the text supplied for rowid 3 is the text of the second
# INSERT above, not the third - confirm whether that is deliberate.
do_faultsim_test 2.1 -faults oom-t* -prep {
faultsim_restore_and_reopen
} -body {
db eval { INSERT INTO tt(tt, rowid, a) VALUES('delete', 3, 'c d g b f d'); }
} -test {
faultsim_test_result {0 {}}
}
# 2.2: the special 'delete-all' command.
do_faultsim_test 2.2 -faults oom-t* -prep {
faultsim_restore_and_reopen
} -body {
db eval { INSERT INTO tt(tt) VALUES('delete-all') }
} -test {
faultsim_test_result {0 {}}
}
# 2.3: an ordinary INSERT into the same table.
do_faultsim_test 2.3 -faults oom-t* -prep {
faultsim_restore_and_reopen
} -body {
db eval { INSERT INTO tt VALUES('x y z') }
} -test {
faultsim_test_result {0 {}}
}
#-------------------------------------------------------------------------
# OOM in the ASCII tokenizer with very large tokens.
#
# Also the unicode tokenizer.
#
# Three tokens of 80, 800 and 8000 characters, joined into one document.
set t1 [string repeat wxyz 20]
set t2 [string repeat wxyz 200]
set t3 [string repeat wxyz 2000]
set doc "$t1 $t2 $t3"
# xyz names the ascii tokenizer explicitly. xyz2 names no tokenizer and so
# uses the default, which per the section heading is the unicode one.
do_execsql_test 3.0 {
CREATE VIRTUAL TABLE xyz USING fts5(c, tokenize=ascii, content="");
CREATE VIRTUAL TABLE xyz2 USING fts5(c, content="");
}
faultsim_save_and_close
# 3.1: insert the document into the ascii-tokenizer table.
do_faultsim_test 3.1 -faults oom-t* -prep {
faultsim_restore_and_reopen
db eval { SELECT * FROM xyz }
} -body {
db eval { INSERT INTO xyz VALUES($::doc) }
} -test {
faultsim_test_result {0 {}}
}
# 3.2: insert the same document into the default-tokenizer table.
do_faultsim_test 3.2 -faults oom-t* -prep {
faultsim_restore_and_reopen
db eval { SELECT * FROM xyz2 }
} -body {
db eval { INSERT INTO xyz2 VALUES($::doc) }
} -test {
faultsim_test_result {0 {}}
}
#-------------------------------------------------------------------------
# OOM while initializing a unicode61 tokenizer.
#
# The snapshot is taken of an empty database, so the CREATE below runs
# afresh on every iteration. The tokenizer is given a 'separators' option
# with the value abc.
reset_db
faultsim_save_and_close
do_faultsim_test 4.1 -faults oom-t* -prep {
faultsim_restore_and_reopen
} -body {
db eval {
CREATE VIRTUAL TABLE yu USING fts5(x, tokenize="unicode61 separators abc");
}
} -test {
faultsim_test_result {0 {}}
}
finish_test

View File

@ -0,0 +1,43 @@
# 2014 Dec 20
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Test that SQLITE_FULL is returned if the FTS5 table cannot find a free
# segid to use. In practice this can only really happen when automerge and
# crisismerge are both disabled.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5full
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
# Set automerge to 0 and crisismerge to 100000. Per the header comment of
# this file, running out of segids is only expected when both kinds of
# merging are effectively disabled.
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE x8 USING fts5(i);
INSERT INTO x8(x8, rank) VALUES('automerge', 0);
INSERT INTO x8(x8, rank) VALUES('crisismerge', 100000);
}
# Expose the Tcl command fts5_rnddoc to SQL as rnddoc(). fts5_rnddoc is not
# defined in this file (presumably it comes from fts5_common.tcl).
db func rnddoc fts5_rnddoc
# Insert random 5-argument documents one statement at a time. The loop is
# expected to fail with "database or disk is full" before reaching 2500 rows.
do_test 1.1 {
list [catch {
for {set i 0} {$i < 2500} {incr i} {
execsql { INSERT INTO x8 VALUES( rnddoc(5) ); }
}
} msg] $msg
} {1 {database or disk is full}}
finish_test

108
ext/fts5/test/fts5hash.test Normal file
View File

@ -0,0 +1,108 @@
# 2015 April 21
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# The tests in this file are focused on the code in fts5_hash.c.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5hash
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
#-------------------------------------------------------------------------
# Return a list of tokens (a vocabulary) that all share the same hash
# key value. This can be used to test hash collisions.
#
# Arguments are "-option value" pairs:
#
#   -nslot  N     Slot count passed to sqlite3_fts5_token_hash (default 1024)
#   -nword  N     Number of tokens to return (default 20)
#   -hash   H     Hash value every returned token must have (default 88)
#   -prefix STR   String prepended to each random token (default "")
#
# An error is raised if the argument list has an odd number of elements
# ("bad args") or names an unknown option ("bad option: NAME"). The returned
# list is not de-duplicated, so it may contain the same token more than once.
#
proc build_vocab1 {args} {
  set O(-nslot) 1024
  set O(-nword) 20
  set O(-hash) 88
  set O(-prefix) ""

  if {[llength $args] % 2} { error "bad args" }
  foreach {k v} $args {
    if {[info exists O($k)]==0} { error "bad option: $k" }
    set O($k) $v
  }

  # Keep generating random candidates; retain only those that land in the
  # requested hash slot.
  set L [list]
  while {[llength $L] < $O(-nword)} {
    set t "$O(-prefix)[random_token]"
    set h [sqlite3_fts5_token_hash $O(-nslot) $t]
    if {$O(-hash)==$h} { lappend L $t }
  }
  return $L
}
# Return a pseudo-random token made up of the letters a-j. A random integer
# in the range 0..1999999 is generated and each of its decimal digits is
# mapped to a letter (0->a, 1->b, ... 9->j).
proc random_token {} {
  set map [list 0 a 1 b 2 c 3 d 4 e 5 f 6 g 7 h 8 i 9 j]
  # Braced so the expression is parsed once rather than being substituted
  # a second time by expr.
  set iVal [expr {int(rand() * 2000000)}]
  return [string map $map $iVal]
}
# Return a list of $nWord words, each picked at random (with replacement)
# from the list $vocab.
proc random_doc {vocab nWord} {
  set words ""
  set nChoice [llength $vocab]
  while {[llength $words] < $nWord} {
    set pick [expr {int(rand() * $nChoice)}]
    lappend words [lindex $vocab $pick]
  }
  return $words
}
# Build a vocabulary using the default build_vocab1 options and expose
# random_doc to SQL as r().
set vocab [build_vocab1]
db func r random_doc
# Insert 100 rows of random documents drawn from $vocab inside a single
# transaction. The integrity-check is run both before and after COMMIT.
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE eee USING fts5(e, ee);
BEGIN;
WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<100)
INSERT INTO eee SELECT r($vocab, 5), r($vocab, 7) FROM ii;
INSERT INTO eee(eee) VALUES('integrity-check');
COMMIT;
INSERT INTO eee(eee) VALUES('integrity-check');
}
# Build a second vocabulary: tokens that begin with "xyz" and share the hash
# value of the token "xyz" itself, plus "xyz".
set hash [sqlite3_fts5_token_hash 1024 xyz]
set vocab [build_vocab1 -prefix xyz -hash $hash]
lappend vocab xyz
# 1.1 opens a transaction that is left open until test 1.3 commits it, so
# test 1.2 runs its queries while the transaction is still in progress.
do_execsql_test 1.1 {
CREATE VIRTUAL TABLE vocab USING fts5vocab(eee, 'row');
BEGIN;
WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<100)
INSERT INTO eee SELECT r($vocab, 5), r($vocab, 7) FROM ii;
INSERT INTO eee(eee) VALUES('integrity-check');
}
# 1.2: for every term reported by the fts5vocab table, the "doc" value must
# equal the number of rows that a MATCH query for that term returns.
do_test 1.2 {
db eval { SELECT term, doc FROM vocab } {
set nRow [db one {SELECT count(*) FROM eee WHERE eee MATCH $term}]
if {$nRow != $doc} {
error "term=$term fts5vocab=$doc cnt=$nRow"
}
}
set {} {}
} {}
# 1.3: commit the transaction opened by 1.1 and re-check integrity.
do_execsql_test 1.3 {
COMMIT;
INSERT INTO eee(eee) VALUES('integrity-check');
}
finish_test

View File

@ -0,0 +1,107 @@
# 2015 Jan 13
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file contains tests focused on the integrity-check procedure.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5integrity
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
# 1.x: integrity-check on a single-row table with no prefix index.
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE xx USING fts5(x);
INSERT INTO xx VALUES('term');
}
do_execsql_test 1.1 {
INSERT INTO xx(xx) VALUES('integrity-check');
}
# 2.x: the same check on a table created with prefix=1.
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE yy USING fts5(x, prefix=1);
INSERT INTO yy VALUES('term');
}
do_execsql_test 2.1 {
INSERT INTO yy(yy) VALUES('integrity-check');
}
#--------------------------------------------------------------------
# Integrity-check after 'optimize' on a table with pgsz set to 32. The
# single 14-token row is doubled six times (64 rows in total) before the
# 'optimize' command is issued.
#
do_execsql_test 3.0 {
CREATE VIRTUAL TABLE zz USING fts5(z);
INSERT INTO zz(zz, rank) VALUES('pgsz', 32);
INSERT INTO zz VALUES('b b b b b b b b b b b b b b');
INSERT INTO zz SELECT z FROM zz;
INSERT INTO zz SELECT z FROM zz;
INSERT INTO zz SELECT z FROM zz;
INSERT INTO zz SELECT z FROM zz;
INSERT INTO zz SELECT z FROM zz;
INSERT INTO zz SELECT z FROM zz;
INSERT INTO zz(zz) VALUES('optimize');
}
do_execsql_test 3.1 { INSERT INTO zz(zz) VALUES('integrity-check'); }
#--------------------------------------------------------------------
# Mess around with a docsize record. And the averages record. Then
# check that integrity-check picks it up.
#
# 4.0 also verifies that every docsize blob for the five rows is exactly
# one byte long.
#
do_execsql_test 4.0 {
CREATE VIRTUAL TABLE aa USING fts5(zz);
INSERT INTO aa(zz) VALUES('a b c d e');
INSERT INTO aa(zz) VALUES('a b c d');
INSERT INTO aa(zz) VALUES('a b c');
INSERT INTO aa(zz) VALUES('a b');
INSERT INTO aa(zz) VALUES('a');
SELECT length(sz) FROM aa_docsize;
} {1 1 1 1 1}
# 4.1: the untouched table passes the integrity-check.
do_execsql_test 4.1 {
INSERT INTO aa(aa) VALUES('integrity-check');
}
# 4.2 - 4.5 each open a transaction, damage one shadow table and expect the
# integrity-check to fail. The transaction is left open; the next test (4.3,
# 4.4, 4.5) starts with ROLLBACK to undo the damage.
#
# 4.2: overwrite the docsize blob of rowid 3.
do_catchsql_test 4.2 {
BEGIN;
UPDATE aa_docsize SET sz = X'44' WHERE rowid = 3;
INSERT INTO aa(aa) VALUES('integrity-check');
} {1 {database disk image is malformed}}
# 4.3: overwrite the aa_data record with rowid 1 (presumably the averages
# record mentioned in the section comment -- confirm).
do_catchsql_test 4.3 {
ROLLBACK;
BEGIN;
UPDATE aa_data SET block = X'44' WHERE rowid = 1;
INSERT INTO aa(aa) VALUES('integrity-check');
} {1 {database disk image is malformed}}
# 4.4: add a docsize row (rowid 23) that has no matching content row.
do_catchsql_test 4.4 {
ROLLBACK;
BEGIN;
INSERT INTO aa_docsize VALUES(23, X'04');
INSERT INTO aa(aa) VALUES('integrity-check');
} {1 {database disk image is malformed}}
# 4.5: add matching docsize and content rows for rowid 23 directly to the
# shadow tables, bypassing the fts5 table itself.
do_catchsql_test 4.5 {
ROLLBACK;
BEGIN;
INSERT INTO aa_docsize VALUES(23, X'00');
INSERT INTO aa_content VALUES(23, '');
INSERT INTO aa(aa) VALUES('integrity-check');
} {1 {database disk image is malformed}}
# Debugging aid, left disabled:
#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM zz_data} {puts $r}
#exit
finish_test

View File

@ -0,0 +1,194 @@
# 2014 Dec 20
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests that focus on incremental merges of segments.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5merge
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
# Expose Tcl's [string repeat] to SQL as repeat(); it is used by the SQL
# inside do_merge1_test.
db func repeat [list string repeat]
#-------------------------------------------------------------------------
# Create an fts index so that:
#
#   * the index consists of two top-level segments
#   * each segment contains records related to $nRowPerSeg rows
#   * all rows consist of tokens "x" and "y" only.
#
# Then run ('merge', 1) until everything is completely merged.
#
# Each merge step is followed by an integrity-check. Stepping stops once
# the first value reported by [fts5_level_segs x8] drops to zero, or after
# six steps at the latest. The final test ($testname.x) asserts that the
# step counter stayed below 5.
#
proc do_merge1_test {testname nRowPerSeg} {
  # The SQL below refers to this value as $::nRowPerSeg.
  set ::nRowPerSeg [expr $nRowPerSeg]

  do_execsql_test $testname.0 {
    DROP TABLE IF EXISTS x8;
    CREATE VIRTUAL TABLE x8 USING fts5(i);
    INSERT INTO x8(x8, rank) VALUES('pgsz', 32);
    WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<$::nRowPerSeg)
    INSERT INTO x8 SELECT repeat('x y ', i % 16) FROM ii;
    WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<$::nRowPerSeg)
    INSERT INTO x8 SELECT repeat('x y ', i % 16) FROM ii;
    INSERT INTO x8(x8, rank) VALUES('automerge', 2);
  }

  set tn 1
  while {[lindex [fts5_level_segs x8] 0]>0} {
    do_execsql_test $testname.$tn {
      INSERT INTO x8(x8, rank) VALUES('merge', 1);
      INSERT INTO x8(x8) VALUES('integrity-check');
    }
    # Safety valve: give up rather than loop forever if merging stalls.
    if {$tn>5} break
    incr tn
  }

  do_test $testname.x [list expr "$tn < 5"] 1
}
# Run the merge test with 1, 2, 3, 4, 10, 20 and 100 rows per segment.
do_merge1_test 1.1 1
do_merge1_test 1.2 2
do_merge1_test 1.3 3
do_merge1_test 1.4 4
do_merge1_test 1.5 10
do_merge1_test 1.6 20
do_merge1_test 1.7 100
#-------------------------------------------------------------------------
# Insert $nRow random documents into a fresh table x8 (pgsz=32), one at a
# time. After every insert, and for as long as [not_merged x8] reports true,
# repeat this sequence: set automerge to 2, run ('merge', 1), set automerge
# to 16, then run the integrity-check.
#
proc do_merge2_test {testname nRow} {
  # The do_test script below reads the row count as $::nRow.
  set ::nRow $nRow
  db func rnddoc fts5_rnddoc

  do_execsql_test $testname.0 {
    DROP TABLE IF EXISTS x8;
    CREATE VIRTUAL TABLE x8 USING fts5(i);
    INSERT INTO x8(x8, rank) VALUES('pgsz', 32);
  }

  do_test $testname.1 {
    set i 0
    while {$i < $::nRow} {
      execsql { INSERT INTO x8 VALUES( rnddoc(($i%16) + 5) ) }
      while {[not_merged x8]} {
        execsql {
          INSERT INTO x8(x8, rank) VALUES('automerge', 2);
          INSERT INTO x8(x8, rank) VALUES('merge', 1);
          INSERT INTO x8(x8, rank) VALUES('automerge', 16);
          INSERT INTO x8(x8) VALUES('integrity-check');
        }
      }
      incr i
    }
  } {}
}
# Return 1 if any value in the list returned by [fts5_level_segs $tbl] is
# greater than 1, or 0 otherwise.
proc not_merged {tbl} {
  foreach nSeg [fts5_level_segs $tbl] {
    if {$nSeg>1} {
      return 1
    }
  }
  return 0
}
# Run the insert-and-merge test with 5, 10 and 20 rows.
do_merge2_test 2.1 5
do_merge2_test 2.2 10
do_merge2_test 2.3 20
#-------------------------------------------------------------------------
# Test that an auto-merge will complete any merge that has already been
# started, even if the number of input segments is less than the current
# value of the 'automerge' configuration parameter.
#
db func rnddoc fts5_rnddoc
# 3.1: two separate 100-argument random documents in a table with pgsz=32.
do_execsql_test 3.1 {
DROP TABLE IF EXISTS x8;
CREATE VIRTUAL TABLE x8 USING fts5(i);
INSERT INTO x8(x8, rank) VALUES('pgsz', 32);
INSERT INTO x8 VALUES(rnddoc(100));
INSERT INTO x8 VALUES(rnddoc(100));
}
# 3.2: with automerge=4, a ('merge', 1) step leaves fts5_level_segs
# reporting "2", the same two segments as before.
do_test 3.2 {
execsql {
INSERT INTO x8(x8, rank) VALUES('automerge', 4);
INSERT INTO x8(x8, rank) VALUES('merge', 1);
}
fts5_level_segs x8
} {2}
# 3.3: with automerge=2, one merge step changes the reported value to
# "2 1".
do_test 3.3 {
execsql {
INSERT INTO x8(x8, rank) VALUES('automerge', 2);
INSERT INTO x8(x8, rank) VALUES('merge', 1);
}
fts5_level_segs x8
} {2 1}
# 3.4: raise automerge back to 4 and keep stepping until not_merged reports
# false; the final state must be "0 1".
do_test 3.4 {
execsql { INSERT INTO x8(x8, rank) VALUES('automerge', 4) }
while {[not_merged x8]} {
execsql { INSERT INTO x8(x8, rank) VALUES('merge', 1) }
}
fts5_level_segs x8
} {0 1}
#-------------------------------------------------------------------------
# Return a document consisting of one randomly chosen letter from a-j,
# repeated 30 times, each occurrence followed by a single space.
#
proc mydoc {} {
  # Braced so the expression is parsed once rather than being substituted
  # a second time by expr.
  set x [lindex {a b c d e f g h i j} [expr {int(rand()*10)}]]
  return [string repeat "$x " 30]
}
# Expose the Tcl proc mydoc to SQL as mydoc().
db func mydoc mydoc
# Return a list of ten row counts: the number of rows of table x8 that
# match each of the terms a, b, c, ... j, in that order.
proc mycount {} {
  set counts [list]
  foreach x {a b c d e f g h i j} {
    set n [db one {SELECT count(*) FROM x8 WHERE x8 MATCH $x}]
    lappend counts $n
  }
  return $counts
}
# For each (tn, pgsz) pair: create an empty table with the given pgsz, run a
# merge step on the empty table, insert two batches of 100 mydoc() rows, set
# automerge to 2, and then check that a merge step leaves the per-term row
# counts reported by mycount unchanged.
#
# NOTE(review): the "for" loop inside the body ends with an unconditional
# "break", so only iteration 0 (test 4.$tn.4.0) ever runs -- confirm whether
# this is intentional.
#
# Disabled case (tn=1, pgsz=32):
#1 32
foreach {tn pgsz} {
2 1000
} {
do_execsql_test 4.$tn.1 {
DROP TABLE IF EXISTS x8;
CREATE VIRTUAL TABLE x8 USING fts5(i);
INSERT INTO x8(x8, rank) VALUES('pgsz', $pgsz);
}
do_execsql_test 4.$tn.2 {
INSERT INTO x8(x8, rank) VALUES('merge', 1);
}
do_execsql_test 4.$tn.3 {
WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<100)
INSERT INTO x8 SELECT mydoc() FROM ii;
WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<100)
INSERT INTO x8 SELECT mydoc() FROM ii;
INSERT INTO x8(x8, rank) VALUES('automerge', 2);
}
set expect [mycount]
for {set i 0} {$i < 20} {incr i} {
do_test 4.$tn.4.$i {
execsql { INSERT INTO x8(x8, rank) VALUES('merge', 1); }
mycount
} $expect
break
}
# db eval {SELECT fts5_decode(rowid, block) AS r FROM x8_data} { puts $r }
}
finish_test

View File

@ -0,0 +1,71 @@
# 2014 Jan 08
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focused on the NEAR operator.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5near
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
# Run test case $tn: empty table t1, insert the single document $doc, then
# count the rows matching the fts5 expression $near. The count is compared
# against $res.
#
# Both $doc and $near are spliced into the SQL text as single-quoted
# literals, so neither may contain a single-quote character.
proc do_near_test {tn doc near res} {
  set sql [join [list \
    "" \
    "DELETE FROM t1;" \
    "INSERT INTO t1 VALUES('$doc');" \
    "SELECT count(*) FROM t1 WHERE t1 MATCH '$near';" \
    "" \
  ] "\n"]
  uplevel [list do_execsql_test $tn $sql $res]
}
# The table treats "." as a token character, so the dots in the documents
# below are tokens in their own right and act as filler between the terms of
# interest.
execsql {
CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = "ascii tokenchars '.'")
}
# 1.1 - 1.8: two single-token phrases in either order. The document matches
# for distances 5, 4 and 3 but not 2.
do_near_test 1.1 ". . a . . . b . ." { NEAR(a b, 5) } 1
do_near_test 1.2 ". . a . . . b . ." { NEAR(a b, 4) } 1
do_near_test 1.3 ". . a . . . b . ." { NEAR(a b, 3) } 1
do_near_test 1.4 ". . a . . . b . ." { NEAR(a b, 2) } 0
do_near_test 1.5 ". . a . . . b . ." { NEAR(b a, 5) } 1
do_near_test 1.6 ". . a . . . b . ." { NEAR(b a, 4) } 1
do_near_test 1.7 ". . a . . . b . ." { NEAR(b a, 3) } 1
do_near_test 1.8 ". . a . . . b . ." { NEAR(b a, 2) } 0
# 1.9 - 1.12: a quoted two-token phrase combined with a single token.
do_near_test 1.9 ". a b . . . c . ." { NEAR("a b" c, 3) } 1
do_near_test 1.10 ". a b . . . c . ." { NEAR("a b" c, 2) } 0
do_near_test 1.11 ". a b . . . c . ." { NEAR(c "a b", 3) } 1
do_near_test 1.12 ". a b . . . c . ." { NEAR(c "a b", 2) } 0
# 1.13 - 1.16: two phrases written with the "+" operator.
do_near_test 1.13 ". a b . . . c d ." { NEAR(a+b c+d, 3) } 1
do_near_test 1.14 ". a b . . . c d ." { NEAR(a+b c+d, 2) } 0
do_near_test 1.15 ". a b . . . c d ." { NEAR(c+d a+b, 3) } 1
do_near_test 1.16 ". a b . . . c d ." { NEAR(c+d a+b, 2) } 0
# 1.17 - 1.19: more than two phrases inside one NEAR group.
do_near_test 1.17 ". a b . . . c d ." { NEAR(a b c d, 5) } 1
do_near_test 1.18 ". a b . . . c d ." { NEAR(a b c d, 4) } 0
do_near_test 1.19 ". a b . . . c d ." { NEAR(a+b c d, 4) } 1
# 1.20 - 1.25: overlapping phrases (b+c lies inside a+b+c+d), with the three
# phrases given in each of three different orders.
do_near_test 1.20 "a b c d e f g h i" { NEAR(b+c a+b+c+d i, 5) } 1
do_near_test 1.21 "a b c d e f g h i" { NEAR(b+c a+b+c+d i, 4) } 0
do_near_test 1.22 "a b c d e f g h i" { NEAR(a+b+c+d i b+c, 5) } 1
do_near_test 1.23 "a b c d e f g h i" { NEAR(a+b+c+d i b+c, 4) } 0
do_near_test 1.24 "a b c d e f g h i" { NEAR(i a+b+c+d b+c, 5) } 1
do_near_test 1.25 "a b c d e f g h i" { NEAR(i a+b+c+d b+c, 4) } 0
finish_test

View File

@ -0,0 +1,66 @@
# 2014 Dec 20
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5optimize
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
# Return a list of $nWord words, each a single lower-case letter chosen at
# random (with replacement) from a-z.
proc rnddoc {nWord} {
  set letters {a b c d e f g h i j k l m n o p q r s t u v w x y z}
  set nLetter [llength $letters]
  set words [list]
  while {[llength $words] < $nWord} {
    set pick [expr {int(rand() * $nLetter)}]
    lappend words [lindex $letters $pick]
  }
  return $words
}
# For each (tn, nStep) pair: insert nStep rows of random data with one
# statement per row, then run integrity-check, 'optimize' and integrity-check
# again.
#
# NOTE(review): the "continue" guard at the top of the body skips every case
# except tn==4, so only the 500-step case actually runs. This looks like a
# debugging leftover -- confirm before removing.
foreach {tn nStep} {
1 2
2 10
3 50
4 500
} {
if {$tn!=4} continue
reset_db
db func rnddoc rnddoc
do_execsql_test 1.$tn.1 {
CREATE VIRTUAL TABLE t1 USING fts5(x, y);
}
do_test 1.$tn.2 {
for {set i 0} {$i < $nStep} {incr i} {
execsql { INSERT INTO t1 VALUES( rnddoc(5), rnddoc(5) ) }
}
} {}
do_execsql_test 1.$tn.3 {
INSERT INTO t1(t1) VALUES('integrity-check');
}
do_execsql_test 1.$tn.4 {
INSERT INTO t1(t1) VALUES('optimize');
}
do_execsql_test 1.$tn.5 {
INSERT INTO t1(t1) VALUES('integrity-check');
}
}
finish_test

View File

@ -0,0 +1,67 @@
# 2014 Dec 20
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file focuses on testing the planner (xBestIndex function).
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5plan
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
# One ordinary table (t1) and one fts5 table (f1) to plan joins against.
do_execsql_test 1.0 {
CREATE TABLE t1(x, y);
CREATE VIRTUAL TABLE f1 USING fts5(ff);
}
# 1.1: MATCH against a value taken from t1. The expected plan scans t1 in the
# outer loop and f1 in the inner loop using "INDEX 1:".
do_eqp_test 1.1 {
SELECT * FROM t1, f1 WHERE f1 MATCH t1.x
} {
0 0 0 {SCAN TABLE t1}
0 1 1 {SCAN TABLE f1 VIRTUAL TABLE INDEX 1:}
}
# 1.2: a ">" comparison instead of MATCH. The expected plan scans f1 in the
# outer loop using "INDEX 0:".
do_eqp_test 1.2 {
SELECT * FROM t1, f1 WHERE f1 > t1.x
} {
0 0 1 {SCAN TABLE f1 VIRTUAL TABLE INDEX 0:}
0 1 0 {SCAN TABLE t1}
}
# 1.3: MATCH with ORDER BY on an ordinary column; a temp b-tree sort is
# expected.
do_eqp_test 1.3 {
SELECT * FROM f1 WHERE f1 MATCH ? ORDER BY ff
} {
0 0 0 {SCAN TABLE f1 VIRTUAL TABLE INDEX 1:}
0 0 0 {USE TEMP B-TREE FOR ORDER BY}
}
# 1.4: ORDER BY rank without a MATCH constraint; a temp b-tree sort is
# expected here as well.
do_eqp_test 1.4 {
SELECT * FROM f1 ORDER BY rank
} {
0 0 0 {SCAN TABLE f1 VIRTUAL TABLE INDEX 0:}
0 0 0 {USE TEMP B-TREE FOR ORDER BY}
}
# 1.5: MATCH applied to the rank column; "INDEX 2:" is expected.
do_eqp_test 1.5 {
SELECT * FROM f1 WHERE rank MATCH ?
} {
0 0 0 {SCAN TABLE f1 VIRTUAL TABLE INDEX 2:}
}
finish_test

11806
ext/fts5/test/fts5porter.test Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,70 @@
# 2014 Dec 20
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on the fts5 porter stemmer implementation.
#
# These are extra tests added to those in fts5porter.test in order to
# improve test coverage of the porter stemmer implementation.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5porter2
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
# Pairs of values: an input word followed by the token the porter tokenizer
# is expected to produce for it.
set test_vocab {
tion tion
ation ation
vation vation
avation avat
vion vion
ion ion
relational relat
relation relat
relate relat
zzz zzz
ii ii
iiing ii
xtional xtional
xenci xenci
xlogi xlogi
realization realiz
realize realiz
xization xizat
capitalism capit
talism talism
xiveness xive
xfulness xful
xousness xous
xical xical
xicate xicat
xicity xiciti
ies ie
eed e
eing e
s s
}
# Tokenize each input with the porter tokenizer and compare the first
# element of the result with the expected output. Test names have the form
# "1.<index>.(<in> -> <out>)".
set i 0
foreach {in out} $test_vocab {
do_test "1.$i.($in -> $out)" {
lindex [sqlite3_fts5_tokenize db porter $in] 0
} $out
incr i
}
finish_test

View File

@ -0,0 +1,67 @@
# 2015 Jan 13
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file contains tests focused on prefix indexes.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5prefix
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
# 1.x: a one-character prefix query against a table created with prefix=1.
# Rows 1 ("two", "three") and 3 ("ten") contain tokens starting with "t".
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE xx USING fts5(x, prefix=1);
INSERT INTO xx VALUES('one two three');
INSERT INTO xx VALUES('four five six');
INSERT INTO xx VALUES('seven eight nine ten');
}
do_execsql_test 1.1 {
SELECT rowid FROM xx WHERE xx MATCH 't*'
} {1 3}
#-------------------------------------------------------------------------
# Check that prefix indexes really do index n-character prefixes, not
# n-byte prefixes. Use the ascii tokenizer so as not to be confused by
# diacritic removal.
#
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = ascii, prefix = 2)
}
# 2.1: insert two rows whose text consists solely of non-ASCII characters.
do_test 2.1 {
foreach {rowid string} {
1 "\xCA\xCB\xCC\xCD"
2 "\u1234\u5678\u4321\u8765"
} {
execsql { INSERT INTO t1(rowid, x) VALUES($rowid, $string) }
}
} {}
do_execsql_test 2.2 {
INSERT INTO t1(t1) VALUES('integrity-check');
}
# 2.3.x: two-character prefix queries must find the corresponding row.
foreach {tn q res} {
1 "SELECT rowid FROM t1 WHERE t1 MATCH '\xCA\xCB*'" 1
2 "SELECT rowid FROM t1 WHERE t1 MATCH '\u1234\u5678*'" 2
} {
do_execsql_test 2.3.$tn $q $res
}
finish_test

View File

@ -0,0 +1,45 @@
# 2014 Dec 20
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file focuses on testing queries that use the "rank" column.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5rank
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
#-------------------------------------------------------------------------
# "ORDER BY rank" + highlight() + large poslists.
#
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE xyz USING fts5(z);
}
# 1.1: a single document made of "x y" repeated 500 times (1000 tokens).
do_test 1.1 {
set doc [string trim [string repeat "x y " 500]]
execsql { INSERT INTO xyz VALUES($doc) }
} {}
# 1.2: with the query 'x', highlight() must wrap every "x" in brackets.
do_execsql_test 1.2 {
SELECT highlight(xyz, 0, '[', ']') FROM xyz WHERE xyz MATCH 'x' ORDER BY rank
} [list [string map {x [x]} $doc]]
# 1.3: with the query 'x AND y', every "x" and every "y" must be wrapped.
do_execsql_test 1.3 {
SELECT highlight(xyz, 0, '[', ']') FROM xyz
WHERE xyz MATCH 'x AND y' ORDER BY rank
} [list [string map {x [x] y [y]} $doc]]
finish_test

View File

@ -0,0 +1,67 @@
# 2014 Dec 20
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5rebuild
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
# 1.1 - 1.4: 'rebuild' on a healthy table leaves it passing the
# integrity-check.
do_execsql_test 1.1 {
CREATE VIRTUAL TABLE f1 USING fts5(a, b);
INSERT INTO f1(a, b) VALUES('one', 'o n e');
INSERT INTO f1(a, b) VALUES('two', 't w o');
INSERT INTO f1(a, b) VALUES('three', 't h r e e');
}
do_execsql_test 1.2 {
INSERT INTO f1(f1) VALUES('integrity-check');
} {}
do_execsql_test 1.3 {
INSERT INTO f1(f1) VALUES('rebuild');
} {}
do_execsql_test 1.4 {
INSERT INTO f1(f1) VALUES('integrity-check');
} {}
# 1.5 - 1.7: delete everything from the f1_data shadow table, check that the
# integrity-check now fails, then check that 'rebuild' repairs the table.
do_execsql_test 1.5 {
DELETE FROM f1_data;
} {}
do_catchsql_test 1.6 {
INSERT INTO f1(f1) VALUES('integrity-check');
} {1 {database disk image is malformed}}
do_execsql_test 1.7 {
INSERT INTO f1(f1) VALUES('rebuild');
INSERT INTO f1(f1) VALUES('integrity-check');
} {}
#-------------------------------------------------------------------------
# Check that 'rebuild' may not be used with a contentless table.
#
# The table is declared with an empty "content=" option.
#
do_execsql_test 2.1 {
CREATE VIRTUAL TABLE nc USING fts5(doc, content=);
}
do_catchsql_test 2.2 {
INSERT INTO nc(nc) VALUES('rebuild');
} {1 {'rebuild' may not be used with a contentless fts5 table}}
finish_test

View File

@ -0,0 +1,152 @@
# 2015 April 28
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file focuses on testing FTS5 queries (cursors) that are still in
# progress while the table is optimized, written to or otherwise changed.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5restart
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE f1 USING fts5(ff);
}
#-------------------------------------------------------------------------
# Run the 'optimize' command. Check that it does not disturb ongoing
# full-text queries.
#
# 1.1: insert 999 identical rows, recording the expected rowids (1..999) in
# lRowid as a side effect.
do_test 1.1 {
for {set i 1} {$i < 1000} {incr i} {
execsql { INSERT INTO f1 VALUES('a b c d e') }
lappend lRowid $i
}
} {}
# 1.2: baseline -- an undisturbed query returns every rowid.
do_execsql_test 1.2 {
SELECT rowid FROM f1 WHERE f1 MATCH 'c';
} $lRowid
# 1.3: run 'optimize' on the same connection part-way through the scan (when
# rowid 100 is reached); the scan must still return every rowid.
do_test 1.3 {
set res [list]
db eval { SELECT rowid FROM f1 WHERE f1 MATCH 'c' } {
if {$rowid == 100} {
execsql { INSERT INTO f1(f1) VALUES('optimize') }
}
lappend res $rowid
}
set res
} $lRowid
# 1.4.1: scan through a second connection (db2) while the 'optimize' is
# attempted via catchsql, which is not given db2 explicitly. The scan must
# still return every rowid; the outcome of the 'optimize' is saved in cres.
do_test 1.4.1 {
sqlite3 db2 test.db
set res [list]
db2 eval { SELECT rowid FROM f1 WHERE f1 MATCH 'c' } {
if {$rowid == 100} {
set cres [catchsql { INSERT INTO f1(f1) VALUES('optimize') }]
}
lappend res $rowid
}
set res
} $lRowid
# 1.4.2: the 'optimize' attempted during 1.4.1 must have failed with
# "database is locked".
do_test 1.4.2 {
db2 close
set cres
} {1 {database is locked}}
#-------------------------------------------------------------------------
# Open a couple of cursors. Then close them in the same order.
#
# 2.1 prepares two statements and steps the first, 2.2 steps the second,
# and 2.3 finalizes both, first s1 and then s2. Only the result of the last
# command in each script (the second sqlite3_finalize in 2.3) is compared
# against the expected value.
#
do_test 2.1 {
  set ::s1 [sqlite3_prepare db "SELECT rowid FROM f1 WHERE f1 MATCH 'b'" -1 X]
  set ::s2 [sqlite3_prepare db "SELECT rowid FROM f1 WHERE f1 MATCH 'c'" -1 X]
  sqlite3_step $::s1
} {SQLITE_ROW}
do_test 2.2 {
  sqlite3_step $::s2
} {SQLITE_ROW}
# This test was previously also named 2.1, duplicating the name of the first
# test in this section and making a failure report ambiguous.
do_test 2.3 {
  sqlite3_finalize $::s1
  sqlite3_finalize $::s2
} {SQLITE_OK}
#-------------------------------------------------------------------------
# Copy data between two FTS5 tables.
#
# 3.1 reads f1 with a MATCH query and writes the rows to f2 within a single
# INSERT ... SELECT. 3.2 checks that f2 then contains every rowid recorded
# in lRowid.
#
do_execsql_test 3.1 {
CREATE VIRTUAL TABLE f2 USING fts5(gg);
INSERT INTO f2 SELECT ff FROM f1 WHERE f1 MATCH 'b+c+d';
}
do_execsql_test 3.2 {
SELECT rowid FROM f2 WHERE f2 MATCH 'a+b+c+d+e'
} $lRowid
#-------------------------------------------------------------------------
# Remove the row that an FTS5 cursor is currently pointing to. And
# various other similar things. Check that this does not disturb
# ongoing scans.
#
# Every row of n4 contains the token "3", so a MATCH '3' query visits all
# five rows.
#
do_execsql_test 4.0 {
CREATE VIRTUAL TABLE n4 USING fts5(n);
INSERT INTO n4(rowid, n) VALUES(100, '1 2 3 4 5');
INSERT INTO n4(rowid, n) VALUES(200, '1 2 3 4');
INSERT INTO n4(rowid, n) VALUES(300, '2 3 4');
INSERT INTO n4(rowid, n) VALUES(400, '2 3');
INSERT INTO n4(rowid, n) VALUES(500, '3');
}
# 4.1: delete the current row (300) during an ascending scan; the rows after
# it are still visited.
do_test 4.1 {
set res [list]
db eval { SELECT rowid FROM n4 WHERE n4 MATCH '3' } {
if {$rowid==300} {
execsql { DELETE FROM n4 WHERE rowid=300 }
}
lappend res $rowid
}
set res
} {100 200 300 400 500}
# 4.2: put row 300 back, then repeat the experiment with a descending scan.
do_test 4.2 {
execsql { INSERT INTO n4(rowid, n) VALUES(300, '2 3 4') }
set res [list]
db eval { SELECT rowid FROM n4 WHERE n4 MATCH '3' ORDER BY rowid DESC} {
if {$rowid==300} {
execsql { DELETE FROM n4 WHERE rowid=300 }
}
lappend res $rowid
}
set res
} {500 400 300 200 100}
# 4.3: put row 300 back, then delete every row in the middle of a descending
# scan; the scan ends after the row it was on.
do_test 4.3 {
execsql { INSERT INTO n4(rowid, n) VALUES(300, '2 3 4') }
set res [list]
db eval { SELECT rowid FROM n4 WHERE n4 MATCH '3' ORDER BY rowid DESC} {
if {$rowid==300} {
execsql { DELETE FROM n4 }
}
lappend res $rowid
}
set res
} {500 400 300}
finish_test

View File

@ -0,0 +1,181 @@
# 2014 Dec 20
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests of the scalar fts5_rowid() and fts5_decode() functions.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5rowid
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
# 1.x: argument checking and a sample result for fts5_rowid(). The expected
# error texts are compared verbatim, including the doubled closing
# parenthesis in test 1.2.
do_catchsql_test 1.1 {
SELECT fts5_rowid()
} {1 {should be: fts5_rowid(subject, ....)}}
do_catchsql_test 1.2 {
SELECT fts5_rowid('segment')
} {1 {should be: fts5_rowid('segment', segid, height, pgno))}}
do_execsql_test 1.3 {
SELECT fts5_rowid('segment', 1, 1, 1)
} {139586437121}
do_catchsql_test 1.4 {
SELECT fts5_rowid('nosucharg');
} {1 {first arg to fts5_rowid() must be 'segment' or 'start-of-index'}}
#-------------------------------------------------------------------------
# Tests of the fts5_decode() function.
#
# Start from a fresh database with a two-column table and pgsz set to 32.
#
reset_db
do_execsql_test 2.1 {
CREATE VIRTUAL TABLE x1 USING fts5(a, b);
INSERT INTO x1(x1, rank) VALUES('pgsz', 32);
} {}
# Return a list of $n random three-letter words. Each word is a random
# integer in the range 0..99, formatted as three zero-padded decimal digits
# and with every digit mapped to a letter (0->a, 1->b, ... 9->j). Because
# the value is always below 100, the first letter of every word is "a".
proc rnddoc {n} {
  set map [list 0 a 1 b 2 c 3 d 4 e 5 f 6 g 7 h 8 i 9 j]
  set doc [list]
  for {set i 0} {$i < $n} {incr i} {
    # Braced so the expression is parsed once rather than being substituted
    # a second time by expr.
    lappend doc [string map $map [format %.3d [expr {int(rand()*100)}]]]
  }
  set doc
}
# Expose the Tcl proc rnddoc to SQL as rnddoc().
db func rnddoc rnddoc
# 2.2: populate x1 with 10000 rows of random data. The recursive CTE has no
# termination condition of its own; the LIMIT clause stops it.
do_execsql_test 2.2 {
WITH r(a, b) AS (
SELECT rnddoc(6), rnddoc(6) UNION ALL
SELECT rnddoc(6), rnddoc(6) FROM r
)
INSERT INTO x1 SELECT * FROM r LIMIT 10000;
}
# 2.3: fts5_decode() must return a non-NULL value for every x1_data record.
set res [db one {SELECT count(*) FROM x1_data}]
do_execsql_test 2.3 {
SELECT count(fts5_decode(rowid, block)) FROM x1_data;
} $res
# 2.4: the same must hold after every record has been replaced by an empty
# blob.
do_execsql_test 2.4 {
UPDATE x1_data SET block = X'';
SELECT count(fts5_decode(rowid, block)) FROM x1_data;
} $res
# 2.5 - 2.7: rebuild the index with pgsz=1024 and repeat both checks.
do_execsql_test 2.5 {
INSERT INTO x1(x1, rank) VALUES('pgsz', 1024);
INSERT INTO x1(x1) VALUES('rebuild');
}
set res [db one {SELECT count(*) FROM x1_data}]
do_execsql_test 2.6 {
SELECT count(fts5_decode(rowid, block)) FROM x1_data;
} $res
do_execsql_test 2.7 {
UPDATE x1_data SET block = X'';
SELECT count(fts5_decode(rowid, block)) FROM x1_data;
} $res
#-------------------------------------------------------------------------
# Tests with very large tokens.
#
# Four single-token documents of 400, 400, 400 and 900 characters. All of
# them share the first 300 characters.
#
set strlist [list \
"[string repeat x 400]" \
"[string repeat x 300][string repeat w 100]" \
"[string repeat x 300][string repeat y 100]" \
"[string repeat x 300][string repeat z 600]" \
]
# 3.0: insert all four documents within one transaction.
do_test 3.0 {
execsql {
BEGIN;
CREATE VIRTUAL TABLE x2 USING fts5(a);
}
foreach str $strlist { execsql { INSERT INTO x2 VALUES($str) } }
execsql COMMIT
} {}
# 3.1.N: querying for the N-th token (counting from zero) must return
# exactly rowid N+1.
for {set tn 0} {$tn<[llength $strlist]} {incr tn} {
set str [lindex $strlist $tn]
do_execsql_test 3.1.$tn {
SELECT rowid FROM x2 WHERE x2 MATCH $str
} [expr $tn+1]
}
# 3.2: fts5_decode() must return a non-NULL value for every x2_data record.
set res [db one {SELECT count(*) FROM x2_data}]
do_execsql_test 3.2 {
SELECT count(fts5_decode(rowid, block)) FROM x2_data;
} $res
#-------------------------------------------------------------------------
# Leaf pages with no terms or rowids at all.
#
# Each document repeats a single one-letter token 400 times and the table
# uses pgsz=32.
#
set strlist [list \
"[string repeat {w } 400]" \
"[string repeat {x } 400]" \
"[string repeat {y } 400]" \
"[string repeat {z } 400]" \
]
do_test 4.0 {
execsql {
BEGIN;
CREATE VIRTUAL TABLE x3 USING fts5(a);
INSERT INTO x3(x3, rank) VALUES('pgsz', 32);
}
foreach str $strlist { execsql { INSERT INTO x3 VALUES($str) } }
execsql COMMIT
} {}
# 4.1.N: using the whole N-th document (counting from zero) as the query
# must return exactly rowid N+1.
for {set tn 0} {$tn<[llength $strlist]} {incr tn} {
set str [lindex $strlist $tn]
do_execsql_test 4.1.$tn {
SELECT rowid FROM x3 WHERE x3 MATCH $str
} [expr $tn+1]
}
# 4.2: fts5_decode() must return a non-NULL value for every x3_data record.
set res [db one {SELECT count(*) FROM x3_data}]
do_execsql_test 4.2 {
SELECT count(fts5_decode(rowid, block)) FROM x3_data;
} $res
#-------------------------------------------------------------------------
# Position lists with large values.
#
# In every document the token "a" comes last, after 400 other tokens.
#
set strlist [list \
"[string repeat {w } 400]a" \
"[string repeat {x } 400]a" \
"[string repeat {y } 400]a" \
"[string repeat {z } 400]a" \
]
do_test 5.0 {
execsql {
BEGIN;
CREATE VIRTUAL TABLE x4 USING fts5(a);
INSERT INTO x4(x4, rank) VALUES('pgsz', 32);
}
foreach str $strlist { execsql { INSERT INTO x4 VALUES($str) } }
execsql COMMIT
} {}
# 5.1: all four rows contain "a".
do_execsql_test 5.1 {
SELECT rowid FROM x4 WHERE x4 MATCH 'a'
} {1 2 3 4}
# 5.2: fts5_decode() must return a non-NULL value for every x4_data record.
set res [db one {SELECT count(*) FROM x4_data}]
do_execsql_test 5.2 {
SELECT count(fts5_decode(rowid, block)) FROM x4_data;
} $res
finish_test

View File

@ -0,0 +1,251 @@
# 2014 Dec 20
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on the built-in fts5 tokenizers.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5tokenizer
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize=porter);
DROP TABLE ft1;
}
do_execsql_test 1.1 {
CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize='porter');
DROP TABLE ft1;
}
do_execsql_test 1.2 {
CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize = porter);
DROP TABLE ft1;
}
do_execsql_test 1.3 {
CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize = 'porter');
DROP TABLE ft1;
}
do_execsql_test 1.4 {
CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize = 'porter ascii');
DROP TABLE ft1;
}
do_catchsql_test 1.5 {
CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize = 'nosuch');
} {1 {no such tokenizer: nosuch}}
do_catchsql_test 1.6 {
CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize = 'porter nosuch');
} {1 {error in tokenizer constructor}}
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize=porter);
INSERT INTO ft1 VALUES('embedded databases');
}
do_execsql_test 2.1 { SELECT rowid FROM ft1 WHERE ft1 MATCH 'embedding' } 1
do_execsql_test 2.2 { SELECT rowid FROM ft1 WHERE ft1 MATCH 'database' } 1
do_execsql_test 2.3 {
SELECT rowid FROM ft1 WHERE ft1 MATCH 'database embedding'
} 1
# Tokenizer constructor stub registered under the name "tcl".
#
# Saves the arguments it was invoked with in the global variable ::targs so
# that the tests can inspect them, then always fails with the error "failed".
proc tcl_create {args} {
  global targs
  set targs $args
  error failed
}
sqlite3_fts5_create_tokenizer db tcl tcl_create
foreach {tn directive expected} {
1 {tokenize='tcl a b c'} {a b c}
2 {tokenize='tcl ''d'' ''e'' ''f'''} {d e f}
3 {tokenize="tcl 'g' 'h' 'i'"} {g h i}
4 {tokenize = tcl} {}
} {
do_catchsql_test 3.$tn.1 "
CREATE VIRTUAL TABLE ft2 USING fts5(x, $directive)
" {1 {error in tokenizer constructor}}
do_test 3.$tn.2 { set ::targs } $expected
}
do_catchsql_test 4.1 {
CREATE VIRTUAL TABLE ft2 USING fts5(x, tokenize = tcl abc);
} {1 {parse error in "tokenize = tcl abc"}}
do_catchsql_test 4.2 {
CREATE VIRTUAL TABLE ft2 USING fts5(x y)
} {1 {unrecognized column option: y}}
#-------------------------------------------------------------------------
# Test the "separators" and "tokenchars" options a bit.
#
foreach {tn tokenizer} {1 ascii 2 unicode61} {
reset_db
set T "$tokenizer tokenchars ',.:' separators 'xyz'"
execsql "CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = \"$T\")"
do_execsql_test 5.$tn.1 {
INSERT INTO t1 VALUES('abcxdefyghizjkl.mno,pqr:stu/vwx+yz');
}
foreach {tn2 token res} {
1 abc 1 2 def 1 3 ghi 1 4 jkl {}
5 mno {} 6 pqr {} 7 stu {} 8 jkl.mno,pqr:stu 1
9 vw 1
} {
do_execsql_test 5.$tn.2.$tn2 "
SELECT rowid FROM t1 WHERE t1 MATCH '\"$token\"'
" $res
}
}
#-------------------------------------------------------------------------
# Miscellaneous tests for the ascii tokenizer.
#
# 5.1.*: Test that the ascii tokenizer ignores non-ASCII characters in the
# 'separators' option. But unicode61 does not.
#
# 5.2.*: An option without an argument is an error.
#
do_test 5.1.1 {
execsql "
CREATE VIRTUAL TABLE a1 USING fts5(x, tokenize=`ascii separators '\u1234'`);
INSERT INTO a1 VALUES('abc\u1234def');
"
execsql { SELECT rowid FROM a1 WHERE a1 MATCH 'def' }
} {}
do_test 5.1.2 {
execsql "
CREATE VIRTUAL TABLE a2 USING fts5(
x, tokenize=`unicode61 separators '\u1234'`);
INSERT INTO a2 VALUES('abc\u1234def');
"
execsql { SELECT rowid FROM a2 WHERE a2 MATCH 'def' }
} {1}
do_catchsql_test 5.2 {
CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'ascii tokenchars');
} {1 {error in tokenizer constructor}}
do_catchsql_test 5.3 {
CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'ascii opt arg');
} {1 {error in tokenizer constructor}}
#-------------------------------------------------------------------------
# Test that the ASCII and unicode61 tokenizers both handle SQLITE_DONE
# correctly.
#
# Token callback used with xTokenize.
#
# Appends $token to the list variable named $varname in the caller's scope.
# Returns "SQLITE_DONE" when that list holds exactly three elements after
# the append, and "SQLITE_OK" otherwise. iStart and iEnd are ignored.
proc test_token_cb {varname token iStart iEnd} {
  upvar $varname tokens
  lappend tokens $token
  expr {[llength $tokens]==3 ? "SQLITE_DONE" : "SQLITE_OK"}
}
# Implementation of the tokenize() auxiliary function.
#
# Fetches the text of column 0 through the API command $cmd, runs it through
# xTokenize with test_token_cb as the token callback, and returns the list
# of tokens that the callback collected.
proc tokenize {cmd} {
  set tokens [list]
  set text [$cmd xColumnText 0]
  # test_token_cb appends to the local variable "tokens" through upvar.
  $cmd xTokenize $text [list test_token_cb tokens]
  return $tokens
}
sqlite3_fts5_create_function db tokenize tokenize
do_execsql_test 6.0 {
CREATE VIRTUAL TABLE x1 USING fts5(a, tokenize=ascii);
INSERT INTO x1 VALUES('q w e r t y');
INSERT INTO x1 VALUES('y t r e w q');
SELECT tokenize(x1) FROM x1 WHERE x1 MATCH 'e AND r';
} {
{q w e} {y t r}
}
do_execsql_test 6.1 {
CREATE VIRTUAL TABLE x2 USING fts5(a, tokenize=unicode61);
INSERT INTO x2 VALUES('q w e r t y');
INSERT INTO x2 VALUES('y t r e w q');
SELECT tokenize(x2) FROM x2 WHERE x2 MATCH 'e AND r';
} {
{q w e} {y t r}
}
#-------------------------------------------------------------------------
# Miscellaneous tests for the unicode tokenizer.
#
# Each of the following unicode61 option strings must be rejected by the
# tokenizer constructor. The tests are numbered from 6.2 because 6.0 and 6.1
# are already used by the SQLITE_DONE tests above; reusing 6.1 would make a
# failure report ambiguous.
do_catchsql_test 6.2 {
  CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'unicode61 tokenchars');
} {1 {error in tokenizer constructor}}
do_catchsql_test 6.3 {
  CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'unicode61 a b');
} {1 {error in tokenizer constructor}}
do_catchsql_test 6.4 {
  CREATE VIRTUAL TABLE a3 USING fts5(
    x, y, tokenize = 'unicode61 remove_diacritics 2'
  );
} {1 {error in tokenizer constructor}}
do_catchsql_test 6.5 {
  CREATE VIRTUAL TABLE a3 USING fts5(
    x, y, tokenize = 'unicode61 remove_diacritics 10'
  );
} {1 {error in tokenizer constructor}}
#-------------------------------------------------------------------------
# Porter tokenizer with very large tokens.
#
set a [string repeat a 100]
set b [string repeat b 500]
set c [string repeat c 1000]
do_execsql_test 7.0 {
CREATE VIRTUAL TABLE e5 USING fts5(x, tokenize=porter);
INSERT INTO e5 VALUES($a || ' ' || $b);
INSERT INTO e5 VALUES($b || ' ' || $c);
INSERT INTO e5 VALUES($c || ' ' || $a);
}
do_execsql_test 7.1 {SELECT rowid FROM e5 WHERE e5 MATCH $a} { 1 3 }
do_execsql_test 7.2 {SELECT rowid FROM e5 WHERE e5 MATCH $b} { 1 2 }
do_execsql_test 7.3 {SELECT rowid FROM e5 WHERE e5 MATCH $c} { 2 3 }
#-------------------------------------------------------------------------
# Test the 'separators' option with the unicode61 tokenizer.
#
do_execsql_test 8.1 {
BEGIN;
CREATE VIRTUAL TABLE e6 USING fts5(x,
tokenize="unicode61 separators ABCDEFGHIJKLMNOPQRSTUVWXYZ"
);
INSERT INTO e6 VALUES('theAquickBbrownCfoxDjumpedWoverXtheYlazyZdog');
CREATE VIRTUAL TABLE e7 USING fts5vocab(e6, 'row');
SELECT term FROM e7;
ROLLBACK;
} {
brown dog fox jumped lazy over quick the
}
do_execsql_test 8.2 [subst {
BEGIN;
CREATE VIRTUAL TABLE e6 USING fts5(x,
tokenize="unicode61 separators '\u0E01\u0E02\u0E03\u0E04\u0E05\u0E06\u0E07'"
);
INSERT INTO e6 VALUES('the\u0E01quick\u0E01brown\u0E01fox\u0E01'
|| 'jumped\u0E01over\u0E01the\u0E01lazy\u0E01dog'
);
INSERT INTO e6 VALUES('\u0E08\u0E07\u0E09');
CREATE VIRTUAL TABLE e7 USING fts5vocab(e6, 'row');
SELECT term FROM e7;
ROLLBACK;
}] [subst {
brown dog fox jumped lazy over quick the \u0E08 \u0E09
}]
finish_test

View File

@ -0,0 +1,62 @@
# 2014 Dec 20
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on the fts5 tokenizers
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5unicode
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
# Run test case $tn in the caller's scope.
#
# The test tokenizes $input with tokenizer $tokenizer using
# [sqlite3_fts5_tokenize db ...], keeps the first element of every group of
# three values returned, and compares the resulting list with $output.
# $output is re-listed so that it is passed to do_test in canonical list form.
proc tokenize_test {tn tokenizer input output} {
  set script [string map [list %TOKENIZER% $tokenizer %INPUT% $input] {
    set ret {}
    foreach {z s e} [sqlite3_fts5_tokenize db {%TOKENIZER%} {%INPUT%}] {
      lappend ret $z
    }
    set ret
  }]
  set expected [list {*}$output]
  uplevel [list do_test $tn $script $expected]
}
foreach {tn t} {1 ascii 2 unicode61} {
tokenize_test 1.$tn.0 $t {A B C D} {a b c d}
tokenize_test 1.$tn.1 $t {May you share freely,} {may you share freely}
tokenize_test 1.$tn.2 $t {..May...you.shAre.freely} {may you share freely}
tokenize_test 1.$tn.3 $t {} {}
}
#-------------------------------------------------------------------------
# Check that "unicode61" really is the default tokenizer.
#
do_execsql_test 2.0 "
CREATE VIRTUAL TABLE t1 USING fts5(x);
CREATE VIRTUAL TABLE t2 USING fts5(x, tokenize = unicode61);
CREATE VIRTUAL TABLE t3 USING fts5(x, tokenize = ascii);
INSERT INTO t1 VALUES('\xC0\xC8\xCC');
INSERT INTO t2 VALUES('\xC0\xC8\xCC');
INSERT INTO t3 VALUES('\xC0\xC8\xCC');
"
breakpoint
do_execsql_test 2.1 "
SELECT 't1' FROM t1 WHERE t1 MATCH '\xE0\xE8\xEC';
SELECT 't2' FROM t2 WHERE t2 MATCH '\xE0\xE8\xEC';
SELECT 't3' FROM t3 WHERE t3 MATCH '\xE0\xE8\xEC';
" {t1 t2}
finish_test

View File

@ -0,0 +1,589 @@
# 2012 May 25
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
#
# The tests in this file focus on testing the "unicode" FTS tokenizer.
#
# This is a modified copy of FTS4 test file "fts4_unicode.test".
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5unicode2
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
# Run test case $tn in the caller's scope: tokenize $input with
# "unicode61 remove_diacritics 0" via [sqlite3_fts5_tokenize -subst] and
# compare the result with the list $res. $res is re-listed so that a
# multi-line braced value is passed to do_test in canonical list form.
proc do_unicode_token_test {tn input res} {
  set tokenizer "unicode61 remove_diacritics 0"
  set script    [list sqlite3_fts5_tokenize -subst db $tokenizer $input]
  set expected  [list {*}$res]
  uplevel [list do_test $tn $script $expected]
}
# Run test case $tn in the caller's scope: tokenize $input with the
# "unicode61" tokenizer (no options) via [sqlite3_fts5_tokenize -subst] and
# compare the result with the list $res. $res is re-listed so that a
# multi-line braced value is passed to do_test in canonical list form.
proc do_unicode_token_test2 {tn input res} {
  set script   [list sqlite3_fts5_tokenize -subst db "unicode61" $input]
  set expected [list {*}$res]
  uplevel [list do_test $tn $script $expected]
}
# Usage: do_unicode_token_test3 TN ?OPTION ...? INPUT RESULT
#
# The last two arguments are the text to tokenize and the expected result
# list. All arguments before them are appended to "unicode61" to form the
# tokenizer specification. The test itself runs in the caller's scope.
proc do_unicode_token_test3 {tn args} {
  set input     [lindex $args end-1]
  set res       [lindex $args end]
  set options   [lrange $args 0 end-2]
  set tokenizer [concat unicode61 {*}$options]
  set script    [list sqlite3_fts5_tokenize -subst db $tokenizer $input]
  uplevel [list do_test $tn $script [list {*}$res]]
}
do_unicode_token_test 1.0 {a B c D} {a a b B c c d D}
do_unicode_token_test 1.1 "\uC4 \uD6 \uDC" \
"\uE4 \uC4 \uF6 \uD6 \uFC \uDC"
do_unicode_token_test 1.2 "x\uC4x x\uD6x x\uDCx" \
"x\uE4x x\uC4x x\uF6x x\uD6x x\uFCx x\uDCx"
# 0x00DF is a small "sharp s". 0x1E9E is a capital sharp s.
do_unicode_token_test 1.3 "\uDF" "\uDF \uDF"
do_unicode_token_test 1.4 "\u1E9E" "\uDF \u1E9E"
do_unicode_token_test 1.5 "The quick brown fox" {
the The quick quick brown brown fox fox
}
do_unicode_token_test 1.6 "The\u00bfquick\u224ebrown\u2263fox" {
the The quick quick brown brown fox fox
}
do_unicode_token_test2 1.7 {a B c D} {a a b B c c d D}
do_unicode_token_test2 1.8 "\uC4 \uD6 \uDC" "a \uC4 o \uD6 u \uDC"
do_unicode_token_test2 1.9 "x\uC4x x\uD6x x\uDCx" \
"xax x\uC4x xox x\uD6x xux x\uDCx"
# Check that diacritics are removed if remove_diacritics=1 is specified.
# And that they do not break tokens.
do_unicode_token_test2 1.10 "xx\u0301xx" "xxxx xx\u301xx"
# Title-case mappings work
do_unicode_token_test 1.11 "\u01c5" "\u01c6 \u01c5"
do_unicode_token_test 1.12 "\u00C1abc\u00C2 \u00D1def\u00C3" \
"\u00E1abc\u00E2 \u00C1abc\u00C2 \u00F1def\u00E3 \u00D1def\u00C3"
do_unicode_token_test 1.13 "\u00A2abc\u00A3 \u00A4def\u00A5" \
"abc abc def def"
#-------------------------------------------------------------------------
#
set docs [list {
Enhance the INSERT syntax to allow multiple rows to be inserted via the
VALUES clause.
} {
Enhance the CREATE VIRTUAL TABLE command to support the IF NOT EXISTS clause.
} {
Added the sqlite3_stricmp() interface as a counterpart to sqlite3_strnicmp().
} {
Added the sqlite3_db_readonly() interface.
} {
Added the SQLITE_FCNTL_PRAGMA file control, giving VFS implementations the
ability to add new PRAGMA statements or to override built-in PRAGMAs.
} {
Queries of the form: "SELECT max(x), y FROM table" returns the value of y on
the same row that contains the maximum x value.
} {
Added support for the FTS4 languageid option.
} {
Documented support for the FTS4 content option. This feature has actually
been in the code since version 3.7.9 but is only now considered to be
officially supported.
} {
Pending statements no longer block ROLLBACK. Instead, the pending statement
will return SQLITE_ABORT upon next access after the ROLLBACK.
} {
Improvements to the handling of CSV inputs in the command-line shell
} {
Fix a bug introduced in version 3.7.10 that might cause a LEFT JOIN to be
incorrectly converted into an INNER JOIN if the WHERE clause indexable terms
connected by OR.
}]
set map(a) [list "\u00C4" "\u00E4"] ; # LATIN LETTER A WITH DIAERESIS
set map(e) [list "\u00CB" "\u00EB"] ; # LATIN LETTER E WITH DIAERESIS
set map(i) [list "\u00CF" "\u00EF"] ; # LATIN LETTER I WITH DIAERESIS
set map(o) [list "\u00D6" "\u00F6"] ; # LATIN LETTER O WITH DIAERESIS
set map(u) [list "\u00DC" "\u00FC"] ; # LATIN LETTER U WITH DIAERESIS
set map(y) [list "\u0178" "\u00FF"] ; # LATIN LETTER Y WITH DIAERESIS
set map(h) [list "\u1E26" "\u1E27"] ; # LATIN LETTER H WITH DIAERESIS
set map(w) [list "\u1E84" "\u1E85"] ; # LATIN LETTER W WITH DIAERESIS
set map(x) [list "\u1E8C" "\u1E8D"] ; # LATIN LETTER X WITH DIAERESIS
foreach k [array names map] {
lappend mappings [string toupper $k] [lindex $map($k) 0]
lappend mappings $k [lindex $map($k) 1]
}
# Normalize $doc and apply the global character mapping to it.
#
# Every run of white-space is collapsed to a single space, leading and
# trailing white-space is removed, and the result is passed through
# [string map] using the global list ::mappings.
proc mapdoc {doc} {
  global mappings
  regsub -all {[[:space:]]+} $doc " " collapsed
  return [string map $mappings [string trim $collapsed]]
}
do_test 2.0 {
execsql { CREATE VIRTUAL TABLE t2 USING fts5(tokenize=unicode61, x); }
foreach doc $docs {
set d [mapdoc $doc]
execsql { INSERT INTO t2 VALUES($d) }
}
} {}
do_test 2.1 {
set q [mapdoc "row"]
execsql { SELECT * FROM t2 WHERE t2 MATCH $q }
} [list [mapdoc {
Queries of the form: "SELECT max(x), y FROM table" returns the value of y on
the same row that contains the maximum x value.
}]]
foreach {tn query snippet} {
2 "row" {
...returns the value of y on the same [row] that contains
the maximum x value.
}
3 "ROW" {
...returns the value of y on the same [row] that contains
the maximum x value.
}
4 "rollback" {
...[ROLLBACK]. Instead, the pending statement
will return SQLITE_ABORT upon next access after the [ROLLBACK].
}
5 "rOllback" {
...[ROLLBACK]. Instead, the pending statement
will return SQLITE_ABORT upon next access after the [ROLLBACK].
}
6 "lang*" {
Added support for the FTS4 [languageid] option.
}
} {
do_test 2.$tn {
set q [mapdoc $query]
execsql {
SELECT snippet(t2, -1, '[', ']', '...', 15) FROM t2 WHERE t2 MATCH $q
}
} [list [mapdoc $snippet]]
}
#-------------------------------------------------------------------------
# Make sure the unicode61 tokenizer does not crash if it is passed a
# NULL pointer.
reset_db
do_execsql_test 3.1 {
CREATE VIRTUAL TABLE t1 USING fts5(tokenize=unicode61, x, y);
INSERT INTO t1 VALUES(NULL, 'a b c');
}
do_execsql_test 3.2 {
SELECT snippet(t1, -1, '[', ']', '...', 15) FROM t1 WHERE t1 MATCH 'b'
} {{a [b] c}}
do_execsql_test 3.3 {
BEGIN;
DELETE FROM t1;
INSERT INTO t1 VALUES('b b b b b b b b b b b', 'b b b b b b b b b b b b b');
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 VALUES('a b c', NULL);
INSERT INTO t1 VALUES('a x c', NULL);
COMMIT;
}
do_execsql_test 3.4 {
SELECT * FROM t1 WHERE t1 MATCH 'a b';
} {{a b c} {}}
#-------------------------------------------------------------------------
#
reset_db
do_test 4.1 {
set a "abc\uFFFEdef"
set b "abc\uD800def"
set c "\uFFFEdef"
set d "\uD800def"
execsql {
CREATE VIRTUAL TABLE t1 USING fts5(tokenize=unicode61, x);
INSERT INTO t1 VALUES($a);
INSERT INTO t1 VALUES($b);
INSERT INTO t1 VALUES($c);
INSERT INTO t1 VALUES($d);
}
execsql "CREATE VIRTUAL TABLE t8 USING fts5(
a, b, tokenize=\"unicode61 separators '\uFFFE\uD800\u00BF'\"
)"
} {}
do_test 4.2 {
set a [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0x62}]
set b [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0x62}]
set c [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]
set d [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]
execsql {
INSERT INTO t1 VALUES($a);
INSERT INTO t1 VALUES($b);
INSERT INTO t1 VALUES($c);
INSERT INTO t1 VALUES($d);
}
} {}
do_test 4.3 {
set a [binary format c* {0xF7 0xBF 0xBF 0xBF}]
set b [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF}]
set c [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF 0xBF}]
set d [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF}]
execsql {
INSERT INTO t1 VALUES($a);
INSERT INTO t1 VALUES($b);
INSERT INTO t1 VALUES($c);
INSERT INTO t1 VALUES($d);
}
} {}
do_test 4.4 {
sqlite3_exec_hex db {
CREATE VIRTUAL TABLE t9 USING fts5(a, b,
tokenize="unicode61 separators '%C09004'"
);
INSERT INTO t9(a) VALUES('abc%88def %89ghi%90');
}
} {0 {}}
#-------------------------------------------------------------------------
breakpoint
do_unicode_token_test3 5.1 {tokenchars {}} {
sqlite3_reset sqlite3_column_int
} {
sqlite3 sqlite3
reset reset
sqlite3 sqlite3
column column
int int
}
do_unicode_token_test3 5.2 {tokenchars _} {
sqlite3_reset sqlite3_column_int
} {
sqlite3_reset sqlite3_reset
sqlite3_column_int sqlite3_column_int
}
do_unicode_token_test3 5.3 {separators xyz} {
Laotianxhorseyrunszfast
} {
laotian Laotian
horse horse
runs runs
fast fast
}
do_unicode_token_test3 5.4 {tokenchars xyz} {
Laotianxhorseyrunszfast
} {
laotianxhorseyrunszfast Laotianxhorseyrunszfast
}
do_unicode_token_test3 5.5 {tokenchars _} {separators zyx} {
sqlite3_resetxsqlite3_column_intyhonda_phantom
} {
sqlite3_reset sqlite3_reset
sqlite3_column_int sqlite3_column_int
honda_phantom honda_phantom
}
do_unicode_token_test3 5.6 "separators \u05D1" "abc\u05D1def" {
abc abc def def
}
do_unicode_token_test3 5.7 \
"tokenchars \u2444\u2445" \
"separators \u05D0\u05D1\u05D2" \
"\u2444fre\u2445sh\u05D0water\u05D2fish.\u2445timer" \
[list \
\u2444fre\u2445sh \u2444fre\u2445sh \
water water \
fish fish \
\u2445timer \u2445timer \
]
# Check that it is not possible to add a standalone diacritic codepoint
# to either separators or tokenchars.
do_unicode_token_test3 5.8 "separators \u0301" \
"hello\u0301world \u0301helloworld" \
"helloworld hello\u0301world helloworld helloworld"
do_unicode_token_test3 5.9 "tokenchars \u0301" \
"hello\u0301world \u0301helloworld" \
"helloworld hello\u0301world helloworld helloworld"
do_unicode_token_test3 5.10 "separators \u0301" \
"remove_diacritics 0" \
"hello\u0301world \u0301helloworld" \
"hello\u0301world hello\u0301world helloworld helloworld"
do_unicode_token_test3 5.11 "tokenchars \u0301" \
"remove_diacritics 0" \
"hello\u0301world \u0301helloworld" \
"hello\u0301world hello\u0301world helloworld helloworld"
#-------------------------------------------------------------------------
# Tokenize $txt with tokenizer $tokenizer and return a list of tokens.
#
# [sqlite3_fts5_tokenize -subst] returns its values in pairs; only the first
# element of each pair is kept.
proc do_tokenize {tokenizer txt} {
  set pairs  [sqlite3_fts5_tokenize -subst db $tokenizer $txt]
  set tokens [list]
  for {set i 0} {$i < [llength $pairs]} {incr i 2} {
    lappend tokens [lindex $pairs $i]
  }
  return $tokens
}
# Argument $lCp must be a list of codepoints (integers) that
# correspond to whitespace characters. This command creates a string
# $W from the codepoints, then tokenizes "${W}hello${W}world${W}"
# using tokenizer $tokenizer. The test passes if the tokenizer successfully
# extracts the two 5 character tokens.
#
# Run test case $tn in the caller's scope.
#
# A separator string is built from the integer codepoints in $lCp and placed
# before, between and after the words "hello" and "world". The test expects
# do_tokenize, using tokenizer $tokenizer, to return {hello world} for that
# text.
proc do_isspace_test {tn tokenizer lCp} {
  set whitespace ""
  foreach cp $lCp {
    append whitespace [format %c $cp]
  }
  set txt [join [list "" hello world ""] $whitespace]
  set script [list do_tokenize $tokenizer $txt]
  uplevel [list do_test $tn $script {hello world}]
}
set tokenizers [list unicode61]
ifcapable icu { lappend tokenizers icu }
# Some tests to check that the tokenizers can both identify white-space
# codepoints. All codepoints tested below are of type "Zs" in the
# UnicodeData.txt file.
foreach T $tokenizers {
do_isspace_test 6.$T.1 $T 32
do_isspace_test 6.$T.2 $T 160
do_isspace_test 6.$T.3 $T 5760
do_isspace_test 6.$T.4 $T 6158
do_isspace_test 6.$T.5 $T 8192
do_isspace_test 6.$T.6 $T 8193
do_isspace_test 6.$T.7 $T 8194
do_isspace_test 6.$T.8 $T 8195
do_isspace_test 6.$T.9 $T 8196
do_isspace_test 6.$T.10 $T 8197
do_isspace_test 6.$T.11 $T 8198
do_isspace_test 6.$T.12 $T 8199
do_isspace_test 6.$T.13 $T 8200
do_isspace_test 6.$T.14 $T 8201
do_isspace_test 6.$T.15 $T 8202
do_isspace_test 6.$T.16 $T 8239
do_isspace_test 6.$T.17 $T 8287
do_isspace_test 6.$T.18 $T 12288
do_isspace_test 6.$T.19 $T {32 160 5760 6158}
do_isspace_test 6.$T.20 $T {8192 8193 8194 8195}
do_isspace_test 6.$T.21 $T {8196 8197 8198 8199}
do_isspace_test 6.$T.22 $T {8200 8201 8202 8239}
do_isspace_test 6.$T.23 $T {8287 12288}
}
#-------------------------------------------------------------------------
# Test that the private use ranges are treated as alphanumeric.
#
foreach {tn1 c} {
1 \ue000 2 \ue001 3 \uf000 4 \uf8fe 5 \uf8ff
} {
foreach {tn2 config res} {
1 "" "hello*world hello*world"
2 "separators *" "hello hello world world"
} {
set config [string map [list * $c] $config]
set input [string map [list * $c] "hello*world"]
set output [string map [list * $c] $res]
do_unicode_token_test3 7.$tn1.$tn2 {*}$config $input $output
}
}
#-------------------------------------------------------------------------
# Cursory test of remove_diacritics=0.
#
# 00C4;LATIN CAPITAL LETTER A WITH DIAERESIS
# 00D6;LATIN CAPITAL LETTER O WITH DIAERESIS
# 00E4;LATIN SMALL LETTER A WITH DIAERESIS
# 00F6;LATIN SMALL LETTER O WITH DIAERESIS
#
do_execsql_test 8.1.1 "
CREATE VIRTUAL TABLE t3 USING fts5(
content, tokenize='unicode61 remove_diacritics 1'
);
INSERT INTO t3 VALUES('o');
INSERT INTO t3 VALUES('a');
INSERT INTO t3 VALUES('O');
INSERT INTO t3 VALUES('A');
INSERT INTO t3 VALUES('\xD6');
INSERT INTO t3 VALUES('\xC4');
INSERT INTO t3 VALUES('\xF6');
INSERT INTO t3 VALUES('\xE4');
"
do_execsql_test 8.1.2 {
SELECT rowid FROM t3 WHERE t3 MATCH 'o' ORDER BY rowid ASC;
} {1 3 5 7}
do_execsql_test 8.1.3 {
SELECT rowid FROM t3 WHERE t3 MATCH 'a' ORDER BY rowid ASC;
} {2 4 6 8}
do_execsql_test 8.2.1 {
CREATE VIRTUAL TABLE t4 USING fts5(
content, tokenize='unicode61 remove_diacritics 0'
);
INSERT INTO t4 SELECT * FROM t3 ORDER BY rowid ASC;
}
do_execsql_test 8.2.2 {
SELECT rowid FROM t4 WHERE t4 MATCH 'o' ORDER BY rowid ASC;
} {1 3}
do_execsql_test 8.2.3 {
SELECT rowid FROM t4 WHERE t4 MATCH 'a' ORDER BY rowid ASC;
} {2 4}
#-------------------------------------------------------------------------
#
if 0 {
foreach {tn sql} {
1 {
CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 [tokenchars= .]);
CREATE VIRTUAL TABLE t6 USING fts4(
tokenize=unicode61 [tokenchars=="] "tokenchars=[]");
CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 [separators=x\xC4]);
}
2 {
CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 "tokenchars= .");
CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 "tokenchars=[=""]");
CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 "separators=x\xC4");
}
3 {
CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 'tokenchars= .');
CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 'tokenchars=="[]');
CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 'separators=x\xC4');
}
4 {
CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 `tokenchars= .`);
CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 `tokenchars=[="]`);
CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 `separators=x\xC4`);
}
} {
do_execsql_test 9.$tn.0 {
DROP TABLE IF EXISTS t5;
DROP TABLE IF EXISTS t5aux;
DROP TABLE IF EXISTS t6;
DROP TABLE IF EXISTS t6aux;
DROP TABLE IF EXISTS t7;
DROP TABLE IF EXISTS t7aux;
}
do_execsql_test 9.$tn.1 $sql
do_execsql_test 9.$tn.2 {
CREATE VIRTUAL TABLE t5aux USING fts4aux(t5);
INSERT INTO t5 VALUES('one two three/four.five.six');
SELECT * FROM t5aux;
} {
four.five.six * 1 1 four.five.six 0 1 1
{one two three} * 1 1 {one two three} 0 1 1
}
do_execsql_test 9.$tn.3 {
CREATE VIRTUAL TABLE t6aux USING fts4aux(t6);
INSERT INTO t6 VALUES('alpha=beta"gamma/delta[epsilon]zeta');
SELECT * FROM t6aux;
} {
{alpha=beta"gamma} * 1 1 {alpha=beta"gamma} 0 1 1
{delta[epsilon]zeta} * 1 1 {delta[epsilon]zeta} 0 1 1
}
do_execsql_test 9.$tn.4 {
CREATE VIRTUAL TABLE t7aux USING fts4aux(t7);
INSERT INTO t7 VALUES('alephxbeth\xC4gimel');
SELECT * FROM t7aux;
} {
aleph * 1 1 aleph 0 1 1
beth * 1 1 beth 0 1 1
gimel * 1 1 gimel 0 1 1
}
}
# Check that multiple options are handled correctly.
#
do_execsql_test 10.1 {
DROP TABLE IF EXISTS t1;
CREATE VIRTUAL TABLE t1 USING fts4(tokenize=unicode61
"tokenchars=xyz" "tokenchars=.=" "separators=.=" "separators=xy"
"separators=a" "separators=a" "tokenchars=a" "tokenchars=a"
);
INSERT INTO t1 VALUES('oneatwoxthreeyfour');
INSERT INTO t1 VALUES('a.single=word');
CREATE VIRTUAL TABLE t1aux USING fts4aux(t1);
SELECT * FROM t1aux;
} {
.single=word * 1 1 .single=word 0 1 1
four * 1 1 four 0 1 1
one * 1 1 one 0 1 1
three * 1 1 three 0 1 1
two * 1 1 two 0 1 1
}
# Test that case folding happens after tokenization, not before.
#
do_execsql_test 10.2 {
DROP TABLE IF EXISTS t2;
CREATE VIRTUAL TABLE t2 USING fts4(tokenize=unicode61 "separators=aB");
INSERT INTO t2 VALUES('oneatwoBthree');
INSERT INTO t2 VALUES('onebtwoAthree');
CREATE VIRTUAL TABLE t2aux USING fts4aux(t2);
SELECT * FROM t2aux;
} {
one * 1 1 one 0 1 1
onebtwoathree * 1 1 onebtwoathree 0 1 1
three * 1 1 three 0 1 1
two * 1 1 two 0 1 1
}
# Test that the tokenchars and separators options work with the
# fts3tokenize table.
#
do_execsql_test 11.1 {
CREATE VIRTUAL TABLE ft1 USING fts3tokenize(
"unicode61", "tokenchars=@.", "separators=1234567890"
);
SELECT token FROM ft1 WHERE input = 'berlin@street123sydney.road';
} {
berlin@street sydney.road
}
}
finish_test

View File

@ -0,0 +1,129 @@
# 2014 Dec 20
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on the fts5 tokenizers
#
source [file join [file dirname [info script]] fts5_common.tcl]
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
# Return the path of $file inside the fts3 "unicode" directory, which is
# located at ../../fts3/unicode relative to the directory that contains the
# script currently being evaluated.
proc fts3_unicode_path {file} {
  set here [file dirname [info script]]
  return [file join $here .. .. fts3 unicode $file]
}
source [fts3_unicode_path parseunicode.tcl]
set testprefix fts5unicode3
set CF [fts3_unicode_path CaseFolding.txt]
set UD [fts3_unicode_path UnicodeData.txt]
tl_load_casefolding_txt $CF
foreach x [an_load_unicodedata_text $UD] {
set aNotAlnum($x) 1
}
foreach {y} [rd_load_unicodedata_text $UD] {
foreach {code ascii} $y {}
if {$ascii==""} {
set int 0
} else {
binary scan $ascii c int
}
set aDiacritic($code) $int
}
# Tcl reference implementation that fts5_fold() is compared against.
#
#   i                 Codepoint to fold.
#   bRemoveDiacritic  If true, also apply the aDiacritic() mapping.
#
# The codepoint is first mapped through the global array tl_lookup_table, if
# it has an entry there. If bRemoveDiacritic is true and the resulting value
# has an entry in the global array aDiacritic, it is replaced by that entry.
# The final value is returned as evaluated by [expr].
proc tcl_fold {i {bRemoveDiacritic 0}} {
  global tl_lookup_table aDiacritic
  if {[info exists tl_lookup_table($i)]} {
    set i $tl_lookup_table($i)
  }
  if {$bRemoveDiacritic && [info exists aDiacritic($i)]} {
    set i $aDiacritic($i)
  }
  # Braced so that the value of $i is used as an operand rather than being
  # re-parsed as an expression.
  expr {$i}
}
db func tcl_fold tcl_fold
# Tcl reference implementation that fts5_isalnum() is compared against.
#
# Returns 0 if codepoint $i has an entry in the global array aNotAlnum, and
# 1 otherwise.
proc tcl_isalnum {i} {
  expr {[info exists ::aNotAlnum($i)] ? 0 : 1}
}
db func tcl_isalnum tcl_isalnum
do_catchsql_test 1.0.1 {
SELECT fts5_isalnum(1, 2, 3);
} {1 {wrong number of arguments to function fts5_isalnum}}
do_catchsql_test 1.0.2 {
SELECT fts5_fold();
} {1 {wrong number of arguments to function fts5_fold}}
do_catchsql_test 1.0.3 {
SELECT fts5_fold(1,2,3);
} {1 {wrong number of arguments to function fts5_fold}}
do_execsql_test 1.1 {
WITH ii(i) AS (
SELECT -1
UNION ALL
SELECT i+1 FROM ii WHERE i<100000
)
SELECT count(*), min(i) FROM ii WHERE fts5_fold(i)!=CAST(tcl_fold(i) AS int);
} {0 {}}
do_execsql_test 1.2 {
WITH ii(i) AS (
SELECT -1
UNION ALL
SELECT i+1 FROM ii WHERE i<100000
)
SELECT count(*), min(i) FROM ii
WHERE fts5_fold(i,1)!=CAST(tcl_fold(i,1) AS int);
} {0 {}}
do_execsql_test 1.3 {
WITH ii(i) AS (
SELECT -1
UNION ALL
SELECT i+1 FROM ii WHERE i<100000
)
SELECT count(*), min(i) FROM ii
WHERE fts5_isalnum(i)!=CAST(tcl_isalnum(i) AS int);
} {0 {}}
do_test 1.4 {
set str {CREATE VIRTUAL TABLE f3 USING fts5(a, tokenize=}
append str {"unicode61 separators '}
for {set i 700} {$i<900} {incr i} {
append str [format %c $i]
}
append str {'");}
execsql $str
} {}
do_test 1.5 {
set str {CREATE VIRTUAL TABLE f5 USING fts5(a, tokenize=}
append str {"unicode61 tokenchars '}
for {set i 700} {$i<900} {incr i} {
append str [format %c $i]
}
append str {'");}
execsql $str
} {}
finish_test

View File

@ -0,0 +1,79 @@
# 2015 Apr 24
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# The tests in this file focus on "unindexed" columns.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5unindexed
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
do_execsql_test 1.1 {
CREATE VIRTUAL TABLE t1 USING fts5(a, b UNINDEXED);
INSERT INTO t1 VALUES('a b c', 'd e f');
INSERT INTO t1 VALUES('g h i', 'j k l');
} {}
do_execsql_test 1.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'b' } {1}
do_execsql_test 1.3 { SELECT rowid FROM t1 WHERE t1 MATCH 'e' } {}
do_execsql_test 1.4 { INSERT INTO t1(t1) VALUES('integrity-check') } {}
do_execsql_test 1.5 { INSERT INTO t1(t1) VALUES('rebuild') } {}
do_execsql_test 1.6 { INSERT INTO t1(t1) VALUES('integrity-check') } {}
do_execsql_test 1.7 { SELECT rowid FROM t1 WHERE t1 MATCH 'b' } {1}
do_execsql_test 1.8 { SELECT rowid FROM t1 WHERE t1 MATCH 'e' } {}
do_execsql_test 1.9 { DELETE FROM t1 WHERE t1 MATCH 'b' } {}
do_execsql_test 1.10 { INSERT INTO t1(t1) VALUES('integrity-check') } {}
do_execsql_test 1.11 { INSERT INTO t1(t1) VALUES('rebuild') } {}
do_execsql_test 1.12 { INSERT INTO t1(t1) VALUES('integrity-check') } {}
do_execsql_test 1.13 { SELECT rowid FROM t1 WHERE t1 MATCH 'i' } {2}
do_execsql_test 1.14 { SELECT rowid FROM t1 WHERE t1 MATCH 'l' } {}
do_execsql_test 2.1 {
CREATE VIRTUAL TABLE t2 USING fts5(a UNINDEXED, b UNINDEXED);
INSERT INTO t1 VALUES('a b c', 'd e f');
INSERT INTO t1 VALUES('g h i', 'j k l');
SELECT rowid FROM t2_data;
} {1 10}
do_execsql_test 2.2 {
INSERT INTO t2(t2) VALUES('rebuild');
INSERT INTO t2(t2) VALUES('integrity-check');
SELECT rowid FROM t2_data;
} {1 10}
do_execsql_test 3.1 {
CREATE TABLE x4(i INTEGER PRIMARY KEY, a, b, c);
CREATE VIRTUAL TABLE t4 USING fts5(a, b UNINDEXED, c, content=x4);
INSERT INTO x4 VALUES(10, 'a b c', 'd e f', 'g h i');
INSERT INTO x4 VALUES(20, 'j k l', 'm n o', 'p q r');
INSERT INTO t4(t4) VALUES('rebuild');
INSERT INTO t4(t4) VALUES('integrity-check');
} {}
do_execsql_test 3.2 {
INSERT INTO t4(t4, rowid, a, b, c) VALUES('delete', 20, 'j k l', '', 'p q r');
DELETE FROM x4 WHERE rowid=20;
INSERT INTO t4(t4) VALUES('integrity-check');
} {}
finish_test

View File

@ -0,0 +1,65 @@
# 2015 Apr 24
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# The tests in this file focus on testing that unrecognized file-format
# versions are detected and reported.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5version
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
do_execsql_test 1.1 {
CREATE VIRTUAL TABLE t1 USING fts5(one);
INSERT INTO t1 VALUES('a b c d');
} {}
do_execsql_test 1.2 {
SELECT * FROM t1_config WHERE k='version'
} {version 2}
do_execsql_test 1.3 {
SELECT rowid FROM t1 WHERE t1 MATCH 'a';
} {1}
do_execsql_test 1.4 {
UPDATE t1_config set v=3 WHERE k='version';
}
do_test 1.5 {
db close
sqlite3 db test.db
catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' }
} {1 {invalid fts5 file format (found 3, expected 2) - run 'rebuild'}}
# With the version record still set to 3, reopen the database and check that
# a write to the table is rejected with the same file-format error that
# test 1.5 expects for a read.
do_test 1.6 {
  db close
  sqlite3 db test.db
  catchsql { INSERT INTO t1 VALUES('x y z') }
} {1 {invalid fts5 file format (found 3, expected 2) - run 'rebuild'}}
do_test 1.7 {
execsql { DELETE FROM t1_config WHERE k='version' }
db close
sqlite3 db test.db
catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' }
} {1 {invalid fts5 file format (found 0, expected 2) - run 'rebuild'}}
finish_test

View File

@ -0,0 +1,217 @@
# 2015 Apr 24
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# The tests in this file focus on testing the fts5vocab module.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5vocab
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
do_execsql_test 1.1.1 {
CREATE VIRTUAL TABLE t1 USING fts5(one, prefix=1);
CREATE VIRTUAL TABLE v1 USING fts5vocab(t1, 'row');
PRAGMA table_info = v1;
} {
0 term {} 0 {} 0
1 doc {} 0 {} 0
2 cnt {} 0 {} 0
}
do_execsql_test 1.1.2 {
CREATE VIRTUAL TABLE v2 USING fts5vocab(t1, 'col');
PRAGMA table_info = v2;
} {
0 term {} 0 {} 0
1 col {} 0 {} 0
2 doc {} 0 {} 0
3 cnt {} 0 {} 0
}
do_execsql_test 1.2.1 { SELECT * FROM v1 } { }
do_execsql_test 1.2.2 { SELECT * FROM v2 } { }
do_execsql_test 1.3 {
INSERT INTO t1 VALUES('x y z');
INSERT INTO t1 VALUES('x x x');
}
do_execsql_test 1.4.1 {
SELECT * FROM v1;
} {x 2 4 y 1 1 z 1 1}
do_execsql_test 1.4.2 {
SELECT * FROM v2;
} {x 0 2 4 y 0 1 1 z 0 1 1}
do_execsql_test 1.5.1 {
BEGIN;
INSERT INTO t1 VALUES('a b c');
SELECT * FROM v1 WHERE term<'d';
} {a 1 1 b 1 1 c 1 1}
do_execsql_test 1.5.2 {
SELECT * FROM v2 WHERE term<'d';
COMMIT;
} {a 0 1 1 b 0 1 1 c 0 1 1}
do_execsql_test 1.6 {
DELETE FROM t1 WHERE one = 'a b c';
SELECT * FROM v1;
} {x 2 4 y 1 1 z 1 1}
#-------------------------------------------------------------------------
#
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE tt USING fts5(a, b);
INSERT INTO tt VALUES('d g b f d f', 'f c e c d a');
INSERT INTO tt VALUES('f a e a a b', 'e d c f d d');
INSERT INTO tt VALUES('b c a a a b', 'f f c c b c');
INSERT INTO tt VALUES('f d c a c e', 'd g d e g d');
INSERT INTO tt VALUES('g d e f a g x', 'f f d a a b');
INSERT INTO tt VALUES('g c f b c g', 'a g f d c b');
INSERT INTO tt VALUES('c e c f g b', 'f e d b g a');
INSERT INTO tt VALUES('g d e f d e', 'a c d b a g');
INSERT INTO tt VALUES('e f a c c b', 'b f e a f d y');
INSERT INTO tt VALUES('c c a a c f', 'd g a e b g');
}
set res_col {
a 0 6 11 a 1 7 9
b 0 6 7 b 1 7 7
c 0 6 12 c 1 5 8
d 0 4 6 d 1 9 13
e 0 6 7 e 1 6 6
f 0 9 10 f 1 7 10
g 0 5 7 g 1 5 7
x 0 1 1 y 1 1 1
}
set res_row {
a 10 20 b 9 14 c 9 20 d 9 19
e 8 13 f 10 20 g 7 14 x 1 1
y 1 1
}
foreach {tn tbl resname} {
1 "fts5vocab(tt, 'col')" res_col
2 "fts5vocab(tt, 'row')" res_row
3 "fts5vocab(tt, \"row\")" res_row
4 "fts5vocab(tt, [row])" res_row
5 "fts5vocab(tt, `row`)" res_row
6 "fts5vocab('tt', 'row')" res_row
7 "fts5vocab(\"tt\", \"row\")" res_row
8 "fts5vocab([tt], [row])" res_row
9 "fts5vocab(`tt`, `row`)" res_row
} {
do_execsql_test 2.$tn "
DROP TABLE IF EXISTS tv;
CREATE VIRTUAL TABLE tv USING $tbl;
SELECT * FROM tv;
" [set $resname]
}
#-------------------------------------------------------------------------
# Test errors in the CREATE VIRTUAL TABLE statement.
#
foreach {tn sql} {
1 { CREATE VIRTUAL TABLE aa USING fts5vocab() }
2 { CREATE VIRTUAL TABLE aa USING fts5vocab(x) }
3 { CREATE VIRTUAL TABLE aa USING fts5vocab(x,y,z) }
4 { CREATE VIRTUAL TABLE temp.aa USING fts5vocab(x,y,z,y) }
} {
do_catchsql_test 3.$tn $sql {1 {wrong number of vtable arguments}}
}
do_catchsql_test 4.0 {
CREATE VIRTUAL TABLE cc USING fts5vocab(tbl, unknown);
} {1 {fts5vocab: unknown table type: 'unknown'}}
do_catchsql_test 4.1 {
ATTACH 'test.db' AS aux;
CREATE VIRTUAL TABLE aux.cc USING fts5vocab(main, tbl, row);
} {1 {wrong number of vtable arguments}}
#-------------------------------------------------------------------------
# Test fts5vocab tables created in the temp schema.
#
reset_db
forcedelete test.db2
do_execsql_test 5.0 {
ATTACH 'test.db2' AS aux;
CREATE VIRTUAL TABLE t1 USING fts5(x);
CREATE VIRTUAL TABLE temp.t1 USING fts5(x);
CREATE VIRTUAL TABLE aux.t1 USING fts5(x);
INSERT INTO main.t1 VALUES('a b c');
INSERT INTO main.t1 VALUES('d e f');
INSERT INTO main.t1 VALUES('a e c');
INSERT INTO temp.t1 VALUES('1 2 3');
INSERT INTO temp.t1 VALUES('4 5 6');
INSERT INTO temp.t1 VALUES('1 5 3');
INSERT INTO aux.t1 VALUES('x y z');
INSERT INTO aux.t1 VALUES('m n o');
INSERT INTO aux.t1 VALUES('x n z');
}
# Create four fts5vocab tables in the temp schema. vm, vt2 and va use the
# three-argument form (schema, table, type) to name the main, temp and aux
# copies of t1 explicitly; vt1 uses the two-argument form (table, type).
# Tests 5.2 - 5.5 check which underlying table each one reads.
do_execsql_test 5.1 {
  CREATE VIRTUAL TABLE temp.vm USING fts5vocab(main, t1, row);
  CREATE VIRTUAL TABLE temp.vt1 USING fts5vocab(t1, row);
  CREATE VIRTUAL TABLE temp.vt2 USING fts5vocab(temp, t1, row);
  CREATE VIRTUAL TABLE temp.va USING fts5vocab(aux, t1, row);
}
do_execsql_test 5.2 { SELECT * FROM vm } {
a 2 2 b 1 1 c 2 2 d 1 1 e 2 2 f 1 1
}
do_execsql_test 5.3 { SELECT * FROM vt1 } {
1 2 2 2 1 1 3 2 2 4 1 1 5 2 2 6 1 1
}
do_execsql_test 5.4 { SELECT * FROM vt2 } {
1 2 2 2 1 1 3 2 2 4 1 1 5 2 2 6 1 1
}
do_execsql_test 5.5 { SELECT * FROM va } {
m 1 1 n 2 2 o 1 1 x 2 2 y 1 1 z 2 2
}
#-------------------------------------------------------------------------
#
do_execsql_test 6.0 {
CREATE TABLE iii(iii);
CREATE TABLE jjj(x);
}
do_catchsql_test 6.1 {
CREATE VIRTUAL TABLE vocab1 USING fts5vocab(iii, row);
SELECT * FROM vocab1;
} {1 {no such fts5 table: main.iii}}
do_catchsql_test 6.2 {
CREATE VIRTUAL TABLE vocab2 USING fts5vocab(jjj, row);
SELECT * FROM vocab2;
} {1 {no such fts5 table: main.jjj}}
# An fts5vocab table that names a table which does not exist at all (lll)
# must report the same error as one naming a non-fts5 table. This case was
# previously numbered 6.2, duplicating the id of the test before it.
do_catchsql_test 6.3 {
  CREATE VIRTUAL TABLE vocab3 USING fts5vocab(lll, row);
  SELECT * FROM vocab3;
} {1 {no such fts5 table: main.lll}}
finish_test

131
ext/fts5/tool/loadfts5.tcl Normal file
View File

@ -0,0 +1,131 @@
# Return the entire contents of file $f as a single string.
#
# The file is opened with the default channel settings, read to end-of-file
# and closed again before the data is handed back to the caller.
proc loadfile {f} {
  set chan [open $f]
  set contents [read $chan]
  close $chan
  set contents
}
set ::nRow 0
set ::nRowPerDot 1000
# Recursively walk directory $dir, inserting one row into table t1 for each
# regular entry found: (path, file contents).
#
# Globals used:
#   O(limit)     - if non-zero, stop adding rows at this level once nRow
#                  has reached the limit
#   nRow         - running count of rows inserted so far
#   nRowPerDot   - a "." progress mark is printed every nRowPerDot rows,
#                  with a line break after every 65 marks
#
# The loop variable must stay named "f": the SQL below binds $f by name.
proc load_hierachy {dir} {
  global O nRow nRowPerDot
  foreach f [glob -nocomplain -dir $dir *] {
    if {$O(limit) && $nRow>=$O(limit)} { break }

    # Directories are descended into; nothing is inserted for them.
    if {[file isdir $f]} {
      load_hierachy $f
      continue
    }

    db eval { INSERT INTO t1 VALUES($f, loadfile($f)) }
    incr nRow

    # Progress output only on exact multiples of nRowPerDot.
    if {$nRow % $nRowPerDot} { continue }
    puts -nonewline .
    if {($nRow % (65*$nRowPerDot))==0} { puts "" }
    flush stdout
  }
}
# Print the command-line synopsis and the list of supported switches to
# stderr, then terminate the script with exit status 1.
proc usage {} {
  set help [list \
    "Usage: $::argv0 ?SWITCHES? DATABASE PATH" \
    "" \
    "Switches are:" \
    " -fts4 (use fts4 instead of fts5)" \
    " -fts5 (use fts5)" \
    " -porter (use porter tokenizer)" \
    " -delete (delete the database file before starting)" \
    " -limit N (load no more than N documents)" \
    " -automerge N (set the automerge parameter to N)" \
    " -crisismerge N (set the crisismerge parameter to N)" \
    " -prefix PREFIX (comma separated prefix= argument)" \
  ]
  foreach ln $help { puts stderr $ln }
  exit 1
}
# Default option values. Each may be overridden by a command-line switch.
set O(vtab) fts5
set O(tok) ""
set O(limit) 0
set O(delete) 0
set O(automerge) -1
set O(crisismerge) -1
set O(prefix) ""

# The last two arguments are always DATABASE and PATH, so only the first
# nOpt arguments are treated as switches (and switch values).
if {[llength $argv]<2} usage
set nOpt [expr {[llength $argv]-2}]
for {set i 0} {$i < $nOpt} {incr i} {
  set arg [lindex $argv $i]
  switch -- $arg {
    -fts4 {
      set O(vtab) fts4
    }
    -fts5 {
      set O(vtab) fts5
    }
    -porter {
      set O(tok) ", tokenize=porter"
    }
    -delete {
      set O(delete) 1
    }
    -limit - -automerge - -crisismerge {
      # These three switches take an integer value. Reject anything else
      # here, before the database is opened or deleted, instead of failing
      # later in the middle of the load.
      if { [incr i]>=$nOpt } usage
      set val [lindex $argv $i]
      if {![string is integer -strict $val]} usage
      set O([string range $arg 1 end]) $val
    }
    -prefix {
      if { [incr i]>=$nOpt } usage
      set O(prefix) [lindex $argv $i]
    }
    default {
      usage
    }
  }
}
# DATABASE is the second-to-last argument, PATH the last.
set dbfile [lindex $argv end-1]
if {$O(delete)} { file delete -force $dbfile }
sqlite3 db $dbfile

# Make the Tcl proc loadfile callable from SQL as loadfile(X).
db func loadfile loadfile

# Create the table, apply the requested settings and load every file under
# PATH inside a single transaction.
db transaction {
  set pref ""
  if {$O(prefix)!=""} { set pref ", prefix='$O(prefix)'" }

  # Errors from these two statements are ignored on purpose.
  # NOTE(review): presumably this lets the script run against a database
  # that already contains t1, and tolerates the 'pgsz' setting being
  # rejected by non-fts5 tables - confirm.
  catch {
    db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok)$pref)"
    db eval "INSERT INTO t1(t1, rank) VALUES('pgsz', 4050);"
  }

  if {$O(automerge)>=0} {
    if {$O(vtab) == "fts5"} {
      db eval { INSERT INTO t1(t1, rank) VALUES('automerge', $O(automerge)) }
    } else {
      db eval { INSERT INTO t1(t1) VALUES('automerge=' || $O(automerge)) }
    }
  }

  if {$O(crisismerge)>=0} {
    if {$O(vtab) == "fts5"} {
      db eval {INSERT INTO t1(t1, rank) VALUES('crisismerge', $O(crisismerge))}
    } else {
      # No statement is issued for other table types. Say so, rather than
      # silently dropping a switch the user asked for.
      puts stderr "warning: -crisismerge is ignored when using $O(vtab)"
    }
  }

  load_hierachy [lindex $argv end]
}

78
ext/fts5/tool/mkfts5c.tcl Normal file
View File

@ -0,0 +1,78 @@
#!/bin/sh
# restart with tclsh \
exec tclsh "$0" "$@"
# srcdir is the parent of the directory that contains this script.
set srcdir [file dirname [file dirname [info script]]]
# G(src): the input files, in the order they are written to the output.
# Every "%dir%" is replaced by $srcdir; entries without that prefix
# (fts5parse.h, fts5parse.c) are bare names, opened relative to the
# current working directory.
set G(src) [string map [list %dir% $srcdir] {
%dir%/fts5.h
%dir%/fts5Int.h
fts5parse.h
%dir%/fts5_aux.c
%dir%/fts5_buffer.c
%dir%/fts5_config.c
%dir%/fts5_expr.c
%dir%/fts5_hash.c
%dir%/fts5_index.c
%dir%/fts5_main.c
%dir%/fts5_storage.c
%dir%/fts5_tokenize.c
%dir%/fts5_unicode2.c
%dir%/fts5_varint.c
%dir%/fts5_vocab.c
fts5parse.c
}]
# G(hdr): text written at the very start of the output file by fts5c_init.
# It defines NDEBUG unless SQLITE_DEBUG is defined, and undefines NDEBUG
# when both are defined.
set G(hdr) {
#if !defined(NDEBUG) && !defined(SQLITE_DEBUG)
# define NDEBUG 1
#endif
#if defined(NDEBUG) && defined(SQLITE_DEBUG)
# undef NDEBUG
#endif
}
# Return the full contents of file $zFile as a string. The file is opened
# with default channel settings and closed before returning.
proc readfile {zFile} {
  set chan [open $zFile]
  set contents [read $chan]
  close $chan
  set contents
}
# Open output file $zOut for writing (truncating any existing file), store
# the channel in G(fd) and write the G(hdr) preamble to it.
#
# The previous version first assigned "stdout" to G(fd) and then overwrote
# it immediately; that dead store has been removed.
proc fts5c_init {zOut} {
  global G
  set G(fd) [open $zOut w]
  puts -nonewline $G(fd) $G(hdr)
}
# Append source file $zIn to the output channel G(fd).
#
# A "#line 1" directive naming the file is written first so that compiler
# diagnostics for the generated file refer back to the original source.
# Each input line is then copied with two adjustments:
#
#   * "#include" lines that mention fts5 are commented out. They are kept
#     as a comment rather than dropped: dropping a line would make every
#     following line number disagree with the "#line 1" directive above.
#
#   * Lines that look like the start of a definition or declaration of an
#     sqlite3Fts5* symbol are prefixed with "static".
proc fts5c_printfile {zIn} {
  global G
  set data [readfile $zIn]
  puts $G(fd) "#line 1 \"[file tail $zIn]\""
  foreach line [split $data "\n"] {
    if {[regexp {^#include.*fts5} $line]} {
      set line "/* $line */"
    } elseif {[regexp {^(const )?[a-zA-Z][a-zA-Z0-9]* [*]?sqlite3Fts5} $line]} {
      set line "static $line"
    }
    puts $G(fd) $line
  }
}
# Close the output channel held in G(fd), unless that channel is stdout.
proc fts5c_close {} {
  global G
  if {$G(fd) eq "stdout"} { return }
  close $G(fd)
}
# Build fts5.c in the current directory: write the header, append every
# file listed in G(src) in order, then close the output.
fts5c_init fts5.c
foreach f $G(src) { fts5c_printfile $f }
fts5c_close

View File

@ -0,0 +1,31 @@
# Print a one-line synopsis followed by a blank line to stderr, then exit
# with status 1.
proc usage {} {
  puts stderr "usage: $::argv0 database table\n"
  exit 1
}
# Exactly two arguments are required: the database file and the name of
# the fts5 table to inspect.
if {[llength $argv]!=2} usage
set database [lindex $argv 0]
set tbl [lindex $argv 1]
sqlite3 db $database

# Decode the record with id=10 from the table's %_data shadow table and
# print every element of the decoded list after the first, one per line.
# (The o(...) option defaults that used to be set here were never read and
# have been removed.)
db eval "SELECT fts5_decode(rowid, block) AS d FROM ${tbl}_data WHERE id=10" {
  foreach lvl [lrange $d 1 end] {
    puts $lvl
  }
}

View File

@ -89,7 +89,7 @@ int main(int argc, char **argv){
** sqlite3ota_step() until either the OTA has been completely applied
** or an error occurs. Or, if nStep is greater than zero, call
** sqlite3ota_step() a maximum of nStep times. */
pOta = sqlite3ota_open(zTarget, zOta);
pOta = sqlite3ota_open(zTarget, zOta, 0);
report_ota_vfs(pOta);
for(i=0; (nStep<=0 || i<nStep) && sqlite3ota_step(pOta)==SQLITE_OK; i++);
nProgress = sqlite3ota_progress(pOta);

View File

@ -3120,8 +3120,8 @@ static int otaVfsFileControl(sqlite3_file *pFile, int op, void *pArg){
int (*xControl)(sqlite3_file*,int,void*) = p->pReal->pMethods->xFileControl;
int rc;
assert( p->openFlags &
(SQLITE_OPEN_MAIN_DB|SQLITE_OPEN_TEMP_DB|SQLITE_OPEN_TRANSIENT_DB)
assert( p->openFlags & (SQLITE_OPEN_MAIN_DB|SQLITE_OPEN_TEMP_DB)
|| p->openFlags & (SQLITE_OPEN_TRANSIENT_DB|SQLITE_OPEN_TEMP_JOURNAL)
);
if( op==SQLITE_FCNTL_OTA ){
sqlite3ota *pOta = (sqlite3ota*)pArg;

57
main.mk
View File

@ -75,6 +75,8 @@ LIBOBJ+= vdbe.o parse.o \
LIBOBJ += sqlite3session.o
LIBOBJ += fts5.o
# All of the source code files.
@ -231,10 +233,12 @@ SRC += \
SRC += \
$(TOP)/ext/userauth/userauth.c \
$(TOP)/ext/userauth/sqlite3userauth.h
SRC += \
$(TOP)/ext/ota/sqlite3ota.c \
$(TOP)/ext/ota/sqlite3ota.h
# Generated source code files
#
SRC += \
@ -308,7 +312,8 @@ TESTSRC += \
$(TOP)/ext/misc/spellfix.c \
$(TOP)/ext/misc/totype.c \
$(TOP)/ext/misc/wholenumber.c \
$(TOP)/ext/misc/vfslog.c
$(TOP)/ext/misc/vfslog.c \
$(TOP)/ext/fts5/fts5_tcl.c
#TESTSRC += $(TOP)/ext/fts2/fts2_tokenizer.c
@ -406,6 +411,10 @@ EXTHDR += \
$(TOP)/ext/rtree/rtree.h
EXTHDR += \
$(TOP)/ext/icu/sqliteicu.h
EXTHDR += \
$(TOP)/ext/fts5/fts5Int.h \
fts5parse.h \
$(TOP)/ext/fts5/fts5.h
EXTHDR += \
$(TOP)/ext/userauth/sqlite3userauth.h
@ -634,6 +643,45 @@ fts3_write.o: $(TOP)/ext/fts3/fts3_write.c $(HDR) $(EXTHDR)
rtree.o: $(TOP)/ext/rtree/rtree.c $(HDR) $(EXTHDR)
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/rtree/rtree.c
# FTS5 things
#
FTS5_SRC = \
$(TOP)/ext/fts5/fts5.h \
$(TOP)/ext/fts5/fts5Int.h \
$(TOP)/ext/fts5/fts5_aux.c \
$(TOP)/ext/fts5/fts5_buffer.c \
$(TOP)/ext/fts5/fts5_main.c \
$(TOP)/ext/fts5/fts5_config.c \
$(TOP)/ext/fts5/fts5_expr.c \
$(TOP)/ext/fts5/fts5_hash.c \
$(TOP)/ext/fts5/fts5_index.c \
fts5parse.c fts5parse.h \
$(TOP)/ext/fts5/fts5_storage.c \
$(TOP)/ext/fts5/fts5_tokenize.c \
$(TOP)/ext/fts5/fts5_unicode2.c \
$(TOP)/ext/fts5/fts5_varint.c \
$(TOP)/ext/fts5/fts5_vocab.c \
fts5parse.c: $(TOP)/ext/fts5/fts5parse.y lemon
cp $(TOP)/ext/fts5/fts5parse.y .
rm -f fts5parse.h
./lemon $(OPTS) fts5parse.y
mv fts5parse.c fts5parse.c.orig
echo "#ifdef SQLITE_ENABLE_FTS5" > fts5parse.c
cat fts5parse.c.orig | sed 's/yy/fts5yy/g' | sed 's/YY/fts5YY/g' \
| sed 's/TOKEN/FTS5TOKEN/g' >> fts5parse.c
echo "#endif /* SQLITE_ENABLE_FTS5 */" >> fts5parse.c
fts5parse.h: fts5parse.c
fts5.c: $(FTS5_SRC)
tclsh $(TOP)/ext/fts5/tool/mkfts5c.tcl
fts5.o: fts5.c $(HDR) $(EXTHDR)
$(TCCX) -DSQLITE_CORE -c fts5.c
userauth.o: $(TOP)/ext/userauth/userauth.c $(HDR) $(EXTHDR)
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/userauth/userauth.c
@ -671,10 +719,10 @@ testfixture$(EXE): $(TESTSRC2) libsqlite3.a $(TESTSRC) $(TOP)/src/tclsqlite.c
$(TESTSRC) $(TESTSRC2) $(TOP)/src/tclsqlite.c \
-o testfixture$(EXE) $(LIBTCL) libsqlite3.a $(THREADLIB)
amalgamation-testfixture$(EXE): sqlite3.c $(TESTSRC) $(TOP)/src/tclsqlite.c \
amalgamation-testfixture$(EXE): sqlite3.c fts5.c $(TESTSRC) $(TOP)/src/tclsqlite.c \
$(TOP)/ext/session/test_session.c
$(TCCX) $(TCL_FLAGS) -DTCLSH=1 $(TESTFIXTURE_FLAGS) \
$(TESTSRC) $(TOP)/src/tclsqlite.c sqlite3.c \
$(TESTSRC) $(TOP)/src/tclsqlite.c sqlite3.c fts5.c \
$(TOP)/ext/session/test_session.c \
-o testfixture$(EXE) $(LIBTCL) $(THREADLIB)
@ -794,6 +842,9 @@ ota$(EXE): $(TOP)/ext/ota/ota.c $(TOP)/ext/ota/sqlite3ota.c sqlite3.o
$(TCC) -I. -o ota$(EXE) $(TOP)/ext/ota/ota.c sqlite3.o \
$(THREADLIB)
loadfts: $(TOP)/tool/loadfts.c libsqlite3.a
$(TCC) $(TOP)/tool/loadfts.c libsqlite3.a -o loadfts $(THREADLIB)
# This target will fail if the SQLite amalgamation contains any exported
# symbols that do not begin with "sqlite3_". It is run as part of the
# releasetest.tcl script.

158
manifest
View File

@ -1,9 +1,9 @@
C Merge\sall\sthe\slatest\senhancements\sfrom\strunk.
D 2015-06-25T15:44:49.660
C Merge\sall\sthe\slatest\senhancements\sfrom\strunk.\s\sThis\smerge\sinclude\sFTS5\nand\sa\snumber\sof\snotable\sperformance\senhancements.
D 2015-06-30T16:29:59.664
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in 5f56f6186fdbd0fb33226e9d2279acde3b3fa88b
F Makefile.in c7566d9d9892f855145cbd28bf176fddf44a98b4
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
F Makefile.msc 3165ac5ae2fee79cf7d5e025b879f7a52ae66776
F Makefile.msc 1c184a23e55ed4b1781dfa127df295615a6eba0f
F Makefile.vxworks e1b65dea203f054e71653415bd8f96dcaed47858
F README.md 8ecc12493ff9f820cdea6520a9016001cb2e59b7
F VERSION ce0ae95abd7121c534f6917c1c8f2b70d9acd4db
@ -102,7 +102,84 @@ F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100
F ext/fts3/tool/fts3view.c 8e53d0190a7b3443764bbd32ad47be2bd852026d
F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c
F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7
F ext/fts3/unicode/mkunicode.tcl a2567f9d6ad6779879a2e394c120ad8718557e65
F ext/fts3/unicode/mkunicode.tcl ed0534dd51efce39878bce33944c6073d37a1e20
F ext/fts3/unicode/parseunicode.tcl da577d1384810fb4e2b209bf3313074353193e95
F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a
F ext/fts5/fts5.h 81d1a92fc2b4bd477af7e4e0b38b456f3e199fba
F ext/fts5/fts5Int.h 918e947c0c20122ed5eb9ea695d83c6c8cf7239a
F ext/fts5/fts5_aux.c d53f00f31ad615ca4f139dd8751f9041afa00971
F ext/fts5/fts5_buffer.c 7428b0bcb257641cbecc3bacce7f40686cf99f36
F ext/fts5/fts5_config.c 7d19f4516cd79f1f8b58d38aa051b70195404422
F ext/fts5/fts5_expr.c 3386ab0a71dbab7e1259c3b16d6113c97d14123e
F ext/fts5/fts5_hash.c c1cfdb2cae0fad00b06fae38a40eaf9261563ccc
F ext/fts5/fts5_index.c ad32235180757f182050b8d24c9dbe61056385d2
F ext/fts5/fts5_main.c c5b2a219d65967c07fd1bc8fd45206863a2fe360
F ext/fts5/fts5_storage.c 3e672a0d35f63979556903861b324e7b8932cecc
F ext/fts5/fts5_tcl.c b82f13f73a30f0959f539743f8818bece994a970
F ext/fts5/fts5_tokenize.c 97251d68d7a6a9415bde1203f9382864dfc1f989
F ext/fts5/fts5_unicode2.c da3cf712f05cd8347c8c5bc00964cc0361c88da9
F ext/fts5/fts5_varint.c 366452037bf9a000c351374b489badc1b3541796
F ext/fts5/fts5_vocab.c e454fa58c6d591024659a9b61eece0d708e8b575
F ext/fts5/fts5parse.y 833db1101b78c0c47686ab1b84918e38c36e9452
F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba
F ext/fts5/test/fts5_common.tcl 9553cce0757092d194307c2168d4edd100eab578
F ext/fts5/test/fts5aa.test 0be21c89fd66b588db355a6398911fd875bdcc6c
F ext/fts5/test/fts5ab.test 6fe3a56731d15978afbb74ae51b355fc9310f2ad
F ext/fts5/test/fts5ac.test 0990ae7497ebaea2ab5f7fd5caedd93a71a905fc
F ext/fts5/test/fts5ad.test 312f3c8ed9592533499c5b94d2059ae6382913a0
F ext/fts5/test/fts5ae.test ddc558e3e3b52db0101f7541b2e3849b77052c92
F ext/fts5/test/fts5af.test c2501ec2b61d6b179c305f5d2b8782ab3d4f832a
F ext/fts5/test/fts5ag.test ec3e119b728196620a31507ef503c455a7a73505
F ext/fts5/test/fts5ah.test b9e78fa986a7bd564ebadfb244de02c84d7ac3ae
F ext/fts5/test/fts5ai.test f20e53bbf0c55bc596f1fd47f2740dae028b8f37
F ext/fts5/test/fts5aj.test 05b569f5c16ea3098fb1984eec5cf50dbdaae5d8
F ext/fts5/test/fts5ak.test 7b8c5df96df599293f920b7e5521ebc79f647592
F ext/fts5/test/fts5al.test fc60ebeac9d8e366e71309d4c31fa72199d711d7
F ext/fts5/test/fts5alter.test 78b63e088646dd623cacbdc1899a54d638dcf3d8
F ext/fts5/test/fts5auto.test caa5bcf917db11944655a2a9bd38c67c520376ca
F ext/fts5/test/fts5aux.test 8c687c948cc98e9a94be014df7d518acc1b3b74f
F ext/fts5/test/fts5auxdata.test 141a7cbffcceb1bd2799b4b29c183ff8780d586e
F ext/fts5/test/fts5bigpl.test 04ee0d7eebbebf17c31f5a0b5c5f9494eac3a0cb
F ext/fts5/test/fts5columnsize.test 97dc6bd66c91009d00407aa078dd5e9e8eb22f99
F ext/fts5/test/fts5config.test ad2ff42ddc856aed2d05bf89dc1c578c8a39ea3b
F ext/fts5/test/fts5content.test d0d90a45f0bcf07d75d474500d81f941b45e2021
F ext/fts5/test/fts5corrupt.test 928c9c91d40690d301f943a7ed0ffc19e0d0e7b6
F ext/fts5/test/fts5corrupt2.test 1a830ccd6dbe1b601c7e3f5bbc1cf77bd8c8803b
F ext/fts5/test/fts5corrupt3.test 1ccf575f5126e79f9fec7979fd02a1f40a076be3
F ext/fts5/test/fts5dlidx.test 59b80bbe34169a082c575d9c26f0a7019a7b79c1
F ext/fts5/test/fts5doclist.test 8edb5b57e5f144030ed74ec00ef6fa4294fed79b
F ext/fts5/test/fts5ea.test 451bb37310ee6df8ef72e4354fda5621b3b51448
F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e
F ext/fts5/test/fts5fault1.test 7a562367cb4a735b57b410dbdb62dcc8d971faec
F ext/fts5/test/fts5fault2.test 28c36c843bb39ae855ba79827417ecc37f114341
F ext/fts5/test/fts5fault3.test d6e9577d4312e331a913c72931bf131704efc8f3
F ext/fts5/test/fts5fault4.test 762991d526ee67c2b374351a17248097ea38bee7
F ext/fts5/test/fts5fault5.test 54da9fd4c3434a1d4f6abdcb6469299d91cf5875
F ext/fts5/test/fts5fault6.test 234dc6355f8d3f8b5be2763f30699d770247c215
F ext/fts5/test/fts5full.test 6f6143af0c6700501d9fd597189dfab1555bb741
F ext/fts5/test/fts5hash.test 42eb066f667e9a389a63437cb7038c51974d4fc6
F ext/fts5/test/fts5integrity.test 29f41d2c7126c6122fbb5d54e556506456876145
F ext/fts5/test/fts5merge.test 8f3cdba2ec9c5e7e568246e81b700ad37f764367
F ext/fts5/test/fts5near.test b214cddb1c1f1bddf45c75af768f20145f7e71cc
F ext/fts5/test/fts5optimize.test 42741e7c085ee0a1276140a752d4407d97c2c9f5
F ext/fts5/test/fts5plan.test 6a55ecbac9890765b0e16f8c421c7e0888cfe436
F ext/fts5/test/fts5porter.test 7cdc07bef301d70eebbfa75dcaf45c3680e1d0e1
F ext/fts5/test/fts5porter2.test 2e65633d58a1c525d5af0f6c01e5a59155bb3487
F ext/fts5/test/fts5prefix.test 552a462f0e8595676611f41643de217fb4ac2808
F ext/fts5/test/fts5rank.test 11dcebba31d822f7e99685b4ea2c2ae3ec0b16f1
F ext/fts5/test/fts5rebuild.test 03935f617ace91ed23a6099c7c74d905227ff29b
F ext/fts5/test/fts5restart.test c17728fdea26e7d0f617d22ad5b4b2862b994c17
F ext/fts5/test/fts5rowid.test f7674e19a40987bf59624d8db9827114cb7f7a3e
F ext/fts5/test/fts5tokenizer.test 83e7e01a21ec7fdf814d51f6184cc26bb77d7695
F ext/fts5/test/fts5unicode.test fbef8d8a3b4b88470536cc57604a82ca52e51841
F ext/fts5/test/fts5unicode2.test 84282d4a6dd34370dc19a3486dd6fecc89c7ed0b
F ext/fts5/test/fts5unicode3.test 35c3d02aa7acf7d43d8de3bfe32c15ba96e8928e
F ext/fts5/test/fts5unindexed.test e9539d5b78c677315e7ed8ea911d4fd25437c680
F ext/fts5/test/fts5version.test bed59038e937c40d3c0056d08076db7874c6cd4a
F ext/fts5/test/fts5vocab.test cdf97b9678484e9bad5062edf9c9106e5c3b0c5c
F ext/fts5/tool/loadfts5.tcl 7ef3e62131f0434a78e4f5c5b056b09d221710a8
F ext/fts5/tool/mkfts5c.tcl fdb449263837a18d9131bc2f61b256fd77e64361
F ext/fts5/tool/showfts5.tcl 921f33b30c3189deefd2b2cc81f951638544aaf1
F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43
F ext/icu/icu.c b2732aef0b076e4276d9b39b5a33cec7a05e1413
F ext/icu/sqliteicu.h 728867a802baa5a96de7495e9689a8e01715ef37
@ -123,7 +200,7 @@ F ext/misc/totype.c 4a167594e791abeed95e0a8db028822b5e8fe512
F ext/misc/vfslog.c fe40fab5c077a40477f7e5eba994309ecac6cc95
F ext/misc/vtshim.c babb0dc2bf116029e3e7c9a618b8a1377045303e
F ext/misc/wholenumber.c 784b12543d60702ebdd47da936e278aa03076212
F ext/ota/ota.c c47352838b967384a81eda5de75c352922a0dd6e
F ext/ota/ota.c 3a849c3b0a4ad6e63125668be9f67be03621216e
F ext/ota/ota1.test abdcbe746db4c7f7b51e842b576cacb33eef28f5
F ext/ota/ota10.test 85e0f6e7964db5007590c1b299e75211ed4240d4
F ext/ota/ota11.test 2f606cd2b4af260a86b549e91b9f395450fc75cb
@ -139,7 +216,7 @@ F ext/ota/otaA.test ab67f7f53670b81c750dcc946c5b704f51c429a4
F ext/ota/otacrash.test 8346192b2d46cbe7787d5d65904d81d3262a3cbf
F ext/ota/otafault.test 8c43586c2b96ca16bbce00b5d7e7d67316126db8
F ext/ota/otafault2.test fa202a98ca221faec318f3e5c5f39485b1256561
F ext/ota/sqlite3ota.c 2246b779f46ab20d5e7876f5b96c378c601d20f4
F ext/ota/sqlite3ota.c 21575d86eac30482a9bfbb2a531f433015e0e03c
F ext/ota/sqlite3ota.h 00028de37eede471ff1947d455cc3f33d3a911c6
F ext/ota/test_ota.c a876f88550d7d59a3ef62d4c1a5c04c4c2f1ebe1
F ext/rtree/README 6315c0d73ebf0ec40dedb5aa0e942bc8b54e3761
@ -190,9 +267,9 @@ F ext/userauth/userauth.c 5fa3bdb492f481bbc1709fc83c91ebd13460c69e
F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x
F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8
F magic.txt 8273bf49ba3b0c8559cb2774495390c31fd61c60
F main.mk 2395b88d31e71bbd6f9dd26bd6db26948ead9eb3
F main.mk eedc2e607f56729cdcc4b90d8654dfd472bc5bee
F mkopcodec.awk c2ff431854d702cdd2d779c9c0d1f58fa16fa4ea
F mkopcodeh.awk d5e22023b5238985bb54a72d33e0ac71fe4f8a32
F mkopcodeh.awk 0e7f04a8eb90f92259e47d80110e4e98d7ce337a
F mkso.sh fd21c06b063bb16a5d25deea1752c2da6ac3ed83
F mptest/config01.test 3c6adcbc50b991866855f1977ff172eb6d901271
F mptest/config02.test 4415dfe36c48785f751e16e32c20b077c28ae504
@ -205,23 +282,23 @@ F sqlite.pc.in 42b7bf0d02e08b9e77734a47798d1a55a9e0716b
F sqlite3.1 fc7ad8990fc8409983309bb80de8c811a7506786
F sqlite3.pc.in 48fed132e7cb71ab676105d2a4dc77127d8c1f3a
F src/alter.c 48e14b8aea28dc58baafe3cfcb8889c086b7744a
F src/analyze.c d23790787f80ebed58df7774744b4cf96401498b
F src/attach.c c38ac5a520a231d5d0308fd7f2ad95191c867bae
F src/analyze.c f89727c36f997bd2bf6c5e546c2f51dc94e6f2a4
F src/attach.c e944d0052b577703b9b83aac1638452ff42a8395
F src/auth.c b56c78ebe40a2110fd361379f7e8162d23f92240
F src/backup.c ff743689c4d6c5cb55ad42ed9d174b2b3e71f1e3
F src/bitvec.c 5eb7958c3bf65210211cbcfc44eff86d0ded7c9d
F src/bitvec.c d1f21d7d91690747881f03940584f4cc548c9d3d
F src/btmutex.c 45a968cc85afed9b5e6cf55bf1f42f8d18107f79
F src/btree.c cb50b3f00fec44481c4b0a7f9ab1e4bed2ffce18
F src/btree.c 3a535c0118872c3ee4e198b80a62d09315381dab
F src/btree.h 969adc948e89e449220ff0ff724c94bb2a52e9f1
F src/btreeInt.h fdd1aff02fb2a63812bd95716e7f579fc3759107
F src/btreeInt.h 64b5723d8410a1f02c94d99c46b1b74cd547e6ef
F src/build.c b3f15255d5b16e42dafeaa638fd4f8a47c94ed70
F src/callback.c 7b44ce59674338ad48b0e84e7b72f935ea4f68b0
F src/complete.c a5cf5b4b56390cfb7b8636e8f7ddef90258dd575
F src/complete.c addcd8160b081131005d5bc2d34adf20c1c5c92f
F src/ctime.c 5a0b735dc95604766f5dac73973658eef782ee8b
F src/date.c e4d50b3283696836ec1036b695ead9a19e37a5ac
F src/dbstat.c f402e77e25089c6003d0c60b3233b9b3947d599a
F src/delete.c b998fbc3c55e8331a5f40aa7ff80972254de8de1
F src/expr.c 32c836d9fa22c25371039febf074849dcefb3de9
F src/expr.c c5c58e4d01c7ceb2266791d8d877f1b23a88e316
F src/fault.c 160a0c015b6c2629d3899ed2daf63d75754a32bb
F src/fkey.c c9b63a217d86582c22121699a47f22f524608869
F src/func.c a98ea5880dc50e9ca6dd6f57079a37b9cfcdecf1
@ -232,10 +309,10 @@ F src/hwtime.h d32741c8f4df852c7d959236615444e2b1063b08
F src/insert.c a81d4454051c92d058d79cd77099e700e36a74f6
F src/journal.c b4124532212b6952f42eb2c12fa3c25701d8ba8d
F src/legacy.c ba1863ea58c4c840335a84ec276fc2b25e22bc4e
F src/lempar.c 7274c97d24bb46631e504332ccd3bd1b37841770
F src/lempar.c 92bafa308607dd985ca389a788cd9e0a2b608712
F src/loadext.c e722f4b832f923744788365df5fb8515c0bc8a47
F src/main.c 949e2096a9375cbff11e4835c7b99c813ea2f610
F src/malloc.c 908c780fdddd472163c2d1b1820ae4081f01ad20
F src/main.c 5e170f7c4872c272d733572a99628e47fe92ab43
F src/malloc.c 9be4e645f2fb411e5a04cf97e91f68b4faa6dc81
F src/mem0.c 6a55ebe57c46ca1a7d98da93aaa07f99f1059645
F src/mem1.c abe6ee469b6c5a35c7f22bfeb9c9bac664a1c987
F src/mem2.c f1940d9e91948dd6a908fbb9ce3835c36b5d83c3
@ -256,12 +333,12 @@ F src/os_setup.h c9d4553b5aaa6f73391448b265b89bed0b890faa
F src/os_unix.c 23eb5f56fac54d8fe0cb204291f3b3b2d94f23fc
F src/os_win.c 27cc135e2d0b8b1e2e4944db1e2669a6a18fa0f8
F src/os_win.h eb7a47aa17b26b77eb97e4823f20a00b8bda12ca
F src/pager.c 9bc918a009285f96ec6dac62dd764c7063552455
F src/pager.c 922d8ea28387b79a117488da06ee84f77d50d71e
F src/pager.h c3476e7c89cdf1c6914e50a11f3714e30b4e0a77
F src/parse.y 6d60dda8f8d418b6dc034f1fbccd816c459983a8
F src/pcache.c 10539fb959849ad6efff80050541cab3d25089d4
F src/pcache.h b44658c9c932d203510279439d891a2a83e12ba8
F src/pcache1.c 8e3799b33c41d517d86444d4abefc80d4f02adca
F src/pcache.c cde06aa50962595e412d497e22fd2e07878ba1f0
F src/pcache.h 9968603796240cdf83da7e7bef76edf90619cea9
F src/pcache1.c 9ec20f98f50ed7415019303ae9bd3745d4b7bd9b
F src/pragma.c c1f4d012ea9f6b1ce52d341b2cd0ad72d560afd7
F src/pragma.h b8632d7cdda7b25323fa580e3e558a4f0d4502cc
F src/prepare.c 82e5db1013846a819f198336fed72c44c974e7b1
@ -269,17 +346,17 @@ F src/printf.c db11b5960105ee661dcac690f2ae6276e49bf251
F src/random.c ba2679f80ec82c4190062d756f22d0c358180696
F src/resolve.c 2d47554370de8de6dd5be060cef9559eec315005
F src/rowset.c eccf6af6d620aaa4579bd3b72c1b6395d9e9fa1e
F src/select.c 9baeda79f93cfd180d471273a2f9c82c682a37a2
F src/select.c 009c6138be8788449d4f911f380d99e8608040e2
F src/shell.c e4ad9031072a6d679b2c69a780014d30db62dc7f
F src/sqlite.h.in 876ad21b9a6bb5034db7c44cdebd5df2292a5336
F src/sqlite3.rc 992c9f5fb8285ae285d6be28240a7e8d3a7f2bad
F src/sqlite3ext.h be1a718b7d2ce40ceba725ae92c8eb5f18003066
F src/sqliteInt.h dc4d9615bc38d5eaaaf95b7a5e98267615c51ee7
F src/sqliteInt.h 2c8f1aad45b003a5efd615ac1d12aca83cc8d364
F src/sqliteLimit.h 216557999cb45f2e3578ed53ebefe228d779cb46
F src/status.c f266ad8a2892d659b74f0f50cb6a88b6e7c12179
F src/table.c 51b46b2a62d1b3a959633d593b89bab5e2c9155e
F src/tclsqlite.c 060e6398f47a187ec707030e6306c730a48c845d
F src/test1.c a8e09b811f70184ce65012f27f30cfee7e54f268
F src/tclsqlite.c 6cfb538d12565aba52840709721a02dd99806f1c
F src/test1.c e055ab594a48d25720ed31daa5eced1163544488
F src/test2.c 577961fe48961b2f2e5c8b56ee50c3f459d3359d
F src/test3.c 64d2afdd68feac1bb5e2ffb8226c8c639f798622
F src/test4.c d168f83cc78d02e8d35567bb5630e40dcd85ac1e
@ -293,7 +370,7 @@ F src/test_autoext.c dea8a01a7153b9adc97bd26161e4226329546e12
F src/test_backup.c 2e6e6a081870150f20c526a2e9d0d29cda47d803
F src/test_blob.c e5a7a81d61a780da79101aeb1e60d300af169e07
F src/test_btree.c 2e9978eca99a9a4bfa8cae949efb00886860a64f
F src/test_config.c 32606543e66d128e8891aa750ab37791211e5623
F src/test_config.c c15d03e834c4bda571e689a0fefb106a288450a4
F src/test_demovfs.c 0de72c2c89551629f58486fde5734b7d90758852
F src/test_devsym.c e7498904e72ba7491d142d5c83b476c4e76993bc
F src/test_fs.c ced436e3d4b8e4681328409b8081051ce614e28f
@ -330,18 +407,18 @@ F src/treeview.c c84b1a8ebc7f1d00cd76ce4958eeb3ae1021beed
F src/trigger.c 322f23aad694e8f31d384dcfa386d52a48d3c52f
F src/update.c 24dd6a45b8b3470e62702128ebf11be1f2693145
F src/utf.c fc6b889ba0779b7722634cdeaa25f1930d93820c
F src/util.c a6431c92803b975b7322724a7b433e538d243539
F src/util.c 89bfe78b4610d456ba431a0865309a20acc115f3
F src/vacuum.c 2ddd5cad2a7b9cef7f9e431b8c7771634c6b1701
F src/vdbe.c 60917660c84745cb644d4ddd750301fe0649ccd0
F src/vdbe.h 218e957540f5e0866501d2b546ded647c86b8d84
F src/vdbeInt.h 571977048ae46b947fb8bbbd1332e5d98f1ca845
F src/vdbe.c 195b32310c7062847a45fda214b32ceb8f8f6ab2
F src/vdbe.h d0f8ab919146109d080cde4b0840af9b5fafad4b
F src/vdbeInt.h 963c87c4bf8040c0a316ca3e58f8a4888e1fa3c4
F src/vdbeapi.c a5d2e8afd53b4f81934f5ca59c04465cd1a6d50d
F src/vdbeaux.c 73788765a2d43514822fbcb2a69068fb48f4dcdd
F src/vdbeaux.c d6bfb7b4291bc033283140e21c2da2ce04ef0f78
F src/vdbeblob.c ab33f9b57cfce7dddb23853090186da614be4846
F src/vdbemem.c 9b6436ec92a4516df614f55c0ad7be2fc464527a
F src/vdbemem.c 6c9e261d135fc175da2f34e46d60243a19fffb9f
F src/vdbesort.c f5009e7a35e3065635d8918b9a31f498a499976b
F src/vdbetrace.c 8befe829faff6d9e6f6e4dee5a7d3f85cc85f1a0
F src/vtab.c c535e80259ebe616467181a83a4263555b97c694
F src/vtab.c 082b35a25a26e3d36f365ca8cd73c1922532f05e
F src/vxworks.h c18586c8edc1bddbc15c004fa16aeb1e1342b4fb
F src/wal.c ce2cb2d06faab54d1bce3e739bec79e063dd9113
F src/wal.h df01efe09c5cb8c8e391ff1715cca294f89668a4
@ -782,7 +859,7 @@ F test/mallocI.test a88c2b9627c8506bf4703d8397420043a786cdb6
F test/mallocJ.test b5d1839da331d96223e5f458856f8ffe1366f62e
F test/mallocK.test da01dcdd316767b8356741f8d33a23a06a23def5
F test/mallocL.test 252ddc7eb4fbf75364eab17b938816085ff1fc17
F test/malloc_common.tcl 3663f9001ce3e29bbaa9677ffe15cd468e3ec7e3
F test/malloc_common.tcl aac62499b76be719fac31e7a3e54a7fd53272e7f
F test/manydb.test 28385ae2087967aa05c38624cec7d96ec74feb3e
F test/mem5.test c6460fba403c5703141348cd90de1c294188c68f
F test/memdb.test fcb5297b321b562084fc79d64d5a12a1cd2b639b
@ -844,7 +921,7 @@ F test/pagesize.test 5769fc62d8c890a83a503f67d47508dfdc543305
F test/pcache.test b09104b03160aca0d968d99e8cd2c5b1921a993d
F test/pcache2.test a83efe2dec0d392f814bfc998def1d1833942025
F test/percentile.test 4243af26b8f3f4555abe166f723715a1f74c77ff
F test/permutations.test 242d5aa14f84cb03ae830342b176d2b15ec55ffb
F test/permutations.test 1a49f543ec7f0e075ca24eae3bda7f75bb00634b
F test/pragma.test be7195f0aa72bdb8a512133e9640ac40f15b57a2
F test/pragma2.test f624a496a95ee878e81e59961eade66d5c00c028
F test/pragma3.test 6f849ccffeee7e496d2f2b5e74152306c0b8757c
@ -1264,6 +1341,7 @@ F tool/genfkey.test 4196a8928b78f51d54ef58e99e99401ab2f0a7e5
F tool/getlock.c f4c39b651370156cae979501a7b156bdba50e7ce
F tool/lemon.c b9109f59b57e7b6f101c4fe644c8361ba6dee969
F tool/lempar.c 01ca97f87610d1dac6d8cd96ab109ab1130e76dc
F tool/loadfts.c 76b6589ab5efcdc9cfe16d43ab5a6c2618e44bd4
F tool/logest.c eef612f8adf4d0993dafed0416064cf50d5d33c6
F tool/mkautoconfamal.sh d1a2da0e15b2ed33d60af35c7e9d483f13a8eb9f
F tool/mkkeywordhash.c dfff09dbbfaf950e89af294f48f902181b144670
@ -1306,7 +1384,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
P 199bfb67fdf642cca6cd5d460fa4dc602b94837a f824e66b0dc120bed227c7446e2663fcad7cc4f6
R b6a4a6a3d66ab4b5ed5e1b39669d179f
P 924f471291dfd458307a11819aa640cc1a02ac63 8bfcda3d10aec864d71d12a1248c37e4db6f8899
R 16211acde3caeef0e3b8268914f32a0d
U drh
Z d59c65de498941f5f2907132a991b726
Z 476ef58b2bdb077bbb18b26bb89bdae6

View File

@ -1 +1 @@
924f471291dfd458307a11819aa640cc1a02ac63
39936b33b0668aad81aa574d4d74c92b0ddd218a

View File

@ -122,9 +122,7 @@ END {
for(i=0; i<n_op; i++){
name = order[i];
if( op[name]>=0 ) continue;
if( name=="OP_Function" \
|| name=="OP_AggStep" \
|| name=="OP_Transaction" \
if( name=="OP_Transaction" \
|| name=="OP_AutoCommit" \
|| name=="OP_Savepoint" \
|| name=="OP_Checkpoint" \

View File

@ -943,7 +943,7 @@ static void callStatGet(Vdbe *v, int regStat4, int iParam, int regOut){
#else
UNUSED_PARAMETER( iParam );
#endif
sqlite3VdbeAddOp3(v, OP_Function, 0, regStat4, regOut);
sqlite3VdbeAddOp3(v, OP_Function0, 0, regStat4, regOut);
sqlite3VdbeChangeP4(v, -1, (char*)&statGetFuncdef, P4_FUNCDEF);
sqlite3VdbeChangeP5(v, 1 + IsStat34);
}
@ -1098,7 +1098,7 @@ static void analyzeOneTable(
#endif
sqlite3VdbeAddOp2(v, OP_Integer, nCol, regStat4+1);
sqlite3VdbeAddOp2(v, OP_Integer, pIdx->nKeyCol, regStat4+2);
sqlite3VdbeAddOp3(v, OP_Function, 0, regStat4+1, regStat4);
sqlite3VdbeAddOp3(v, OP_Function0, 0, regStat4+1, regStat4);
sqlite3VdbeChangeP4(v, -1, (char*)&statInitFuncdef, P4_FUNCDEF);
sqlite3VdbeChangeP5(v, 2+IsStat34);
@ -1194,7 +1194,7 @@ static void analyzeOneTable(
}
#endif
assert( regChng==(regStat4+1) );
sqlite3VdbeAddOp3(v, OP_Function, 1, regStat4, regTemp);
sqlite3VdbeAddOp3(v, OP_Function0, 1, regStat4, regTemp);
sqlite3VdbeChangeP4(v, -1, (char*)&statPushFuncdef, P4_FUNCDEF);
sqlite3VdbeChangeP5(v, 2+IsStat34);
sqlite3VdbeAddOp2(v, OP_Next, iIdxCur, addrNextRow); VdbeCoverage(v);

View File

@ -359,7 +359,7 @@ static void codeAttach(
assert( v || db->mallocFailed );
if( v ){
sqlite3VdbeAddOp3(v, OP_Function, 0, regArgs+3-pFunc->nArg, regArgs+3);
sqlite3VdbeAddOp3(v, OP_Function0, 0, regArgs+3-pFunc->nArg, regArgs+3);
assert( pFunc->nArg==-1 || (pFunc->nArg&0xff)==pFunc->nArg );
sqlite3VdbeChangeP5(v, (u8)(pFunc->nArg));
sqlite3VdbeChangeP4(v, -1, (char *)pFunc, P4_FUNCDEF);

View File

@ -126,10 +126,10 @@ Bitvec *sqlite3BitvecCreate(u32 iSize){
** If p is NULL (if the bitmap has not been created) or if
** i is out of range, then return false.
*/
int sqlite3BitvecTest(Bitvec *p, u32 i){
if( p==0 ) return 0;
if( i>p->iSize || i==0 ) return 0;
int sqlite3BitvecTestNotNull(Bitvec *p, u32 i){
assert( p!=0 );
i--;
if( i>=p->iSize ) return 0;
while( p->iDivisor ){
u32 bin = i/p->iDivisor;
i = i%p->iDivisor;
@ -149,6 +149,9 @@ int sqlite3BitvecTest(Bitvec *p, u32 i){
return 0;
}
}
/*
** NULL-tolerant front end for sqlite3BitvecTestNotNull().
**
** Returns 0 when p is NULL (the bitmap has not been created).  Otherwise
** returns 1 if sqlite3BitvecTestNotNull(p,i) reports a non-zero result and
** 0 if it does not.  Callers that already know p is non-NULL can call
** sqlite3BitvecTestNotNull() directly and skip the NULL check.
*/
int sqlite3BitvecTest(Bitvec *p, u32 i){
return p!=0 && sqlite3BitvecTestNotNull(p,i);
}
/*
** Set the i-th bit. Return 0 on success and an error code if

View File

@ -620,7 +620,7 @@ static int saveCursorPosition(BtCursor *pCur){
** table, then malloc space for and store the pCur->nKey bytes of key
** data.
*/
if( 0==pCur->apPage[0]->intKey ){
if( 0==pCur->curIntKey ){
void *pKey = sqlite3Malloc( pCur->nKey );
if( pKey ){
rc = sqlite3BtreeKey(pCur, 0, (int)pCur->nKey, pKey);
@ -633,7 +633,7 @@ static int saveCursorPosition(BtCursor *pCur){
rc = SQLITE_NOMEM;
}
}
assert( !pCur->apPage[0]->intKey || !pCur->pKey );
assert( !pCur->curIntKey || !pCur->pKey );
if( rc==SQLITE_OK ){
btreeReleaseAllCursorPages(pCur);
@ -655,6 +655,15 @@ static int SQLITE_NOINLINE saveCursorsOnList(BtCursor*,Pgno,BtCursor*);
** routine is called just before cursor pExcept is used to modify the
** table, for example in BtreeDelete() or BtreeInsert().
**
** If there are two or more cursors on the same btree, then all such
** cursors should have their BTCF_Multiple flag set. The btreeCursor()
** routine enforces that rule. This routine only needs to be called in
** the uncommon case when pExcept has the BTCF_Multiple flag set.
**
** If pExcept!=NULL and if no other cursors are found on the same root-page,
** then the BTCF_Multiple flag on pExcept is cleared, to avoid another
** pointless call to this routine.
**
** Implementation note: This routine merely checks to see if any cursors
** need to be saved. It calls out to saveCursorsOnList() in the (unusual)
** event that cursors are in need of being saved.
@ -666,7 +675,9 @@ static int saveAllCursors(BtShared *pBt, Pgno iRoot, BtCursor *pExcept){
for(p=pBt->pCursor; p; p=p->pNext){
if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) ) break;
}
return p ? saveCursorsOnList(p, iRoot, pExcept) : SQLITE_OK;
if( p ) return saveCursorsOnList(p, iRoot, pExcept);
if( pExcept ) pExcept->curFlags &= ~BTCF_Multiple;
return SQLITE_OK;
}
/* This helper routine to saveAllCursors does the actual work of saving
@ -954,10 +965,16 @@ static int ptrmapGet(BtShared *pBt, Pgno key, u8 *pEType, Pgno *pPgno){
** the page, 1 means the second cell, and so forth) return a pointer
** to the cell content.
**
** findCellPastPtr() does the same except it skips past the initial
** 4-byte child pointer found on interior pages, if there is one.
**
** This routine works only for pages that do not contain overflow cells.
*/
#define findCell(P,I) \
((P)->aData + ((P)->maskPage & get2byte(&(P)->aCellIdx[2*(I)])))
((P)->aData + ((P)->maskPage & get2byteAligned(&(P)->aCellIdx[2*(I)])))
#define findCellPastPtr(P,I) \
((P)->aDataOfst + ((P)->maskPage & get2byteAligned(&(P)->aCellIdx[2*(I)])))
/*
** This is common tail processing for btreeParseCellPtr() and
@ -1351,18 +1368,20 @@ static int defragmentPage(MemPage *pPage){
** This function may detect corruption within pPg. If corruption is
** detected then *pRc is set to SQLITE_CORRUPT and NULL is returned.
**
** If a slot of at least nByte bytes is found but cannot be used because
** there are already at least 60 fragmented bytes on the page, return NULL.
** In this case, if pbDefrag parameter is not NULL, set *pbDefrag to true.
** Slots on the free list that are between 1 and 3 bytes larger than nByte
** will be ignored if adding the extra space to the fragmentation count
** causes the fragmentation count to exceed 60.
*/
static u8 *pageFindSlot(MemPage *pPg, int nByte, int *pRc, int *pbDefrag){
static u8 *pageFindSlot(MemPage *pPg, int nByte, int *pRc){
const int hdr = pPg->hdrOffset;
u8 * const aData = pPg->aData;
int iAddr;
int pc;
int iAddr = hdr + 1;
int pc = get2byte(&aData[iAddr]);
int x;
int usableSize = pPg->pBt->usableSize;
for(iAddr=hdr+1; (pc = get2byte(&aData[iAddr]))>0; iAddr=pc){
assert( pc>0 );
do{
int size; /* Size of the free slot */
/* EVIDENCE-OF: R-06866-39125 Freeblocks are always connected in order of
** increasing offset. */
@ -1374,8 +1393,7 @@ static u8 *pageFindSlot(MemPage *pPg, int nByte, int *pRc, int *pbDefrag){
** freeblock form a big-endian integer which is the size of the freeblock
** in bytes, including the 4-byte header. */
size = get2byte(&aData[pc+2]);
if( size>=nByte ){
int x = size - nByte;
if( (x = size - nByte)>=0 ){
testcase( x==4 );
testcase( x==3 );
if( pc < pPg->cellOffset+2*pPg->nCell || size+pc > usableSize ){
@ -1384,10 +1402,8 @@ static u8 *pageFindSlot(MemPage *pPg, int nByte, int *pRc, int *pbDefrag){
}else if( x<4 ){
/* EVIDENCE-OF: R-11498-58022 In a well-formed b-tree page, the total
** number of bytes in fragments may not exceed 60. */
if( aData[hdr+7]>=60 ){
if( pbDefrag ) *pbDefrag = 1;
return 0;
}
if( aData[hdr+7]>57 ) return 0;
/* Remove the slot from the free-list. Update the number of
** fragmented bytes within the page. */
memcpy(&aData[iAddr], &aData[pc], 2);
@ -1399,7 +1415,9 @@ static u8 *pageFindSlot(MemPage *pPg, int nByte, int *pRc, int *pbDefrag){
}
return &aData[pc + x];
}
}
iAddr = pc;
pc = get2byte(&aData[pc]);
}while( pc );
return 0;
}
@ -1441,7 +1459,7 @@ static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){
** However, that integer is too large to be stored in a 2-byte unsigned
** integer, so a value of 0 is used in its place. */
top = get2byte(&data[hdr+5]);
assert( top<=pPage->pBt->usableSize ); /* Prevent by getAndInitPage() */
assert( top<=(int)pPage->pBt->usableSize ); /* Prevent by getAndInitPage() */
if( gap>top ){
if( top==0 && pPage->pBt->usableSize==65536 ){
top = 65536;
@ -1457,15 +1475,14 @@ static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){
testcase( gap+2==top );
testcase( gap+1==top );
testcase( gap==top );
if( gap+2<=top && (data[hdr+1] || data[hdr+2]) ){
int bDefrag = 0;
u8 *pSpace = pageFindSlot(pPage, nByte, &rc, &bDefrag);
if( rc ) return rc;
if( bDefrag ) goto defragment_page;
if( (data[hdr+2] || data[hdr+1]) && gap+2<=top ){
u8 *pSpace = pageFindSlot(pPage, nByte, &rc);
if( pSpace ){
assert( pSpace>=data && (pSpace - data)<65536 );
*pIdx = (int)(pSpace - data);
return SQLITE_OK;
}else if( rc ){
return rc;
}
}
@ -1474,7 +1491,6 @@ static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){
*/
testcase( gap+2+nByte==top );
if( gap+2+nByte>top ){
defragment_page:
assert( pPage->nCell>0 || CORRUPT_DB );
rc = defragmentPage(pPage);
if( rc ) return rc;
@ -1704,6 +1720,7 @@ static int btreeInitPage(MemPage *pPage){
pPage->cellOffset = cellOffset = hdr + 8 + pPage->childPtrSize;
pPage->aDataEnd = &data[usableSize];
pPage->aCellIdx = &data[cellOffset];
pPage->aDataOfst = &data[pPage->childPtrSize];
/* EVIDENCE-OF: R-58015-48175 The two-byte integer at offset 5 designates
** the start of the cell content area. A zero value for this integer is
** interpreted as 65536. */
@ -1737,7 +1754,7 @@ static int btreeInitPage(MemPage *pPage){
if( !pPage->leaf ) iCellLast--;
for(i=0; i<pPage->nCell; i++){
pc = get2byte(&data[cellOffset+i*2]);
pc = get2byteAligned(&data[cellOffset+i*2]);
testcase( pc==iCellFirst );
testcase( pc==iCellLast );
if( pc<iCellFirst || pc>iCellLast ){
@ -1823,6 +1840,7 @@ static void zeroPage(MemPage *pPage, int flags){
pPage->cellOffset = first;
pPage->aDataEnd = &data[pBt->usableSize];
pPage->aCellIdx = &data[first];
pPage->aDataOfst = &data[pPage->childPtrSize];
pPage->nOverflow = 0;
assert( pBt->pageSize>=512 && pBt->pageSize<=65536 );
pPage->maskPage = (u16)(pBt->pageSize - 1);
@ -1841,7 +1859,7 @@ static MemPage *btreePageFromDbPage(DbPage *pDbPage, Pgno pgno, BtShared *pBt){
pPage->pDbPage = pDbPage;
pPage->pBt = pBt;
pPage->pgno = pgno;
pPage->hdrOffset = pPage->pgno==1 ? 100 : 0;
pPage->hdrOffset = pgno==1 ? 100 : 0;
return pPage;
}
@ -1902,35 +1920,62 @@ u32 sqlite3BtreeLastPage(Btree *p){
}
/*
** Get a page from the pager and initialize it. This routine is just a
** convenience wrapper around separate calls to btreeGetPage() and
** btreeInitPage().
** Get a page from the pager and initialize it.
**
** If an error occurs, then the value *ppPage is set to is undefined. It
** If pCur!=0 then the page is being fetched as part of a moveToChild()
** call. Do additional sanity checking on the page in this case.
** And if the fetch fails, this routine must decrement pCur->iPage.
**
** The page is fetched as read-write unless pCur is not NULL and is
** a read-only cursor.
**
** If an error occurs, then *ppPage is undefined. It
** may remain unchanged, or it may be set to an invalid value.
*/
static int getAndInitPage(
BtShared *pBt, /* The database file */
Pgno pgno, /* Number of the page to get */
MemPage **ppPage, /* Write the page pointer here */
int bReadonly /* PAGER_GET_READONLY or 0 */
BtCursor *pCur, /* Cursor to receive the page, or NULL */
int bReadOnly /* True for a read-only page */
){
int rc;
DbPage *pDbPage;
assert( sqlite3_mutex_held(pBt->mutex) );
assert( bReadonly==PAGER_GET_READONLY || bReadonly==0 );
assert( pCur==0 || ppPage==&pCur->apPage[pCur->iPage] );
assert( pCur==0 || bReadOnly==pCur->curPagerFlags );
assert( pCur==0 || pCur->iPage>0 );
if( pgno>btreePagecount(pBt) ){
rc = SQLITE_CORRUPT_BKPT;
}else{
rc = btreeGetPage(pBt, pgno, ppPage, bReadonly);
if( rc==SQLITE_OK && (*ppPage)->isInit==0 ){
goto getAndInitPage_error;
}
rc = sqlite3PagerAcquire(pBt->pPager, pgno, (DbPage**)&pDbPage, bReadOnly);
if( rc ){
goto getAndInitPage_error;
}
*ppPage = btreePageFromDbPage(pDbPage, pgno, pBt);
if( (*ppPage)->isInit==0 ){
rc = btreeInitPage(*ppPage);
if( rc!=SQLITE_OK ){
releasePage(*ppPage);
}
goto getAndInitPage_error;
}
}
/* If obtaining a child page for a cursor, we must verify that the page is
** compatible with the root page. */
if( pCur
&& ((*ppPage)->nCell<1 || (*ppPage)->intKey!=pCur->curIntKey)
){
rc = SQLITE_CORRUPT_BKPT;
releasePage(*ppPage);
goto getAndInitPage_error;
}
return SQLITE_OK;
getAndInitPage_error:
if( pCur ) pCur->iPage--;
testcase( pgno==0 );
assert( pgno!=0 || rc==SQLITE_CORRUPT );
return rc;
@ -1940,8 +1985,7 @@ static int getAndInitPage(
** Release a MemPage. This should be called once for each prior
** call to btreeGetPage.
*/
static void releasePage(MemPage *pPage){
if( pPage ){
static void releasePageNotNull(MemPage *pPage){
assert( pPage->aData );
assert( pPage->pBt );
assert( pPage->pDbPage!=0 );
@ -1950,6 +1994,8 @@ static void releasePage(MemPage *pPage){
assert( sqlite3_mutex_held(pPage->pBt->mutex) );
sqlite3PagerUnrefNotNull(pPage->pDbPage);
}
/*
** NULL-tolerant wrapper around releasePageNotNull(): do nothing if pPage
** is NULL, otherwise release the page via releasePageNotNull().
*/
static void releasePage(MemPage *pPage){
if( pPage ) releasePageNotNull(pPage);
}
/*
@ -2924,7 +2970,7 @@ static void unlockBtreeIfUnused(BtShared *pBt){
assert( pPage1->aData );
assert( sqlite3PagerRefcount(pBt->pPager)==1 );
pBt->pPage1 = 0;
releasePage(pPage1);
releasePageNotNull(pPage1);
}
}
@ -3980,6 +4026,7 @@ static int btreeCursor(
BtCursor *pCur /* Space for new cursor */
){
BtShared *pBt = p->pBt; /* Shared b-tree handle */
BtCursor *pX; /* Looping over other all cursors */
assert( sqlite3BtreeHoldsMutex(p) );
assert( wrFlag==0 || wrFlag==1 );
@ -3995,10 +4042,8 @@ static int btreeCursor(
assert( p->inTrans>TRANS_NONE );
assert( wrFlag==0 || p->inTrans==TRANS_WRITE );
assert( pBt->pPage1 && pBt->pPage1->aData );
assert( wrFlag==0 || (pBt->btsFlags & BTS_READ_ONLY)==0 );
if( NEVER(wrFlag && (pBt->btsFlags & BTS_READ_ONLY)!=0) ){
return SQLITE_READONLY;
}
if( wrFlag ){
allocateTempSpace(pBt);
if( pBt->pTmpSpace==0 ) return SQLITE_NOMEM;
@ -4017,10 +4062,16 @@ static int btreeCursor(
pCur->pBt = pBt;
assert( wrFlag==0 || wrFlag==BTCF_WriteFlag );
pCur->curFlags = wrFlag;
pCur->pNext = pBt->pCursor;
if( pCur->pNext ){
pCur->pNext->pPrev = pCur;
pCur->curPagerFlags = wrFlag ? 0 : PAGER_GET_READONLY;
/* If there are two or more cursors on the same btree, then all such
** cursors *must* have the BTCF_Multiple flag set. */
for(pX=pBt->pCursor; pX; pX=pX->pNext){
if( pX->pgnoRoot==(Pgno)iTable ){
pX->curFlags |= BTCF_Multiple;
pCur->curFlags |= BTCF_Multiple;
}
}
pCur->pNext = pBt->pCursor;
pBt->pCursor = pCur;
pCur->eState = CURSOR_INVALID;
return SQLITE_OK;
@ -4078,13 +4129,18 @@ int sqlite3BtreeCloseCursor(BtCursor *pCur){
BtShared *pBt = pCur->pBt;
sqlite3BtreeEnter(pBtree);
sqlite3BtreeClearCursor(pCur);
if( pCur->pPrev ){
pCur->pPrev->pNext = pCur->pNext;
}else{
assert( pBt->pCursor!=0 );
if( pBt->pCursor==pCur ){
pBt->pCursor = pCur->pNext;
}else{
BtCursor *pPrev = pBt->pCursor;
do{
if( pPrev->pNext==pCur ){
pPrev->pNext = pCur->pNext;
break;
}
if( pCur->pNext ){
pCur->pNext->pPrev = pCur->pPrev;
pPrev = pPrev->pNext;
}while( ALWAYS(pPrev) );
}
for(i=0; i<=pCur->iPage; i++){
releasePage(pCur->apPage[i]);
@ -4630,9 +4686,6 @@ const void *sqlite3BtreeDataFetch(BtCursor *pCur, u32 *pAmt){
** vice-versa).
*/
static int moveToChild(BtCursor *pCur, u32 newPgno){
int rc;
int i = pCur->iPage;
MemPage *pNewPage;
BtShared *pBt = pCur->pBt;
assert( cursorHoldsMutex(pCur) );
@ -4642,19 +4695,12 @@ static int moveToChild(BtCursor *pCur, u32 newPgno){
if( pCur->iPage>=(BTCURSOR_MAX_DEPTH-1) ){
return SQLITE_CORRUPT_BKPT;
}
rc = getAndInitPage(pBt, newPgno, &pNewPage,
(pCur->curFlags & BTCF_WriteFlag)==0 ? PAGER_GET_READONLY : 0);
if( rc ) return rc;
pCur->apPage[i+1] = pNewPage;
pCur->aiIdx[i+1] = 0;
pCur->iPage++;
pCur->info.nSize = 0;
pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl);
if( pNewPage->nCell<1 || pNewPage->intKey!=pCur->apPage[i]->intKey ){
return SQLITE_CORRUPT_BKPT;
}
return SQLITE_OK;
pCur->iPage++;
pCur->aiIdx[pCur->iPage] = 0;
return getAndInitPage(pBt, newPgno, &pCur->apPage[pCur->iPage],
pCur, pCur->curPagerFlags);
}
#if SQLITE_DEBUG
@ -4698,11 +4744,9 @@ static void moveToParent(BtCursor *pCur){
pCur->apPage[pCur->iPage]->pgno
);
testcase( pCur->aiIdx[pCur->iPage-1] > pCur->apPage[pCur->iPage-1]->nCell );
releasePage(pCur->apPage[pCur->iPage]);
pCur->iPage--;
pCur->info.nSize = 0;
pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl);
releasePageNotNull(pCur->apPage[pCur->iPage--]);
}
/*
@ -4743,18 +4787,23 @@ static int moveToRoot(BtCursor *pCur){
}
if( pCur->iPage>=0 ){
while( pCur->iPage ) releasePage(pCur->apPage[pCur->iPage--]);
while( pCur->iPage ){
assert( pCur->apPage[pCur->iPage]!=0 );
releasePageNotNull(pCur->apPage[pCur->iPage--]);
}
}else if( pCur->pgnoRoot==0 ){
pCur->eState = CURSOR_INVALID;
return SQLITE_OK;
}else{
assert( pCur->iPage==(-1) );
rc = getAndInitPage(pCur->pBtree->pBt, pCur->pgnoRoot, &pCur->apPage[0],
(pCur->curFlags & BTCF_WriteFlag)==0 ? PAGER_GET_READONLY : 0);
0, pCur->curPagerFlags);
if( rc!=SQLITE_OK ){
pCur->eState = CURSOR_INVALID;
return rc;
}
pCur->iPage = 0;
pCur->curIntKey = pCur->apPage[0]->intKey;
}
pRoot = pCur->apPage[0];
assert( pRoot->pgno==pCur->pgnoRoot );
@ -4957,7 +5006,7 @@ int sqlite3BtreeMovetoUnpacked(
/* If the cursor is already positioned at the point we are trying
** to move to, then just return without doing any work */
if( pCur->eState==CURSOR_VALID && (pCur->curFlags & BTCF_ValidNKey)!=0
&& pCur->apPage[0]->intKey
&& pCur->curIntKey
){
if( pCur->info.nKey==intKey ){
*pRes = 0;
@ -4992,7 +5041,8 @@ int sqlite3BtreeMovetoUnpacked(
assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->nCell==0 );
return SQLITE_OK;
}
assert( pCur->apPage[0]->intKey || pIdxKey );
assert( pCur->apPage[0]->intKey==pCur->curIntKey );
assert( pCur->curIntKey || pIdxKey );
for(;;){
int lwr, upr, idx, c;
Pgno chldPg;
@ -5015,7 +5065,7 @@ int sqlite3BtreeMovetoUnpacked(
if( xRecordCompare==0 ){
for(;;){
i64 nCellKey;
pCell = findCell(pPage, idx) + pPage->childPtrSize;
pCell = findCellPastPtr(pPage, idx);
if( pPage->intKeyLeaf ){
while( 0x80 <= *(pCell++) ){
if( pCell>=pPage->aDataEnd ) return SQLITE_CORRUPT_BKPT;
@ -5048,7 +5098,7 @@ int sqlite3BtreeMovetoUnpacked(
}else{
for(;;){
int nCell; /* Size of the pCell cell in bytes */
pCell = findCell(pPage, idx) + pPage->childPtrSize;
pCell = findCellPastPtr(pPage, idx);
/* The maximum supported page-size is 65536 bytes. This means that
** the maximum number of record bytes stored on an index B-Tree
@ -5986,9 +6036,7 @@ static int fillInCell(
nSrc = nData;
nData = 0;
}else{
if( NEVER(nKey>0x7fffffff || pKey==0) ){
return SQLITE_CORRUPT_BKPT;
}
assert( nKey<=0x7fffffff && pKey!=0 );
nPayload = (int)nKey;
pSrc = pKey;
nSrc = (int)nKey;
@ -6198,10 +6246,8 @@ static void insertCell(
){
int idx = 0; /* Where to write new cell content in data[] */
int j; /* Loop counter */
int end; /* First byte past the last cell pointer in data[] */
int ins; /* Index in data[] where new cell pointer is inserted */
int cellOffset; /* Address of first cell pointer in data[] */
u8 *data; /* The content of the whole page */
u8 *pIns; /* The point in pPage->aCellIdx[] where no cell inserted */
if( *pRC ) return;
@ -6245,24 +6291,26 @@ static void insertCell(
}
assert( sqlite3PagerIswriteable(pPage->pDbPage) );
data = pPage->aData;
cellOffset = pPage->cellOffset;
end = cellOffset + 2*pPage->nCell;
ins = cellOffset + 2*i;
assert( &data[pPage->cellOffset]==pPage->aCellIdx );
rc = allocateSpace(pPage, sz, &idx);
if( rc ){ *pRC = rc; return; }
/* The allocateSpace() routine guarantees the following properties
** if it returns successfully */
assert( idx >= 0 && (idx >= end+2 || CORRUPT_DB) );
assert( idx >= 0 );
assert( idx >= pPage->cellOffset+2*pPage->nCell+2 || CORRUPT_DB );
assert( idx+sz <= (int)pPage->pBt->usableSize );
pPage->nCell++;
pPage->nFree -= (u16)(2 + sz);
memcpy(&data[idx], pCell, sz);
if( iChild ){
put4byte(&data[idx], iChild);
}
memmove(&data[ins+2], &data[ins], end-ins);
put2byte(&data[ins], idx);
put2byte(&data[pPage->hdrOffset+3], pPage->nCell);
pIns = pPage->aCellIdx + i*2;
memmove(pIns+2, pIns, 2*(pPage->nCell - i));
put2byte(pIns, idx);
pPage->nCell++;
/* increment the cell count */
if( (++data[pPage->hdrOffset+4])==0 ) data[pPage->hdrOffset+3]++;
assert( get2byte(&data[pPage->hdrOffset+3])==pPage->nCell );
#ifndef SQLITE_OMIT_AUTOVACUUM
if( pPage->pBt->autoVacuum ){
/* The cell may contain a pointer to an overflow page. If so, write
@ -6414,14 +6462,13 @@ static int pageInsertArray(
int i;
u8 *aData = pPg->aData;
u8 *pData = *ppData;
const int bFreelist = aData[1] || aData[2];
int iEnd = iFirst + nCell;
assert( CORRUPT_DB || pPg->hdrOffset==0 ); /* Never called on page 1 */
for(i=iFirst; i<iEnd; i++){
int sz, rc;
u8 *pSlot;
sz = cachedCellSize(pCArray, i);
if( bFreelist==0 || (pSlot = pageFindSlot(pPg, sz, &rc, 0))==0 ){
if( (aData[1]==0 && aData[2]==0) || (pSlot = pageFindSlot(pPg,sz,&rc))==0 ){
pData -= sz;
if( pData<pBegin ) return 1;
pSlot = pData;
@ -6578,7 +6625,7 @@ static int editPage(
#ifdef SQLITE_DEBUG
for(i=0; i<nNew && !CORRUPT_DB; i++){
u8 *pCell = pCArray->apCell[i+iNew];
int iOff = get2byte(&pPg->aCellIdx[i*2]);
int iOff = get2byteAligned(&pPg->aCellIdx[i*2]);
if( pCell>=aData && pCell<&aData[pPg->pBt->usableSize] ){
pCell = &pTmp[pCell - aData];
}
@ -6952,7 +6999,7 @@ static int balance_nonroot(
}
pgno = get4byte(pRight);
while( 1 ){
rc = getAndInitPage(pBt, pgno, &apOld[i], 0);
rc = getAndInitPage(pBt, pgno, &apOld[i], 0, 0);
if( rc ){
memset(apOld, 0, (i+1)*sizeof(MemPage*));
goto balance_cleanup;
@ -7080,7 +7127,7 @@ static int balance_nonroot(
memset(&b.szCell[b.nCell+limit], 0, sizeof(b.szCell[0])*pOld->nOverflow);
limit = pOld->aiOvfl[0];
for(j=0; j<limit; j++){
b.apCell[b.nCell] = aData + (maskPage & get2byte(piCell));
b.apCell[b.nCell] = aData + (maskPage & get2byteAligned(piCell));
piCell += 2;
b.nCell++;
}
@ -7093,7 +7140,7 @@ static int balance_nonroot(
piEnd = aData + pOld->cellOffset + 2*pOld->nCell;
while( piCell<piEnd ){
assert( b.nCell<nMaxCells );
b.apCell[b.nCell] = aData + (maskPage & get2byte(piCell));
b.apCell[b.nCell] = aData + (maskPage & get2byteAligned(piCell));
piCell += 2;
b.nCell++;
}
@ -7870,24 +7917,28 @@ int sqlite3BtreeInsert(
** doing any work. To avoid thwarting these optimizations, it is important
** not to clear the cursor here.
*/
if( pCur->curFlags & BTCF_Multiple ){
rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur);
if( rc ) return rc;
}
if( pCur->pKeyInfo==0 ){
assert( pKey==0 );
/* If this is an insert into a table b-tree, invalidate any incrblob
** cursors open on the row being replaced */
invalidateIncrblobCursors(p, nKey, 0);
/* If the cursor is currently on the last row and we are appending a
** new row onto the end, set the "loc" to avoid an unnecessary btreeMoveto()
** call */
** new row onto the end, set the "loc" to avoid an unnecessary
** btreeMoveto() call */
if( (pCur->curFlags&BTCF_ValidNKey)!=0 && nKey>0
&& pCur->info.nKey==nKey-1 ){
loc = -1;
}else if( loc==0 ){
rc = sqlite3BtreeMovetoUnpacked(pCur, 0, nKey, appendBias, &loc);
if( rc ) return rc;
}
}
if( !loc ){
}else if( loc==0 ){
rc = btreeMoveto(pCur, pKey, nKey, appendBias, &loc);
if( rc ) return rc;
}
@ -7989,12 +8040,8 @@ int sqlite3BtreeDelete(BtCursor *pCur){
assert( pCur->curFlags & BTCF_WriteFlag );
assert( hasSharedCacheTableLock(p, pCur->pgnoRoot, pCur->pKeyInfo!=0, 2) );
assert( !hasReadConflicts(p, pCur->pgnoRoot) );
if( NEVER(pCur->aiIdx[pCur->iPage]>=pCur->apPage[pCur->iPage]->nCell)
|| NEVER(pCur->eState!=CURSOR_VALID)
){
return SQLITE_ERROR; /* Something has gone awry. */
}
assert( pCur->aiIdx[pCur->iPage]<pCur->apPage[pCur->iPage]->nCell );
assert( pCur->eState==CURSOR_VALID );
iCellDepth = pCur->iPage;
iCellIdx = pCur->aiIdx[iCellDepth];
@ -8019,8 +8066,10 @@ int sqlite3BtreeDelete(BtCursor *pCur){
** deleted writable. Then free any overflow pages associated with the
** entry and finally remove the cell itself from within the page.
*/
if( pCur->curFlags & BTCF_Multiple ){
rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur);
if( rc ) return rc;
}
/* If this is a delete operation to remove a row from a table b-tree,
** invalidate any incrblob cursors open on the row being deleted. */
@ -8269,7 +8318,7 @@ static int clearDatabasePage(
if( pgno>btreePagecount(pBt) ){
return SQLITE_CORRUPT_BKPT;
}
rc = getAndInitPage(pBt, pgno, &pPage, 0);
rc = getAndInitPage(pBt, pgno, &pPage, 0, 0);
if( rc ) return rc;
if( pPage->bBusy ){
rc = SQLITE_CORRUPT_BKPT;
@ -9056,7 +9105,7 @@ static int checkTreePage(
/* EVIDENCE-OF: R-02776-14802 The cell pointer array consists of K 2-byte
** integer offsets to the cell contents. */
for(i=0; i<nCell; i++){
int pc = get2byte(&data[cellStart+i*2]);
int pc = get2byteAligned(&data[cellStart+i*2]);
u32 size = 65536;
if( pc<=usableSize-4 ){
size = pPage->xCellSize(pPage, &data[pc]);

View File

@ -295,6 +295,7 @@ struct MemPage {
u8 *aData; /* Pointer to disk image of the page data */
u8 *aDataEnd; /* One byte past the end of usable data */
u8 *aCellIdx; /* The cell index area */
u8 *aDataOfst; /* Same as aData for leaves. aData+4 for interior */
DbPage *pDbPage; /* Pager page handle */
u16 (*xCellSize)(MemPage*,u8*); /* cellSizePtr method */
void (*xParseCell)(MemPage*,u8*,CellInfo*); /* btreeParseCell method */
@ -506,8 +507,7 @@ struct CellInfo {
struct BtCursor {
Btree *pBtree; /* The Btree to which this cursor belongs */
BtShared *pBt; /* The BtShared this cursor points to */
BtCursor *pNext, *pPrev; /* Forms a linked list of all cursors */
struct KeyInfo *pKeyInfo; /* Argument passed to comparison function */
BtCursor *pNext; /* Forms a linked list of all cursors */
Pgno *aOverflow; /* Cache of overflow page locations */
CellInfo info; /* A parse of the cell we are pointing at */
i64 nKey; /* Size of pKey, or last integer key */
@ -517,9 +517,16 @@ struct BtCursor {
int skipNext; /* Prev() is noop if negative. Next() is noop if positive.
** Error code if eState==CURSOR_FAULT */
u8 curFlags; /* zero or more BTCF_* flags defined below */
u8 curPagerFlags; /* Flags to send to sqlite3PagerAcquire() */
u8 eState; /* One of the CURSOR_XXX constants (see below) */
u8 hints; /* As configured by CursorSetHints() */
i16 iPage; /* Index of current page in apPage */
/* All fields above are zeroed when the cursor is allocated. See
** sqlite3BtreeCursorZero(). Fields that follow must be manually
** initialized. */
i8 iPage; /* Index of current page in apPage */
u8 curIntKey; /* Value of apPage[0]->intKey */
struct KeyInfo *pKeyInfo; /* Argument passed to comparison function */
void *padding1; /* Make object size a multiple of 16 */
u16 aiIdx[BTCURSOR_MAX_DEPTH]; /* Current index in apPage[i] */
MemPage *apPage[BTCURSOR_MAX_DEPTH]; /* Pages from root to current page */
};
@ -532,6 +539,7 @@ struct BtCursor {
#define BTCF_ValidOvfl 0x04 /* True if aOverflow is valid */
#define BTCF_AtLast 0x08 /* Cursor is pointing ot the last entry */
#define BTCF_Incrblob 0x10 /* True if an incremental I/O handle */
#define BTCF_Multiple 0x20 /* Maybe another cursor on the same btree */
/*
** Potential values for BtCursor.eState.
@ -683,3 +691,16 @@ struct IntegrityCk {
#define put2byte(p,v) ((p)[0] = (u8)((v)>>8), (p)[1] = (u8)(v))
#define get4byte sqlite3Get4byte
#define put4byte sqlite3Put4byte
/*
** get2byteAligned(), unlike get2byte(), requires that its argument point to a
** two-byte aligned address. get2byteAligned() is only used for accessing the
** 2-byte cell offsets stored in the cell pointer array of a btree page.
**
** Three implementations are selected at compile time:
**
**   *  SQLITE_BYTEORDER==4321: the value is read directly as a u16.
**
**   *  SQLITE_BYTEORDER==1234 and GCC_VERSION>=4008000: the value is read
**      as a u16 and byte-swapped using __builtin_bswap16().
**
**   *  Otherwise: the value is assembled from two individual bytes, high
**      byte first.  This fallback performs only byte accesses.
*/
#if SQLITE_BYTEORDER==4321
# define get2byteAligned(x) (*(u16*)(x))
#elif SQLITE_BYTEORDER==1234 && GCC_VERSION>=4008000
# define get2byteAligned(x) __builtin_bswap16(*(u16*)(x))
#else
# define get2byteAligned(x) ((x)[0]<<8 | (x)[1])
#endif

View File

@ -284,7 +284,7 @@ int sqlite3_complete16(const void *zSql){
rc = SQLITE_NOMEM;
}
sqlite3ValueFree(pVal);
return sqlite3ApiExit(0, rc);
return rc & 0xff;
}
#endif /* SQLITE_OMIT_UTF16 */
#endif /* SQLITE_OMIT_COMPLETE */

View File

@ -2925,7 +2925,7 @@ int sqlite3ExprCodeTarget(Parse *pParse, Expr *pExpr, int target){
if( !pColl ) pColl = db->pDfltColl;
sqlite3VdbeAddOp4(v, OP_CollSeq, 0, 0, 0, (char *)pColl, P4_COLLSEQ);
}
sqlite3VdbeAddOp4(v, OP_Function, constMask, r1, target,
sqlite3VdbeAddOp4(v, OP_Function0, constMask, r1, target,
(char*)pDef, P4_FUNCDEF);
sqlite3VdbeChangeP5(v, (u8)nFarg);
if( nFarg && constMask==0 ){

View File

@ -329,7 +329,7 @@ static int yy_pop_parser_stack(yyParser *pParser){
/* There is no mechanism by which the parser stack can be popped below
** empty in SQLite. */
if( NEVER(pParser->yyidx<0) ) return 0;
assert( pParser->yyidx>=0 );
#ifndef NDEBUG
if( yyTraceFILE && pParser->yyidx>=0 ){
fprintf(yyTraceFILE,"%sPopping %s\n",

View File

@ -2949,7 +2949,7 @@ opendb_out:
sqlite3GlobalConfig.xSqllog(pArg, db, zFilename, 0);
}
#endif
return sqlite3ApiExit(0, rc);
return rc & 0xff;
}
/*
@ -3007,7 +3007,7 @@ int sqlite3_open16(
}
sqlite3ValueFree(pVal);
return sqlite3ApiExit(0, rc);
return rc & 0xff;
}
#endif /* SQLITE_OMIT_UTF16 */

View File

@ -796,17 +796,16 @@ static SQLITE_NOINLINE int apiOomError(sqlite3 *db){
** function. However, if a malloc() failure has occurred since the previous
** invocation SQLITE_NOMEM is returned instead.
**
** If the first argument, db, is not NULL and a malloc() error has occurred,
** then the connection error-code (the value returned by sqlite3_errcode())
** is set to SQLITE_NOMEM.
** If an OOM has occurred, then the connection error-code (the value
** returned by sqlite3_errcode()) is set to SQLITE_NOMEM.
*/
int sqlite3ApiExit(sqlite3* db, int rc){
/* If the db handle is not NULL, then we must hold the connection handle
** mutex here. Otherwise the read (and possible write) of db->mallocFailed
/* The caller must hold the connection handle mutex here.
** Otherwise the read (and possible write) of db->mallocFailed
** is unsafe, as is the call to sqlite3Error().
*/
assert( !db || sqlite3_mutex_held(db->mutex) );
if( db==0 ) return rc & 0xff;
assert( db!=0 );
assert( sqlite3_mutex_held(db->mutex) );
if( db->mallocFailed || rc==SQLITE_IOERR_NOMEM ){
return apiOomError(db);
}

View File

@ -540,11 +540,11 @@ struct PagerSavepoint {
** while it is being traversed by code in pager_playback(). The SPILLFLAG_OFF
** case is a user preference.
**
** If the SPILLFLAG_NOSYNC bit is set, writing to the database from pagerStress()
** is permitted, but syncing the journal file is not. This flag is set
** by sqlite3PagerWrite() when the file-system sector-size is larger than
** the database page-size in order to prevent a journal sync from happening
** in between the journalling of two pages on the same sector.
** If the SPILLFLAG_NOSYNC bit is set, writing to the database from
** pagerStress() is permitted, but syncing the journal file is not.
** This flag is set by sqlite3PagerWrite() when the file-system sector-size
** is larger than the database page-size in order to prevent a journal sync
** from happening in between the journalling of two pages on the same sector.
**
** subjInMemory
**
@ -647,7 +647,7 @@ struct Pager {
u8 doNotSpill; /* Do not spill the cache when non-zero */
u8 subjInMemory; /* True to use in-memory sub-journals */
u8 bUseFetch; /* True to use xFetch() */
u8 hasBeenUsed; /* True if any content previously read from this pager*/
u8 hasBeenUsed; /* True if any content previously read */
Pgno dbSize; /* Number of pages in the database */
Pgno dbOrigSize; /* dbSize before the current transaction */
Pgno dbFileSize; /* Number of pages in the database file */
@ -808,7 +808,7 @@ static const unsigned char aJournalMagic[] = {
**
** if( pPager->jfd->pMethods ){ ...
*/
#define isOpen(pFd) ((pFd)->pMethods)
#define isOpen(pFd) ((pFd)->pMethods!=0)
/*
** Return true if this pager uses a write-ahead log instead of the usual
@ -1031,19 +1031,21 @@ static int subjRequiresPage(PgHdr *pPg){
int i;
for(i=0; i<pPager->nSavepoint; i++){
p = &pPager->aSavepoint[i];
if( p->nOrig>=pgno && 0==sqlite3BitvecTest(p->pInSavepoint, pgno) ){
if( p->nOrig>=pgno && 0==sqlite3BitvecTestNotNull(p->pInSavepoint, pgno) ){
return 1;
}
}
return 0;
}
#ifdef SQLITE_DEBUG
/*
** Return true if the page is already in the journal file, i.e. if the
** bit for page number pPg->pgno is set in the pPager->pInJournal bitvec
** as reported by sqlite3BitvecTest().
*/
static int pageInJournal(Pager *pPager, PgHdr *pPg){
return sqlite3BitvecTest(pPager->pInJournal, pPg->pgno);
}
#endif
/*
** Read a 32-bit integer from the given file descriptor. Store the integer
@ -1655,7 +1657,8 @@ static int writeMasterJournal(Pager *pPager, const char *zMaster){
|| (0 != (rc = sqlite3OsWrite(pPager->jfd, zMaster, nMaster, iHdrOff+4)))
|| (0 != (rc = write32bits(pPager->jfd, iHdrOff+4+nMaster, nMaster)))
|| (0 != (rc = write32bits(pPager->jfd, iHdrOff+4+nMaster+4, cksum)))
|| (0 != (rc = sqlite3OsWrite(pPager->jfd, aJournalMagic, 8, iHdrOff+4+nMaster+8)))
|| (0 != (rc = sqlite3OsWrite(pPager->jfd, aJournalMagic, 8,
iHdrOff+4+nMaster+8)))
){
return rc;
}
@ -2215,7 +2218,7 @@ static int pager_playback_one_page(
}
}
/* If this page has already been played by before during the current
/* If this page has already been played back before during the current
** rollback, then don't bother to play it back again.
*/
if( pDone && (rc = sqlite3BitvecSet(pDone, pgno))!=SQLITE_OK ){
@ -4317,8 +4320,6 @@ static int openSubJournal(Pager *pPager){
/*
** Append a record of the current state of page pPg to the sub-journal.
** It is the callers responsibility to use subjRequiresPage() to check
** that it is really required before calling this function.
**
** If successful, set the bit corresponding to pPg->pgno in the bitvecs
** for all open savepoints before returning.
@ -4365,6 +4366,13 @@ static int subjournalPage(PgHdr *pPg){
}
return rc;
}
/*
** Append page pPg to the sub-journal, but only when subjRequiresPage()
** says that doing so is necessary. If it is not necessary, return
** SQLITE_OK without touching the sub-journal. Otherwise return whatever
** subjournalPage() returns.
*/
static int subjournalPageIfRequired(PgHdr *pPg){
  if( !subjRequiresPage(pPg) ) return SQLITE_OK;
  return subjournalPage(pPg);
}
/*
** This function is called by the pcache layer when it has reached some
@ -4422,9 +4430,7 @@ static int pagerStress(void *p, PgHdr *pPg){
pPg->pDirty = 0;
if( pagerUseWal(pPager) ){
/* Write a single frame for this page to the log. */
if( subjRequiresPage(pPg) ){
rc = subjournalPage(pPg);
}
rc = subjournalPageIfRequired(pPg);
if( rc==SQLITE_OK ){
rc = pagerWalFrames(pPager, pPg, 0, 0);
}
@ -4437,39 +4443,6 @@ static int pagerStress(void *p, PgHdr *pPg){
rc = syncJournal(pPager, 1);
}
/* If the page number of this page is larger than the current size of
** the database image, it may need to be written to the sub-journal.
** This is because the call to pager_write_pagelist() below will not
** actually write data to the file in this case.
**
** Consider the following sequence of events:
**
** BEGIN;
** <journal page X>
** <modify page X>
** SAVEPOINT sp;
** <shrink database file to Y pages>
** pagerStress(page X)
** ROLLBACK TO sp;
**
** If (X>Y), then when pagerStress is called page X will not be written
** out to the database file, but will be dropped from the cache. Then,
** following the "ROLLBACK TO sp" statement, reading page X will read
** data from the database file. This will be the copy of page X as it
** was when the transaction started, not as it was when "SAVEPOINT sp"
** was executed.
**
** The solution is to write the current data for page X into the
** sub-journal file now (if it is not already there), so that it will
** be restored to its current value when the "ROLLBACK TO sp" is
** executed.
*/
if( NEVER(
rc==SQLITE_OK && pPg->pgno>pPager->dbSize && subjRequiresPage(pPg)
) ){
rc = subjournalPage(pPg);
}
/* Write the contents of the page out to the database file. */
if( rc==SQLITE_OK ){
assert( (pPg->flags&PGHDR_NEED_SYNC)==0 );
@ -4725,7 +4698,7 @@ int sqlite3PagerOpen(
act_like_temp_file:
tempFile = 1;
pPager->eState = PAGER_READER; /* Pretend we already have a lock */
pPager->eLock = EXCLUSIVE_LOCK; /* Pretend we are in EXCLUSIVE locking mode */
pPager->eLock = EXCLUSIVE_LOCK; /* Pretend we are in EXCLUSIVE mode */
pPager->noLock = 1; /* Do no locking */
readOnly = (vfsFlags&SQLITE_OPEN_READONLY);
}
@ -5131,7 +5104,7 @@ int sqlite3PagerSharedLock(Pager *pPager){
** occurring on the very first access to a file, in order to save a
** single unnecessary sqlite3OsRead() call at the start-up.
**
** Database changes is detected by looking at 15 bytes beginning
** Database changes are detected by looking at 16 bytes beginning
** at offset 24 into the file. The first 4 of these 16 bytes are
** a 32-bit counter that is incremented with each change. The
** other bytes change randomly with each file change when
@ -5339,9 +5312,14 @@ int sqlite3PagerAcquire(
if( pBase==0 ){
rc = sqlite3PcacheFetchStress(pPager->pPCache, pgno, &pBase);
if( rc!=SQLITE_OK ) goto pager_acquire_err;
if( pBase==0 ){
pPg = *ppPage = 0;
rc = SQLITE_NOMEM;
goto pager_acquire_err;
}
}
pPg = *ppPage = sqlite3PcacheFetchFinish(pPager->pPCache, pgno, pBase);
if( pPg==0 ) rc = SQLITE_NOMEM;
assert( pPg!=0 );
}
}
@ -5352,10 +5330,11 @@ int sqlite3PagerAcquire(
pPg = 0;
goto pager_acquire_err;
}
assert( (*ppPage)->pgno==pgno );
assert( (*ppPage)->pPager==pPager || (*ppPage)->pPager==0 );
assert( pPg==(*ppPage) );
assert( pPg->pgno==pgno );
assert( pPg->pPager==pPager || pPg->pPager==0 );
if( (*ppPage)->pPager && !noContent ){
if( pPg->pPager && !noContent ){
/* In this case the pcache already contains an initialized copy of
** the page. Return without further ado. */
assert( pgno<=PAGER_MAX_PGNO && pgno!=PAGER_MJ_PGNO(pPager) );
@ -5366,7 +5345,6 @@ int sqlite3PagerAcquire(
/* The pager cache has created a new page. Its content needs to
** be initialized. */
pPg = *ppPage;
pPg->pPager = pPager;
/* The maximum page number is 2^31. Return SQLITE_CORRUPT if a page
@ -5445,6 +5423,7 @@ DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno){
assert( pPager->pPCache!=0 );
pPage = sqlite3PcacheFetch(pPager->pPCache, pgno, 0);
assert( pPage==0 || pPager->hasBeenUsed );
if( pPage==0 ) return 0;
return sqlite3PcacheFetchFinish(pPager->pPCache, pgno, pPage);
}
@ -5649,63 +5628,11 @@ int sqlite3PagerBegin(Pager *pPager, int exFlag, int subjInMemory){
}
/*
** Mark a single data page as writeable. The page is written into the
** main journal or sub-journal as required. If the page is written into
** one of the journals, the corresponding bit is set in the
** Pager.pInJournal bitvec and the PagerSavepoint.pInSavepoint bitvecs
** of any open savepoints as appropriate.
** Write page pPg onto the end of the rollback journal.
*/
static int pager_write(PgHdr *pPg){
static SQLITE_NOINLINE int pagerAddPageToRollbackJournal(PgHdr *pPg){
Pager *pPager = pPg->pPager;
int rc = SQLITE_OK;
int inJournal;
/* This routine is not called unless a write-transaction has already
** been started. The journal file may or may not be open at this point.
** It is never called in the ERROR state.
*/
assert( pPager->eState==PAGER_WRITER_LOCKED
|| pPager->eState==PAGER_WRITER_CACHEMOD
|| pPager->eState==PAGER_WRITER_DBMOD
);
assert( assert_pager_state(pPager) );
assert( pPager->errCode==0 );
assert( pPager->readOnly==0 );
CHECK_PAGE(pPg);
/* The journal file needs to be opened. Higher level routines have already
** obtained the necessary locks to begin the write-transaction, but the
** rollback journal might not yet be open. Open it now if this is the case.
**
** This is done before calling sqlite3PcacheMakeDirty() on the page.
** Otherwise, if it were done after calling sqlite3PcacheMakeDirty(), then
** an error might occur and the pager would end up in WRITER_LOCKED state
** with pages marked as dirty in the cache.
*/
if( pPager->eState==PAGER_WRITER_LOCKED ){
rc = pager_open_journal(pPager);
if( rc!=SQLITE_OK ) return rc;
}
assert( pPager->eState>=PAGER_WRITER_CACHEMOD );
assert( assert_pager_state(pPager) );
/* Mark the page as dirty. If the page has already been written
** to the journal then we can return right away.
*/
sqlite3PcacheMakeDirty(pPg);
inJournal = pageInJournal(pPager, pPg);
if( inJournal && (pPager->nSavepoint==0 || !subjRequiresPage(pPg)) ){
assert( !pagerUseWal(pPager) );
}else{
/* The transaction journal now exists and we have a RESERVED or an
** EXCLUSIVE lock on the main database file. Write the current page to
** the transaction journal if it is not there already.
*/
if( !inJournal && !pagerUseWal(pPager) ){
assert( pagerUseWal(pPager)==0 );
if( pPg->pgno<=pPager->dbOrigSize && isOpen(pPager->jfd) ){
int rc;
u32 cksum;
char *pData2;
i64 iOff = pPager->journalOff;
@ -5749,8 +5676,65 @@ static int pager_write(PgHdr *pPg){
testcase( rc==SQLITE_NOMEM );
assert( rc==SQLITE_OK || rc==SQLITE_NOMEM );
rc |= addToSavepointBitvecs(pPager, pPg->pgno);
assert( rc==SQLITE_OK || rc==SQLITE_NOMEM );
return rc;
}
/*
** Mark a single data page as writeable. The page is written into the
** main journal or sub-journal as required. If the page is written into
** one of the journals, the corresponding bit is set in the
** Pager.pInJournal bitvec and the PagerSavepoint.pInSavepoint bitvecs
** of any open savepoints as appropriate.
*/
static int pager_write(PgHdr *pPg){
Pager *pPager = pPg->pPager;
int rc = SQLITE_OK;
/* This routine is not called unless a write-transaction has already
** been started. The journal file may or may not be open at this point.
** It is never called in the ERROR state.
*/
assert( pPager->eState==PAGER_WRITER_LOCKED
|| pPager->eState==PAGER_WRITER_CACHEMOD
|| pPager->eState==PAGER_WRITER_DBMOD
);
assert( assert_pager_state(pPager) );
assert( pPager->errCode==0 );
assert( pPager->readOnly==0 );
CHECK_PAGE(pPg);
/* The journal file needs to be opened. Higher level routines have already
** obtained the necessary locks to begin the write-transaction, but the
** rollback journal might not yet be open. Open it now if this is the case.
**
** This is done before calling sqlite3PcacheMakeDirty() on the page.
** Otherwise, if it were done after calling sqlite3PcacheMakeDirty(), then
** an error might occur and the pager would end up in WRITER_LOCKED state
** with pages marked as dirty in the cache.
*/
if( pPager->eState==PAGER_WRITER_LOCKED ){
rc = pager_open_journal(pPager);
if( rc!=SQLITE_OK ) return rc;
}
assert( pPager->eState>=PAGER_WRITER_CACHEMOD );
assert( assert_pager_state(pPager) );
/* Mark the page that is about to be modified as dirty. */
sqlite3PcacheMakeDirty(pPg);
/* If a rollback journal is in use, then make sure the page that is about
** to change is in the rollback journal, or if the page is a new page off
** the end of the file, make sure it is marked as PGHDR_NEED_SYNC.
*/
assert( (pPager->pInJournal!=0) == isOpen(pPager->jfd) );
if( pPager->pInJournal!=0
&& sqlite3BitvecTestNotNull(pPager->pInJournal, pPg->pgno)==0
){
assert( pagerUseWal(pPager)==0 );
if( pPg->pgno<=pPager->dbOrigSize ){
rc = pagerAddPageToRollbackJournal(pPg);
if( rc!=SQLITE_OK ){
assert( rc==SQLITE_NOMEM );
return rc;
}
}else{
@ -5763,18 +5747,21 @@ static int pager_write(PgHdr *pPg){
}
}
/* If the statement journal is open and the page is not in it,
** then write the current page to the statement journal. Note that
** the statement journal format differs from the standard journal format
** in that it omits the checksums and the header.
/* The PGHDR_DIRTY bit is set above when the page was added to the dirty-list
** and before writing the page into the rollback journal. Wait until now,
** after the page has been successfully journalled, before setting the
** PGHDR_WRITEABLE bit that indicates that the page can be safely modified.
*/
if( pPager->nSavepoint>0 && subjRequiresPage(pPg) ){
rc = subjournalPage(pPg);
}
pPg->flags |= PGHDR_WRITEABLE;
/* If the statement journal is open and the page is not in it,
** then write the page into the statement journal.
*/
if( pPager->nSavepoint>0 ){
rc = subjournalPageIfRequired(pPg);
}
/* Update the database size and return.
*/
/* Update the database size and return. */
if( pPager->dbSize<pPg->pgno ){
pPager->dbSize = pPg->pgno;
}
@ -5789,8 +5776,8 @@ static int pager_write(PgHdr *pPg){
** a write.
**
** Usually, the sector size is less than or equal to the page size, in which
** case pages can be individually written. This routine only runs in the exceptional
** case where the page size is smaller than the sector size.
** case pages can be individually written. This routine only runs in the
** exceptional case where the page size is smaller than the sector size.
*/
static SQLITE_NOINLINE int pagerWriteLargeSector(PgHdr *pPg){
int rc = SQLITE_OK; /* Return code */
@ -5887,11 +5874,15 @@ static SQLITE_NOINLINE int pagerWriteLargeSector(PgHdr *pPg){
** as appropriate. Otherwise, SQLITE_OK.
*/
int sqlite3PagerWrite(PgHdr *pPg){
Pager *pPager = pPg->pPager;
assert( (pPg->flags & PGHDR_MMAP)==0 );
assert( pPg->pPager->eState>=PAGER_WRITER_LOCKED );
assert( pPg->pPager->eState!=PAGER_ERROR );
assert( assert_pager_state(pPg->pPager) );
if( pPg->pPager->sectorSize > (u32)pPg->pPager->pageSize ){
assert( pPager->eState>=PAGER_WRITER_LOCKED );
assert( pPager->eState!=PAGER_ERROR );
assert( assert_pager_state(pPager) );
if( (pPg->flags & PGHDR_WRITEABLE)!=0 && pPager->dbSize>=pPg->pgno ){
if( pPager->nSavepoint ) return subjournalPageIfRequired(pPg);
return SQLITE_OK;
}else if( pPager->sectorSize > (u32)pPager->pageSize ){
return pagerWriteLargeSector(pPg);
}else{
return pager_write(pPg);
@ -5905,7 +5896,7 @@ int sqlite3PagerWrite(PgHdr *pPg){
*/
#ifndef NDEBUG
int sqlite3PagerIswriteable(DbPage *pPg){
return pPg->flags&PGHDR_DIRTY;
return pPg->flags & PGHDR_WRITEABLE;
}
#endif
@ -5929,6 +5920,7 @@ void sqlite3PagerDontWrite(PgHdr *pPg){
PAGERTRACE(("DONT_WRITE page %d of %d\n", pPg->pgno, PAGERID(pPager)));
IOTRACE(("CLEAN %p %d\n", pPager, pPg->pgno))
pPg->flags |= PGHDR_DONT_WRITE;
pPg->flags &= ~PGHDR_WRITEABLE;
pager_set_pagehash(pPg);
}
}
@ -6483,16 +6475,15 @@ int sqlite3PagerIsMemdb(Pager *pPager){
** occurs while opening the sub-journal file, then an IO error code is
** returned. Otherwise, SQLITE_OK.
*/
int sqlite3PagerOpenSavepoint(Pager *pPager, int nSavepoint){
static SQLITE_NOINLINE int pagerOpenSavepoint(Pager *pPager, int nSavepoint){
int rc = SQLITE_OK; /* Return code */
int nCurrent = pPager->nSavepoint; /* Current number of savepoints */
int ii; /* Iterator variable */
PagerSavepoint *aNew; /* New Pager.aSavepoint array */
assert( pPager->eState>=PAGER_WRITER_LOCKED );
assert( assert_pager_state(pPager) );
if( nSavepoint>nCurrent && pPager->useJournal ){
int ii; /* Iterator variable */
PagerSavepoint *aNew; /* New Pager.aSavepoint array */
assert( nSavepoint>nCurrent && pPager->useJournal );
/* Grow the Pager.aSavepoint array using realloc(). Return SQLITE_NOMEM
** if the allocation fails. Otherwise, zero the new portion in case a
@ -6527,10 +6518,19 @@ int sqlite3PagerOpenSavepoint(Pager *pPager, int nSavepoint){
}
assert( pPager->nSavepoint==nSavepoint );
assertTruncateConstraint(pPager);
}
return rc;
}
/*
** Entry point for opening savepoints on pPager. The work is delegated to
** pagerOpenSavepoint(), which is invoked only when nSavepoint is greater
** than the current pPager->nSavepoint and pPager->useJournal is set. In
** every other case nothing is done and SQLITE_OK is returned.
*/
int sqlite3PagerOpenSavepoint(Pager *pPager, int nSavepoint){
  assert( pPager->eState>=PAGER_WRITER_LOCKED );
  assert( assert_pager_state(pPager) );

  /* Fast path: no new savepoints needed, or no journal in use. */
  if( nSavepoint<=pPager->nSavepoint || !pPager->useJournal ){
    return SQLITE_OK;
  }
  return pagerOpenSavepoint(pPager, nSavepoint);
}
/*
** This function is called to rollback or release (commit) a savepoint.
@ -6761,9 +6761,8 @@ int sqlite3PagerMovepage(Pager *pPager, DbPage *pPg, Pgno pgno, int isCommit){
** one or more savepoint bitvecs. This is the reason this function
** may return SQLITE_NOMEM.
*/
if( pPg->flags&PGHDR_DIRTY
&& subjRequiresPage(pPg)
&& SQLITE_OK!=(rc = subjournalPage(pPg))
if( (pPg->flags & PGHDR_DIRTY)!=0
&& SQLITE_OK!=(rc = subjournalPageIfRequired(pPg))
){
return rc;
}

View File

@ -28,7 +28,6 @@ struct PCache {
int (*xStress)(void*,PgHdr*); /* Call to try make a page clean */
void *pStress; /* Argument to xStress */
sqlite3_pcache *pCache; /* Pluggable cache module */
PgHdr *pPage1; /* Reference to page 1 */
};
/********************************** Linked List Management ********************/
@ -106,9 +105,6 @@ static void pcacheManageDirtyList(PgHdr *pPage, u8 addRemove){
*/
static void pcacheUnpin(PgHdr *p){
if( p->pCache->bPurgeable ){
if( p->pgno==1 ){
p->pCache->pPage1 = 0;
}
sqlite3GlobalConfig.pcache2.xUnpin(p->pCache->pCache, p->pPage, 0);
}
}
@ -201,7 +197,6 @@ int sqlite3PcacheSetPageSize(PCache *pCache, int szPage){
sqlite3GlobalConfig.pcache2.xDestroy(pCache->pCache);
}
pCache->pCache = pNew;
pCache->pPage1 = 0;
pCache->szPage = szPage;
}
return SQLITE_OK;
@ -333,6 +328,7 @@ static SQLITE_NOINLINE PgHdr *pcacheFetchFinishWithInit(
memset(pPgHdr->pExtra, 0, pCache->szExtra);
pPgHdr->pCache = pCache;
pPgHdr->pgno = pgno;
pPgHdr->flags = PGHDR_CLEAN;
return sqlite3PcacheFetchFinish(pCache,pgno,pPage);
}
@ -349,7 +345,7 @@ PgHdr *sqlite3PcacheFetchFinish(
){
PgHdr *pPgHdr;
if( pPage==0 ) return 0;
assert( pPage!=0 );
pPgHdr = (PgHdr *)pPage->pExtra;
if( !pPgHdr->pPage ){
@ -359,9 +355,6 @@ PgHdr *sqlite3PcacheFetchFinish(
pCache->nRef++;
}
pPgHdr->nRef++;
if( pgno==1 ){
pCache->pPage1 = pPgHdr;
}
return pPgHdr;
}
@ -374,7 +367,7 @@ void SQLITE_NOINLINE sqlite3PcacheRelease(PgHdr *p){
p->nRef--;
if( p->nRef==0 ){
p->pCache->nRef--;
if( (p->flags&PGHDR_DIRTY)==0 ){
if( p->flags&PGHDR_CLEAN ){
pcacheUnpin(p);
}else if( p->pDirtyPrev!=0 ){
/* Move the page to the head of the dirty list. */
@ -402,9 +395,6 @@ void sqlite3PcacheDrop(PgHdr *p){
pcacheManageDirtyList(p, PCACHE_DIRTYLIST_REMOVE);
}
p->pCache->nRef--;
if( p->pgno==1 ){
p->pCache->pPage1 = 0;
}
sqlite3GlobalConfig.pcache2.xUnpin(p->pCache->pCache, p->pPage, 1);
}
@ -413,13 +403,16 @@ void sqlite3PcacheDrop(PgHdr *p){
** make it so.
*/
void sqlite3PcacheMakeDirty(PgHdr *p){
p->flags &= ~PGHDR_DONT_WRITE;
assert( p->nRef>0 );
if( 0==(p->flags & PGHDR_DIRTY) ){
p->flags |= PGHDR_DIRTY;
if( p->flags & (PGHDR_CLEAN|PGHDR_DONT_WRITE) ){
p->flags &= ~PGHDR_DONT_WRITE;
if( p->flags & PGHDR_CLEAN ){
p->flags ^= (PGHDR_DIRTY|PGHDR_CLEAN);
assert( (p->flags & (PGHDR_DIRTY|PGHDR_CLEAN))==PGHDR_DIRTY );
pcacheManageDirtyList(p, PCACHE_DIRTYLIST_ADD);
}
}
}
/*
** Make sure the page is marked as clean. If it isn't clean already,
@ -427,8 +420,10 @@ void sqlite3PcacheMakeDirty(PgHdr *p){
*/
void sqlite3PcacheMakeClean(PgHdr *p){
if( (p->flags & PGHDR_DIRTY) ){
assert( (p->flags & PGHDR_CLEAN)==0 );
pcacheManageDirtyList(p, PCACHE_DIRTYLIST_REMOVE);
p->flags &= ~(PGHDR_DIRTY|PGHDR_NEED_SYNC);
p->flags &= ~(PGHDR_DIRTY|PGHDR_NEED_SYNC|PGHDR_WRITEABLE);
p->flags |= PGHDR_CLEAN;
if( p->nRef==0 ){
pcacheUnpin(p);
}
@ -495,10 +490,15 @@ void sqlite3PcacheTruncate(PCache *pCache, Pgno pgno){
sqlite3PcacheMakeClean(p);
}
}
if( pgno==0 && pCache->pPage1 ){
memset(pCache->pPage1->pData, 0, pCache->szPage);
if( pgno==0 && pCache->nRef ){
sqlite3_pcache_page *pPage1;
pPage1 = sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache,1,0);
if( ALWAYS(pPage1) ){ /* Page 1 is always available in cache, because
** pCache->nRef>0 */
memset(pPage1->pBuf, 0, pCache->szPage);
pgno = 1;
}
}
sqlite3GlobalConfig.pcache2.xTruncate(pCache->pCache, pgno+1);
}
}

View File

@ -46,13 +46,13 @@ struct PgHdr {
};
/* Bit values for PgHdr.flags */
#define PGHDR_DIRTY 0x002 /* Page has changed */
#define PGHDR_NEED_SYNC 0x004 /* Fsync the rollback journal before
#define PGHDR_CLEAN 0x001 /* Page not on the PCache.pDirty list */
#define PGHDR_DIRTY 0x002 /* Page is on the PCache.pDirty list */
#define PGHDR_WRITEABLE 0x004 /* Journaled and ready to modify */
#define PGHDR_NEED_SYNC 0x008 /* Fsync the rollback journal before
** writing this page to the database */
#define PGHDR_NEED_READ 0x008 /* Content is unread */
#define PGHDR_REUSE_UNLIKELY 0x010 /* A hint that reuse is unlikely */
#define PGHDR_NEED_READ 0x010 /* Content is unread */
#define PGHDR_DONT_WRITE 0x020 /* Do not write content to disk */
#define PGHDR_MMAP 0x040 /* This is an mmap page object */
/* Initialize and shutdown the page cache subsystem */

View File

@ -462,10 +462,11 @@ static PgHdr1 *pcache1PinPage(PgHdr1 *pPage){
/*
** Remove the page supplied as an argument from the hash table
** (PCache1.apHash structure) that it is currently stored in.
** Also free the page if freeFlag is true.
**
** The PGroup mutex must be held when this function is called.
*/
static void pcache1RemoveFromHash(PgHdr1 *pPage){
static void pcache1RemoveFromHash(PgHdr1 *pPage, int freeFlag){
unsigned int h;
PCache1 *pCache = pPage->pCache;
PgHdr1 **pp;
@ -476,6 +477,7 @@ static void pcache1RemoveFromHash(PgHdr1 *pPage){
*pp = (*pp)->pNext;
pCache->nPage--;
if( freeFlag ) pcache1FreePage(pPage);
}
/*
@ -489,8 +491,7 @@ static void pcache1EnforceMaxPage(PGroup *pGroup){
assert( p->pCache->pGroup==pGroup );
assert( p->isPinned==0 );
pcache1PinPage(p);
pcache1RemoveFromHash(p);
pcache1FreePage(p);
pcache1RemoveFromHash(p, 1);
}
}
@ -714,7 +715,7 @@ static SQLITE_NOINLINE PgHdr1 *pcache1FetchStage2(
PCache1 *pOther;
pPage = pGroup->pLruTail;
assert( pPage->isPinned==0 );
pcache1RemoveFromHash(pPage);
pcache1RemoveFromHash(pPage, 0);
pcache1PinPage(pPage);
pOther = pPage->pCache;
@ -912,8 +913,7 @@ static void pcache1Unpin(
assert( pPage->isPinned==1 );
if( reuseUnlikely || pGroup->nCurrentPage>pGroup->nMaxPage ){
pcache1RemoveFromHash(pPage);
pcache1FreePage(pPage);
pcache1RemoveFromHash(pPage, 1);
}else{
/* Add the page to the PGroup LRU list. */
if( pGroup->pLruHead ){
@ -1067,8 +1067,7 @@ int sqlite3PcacheReleaseMemory(int nReq){
#endif
assert( p->isPinned==0 );
pcache1PinPage(p);
pcache1RemoveFromHash(p);
pcache1FreePage(p);
pcache1RemoveFromHash(p, 1);
}
pcache1LeaveMutex(&pcache1.grp);
}

Some files were not shown because too many files have changed in this diff Show More