1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-08-08 14:02:16 +03:00

Merge in all the trunk changes from the previous year. This breaks the

cursor-hint mechanism, but provides a baseline for trouble-shooting.

FossilOrigin-Name: 82a7a61bc0883b1e7432548e4890791717aa1bb3
This commit is contained in:
drh
2015-08-13 20:07:13 +00:00
655 changed files with 116353 additions and 23644 deletions

View File

@@ -15,18 +15,22 @@
# The toplevel directory of the source tree. This is the directory
# that contains this "Makefile.in" and the "configure.in" script.
#
TOP = @srcdir@
TOP = @abs_srcdir@
# C Compiler and options for use in building executables that
# will run on the platform that is doing the build.
#
BCC = @BUILD_CC@ @BUILD_CFLAGS@
# C Compile and options for use in building executables that
# TCC is the C Compiler and options for use in building executables that
# will run on the target platform. (BCC and TCC are usually the
# same unless your are cross-compiling.)
# same unless you are cross-compiling.) Separate CC and CFLAGS macros
# are provided so that these aspects of the build process can be changed
# on the "make" command-line. Ex: "make CC=clang CFLAGS=-fsanitize=undefined"
#
TCC = @CC@ @CPPFLAGS@ @CFLAGS@ -I. -I${TOP}/src -I${TOP}/ext/rtree
CC = @CC@
CFLAGS = @CPPFLAGS@ @CFLAGS@
TCC = $(CC) $(CFLAGS) -I. -I${TOP}/src -I${TOP}/ext/rtree -I${TOP}/ext/fts3
# Define this for the autoconf-based build, so that the code knows it can
# include the generated config.h
@@ -37,7 +41,7 @@ TCC += -D_HAVE_SQLITE_CONFIG_H -DBUILD_sqlite
# Omitting the define will cause extra debugging code to be inserted and
# includes extra comments when "EXPLAIN stmt" is used.
#
TCC += @TARGET_DEBUG@ @XTHREADCONNECT@
TCC += @TARGET_DEBUG@
# Compiler options needed for programs that use the TCL library.
#
@@ -163,12 +167,13 @@ USE_AMALGAMATION = @USE_AMALGAMATION@
#
LIBOBJS0 = alter.lo analyze.lo attach.lo auth.lo \
backup.lo bitvec.lo btmutex.lo btree.lo build.lo \
callback.lo complete.lo ctime.lo date.lo delete.lo \
callback.lo complete.lo ctime.lo date.lo dbstat.lo delete.lo \
expr.lo fault.lo fkey.lo \
fts3.lo fts3_aux.lo fts3_expr.lo fts3_hash.lo fts3_icu.lo \
fts3_porter.lo fts3_snippet.lo fts3_tokenizer.lo fts3_tokenizer1.lo \
fts3_tokenize_vtab.lo \
fts3_unicode.lo fts3_unicode2.lo fts3_write.lo \
fts5.lo \
func.lo global.lo hash.lo \
icu.lo insert.lo journal.lo legacy.lo loadext.lo \
main.lo malloc.lo mem0.lo mem1.lo mem2.lo mem3.lo mem5.lo \
@@ -177,10 +182,11 @@ LIBOBJS0 = alter.lo analyze.lo attach.lo auth.lo \
notify.lo opcodes.lo os.lo os_unix.lo os_win.lo \
pager.lo parse.lo pcache.lo pcache1.lo pragma.lo prepare.lo printf.lo \
random.lo resolve.lo rowset.lo rtree.lo select.lo status.lo \
table.lo tokenize.lo trigger.lo \
table.lo threads.lo tokenize.lo treeview.lo trigger.lo \
update.lo util.lo vacuum.lo \
vdbe.lo vdbeapi.lo vdbeaux.lo vdbeblob.lo vdbemem.lo vdbesort.lo \
vdbetrace.lo wal.lo walker.lo where.lo utf.lo vtab.lo
vdbetrace.lo wal.lo walker.lo where.lo wherecode.lo whereexpr.lo \
utf.lo vtab.lo
# Object files for the amalgamation.
#
@@ -209,6 +215,7 @@ SRC = \
$(TOP)/src/complete.c \
$(TOP)/src/ctime.c \
$(TOP)/src/date.c \
$(TOP)/src/dbstat.c \
$(TOP)/src/delete.c \
$(TOP)/src/expr.c \
$(TOP)/src/fault.c \
@@ -230,6 +237,7 @@ SRC = \
$(TOP)/src/mem3.c \
$(TOP)/src/mem5.c \
$(TOP)/src/memjournal.c \
$(TOP)/src/msvc.h \
$(TOP)/src/mutex.c \
$(TOP)/src/mutex.h \
$(TOP)/src/mutex_noop.c \
@@ -239,8 +247,10 @@ SRC = \
$(TOP)/src/os.c \
$(TOP)/src/os.h \
$(TOP)/src/os_common.h \
$(TOP)/src/os_setup.h \
$(TOP)/src/os_unix.c \
$(TOP)/src/os_win.c \
$(TOP)/src/os_win.h \
$(TOP)/src/pager.c \
$(TOP)/src/pager.h \
$(TOP)/src/parse.y \
@@ -248,6 +258,7 @@ SRC = \
$(TOP)/src/pcache.h \
$(TOP)/src/pcache1.c \
$(TOP)/src/pragma.c \
$(TOP)/src/pragma.h \
$(TOP)/src/prepare.c \
$(TOP)/src/printf.c \
$(TOP)/src/random.c \
@@ -261,8 +272,10 @@ SRC = \
$(TOP)/src/sqliteInt.h \
$(TOP)/src/sqliteLimit.h \
$(TOP)/src/table.c \
$(TOP)/src/threads.c \
$(TOP)/src/tclsqlite.c \
$(TOP)/src/tokenize.c \
$(TOP)/src/treeview.c \
$(TOP)/src/trigger.c \
$(TOP)/src/utf.c \
$(TOP)/src/update.c \
@@ -278,10 +291,13 @@ SRC = \
$(TOP)/src/vdbetrace.c \
$(TOP)/src/vdbeInt.h \
$(TOP)/src/vtab.c \
$(TOP)/src/vxworks.h \
$(TOP)/src/wal.c \
$(TOP)/src/wal.h \
$(TOP)/src/walker.c \
$(TOP)/src/where.c \
$(TOP)/src/wherecode.c \
$(TOP)/src/whereexpr.c \
$(TOP)/src/whereInt.h
# Source code for extensions
@@ -328,6 +344,9 @@ SRC += \
SRC += \
$(TOP)/ext/rtree/rtree.h \
$(TOP)/ext/rtree/rtree.c
SRC += \
$(TOP)/ext/rbu/sqlite3rbu.h \
$(TOP)/ext/rbu/sqlite3rbu.c
# Generated source code files
@@ -356,6 +375,7 @@ TESTSRC = \
$(TOP)/src/test_autoext.c \
$(TOP)/src/test_async.c \
$(TOP)/src/test_backup.c \
$(TOP)/src/test_blob.c \
$(TOP)/src/test_btree.c \
$(TOP)/src/test_config.c \
$(TOP)/src/test_demovfs.c \
@@ -378,20 +398,24 @@ TESTSRC = \
$(TOP)/src/test_server.c \
$(TOP)/src/test_superlock.c \
$(TOP)/src/test_syscall.c \
$(TOP)/src/test_stat.c \
$(TOP)/src/test_tclvar.c \
$(TOP)/src/test_thread.c \
$(TOP)/src/test_vfs.c \
$(TOP)/src/test_wsd.c \
$(TOP)/ext/fts3/fts3_term.c \
$(TOP)/ext/fts3/fts3_test.c
$(TOP)/ext/fts3/fts3_test.c \
$(TOP)/ext/rbu/test_rbu.c
# Statically linked extensions
#
TESTSRC += \
$(TOP)/ext/misc/amatch.c \
$(TOP)/ext/misc/closure.c \
$(TOP)/ext/misc/eval.c \
$(TOP)/ext/misc/fileio.c \
$(TOP)/ext/misc/fuzzer.c \
$(TOP)/ext/fts5/fts5_tcl.c \
$(TOP)/ext/fts5/fts5_test_mi.c \
$(TOP)/ext/misc/ieee754.c \
$(TOP)/ext/misc/nextchar.c \
$(TOP)/ext/misc/percentile.c \
@@ -410,6 +434,7 @@ TESTSRC2 = \
$(TOP)/src/build.c \
$(TOP)/src/ctime.c \
$(TOP)/src/date.c \
$(TOP)/src/dbstat.c \
$(TOP)/src/expr.c \
$(TOP)/src/func.c \
$(TOP)/src/insert.c \
@@ -436,6 +461,8 @@ TESTSRC2 = \
$(TOP)/src/vdbemem.c \
$(TOP)/src/vdbetrace.c \
$(TOP)/src/where.c \
$(TOP)/src/wherecode.c \
$(TOP)/src/whereexpr.c \
parse.c \
$(TOP)/ext/fts3/fts3.c \
$(TOP)/ext/fts3/fts3_aux.c \
@@ -453,19 +480,24 @@ HDR = \
$(TOP)/src/hash.h \
$(TOP)/src/hwtime.h \
keywordhash.h \
$(TOP)/src/msvc.h \
$(TOP)/src/mutex.h \
opcodes.h \
$(TOP)/src/os.h \
$(TOP)/src/os_common.h \
$(TOP)/src/os_setup.h \
$(TOP)/src/os_win.h \
$(TOP)/src/pager.h \
$(TOP)/src/pcache.h \
parse.h \
$(TOP)/src/pragma.h \
sqlite3.h \
$(TOP)/src/sqlite3ext.h \
$(TOP)/src/sqliteInt.h \
$(TOP)/src/sqliteLimit.h \
$(TOP)/src/vdbe.h \
$(TOP)/src/vdbeInt.h \
$(TOP)/src/vxworks.h \
$(TOP)/src/whereInt.h \
config.h
@@ -491,6 +523,25 @@ EXTHDR += \
EXTHDR += \
$(TOP)/ext/rtree/sqlite3rtree.h
# executables needed for testing
#
TESTPROGS = \
testfixture$(TEXE) \
sqlite3$(TEXE) \
sqlite3_analyzer$(TEXE) \
sqldiff$(TEXE)
# Databases containing fuzzer test cases
#
FUZZDATA = \
$(TOP)/test/fuzzdata1.db \
$(TOP)/test/fuzzdata2.db \
$(TOP)/test/fuzzdata3.db
# Standard options to testfixture
#
TESTOPTS = --verbose=file --output=test-out.txt
# This is the default Makefile target. The objects listed here
# are what get build when you type just "make" with no arguments.
#
@@ -518,10 +569,32 @@ sqlite3$(TEXE): $(TOP)/src/shell.c libsqlite3.la sqlite3.h
-o $@ $(TOP)/src/shell.c libsqlite3.la \
$(LIBREADLINE) $(TLIBS) -rpath "$(libdir)"
mptester$(EXE): sqlite3.c $(TOP)/mptest/mptest.c
sqldiff$(TEXE): $(TOP)/tool/sqldiff.c sqlite3.c sqlite3.h
$(LTLINK) -o $@ $(TOP)/tool/sqldiff.c sqlite3.c $(TLIBS)
fuzzershell$(TEXE): $(TOP)/tool/fuzzershell.c sqlite3.c sqlite3.h
$(LTLINK) -o $@ $(TOP)/tool/fuzzershell.c sqlite3.c $(TLIBS)
fuzzcheck$(TEXE): $(TOP)/test/fuzzcheck.c sqlite3.c sqlite3.h
$(LTLINK) -o $@ $(TOP)/test/fuzzcheck.c sqlite3.c $(TLIBS)
mptester$(TEXE): sqlite3.c $(TOP)/mptest/mptest.c
$(LTLINK) -o $@ -I. $(TOP)/mptest/mptest.c sqlite3.c \
$(TLIBS) -rpath "$(libdir)"
MPTEST1=./mptester$(TEXE) mptest.db $(TOP)/mptest/crash01.test --repeat 20
MPTEST2=./mptester$(TEXE) mptest.db $(TOP)/mptest/multiwrite01.test --repeat 20
mptest: mptester$(TEXE)
rm -f mptest.db
$(MPTEST1) --journalmode DELETE
$(MPTEST2) --journalmode WAL
$(MPTEST1) --journalmode WAL
$(MPTEST2) --journalmode PERSIST
$(MPTEST1) --journalmode PERSIST
$(MPTEST2) --journalmode TRUNCATE
$(MPTEST1) --journalmode TRUNCATE
$(MPTEST2) --journalmode DELETE
# This target creates a directory named "tsrc" and fills it with
# copies of all of the C source code and header files needed to
@@ -534,7 +607,7 @@ mptester$(EXE): sqlite3.c $(TOP)/mptest/mptest.c
mkdir tsrc
cp -f $(SRC) tsrc
rm tsrc/sqlite.h.in tsrc/parse.y
$(TCLSH_CMD) $(TOP)/tool/vdbe-compress.tcl <tsrc/vdbe.c >vdbe.new
$(TCLSH_CMD) $(TOP)/tool/vdbe-compress.tcl $(OPTS) <tsrc/vdbe.c >vdbe.new
mv vdbe.new tsrc/vdbe.c
touch .target_source
@@ -615,6 +688,9 @@ ctime.lo: $(TOP)/src/ctime.c $(HDR)
date.lo: $(TOP)/src/date.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/date.c
dbstat.lo: $(TOP)/src/dbstat.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/dbstat.c
delete.lo: $(TOP)/src/delete.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/delete.c
@@ -732,9 +808,15 @@ status.lo: $(TOP)/src/status.c $(HDR)
table.lo: $(TOP)/src/table.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/table.c
threads.lo: $(TOP)/src/threads.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/threads.c
tokenize.lo: $(TOP)/src/tokenize.c keywordhash.h $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/tokenize.c
treeview.lo: $(TOP)/src/treeview.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/treeview.c
trigger.lo: $(TOP)/src/trigger.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/trigger.c
@@ -783,6 +865,12 @@ walker.lo: $(TOP)/src/walker.c $(HDR)
where.lo: $(TOP)/src/where.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/where.c
wherecode.lo: $(TOP)/src/wherecode.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/wherecode.c
whereexpr.lo: $(TOP)/src/whereexpr.c $(HDR)
$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/whereexpr.c
tclsqlite.lo: $(TOP)/src/tclsqlite.c $(HDR)
$(LTCOMPILE) -DUSE_TCL_STUBS=1 -c $(TOP)/src/tclsqlite.c
@@ -889,6 +977,39 @@ fts3_write.lo: $(TOP)/ext/fts3/fts3_write.c $(HDR) $(EXTHDR)
rtree.lo: $(TOP)/ext/rtree/rtree.c $(HDR) $(EXTHDR)
$(LTCOMPILE) -DSQLITE_CORE -c $(TOP)/ext/rtree/rtree.c
# FTS5 things
#
FTS5_SRC = \
$(TOP)/ext/fts5/fts5.h \
$(TOP)/ext/fts5/fts5Int.h \
$(TOP)/ext/fts5/fts5_aux.c \
$(TOP)/ext/fts5/fts5_buffer.c \
$(TOP)/ext/fts5/fts5_main.c \
$(TOP)/ext/fts5/fts5_config.c \
$(TOP)/ext/fts5/fts5_expr.c \
$(TOP)/ext/fts5/fts5_hash.c \
$(TOP)/ext/fts5/fts5_index.c \
fts5parse.c fts5parse.h \
$(TOP)/ext/fts5/fts5_storage.c \
$(TOP)/ext/fts5/fts5_tokenize.c \
$(TOP)/ext/fts5/fts5_unicode2.c \
$(TOP)/ext/fts5/fts5_varint.c \
$(TOP)/ext/fts5/fts5_vocab.c \
fts5parse.c: $(TOP)/ext/fts5/fts5parse.y lemon
cp $(TOP)/ext/fts5/fts5parse.y .
rm -f fts5parse.h
./lemon $(OPTS) fts5parse.y
fts5parse.h: fts5parse.c
fts5.c: $(FTS5_SRC)
$(TCLSH_CMD) $(TOP)/ext/fts5/tool/mkfts5c.tcl
cp $(TOP)/ext/fts5/fts5.h .
fts5.lo: fts5.c $(HDR) $(EXTHDR)
$(LTCOMPILE) -DSQLITE_CORE -c fts5.c
# Rules to build the 'testfixture' application.
#
@@ -910,22 +1031,56 @@ testfixture$(TEXE): $(TESTFIXTURE_SRC)
$(LTLINK) -DSQLITE_NO_SYNC=1 $(TEMP_STORE) $(TESTFIXTURE_FLAGS) \
-o $@ $(TESTFIXTURE_SRC) $(LIBTCL) $(TLIBS)
# A very detailed test running most or all test cases
fulltest: $(TESTPROGS) fuzztest
./testfixture$(TEXE) $(TOP)/test/all.test $(TESTOPTS)
fulltest: testfixture$(TEXE) sqlite3$(TEXE)
./testfixture$(TEXE) $(TOP)/test/all.test
# Really really long testing
soaktest: $(TESTPROGS)
./testfixture$(TEXE) $(TOP)/test/all.test -soak=1 $(TESTOPTS)
soaktest: testfixture$(TEXE) sqlite3$(TEXE)
./testfixture$(TEXE) $(TOP)/test/all.test -soak=1
fulltestonly: testfixture$(TEXE) sqlite3$(TEXE)
# Do extra testing but not everything.
fulltestonly: $(TESTPROGS) fuzztest
./testfixture$(TEXE) $(TOP)/test/full.test
test: testfixture$(TEXE) sqlite3$(TEXE)
./testfixture$(TEXE) $(TOP)/test/veryquick.test
# Fuzz testing
fuzztest: fuzzcheck$(TEXE) $(FUZZDATA)
./fuzzcheck$(TEXE) $(FUZZDATA)
sqlite3_analyzer.c: sqlite3.c $(TOP)/src/test_stat.c $(TOP)/src/tclsqlite.c $(TOP)/tool/spaceanal.tcl
fastfuzztest: fuzzcheck$(TEXE) $(FUZZDATA)
./fuzzcheck$(TEXE) --limit-mem 100M $(FUZZDATA)
# Run fuzzcheck under valgrind over the $(FUZZDATA) databases, passing
# --cell-size-check and --limit-mem 10M.  The prerequisite uses $(TEXE),
# the same suffix macro used by the recipe and by the other fuzz targets,
# so that the dependency names the fuzzcheck$(TEXE) target.
valgrindfuzz: fuzzcheck$(TEXE) $(FUZZDATA)
	valgrind ./fuzzcheck$(TEXE) --cell-size-check --limit-mem 10M $(FUZZDATA)
# Minimal testing that runs in less than 3 minutes
#
quicktest: ./testfixture$(TEXE)
./testfixture$(TEXE) $(TOP)/test/extraquick.test $(TESTOPTS)
# This is the common case. Run many tests that do not take too long,
# including fuzzcheck, sqlite3_analyzer, and sqldiff tests.
#
test: $(TESTPROGS) fastfuzztest
./testfixture$(TEXE) $(TOP)/test/veryquick.test $(TESTOPTS)
# Run a test using valgrind. This can take a really long time
# because valgrind is so much slower than a native machine.
#
valgrindtest: $(TESTPROGS) valgrindfuzz
OMIT_MISUSE=1 valgrind -v ./testfixture$(TEXE) $(TOP)/test/permutations.test valgrind $(TESTOPTS)
# A very fast test that checks basic sanity. The name comes from
# the 60s-era electronics testing: "Turn it on and see if smoke
# comes out."
#
smoketest: $(TESTPROGS) fuzzcheck$(TEXE)
./testfixture$(TEXE) $(TOP)/test/main.test $(TESTOPTS)
sqlite3_analyzer.c: sqlite3.c $(TOP)/src/tclsqlite.c $(TOP)/tool/spaceanal.tcl
echo "#define TCLSH 2" > $@
cat sqlite3.c $(TOP)/src/test_stat.c $(TOP)/src/tclsqlite.c >> $@
echo "#define SQLITE_ENABLE_DBSTAT_VTAB 1" >> $@
cat sqlite3.c $(TOP)/src/tclsqlite.c >> $@
echo "static const char *tclsh_main_loop(void){" >> $@
echo "static const char *zMainloop = " >> $@
$(NAWK) -f $(TOP)/tool/tostr.awk $(TOP)/tool/spaceanal.tcl >> $@
@@ -934,15 +1089,63 @@ sqlite3_analyzer.c: sqlite3.c $(TOP)/src/test_stat.c $(TOP)/src/tclsqlite.c $(TO
sqlite3_analyzer$(TEXE): sqlite3_analyzer.c
$(LTLINK) sqlite3_analyzer.c -o $@ $(LIBTCL) $(TLIBS)
showdb$(TEXE): $(TOP)/tool/showdb.c sqlite3.c
$(LTLINK) -o $@ $(TOP)/tool/showdb.c sqlite3.c $(TLIBS)
showdb$(TEXE): $(TOP)/tool/showdb.c sqlite3.lo
$(LTLINK) -o $@ $(TOP)/tool/showdb.c sqlite3.lo $(TLIBS)
showstat4$(TEXE): $(TOP)/tool/showstat4.c sqlite3.lo
$(LTLINK) -o $@ $(TOP)/tool/showstat4.c sqlite3.lo $(TLIBS)
showjournal$(TEXE): $(TOP)/tool/showjournal.c sqlite3.lo
$(LTLINK) -o $@ $(TOP)/tool/showjournal.c sqlite3.lo $(TLIBS)
showwal$(TEXE): $(TOP)/tool/showwal.c sqlite3.lo
$(LTLINK) -o $@ $(TOP)/tool/showwal.c sqlite3.lo $(TLIBS)
rollback-test$(TEXE): $(TOP)/tool/rollback-test.c sqlite3.lo
$(LTLINK) -o $@ $(TOP)/tool/rollback-test.c sqlite3.lo $(TLIBS)
LogEst$(TEXE): $(TOP)/tool/logest.c sqlite3.h
$(LTLINK) -I. -o $@ $(TOP)/tool/logest.c
wordcount$(TEXE): $(TOP)/test/wordcount.c sqlite3.c
$(LTLINK) -o $@ $(TOP)/test/wordcount.c sqlite3.c $(TLIBS)
speedtest1$(TEXE): $(TOP)/test/wordcount.c sqlite3.lo
speedtest1$(TEXE): $(TOP)/test/speedtest1.c sqlite3.lo
$(LTLINK) -o $@ $(TOP)/test/speedtest1.c sqlite3.lo $(TLIBS)
# This target will fail if the SQLite amalgamation contains any exported
# symbols that do not begin with "sqlite3_". It is run as part of the
# releasetest.tcl script.
#
checksymbols: sqlite3.lo
nm -g --defined-only sqlite3.o | grep -v " sqlite3_" ; test $$? -ne 0
echo '0 errors out of 1 tests'
# Build the amalgamation-autoconf package.
#
amalgamation-tarball: sqlite3.c
TOP=$(TOP) sh $(TOP)/tool/mkautoconfamal.sh
# The next two rules are used to support the "threadtest" target. Building
# threadtest runs a few thread-safety tests that are implemented in C. This
# target is invoked by the releasetest.tcl script.
#
THREADTEST3_SRC = $(TOP)/test/threadtest3.c \
$(TOP)/test/tt3_checkpoint.c \
$(TOP)/test/tt3_index.c \
$(TOP)/test/tt3_vacuum.c \
$(TOP)/test/tt3_stress.c \
$(TOP)/test/tt3_lookaside1.c
threadtest3$(TEXE): sqlite3.lo $(THREADTEST3_SRC)
$(LTLINK) $(TOP)/test/threadtest3.c sqlite3.lo -o $@ $(TLIBS)
threadtest: threadtest3$(TEXE)
./threadtest3$(TEXE)
releasetest:
$(TCLSH_CMD) $(TOP)/test/releasetest.tcl
# Standard install and cleanup targets
#
lib_install: libsqlite3.la
@@ -959,7 +1162,7 @@ install: sqlite3$(BEXE) lib_install sqlite3.h sqlite3.pc ${HAVE_TCL:1=tcl_instal
$(INSTALL) -m 0644 sqlite3.pc $(DESTDIR)$(pkgconfigdir)
pkgIndex.tcl:
echo 'package ifneeded sqlite3 $(RELEASE) [list load $(TCLLIBDIR)/libtclsqlite3.so sqlite3]' > $@
echo 'package ifneeded sqlite3 $(RELEASE) [list load $(TCLLIBDIR)/libtclsqlite3$(SHLIB_SUFFIX) sqlite3]' > $@
tcl_install: lib_install libtclsqlite3.la pkgIndex.tcl
$(INSTALL) -d $(DESTDIR)$(TCLLIBDIR)
$(LTINSTALL) libtclsqlite3.la $(DESTDIR)$(TCLLIBDIR)
@@ -977,6 +1180,9 @@ clean:
rm -rf tsrc .target_source
rm -f tclsqlite3$(TEXE)
rm -f testfixture$(TEXE) test.db
rm -f LogEst$(TEXE) fts3view$(TEXE) rollback-test$(TEXE) showdb$(TEXE)
rm -f showjournal$(TEXE) showstat4$(TEXE) showwal$(TEXE) speedtest1$(TEXE)
rm -f wordcount$(TEXE)
rm -f sqlite3.dll sqlite3.lib sqlite3.exp sqlite3.def
rm -f sqlite3.c
rm -f sqlite3rc.h
@@ -984,9 +1190,13 @@ clean:
rm -f sqlite3_analyzer$(TEXE) sqlite3_analyzer.c
rm -f sqlite-*-output.vsix
rm -f mptester mptester.exe
rm -f fuzzershell fuzzershell.exe
rm -f fuzzcheck fuzzcheck.exe
rm -f sqldiff sqldiff.exe
rm -f fts5.* fts5parse.*
distclean: clean
rm -f config.log config.status libtool Makefile sqlite3.pc
rm -f config.h config.log config.status libtool Makefile sqlite3.pc
#
# Windows section

File diff suppressed because it is too large Load Diff

View File

@@ -253,6 +253,7 @@ SRC = \
$(TOP)/src/mem3.c \
$(TOP)/src/mem5.c \
$(TOP)/src/memjournal.c \
$(TOP)/src/msvc.h \
$(TOP)/src/mutex.c \
$(TOP)/src/mutex.h \
$(TOP)/src/mutex_noop.c \
@@ -262,8 +263,10 @@ SRC = \
$(TOP)/src/os.c \
$(TOP)/src/os.h \
$(TOP)/src/os_common.h \
$(TOP)/src/os_setup.h \
$(TOP)/src/os_unix.c \
$(TOP)/src/os_win.c \
$(TOP)/src/os_win.h \
$(TOP)/src/pager.c \
$(TOP)/src/pager.h \
$(TOP)/src/parse.y \
@@ -412,10 +415,13 @@ HDR = \
$(TOP)/src/hash.h \
$(TOP)/src/hwtime.h \
keywordhash.h \
$(TOP)/src/msvc.h \
$(TOP)/src/mutex.h \
opcodes.h \
$(TOP)/src/os.h \
$(TOP)/src/os_common.h \
$(TOP)/src/os_setup.h \
$(TOP)/src/os_win.h \
$(TOP)/src/pager.h \
$(TOP)/src/pcache.h \
parse.h \

39
README
View File

@@ -1,39 +0,0 @@
This directory contains source code to
SQLite: An Embeddable SQL Database Engine
To compile the project, first create a directory in which to place
the build products. It is recommended, but not required, that the
build directory be separate from the source directory. Cd into the
build directory and then from the build directory run the configure
script found at the root of the source tree. Then run "make".
For example:
tar xzf sqlite.tar.gz ;# Unpack the source tree into "sqlite"
mkdir bld ;# Build will occur in a sibling directory
cd bld ;# Change to the build directory
../sqlite/configure ;# Run the configure script
make ;# Run the makefile.
make install ;# (Optional) Install the build products
The configure script uses autoconf 2.61 and libtool. If the configure
script does not work out for you, there is a generic makefile named
"Makefile.linux-gcc" in the top directory of the source tree that you
can copy and edit to suit your needs. Comments on the generic makefile
show what changes are needed.
The linux binaries on the website are created using the generic makefile,
not the configure script. The windows binaries on the website are created
using MinGW32 configured as a cross-compiler running under Linux. For
details, see the ./publish.sh script at the top-level of the source tree.
The developers do not use the configure script.
SQLite does not require TCL to run, but a TCL installation is required
by the makefiles. SQLite contains a lot of generated code and TCL is
used to do much of that code generation. The makefile also requires
AWK.
Contacts:
http://www.sqlite.org/

230
README.md Normal file
View File

@@ -0,0 +1,230 @@
<h1 align="center">SQLite Source Repository</h1>
This repository contains the complete source code for the SQLite database
engine. Some test scripts are also included. However, many other test scripts
and most of the documentation are managed separately.
If you are reading this on a Git mirror someplace, you are doing it wrong.
The [official repository](https://www.sqlite.org/src/) is better. Go there
now.
## Compiling
First create a directory in which to place
the build products. It is recommended, but not required, that the
build directory be separate from the source directory. Cd into the
build directory and then from the build directory run the configure
script found at the root of the source tree. Then run "make".
For example:
tar xzf sqlite.tar.gz ;# Unpack the source tree into "sqlite"
mkdir bld ;# Build will occur in a sibling directory
cd bld ;# Change to the build directory
../sqlite/configure ;# Run the configure script
make ;# Run the makefile.
make sqlite3.c ;# Build the "amalgamation" source file
make test ;# Run some tests (requires Tcl)
See the makefile for additional targets.
The configure script uses autoconf 2.61 and libtool. If the configure
script does not work out for you, there is a generic makefile named
"Makefile.linux-gcc" in the top directory of the source tree that you
can copy and edit to suit your needs. Comments on the generic makefile
show what changes are needed.
## Using MSVC
On Windows, all applicable build products can be compiled with MSVC.
First open the command prompt window associated with the desired compiler
version (e.g. "Developer Command Prompt for VS2013"). Next, use NMAKE
with the provided "Makefile.msc" to build one of the supported targets.
For example:
mkdir bld
cd bld
nmake /f Makefile.msc TOP=..\sqlite
nmake /f Makefile.msc sqlite3.c TOP=..\sqlite
nmake /f Makefile.msc sqlite3.dll TOP=..\sqlite
nmake /f Makefile.msc sqlite3.exe TOP=..\sqlite
nmake /f Makefile.msc test TOP=..\sqlite
There are several build options that can be set via the NMAKE command
line. For example, to build for WinRT, simply add "FOR_WINRT=1" argument
to the "sqlite3.dll" command line above. When debugging into the SQLite
code, adding the "DEBUG=1" argument to one of the above command lines is
recommended.
SQLite does not require [Tcl](http://www.tcl.tk/) to run, but a Tcl installation
is required by the makefiles (including those for MSVC). SQLite contains
a lot of generated code and Tcl is used to do much of that code generation.
The makefiles also require AWK.
## Source Code Tour
Most of the core source files are in the **src/** subdirectory. But
src/ also contains files used to build the "testfixture" test harness;
those files all begin with "test". And src/ contains the "shell.c" file
which is the main program for the "sqlite3.exe" command-line shell and
the "tclsqlite.c" file which implements the bindings to SQLite from the
Tcl programming language. (Historical note: SQLite began as a Tcl
extension and only later escaped to the wild as an independent library.)
Test scripts and programs are found in the **test/** subdirectory.
There are other test suites for SQLite (see
[How SQLite Is Tested](http://www.sqlite.org/testing.html))
but those other test suites are
in separate source repositories.
The **ext/** subdirectory contains code for extensions. The
Full-text search engine is in **ext/fts3**. The R-Tree engine is in
**ext/rtree**. The **ext/misc** subdirectory contains a number of
smaller, single-file extensions, such as a REGEXP operator.
The **tool/** subdirectory contains various scripts and programs used
for building generated source code files or for testing or for generating
accessory programs such as "sqlite3_analyzer(.exe)".
### Generated Source Code Files
Several of the C-language source files used by SQLite are generated from
other sources rather than being typed in manually by a programmer. This
section will summarize those automatically-generated files. To create all
of the automatically-generated files, simply run "make target&#95;source".
The "target&#95;source" make target will create a subdirectory "tsrc/" and
fill it with all the source files needed to build SQLite, both
manually-edited files and automatically-generated files.
The SQLite interface is defined by the **sqlite3.h** header file, which is
generated from src/sqlite.h.in, ./manifest.uuid, and ./VERSION. The
[Tcl script](http://www.tcl.tk) at tool/mksqlite3h.tcl does the conversion.
The manifest.uuid file contains the SHA1 hash of the particular check-in
and is used to generate the SQLITE\_SOURCE\_ID macro. The VERSION file
contains the current SQLite version number. The sqlite3.h header is really
just a copy of src/sqlite.h.in with the source-id and version number inserted
at just the right spots. Note that comment text in the sqlite3.h file is
used to generate much of the SQLite API documentation. The Tcl scripts
used to generate that documentation are in a separate source repository.
The SQL language parser is **parse.c** which is generated from a grammar in
the src/parse.y file. The conversion of "parse.y" into "parse.c" is done
by the [lemon](./doc/lemon.html) LALR(1) parser generator. The source code
for lemon is at tool/lemon.c. Lemon uses a
template for generating its parser. A generic template is in tool/lempar.c,
but SQLite uses a slightly modified template found in src/lempar.c.
Lemon also generates the **parse.h** header file, at the same time it
generates parse.c. But the parse.h header file is
modified further (to add additional symbols) using the ./addopcodes.awk
AWK script.
The **opcodes.h** header file contains macros that define the numbers
corresponding to opcodes in the "VDBE" virtual machine. The opcodes.h
file is generated by scanning the src/vdbe.c source file. The
AWK script at ./mkopcodeh.awk does this scan and generates opcodes.h.
A second AWK script, ./mkopcodec.awk, then scans opcodes.h to generate
the **opcodes.c** source file, which contains a reverse mapping from
opcode-number to opcode-name that is used for EXPLAIN output.
The **keywordhash.h** header file contains the definition of a hash table
that maps SQL language keywords (ex: "CREATE", "SELECT", "INDEX", etc.) into
the numeric codes used by the parse.c parser. The keywordhash.h file is
generated by a C-language program at tool/mkkeywordhash.c.
### The Amalgamation
All of the individual C source code and header files (both manually-edited
and automatically-generated) can be combined into a single big source file
**sqlite3.c** called "the amalgamation". The amalgamation is the recommended
way of using SQLite in a larger application. Combining all individual
source code files into a single big source code file allows the C compiler
to perform more cross-procedure analysis and generate better code. SQLite
runs about 5% faster when compiled from the amalgamation versus when compiled
from individual source files.
The amalgamation is generated from the tool/mksqlite3c.tcl Tcl script.
First, all of the individual source files must be gathered into the tsrc/
subdirectory (using the equivalent of "make target_source") then the
tool/mksqlite3c.tcl script is run to copy them all together in just the
right order while resolving internal "#include" references.
The amalgamation source file is more than 100K lines long. Some symbolic
debuggers (most notably MSVC) are unable to deal with files longer than 64K
lines. To work around this, a separate Tcl script, tool/split-sqlite3c.tcl,
can be run on the amalgamation to break it up into a single small C file
called **sqlite3-all.c** that does #include on about five other files
named **sqlite3-1.c**, **sqlite3-2.c**, ..., **sqlite3-5.c**. In this way,
all of the source code is contained within a single translation unit so
that the compiler can do extra cross-procedure optimization, but no
individual source file exceeds 32K lines in length.
## How It All Fits Together
SQLite is modular in design.
See the [architectural description](http://www.sqlite.org/arch.html)
for details. Other documents that are useful in
helping to understand how SQLite works include the
[file format](http://www.sqlite.org/fileformat2.html) description,
the [virtual machine](http://www.sqlite.org/vdbe.html) that runs
prepared statements, the description of
[how transactions work](http://www.sqlite.org/atomiccommit.html), and
the [overview of the query planner](http://www.sqlite.org/optoverview.html).
Unfortunately, years of effort have gone into optimizing SQLite, both
for small size and high performance. And optimizations tend to result in
complex code. So there is a lot of complexity in the SQLite implementation.
Key files:
* **sqlite.h.in** - This file defines the public interface to the SQLite
library. Readers will need to be familiar with this interface before
trying to understand how the library works internally.
* **sqliteInt.h** - this header file defines many of the data objects
used internally by SQLite.
* **parse.y** - This file describes the LALR(1) grammar that SQLite uses
to parse SQL statements, and the actions that are taken at each step
in the parsing process.
* **vdbe.c** - This file implements the virtual machine that runs
prepared statements. There are various helper files whose names
begin with "vdbe". The VDBE has access to the vdbeInt.h header file
which defines internal data objects. The rest of SQLite interacts
with the VDBE through an interface defined by vdbe.h.
* **where.c** - This file analyzes the WHERE clause and generates
virtual machine code to run queries efficiently. This file is
sometimes called the "query optimizer". It has its own private
header file, whereInt.h, that defines data objects used internally.
* **btree.c** - This file contains the implementation of the B-Tree
storage engine used by SQLite.
* **pager.c** - This file contains the "pager" implementation, the
module that implements transactions.
* **os_unix.c** and **os_win.c** - These two files implement the interface
between SQLite and the underlying operating system using the run-time
pluggable VFS interface.
* **shell.c** - This file is not part of the core SQLite library. This
is the file that, when linked against sqlite3.a, generates the
"sqlite3.exe" command-line shell.
* **tclsqlite.c** - This file implements the Tcl bindings for SQLite. It
is not part of the core SQLite library. But as most of the tests in this
repository are written in Tcl, the Tcl language bindings are important.
There are many other source files. Each has a succinct header comment that
describes its purpose and role within the larger system.
## Contacts
The main SQLite webpage is [http://www.sqlite.org/](http://www.sqlite.org/)
with geographically distributed backup servers at
[http://www2.sqlite.org/](http://www2.sqlite.org) and
[http://www3.sqlite.org/](http://www3.sqlite.org).

View File

@@ -1 +1 @@
3.8.3
3.8.12

View File

@@ -30,4 +30,5 @@ END {
printf "#define TK_%-29s %4d\n", "AGG_COLUMN", ++max
printf "#define TK_%-29s %4d\n", "UMINUS", ++max
printf "#define TK_%-29s %4d\n", "UPLUS", ++max
printf "#define TK_%-29s %4d\n", "REGISTER", ++max
}

View File

@@ -6,9 +6,9 @@ libsqlite3_la_SOURCES = sqlite3.c
libsqlite3_la_LDFLAGS = -no-undefined -version-info 8:6:8
bin_PROGRAMS = sqlite3
sqlite3_SOURCES = shell.c sqlite3.h
sqlite3_LDADD = $(top_builddir)/libsqlite3.la @READLINE_LIBS@
sqlite3_DEPENDENCIES = $(top_builddir)/libsqlite3.la
sqlite3_SOURCES = shell.c sqlite3.c sqlite3.h
sqlite3_LDADD = @READLINE_LIBS@
sqlite3_CFLAGS = $(AM_CFLAGS)
include_HEADERS = sqlite3.h sqlite3ext.h

View File

@@ -1,57 +1,11 @@
This directory contains components used to build an autoconf-ready package
of the SQLite amalgamation: sqlite-autoconf-30XXXXXX.tar.gz
This file describes how to use the files in this directory to create a new
version of the "autoconf-amalgamation" package.
1. The following files should have executable permission:
chmod 755 install-sh
chmod 755 missing
chmod 755 depcomp
chmod 755 config.sub
chmod 755 config.guess
2. Copy new versions of the following SQLite files into this directory:
sqlite3.c
sqlite3.h
sqlite3ext.h
sqlite3.1
sqlite3.pc.in
shell.c
3. Update the SQLite version number in the AC_INIT macro in file
configure.ac:
AC_INIT(sqlite, 3.6.3, http://www.sqlite.org)
4. Run the following commands to push the version number change through
to the generated files.
aclocal
autoconf
automake
5. Create the tclsqlite3.c file in the tea/generic directory. As follows:
mkdir -p tea/generic
echo "#ifdef USE_SYSTEM_SQLITE" > tea/generic/tclsqlite3.c
echo "# include <sqlite3.h>" >> tea/generic/tclsqlite3.c
echo "#else" >> tea/generic/tclsqlite3.c
echo "#include \"../../sqlite3.c\"" >> tea/generic/tclsqlite3.c
echo "#endif" >> tea/generic/tclsqlite3.c
cat ../src/tclsqlite.c >> tea/generic/tclsqlite3.c
6. Update the SQLite version in the AC_INIT macro in file tea/configure.in:
AC_INIT([sqlite], [3.6.3])
7. From the 'tea' directory, run the following commands:
autoconf
rm -rf autom4te.cache
8. Run "./configure && make dist". This builds a distribution package
named something like "sqlite-3.6.3.tar.gz". Rename to
"sqlite-amalgamation-3.6.3.tar.gz" and use.
To build the autoconf amalgamation, run from the top-level:
./configure
make amalgamation-tarball
The amalgamation-tarball target (also available in "main.mk") runs the
script tool/mkautoconfamal.sh which does the work. Refer to that script
for details.

View File

@@ -73,6 +73,7 @@ exec_prefix = @exec_prefix@
bindir = @bindir@
libdir = @libdir@
datarootdir = @datarootdir@
datadir = @datadir@
mandir = @mandir@
includedir = @includedir@
@@ -347,7 +348,7 @@ clean:
distclean: clean
-rm -f *.tab.c
-rm -f $(CONFIG_CLEAN_FILES)
-rm -f config.cache config.log config.status
-rm -f config.h config.cache config.log config.status
#========================================================================
# Install binary object libraries. On Windows this includes both .dll and

View File

@@ -166,8 +166,10 @@ AC_DEFINE(USE_TCL_STUBS, 1, [Use Tcl stubs])
#--------------------------------------------------------------------
# Redefine fdatasync as fsync on systems that lack fdatasync
#--------------------------------------------------------------------
AC_CHECK_FUNC(fdatasync, , AC_DEFINE(fdatasync, fsync))
#
#AC_CHECK_FUNC(fdatasync, , AC_DEFINE(fdatasync, fsync))
# Check for library functions that SQLite can optionally use.
AC_CHECK_FUNCS([fdatasync usleep fullfsync localtime_r gmtime_r])
AC_FUNC_STRERROR_R

View File

@@ -11,5 +11,5 @@ SQLite3 is a self-contains, zero-configuration, transactional SQL database
engine. This extension provides an easy to use interface for accessing
SQLite database files from Tcl.
.PP
For full documentation see http://www.sqlite.org/ and
in particular http://www.sqlite.org/tclsqlite.html.
For full documentation see \fIhttp://www.sqlite.org/\fR and
in particular \fIhttp://www.sqlite.org/tclsqlite.html\fR.

View File

@@ -1,119 +1,528 @@
#!/bin/sh
#
# install - install a program, script, or datafile
# This comes from X11R5; it is not part of GNU.
scriptversion=2011-04-20.01; # UTC
# This originates from X11R5 (mit/util/scripts/install.sh), which was
# later released in X11R6 (xc/config/util/install.sh) with the
# following copyright and license.
#
# $XConsortium: install.sh,v 1.2 89/12/18 14:47:22 jim Exp $
# Copyright (C) 1994 X Consortium
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC-
# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# Except as contained in this notice, the name of the X Consortium shall not
# be used in advertising or otherwise to promote the sale, use or other deal-
# ings in this Software without prior written authorization from the X Consor-
# tium.
#
#
# FSF changes to this file are in the public domain.
#
# Calling this script install-sh is preferred over install.sh, to prevent
# `make' implicit rules from creating a file called install from it
# when there is no Makefile.
#
# This script is compatible with the BSD install script, but was written
# from scratch.
#
nl='
'
IFS=" "" $nl"
# set DOITPROG to echo to test this script
# Don't use :- since 4.3BSD and earlier shells don't like it.
doit="${DOITPROG-}"
doit=${DOITPROG-}
if test -z "$doit"; then
doit_exec=exec
else
doit_exec=$doit
fi
# Put in absolute file names if you don't have them in your path;
# or use environment vars.
# put in absolute paths if you don't have them in your path; or use env. vars.
chgrpprog=${CHGRPPROG-chgrp}
chmodprog=${CHMODPROG-chmod}
chownprog=${CHOWNPROG-chown}
cmpprog=${CMPPROG-cmp}
cpprog=${CPPROG-cp}
mkdirprog=${MKDIRPROG-mkdir}
mvprog=${MVPROG-mv}
rmprog=${RMPROG-rm}
stripprog=${STRIPPROG-strip}
mvprog="${MVPROG-mv}"
cpprog="${CPPROG-cp}"
chmodprog="${CHMODPROG-chmod}"
chownprog="${CHOWNPROG-chown}"
chgrpprog="${CHGRPPROG-chgrp}"
stripprog="${STRIPPROG-strip}"
rmprog="${RMPROG-rm}"
posix_glob='?'
initialize_posix_glob='
test "$posix_glob" != "?" || {
if (set -f) 2>/dev/null; then
posix_glob=
else
posix_glob=:
fi
}
'
instcmd="$mvprog"
chmodcmd=""
chowncmd=""
chgrpcmd=""
stripcmd=""
posix_mkdir=
# Desired mode of installed file.
mode=0755
chgrpcmd=
chmodcmd=$chmodprog
chowncmd=
mvcmd=$mvprog
rmcmd="$rmprog -f"
mvcmd="$mvprog"
src=""
dst=""
stripcmd=
while [ x"$1" != x ]; do
case $1 in
-c) instcmd="$cpprog"
shift
continue;;
src=
dst=
dir_arg=
dst_arg=
-m) chmodcmd="$chmodprog $2"
shift
shift
continue;;
copy_on_change=false
no_target_directory=
-o) chowncmd="$chownprog $2"
shift
shift
continue;;
usage="\
Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE
or: $0 [OPTION]... SRCFILES... DIRECTORY
or: $0 [OPTION]... -t DIRECTORY SRCFILES...
or: $0 [OPTION]... -d DIRECTORIES...
-g) chgrpcmd="$chgrpprog $2"
shift
shift
continue;;
In the 1st form, copy SRCFILE to DSTFILE.
In the 2nd and 3rd, copy all SRCFILES to DIRECTORY.
In the 4th, create DIRECTORIES.
-s) stripcmd="$stripprog"
shift
continue;;
Options:
--help display this help and exit.
--version display version info and exit.
*) if [ x"$src" = x ]
then
src=$1
else
dst=$1
fi
shift
continue;;
esac
-c (ignored)
-C install only if different (preserve the last data modification time)
-d create directories instead of installing files.
-g GROUP $chgrpprog installed files to GROUP.
-m MODE $chmodprog installed files to MODE.
-o USER $chownprog installed files to USER.
-s $stripprog installed files.
-S $stripprog installed files.
-t DIRECTORY install into DIRECTORY.
-T report an error if DSTFILE is a directory.
Environment variables override the default commands:
CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG
RMPROG STRIPPROG
"
while test $# -ne 0; do
case $1 in
-c) ;;
-C) copy_on_change=true;;
-d) dir_arg=true;;
-g) chgrpcmd="$chgrpprog $2"
shift;;
--help) echo "$usage"; exit $?;;
-m) mode=$2
case $mode in
*' '* | *' '* | *'
'* | *'*'* | *'?'* | *'['*)
echo "$0: invalid mode: $mode" >&2
exit 1;;
esac
shift;;
-o) chowncmd="$chownprog $2"
shift;;
-s) stripcmd=$stripprog;;
-S) stripcmd="$stripprog $2"
shift;;
-t) dst_arg=$2
shift;;
-T) no_target_directory=true;;
--version) echo "$0 $scriptversion"; exit $?;;
--) shift
break;;
-*) echo "$0: invalid option: $1" >&2
exit 1;;
*) break;;
esac
shift
done
if [ x"$src" = x ]
then
echo "install: no input file specified"
if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then
# When -d is used, all remaining arguments are directories to create.
# When -t is used, the destination is already specified.
# Otherwise, the last argument is the destination. Remove it from $@.
for arg
do
if test -n "$dst_arg"; then
# $@ is not empty: it contains at least $arg.
set fnord "$@" "$dst_arg"
shift # fnord
fi
shift # arg
dst_arg=$arg
done
fi
if test $# -eq 0; then
if test -z "$dir_arg"; then
echo "$0: no input file specified." >&2
exit 1
fi
# It's OK to call `install-sh -d' without argument.
# This can happen when creating conditional directories.
exit 0
fi
if test -z "$dir_arg"; then
do_exit='(exit $ret); exit $ret'
trap "ret=129; $do_exit" 1
trap "ret=130; $do_exit" 2
trap "ret=141; $do_exit" 13
trap "ret=143; $do_exit" 15
# Set umask so as not to create temps with too-generous modes.
# However, 'strip' requires both read and write access to temps.
case $mode in
# Optimize common cases.
*644) cp_umask=133;;
*755) cp_umask=22;;
*[0-7])
if test -z "$stripcmd"; then
u_plus_rw=
else
u_plus_rw='% 200'
fi
cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;;
*)
if test -z "$stripcmd"; then
u_plus_rw=
else
u_plus_rw=,u+rw
fi
cp_umask=$mode$u_plus_rw;;
esac
fi
for src
do
# Protect names starting with `-'.
case $src in
-*) src=./$src;;
esac
if test -n "$dir_arg"; then
dst=$src
dstdir=$dst
test -d "$dstdir"
dstdir_status=$?
else
# Waiting for this to be detected by the "$cpprog $src $dsttmp" command
# might cause directories to be created, which would be especially bad
# if $src (and thus $dsttmp) contains '*'.
if test ! -f "$src" && test ! -d "$src"; then
echo "$0: $src does not exist." >&2
exit 1
fi
if test -z "$dst_arg"; then
echo "$0: no destination specified." >&2
exit 1
fi
dst=$dst_arg
# Protect names starting with `-'.
case $dst in
-*) dst=./$dst;;
esac
# If destination is a directory, append the input filename; won't work
# if double slashes aren't ignored.
if test -d "$dst"; then
if test -n "$no_target_directory"; then
echo "$0: $dst_arg: Is a directory" >&2
exit 1
fi
fi
dstdir=$dst
dst=$dstdir/`basename "$src"`
dstdir_status=0
else
# Prefer dirname, but fall back on a substitute if dirname fails.
dstdir=`
(dirname "$dst") 2>/dev/null ||
expr X"$dst" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
X"$dst" : 'X\(//\)[^/]' \| \
X"$dst" : 'X\(//\)$' \| \
X"$dst" : 'X\(/\)' \| . 2>/dev/null ||
echo X"$dst" |
sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
s//\1/
q
}
/^X\(\/\/\)[^/].*/{
s//\1/
q
}
/^X\(\/\/\)$/{
s//\1/
q
}
/^X\(\/\).*/{
s//\1/
q
}
s/.*/./; q'
`
if [ x"$dst" = x ]
then
echo "install: no destination specified"
exit 1
fi
test -d "$dstdir"
dstdir_status=$?
fi
fi
obsolete_mkdir_used=false
# If destination is a directory, append the input filename; if your system
# does not like double slashes in filenames, you may need to add some logic
if test $dstdir_status != 0; then
case $posix_mkdir in
'')
# Create intermediate dirs using mode 755 as modified by the umask.
# This is like FreeBSD 'install' as of 1997-10-28.
umask=`umask`
case $stripcmd.$umask in
# Optimize common cases.
*[2367][2367]) mkdir_umask=$umask;;
.*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;;
if [ -d $dst ]
then
dst="$dst"/`basename $src`
fi
*[0-7])
mkdir_umask=`expr $umask + 22 \
- $umask % 100 % 40 + $umask % 20 \
- $umask % 10 % 4 + $umask % 2
`;;
*) mkdir_umask=$umask,go-w;;
esac
# Make a temp file name in the proper directory.
# With -d, create the new directory with the user-specified mode.
# Otherwise, rely on $mkdir_umask.
if test -n "$dir_arg"; then
mkdir_mode=-m$mode
else
mkdir_mode=
fi
dstdir=`dirname $dst`
dsttmp=$dstdir/#inst.$$#
posix_mkdir=false
case $umask in
*[123567][0-7][0-7])
# POSIX mkdir -p sets u+wx bits regardless of umask, which
# is incompatible with FreeBSD 'install' when (umask & 300) != 0.
;;
*)
tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$
trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0
# Move or copy the file name to the temp name
if (umask $mkdir_umask &&
exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1
then
if test -z "$dir_arg" || {
# Check for POSIX incompatibilities with -m.
# HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or
# other-writeable bit of parent directory when it shouldn't.
# FreeBSD 6.1 mkdir -m -p sets mode of existing directory.
ls_ld_tmpdir=`ls -ld "$tmpdir"`
case $ls_ld_tmpdir in
d????-?r-*) different_mode=700;;
d????-?--*) different_mode=755;;
*) false;;
esac &&
$mkdirprog -m$different_mode -p -- "$tmpdir" && {
ls_ld_tmpdir_1=`ls -ld "$tmpdir"`
test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1"
}
}
then posix_mkdir=:
fi
rmdir "$tmpdir/d" "$tmpdir"
else
# Remove any dirs left behind by ancient mkdir implementations.
rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null
fi
trap '' 0;;
esac;;
esac
$doit $instcmd $src $dsttmp
if
$posix_mkdir && (
umask $mkdir_umask &&
$doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir"
)
then :
else
# and set any options; do chmod last to preserve setuid bits
# The umask is ridiculous, or mkdir does not conform to POSIX,
# or it failed possibly due to a race condition. Create the
# directory the slow way, step by step, checking for races as we go.
if [ x"$chowncmd" != x ]; then $doit $chowncmd $dsttmp; fi
if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dsttmp; fi
if [ x"$stripcmd" != x ]; then $doit $stripcmd $dsttmp; fi
if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dsttmp; fi
case $dstdir in
/*) prefix='/';;
-*) prefix='./';;
*) prefix='';;
esac
# Now rename the file to the real destination.
eval "$initialize_posix_glob"
$doit $rmcmd $dst
$doit $mvcmd $dsttmp $dst
oIFS=$IFS
IFS=/
$posix_glob set -f
set fnord $dstdir
shift
$posix_glob set +f
IFS=$oIFS
prefixes=
exit 0
# Iterate over the positional parameters (the path components), growing
# $prefix by one component per iteration.  Components that do not exist
# yet are either created right away (POSIX mkdir) or queued, quoted, in
# $prefixes.
for d
do
  test -z "$d" && continue

  prefix=$prefix$d
  if test -d "$prefix"; then
    # This much of the path already exists, so drop anything queued so far.
    prefixes=
  else
    if $posix_mkdir; then
      # Invoke the umask builtin here.  Writing "umask=$mkdir_umask" would
      # merely assign a shell variable named "umask" and leave the process
      # umask untouched, so mkdir -p would not honor $mkdir_umask the way
      # the other mkdir invocations in this script do.
      (umask $mkdir_umask &&
       $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break
      # Don't fail if two instances are running concurrently.
      test -d "$prefix" || exit 1
    else
      # Wrap the prefix in single quotes for later use; any single quote
      # embedded in the name is escaped first.
      case $prefix in
        *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;;
        *) qprefix=$prefix;;
      esac
      prefixes="$prefixes '$qprefix'"
    fi
  fi
  prefix=$prefix/
done
if test -n "$prefixes"; then
# Don't fail if two instances are running concurrently.
(umask $mkdir_umask &&
eval "\$doit_exec \$mkdirprog $prefixes") ||
test -d "$dstdir" || exit 1
obsolete_mkdir_used=true
fi
fi
fi
if test -n "$dir_arg"; then
{ test -z "$chowncmd" || $doit $chowncmd "$dst"; } &&
{ test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } &&
{ test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false ||
test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1
else
# Make a couple of temp file names in the proper directory.
dsttmp=$dstdir/_inst.$$_
rmtmp=$dstdir/_rm.$$_
# Trap to clean up those temp files at exit.
trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0
# Copy the file name to the temp name.
(umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") &&
# and set any options; do chmod last to preserve setuid bits.
#
# If any of these fail, we abort the whole thing. If we want to
# ignore errors from any of these, just make sure not to ignore
# errors from the above "$doit $cpprog $src $dsttmp" command.
#
{ test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } &&
{ test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } &&
{ test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } &&
{ test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } &&
# If -C, don't bother to copy if it wouldn't change the file.
if $copy_on_change &&
old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` &&
new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` &&
eval "$initialize_posix_glob" &&
$posix_glob set -f &&
set X $old && old=:$2:$4:$5:$6 &&
set X $new && new=:$2:$4:$5:$6 &&
$posix_glob set +f &&
test "$old" = "$new" &&
$cmpprog "$dst" "$dsttmp" >/dev/null 2>&1
then
rm -f "$dsttmp"
else
# Rename the file to the real destination.
$doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null ||
# The rename failed, perhaps because mv can't rename something else
# to itself, or perhaps because mv is so ancient that it does not
# support -f.
{
# Now remove or move aside any old file at destination location.
# We try this two ways since rm can't unlink itself on some
# systems and the destination file might be busy for other
# reasons. In this case, the final cleanup might fail but the new
# file should still install successfully.
{
test ! -f "$dst" ||
$doit $rmcmd -f "$dst" 2>/dev/null ||
{ $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null &&
{ $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; }
} ||
{ echo "$0: cannot unlink or rename $dst" >&2
(exit 1); exit 1
}
} &&
# Now rename the file to the real destination.
$doit $mvcmd "$dsttmp" "$dst"
}
fi || exit 1
trap '' 0
fi
done
# Local variables:
# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC"
# time-stamp-end: "; # UTC"
# End:

View File

@@ -8,8 +8,6 @@
#
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
# RCS: @(#) $Id: tcl.m4,v 1.145 2010/08/17 00:33:40 hobbs Exp $
AC_PREREQ(2.57)
@@ -140,6 +138,8 @@ AC_DEFUN([TEA_PATH_TCLCONFIG], [
`ls -d /usr/contrib/lib 2>/dev/null` \
`ls -d /usr/lib 2>/dev/null` \
`ls -d /usr/lib64 2>/dev/null` \
`ls -d /usr/lib/tcl8.6 2>/dev/null` \
`ls -d /usr/lib/tcl8.5 2>/dev/null` \
; do
if test -f "$i/tclConfig.sh" ; then
ac_cv_c_tclconfig="`(cd $i; pwd)`"
@@ -170,7 +170,7 @@ AC_DEFUN([TEA_PATH_TCLCONFIG], [
if test x"${ac_cv_c_tclconfig}" = x ; then
TCL_BIN_DIR="# no Tcl configs found"
AC_MSG_ERROR([Can't find Tcl configuration definitions])
AC_MSG_ERROR([Can't find Tcl configuration definitions. Use --with-tcl to specify a directory containing tclConfig.sh])
else
no_tcl=
TCL_BIN_DIR="${ac_cv_c_tclconfig}"
@@ -323,7 +323,7 @@ AC_DEFUN([TEA_PATH_TKCONFIG], [
if test x"${ac_cv_c_tkconfig}" = x ; then
TK_BIN_DIR="# no Tk configs found"
AC_MSG_ERROR([Can't find Tk configuration definitions])
AC_MSG_ERROR([Can't find Tk configuration definitions. Use --with-tk to specify a directory containing tkConfig.sh])
else
no_tk=
TK_BIN_DIR="${ac_cv_c_tkconfig}"
@@ -344,11 +344,10 @@ AC_DEFUN([TEA_PATH_TKCONFIG], [
#
# Results:
#
# Subst the following vars:
# Substitutes the following vars:
# TCL_BIN_DIR
# TCL_SRC_DIR
# TCL_LIB_FILE
#
#------------------------------------------------------------------------
AC_DEFUN([TEA_LOAD_TCLCONFIG], [
@@ -417,32 +416,26 @@ AC_DEFUN([TEA_LOAD_TCLCONFIG], [
AC_SUBST(TCL_STUB_LIB_FLAG)
AC_SUBST(TCL_STUB_LIB_SPEC)
case "`uname -s`" in
*CYGWIN_*)
AC_MSG_CHECKING([for cygwin variant])
case ${TCL_EXTRA_CFLAGS} in
*-mwin32*|*-mno-cygwin*)
TEA_PLATFORM="windows"
CFLAGS="$CFLAGS -mwin32"
AC_MSG_RESULT([win32])
;;
*)
TEA_PLATFORM="unix"
AC_MSG_RESULT([unix])
;;
esac
EXEEXT=".exe"
;;
*)
;;
esac
AC_MSG_CHECKING([platform])
hold_cc=$CC; CC="$TCL_CC"
AC_TRY_COMPILE(,[
#ifdef _WIN32
#error win32
#endif
], TEA_PLATFORM="unix",
TEA_PLATFORM="windows"
)
CC=$hold_cc
AC_MSG_RESULT($TEA_PLATFORM)
# The BUILD_$pkg is to define the correct extern storage class
# handling when making this package
AC_DEFINE_UNQUOTED(BUILD_${PACKAGE_NAME}, [],
[Building extension source?])
# Do this here as we have fully defined TEA_PLATFORM now
if test "${TEA_PLATFORM}" = "windows" ; then
# The BUILD_$pkg is to define the correct extern storage class
# handling when making this package
AC_DEFINE_UNQUOTED(BUILD_${PACKAGE_NAME})
CLEANFILES="$CLEANFILES *.lib *.dll *.pdb"
EXEEXT=".exe"
CLEANFILES="$CLEANFILES *.lib *.dll *.pdb *.exp"
fi
# TEA specific:
@@ -566,11 +559,11 @@ AC_DEFUN([TEA_LOAD_TKCONFIG], [
# only for running extension test cases. It should never be
# or generation of files (like pkgIndex.tcl) at build time.
#
# Arguments
# Arguments:
# none
#
# Results
# Subst's the following values:
# Results:
# Substitutes the following vars:
# TCLSH_PROG
#------------------------------------------------------------------------
@@ -616,11 +609,11 @@ AC_DEFUN([TEA_PROG_TCLSH], [
# only for running extension test cases. It should never be
# or generation of files (like pkgIndex.tcl) at build time.
#
# Arguments
# Arguments:
# none
#
# Results
# Subst's the following values:
# Results:
# Substitutes the following vars:
# WISH_PROG
#------------------------------------------------------------------------
@@ -731,7 +724,6 @@ AC_DEFUN([TEA_ENABLE_SHARED], [
# TCL_THREADS
# _REENTRANT
# _THREAD_SAFE
#
#------------------------------------------------------------------------
AC_DEFUN([TEA_ENABLE_THREADS], [
@@ -855,12 +847,11 @@ AC_DEFUN([TEA_ENABLE_THREADS], [
#
# Defines the following vars:
# CFLAGS_DEFAULT Sets to $(CFLAGS_DEBUG) if true
# Sets to $(CFLAGS_OPTIMIZE) if false
# Sets to "$(CFLAGS_OPTIMIZE) -DNDEBUG" if false
# LDFLAGS_DEFAULT Sets to $(LDFLAGS_DEBUG) if true
# Sets to $(LDFLAGS_OPTIMIZE) if false
# DBGX Formerly used as debug library extension;
# always blank now.
#
#------------------------------------------------------------------------
AC_DEFUN([TEA_ENABLE_SYMBOLS], [
@@ -873,7 +864,7 @@ AC_DEFUN([TEA_ENABLE_SYMBOLS], [
[tcl_ok=$enableval], [tcl_ok=no])
DBGX=""
if test "$tcl_ok" = "no"; then
CFLAGS_DEFAULT="${CFLAGS_OPTIMIZE}"
CFLAGS_DEFAULT="${CFLAGS_OPTIMIZE} -DNDEBUG"
LDFLAGS_DEFAULT="${LDFLAGS_OPTIMIZE}"
AC_MSG_RESULT([no])
else
@@ -920,7 +911,6 @@ AC_DEFUN([TEA_ENABLE_SYMBOLS], [
#
# Defines the following vars:
# HAVE_LANGINFO Triggers use of nl_langinfo if defined.
#
#------------------------------------------------------------------------
AC_DEFUN([TEA_ENABLE_LANGINFO], [
@@ -961,7 +951,6 @@ AC_DEFUN([TEA_ENABLE_LANGINFO], [
# Defines the following var:
#
# system - System/platform/version identification code.
#
#--------------------------------------------------------------------
AC_DEFUN([TEA_CONFIG_SYSTEM], [
@@ -1030,21 +1019,20 @@ AC_DEFUN([TEA_CONFIG_SYSTEM], [
# extensions. An empty string means we don't know how
# to use shared libraries on this platform.
# LIB_SUFFIX - Specifies everything that comes after the "libfoo"
# in a static or shared library name, using the $VERSION variable
# in a static or shared library name, using the $PACKAGE_VERSION variable
# to put the version in the right place. This is used
# by platforms that need non-standard library names.
# Examples: ${VERSION}.so.1.1 on NetBSD, since it needs
# to have a version after the .so, and ${VERSION}.a
# Examples: ${PACKAGE_VERSION}.so.1.1 on NetBSD, since it needs
# to have a version after the .so, and ${PACKAGE_VERSION}.a
# on AIX, since a shared library needs to have
# a .a extension whereas shared objects for loadable
# extensions have a .so extension. Defaults to
# ${VERSION}${SHLIB_SUFFIX}.
# ${PACKAGE_VERSION}${SHLIB_SUFFIX}.
# CFLAGS_DEBUG -
# Flags used when running the compiler in debug mode
# CFLAGS_OPTIMIZE -
# Flags used when running the compiler in optimize mode
# CFLAGS - Additional CFLAGS added as necessary (usually 64-bit)
#
#--------------------------------------------------------------------
AC_DEFUN([TEA_CONFIG_CFLAGS], [
@@ -1086,6 +1074,7 @@ AC_DEFUN([TEA_CONFIG_CFLAGS], [
AC_DEFINE(MODULE_SCOPE,
[extern __attribute__((__visibility__("hidden")))],
[Compiler support for module scope symbols])
AC_DEFINE(HAVE_HIDDEN, [1], [Compiler support for module scope symbols])
])
# Step 0.d: Disable -rpath support?
@@ -1134,15 +1123,14 @@ AC_DEFUN([TEA_CONFIG_CFLAGS], [
ECHO_VERSION='`echo ${PACKAGE_VERSION}`'
TCL_LIB_VERSIONS_OK=ok
CFLAGS_DEBUG=-g
CFLAGS_OPTIMIZE=-O
AS_IF([test "$GCC" = yes], [
# TEA specific:
CFLAGS_OPTIMIZE=-O2
CFLAGS_WARNING="-Wall"
], [CFLAGS_WARNING=""])
dnl FIXME: Replace AC_CHECK_PROG with AC_CHECK_TOOL once cross compiling is fixed.
dnl AC_CHECK_TOOL(AR, ar)
AC_CHECK_PROG(AR, ar, ar)
], [
CFLAGS_OPTIMIZE=-O
CFLAGS_WARNING=""
])
AC_CHECK_TOOL(AR, ar)
STLIB_LD='${AR} cr'
LD_LIBRARY_PATH_VAR="LD_LIBRARY_PATH"
AS_IF([test "x$SHLIB_VERSION" = x],[SHLIB_VERSION="1.0"])
@@ -1171,7 +1159,7 @@ dnl AC_CHECK_TOOL(AR, ar)
PATH64="${MSSDK}/Bin/Win64"
;;
esac
if test ! -d "${PATH64}" ; then
if test "$GCC" != "yes" -a ! -d "${PATH64}" ; then
AC_MSG_WARN([Could not find 64-bit $MACHINE SDK to enable 64bit mode])
AC_MSG_WARN([Ensure latest Platform SDK is installed])
do64bit="no"
@@ -1288,7 +1276,7 @@ dnl AC_CHECK_TOOL(AR, ar)
else
RC="rc"
lflags="-nologo"
LINKBIN="link"
LINKBIN="link"
CFLAGS_DEBUG="-nologo -Z7 -Od -W3 -WX ${runtime}d"
CFLAGS_OPTIMIZE="-nologo -O2 -W2 ${runtime}"
fi
@@ -1296,13 +1284,43 @@ dnl AC_CHECK_TOOL(AR, ar)
if test "$GCC" = "yes"; then
# mingw gcc mode
RC="windres"
AC_CHECK_TOOL(RC, windres)
CFLAGS_DEBUG="-g"
CFLAGS_OPTIMIZE="-O2 -fomit-frame-pointer"
SHLIB_LD="$CC -shared"
SHLIB_LD='${CC} -shared'
UNSHARED_LIB_SUFFIX='${TCL_TRIM_DOTS}.a'
LDFLAGS_CONSOLE="-wl,--subsystem,console ${lflags}"
LDFLAGS_WINDOW="-wl,--subsystem,windows ${lflags}"
AC_CACHE_CHECK(for cross-compile version of gcc,
ac_cv_cross,
AC_TRY_COMPILE([
#ifdef _WIN32
#error cross-compiler
#endif
], [],
ac_cv_cross=yes,
ac_cv_cross=no)
)
if test "$ac_cv_cross" = "yes"; then
case "$do64bit" in
amd64|x64|yes)
CC="x86_64-w64-mingw32-gcc"
LD="x86_64-w64-mingw32-ld"
AR="x86_64-w64-mingw32-ar"
RANLIB="x86_64-w64-mingw32-ranlib"
RC="x86_64-w64-mingw32-windres"
;;
*)
CC="i686-w64-mingw32-gcc"
LD="i686-w64-mingw32-ld"
AR="i686-w64-mingw32-ar"
RANLIB="i686-w64-mingw32-ranlib"
RC="i686-w64-mingw32-windres"
;;
esac
fi
else
SHLIB_LD="${LINKBIN} -dll ${lflags}"
# link -lib only works when -lib is the first arg
@@ -1409,7 +1427,8 @@ dnl AC_CHECK_TOOL(AR, ar)
SHLIB_CFLAGS=""
SHLIB_LD='${CC} -shared'
SHLIB_SUFFIX=".dll"
EXE_SUFFIX=".exe"
EXEEXT=".exe"
do64bit_ok=yes
CC_SEARCH_FLAGS=""
LD_SEARCH_FLAGS=""
;;
@@ -1438,7 +1457,7 @@ dnl AC_CHECK_TOOL(AR, ar)
])
AC_CHECK_LIB(dld, shl_load, tcl_ok=yes, tcl_ok=no)
AS_IF([test "$tcl_ok" = yes], [
LDFLAGS="$LDFLAGS -E"
LDFLAGS="$LDFLAGS -Wl,-E"
CC_SEARCH_FLAGS='-Wl,+s,+b,${LIB_RUNTIME_DIR}:.'
LD_SEARCH_FLAGS='+s +b ${LIB_RUNTIME_DIR}:.'
LD_LIBRARY_PATH_VAR="SHLIB_PATH"
@@ -1520,7 +1539,7 @@ dnl AC_CHECK_TOOL(AR, ar)
])
])
;;
Linux*)
Linux*|GNU*|NetBSD-Debian)
SHLIB_CFLAGS="-fPIC"
SHLIB_SUFFIX=".so"
@@ -1553,17 +1572,6 @@ dnl AC_CHECK_TOOL(AR, ar)
# files in compat/*.c is being linked in.
AS_IF([test x"${USE_COMPAT}" != x],[CFLAGS="$CFLAGS -fno-inline"])
;;
GNU*)
SHLIB_CFLAGS="-fPIC"
SHLIB_SUFFIX=".so"
SHLIB_LD='${CC} -shared'
LDFLAGS="$LDFLAGS -Wl,--export-dynamic"
CC_SEARCH_FLAGS=""
LD_SEARCH_FLAGS=""
AS_IF([test "`uname -m`" = "alpha"], [CFLAGS="$CFLAGS -mieee"])
;;
Lynx*)
SHLIB_CFLAGS="-fPIC"
@@ -1576,35 +1584,44 @@ dnl AC_CHECK_TOOL(AR, ar)
LD_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}'])
;;
OpenBSD-*)
SHLIB_CFLAGS="-fPIC"
SHLIB_LD='${CC} -shared ${SHLIB_CFLAGS}'
SHLIB_SUFFIX=".so"
AS_IF([test $doRpath = yes], [
CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}'])
LD_SEARCH_FLAGS=${CC_SEARCH_FLAGS}
SHARED_LIB_SUFFIX='${TCL_TRIM_DOTS}.so.${SHLIB_VERSION}'
AC_CACHE_CHECK([for ELF], tcl_cv_ld_elf, [
AC_EGREP_CPP(yes, [
#ifdef __ELF__
yes
#endif
], tcl_cv_ld_elf=yes, tcl_cv_ld_elf=no)])
AS_IF([test $tcl_cv_ld_elf = yes], [
LDFLAGS=-Wl,-export-dynamic
], [LDFLAGS=""])
arch=`arch -s`
case "$arch" in
vax)
SHLIB_SUFFIX=""
SHARED_LIB_SUFFIX=""
LDFLAGS=""
;;
*)
SHLIB_CFLAGS="-fPIC"
SHLIB_LD='${CC} -shared ${SHLIB_CFLAGS}'
SHLIB_SUFFIX=".so"
AS_IF([test $doRpath = yes], [
CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}'])
LD_SEARCH_FLAGS=${CC_SEARCH_FLAGS}
SHARED_LIB_SUFFIX='${TCL_TRIM_DOTS}.so.${SHLIB_VERSION}'
LDFLAGS="-Wl,-export-dynamic"
;;
esac
case "$arch" in
vax)
CFLAGS_OPTIMIZE="-O1"
;;
*)
CFLAGS_OPTIMIZE="-O2"
;;
esac
AS_IF([test "${TCL_THREADS}" = "1"], [
# OpenBSD builds and links with -pthread, never -lpthread.
# On OpenBSD: Compile with -pthread
# Don't link with -lpthread
LIBS=`echo $LIBS | sed s/-lpthread//`
CFLAGS="$CFLAGS -pthread"
SHLIB_CFLAGS="$SHLIB_CFLAGS -pthread"
])
# OpenBSD doesn't do version numbers with dots.
UNSHARED_LIB_SUFFIX='${TCL_TRIM_DOTS}.a'
TCL_LIB_VERSIONS_OK=nodots
;;
NetBSD-*|FreeBSD-[[3-4]].*)
# FreeBSD 3.* and greater have ELF.
# NetBSD 2.* has ELF and can use 'cc -shared' to build shared libs
NetBSD-*)
# NetBSD has ELF and can use 'cc -shared' to build shared libs
SHLIB_CFLAGS="-fPIC"
SHLIB_LD='${CC} -shared ${SHLIB_CFLAGS}'
SHLIB_SUFFIX=".so"
@@ -1618,35 +1635,32 @@ dnl AC_CHECK_TOOL(AR, ar)
CFLAGS="$CFLAGS -pthread"
LDFLAGS="$LDFLAGS -pthread"
])
case $system in
FreeBSD-3.*)
# FreeBSD-3 doesn't handle version numbers with dots.
UNSHARED_LIB_SUFFIX='${TCL_TRIM_DOTS}.a'
SHARED_LIB_SUFFIX='${TCL_TRIM_DOTS}.so'
TCL_LIB_VERSIONS_OK=nodots
;;
esac
;;
FreeBSD-*)
# This configuration from FreeBSD Ports.
SHLIB_CFLAGS="-fPIC"
SHLIB_LD="${CC} -shared"
TCL_SHLIB_LD_EXTRAS="-soname \$[@]"
TCL_SHLIB_LD_EXTRAS="-Wl,-soname=\$[@]"
TK_SHLIB_LD_EXTRAS="-Wl,-soname,\$[@]"
SHLIB_SUFFIX=".so"
LDFLAGS=""
AS_IF([test $doRpath = yes], [
CC_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}'
LD_SEARCH_FLAGS='-rpath ${LIB_RUNTIME_DIR}'])
LD_SEARCH_FLAGS='-Wl,-rpath,${LIB_RUNTIME_DIR}'])
AS_IF([test "${TCL_THREADS}" = "1"], [
# The -pthread needs to go in the LDFLAGS, not LIBS
LIBS=`echo $LIBS | sed s/-pthread//`
CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
LDFLAGS="$LDFLAGS $PTHREAD_LIBS"])
# Version numbers are dot-stripped by system policy.
TCL_TRIM_DOTS=`echo ${VERSION} | tr -d .`
UNSHARED_LIB_SUFFIX='${TCL_TRIM_DOTS}.a'
SHARED_LIB_SUFFIX='${TCL_TRIM_DOTS}\$\{DBGX\}.so.1'
TCL_LIB_VERSIONS_OK=nodots
case $system in
FreeBSD-3.*)
# Version numbers are dot-stripped by system policy.
TCL_TRIM_DOTS=`echo ${VERSION} | tr -d .`
UNSHARED_LIB_SUFFIX='${TCL_TRIM_DOTS}.a'
SHARED_LIB_SUFFIX='${TCL_TRIM_DOTS}.so'
TCL_LIB_VERSIONS_OK=nodots
;;
esac
;;
Darwin-*)
CFLAGS_OPTIMIZE="-Os"
@@ -1705,7 +1719,7 @@ dnl AC_CHECK_TOOL(AR, ar)
AS_IF([test $tcl_cv_ld_single_module = yes], [
SHLIB_LD="${SHLIB_LD} -Wl,-single_module"
])
# TEA specific: link shlib with current and compatiblity version flags
# TEA specific: link shlib with current and compatibility version flags
vers=`echo ${PACKAGE_VERSION} | sed -e 's/^\([[0-9]]\{1,5\}\)\(\(\.[[0-9]]\{1,3\}\)\{0,2\}\).*$/\1\2/p' -e d`
SHLIB_LD="${SHLIB_LD} -current_version ${vers:-0} -compatibility_version ${vers:-0}"
SHLIB_SUFFIX=".dylib"
@@ -1817,8 +1831,8 @@ dnl AC_CHECK_TOOL(AR, ar)
SHLIB_CFLAGS="-fPIC -melf"
LDFLAGS="$LDFLAGS -melf -Wl,-Bexport"
], [
SHLIB_CFLAGS="-Kpic -belf"
LDFLAGS="$LDFLAGS -belf -Wl,-Bexport"
SHLIB_CFLAGS="-Kpic -belf"
LDFLAGS="$LDFLAGS -belf -Wl,-Bexport"
])
SHLIB_LD="ld -G"
SHLIB_LD_LIBS=""
@@ -1941,6 +1955,24 @@ dnl AC_CHECK_TOOL(AR, ar)
LD_SEARCH_FLAGS='-R ${LIB_RUNTIME_DIR}'
])
;;
UNIX_SV* | UnixWare-5*)
SHLIB_CFLAGS="-KPIC"
SHLIB_LD='${CC} -G'
SHLIB_LD_LIBS=""
SHLIB_SUFFIX=".so"
# Some UNIX_SV* systems (unixware 1.1.2 for example) have linkers
# that don't grok the -Bexport option. Test that it does.
AC_CACHE_CHECK([for ld accepts -Bexport flag], tcl_cv_ld_Bexport, [
hold_ldflags=$LDFLAGS
LDFLAGS="$LDFLAGS -Wl,-Bexport"
AC_TRY_LINK(, [int i;], tcl_cv_ld_Bexport=yes, tcl_cv_ld_Bexport=no)
LDFLAGS=$hold_ldflags])
AS_IF([test $tcl_cv_ld_Bexport = yes], [
LDFLAGS="$LDFLAGS -Wl,-Bexport"
])
CC_SEARCH_FLAGS=""
LD_SEARCH_FLAGS=""
;;
esac
AS_IF([test "$do64bit" = yes -a "$do64bit_ok" = no], [
@@ -1965,7 +1997,7 @@ dnl # preprocessing tests use only CPPFLAGS.
case $system in
AIX-*) ;;
BSD/OS*) ;;
CYGWIN_*) ;;
CYGWIN_*|MINGW32_*) ;;
IRIX*) ;;
NetBSD-*|FreeBSD-*|OpenBSD-*) ;;
Darwin-*) ;;
@@ -1977,15 +2009,109 @@ dnl # preprocessing tests use only CPPFLAGS.
AS_IF([test "$tcl_cv_cc_visibility_hidden" != yes], [
AC_DEFINE(MODULE_SCOPE, [extern],
[No Compiler support for module scope symbols])
AC_DEFINE(NO_VIZ)
])
AS_IF([test "$SHARED_LIB_SUFFIX" = ""], [
# TEA specific: use PACKAGE_VERSION instead of VERSION
SHARED_LIB_SUFFIX='${PACKAGE_VERSION}${SHLIB_SUFFIX}'])
# TEA specific: use PACKAGE_VERSION instead of VERSION
SHARED_LIB_SUFFIX='${PACKAGE_VERSION}${SHLIB_SUFFIX}'])
AS_IF([test "$UNSHARED_LIB_SUFFIX" = ""], [
# TEA specific: use PACKAGE_VERSION instead of VERSION
UNSHARED_LIB_SUFFIX='${PACKAGE_VERSION}.a'])
# TEA specific: use PACKAGE_VERSION instead of VERSION
UNSHARED_LIB_SUFFIX='${PACKAGE_VERSION}.a'])
if test "${GCC}" = "yes" -a ${SHLIB_SUFFIX} = ".dll"; then
AC_CACHE_CHECK(for SEH support in compiler,
tcl_cv_seh,
AC_TRY_RUN([
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#undef WIN32_LEAN_AND_MEAN
int main(int argc, char** argv) {
int a, b = 0;
__try {
a = 666 / b;
}
__except (EXCEPTION_EXECUTE_HANDLER) {
return 0;
}
return 1;
}
],
tcl_cv_seh=yes,
tcl_cv_seh=no,
tcl_cv_seh=no)
)
if test "$tcl_cv_seh" = "no" ; then
AC_DEFINE(HAVE_NO_SEH, 1,
[Defined when mingw does not support SEH])
fi
#
# Check to see if the excpt.h include file provided contains the
# definition for EXCEPTION_DISPOSITION; if not, which is the case
# with Cygwin's version as of 2002-04-10, define it to be int,
# sufficient for getting the current code to work.
#
AC_CACHE_CHECK(for EXCEPTION_DISPOSITION support in include files,
tcl_cv_eh_disposition,
AC_TRY_COMPILE([
# define WIN32_LEAN_AND_MEAN
# include <windows.h>
# undef WIN32_LEAN_AND_MEAN
],[
EXCEPTION_DISPOSITION x;
],
tcl_cv_eh_disposition=yes,
tcl_cv_eh_disposition=no)
)
if test "$tcl_cv_eh_disposition" = "no" ; then
AC_DEFINE(EXCEPTION_DISPOSITION, int,
[Defined when cygwin/mingw does not support EXCEPTION DISPOSITION])
fi
# Check to see if winnt.h defines CHAR, SHORT, and LONG
# even if VOID has already been #defined. The win32api
# used by mingw and cygwin is known to do this.
AC_CACHE_CHECK(for winnt.h that ignores VOID define,
tcl_cv_winnt_ignore_void,
AC_TRY_COMPILE([
#define VOID void
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#undef WIN32_LEAN_AND_MEAN
], [
CHAR c;
SHORT s;
LONG l;
],
tcl_cv_winnt_ignore_void=yes,
tcl_cv_winnt_ignore_void=no)
)
if test "$tcl_cv_winnt_ignore_void" = "yes" ; then
AC_DEFINE(HAVE_WINNT_IGNORE_VOID, 1,
[Defined when cygwin/mingw ignores VOID define in winnt.h])
fi
fi
# See if the compiler supports casting to a union type.
# This is used to stop gcc from printing a compiler
# warning when initializing a union member.
AC_CACHE_CHECK(for cast to union support,
tcl_cv_cast_to_union,
AC_TRY_COMPILE([],
[
union foo { int i; double d; };
union foo f = (union foo) (int) 0;
],
tcl_cv_cast_to_union=yes,
tcl_cv_cast_to_union=no)
)
if test "$tcl_cv_cast_to_union" = "yes"; then
AC_DEFINE(HAVE_CAST_TO_UNION, 1,
[Defined when compiler supports casting to union type.])
fi
AC_SUBST(CFLAGS_DEBUG)
AC_SUBST(CFLAGS_OPTIMIZE)
@@ -2024,7 +2150,6 @@ dnl # preprocessing tests use only CPPFLAGS.
# USE_TERMIOS
# USE_TERMIO
# USE_SGTTY
#
#--------------------------------------------------------------------
AC_DEFUN([TEA_SERIAL_PORT], [
@@ -2236,7 +2361,6 @@ closedir(d);
# XINCLUDES
# XLIBSW
# PKG_LIBS (appends to)
#
#--------------------------------------------------------------------
AC_DEFUN([TEA_PATH_X], [
@@ -2250,9 +2374,9 @@ AC_DEFUN([TEA_PATH_UNIX_X], [
not_really_there=""
if test "$no_x" = ""; then
if test "$x_includes" = ""; then
AC_TRY_CPP([#include <X11/XIntrinsic.h>], , not_really_there="yes")
AC_TRY_CPP([#include <X11/Xlib.h>], , not_really_there="yes")
else
if test ! -r $x_includes/X11/Intrinsic.h; then
if test ! -r $x_includes/X11/Xlib.h; then
not_really_there="yes"
fi
fi
@@ -2260,11 +2384,11 @@ AC_DEFUN([TEA_PATH_UNIX_X], [
if test "$no_x" = "yes" -o "$not_really_there" = "yes"; then
AC_MSG_CHECKING([for X11 header files])
found_xincludes="no"
AC_TRY_CPP([#include <X11/Intrinsic.h>], found_xincludes="yes", found_xincludes="no")
AC_TRY_CPP([#include <X11/Xlib.h>], found_xincludes="yes", found_xincludes="no")
if test "$found_xincludes" = "no"; then
dirs="/usr/unsupported/include /usr/local/include /usr/X386/include /usr/X11R6/include /usr/X11R5/include /usr/include/X11R5 /usr/include/X11R4 /usr/openwin/include /usr/X11/include /usr/sww/include"
for i in $dirs ; do
if test -r $i/X11/Intrinsic.h; then
if test -r $i/X11/Xlib.h; then
AC_MSG_RESULT([$i])
XINCLUDES=" -I$i"
found_xincludes="yes"
@@ -2332,7 +2456,6 @@ AC_DEFUN([TEA_PATH_UNIX_X], [
# HAVE_SYS_FILIO_H
# USE_FIONBIO
# O_NONBLOCK
#
#--------------------------------------------------------------------
AC_DEFUN([TEA_BLOCKING_STYLE], [
@@ -2367,7 +2490,6 @@ AC_DEFUN([TEA_BLOCKING_STYLE], [
# HAVE_TM_GMTOFF
# HAVE_TM_TZADJ
# HAVE_TIMEZONE_VAR
#
#--------------------------------------------------------------------
AC_DEFUN([TEA_TIME_HANDLER], [
@@ -2436,7 +2558,6 @@ AC_DEFUN([TEA_TIME_HANDLER], [
#
# Might define some of the following vars:
# strtod (=fixstrtod)
#
#--------------------------------------------------------------------
AC_DEFUN([TEA_BUGGY_STRTOD], [
@@ -2487,7 +2608,7 @@ AC_DEFUN([TEA_BUGGY_STRTOD], [
#
# Results:
#
# Subst's the following var:
# Substitutes the following vars:
# TCL_LIBS
# MATH_LIBS
#
@@ -2496,7 +2617,6 @@ AC_DEFUN([TEA_BUGGY_STRTOD], [
#
# Might define the following vars:
# HAVE_NET_ERRNO_H
#
#--------------------------------------------------------------------
AC_DEFUN([TEA_TCL_LINK_LIBS], [
@@ -2574,7 +2694,6 @@ AC_DEFUN([TEA_TCL_LINK_LIBS], [
# _ISOC99_SOURCE
# _LARGEFILE64_SOURCE
# _LARGEFILE_SOURCE64
#
#--------------------------------------------------------------------
AC_DEFUN([TEA_TCL_EARLY_FLAG],[
@@ -2622,7 +2741,6 @@ AC_DEFUN([TEA_TCL_EARLY_FLAGS],[
# HAVE_STRUCT_DIRENT64
# HAVE_STRUCT_STAT64
# HAVE_TYPE_OFF64_T
#
#--------------------------------------------------------------------
AC_DEFUN([TEA_TCL_64BIT_FLAGS], [
@@ -2654,7 +2772,7 @@ AC_DEFUN([TEA_TCL_64BIT_FLAGS], [
# Now check for auxiliary declarations
AC_CACHE_CHECK([for struct dirent64], tcl_cv_struct_dirent64,[
AC_TRY_COMPILE([#include <sys/types.h>
#include <sys/dirent.h>],[struct dirent64 p;],
#include <dirent.h>],[struct dirent64 p;],
tcl_cv_struct_dirent64=yes,tcl_cv_struct_dirent64=no)])
if test "x${tcl_cv_struct_dirent64}" = "xyes" ; then
AC_DEFINE(HAVE_STRUCT_DIRENT64, 1, [Is 'struct dirent64' in <sys/types.h>?])
@@ -2739,6 +2857,13 @@ TEA version not specified.])
else
AC_MSG_RESULT([ok (TEA ${TEA_VERSION})])
fi
# If the user did not set CFLAGS, set it now to keep macros
# like AC_PROG_CC and AC_TRY_COMPILE from adding "-g -O2".
if test "${CFLAGS+set}" != "set" ; then
CFLAGS=""
fi
case "`uname -s`" in
*win32*|*WIN32*|*MINGW32_*)
AC_CHECK_PROG(CYGPATH, cygpath, cygpath -w, echo)
@@ -2752,8 +2877,17 @@ TEA version not specified.])
;;
*)
CYGPATH=echo
EXEEXT=""
TEA_PLATFORM="unix"
# Maybe we are cross-compiling....
case ${host_alias} in
*mingw32*)
EXEEXT=".exe"
TEA_PLATFORM="windows"
;;
*)
EXEEXT=""
TEA_PLATFORM="unix"
;;
esac
;;
esac
@@ -2766,6 +2900,8 @@ TEA version not specified.])
exec_prefix=$prefix
fi
AC_MSG_NOTICE([configuring ${PACKAGE_NAME} ${PACKAGE_VERSION}])
AC_SUBST(EXEEXT)
AC_SUBST(CYGPATH)
@@ -3001,6 +3137,22 @@ AC_DEFUN([TEA_ADD_CFLAGS], [
AC_SUBST(PKG_CFLAGS)
])
#------------------------------------------------------------------------
# TEA_ADD_CLEANFILES --
#
# Specify one or more CLEANFILES.
#
# Arguments:
# one or more file names (or glob patterns such as *.manifest) to
# add to the CLEANFILES list
#
# Results:
#
# Appends to CLEANFILES, already defined for subst in LOAD_TCLCONFIG.
# The macro arguments are added after the existing value of the
# CLEANFILES shell variable, separated from it by a space.
#------------------------------------------------------------------------
AC_DEFUN([TEA_ADD_CLEANFILES], [
CLEANFILES="$CLEANFILES $@"
])
#------------------------------------------------------------------------
# TEA_PREFIX --
#
@@ -3055,16 +3207,17 @@ AC_DEFUN([TEA_SETUP_COMPILER_CC], [
# Don't put any macros that use the compiler (e.g. AC_TRY_COMPILE)
# in this macro, they need to go into TEA_SETUP_COMPILER instead.
# If the user did not set CFLAGS, set it now to keep
# the AC_PROG_CC macro from adding "-g -O2".
if test "${CFLAGS+set}" != "set" ; then
CFLAGS=""
fi
AC_PROG_CC
AC_PROG_CPP
AC_PROG_INSTALL
INSTALL="\$(SHELL) \$(srcdir)/tclconfig/install-sh -c"
AC_SUBST(INSTALL)
INSTALL_DATA="\${INSTALL} -m 644"
AC_SUBST(INSTALL_DATA)
INSTALL_PROGRAM="\${INSTALL}"
AC_SUBST(INSTALL_PROGRAM)
INSTALL_SCRIPT="\${INSTALL}"
AC_SUBST(INSTALL_SCRIPT)
#--------------------------------------------------------------------
# Checks to see if the make program sets the $MAKE variable.
@@ -3076,7 +3229,7 @@ AC_DEFUN([TEA_SETUP_COMPILER_CC], [
# Find ranlib
#--------------------------------------------------------------------
AC_PROG_RANLIB
AC_CHECK_TOOL(RANLIB, ranlib)
#--------------------------------------------------------------------
# Determines the correct binary file extension (.o, .obj, .exe etc.)
@@ -3155,13 +3308,26 @@ AC_DEFUN([TEA_SETUP_COMPILER], [
# MAKE_SHARED_LIB Makefile rule for building a shared library
# MAKE_STATIC_LIB Makefile rule for building a static library
# MAKE_STUB_LIB Makefile rule for building a stub library
# VC_MANIFEST_EMBED_DLL Makefile rule for embedded VC manifest in DLL
# VC_MANIFEST_EMBED_EXE Makefile rule for embedded VC manifest in EXE
#------------------------------------------------------------------------
AC_DEFUN([TEA_MAKE_LIB], [
if test "${TEA_PLATFORM}" = "windows" -a "$GCC" != "yes"; then
MAKE_STATIC_LIB="\${STLIB_LD} -out:\[$]@ \$(PKG_OBJECTS)"
MAKE_SHARED_LIB="\${SHLIB_LD} \${SHLIB_LD_LIBS} \${LDFLAGS_DEFAULT} -out:\[$]@ \$(PKG_OBJECTS)"
MAKE_STUB_LIB="\${STLIB_LD} -out:\[$]@ \$(PKG_STUB_OBJECTS)"
AC_EGREP_CPP([manifest needed], [
#if defined(_MSC_VER) && _MSC_VER >= 1400
print("manifest needed")
#endif
], [
# Could do a CHECK_PROG for mt, but should always be with MSVC8+
VC_MANIFEST_EMBED_DLL="if test -f \[$]@.manifest ; then mt.exe -nologo -manifest \[$]@.manifest -outputresource:\[$]@\;2 ; fi"
VC_MANIFEST_EMBED_EXE="if test -f \[$]@.manifest ; then mt.exe -nologo -manifest \[$]@.manifest -outputresource:\[$]@\;1 ; fi"
MAKE_SHARED_LIB="${MAKE_SHARED_LIB} ; ${VC_MANIFEST_EMBED_DLL}"
TEA_ADD_CLEANFILES([*.manifest])
])
MAKE_STUB_LIB="\${STLIB_LD} -nodefaultlib -out:\[$]@ \$(PKG_STUB_OBJECTS)"
else
MAKE_STATIC_LIB="\${STLIB_LD} \[$]@ \$(PKG_OBJECTS)"
MAKE_SHARED_LIB="\${SHLIB_LD} -o \[$]@ \$(PKG_OBJECTS) \${SHLIB_LD_LIBS}"
@@ -3184,13 +3350,19 @@ AC_DEFUN([TEA_MAKE_LIB], [
if test "${SHARED_BUILD}" = "1" ; then
# We force the unresolved linking of symbols that are really in
# the private libraries of Tcl and Tk.
SHLIB_LD_LIBS="${SHLIB_LD_LIBS} \"`${CYGPATH} ${TCL_BIN_DIR}/${TCL_STUB_LIB_FILE}`\""
if test x"${TK_BIN_DIR}" != x ; then
SHLIB_LD_LIBS="${SHLIB_LD_LIBS} \"`${CYGPATH} ${TK_BIN_DIR}/${TK_STUB_LIB_FILE}`\""
fi
SHLIB_LD_LIBS="${SHLIB_LD_LIBS} \"`${CYGPATH} ${TCL_BIN_DIR}/${TCL_STUB_LIB_FILE}`\""
if test "$GCC" = "yes"; then
SHLIB_LD_LIBS="${SHLIB_LD_LIBS} -static-libgcc"
fi
eval eval "PKG_LIB_FILE=${PACKAGE_NAME}${SHARED_LIB_SUFFIX}"
else
eval eval "PKG_LIB_FILE=${PACKAGE_NAME}${UNSHARED_LIB_SUFFIX}"
if test "$GCC" = "yes"; then
PKG_LIB_FILE=lib${PKG_LIB_FILE}
fi
fi
# Some packages build their own stubs libraries
eval eval "PKG_STUB_LIB_FILE=${PACKAGE_NAME}stub${UNSHARED_LIB_SUFFIX}"
@@ -3228,6 +3400,8 @@ AC_DEFUN([TEA_MAKE_LIB], [
AC_SUBST(MAKE_STATIC_LIB)
AC_SUBST(MAKE_STUB_LIB)
AC_SUBST(RANLIB_STUB)
AC_SUBST(VC_MANIFEST_EMBED_DLL)
AC_SUBST(VC_MANIFEST_EMBED_EXE)
])
#------------------------------------------------------------------------
@@ -3316,7 +3490,7 @@ AC_DEFUN([TEA_LIB_SPEC], [
#
# Results:
#
# Substs the following vars:
# Substitutes the following vars:
# TCL_TOP_DIR_NATIVE
# TCL_INCLUDES
#------------------------------------------------------------------------
@@ -3394,7 +3568,7 @@ AC_DEFUN([TEA_PRIVATE_TCL_HEADERS], [
# Adds a --with-tclinclude switch to configure.
# Result is cached.
#
# Substs the following vars:
# Substitutes the following vars:
# TCL_INCLUDES
#------------------------------------------------------------------------
@@ -3484,7 +3658,7 @@ AC_DEFUN([TEA_PUBLIC_TCL_HEADERS], [
#
# Results:
#
# Substs the following vars:
# Substitutes the following vars:
# TK_INCLUDES
#------------------------------------------------------------------------
@@ -3573,7 +3747,7 @@ AC_DEFUN([TEA_PRIVATE_TK_HEADERS], [
# Adds a --with-tkinclude switch to configure.
# Result is cached.
#
# Substs the following vars:
# Substitutes the following vars:
# TK_INCLUDES
#------------------------------------------------------------------------
@@ -3791,11 +3965,10 @@ AC_DEFUN([TEA_PATH_CONFIG], [
#
# Results:
#
# Subst the following vars:
# Substitutes the following vars:
# $1_SRC_DIR
# $1_LIB_FILE
# $1_LIB_SPEC
#
#------------------------------------------------------------------------
AC_DEFUN([TEA_LOAD_CONFIG], [
@@ -3822,6 +3995,8 @@ AC_DEFUN([TEA_LOAD_CONFIG], [
$1_LIB_SPEC=${$1_BUILD_LIB_SPEC}
$1_STUB_LIB_SPEC=${$1_BUILD_STUB_LIB_SPEC}
$1_STUB_LIB_PATH=${$1_BUILD_STUB_LIB_PATH}
$1_INCLUDE_SPEC=${$1_BUILD_INCLUDE_SPEC}
$1_LIBRARY_PATH=${$1_LIBRARY_PATH}
fi
AC_SUBST($1_VERSION)
@@ -3854,7 +4029,6 @@ AC_DEFUN([TEA_LOAD_CONFIG], [
#
# Results:
# Adds to LIBS the appropriate extension library
#
#------------------------------------------------------------------------
AC_DEFUN([TEA_LOAD_CONFIG_LIB], [
AC_MSG_CHECKING([For $1 library for LIBS])
@@ -3886,11 +4060,10 @@ AC_DEFUN([TEA_LOAD_CONFIG_LIB], [
# $1
#
# Results:
# Subst the following vars:
#
# Substitutes the following vars:
#------------------------------------------------------------------------
AC_DEFUN(TEA_EXPORT_CONFIG, [
AC_DEFUN([TEA_EXPORT_CONFIG], [
#--------------------------------------------------------------------
# These are for $1Config.sh
#--------------------------------------------------------------------
@@ -3990,8 +4163,6 @@ AC_DEFUN([TEA_PATH_CELIB], [
fi
fi
])
# Local Variables:
# mode: autoconf
# End:

View File

@@ -27,6 +27,9 @@
/* Define to 1 if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H
/* Define to 1 if you have the `isnan' function. */
#undef HAVE_ISNAN
/* Define to 1 if you have the `localtime_r' function. */
#undef HAVE_LOCALTIME_R
@@ -48,6 +51,9 @@
/* Define to 1 if you have the <stdlib.h> header file. */
#undef HAVE_STDLIB_H
/* Define to 1 if you have the strchrnul() function */
#undef HAVE_STRCHRNUL
/* Define to 1 if you have the <strings.h> header file. */
#undef HAVE_STRINGS_H

6773
configure vendored

File diff suppressed because it is too large Load Diff

View File

@@ -69,19 +69,6 @@
# The filename extension for executables on the
# target platform. "" for Unix and ".exe" for windows.
#
# The generated configure script will make an attempt to guess
# at all of the above parameters. You can override any of
# the guesses by setting the environment variable named
# "config_AAAA" where "AAAA" is the name of the parameter
# described above. (Exception: srcdir cannot be set this way.)
# If you have a file that sets one or more of these environment
# variables, you can invoke configure as follows:
#
# configure --with-hints=FILE
#
# where FILE is the name of the file that sets the environment
# variables. FILE should be an absolute pathname.
#
# This configure.in file is easy to reuse on other projects. Just
# change the argument to AC_INIT(). And disable any features that
# you don't need (for example BLT) by erasing or commenting out
@@ -98,11 +85,6 @@ AC_MSG_ERROR([configure script is out of date:
please regen with autoconf])
fi
dnl Put the RCS revision string after AC_INIT so that it will also
dnl show in in configure.
# The following RCS revision string applies to configure.in
# $Revision: 1.56 $
#########
# Programs needed
#
@@ -127,7 +109,7 @@ AC_CHECK_HEADERS([sys/types.h stdlib.h stdint.h inttypes.h malloc.h])
#########
# Figure out whether or not we have these functions
#
AC_CHECK_FUNCS([usleep fdatasync localtime_r gmtime_r localtime_s utime malloc_usable_size])
AC_CHECK_FUNCS([fdatasync gmtime_r isnan localtime_r localtime_s malloc_usable_size strchrnul usleep utime])
#########
# By default, we use the amalgamation (this may be changed below...)
@@ -180,41 +162,6 @@ VERSION_NUMBER=[`cat $srcdir/VERSION \
AC_MSG_NOTICE(Version number set to $VERSION_NUMBER)
AC_SUBST(VERSION_NUMBER)
#########
# Check to see if the --with-hints=FILE option is used. If there is none,
# then check for a files named "$host.hints" and ../$hosts.hints where
# $host is the hostname of the build system. If still no hints are
# found, try looking in $system.hints and ../$system.hints where
# $system is the result of uname -s.
#
AC_ARG_WITH(hints,
AC_HELP_STRING([--with-hints=FILE],[Read configuration options from FILE]),
hints=$withval)
if test "$hints" = ""; then
host=`hostname | sed 's/\..*//'`
if test -r $host.hints; then
hints=$host.hints
else
if test -r ../$host.hints; then
hints=../$host.hints
fi
fi
fi
if test "$hints" = ""; then
sys=`uname -s`
if test -r $sys.hints; then
hints=$sys.hints
else
if test -r ../$sys.hints; then
hints=../$sys.hints
fi
fi
fi
if test "$hints" != ""; then
AC_MSG_RESULT(reading hints from $hints)
. $hints
fi
#########
# Locate a compiler for the build machine. This compiler should
# generate command-line programs that run on the build machine.
@@ -236,7 +183,7 @@ AC_SUBST(BUILD_CC)
# Do we want to support multithreaded use of sqlite
#
AC_ARG_ENABLE(threadsafe,
AC_HELP_STRING([--enable-threadsafe],[Support threadsafe operation]),,enable_threadsafe=yes)
AC_HELP_STRING([--disable-threadsafe],[Disable mutexing]),,enable_threadsafe=yes)
AC_MSG_CHECKING([whether to support threadsafe operation])
if test "$enable_threadsafe" = "no"; then
SQLITE_THREADSAFE=0
@@ -251,23 +198,6 @@ if test "$SQLITE_THREADSAFE" = "1"; then
AC_SEARCH_LIBS(pthread_create, pthread)
fi
##########
# Do we want to allow a connection created in one thread to be used
# in another thread. This does not work on many Linux systems (ex: RedHat 9)
# due to bugs in the threading implementations. This is thus off by default.
#
AC_ARG_ENABLE(cross-thread-connections,
AC_HELP_STRING([--enable-cross-thread-connections],[Allow connection sharing across threads]),,enable_xthreadconnect=no)
AC_MSG_CHECKING([whether to allow connections to be shared across threads])
if test "$enable_xthreadconnect" = "no"; then
XTHREADCONNECT=''
AC_MSG_RESULT([no])
else
XTHREADCONNECT='-DSQLITE_ALLOW_XTHREAD_CONNECT=1'
AC_MSG_RESULT([yes])
fi
AC_SUBST(XTHREADCONNECT)
##########
# Do we want to support release
#
@@ -390,6 +320,20 @@ if test "${use_tcl}" = "yes" ; then
fi
fi
# On ubuntu 14.10, $auto_path on tclsh is not quite correct.
# So try again after applying corrections.
if test x"${ac_cv_c_tclconfig}" = x ; then
if test x"$cross_compiling" = xno; then
for i in `echo 'puts stdout $auto_path' | ${TCLSH_CMD} | sed 's,/tcltk/tcl,/tcl,g'`
do
if test -f "$i/tclConfig.sh" ; then
ac_cv_c_tclconfig="$i"
break
fi
done
fi
fi
# then check for a private Tcl installation
if test x"${ac_cv_c_tclconfig}" = x ; then
for i in \
@@ -500,6 +444,7 @@ if test "${use_tcl}" = "yes" ; then
AC_SUBST(TCL_STUB_LIB_FILE)
AC_SUBST(TCL_STUB_LIB_FLAG)
AC_SUBST(TCL_STUB_LIB_SPEC)
AC_SUBST(TCL_SHLIB_SUFFIX)
fi
fi
if test "${use_tcl}" = "no" ; then
@@ -605,9 +550,9 @@ AC_SUBST(USE_AMALGAMATION)
#########
# See whether we should allow loadable extensions
AC_ARG_ENABLE(load-extension, AC_HELP_STRING([--enable-load-extension],
[Enable loading of external extensions]),
[use_loadextension=$enableval],[use_loadextension=no])
AC_ARG_ENABLE(load-extension, AC_HELP_STRING([--disable-load-extension],
[Disable loading of external extensions]),
[use_loadextension=$enableval],[use_loadextension=yes])
if test "${use_loadextension}" = "yes" ; then
OPT_FEATURE_FLAGS=""
AC_SEARCH_LIBS(dlopen, dl)

View File

@@ -1636,6 +1636,7 @@ void sqlite3async_run(void){
** Control/configure the asynchronous IO system.
*/
int sqlite3async_control(int op, ...){
int rc = SQLITE_OK;
va_list ap;
va_start(ap, op);
switch( op ){
@@ -1645,7 +1646,8 @@ int sqlite3async_control(int op, ...){
&& eWhen!=SQLITEASYNC_HALT_NOW
&& eWhen!=SQLITEASYNC_HALT_IDLE
){
return SQLITE_MISUSE;
rc = SQLITE_MISUSE;
break;
}
async.eHalt = eWhen;
async_mutex_enter(ASYNC_MUTEX_QUEUE);
@@ -1657,7 +1659,8 @@ int sqlite3async_control(int op, ...){
case SQLITEASYNC_DELAY: {
int iDelay = va_arg(ap, int);
if( iDelay<0 ){
return SQLITE_MISUSE;
rc = SQLITE_MISUSE;
break;
}
async.ioDelay = iDelay;
break;
@@ -1668,7 +1671,8 @@ int sqlite3async_control(int op, ...){
async_mutex_enter(ASYNC_MUTEX_QUEUE);
if( async.nFile || async.pQueueFirst ){
async_mutex_leave(ASYNC_MUTEX_QUEUE);
return SQLITE_MISUSE;
rc = SQLITE_MISUSE;
break;
}
async.bLockFiles = bLock;
async_mutex_leave(ASYNC_MUTEX_QUEUE);
@@ -1692,9 +1696,11 @@ int sqlite3async_control(int op, ...){
}
default:
return SQLITE_ERROR;
rc = SQLITE_ERROR;
break;
}
return SQLITE_OK;
va_end(ap);
return rc;
}
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ASYNCIO) */

View File

@@ -313,6 +313,13 @@ static int fts3EvalStart(Fts3Cursor *pCsr);
static int fts3TermSegReaderCursor(
Fts3Cursor *, const char *, int, int, Fts3MultiSegReader **);
#ifndef SQLITE_AMALGAMATION
# if defined(SQLITE_DEBUG)
int sqlite3Fts3Always(int b){
  /* Debug aid: the caller expects this condition to be true every time. */
  assert( b!=0 );
  return b;
}
int sqlite3Fts3Never(int b){
  /* Debug aid: the caller expects this condition to be false every time. */
  assert( b==0 );
  return b;
}
# endif
#endif
/*
** Write a 64-bit variable-length integer to memory starting at p[0].
** The length of data written will be between 1 and FTS3_VARINT_MAX bytes.
@@ -422,7 +429,7 @@ void sqlite3Fts3Dequote(char *z){
/* If the first byte was a '[', then the close-quote character is a ']' */
if( quote=='[' ) quote = ']';
while( ALWAYS(z[iIn]) ){
while( z[iIn] ){
if( z[iIn]==quote ){
if( z[iIn+1]!=quote ) break;
z[iOut++] = quote;
@@ -501,6 +508,17 @@ static int fts3DisconnectMethod(sqlite3_vtab *pVtab){
return SQLITE_OK;
}
/*
** Replace the error message held in *pzErr. Whatever *pzErr pointed to
** beforehand is released with sqlite3_free(); *pzErr is then set to the
** string produced by formatting zFormat and the trailing arguments with
** sqlite3_vmprintf().
*/
void sqlite3Fts3ErrMsg(char **pzErr, const char *zFormat, ...){
  va_list args;                   /* Trailing printf-style arguments */
  char *zMsg;                     /* Newly formatted message */

  /* The old message is freed before the new one is built, exactly as the
  ** caller-visible contract has always been. */
  sqlite3_free(*pzErr);

  va_start(args, zFormat);
  zMsg = sqlite3_vmprintf(zFormat, args);
  va_end(args);

  *pzErr = zMsg;
}
/*
** Construct one or more SQL statements from the format string given
** and then evaluate those statements. The success code is written
@@ -910,11 +928,16 @@ static char *fts3WriteExprList(Fts3Table *p, const char *zFunc, int *pRc){
** This function is used when parsing the "prefix=" FTS4 parameter.
*/
static int fts3GobbleInt(const char **pp, int *pnOut){
const int MAX_NPREFIX = 10000000;
const char *p; /* Iterator pointer */
int nInt = 0; /* Output value */
for(p=*pp; p[0]>='0' && p[0]<='9'; p++){
nInt = nInt * 10 + (p[0] - '0');
if( nInt>MAX_NPREFIX ){
nInt = 0;
break;
}
}
if( p==*pp ) return SQLITE_ERROR;
*pnOut = nInt;
@@ -957,7 +980,6 @@ static int fts3PrefixParameter(
aIndex = sqlite3_malloc(sizeof(struct Fts3Index) * nIndex);
*apIndex = aIndex;
*pnIndex = nIndex;
if( !aIndex ){
return SQLITE_NOMEM;
}
@@ -967,13 +989,20 @@ static int fts3PrefixParameter(
const char *p = zParam;
int i;
for(i=1; i<nIndex; i++){
int nPrefix;
int nPrefix = 0;
if( fts3GobbleInt(&p, &nPrefix) ) return SQLITE_ERROR;
aIndex[i].nPrefix = nPrefix;
assert( nPrefix>=0 );
if( nPrefix==0 ){
nIndex--;
i--;
}else{
aIndex[i].nPrefix = nPrefix;
}
p++;
}
}
*pnIndex = nIndex;
return SQLITE_OK;
}
@@ -1008,7 +1037,8 @@ static int fts3ContentColumns(
const char *zTbl, /* Name of content table */
const char ***pazCol, /* OUT: Malloc'd array of column names */
int *pnCol, /* OUT: Size of array *pazCol */
int *pnStr /* OUT: Bytes of string content */
int *pnStr, /* OUT: Bytes of string content */
char **pzErr /* OUT: error message */
){
int rc = SQLITE_OK; /* Return code */
char *zSql; /* "SELECT *" statement on zTbl */
@@ -1019,6 +1049,9 @@ static int fts3ContentColumns(
rc = SQLITE_NOMEM;
}else{
rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0);
if( rc!=SQLITE_OK ){
sqlite3Fts3ErrMsg(pzErr, "%s", sqlite3_errmsg(db));
}
}
sqlite3_free(zSql);
@@ -1097,7 +1130,7 @@ static int fts3InitVtab(
const char **aCol; /* Array of column names */
sqlite3_tokenizer *pTokenizer = 0; /* Tokenizer for this table */
int nIndex; /* Size of aIndex[] array */
int nIndex = 0; /* Size of aIndex[] array */
struct Fts3Index *aIndex = 0; /* Array of indexes for this table */
/* The results of parsing supported FTS4 key=value options: */
@@ -1185,13 +1218,13 @@ static int fts3InitVtab(
}
}
if( iOpt==SizeofArray(aFts4Opt) ){
*pzErr = sqlite3_mprintf("unrecognized parameter: %s", z);
sqlite3Fts3ErrMsg(pzErr, "unrecognized parameter: %s", z);
rc = SQLITE_ERROR;
}else{
switch( iOpt ){
case 0: /* MATCHINFO */
if( strlen(zVal)!=4 || sqlite3_strnicmp(zVal, "fts3", 4) ){
*pzErr = sqlite3_mprintf("unrecognized matchinfo: %s", zVal);
sqlite3Fts3ErrMsg(pzErr, "unrecognized matchinfo: %s", zVal);
rc = SQLITE_ERROR;
}
bNoDocsize = 1;
@@ -1219,7 +1252,7 @@ static int fts3InitVtab(
if( (strlen(zVal)!=3 || sqlite3_strnicmp(zVal, "asc", 3))
&& (strlen(zVal)!=4 || sqlite3_strnicmp(zVal, "desc", 4))
){
*pzErr = sqlite3_mprintf("unrecognized order: %s", zVal);
sqlite3Fts3ErrMsg(pzErr, "unrecognized order: %s", zVal);
rc = SQLITE_ERROR;
}
bDescIdx = (zVal[0]=='d' || zVal[0]=='D');
@@ -1270,7 +1303,7 @@ static int fts3InitVtab(
if( nCol==0 ){
sqlite3_free((void*)aCol);
aCol = 0;
rc = fts3ContentColumns(db, argv[1], zContent, &aCol, &nCol, &nString);
rc = fts3ContentColumns(db, argv[1], zContent,&aCol,&nCol,&nString,pzErr);
/* If a languageid= option was specified, remove the language id
** column from the aCol[] array. */
@@ -1305,7 +1338,7 @@ static int fts3InitVtab(
rc = fts3PrefixParameter(zPrefix, &nIndex, &aIndex);
if( rc==SQLITE_ERROR ){
assert( zPrefix );
*pzErr = sqlite3_mprintf("error parsing prefix parameter: %s", zPrefix);
sqlite3Fts3ErrMsg(pzErr, "error parsing prefix parameter: %s", zPrefix);
}
if( rc!=SQLITE_OK ) goto fts3_init_out;
@@ -1333,7 +1366,7 @@ static int fts3InitVtab(
p->bHasStat = isFts4;
p->bFts4 = isFts4;
p->bDescIdx = bDescIdx;
p->bAutoincrmerge = 0xff; /* 0xff means setting unknown */
p->nAutoincrmerge = 0xff; /* 0xff means setting unknown */
p->zContentTbl = zContent;
p->zLanguageid = zLanguageid;
zContent = 0;
@@ -1376,7 +1409,9 @@ static int fts3InitVtab(
int n = (int)strlen(p->azColumn[iCol]);
for(i=0; i<nNotindexed; i++){
char *zNot = azNotindexed[i];
if( zNot && 0==sqlite3_strnicmp(p->azColumn[iCol], zNot, n) ){
if( zNot && n==(int)strlen(zNot)
&& 0==sqlite3_strnicmp(p->azColumn[iCol], zNot, n)
){
p->abNotindexed[iCol] = 1;
sqlite3_free(zNot);
azNotindexed[i] = 0;
@@ -1385,7 +1420,7 @@ static int fts3InitVtab(
}
for(i=0; i<nNotindexed; i++){
if( azNotindexed[i] ){
*pzErr = sqlite3_mprintf("no such column: %s", azNotindexed[i]);
sqlite3Fts3ErrMsg(pzErr, "no such column: %s", azNotindexed[i]);
rc = SQLITE_ERROR;
}
}
@@ -1393,7 +1428,7 @@ static int fts3InitVtab(
if( rc==SQLITE_OK && (zCompress==0)!=(zUncompress==0) ){
char const *zMiss = (zCompress==0 ? "compress" : "uncompress");
rc = SQLITE_ERROR;
*pzErr = sqlite3_mprintf("missing %s parameter in fts4 constructor", zMiss);
sqlite3Fts3ErrMsg(pzErr, "missing %s parameter in fts4 constructor", zMiss);
}
p->zReadExprlist = fts3ReadExprList(p, zUncompress, &rc);
p->zWriteExprlist = fts3WriteExprList(p, zCompress, &rc);
@@ -1410,10 +1445,7 @@ static int fts3InitVtab(
** addition of a %_stat table so that it can use incremental merge.
*/
if( !isFts4 && !isCreate ){
int rc2 = SQLITE_OK;
fts3DbExec(&rc2, db, "SELECT 1 FROM %Q.'%q_stat' WHERE id=2",
p->zDb, p->zName);
if( rc2==SQLITE_OK ) p->bHasStat = 1;
p->bHasStat = 2;
}
/* Figure out the page-size for the database. This is required in order to
@@ -1472,6 +1504,19 @@ static int fts3CreateMethod(
return fts3InitVtab(1, db, pAux, argc, argv, ppVtab, pzErr);
}
/*
** Store nRow in pIdxInfo->estimatedRows.
**
** The store only happens when this file was compiled against headers whose
** SQLITE_VERSION_NUMBER is at least 3008002 and the SQLite library in use
** at run time also reports a version number of at least 3008002. In every
** other case this function does nothing.
*/
static void fts3SetEstimatedRows(sqlite3_index_info *pIdxInfo, i64 nRow){
#if SQLITE_VERSION_NUMBER>=3008002
  /* Run-time library is too old to know about estimatedRows: leave it. */
  if( sqlite3_libversion_number()<3008002 ) return;
  pIdxInfo->estimatedRows = nRow;
#endif
}
/*
** Implementation of the xBestIndex method for FTS3 tables. There
** are three possible strategies, in order of preference:
@@ -1499,7 +1544,20 @@ static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
for(i=0; i<pInfo->nConstraint; i++){
int bDocid; /* True if this constraint is on docid */
struct sqlite3_index_constraint *pCons = &pInfo->aConstraint[i];
if( pCons->usable==0 ) continue;
if( pCons->usable==0 ){
if( pCons->op==SQLITE_INDEX_CONSTRAINT_MATCH ){
/* There exists an unusable MATCH constraint. This means that if
** the planner does elect to use the results of this call as part
** of the overall query plan the user will see an "unable to use
** function MATCH in the requested context" error. To discourage
** this, return a very high cost here. */
pInfo->idxNum = FTS3_FULLSCAN_SEARCH;
pInfo->estimatedCost = 1e50;
fts3SetEstimatedRows(pInfo, ((sqlite3_int64)1) << 50);
return SQLITE_OK;
}
continue;
}
bDocid = (pCons->iColumn<0 || pCons->iColumn==p->nColumn+1);
@@ -1617,7 +1675,7 @@ static int fts3CloseMethod(sqlite3_vtab_cursor *pCursor){
sqlite3Fts3ExprFree(pCsr->pExpr);
sqlite3Fts3FreeDeferredTokens(pCsr);
sqlite3_free(pCsr->aDoclist);
sqlite3_free(pCsr->aMatchinfo);
sqlite3Fts3MIBufferFree(pCsr->pMIBuffer);
assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 );
sqlite3_free(pCsr);
return SQLITE_OK;
@@ -1828,7 +1886,7 @@ static int fts3SelectLeaf(
sqlite3_int64 *piLeaf, /* Selected leaf node */
sqlite3_int64 *piLeaf2 /* Selected leaf node */
){
int rc; /* Return code */
int rc = SQLITE_OK; /* Return code */
int iHeight; /* Height of this node in tree */
assert( piLeaf || piLeaf2 );
@@ -1839,7 +1897,7 @@ static int fts3SelectLeaf(
if( rc==SQLITE_OK && iHeight>1 ){
char *zBlob = 0; /* Blob read from %_segments table */
int nBlob; /* Size of zBlob in bytes */
int nBlob = 0; /* Size of zBlob in bytes */
if( piLeaf && piLeaf2 && (*piLeaf!=*piLeaf2) ){
rc = sqlite3Fts3ReadBlock(p, *piLeaf, &zBlob, &nBlob, 0);
@@ -2466,26 +2524,33 @@ static int fts3DoclistOrMerge(
**
** The right-hand input doclist is overwritten by this function.
*/
static void fts3DoclistPhraseMerge(
static int fts3DoclistPhraseMerge(
int bDescDoclist, /* True if arguments are desc */
int nDist, /* Distance from left to right (1=adjacent) */
char *aLeft, int nLeft, /* Left doclist */
char *aRight, int *pnRight /* IN/OUT: Right/output doclist */
char **paRight, int *pnRight /* IN/OUT: Right/output doclist */
){
sqlite3_int64 i1 = 0;
sqlite3_int64 i2 = 0;
sqlite3_int64 iPrev = 0;
char *aRight = *paRight;
char *pEnd1 = &aLeft[nLeft];
char *pEnd2 = &aRight[*pnRight];
char *p1 = aLeft;
char *p2 = aRight;
char *p;
int bFirstOut = 0;
char *aOut = aRight;
char *aOut;
assert( nDist>0 );
if( bDescDoclist ){
aOut = sqlite3_malloc(*pnRight + FTS3_VARINT_MAX);
if( aOut==0 ) return SQLITE_NOMEM;
}else{
aOut = aRight;
}
p = aOut;
fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1);
fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2);
@@ -2514,6 +2579,12 @@ static void fts3DoclistPhraseMerge(
}
*pnRight = (int)(p - aOut);
if( bDescDoclist ){
sqlite3_free(aRight);
*paRight = aOut;
}
return SQLITE_OK;
}
/*
@@ -2638,8 +2709,22 @@ static int fts3TermSelectMerge(
){
if( pTS->aaOutput[0]==0 ){
/* If this is the first term selected, copy the doclist to the output
** buffer using memcpy(). */
pTS->aaOutput[0] = sqlite3_malloc(nDoclist);
** buffer using memcpy().
**
** Add FTS3_VARINT_MAX bytes of unused space to the end of the
** allocation. This is so as to ensure that the buffer is big enough
** to hold the current doclist AND'd with any other doclist. If the
** doclists are stored in order=ASC order, this padding would not be
** required (since the size of [doclistA AND doclistB] is always less
** than or equal to the size of [doclistA] in that case). But this is
** not true for order=DESC. For example, a doclist containing (1, -1)
** may be smaller than (-1), as in the first example the -1 may be stored
** as a single-byte delta, whereas in the second it must be stored as a
** FTS3_VARINT_MAX byte varint.
**
** Similar padding is added in the fts3DoclistOrMerge() function.
*/
pTS->aaOutput[0] = sqlite3_malloc(nDoclist + FTS3_VARINT_MAX + 1);
pTS->anOutput[0] = nDoclist;
if( pTS->aaOutput[0] ){
memcpy(pTS->aaOutput[0], aDoclist, nDoclist);
@@ -2736,7 +2821,7 @@ static int fts3SegReaderCursor(
** calls out here. */
if( iLevel<0 && p->aIndex ){
Fts3SegReader *pSeg = 0;
rc = sqlite3Fts3SegReaderPending(p, iIndex, zTerm, nTerm, isPrefix, &pSeg);
rc = sqlite3Fts3SegReaderPending(p, iIndex, zTerm, nTerm, isPrefix||isScan, &pSeg);
if( rc==SQLITE_OK && pSeg ){
rc = fts3SegReaderCursorAppend(pCsr, pSeg);
}
@@ -3061,7 +3146,7 @@ static int fts3FilterMethod(
int nVal, /* Number of elements in apVal */
sqlite3_value **apVal /* Arguments for the indexing scheme */
){
int rc;
int rc = SQLITE_OK;
char *zSql; /* SQL statement used to access %_content */
int eSearch;
Fts3Table *p = (Fts3Table *)pCursor->pVtab;
@@ -3091,6 +3176,7 @@ static int fts3FilterMethod(
/* In case the cursor has been used before, clear it now. */
sqlite3_finalize(pCsr->pStmt);
sqlite3_free(pCsr->aDoclist);
sqlite3Fts3MIBufferFree(pCsr->pMIBuffer);
sqlite3Fts3ExprFree(pCsr->pExpr);
memset(&pCursor[1], 0, sizeof(Fts3Cursor)-sizeof(sqlite3_vtab_cursor));
@@ -3138,10 +3224,17 @@ static int fts3FilterMethod(
** row by docid.
*/
if( eSearch==FTS3_FULLSCAN_SEARCH ){
zSql = sqlite3_mprintf(
"SELECT %s ORDER BY rowid %s",
p->zReadExprlist, (pCsr->bDesc ? "DESC" : "ASC")
);
if( pDocidGe || pDocidLe ){
zSql = sqlite3_mprintf(
"SELECT %s WHERE rowid BETWEEN %lld AND %lld ORDER BY rowid %s",
p->zReadExprlist, pCsr->iMinDocid, pCsr->iMaxDocid,
(pCsr->bDesc ? "DESC" : "ASC")
);
}else{
zSql = sqlite3_mprintf("SELECT %s ORDER BY rowid %s",
p->zReadExprlist, (pCsr->bDesc ? "DESC" : "ASC")
);
}
if( zSql ){
rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0);
sqlite3_free(zSql);
@@ -3279,7 +3372,10 @@ static int fts3SyncMethod(sqlite3_vtab *pVtab){
Fts3Table *p = (Fts3Table*)pVtab;
int rc = sqlite3Fts3PendingTermsFlush(p);
if( rc==SQLITE_OK && p->bAutoincrmerge==1 && p->nLeafAdd>(nMinMerge/16) ){
if( rc==SQLITE_OK
&& p->nLeafAdd>(nMinMerge/16)
&& p->nAutoincrmerge && p->nAutoincrmerge!=0xff
){
int mxLevel = 0; /* Maximum relative level value in db */
int A; /* Incr-merge parameter A */
@@ -3287,14 +3383,41 @@ static int fts3SyncMethod(sqlite3_vtab *pVtab){
assert( rc==SQLITE_OK || mxLevel==0 );
A = p->nLeafAdd * mxLevel;
A += (A/2);
if( A>(int)nMinMerge ) rc = sqlite3Fts3Incrmerge(p, A, 8);
if( A>(int)nMinMerge ) rc = sqlite3Fts3Incrmerge(p, A, p->nAutoincrmerge);
}
sqlite3Fts3SegmentsClose(p);
return rc;
}
/*
** Resolve the "unknown" state of Fts3Table.bHasStat. If p->bHasStat is 2,
** query the sqlite_master table of database p->zDb for an entry whose
** tbl_name is "<p->zName>_stat". Set p->bHasStat to 1 if such a row is
** found, or to 0 if it is not. If p->bHasStat is not 2 when this function
** is called, it is a no-op.
**
** Return SQLITE_OK if successful, SQLITE_NOMEM if the SQL text cannot be
** allocated, or the error code returned by preparing or finalizing the
** statement. p->bHasStat is left unmodified if an error occurs.
*/
static int fts3SetHasStat(Fts3Table *p){
  char *zQuery;                   /* SQL text of the probe query */
  sqlite3_stmt *pQuery = 0;       /* Compiled version of zQuery */
  int bFound;                     /* True if the %_stat table was found */
  int rc;                         /* Return code */

  /* Nothing to do if the answer is already known. */
  if( p->bHasStat!=2 ) return SQLITE_OK;

  zQuery = sqlite3_mprintf(
      "SELECT 1 FROM %Q.sqlite_master WHERE tbl_name='%q_stat'",
      p->zDb, p->zName
  );
  if( zQuery==0 ) return SQLITE_NOMEM;

  rc = sqlite3_prepare_v2(p->db, zQuery, -1, &pQuery, 0);
  if( rc==SQLITE_OK ){
    bFound = (sqlite3_step(pQuery)==SQLITE_ROW);
    /* Any error hit by sqlite3_step() is reported by sqlite3_finalize() */
    rc = sqlite3_finalize(pQuery);
    if( rc==SQLITE_OK ) p->bHasStat = bFound;
  }
  sqlite3_free(zQuery);
  return rc;
}
/*
** Implementation of xBegin() method.
*/
static int fts3BeginMethod(sqlite3_vtab *pVtab){
Fts3Table *p = (Fts3Table*)pVtab;
@@ -3305,7 +3428,7 @@ static int fts3BeginMethod(sqlite3_vtab *pVtab){
TESTONLY( p->inTransaction = 1 );
TESTONLY( p->mxSavepoint = -1; );
p->nLeafAdd = 0;
return SQLITE_OK;
return fts3SetHasStat(p);
}
/*
@@ -3347,11 +3470,31 @@ static void fts3ReversePoslist(char *pStart, char **ppPoslist){
char *p = &(*ppPoslist)[-2];
char c = 0;
/* Skip backwards past any trailing 0x00 bytes added by NearTrim() */
while( p>pStart && (c=*p--)==0 );
/* Search backwards for a varint with value zero (the end of the previous
** poslist). This is an 0x00 byte preceded by some byte that does not
** have the 0x80 bit set. */
while( p>pStart && (*p & 0x80) | c ){
c = *p--;
}
if( p>pStart ){ p = &p[2]; }
assert( p==pStart || c==0 );
/* At this point p points to that preceding byte without the 0x80 bit
** set. So to find the start of the poslist, skip forward 2 bytes then
** over a varint.
**
** Normally. The other case is that p==pStart and the poslist to return
** is the first in the doclist. In this case do not skip forward 2 bytes.
** The second part of the if condition (c==0 && *ppPoslist>&p[2])
** is required for cases where the first entry of a doclist has an
** empty poslist. For example, if the first docid is 10, a doclist
** that begins with:
**
** 0x0A 0x00 <next docid delta varint>
*/
if( p>pStart || (c==0 && *ppPoslist>&p[2]) ){ p = &p[2]; }
while( *p++&0x80 );
*ppPoslist = p;
}
@@ -3422,6 +3565,8 @@ static void fts3SnippetFunc(
}
if( !zEllipsis || !zEnd || !zStart ){
sqlite3_result_error_nomem(pContext);
}else if( nToken==0 ){
sqlite3_result_text(pContext, "", -1, SQLITE_STATIC);
}else if( SQLITE_OK==fts3CursorSeek(pContext, pCsr) ){
sqlite3Fts3Snippet(pContext, pCsr, zStart, zEnd, zEllipsis, iCol, nToken);
}
@@ -3554,6 +3699,10 @@ static int fts3RenameMethod(
sqlite3 *db = p->db; /* Database connection */
int rc; /* Return Code */
/* At this point it must be known if the %_stat table exists or not.
** So bHasStat may not be 2. */
rc = fts3SetHasStat(p);
/* As it happens, the pending terms table is always empty here. This is
** because an "ALTER TABLE RENAME TABLE" statement inside a transaction
** always opens a savepoint transaction. And the xSavepoint() method
@@ -3561,7 +3710,9 @@ static int fts3RenameMethod(
** PendingTermsFlush() in in case that changes.
*/
assert( p->nPendingData==0 );
rc = sqlite3Fts3PendingTermsFlush(p);
if( rc==SQLITE_OK ){
rc = sqlite3Fts3PendingTermsFlush(p);
}
if( p->zContentTbl==0 ){
fts3DbExec(&rc, db,
@@ -3689,7 +3840,7 @@ static void hashDestroy(void *p){
*/
void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
void sqlite3Fts3PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule);
#ifdef SQLITE_ENABLE_FTS4_UNICODE61
#ifndef SQLITE_DISABLE_FTS3_UNICODE
void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const**ppModule);
#endif
#ifdef SQLITE_ENABLE_ICU
@@ -3707,7 +3858,7 @@ int sqlite3Fts3Init(sqlite3 *db){
Fts3Hash *pHash = 0;
const sqlite3_tokenizer_module *pSimple = 0;
const sqlite3_tokenizer_module *pPorter = 0;
#ifdef SQLITE_ENABLE_FTS4_UNICODE61
#ifndef SQLITE_DISABLE_FTS3_UNICODE
const sqlite3_tokenizer_module *pUnicode = 0;
#endif
@@ -3716,7 +3867,7 @@ int sqlite3Fts3Init(sqlite3 *db){
sqlite3Fts3IcuTokenizerModule(&pIcu);
#endif
#ifdef SQLITE_ENABLE_FTS4_UNICODE61
#ifndef SQLITE_DISABLE_FTS3_UNICODE
sqlite3Fts3UnicodeTokenizer(&pUnicode);
#endif
@@ -3744,7 +3895,7 @@ int sqlite3Fts3Init(sqlite3 *db){
if( sqlite3Fts3HashInsert(pHash, "simple", 7, (void *)pSimple)
|| sqlite3Fts3HashInsert(pHash, "porter", 7, (void *)pPorter)
#ifdef SQLITE_ENABLE_FTS4_UNICODE61
#ifndef SQLITE_DISABLE_FTS3_UNICODE
|| sqlite3Fts3HashInsert(pHash, "unicode61", 10, (void *)pUnicode)
#endif
#ifdef SQLITE_ENABLE_ICU
@@ -3851,14 +4002,17 @@ static void fts3EvalAllocateReaders(
** This function assumes that pList points to a buffer allocated using
** sqlite3_malloc(). This function takes responsibility for eventually
** freeing the buffer.
**
** SQLITE_OK is returned if successful, or SQLITE_NOMEM if an error occurs.
*/
static void fts3EvalPhraseMergeToken(
static int fts3EvalPhraseMergeToken(
Fts3Table *pTab, /* FTS Table pointer */
Fts3Phrase *p, /* Phrase to merge pList/nList into */
int iToken, /* Token pList/nList corresponds to */
char *pList, /* Pointer to doclist */
int nList /* Number of bytes in pList */
){
int rc = SQLITE_OK;
assert( iToken!=p->iDoclistToken );
if( pList==0 ){
@@ -3897,13 +4051,16 @@ static void fts3EvalPhraseMergeToken(
nDiff = p->iDoclistToken - iToken;
}
fts3DoclistPhraseMerge(pTab->bDescIdx, nDiff, pLeft, nLeft, pRight,&nRight);
rc = fts3DoclistPhraseMerge(
pTab->bDescIdx, nDiff, pLeft, nLeft, &pRight, &nRight
);
sqlite3_free(pLeft);
p->doclist.aAll = pRight;
p->doclist.nAll = nRight;
}
if( iToken>p->iDoclistToken ) p->iDoclistToken = iToken;
return rc;
}
/*
@@ -3929,7 +4086,7 @@ static int fts3EvalPhraseLoad(
char *pThis = 0;
rc = fts3TermSelect(pTab, pToken, p->iColumn, &nThis, &pThis);
if( rc==SQLITE_OK ){
fts3EvalPhraseMergeToken(pTab, p, iToken, pThis, nThis);
rc = fts3EvalPhraseMergeToken(pTab, p, iToken, pThis, nThis);
}
}
assert( pToken->pSegcsr==0 );
@@ -4074,7 +4231,6 @@ static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){
int bIncrOk = (bOptOk
&& pCsr->bDesc==pTab->bDescIdx
&& p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0
&& p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0
#ifdef SQLITE_TEST
&& pTab->bNoIncrDoclist==0
#endif
@@ -4194,6 +4350,7 @@ void sqlite3Fts3DoclistNext(
p += sqlite3Fts3GetVarint(p, piDocid);
}else{
fts3PoslistCopy(0, &p);
while( p<&aDoclist[nDoclist] && *p==0 ) p++;
if( p>=&aDoclist[nDoclist] ){
*pbEof = 1;
}else{
@@ -4365,7 +4522,7 @@ static int fts3EvalIncrPhraseNext(
bMaxSet = 1;
}
}
assert( rc!=SQLITE_OK || a[p->nToken-1].bIgnore==0 );
assert( rc!=SQLITE_OK || (p->nToken>=1 && a[p->nToken-1].bIgnore==0) );
assert( rc!=SQLITE_OK || bMaxSet );
/* Keep advancing iterators until they all point to the same document */
@@ -4471,12 +4628,14 @@ static void fts3EvalStartReaders(
){
if( pExpr && SQLITE_OK==*pRc ){
if( pExpr->eType==FTSQUERY_PHRASE ){
int i;
int nToken = pExpr->pPhrase->nToken;
for(i=0; i<nToken; i++){
if( pExpr->pPhrase->aToken[i].pDeferred==0 ) break;
if( nToken ){
int i;
for(i=0; i<nToken; i++){
if( pExpr->pPhrase->aToken[i].pDeferred==0 ) break;
}
pExpr->bDeferred = (i==nToken);
}
pExpr->bDeferred = (i==nToken);
*pRc = fts3EvalPhraseStart(pCsr, 1, pExpr->pPhrase);
}else{
fts3EvalStartReaders(pCsr, pExpr->pLeft, pRc);
@@ -4731,9 +4890,13 @@ static int fts3EvalSelectDeferred(
char *pList = 0;
rc = fts3TermSelect(pTab, pToken, pTC->iCol, &nList, &pList);
assert( rc==SQLITE_OK || pList==0 );
if( rc==SQLITE_OK ){
rc = fts3EvalPhraseMergeToken(
pTab, pTC->pPhrase, pTC->iToken,pList,nList
);
}
if( rc==SQLITE_OK ){
int nCount;
fts3EvalPhraseMergeToken(pTab, pTC->pPhrase, pTC->iToken,pList,nList);
nCount = fts3DoclistCountDocids(
pTC->pPhrase->doclist.aAll, pTC->pPhrase->doclist.nAll
);
@@ -4911,7 +5074,7 @@ static int fts3EvalNearTrim(
** 2. NEAR is treated as AND. If the expression is "x NEAR y", it is
** advanced to point to the next row that matches "x AND y".
**
** See fts3EvalTestDeferredAndNear() for details on testing if a row is
** See sqlite3Fts3EvalTestDeferred() for details on testing if a row is
** really a match, taking into account deferred tokens and NEAR operators.
*/
static void fts3EvalNextRow(
@@ -4958,6 +5121,22 @@ static void fts3EvalNextRow(
}
pExpr->iDocid = pLeft->iDocid;
pExpr->bEof = (pLeft->bEof || pRight->bEof);
if( pExpr->eType==FTSQUERY_NEAR && pExpr->bEof ){
if( pRight->pPhrase && pRight->pPhrase->doclist.aAll ){
Fts3Doclist *pDl = &pRight->pPhrase->doclist;
while( *pRc==SQLITE_OK && pRight->bEof==0 ){
memset(pDl->pList, 0, pDl->nList);
fts3EvalNextRow(pCsr, pRight, pRc);
}
}
if( pLeft->pPhrase && pLeft->pPhrase->doclist.aAll ){
Fts3Doclist *pDl = &pLeft->pPhrase->doclist;
while( *pRc==SQLITE_OK && pLeft->bEof==0 ){
memset(pDl->pList, 0, pDl->nList);
fts3EvalNextRow(pCsr, pLeft, pRc);
}
}
}
}
break;
}
@@ -5115,7 +5294,7 @@ static int fts3EvalNearTest(Fts3Expr *pExpr, int *pRc){
}
/*
** This function is a helper function for fts3EvalTestDeferredAndNear().
** This function is a helper function for sqlite3Fts3EvalTestDeferred().
** Assuming no error occurs or has occurred, It returns non-zero if the
** expression passed as the second argument matches the row that pCsr
** currently points to, or zero if it does not.
@@ -5236,7 +5415,7 @@ static int fts3EvalTestExpr(
** Or, if no error occurs and it seems the current row does match the FTS
** query, return 0.
*/
static int fts3EvalTestDeferredAndNear(Fts3Cursor *pCsr, int *pRc){
int sqlite3Fts3EvalTestDeferred(Fts3Cursor *pCsr, int *pRc){
int rc = *pRc;
int bMiss = 0;
if( rc==SQLITE_OK ){
@@ -5283,7 +5462,7 @@ static int fts3EvalNext(Fts3Cursor *pCsr){
pCsr->isRequireSeek = 1;
pCsr->isMatchinfoNeeded = 1;
pCsr->iPrevId = pExpr->iDocid;
}while( pCsr->isEof==0 && fts3EvalTestDeferredAndNear(pCsr, &rc) );
}while( pCsr->isEof==0 && sqlite3Fts3EvalTestDeferred(pCsr, &rc) );
}
/* Check if the cursor is past the end of the docid range specified
@@ -5330,6 +5509,7 @@ static void fts3EvalRestart(
}
pPhrase->doclist.pNextDocid = 0;
pPhrase->doclist.iDocid = 0;
pPhrase->pOrPoslist = 0;
}
pExpr->iDocid = 0;
@@ -5443,7 +5623,7 @@ static int fts3EvalGatherStats(
pCsr->iPrevId = pRoot->iDocid;
}while( pCsr->isEof==0
&& pRoot->eType==FTSQUERY_NEAR
&& fts3EvalTestDeferredAndNear(pCsr, &rc)
&& sqlite3Fts3EvalTestDeferred(pCsr, &rc)
);
if( rc==SQLITE_OK && pCsr->isEof==0 ){
@@ -5468,7 +5648,6 @@ static int fts3EvalGatherStats(
fts3EvalNextRow(pCsr, pRoot, &rc);
assert( pRoot->bEof==0 );
}while( pRoot->iDocid!=iDocid && rc==SQLITE_OK );
fts3EvalTestDeferredAndNear(pCsr, &rc);
}
}
return rc;
@@ -5575,13 +5754,13 @@ int sqlite3Fts3EvalPhrasePoslist(
iDocid = pExpr->iDocid;
pIter = pPhrase->doclist.pList;
if( iDocid!=pCsr->iPrevId || pExpr->bEof ){
int rc = SQLITE_OK;
int bDescDoclist = pTab->bDescIdx; /* For DOCID_CMP macro */
int iMul; /* +1 if csr dir matches index dir, else -1 */
int bOr = 0;
u8 bEof = 0;
u8 bTreeEof = 0;
Fts3Expr *p; /* Used to iterate from pExpr to root */
Fts3Expr *pNear; /* Most senior NEAR ancestor (or pExpr) */
int bMatch;
/* Check if this phrase descends from an OR expression node. If not,
** return NULL. Otherwise, the entry that corresponds to docid
@@ -5600,74 +5779,62 @@ int sqlite3Fts3EvalPhrasePoslist(
** an incremental phrase. Load the entire doclist for the phrase
** into memory in this case. */
if( pPhrase->bIncr ){
int rc = SQLITE_OK;
int bEofSave = pExpr->bEof;
fts3EvalRestart(pCsr, pExpr, &rc);
while( rc==SQLITE_OK && !pExpr->bEof ){
fts3EvalNextRow(pCsr, pExpr, &rc);
if( bEofSave==0 && pExpr->iDocid==iDocid ) break;
int bEofSave = pNear->bEof;
fts3EvalRestart(pCsr, pNear, &rc);
while( rc==SQLITE_OK && !pNear->bEof ){
fts3EvalNextRow(pCsr, pNear, &rc);
if( bEofSave==0 && pNear->iDocid==iDocid ) break;
}
pIter = pPhrase->doclist.pList;
assert( rc!=SQLITE_OK || pPhrase->bIncr==0 );
if( rc!=SQLITE_OK ) return rc;
}
iMul = ((pCsr->bDesc==bDescDoclist) ? 1 : -1);
while( bTreeEof==1
&& pNear->bEof==0
&& (DOCID_CMP(pNear->iDocid, pCsr->iPrevId) * iMul)<0
){
int rc = SQLITE_OK;
fts3EvalNextRow(pCsr, pExpr, &rc);
if( rc!=SQLITE_OK ) return rc;
iDocid = pExpr->iDocid;
pIter = pPhrase->doclist.pList;
if( bTreeEof ){
while( rc==SQLITE_OK && !pNear->bEof ){
fts3EvalNextRow(pCsr, pNear, &rc);
}
}
if( rc!=SQLITE_OK ) return rc;
bEof = (pPhrase->doclist.nAll==0);
assert( bDescDoclist==0 || bDescDoclist==1 );
assert( pCsr->bDesc==0 || pCsr->bDesc==1 );
bMatch = 1;
for(p=pNear; p; p=p->pLeft){
u8 bEof = 0;
Fts3Expr *pTest = p;
Fts3Phrase *pPh;
assert( pTest->eType==FTSQUERY_NEAR || pTest->eType==FTSQUERY_PHRASE );
if( pTest->eType==FTSQUERY_NEAR ) pTest = pTest->pRight;
assert( pTest->eType==FTSQUERY_PHRASE );
pPh = pTest->pPhrase;
if( bEof==0 ){
pIter = pPh->pOrPoslist;
iDocid = pPh->iOrDocid;
if( pCsr->bDesc==bDescDoclist ){
int dummy;
if( pNear->bEof ){
/* This expression is already at EOF. So position it to point to the
** last entry in the doclist at pPhrase->doclist.aAll[]. Variable
** iDocid is already set for this entry, so all that is required is
** to set pIter to point to the first byte of the last position-list
** in the doclist.
**
** It would also be correct to set pIter and iDocid to zero. In
** this case, the first call to sqltie3Fts4DoclistPrev() below
** would also move the iterator to point to the last entry in the
** doclist. However, this is expensive, as to do so it has to
** iterate through the entire doclist from start to finish (since
** it does not know the docid for the last entry). */
pIter = &pPhrase->doclist.aAll[pPhrase->doclist.nAll-1];
fts3ReversePoslist(pPhrase->doclist.aAll, &pIter);
}
while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)>0 ) && bEof==0 ){
sqlite3Fts3DoclistPrev(
bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll,
&pIter, &iDocid, &dummy, &bEof
);
}
}else{
if( pNear->bEof ){
pIter = 0;
iDocid = 0;
}
bEof = !pPh->doclist.nAll ||
(pIter >= (pPh->doclist.aAll + pPh->doclist.nAll));
while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)<0 ) && bEof==0 ){
sqlite3Fts3DoclistNext(
bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll,
bDescDoclist, pPh->doclist.aAll, pPh->doclist.nAll,
&pIter, &iDocid, &bEof
);
}
}else{
bEof = !pPh->doclist.nAll || (pIter && pIter<=pPh->doclist.aAll);
while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)>0 ) && bEof==0 ){
int dummy;
sqlite3Fts3DoclistPrev(
bDescDoclist, pPh->doclist.aAll, pPh->doclist.nAll,
&pIter, &iDocid, &dummy, &bEof
);
}
}
pPh->pOrPoslist = pIter;
pPh->iOrDocid = iDocid;
if( bEof || iDocid!=pCsr->iPrevId ) bMatch = 0;
}
if( bEof || iDocid!=pCsr->iPrevId ) pIter = 0;
if( bMatch ){
pIter = pPhrase->pOrPoslist;
}else{
pIter = 0;
}
}
if( pIter==0 ) return SQLITE_OK;
@@ -5679,10 +5846,13 @@ int sqlite3Fts3EvalPhrasePoslist(
}
while( iThis<iCol ){
fts3ColumnlistCopy(0, &pIter);
if( *pIter==0x00 ) return 0;
if( *pIter==0x00 ) return SQLITE_OK;
pIter++;
pIter += fts3GetVarint32(pIter, &iThis);
}
if( *pIter==0x00 ){
pIter = 0;
}
*ppOut = ((iCol==iThis)?pIter:0);
return SQLITE_OK;

View File

@@ -134,6 +134,11 @@ SQLITE_EXTENSION_INIT3
#ifdef SQLITE_COVERAGE_TEST
# define ALWAYS(x) (1)
# define NEVER(X) (0)
#elif defined(SQLITE_DEBUG)
# define ALWAYS(x) sqlite3Fts3Always((x)!=0)
# define NEVER(x) sqlite3Fts3Never((x)!=0)
int sqlite3Fts3Always(int b);
int sqlite3Fts3Never(int b);
#else
# define ALWAYS(x) (x)
# define NEVER(x) (x)
@@ -192,6 +197,8 @@ typedef struct Fts3DeferredToken Fts3DeferredToken;
typedef struct Fts3SegReader Fts3SegReader;
typedef struct Fts3MultiSegReader Fts3MultiSegReader;
typedef struct MatchinfoBuffer MatchinfoBuffer;
/*
** A connection to a fulltext index is an instance of the following
** structure. The xCreate and xConnect methods create an instance
@@ -210,20 +217,20 @@ struct Fts3Table {
sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */
char *zContentTbl; /* content=xxx option, or NULL */
char *zLanguageid; /* languageid=xxx option, or NULL */
u8 bAutoincrmerge; /* True if automerge=1 */
int nAutoincrmerge; /* Value configured by 'automerge' */
u32 nLeafAdd; /* Number of leaf blocks added this trans */
/* Precompiled statements used by the implementation. Each of these
** statements is run and reset within a single virtual table API call.
*/
sqlite3_stmt *aStmt[37];
sqlite3_stmt *aStmt[40];
char *zReadExprlist;
char *zWriteExprlist;
int nNodeSize; /* Soft limit for node size */
u8 bFts4; /* True for FTS4, false for FTS3 */
u8 bHasStat; /* True if %_stat table exists */
u8 bHasStat; /* True if %_stat table exists (2==unknown) */
u8 bHasDocsize; /* True if %_docsize table exists */
u8 bDescIdx; /* True if doclists are in reverse order */
u8 bIgnoreSavepoint; /* True to ignore xSavepoint invocations */
@@ -301,9 +308,7 @@ struct Fts3Cursor {
i64 iMinDocid; /* Minimum docid to return */
i64 iMaxDocid; /* Maximum docid to return */
int isMatchinfoNeeded; /* True when aMatchinfo[] needs filling in */
u32 *aMatchinfo; /* Information about most recent match */
int nMatchinfo; /* Number of elements in aMatchinfo[] */
char *zMatchinfo; /* Matchinfo specification */
MatchinfoBuffer *pMIBuffer; /* Buffer for matchinfo data */
};
#define FTS3_EVAL_FILTER 0
@@ -375,6 +380,11 @@ struct Fts3Phrase {
int bIncr; /* True if doclist is loaded incrementally */
int iDoclistToken;
/* Used by sqlite3Fts3EvalPhrasePoslist() if this is a descendent of an
** OR condition. */
char *pOrPoslist;
i64 iOrDocid;
/* Variables below this point are populated by fts3_expr.c when parsing
** a MATCH expression. Everything above is part of the evaluation phase.
*/
@@ -418,7 +428,9 @@ struct Fts3Expr {
u8 bStart; /* True if iDocid is valid */
u8 bDeferred; /* True if this expression is entirely deferred */
u32 *aMI;
/* The following are used by the fts3_snippet.c module. */
int iPhrase; /* Index of this phrase in matchinfo() results */
u32 *aMI; /* See above */
};
/*
@@ -529,6 +541,7 @@ int sqlite3Fts3Incrmerge(Fts3Table*,int,int);
)
/* fts3.c */
void sqlite3Fts3ErrMsg(char**,const char*,...);
int sqlite3Fts3PutVarint(char *, sqlite3_int64);
int sqlite3Fts3GetVarint(const char *, sqlite_int64 *);
int sqlite3Fts3GetVarint32(const char *, int *);
@@ -538,6 +551,7 @@ void sqlite3Fts3DoclistPrev(int,char*,int,char**,sqlite3_int64*,int*,u8*);
int sqlite3Fts3EvalPhraseStats(Fts3Cursor *, Fts3Expr *, u32 *);
int sqlite3Fts3FirstFilter(sqlite3_int64, char *, int, char *);
void sqlite3Fts3CreateStatTable(int*, Fts3Table*);
int sqlite3Fts3EvalTestDeferred(Fts3Cursor *pCsr, int *pRc);
/* fts3_tokenizer.c */
const char *sqlite3Fts3NextToken(const char *, int *);
@@ -553,6 +567,7 @@ void sqlite3Fts3Snippet(sqlite3_context *, Fts3Cursor *, const char *,
const char *, const char *, int, int
);
void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *, const char *);
void sqlite3Fts3MIBufferFree(MatchinfoBuffer *p);
/* fts3_expr.c */
int sqlite3Fts3ExprParse(sqlite3_tokenizer *, int,
@@ -585,7 +600,7 @@ int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr);
int sqlite3Fts3InitTok(sqlite3*, Fts3Hash *);
/* fts3_unicode2.c (functions generated by parsing unicode text files) */
#ifdef SQLITE_ENABLE_FTS4_UNICODE61
#ifndef SQLITE_DISABLE_FTS3_UNICODE
int sqlite3FtsUnicodeFold(int, int);
int sqlite3FtsUnicodeIsalnum(int);
int sqlite3FtsUnicodeIsdiacritic(int);

View File

@@ -116,7 +116,7 @@ static int fts3auxConnectMethod(
return SQLITE_OK;
bad_args:
*pzErr = sqlite3_mprintf("invalid arguments to fts4aux constructor");
sqlite3Fts3ErrMsg(pzErr, "invalid arguments to fts4aux constructor");
return SQLITE_ERROR;
}

View File

@@ -185,40 +185,23 @@ static int getNextToken(
int rc;
sqlite3_tokenizer_cursor *pCursor;
Fts3Expr *pRet = 0;
int nConsumed = 0;
int i = 0;
rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, n, &pCursor);
/* Set variable i to the maximum number of bytes of input to tokenize. */
for(i=0; i<n; i++){
if( sqlite3_fts3_enable_parentheses && (z[i]=='(' || z[i]==')') ) break;
if( z[i]=='"' ) break;
}
*pnConsumed = i;
rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, i, &pCursor);
if( rc==SQLITE_OK ){
const char *zToken;
int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0;
int nByte; /* total space to allocate */
rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);
if( (rc==SQLITE_OK || rc==SQLITE_DONE) && sqlite3_fts3_enable_parentheses ){
int i;
if( rc==SQLITE_DONE ) iStart = n;
for(i=0; i<iStart; i++){
if( z[i]=='(' ){
pParse->nNest++;
rc = fts3ExprParse(pParse, &z[i+1], n-i-1, &pRet, &nConsumed);
if( rc==SQLITE_OK && !pRet ){
rc = SQLITE_DONE;
}
nConsumed = (int)(i + 1 + nConsumed);
break;
}
if( z[i]==')' ){
rc = SQLITE_DONE;
pParse->nNest--;
nConsumed = i+1;
break;
}
}
}
if( nConsumed==0 && rc==SQLITE_OK ){
if( rc==SQLITE_OK ){
nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken;
pRet = (Fts3Expr *)fts3MallocZero(nByte);
if( !pRet ){
@@ -252,13 +235,14 @@ static int getNextToken(
}
}
nConsumed = iEnd;
*pnConsumed = iEnd;
}else if( i && rc==SQLITE_DONE ){
rc = SQLITE_OK;
}
pModule->xClose(pCursor);
}
*pnConsumed = nConsumed;
*ppExpr = pRet;
return rc;
}
@@ -508,6 +492,21 @@ static int getNextNode(
return getNextString(pParse, &zInput[1], ii-1, ppExpr);
}
if( sqlite3_fts3_enable_parentheses ){
if( *zInput=='(' ){
int nConsumed = 0;
pParse->nNest++;
rc = fts3ExprParse(pParse, zInput+1, nInput-1, ppExpr, &nConsumed);
if( rc==SQLITE_OK && !*ppExpr ){ rc = SQLITE_DONE; }
*pnConsumed = (int)(zInput - z) + 1 + nConsumed;
return rc;
}else if( *zInput==')' ){
pParse->nNest--;
*pnConsumed = (int)((zInput - z) + 1);
*ppExpr = 0;
return SQLITE_DONE;
}
}
/* If control flows to this point, this must be a regular token, or
** the end of the input. Read a regular token using the sqlite3_tokenizer
@@ -626,96 +625,100 @@ static int fts3ExprParse(
while( rc==SQLITE_OK ){
Fts3Expr *p = 0;
int nByte = 0;
rc = getNextNode(pParse, zIn, nIn, &p, &nByte);
assert( nByte>0 || (rc!=SQLITE_OK && p==0) );
if( rc==SQLITE_OK ){
int isPhrase;
if( p ){
int isPhrase;
if( !sqlite3_fts3_enable_parentheses
&& p->eType==FTSQUERY_PHRASE && pParse->isNot
){
/* Create an implicit NOT operator. */
Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr));
if( !pNot ){
sqlite3Fts3ExprFree(p);
rc = SQLITE_NOMEM;
goto exprparse_out;
}
pNot->eType = FTSQUERY_NOT;
pNot->pRight = p;
p->pParent = pNot;
if( pNotBranch ){
pNot->pLeft = pNotBranch;
pNotBranch->pParent = pNot;
}
pNotBranch = pNot;
p = pPrev;
}else{
int eType = p->eType;
isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft);
/* The isRequirePhrase variable is set to true if a phrase or
** an expression contained in parenthesis is required. If a
** binary operator (AND, OR, NOT or NEAR) is encountered when
** isRequirePhrase is set, this is a syntax error.
*/
if( !isPhrase && isRequirePhrase ){
sqlite3Fts3ExprFree(p);
rc = SQLITE_ERROR;
goto exprparse_out;
}
if( isPhrase && !isRequirePhrase ){
/* Insert an implicit AND operator. */
Fts3Expr *pAnd;
assert( pRet && pPrev );
pAnd = fts3MallocZero(sizeof(Fts3Expr));
if( !pAnd ){
if( !sqlite3_fts3_enable_parentheses
&& p->eType==FTSQUERY_PHRASE && pParse->isNot
){
/* Create an implicit NOT operator. */
Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr));
if( !pNot ){
sqlite3Fts3ExprFree(p);
rc = SQLITE_NOMEM;
goto exprparse_out;
}
pAnd->eType = FTSQUERY_AND;
insertBinaryOperator(&pRet, pPrev, pAnd);
pPrev = pAnd;
}
pNot->eType = FTSQUERY_NOT;
pNot->pRight = p;
p->pParent = pNot;
if( pNotBranch ){
pNot->pLeft = pNotBranch;
pNotBranch->pParent = pNot;
}
pNotBranch = pNot;
p = pPrev;
}else{
int eType = p->eType;
isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft);
/* This test catches attempts to make either operand of a NEAR
** operator something other than a phrase. For example, either of
** the following:
**
** (bracketed expression) NEAR phrase
** phrase NEAR (bracketed expression)
**
** Return an error in either case.
*/
if( pPrev && (
/* The isRequirePhrase variable is set to true if a phrase or
** an expression contained in parenthesis is required. If a
** binary operator (AND, OR, NOT or NEAR) is encountered when
** isRequirePhrase is set, this is a syntax error.
*/
if( !isPhrase && isRequirePhrase ){
sqlite3Fts3ExprFree(p);
rc = SQLITE_ERROR;
goto exprparse_out;
}
if( isPhrase && !isRequirePhrase ){
/* Insert an implicit AND operator. */
Fts3Expr *pAnd;
assert( pRet && pPrev );
pAnd = fts3MallocZero(sizeof(Fts3Expr));
if( !pAnd ){
sqlite3Fts3ExprFree(p);
rc = SQLITE_NOMEM;
goto exprparse_out;
}
pAnd->eType = FTSQUERY_AND;
insertBinaryOperator(&pRet, pPrev, pAnd);
pPrev = pAnd;
}
/* This test catches attempts to make either operand of a NEAR
** operator something other than a phrase. For example, either of
** the following:
**
** (bracketed expression) NEAR phrase
** phrase NEAR (bracketed expression)
**
** Return an error in either case.
*/
if( pPrev && (
(eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE)
|| (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR)
)){
sqlite3Fts3ExprFree(p);
rc = SQLITE_ERROR;
goto exprparse_out;
}
if( isPhrase ){
if( pRet ){
assert( pPrev && pPrev->pLeft && pPrev->pRight==0 );
pPrev->pRight = p;
p->pParent = pPrev;
}else{
pRet = p;
)){
sqlite3Fts3ExprFree(p);
rc = SQLITE_ERROR;
goto exprparse_out;
}
}else{
insertBinaryOperator(&pRet, pPrev, p);
if( isPhrase ){
if( pRet ){
assert( pPrev && pPrev->pLeft && pPrev->pRight==0 );
pPrev->pRight = p;
p->pParent = pPrev;
}else{
pRet = p;
}
}else{
insertBinaryOperator(&pRet, pPrev, p);
}
isRequirePhrase = !isPhrase;
}
isRequirePhrase = !isPhrase;
pPrev = p;
}
assert( nByte>0 );
}
assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) );
nIn -= nByte;
zIn += nByte;
pPrev = p;
}
if( rc==SQLITE_DONE && pRet && isRequirePhrase ){
@@ -1019,13 +1022,13 @@ int sqlite3Fts3ExprParse(
sqlite3Fts3ExprFree(*ppExpr);
*ppExpr = 0;
if( rc==SQLITE_TOOBIG ){
*pzErr = sqlite3_mprintf(
sqlite3Fts3ErrMsg(pzErr,
"FTS expression tree is too large (maximum depth %d)",
SQLITE_FTS3_MAX_EXPR_DEPTH
);
rc = SQLITE_ERROR;
}else if( rc==SQLITE_ERROR ){
*pzErr = sqlite3_mprintf("malformed MATCH expression: [%s]", z);
sqlite3Fts3ErrMsg(pzErr, "malformed MATCH expression: [%s]", z);
}
}

View File

@@ -96,13 +96,13 @@ void sqlite3Fts3HashClear(Fts3Hash *pH){
*/
/*
** Compute a hash of the nKey bytes starting at pKey. If nKey is zero or
** negative, pKey is treated as a nul-terminated string and its length is
** used instead.
**
** The accumulator is unsigned so that the repeated left shift wraps
** modulo 2^N instead of overflowing a signed int (undefined behaviour).
** The result is masked to 31 bits, so the value returned is never
** negative.
*/
static int fts3StrHash(const void *pKey, int nKey){
  const char *z = (const char *)pKey;
  unsigned h = 0;
  if( nKey<=0 ) nKey = (int) strlen(z);
  while( nKey > 0 ){
    h = (h<<3) ^ h ^ *z++;
    nKey--;
  }
  return (int)(h & 0x7fffffff);
}
static int fts3StrCompare(const void *pKey1, int n1, const void *pKey2, int n2){
if( n1!=n2 ) return 1;

View File

@@ -240,12 +240,13 @@ static int icuNext(
** The set of routines that implement the simple tokenizer
*/
/* Method table for the ICU tokenizer. Exactly one initializer is supplied
** for each member of sqlite3_tokenizer_module, in declaration order. The
** xLanguageid slot is left as 0 (not implemented by this tokenizer). */
static const sqlite3_tokenizer_module icuTokenizerModule = {
  0,                           /* iVersion    */
  icuCreate,                   /* xCreate     */
  icuDestroy,                  /* xDestroy    */
  icuOpen,                     /* xOpen       */
  icuClose,                    /* xClose      */
  icuNext,                     /* xNext       */
  0,                           /* xLanguageid */
};
/*

View File

@@ -183,7 +183,7 @@ static int isVowel(const char *z){
** by a consonant.
**
** In this routine z[] is in reverse order. So we are really looking
** for an instance of of a consonant followed by a vowel.
** for an instance of a consonant followed by a vowel.
*/
static int m_gt_0(const char *z){
while( isVowel(z) ){ z++; }
@@ -403,12 +403,14 @@ static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
/* Step 2 */
switch( z[1] ){
case 'a':
stem(&z, "lanoita", "ate", m_gt_0) ||
stem(&z, "lanoit", "tion", m_gt_0);
if( !stem(&z, "lanoita", "ate", m_gt_0) ){
stem(&z, "lanoit", "tion", m_gt_0);
}
break;
case 'c':
stem(&z, "icne", "ence", m_gt_0) ||
stem(&z, "icna", "ance", m_gt_0);
if( !stem(&z, "icne", "ence", m_gt_0) ){
stem(&z, "icna", "ance", m_gt_0);
}
break;
case 'e':
stem(&z, "rezi", "ize", m_gt_0);
@@ -417,43 +419,54 @@ static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
stem(&z, "igol", "log", m_gt_0);
break;
case 'l':
stem(&z, "ilb", "ble", m_gt_0) ||
stem(&z, "illa", "al", m_gt_0) ||
stem(&z, "iltne", "ent", m_gt_0) ||
stem(&z, "ile", "e", m_gt_0) ||
stem(&z, "ilsuo", "ous", m_gt_0);
if( !stem(&z, "ilb", "ble", m_gt_0)
&& !stem(&z, "illa", "al", m_gt_0)
&& !stem(&z, "iltne", "ent", m_gt_0)
&& !stem(&z, "ile", "e", m_gt_0)
){
stem(&z, "ilsuo", "ous", m_gt_0);
}
break;
case 'o':
stem(&z, "noitazi", "ize", m_gt_0) ||
stem(&z, "noita", "ate", m_gt_0) ||
stem(&z, "rota", "ate", m_gt_0);
if( !stem(&z, "noitazi", "ize", m_gt_0)
&& !stem(&z, "noita", "ate", m_gt_0)
){
stem(&z, "rota", "ate", m_gt_0);
}
break;
case 's':
stem(&z, "msila", "al", m_gt_0) ||
stem(&z, "ssenevi", "ive", m_gt_0) ||
stem(&z, "ssenluf", "ful", m_gt_0) ||
stem(&z, "ssensuo", "ous", m_gt_0);
if( !stem(&z, "msila", "al", m_gt_0)
&& !stem(&z, "ssenevi", "ive", m_gt_0)
&& !stem(&z, "ssenluf", "ful", m_gt_0)
){
stem(&z, "ssensuo", "ous", m_gt_0);
}
break;
case 't':
stem(&z, "itila", "al", m_gt_0) ||
stem(&z, "itivi", "ive", m_gt_0) ||
stem(&z, "itilib", "ble", m_gt_0);
if( !stem(&z, "itila", "al", m_gt_0)
&& !stem(&z, "itivi", "ive", m_gt_0)
){
stem(&z, "itilib", "ble", m_gt_0);
}
break;
}
/* Step 3 */
switch( z[0] ){
case 'e':
stem(&z, "etaci", "ic", m_gt_0) ||
stem(&z, "evita", "", m_gt_0) ||
stem(&z, "ezila", "al", m_gt_0);
if( !stem(&z, "etaci", "ic", m_gt_0)
&& !stem(&z, "evita", "", m_gt_0)
){
stem(&z, "ezila", "al", m_gt_0);
}
break;
case 'i':
stem(&z, "itici", "ic", m_gt_0);
break;
case 'l':
stem(&z, "laci", "ic", m_gt_0) ||
stem(&z, "luf", "", m_gt_0);
if( !stem(&z, "laci", "ic", m_gt_0) ){
stem(&z, "luf", "", m_gt_0);
}
break;
case 's':
stem(&z, "ssen", "", m_gt_0);
@@ -494,9 +507,11 @@ static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
z += 3;
}
}else if( z[2]=='e' ){
stem(&z, "tneme", "", m_gt_1) ||
stem(&z, "tnem", "", m_gt_1) ||
stem(&z, "tne", "", m_gt_1);
if( !stem(&z, "tneme", "", m_gt_1)
&& !stem(&z, "tnem", "", m_gt_1)
){
stem(&z, "tne", "", m_gt_1);
}
}
}
break;
@@ -515,8 +530,9 @@ static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
}
break;
case 't':
stem(&z, "eta", "", m_gt_1) ||
stem(&z, "iti", "", m_gt_1);
if( !stem(&z, "eta", "", m_gt_1) ){
stem(&z, "iti", "", m_gt_1);
}
break;
case 'u':
if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){

View File

@@ -27,6 +27,8 @@
#define FTS3_MATCHINFO_LENGTH 'l' /* nCol values */
#define FTS3_MATCHINFO_LCS 's' /* nCol values */
#define FTS3_MATCHINFO_HITS 'x' /* 3*nCol*nPhrase values */
#define FTS3_MATCHINFO_LHITS 'y' /* nCol*nPhrase values */
#define FTS3_MATCHINFO_LHITS_BM 'b' /* nCol*nPhrase values */
/*
** The default value for the second argument to matchinfo().
@@ -88,9 +90,22 @@ struct MatchInfo {
int nCol; /* Number of columns in table */
int nPhrase; /* Number of matchable phrases in query */
sqlite3_int64 nDoc; /* Number of docs in database */
char flag;
u32 *aMatchinfo; /* Pre-allocated buffer */
};
/*
** An instance of this structure is used to manage a pair of buffers, each
** (nElem * sizeof(u32)) bytes in size. See the MatchinfoBuffer code below
** for details.
*/
struct MatchinfoBuffer {
  /* Reference flags. aRef[0] is set to 1 by fts3MIBufferNew() and cleared
  ** by sqlite3Fts3MIBufferFree(). aRef[1] and aRef[2] are set while the
  ** first and second slot respectively are handed out by
  ** fts3MIBufferAlloc(), and cleared again by fts3MIBufferFree(). The
  ** object is freed once all three are zero. */
  u8 aRef[3];
  int nElem;                      /* Number of u32 elements in each slot */
  int bGlobal; /* Set if global data is loaded */
  /* Copy of the matchinfo format string. Points into the same allocation
  ** as this object, immediately after the aMatchinfo[] array. */
  char *zMatchinfo;
  /* Over-allocated array of (2*nElem + 2) entries laid out as:
  **
  **   aMatchinfo[0]                  byte offset from the start of this
  **                                  object to &aMatchinfo[1]
  **   aMatchinfo[1..nElem]           first slot
  **   aMatchinfo[nElem+1]            byte offset from the start of this
  **                                  object to &aMatchinfo[nElem+2]
  **   aMatchinfo[nElem+2..2*nElem+1] second slot
  **
  ** The offset word stored immediately before each slot is what allows
  ** fts3MIBufferFree() to recover the object from a slot pointer. */
  u32 aMatchinfo[1];
};
/*
@@ -106,6 +121,97 @@ struct StrBuffer {
};
/*************************************************************************
** Start of MatchinfoBuffer code.
*/
/*
** Allocate a two-slot MatchinfoBuffer object.
*/
static MatchinfoBuffer *fts3MIBufferNew(int nElem, const char *zMatchinfo){
MatchinfoBuffer *pRet;
int nByte = sizeof(u32) * (2*nElem + 1) + sizeof(MatchinfoBuffer);
int nStr = (int)strlen(zMatchinfo);
pRet = sqlite3_malloc(nByte + nStr+1);
if( pRet ){
memset(pRet, 0, nByte);
pRet->aMatchinfo[0] = (u8*)(&pRet->aMatchinfo[1]) - (u8*)pRet;
pRet->aMatchinfo[1+nElem] = pRet->aMatchinfo[0] + sizeof(u32)*(nElem+1);
pRet->nElem = nElem;
pRet->zMatchinfo = ((char*)pRet) + nByte;
memcpy(pRet->zMatchinfo, zMatchinfo, nStr+1);
pRet->aRef[0] = 1;
}
return pRet;
}
static void fts3MIBufferFree(void *p){
MatchinfoBuffer *pBuf = (MatchinfoBuffer*)((u8*)p - ((u32*)p)[-1]);
assert( (u32*)p==&pBuf->aMatchinfo[1]
|| (u32*)p==&pBuf->aMatchinfo[pBuf->nElem+2]
);
if( (u32*)p==&pBuf->aMatchinfo[1] ){
pBuf->aRef[1] = 0;
}else{
pBuf->aRef[2] = 0;
}
if( pBuf->aRef[0]==0 && pBuf->aRef[1]==0 && pBuf->aRef[2]==0 ){
sqlite3_free(pBuf);
}
}
/*
** Obtain a buffer of p->nElem u32 values in which to build a matchinfo
** result. *paOut is set to point to the buffer (or to NULL on failure) and
** the function to use to release it is returned (NULL on failure).
**
** The first unused embedded slot is handed out if there is one. If both
** are in use, a separate buffer is obtained from sqlite3_malloc(); in that
** case, if global data has been loaded (p->bGlobal), the new buffer is
** initialized with a copy of slot 1.
*/
static void (*fts3MIBufferAlloc(MatchinfoBuffer *p, u32 **paOut))(void*){
  void (*xDel)(void*) = 0;
  u32 *aBuf = 0;
  int iSlot = 0;                  /* Embedded slot to use, or 0 for none */

  if( p->aRef[1]==0 ){
    iSlot = 1;
  }else if( p->aRef[2]==0 ){
    iSlot = 2;
  }

  if( iSlot ){
    p->aRef[iSlot] = 1;
    aBuf = (iSlot==1) ? &p->aMatchinfo[1] : &p->aMatchinfo[p->nElem+2];
    xDel = fts3MIBufferFree;
  }else{
    aBuf = (u32*)sqlite3_malloc(p->nElem * sizeof(u32));
    if( aBuf ){
      xDel = sqlite3_free;
      if( p->bGlobal ){
        memcpy(aBuf, &p->aMatchinfo[1], p->nElem*sizeof(u32));
      }
    }
  }

  *paOut = aBuf;
  return xDel;
}
/*
** Record that slot 1 of the buffer now contains the global matchinfo
** data, and copy the contents of slot 1 into slot 2.
*/
static void fts3MIBufferSetGlobal(MatchinfoBuffer *p){
  u32 *aFirst = &p->aMatchinfo[1];
  u32 *aSecond = &p->aMatchinfo[2+p->nElem];

  p->bGlobal = 1;
  memcpy(aSecond, aFirst, p->nElem*sizeof(u32));
}
/*
** Free a MatchinfoBuffer object allocated using fts3MIBufferNew().
**
** This drops the reference held in aRef[0]. The memory is released
** immediately unless one of the two embedded slots is still in use, in
** which case it is released when the last slot is handed back via
** fts3MIBufferFree(). Passing NULL is a harmless no-op.
*/
void sqlite3Fts3MIBufferFree(MatchinfoBuffer *p){
  if( p==0 ) return;

  assert( p->aRef[0]==1 );
  p->aRef[0] = 0;
  if( p->aRef[1]==0 && p->aRef[2]==0 ){
    sqlite3_free(p);
  }
}
/*
** End of MatchinfoBuffer code.
*************************************************************************/
/*
** This function is used to help iterate through a position-list. A position
** list is a list of unique integers, sorted from smallest to largest. Each
@@ -142,7 +248,7 @@ static int fts3ExprIterate2(
void *pCtx /* Second argument to pass to callback */
){
int rc; /* Return code */
int eType = pExpr->eType; /* Type of expression node pExpr */
int eType = pExpr->eType; /* Type of expression node pExpr */
if( eType!=FTSQUERY_PHRASE ){
assert( pExpr->pLeft && pExpr->pRight );
@@ -176,6 +282,7 @@ static int fts3ExprIterate(
return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx);
}
/*
** This is an fts3ExprIterate() callback used while loading the doclists
** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also
@@ -220,8 +327,7 @@ static int fts3ExprLoadDoclists(
static int fts3ExprPhraseCountCb(Fts3Expr *pExpr, int iPhrase, void *ctx){
(*(int *)ctx)++;
UNUSED_PARAMETER(pExpr);
UNUSED_PARAMETER(iPhrase);
pExpr->iPhrase = iPhrase;
return SQLITE_OK;
}
static int fts3ExprPhraseCount(Fts3Expr *pExpr){
@@ -442,37 +548,39 @@ static int fts3BestSnippet(
sIter.nSnippet = nSnippet;
sIter.nPhrase = nList;
sIter.iCurrent = -1;
(void)fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void *)&sIter);
rc = fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void*)&sIter);
if( rc==SQLITE_OK ){
/* Set the *pmSeen output variable. */
for(i=0; i<nList; i++){
if( sIter.aPhrase[i].pHead ){
*pmSeen |= (u64)1 << i;
/* Set the *pmSeen output variable. */
for(i=0; i<nList; i++){
if( sIter.aPhrase[i].pHead ){
*pmSeen |= (u64)1 << i;
}
}
}
/* Loop through all candidate snippets. Store the best snippet in
** *pFragment. Store its associated 'score' in iBestScore.
*/
pFragment->iCol = iCol;
while( !fts3SnippetNextCandidate(&sIter) ){
int iPos;
int iScore;
u64 mCover;
u64 mHighlight;
fts3SnippetDetails(&sIter, mCovered, &iPos, &iScore, &mCover, &mHighlight);
assert( iScore>=0 );
if( iScore>iBestScore ){
pFragment->iPos = iPos;
pFragment->hlmask = mHighlight;
pFragment->covered = mCover;
iBestScore = iScore;
/* Loop through all candidate snippets. Store the best snippet in
** *pFragment. Store its associated 'score' in iBestScore.
*/
pFragment->iCol = iCol;
while( !fts3SnippetNextCandidate(&sIter) ){
int iPos;
int iScore;
u64 mCover;
u64 mHighlite;
fts3SnippetDetails(&sIter, mCovered, &iPos, &iScore, &mCover,&mHighlite);
assert( iScore>=0 );
if( iScore>iBestScore ){
pFragment->iPos = iPos;
pFragment->hlmask = mHighlite;
pFragment->covered = mCover;
iBestScore = iScore;
}
}
}
*piScore = iBestScore;
}
sqlite3_free(sIter.aPhrase);
*piScore = iBestScore;
return SQLITE_OK;
return rc;
}
@@ -680,8 +788,12 @@ static int fts3SnippetText(
** required. They are required if (a) this is not the first fragment,
** or (b) this fragment does not begin at position 0 of its column.
*/
if( rc==SQLITE_OK && (iPos>0 || iFragment>0) ){
rc = fts3StringAppend(pOut, zEllipsis, -1);
if( rc==SQLITE_OK ){
if( iPos>0 || iFragment>0 ){
rc = fts3StringAppend(pOut, zEllipsis, -1);
}else if( iBegin ){
rc = fts3StringAppend(pOut, zDoc, iBegin);
}
}
if( rc!=SQLITE_OK || iCurrent<iPos ) continue;
}
@@ -737,6 +849,60 @@ static int fts3ColumnlistCount(char **ppCollist){
return nEntry;
}
/*
** This function gathers 'y' or 'b' data for a single phrase.
**
** The position list of the phrase for the current row is walked one
** column at a time. For each column that the phrase is allowed to match
** (all columns, or only pPhrase->iColumn if the phrase is restricted):
**
**   'y' (FTS3_MATCHINFO_LHITS):    the hit count for the column is stored
**                                  at p->aMatchinfo[iPhrase*nCol + iCol].
**
**   'b' (FTS3_MATCHINFO_LHITS_BM): bit (iCol%32) of word (iCol/32) within
**                                  the phrase's group of ((nCol+31)/32)
**                                  words is set if there is at least one
**                                  hit in the column.
**
** The caller is expected to have zeroed the output array beforehand.
*/
static void fts3ExprLHits(
  Fts3Expr *pExpr,                /* Phrase expression node */
  MatchInfo *p                    /* Matchinfo context */
){
  Fts3Table *pTab = (Fts3Table *)p->pCursor->base.pVtab;
  int iStart;                     /* Index of first output word for phrase */
  Fts3Phrase *pPhrase = pExpr->pPhrase;
  char *pIter = pPhrase->doclist.pList;
  int iCol = 0;                   /* Column the current list belongs to */

  assert( p->flag==FTS3_MATCHINFO_LHITS_BM || p->flag==FTS3_MATCHINFO_LHITS );
  if( p->flag==FTS3_MATCHINFO_LHITS ){
    iStart = pExpr->iPhrase * p->nCol;
  }else{
    iStart = pExpr->iPhrase * ((p->nCol + 31) / 32);
  }

  /* No position list for this row: there is nothing to record. */
  if( pIter==0 ) return;

  while( 1 ){
    int nHit = fts3ColumnlistCount(&pIter);
    if( (pPhrase->iColumn>=pTab->nColumn || pPhrase->iColumn==iCol) ){
      if( p->flag==FTS3_MATCHINFO_LHITS ){
        p->aMatchinfo[iStart + iCol] = (u32)nHit;
      }else if( nHit ){
        /* The word index must be iCol/32 so that column 31 lands in the
        ** same word as columns 0..30, matching the ((nCol+31)/32) words
        ** per phrase reserved by fts3MatchinfoSize(). The shift is done
        ** on an unsigned value so that bit 31 may be set without signed
        ** overflow. */
        p->aMatchinfo[iStart + iCol/32] |= ((u32)1 << (iCol&0x1F));
      }
    }
    assert( *pIter==0x00 || *pIter==0x01 );
    if( *pIter!=0x01 ) break;
    pIter++;
    pIter += fts3GetVarint32(pIter, &iCol);

    /* A column number outside the table can only come from a corrupt
    ** record. Stop rather than write outside the output array. */
    if( iCol<0 || iCol>=p->nCol ) break;
  }
}
/*
** Gather the results for matchinfo directives 'y' and 'b'.
**
** The expression tree is descended recursively. A sub-tree is skipped
** entirely unless it is not at EOF and is positioned on the row that the
** cursor is currently returning. Leaf (phrase) nodes are passed to
** fts3ExprLHits().
*/
static void fts3ExprLHitGather(
  Fts3Expr *pExpr,
  MatchInfo *p
){
  assert( (pExpr->pLeft==0)==(pExpr->pRight==0) );

  /* Nodes that do not match the current row contribute nothing. */
  if( pExpr->bEof || pExpr->iDocid!=p->pCursor->iPrevId ) return;

  if( pExpr->pLeft==0 ){
    fts3ExprLHits(pExpr, p);
    return;
  }
  fts3ExprLHitGather(pExpr->pLeft, p);
  fts3ExprLHitGather(pExpr->pRight, p);
}
/*
** fts3ExprIterate() callback used to collect the "global" matchinfo stats
** for a single query.
@@ -815,10 +981,12 @@ static int fts3MatchinfoCheck(
|| (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize)
|| (cArg==FTS3_MATCHINFO_LCS)
|| (cArg==FTS3_MATCHINFO_HITS)
|| (cArg==FTS3_MATCHINFO_LHITS)
|| (cArg==FTS3_MATCHINFO_LHITS_BM)
){
return SQLITE_OK;
}
*pzErr = sqlite3_mprintf("unrecognized matchinfo request: %c", cArg);
sqlite3Fts3ErrMsg(pzErr, "unrecognized matchinfo request: %c", cArg);
return SQLITE_ERROR;
}
@@ -838,6 +1006,14 @@ static int fts3MatchinfoSize(MatchInfo *pInfo, char cArg){
nVal = pInfo->nCol;
break;
case FTS3_MATCHINFO_LHITS:
nVal = pInfo->nCol * pInfo->nPhrase;
break;
case FTS3_MATCHINFO_LHITS_BM:
nVal = pInfo->nPhrase * ((pInfo->nCol + 31) / 32);
break;
default:
assert( cArg==FTS3_MATCHINFO_HITS );
nVal = pInfo->nCol * pInfo->nPhrase * 3;
@@ -1032,7 +1208,7 @@ static int fts3MatchinfoValues(
sqlite3_stmt *pSelect = 0;
for(i=0; rc==SQLITE_OK && zArg[i]; i++){
pInfo->flag = zArg[i];
switch( zArg[i] ){
case FTS3_MATCHINFO_NPHRASE:
if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase;
@@ -1092,6 +1268,14 @@ static int fts3MatchinfoValues(
}
break;
case FTS3_MATCHINFO_LHITS_BM:
case FTS3_MATCHINFO_LHITS: {
int nZero = fts3MatchinfoSize(pInfo, zArg[i]) * sizeof(u32);
memset(pInfo->aMatchinfo, 0, nZero);
fts3ExprLHitGather(pCsr->pExpr, pInfo);
break;
}
default: {
Fts3Expr *pExpr;
assert( zArg[i]==FTS3_MATCHINFO_HITS );
@@ -1104,6 +1288,7 @@ static int fts3MatchinfoValues(
if( rc!=SQLITE_OK ) break;
}
rc = fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo);
sqlite3Fts3EvalTestDeferred(pCsr, &rc);
if( rc!=SQLITE_OK ) break;
}
(void)fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo);
@@ -1123,7 +1308,8 @@ static int fts3MatchinfoValues(
** Populate pCsr->aMatchinfo[] with data for the current row. The
** 'matchinfo' data is an array of 32-bit unsigned integers (C type u32).
*/
static int fts3GetMatchinfo(
static void fts3GetMatchinfo(
sqlite3_context *pCtx, /* Return results here */
Fts3Cursor *pCsr, /* FTS3 Cursor object */
const char *zArg /* Second argument to matchinfo() function */
){
@@ -1132,6 +1318,9 @@ static int fts3GetMatchinfo(
int rc = SQLITE_OK;
int bGlobal = 0; /* Collect 'global' stats as well as local */
u32 *aOut = 0;
void (*xDestroyOut)(void*) = 0;
memset(&sInfo, 0, sizeof(MatchInfo));
sInfo.pCursor = pCsr;
sInfo.nCol = pTab->nColumn;
@@ -1139,21 +1328,18 @@ static int fts3GetMatchinfo(
/* If there is cached matchinfo() data, but the format string for the
** cache does not match the format string for this request, discard
** the cached data. */
if( pCsr->zMatchinfo && strcmp(pCsr->zMatchinfo, zArg) ){
assert( pCsr->aMatchinfo );
sqlite3_free(pCsr->aMatchinfo);
pCsr->zMatchinfo = 0;
pCsr->aMatchinfo = 0;
if( pCsr->pMIBuffer && strcmp(pCsr->pMIBuffer->zMatchinfo, zArg) ){
sqlite3Fts3MIBufferFree(pCsr->pMIBuffer);
pCsr->pMIBuffer = 0;
}
/* If Fts3Cursor.aMatchinfo[] is NULL, then this is the first time the
/* If Fts3Cursor.pMIBuffer is NULL, then this is the first time the
** matchinfo function has been called for this query. In this case
** allocate the array used to accumulate the matchinfo data and
** initialize those elements that are constant for every row.
*/
if( pCsr->aMatchinfo==0 ){
if( pCsr->pMIBuffer==0 ){
int nMatchinfo = 0; /* Number of u32 elements in match-info */
int nArg; /* Bytes in zArg */
int i; /* Used to iterate through zArg */
/* Determine the number of phrases in the query */
@@ -1162,30 +1348,46 @@ static int fts3GetMatchinfo(
/* Determine the number of integers in the buffer returned by this call. */
for(i=0; zArg[i]; i++){
char *zErr = 0;
if( fts3MatchinfoCheck(pTab, zArg[i], &zErr) ){
sqlite3_result_error(pCtx, zErr, -1);
sqlite3_free(zErr);
return;
}
nMatchinfo += fts3MatchinfoSize(&sInfo, zArg[i]);
}
/* Allocate space for Fts3Cursor.aMatchinfo[] and Fts3Cursor.zMatchinfo. */
nArg = (int)strlen(zArg);
pCsr->aMatchinfo = (u32 *)sqlite3_malloc(sizeof(u32)*nMatchinfo + nArg + 1);
if( !pCsr->aMatchinfo ) return SQLITE_NOMEM;
pCsr->pMIBuffer = fts3MIBufferNew(nMatchinfo, zArg);
if( !pCsr->pMIBuffer ) rc = SQLITE_NOMEM;
pCsr->zMatchinfo = (char *)&pCsr->aMatchinfo[nMatchinfo];
pCsr->nMatchinfo = nMatchinfo;
memcpy(pCsr->zMatchinfo, zArg, nArg+1);
memset(pCsr->aMatchinfo, 0, sizeof(u32)*nMatchinfo);
pCsr->isMatchinfoNeeded = 1;
bGlobal = 1;
}
sInfo.aMatchinfo = pCsr->aMatchinfo;
sInfo.nPhrase = pCsr->nPhrase;
if( pCsr->isMatchinfoNeeded ){
rc = fts3MatchinfoValues(pCsr, bGlobal, &sInfo, zArg);
pCsr->isMatchinfoNeeded = 0;
if( rc==SQLITE_OK ){
xDestroyOut = fts3MIBufferAlloc(pCsr->pMIBuffer, &aOut);
if( xDestroyOut==0 ){
rc = SQLITE_NOMEM;
}
}
return rc;
if( rc==SQLITE_OK ){
sInfo.aMatchinfo = aOut;
sInfo.nPhrase = pCsr->nPhrase;
rc = fts3MatchinfoValues(pCsr, bGlobal, &sInfo, zArg);
if( bGlobal ){
fts3MIBufferSetGlobal(pCsr->pMIBuffer);
}
}
if( rc!=SQLITE_OK ){
sqlite3_result_error_code(pCtx, rc);
if( xDestroyOut ) xDestroyOut(aOut);
}else{
int n = pCsr->pMIBuffer->nElem * sizeof(u32);
sqlite3_result_blob(pCtx, aOut, n, xDestroyOut);
}
}
/*
@@ -1247,7 +1449,7 @@ void sqlite3Fts3Snippet(
*/
for(iRead=0; iRead<pTab->nColumn; iRead++){
SnippetFragment sF = {0, 0, 0, 0};
int iS;
int iS = 0;
if( iCol>=0 && iRead!=iCol ) continue;
/* Find the best snippet of nFToken tokens in column iRead. */
@@ -1391,7 +1593,7 @@ void sqlite3Fts3Offsets(
*/
sCtx.iCol = iCol;
sCtx.iTerm = 0;
(void)fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void *)&sCtx);
(void)fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void*)&sCtx);
/* Retrieve the text stored in column iCol. If an SQL NULL is stored
** in column iCol, jump immediately to the next iteration of the loop.
@@ -1483,19 +1685,9 @@ void sqlite3Fts3Matchinfo(
const char *zArg /* Second arg to matchinfo() function */
){
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
int rc;
int i;
const char *zFormat;
if( zArg ){
for(i=0; zArg[i]; i++){
char *zErr = 0;
if( fts3MatchinfoCheck(pTab, zArg[i], &zErr) ){
sqlite3_result_error(pContext, zErr, -1);
sqlite3_free(zErr);
return;
}
}
zFormat = zArg;
}else{
zFormat = FTS3_MATCHINFO_DEFAULT;
@@ -1504,17 +1696,10 @@ void sqlite3Fts3Matchinfo(
if( !pCsr->pExpr ){
sqlite3_result_blob(pContext, "", 0, SQLITE_STATIC);
return;
}
/* Retrieve matchinfo() data. */
rc = fts3GetMatchinfo(pCsr, zFormat);
sqlite3Fts3SegmentsClose(pTab);
if( rc!=SQLITE_OK ){
sqlite3_result_error_code(pContext, rc);
}else{
int n = pCsr->nMatchinfo * sizeof(u32);
sqlite3_result_blob(pContext, pCsr->aMatchinfo, n, SQLITE_TRANSIENT);
/* Retrieve matchinfo() data. */
fts3GetMatchinfo(pContext, pCsr, zFormat);
sqlite3Fts3SegmentsClose(pTab);
}
}

View File

@@ -81,7 +81,7 @@ static int fts3termConnectMethod(
/* The user should specify a single argument - the name of an fts3 table. */
if( argc!=4 ){
*pzErr = sqlite3_mprintf(
sqlite3Fts3ErrMsg(pzErr,
"wrong number of arguments to fts4term constructor"
);
return SQLITE_ERROR;

View File

@@ -85,7 +85,7 @@ static int fts3tokQueryTokenizer(
p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1);
if( !p ){
*pzErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer: %s", zName);
return SQLITE_ERROR;
}
@@ -163,7 +163,7 @@ static int fts3tokConnectMethod(
sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
char **pzErr /* OUT: sqlite3_malloc'd error message */
){
Fts3tokTable *pTab;
Fts3tokTable *pTab = 0;
const sqlite3_tokenizer_module *pMod = 0;
sqlite3_tokenizer *pTok = 0;
int rc;

View File

@@ -69,7 +69,7 @@ static void scalarFunc(
if( argc==2 ){
void *pOld;
int n = sqlite3_value_bytes(argv[1]);
if( n!=sizeof(pPtr) ){
if( zName==0 || n!=sizeof(pPtr) ){
sqlite3_result_error(context, "argument type mismatch", -1);
return;
}
@@ -80,7 +80,9 @@ static void scalarFunc(
return;
}
}else{
pPtr = sqlite3Fts3HashFind(pHash, zName, nName);
if( zName ){
pPtr = sqlite3Fts3HashFind(pHash, zName, nName);
}
if( !pPtr ){
char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
sqlite3_result_error(context, zErr, -1);
@@ -161,12 +163,16 @@ int sqlite3Fts3InitTokenizer(
zEnd = &zCopy[strlen(zCopy)];
z = (char *)sqlite3Fts3NextToken(zCopy, &n);
if( z==0 ){
assert( n==0 );
z = zCopy;
}
z[n] = '\0';
sqlite3Fts3Dequote(z);
m = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash,z,(int)strlen(z)+1);
if( !m ){
*pzErr = sqlite3_mprintf("unknown tokenizer: %s", z);
sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer: %s", z);
rc = SQLITE_ERROR;
}else{
char const **aArg = 0;
@@ -189,7 +195,7 @@ int sqlite3Fts3InitTokenizer(
rc = m->xCreate(iArg, aArg, ppTok);
assert( rc!=SQLITE_OK || *ppTok );
if( rc!=SQLITE_OK ){
*pzErr = sqlite3_mprintf("unknown tokenizer");
sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer");
}else{
(*ppTok)->pModule = m;
}
@@ -273,9 +279,9 @@ static void testFunc(
p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1);
if( !p ){
char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
sqlite3_result_error(context, zErr, -1);
sqlite3_free(zErr);
char *zErr2 = sqlite3_mprintf("unknown tokenizer: %s", zName);
sqlite3_result_error(context, zErr2, -1);
sqlite3_free(zErr2);
return;
}

View File

@@ -13,7 +13,7 @@
** Implementation of the "unicode" full-text-search tokenizer.
*/
#ifdef SQLITE_ENABLE_FTS4_UNICODE61
#ifndef SQLITE_DISABLE_FTS3_UNICODE
#include "fts3Int.h"
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
@@ -231,7 +231,7 @@ static int unicodeCreate(
for(i=0; rc==SQLITE_OK && i<nArg; i++){
const char *z = azArg[i];
int n = strlen(z);
int n = (int)strlen(z);
if( n==19 && memcmp("remove_diacritics=1", z, 19)==0 ){
pNew->bRemoveDiacritic = 1;
@@ -318,7 +318,7 @@ static int unicodeNext(
){
unicode_cursor *pCsr = (unicode_cursor *)pC;
unicode_tokenizer *p = ((unicode_tokenizer *)pCsr->base.pTokenizer);
int iCode;
int iCode = 0;
char *zOut;
const unsigned char *z = &pCsr->aInput[pCsr->iOff];
const unsigned char *zStart = z;
@@ -363,11 +363,11 @@ static int unicodeNext(
);
/* Set the output variables and return. */
pCsr->iOff = (z - pCsr->aInput);
pCsr->iOff = (int)(z - pCsr->aInput);
*paToken = pCsr->zToken;
*pnToken = zOut - pCsr->zToken;
*piStart = (zStart - pCsr->aInput);
*piEnd = (zEnd - pCsr->aInput);
*pnToken = (int)(zOut - pCsr->zToken);
*piStart = (int)(zStart - pCsr->aInput);
*piEnd = (int)(zEnd - pCsr->aInput);
*piPos = pCsr->iToken++;
return SQLITE_OK;
}
@@ -390,4 +390,4 @@ void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const **ppModule){
}
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
#endif /* ifndef SQLITE_ENABLE_FTS4_UNICODE61 */
#endif /* ifndef SQLITE_DISABLE_FTS3_UNICODE */

View File

@@ -15,7 +15,7 @@
** DO NOT EDIT THIS MACHINE GENERATED FILE.
*/
#if defined(SQLITE_ENABLE_FTS4_UNICODE61)
#ifndef SQLITE_DISABLE_FTS3_UNICODE
#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)
#include <assert.h>
@@ -39,7 +39,7 @@ int sqlite3FtsUnicodeIsalnum(int c){
** C. It is not possible to represent a range larger than 1023 codepoints
** using this format.
*/
const static unsigned int aEntry[] = {
static const unsigned int aEntry[] = {
0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07,
0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01,
0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401,
@@ -131,7 +131,7 @@ int sqlite3FtsUnicodeIsalnum(int c){
return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 );
}else if( c<(1<<22) ){
unsigned int key = (((unsigned int)c)<<10) | 0x000003FF;
int iRes;
int iRes = 0;
int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
int iLo = 0;
while( iHi>=iLo ){
@@ -202,7 +202,7 @@ static int remove_diacritic(int c){
}
assert( key>=aDia[iRes] );
return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]);
};
}
/*
@@ -362,4 +362,4 @@ int sqlite3FtsUnicodeFold(int c, int bRemoveDiacritic){
return ret;
}
#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */
#endif /* !defined(SQLITE_ENABLE_FTS4_UNICODE61) */
#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */

View File

@@ -193,6 +193,7 @@ struct SegmentWriter {
int nSize; /* Size of allocation at aData */
int nData; /* Bytes of data in aData */
char *aData; /* Pointer to block from malloc() */
i64 nLeafData; /* Number of bytes of leaf data written */
};
/*
@@ -268,6 +269,10 @@ struct SegmentNode {
#define SQL_SELECT_INDEXES 35
#define SQL_SELECT_MXLEVEL 36
#define SQL_SELECT_LEVEL_RANGE2 37
#define SQL_UPDATE_LEVEL_IDX 38
#define SQL_UPDATE_LEVEL 39
/*
** This function is used to obtain an SQLite prepared statement handle
** for the statement identified by the second argument. If successful,
@@ -321,7 +326,7 @@ static int fts3SqlStmt(
/* 25 */ "",
/* 26 */ "DELETE FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?",
/* 27 */ "SELECT DISTINCT level / (1024 * ?) FROM %Q.'%q_segdir'",
/* 27 */ "SELECT ? UNION SELECT level / (1024 * ?) FROM %Q.'%q_segdir'",
/* This statement is used to determine which level to read the input from
** when performing an incremental merge. It returns the absolute level number
@@ -369,7 +374,18 @@ static int fts3SqlStmt(
/* SQL_SELECT_MXLEVEL
** Return the largest relative level in the FTS index or indexes. */
/* 36 */ "SELECT max( level %% 1024 ) FROM %Q.'%q_segdir'"
/* 36 */ "SELECT max( level %% 1024 ) FROM %Q.'%q_segdir'",
/* Return segments in order from oldest to newest.*/
/* 37 */ "SELECT level, idx, end_block "
"FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ? "
"ORDER BY level DESC, idx ASC",
/* Update statements used while promoting segments */
/* 38 */ "UPDATE OR FAIL %Q.'%q_segdir' SET level=-1,idx=? "
"WHERE level=? AND idx=?",
/* 39 */ "UPDATE OR FAIL %Q.'%q_segdir' SET level=? WHERE level=-1"
};
int rc = SQLITE_OK;
sqlite3_stmt *pStmt;
@@ -1609,7 +1625,10 @@ int sqlite3Fts3SegReaderNew(
** an array of pending terms by term. This occurs as part of flushing
** the contents of the pending-terms hash table to the database.
*/
static int fts3CompareElemByTerm(const void *lhs, const void *rhs){
static int SQLITE_CDECL fts3CompareElemByTerm(
const void *lhs,
const void *rhs
){
char *z1 = fts3HashKey(*(Fts3HashElem **)lhs);
char *z2 = fts3HashKey(*(Fts3HashElem **)rhs);
int n1 = fts3HashKeysize(*(Fts3HashElem **)lhs);
@@ -1910,6 +1929,7 @@ static int fts3WriteSegdir(
sqlite3_int64 iStartBlock, /* Value for "start_block" field */
sqlite3_int64 iLeafEndBlock, /* Value for "leaves_end_block" field */
sqlite3_int64 iEndBlock, /* Value for "end_block" field */
sqlite3_int64 nLeafData, /* Bytes of leaf data in segment */
char *zRoot, /* Blob value for "root" field */
int nRoot /* Number of bytes in buffer zRoot */
){
@@ -1920,7 +1940,13 @@ static int fts3WriteSegdir(
sqlite3_bind_int(pStmt, 2, iIdx);
sqlite3_bind_int64(pStmt, 3, iStartBlock);
sqlite3_bind_int64(pStmt, 4, iLeafEndBlock);
sqlite3_bind_int64(pStmt, 5, iEndBlock);
if( nLeafData==0 ){
sqlite3_bind_int64(pStmt, 5, iEndBlock);
}else{
char *zEnd = sqlite3_mprintf("%lld %lld", iEndBlock, nLeafData);
if( !zEnd ) return SQLITE_NOMEM;
sqlite3_bind_text(pStmt, 5, zEnd, -1, sqlite3_free);
}
sqlite3_bind_blob(pStmt, 6, zRoot, nRoot, SQLITE_STATIC);
sqlite3_step(pStmt);
rc = sqlite3_reset(pStmt);
@@ -2246,6 +2272,9 @@ static int fts3SegWriterAdd(
nDoclist; /* Doclist data */
}
/* Increase the total number of bytes written to account for the new entry. */
pWriter->nLeafData += nReq;
/* If the buffer currently allocated is too small for this entry, realloc
** the buffer to make it large enough.
*/
@@ -2317,13 +2346,13 @@ static int fts3SegWriterFlush(
pWriter->iFirst, pWriter->iFree, &iLast, &zRoot, &nRoot);
}
if( rc==SQLITE_OK ){
rc = fts3WriteSegdir(
p, iLevel, iIdx, pWriter->iFirst, iLastLeaf, iLast, zRoot, nRoot);
rc = fts3WriteSegdir(p, iLevel, iIdx,
pWriter->iFirst, iLastLeaf, iLast, pWriter->nLeafData, zRoot, nRoot);
}
}else{
/* The entire tree fits on the root node. Write it to the segdir table. */
rc = fts3WriteSegdir(
p, iLevel, iIdx, 0, 0, 0, pWriter->aData, pWriter->nData);
rc = fts3WriteSegdir(p, iLevel, iIdx,
0, 0, 0, pWriter->nLeafData, pWriter->aData, pWriter->nData);
}
p->nLeafAdd++;
return rc;
@@ -2407,6 +2436,37 @@ static int fts3SegmentMaxLevel(
return sqlite3_reset(pStmt);
}
/*
** iAbsLevel is an absolute level that may be assumed to exist within
** the database. This function determines whether or not it is the largest
** level number within its index.
**
** If no error occurs, SQLITE_OK is returned and *pbMax is set to 1 if
** iAbsLevel is the largest level, or to 0 otherwise. If an error occurs,
** an error code is returned and the final value of *pbMax is undefined.
*/
static int fts3SegmentIsMaxLevel(Fts3Table *p, i64 iAbsLevel, int *pbMax){
  sqlite3_stmt *pQuery = 0;
  i64 iRangeEnd;                  /* Upper bound of the level range */
  int bIsMax = 0;
  int rc;

  /* Set pQuery to the compiled version of:
  **
  **   SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?
  */
  rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pQuery, 0);
  if( rc!=SQLITE_OK ) return rc;

  /* Search the levels above iAbsLevel. If max() comes back as NULL there
  ** are no segments on any of them. */
  iRangeEnd = ((iAbsLevel/FTS3_SEGDIR_MAXLEVEL)+1) * FTS3_SEGDIR_MAXLEVEL;
  sqlite3_bind_int64(pQuery, 1, iAbsLevel+1);
  sqlite3_bind_int64(pQuery, 2, iRangeEnd);

  if( sqlite3_step(pQuery)==SQLITE_ROW ){
    bIsMax = (sqlite3_column_type(pQuery, 0)==SQLITE_NULL);
  }
  *pbMax = bIsMax;
  return sqlite3_reset(pQuery);
}
/*
** Delete all entries in the %_segments table associated with the segment
** opened with seg-reader pSeg. This function does not affect the contents
@@ -2942,6 +3002,140 @@ void sqlite3Fts3SegReaderFinish(
}
}
/*
** Decode the "end_block" field, selected by column iCol of the SELECT
** statement passed as the first argument.
**
** The "end_block" field may contain either an integer, or a text field
** containing the text representation of two integers separated by one or
** more space (0x20) characters. The second integer may be preceded by a
** single '-' character. In the first case, set *piEndBlock to the integer
** value and *pnByte to zero before returning. In the second, set
** *piEndBlock to the first value and *pnByte to the second.
**
** If the column text cannot be obtained (sqlite3_column_text() returns
** NULL) neither output variable is modified.
**
** Digits are accumulated in an unsigned 64-bit integer so that an
** over-long digit string in a corrupt record wraps around instead of
** causing signed integer overflow.
*/
static void fts3ReadEndBlockField(
  sqlite3_stmt *pStmt,
  int iCol,
  i64 *piEndBlock,
  i64 *pnByte
){
  const unsigned char *zText = sqlite3_column_text(pStmt, iCol);
  if( zText ){
    int i;
    int bNeg = 0;                 /* True if second value has a '-' prefix */
    sqlite3_uint64 iVal = 0;

    /* First value: the end-block number. */
    for(i=0; zText[i]>='0' && zText[i]<='9'; i++){
      iVal = iVal*10 + (sqlite3_uint64)(zText[i] - '0');
    }
    *piEndBlock = (i64)iVal;

    /* Second (optional) value: the number of bytes of leaf data. */
    while( zText[i]==' ' ) i++;
    iVal = 0;
    if( zText[i]=='-' ){
      i++;
      bNeg = 1;
    }
    for(/* no-op */; zText[i]>='0' && zText[i]<='9'; i++){
      iVal = iVal*10 + (sqlite3_uint64)(zText[i] - '0');
    }
    /* Negate while still unsigned to avoid overflow on extreme values. */
    *pnByte = (i64)(bNeg ? (0 - iVal) : iVal);
  }
}
/*
** A segment of size nByte bytes has just been written to absolute level
** iAbsLevel. Promote any segments that should be promoted as a result.
**
** Segments on levels greater than iAbsLevel (within the same index) are
** promoted to level iAbsLevel if there is at least one of them and every
** one of them has a recorded size that is greater than zero and no larger
** than (nByte*3)/2 bytes.
**
** Returns SQLITE_OK if successful (including when nothing is promoted),
** or an SQLite error code otherwise.
*/
static int fts3PromoteSegments(
Fts3Table *p, /* FTS table handle */
sqlite3_int64 iAbsLevel, /* Absolute level just updated */
sqlite3_int64 nByte /* Size of new segment at iAbsLevel */
){
int rc = SQLITE_OK;
sqlite3_stmt *pRange;
rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_RANGE2, &pRange, 0);
if( rc==SQLITE_OK ){
int bOk = 0;
/* iLast is the largest absolute level in the same index as iAbsLevel */
i64 iLast = (iAbsLevel/FTS3_SEGDIR_MAXLEVEL + 1) * FTS3_SEGDIR_MAXLEVEL - 1;
/* Segments larger than this are not eligible for promotion */
i64 nLimit = (nByte*3)/2;
/* Loop through all entries in the %_segdir table corresponding to
** segments in this index on levels greater than iAbsLevel. If there is
** at least one such segment, and it is possible to determine that all
** such segments are smaller than nLimit bytes in size, they will be
** promoted to level iAbsLevel. */
sqlite3_bind_int64(pRange, 1, iAbsLevel+1);
sqlite3_bind_int64(pRange, 2, iLast);
while( SQLITE_ROW==sqlite3_step(pRange) ){
i64 nSize = 0, dummy;
fts3ReadEndBlockField(pRange, 2, &dummy, &nSize);
if( nSize<=0 || nSize>nLimit ){
/* If nSize==0, then the %_segdir.end_block field does not
** contain a size value. This happens if it was written by an
** old version of FTS. In this case it is not possible to determine
** the size of the segment, and so segment promotion does not
** take place. Negative sizes are treated the same way. */
bOk = 0;
break;
}
bOk = 1;
}
rc = sqlite3_reset(pRange);
if( bOk ){
int iIdx = 0;
sqlite3_stmt *pUpdate1 = 0;
sqlite3_stmt *pUpdate2 = 0;
if( rc==SQLITE_OK ){
rc = fts3SqlStmt(p, SQL_UPDATE_LEVEL_IDX, &pUpdate1, 0);
}
if( rc==SQLITE_OK ){
rc = fts3SqlStmt(p, SQL_UPDATE_LEVEL, &pUpdate2, 0);
}
if( rc==SQLITE_OK ){
/* Loop through all %_segdir entries for segments in this index with
** levels equal to or greater than iAbsLevel. As each entry is visited,
** update it to set (level = -1) and (idx = N), where N is 0 for the
** oldest segment in the range, 1 for the next oldest, and so on.
**
** In other words, move all segments being promoted to level -1,
** setting the "idx" fields as appropriate to keep them in the same
** order. The contents of level -1 (which is never used, except
** transiently here), will be moved back to level iAbsLevel below.
**
** Only parameter 1 of pRange is re-bound here. NOTE(review): this
** relies on parameter 2 still holding iLast from the bind above -
** confirm that bindings survive sqlite3_reset(). */
sqlite3_bind_int64(pRange, 1, iAbsLevel);
while( SQLITE_ROW==sqlite3_step(pRange) ){
sqlite3_bind_int(pUpdate1, 1, iIdx++);
sqlite3_bind_int(pUpdate1, 2, sqlite3_column_int(pRange, 0));
sqlite3_bind_int(pUpdate1, 3, sqlite3_column_int(pRange, 1));
sqlite3_step(pUpdate1);
rc = sqlite3_reset(pUpdate1);
if( rc!=SQLITE_OK ){
/* Abandon the scan, keeping the error code from pUpdate1 in rc */
sqlite3_reset(pRange);
break;
}
}
}
if( rc==SQLITE_OK ){
rc = sqlite3_reset(pRange);
}
/* Move level -1 to level iAbsLevel */
if( rc==SQLITE_OK ){
sqlite3_bind_int64(pUpdate2, 1, iAbsLevel);
sqlite3_step(pUpdate2);
rc = sqlite3_reset(pUpdate2);
}
}
}
return rc;
}
/*
** Merge all level iLevel segments in the database into a single
** iLevel+1 segment. Or, if iLevel<0, merge all segments into a
@@ -2966,6 +3160,7 @@ static int fts3SegmentMerge(
Fts3SegFilter filter; /* Segment term filter condition */
Fts3MultiSegReader csr; /* Cursor to iterate through level(s) */
int bIgnoreEmpty = 0; /* True to ignore empty segments */
i64 iMaxLevel = 0; /* Max level number for this index/langid */
assert( iLevel==FTS3_SEGCURSOR_ALL
|| iLevel==FTS3_SEGCURSOR_PENDING
@@ -2977,6 +3172,11 @@ static int fts3SegmentMerge(
rc = sqlite3Fts3SegReaderCursor(p, iLangid, iIndex, iLevel, 0, 0, 1, 0, &csr);
if( rc!=SQLITE_OK || csr.nSegment==0 ) goto finished;
if( iLevel!=FTS3_SEGCURSOR_PENDING ){
rc = fts3SegmentMaxLevel(p, iLangid, iIndex, &iMaxLevel);
if( rc!=SQLITE_OK ) goto finished;
}
if( iLevel==FTS3_SEGCURSOR_ALL ){
/* This call is to merge all segments in the database to a single
** segment. The level of the new segment is equal to the numerically
@@ -2986,21 +3186,21 @@ static int fts3SegmentMerge(
rc = SQLITE_DONE;
goto finished;
}
rc = fts3SegmentMaxLevel(p, iLangid, iIndex, &iNewLevel);
iNewLevel = iMaxLevel;
bIgnoreEmpty = 1;
}else if( iLevel==FTS3_SEGCURSOR_PENDING ){
iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, 0);
rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, 0, &iIdx);
}else{
/* This call is to merge all segments at level iLevel. find the next
** available segment index at level iLevel+1. The call to
** fts3AllocateSegdirIdx() will merge the segments at level iLevel+1 to
** a single iLevel+2 segment if necessary. */
rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, iLevel+1, &iIdx);
assert( FTS3_SEGCURSOR_PENDING==-1 );
iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, iLevel+1);
rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, iLevel+1, &iIdx);
bIgnoreEmpty = (iLevel!=FTS3_SEGCURSOR_PENDING) && (iNewLevel>iMaxLevel);
}
if( rc!=SQLITE_OK ) goto finished;
assert( csr.nSegment>0 );
assert( iNewLevel>=getAbsoluteLevel(p, iLangid, iIndex, 0) );
assert( iNewLevel<getAbsoluteLevel(p, iLangid, iIndex,FTS3_SEGDIR_MAXLEVEL) );
@@ -3017,7 +3217,7 @@ static int fts3SegmentMerge(
csr.zTerm, csr.nTerm, csr.aDoclist, csr.nDoclist);
}
if( rc!=SQLITE_OK ) goto finished;
assert( pWriter );
assert( pWriter || bIgnoreEmpty );
if( iLevel!=FTS3_SEGCURSOR_PENDING ){
rc = fts3DeleteSegdir(
@@ -3025,7 +3225,14 @@ static int fts3SegmentMerge(
);
if( rc!=SQLITE_OK ) goto finished;
}
rc = fts3SegWriterFlush(p, pWriter, iNewLevel, iIdx);
if( pWriter ){
rc = fts3SegWriterFlush(p, pWriter, iNewLevel, iIdx);
if( rc==SQLITE_OK ){
if( iLevel==FTS3_SEGCURSOR_PENDING || iNewLevel<iMaxLevel ){
rc = fts3PromoteSegments(p, iNewLevel, pWriter->nLeafData);
}
}
}
finished:
fts3SegWriterFree(pWriter);
@@ -3051,14 +3258,19 @@ int sqlite3Fts3PendingTermsFlush(Fts3Table *p){
** estimate the number of leaf blocks of content to be written
*/
if( rc==SQLITE_OK && p->bHasStat
&& p->bAutoincrmerge==0xff && p->nLeafAdd>0
&& p->nAutoincrmerge==0xff && p->nLeafAdd>0
){
sqlite3_stmt *pStmt = 0;
rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pStmt, 0);
if( rc==SQLITE_OK ){
sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE);
rc = sqlite3_step(pStmt);
p->bAutoincrmerge = (rc==SQLITE_ROW && sqlite3_column_int(pStmt, 0));
if( rc==SQLITE_ROW ){
p->nAutoincrmerge = sqlite3_column_int(pStmt, 0);
if( p->nAutoincrmerge==1 ) p->nAutoincrmerge = 8;
}else if( rc==SQLITE_DONE ){
p->nAutoincrmerge = 0;
}
rc = sqlite3_reset(pStmt);
}
}
@@ -3232,7 +3444,8 @@ static int fts3DoOptimize(Fts3Table *p, int bReturnDone){
rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0);
if( rc==SQLITE_OK ){
int rc2;
sqlite3_bind_int(pAllLangid, 1, p->nIndex);
sqlite3_bind_int(pAllLangid, 1, p->iPrevLangid);
sqlite3_bind_int(pAllLangid, 2, p->nIndex);
while( sqlite3_step(pAllLangid)==SQLITE_ROW ){
int i;
int iLangid = sqlite3_column_int(pAllLangid, 0);
@@ -3426,6 +3639,8 @@ struct IncrmergeWriter {
int iIdx; /* Index of *output* segment in iAbsLevel+1 */
sqlite3_int64 iStart; /* Block number of first allocated block */
sqlite3_int64 iEnd; /* Block number of last allocated block */
sqlite3_int64 nLeafData; /* Bytes of leaf page data so far */
u8 bNoLeafData; /* If true, store 0 for segment size */
NodeWriter aNodeWriter[FTS_MAX_APPENDABLE_HEIGHT];
};
@@ -3764,8 +3979,8 @@ static int fts3IncrmergeAppend(
nSpace += sqlite3Fts3VarintLen(nDoclist) + nDoclist;
}
pWriter->nLeafData += nSpace;
blobGrowBuffer(&pLeaf->block, pLeaf->block.n + nSpace, &rc);
if( rc==SQLITE_OK ){
if( pLeaf->block.n==0 ){
pLeaf->block.n = 1;
@@ -3864,6 +4079,7 @@ static void fts3IncrmergeRelease(
pWriter->iStart, /* start_block */
pWriter->aNodeWriter[0].iBlock, /* leaves_end_block */
pWriter->iEnd, /* end_block */
(pWriter->bNoLeafData==0 ? pWriter->nLeafData : 0), /* end_block */
pRoot->block.a, pRoot->block.n /* root */
);
}
@@ -3965,7 +4181,11 @@ static int fts3IncrmergeLoad(
if( sqlite3_step(pSelect)==SQLITE_ROW ){
iStart = sqlite3_column_int64(pSelect, 1);
iLeafEnd = sqlite3_column_int64(pSelect, 2);
iEnd = sqlite3_column_int64(pSelect, 3);
fts3ReadEndBlockField(pSelect, 3, &iEnd, &pWriter->nLeafData);
if( pWriter->nLeafData<0 ){
pWriter->nLeafData = pWriter->nLeafData * -1;
}
pWriter->bNoLeafData = (pWriter->nLeafData==0);
nRoot = sqlite3_column_bytes(pSelect, 4);
aRoot = sqlite3_column_blob(pSelect, 4);
}else{
@@ -4557,7 +4777,7 @@ static int fts3IncrmergeHintPop(Blob *pHint, i64 *piAbsLevel, int *pnInput){
pHint->n = i;
i += sqlite3Fts3GetVarint(&pHint->a[i], piAbsLevel);
i += fts3GetVarint32(&pHint->a[i], pnInput);
if( i!=nHint ) return SQLITE_CORRUPT_VTAB;
if( i!=nHint ) return FTS_CORRUPT_VTAB;
return SQLITE_OK;
}
@@ -4566,11 +4786,11 @@ static int fts3IncrmergeHintPop(Blob *pHint, i64 *piAbsLevel, int *pnInput){
/*
** Attempt an incremental merge that writes nMerge leaf blocks.
**
** Incremental merges happen nMin segments at a time. The two
** segments to be merged are the nMin oldest segments (the ones with
** the smallest indexes) in the highest level that contains at least
** nMin segments. Multiple merges might occur in an attempt to write the
** quota of nMerge leaf blocks.
** Incremental merges happen nMin segments at a time. The segments
** to be merged are the nMin oldest segments (the ones with the smallest
** values for the _segdir.idx field) in the highest level that contains
** at least nMin segments. Multiple merges might occur in an attempt to
** write the quota of nMerge leaf blocks.
*/
int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){
int rc; /* Return code */
@@ -4595,6 +4815,7 @@ int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){
const i64 nMod = FTS3_SEGDIR_MAXLEVEL * p->nIndex;
sqlite3_stmt *pFindLevel = 0; /* SQL used to determine iAbsLevel */
int bUseHint = 0; /* True if attempting to append */
int iIdx = 0; /* Largest idx in level (iAbsLevel+1) */
/* Search the %_segdir table for the absolute level with the smallest
** relative level number that contains at least nMin segments, if any.
@@ -4648,6 +4869,19 @@ int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){
** to start work on some other level. */
memset(pWriter, 0, nAlloc);
pFilter->flags = FTS3_SEGMENT_REQUIRE_POS;
if( rc==SQLITE_OK ){
rc = fts3IncrmergeOutputIdx(p, iAbsLevel, &iIdx);
assert( bUseHint==1 || bUseHint==0 );
if( iIdx==0 || (bUseHint && iIdx==1) ){
int bIgnore = 0;
rc = fts3SegmentIsMaxLevel(p, iAbsLevel+1, &bIgnore);
if( bIgnore ){
pFilter->flags |= FTS3_SEGMENT_IGNORE_EMPTY;
}
}
}
if( rc==SQLITE_OK ){
rc = fts3IncrmergeCsr(p, iAbsLevel, nSeg, pCsr);
}
@@ -4655,16 +4889,12 @@ int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){
&& SQLITE_OK==(rc = sqlite3Fts3SegReaderStart(p, pCsr, pFilter))
&& SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pCsr))
){
int iIdx = 0; /* Largest idx in level (iAbsLevel+1) */
rc = fts3IncrmergeOutputIdx(p, iAbsLevel, &iIdx);
if( rc==SQLITE_OK ){
if( bUseHint && iIdx>0 ){
const char *zKey = pCsr->zTerm;
int nKey = pCsr->nTerm;
rc = fts3IncrmergeLoad(p, iAbsLevel, iIdx-1, zKey, nKey, pWriter);
}else{
rc = fts3IncrmergeWriter(p, iAbsLevel, iIdx, pCsr, pWriter);
}
if( bUseHint && iIdx>0 ){
const char *zKey = pCsr->zTerm;
int nKey = pCsr->nTerm;
rc = fts3IncrmergeLoad(p, iAbsLevel, iIdx-1, zKey, nKey, pWriter);
}else{
rc = fts3IncrmergeWriter(p, iAbsLevel, iIdx, pCsr, pWriter);
}
if( rc==SQLITE_OK && pWriter->nLeafEst ){
@@ -4686,7 +4916,13 @@ int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){
}
}
if( nSeg!=0 ){
pWriter->nLeafData = pWriter->nLeafData * -1;
}
fts3IncrmergeRelease(p, pWriter, &rc);
if( nSeg==0 && pWriter->bNoLeafData==0 ){
fts3PromoteSegments(p, iAbsLevel+1, pWriter->nLeafData);
}
}
sqlite3Fts3SegReaderFinish(pCsr);
@@ -4773,7 +5009,10 @@ static int fts3DoAutoincrmerge(
){
int rc = SQLITE_OK;
sqlite3_stmt *pStmt = 0;
p->bAutoincrmerge = fts3Getint(&zParam)!=0;
p->nAutoincrmerge = fts3Getint(&zParam);
if( p->nAutoincrmerge==1 || p->nAutoincrmerge>FTS3_MERGE_COUNT ){
p->nAutoincrmerge = 8;
}
if( !p->bHasStat ){
assert( p->bFts4==0 );
sqlite3Fts3CreateStatTable(&rc, p);
@@ -4782,7 +5021,7 @@ static int fts3DoAutoincrmerge(
rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pStmt, 0);
if( rc ) return rc;
sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE);
sqlite3_bind_int(pStmt, 2, p->bAutoincrmerge);
sqlite3_bind_int(pStmt, 2, p->nAutoincrmerge);
sqlite3_step(pStmt);
rc = sqlite3_reset(pStmt);
return rc;
@@ -4906,7 +5145,8 @@ static int fts3IntegrityCheck(Fts3Table *p, int *pbOk){
rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0);
if( rc==SQLITE_OK ){
int rc2;
sqlite3_bind_int(pAllLangid, 1, p->nIndex);
sqlite3_bind_int(pAllLangid, 1, p->iPrevLangid);
sqlite3_bind_int(pAllLangid, 2, p->nIndex);
while( rc==SQLITE_OK && sqlite3_step(pAllLangid)==SQLITE_ROW ){
int iLangid = sqlite3_column_int(pAllLangid, 0);
int i;
@@ -4919,7 +5159,6 @@ static int fts3IntegrityCheck(Fts3Table *p, int *pbOk){
}
/* This block calculates the checksum according to the %_content table */
rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0);
if( rc==SQLITE_OK ){
sqlite3_tokenizer_module const *pModule = p->pTokenizer->pModule;
sqlite3_stmt *pStmt = 0;
@@ -4939,34 +5178,36 @@ static int fts3IntegrityCheck(Fts3Table *p, int *pbOk){
int iCol;
for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){
const char *zText = (const char *)sqlite3_column_text(pStmt, iCol+1);
int nText = sqlite3_column_bytes(pStmt, iCol+1);
sqlite3_tokenizer_cursor *pT = 0;
if( p->abNotindexed[iCol]==0 ){
const char *zText = (const char *)sqlite3_column_text(pStmt, iCol+1);
int nText = sqlite3_column_bytes(pStmt, iCol+1);
sqlite3_tokenizer_cursor *pT = 0;
rc = sqlite3Fts3OpenTokenizer(p->pTokenizer, iLang, zText, nText, &pT);
while( rc==SQLITE_OK ){
char const *zToken; /* Buffer containing token */
int nToken = 0; /* Number of bytes in token */
int iDum1 = 0, iDum2 = 0; /* Dummy variables */
int iPos = 0; /* Position of token in zText */
rc = sqlite3Fts3OpenTokenizer(p->pTokenizer, iLang, zText, nText,&pT);
while( rc==SQLITE_OK ){
char const *zToken; /* Buffer containing token */
int nToken = 0; /* Number of bytes in token */
int iDum1 = 0, iDum2 = 0; /* Dummy variables */
int iPos = 0; /* Position of token in zText */
rc = pModule->xNext(pT, &zToken, &nToken, &iDum1, &iDum2, &iPos);
if( rc==SQLITE_OK ){
int i;
cksum2 = cksum2 ^ fts3ChecksumEntry(
zToken, nToken, iLang, 0, iDocid, iCol, iPos
);
for(i=1; i<p->nIndex; i++){
if( p->aIndex[i].nPrefix<=nToken ){
cksum2 = cksum2 ^ fts3ChecksumEntry(
zToken, p->aIndex[i].nPrefix, iLang, i, iDocid, iCol, iPos
);
rc = pModule->xNext(pT, &zToken, &nToken, &iDum1, &iDum2, &iPos);
if( rc==SQLITE_OK ){
int i;
cksum2 = cksum2 ^ fts3ChecksumEntry(
zToken, nToken, iLang, 0, iDocid, iCol, iPos
);
for(i=1; i<p->nIndex; i++){
if( p->aIndex[i].nPrefix<=nToken ){
cksum2 = cksum2 ^ fts3ChecksumEntry(
zToken, p->aIndex[i].nPrefix, iLang, i, iDocid, iCol, iPos
);
}
}
}
}
if( pT ) pModule->xClose(pT);
if( rc==SQLITE_DONE ) rc = SQLITE_OK;
}
if( pT ) pModule->xClose(pT);
if( rc==SQLITE_DONE ) rc = SQLITE_OK;
}
}
@@ -5014,7 +5255,7 @@ static int fts3DoIntegrityCheck(
int rc;
int bOk = 0;
rc = fts3IntegrityCheck(p, &bOk);
if( rc==SQLITE_OK && bOk==0 ) rc = SQLITE_CORRUPT_VTAB;
if( rc==SQLITE_OK && bOk==0 ) rc = FTS_CORRUPT_VTAB;
return rc;
}
@@ -5271,6 +5512,10 @@ int sqlite3Fts3UpdateMethod(
int nChng = 0; /* Net change in number of documents */
int bInsertDone = 0;
/* At this point it must be known if the %_stat table exists or not.
** So bHasStat may not be 2. */
assert( p->bHasStat==0 || p->bHasStat==1 );
assert( p->pSegments==0 );
assert(
nArg==1 /* DELETE operations */

View File

@@ -376,7 +376,7 @@ static void showSegmentStats(sqlite3 *db, const char *zTab){
sqlite3_finalize(pStmt);
nLeaf = nSeg - nIdx;
printf("Leaf segments larger than %5d bytes.... %9d %5.2f%%\n",
pgsz-45, n, n*100.0/nLeaf);
pgsz-45, n, nLeaf>0 ? n*100.0/nLeaf : 0.0);
pStmt = prepare(db, "SELECT max(level%%1024) FROM '%q_segdir'", zTab);
mxLevel = 0;
@@ -504,7 +504,7 @@ static void showSegdirMap(sqlite3 *db, const char *zTab){
sqlite3_column_int64(pStmt,5));
printf(" root %9s\n", rtag);
if( iLEnd>iStart ){
sqlite3_int64 iLower, iPrev, iX;
sqlite3_int64 iLower, iPrev = 0, iX;
if( iLEnd+1<=iEnd ){
sqlite3_bind_int64(pStmt2, 1, iLEnd+1);
sqlite3_bind_int64(pStmt2, 2, iEnd);
@@ -548,13 +548,13 @@ static void decodeSegment(
const unsigned char *aData, /* Content to print */
int nData /* Number of bytes of content */
){
sqlite3_int64 iChild;
sqlite3_int64 iChild = 0;
sqlite3_int64 iPrefix;
sqlite3_int64 nTerm;
sqlite3_int64 n;
sqlite3_int64 iDocsz;
int iHeight;
int i = 0;
sqlite3_int64 i = 0;
int cnt = 0;
char zTerm[1000];
@@ -576,12 +576,12 @@ static void decodeSegment(
fprintf(stderr, "term to long\n");
exit(1);
}
memcpy(zTerm+iPrefix, aData+i, nTerm);
memcpy(zTerm+iPrefix, aData+i, (size_t)nTerm);
zTerm[iPrefix+nTerm] = 0;
i += nTerm;
if( iHeight==0 ){
i += getVarint(aData+i, &iDocsz);
printf("term: %-25s doclist %7lld bytes offset %d\n", zTerm, iDocsz, i);
printf("term: %-25s doclist %7lld bytes offset %lld\n", zTerm, iDocsz, i);
i += iDocsz;
}else{
printf("term: %-25s child %lld\n", zTerm, ++iChild);
@@ -749,18 +749,19 @@ static void decodeDoclist(
*/
static void showDoclist(sqlite3 *db, const char *zTab){
const unsigned char *aData;
sqlite3_int64 offset, nData;
sqlite3_int64 offset;
int nData;
sqlite3_stmt *pStmt;
offset = atoi64(azExtra[1]);
nData = atoi64(azExtra[2]);
nData = atoi(azExtra[2]);
pStmt = prepareToGetSegment(db, zTab, azExtra[0]);
if( sqlite3_step(pStmt)!=SQLITE_ROW ){
sqlite3_finalize(pStmt);
return;
}
aData = sqlite3_column_blob(pStmt, 0);
printf("Doclist at %s offset %lld of size %lld bytes:\n",
printf("Doclist at %s offset %lld of size %d bytes:\n",
azExtra[0], offset, nData);
if( findOption("raw", 0, 0)!=0 ){
printBlob(aData+offset, nData);

View File

@@ -1,77 +1,5 @@
#
# Parameter $zName must be a path to the file UnicodeData.txt. This command
# reads the file and returns a list of mappings required to remove all
# diacritical marks from a unicode string. Each mapping is itself a list
# consisting of two elements - the unicode codepoint and the single ASCII
# character that it should be replaced with, or an empty string if the
# codepoint should simply be removed from the input. Examples:
#
# { 224 a } (replace codepoint 224 to "a")
# { 769 "" } (remove codepoint 769 from input)
#
# Mappings are only returned for non-upper case codepoints. It is assumed
# that the input has already been folded to lower case.
#
proc rd_load_unicodedata_text {zName} {
global tl_lookup_table
set fd [open $zName]
set lField {
code
character_name
general_category
canonical_combining_classes
bidirectional_category
character_decomposition_mapping
decimal_digit_value
digit_value
numeric_value
mirrored
unicode_1_name
iso10646_comment_field
uppercase_mapping
lowercase_mapping
titlecase_mapping
}
set lRet [list]
while { ![eof $fd] } {
set line [gets $fd]
if {$line == ""} continue
set fields [split $line ";"]
if {[llength $fields] != [llength $lField]} { error "parse error: $line" }
foreach $lField $fields {}
if { [llength $character_decomposition_mapping]!=2
|| [string is xdigit [lindex $character_decomposition_mapping 0]]==0
} {
continue
}
set iCode [expr "0x$code"]
set iAscii [expr "0x[lindex $character_decomposition_mapping 0]"]
set iDia [expr "0x[lindex $character_decomposition_mapping 1]"]
if {[info exists tl_lookup_table($iCode)]} continue
if { ($iAscii >= 97 && $iAscii <= 122)
|| ($iAscii >= 65 && $iAscii <= 90)
} {
lappend lRet [list $iCode [string tolower [format %c $iAscii]]]
set dia($iDia) 1
}
}
foreach d [array names dia] {
lappend lRet [list $d ""]
}
set lRet [lsort -integer -index 0 $lRet]
close $fd
set lRet
}
source [file join [file dirname [info script]] parseunicode.tcl]
proc print_rd {map} {
global tl_lookup_table
@@ -117,7 +45,7 @@ proc print_rd {map} {
puts "** E\"). The resuls of passing a codepoint that corresponds to an"
puts "** uppercase letter are undefined."
puts "*/"
puts "static int remove_diacritic(int c)\{"
puts "static int ${::remove_diacritic}(int c)\{"
puts " unsigned short aDia\[\] = \{"
puts -nonewline " 0, "
set i 1
@@ -160,7 +88,7 @@ proc print_rd {map} {
}
assert( key>=aDia[iRes] );
return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]);}
puts "\};"
puts "\}"
}
proc print_isdiacritic {zFunc map} {
@@ -204,53 +132,6 @@ proc print_isdiacritic {zFunc map} {
#-------------------------------------------------------------------------
# Parameter $zName must be a path to the file UnicodeData.txt. This command
# reads the file and returns a list of codepoints (integers). The list
# contains all codepoints in the UnicodeData.txt assigned to any "General
# Category" that is not a "Letter" or "Number".
#
proc an_load_unicodedata_text {zName} {
set fd [open $zName]
set lField {
code
character_name
general_category
canonical_combining_classes
bidirectional_category
character_decomposition_mapping
decimal_digit_value
digit_value
numeric_value
mirrored
unicode_1_name
iso10646_comment_field
uppercase_mapping
lowercase_mapping
titlecase_mapping
}
set lRet [list]
while { ![eof $fd] } {
set line [gets $fd]
if {$line == ""} continue
set fields [split $line ";"]
if {[llength $fields] != [llength $lField]} { error "parse error: $line" }
foreach $lField $fields {}
set iCode [expr "0x$code"]
set bAlnum [expr {
[lsearch {L N} [string range $general_category 0 0]] >= 0
|| $general_category=="Co"
}]
if { !$bAlnum } { lappend lRet $iCode }
}
close $fd
set lRet
}
proc an_load_separator_ranges {} {
global unicodedata.txt
set lSep [an_load_unicodedata_text ${unicodedata.txt}]
@@ -298,7 +179,7 @@ proc an_print_range_array {lRange} {
** using this format.
*/
}]
puts -nonewline " const static unsigned int aEntry\[\] = \{"
puts -nonewline " static const unsigned int aEntry\[\] = \{"
set i 0
foreach range $lRange {
foreach {iFirst nRange} $range {}
@@ -349,7 +230,7 @@ proc print_isalnum {zFunc lRange} {
return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 );
}else if( c<(1<<22) ){
unsigned int key = (((unsigned int)c)<<10) | 0x000003FF;
int iRes;
int iRes = 0;
int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
int iLo = 0;
while( iHi>=iLo ){
@@ -440,29 +321,6 @@ proc print_test_isalnum {zFunc lRange} {
#-------------------------------------------------------------------------
proc tl_load_casefolding_txt {zName} {
global tl_lookup_table
set fd [open $zName]
while { ![eof $fd] } {
set line [gets $fd]
if {[string range $line 0 0] == "#"} continue
if {$line == ""} continue
foreach x {a b c d} {unset -nocomplain $x}
foreach {a b c d} [split $line ";"] {}
set a2 [list]
set c2 [list]
foreach elem $a { lappend a2 [expr "0x[string trim $elem]"] }
foreach elem $c { lappend c2 [expr "0x[string trim $elem]"] }
set b [string trim $b]
set d [string trim $d]
if {$b=="C" || $b=="S"} { set tl_lookup_table($a2) $c2 }
}
}
proc tl_create_records {} {
global tl_lookup_table
@@ -626,19 +484,20 @@ proc print_fold {zFunc} {
tl_print_table_footer toggle
tl_print_ioff_table $liOff
puts {
puts [subst -nocommands {
int ret = c;
assert( c>=0 );
assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );
if( c<128 ){
if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
}else if( c<65536 ){
const struct TableEntry *p;
int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
int iLo = 0;
int iRes = -1;
assert( c>aEntry[0].iCode );
while( iHi>=iLo ){
int iTest = (iHi + iLo) / 2;
int cmp = (c - aEntry[iTest].iCode);
@@ -649,19 +508,17 @@ proc print_fold {zFunc} {
iHi = iTest-1;
}
}
assert( iRes<0 || c>=aEntry[iRes].iCode );
if( iRes>=0 ){
const struct TableEntry *p = &aEntry[iRes];
if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
assert( ret>0 );
}
assert( iRes>=0 && c>=aEntry[iRes].iCode );
p = &aEntry[iRes];
if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
assert( ret>0 );
}
if( bRemoveDiacritic ) ret = remove_diacritic(ret);
}
if( bRemoveDiacritic ) ret = ${::remove_diacritic}(ret);
}
}]
foreach entry $lHigh {
tl_print_if_entry $entry
@@ -732,8 +589,12 @@ proc print_fileheader {} {
*/
}]
puts ""
puts "#if defined(SQLITE_ENABLE_FTS4_UNICODE61)"
puts "#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)"
if {$::generate_fts5_code} {
# no-op
} else {
puts "#ifndef SQLITE_DISABLE_FTS3_UNICODE"
puts "#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)"
}
puts ""
puts "#include <assert.h>"
puts ""
@@ -760,22 +621,40 @@ proc print_test_main {} {
# our liking.
#
proc usage {} {
puts -nonewline stderr "Usage: $::argv0 ?-test? "
puts -nonewline stderr "Usage: $::argv0 ?-test? ?-fts5? "
puts stderr "<CaseFolding.txt file> <UnicodeData.txt file>"
exit 1
}
if {[llength $argv]!=2 && [llength $argv]!=3} usage
if {[llength $argv]==3 && [lindex $argv 0]!="-test"} usage
if {[llength $argv]<2} usage
set unicodedata.txt [lindex $argv end]
set casefolding.txt [lindex $argv end-1]
set generate_test_code [expr {[llength $argv]==3}]
set remove_diacritic remove_diacritic
set generate_test_code 0
set generate_fts5_code 0
set function_prefix "sqlite3Fts"
for {set i 0} {$i < [llength $argv]-2} {incr i} {
switch -- [lindex $argv $i] {
-test {
set generate_test_code 1
}
-fts5 {
set function_prefix sqlite3Fts5
set generate_fts5_code 1
set remove_diacritic fts5_remove_diacritic
}
default {
usage
}
}
}
print_fileheader
# Print the isalnum() function to stdout.
#
set lRange [an_load_separator_ranges]
print_isalnum sqlite3FtsUnicodeIsalnum $lRange
print_isalnum ${function_prefix}UnicodeIsalnum $lRange
# Leave a gap between the two generated C functions.
#
@@ -790,22 +669,26 @@ set mappings [rd_load_unicodedata_text ${unicodedata.txt}]
print_rd $mappings
puts ""
puts ""
print_isdiacritic sqlite3FtsUnicodeIsdiacritic $mappings
print_isdiacritic ${function_prefix}UnicodeIsdiacritic $mappings
puts ""
puts ""
# Print the fold() function to stdout.
#
print_fold sqlite3FtsUnicodeFold
print_fold ${function_prefix}UnicodeFold
# Print the test routines and main() function to stdout, if -test
# was specified.
#
if {$::generate_test_code} {
print_test_isalnum sqlite3FtsUnicodeIsalnum $lRange
print_fold_test sqlite3FtsUnicodeFold $mappings
print_test_isalnum ${function_prefix}UnicodeIsalnum $lRange
print_fold_test ${function_prefix}UnicodeFold $mappings
print_test_main
}
puts "#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */"
puts "#endif /* !defined(SQLITE_ENABLE_FTS4_UNICODE61) */"
if {$generate_fts5_code} {
# no-op
} else {
puts "#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */"
puts "#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */"
}

View File

@@ -0,0 +1,146 @@
#--------------------------------------------------------------------------
# Parameter $zName must be a path to the file UnicodeData.txt. This command
# reads the file and returns a list of mappings required to remove all
# diacritical marks from a unicode string. Each mapping is itself a list
# consisting of two elements - the unicode codepoint and the single ASCII
# character that it should be replaced with, or an empty string if the
# codepoint should simply be removed from the input. Examples:
#
# { 224 a } (replace codepoint 224 with "a")
# { 769 "" } (remove codepoint 769 from input)
#
# Mappings are only returned for non-upper case codepoints. It is assumed
# that the input has already been folded to lower case.
#
proc rd_load_unicodedata_text {zName} {
  global tl_lookup_table

  # Names of the semicolon-separated fields of one UnicodeData.txt record,
  # in file order. Each record is unpacked into local variables with
  # these names.
  set fieldNames {
    code character_name general_category canonical_combining_classes
    bidirectional_category character_decomposition_mapping
    decimal_digit_value digit_value numeric_value mirrored
    unicode_1_name iso10646_comment_field
    uppercase_mapping lowercase_mapping titlecase_mapping
  }
  set nField [llength $fieldNames]

  set mappings [list]
  set in [open $zName]
  while {[gets $in record] >= 0} {
    if {$record eq ""} continue

    set values [split $record ";"]
    if {[llength $values] != $nField} { error "parse error: $record" }
    foreach $fieldNames $values {}

    # Only two-element decompositions whose first element is a hex number
    # (base character + combining mark) are of interest.
    set decomp $character_decomposition_mapping
    if {[llength $decomp] != 2} continue
    if {![string is xdigit [lindex $decomp 0]]} continue

    set iCode  [expr "0x$code"]
    set iAscii [expr "0x[lindex $decomp 0]"]
    set iDia   [expr "0x[lindex $decomp 1]"]

    # Codepoints that have an entry in the case-folding table are skipped.
    if {[info exists tl_lookup_table($iCode)]} continue

    # Keep the mapping only if the base character is an ASCII letter
    # (a-z or A-Z); it is recorded in lower case.
    set isLower [expr {$iAscii >= 97 && $iAscii <= 122}]
    set isUpper [expr {$iAscii >= 65 && $iAscii <= 90}]
    if {$isLower || $isUpper} {
      lappend mappings [list $iCode [string tolower [format %c $iAscii]]]
      set dia($iDia) 1
    }
  }

  # Every combining mark seen above maps to the empty string (i.e. it is
  # removed from the input).
  foreach d [array names dia] {
    lappend mappings [list $d ""]
  }

  close $in
  return [lsort -integer -index 0 $mappings]
}
#-------------------------------------------------------------------------
# Parameter $zName must be a path to the file UnicodeData.txt. This command
# reads the file and returns a list of codepoints (integers). The list
# contains all codepoints in the UnicodeData.txt assigned to any "General
# Category" that is not a "Letter" or "Number".
#
proc an_load_unicodedata_text {zName} {
  # Names of the semicolon-separated fields of one UnicodeData.txt record,
  # in file order. Each record is unpacked into local variables with
  # these names.
  set fieldNames {
    code character_name general_category canonical_combining_classes
    bidirectional_category character_decomposition_mapping
    decimal_digit_value digit_value numeric_value mirrored
    unicode_1_name iso10646_comment_field
    uppercase_mapping lowercase_mapping titlecase_mapping
  }
  set nField [llength $fieldNames]

  set separators [list]
  set in [open $zName]
  while {[gets $in record] >= 0} {
    if {$record eq ""} continue

    set values [split $record ";"]
    if {[llength $values] != $nField} { error "parse error: $record" }
    foreach $fieldNames $values {}

    set iCode [expr "0x$code"]

    # A codepoint counts as alphanumeric if its general category starts
    # with "L" or "N", or if the category is exactly "Co".
    set major [string range $general_category 0 0]
    if {[lsearch {L N} $major] >= 0} continue
    if {$general_category == "Co"} continue

    lappend separators $iCode
  }
  close $in
  return $separators
}
# Parameter $zName must be a path to the file CaseFolding.txt. This command
# reads the file and populates the global array tl_lookup_table. For each
# record whose status field is "C" or "S", the array element indexed by the
# (decimal) source codepoint list is set to the (decimal) list of codepoints
# it folds to. Comment lines (starting with "#") and empty lines are ignored.
#
proc tl_load_casefolding_txt {zName} {
  global tl_lookup_table

  set fd [open $zName]
  while { ![eof $fd] } {
    set line [gets $fd]
    if {[string range $line 0 0] == "#"} continue
    if {$line == ""} continue

    # Fields are: code; status; mapping; trailing comment.
    foreach x {a b c d} {unset -nocomplain $x}
    foreach {a b c d} [split $line ";"] {}

    # Convert the hexadecimal codepoints of the first and third fields to
    # integers.
    set a2 [list]
    set c2 [list]
    foreach elem $a { lappend a2 [expr "0x[string trim $elem]"] }
    foreach elem $c { lappend c2 [expr "0x[string trim $elem]"] }
    set b [string trim $b]
    set d [string trim $d]

    if {$b=="C" || $b=="S"} { set tl_lookup_table($a2) $c2 }
  }

  # Release the channel; previously it was left open after the file had
  # been read.
  close $fd
}

View File

@@ -0,0 +1,246 @@
#
# 2014 August 24
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#--------------------------------------------------------------------------
#
# This script extracts the documentation for the API used by fts5 auxiliary
# functions from header file fts5.h. It outputs html text on stdout that
# is included in the documentation on the web.
#
# Buffer that collects the generated text when output is being captured
# instead of printed (see the second definition of [output] below).
set ::fts5_docs_output ""
# Choose how generated text is emitted. If no command named hd_putsnl
# exists, the mode is taken from the first command-line argument (if any)
# and [output] prints to stdout. Otherwise [output] appends each chunk,
# followed by a newline, to ::fts5_docs_output.
# NOTE(review): hd_putsnl is presumably defined by a documentation build
# script that sources this file -- confirm.
if {[info commands hd_putsnl]==""} {
if {[llength $argv]>0} { set ::extract_api_docs_mode [lindex $argv 0] }
proc output {text} {
puts $text
}
} else {
proc output {text} {
append ::fts5_docs_output "$text\n"
}
}
# Default to mode "api" if no mode has been selected by this point.
if {[info exists ::extract_api_docs_mode]==0} {set ::extract_api_docs_mode api}
# Read the entire text of fts5.h, located in the same directory as this
# script, into variable $data.
set input_file [file join [file dir [info script]] fts5.h]
set fd [open $input_file]
set data [read $fd]
close $fd
# Argument $data is the entire text of the fts5.h file. This function
# extracts the definition of the Fts5ExtensionApi structure from it and
# returns a key/value list of structure member names and definitions. i.e.
#
# iVersion {int iVersion} xUserData {void *(*xUserData)(Fts5Context*)} ...
#
proc get_struct_members {data} {
  # Isolate the body of the Fts5ExtensionApi structure, then strip every
  # C comment from it.
  regexp "struct Fts5ExtensionApi {(.*?)};" $data -> body
  regsub -all {/[*].*?[*]/} $body {} stripped

  # Each ";"-terminated declaration contributes one name/definition pair.
  set pairs [list]
  foreach decl [split $stripped {;}] {
    set decl [string trim $decl]
    if {$decl eq ""} continue

    # Default to the last word of the declaration as the member name. For
    # function pointers - "(*xName)(...)" - use the text between "(*"
    # and ")" instead.
    catch { set name [lindex $decl end] }
    regexp {.*?[(][*]([^)]*)[)]} $decl -> name

    lappend pairs $name $decl
  }
  return $pairs
}
# Argument $data is the entire text of the fts5.h file and $names is a list
# of structure member names. This function extracts the text between the
# "EXTENSION API FUNCTIONS" marker and the end of the enclosing comment and
# splits it into sections. It returns a flat list of the form:
#
#   header1 text1 header2 text2 ...
#
# A header is the matching entry of $names if the section heading starts
# with one, or the trimmed heading line otherwise. An empty list is returned
# if the comment contains no documentation text. An error is raised if the
# marker comment cannot be found.
#
proc get_struct_docs {data names} {
  # Extract the documentation comment from the fts5.h file.
  if {![regexp {EXTENSION API FUNCTIONS(.*?)[*]/} $data -> docs]} {
    error "EXTENSION API FUNCTIONS comment not found"
  }

  # Start with an empty result so that a comment containing no sections
  # yields an empty list instead of a "no such variable" error below.
  set res [list]
  set current_doc ""
  set current_header ""

  foreach line [split $docs "\n"] {
    # Strip the comment's leading run of "*" characters.
    regsub {[*]*} $line {} line
    if {[regexp {^ } $line]} {
      # Lines that still start with a space are body text.
      append current_doc "$line\n"
    } elseif {[string trim $line]==""} {
      # Blank lines are kept as paragraph breaks once a section has begun.
      if {$current_header!=""} { append current_doc "\n" }
    } else {
      # Anything else is a heading: flush the section collected so far and
      # begin a new one.
      if {$current_doc != ""} {
        lappend res $current_header $current_doc
        set current_doc ""
      }
      set subject n/a
      regexp {^ *([[:alpha:]]*)} $line -> subject
      if {[lsearch $names $subject]>=0} {
        set current_header $subject
      } else {
        set current_header [string trim $line]
      }
    }
  }

  # Flush the final section.
  if {$current_doc != ""} {
    lappend res $current_header $current_doc
  }
  set res
}
# Argument $data is the entire text of the fts5.h file. This function
# extracts the comment text that starts at "xCreate:" and runs to the end
# of that comment, and returns it formatted as an html <dl> list. Lines
# that look like a method heading ("x...:") become <dt> entries, blank
# lines become paragraph breaks and all other lines are copied verbatim.
#
proc get_tokenizer_docs {data} {
  regexp {(xCreate:.*?)[*]/} $data -> body

  set html "<dl>\n"
  foreach raw [split [string trim $body] "\n"] {
    # Drop the leading "**" of the comment line, if there is one.
    regexp {[*][*](.*)} $raw -> raw

    if {[regexp {^ ?x.*:} $raw]} {
      append html "<dt><b>$raw</b></dt><dd><p style=margin-top:0>\n"
    } elseif {[string trim $raw] eq ""} {
      append html "<p>\n"
    } else {
      append html "$raw\n"
    }
  }
  append html "</dl>\n"
  return $html
}
# Argument $data is the entire text of the fts5.h file. This function
# writes, via [output], an html <dl> list documenting the extension API:
# one <dt> heading per documentation section followed by a <dd> containing
# the section text. Sections named after a structure member use the member
# definition as their heading and get an "id" attribute equal to the member
# name; any other section uses its own header text as the heading.
#
proc get_api_docs {data} {
  # Initialize local array M as a map from Fts5ExtensionApi member name
  # to member definition. i.e.
  #
  #   iVersion  -> {int iVersion}
  #   xUserData -> {void *(*xUserData)(Fts5Context*)}
  #   ...
  #
  array set M [get_struct_members $data]

  # Initialize local list D as a map from section name to documentation
  # text. Most (all?) section names are structure member names.
  #
  set D [get_struct_docs $data [array names M]]

  output "<dl>"
  foreach {sub docs} $D {
    if {[info exists M($sub)]} {
      set hdr $M($sub)
      set link " id=$sub"
    } else {
      # Not a structure member: fall back to the section header itself.
      # Without this, $hdr would either be unset (first iteration) or
      # still hold the definition from the previous section.
      set hdr $sub
      set link ""
    }

    #output "<hr color=#eeeee style=\"margin:1em 8.4ex 0 8.4ex;\"$link>"
    #set style "padding-left:6ex;font-size:1.4em;display:block"
    #output "<h style=\"$style\"><pre>$hdr</pre></h>"

    # Remove the indentation in front of a ")" that starts a line of the
    # definition.
    regsub -line {^ *[)]} $hdr ")" hdr
    output "<dt style=\"white-space:pre;font-family:monospace;font-size:120%\""
    output "$link>"
    output "<b>$hdr</b></dt><dd>"

    # Emit the section text. $mode is the html element currently open
    # ("p", "codeblock" or "" for none); a blank line closes it. Only the
    # first element of each section carries the $margin style.
    set mode ""
    set margin " style=margin-top:0.1em"
    foreach line [split [string trim $docs] "\n"] {
      if {[string trim $line]==""} {
        if {$mode != ""} {output "</$mode>"}
        set mode ""
      } elseif {$mode == ""} {
        if {[regexp {^ } $line]} {
          set mode codeblock
        } else {
          set mode p
        }
        output "<$mode$margin>"
        set margin ""
      }
      output $line
    }
    if {$mode != ""} {output "</$mode>"}
    output "</dd>"
  }
  output "</dl>"
}
# Argument $data is the entire text of the fts5.h file. Parameters $start
# and $end are regular expressions. This function returns the text of every
# run of lines that begins with a line matching $start and ends with the
# next line matching $end (both inclusive; one line may match both). In the
# returned text each C comment is wrapped in <i>...</i> tags.
#
proc get_fts5_struct {data start end} {
  set text ""
  set inside 0
  foreach ln [split $data "\n"] {
    if {!$inside && [regexp $start $ln]} {
      set inside 1
    }
    if {$inside} {
      append text "$ln\n"
      if {[regexp $end $ln]} {
        set inside 0
      }
    }
  }
  return [string map [list /* <i>/* */ */</i>] $text]
}
# Argument $data is the entire text of the fts5.h file. Emit, via [output],
# the piece of documentation selected by ::extract_api_docs_mode. Unknown
# modes produce no output.
#
proc main {data} {
  switch -exact -- $::extract_api_docs_mode {
    fts5_api {
      set text [get_fts5_struct $data "typedef struct fts5_api" "^\};"]
      output $text
    }

    fts5_tokenizer {
      set text [get_fts5_struct $data "typedef struct Fts5Tokenizer" "^\};"]
      output $text
    }

    fts5_extension {
      set text [get_fts5_struct $data "typedef.*Fts5ExtensionApi" "^.;"]
      output $text
    }

    Fts5ExtensionApi {
      set text [get_fts5_struct $data "^struct Fts5ExtensionApi" "^.;"]

      # Turn the name of each member that starts with "x" into a link to
      # the anchor carrying the same name.
      set links [list]
      foreach {member defn} [get_struct_members $data] {
        if {[string match x* $member]} {
          lappend links $member "<a href=#$member>$member</a>"
        }
      }
      output [string map $links $text]
    }

    api {
      get_api_docs $data
    }

    tokenizer_api {
      output [get_tokenizer_docs $data]
    }

    default {
    }
  }
}
# Generate the documentation for the selected mode from the text of fts5.h.
main $data
# Make the accumulated output the result of the last command of this script.
# NOTE(review): presumably so that a script which sources this file receives
# the generated text as the result of [source] -- confirm against the caller.
set ::fts5_docs_output

366
ext/fts5/fts5.h Normal file
View File

@@ -0,0 +1,366 @@
/*
** 2014 May 31
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
** Interfaces to extend FTS5. Using the interfaces defined in this file,
** FTS5 may be extended with:
**
** * custom tokenizers, and
** * custom auxiliary functions.
*/
#ifndef _FTS5_H
#define _FTS5_H
#include "sqlite3.h"
/*************************************************************************
** CUSTOM AUXILIARY FUNCTIONS
**
** Virtual table implementations may overload SQL functions by implementing
** the sqlite3_module.xFindFunction() method.
*/
typedef struct Fts5ExtensionApi Fts5ExtensionApi;
typedef struct Fts5Context Fts5Context;
typedef struct Fts5PhraseIter Fts5PhraseIter;
typedef void (*fts5_extension_function)(
const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
Fts5Context *pFts, /* First arg to pass to pApi functions */
sqlite3_context *pCtx, /* Context for returning result/error */
int nVal, /* Number of values in apVal[] array */
sqlite3_value **apVal /* Array of trailing arguments */
);
/*
** Definition of the Fts5PhraseIter type declared above. It consists of two
** read-only byte pointers.
** NOTE(review): the roles of fields a and b are not documented in this part
** of the header; presumably they delimit the data being iterated (current
** position and end) -- confirm against the code that uses this structure.
*/
struct Fts5PhraseIter {
const unsigned char *a;
const unsigned char *b;
};
/*
** EXTENSION API FUNCTIONS
**
** xUserData(pFts):
** Return a copy of the context pointer the extension function was
** registered with.
**
** xColumnTotalSize(pFts, iCol, pnToken):
** If parameter iCol is less than zero, set output variable *pnToken
** to the total number of tokens in the FTS5 table. Or, if iCol is
** non-negative but less than the number of columns in the table, set
** *pnToken to the total number of tokens in column iCol, considering all
** rows in the FTS5 table.
**
** If parameter iCol is greater than or equal to the number of columns
** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g.
** an OOM condition or IO error), an appropriate SQLite error code is
** returned.
**
** xColumnCount(pFts):
** Return the number of columns in the table.
**
** xColumnSize(pFts, iCol, pnToken):
** If parameter iCol is less than zero, set output variable *pnToken
** to the total number of tokens in the current row. Or, if iCol is
** non-negative but less than the number of columns in the table, set
** *pnToken to the number of tokens in column iCol of the current row.
**
** If parameter iCol is greater than or equal to the number of columns
** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g.
** an OOM condition or IO error), an appropriate SQLite error code is
** returned.
**
** xColumnText:
** This function attempts to retrieve the text of column iCol of the
** current document. If successful, (*pz) is set to point to a buffer
** containing the text in utf-8 encoding, (*pn) is set to the size in bytes
** (not characters) of the buffer and SQLITE_OK is returned. Otherwise,
** if an error occurs, an SQLite error code is returned and the final values
** of (*pz) and (*pn) are undefined.
**
** xPhraseCount:
** Returns the number of phrases in the current query expression.
**
** xPhraseSize:
** Returns the number of tokens in phrase iPhrase of the query. Phrases
** are numbered starting from zero.
**
** xInstCount:
** Set *pnInst to the total number of occurrences of all phrases within
** the query within the current row. Return SQLITE_OK if successful, or
** an error code (e.g. SQLITE_NOMEM) if an error occurs.
**
** xInst:
** Query for the details of phrase match iIdx within the current row.
** Phrase matches are numbered starting from zero, so the iIdx argument
** should be greater than or equal to zero and smaller than the value
** output by xInstCount().
**
** Returns SQLITE_OK if successful, or an error code (e.g. SQLITE_NOMEM)
** if an error occurs.
**
** xRowid:
** Returns the rowid of the current row.
**
** xTokenize:
** Tokenize text using the tokenizer belonging to the FTS5 table.
**
** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback):
** This API function is used to query the FTS table for phrase iPhrase
** of the current query. Specifically, a query equivalent to:
**
** ... FROM ftstable WHERE ftstable MATCH $p ORDER BY rowid
**
** with $p set to a phrase equivalent to the phrase iPhrase of the
** current query is executed. For each row visited, the callback function
** passed as the fourth argument is invoked. The context and API objects
** passed to the callback function may be used to access the properties of
** each matched row. Invoking Api.xUserData() returns a copy of the pointer
** passed as the third argument (pUserData) to xQueryPhrase().
**
** If the callback function returns any value other than SQLITE_OK, the
** query is abandoned and the xQueryPhrase function returns immediately.
** If the returned value is SQLITE_DONE, xQueryPhrase returns SQLITE_OK.
** Otherwise, the error code is propagated upwards.
**
** If the query runs to completion without incident, SQLITE_OK is returned.
** Or, if some error occurs before the query completes or is aborted by
** the callback, an SQLite error code is returned.
**
**
** xSetAuxdata(pFts5, pAux, xDelete)
**
** Save the pointer passed as the second argument as the extension functions
** "auxiliary data". The pointer may then be retrieved by the current or any
** future invocation of the same fts5 extension function made as part of
** the same MATCH query using the xGetAuxdata() API.
**
** Each extension function is allocated a single auxiliary data slot for
** each FTS query (MATCH expression). If the extension function is invoked
** more than once for a single FTS query, then all invocations share a
** single auxiliary data context.
**
** If there is already an auxiliary data pointer when this function is
** invoked, then it is replaced by the new pointer. If an xDelete callback
** was specified along with the original pointer, it is invoked at this
** point.
**
** The xDelete callback, if one is specified, is also invoked on the
** auxiliary data pointer after the FTS5 query has finished.
**
** If an error (e.g. an OOM condition) occurs within this function,
** the auxiliary data is set to NULL and an error code returned. If the
** xDelete parameter was not NULL, it is invoked on the auxiliary data
** pointer before returning.
**
**
** xGetAuxdata(pFts5, bClear)
**
** Returns the current auxiliary data pointer for the fts5 extension
** function. See the xSetAuxdata() method for details.
**
** If the bClear argument is non-zero, then the auxiliary data is cleared
** (set to NULL) before this function returns. In this case the xDelete,
** if any, is not invoked.
**
**
** xRowCount(pFts5, pnRow)
**
** This function is used to retrieve the total number of rows in the table.
** In other words, the same value that would be returned by:
**
** SELECT count(*) FROM ftstable;
**
** xPhraseFirst()
** This function is used, along with type Fts5PhraseIter and the xPhraseNext
** method, to iterate through all instances of a single query phrase within
** the current row. This is the same information as is accessible via the
** xInstCount/xInst APIs. While the xInstCount/xInst APIs are more convenient
** to use, this API may be faster under some circumstances. To iterate
** through instances of phrase iPhrase, use the following code:
**
** Fts5PhraseIter iter;
** int iCol, iOff;
** for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff);
** iOff>=0;
** pApi->xPhraseNext(pFts, &iter, &iCol, &iOff)
** ){
** // An instance of phrase iPhrase at offset iOff of column iCol
** }
**
** The Fts5PhraseIter structure is defined above. Applications should not
** modify this structure directly - it should only be used as shown above
** with the xPhraseFirst() and xPhraseNext() API methods.
**
** xPhraseNext()
** See xPhraseFirst above.
*/
/*
** Table of methods made available to auxiliary functions through the pApi
** argument of fts5_extension_function. Every method takes the Fts5Context
** pointer received by the auxiliary function as its first argument. Methods
** declared to return int return an SQLite error code, except xColumnCount,
** xPhraseCount and xPhraseSize, which return the requested count directly.
*/
struct Fts5ExtensionApi {
int iVersion; /* Currently always set to 1 */
void *(*xUserData)(Fts5Context*);
int (*xColumnCount)(Fts5Context*);
int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow);
int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken);
/* Note: pText/nText come BEFORE pCtx here, the reverse of the argument
** order used by fts5_tokenizer.xTokenize(). */
int (*xTokenize)(Fts5Context*,
const char *pText, int nText, /* Text to tokenize */
void *pCtx, /* Context passed to xToken() */
int (*xToken)(void*, const char*, int, int, int) /* Callback */
);
int (*xPhraseCount)(Fts5Context*);
int (*xPhraseSize)(Fts5Context*, int iPhrase);
int (*xInstCount)(Fts5Context*, int *pnInst);
int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff);
sqlite3_int64 (*xRowid)(Fts5Context*);
int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn);
int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken);
/* The unnamed final parameter is the callback invoked for each row visited
** by the phrase query. */
int (*xQueryPhrase)(Fts5Context*, int iPhrase, void *pUserData,
int(*)(const Fts5ExtensionApi*,Fts5Context*,void*)
);
int (*xSetAuxdata)(Fts5Context*, void *pAux, void(*xDelete)(void*));
void *(*xGetAuxdata)(Fts5Context*, int bClear);
/* The trailing int* parameters of xPhraseFirst are the column and offset
** outputs, named piCol and piOff in xPhraseNext. */
void (*xPhraseFirst)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*, int*);
void (*xPhraseNext)(Fts5Context*, Fts5PhraseIter*, int *piCol, int *piOff);
};
/*
** END OF CUSTOM AUXILIARY FUNCTIONS
*************************************************************************/
/*************************************************************************
** CUSTOM TOKENIZERS
**
** Applications may also register custom tokenizer types. A tokenizer
** is registered by providing fts5 with a populated instance of the
** following structure. All structure methods must be defined, setting
** any member of the fts5_tokenizer struct to NULL leads to undefined
** behaviour. The structure methods are expected to function as follows:
**
** xCreate:
** This function is used to allocate and initialize a tokenizer instance.
** A tokenizer instance is required to actually tokenize text.
**
** The first argument passed to this function is a copy of the (void*)
** pointer provided by the application when the fts5_tokenizer object
** was registered with FTS5 (the third argument to xCreateTokenizer()).
** The second and third arguments are an array of nul-terminated strings
** containing the tokenizer arguments, if any, specified following the
** tokenizer name as part of the CREATE VIRTUAL TABLE statement used
** to create the FTS5 table.
**
** The final argument is an output variable. If successful, (*ppOut)
** should be set to point to the new tokenizer handle and SQLITE_OK
** returned. If an error occurs, some value other than SQLITE_OK should
** be returned. In this case, fts5 assumes that the final value of *ppOut
** is undefined.
**
** xDelete:
** This function is invoked to delete a tokenizer handle previously
** allocated using xCreate(). Fts5 guarantees that this function will
** be invoked exactly once for each successful call to xCreate().
**
** xTokenize:
** This function is expected to tokenize the nText byte string indicated
** by argument pText. pText is not necessarily nul-terminated. The first argument
** passed to this function is a pointer to an Fts5Tokenizer object returned
** by an earlier call to xCreate().
**
** For each token in the input string, the supplied callback xToken() must
** be invoked. The first argument to it should be a copy of the pointer
** passed as the second argument to xTokenize(). The next two arguments
** are a pointer to a buffer containing the token text, and the size of
** the token in bytes. The 4th and 5th arguments are the byte offsets of
** the first byte of and first byte immediately following the text from
** which the token is derived within the input.
**
** FTS5 assumes the xToken() callback is invoked for each token in the
** order that they occur within the input text.
**
** If an xToken() callback returns any value other than SQLITE_OK, then
** the tokenization should be abandoned and the xTokenize() method should
** immediately return a copy of the xToken() return value. Or, if the
** input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally,
** if an error occurs with the xTokenize() implementation itself, it
** may abandon the tokenization and return any error code other than
** SQLITE_OK or SQLITE_DONE.
**
*/
typedef struct Fts5Tokenizer Fts5Tokenizer;
typedef struct fts5_tokenizer fts5_tokenizer;
/*
** Method table for a custom tokenizer. All three members must be non-NULL.
** Note that xTokenize() here takes pCtx BEFORE pText/nText, whereas
** Fts5ExtensionApi.xTokenize() takes pText/nText first.
*/
struct fts5_tokenizer {
int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut);
void (*xDelete)(Fts5Tokenizer*);
int (*xTokenize)(Fts5Tokenizer*,
void *pCtx,
const char *pText, int nText,
int (*xToken)(
void *pCtx, /* Copy of 2nd argument to xTokenize() */
const char *pToken, /* Pointer to buffer containing token */
int nToken, /* Size of token in bytes */
int iStart, /* Byte offset of token within input text */
int iEnd /* Byte offset of end of token within input text */
)
);
};
/*
** END OF CUSTOM TOKENIZERS
*************************************************************************/
/*************************************************************************
** FTS5 EXTENSION REGISTRATION API
*/
typedef struct fts5_api fts5_api;
/*
** Method table through which applications register FTS5 extensions. Each
** method takes the fts5_api object itself as its first argument.
*/
struct fts5_api {
int iVersion; /* Currently always set to 1 */
/* Create a new tokenizer */
/* pContext is the pointer later passed as the first argument to
** pTokenizer->xCreate(). NOTE(review): xDestroy is presumably invoked on
** pContext when the registration is discarded - confirm. */
int (*xCreateTokenizer)(
fts5_api *pApi,
const char *zName,
void *pContext,
fts5_tokenizer *pTokenizer,
void (*xDestroy)(void*)
);
/* Find an existing tokenizer */
/* NOTE(review): ppContext and pTokenizer look like output parameters
** (the registered context pointer and a copy of the method table) -
** confirm against the implementation. */
int (*xFindTokenizer)(
fts5_api *pApi,
const char *zName,
void **ppContext,
fts5_tokenizer *pTokenizer
);
/* Create a new auxiliary function */
/* xFunction is the implementation, see fts5_extension_function.
** NOTE(review): pContext is presumably the pointer later returned by
** Fts5ExtensionApi.xUserData() - confirm. */
int (*xCreateFunction)(
fts5_api *pApi,
const char *zName,
void *pContext,
fts5_extension_function xFunction,
void (*xDestroy)(void*)
);
};
/*
** END OF REGISTRATION API
*************************************************************************/
#endif /* _FTS5_H */

700
ext/fts5/fts5Int.h Normal file
View File

@@ -0,0 +1,700 @@
/*
** 2014 May 31
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
*/
#ifndef _FTS5INT_H
#define _FTS5INT_H
#include "fts5.h"
#include "sqlite3ext.h"
SQLITE_EXTENSION_INIT1
#include <string.h>
#include <assert.h>
#ifndef SQLITE_AMALGAMATION
typedef unsigned char u8;
typedef unsigned int u32;
typedef unsigned short u16;
typedef sqlite3_int64 i64;
typedef sqlite3_uint64 u64;
/*
** Utility macros that are only defined here when SQLITE_AMALGAMATION is not
** defined.
**
** ArraySize(x) yields the number of elements in x, which must be a true
** array (not a pointer).
**
** testcase(x) expands to nothing, and ALWAYS(x)/NEVER(x) expand to the
** constants 1 and 0 WITHOUT evaluating x. The argument of any of these
** three macros must therefore have no side effects.
**
** MIN() and MAX() evaluate one of their arguments twice, so their
** arguments must also be free of side effects.
*/
#define ArraySize(x) (sizeof(x) / sizeof(x[0]))
#define testcase(x)
#define ALWAYS(x) 1
#define NEVER(x) 0
#define MIN(x,y) (((x) < (y)) ? (x) : (y))
#define MAX(x,y) (((x) > (y)) ? (x) : (y))
/*
** Constants for the largest and smallest possible 64-bit signed integers.
*/
# define LARGEST_INT64 (0xffffffff|(((i64)0x7fffffff)<<32))
# define SMALLEST_INT64 (((i64)-1) - LARGEST_INT64)
#endif
/*
** Maximum number of prefix indexes on single FTS5 table. This must be
** less than 32. If it is set to anything larger than that, an #error
** directive in fts5_index.c will cause the build to fail.
*/
#define FTS5_MAX_PREFIX_INDEXES 31
#define FTS5_DEFAULT_NEARDIST 10
#define FTS5_DEFAULT_RANK "bm25"
/* Name of rank and rowid columns */
#define FTS5_RANK_NAME "rank"
#define FTS5_ROWID_NAME "rowid"
/*
** FTS5_CORRUPT expands to a call to sqlite3Fts5Corrupt() in SQLITE_DEBUG
** builds and to the constant SQLITE_CORRUPT_VTAB otherwise.
** NOTE(review): routing debug builds through a function is presumably so a
** breakpoint can be set where corruption is first detected - confirm.
*/
#ifdef SQLITE_DEBUG
# define FTS5_CORRUPT sqlite3Fts5Corrupt()
int sqlite3Fts5Corrupt(void);
#else
# define FTS5_CORRUPT SQLITE_CORRUPT_VTAB
#endif
/*
** The assert_nc() macro is similar to the assert() macro, except that it
** is used for assert() conditions that are true only if it can be
** guaranteed that the database is not corrupt.
*/
/* In SQLITE_DEBUG builds the condition is not enforced while the global
** sqlite3_fts5_may_be_corrupt is non-zero. In other builds assert_nc() is
** simply assert(). */
#ifdef SQLITE_DEBUG
extern int sqlite3_fts5_may_be_corrupt;
# define assert_nc(x) assert(sqlite3_fts5_may_be_corrupt || (x))
#else
# define assert_nc(x) assert(x)
#endif
typedef struct Fts5Global Fts5Global;
/**************************************************************************
** Interface to code in fts5_config.c. fts5_config.c contains code
** to parse the arguments passed to the CREATE VIRTUAL TABLE statement.
*/
typedef struct Fts5Config Fts5Config;
/*
** An instance of the following structure encodes all information that can
** be gleaned from the CREATE VIRTUAL TABLE statement.
**
** And all information loaded from the %_config table.
**
** nAutomerge:
** The minimum number of segments that an auto-merge operation should
** attempt to merge together. A value of 1 sets the object to use the
** compile time default. Zero disables auto-merge altogether.
**
** zContent:
**
** zContentRowid:
** The value of the content_rowid= option, if one was specified. Or
** the string "rowid" otherwise. This text is not quoted - if it is
** used as part of an SQL statement it needs to be quoted appropriately.
**
** zContentExprlist:
**
** pzErrmsg:
** This exists in order to allow the fts5_index.c module to return a
** decent error message if it encounters a file-format version it does
** not understand.
**
** bColumnsize:
** True if the %_docsize table is created.
**
*/
struct Fts5Config {
sqlite3 *db; /* Database handle */
char *zDb; /* Database holding FTS index (e.g. "main") */
char *zName; /* Name of FTS index */
int nCol; /* Number of columns */
char **azCol; /* Column names */
u8 *abUnindexed; /* True for unindexed columns */
int nPrefix; /* Number of prefix indexes */
int *aPrefix; /* Sizes in bytes of nPrefix prefix indexes */
int eContent; /* An FTS5_CONTENT value */
char *zContent; /* content table */
char *zContentRowid; /* "content_rowid=" option value */
int bColumnsize; /* "columnsize=" option value (dflt==1) */
/* NOTE(review): zContentExprlist is undocumented; the name suggests an SQL
** expression list used when reading the content table - confirm. */
char *zContentExprlist;
/* Tokenizer handle and tokenizer method table, the two outputs of
** sqlite3Fts5GetTokenizer(). Presumably pTok was created by
** pTokApi->xCreate() - confirm. */
Fts5Tokenizer *pTok;
fts5_tokenizer *pTokApi;
/* Values loaded from the %_config table */
int iCookie; /* Incremented when %_config is modified */
int pgsz; /* Approximate page size used in %_data */
int nAutomerge; /* 'automerge' setting */
int nCrisisMerge; /* Maximum allowed segments per level */
char *zRank; /* Name of rank function */
char *zRankArgs; /* Arguments to rank function */
/* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */
char **pzErrmsg;
};
/* Current expected value of %_config table 'version' field */
#define FTS5_CURRENT_VERSION 3
/* Allowed values for Fts5Config.eContent */
#define FTS5_CONTENT_NORMAL 0
#define FTS5_CONTENT_NONE 1
#define FTS5_CONTENT_EXTERNAL 2
int sqlite3Fts5ConfigParse(
Fts5Global*, sqlite3*, int, const char **, Fts5Config**, char**
);
void sqlite3Fts5ConfigFree(Fts5Config*);
int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig);
int sqlite3Fts5Tokenize(
Fts5Config *pConfig, /* FTS5 Configuration object */
const char *pText, int nText, /* Text to tokenize */
void *pCtx, /* Context passed to xToken() */
int (*xToken)(void*, const char*, int, int, int) /* Callback */
);
void sqlite3Fts5Dequote(char *z);
/* Load the contents of the %_config table */
int sqlite3Fts5ConfigLoad(Fts5Config*, int);
/* Set the value of a single config attribute */
int sqlite3Fts5ConfigSetValue(Fts5Config*, const char*, sqlite3_value*, int*);
int sqlite3Fts5ConfigParseRank(const char*, char**, char**);
/*
** End of interface to code in fts5_config.c.
**************************************************************************/
/**************************************************************************
** Interface to code in fts5_buffer.c.
*/
/*
** Buffer object for the incremental building of string data.
*/
typedef struct Fts5Buffer Fts5Buffer;
struct Fts5Buffer {
u8 *p; /* Buffer data */
int n; /* NOTE(review): presumably bytes of valid data at p[] - confirm */
int nSpace; /* NOTE(review): presumably bytes allocated at p[] - confirm */
};
int sqlite3Fts5BufferGrow(int*, Fts5Buffer*, int);
void sqlite3Fts5BufferAppendVarint(int*, Fts5Buffer*, i64);
void sqlite3Fts5BufferAppendBlob(int*, Fts5Buffer*, int, const u8*);
void sqlite3Fts5BufferAppendString(int *, Fts5Buffer*, const char*);
void sqlite3Fts5BufferFree(Fts5Buffer*);
void sqlite3Fts5BufferZero(Fts5Buffer*);
void sqlite3Fts5BufferSet(int*, Fts5Buffer*, int, const u8*);
void sqlite3Fts5BufferAppendPrintf(int *, Fts5Buffer*, char *zFmt, ...);
void sqlite3Fts5BufferAppend32(int*, Fts5Buffer*, int);
char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...);
/*
** Short-name aliases for the sqlite3Fts5BufferXXX() functions declared
** above. Each alias forwards its arguments unchanged. There are no aliases
** for sqlite3Fts5BufferAppendString() or sqlite3Fts5BufferAppendPrintf().
*/
#define fts5BufferZero(x) sqlite3Fts5BufferZero(x)
#define fts5BufferGrow(a,b,c) sqlite3Fts5BufferGrow(a,b,c)
#define fts5BufferAppendVarint(a,b,c) sqlite3Fts5BufferAppendVarint(a,b,c)
#define fts5BufferFree(a) sqlite3Fts5BufferFree(a)
#define fts5BufferAppendBlob(a,b,c,d) sqlite3Fts5BufferAppendBlob(a,b,c,d)
#define fts5BufferSet(a,b,c,d) sqlite3Fts5BufferSet(a,b,c,d)
#define fts5BufferAppend32(a,b,c) sqlite3Fts5BufferAppend32(a,b,c)
/* Write and decode big-endian 32-bit integer values */
void sqlite3Fts5Put32(u8*, int);
int sqlite3Fts5Get32(const u8*);
/*
** Split a 64-bit position value into its two parts: the column number is
** held in the upper 32 bits and the token offset in the lower 32 bits.
**
** Both the macro parameter and the whole expansion are parenthesized so
** that the macros behave like function calls for any argument expression
** (e.g. "a | b") and in any surrounding expression.
*/
#define FTS5_POS2COLUMN(iPos) ((int)((iPos) >> 32))
#define FTS5_POS2OFFSET(iPos) ((int)((iPos) & 0xFFFFFFFF))
typedef struct Fts5PoslistReader Fts5PoslistReader;
/*
** Iterator over a position list. It is initialized by
** sqlite3Fts5PoslistReaderInit() and advanced by
** sqlite3Fts5PoslistReaderNext().
*/
struct Fts5PoslistReader {
/* Variables used only by sqlite3Fts5PoslistIterXXX() functions. */
int iCol; /* If (iCol>=0), this column only */
const u8 *a; /* Position list to iterate through */
int n; /* Size of buffer at a[] in bytes */
int i; /* Current offset in a[] */
/* Output variables */
int bEof; /* Set to true at EOF */
i64 iPos; /* (column<<32) + offset; see FTS5_POS2COLUMN/FTS5_POS2OFFSET */
};
int sqlite3Fts5PoslistReaderInit(
int iCol, /* If (iCol>=0), this column only */
const u8 *a, int n, /* Poslist buffer to iterate through */
Fts5PoslistReader *pIter /* Iterator object to initialize */
);
int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader*);
typedef struct Fts5PoslistWriter Fts5PoslistWriter;
/* State carried between calls to sqlite3Fts5PoslistWriterAppend(). */
struct Fts5PoslistWriter {
i64 iPrev; /* NOTE(review): presumably the previous position appended - confirm */
};
int sqlite3Fts5PoslistWriterAppend(Fts5Buffer*, Fts5PoslistWriter*, i64);
int sqlite3Fts5PoslistNext64(
const u8 *a, int n, /* Buffer containing poslist */
int *pi, /* IN/OUT: Offset within a[] */
i64 *piOff /* IN/OUT: Current offset */
);
/* Malloc utility */
void *sqlite3Fts5MallocZero(int *pRc, int nByte);
char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn);
/* Character set tests (like isspace(), isalpha() etc.) */
int sqlite3Fts5IsBareword(char t);
/*
** End of interface to code in fts5_buffer.c.
**************************************************************************/
/**************************************************************************
** Interface to code in fts5_index.c. fts5_index.c contains code
** to access the data stored in the %_data table.
*/
typedef struct Fts5Index Fts5Index;
typedef struct Fts5IndexIter Fts5IndexIter;
/*
** Values used as part of the flags argument passed to IndexQuery().
*/
#define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */
#define FTS5INDEX_QUERY_DESC 0x0002 /* Docs in descending rowid order */
#define FTS5INDEX_QUERY_TEST_NOIDX 0x0004 /* Do not use prefix index */
#define FTS5INDEX_QUERY_SCAN 0x0008 /* Scan query (fts5vocab) */
/*
** Create/destroy an Fts5Index object.
*/
int sqlite3Fts5IndexOpen(Fts5Config *pConfig, int bCreate, Fts5Index**, char**);
int sqlite3Fts5IndexClose(Fts5Index *p);
/*
** for(
** sqlite3Fts5IndexQuery(p, "token", 5, 0, &pIter);
** 0==sqlite3Fts5IterEof(pIter);
** sqlite3Fts5IterNext(pIter)
** ){
** i64 iRowid = sqlite3Fts5IterRowid(pIter);
** }
*/
/*
** Open a new iterator to iterate though all rowids that match the
** specified token or token prefix.
*/
int sqlite3Fts5IndexQuery(
Fts5Index *p, /* FTS index to query */
const char *pToken, int nToken, /* Token (or prefix) to query for */
int flags, /* Mask of FTS5INDEX_QUERY_X flags */
Fts5IndexIter **ppIter
);
/*
** The various operations on open token or token prefix iterators opened
** using sqlite3Fts5IndexQuery().
*/
int sqlite3Fts5IterEof(Fts5IndexIter*);
int sqlite3Fts5IterNext(Fts5IndexIter*);
int sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch);
i64 sqlite3Fts5IterRowid(Fts5IndexIter*);
int sqlite3Fts5IterPoslist(Fts5IndexIter*, const u8 **pp, int *pn, i64 *pi);
int sqlite3Fts5IterPoslistBuffer(Fts5IndexIter *pIter, Fts5Buffer *pBuf);
/*
** Close an iterator opened by sqlite3Fts5IndexQuery().
*/
void sqlite3Fts5IterClose(Fts5IndexIter*);
/*
** This interface is used by the fts5vocab module.
*/
const char *sqlite3Fts5IterTerm(Fts5IndexIter*, int*);
int sqlite3Fts5IterNextScan(Fts5IndexIter*);
/*
** Insert or remove data to or from the index. Each time a document is
** added to or removed from the index, this function is called one or more
** times.
**
** For an insert, it must be called once for each token in the new document.
** If the operation is a delete, it must be called (at least) once for each
** unique token in the document with an iCol value less than zero. The iPos
** argument is ignored for a delete.
*/
int sqlite3Fts5IndexWrite(
Fts5Index *p, /* Index to write to */
int iCol, /* Column token appears in (-ve -> delete) */
int iPos, /* Position of token within column */
const char *pToken, int nToken /* Token to add or remove to or from index */
);
/*
** Indicate that subsequent calls to sqlite3Fts5IndexWrite() pertain to
** document iDocid.
*/
int sqlite3Fts5IndexBeginWrite(
Fts5Index *p, /* Index to write to */
i64 iDocid /* Docid to add or remove data from */
);
/*
** Flush any data stored in the in-memory hash tables to the database.
** If the bCommit flag is true, also close any open blob handles.
*/
int sqlite3Fts5IndexSync(Fts5Index *p, int bCommit);
/*
** Discard any data stored in the in-memory hash tables. Do not write it
** to the database. Additionally, assume that the contents of the %_data
** table may have changed on disk. So any in-memory caches of %_data
** records must be invalidated.
*/
int sqlite3Fts5IndexRollback(Fts5Index *p);
/*
** Retrieve and clear the current error code, respectively.
*/
int sqlite3Fts5IndexErrcode(Fts5Index*);
void sqlite3Fts5IndexReset(Fts5Index*);
/*
** Get or set the "averages" record.
*/
int sqlite3Fts5IndexGetAverages(Fts5Index *p, Fts5Buffer *pBuf);
int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8*, int);
/*
** Functions called by the storage module as part of integrity-check.
*/
u64 sqlite3Fts5IndexCksum(Fts5Config*,i64,int,int,const char*,int);
int sqlite3Fts5IndexIntegrityCheck(Fts5Index*, u64 cksum);
/*
** Called during virtual module initialization to register UDF
** fts5_decode() with SQLite
*/
int sqlite3Fts5IndexInit(sqlite3*);
int sqlite3Fts5IndexSetCookie(Fts5Index*, int);
/*
** Return the total number of entries read from the %_data table by
** this connection since it was created.
*/
int sqlite3Fts5IndexReads(Fts5Index *p);
int sqlite3Fts5IndexReinit(Fts5Index *p);
int sqlite3Fts5IndexOptimize(Fts5Index *p);
int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge);
int sqlite3Fts5IndexLoadConfig(Fts5Index *p);
/*
** End of interface to code in fts5_index.c.
**************************************************************************/
/**************************************************************************
** Interface to code in fts5_varint.c.
*/
int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v);
int sqlite3Fts5GetVarintLen(u32 iVal);
u8 sqlite3Fts5GetVarint(const unsigned char*, u64*);
int sqlite3Fts5PutVarint(unsigned char *p, u64 v);
/*
** Short-name wrappers. fts5GetVarint32() takes the destination variable b
** itself rather than a pointer to it: the macro takes b's address and casts
** it to (u32*). b must therefore be an lvalue of a 32-bit integer type.
*/
#define fts5GetVarint32(a,b) sqlite3Fts5GetVarint32(a,(u32*)&b)
#define fts5GetVarint sqlite3Fts5GetVarint
/*
** End of interface to code in fts5_varint.c.
**************************************************************************/
/**************************************************************************
** Interface to code in fts5.c.
*/
int sqlite3Fts5GetTokenizer(
Fts5Global*,
const char **azArg,
int nArg,
Fts5Tokenizer**,
fts5_tokenizer**,
char **pzErr
);
Fts5Index *sqlite3Fts5IndexFromCsrid(Fts5Global*, i64, int*);
/*
** End of interface to code in fts5.c.
**************************************************************************/
/**************************************************************************
** Interface to code in fts5_hash.c.
*/
typedef struct Fts5Hash Fts5Hash;
/*
** Create a hash table, free a hash table.
*/
int sqlite3Fts5HashNew(Fts5Hash**, int *pnSize);
void sqlite3Fts5HashFree(Fts5Hash*);
int sqlite3Fts5HashWrite(
Fts5Hash*,
i64 iRowid, /* Rowid for this entry */
int iCol, /* Column token appears in (-ve -> delete) */
int iPos, /* Position of token within column */
char bByte,
const char *pToken, int nToken /* Token to add or remove to or from index */
);
/*
** Empty (but do not delete) a hash table.
*/
void sqlite3Fts5HashClear(Fts5Hash*);
int sqlite3Fts5HashQuery(
Fts5Hash*, /* Hash table to query */
const char *pTerm, int nTerm, /* Query term */
const u8 **ppDoclist, /* OUT: Pointer to doclist for pTerm */
int *pnDoclist /* OUT: Size of doclist in bytes */
);
int sqlite3Fts5HashScanInit(
Fts5Hash*, /* Hash table to query */
const char *pTerm, int nTerm /* Query prefix */
);
void sqlite3Fts5HashScanNext(Fts5Hash*);
int sqlite3Fts5HashScanEof(Fts5Hash*);
void sqlite3Fts5HashScanEntry(Fts5Hash *,
const char **pzTerm, /* OUT: term (nul-terminated) */
const u8 **ppDoclist, /* OUT: pointer to doclist */
int *pnDoclist /* OUT: size of doclist in bytes */
);
/*
** End of interface to code in fts5_hash.c.
**************************************************************************/
/**************************************************************************
** Interface to code in fts5_storage.c. fts5_storage.c contains
** code to access the data stored in the %_content and %_docsize tables.
*/
#define FTS5_STMT_SCAN_ASC 0 /* SELECT rowid, * FROM ... ORDER BY 1 ASC */
#define FTS5_STMT_SCAN_DESC 1 /* SELECT rowid, * FROM ... ORDER BY 1 DESC */
#define FTS5_STMT_LOOKUP 2 /* SELECT rowid, * FROM ... WHERE rowid=? */
typedef struct Fts5Storage Fts5Storage;
int sqlite3Fts5StorageOpen(Fts5Config*, Fts5Index*, int, Fts5Storage**, char**);
int sqlite3Fts5StorageClose(Fts5Storage *p);
int sqlite3Fts5StorageRename(Fts5Storage*, const char *zName);
int sqlite3Fts5DropAll(Fts5Config*);
int sqlite3Fts5CreateTable(Fts5Config*, const char*, const char*, int, char **);
int sqlite3Fts5StorageDelete(Fts5Storage *p, i64);
int sqlite3Fts5StorageInsert(Fts5Storage *p, sqlite3_value **apVal, int, i64*);
int sqlite3Fts5StorageIntegrity(Fts5Storage *p);
int sqlite3Fts5StorageStmt(Fts5Storage *p, int eStmt, sqlite3_stmt**, char**);
void sqlite3Fts5StorageStmtRelease(Fts5Storage *p, int eStmt, sqlite3_stmt*);
int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol);
int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnAvg);
int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow);
int sqlite3Fts5StorageSync(Fts5Storage *p, int bCommit);
int sqlite3Fts5StorageRollback(Fts5Storage *p);
int sqlite3Fts5StorageConfigValue(
Fts5Storage *p, const char*, sqlite3_value*, int
);
int sqlite3Fts5StorageSpecialDelete(Fts5Storage *p, i64 iDel, sqlite3_value**);
int sqlite3Fts5StorageDeleteAll(Fts5Storage *p);
int sqlite3Fts5StorageRebuild(Fts5Storage *p);
int sqlite3Fts5StorageOptimize(Fts5Storage *p);
int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge);
/*
** End of interface to code in fts5_storage.c.
**************************************************************************/
/**************************************************************************
** Interface to code in fts5_expr.c.
*/
typedef struct Fts5Expr Fts5Expr;
typedef struct Fts5ExprNode Fts5ExprNode;
typedef struct Fts5Parse Fts5Parse;
typedef struct Fts5Token Fts5Token;
typedef struct Fts5ExprPhrase Fts5ExprPhrase;
typedef struct Fts5ExprNearset Fts5ExprNearset;
typedef struct Fts5ExprColset Fts5ExprColset;
/*
** A token handed to the sqlite3Fts5ParseXXX() functions declared below. The
** text is referenced by pointer and length, not copied into the struct.
*/
struct Fts5Token {
const char *p; /* Token text (not NULL terminated) */
int n; /* Size of buffer p in bytes */
};
/* Parse a MATCH expression. */
int sqlite3Fts5ExprNew(
Fts5Config *pConfig,
const char *zExpr,
Fts5Expr **ppNew,
char **pzErr
);
/*
** for(rc = sqlite3Fts5ExprFirst(pExpr, pIdx, iMin, bDesc);
** rc==SQLITE_OK && 0==sqlite3Fts5ExprEof(pExpr);
** rc = sqlite3Fts5ExprNext(pExpr, iMax)
** ){
** // The document with rowid iRowid matches the expression!
** i64 iRowid = sqlite3Fts5ExprRowid(pExpr);
** }
*/
int sqlite3Fts5ExprFirst(Fts5Expr*, Fts5Index *pIdx, i64 iMin, int bDesc);
int sqlite3Fts5ExprNext(Fts5Expr*, i64 iMax);
int sqlite3Fts5ExprEof(Fts5Expr*);
i64 sqlite3Fts5ExprRowid(Fts5Expr*);
void sqlite3Fts5ExprFree(Fts5Expr*);
/* Called during startup to register a UDF with SQLite */
int sqlite3Fts5ExprInit(Fts5Global*, sqlite3*);
int sqlite3Fts5ExprPhraseCount(Fts5Expr*);
int sqlite3Fts5ExprPhraseSize(Fts5Expr*, int iPhrase);
int sqlite3Fts5ExprPoslist(Fts5Expr*, int, const u8 **);
int sqlite3Fts5ExprPhraseExpr(Fts5Config*, Fts5Expr*, int, Fts5Expr**);
/*******************************************
** The fts5_expr.c API above this point is used by the other hand-written
** C code in this module. The interfaces below this point are called by
** the parser code in fts5parse.y. */
void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...);
Fts5ExprNode *sqlite3Fts5ParseNode(
Fts5Parse *pParse,
int eType,
Fts5ExprNode *pLeft,
Fts5ExprNode *pRight,
Fts5ExprNearset *pNear
);
Fts5ExprPhrase *sqlite3Fts5ParseTerm(
Fts5Parse *pParse,
Fts5ExprPhrase *pPhrase,
Fts5Token *pToken,
int bPrefix
);
Fts5ExprNearset *sqlite3Fts5ParseNearset(
Fts5Parse*,
Fts5ExprNearset*,
Fts5ExprPhrase*
);
Fts5ExprColset *sqlite3Fts5ParseColset(
Fts5Parse*,
Fts5ExprColset*,
Fts5Token *
);
void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase*);
void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset*);
void sqlite3Fts5ParseNodeFree(Fts5ExprNode*);
void sqlite3Fts5ParseSetDistance(Fts5Parse*, Fts5ExprNearset*, Fts5Token*);
void sqlite3Fts5ParseSetColset(Fts5Parse*, Fts5ExprNearset*, Fts5ExprColset*);
void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p);
void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token*);
/*
** End of interface to code in fts5_expr.c.
**************************************************************************/
/**************************************************************************
** Interface to code in fts5_aux.c.
*/
int sqlite3Fts5AuxInit(fts5_api*);
/*
** End of interface to code in fts5_aux.c.
**************************************************************************/
/**************************************************************************
** Interface to code in fts5_tokenizer.c.
*/
int sqlite3Fts5TokenizerInit(fts5_api*);
/*
** End of interface to code in fts5_tokenizer.c.
**************************************************************************/
/**************************************************************************
** Interface to code in fts5_sorter.c.
*/
typedef struct Fts5Sorter Fts5Sorter;
int sqlite3Fts5SorterNew(Fts5Expr *pExpr, Fts5Sorter **pp);
/*
** End of interface to code in fts5_sorter.c.
**************************************************************************/
/**************************************************************************
** Interface to code in fts5_vocab.c.
*/
int sqlite3Fts5VocabInit(Fts5Global*, sqlite3*);
/*
** End of interface to code in fts5_vocab.c.
**************************************************************************/
/**************************************************************************
** Interface to automatically generated code in fts5_unicode2.c.
*/
int sqlite3Fts5UnicodeIsalnum(int c);
int sqlite3Fts5UnicodeIsdiacritic(int c);
int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic);
/*
** End of interface to code in fts5_unicode2.c.
**************************************************************************/
#endif

555
ext/fts5/fts5_aux.c Normal file
View File

@@ -0,0 +1,555 @@
/*
** 2014 May 31
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
*/
#include "fts5Int.h"
#include <math.h>
/*
** Object used to iterate through all "coalesced phrase instances" in
** a single column of the current row. If the phrase instances in the
** column being considered do not overlap, this object simply iterates
** through them. Or, if they do overlap (share one or more tokens in
** common), each set of overlapping instances is treated as a single
** match. See documentation for the highlight() auxiliary function for
** details.
**
** Usage is:
**
**   for(rc = fts5CInstIterInit(pApi, pFts, iCol, &iter);
**       rc==SQLITE_OK && iter.iStart>=0;
**       rc = fts5CInstIterNext(&iter)
**   ){
**     printf("instance starts at %d, ends at %d\n", iter.iStart, iter.iEnd);
**   }
**
** fts5CInstIterNext() resets iStart and iEnd to -1 before searching, so
** they remain -1 once no further instance exists in the column. That is
** the end-of-iteration condition tested by the loop above.
*/
typedef struct CInstIter CInstIter;
struct CInstIter {
const Fts5ExtensionApi *pApi; /* API offered by current FTS version */
Fts5Context *pFts; /* First arg to pass to pApi functions */
int iCol; /* Column to search */
int iInst; /* Next phrase instance index */
int nInst; /* Total number of phrase instances */
/* Output variables */
int iStart; /* First token in coalesced phrase instance */
int iEnd; /* Last token in coalesced phrase instance */
};
/*
** Move the iterator on to the next coalesced phrase instance in column
** pIter->iCol. On return pIter->iStart and pIter->iEnd hold the first and
** last token of that instance, or are both -1 if there are no more
** instances in the column.
**
** Returns SQLITE_OK, or the error code returned by xInst().
*/
static int fts5CInstIterNext(CInstIter *pIter){
  const Fts5ExtensionApi *pApi = pIter->pApi;
  int rc = SQLITE_OK;

  pIter->iStart = -1;
  pIter->iEnd = -1;

  /* pIter->iInst is only advanced once an instance has been consumed. Both
  ** "break" statements below therefore leave it pointing at the instance
  ** that was not consumed. */
  for(; pIter->iInst<pIter->nInst; pIter->iInst++){
    int iPhrase, iInstCol, iOff;
    rc = pApi->xInst(pIter->pFts, pIter->iInst, &iPhrase, &iInstCol, &iOff);
    if( rc!=SQLITE_OK ) break;

    if( iInstCol==pIter->iCol ){
      int iLast = iOff - 1 + pApi->xPhraseSize(pIter->pFts, iPhrase);
      if( pIter->iStart<0 ){
        /* First instance of this coalesced group */
        pIter->iStart = iOff;
        pIter->iEnd = iLast;
      }else if( iOff>pIter->iEnd ){
        /* Does not overlap the current group. It belongs to the next one. */
        break;
      }else if( iLast>pIter->iEnd ){
        /* Overlaps the current group and extends it */
        pIter->iEnd = iLast;
      }
    }
  }

  return rc;
}
/*
** Prepare iterator pIter to visit the coalesced phrase instances found in
** column iCol of the current row, and position it on the first of them.
**
** Returns SQLITE_OK, or an error code from xInstCount() or from
** fts5CInstIterNext().
*/
static int fts5CInstIterInit(
  const Fts5ExtensionApi *pApi,
  Fts5Context *pFts,
  int iCol,
  CInstIter *pIter
){
  int rc;

  memset(pIter, 0, sizeof(*pIter));
  pIter->pApi = pApi;
  pIter->pFts = pFts;
  pIter->iCol = iCol;

  rc = pApi->xInstCount(pFts, &pIter->nInst);
  if( rc!=SQLITE_OK ) return rc;
  return fts5CInstIterNext(pIter);
}
/*************************************************************************
** Start of highlight() implementation.
*/
/*
** State shared between the highlight()/snippet() implementations and the
** tokenizer callback fts5HighlightCb(). Both SQL functions zero one of
** these objects, fill in the marker strings and input text, and pass its
** address as the context argument to xTokenize(). The callback appends the
** marked-up text to zOut.
*/
typedef struct HighlightContext HighlightContext;
struct HighlightContext {
CInstIter iter; /* Coalesced Instance Iterator */
int iPos; /* Current token offset in zIn[] */
int iRangeStart; /* First token to include */
int iRangeEnd; /* If non-zero, last token to include */
const char *zOpen; /* Opening highlight */
const char *zClose; /* Closing highlight */
const char *zIn; /* Input text */
int nIn; /* Size of input text in bytes */
int iOff; /* Current offset within zIn[] */
char *zOut; /* Output value */
};
/*
** Append text to the HighlightContext output string - p->zOut. Argument
** z points to a buffer containing n bytes of text to append. If n is
** negative, everything up until the first '\0' is appended to the output.
**
** If z is NULL, this function is a no-op. The marker strings passed by
** highlight() and snippet() come from sqlite3_value_text(), which may
** return NULL, and calling strlen() on such a pointer would crash.
**
** If *pRc is set to any value other than SQLITE_OK when this function is
** called, it is a no-op. If an error (i.e. an OOM condition) is encountered,
** *pRc is set to an error code before returning.
*/
static void fts5HighlightAppend(
  int *pRc,
  HighlightContext *p,
  const char *z, int n
){
  if( *pRc==SQLITE_OK && z ){
    if( n<0 ) n = (int)strlen(z);
    /* "%z" consumes and frees the previous value of p->zOut */
    p->zOut = sqlite3_mprintf("%z%.*s", p->zOut, n, z);
    if( p->zOut==0 ) *pRc = SQLITE_NOMEM;
  }
}
/*
** Tokenizer callback used by the implementations of the highlight() and
** snippet() functions. It is invoked once for each token of the input text
** p->zIn. The token index is derived by counting invocations (p->iPos).
**
** Text is copied from p->zIn to the output in pieces, with p->zOpen
** inserted before the first token of each coalesced phrase instance and
** p->zClose after its last token. p->iOff records how much of the input
** has been copied so far.
**
** If p->iRangeEnd is greater than zero, only tokens with indexes between
** p->iRangeStart and p->iRangeEnd (inclusive) are processed.
**
** Returns SQLITE_OK, or an error code if appending to the output or
** advancing the instance iterator fails.
*/
static int fts5HighlightCb(
  void *pContext,                 /* Pointer to HighlightContext object */
  const char *pToken,             /* Buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iStartOff,                  /* Start offset of token */
  int iEndOff                     /* End offset of token */
){
  HighlightContext *p = (HighlightContext*)pContext;
  int rc = SQLITE_OK;
  int iPos = p->iPos++;

  if( p->iRangeEnd>0 ){
    /* Ignore tokens outside the requested range */
    if( iPos<p->iRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK;
    /* At the first token of a range that does not begin at token 0, skip
    ** the input text that precedes the range. */
    if( p->iRangeStart && iPos==p->iRangeStart ) p->iOff = iStartOff;
  }

  if( iPos==p->iter.iStart ){
    /* First token of an instance: flush pending text, then open */
    fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iStartOff - p->iOff);
    fts5HighlightAppend(&rc, p, p->zOpen, -1);
    p->iOff = iStartOff;
  }

  if( iPos==p->iter.iEnd ){
    /* Last token of an instance. If the instance began before the start of
    ** the range, its open marker was never emitted, so emit it now. */
    if( p->iRangeEnd && p->iter.iStart<p->iRangeStart ){
      fts5HighlightAppend(&rc, p, p->zOpen, -1);
    }
    fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff);
    fts5HighlightAppend(&rc, p, p->zClose, -1);
    p->iOff = iEndOff;
    if( rc==SQLITE_OK ){
      rc = fts5CInstIterNext(&p->iter);
    }
  }

  if( p->iRangeEnd>0 && iPos==p->iRangeEnd ){
    /* Last token of the range: flush text up to the end of this token */
    fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff);
    p->iOff = iEndOff;
    /* Close the highlight only if an instance is actually in progress,
    ** i.e. it has started at or before this token and ends after it.
    ** Testing only (iPos<iEnd) would also fire for an instance that starts
    ** beyond the range, producing a close marker with no matching open. */
    if( iPos>=p->iter.iStart && iPos<p->iter.iEnd ){
      fts5HighlightAppend(&rc, p, p->zClose, -1);
    }
  }

  return rc;
}
/*
** Implementation of the highlight() auxiliary function. It expects exactly
** three trailing arguments:
**
**   apVal[0]  - index of the column to highlight
**   apVal[1]  - text inserted before each phrase instance
**   apVal[2]  - text inserted after each phrase instance
**
** The result is the text of the column with the markers inserted. If
** xColumnText() supplies no text, no result is set. Errors are reported
** through pCtx.
*/
static void fts5HighlightFunction(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  sqlite3_context *pCtx,          /* Context for returning result/error */
  int nVal,                       /* Number of values in apVal[] array */
  sqlite3_value **apVal           /* Array of trailing arguments */
){
  HighlightContext sCtx;          /* State shared with fts5HighlightCb() */
  int iCol;                       /* Column to highlight */
  int rc;                         /* Return code */

  if( nVal!=3 ){
    const char *zErr = "wrong number of arguments to function highlight()";
    sqlite3_result_error(pCtx, zErr, -1);
    return;
  }

  memset(&sCtx, 0, sizeof(sCtx));
  iCol = sqlite3_value_int(apVal[0]);
  sCtx.zOpen = (const char*)sqlite3_value_text(apVal[1]);
  sCtx.zClose = (const char*)sqlite3_value_text(apVal[2]);

  rc = pApi->xColumnText(pFts, iCol, &sCtx.zIn, &sCtx.nIn);
  if( sCtx.zIn!=0 ){
    if( rc==SQLITE_OK ){
      rc = fts5CInstIterInit(pApi, pFts, iCol, &sCtx.iter);
    }
    if( rc==SQLITE_OK ){
      rc = pApi->xTokenize(
          pFts, sCtx.zIn, sCtx.nIn, (void*)&sCtx, fts5HighlightCb
      );
    }

    /* Copy whatever input follows the final highlighted token */
    fts5HighlightAppend(
        &rc, &sCtx, &sCtx.zIn[sCtx.iOff], sCtx.nIn - sCtx.iOff
    );

    if( rc==SQLITE_OK ){
      sqlite3_result_text(pCtx, (const char*)sCtx.zOut, -1, SQLITE_TRANSIENT);
    }
    sqlite3_free(sCtx.zOut);
  }

  if( rc!=SQLITE_OK ){
    sqlite3_result_error_code(pCtx, rc);
  }
}
/*
** End of highlight() implementation.
**************************************************************************/
/*
** Implementation of the snippet() auxiliary function. It expects exactly
** five trailing arguments:
**
**   apVal[0]  - column index, or a negative value to consider all columns
**   apVal[1]  - text inserted before each phrase instance
**   apVal[2]  - text inserted after each phrase instance
**   apVal[3]  - text used to indicate that the snippet was truncated
**   apVal[4]  - number of tokens in the returned snippet
**
** Each phrase instance in the candidate column(s) is scored as the possible
** start of a snippet, and the highest scoring one is used. The selected
** range of tokens is then rendered by fts5HighlightCb().
**
** If the query contains no phrases, or the selected column has no text,
** no result is set. Any error is reported through pCtx.
*/
static void fts5SnippetFunction(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  sqlite3_context *pCtx,          /* Context for returning result/error */
  int nVal,                       /* Number of values in apVal[] array */
  sqlite3_value **apVal           /* Array of trailing arguments */
){
  HighlightContext ctx;
  int rc = SQLITE_OK;             /* Return code */
  int iCol;                       /* 1st argument to snippet() */
  const char *zEllips;            /* 4th argument to snippet() */
  int nToken;                     /* 5th argument to snippet() */
  int nInst = 0;                  /* Number of instance matches this row */
  int i;                          /* Used to iterate through instances */
  int nPhrase;                    /* Number of phrases in query */
  unsigned char *aSeen;           /* Array of "seen instance" flags */
  int iBestCol;                   /* Column containing best snippet */
  int iBestStart = 0;             /* First token of best snippet */
  int iBestLast;                  /* Last token of best snippet */
  int nBestScore = 0;             /* Score of best snippet */
  int nColSize = 0;               /* Total size of iBestCol in tokens */

  if( nVal!=5 ){
    const char *zErr = "wrong number of arguments to function snippet()";
    sqlite3_result_error(pCtx, zErr, -1);
    return;
  }

  memset(&ctx, 0, sizeof(HighlightContext));
  iCol = sqlite3_value_int(apVal[0]);
  ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]);
  ctx.zClose = (const char*)sqlite3_value_text(apVal[2]);
  zEllips = (const char*)sqlite3_value_text(apVal[3]);
  nToken = sqlite3_value_int(apVal[4]);
  iBestLast = nToken-1;

  iBestCol = (iCol>=0 ? iCol : 0);
  nPhrase = pApi->xPhraseCount(pFts);

  /* With no phrases there is nothing to allocate or score. Return without
  ** setting a result. This matches the previous behaviour, where the
  ** zero-byte allocation below returned NULL and the function fell through
  ** without reporting anything. */
  if( nPhrase<=0 ) return;

  aSeen = sqlite3_malloc(nPhrase);
  if( aSeen==0 ){
    rc = SQLITE_NOMEM;
  }

  if( rc==SQLITE_OK ){
    rc = pApi->xInstCount(pFts, &nInst);
  }
  for(i=0; rc==SQLITE_OK && i<nInst; i++){
    int ip, iSnippetCol, iStart;
    memset(aSeen, 0, nPhrase);
    rc = pApi->xInst(pFts, i, &ip, &iSnippetCol, &iStart);
    if( rc==SQLITE_OK && (iCol<0 || iSnippetCol==iCol) ){
      int nScore = 1000;
      int iLast = iStart - 1 + pApi->xPhraseSize(pFts, ip);
      int j;
      aSeen[ip] = 1;

      for(j=i+1; rc==SQLITE_OK && j<nInst; j++){
        int ic; int io; int iFinal;
        rc = pApi->xInst(pFts, j, &ip, &ic, &io);
        /* Do not touch the output variables if xInst() failed */
        if( rc!=SQLITE_OK ) break;
        iFinal = io + pApi->xPhraseSize(pFts, ip) - 1;
        if( ic==iSnippetCol && iLast<iStart+nToken ){
          nScore += aSeen[ip] ? 1000 : 1;
          aSeen[ip] = 1;
          if( iFinal>iLast ) iLast = iFinal;
        }
      }

      if( rc==SQLITE_OK && nScore>nBestScore ){
        iBestCol = iSnippetCol;
        iBestStart = iStart;
        iBestLast = iLast;
        nBestScore = nScore;
      }
    }
  }

  if( rc==SQLITE_OK ){
    rc = pApi->xColumnSize(pFts, iBestCol, &nColSize);
  }
  if( rc==SQLITE_OK ){
    rc = pApi->xColumnText(pFts, iBestCol, &ctx.zIn, &ctx.nIn);
  }
  if( ctx.zIn ){
    if( rc==SQLITE_OK ){
      rc = fts5CInstIterInit(pApi, pFts, iBestCol, &ctx.iter);
    }

    /* If the matched tokens span less than nToken tokens, centre them */
    if( (iBestStart+nToken-1)>iBestLast ){
      iBestStart -= (iBestStart+nToken-1-iBestLast) / 2;
    }
    /* Keep the snippet within the bounds of the column */
    if( iBestStart+nToken>nColSize ){
      iBestStart = nColSize - nToken;
    }
    if( iBestStart<0 ) iBestStart = 0;

    /* Skip coalesced instances that end before the snippet starts.
    ** fts5HighlightCb() ignores tokens outside the range and so would never
    ** advance past such an instance, leaving every instance inside the
    ** snippet un-highlighted. An instance that straddles the start of the
    ** range is kept, as the callback handles that case itself. */
    while( rc==SQLITE_OK
        && ctx.iter.iStart>=0
        && ctx.iter.iEnd<iBestStart
    ){
      rc = fts5CInstIterNext(&ctx.iter);
    }

    ctx.iRangeStart = iBestStart;
    ctx.iRangeEnd = iBestStart + nToken - 1;

    if( iBestStart>0 ){
      fts5HighlightAppend(&rc, &ctx, zEllips, -1);
    }
    if( rc==SQLITE_OK ){
      rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb);
    }
    if( ctx.iRangeEnd>=(nColSize-1) ){
      fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff);
    }else{
      fts5HighlightAppend(&rc, &ctx, zEllips, -1);
    }

    if( rc==SQLITE_OK ){
      sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT);
    }
    sqlite3_free(ctx.zOut);
  }

  /* Report errors whether or not any column text was available. Previously
  ** a failure that occurred before xColumnText() was silently dropped. */
  if( rc!=SQLITE_OK ){
    sqlite3_result_error_code(pCtx, rc);
  }
  sqlite3_free(aSeen);
}
/************************************************************************/
/*
** The first time the bm25() function is called for a query, an instance
** of the following structure is allocated and populated.
**
** fts5Bm25GetData() makes a single allocation holding this structure
** followed by 2*nPhrase doubles. aIDF points at the first nPhrase doubles
** and aFreq at the second nPhrase. The object is registered with
** xSetAuxdata() and fetched again with xGetAuxdata().
*/
typedef struct Fts5Bm25Data Fts5Bm25Data;
struct Fts5Bm25Data {
int nPhrase; /* Number of phrases in query */
double avgdl; /* Average number of tokens in each row */
double *aIDF; /* IDF for each phrase */
double *aFreq; /* Array used to calculate phrase freq. */
};
/*
** xQueryPhrase() callback used by fts5Bm25GetData(). Each invocation adds
** one to the sqlite3_int64 counter that pUserData points to. The other two
** arguments are not used. Always returns SQLITE_OK.
*/
static int fts5CountCb(
  const Fts5ExtensionApi *pApi,
  Fts5Context *pFts,
  void *pUserData                 /* Pointer to sqlite3_int64 variable */
){
  sqlite3_int64 *pnCount = (sqlite3_int64*)pUserData;
  *pnCount += 1;
  return SQLITE_OK;
}
/*
** Obtain the Fts5Bm25Data object for the current query and store a pointer
** to it in *ppData. The object is fetched with xGetAuxdata(). If none has
** been stored yet, one is allocated, populated and registered using
** xSetAuxdata() with sqlite3_free() as its destructor.
**
** Returns SQLITE_OK if successful. Otherwise an SQLite error code is
** returned and *ppData is set to NULL.
*/
static int fts5Bm25GetData(
  const Fts5ExtensionApi *pApi,
  Fts5Context *pFts,
  Fts5Bm25Data **ppData           /* OUT: bm25-data object for this query */
){
  int rc = SQLITE_OK;             /* Return code */
  Fts5Bm25Data *pData;            /* Object to return */

  pData = (Fts5Bm25Data*)pApi->xGetAuxdata(pFts, 0);
  if( pData==0 ){
    sqlite3_int64 nDoc = 0;       /* Number of rows in table */
    sqlite3_int64 nTotalTok = 0;  /* Number of tokens in table */
    int nPhrase = pApi->xPhraseCount(pFts);
    int nByte = sizeof(Fts5Bm25Data) + nPhrase*2*sizeof(double);
    int iPhrase;

    /* One allocation: the structure, then aIDF[nPhrase], then
    ** aFreq[nPhrase]. */
    pData = (Fts5Bm25Data*)sqlite3_malloc(nByte);
    if( pData ){
      memset(pData, 0, nByte);
      pData->nPhrase = nPhrase;
      pData->aIDF = (double*)&pData[1];
      pData->aFreq = &pData->aIDF[nPhrase];
    }else{
      rc = SQLITE_NOMEM;
    }

    /* Average document length: total tokens divided by total rows */
    if( rc==SQLITE_OK ){
      rc = pApi->xRowCount(pFts, &nDoc);
    }
    if( rc==SQLITE_OK ){
      rc = pApi->xColumnTotalSize(pFts, -1, &nTotalTok);
    }
    if( rc==SQLITE_OK ){
      pData->avgdl = (double)nTotalTok / (double)nDoc;
    }

    /* Inverse document frequency of each phrase in the query */
    for(iPhrase=0; rc==SQLITE_OK && iPhrase<nPhrase; iPhrase++){
      sqlite3_int64 nHit = 0;
      rc = pApi->xQueryPhrase(pFts, iPhrase, (void*)&nHit, fts5CountCb);
      if( rc==SQLITE_OK ){
        /* Standard BM25 formula:
        **
        **   IDF = log( (N - nHit + 0.5) / (nHit + 0.5) )
        **
        ** where N is the total number of rows and nHit the number of rows
        ** containing at least one instance of the phrase. The value is
        ** negative when (N < 2*nHit), which is undesirable, so the minimum
        ** allowable IDF is 1e-6. */
        double idf = log( (nDoc - nHit + 0.5) / (nHit + 0.5) );
        pData->aIDF[iPhrase] = (idf<=0.0) ? 1e-6 : idf;
      }
    }

    if( rc==SQLITE_OK ){
      rc = pApi->xSetAuxdata(pFts, pData, sqlite3_free);
    }else{
      sqlite3_free(pData);
    }
    if( rc!=SQLITE_OK ) pData = 0;
  }

  *ppData = pData;
  return rc;
}
/*
** Implementation of the bm25() auxiliary function.
**
** Trailing arguments, if any, are per-column weights: an instance in column
** N adds apVal[N] to the frequency of its phrase, or 1.0 if fewer than N+1
** arguments were supplied. The result is the BM25 score multiplied by -1.0.
** Errors are reported through pCtx.
*/
static void fts5Bm25Function(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  sqlite3_context *pCtx,          /* Context for returning result/error */
  int nVal,                       /* Number of values in apVal[] array */
  sqlite3_value **apVal           /* Array of trailing arguments */
){
  const double k1 = 1.2;          /* Constant "k1" from BM25 formula */
  const double b = 0.75;          /* Constant "b" from BM25 formula */
  int rc = SQLITE_OK;             /* Error code */
  double score = 0.0;             /* SQL function return value */
  Fts5Bm25Data *pData;            /* Values allocated/calculated once only */
  int i;                          /* Iterator variable */
  int nInst = 0;                  /* Value returned by xInstCount() */
  double D = 0.0;                 /* Total number of tokens in row */
  double *aFreq = 0;              /* Array of phrase freq. for current row */

  /* Calculate the phrase frequency (symbol "f(qi,D)" in the documentation)
  ** for each phrase in the query for the current row. */
  rc = fts5Bm25GetData(pApi, pFts, &pData);
  if( rc==SQLITE_OK ){
    aFreq = pData->aFreq;
    memset(aFreq, 0, sizeof(double) * pData->nPhrase);
    rc = pApi->xInstCount(pFts, &nInst);
  }
  for(i=0; rc==SQLITE_OK && i<nInst; i++){
    int ip; int ic; int io;
    rc = pApi->xInst(pFts, i, &ip, &ic, &io);
    if( rc==SQLITE_OK ){
      double w = (nVal > ic) ? sqlite3_value_double(apVal[ic]) : 1.0;
      aFreq[ip] += w;
    }
  }

  /* Figure out the total size of the current row in tokens. nTok is
  ** initialized so that no indeterminate value is read if xColumnSize()
  ** fails without writing to it. */
  if( rc==SQLITE_OK ){
    int nTok = 0;
    rc = pApi->xColumnSize(pFts, -1, &nTok);
    D = (double)nTok;
  }

  /* Determine the BM25 score for the current row. pData is only
  ** dereferenced here when rc==SQLITE_OK, in which case it is valid. */
  for(i=0; rc==SQLITE_OK && i<pData->nPhrase; i++){
    score += pData->aIDF[i] * (
      ( aFreq[i] * (k1 + 1.0) ) /
      ( aFreq[i] + k1 * (1 - b + b * D / pData->avgdl) )
    );
  }

  /* If no error has occurred, return the calculated score. Otherwise,
  ** throw an SQL exception. */
  if( rc==SQLITE_OK ){
    sqlite3_result_double(pCtx, -1.0 * score);
  }else{
    sqlite3_result_error_code(pCtx, rc);
  }
}
/*
** Register the built-in auxiliary functions snippet(), highlight() and
** bm25() by calling pApi->xCreateFunction() for each. Registration stops at
** the first failure. Returns SQLITE_OK or the failing call's error code.
*/
int sqlite3Fts5AuxInit(fts5_api *pApi){
  struct Builtin {
    const char *zFunc;            /* Function name (nul-terminated) */
    void *pUserData;              /* User-data pointer */
    fts5_extension_function xFunc;/* Callback function */
    void (*xDestroy)(void*);      /* Destructor function */
  } aBuiltin [] = {
    { "snippet",   0, fts5SnippetFunction, 0 },
    { "highlight", 0, fts5HighlightFunction, 0 },
    { "bm25",      0, fts5Bm25Function, 0 },
  };
  const int nBuiltin = (int)(sizeof(aBuiltin) / sizeof(aBuiltin[0]));
  int rc = SQLITE_OK;             /* Return code */
  int iFunc;                      /* Index into aBuiltin[] */

  for(iFunc=0; iFunc<nBuiltin; iFunc++){
    const struct Builtin *pFunc = &aBuiltin[iFunc];
    rc = pApi->xCreateFunction(pApi,
        pFunc->zFunc,
        pFunc->pUserData,
        pFunc->xFunc,
        pFunc->xDestroy
    );
    if( rc!=SQLITE_OK ) break;
  }

  return rc;
}

307
ext/fts5/fts5_buffer.c Normal file
View File

@@ -0,0 +1,307 @@
/*
** 2014 May 31
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
*/
#include "fts5Int.h"
/*
** Ensure that buffer pBuf has room for at least nByte more bytes beyond
** its current content (pBuf->n bytes). The allocation is grown by doubling,
** starting from 64 bytes.
**
** Returns 0 if the space is available. Returns 1 if *pRc was already
** non-zero on entry (the call is then a no-op), or if the buffer could not
** be grown, in which case *pRc is set to SQLITE_NOMEM.
**
** Sizes are computed using 64-bit arithmetic so that neither the sum
** (pBuf->n + nByte) nor the doubling loop can overflow a signed int. A
** request that cannot be represented as a positive int is treated as an
** out-of-memory condition.
*/
int sqlite3Fts5BufferGrow(int *pRc, Fts5Buffer *pBuf, int nByte){
  i64 nReq;                       /* Total bytes required */

  /* A no-op if an error has already occurred */
  if( *pRc ) return 1;

  nReq = (i64)pBuf->n + (i64)nByte;
  if( nReq>(i64)pBuf->nSpace ){
    u8 *pNew;
    i64 nNew = pBuf->nSpace ? ((i64)pBuf->nSpace)*2 : 64;
    while( nNew<nReq ){
      nNew = nNew * 2;
    }
    if( nNew>(i64)0x7FFFFFFF ){
      *pRc = SQLITE_NOMEM;
      return 1;
    }
    pNew = sqlite3_realloc(pBuf->p, (int)nNew);
    if( pNew==0 ){
      *pRc = SQLITE_NOMEM;
      return 1;
    }else{
      pBuf->nSpace = (int)nNew;
      pBuf->p = pNew;
    }
  }
  return 0;
}
/*
** Append value iVal to buffer pBuf, encoded by sqlite3Fts5PutVarint().
** Space for 9 bytes is reserved first. If that fails, or if *pRc is
** already set on entry, nothing is appended. An OOM error is recorded
** in *pRc.
*/
void sqlite3Fts5BufferAppendVarint(int *pRc, Fts5Buffer *pBuf, i64 iVal){
  if( 0==sqlite3Fts5BufferGrow(pRc, pBuf, 9) ){
    pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iVal);
  }
}
/*
** Write the 32-bit value iVal to aBuf[0..3], most significant byte first.
*/
void sqlite3Fts5Put32(u8 *aBuf, int iVal){
  int iByte;
  for(iByte=0; iByte<4; iByte++){
    aBuf[iByte] = (iVal >> (24 - 8*iByte)) & 0x00FF;
  }
}
/*
** Read and return the 32-bit value stored most significant byte first in
** aBuf[0..3], as written by sqlite3Fts5Put32().
**
** The bytes are assembled using unsigned arithmetic. Promoting aBuf[0] to
** (signed) int and shifting it left by 24 is undefined behaviour whenever
** the top bit of that byte is set.
*/
int sqlite3Fts5Get32(const u8 *aBuf){
  return (int)(
      (((unsigned int)aBuf[0]) << 24)
    + (((unsigned int)aBuf[1]) << 16)
    + (((unsigned int)aBuf[2]) <<  8)
    +  ((unsigned int)aBuf[3])
  );
}
/*
** Append the 4-byte encoding of iVal produced by sqlite3Fts5Put32() to
** buffer pBuf. Nothing is appended if *pRc is already set or if the buffer
** cannot be grown.
*/
void sqlite3Fts5BufferAppend32(int *pRc, Fts5Buffer *pBuf, int iVal){
  if( 0==sqlite3Fts5BufferGrow(pRc, pBuf, 4) ){
    sqlite3Fts5Put32(&pBuf->p[pBuf->n], iVal);
    pBuf->n += 4;
  }
}
/*
** Append the nData bytes at pData to buffer pBuf. If an OOM error occurs,
** the error code is stored in *pRc and nothing is appended. If *pRc is
** already set when this function is called, it is a no-op.
*/
void sqlite3Fts5BufferAppendBlob(
  int *pRc,
  Fts5Buffer *pBuf,
  int nData,
  const u8 *pData
){
  assert( *pRc || nData>=0 );
  if( 0==sqlite3Fts5BufferGrow(pRc, pBuf, nData) ){
    u8 *aDest = &pBuf->p[pBuf->n];
    memcpy(aDest, pData, nData);
    pBuf->n += nData;
  }
}
/*
** Append the nul-terminated string zStr to the buffer pBuf. This function
** ensures that the byte following the buffer data is set to 0x00, even
** though this byte is not included in the pBuf->n count.
**
** If *pRc is already set on entry, or if an OOM error occurs, the buffer
** is left unmodified. In particular pBuf->n is not changed. Previously it
** was decremented unconditionally, even when nothing had been appended.
*/
void sqlite3Fts5BufferAppendString(
  int *pRc,
  Fts5Buffer *pBuf,
  const char *zStr
){
  int nStr = (int)strlen(zStr);

  /* Append the string including its nul-terminator... */
  sqlite3Fts5BufferAppendBlob(pRc, pBuf, nStr+1, (const u8*)zStr);

  /* ...then exclude the terminator from the count, but only if the append
  ** took place. AppendBlob() skips the copy exactly when *pRc is non-zero
  ** after the call. */
  if( *pRc==SQLITE_OK ){
    pBuf->n--;
  }
}
/*
** Format the arguments according to the printf() style format string zFmt
** using sqlite3_vmprintf(), then append the resulting text to buffer pBuf
** by way of sqlite3Fts5BufferAppendString().
**
** This is a no-op if *pRc is already set on entry. If the formatted string
** cannot be allocated, *pRc is set to SQLITE_NOMEM.
*/
void sqlite3Fts5BufferAppendPrintf(
  int *pRc,
  Fts5Buffer *pBuf,
  char *zFmt, ...
){
  char *zText;
  va_list ap;

  if( *pRc!=SQLITE_OK ) return;

  va_start(ap, zFmt);
  zText = sqlite3_vmprintf(zFmt, ap);
  va_end(ap);

  if( zText ){
    sqlite3Fts5BufferAppendString(pRc, pBuf, zText);
    sqlite3_free(zText);
  }else{
    *pRc = SQLITE_NOMEM;
  }
}
/*
** Wrapper around sqlite3_vmprintf(). Returns the formatted string, or NULL
** if *pRc is already set on entry or if the allocation fails. In the latter
** case *pRc is set to SQLITE_NOMEM.
*/
char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...){
  char *zOut;
  va_list ap;

  if( *pRc!=SQLITE_OK ) return 0;

  va_start(ap, zFmt);
  zOut = sqlite3_vmprintf(zFmt, ap);
  va_end(ap);

  if( zOut==0 ) *pRc = SQLITE_NOMEM;
  return zOut;
}
/*
** Release the memory held by buffer pBuf, then reset every field of the
** structure to zero.
*/
void sqlite3Fts5BufferFree(Fts5Buffer *pBuf){
  sqlite3_free(pBuf->p);
  memset(pBuf, 0, sizeof(*pBuf));
}
/*
** Discard the content of buffer pBuf by resetting its length to zero. The
** memory allocation, if any, is retained.
*/
void sqlite3Fts5BufferZero(Fts5Buffer *pBuf){
  pBuf->n = 0;
}
/*
** Replace the content of buffer pBuf with the nData bytes at pData. The
** buffer length is reset to zero unconditionally, then the new data is
** appended with sqlite3Fts5BufferAppendBlob(). That call records an OOM
** error in *pRc and does nothing if *pRc is already set.
*/
void sqlite3Fts5BufferSet(
  int *pRc,
  Fts5Buffer *pBuf,
  int nData,
  const u8 *pData
){
  sqlite3Fts5BufferZero(pBuf);
  sqlite3Fts5BufferAppendBlob(pRc, pBuf, nData, pData);
}
/*
** Read the next position from the position list stored in buffer a[0..n-1].
** *pi is the current read offset within a[] and *piOff is the most recently
** returned position.
**
** If *pi is at or past the end of the buffer, *piOff is set to -1 and 1 is
** returned. Otherwise one entry is decoded, *pi and *piOff are updated and
** 0 is returned. An entry is a varint V. If V is 1 it is followed by a
** second varint which, shifted left by 32 bits, replaces the current
** position, and then by a further varint V. In both cases (V-2) is then
** added to the current position.
*/
int sqlite3Fts5PoslistNext64(
  const u8 *a, int n,             /* Buffer containing poslist */
  int *pi,                        /* IN/OUT: Offset within a[] */
  i64 *piOff                      /* IN/OUT: Current offset */
){
  int i = *pi;
  i64 iOff;
  int iVal;

  if( i>=n ){
    /* EOF */
    *piOff = -1;
    return 1;
  }

  iOff = *piOff;
  i += fts5GetVarint32(&a[i], iVal);
  if( iVal==1 ){
    i += fts5GetVarint32(&a[i], iVal);
    iOff = ((i64)iVal) << 32;
    i += fts5GetVarint32(&a[i], iVal);
  }
  *piOff = iOff + (iVal-2);
  *pi = i;
  return 0;
}
/*
** Advance the position-list reader pIter by one entry. The reader is
** flagged as being at EOF if the underlying list is exhausted, or if the
** reader is restricted to a single column (pIter->iCol>=0) and the new
** position belongs to a later column. Returns the EOF flag.
*/
int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader *pIter){
  int bDone;

  bDone = sqlite3Fts5PoslistNext64(
      pIter->a, pIter->n, &pIter->i, &pIter->iPos
  );
  if( bDone==0 && pIter->iCol>=0 && (pIter->iPos >> 32) > pIter->iCol ){
    bDone = 1;
  }
  if( bDone ){
    pIter->bEof = 1;
  }
  return pIter->bEof;
}
/*
** Initialize reader pIter to iterate through the position list in buffer
** a[0..n-1] and move it to its first entry. Entries belonging to columns
** before iCol are skipped. Returns the reader's EOF flag.
*/
int sqlite3Fts5PoslistReaderInit(
  int iCol,                       /* If (iCol>=0), this column only */
  const u8 *a, int n,             /* Poslist buffer to iterate through */
  Fts5PoslistReader *pIter        /* Iterator object to initialize */
){
  memset(pIter, 0, sizeof(*pIter));
  pIter->a = a;
  pIter->n = n;
  pIter->iCol = iCol;

  /* Always read at least one entry, then skip any in earlier columns */
  sqlite3Fts5PoslistReaderNext(pIter);
  while( pIter->bEof==0 && (pIter->iPos >> 32)<iCol ){
    sqlite3Fts5PoslistReaderNext(pIter);
  }
  return pIter->bEof;
}
/*
** Append position iPos to the position list being accumulated in pBuf.
** pWriter->iPrev holds the previously written position.
**
** If the upper 32 bits of iPos (masked with 0x7FFFFFFF) differ from those
** of the previous position, a varint 1 followed by a varint (iPos>>32) is
** written first, and the previous position is reset to the start of the
** new column. The position itself is then written as the difference from
** the previous position, plus 2.
**
** Returns SQLITE_OK, or an error code set by the buffer-append calls.
*/
int sqlite3Fts5PoslistWriterAppend(
  Fts5Buffer *pBuf,
  Fts5PoslistWriter *pWriter,
  i64 iPos
){
  static const i64 colmask = ((i64)(0x7FFFFFFF)) << 32;
  const i64 iNewCol = (iPos & colmask);
  int rc = SQLITE_OK;

  if( iNewCol != (pWriter->iPrev & colmask) ){
    fts5BufferAppendVarint(&rc, pBuf, 1);
    fts5BufferAppendVarint(&rc, pBuf, (iPos >> 32));
    pWriter->iPrev = iNewCol;
  }
  fts5BufferAppendVarint(&rc, pBuf, (iPos - pWriter->iPrev) + 2);
  pWriter->iPrev = iPos;

  return rc;
}
/*
** Allocate nByte bytes using sqlite3_malloc() and zero them. Returns the
** new allocation, or NULL if *pRc is already set on entry, if nByte is
** zero or negative, or if the allocation fails. Only the last case is an
** error, and *pRc is then set to SQLITE_NOMEM.
**
** memset() is only invoked on a non-NULL pointer. Previously a request for
** zero or fewer bytes led to memset(NULL, 0, nByte).
*/
void *sqlite3Fts5MallocZero(int *pRc, int nByte){
  void *pRet = 0;
  if( *pRc==SQLITE_OK ){
    pRet = sqlite3_malloc(nByte);
    if( pRet==0 ){
      if( nByte>0 ) *pRc = SQLITE_NOMEM;
    }else{
      memset(pRet, 0, nByte);
    }
  }
  return pRet;
}
/*
** Return a nul-terminated copy of the nIn bytes of text at pIn. If nIn is
** negative, strlen() is used to determine the length.
**
** The copy is obtained from sqlite3_malloc() and must eventually be freed
** by the caller using sqlite3_free(). NULL is returned if *pRc is already
** set on entry, or if the allocation fails. In the latter case *pRc is
** set to SQLITE_NOMEM.
*/
char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn){
  char *zCopy;

  if( *pRc!=SQLITE_OK ) return 0;
  if( nIn<0 ){
    nIn = strlen(pIn);
  }

  zCopy = (char*)sqlite3_malloc(nIn+1);
  if( zCopy==0 ){
    *pRc = SQLITE_NOMEM;
    return 0;
  }
  memcpy(zCopy, pIn, nIn);
  zCopy[nIn] = '\0';
  return zCopy;
}
/*
** Return true if character 't' may be part of an FTS5 bareword, or false
** otherwise. Characters that may be part of barewords:
**
**   * All non-ASCII characters,
**   * The 52 upper and lower case ASCII characters, and
**   * The 10 integer ASCII characters.
**   * The underscore character "_" (0x5F).
**
** The lookup table is static and const so that it is not re-initialized
** on the stack each time this function is called.
*/
int sqlite3Fts5IsBareword(char t){
  static const unsigned char aBareword[128] = {
    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, 0,   /* 0x00 .. 0x0F */
    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, 0,   /* 0x10 .. 0x1F */
    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, 0,   /* 0x20 .. 0x2F */
    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 0, 0, 0, 0, 0, 0,   /* 0x30 .. 0x3F */
    0, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 1, 1, 1, 1, 1,   /* 0x40 .. 0x4F */
    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 0, 0, 0, 0, 1,   /* 0x50 .. 0x5F */
    0, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 1, 1, 1, 1, 1,   /* 0x60 .. 0x6F */
    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 0, 0, 0, 0, 0    /* 0x70 .. 0x7F */
  };

  /* Bytes with the top bit set never reach the table lookup, so the index
  ** is always in the range 0..127. */
  return (t & 0x80) || aBareword[(int)t];
}

862
ext/fts5/fts5_config.c Normal file
View File

@@ -0,0 +1,862 @@
/*
** 2014 Jun 09
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
** This is an SQLite module implementing full-text search.
*/
#include "fts5Int.h"
/* Default values for configuration settings.
** NOTE(review): going by the names these are the default page size, the
** default automerge threshold and the default crisis-merge threshold. The
** code that uses them is not in this part of the file - confirm. */
#define FTS5_DEFAULT_PAGE_SIZE 4050
#define FTS5_DEFAULT_AUTOMERGE 4
#define FTS5_DEFAULT_CRISISMERGE 16
/* Maximum allowed page size */
#define FTS5_MAX_PAGE_SIZE (128*1024)
/*
** Return 1 if x is the space character (0x20), or 0 otherwise. No other
** character is considered to be white-space.
*/
static int fts5_iswhitespace(char x){
  return (' '==x) ? 1 : 0;
}
/*
** Return 1 if x is one of the four characters that may open a quoted
** string: double-quote, single-quote, '[' or back-tick. Return 0 otherwise.
*/
static int fts5_isopenquote(char x){
  switch( x ){
    case '"':
    case '\'':
    case '[':
    case '`':
      return 1;
    default:
      return 0;
  }
}
/*
** Argument pIn is either NULL or points into a nul-terminated string. If
** it is NULL, NULL is returned. Otherwise, return a pointer to the first
** character at or following *pIn for which fts5_iswhitespace() is false.
*/
static const char *fts5ConfigSkipWhitespace(const char *pIn){
  const char *z;
  if( pIn==0 ) return 0;
  for(z=pIn; fts5_iswhitespace(*z); z++){
    /* no-op */
  }
  return z;
}
/*
** Argument pIn points into a nul-terminated string. Skip over any leading
** run of "bareword" characters (see sqlite3Fts5IsBareword()) and return a
** pointer to the first character following it. If *pIn is not itself a
** bareword character, return NULL.
*/
static const char *fts5ConfigSkipBareword(const char *pIn){
  const char *z = pIn;
  while( sqlite3Fts5IsBareword(*z) ){
    z++;
  }
  return (z==pIn) ? 0 : z;
}
/*
** Return 1 if a is one of the ASCII digits '0' to '9', or 0 otherwise.
*/
static int fts5_isdigit(char a){
  if( a<'0' ) return 0;
  return (a<='9');
}
/*
** Argument pIn points to the first character of what may be an SQL literal
** within a nul-terminated string. If a literal is recognized, return a
** pointer to the character immediately following it. Otherwise, return
** NULL. The forms recognized are:
**
**   * The keyword NULL (case-insensitive),
**   * A blob literal: x'....' containing an even number of hex digits,
**   * A string literal in single quotes, with '' as an escaped quote,
**   * A number: optional sign, digits, and an optional fractional part.
**
** An exponent ("E") suffix is not parsed as part of a number.
*/
static const char *fts5ConfigSkipLiteral(const char *pIn){
  const char *p = pIn;
  switch( *p ){
    case 'n': case 'N':
      if( sqlite3_strnicmp("null", p, 4)==0 ){
        p = &p[4];
      }else{
        p = 0;
      }
      break;

    case 'x': case 'X':
      p++;
      if( *p=='\'' ){
        p++;
        while( (*p>='a' && *p<='f')
            || (*p>='A' && *p<='F')
            || (*p>='0' && *p<='9')
        ){
          p++;
        }
        /* (p-pIn) counts the "x'" prefix (2 bytes) plus the hex digits, so
        ** it is even if and only if the number of digits is even. */
        if( *p=='\'' && 0==((p-pIn)%2) ){
          p++;
        }else{
          p = 0;
        }
      }else{
        p = 0;
      }
      break;

    case '\'':
      p++;
      for(;;){
        /* Test for the end of the string BEFORE advancing. Otherwise an
        ** input consisting of a lone quote character would step past the
        ** nul-terminator and read beyond the end of the buffer. */
        if( *p==0 ){
          p = 0;                  /* Unterminated string literal */
          break;
        }
        if( *p=='\'' ){
          p++;
          if( *p!='\'' ) break;   /* That was the closing quote */
          /* Otherwise it was the first half of an escaped quote ('') */
        }
        p++;
      }
      break;

    default:
      /* maybe a number */
      if( *p=='+' || *p=='-' ) p++;
      while( fts5_isdigit(*p) ) p++;

      /* At this point, if the literal was an integer, the parse is
      ** finished. Or, if it is a floating point value, it may continue
      ** with a decimal point followed by further digits. */
      if( *p=='.' && fts5_isdigit(p[1]) ){
        p += 2;
        while( fts5_isdigit(*p) ) p++;
      }
      if( p==pIn ) p = 0;
      break;
  }

  return p;
}
/*
** The first character of the string pointed to by argument z is guaranteed
** to be an open-quote character (see function fts5_isopenquote()).
**
** This function scans for the corresponding close-quote character (']' for
** an opening '[', otherwise the same character as the open-quote) and
** dequotes the string in place: the quotes are removed, each doubled
** close-quote character is replaced by a single one, and a new
** nul-terminator is written.
**
** The value returned is the byte offset within the original string at which
** scanning stopped. This is the offset of the character following the
** close-quote. If the end of the string is reached without finding a
** close-quote, the offset of the original nul-terminator is returned
** (the loop condition is wrapped in ALWAYS(), so presumably callers are
** expected never to pass such input - confirm).
*/
static int fts5Dequote(char *z){
  char cClose = z[0];             /* Close-quote character */
  int iRead = 1;                  /* Next byte of z[] to read */
  int iWrite = 0;                 /* Next byte of z[] to write */

  assert( cClose=='[' || cClose=='\'' || cClose=='"' || cClose=='`' );
  if( cClose=='[' ) cClose = ']';

  while( ALWAYS(z[iRead]) ){
    if( z[iRead]!=cClose ){
      /* Ordinary character. Copy it. */
      z[iWrite++] = z[iRead++];
    }else if( z[iRead+1]==cClose ){
      /* Two close-quote characters in a row form an escaped quote. Skip
      ** both and write a single quote character to the output. */
      iRead += 2;
      z[iWrite++] = cClose;
    }else{
      /* This is the close-quote. Step past it and stop. */
      iRead++;
      break;
    }
  }

  z[iWrite] = '\0';
  return iRead;
}
/*
** If the string z begins with one of the four open-quote characters,
** remove the quotes in place using fts5Dequote(). If it does not, this
** routine is a no-op. The first character of z must not be a space.
**
** Examples:
**
**     "abc"   becomes   abc
**     'xyz'   becomes   xyz
**     [pqr]   becomes   pqr
**     `mno`   becomes   mno
*/
void sqlite3Fts5Dequote(char *z){
  assert( 0==fts5_iswhitespace(z[0]) );
  if( fts5_isopenquote(z[0]) ){
    fts5Dequote(z);
  }
}
/*
** Parse a "special" CREATE VIRTUAL TABLE directive and update
** configuration object pConfig as appropriate.
**
** The directives recognized are "prefix", "tokenize", "content",
** "content_rowid" and "columnsize". zCmd is compared against each of these
** in that order using only its own length, so an abbreviation of a
** directive name selects the first directive that it is a prefix of.
**
** If successful, object pConfig is updated and SQLITE_OK returned. If
** an error occurs, an SQLite error code is returned and an error message
** may be left in *pzErr. It is the responsibility of the caller to
** eventually free any such error message using sqlite3_free().
*/
static int fts5ConfigParseSpecial(
  Fts5Global *pGlobal,
  Fts5Config *pConfig,            /* Configuration object to update */
  const char *zCmd,               /* Special command to parse */
  const char *zArg,               /* Argument to parse */
  char **pzErr                    /* OUT: Error message */
){
  int rc = SQLITE_OK;
  int nCmd = strlen(zCmd);

  if( sqlite3_strnicmp("prefix", zCmd, nCmd)==0 ){
    const int nByte = sizeof(int) * FTS5_MAX_PREFIX_INDEXES;
    const char *p;
    if( pConfig->aPrefix ){
      *pzErr = sqlite3_mprintf("multiple prefix=... directives");
      rc = SQLITE_ERROR;
    }else{
      pConfig->aPrefix = sqlite3Fts5MallocZero(&rc, nByte);
    }
    p = zArg;
    while( rc==SQLITE_OK && p[0] ){
      int nPre = 0;

      /* aPrefix[] has room for FTS5_MAX_PREFIX_INDEXES entries only. Fail
      ** rather than write past the end of the allocation. */
      if( pConfig->nPrefix>=FTS5_MAX_PREFIX_INDEXES ){
        *pzErr = sqlite3_mprintf(
            "too many prefix indexes (max %d)", (int)FTS5_MAX_PREFIX_INDEXES
        );
        rc = SQLITE_ERROR;
        break;
      }

      while( p[0]==' ' ) p++;
      while( p[0]>='0' && p[0]<='9' && nPre<1000 ){
        nPre = nPre*10 + (p[0] - '0');
        p++;
      }
      while( p[0]==' ' ) p++;
      if( p[0]==',' ){
        p++;
      }else if( p[0] ){
        *pzErr = sqlite3_mprintf("malformed prefix=... directive");
        rc = SQLITE_ERROR;
      }
      if( rc==SQLITE_OK && (nPre==0 || nPre>=1000) ){
        *pzErr = sqlite3_mprintf("prefix length out of range: %d", nPre);
        rc = SQLITE_ERROR;
      }
      pConfig->aPrefix[pConfig->nPrefix] = nPre;
      pConfig->nPrefix++;
    }
    return rc;
  }

  if( sqlite3_strnicmp("tokenize", zCmd, nCmd)==0 ){
    const char *p = (const char*)zArg;
    int nArg = strlen(zArg) + 1;
    /* The number of arguments cannot exceed the length of zArg, and the
    ** dequoted copies (each with a nul-terminator) fit in twice that. */
    char **azArg = sqlite3Fts5MallocZero(&rc, sizeof(char*) * nArg);
    char *pDel = sqlite3Fts5MallocZero(&rc, nArg * 2);
    char *pSpace = pDel;

    if( azArg && pSpace ){
      if( pConfig->pTok ){
        *pzErr = sqlite3_mprintf("multiple tokenize=... directives");
        rc = SQLITE_ERROR;
      }else{
        for(nArg=0; p && *p; nArg++){
          const char *p2 = fts5ConfigSkipWhitespace(p);
          if( *p2=='\'' ){
            p = fts5ConfigSkipLiteral(p2);
          }else{
            p = fts5ConfigSkipBareword(p2);
          }
          if( p ){
            /* pDel was zeroed, so each copy is already nul-terminated */
            memcpy(pSpace, p2, p-p2);
            azArg[nArg] = pSpace;
            sqlite3Fts5Dequote(pSpace);
            pSpace += (p - p2) + 1;
            p = fts5ConfigSkipWhitespace(p);
          }
        }
        if( p==0 ){
          *pzErr = sqlite3_mprintf("parse error in tokenize directive");
          rc = SQLITE_ERROR;
        }else{
          rc = sqlite3Fts5GetTokenizer(pGlobal,
              (const char**)azArg, nArg, &pConfig->pTok, &pConfig->pTokApi,
              pzErr
          );
        }
      }
    }

    sqlite3_free(azArg);
    sqlite3_free(pDel);
    return rc;
  }

  if( sqlite3_strnicmp("content", zCmd, nCmd)==0 ){
    if( pConfig->eContent!=FTS5_CONTENT_NORMAL ){
      *pzErr = sqlite3_mprintf("multiple content=... directives");
      rc = SQLITE_ERROR;
    }else{
      if( zArg[0] ){
        pConfig->eContent = FTS5_CONTENT_EXTERNAL;
        pConfig->zContent = sqlite3Fts5Mprintf(&rc, "%Q.%Q", pConfig->zDb,zArg);
      }else{
        pConfig->eContent = FTS5_CONTENT_NONE;
      }
    }
    return rc;
  }

  if( sqlite3_strnicmp("content_rowid", zCmd, nCmd)==0 ){
    if( pConfig->zContentRowid ){
      *pzErr = sqlite3_mprintf("multiple content_rowid=... directives");
      rc = SQLITE_ERROR;
    }else{
      pConfig->zContentRowid = sqlite3Fts5Strndup(&rc, zArg, -1);
    }
    return rc;
  }

  if( sqlite3_strnicmp("columnsize", zCmd, nCmd)==0 ){
    if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){
      *pzErr = sqlite3_mprintf("malformed columnsize=... directive");
      rc = SQLITE_ERROR;
    }else{
      pConfig->bColumnsize = (zArg[0]=='1');
    }
    return rc;
  }

  *pzErr = sqlite3_mprintf("unrecognized option: \"%.*s\"", nCmd, zCmd);
  return SQLITE_ERROR;
}
/*
** Install the default tokenizer in pConfig by calling
** sqlite3Fts5GetTokenizer() with no tokenizer arguments. The handle and
** API pointers are written to pConfig->pTok and pConfig->pTokApi, both of
** which must be NULL on entry. No error message is requested.
**
** Returns whatever sqlite3Fts5GetTokenizer() returns.
*/
static int fts5ConfigDefaultTokenizer(Fts5Global *pGlobal, Fts5Config *pConfig){
  int rc;
  assert( pConfig->pTok==0 );
  assert( pConfig->pTokApi==0 );
  rc = sqlite3Fts5GetTokenizer(
      pGlobal, 0, 0, &pConfig->pTok, &pConfig->pTokApi, 0
  );
  return rc;
}
/*
** Gobble up the first bareword or quoted word from the input buffer zIn.
** Return a pointer to the character immediately following the gobbled
** word, or NULL if no word could be extracted (the input does not start
** with an open-quote or a bareword character, or an OOM occurred).
**
** On success, *pzOut is set to point to a new sqlite3_malloc'd buffer
** containing a nul-terminated, dequoted copy of the gobbled word, which
** the caller must free. If the word was quoted, *pbQuoted is also set
** to 1. If NULL is returned, *pzOut is NULL and *pbQuoted is 0.
**
** *pRc must be SQLITE_OK when this function is called (this is asserted).
** If an OOM occurs within this function, *pRc is set to SQLITE_NOMEM
** before returning. *pRc is *not* modified if a parse error occurs.
*/
static const char *fts5ConfigGobbleWord(
  int *pRc,                       /* IN/OUT: Error code */
  const char *zIn,                /* Buffer to gobble string/bareword from */
  char **pzOut,                   /* OUT: malloc'd buffer containing str/bw */
  int *pbQuoted                   /* OUT: Set to true if dequoting required */
){
  const char *zRet = 0;

  int nIn = (int)strlen(zIn);
  char *zOut = sqlite3_malloc(nIn+1);

  assert( *pRc==SQLITE_OK );
  *pbQuoted = 0;
  *pzOut = 0;

  if( zOut==0 ){
    *pRc = SQLITE_NOMEM;
  }else{
    memcpy(zOut, zIn, nIn+1);
    if( fts5_isopenquote(zOut[0]) ){
      /* fts5Dequote() rewrites zOut in place. Its return value is used as
      ** the offset in zIn of the first byte after the quoted word. */
      int ii = fts5Dequote(zOut);
      zRet = &zIn[ii];
      *pbQuoted = 1;
    }else{
      zRet = fts5ConfigSkipBareword(zIn);
      /* fts5ConfigSkipBareword() returns NULL if zIn does not begin with a
      ** bareword. Only truncate the copy when a word was actually found;
      ** otherwise (zRet-zIn) is meaningless and must not be used as an
      ** index into zOut. */
      if( zRet ){
        zOut[zRet-zIn] = '\0';
      }
    }
  }

  if( zRet==0 ){
    sqlite3_free(zOut);
  }else{
    *pzOut = zOut;
  }

  return zRet;
}
/*
** Register column zCol, with optional column option zArg (may be NULL),
** as the next column of configuration p.
**
** SQLITE_ERROR is returned and an error message left in *pzErr if zCol
** is one of the reserved names (FTS5_RANK_NAME or FTS5_ROWID_NAME) or if
** zArg is anything other than "unindexed" (case-insensitive).
**
** zCol is appended to p->azCol[] and p->nCol incremented in every case,
** including the error cases.
*/
static int fts5ConfigParseColumn(
  Fts5Config *p,
  char *zCol,
  char *zArg,
  char **pzErr
){
  int rc = SQLITE_OK;
  int bReserved = (
       sqlite3_stricmp(zCol, FTS5_RANK_NAME)==0
    || sqlite3_stricmp(zCol, FTS5_ROWID_NAME)==0
  );

  if( bReserved ){
    *pzErr = sqlite3_mprintf("reserved fts5 column name: %s", zCol);
    rc = SQLITE_ERROR;
  }else if( zArg!=0 ){
    if( sqlite3_stricmp(zArg, "unindexed")!=0 ){
      *pzErr = sqlite3_mprintf("unrecognized column option: %s", zArg);
      rc = SQLITE_ERROR;
    }else{
      p->abUnindexed[p->nCol] = 1;
    }
  }

  p->azCol[p->nCol] = zCol;
  p->nCol++;
  return rc;
}
/*
** Build the Fts5Config.zContentExprlist string. It always begins with
** "T.<zContentRowid>". Unless the table is FTS5_CONTENT_NONE, one further
** expression is appended per column: "T.<column name>" for external
** content tables, or "T.c<N>" otherwise.
**
** p->zContentExprlist must be NULL on entry. The buffer pointer is stored
** there and the accumulated error code returned.
*/
static int fts5ConfigMakeExprlist(Fts5Config *p){
  int rc = SQLITE_OK;
  Fts5Buffer buf = {0, 0, 0};

  sqlite3Fts5BufferAppendPrintf(&rc, &buf, "T.%Q", p->zContentRowid);
  if( p->eContent!=FTS5_CONTENT_NONE ){
    int bExternal = (p->eContent==FTS5_CONTENT_EXTERNAL);
    int iCol = 0;
    while( iCol<p->nCol ){
      if( bExternal ){
        sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.%Q", p->azCol[iCol]);
      }else{
        sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.c%d", iCol);
      }
      iCol++;
    }
  }

  assert( p->zContentExprlist==0 );
  p->zContentExprlist = (char*)buf.p;
  return rc;
}
/*
** Arguments nArg/azArg contain the string arguments passed to the xCreate
** or xConnect method of the virtual table. This function attempts to
** allocate an instance of Fts5Config containing the results of parsing
** those arguments.
**
** azArg[1] is used as the database name and azArg[2] as the table name.
** Each argument from azArg[3] onwards is either a column definition
** ("<name> [<option>]") or a special directive ("<option> = <value>").
**
** If successful, SQLITE_OK is returned and *ppOut is set to point to the
** new Fts5Config object. If an error occurs, an SQLite error code is
** returned, *ppOut is set to NULL and an error message may be left in
** *pzErr. It is the responsibility of the caller to eventually free any
** such error message using sqlite3_free().
*/
int sqlite3Fts5ConfigParse(
  Fts5Global *pGlobal,
  sqlite3 *db,
  int nArg,                       /* Number of arguments */
  const char **azArg,             /* Array of nArg CREATE VIRTUAL TABLE args */
  Fts5Config **ppOut,             /* OUT: Results of parse */
  char **pzErr                    /* OUT: Error message */
){
  int rc = SQLITE_OK;             /* Return code */
  Fts5Config *pRet;               /* New object to return */
  int i;
  int nByte;

  *ppOut = pRet = (Fts5Config*)sqlite3_malloc(sizeof(Fts5Config));
  if( pRet==0 ) return SQLITE_NOMEM;
  memset(pRet, 0, sizeof(Fts5Config));
  pRet->db = db;
  pRet->iCookie = -1;

  /* azCol[] and abUnindexed[] share a single allocation: nArg pointers
  ** followed by nArg flag bytes. If the allocation failed, do not form an
  ** address relative to a NULL pointer. */
  nByte = nArg * (sizeof(char*) + sizeof(u8));
  pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, nByte);
  pRet->abUnindexed = pRet->azCol ? (u8*)&pRet->azCol[nArg] : 0;
  pRet->zDb = sqlite3Fts5Strndup(&rc, azArg[1], -1);
  pRet->zName = sqlite3Fts5Strndup(&rc, azArg[2], -1);
  pRet->bColumnsize = 1;
  if( rc==SQLITE_OK && sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){
    *pzErr = sqlite3_mprintf("reserved fts5 table name: %s", pRet->zName);
    rc = SQLITE_ERROR;
  }

  for(i=3; rc==SQLITE_OK && i<nArg; i++){
    const char *zOrig = azArg[i];
    const char *z;
    char *zOne = 0;
    char *zTwo = 0;
    int bOption = 0;
    int bMustBeCol = 0;

    /* First word: a column name or an option name. A quoted first word
    ** can only be a column name, so it may not be followed by '='. */
    z = fts5ConfigGobbleWord(&rc, zOrig, &zOne, &bMustBeCol);
    z = fts5ConfigSkipWhitespace(z);
    if( z && *z=='=' ){
      bOption = 1;
      z++;
      if( bMustBeCol ) z = 0;
    }
    z = fts5ConfigSkipWhitespace(z);

    /* Optional second word. Nothing may follow it. */
    if( z && z[0] ){
      int bDummy;
      z = fts5ConfigGobbleWord(&rc, z, &zTwo, &bDummy);
      if( z && z[0] ) z = 0;
    }

    if( rc==SQLITE_OK ){
      if( z==0 ){
        *pzErr = sqlite3_mprintf("parse error in \"%s\"", zOrig);
        rc = SQLITE_ERROR;
      }else{
        if( bOption ){
          rc = fts5ConfigParseSpecial(pGlobal, pRet, zOne, zTwo?zTwo:"", pzErr);
        }else{
          /* fts5ConfigParseColumn() stores zOne in pRet->azCol[], so the
          ** buffer must not be freed below. */
          rc = fts5ConfigParseColumn(pRet, zOne, zTwo, pzErr);
          zOne = 0;
        }
      }
    }

    sqlite3_free(zOne);
    sqlite3_free(zTwo);
  }

  /* If a tokenize= option was successfully parsed, the tokenizer has
  ** already been allocated. Otherwise, allocate an instance of the default
  ** tokenizer now. */
  if( rc==SQLITE_OK && pRet->pTok==0 ){
    rc = fts5ConfigDefaultTokenizer(pGlobal, pRet);
  }

  /* If no zContent option was specified, fill in the default values. */
  if( rc==SQLITE_OK && pRet->zContent==0 ){
    const char *zTail = 0;
    assert( pRet->eContent==FTS5_CONTENT_NORMAL
         || pRet->eContent==FTS5_CONTENT_NONE
    );
    if( pRet->eContent==FTS5_CONTENT_NORMAL ){
      zTail = "content";
    }else if( pRet->bColumnsize ){
      zTail = "docsize";
    }

    if( zTail ){
      pRet->zContent = sqlite3Fts5Mprintf(
          &rc, "%Q.'%q_%s'", pRet->zDb, pRet->zName, zTail
      );
    }
  }

  if( rc==SQLITE_OK && pRet->zContentRowid==0 ){
    pRet->zContentRowid = sqlite3Fts5Strndup(&rc, "rowid", -1);
  }

  /* Formulate the zContentExprlist text */
  if( rc==SQLITE_OK ){
    rc = fts5ConfigMakeExprlist(pRet);
  }

  if( rc!=SQLITE_OK ){
    sqlite3Fts5ConfigFree(pRet);
    *ppOut = 0;
  }
  return rc;
}
/*
** Release the configuration object passed as the only argument, together
** with the tokenizer instance (if any) and every string and array it
** owns. Passing NULL is a harmless no-op.
*/
void sqlite3Fts5ConfigFree(Fts5Config *pConfig){
  int iCol;

  if( pConfig==0 ) return;

  if( pConfig->pTok ){
    pConfig->pTokApi->xDelete(pConfig->pTok);
  }
  sqlite3_free(pConfig->zDb);
  sqlite3_free(pConfig->zName);

  /* Column names are individually allocated; the array itself follows */
  iCol = 0;
  while( iCol<pConfig->nCol ){
    sqlite3_free(pConfig->azCol[iCol]);
    iCol++;
  }
  sqlite3_free(pConfig->azCol);

  sqlite3_free(pConfig->aPrefix);
  sqlite3_free(pConfig->zRank);
  sqlite3_free(pConfig->zRankArgs);
  sqlite3_free(pConfig->zContent);
  sqlite3_free(pConfig->zContentRowid);
  sqlite3_free(pConfig->zContentExprlist);
  sqlite3_free(pConfig);
}
/*
** Build a "CREATE TABLE x(...)" statement from the configuration object
** and hand it to sqlite3_declare_vtab(). The statement lists each
** configured column, followed by two HIDDEN columns: one named after the
** table itself and one named FTS5_RANK_NAME.
**
** Returns SQLITE_OK if successful, or an SQLite error code if an error
** occurs.
*/
int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig){
  int rc = SQLITE_OK;
  int iCol = 0;
  char *zSql = sqlite3Fts5Mprintf(&rc, "CREATE TABLE x(");

  /* The "%z" conversion consumes (frees) the previous value of zSql */
  while( zSql && iCol<pConfig->nCol ){
    const char *zSep = (iCol ? ", " : "");
    zSql = sqlite3Fts5Mprintf(&rc, "%z%s%Q", zSql, zSep, pConfig->azCol[iCol]);
    iCol++;
  }
  zSql = sqlite3Fts5Mprintf(&rc, "%z, %Q HIDDEN, %s HIDDEN)",
      zSql, pConfig->zName, FTS5_RANK_NAME
  );

  assert( zSql || rc==SQLITE_NOMEM );
  if( zSql!=0 ){
    rc = sqlite3_declare_vtab(pConfig->db, zSql);
    sqlite3_free(zSql);
  }

  return rc;
}
/*
** Tokenize the text passed via the second and third arguments using the
** tokenizer attached to pConfig.
**
** If pText is NULL this is a no-op that returns SQLITE_OK. Otherwise the
** call is forwarded to the tokenizer's xTokenize() method together with
** the context pointer pCtx and the callback xToken, and the value
** returned by xTokenize() is returned to the caller unchanged.
**
** The callback receives pCtx as its first argument, a pointer to the
** token text as its second, and three integers.
** NOTE(review): the three integers are presumably the token size and its
** start/end byte offsets within the input - confirm against the
** tokenizer API. How non-zero callback return values are handled is
** decided by xTokenize(), not by this wrapper.
*/
int sqlite3Fts5Tokenize(
  Fts5Config *pConfig,            /* FTS5 Configuration object */
  const char *pText, int nText,   /* Text to tokenize */
  void *pCtx,                     /* Context passed to xToken() */
  int (*xToken)(void*, const char*, int, int, int)    /* Callback */
){
  int rc = SQLITE_OK;
  if( pText ){
    rc = pConfig->pTokApi->xTokenize(pConfig->pTok, pCtx, pText, nText, xToken);
  }
  return rc;
}
/*
** Argument pIn points to the first character in what is expected to be
** a comma-separated list of SQL literals followed by a ')' character.
** Whitespace is permitted around each literal. If the input really has
** this form, return a pointer to the ')'. Otherwise, return NULL to
** indicate a parse error.
*/
static const char *fts5ConfigSkipArgs(const char *pIn){
  const char *z = pIn;
  for(;;){
    z = fts5ConfigSkipWhitespace(z);
    z = fts5ConfigSkipLiteral(z);
    z = fts5ConfigSkipWhitespace(z);
    if( z==0 ) return 0;          /* Literal failed to parse */
    if( *z==')' ) return z;       /* End of list */
    if( *z!=',' ) return 0;       /* Junk between literals */
    z++;
  }
}
/*
** Parameter zIn contains a rank() function specification. The format of
** this is:
**
**   + Bareword (function name)
**   + Open parenthesis - "("
**   + Zero or more SQL literals in a comma separated list
**   + Close parenthesis - ")"
**
** On success SQLITE_OK is returned, *pzRank is set to a nul-terminated
** copy of the function name and *pzRankArgs to a copy of the text between
** the parentheses (or NULL if the argument list is empty). Both buffers
** are owned by the caller.
**
** If zIn is NULL or does not match the format above, SQLITE_ERROR is
** returned. If an allocation fails, the error code set by
** sqlite3Fts5MallocZero() is returned. In either case both output
** pointers are set to NULL.
*/
int sqlite3Fts5ConfigParseRank(
  const char *zIn,                /* Input string */
  char **pzRank,                  /* OUT: Rank function name */
  char **pzRankArgs               /* OUT: Rank function arguments */
){
  const char *p = zIn;
  const char *pRank;
  char *zRank = 0;
  char *zRankArgs = 0;
  int rc = SQLITE_OK;

  *pzRank = 0;
  *pzRankArgs = 0;

  if( p==0 ){
    /* A NULL input (e.g. sqlite3_value_text() on an SQL NULL) cannot be
    ** parsed. Treat it as a malformed specification. */
    rc = SQLITE_ERROR;
  }else{
    /* Function name */
    p = fts5ConfigSkipWhitespace(p);
    pRank = p;
    p = fts5ConfigSkipBareword(p);

    if( p ){
      zRank = sqlite3Fts5MallocZero(&rc, 1 + p - pRank);
      if( zRank ) memcpy(zRank, pRank, p-pRank);
    }else{
      rc = SQLITE_ERROR;
    }

    /* Open parenthesis */
    if( rc==SQLITE_OK ){
      p = fts5ConfigSkipWhitespace(p);
      if( *p!='(' ) rc = SQLITE_ERROR;
      p++;
    }

    /* Argument list, up to but not including the close parenthesis */
    if( rc==SQLITE_OK ){
      const char *pArgs;
      p = fts5ConfigSkipWhitespace(p);
      pArgs = p;
      if( *p!=')' ){
        p = fts5ConfigSkipArgs(p);
        if( p==0 ){
          rc = SQLITE_ERROR;
        }else{
          zRankArgs = sqlite3Fts5MallocZero(&rc, 1 + p - pArgs);
          if( zRankArgs ) memcpy(zRankArgs, pArgs, p-pArgs);
        }
      }
    }
  }

  if( rc!=SQLITE_OK ){
    sqlite3_free(zRank);
    assert( zRankArgs==0 );
  }else{
    *pzRank = zRank;
    *pzRankArgs = zRankArgs;
  }
  return rc;
}
/*
** Apply the configuration setting zKey = pVal to pConfig. Keys are
** matched case-insensitively. Recognized keys are:
**
**   pgsz         Integer in the range 1..FTS5_MAX_PAGE_SIZE.
**   automerge    Integer in the range 0..64. The value 1 selects
**                FTS5_DEFAULT_AUTOMERGE.
**   crisismerge  Non-negative integer. The values 0 and 1 select
**                FTS5_DEFAULT_CRISISMERGE.
**   rank         Rank function specification, as parsed by
**                sqlite3Fts5ConfigParseRank().
**
** If the key is not recognized or the value is not acceptable, *pbBadkey
** is set to 1 and pConfig is left unchanged. The return value is
** SQLITE_OK unless sqlite3Fts5ConfigParseRank() fails with something
** other than SQLITE_ERROR, in which case that error code is returned.
*/
int sqlite3Fts5ConfigSetValue(
  Fts5Config *pConfig,
  const char *zKey,
  sqlite3_value *pVal,
  int *pbBadkey
){
  int rc = SQLITE_OK;

  if( sqlite3_stricmp(zKey, "pgsz")==0 ){
    int nPgsz = 0;
    if( sqlite3_value_numeric_type(pVal)==SQLITE_INTEGER ){
      nPgsz = sqlite3_value_int(pVal);
    }
    if( nPgsz>0 && nPgsz<=FTS5_MAX_PAGE_SIZE ){
      pConfig->pgsz = nPgsz;
    }else{
      *pbBadkey = 1;
    }
  }

  else if( sqlite3_stricmp(zKey, "automerge")==0 ){
    int nVal = -1;
    if( sqlite3_value_numeric_type(pVal)==SQLITE_INTEGER ){
      nVal = sqlite3_value_int(pVal);
    }
    if( nVal>=0 && nVal<=64 ){
      pConfig->nAutomerge = (nVal==1) ? FTS5_DEFAULT_AUTOMERGE : nVal;
    }else{
      *pbBadkey = 1;
    }
  }

  else if( sqlite3_stricmp(zKey, "crisismerge")==0 ){
    int nVal = -1;
    if( sqlite3_value_numeric_type(pVal)==SQLITE_INTEGER ){
      nVal = sqlite3_value_int(pVal);
    }
    if( nVal>=0 ){
      pConfig->nCrisisMerge = (nVal<=1) ? FTS5_DEFAULT_CRISISMERGE : nVal;
    }else{
      *pbBadkey = 1;
    }
  }

  else if( sqlite3_stricmp(zKey, "rank")==0 ){
    const char *zSpec = (const char*)sqlite3_value_text(pVal);
    char *zNewRank;
    char *zNewArgs;
    rc = sqlite3Fts5ConfigParseRank(zSpec, &zNewRank, &zNewArgs);
    if( rc==SQLITE_ERROR ){
      /* A malformed specification is a bad value, not a hard error */
      rc = SQLITE_OK;
      *pbBadkey = 1;
    }else if( rc==SQLITE_OK ){
      sqlite3_free(pConfig->zRank);
      sqlite3_free(pConfig->zRankArgs);
      pConfig->zRank = zNewRank;
      pConfig->zRankArgs = zNewArgs;
    }
  }

  else{
    *pbBadkey = 1;
  }

  return rc;
}
/*
** Load the contents of the %_config table into memory.
**
** The pgsz, automerge and crisismerge settings are first reset to their
** compile-time defaults. Each row of the table is then applied using
** sqlite3Fts5ConfigSetValue() (the result of which is not checked),
** except for the "version" row, whose value is compared against
** FTS5_CURRENT_VERSION once all rows have been read.
**
** If the version does not match, SQLITE_ERROR is returned and, if
** pConfig->pzErrmsg is not NULL, an error message is stored there.
** pConfig->iCookie is set to iCookie only if SQLITE_OK is returned.
*/
int sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){
  const char *zSelect = "SELECT k, v FROM %Q.'%q_config'";
  sqlite3_stmt *pStmt = 0;
  int rc = SQLITE_OK;
  int iVersion = 0;
  char *zSql;

  /* Set default values */
  pConfig->pgsz = FTS5_DEFAULT_PAGE_SIZE;
  pConfig->nAutomerge = FTS5_DEFAULT_AUTOMERGE;
  pConfig->nCrisisMerge = FTS5_DEFAULT_CRISISMERGE;

  zSql = sqlite3Fts5Mprintf(&rc, zSelect, pConfig->zDb, pConfig->zName);
  if( zSql ){
    rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &pStmt, 0);
    sqlite3_free(zSql);
  }

  assert( rc==SQLITE_OK || pStmt==0 );
  if( rc==SQLITE_OK ){
    while( sqlite3_step(pStmt)==SQLITE_ROW ){
      const char *zK = (const char*)sqlite3_column_text(pStmt, 0);
      sqlite3_value *pVal = sqlite3_column_value(pStmt, 1);
      if( sqlite3_stricmp(zK, "version")!=0 ){
        int bDummy = 0;
        sqlite3Fts5ConfigSetValue(pConfig, zK, pVal, &bDummy);
      }else{
        iVersion = sqlite3_value_int(pVal);
      }
    }
    rc = sqlite3_finalize(pStmt);
  }

  if( rc==SQLITE_OK ){
    if( iVersion!=FTS5_CURRENT_VERSION ){
      rc = SQLITE_ERROR;
      if( pConfig->pzErrmsg ){
        assert( 0==*pConfig->pzErrmsg );
        *pConfig->pzErrmsg = sqlite3_mprintf(
            "invalid fts5 file format (found %d, expected %d) - run 'rebuild'",
            iVersion, FTS5_CURRENT_VERSION
        );
      }
    }else{
      pConfig->iCookie = iCookie;
    }
  }
  return rc;
}

2043
ext/fts5/fts5_expr.c Normal file

File diff suppressed because it is too large Load Diff

472
ext/fts5/fts5_hash.c Normal file
View File

@@ -0,0 +1,472 @@
/*
** 2014 August 11
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
*/
#include "fts5Int.h"
typedef struct Fts5HashEntry Fts5HashEntry;
/*
** This file contains the implementation of an in-memory hash table used
** to accumulate "term -> doclist" content before it is flushed to a level-0
** segment.
*/
/*
** The hash table object. Entries are chained from aSlot[] through
** Fts5HashEntry.pHashNext. The slot array starts at 1024 slots and is
** doubled by fts5HashResize() when an insert finds that the number of
** entries has reached half the number of slots.
*/
struct Fts5Hash {
int *pnByte; /* Caller-supplied counter; grows as entry data is added */
int nEntry; /* Number of entries currently in hash */
int nSlot; /* Size of aSlot[] array */
Fts5HashEntry *pScan; /* Current ordered scan item */
Fts5HashEntry **aSlot; /* Array of hash slots */
};
/*
** Each entry in the hash table is represented by an object of the
** following type. Each object, its key (zKey[]) and its current data
** are stored in a single memory allocation. The doclist data
** immediately follows the nul-terminated key in memory.
**
** The key consists of a single prefix byte (the bByte argument passed to
** sqlite3Fts5HashWrite()) followed by the token text.
**
** The data that follows the key is, for each rowid written:
**
**   * The rowid as a varint. The first rowid is stored as is, each
**     subsequent rowid as the difference from the previous one.
**   * A position-list size field. A single byte is reserved for this when
**     the rowid is written. It is filled in later by
**     fts5HashAddPoslistSize() with the value (2*nByte + bDel), which is
**     expanded in place to a multi-byte varint if it exceeds 127.
**   * The position list itself. A change of column is recorded as the
**     byte 0x01 followed by the column number as a varint. Each position
**     is stored as a varint holding (delta from previous position + 2).
**
** All offsets stored in this structure (iSzPoslist, nData) are relative
** to the first byte of the structure, not to the start of the data.
*/
struct Fts5HashEntry {
Fts5HashEntry *pHashNext; /* Next hash entry with same hash-key */
Fts5HashEntry *pScanNext; /* Next entry in sorted order */
int nAlloc; /* Total size of allocation */
int iSzPoslist; /* Offset of reserved poslist-size byte (0: none) */
int nData; /* Total bytes of data (incl. structure) */
u8 bDel; /* Set delete-flag @ iSzPoslist */
int iCol; /* Column of last value written */
int iPos; /* Position of last value written */
i64 iRowid; /* Rowid of last value written */
char zKey[8]; /* Nul-terminated entry key (may extend past 8 bytes) */
};
/*
** Size of Fts5HashEntry without the zKey[] array.
*/
#define FTS5_HASHENTRYSIZE (sizeof(Fts5HashEntry)-8)
/*
** Allocate a new, empty hash table with 1024 slots and store a pointer
** to it in *ppNew. pnByte is remembered as the counter that is updated
** as data is written to the table.
**
** Returns SQLITE_OK on success. If either allocation fails, SQLITE_NOMEM
** is returned and *ppNew is set to NULL.
*/
int sqlite3Fts5HashNew(Fts5Hash **ppNew, int *pnByte){
  Fts5Hash *pHash;
  int nSlotByte;

  pHash = (Fts5Hash*)sqlite3_malloc(sizeof(Fts5Hash));
  *ppNew = pHash;
  if( pHash==0 ) return SQLITE_NOMEM;

  memset(pHash, 0, sizeof(Fts5Hash));
  pHash->pnByte = pnByte;
  pHash->nSlot = 1024;

  nSlotByte = sizeof(Fts5HashEntry*) * pHash->nSlot;
  pHash->aSlot = (Fts5HashEntry**)sqlite3_malloc(nSlotByte);
  if( pHash->aSlot==0 ){
    sqlite3_free(pHash);
    *ppNew = 0;
    return SQLITE_NOMEM;
  }
  memset(pHash->aSlot, 0, nSlotByte);
  return SQLITE_OK;
}
/*
** Free a hash table object, including every entry it still contains.
** Passing NULL is a no-op.
*/
void sqlite3Fts5HashFree(Fts5Hash *pHash){
  if( pHash==0 ) return;
  sqlite3Fts5HashClear(pHash);
  sqlite3_free(pHash->aSlot);
  sqlite3_free(pHash);
}
/*
** Empty (but do not delete) a hash table. Every entry is freed, all
** slots are reset to NULL and the entry count returns to zero. The slot
** array itself keeps its current size.
*/
void sqlite3Fts5HashClear(Fts5Hash *pHash){
  int iSlot = 0;
  while( iSlot<pHash->nSlot ){
    Fts5HashEntry *pEntry = pHash->aSlot[iSlot];
    while( pEntry ){
      Fts5HashEntry *pDoomed = pEntry;
      pEntry = pEntry->pHashNext;     /* Read the link before freeing */
      sqlite3_free(pDoomed);
    }
    iSlot++;
  }
  memset(pHash->aSlot, 0, pHash->nSlot * sizeof(Fts5HashEntry*));
  pHash->nEntry = 0;
}
/*
** Return the slot number, in the range 0..nSlot-1, for the n byte key
** at p. Bytes are folded into the hash from the last to the first.
*/
static unsigned int fts5HashKey(int nSlot, const u8 *p, int n){
  unsigned int h = 13;
  int iByte = n;
  while( iByte>0 ){
    iByte--;
    h = (h << 3) ^ h ^ p[iByte];
  }
  return (h % nSlot);
}
/*
** Return the slot number for the key formed by byte b followed by the n
** bytes at p. The bytes of p are folded in from last to first and b is
** folded in last, so the result equals fts5HashKey() applied to the
** concatenated (n+1) byte key.
*/
static unsigned int fts5HashKey2(int nSlot, u8 b, const u8 *p, int n){
  unsigned int h = 13;
  int iByte = n;
  while( iByte>0 ){
    iByte--;
    h = (h << 3) ^ h ^ p[iByte];
  }
  h = (h << 3) ^ h ^ b;
  return (h % nSlot);
}
/*
** Resize the hash table by doubling the number of slots. Every existing
** entry is rehashed, using its nul-terminated key, into the new slot
** array and the old array is freed.
**
** Returns SQLITE_OK, or SQLITE_NOMEM if the new array cannot be
** allocated, in which case the table is left unchanged.
*/
static int fts5HashResize(Fts5Hash *pHash){
  int nOld = pHash->nSlot;
  int nNew = nOld*2;
  Fts5HashEntry **aOld = pHash->aSlot;
  Fts5HashEntry **aNew;
  int iOld;

  aNew = (Fts5HashEntry**)sqlite3_malloc(nNew*sizeof(Fts5HashEntry*));
  if( aNew==0 ) return SQLITE_NOMEM;
  memset(aNew, 0, nNew*sizeof(Fts5HashEntry*));

  for(iOld=0; iOld<nOld; iOld++){
    Fts5HashEntry *pEntry = aOld[iOld];
    while( pEntry ){
      Fts5HashEntry *pNext = pEntry->pHashNext;
      int iNew = fts5HashKey(nNew, (u8*)pEntry->zKey, strlen(pEntry->zKey));
      /* Push onto the head of the chain for the new slot */
      pEntry->pHashNext = aNew[iNew];
      aNew[iNew] = pEntry;
      pEntry = pNext;
    }
  }

  sqlite3_free(aOld);
  pHash->nSlot = nNew;
  pHash->aSlot = aNew;
  return SQLITE_OK;
}
/*
** If entry p has a position-list size field pending (p->iSzPoslist!=0),
** fill it in now. The value stored is (2 * size-of-poslist-in-bytes) plus
** the delete flag p->bDel.
**
** Only one byte was reserved for the field. If the value does not fit in
** a single byte (it exceeds 127), the position list is shifted up to
** make room for the full varint and p->nData is increased to match.
**
** The delete flag and p->iSzPoslist are cleared before returning, so a
** second call is a no-op.
*/
static void fts5HashAddPoslistSize(Fts5HashEntry *p){
  u8 *aData;
  int nPoslist;                   /* Size of position list in bytes */
  int nField;                     /* Value to store in the size field */

  if( p->iSzPoslist==0 ) return;

  aData = (u8*)p;
  nPoslist = (p->nData - p->iSzPoslist - 1);
  nField = nPoslist*2 + p->bDel;
  assert( p->bDel==0 || p->bDel==1 );

  if( nField>127 ){
    int nVarint = sqlite3Fts5GetVarintLen((u32)nField);
    memmove(
        &aData[p->iSzPoslist + nVarint], &aData[p->iSzPoslist + 1], nPoslist
    );
    sqlite3Fts5PutVarint(&aData[p->iSzPoslist], nField);
    p->nData += (nVarint-1);
  }else{
    aData[p->iSzPoslist] = nField;
  }

  p->bDel = 0;
  p->iSzPoslist = 0;
}
/*
** Add a single token instance, or a delete marker, to the hash table.
**
** The entry key is the byte bByte followed by the nToken bytes at pToken.
** If no entry with that key exists, one is created. The rowid, column
** and position are then appended to the entry's data, encoded relative
** to the values most recently written to the same entry. If iCol is
** negative nothing is appended to the position list; instead the
** delete-flag is set for the current rowid.
**
** The counter supplied to sqlite3Fts5HashNew() is increased by the
** number of data bytes added by this call (for a new entry, the entry's
** whole nData value).
**
** Returns SQLITE_OK on success, or SQLITE_NOMEM if an allocation fails.
*/
int sqlite3Fts5HashWrite(
Fts5Hash *pHash,
i64 iRowid, /* Rowid for this entry */
int iCol, /* Column token appears in (-ve -> delete) */
int iPos, /* Position of token within column */
char bByte, /* First byte of token */
const char *pToken, int nToken /* Token to add or remove to or from index */
){
unsigned int iHash;
Fts5HashEntry *p;
u8 *pPtr;
int nIncr = 0; /* Amount to increment (*pHash->pnByte) by */
/* Attempt to locate an existing hash entry */
iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken);
for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){
/* Match on the prefix byte, the token bytes and the nul-terminator */
if( p->zKey[0]==bByte
&& memcmp(&p->zKey[1], pToken, nToken)==0
&& p->zKey[nToken+1]==0
){
break;
}
}
/* If an existing hash entry cannot be found, create a new one. */
if( p==0 ){
/* Header + key (prefix byte, token, nul-terminator) + 64 bytes of data */
int nByte = FTS5_HASHENTRYSIZE + (nToken+1) + 1 + 64;
if( nByte<128 ) nByte = 128;
/* Grow the slot array once the table is half full. The slot number
** must be recomputed as it depends on the number of slots. */
if( (pHash->nEntry*2)>=pHash->nSlot ){
int rc = fts5HashResize(pHash);
if( rc!=SQLITE_OK ) return rc;
iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken);
}
p = (Fts5HashEntry*)sqlite3_malloc(nByte);
if( !p ) return SQLITE_NOMEM;
memset(p, 0, FTS5_HASHENTRYSIZE);
p->nAlloc = nByte;
p->zKey[0] = bByte;
memcpy(&p->zKey[1], pToken, nToken);
assert( iHash==fts5HashKey(pHash->nSlot, (u8*)p->zKey, nToken+1) );
p->zKey[nToken+1] = '\0';
/* Data begins immediately after the key. The first rowid is stored as
** is, followed by one byte reserved for the poslist size. */
p->nData = nToken+1 + 1 + FTS5_HASHENTRYSIZE;
p->nData += sqlite3Fts5PutVarint(&((u8*)p)[p->nData], iRowid);
p->iSzPoslist = p->nData;
p->nData += 1;
p->iRowid = iRowid;
p->pHashNext = pHash->aSlot[iHash];
pHash->aSlot[iHash] = p;
pHash->nEntry++;
nIncr += p->nData;
}
/* Check there is enough space to append a new entry. Worst case scenario
** is:
**
** + 9 bytes for a new rowid,
** + 4 byte reserved for the "poslist size" varint.
** + 1 byte for a "new column" byte,
** + 3 bytes for a new column number (16-bit max) as a varint,
** + 5 bytes for the new position offset (32-bit max).
*/
if( (p->nAlloc - p->nData) < (9 + 4 + 1 + 3 + 5) ){
int nNew = p->nAlloc * 2;
Fts5HashEntry *pNew;
Fts5HashEntry **pp;
pNew = (Fts5HashEntry*)sqlite3_realloc(p, nNew);
if( pNew==0 ) return SQLITE_NOMEM;
pNew->nAlloc = nNew;
/* The entry may have moved. Find the link in the hash chain that still
** holds the old address and point it at the new allocation. */
for(pp=&pHash->aSlot[iHash]; *pp!=p; pp=&(*pp)->pHashNext);
*pp = pNew;
p = pNew;
}
pPtr = (u8*)p;
/* From here on nIncr tracks the change in p->nData made by this call */
nIncr -= p->nData;
/* If this is a new rowid, fill in the size field for the previous
** position list, then append the new rowid (as a delta from the previous
** one) and reserve one byte for the size of the new position list. */
if( iRowid!=p->iRowid ){
fts5HashAddPoslistSize(p);
p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iRowid - p->iRowid);
p->iSzPoslist = p->nData;
p->nData += 1;
p->iCol = 0;
p->iPos = 0;
p->iRowid = iRowid;
}
if( iCol>=0 ){
/* Append a new column value, if necessary */
assert( iCol>=p->iCol );
if( iCol!=p->iCol ){
pPtr[p->nData++] = 0x01;
p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iCol);
p->iCol = iCol;
p->iPos = 0;
}
/* Append the new position offset, stored as (delta + 2) */
p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iPos - p->iPos + 2);
p->iPos = iPos;
}else{
/* This is a delete. Set the delete flag. */
p->bDel = 1;
}
nIncr += p->nData;
*pHash->pnByte += nIncr;
return SQLITE_OK;
}
/*
** Arguments pLeft and pRight point to linked-lists of hash-entry objects
** (linked via pScanNext), each sorted in key order. This function merges
** the two lists into a single sorted list and returns a pointer to its
** first element. Either or both lists may be empty (NULL).
**
** Keys are compared as unsigned bytes. The comparison loop only stops at
** the first differing byte, so it relies on no two entries having
** identical keys.
*/
static Fts5HashEntry *fts5HashEntryMerge(
  Fts5HashEntry *pLeft,
  Fts5HashEntry *pRight
){
  Fts5HashEntry *pA = pLeft;
  Fts5HashEntry *pB = pRight;
  Fts5HashEntry *pHead = 0;
  Fts5HashEntry **ppTail = &pHead;

  while( pA && pB ){
    Fts5HashEntry *pTake;
    int i;

    /* Find the first byte at which the two keys differ */
    for(i=0; pA->zKey[i]==pB->zKey[i]; i++);

    if( ((u8)pA->zKey[i])>((u8)pB->zKey[i]) ){
      /* pB is smaller */
      pTake = pB;
      pB = pB->pScanNext;
    }else{
      /* pA is smaller */
      pTake = pA;
      pA = pA->pScanNext;
    }
    *ppTail = pTake;
    ppTail = &pTake->pScanNext;
    *ppTail = 0;
  }

  /* At most one list still has elements. Append it as is. */
  *ppTail = (pA ? pA : pB);
  return pHead;
}
/*
** Link the entries of hash table pHash into a list, via pScanNext, in
** sorted key order and store a pointer to the first element in
** *ppSorted. If pTerm is not NULL, only entries whose key begins with
** the nTerm bytes at pTerm are included.
**
** Sorting is a merge sort over an array of 32 bins, where bin i holds
** either nothing or a sorted list built from 2^i entries.
**
** NOTE(review): this function sets pHash->nEntry to zero but does not
** modify the aSlot[] chains, so the entries remain reachable through the
** hash table - confirm that resetting the count here is intended.
**
** Returns SQLITE_OK, or SQLITE_NOMEM if the bin array cannot be
** allocated (in which case *ppSorted is set to NULL).
*/
static int fts5HashEntrySort(
  Fts5Hash *pHash,
  const char *pTerm, int nTerm,   /* Query prefix, if any */
  Fts5HashEntry **ppSorted
){
  const int nMergeSlot = 32;
  Fts5HashEntry **apBin;
  Fts5HashEntry *pSorted = 0;
  int iSlot;
  int iBin;

  *ppSorted = 0;
  apBin = sqlite3_malloc(sizeof(Fts5HashEntry*) * nMergeSlot);
  if( apBin==0 ) return SQLITE_NOMEM;
  memset(apBin, 0, sizeof(Fts5HashEntry*) * nMergeSlot);

  for(iSlot=0; iSlot<pHash->nSlot; iSlot++){
    Fts5HashEntry *pIter = pHash->aSlot[iSlot];
    for( ; pIter; pIter=pIter->pHashNext){
      Fts5HashEntry *pRun;
      if( pTerm && memcmp(pIter->zKey, pTerm, nTerm)!=0 ) continue;

      /* Start with a one-element list and carry it up through the bins,
      ** merging with and emptying each occupied bin on the way. */
      pRun = pIter;
      pRun->pScanNext = 0;
      iBin = 0;
      while( apBin[iBin] ){
        pRun = fts5HashEntryMerge(pRun, apBin[iBin]);
        apBin[iBin] = 0;
        iBin++;
      }
      apBin[iBin] = pRun;
    }
  }

  /* Merge whatever is left in the bins into the final list */
  for(iBin=0; iBin<nMergeSlot; iBin++){
    pSorted = fts5HashEntryMerge(pSorted, apBin[iBin]);
  }

  pHash->nEntry = 0;
  sqlite3_free(apBin);
  *ppSorted = pSorted;
  return SQLITE_OK;
}
/*
** Query the hash table for a doclist associated with term pTerm/nTerm.
** The term is compared against entire entry keys.
**
** If a matching entry is found, any pending position-list size field is
** filled in and *ppDoclist/*pnDoclist are set to the location and size
** of the data that follows the entry key. Otherwise both outputs are
** set to zero. SQLITE_OK is always returned.
*/
int sqlite3Fts5HashQuery(
  Fts5Hash *pHash,                /* Hash table to query */
  const char *pTerm, int nTerm,   /* Query term */
  const u8 **ppDoclist,           /* OUT: Pointer to doclist for pTerm */
  int *pnDoclist                  /* OUT: Size of doclist in bytes */
){
  unsigned int iHash = fts5HashKey(pHash->nSlot, (const u8*)pTerm, nTerm);
  Fts5HashEntry *p = pHash->aSlot[iHash];

  /* Walk the chain until the key matches or the chain is exhausted */
  while( p && !(memcmp(p->zKey, pTerm, nTerm)==0 && p->zKey[nTerm]==0) ){
    p = p->pHashNext;
  }

  if( p==0 ){
    *ppDoclist = 0;
    *pnDoclist = 0;
  }else{
    fts5HashAddPoslistSize(p);
    *ppDoclist = (const u8*)&p->zKey[nTerm+1];
    *pnDoclist = p->nData - (FTS5_HASHENTRYSIZE + nTerm + 1);
  }

  return SQLITE_OK;
}
/*
** Begin an ordered scan of the entries whose keys start with the prefix
** pTerm/nTerm (or of all entries, if pTerm is NULL). The sorted list is
** stored in p->pScan. Returns the result of fts5HashEntrySort().
*/
int sqlite3Fts5HashScanInit(
  Fts5Hash *p,                    /* Hash table to query */
  const char *pTerm, int nTerm    /* Query prefix */
){
  int rc = fts5HashEntrySort(p, pTerm, nTerm, &p->pScan);
  return rc;
}
/*
** Advance the current scan to the next entry in sorted order. The scan
** must not already be at EOF.
*/
void sqlite3Fts5HashScanNext(Fts5Hash *p){
  Fts5HashEntry *pCur;
  assert( !sqlite3Fts5HashScanEof(p) );
  pCur = p->pScan;
  p->pScan = pCur->pScanNext;
}
/*
** Return 1 if the current scan has no current entry, or 0 otherwise.
*/
int sqlite3Fts5HashScanEof(Fts5Hash *p){
  return (p->pScan ? 0 : 1);
}
/*
** Return the entry the current scan points to. *pzTerm is set to the
** nul-terminated entry key and *ppDoclist/*pnDoclist to the data that
** follows it, after any pending position-list size field has been filled
** in. If the scan is at EOF, all three outputs are set to zero.
*/
void sqlite3Fts5HashScanEntry(
  Fts5Hash *pHash,
  const char **pzTerm,            /* OUT: term (nul-terminated) */
  const u8 **ppDoclist,           /* OUT: pointer to doclist */
  int *pnDoclist                  /* OUT: size of doclist in bytes */
){
  Fts5HashEntry *pEntry = pHash->pScan;
  int nTerm;

  if( pEntry==0 ){
    *pzTerm = 0;
    *ppDoclist = 0;
    *pnDoclist = 0;
    return;
  }

  nTerm = strlen(pEntry->zKey);
  fts5HashAddPoslistSize(pEntry);
  *pzTerm = pEntry->zKey;
  *ppDoclist = (const u8*)&pEntry->zKey[nTerm+1];
  *pnDoclist = pEntry->nData - (FTS5_HASHENTRYSIZE + nTerm + 1);
}

5625
ext/fts5/fts5_index.c Normal file

File diff suppressed because it is too large Load Diff

2382
ext/fts5/fts5_main.c Normal file

File diff suppressed because it is too large Load Diff

1098
ext/fts5/fts5_storage.c Normal file

File diff suppressed because it is too large Load Diff

1012
ext/fts5/fts5_tcl.c Normal file

File diff suppressed because it is too large Load Diff

406
ext/fts5/fts5_test_mi.c Normal file
View File

@@ -0,0 +1,406 @@
/*
** 2015 Aug 04
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
** This file contains test code only, it is not included in release
** versions of FTS5. It contains the implementation of an FTS5 auxiliary
** function very similar to the FTS4 function matchinfo():
**
** https://www.sqlite.org/fts3.html#matchinfo
**
** Known differences are that:
**
** 1) this function uses the FTS5 definition of "matchable phrase", which
** excludes any phrases that are part of an expression sub-tree that
** does not match the current row. This comes up for MATCH queries
** such as:
**
** "a OR (b AND c)"
**
** In FTS4, if a single row contains instances of tokens "a" and "c",
** but not "b", all instances of "c" are considered matches. In FTS5,
** they are not (as the "b AND c" sub-tree does not match the current
** row).
**
** 2) For the values returned by 'x' that apply to all rows of the table,
** NEAR constraints are not considered. But for the number of hits in
** the current row, they are.
**
** This file exports a single function that may be called to register the
** matchinfo() implementation with a database handle:
**
** int sqlite3Fts5TestRegisterMatchinfo(sqlite3 *db);
*/
#ifdef SQLITE_TEST
#ifdef SQLITE_ENABLE_FTS5
#include "fts5.h"
#include <tcl.h>
#include <assert.h>
#include <string.h>
typedef struct Fts5MatchinfoCtx Fts5MatchinfoCtx;
typedef unsigned int u32;
/*
** Context object for a matchinfo() invocation. zArg is the string of flag
** characters. aRet[] is the output array, into which the values for each
** flag are written one after another, in the order the flags appear in
** zArg; fts5MatchinfoFlagsize() gives the number of elements each flag
** occupies.
*/
struct Fts5MatchinfoCtx {
int nCol; /* Number of cols in FTS5 table */
int nPhrase; /* Number of phrases in FTS5 query */
char *zArg; /* nul-term'd copy of 2nd arg */
int nRet; /* Number of elements in aRet[] */
u32 *aRet; /* Array of 32-bit unsigned ints to return */
};
/*
** Return the fts5_api pointer for database connection db, obtained by
** running "SELECT fts5()" and copying the pointer out of the blob it
** returns. NULL is returned if the statement cannot be prepared, does
** not return a row, or returns a value whose size differs from that of a
** pointer. In the first two cases an error is left in the database
** handle (accessible using sqlite3_errcode()/errmsg()).
*/
static fts5_api *fts5_api_from_db(sqlite3 *db){
  fts5_api *pApi = 0;
  sqlite3_stmt *pStmt = 0;
  int rc = sqlite3_prepare(db, "SELECT fts5()", -1, &pStmt, 0);

  if( rc==SQLITE_OK && sqlite3_step(pStmt)==SQLITE_ROW ){
    if( sizeof(pApi)==sqlite3_column_bytes(pStmt, 0) ){
      memcpy(&pApi, sqlite3_column_blob(pStmt, 0), sizeof(pApi));
    }
  }
  sqlite3_finalize(pStmt);
  return pApi;
}
/*
** Argument f should be a flag accepted by matchinfo() (a valid character
** in the string passed as the second argument). If it is not, -1 is
** returned. Otherwise, the value returned is the number of 32-bit
** integers the flag adds to the output array if the table has nCol
** columns and the query nPhrase phrases:
**
**   'p', 'c', 'n'    1
**   'a', 'l', 's'    nCol
**   'y'              nCol * nPhrase
**   'x'              3 * nCol * nPhrase
**   'b'              nPhrase * (number of 32-bit words needed for nCol bits)
*/
static int fts5MatchinfoFlagsize(int nCol, int nPhrase, char f){
  switch( f ){
    case 'p':
    case 'c':
    case 'n':
      return 1;

    case 'a':
    case 'l':
    case 's':
      return nCol;

    case 'y':
      return nCol * nPhrase;

    case 'x':
      return 3 * nCol * nPhrase;

    case 'b':
      return ((nCol + 31) / 32) * nPhrase;

    default:
      break;
  }
  return -1;
}
/*
** Invoke callback x once for each flag character in p->zArg, in order.
** Each invocation is passed a pointer to the region of p->aRet[] that
** belongs to that flag; the region for each flag starts where the
** previous one ended, as sized by fts5MatchinfoFlagsize().
**
** Iteration stops at the first callback that returns something other
** than SQLITE_OK, and that value is returned. Otherwise SQLITE_OK is
** returned.
*/
static int fts5MatchinfoIter(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  Fts5MatchinfoCtx *p,
  int(*x)(const Fts5ExtensionApi*,Fts5Context*,Fts5MatchinfoCtx*,char,u32*)
){
  int rc = SQLITE_OK;
  int iOut = 0;                   /* Offset of current flag within aRet[] */
  int iFlag = 0;                  /* Index of current flag within zArg */

  while( p->zArg[iFlag]!=0 ){
    char f = p->zArg[iFlag];
    rc = x(pApi, pFts, p, f, &p->aRet[iOut]);
    if( rc!=SQLITE_OK ) break;
    iOut += fts5MatchinfoFlagsize(p->nCol, p->nPhrase, f);
    iFlag++;
  }
  return rc;
}
/*
** Callback passed to xQueryPhrase() for the 'x' flag. pUserData points
** to an array of three u32 values per column. For each hit of phrase 0
** in the row being visited, element [iCol*3+1] is incremented. Element
** [iCol*3+2] is incremented once for each run of consecutive hits that
** share the same column. Always returns SQLITE_OK.
*/
static int fts5MatchinfoXCb(
  const Fts5ExtensionApi *pApi,
  Fts5Context *pFts,
  void *pUserData
){
  u32 *aOut = (u32*)pUserData;
  Fts5PhraseIter iter;
  int iCol, iOff;
  int iPrev = -1;                 /* Column of the previous hit, or -1 */

  pApi->xPhraseFirst(pFts, 0, &iter, &iCol, &iOff);
  while( iOff>=0 ){
    aOut[iCol*3 + 1]++;
    if( iPrev!=iCol ){
      aOut[iCol*3 + 2]++;
      iPrev = iCol;
    }
    pApi->xPhraseNext(pFts, &iter, &iCol, &iOff);
  }

  return SQLITE_OK;
}
/*
** fts5MatchinfoIter() callback that populates the matchinfo values that
** do not depend on the current row. aOut points to the output region
** for flag f. Flags handled here:
**
**   'p'  Number of phrases in the query.
**   'c'  Number of columns in the table.
**   'x'  Runs fts5MatchinfoXCb() via xQueryPhrase() for each phrase,
**        passing it that phrase's block of (3 * nCol) values.
**   'n'  Row count reported by xRowCount().
**   'a'  For each column, xColumnTotalSize() divided by the row count,
**        rounded to nearest. All zeroes if the row count is zero.
**
** Any other flag is ignored. Returns SQLITE_OK or the first error code
** returned by an API method.
*/
static int fts5MatchinfoGlobalCb(
  const Fts5ExtensionApi *pApi,
  Fts5Context *pFts,
  Fts5MatchinfoCtx *p,
  char f,
  u32 *aOut
){
  int rc = SQLITE_OK;
  switch( f ){
    case 'p':
      aOut[0] = p->nPhrase;
      break;

    case 'c':
      aOut[0] = p->nCol;
      break;

    case 'x': {
      int i;
      for(i=0; i<p->nPhrase && rc==SQLITE_OK; i++){
        void *pPtr = (void*)&aOut[i * p->nCol * 3];
        rc = pApi->xQueryPhrase(pFts, i, pPtr, fts5MatchinfoXCb);
      }
      break;
    }

    case 'n': {
      /* Initialized so that a defined value is stored even if xRowCount()
      ** fails without writing to its output parameter. */
      sqlite3_int64 nRow = 0;
      rc = pApi->xRowCount(pFts, &nRow);
      aOut[0] = (u32)nRow;
      break;
    }

    case 'a': {
      sqlite3_int64 nRow = 0;
      rc = pApi->xRowCount(pFts, &nRow);
      if( nRow==0 ){
        memset(aOut, 0, sizeof(u32) * p->nCol);
      }else{
        int i;
        for(i=0; rc==SQLITE_OK && i<p->nCol; i++){
          sqlite3_int64 nToken;
          rc = pApi->xColumnTotalSize(pFts, i, &nToken);
          if( rc==SQLITE_OK){
            /* Average tokens per row, rounded to the nearest integer */
            aOut[i] = (u32)((2*nToken + nRow) / (2*nRow));
          }
        }
      }
      break;
    }

  }
  return rc;
}
/*
** fts5MatchinfoIter() callback that populates the matchinfo values that
** depend on the current row. aOut points to the output region for flag
** f. Flags handled here:
**
**   'x'  For each (phrase, column) pair, the first of its three values is
**        set to the number of hits in the current row. The other two
**        values are not modified.
**   'y'  For each (phrase, column) pair, the number of hits in the
**        current row.
**   'b'  For each phrase, a bitmask (one bit per column, 32 columns per
**        u32) with a bit set for each column containing a hit.
**   'l'  For each column, the value reported by xColumnSize().
**   's'  For each column, the length of the longest run of phrase
**        instances found by the scan below.
**
** Any other flag is ignored. Returns SQLITE_OK or the first error code
** returned by an API method.
*/
static int fts5MatchinfoLocalCb(
const Fts5ExtensionApi *pApi,
Fts5Context *pFts,
Fts5MatchinfoCtx *p,
char f,
u32 *aOut
){
int i;
int rc = SQLITE_OK;
switch( f ){
case 'b':
case 'x':
case 'y': {
/* Stride between successive (phrase, column) counters in aOut[] */
int nMul = (f=='x' ? 3 : 1);
int iPhrase;
/* Zero the values that are about to be accumulated into */
if( f=='b' ){
int nInt = ((p->nCol + 31) / 32) * p->nPhrase;
for(i=0; i<nInt; i++) aOut[i] = 0;
}else{
for(i=0; i<(p->nCol*p->nPhrase); i++) aOut[i*nMul] = 0;
}
for(iPhrase=0; iPhrase<p->nPhrase; iPhrase++){
Fts5PhraseIter iter;
int iOff, iCol;
for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff);
iOff>=0;
pApi->xPhraseNext(pFts, &iter, &iCol, &iOff)
){
if( f=='b' ){
aOut[iPhrase * ((p->nCol+31)/32) + iCol/32] |= ((u32)1 << iCol%32);
}else{
aOut[nMul * (iCol + iPhrase * p->nCol)]++;
}
}
}
break;
}
case 'l': {
for(i=0; rc==SQLITE_OK && i<p->nCol; i++){
int nToken;
rc = pApi->xColumnSize(pFts, i, &nToken);
aOut[i] = (u32)nToken;
}
break;
}
case 's': {
int nInst;
memset(aOut, 0, sizeof(u32) * p->nCol);
rc = pApi->xInstCount(pFts, &nInst);
/* For each instance i, count how many later instances continue a run:
** each must be for the next phrase number and start exactly where the
** previous instance ended. */
for(i=0; rc==SQLITE_OK && i<nInst; i++){
int iPhrase, iOff, iCol = 0;
int iNextPhrase;
int iNextOff;
u32 nSeq = 1;
int j;
rc = pApi->xInst(pFts, i, &iPhrase, &iCol, &iOff);
iNextPhrase = iPhrase+1;
/* NOTE(review): this uses the size of phrase 0, whereas the inner loop
** below uses the size of the matched phrase (ip). Possibly iPhrase was
** intended here - confirm. */
iNextOff = iOff+pApi->xPhraseSize(pFts, 0);
for(j=i+1; rc==SQLITE_OK && j<nInst; j++){
int ip, ic, io;
rc = pApi->xInst(pFts, j, &ip, &ic, &io);
/* Stop at a different column or once past the expected offset */
if( ic!=iCol || io>iNextOff ) break;
if( ip==iNextPhrase && io==iNextOff ){
nSeq++;
iNextPhrase = ip+1;
iNextOff = io + pApi->xPhraseSize(pFts, ip);
}
}
/* Keep the longest run seen so far for this column */
if( nSeq>aOut[iCol] ) aOut[iCol] = nSeq;
}
break;
}
}
return rc;
}
/*
** Allocate and populate a new Fts5MatchinfoCtx for flag string zArg.
**
** The context, the p->aRet[] output array and a nul-terminated copy of
** zArg are all carved out of a single sqlite3_malloc() allocation, so
** the whole object is released with one sqlite3_free(). After allocation
** the row-independent values are computed via fts5MatchinfoGlobalCb().
**
** On failure (unrecognized flag, OOM, or an error from the global pass)
** an error is left in pCtx and NULL is returned.
*/
static Fts5MatchinfoCtx *fts5MatchinfoNew(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  sqlite3_context *pCtx,          /* Context for returning error message */
  const char *zArg                /* Matchinfo flag string */
){
  Fts5MatchinfoCtx *pNew;
  int nColumn = pApi->xColumnCount(pFts);
  int nPhr = pApi->xPhraseCount(pFts);
  int nSlot = 0;                  /* Total u32 slots required */
  int nArg;                       /* Length of zArg in bytes */
  int nAlloc;                     /* Size of the allocation in bytes */
  int rc;

  /* Validate each flag and add up the output space it needs. */
  for(nArg=0; zArg[nArg]; nArg++){
    int nFlag = fts5MatchinfoFlagsize(nColumn, nPhr, zArg[nArg]);
    if( nFlag<0 ){
      char *zErr = sqlite3_mprintf("unrecognized matchinfo flag: %c", zArg[nArg]);
      sqlite3_result_error(pCtx, zErr, -1);
      sqlite3_free(zErr);
      return 0;
    }
    nSlot += nFlag;
  }

  nAlloc = sizeof(Fts5MatchinfoCtx)          /* The struct itself */
         + sizeof(u32) * nSlot               /* The p->aRet[] array */
         + (nArg+1);                         /* The p->zArg string */
  pNew = (Fts5MatchinfoCtx*)sqlite3_malloc(nAlloc);
  if( !pNew ){
    sqlite3_result_error_nomem(pCtx);
    return 0;
  }
  memset(pNew, 0, nAlloc);

  pNew->nCol = nColumn;
  pNew->nPhrase = nPhr;
  pNew->aRet = (u32*)&pNew[1];
  pNew->nRet = nSlot;
  pNew->zArg = (char*)&pNew->aRet[nSlot];
  memcpy(pNew->zArg, zArg, nArg);   /* Terminator supplied by memset() */

  rc = fts5MatchinfoIter(pApi, pFts, pNew, fts5MatchinfoGlobalCb);
  if( rc==SQLITE_OK ) return pNew;

  sqlite3_result_error_code(pCtx, rc);
  sqlite3_free(pNew);
  return 0;
}
/*
** Implementation of the matchinfo() FTS5 auxiliary function.
**
** The optional first trailing argument is the flag string; "pcx" is used
** if it is omitted or is SQL NULL. The Fts5MatchinfoCtx for the query is
** cached as auxiliary data and rebuilt only when the flag string differs
** from the cached one. The result is a blob containing p->nRet 32-bit
** unsigned integers.
*/
static void fts5MatchinfoFunc(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  sqlite3_context *pCtx,          /* Context for returning result/error */
  int nVal,                       /* Number of values in apVal[] array */
  sqlite3_value **apVal           /* Array of trailing arguments */
){
  const char *zArg = "pcx";
  Fts5MatchinfoCtx *p;
  int rc;

  /* sqlite3_value_text() returns NULL for an SQL NULL and on OOM. Neither
  ** may reach fts5MatchinfoNew(), which dereferences the flag string. */
  if( nVal>0 && sqlite3_value_type(apVal[0])!=SQLITE_NULL ){
    zArg = (const char*)sqlite3_value_text(apVal[0]);
    if( zArg==0 ){
      sqlite3_result_error_nomem(pCtx);
      return;
    }
  }

  p = (Fts5MatchinfoCtx*)pApi->xGetAuxdata(pFts, 0);
  if( p==0 || sqlite3_stricmp(zArg, p->zArg) ){
    p = fts5MatchinfoNew(pApi, pFts, pCtx, zArg);
    rc = pApi->xSetAuxdata(pFts, p, sqlite3_free);
    if( p==0 ) return;            /* Error already stored in pCtx */
    if( rc!=SQLITE_OK ){
      /* On failure xSetAuxdata() has already invoked the destructor on
      ** p, so it must not be touched again. */
      sqlite3_result_error_code(pCtx, rc);
      return;
    }
  }

  rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoLocalCb);
  if( rc!=SQLITE_OK ){
    sqlite3_result_error_code(pCtx, rc);
  }else{
    /* No error has occurred, so return a copy of the array of integers. */
    int nByte = p->nRet * sizeof(u32);
    sqlite3_result_blob(pCtx, (void*)p->aRet, nByte, SQLITE_TRANSIENT);
  }
}
/*
** Register the matchinfo() auxiliary function with the FTS5 module
** attached to database handle db.
**
** Returns SQLITE_ERROR if no usable fts5_api object can be obtained,
** otherwise the return code of xCreateFunction().
*/
int sqlite3Fts5TestRegisterMatchinfo(sqlite3 *db){
  /* Extract the FTS5 API pointer from the database handle. A NULL result
  ** means that either FTS5 is not registered with this database handle,
  ** or an error (OOM perhaps?) has occurred. */
  fts5_api *pFts5 = fts5_api_from_db(db);

  /* The fts5_api object must be version 1 or newer. */
  if( pFts5 && pFts5->iVersion>=1 ){
    /* Register the implementation of matchinfo() */
    return pFts5->xCreateFunction(pFts5, "matchinfo", 0, fts5MatchinfoFunc, 0);
  }
  return SQLITE_ERROR;
}
#endif /* SQLITE_ENABLE_FTS5 */
#endif /* SQLITE_TEST */

1231
ext/fts5/fts5_tokenize.c Normal file

File diff suppressed because it is too large Load Diff

360
ext/fts5/fts5_unicode2.c Normal file
View File

@@ -0,0 +1,360 @@
/*
** 2012 May 25
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
*/
/*
** DO NOT EDIT THIS MACHINE GENERATED FILE.
*/
#include <assert.h>
/*
** Return true if the argument corresponds to a unicode codepoint
** classified as either a letter or a number. Otherwise false.
**
** The results are undefined if the value passed to this function
** is less than zero.
*/
int sqlite3Fts5UnicodeIsalnum(int c){
  /* Each unsigned integer in the following array corresponds to a contiguous
  ** range of unicode codepoints that are not either letters or numbers (i.e.
  ** codepoints for which this function should return 0).
  **
  ** The most significant 22 bits in each 32-bit value contain the first
  ** codepoint in the range. The least significant 10 bits are used to store
  ** the size of the range (always at least 1). In other words, the value
  ** ((C<<10) + N) represents a range of N codepoints starting with codepoint
  ** C. It is not possible to represent a range larger than 1023 codepoints
  ** using this format.
  */
  static const unsigned int aEntry[] = {
    0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07,
    0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01,
    0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401,
    0x000BBC81, 0x000DD401, 0x000DF801, 0x000E1002, 0x000E1C01,
    0x000FD801, 0x00120808, 0x00156806, 0x00162402, 0x00163C01,
    0x00164437, 0x0017CC02, 0x00180005, 0x00181816, 0x00187802,
    0x00192C15, 0x0019A804, 0x0019C001, 0x001B5001, 0x001B580F,
    0x001B9C07, 0x001BF402, 0x001C000E, 0x001C3C01, 0x001C4401,
    0x001CC01B, 0x001E980B, 0x001FAC09, 0x001FD804, 0x00205804,
    0x00206C09, 0x00209403, 0x0020A405, 0x0020C00F, 0x00216403,
    0x00217801, 0x0023901B, 0x00240004, 0x0024E803, 0x0024F812,
    0x00254407, 0x00258804, 0x0025C001, 0x00260403, 0x0026F001,
    0x0026F807, 0x00271C02, 0x00272C03, 0x00275C01, 0x00278802,
    0x0027C802, 0x0027E802, 0x00280403, 0x0028F001, 0x0028F805,
    0x00291C02, 0x00292C03, 0x00294401, 0x0029C002, 0x0029D401,
    0x002A0403, 0x002AF001, 0x002AF808, 0x002B1C03, 0x002B2C03,
    0x002B8802, 0x002BC002, 0x002C0403, 0x002CF001, 0x002CF807,
    0x002D1C02, 0x002D2C03, 0x002D5802, 0x002D8802, 0x002DC001,
    0x002E0801, 0x002EF805, 0x002F1803, 0x002F2804, 0x002F5C01,
    0x002FCC08, 0x00300403, 0x0030F807, 0x00311803, 0x00312804,
    0x00315402, 0x00318802, 0x0031FC01, 0x00320802, 0x0032F001,
    0x0032F807, 0x00331803, 0x00332804, 0x00335402, 0x00338802,
    0x00340802, 0x0034F807, 0x00351803, 0x00352804, 0x00355C01,
    0x00358802, 0x0035E401, 0x00360802, 0x00372801, 0x00373C06,
    0x00375801, 0x00376008, 0x0037C803, 0x0038C401, 0x0038D007,
    0x0038FC01, 0x00391C09, 0x00396802, 0x003AC401, 0x003AD006,
    0x003AEC02, 0x003B2006, 0x003C041F, 0x003CD00C, 0x003DC417,
    0x003E340B, 0x003E6424, 0x003EF80F, 0x003F380D, 0x0040AC14,
    0x00412806, 0x00415804, 0x00417803, 0x00418803, 0x00419C07,
    0x0041C404, 0x0042080C, 0x00423C01, 0x00426806, 0x0043EC01,
    0x004D740C, 0x004E400A, 0x00500001, 0x0059B402, 0x005A0001,
    0x005A6C02, 0x005BAC03, 0x005C4803, 0x005CC805, 0x005D4802,
    0x005DC802, 0x005ED023, 0x005F6004, 0x005F7401, 0x0060000F,
    0x0062A401, 0x0064800C, 0x0064C00C, 0x00650001, 0x00651002,
    0x0066C011, 0x00672002, 0x00677822, 0x00685C05, 0x00687802,
    0x0069540A, 0x0069801D, 0x0069FC01, 0x006A8007, 0x006AA006,
    0x006C0005, 0x006CD011, 0x006D6823, 0x006E0003, 0x006E840D,
    0x006F980E, 0x006FF004, 0x00709014, 0x0070EC05, 0x0071F802,
    0x00730008, 0x00734019, 0x0073B401, 0x0073C803, 0x00770027,
    0x0077F004, 0x007EF401, 0x007EFC03, 0x007F3403, 0x007F7403,
    0x007FB403, 0x007FF402, 0x00800065, 0x0081A806, 0x0081E805,
    0x00822805, 0x0082801A, 0x00834021, 0x00840002, 0x00840C04,
    0x00842002, 0x00845001, 0x00845803, 0x00847806, 0x00849401,
    0x00849C01, 0x0084A401, 0x0084B801, 0x0084E802, 0x00850005,
    0x00852804, 0x00853C01, 0x00864264, 0x00900027, 0x0091000B,
    0x0092704E, 0x00940200, 0x009C0475, 0x009E53B9, 0x00AD400A,
    0x00B39406, 0x00B3BC03, 0x00B3E404, 0x00B3F802, 0x00B5C001,
    0x00B5FC01, 0x00B7804F, 0x00B8C00C, 0x00BA001A, 0x00BA6C59,
    0x00BC00D6, 0x00BFC00C, 0x00C00005, 0x00C02019, 0x00C0A807,
    0x00C0D802, 0x00C0F403, 0x00C26404, 0x00C28001, 0x00C3EC01,
    0x00C64002, 0x00C6580A, 0x00C70024, 0x00C8001F, 0x00C8A81E,
    0x00C94001, 0x00C98020, 0x00CA2827, 0x00CB003F, 0x00CC0100,
    0x01370040, 0x02924037, 0x0293F802, 0x02983403, 0x0299BC10,
    0x029A7C01, 0x029BC008, 0x029C0017, 0x029C8002, 0x029E2402,
    0x02A00801, 0x02A01801, 0x02A02C01, 0x02A08C09, 0x02A0D804,
    0x02A1D004, 0x02A20002, 0x02A2D011, 0x02A33802, 0x02A38012,
    0x02A3E003, 0x02A4980A, 0x02A51C0D, 0x02A57C01, 0x02A60004,
    0x02A6CC1B, 0x02A77802, 0x02A8A40E, 0x02A90C01, 0x02A93002,
    0x02A97004, 0x02A9DC03, 0x02A9EC01, 0x02AAC001, 0x02AAC803,
    0x02AADC02, 0x02AAF802, 0x02AB0401, 0x02AB7802, 0x02ABAC07,
    0x02ABD402, 0x02AF8C0B, 0x03600001, 0x036DFC02, 0x036FFC02,
    0x037FFC01, 0x03EC7801, 0x03ECA401, 0x03EEC810, 0x03F4F802,
    0x03F7F002, 0x03F8001A, 0x03F88007, 0x03F8C023, 0x03F95013,
    0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807, 0x03FCEC06,
    0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405, 0x04040003,
    0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E, 0x040E7C01,
    0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01, 0x04280403,
    0x04281402, 0x04283004, 0x0428E003, 0x0428FC01, 0x04294009,
    0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016, 0x04420003,
    0x0442C012, 0x04440003, 0x04449C0E, 0x04450004, 0x04460003,
    0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004, 0x05BD442E,
    0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5, 0x07480046,
    0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01, 0x075C5401,
    0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401, 0x075EA401,
    0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064, 0x07C2800F,
    0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F, 0x07C4C03C,
    0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009, 0x07C94002,
    0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014, 0x07CE8025,
    0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001, 0x07D108B6,
    0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018, 0x07D7EC46,
    0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401, 0x38008060,
    0x380400F0,
  };

  /* One bit per ASCII codepoint. A set bit means the codepoint is NOT a
  ** letter or a digit. */
  static const unsigned int aAscii[4] = {
    0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001,
  };

  if( c<128 ){
    /* The shift must be done on an unsigned value: (1 << 31) overflows a
    ** signed int, which is undefined behavior. */
    return ( (aAscii[c >> 5] & ((unsigned int)1 << (c & 0x001F)))==0 );
  }else if( c<(1<<22) ){
    /* Binary search for the last range that starts at or before c. Setting
    ** the low 10 bits of the key makes it compare >= any entry whose start
    ** codepoint is c, whatever that entry's size field is. */
    unsigned int key = (((unsigned int)c)<<10) | 0x000003FF;
    int iRes = 0;
    int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
    int iLo = 0;
    while( iHi>=iLo ){
      int iTest = (iHi + iLo) / 2;
      if( key >= aEntry[iTest] ){
        iRes = iTest;
        iLo = iTest+1;
      }else{
        iHi = iTest-1;
      }
    }
    assert( aEntry[0]<key );
    assert( key>=aEntry[iRes] );
    /* c is alphanumeric iff it lies past the end of that range. */
    return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF)));
  }
  /* Codepoints that cannot be encoded in the table are treated as
  ** alphanumeric. */
  return 1;
}
/*
** If the argument is a codepoint corresponding to a lowercase letter
** in the ASCII range with a diacritic added, return the codepoint
** of the ASCII letter only. For example, if passed 235 - "LATIN
** SMALL LETTER E WITH DIAERESIS" - return 101 ("LATIN SMALL LETTER
** E"). The results of passing a codepoint that corresponds to an
** uppercase letter are undefined.
*/
static int fts5_remove_diacritic(int c){
  /* Each aDia[] entry encodes a range of codepoints that map to a single
  ** replacement character: the upper 13 bits hold the first codepoint of
  ** the range and the low 3 bits hold the number of codepoints that follow
  ** it. aChar[i] is the replacement for range aDia[i].
  **
  ** Both tables are read-only, so they are declared static const rather
  ** than being rebuilt on the stack each time this function is called.
  */
  static const unsigned short aDia[] = {
        0,  1797,  1848,  1859,  1891,  1928,  1940,  1995,
     2024,  2040,  2060,  2110,  2168,  2206,  2264,  2286,
     2344,  2383,  2472,  2488,  2516,  2596,  2668,  2732,
     2782,  2842,  2894,  2954,  2984,  3000,  3028,  3336,
     3456,  3696,  3712,  3728,  3744,  3896,  3912,  3928,
     3968,  4008,  4040,  4106,  4138,  4170,  4202,  4234,
     4266,  4296,  4312,  4344,  4408,  4424,  4472,  4504,
     6148,  6198,  6264,  6280,  6360,  6429,  6505,  6529,
    61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726,
    61784, 61800, 61836, 61880, 61914, 61948, 61998, 62122,
    62154, 62200, 62218, 62302, 62364, 62442, 62478, 62536,
    62554, 62584, 62604, 62640, 62648, 62656, 62664, 62730,
    62924, 63050, 63082, 63274, 63390,
  };
  static const char aChar[] = {
    '\0', 'a',  'c',  'e',  'i',  'n',  'o',  'u',  'y',  'y',  'a',  'c',
    'd',  'e',  'e',  'g',  'h',  'i',  'j',  'k',  'l',  'n',  'o',  'r',
    's',  't',  'u',  'u',  'w',  'y',  'z',  'o',  'u',  'a',  'i',  'o',
    'u',  'g',  'k',  'o',  'j',  'g',  'n',  'a',  'e',  'i',  'o',  'r',
    'u',  's',  't',  'h',  'a',  'e',  'o',  'y',  '\0', '\0', '\0', '\0',
    '\0', '\0', '\0', '\0', 'a',  'b',  'd',  'd',  'e',  'f',  'g',  'h',
    'h',  'i',  'k',  'l',  'l',  'm',  'n',  'p',  'r',  'r',  's',  't',
    'u',  'v',  'w',  'w',  'x',  'y',  'z',  'h',  't',  'w',  'y',  'a',
    'e',  'i',  'o',  'u',  'y',
  };

  /* Setting the low 3 bits of the key makes it compare >= any entry whose
  ** first codepoint is c, whatever that entry's length field is. */
  unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
  int iRes = 0;
  int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
  int iLo = 0;

  /* Binary search for the last range that starts at or before c. */
  while( iHi>=iLo ){
    int iTest = (iHi + iLo) / 2;
    if( key >= aDia[iTest] ){
      iRes = iTest;
      iLo = iTest+1;
    }else{
      iHi = iTest-1;
    }
  }
  assert( key>=aDia[iRes] );

  /* If c lies beyond the end of the range it is returned unchanged. */
  return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]);
}
/*
** Return true if the argument interpreted as a unicode codepoint
** is a diacritical modifier character.
*/
int sqlite3Fts5UnicodeIsdiacritic(int c){
  /* Bitmaps covering codepoints 768..799 (mask0) and 800..817 (mask1).
  ** Bit N of a mask is set if codepoint (base + N) is a diacritic. */
  unsigned int mask0 = 0x08029FDF;
  unsigned int mask1 = 0x000361F8;
  if( c<768 || c>817 ) return 0;
  /* Shift an unsigned 1: for c==799 the shift count is 31, and (1 << 31)
  ** overflows a signed int, which is undefined behavior. */
  return (c < 768+32) ?
      (mask0 & ((unsigned int)1 << (c-768))) :
      (mask1 & ((unsigned int)1 << (c-768-32)));
}
/*
** Interpret the argument as a unicode codepoint. If the codepoint
** is an upper case character that has a lower case equivalent,
** return the codepoint corresponding to the lower case version.
** Otherwise, return a copy of the argument.
**
** The results are undefined if the value passed to this function
** is less than zero.
*/
int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic){
  /* Folding rules. Each entry covers nRange codepoints starting at iCode.
  **
  ** If the least significant bit of flags is clear, every codepoint in
  ** the range is folded. If it is set, only every second codepoint is
  ** folded, starting with iCode itself.
  **
  ** The remaining 7 bits of flags (flags>>1) index the aiOff[] array. A
  ** codepoint C that is to be folded maps to
  ** ((C + aiOff[flags>>1]) & 0xFFFF).
  **
  ** The contents of this array are generated by parsing the CaseFolding.txt
  ** file distributed as part of the "Unicode Character Database". See
  ** http://www.unicode.org for details.
  */
  static const struct TableEntry {
    unsigned short iCode;
    unsigned char flags;
    unsigned char nRange;
  } aEntry[] = {
    {65, 14, 26},          {181, 64, 1},          {192, 14, 23},
    {216, 14, 7},          {256, 1, 48},          {306, 1, 6},
    {313, 1, 16},          {330, 1, 46},          {376, 116, 1},
    {377, 1, 6},           {383, 104, 1},         {385, 50, 1},
    {386, 1, 4},           {390, 44, 1},          {391, 0, 1},
    {393, 42, 2},          {395, 0, 1},           {398, 32, 1},
    {399, 38, 1},          {400, 40, 1},          {401, 0, 1},
    {403, 42, 1},          {404, 46, 1},          {406, 52, 1},
    {407, 48, 1},          {408, 0, 1},           {412, 52, 1},
    {413, 54, 1},          {415, 56, 1},          {416, 1, 6},
    {422, 60, 1},          {423, 0, 1},           {425, 60, 1},
    {428, 0, 1},           {430, 60, 1},          {431, 0, 1},
    {433, 58, 2},          {435, 1, 4},           {439, 62, 1},
    {440, 0, 1},           {444, 0, 1},           {452, 2, 1},
    {453, 0, 1},           {455, 2, 1},           {456, 0, 1},
    {458, 2, 1},           {459, 1, 18},          {478, 1, 18},
    {497, 2, 1},           {498, 1, 4},           {502, 122, 1},
    {503, 134, 1},         {504, 1, 40},          {544, 110, 1},
    {546, 1, 18},          {570, 70, 1},          {571, 0, 1},
    {573, 108, 1},         {574, 68, 1},          {577, 0, 1},
    {579, 106, 1},         {580, 28, 1},          {581, 30, 1},
    {582, 1, 10},          {837, 36, 1},          {880, 1, 4},
    {886, 0, 1},           {902, 18, 1},          {904, 16, 3},
    {908, 26, 1},          {910, 24, 2},          {913, 14, 17},
    {931, 14, 9},          {962, 0, 1},           {975, 4, 1},
    {976, 140, 1},         {977, 142, 1},         {981, 146, 1},
    {982, 144, 1},         {984, 1, 24},          {1008, 136, 1},
    {1009, 138, 1},        {1012, 130, 1},        {1013, 128, 1},
    {1015, 0, 1},          {1017, 152, 1},        {1018, 0, 1},
    {1021, 110, 3},        {1024, 34, 16},        {1040, 14, 32},
    {1120, 1, 34},         {1162, 1, 54},         {1216, 6, 1},
    {1217, 1, 14},         {1232, 1, 88},         {1329, 22, 38},
    {4256, 66, 38},        {4295, 66, 1},         {4301, 66, 1},
    {7680, 1, 150},        {7835, 132, 1},        {7838, 96, 1},
    {7840, 1, 96},         {7944, 150, 8},        {7960, 150, 6},
    {7976, 150, 8},        {7992, 150, 8},        {8008, 150, 6},
    {8025, 151, 8},        {8040, 150, 8},        {8072, 150, 8},
    {8088, 150, 8},        {8104, 150, 8},        {8120, 150, 2},
    {8122, 126, 2},        {8124, 148, 1},        {8126, 100, 1},
    {8136, 124, 4},        {8140, 148, 1},        {8152, 150, 2},
    {8154, 120, 2},        {8168, 150, 2},        {8170, 118, 2},
    {8172, 152, 1},        {8184, 112, 2},        {8186, 114, 2},
    {8188, 148, 1},        {8486, 98, 1},         {8490, 92, 1},
    {8491, 94, 1},         {8498, 12, 1},         {8544, 8, 16},
    {8579, 0, 1},          {9398, 10, 26},        {11264, 22, 47},
    {11360, 0, 1},         {11362, 88, 1},        {11363, 102, 1},
    {11364, 90, 1},        {11367, 1, 6},         {11373, 84, 1},
    {11374, 86, 1},        {11375, 80, 1},        {11376, 82, 1},
    {11378, 0, 1},         {11381, 0, 1},         {11390, 78, 2},
    {11392, 1, 100},       {11499, 1, 4},         {11506, 0, 1},
    {42560, 1, 46},        {42624, 1, 24},        {42786, 1, 14},
    {42802, 1, 62},        {42873, 1, 4},         {42877, 76, 1},
    {42878, 1, 10},        {42891, 0, 1},         {42893, 74, 1},
    {42896, 1, 4},         {42912, 1, 10},        {42922, 72, 1},
    {65313, 14, 26},
  };
  static const unsigned short aiOff[] = {
   1,     2,     8,     15,    16,    26,    28,    32,
   37,    38,    40,    48,    63,    64,    69,    71,
   79,    80,    116,   202,   203,   205,   206,   207,
   209,   210,   211,   213,   214,   217,   218,   219,
   775,   7264,  10792, 10795, 23228, 23256, 30204, 54721,
   54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274,
   57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406,
   65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462,
   65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511,
   65514, 65521, 65527, 65528, 65529,
  };

  int ret = c;

  assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );

  /* ASCII: only 'A'..'Z' are folded. No diacritic handling. */
  if( c<128 ){
    return (c>='A' && c<='Z') ? c + ('a' - 'A') : c;
  }

  /* Beyond the BMP: a single hard-coded range is folded by adding 40. */
  if( c>=65536 ){
    return (c>=66560 && c<66600) ? c + 40 : c;
  }

  /* 128 <= c < 65536: find the last rule starting at or before c. */
  {
    const struct TableEntry *pRule;
    int iLow = 0;
    int iHigh = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
    int iMatch = -1;

    assert( c>aEntry[0].iCode );
    while( iLow<=iHigh ){
      int iMid = (iHigh + iLow) / 2;
      if( c>=aEntry[iMid].iCode ){
        iMatch = iMid;
        iLow = iMid+1;
      }else{
        iHigh = iMid-1;
      }
    }
    assert( iMatch>=0 && c>=aEntry[iMatch].iCode );

    pRule = &aEntry[iMatch];
    if( c<(pRule->iCode + pRule->nRange) ){
      /* Non-zero if this is an "every second codepoint" rule and c is one
      ** of the codepoints the rule skips. */
      int bSkip = 0x01 & pRule->flags & (pRule->iCode ^ c);
      if( !bSkip ){
        ret = (c + (aiOff[pRule->flags>>1])) & 0x0000FFFF;
        assert( ret>0 );
      }
    }

    if( bRemoveDiacritic ) ret = fts5_remove_diacritic(ret);
  }

  return ret;
}

342
ext/fts5/fts5_varint.c Normal file
View File

@@ -0,0 +1,342 @@
/*
** 2015 May 30
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
** Routines for varint serialization and deserialization.
*/
#include "fts5Int.h"
/*
** This is a copy of the sqlite3GetVarint32() routine from the SQLite core.
** Except, this version does handle the single byte case that the core
** version depends on being handled before its function is called.
*/
int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v){
  u32 b0, b1, b2;                 /* First three bytes of the varint */

  /* The 1-byte case. Overwhelmingly the most common.
  ** Values between 0 and 127. */
  b0 = p[0];
  if( (b0 & 0x80)==0 ){
    *v = b0;
    return 1;
  }

  /* The 2-byte case. Values between 128 and 16383. */
  b1 = p[1];
  if( (b1 & 0x80)==0 ){
    *v = ((b0 & 0x7f)<<7) | b1;
    return 2;
  }

  /* The 3-byte case. Values between 16384 and 2097151. */
  b2 = p[2];
  if( (b2 & 0x80)==0 ){
    *v = ((b0 & 0x7f)<<14) | ((b1 & 0x7f)<<7) | b2;
    return 3;
  }

  /* Only the 1-, 2- and 3-byte cases are decoded inline. The very rare
  ** larger cases are handed to the slower 64-bit varint routine, which
  ** re-reads the varint from its first byte. The decoded value is
  ** truncated to 32 bits.
  */
  {
    u64 v64;
    u8 n = sqlite3Fts5GetVarint(p, &v64);
    *v = (u32)v64;
    assert( n>3 && n<=9 );
    return n;
  }
}
/*
** Bitmasks used by sqlite3Fts5GetVarint(). These precomputed constants
** are defined here rather than simply putting the constant expressions
** inline in order to work around bugs in the RVT compiler.
**
** SLOT_2_0 A mask for (0x7f<<14) | 0x7f
**
** SLOT_4_2_0 A mask for (0x7f<<28) | SLOT_2_0
*/
#define SLOT_2_0 0x001fc07f
#define SLOT_4_2_0 0xf01fc07f
/*
** Read a 64-bit variable-length integer from memory starting at p[0].
** Return the number of bytes read. The value is stored in *v.
*/
u8 sqlite3Fts5GetVarint(const unsigned char *p, u64 *v){
/* Bytes of the varint are referred to as p0, p1, ... p8 in the comments
** below. Registers a and b alternately accumulate the even- and
** odd-numbered bytes; s saves the high-order bits once a and b can no
** longer hold the whole value. Each test of bit 0x80 checks whether the
** byte most recently read is the final byte of the varint. */
u32 a,b,s;
a = *p;
/* a: p0 (unmasked) */
if (!(a&0x80))
{
/* 1-byte varint */
*v = a;
return 1;
}
p++;
b = *p;
/* b: p1 (unmasked) */
if (!(b&0x80))
{
/* 2-byte varint */
a &= 0x7f;
a = a<<7;
a |= b;
*v = a;
return 2;
}
/* Verify that constants are precomputed correctly */
assert( SLOT_2_0 == ((0x7f<<14) | (0x7f)) );
assert( SLOT_4_2_0 == ((0xfU<<28) | (0x7f<<14) | (0x7f)) );
p++;
a = a<<14;
a |= *p;
/* a: p0<<14 | p2 (unmasked) */
if (!(a&0x80))
{
/* 3-byte varint */
a &= SLOT_2_0;
b &= 0x7f;
b = b<<7;
a |= b;
*v = a;
return 3;
}
/* CSE1 from below */
a &= SLOT_2_0;
p++;
b = b<<14;
b |= *p;
/* b: p1<<14 | p3 (unmasked) */
if (!(b&0x80))
{
/* 4-byte varint */
b &= SLOT_2_0;
/* moved CSE1 up */
/* a &= (0x7f<<14)|(0x7f); */
a = a<<7;
a |= b;
*v = a;
return 4;
}
/* a: p0<<14 | p2 (masked) */
/* b: p1<<14 | p3 (unmasked) */
/* 1:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
/* moved CSE1 up */
/* a &= (0x7f<<14)|(0x7f); */
b &= SLOT_2_0;
s = a;
/* s: p0<<14 | p2 (masked) */
p++;
a = a<<14;
a |= *p;
/* a: p0<<28 | p2<<14 | p4 (unmasked) */
if (!(a&0x80))
{
/* 5-byte varint. The result no longer fits in 32 bits, so the bits of s
** that were shifted out of a become the upper word of the result. */
/* we can skip these cause they were (effectively) done above in calc'ing s */
/* a &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
/* b &= (0x7f<<14)|(0x7f); */
b = b<<7;
a |= b;
s = s>>18;
*v = ((u64)s)<<32 | a;
return 5;
}
/* 2:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
s = s<<7;
s |= b;
/* s: p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
p++;
b = b<<14;
b |= *p;
/* b: p1<<28 | p3<<14 | p5 (unmasked) */
if (!(b&0x80))
{
/* 6-byte varint */
/* we can skip this cause it was (effectively) done above in calc'ing s */
/* b &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
a &= SLOT_2_0;
a = a<<7;
a |= b;
s = s>>18;
*v = ((u64)s)<<32 | a;
return 6;
}
p++;
a = a<<14;
a |= *p;
/* a: p2<<28 | p4<<14 | p6 (unmasked) */
if (!(a&0x80))
{
/* 7-byte varint */
a &= SLOT_4_2_0;
b &= SLOT_2_0;
b = b<<7;
a |= b;
s = s>>11;
*v = ((u64)s)<<32 | a;
return 7;
}
/* CSE2 from below */
a &= SLOT_2_0;
p++;
b = b<<14;
b |= *p;
/* b: p3<<28 | p5<<14 | p7 (unmasked) */
if (!(b&0x80))
{
/* 8-byte varint */
b &= SLOT_4_2_0;
/* moved CSE2 up */
/* a &= (0x7f<<14)|(0x7f); */
a = a<<7;
a |= b;
s = s>>4;
*v = ((u64)s)<<32 | a;
return 8;
}
/* 9-byte varint. The final byte contributes all 8 of its bits, so the
** shifts below are by 15 and 8 rather than 14 and 7, and no flag bit is
** tested. */
p++;
a = a<<15;
a |= *p;
/* a: p4<<29 | p6<<15 | p8 (unmasked) */
/* moved CSE2 up */
/* a &= (0x7f<<29)|(0x7f<<15)|(0xff); */
b &= SLOT_2_0;
b = b<<8;
a |= b;
s = s<<4;
/* p now points at p8, so p[-4] is byte p4. The top 4 of its 7 data bits
** belong in the upper word of the result. */
b = p[-4];
b &= 0x7f;
b = b>>3;
s |= b;
*v = ((u64)s)<<32 | a;
return 9;
}
/*
** The variable-length integer encoding is as follows:
**
** KEY:
** A = 0xxxxxxx 7 bits of data and one flag bit
** B = 1xxxxxxx 7 bits of data and one flag bit
** C = xxxxxxxx 8 bits of data
**
** 7 bits - A
** 14 bits - BA
** 21 bits - BBA
** 28 bits - BBBA
** 35 bits - BBBBA
** 42 bits - BBBBBA
** 49 bits - BBBBBBA
** 56 bits - BBBBBBBA
** 64 bits - BBBBBBBBC
*/
#ifdef SQLITE_NOINLINE
# define FTS5_NOINLINE SQLITE_NOINLINE
#else
# define FTS5_NOINLINE
#endif
/*
** Write a 64-bit variable-length integer to memory starting at p[0].
** The length of data write will be between 1 and 9 bytes. The number
** of bytes written is returned.
**
** A variable-length integer consists of the lower 7 bits of each byte
** for all bytes that have the 8th bit set and one byte with the 8th
** bit clear. Except, if we get to the 9th byte, it stores the full
** 8 bits and is the last byte.
*/
static int FTS5_NOINLINE fts5PutVarint64(unsigned char *p, u64 v){
  int nByte;                      /* Number of bytes in the encoding */
  int i;
  u64 vRem;

  /* If any of the 8 most significant bits are set the value needs the
  ** full 9-byte form: eight 7-bit groups, each with the 0x80 flag set,
  ** followed by one byte holding the low 8 bits. */
  if( v & (((u64)0xff000000)<<32) ){
    p[8] = (u8)v;
    v >>= 8;
    i = 8;
    while( i-- > 0 ){
      p[i] = (u8)((v & 0x7f) | 0x80);
      v >>= 7;
    }
    return 9;
  }

  /* Count the 7-bit groups required (at least one, even for zero). */
  nByte = 0;
  vRem = v;
  do{
    nByte++;
    vRem >>= 7;
  }while( vRem!=0 );
  assert( nByte<=9 );

  /* Emit the groups from least to most significant, filling the output
  ** from the end. Every byte gets the 0x80 continuation flag, which is
  ** then cleared on the final byte. */
  for(i=nByte-1; i>=0; i--){
    p[i] = (u8)((v & 0x7f) | 0x80);
    v >>= 7;
  }
  p[nByte-1] &= 0x7f;

  return nByte;
}
/*
** Write v to p[] as a variable-length integer and return the number of
** bytes written. The 1- and 2-byte encodings are produced inline; all
** larger values are delegated to fts5PutVarint64().
*/
int sqlite3Fts5PutVarint(unsigned char *p, u64 v){
  if( v>0x3fff ){
    return fts5PutVarint64(p,v);
  }
  if( v>0x7f ){
    /* Two bytes: high 7 bits with the continuation flag, then low 7 bits */
    p[0] = ((v>>7)&0x7f)|0x80;
    p[1] = v&0x7f;
    return 2;
  }
  p[0] = v&0x7f;
  return 1;
}
/*
** Return the number of bytes (between 1 and 5) that the varint encoding
** of 32-bit value iVal occupies. Each byte carries 7 bits of payload.
*/
int sqlite3Fts5GetVarintLen(u32 iVal){
  int nByte = 1;
  while( nByte<5 && iVal>=((u32)1 << (7*nByte)) ){
    nByte++;
  }
  return nByte;
}

488
ext/fts5/fts5_vocab.c Normal file
View File

@@ -0,0 +1,488 @@
/*
** 2015 May 08
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
** This is an SQLite virtual table module implementing direct access to an
** existing FTS5 index. The module may create several different types of
** tables:
**
** col:
** CREATE TABLE vocab(term, col, doc, cnt, PRIMARY KEY(term, col));
**
** One row for each term/column combination. The value of $doc is set to
** the number of fts5 rows that contain at least one instance of term
** $term within column $col. Field $cnt is set to the total number of
** instances of term $term in column $col (in any row of the fts5 table).
**
** row:
** CREATE TABLE vocab(term, doc, cnt, PRIMARY KEY(term));
**
** One row for each term in the database. The value of $doc is set to
** the number of fts5 rows that contain at least one instance of term
** $term. Field $cnt is set to the total number of instances of term
** $term in the database.
*/
#include "fts5Int.h"
typedef struct Fts5VocabTable Fts5VocabTable;
typedef struct Fts5VocabCursor Fts5VocabCursor;
/*
** An fts5vocab virtual table object. The zFts5Tbl and zFts5Db strings are
** stored in the same allocation as the structure itself (see
** fts5VocabInitVtab()), so the whole object is released with a single
** sqlite3_free().
*/
struct Fts5VocabTable {
sqlite3_vtab base;              /* Base class. Must be first */
char *zFts5Tbl; /* Name of fts5 table */
char *zFts5Db; /* Db containing fts5 table */
sqlite3 *db; /* Database handle */
Fts5Global *pGlobal; /* FTS5 global object for this database */
int eType; /* FTS5_VOCAB_COL or ROW */
};
/*
** A cursor open on an fts5vocab table. The aCnt[] and aDoc[] arrays are
** stored in the same allocation as the structure itself (see
** fts5VocabOpenMethod()).
*/
struct Fts5VocabCursor {
sqlite3_vtab_cursor base;       /* Base class. Must be first */
sqlite3_stmt *pStmt; /* Statement holding lock on pIndex */
Fts5Index *pIndex; /* Associated FTS5 index */
int bEof; /* True if this cursor is at EOF */
Fts5IndexIter *pIter; /* Term/rowid iterator object */
/* These are used by 'col' tables only */
int nCol;                       /* Number of entries in aCnt[] and aDoc[] */
int iCol;                       /* Index into aCnt[] of the current row */
i64 *aCnt;                      /* nCol values; presumably the per-column 'cnt' totals - TODO confirm */
i64 *aDoc;                      /* nCol values; presumably the per-column 'doc' totals - TODO confirm */
/* Output values */
i64 rowid; /* This table's current rowid value */
Fts5Buffer term; /* Current value of 'term' column */
i64 aVal[3]; /* Up to three columns following 'term' */
};
#define FTS5_VOCAB_COL 0
#define FTS5_VOCAB_ROW 1
#define FTS5_VOCAB_COL_SCHEMA "term, col, doc, cnt"
#define FTS5_VOCAB_ROW_SCHEMA "term, doc, cnt"
/*
** Translate a string containing an fts5vocab table type to an
** FTS5_VOCAB_XXX constant. If successful, set *peType to the output
** value and return SQLITE_OK. Otherwise, set *pzErr to an error message
** and return SQLITE_ERROR.
*/
static int fts5VocabTableType(const char *zType, char **pzErr, int *peType){
  int rc = SQLITE_OK;
  char *zName;                    /* Dequoted copy of zType */

  zName = sqlite3Fts5Strndup(&rc, zType, -1);
  if( rc!=SQLITE_OK ) return rc;

  /* Strip any quoting, then compare case-insensitively against the
  ** two recognized type names. */
  sqlite3Fts5Dequote(zName);
  if( 0==sqlite3_stricmp(zName, "col") ){
    *peType = FTS5_VOCAB_COL;
  }else if( 0==sqlite3_stricmp(zName, "row") ){
    *peType = FTS5_VOCAB_ROW;
  }else{
    *pzErr = sqlite3_mprintf("fts5vocab: unknown table type: %Q", zName);
    rc = SQLITE_ERROR;
  }

  sqlite3_free(zName);
  return rc;
}
/*
** The xDisconnect() virtual table method.
*/
static int fts5VocabDisconnectMethod(sqlite3_vtab *pVtab){
  /* The table object is a single allocation, so one free releases it. */
  sqlite3_free((Fts5VocabTable*)pVtab);
  return SQLITE_OK;
}
/*
** The xDestroy() virtual table method.
*/
static int fts5VocabDestroyMethod(sqlite3_vtab *pVtab){
  /* Nothing besides the in-memory table object is released here. */
  sqlite3_free((Fts5VocabTable*)pVtab);
  return SQLITE_OK;
}
/*
** This function is the implementation of both the xConnect and xCreate
** methods of the fts5vocab virtual table.
**
** The argv[] array contains the following:
**
** argv[0] -> module name ("fts5vocab")
** argv[1] -> database name
** argv[2] -> table name
**
** then:
**
** argv[3] -> name of fts5 table
** argv[4] -> type of fts5vocab table
**
** or, for tables in the TEMP schema only.
**
** argv[3] -> name of fts5 tables database
** argv[4] -> name of fts5 table
** argv[5] -> type of fts5vocab table
*/
static int fts5VocabInitVtab(
  sqlite3 *db,                    /* The SQLite database connection */
  void *pAux,                     /* Pointer to Fts5Global object */
  int argc,                       /* Number of elements in argv array */
  const char * const *argv,       /* xCreate/xConnect argument array */
  sqlite3_vtab **ppVTab,          /* Write the resulting vtab structure here */
  char **pzErr                    /* Write any error message here */
){
  /* Schema declarations, indexed by FTS5_VOCAB_XXX table type */
  const char *azSchema[] = {
    "CREATE TABlE vocab(" FTS5_VOCAB_COL_SCHEMA ")",
    "CREATE TABlE vocab(" FTS5_VOCAB_ROW_SCHEMA ")"
  };

  Fts5VocabTable *pRet = 0;
  int rc = SQLITE_OK;             /* Return code */
  int bDb;                        /* True for the 6-argument "temp" form */

  /* The form with an explicit database name is only accepted when the
  ** fts5vocab table itself is being created in the "temp" schema. */
  bDb = (argc==6 && strlen(argv[1])==4 && memcmp("temp", argv[1], 4)==0);

  if( argc!=5 && bDb==0 ){
    *pzErr = sqlite3_mprintf("wrong number of vtable arguments");
    rc = SQLITE_ERROR;
  }else{
    int nByte;                    /* Bytes of space to allocate */
    const char *zDb = bDb ? argv[3] : argv[1];
    const char *zTab = bDb ? argv[4] : argv[3];
    const char *zType = bDb ? argv[5] : argv[4];
    int nDb = (int)strlen(zDb)+1;
    int nTab = (int)strlen(zTab)+1;

    /* Initialized so that an indeterminate value can never be copied into
    ** the table object below if the type string fails to parse. */
    int eType = 0;

    rc = fts5VocabTableType(zType, pzErr, &eType);
    if( rc==SQLITE_OK ){
      assert( eType>=0 && eType<(int)(sizeof(azSchema)/sizeof(azSchema[0])) );
      rc = sqlite3_declare_vtab(db, azSchema[eType]);
    }

    /* The table object and both name strings share one allocation.
    ** NOTE(review): this relies on sqlite3Fts5MallocZero() returning NULL
    ** when rc is already an error code - confirm against its definition. */
    nByte = sizeof(Fts5VocabTable) + nDb + nTab;
    pRet = sqlite3Fts5MallocZero(&rc, nByte);
    if( pRet ){
      pRet->pGlobal = (Fts5Global*)pAux;
      pRet->eType = eType;
      pRet->db = db;
      pRet->zFts5Tbl = (char*)&pRet[1];
      pRet->zFts5Db = &pRet->zFts5Tbl[nTab];
      memcpy(pRet->zFts5Tbl, zTab, nTab);
      memcpy(pRet->zFts5Db, zDb, nDb);
      sqlite3Fts5Dequote(pRet->zFts5Tbl);
      sqlite3Fts5Dequote(pRet->zFts5Db);
    }
  }

  *ppVTab = (sqlite3_vtab*)pRet;
  return rc;
}
/*
** The xConnect() and xCreate() methods for the virtual table. All the
** work is done in function fts5VocabInitVtab().
*/
/* xConnect: forwards every argument unchanged to fts5VocabInitVtab(). */
static int fts5VocabConnectMethod(
sqlite3 *db, /* Database connection */
void *pAux, /* Pointer to Fts5Global object */
int argc, /* Number of elements in argv array */
const char * const *argv, /* xCreate/xConnect argument array */
sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
char **pzErr /* OUT: sqlite3_malloc'd error message */
){
return fts5VocabInitVtab(db, pAux, argc, argv, ppVtab, pzErr);
}
/* xCreate: forwards every argument unchanged to fts5VocabInitVtab(), so
** it behaves identically to fts5VocabConnectMethod(). */
static int fts5VocabCreateMethod(
sqlite3 *db, /* Database connection */
void *pAux, /* Pointer to Fts5Global object */
int argc, /* Number of elements in argv array */
const char * const *argv, /* xCreate/xConnect argument array */
sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
char **pzErr /* OUT: sqlite3_malloc'd error message */
){
return fts5VocabInitVtab(db, pAux, argc, argv, ppVtab, pzErr);
}
/*
** Implementation of the xBestIndex method.
*/
static int fts5VocabBestIndexMethod(
sqlite3_vtab *pVTab,
sqlite3_index_info *pInfo
){
/* Neither argument is examined and pInfo is left unmodified. */
return SQLITE_OK;
}
/*
** Implementation of xOpen method.
*/
static int fts5VocabOpenMethod(
  sqlite3_vtab *pVTab,
  sqlite3_vtab_cursor **ppCsr
){
  Fts5VocabTable *pTab = (Fts5VocabTable*)pVTab;
  Fts5Index *pIndex = 0;          /* Index of the underlying fts5 table */
  int nCol = 0;                   /* Column count reported with pIndex */
  Fts5VocabCursor *pCsr = 0;
  int rc = SQLITE_OK;
  sqlite3_stmt *pStmt = 0;
  char *zSql = 0;
  int nByte;

  /* Locate the fts5 table by running a special '*id' MATCH query against
  ** it. The value it returns is passed to sqlite3Fts5IndexFromCsrid() to
  ** obtain the index handle. */
  zSql = sqlite3Fts5Mprintf(&rc,
      "SELECT t.%Q FROM %Q.%Q AS t WHERE t.%Q MATCH '*id'",
      pTab->zFts5Tbl, pTab->zFts5Db, pTab->zFts5Tbl, pTab->zFts5Tbl
  );
  if( zSql ){
    rc = sqlite3_prepare_v2(pTab->db, zSql, -1, &pStmt, 0);
  }
  sqlite3_free(zSql);
  assert( rc==SQLITE_OK || pStmt==0 );

  /* A generic prepare error is not reported as-is. It falls through to
  ** the "no such fts5 table" message below, since pIndex stays NULL. */
  if( rc==SQLITE_ERROR ) rc = SQLITE_OK;

  if( pStmt && sqlite3_step(pStmt)==SQLITE_ROW ){
    i64 iId = sqlite3_column_int64(pStmt, 0);
    pIndex = sqlite3Fts5IndexFromCsrid(pTab->pGlobal, iId, &nCol);
  }

  if( rc==SQLITE_OK && pIndex==0 ){
    rc = sqlite3_finalize(pStmt);
    pStmt = 0;
    if( rc==SQLITE_OK ){
      /* Any previous message must be released before zErrMsg is
      ** overwritten, otherwise it leaks. sqlite3_free(0) is a no-op. */
      sqlite3_free(pVTab->zErrMsg);
      pVTab->zErrMsg = sqlite3_mprintf(
          "no such fts5 table: %s.%s", pTab->zFts5Db, pTab->zFts5Tbl
      );
      rc = SQLITE_ERROR;
    }
  }

  /* The cursor and its aCnt[] and aDoc[] arrays share one allocation.
  ** NOTE(review): this relies on sqlite3Fts5MallocZero() returning NULL
  ** when rc is already an error code - confirm against its definition. */
  nByte = nCol * sizeof(i64) * 2 + sizeof(Fts5VocabCursor);
  pCsr = (Fts5VocabCursor*)sqlite3Fts5MallocZero(&rc, nByte);
  if( pCsr ){
    pCsr->pIndex = pIndex;
    pCsr->pStmt = pStmt;          /* Cursor now owns the statement */
    pCsr->nCol = nCol;
    pCsr->aCnt = (i64*)&pCsr[1];
    pCsr->aDoc = &pCsr->aCnt[nCol];
  }else{
    sqlite3_finalize(pStmt);
  }

  *ppCsr = (sqlite3_vtab_cursor*)pCsr;
  return rc;
}
/*
** Return cursor pCsr to its "not yet filtered" state: close any open
** index iterator and clear the scan position.
**
** The end-of-scan flag and the current-column position are cleared as
** well. Without this, a cursor that is filtered a second time (for example
** as the inner loop of a join) would either report EOF immediately because
** bEof was still set, or first return rows for leftover non-zero entries
** of aCnt[] that belong to the term of the previous scan.
*/
static void fts5VocabResetCursor(Fts5VocabCursor *pCsr){
  pCsr->rowid = 0;
  sqlite3Fts5IterClose(pCsr->pIter);
  pCsr->pIter = 0;
  pCsr->bEof = 0;

  /* Park iCol at the end of the column range so that the next call to
  ** fts5VocabNextMethod() loads a fresh term instead of resuming the
  ** column scan of a stale one. */
  pCsr->iCol = pCsr->nCol;
}
/*
** Close the cursor. For additional information see the documentation
** on the xClose method of the virtual table interface.
*/
static int fts5VocabCloseMethod(sqlite3_vtab_cursor *pCursor){
  Fts5VocabCursor *pVocab = (Fts5VocabCursor*)pCursor;

  /* Release everything the cursor owns, then the cursor itself: the
  ** iterator (via the reset), the term buffer and the lookup statement. */
  fts5VocabResetCursor(pVocab);
  sqlite3Fts5BufferFree(&pVocab->term);
  sqlite3_finalize(pVocab->pStmt);
  sqlite3_free(pVocab);

  return SQLITE_OK;
}
/*
** Advance the cursor to the next row in the table.
*/
/*
** xNext implementation.
**
** For a "row" table each call consumes one complete term from the index
** iterator and leaves two counters in aVal[]: aVal[0] is incremented once
** per position list visited and aVal[1] once per position.
**
** For a "col" table the per-column counters aCnt[] (positions) and aDoc[]
** (position lists containing the column) are filled in once per term, then
** successive calls step through the columns with a non-zero aCnt[] entry.
** A new term is loaded only when the columns of the current one are
** exhausted.
**
** Returns SQLITE_OK on success, an error code from the index layer, or
** SQLITE_CORRUPT_VTAB if the position data refers to a column outside
** 0..nCol-1 or a term turns out to have no positions in any column.
*/
static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){
  Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
  Fts5VocabTable *pTab = (Fts5VocabTable*)pCursor->pVtab;
  int rc = SQLITE_OK;

  pCsr->rowid++;

  /* "col" table: advance to the next column of the current term that has
  ** at least one position. */
  if( pTab->eType==FTS5_VOCAB_COL ){
    for(pCsr->iCol++; pCsr->iCol<pCsr->nCol; pCsr->iCol++){
      if( pCsr->aCnt[pCsr->iCol] ) break;
    }
  }

  if( pTab->eType==FTS5_VOCAB_ROW || pCsr->iCol>=pCsr->nCol ){
    if( sqlite3Fts5IterEof(pCsr->pIter) ){
      pCsr->bEof = 1;
    }else{
      const char *zTerm;
      int nTerm;

      /* Remember the term the iterator currently points at and clear all
      ** counters before accumulating its statistics. */
      zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm);
      sqlite3Fts5BufferSet(&rc, &pCsr->term, nTerm, (const u8*)zTerm);
      memset(pCsr->aVal, 0, sizeof(pCsr->aVal));
      memset(pCsr->aCnt, 0, pCsr->nCol * sizeof(i64));
      memset(pCsr->aDoc, 0, pCsr->nCol * sizeof(i64));
      pCsr->iCol = 0;

      assert( pTab->eType==FTS5_VOCAB_COL || pTab->eType==FTS5_VOCAB_ROW );
      while( rc==SQLITE_OK ){
        i64 dummy;
        const u8 *pPos; int nPos;   /* Position list */
        i64 iPos = 0;               /* 64-bit position read from poslist */
        int iOff = 0;               /* Current offset within position list */

        rc = sqlite3Fts5IterPoslist(pCsr->pIter, &pPos, &nPos, &dummy);
        if( rc==SQLITE_OK ){
          if( pTab->eType==FTS5_VOCAB_ROW ){
            while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){
              pCsr->aVal[1]++;
            }
            pCsr->aVal[0]++;
          }else{
            int iCol = -1;
            while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){
              int ii = FTS5_POS2COLUMN(iPos);
              /* The column number comes from stored index data and is used
              ** as an array index, so it must be range-checked. */
              if( ii<0 || ii>=pCsr->nCol ){
                rc = SQLITE_CORRUPT_VTAB;
                break;
              }
              pCsr->aCnt[ii]++;
              if( iCol!=ii ){
                pCsr->aDoc[ii]++;
                iCol = ii;
              }
            }
          }
        }

        /* Only advance if no error has occurred, so that an error code set
        ** above is not overwritten. */
        if( rc==SQLITE_OK ){
          rc = sqlite3Fts5IterNextScan(pCsr->pIter);
        }

        /* Stop once the iterator has moved on to a different term or has
        ** reached the end of the index. */
        if( rc==SQLITE_OK ){
          zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm);
          if( nTerm!=pCsr->term.n || memcmp(zTerm, pCsr->term.p, nTerm) ) break;
          if( sqlite3Fts5IterEof(pCsr->pIter) ) break;
        }
      }
    }
  }

  /* "col" table: publish the values for the current column. The scan is
  ** bounded by nCol and skipped entirely after an error, because in that
  ** case aCnt[] may contain nothing but zeroes. */
  if( rc==SQLITE_OK && pCsr->bEof==0 && pTab->eType==FTS5_VOCAB_COL ){
    while( pCsr->iCol<pCsr->nCol && pCsr->aCnt[pCsr->iCol]==0 ) pCsr->iCol++;
    if( pCsr->iCol>=pCsr->nCol ){
      /* A term with no positions in any column */
      rc = SQLITE_CORRUPT_VTAB;
    }else{
      pCsr->aVal[0] = pCsr->iCol;
      pCsr->aVal[1] = pCsr->aDoc[pCsr->iCol];
      pCsr->aVal[2] = pCsr->aCnt[pCsr->iCol];
    }
  }
  return rc;
}
/*
** This is the xFilter implementation for the virtual table.
*/
static int fts5VocabFilterMethod(
  sqlite3_vtab_cursor *pCursor,   /* The cursor used for this query */
  int idxNum,                     /* Strategy index (not used) */
  const char *idxStr,             /* Unused */
  int nVal,                       /* Number of elements in apVal (not used) */
  sqlite3_value **apVal           /* Arguments for the indexing scheme (not used) */
){
  Fts5VocabCursor *pVocab = (Fts5VocabCursor*)pCursor;
  int rc;

  /* Discard any previous scan, open a new scan iterator on the index and
  ** then position the cursor on the first row. */
  fts5VocabResetCursor(pVocab);
  rc = sqlite3Fts5IndexQuery(
      pVocab->pIndex, 0, 0, FTS5INDEX_QUERY_SCAN, &pVocab->pIter
  );
  if( rc!=SQLITE_OK ){
    return rc;
  }
  return fts5VocabNextMethod(pCursor);
}
/*
** This is the xEof method of the virtual table. SQLite calls this
** routine to find out if it has reached the end of a result set.
*/
static int fts5VocabEofMethod(sqlite3_vtab_cursor *pCursor){
  /* The flag is maintained by fts5VocabNextMethod() */
  return ((Fts5VocabCursor*)pCursor)->bEof;
}
/*
** xColumn implementation. Column 0 is the term text of the current row;
** columns 1..3 are the integer values held in aVal[0..2].
*/
static int fts5VocabColumnMethod(
  sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */
  sqlite3_context *pCtx,          /* Context for sqlite3_result_xxx() calls */
  int iCol                        /* Index of column to read value from */
){
  Fts5VocabCursor *pVocab = (Fts5VocabCursor*)pCursor;

  if( iCol==0 ){
    /* term */
    sqlite3_result_text(
        pCtx, (const char*)pVocab->term.p, pVocab->term.n, SQLITE_TRANSIENT
    );
  }else{
    assert( iCol<4 && iCol>0 );
    sqlite3_result_int64(pCtx, pVocab->aVal[iCol-1]);
  }
  return SQLITE_OK;
}
/*
** This is the xRowid method. The SQLite core calls this routine to
** retrieve the rowid for the current row of the result set. The
** rowid should be written to *pRowid.
*/
static int fts5VocabRowidMethod(
  sqlite3_vtab_cursor *pCursor,
  sqlite_int64 *pRowid
){
  /* The rowid is the counter that fts5VocabNextMethod() increments on
  ** every call. */
  *pRowid = ((Fts5VocabCursor*)pCursor)->rowid;
  return SQLITE_OK;
}
/*
** Register the "fts5vocab" virtual table module with database handle db.
** pGlobal is installed as the module's client data. The module defines no
** xUpdate or transaction methods. Returns the result of
** sqlite3_create_module_v2().
*/
int sqlite3Fts5VocabInit(Fts5Global *pGlobal, sqlite3 *db){
  static const sqlite3_module fts5Vocab = {
    /* iVersion      */ 2,
    /* xCreate       */ fts5VocabCreateMethod,
    /* xConnect      */ fts5VocabConnectMethod,
    /* xBestIndex    */ fts5VocabBestIndexMethod,
    /* xDisconnect   */ fts5VocabDisconnectMethod,
    /* xDestroy      */ fts5VocabDestroyMethod,
    /* xOpen         */ fts5VocabOpenMethod,
    /* xClose        */ fts5VocabCloseMethod,
    /* xFilter       */ fts5VocabFilterMethod,
    /* xNext         */ fts5VocabNextMethod,
    /* xEof          */ fts5VocabEofMethod,
    /* xColumn       */ fts5VocabColumnMethod,
    /* xRowid        */ fts5VocabRowidMethod,
    /* xUpdate       */ 0,
    /* xBegin        */ 0,
    /* xSync         */ 0,
    /* xCommit       */ 0,
    /* xRollback     */ 0,
    /* xFindFunction */ 0,
    /* xRename       */ 0,
    /* xSavepoint    */ 0,
    /* xRelease      */ 0,
    /* xRollbackTo   */ 0,
  };

  return sqlite3_create_module_v2(
      db, "fts5vocab", &fts5Vocab, (void*)pGlobal, 0
  );
}

173
ext/fts5/fts5parse.y Normal file
View File

@@ -0,0 +1,173 @@
/*
** 2014 May 31
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
*/
// All token codes are small integers with #defines that begin with "FTS5_"
%token_prefix FTS5_
// The type of the data attached to each token is Fts5Token. This is also the
// default type for non-terminals.
//
%token_type {Fts5Token}
%default_type {Fts5Token}
// The generated parser function takes a 4th argument as follows:
%extra_argument {Fts5Parse *pParse}
// This code runs whenever there is a syntax error
//
%syntax_error {
sqlite3Fts5ParseError(
pParse, "fts5: syntax error near \"%.*s\"",TOKEN.n,TOKEN.p
);
}
// This code runs if the parser stack overflows (for example on a very
// deeply nested expression). Report it as an ordinary parse error rather
// than asserting, so that debug builds do not abort and release builds do
// not fail silently.
%stack_overflow {
  sqlite3Fts5ParseError(pParse, "fts5: parser stack overflow");
}
// The name of the generated procedure that implements the parser
// is as follows:
%name sqlite3Fts5Parser
// The following text is included near the beginning of the C source
// code file that implements the parser.
//
%include {
#include "fts5Int.h"
#include "fts5parse.h"
/*
** Disable all error recovery processing in the parser push-down
** automaton.
*/
#define YYNOERRORRECOVERY 1
/*
** Make yytestcase() the same as testcase()
*/
#define yytestcase(X) testcase(X)
} // end %include
%left OR.
%left AND.
%left NOT.
%left TERM.
%left COLON.
input ::= expr(X). { sqlite3Fts5ParseFinished(pParse, X); }
%type cnearset {Fts5ExprNode*}
%type expr {Fts5ExprNode*}
%type exprlist {Fts5ExprNode*}
%destructor cnearset { sqlite3Fts5ParseNodeFree($$); }
%destructor expr { sqlite3Fts5ParseNodeFree($$); }
%destructor exprlist { sqlite3Fts5ParseNodeFree($$); }
expr(A) ::= expr(X) AND expr(Y). {
A = sqlite3Fts5ParseNode(pParse, FTS5_AND, X, Y, 0);
}
expr(A) ::= expr(X) OR expr(Y). {
A = sqlite3Fts5ParseNode(pParse, FTS5_OR, X, Y, 0);
}
expr(A) ::= expr(X) NOT expr(Y). {
A = sqlite3Fts5ParseNode(pParse, FTS5_NOT, X, Y, 0);
}
expr(A) ::= LP expr(X) RP. {A = X;}
expr(A) ::= exprlist(X). {A = X;}
exprlist(A) ::= cnearset(X). {A = X;}
exprlist(A) ::= exprlist(X) cnearset(Y). {
A = sqlite3Fts5ParseNode(pParse, FTS5_AND, X, Y, 0);
}
cnearset(A) ::= nearset(X). {
A = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, X);
}
cnearset(A) ::= colset(X) COLON nearset(Y). {
sqlite3Fts5ParseSetColset(pParse, Y, X);
A = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, Y);
}
%type colset {Fts5ExprColset*}
%destructor colset { sqlite3_free($$); }
%type colsetlist {Fts5ExprColset*}
%destructor colsetlist { sqlite3_free($$); }
colset(A) ::= LCP colsetlist(X) RCP. { A = X; }
colset(A) ::= STRING(X). {
A = sqlite3Fts5ParseColset(pParse, 0, &X);
}
colsetlist(A) ::= colsetlist(Y) STRING(X). {
A = sqlite3Fts5ParseColset(pParse, Y, &X); }
colsetlist(A) ::= STRING(X). {
A = sqlite3Fts5ParseColset(pParse, 0, &X);
}
%type nearset {Fts5ExprNearset*}
%type nearphrases {Fts5ExprNearset*}
%destructor nearset { sqlite3Fts5ParseNearsetFree($$); }
%destructor nearphrases { sqlite3Fts5ParseNearsetFree($$); }
nearset(A) ::= phrase(X). { A = sqlite3Fts5ParseNearset(pParse, 0, X); }
nearset(A) ::= STRING(X) LP nearphrases(Y) neardist_opt(Z) RP. {
sqlite3Fts5ParseNear(pParse, &X);
sqlite3Fts5ParseSetDistance(pParse, Y, &Z);
A = Y;
}
nearphrases(A) ::= phrase(X). {
A = sqlite3Fts5ParseNearset(pParse, 0, X);
}
nearphrases(A) ::= nearphrases(X) phrase(Y). {
A = sqlite3Fts5ParseNearset(pParse, X, Y);
}
/*
** The optional ", <integer>" at the end of the NEAR() arguments.
*/
neardist_opt(A) ::= . { A.p = 0; A.n = 0; }
neardist_opt(A) ::= COMMA STRING(X). { A = X; }
/*
** A phrase. A set of primitives connected by "+" operators. Examples:
**
** "the" + "quick brown" + fo *
** "the quick brown fo" *
** the+quick+brown+fo*
*/
%type phrase {Fts5ExprPhrase*}
%destructor phrase { sqlite3Fts5ParsePhraseFree($$); }
phrase(A) ::= phrase(X) PLUS STRING(Y) star_opt(Z). {
A = sqlite3Fts5ParseTerm(pParse, X, &Y, Z);
}
phrase(A) ::= STRING(Y) star_opt(Z). {
A = sqlite3Fts5ParseTerm(pParse, 0, &Y, Z);
}
/*
** Optional "*" character.
*/
%type star_opt {int}
star_opt(A) ::= STAR. { A = 1; }
star_opt(A) ::= . { A = 0; }

222
ext/fts5/mkportersteps.tcl Normal file
View File

@@ -0,0 +1,222 @@
#
# 2014 Jun 09
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#-------------------------------------------------------------------------
#
# This script generates the implementations of the following C functions,
# which are part of the porter tokenizer implementation:
#
# static int fts5PorterStep1B(char *aBuf, int *pnBuf);
# static int fts5PorterStep1B2(char *aBuf, int *pnBuf);
# static int fts5PorterStep2(char *aBuf, int *pnBuf);
# static int fts5PorterStep3(char *aBuf, int *pnBuf);
# static int fts5PorterStep4(char *aBuf, int *pnBuf);
#
set O(Step1B2) {
{ at {} ate 1 }
{ bl {} ble 1 }
{ iz {} ize 1 }
}
set O(Step1B) {
{ "eed" fts5Porter_MGt0 "ee" 0 }
{ "ed" fts5Porter_Vowel "" 1 }
{ "ing" fts5Porter_Vowel "" 1 }
}
set O(Step2) {
{ "ational" fts5Porter_MGt0 "ate" }
{ "tional" fts5Porter_MGt0 "tion" }
{ "enci" fts5Porter_MGt0 "ence" }
{ "anci" fts5Porter_MGt0 "ance" }
{ "izer" fts5Porter_MGt0 "ize" }
{ "logi" fts5Porter_MGt0 "log" }
{ "bli" fts5Porter_MGt0 "ble" }
{ "alli" fts5Porter_MGt0 "al" }
{ "entli" fts5Porter_MGt0 "ent" }
{ "eli" fts5Porter_MGt0 "e" }
{ "ousli" fts5Porter_MGt0 "ous" }
{ "ization" fts5Porter_MGt0 "ize" }
{ "ation" fts5Porter_MGt0 "ate" }
{ "ator" fts5Porter_MGt0 "ate" }
{ "alism" fts5Porter_MGt0 "al" }
{ "iveness" fts5Porter_MGt0 "ive" }
{ "fulness" fts5Porter_MGt0 "ful" }
{ "ousness" fts5Porter_MGt0 "ous" }
{ "aliti" fts5Porter_MGt0 "al" }
{ "iviti" fts5Porter_MGt0 "ive" }
{ "biliti" fts5Porter_MGt0 "ble" }
}
set O(Step3) {
{ "icate" fts5Porter_MGt0 "ic" }
{ "ative" fts5Porter_MGt0 "" }
{ "alize" fts5Porter_MGt0 "al" }
{ "iciti" fts5Porter_MGt0 "ic" }
{ "ical" fts5Porter_MGt0 "ic" }
{ "ful" fts5Porter_MGt0 "" }
{ "ness" fts5Porter_MGt0 "" }
}
set O(Step4) {
{ "al" fts5Porter_MGt1 "" }
{ "ance" fts5Porter_MGt1 "" }
{ "ence" fts5Porter_MGt1 "" }
{ "er" fts5Porter_MGt1 "" }
{ "ic" fts5Porter_MGt1 "" }
{ "able" fts5Porter_MGt1 "" }
{ "ible" fts5Porter_MGt1 "" }
{ "ant" fts5Porter_MGt1 "" }
{ "ement" fts5Porter_MGt1 "" }
{ "ment" fts5Porter_MGt1 "" }
{ "ent" fts5Porter_MGt1 "" }
{ "ion" fts5Porter_MGt1_and_S_or_T "" }
{ "ou" fts5Porter_MGt1 "" }
{ "ism" fts5Porter_MGt1 "" }
{ "ate" fts5Porter_MGt1 "" }
{ "iti" fts5Porter_MGt1 "" }
{ "ous" fts5Porter_MGt1 "" }
{ "ive" fts5Porter_MGt1 "" }
{ "ize" fts5Porter_MGt1 "" }
}
# Comparison callback: order two rule entries by the second-to-last
# character of their suffix text (element 0 of each entry).
proc sort_cb {lhs rhs} {
  set lkey [string index [lindex $lhs 0] end-1]
  set rkey [string index [lindex $rhs 0] end-1]
  return [string compare $lkey $rkey]
}
# Write to stdout the C source of function fts5Porter<name>(), generated
# from the list of rules in $data. Each rule is a list of the form
#
#     SUFFIX CONDITION REPLACEMENT ?RETFLAG?
#
# The generated function switches on the second-to-last character of the
# buffer. Within each case the rules sharing that character are tried in
# order as an if/else-if chain.
proc create_step_function {name data} {
# Template for the generated function as a whole.
set T(function) {
static int fts5Porter${name}(char *aBuf, int *pnBuf){
int ret = 0;
int nBuf = *pnBuf;
switch( aBuf[nBuf-2] ){
${switchbody}
}
return ret;
}
}
# Template for one case of the switch statement.
set T(case) {
case '${k}':
${ifstmts}
break;
}
# Templates for a single rule. The three digits in each name are the
# flags bCond, bMemcpy and bRet computed below: whether the rule has a
# condition callback, whether it has a non-empty replacement, and whether
# the generated function sets ret to 1 when the rule is applied. There is
# no template for the combination 0_0_1.
set T(if_0_0_0) {
if( ${match} ){
*pnBuf = nBuf - $n;
}
}
set T(if_1_0_0) {
if( ${match} ){
if( ${cond} ){
*pnBuf = nBuf - $n;
}
}
}
set T(if_0_1_0) {
if( ${match} ){
${memcpy}
*pnBuf = nBuf - $n + $nRep;
}
}
set T(if_1_1_0) {
if( ${match} ){
if( ${cond} ){
${memcpy}
*pnBuf = nBuf - $n + $nRep;
}
}
}
set T(if_1_0_1) {
if( ${match} ){
if( ${cond} ){
*pnBuf = nBuf - $n;
ret = 1;
}
}
}
set T(if_0_1_1) {
if( ${match} ){
${memcpy}
*pnBuf = nBuf - $n + $nRep;
ret = 1;
}
}
set T(if_1_1_1) {
if( ${match} ){
if( ${cond} ){
${memcpy}
*pnBuf = nBuf - $n + $nRep;
ret = 1;
}
}
}
set switchbody ""
# Group the rules by the second-to-last character of their suffix. This
# is the character the generated switch statement tests.
foreach I $data {
set k [string range [lindex $I 0] end-1 end-1]
lappend aCase($k) $I
}
# Emit the cases in sorted order of that character.
foreach k [lsort [array names aCase]] {
set ifstmts ""
foreach I $aCase($k) {
set zSuffix [lindex $I 0] ;# Suffix text for this rule
set zRep [lindex $I 2] ;# Replacement text for rule
set xCond [lindex $I 1] ;# Condition callback (or "")
set n [string length $zSuffix]
set nRep [string length $zRep]
# C fragments substituted into the rule template: the suffix test, the
# replacement copy, and the call to the condition callback.
set match "nBuf>$n && 0==memcmp(\"$zSuffix\", &aBuf\[nBuf-$n\], $n)"
set memcpy "memcpy(&aBuf\[nBuf-$n\], \"$zRep\", $nRep);"
set cond "${xCond}(aBuf, nBuf-$n)"
set bMemcpy [expr {$nRep>0}]
set bCond [expr {$xCond!=""}]
# bRet is true only if the rule has a fourth element that is true.
set bRet [expr {[llength $I]>3 && [lindex $I 3]}]
set t $T(if_${bCond}_${bMemcpy}_${bRet})
lappend ifstmts [string trim [subst -nocommands $t]]
}
# Chain the rules of this case together as if/else-if.
set ifstmts [join $ifstmts "else "]
append switchbody [subst -nocommands $T(case)]
}
puts [subst -nocommands $T(function)]
}
puts [string trim {
/**************************************************************************
***************************************************************************
** GENERATED CODE STARTS HERE (mkportersteps.tcl)
*/
}]
foreach step [array names O] {
create_step_function $step $O($step)
}
puts [string trim {
/*
** GENERATED CODE ENDS HERE (mkportersteps.tcl)
***************************************************************************
**************************************************************************/
}]

View File

@@ -0,0 +1,297 @@
# 2014 Dec 19
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. .. test]
}
source $testdir/tester.tcl
catch {
sqlite3_fts5_may_be_corrupt 0
append G(perm:dbconfig) "; load_static_extension \$::dbhandle fts5"
reset_db
}
# Return a list with one element per phrase instance reported by API
# command $cmd. Each element is the result of [$cmd xInst] with its
# spaces replaced by "." characters.
proc fts5_test_poslist {cmd} {
  set hits {}
  set iInst 0
  while {$iInst < [$cmd xInstCount]} {
    set inst [$cmd xInst $iInst]
    lappend hits [string map {{ } .} $inst]
    incr iInst
  }
  return $hits
}
# Return a list holding the result of [$cmd xColumnSize] for each column.
proc fts5_test_columnsize {cmd} {
  set sizes {}
  set iCol 0
  while {$iCol < [$cmd xColumnCount]} {
    lappend sizes [$cmd xColumnSize $iCol]
    incr iCol
  }
  return $sizes
}
# Return a list holding the result of [$cmd xColumnText] for each column.
proc fts5_test_columntext {cmd} {
  set texts {}
  set iCol 0
  while {$iCol < [$cmd xColumnCount]} {
    lappend texts [$cmd xColumnText $iCol]
    incr iCol
  }
  return $texts
}
# Return a list holding the result of [$cmd xColumnTotalSize] for each
# column.
proc fts5_test_columntotalsize {cmd} {
  set totals {}
  set iCol 0
  while {$iCol < [$cmd xColumnCount]} {
    lappend totals [$cmd xColumnTotalSize $iCol]
    incr iCol
  }
  return $totals
}
# Tokenizer callback: append $token to the list variable named $varname in
# the calling frame. The two offsets are ignored. Always returns the
# string SQLITE_OK.
proc test_append_token {varname token iStart iEnd} {
  upvar $varname tokenList
  lappend tokenList $token
  return SQLITE_OK
}
# Tokenize the text of every column with [$cmd xTokenize] and return a
# list with one element per column, each element being the list of tokens
# gathered for that column.
proc fts5_test_tokenize {cmd} {
  set perColumn {}
  set iCol 0
  while {$iCol < [$cmd xColumnCount]} {
    set tokens {}
    set text [$cmd xColumnText $iCol]
    $cmd xTokenize $text [list test_append_token tokens]
    lappend perColumn $tokens
    incr iCol
  }
  return $perColumn
}
# Return the result of [$cmd xRowCount].
proc fts5_test_rowcount {cmd} {
  return [$cmd xRowCount]
}
# xQueryPhrase callback. $cnt names a list variable in the calling frame
# holding one counter per column. For every column in which the current
# row has at least one phrase instance, that column's counter is
# incremented once.
proc test_queryphrase_cb {cnt cmd} {
  upvar $cnt counters

  # Record the set of columns that contain an instance.
  set iInst 0
  while {$iInst < [$cmd xInstCount]} {
    foreach {ip ic io} [$cmd xInst $iInst] break
    set seen($ic) 1
    incr iInst
  }

  foreach ic [array names seen] {
    set prev [lindex $counters $ic]
    lset counters $ic [expr {$prev + 1}]
  }
}
# For each phrase of the query, run [$cmd xQueryPhrase] with
# test_queryphrase_cb as the callback and a list of zero counters, one per
# column. Return the list of resulting counter lists, one per phrase.
proc fts5_test_queryphrase {cmd} {
  set perPhrase {}
  set iPhrase 0
  while {$iPhrase < [$cmd xPhraseCount]} {
    # Start with one zero counter for each column.
    set cnt {}
    set iCol 0
    while {$iCol < [$cmd xColumnCount]} {
      lappend cnt 0
      incr iCol
    }
    $cmd xQueryPhrase $iPhrase [list test_queryphrase_cb cnt]
    lappend perPhrase $cnt
    incr iPhrase
  }
  return $perPhrase
}
# Return the result of [$cmd xPhraseCount].
proc fts5_test_phrasecount {cmd} {
  return [$cmd xPhraseCount]
}
# Return a flat name/value list combining the results of the individual
# fts5_test_xxx helpers, in this fixed order: columnsize, columntext,
# columntotalsize, poslist, tokenize, rowcount.
proc fts5_test_all {cmd} {
  return [list \
    columnsize      [fts5_test_columnsize $cmd] \
    columntext      [fts5_test_columntext $cmd] \
    columntotalsize [fts5_test_columntotalsize $cmd] \
    poslist         [fts5_test_poslist $cmd] \
    tokenize        [fts5_test_tokenize $cmd] \
    rowcount        [fts5_test_rowcount $cmd] \
  ]
}
# Register each fts5_test_xxx Tcl proc with database handle $db as an
# fts5 function of the same name.
proc fts5_aux_test_functions {db} {
  set names {
    fts5_test_columnsize
    fts5_test_columntext
    fts5_test_columntotalsize
    fts5_test_poslist
    fts5_test_tokenize
    fts5_test_rowcount
    fts5_test_all
    fts5_test_queryphrase
    fts5_test_phrasecount
  }
  foreach fn $names {
    sqlite3_fts5_create_function $db $fn $fn
  }
}
# Decode the structure record (rowid 10) of table ${tbl}_data through the
# global [db] handle. For every element of the decoded record after the
# first, return that element's list length minus 3.
proc fts5_level_segs {tbl} {
  set sql "SELECT fts5_decode(rowid,block) aS r FROM ${tbl}_data WHERE rowid=10"
  set counts {}
  foreach lvl [lrange [db one $sql] 1 end] {
    lappend counts [expr {[llength $lvl] - 3}]
  }
  return $counts
}
# Decode the structure record (rowid 10) of table ${tbl}_data through the
# global [db] handle. For every element of the decoded record after the
# first, return the list of "id=N" values extracted from its fourth and
# subsequent sub-elements.
proc fts5_level_segids {tbl} {
  set sql "SELECT fts5_decode(rowid,block) aS r FROM ${tbl}_data WHERE rowid=10"
  set perLevel {}
  foreach lvl [lrange [db one $sql] 1 end] {
    set ids {}
    foreach seg [lrange $lvl 3 end] {
      regexp {id=([1234567890]*)} $seg -> segid
      lappend ids $segid
    }
    lappend perLevel $ids
  }
  return $perLevel
}
# Return a list of $n pseudo-random words. Each word is "x" followed by a
# three-digit random number whose digits 0..9 are mapped to letters a..j.
proc fts5_rnddoc {n} {
  set digit2letter [list 0 a 1 b 2 c 3 d 4 e 5 f 6 g 7 h 8 i 9 j]
  set words {}
  for {set k 0} {$k < $n} {incr k} {
    set num [format %.3d [expr int(rand()*1000)]]
    lappend words "x[string map $digit2letter $num]"
  }
  return $words
}
#-------------------------------------------------------------------------
# Usage:
#
# nearset aCol ?-pc VARNAME? ?-near N? ?-col C? -- phrase1 phrase2...
#
# This command is used to test if a document (set of column values) matches
# the logical equivalent of a single FTS5 NEAR() clump and, if so, return
# the equivalent of an FTS5 position list.
#
# Parameter $aCol is passed a list of the column values for the document
# to test. Parameters $phrase1 and so on are the phrases.
#
# The result is a list of phrase hits. Each phrase hit is formatted as
# three integers separated by "." characters, in the following format:
#
# <phrase number> . <column number> . <token offset>
#
# Options:
#
# -near N (NEAR distance. Default 10)
# -col C (List of column indexes to match against)
# -pc VARNAME (variable in caller frame to use for phrase numbering)
#
proc nearset {aCol args} {
# Option defaults. These are overridden by the option/value pairs that
# precede the "--" separator in $args.
set O(-near) 10
set O(-col) {}
set O(-pc) ""
set nOpt [lsearch -exact $args --]
if {$nOpt<0} { error "no -- option" }
foreach {k v} [lrange $args 0 [expr $nOpt-1]] {
if {[info exists O($k)]==0} { error "unrecognized option $k" }
set O($k) $v
}
# Phrase numbers start at 0 unless -pc names a variable in the caller's
# frame, in which case that variable is used and advanced as the counter.
if {$O(-pc) == ""} {
set counter 0
} else {
upvar $O(-pc) counter
}
# Set $phraselist to be a list of phrases. $nPhrase its length.
set phraselist [lrange $args [expr $nOpt+1] end]
set nPhrase [llength $phraselist]
# A(col,phrase) accumulates the token offsets at which each phrase is
# reported as a hit in each column.
for {set j 0} {$j < [llength $aCol]} {incr j} {
for {set i 0} {$i < $nPhrase} {incr i} {
set A($j,$i) [list]
}
}
set iCol -1
foreach col $aCol {
incr iCol
# Skip columns excluded by the -col option.
if {$O(-col)!="" && [lsearch $O(-col) $iCol]<0} continue
set nToken [llength $col]
# Slide $iFL over the column. For each value, every phrase is searched
# for among the start offsets from iFL-near-phraselength up to iFL.
set iFL [expr $O(-near) >= $nToken ? $nToken - 1 : $O(-near)]
for { } {$iFL < $nToken} {incr iFL} {
# B(phrase) holds the hits found for the current value of $iFL only.
for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} {
set B($iPhrase) [list]
}
for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} {
set p [lindex $phraselist $iPhrase]
set nPm1 [expr {[llength $p] - 1}]
set iFirst [expr $iFL - $O(-near) - [llength $p]]
for {set i $iFirst} {$i <= $iFL} {incr i} {
if {[lrange $col $i [expr $i+$nPm1]] == $p} { lappend B($iPhrase) $i }
}
# A phrase with no hit means this value of $iFL yields no match.
if {[llength $B($iPhrase)] == 0} break
}
# Only if every phrase had at least one hit are the hits merged into A,
# sorted and with duplicates removed.
if {$iPhrase==$nPhrase} {
for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} {
set A($iCol,$iPhrase) [concat $A($iCol,$iPhrase) $B($iPhrase)]
set A($iCol,$iPhrase) [lsort -integer -uniq $A($iCol,$iPhrase)]
}
}
}
}
set res [list]
#puts [array names A]
# Format each hit as phrase.column.offset. The counter is advanced once
# per phrase, whether or not the phrase had any hits.
for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} {
for {set iCol 0} {$iCol < [llength $aCol]} {incr iCol} {
foreach a $A($iCol,$iPhrase) {
lappend res "$counter.$iCol.$a"
}
}
incr counter
}
#puts $res
sort_poslist $res
}
#-------------------------------------------------------------------------
# Usage:
#
# sort_poslist LIST
#
# Sort a position list of the type returned by command [nearset]
#
proc sort_poslist {L} {
  # Ordering is delegated entirely to [instcompare].
  return [lsort -command instcompare $L]
}
# Compare two "phrase.column.offset" position-list entries. Entries are
# ordered by column first, then by offset, then by phrase number. Returns
# the numeric difference of the first field that differs, or 0.
proc instcompare {lhs rhs} {
  foreach {p1 c1 o1} [split $lhs .] {}
  foreach {p2 c2 o2} [split $rhs .] {}

  set diff [expr {$c1 - $c2}]
  if {$diff != 0} { return $diff }

  set diff [expr {$o1 - $o2}]
  if {$diff != 0} { return $diff }

  return [expr {$p1 - $p2}]
}
#-------------------------------------------------------------------------
# Logical operators used by the commands returned by fts5_tcl_expr().
#
proc AND {args} {
  # If any operand is an empty position list the whole result is empty.
  foreach operand $args {
    if {[llength $operand]==0} {
      return [list]
    }
  }
  # Otherwise the result is all operands merged and sorted.
  set merged [concat {*}$args]
  return [sort_poslist $merged]
}
proc OR {args} {
  # The result is all operands merged and sorted.
  set merged [concat {*}$args]
  return [sort_poslist $merged]
}
proc NOT {a b} {
  # The result is $a unless $b has at least one entry, in which case it
  # is empty.
  if {[llength $b]==0} {
    return $a
  }
  return [list]
}

511
ext/fts5/test/fts5aa.test Normal file
View File

@@ -0,0 +1,511 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS5 module.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5aa
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE t1 USING fts5(a, b, c);
SELECT name, sql FROM sqlite_master;
} {
t1 {CREATE VIRTUAL TABLE t1 USING fts5(a, b, c)}
t1_data {CREATE TABLE 't1_data'(id INTEGER PRIMARY KEY, block BLOB)}
t1_idx {CREATE TABLE 't1_idx'(segid, term, pgno, PRIMARY KEY(segid, term)) WITHOUT ROWID}
t1_content {CREATE TABLE 't1_content'(id INTEGER PRIMARY KEY, c0, c1, c2)}
t1_docsize {CREATE TABLE 't1_docsize'(id INTEGER PRIMARY KEY, sz BLOB)}
t1_config {CREATE TABLE 't1_config'(k PRIMARY KEY, v) WITHOUT ROWID}
}
do_execsql_test 1.1 {
DROP TABLE t1;
SELECT name, sql FROM sqlite_master;
} {
}
#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x,y);
}
do_execsql_test 2.1 {
INSERT INTO t1 VALUES('a b c', 'd e f');
}
do_test 2.2 {
execsql { SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 }
} {/{\(structure\) {lvl=0 nMerge=0 nSeg=1 {id=[0123456789]* h=0 leaves=1..1}}}/}
foreach w {a b c d e f} {
do_execsql_test 2.3.$w.asc {
SELECT rowid FROM t1 WHERE t1 MATCH $w;
} {1}
do_execsql_test 2.3.$w.desc {
SELECT rowid FROM t1 WHERE t1 MATCH $w ORDER BY rowid DESC;
} {1}
}
do_execsql_test 2.4 {
INSERT INTO t1(t1) VALUES('integrity-check');
}
#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 3.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x,y);
}
foreach {i x y} {
1 {g f d b f} {h h e i a}
2 {f i g j e} {i j c f f}
3 {e e i f a} {e h f d f}
4 {h j f j i} {h a c f j}
5 {d b j c g} {f e i b e}
6 {a j a e e} {j d f d e}
7 {g i j c h} {j d h c a}
8 {j j i d d} {e e d f b}
9 {c j j d c} {h j i f g}
10 {b f h i a} {c f b b j}
} {
do_execsql_test 3.$i.1 { INSERT INTO t1 VALUES($x, $y) }
do_execsql_test 3.$i.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
if {[set_test_counter errors]} break
}
#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 4.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x,y);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}
foreach {i x y} {
1 {g f d b f} {h h e i a}
2 {f i g j e} {i j c f f}
3 {e e i f a} {e h f d f}
4 {h j f j i} {h a c f j}
5 {d b j c g} {f e i b e}
6 {a j a e e} {j d f d e}
7 {g i j c h} {j d h c a}
8 {j j i d d} {e e d f b}
9 {c j j d c} {h j i f g}
10 {b f h i a} {c f b b j}
} {
do_execsql_test 4.$i.1 { INSERT INTO t1 VALUES($x, $y) }
do_execsql_test 4.$i.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
if {[set_test_counter errors]} break
}
#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 5.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x,y);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}
foreach {i x y} {
1 {dd abc abc abc abcde} {aaa dd ddd ddd aab}
2 {dd aab d aaa b} {abcde c aaa aaa aaa}
3 {abcde dd b b dd} {abc abc d abc ddddd}
4 {aaa abcde dddd dddd abcde} {abc b b abcde abc}
5 {aab dddd d dddd c} {ddd abcde dddd abcde c}
6 {ddd dd b aab abcde} {d ddddd dddd c abc}
7 {d ddddd ddd c abcde} {c aab d abcde ddd}
8 {abcde aaa aab c c} {ddd c dddd b aaa}
9 {abcde aab ddddd c aab} {dddd dddd b c dd}
10 {ddd abcde dddd dd c} {dddd c c d abcde}
} {
do_execsql_test 5.$i.1 { INSERT INTO t1 VALUES($x, $y) }
do_execsql_test 5.$i.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
if {[set_test_counter errors]} break
}
#-------------------------------------------------------------------------
#
breakpoint
reset_db
do_execsql_test 6.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x,y);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}
do_execsql_test 6.1 {
INSERT INTO t1(rowid, x, y) VALUES(22, 'a b c', 'c b a');
REPLACE INTO t1(rowid, x, y) VALUES(22, 'd e f', 'f e d');
}
do_execsql_test 6.2 {
INSERT INTO t1(t1) VALUES('integrity-check')
}
do_execsql_test 6.3 {
REPLACE INTO t1(rowid, x, y) VALUES('22', 'l l l', 'l l l');
}
do_execsql_test 6.4 {
INSERT INTO t1(t1) VALUES('integrity-check')
}
#-------------------------------------------------------------------------
#
reset_db
expr srand(0)
do_execsql_test 7.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x,y,z);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}
# Return a document of 20 words, each picked at random from a fixed
# vocabulary of 11 words.
proc doc {} {
  set vocab [list aaa aab abc abcde b c d dd ddd dddd ddddd]
  set nVocab [llength $vocab]
  set words {}
  set k 0
  while {$k < 20} {
    lappend words [lindex $vocab [expr {int(rand()*$nVocab)}]]
    incr k
  }
  return $words
}
# Debugging aid: decode record 10 of t1_data and, for every element after
# the first, print its first two sub-elements followed by a run of "."
# characters whose length is the element's list length minus 2.
proc dump_structure {} {
  db eval {SELECT fts5_decode(id, block) AS t FROM t1_data WHERE id=10} {
    foreach level [lrange $t 1 end] {
      set nDot [expr {[llength $level] - 2}]
      set dots [string repeat . $nDot]
      puts "[lrange $level 0 1] $dots"
    }
  }
}
for {set i 1} {$i <= 10} {incr i} {
do_test 7.$i {
for {set j 0} {$j < 10} {incr j} {
set x [doc]
set y [doc]
set z [doc]
set rowid [expr int(rand() * 100)]
execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) }
}
execsql { INSERT INTO t1(t1) VALUES('integrity-check'); }
} {}
}
#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 8.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x, prefix="1,2,3");
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}
do_execsql_test 8.1 {
INSERT INTO t1 VALUES('the quick brown fox');
INSERT INTO t1(t1) VALUES('integrity-check');
}
#-------------------------------------------------------------------------
#
reset_db
expr srand(0)
do_execsql_test 9.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x,y,z, prefix="1,2,3");
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}
# Return a document of 20 words, each picked at random from a fixed
# vocabulary of 11 words.
proc doc {} {
  set vocab [list aaa aab abc abcde b c d dd ddd dddd ddddd]
  set nVocab [llength $vocab]
  set words {}
  set k 0
  while {$k < 20} {
    lappend words [lindex $vocab [expr {int(rand()*$nVocab)}]]
    incr k
  }
  return $words
}
# Debugging aid: decode record 10 of t1_data and, for every element after
# the first, print its first two sub-elements followed by a run of "."
# characters whose length is the element's list length minus 2.
proc dump_structure {} {
  db eval {SELECT fts5_decode(id, block) AS t FROM t1_data WHERE id=10} {
    foreach level [lrange $t 1 end] {
      set nDot [expr {[llength $level] - 2}]
      set dots [string repeat . $nDot]
      puts "[lrange $level 0 1] $dots"
    }
  }
}
for {set i 1} {$i <= 10} {incr i} {
do_test 9.$i {
for {set j 0} {$j < 100} {incr j} {
set x [doc]
set y [doc]
set z [doc]
set rowid [expr int(rand() * 100)]
execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) }
}
execsql { INSERT INTO t1(t1) VALUES('integrity-check'); }
} {}
if {[set_test_counter errors]} break
}
#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 10.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x,y);
}
set d10 {
1 {g f d b f} {h h e i a}
2 {f i g j e} {i j c f f}
3 {e e i f a} {e h f d f}
4 {h j f j i} {h a c f j}
5 {d b j c g} {f e i b e}
6 {a j a e e} {j d f d e}
7 {g i j c h} {j d h c a}
8 {j j i d d} {e e d f b}
9 {c j j d c} {h j i f g}
10 {b f h i a} {c f b b j}
}
foreach {rowid x y} $d10 {
do_execsql_test 10.1.$rowid.1 { INSERT INTO t1 VALUES($x, $y) }
do_execsql_test 10.1.$rowid.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
}
foreach rowid {5 9 8 1 2 4 10 7 3 5 6} {
do_execsql_test 10.2.$rowid.1 { DELETE FROM t1 WHERE rowid = $rowid }
do_execsql_test 10.2.$rowid.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
}
foreach {rowid x y} $d10 {
do_execsql_test 10.3.$rowid.1 { INSERT INTO t1 VALUES($x, $y) }
do_execsql_test 10.3.$rowid.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
}
do_execsql_test 10.4.1 { DELETE FROM t1 }
do_execsql_test 10.4.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
#-------------------------------------------------------------------------
#
do_catchsql_test 11.1 {
CREATE VIRTUAL TABLE t2 USING fts5(a, b, c, rank);
} {1 {reserved fts5 column name: rank}}
do_catchsql_test 11.2 {
CREATE VIRTUAL TABLE rank USING fts5(a, b, c);
} {1 {reserved fts5 table name: rank}}
do_catchsql_test 11.3 {
CREATE VIRTUAL TABLE t2 USING fts5(a, b, c, rowid);
} {1 {reserved fts5 column name: rowid}}
#-------------------------------------------------------------------------
#
do_execsql_test 12.1 {
CREATE VIRTUAL TABLE t2 USING fts5(x,y);
} {}
do_catchsql_test 12.2 {
SELECT t2 FROM t2 WHERE t2 MATCH '*stuff'
} {1 {unknown special query: stuff}}
do_test 12.3 {
set res [db eval { SELECT t2 FROM t2 WHERE t2 MATCH '* reads ' }]
string is integer $res
} {1}
#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 13.1 {
CREATE VIRTUAL TABLE t1 USING fts5(x);
INSERT INTO t1(rowid, x) VALUES(1, 'o n e'), (2, 't w o');
} {}
do_execsql_test 13.2 {
SELECT rowid FROM t1 WHERE t1 MATCH 'o';
} {1 2}
do_execsql_test 13.4 {
DELETE FROM t1 WHERE rowid=2;
} {}
do_execsql_test 13.5 {
SELECT rowid FROM t1 WHERE t1 MATCH 'o';
} {1}
do_execsql_test 13.6 {
SELECT rowid FROM t1 WHERE t1 MATCH '.';
} {}
#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 14.1 {
CREATE VIRTUAL TABLE t1 USING fts5(x, y);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
WITH d(x,y) AS (
SELECT NULL, 'xyz xyz xyz xyz xyz xyz'
UNION ALL
SELECT NULL, 'xyz xyz xyz xyz xyz xyz' FROM d
)
INSERT INTO t1 SELECT * FROM d LIMIT 200;
}
do_test 14.2 {
set nRow 0
db eval { SELECT * FROM t1 WHERE t1 MATCH 'xyz' } {
db eval {
BEGIN;
CREATE TABLE t2(a, b);
ROLLBACK;
}
incr nRow
}
set nRow
} {200}
do_test 14.3 {
set nRow 0
db eval { BEGIN; }
db eval { SELECT * FROM t1 WHERE t1 MATCH 'xyz' } {
db eval {
SAVEPOINT aaa;
CREATE TABLE t2(a, b);
ROLLBACK TO aaa;
RELEASE aaa;
}
incr nRow
}
set nRow
} {200}
do_execsql_test 15.0 {
INSERT INTO t1(t1) VALUES('integrity-check');
}
do_execsql_test 15.1 {
UPDATE t1_content SET c1 = 'xyz xyz xyz xyz xyz abc' WHERE rowid = 1;
}
do_catchsql_test 15.2 {
INSERT INTO t1(t1) VALUES('integrity-check');
} {1 {database disk image is malformed}}
#-------------------------------------------------------------------------
#
do_execsql_test 16.1 {
CREATE VIRTUAL TABLE n1 USING fts5(a);
INSERT INTO n1 VALUES('a b c d');
}
# SQL function used by test 16.2. It overwrites the first two bytes of
# record 10 of n1_data through an incremental-blob channel and then sets
# the 'version' entry of n1_config to 50.
proc funk {} {
  set chan [db incrblob main n1_data block 10]
  fconfigure $chan -encoding binary -translation binary
  puts -nonewline $chan "\x44\x45"
  close $chan

  db eval { UPDATE n1_config SET v=50 WHERE k='version' }
}
db func funk funk
do_catchsql_test 16.2 {
SELECT funk(), bm25(n1), funk() FROM n1 WHERE n1 MATCH 'a+b+c+d'
} {1 {SQL logic error or missing database}}
#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 17.1 {
CREATE VIRTUAL TABLE b2 USING fts5(x);
INSERT INTO b2 VALUES('a');
INSERT INTO b2 VALUES('b');
INSERT INTO b2 VALUES('c');
}
do_test 17.2 {
set res [list]
db eval { SELECT * FROM b2 ORDER BY rowid ASC } {
lappend res [execsql { SELECT * FROM b2 ORDER BY rowid ASC }]
}
set res
} {{a b c} {a b c} {a b c}}
reset_db
do_execsql_test 18.1 {
CREATE VIRTUAL TABLE c2 USING fts5(x, y);
INSERT INTO c2 VALUES('x x x', 'x x x');
SELECT rowid FROM c2 WHERE c2 MATCH 'y:x';
} {1}
#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 17.1 {
CREATE VIRTUAL TABLE uio USING fts5(ttt);
INSERT INTO uio VALUES(NULL);
INSERT INTO uio SELECT NULL FROM uio;
INSERT INTO uio SELECT NULL FROM uio;
INSERT INTO uio SELECT NULL FROM uio;
INSERT INTO uio SELECT NULL FROM uio;
INSERT INTO uio SELECT NULL FROM uio;
INSERT INTO uio SELECT NULL FROM uio;
INSERT INTO uio SELECT NULL FROM uio;
INSERT INTO uio SELECT NULL FROM uio;
SELECT count(*) FROM uio;
} {256}
do_execsql_test 17.2 {
SELECT count(*) FROM uio WHERE rowid BETWEEN 8 AND 17
} {10}
do_execsql_test 17.3 {
SELECT rowid FROM uio WHERE rowid BETWEEN 8 AND 17
} {8 9 10 11 12 13 14 15 16 17}
do_execsql_test 17.4 {
SELECT rowid FROM uio WHERE rowid BETWEEN 8 AND 17 ORDER BY rowid DESC
} {17 16 15 14 13 12 11 10 9 8}
do_execsql_test 17.5 {
SELECT count(*) FROM uio
} {256}
do_execsql_test 17.6 {
INSERT INTO uio(rowid) VALUES(9223372036854775807);
INSERT INTO uio(rowid) VALUES(-9223372036854775808);
SELECT count(*) FROM uio;
} {258}
do_execsql_test 17.7 {
SELECT min(rowid), max(rowid) FROM uio;
} {-9223372036854775808 9223372036854775807}
do_execsql_test 17.8 {
INSERT INTO uio DEFAULT VALUES;
SELECT min(rowid), max(rowid), count(*) FROM uio;
} {-9223372036854775808 9223372036854775807 259}
do_execsql_test 17.9 {
SELECT min(rowid), max(rowid), count(*) FROM uio WHERE rowid < 10;
} {-9223372036854775808 9 10}
#--------------------------------------------------------------------
#
do_execsql_test 18.1 {
CREATE VIRTUAL TABLE t1 USING fts5(a, b);
CREATE VIRTUAL TABLE t2 USING fts5(c, d);
INSERT INTO t1 VALUES('abc*', NULL);
INSERT INTO t2 VALUES(1, 'abcdefg');
}
do_execsql_test 18.2 {
SELECT t1.rowid, t2.rowid FROM t1, t2 WHERE t2 MATCH t1.a AND t1.rowid = t2.c
} {1 1}
do_execsql_test 18.3 {
SELECT t1.rowid, t2.rowid FROM t2, t1 WHERE t2 MATCH t1.a AND t1.rowid = t2.c
} {1 1}
finish_test

289
ext/fts5/test/fts5ab.test Normal file
View File

@@ -0,0 +1,289 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS5 module.
#
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ab
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE t1 USING fts5(a, b);
INSERT INTO t1 VALUES('hello', 'world');
INSERT INTO t1 VALUES('one two', 'three four');
INSERT INTO t1(rowid, a, b) VALUES(45, 'forty', 'five');
}
do_execsql_test 1.1 {
SELECT * FROM t1 ORDER BY rowid DESC;
} { forty five {one two} {three four} hello world }
do_execsql_test 1.2 {
SELECT rowid FROM t1 ORDER BY rowid DESC;
} {45 2 1}
do_execsql_test 1.3 {
SELECT rowid FROM t1 ORDER BY rowid ASC;
} {1 2 45}
do_execsql_test 1.4 {
SELECT * FROM t1 WHERE rowid=2;
} {{one two} {three four}}
do_execsql_test 1.5 {
SELECT * FROM t1 WHERE rowid=2.01;
} {}
do_execsql_test 1.6 {
SELECT * FROM t1 WHERE rowid=1.99;
} {}
#-------------------------------------------------------------------------
reset_db
do_execsql_test 2.1 {
CREATE VIRTUAL TABLE t1 USING fts5(x);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
INSERT INTO t1 VALUES('one');
INSERT INTO t1 VALUES('two');
INSERT INTO t1 VALUES('three');
}
do_catchsql_test 2.2 {
SELECT rowid, * FROM t1 WHERE t1 MATCH 'AND AND'
} {1 {fts5: syntax error near "AND"}}
do_execsql_test 2.3 { SELECT rowid, * FROM t1 WHERE t1 MATCH 'two' } {2 two}
do_execsql_test 2.4 { SELECT rowid, * FROM t1 WHERE t1 MATCH 'three' } {3 three}
do_execsql_test 2.5 { SELECT rowid, * FROM t1 WHERE t1 MATCH 'one' } {1 one}
do_execsql_test 2.6 {
INSERT INTO t1 VALUES('a b c d e f g');
INSERT INTO t1 VALUES('b d e a a a i');
INSERT INTO t1 VALUES('x y z b c c c');
}
foreach {tn expr res} {
1 a {5 4}
2 b {6 5 4}
3 c {6 4}
4 d {5 4}
5 e {5 4}
6 f {4}
7 g {4}
8 x {6}
9 y {6}
10 z {6}
} {
do_execsql_test 2.7.$tn.1 {
SELECT rowid FROM t1 WHERE t1 MATCH $expr ORDER BY rowid DESC
} $res
do_execsql_test 2.7.$tn.2 {
SELECT rowid FROM t1 WHERE t1 MATCH $expr ORDER BY rowid ASC
} [lsort -integer $res]
}
#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 3.0 {
CREATE VIRTUAL TABLE t1 USING fts5(a,b);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}
foreach {tn a b} {
1 {abashed abandons abase abash abaft} {abases abased}
2 {abasing abases abaft abated abandons} {abases abandoned}
3 {abatement abash abash abated abase} {abasements abashing}
4 {abaft abasements abase abasement abasing} {abasement abases}
5 {abaft abashing abatement abash abasements} {abandons abandoning}
6 {aback abate abasements abashes abandoned} {abasement abased}
7 {abandons abated abased aback abandoning} {abases abandoned}
8 {abashing abases abasement abaft abashing} {abashed abate}
9 {abash abase abate abashing abashed} {abandon abandoned}
10 {abate abandoning abandons abasement aback} {abandon abandoning}
} {
do_execsql_test 3.1.$tn.1 { INSERT INTO t1 VALUES($a, $b) }
do_execsql_test 3.1.$tn.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
}
foreach {tn expr res} {
1 {abash} {9 5 3 1}
2 {abase} {9 4 3 1}
3 {abase + abash} {1}
4 {abash + abase} {9}
5 {abaft + abashing} {8 5}
6 {abandon + abandoning} {10}
7 {"abashing abases abasement abaft abashing"} {8}
} {
do_execsql_test 3.2.$tn {
SELECT rowid FROM t1 WHERE t1 MATCH $expr ORDER BY rowid DESC
} $res
}
do_execsql_test 3.3 {
SELECT rowid FROM t1 WHERE t1 MATCH 'NEAR(aback abate, 2)'
} {6}
foreach {tn expr res} {
1 {abash} {1 3 5 9}
2 {abase} {1 3 4 9}
3 {abase + abash} {1}
4 {abash + abase} {9}
5 {abaft + abashing} {5 8}
6 {abandon + abandoning} {10}
7 {"abashing abases abasement abaft abashing"} {8}
} {
do_execsql_test 3.4.$tn {
SELECT rowid FROM t1 WHERE t1 MATCH $expr
} $res
}
#-------------------------------------------------------------------------
# Documents with more than 2M tokens.
#
do_execsql_test 4.0 {
CREATE VIRTUAL TABLE s1 USING fts5(x);
}
foreach {tn doc} [list \
1 [string repeat {a x } 1500000] \
2 "[string repeat {a a } 1500000] x" \
] {
do_execsql_test 4.$tn { INSERT INTO s1 VALUES($doc) }
}
do_execsql_test 4.3 {
SELECT rowid FROM s1 WHERE s1 MATCH 'x'
} {1 2}
do_execsql_test 4.4 {
SELECT rowid FROM s1 WHERE s1 MATCH '"a x"'
} {1 2}
#-------------------------------------------------------------------------
# Check that a special case of segment promotion works. The case is where
# a new segment is written to level L, but the oldest segment within level
# (L-2) is larger than it.
#
do_execsql_test 5.0 {
CREATE VIRTUAL TABLE s2 USING fts5(x);
INSERT INTO s2(s2, rank) VALUES('pgsz', 32);
INSERT INTO s2(s2, rank) VALUES('automerge', 0);
}
# Return a list of $n pseudo-random "words". Each word is a random integer
# in the range 0..999, zero-padded to three digits, with every digit
# replaced by a letter (0->a, 1->b, ... 9->j).
proc rnddoc {n} {
  set digit2letter [list 0 a 1 b 2 c 3 d 4 e 5 f 6 g 7 h 8 i 9 j]
  set words [list]
  for {set k 0} {$k < $n} {incr k} {
    set num [expr {int(rand()*1000)}]
    lappend words [string map $digit2letter [format %.3d $num]]
  }
  return $words
}
db func rnddoc rnddoc
do_test 5.1 {
for {set i 1} {$i <= 65} {incr i} {
execsql { INSERT INTO s2 VALUES(rnddoc(10)) }
}
for {set i 1} {$i <= 63} {incr i} {
execsql { DELETE FROM s2 WHERE rowid = $i }
}
fts5_level_segs s2
} {0 8}
do_test 5.2 {
execsql {
INSERT INTO s2(s2, rank) VALUES('automerge', 8);
}
for {set i 0} {$i < 7} {incr i} {
execsql { INSERT INTO s2 VALUES(rnddoc(50)) }
}
fts5_level_segs s2
} {8 0 0}
# Test also the other type of segment promotion - when a new segment is written
# that is larger than segments immediately following it.
do_test 5.3 {
execsql {
DROP TABLE s2;
CREATE VIRTUAL TABLE s2 USING fts5(x);
INSERT INTO s2(s2, rank) VALUES('pgsz', 32);
INSERT INTO s2(s2, rank) VALUES('automerge', 0);
}
for {set i 1} {$i <= 16} {incr i} {
execsql { INSERT INTO s2 VALUES(rnddoc(5)) }
}
fts5_level_segs s2
} {0 1}
do_test 5.4 {
execsql { INSERT INTO s2 VALUES(rnddoc(160)) }
fts5_level_segs s2
} {2 0}
#-------------------------------------------------------------------------
#
do_execsql_test 6.0 {
CREATE VIRTUAL TABLE s3 USING fts5(x);
BEGIN;
INSERT INTO s3 VALUES('a b c');
INSERT INTO s3 VALUES('A B C');
}
do_execsql_test 6.1.1 {
SELECT rowid FROM s3 WHERE s3 MATCH 'a'
} {1 2}
do_execsql_test 6.1.2 {
SELECT rowid FROM s3 WHERE s3 MATCH 'a' ORDER BY rowid DESC
} {2 1}
do_execsql_test 6.2 {
COMMIT;
}
do_execsql_test 6.3 {
SELECT rowid FROM s3 WHERE s3 MATCH 'a'
} {1 2}
do_test 6.4 {
db close
sqlite3 db test.db
execsql {
BEGIN;
INSERT INTO s3(s3) VALUES('optimize');
ROLLBACK;
}
} {}
#-------------------------------------------------------------------------
#
# Insert a single 3000-byte document (1500 tokens) into a table configured
# with pgsz=32. The stray "breakpoint" debugging hook that used to sit in
# front of this test has been removed.
set doc [string repeat "a b c " 500]
do_execsql_test 7.0 {
  CREATE VIRTUAL TABLE x1 USING fts5(x);
  INSERT INTO x1(x1, rank) VALUES('pgsz', 32);
  INSERT INTO x1 VALUES($doc);
}
finish_test

359
ext/fts5/test/fts5ac.test Normal file
View File

@@ -0,0 +1,359 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS5 module.
#
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ac
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
set data {
0 {p o q e z k z p n f y u z y n y} {l o o l v v k}
1 {p k h h p y l l h i p v n} {p p l u r i f a j g e r r x w}
2 {l s z j k i m p s} {l w e j t j e e i t w r o p o}
3 {x g y m y m h p} {k j j b r e y y a k y}
4 {q m a i y i z} {o w a g k x g j m w e u k}
5 {k o a w y b s z} {s g l m m l m g p}
6 {d a q i z h b l c p k j g k} {p x u j x t v c z}
7 {f d a g o c t i} {w f c x l d r k i j}
8 {y g w u b q p o m j y b p a e k} {r i d k y w o z q m a t p}
9 {r k o m c c j s x m x m x m q r} {y r c a q d z k n x n}
10 {k j q m g q a j d} {d d e z g w h c d o o g x d}
11 {j z u m o y q j f w e e w t r j w} {g m o r x n t n w i f g l z f}
12 {s y w a w d o h x m k} {c w k z b p o r a}
13 {u t h x e g s k n g i} {f j w g c s r}
14 {b f i c s u z t k} {c k q s j u i z o}
15 {n a f n u s w h y n s i q e w} {x g e g a s s h n}
16 {k s q e j n p} {t r j f t o e k k l m i}
17 {g d t u w r o p m n m n p h b o u} {h s w o s l j e}
18 {f l q y q q g e e x j r} {n b r r g e i r t x q k}
19 {f i r g o a w e p i l o a w} {e k r z t d g h g i b d i e m}
20 {l d u u f p y} {g o m m u x m g l j t t x x u}
21 {m c d k x i c z l} {m i a i e u h}
22 {w b f o c g x y j} {z d w x d f h i p}
23 {w u i u x t c h k i b} {b y k h b v r t g j}
24 {h f d j s w s b a p k} {a q y u z e y m m j q r}
25 {d i x y x x k i y f s d j h z p n} {l l q m e t c w g y h t s v g}
26 {g s q w t d k x g f m j p k y} {r m b x e l t d}
27 {j l s q u g y v e c l o} {m f l m m m h g x x l n c}
28 {c t j g v r s b z j} {l c f y d t q n}
29 {e x z y w i h l} {b n b x e y q e n u m}
30 {g y y h j b w r} {q b q f u s k c k g r}
31 {g u l x l b r c m z b u c} {k g t b x k x n t e z d h o}
32 {w g v l z f b z h p s c v h} {g e w v m h k r g w a r f q}
33 {c g n f u d o y o b} {e y o h x x y y i z s b h a j}
34 {v y h c q u u s q y x x k s q} {d n r m y k n t i r n w e}
35 {o u c x l e b t a} {y b a x y f z x r}
36 {x p h l j a a u u j h} {x o f s z m b c q p}
37 {k q t i c a q n m v v} {v r z e f m y o}
38 {r w t t t t r v v o e p g h} {l w x a g a u h y}
39 {o p v g v b a g o} {j t q c r b b g y z}
40 {f s o r o d t h q f x l} {r d b m k i f s t d l m y x j w}
41 {t m o t m f m f} {i p i q j v n v m b q}
42 {t x w a r l w d t b c o d o} {a h f h w z d n s}
43 {t u q c d g p q x j o l c x c} {m n t o z z j a y}
44 {v d i i k b f s z r v r z y} {g n q y s x x m b x c l w}
45 {p v v a c s z y e o l} {m v t u d k m k q b d c v z r}
46 {f y k l d r q w r s t r e} {h m v r r l r r t f q e x y}
47 {w l n l t y x} {n h s l a f c h u f l x x m v n o}
48 {t n v i k e b p z p d j j l i o} {i v z p g u e j s i k n h w d c}
49 {z v x p n l t a j c} {e j l e n c e t a d}
50 {w u b x u i v h a i y m m r p m s} {s r h d o g z y f f x e}
51 {d c c x b c a x g} {p r a j v u y}
52 {f w g r c o d l t u e z h i} {j l l s s b j m}
53 {p m t f k i x} {u v y a z g w v v m x h i}
54 {l c z g l o j i c d e b} {b f v y w u i b e i y}
55 {r h c x f x a d s} {z x y k f l r b q c v}
56 {v x x c y h z x b g m o q n c} {h n b i t g h a q b c o r u}
57 {d g l o h t b s b r} {n u e p t i m u}
58 {t d y e t d c w u o s w x f c h} {i o s v y b r d r}
59 {l b a p q n d r} {k d c c d n y q h g a o p e x}
60 {f r z v m p k r} {x x r i s b a g f c}
61 {s a z i e r f i w c n y v z t k s} {y y i r y n l s b w i e k n}
62 {n x p r e x q r m v i b y} {f o o z n b s r q j}
63 {y j s u j x o n r q t f} {f v k n v x u s o a d e f e}
64 {u s i l y c x q} {r k c h p c h b o s s u s p b}
65 {m p i o s h o} {s w h u n d m n q t y k b w c}
66 {l d f g m x x x o} {s w d d f b y j j h h t i y p j o}
67 {c b m h f n v w n h} {i r w i e x r w l z p x u g u l s}
68 {y a h u h i m a y q} {d d r x h e v q n z y c j}
69 {c x f d x o n p o b r t b l p l} {m i t k b x v f p t m l l y r o}
70 {u t l w w m s} {m f m o l t k o p e}
71 {f g q e l n d m z x q} {z s i i i m f w w f n g p e q}
72 {n l h a v u o d f j d e x} {v v s l f g d g r a j x i f z x}
73 {x v m v f i g q e w} {r y s j i k m j j e d g r n o i f}
74 {g d y n o h p s y q z j d w n h w} {x o d l t j i b r d o r y}
75 {p g b i u r b e q d v o a g w m k} {q y z s f q o h}
76 {u z a q u f i f f b} {b s p b a a d x r r i q f}
77 {w h h z t h p o a h h e e} {h w r p h k z v y f r x}
78 {c a r k i a p u x} {f w l p t e m l}
79 {q q u k o t r k z} {f b m c w p s s o z}
80 {t i g v y q s r x m r x z e f} {x o j w a u e y s j c b u p p r o}
81 {n j n h r l a r e o z w e} {v o r r j a v b}
82 {i f i d k w d n h} {o i d z i z l m w s b q v u}
83 {m d g q q b k b w f q q p p} {j m q f b y c i z k y q p l e a}
84 {m x o n y f g} {y c n x n q j i y c l h b r q z}
85 {v o z l n p c} {g n j n t b b x n c l d a g j v}
86 {z n a y f b t k k t d b z a v} {r p c n r u k u}
87 {b q t x z e c w} {q a o a l o a h i m j r}
88 {j f h o x x a z g b a f a m i b} {j z c z y x e x w t}
89 {t c t p r s u c q n} {z x l i k n f q l n t}
90 {w t d q j g m r f k n} {l e w f w w a l y q k i q t p c t}
91 {c b o k l i c b s j n m b l} {y f p q o w g}
92 {f y d j o q t c c q m f j s t} {f h e d y m o k}
93 {k x j r m a d o i z j} {r t t t f e b r x i v j v g o}
94 {s f e a e t i h h d q p z t q} {b k m k w h c}
95 {h b n j t k i h o q u} {w n g i t o k c a m y p f l x c p}
96 {f c x p y r b m o l m o a} {p c a q s u n n x d c f a o}
97 {u h h k m n k} {u b v n u a o c}
98 {s p e t c z d f n w f} {l s f j b l c e s h}
99 {r c v w i v h a t a c v c r e} {h h u m g o f b a e o}
}
# Argument $expr is an FTS5 match expression designed to be executed against
# an FTS5 table with the following schema:
#
# CREATE VIRTUAL TABLE xy USING fts5(x, y);
#
# Assuming the table contains the same records as stored in the global
# $::data list (see above), this function returns a list containing one
# element for each match in the dataset. The elements are themselves lists
# formatted as follows:
#
# <rowid> {<phrase 0 matches> <phrase 1 matches>...}
#
# where each <phrase X matches> element is a list of phrase matches in the
# same form as returned by auxiliary scalar function fts5_test().
#
# Tcl reference implementation used to compute the expected result of an
# FTS5 query against the rows in $::data.
#
#   bPos  - if true, each matching row contributes "<id> <hits>" to the
#           result; otherwise only "<id>".
#   expr  - the FTS5 match expression.
#   bAsc  - if true (the default) results are sorted by ascending id,
#           otherwise by descending id.
#
# The expression is converted to a Tcl script by SQL function
# fts5_expr_tcl(). That script is evaluated in this proc's scope once per
# row, so it sees local variable $cols and global ::pc.
proc matchdata {bPos expr {bAsc 1}} {
  set script [db one {
    SELECT fts5_expr_tcl($expr, 'nearset $cols -pc ::pc', 'x', 'y')
  }]

  set matches [list]
  foreach {id x y} $::data {
    set cols [list $x $y]
    set ::pc 0
    set hits [eval $script]

    # Rows for which the script returns no hits are not part of the result.
    if {[llength $hits]==0} continue

    if {$bPos} {
      lappend matches [list $id $hits]
    } else {
      lappend matches $id
    }
  }

  if {$bAsc} {
    set order -increasing
  } else {
    set order -decreasing
  }
  set matches [lsort -integer $order -index 0 $matches]

  # Flatten the list of per-row entries into a single flat list.
  return [concat {*}$matches]
}
#
# End of test code
#-------------------------------------------------------------------------
# Auxiliary function body: return a list with one element per phrase
# instance reported by the API command $cmd. Each element is the value
# returned by [$cmd xInst], with spaces replaced by "." characters.
proc fts5_test_poslist {cmd} {
  set poslist [list]
  set iInst 0
  while {$iInst < [$cmd xInstCount]} {
    set inst [$cmd xInst $iInst]
    lappend poslist [string map {{ } .} $inst]
    incr iInst
  }
  return $poslist
}
foreach {tn2 sql} {
1 {}
2 {BEGIN}
} {
reset_db
sqlite3_fts5_create_function db fts5_test_poslist fts5_test_poslist
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE xx USING fts5(x,y);
INSERT INTO xx(xx, rank) VALUES('pgsz', 32);
}
execsql $sql
do_test $tn2.1.1 {
foreach {id x y} $data {
execsql { INSERT INTO xx(rowid, x, y) VALUES($id, $x, $y) }
}
execsql { INSERT INTO xx(xx) VALUES('integrity-check') }
} {}
#-------------------------------------------------------------------------
# Test phrase queries.
#
foreach {tn phrase} {
1 "o"
2 "b q"
3 "e a e"
4 "m d g q q b k b w f q q p p"
5 "l o o l v v k"
6 "a"
7 "b"
8 "c"
9 "no"
10 "L O O L V V K"
} {
set expr "\"$phrase\""
set res [matchdata 1 $expr]
do_execsql_test $tn2.1.2.$tn.[llength $res] {
SELECT rowid, fts5_test_poslist(xx) FROM xx WHERE xx match $expr
} $res
}
#-------------------------------------------------------------------------
# Test some AND and OR queries.
#
foreach {tn expr} {
1.1 "a AND b"
1.2 "a+b AND c"
1.3 "d+c AND u"
1.4 "d+c AND u+d"
2.1 "a OR b"
2.2 "a+b OR c"
2.3 "d+c OR u"
2.4 "d+c OR u+d"
3.1 { a AND b AND c }
} {
set res [matchdata 1 $expr]
do_execsql_test $tn2.2.$tn.[llength $res] {
SELECT rowid, fts5_test_poslist(xx) FROM xx WHERE xx match $expr
} $res
}
#-------------------------------------------------------------------------
# Queries on a specific column.
#
foreach {tn expr} {
1.1 "x:a"
1.2 "y:a"
1.3 "x:b"
1.4 "y:b"
2.1 "{x}:a"
2.2 "{y}:a"
2.3 "{x}:b"
2.4 "{y}:b"
3.1 "{x y}:a"
3.2 "{y x}:a"
3.3 "{x x}:b"
3.4 "{y y}:b"
4.1 {{"x" "y"}:a}
4.2 {{"y" x}:a}
4.3 {{x "x"}:b}
4.4 {{"y" y}:b}
} {
set res [matchdata 1 $expr]
do_execsql_test $tn2.3.$tn.[llength $res] {
SELECT rowid, fts5_test_poslist(xx) FROM xx WHERE xx match $expr
} $res
}
#-------------------------------------------------------------------------
# Some NEAR queries.
#
foreach {tn expr} {
1 "NEAR(a b)"
2 "NEAR(r c)"
2 { NEAR(r c, 5) }
3 { NEAR(r c, 3) }
4 { NEAR(r c, 2) }
5 { NEAR(r c, 0) }
6 { NEAR(a b c) }
7 { NEAR(a b c, 8) }
8 { x : NEAR(r c) }
9 { y : NEAR(r c) }
} {
set res [matchdata 1 $expr]
do_execsql_test $tn2.4.1.$tn.[llength $res] {
SELECT rowid, fts5_test_poslist(xx) FROM xx WHERE xx match $expr
} $res
}
do_test $tn2.4.1 { nearset {{a b c}} -- a } {0.0.0}
do_test $tn2.4.2 { nearset {{a b c}} -- c } {0.0.2}
foreach {tn expr tclexpr} {
1 {a b} {AND [N $x -- {a}] [N $x -- {b}]}
} {
do_execsql_test $tn2.5.$tn {
SELECT fts5_expr_tcl($expr, 'N $x')
} [list $tclexpr]
}
#-------------------------------------------------------------------------
#
do_execsql_test $tn2.6.integrity {
INSERT INTO xx(xx) VALUES('integrity-check');
}
#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM xx_data} {puts $r}
foreach {bAsc sql} {
1 {SELECT rowid FROM xx WHERE xx MATCH $expr}
0 {SELECT rowid FROM xx WHERE xx MATCH $expr ORDER BY rowid DESC}
} {
foreach {tn expr} {
0.1 x
1 { NEAR(r c) }
2 { NEAR(r c, 5) }
3 { NEAR(r c, 3) }
4 { NEAR(r c, 2) }
5 { NEAR(r c, 0) }
6 { NEAR(a b c) }
7 { NEAR(a b c, 8) }
8 { x : NEAR(r c) }
9 { y : NEAR(r c) }
10 { x : "r c" }
11 { y : "r c" }
12 { a AND b }
13 { a AND b AND c }
14a { a }
14b { a OR b }
15 { a OR b AND c }
16 { c AND b OR a }
17 { c AND (b OR a) }
18 { c NOT (b OR a) }
19 { c NOT b OR a AND d }
} {
set res [matchdata 0 $expr $bAsc]
do_execsql_test $tn2.6.$bAsc.$tn.[llength $res] $sql $res
}
}
}
do_execsql_test 3.1 {
SELECT fts5_expr_tcl('a AND b');
} {{AND [nearset -- {a}] [nearset -- {b}]}}
finish_test

236
ext/fts5/test/fts5ad.test Normal file
View File

@@ -0,0 +1,236 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS5 module.
#
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ad
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE yy USING fts5(x, y);
INSERT INTO yy VALUES('Changes the result to be', 'the list of all matching');
INSERT INTO yy VALUES('indices (or all matching', 'values if -inline is');
INSERT INTO yy VALUES('specified as well.) If', 'indices are returned, the');
} {}
foreach {tn match res} {
1 {c*} {1}
2 {i*} {3 2}
3 {t*} {3 1}
4 {r*} {3 1}
} {
do_execsql_test 1.$tn {
SELECT rowid FROM yy WHERE yy MATCH $match ORDER BY rowid DESC
} $res
}
foreach {tn match res} {
5 {c*} {1}
6 {i*} {2 3}
7 {t*} {1 3}
8 {r*} {1 3}
} {
do_execsql_test 1.$tn {
SELECT rowid FROM yy WHERE yy MATCH $match
} $res
}
foreach {T create} {
2 {
CREATE VIRTUAL TABLE t1 USING fts5(a, b);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}
3 {
CREATE VIRTUAL TABLE t1 USING fts5(a, b, prefix=1,2,3,4,5);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}
4 {
CREATE VIRTUAL TABLE t1 USING fts5(a, b);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
BEGIN;
}
5 {
CREATE VIRTUAL TABLE t1 USING fts5(a, b, prefix=1,2,3,4,5);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
BEGIN;
}
} {
# (Re)create table t1 using the schema for this iteration. Named $T.0 so
# that it does not share a test name with the data-population test ($T.1)
# that follows.
do_test $T.0 {
  execsql { DROP TABLE IF EXISTS t1 }
  execsql $create
} {}
do_test $T.1 {
foreach {rowid a b} {
0 {fghij uvwxyz klmn pq uvwx} {klmn f fgh uv fghij klmno}
1 {uv f abcd abcd fghi} {pq klm uv uv fgh uv a}
2 {klmn klm pqrs fghij uv} {f k uvw ab abcd pqr uv}
3 {ab pqrst a fghi ab pqr fg} {k klmno a fg abcd}
4 {abcd pqrst uvwx a fgh} {f klmno fghij kl pqrst}
5 {uvwxyz k abcde u a} {uv k k kl klmn}
6 {uvwxyz k klmn pqrst uv} {fghi pqrs abcde u k}
7 {uvwxy klmn u p pqrst fgh} {p f fghi abcd uvw kl uv}
8 {f klmno pqrst uvwxy pqrst} {uv abcde klm pq pqr}
9 {f abcde a uvwxyz pqrst} {fghij abc k uvwx pqr fghij uvwxy}
10 {ab uv f fg pqrst uvwxy} {fgh p uv k abc klm uvw}
11 {pq klmno a uvw abcde uvwxyz} {fghij pq uvwxyz pqr fghi}
12 {fgh u pq fgh uvw} {uvw pqr f uvwxy uvwx}
13 {uvwx klmn f fgh abcd pqr} {uvw k fg uv klm abcd}
14 {ab uvwx pqrst pqr uvwxyz pqrs} {uvwxyz abcde ab ab uvw abcde}
15 {abc abcde uvwxyz abc kl k pqr} {klm k k klmno u fgh}
16 {fghi abcd fghij uv uvwxyz ab uv} {klmn pqr a uvw fghi}
17 {abc pqrst fghi uvwx uvw klmn fghi} {ab fg pqr pqrs p}
18 {pqr kl a fghij fgh fg kl} {pqr uvwxyz uvw abcd uvwxyz}
19 {fghi fghi pqr kl fghi f} {klmn u u klmno klmno}
20 {abc pqrst klmno kl pq uvwxy} {abc k fghi pqrs klm}
21 {a pqr uvwxyz uv fghi a fgh} {abc pqrs pqrst pq klm}
22 {klm abc uvwxyz klm pqrst} {fghij k pq pqr u klm fghij}
23 {p klm uv p a a} {uvwxy klmn uvw abcde pq}
24 {uv fgh fg pq uvwxy u uvwxy} {pqrs a uvw p uvwx uvwxyz fg}
25 {fghij fghi klmn abcd pq kl} {fghi abcde pqrs abcd fgh uvwxy}
26 {pq fgh a abc klmno klmn} {fgh p k p fg fghij}
27 {fg pq kl uvwx fghij pqrst klmn} {abcd uvw abcd fghij f fghij}
28 {uvw fghi p fghij pq fgh uvwx} {k fghij abcd uvwx pqr fghi}
29 {klm pq abcd pq f uvwxy} {pqrst p fghij pqr p}
30 {ab uvwx fg uvwx klmn klm} {klmn klmno fghij klmn klm}
31 {pq k pqr abcd a pqrs} {abcd abcd uvw a abcd klmno ab}
32 {pqrst u abc pq klm} {abc kl uvwxyz fghij u fghi p}
33 {f uvwxy u k f uvw uvwx} {pqrs uvw fghi fg pqrst klm}
34 {pqrs pq fghij uvwxyz pqr} {ab abc abc uvw f pq f}
35 {uvwxy ab uvwxy klmno kl pqrs} {abcde uvw pqrs uvwx k k}
36 {uvwxyz k ab abcde abc uvw} {uvw abcde uvw klmn uv klmn}
37 {k kl uv abcde uvwx fg u} {u abc uvwxy k fg abcd}
38 {fghi pqrst fghi pqr pqrst uvwx} {u uv uvwx fghi abcde}
39 {k pqrst k uvw fg pqrst fghij} {uvwxy ab kl klmn uvwxyz abcde}
40 {fg uvwxy pqrs klmn uvwxyz klm p} {k uv ab fghij fgh k pqrs}
41 {uvwx abc f pq uvwxy k} {ab uvwxyz abc f fghij}
42 {uvwxy klmno uvwxyz uvwxyz pqrst} {uv kl kl klmno k f abcde}
43 {abcde ab pqrs fg f fgh} {abc fghij fghi k k}
44 {uvw abcd a ab pqrst klmn fg} {pqrst u uvwx pqrst fghij f pqrst}
45 {uvwxy p kl uvwxyz ab pqrst fghi} {abc f pqr fg a k}
46 {u p f a fgh} {a kl pq uv f}
47 {pqrs abc fghij fg abcde ab a} {p ab uv pqrs kl fghi abcd}
48 {abcde uvwxy pqrst uv abc pqr uvwx} {uvwxy klm uvwxy uvwx k}
49 {fgh klm abcde klmno u} {a f fghij f uvwxyz abc u}
50 {uv uvw uvwxyz uvwxyz uv ab} {uvwx pq fg u k uvwxy}
51 {uvwxy pq p kl fghi} {pqrs fghi pqrs abcde uvwxyz ab}
52 {pqr p uvwxy kl pqrs klmno fghij} {ab abcde abc pqrst pqrs uv}
53 {fgh pqrst p a klmno} {ab ab pqrst pqr kl pqrst}
54 {abcd klm ab uvw a fg u} {f pqr f abcd uv}
55 {u fg uvwxyz k uvw} {abc pqrs f fghij fg pqrs uvwxy}
56 {klm fg p fghi fg a} {uv a fghi uvwxyz a fghi}
57 {uvwxy k abcde fgh f fghi} {f kl klmn f fghi klm}
58 {klm k fgh uvw fgh fghi} {klmno uvwx u pqrst u}
59 {fghi pqr pqrst p uvw fghij} {uv pqrst pqrs pq fghij klm}
60 {uvwx klm uvwxy uv klmn} {p a a abc klmn ab k}
61 {uvwxy uvwx klm uvwx klm} {pqrs ab ab uvwxyz fg}
62 {kl uv uv uvw fg kl k} {abcde uvw fgh uvwxy klm}
63 {a abc fgh u klm abcd} {fgh pqr uv klmn fghij}
64 {klmn k klmn klmno pqrs pqr} {fg kl abcde klmno uvwxy kl pq}
65 {uvwxyz klm fghi abc abcde kl} {uvwxy uvw uvwxyz uvwxyz pq pqrst}
66 {pq klm abc pqrst fgh f} {u abcde pqrst abcde fg}
67 {u pqrst kl u uvw klmno} {u pqr pqrs fgh u p}
68 {abc fghi uvwxy fgh k pq} {uv p uvwx uvwxyz ab}
69 {klmno f uvwxyz uvwxy klmn fg ab} {fgh kl a pqr abcd pqr}
70 {fghi pqrst pqrst uv a} {uvwxy k p uvw uvwx a}
71 {a fghij f p uvw} {klm fg abcd abcde klmno pqrs}
72 {uv uvwx uvwx uvw klm} {uv fghi klmno uvwxy uvw}
73 {kl uvwxy ab f pq klm u} {uvwxy klmn klm abcd pq fg k}
74 {uvw pqrst abcd uvwxyz ab} {fgh fgh klmn abc pq}
75 {uvwxyz klm pq abcd klmno pqr uvwxyz} {kl f a fg pqr klmn}
76 {uvw uvwxy pqr k pqrst kl} {uvwxy abc uvw uvw u}
77 {fgh klm u uvwxyz f uvwxy abcde} {uv abcde klmno u u ab}
78 {klmno abc pq pqr fgh} {p uv abcd fgh abc u k}
79 {fg pqr uvw pq uvwx} {uv uvw fghij pqrs fg p}
80 {abcd pqrs uvwx uvwxy uvwx} {u uvw pqrst pqr abcde pqrs kl}
81 {uvwxyz klm pq uvwxy fghij} {p pq klm fghij u a a}
82 {uvwx k uvwxyz klmno pqrst kl} {abcde p f pqrst abcd uvwxyz p}
83 {abcd abcde klm pqrst uvwxyz} {uvw pqrst u p uvwxyz a pqrs}
84 {k klm abc uv uvwxy klm klmn} {k abc pqr a abc p kl}
85 {klmn abcd pqrs p pq klm a} {klmn kl ab uvw pq}
86 {klmn a pqrs abc uvw pqrst} {a pqr kl klm a k f}
87 {pqrs ab uvwx uvwxy a pqr f} {fg klm uvwx pqr pqr}
88 {klmno ab k kl u uvwxyz} {uv kl uvw fghi uv uvw}
89 {pq fghi pqrst klmn uvwxy abc pqrs} {fg f f fg abc abcde klm}
90 {kl a k fghi uvwx fghi u} {ab uvw pqr fg a p abc}
91 {uvwx pqrs klmno ab fgh uvwx} {pqr uvwx abc kl f klmno kl}
92 {fghij pq pqrs fghij f pqrst} {u abcde fg pq pqr fgh k}
93 {fgh u pqrs abcde klmno abc} {abc fg pqrst pqr abcde}
94 {uvwx p abc f pqr p} {k pqrs kl klm abc fghi klm}
95 {kl p klmno uvwxyz klmn} {fghi ab a fghi pqrs kl}
96 {pqr fgh pq uvwx a} {uvw klm klmno fg uvwxy uvwx}
97 {fg abc uvwxyz fghi pqrst pq} {abc k a ab abcde f}
98 {uvwxy fghi uvwxy u abcde abcde uvw} {klmn uvwx pqrs uvw uvwxy abcde}
99 {pq fg fghi uvwx uvwx fghij uvwxy} {klmn klmn f abc fg a}
} {
execsql {
INSERT INTO t1(rowid, a, b) VALUES($rowid, $a, $b);
}
}
} {}
# Tcl reference implementation of a prefix query. Returns the rowids of
# all rows of t1, in descending rowid order, for which every glob pattern
# in $prefixlist matches at least one token in column a or column b.
proc prefix_query {prefixlist} {
  set rowids [list]
  db eval {SELECT rowid, a, b FROM t1 ORDER BY rowid DESC} {
    set nMiss 0
    foreach pattern $prefixlist {
      if {[lsearch -glob $a $pattern]>=0} continue
      if {[lsearch -glob $b $pattern]>=0} continue
      # This pattern matches neither column, so the row is excluded.
      incr nMiss
      break
    }
    if {$nMiss==0} { lappend rowids $rowid }
  }
  return $rowids
}
# Run each prefix query twice: once without an ORDER BY clause (results
# are expected in ascending rowid order) and once with ORDER BY rowid DESC.
# The expected result is computed by the Tcl reference [prefix_query],
# which returns rowids in descending order. The leftover
# "if {$T==5} breakpoint" debugging hook has been removed.
foreach {bAsc sql} {
  1 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix}
  0 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix ORDER BY rowid DESC}
} {
  foreach {tn prefix} {
    1  {a*} 2 {ab*} 3 {abc*} 4 {abcd*} 5 {abcde*}
    6  {f*} 7 {fg*} 8 {fgh*} 9 {fghi*} 10 {fghij*}
    11 {k*} 12 {kl*} 13 {klm*} 14 {klmn*} 15 {klmno*}
    16 {p*} 17 {pq*} 18 {pqr*} 19 {pqrs*} 20 {pqrst*}
    21 {u*} 22 {uv*} 23 {uvw*} 24 {uvwx*} 25 {uvwxy*} 26 {uvwxyz*}
    27 {x*}
    28 {a f*} 29 {a* f*} 30 {a* fghij*}
  } {
    set res [prefix_query $prefix]
    if {$bAsc} {
      set res [lsort -integer -increasing $res]
    }
    # The result count is part of the test name.
    set n [llength $res]
    do_execsql_test $T.$bAsc.$tn.$n $sql $res
  }
}
catchsql COMMIT
}
finish_test

304
ext/fts5/test/fts5ae.test Normal file
View File

@@ -0,0 +1,304 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS5 module.
#
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ae
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE t1 USING fts5(a, b);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}
do_execsql_test 1.1 {
INSERT INTO t1 VALUES('hello', 'world');
SELECT rowid FROM t1 WHERE t1 MATCH 'hello' ORDER BY rowid ASC;
} {1}
do_execsql_test 1.2 {
INSERT INTO t1 VALUES('world', 'hello');
SELECT rowid FROM t1 WHERE t1 MATCH 'hello' ORDER BY rowid ASC;
} {1 2}
do_execsql_test 1.3 {
INSERT INTO t1 VALUES('world', 'world');
SELECT rowid FROM t1 WHERE t1 MATCH 'hello' ORDER BY rowid ASC;
} {1 2}
do_execsql_test 1.4.1 {
INSERT INTO t1 VALUES('hello', 'hello');
}
do_execsql_test 1.4.2 {
SELECT rowid FROM t1 WHERE t1 MATCH 'hello' ORDER BY rowid ASC;
} {1 2 4}
fts5_aux_test_functions db
#-------------------------------------------------------------------------
#
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE t2 USING fts5(x, y);
INSERT INTO t2 VALUES('u t l w w m s', 'm f m o l t k o p e');
INSERT INTO t2 VALUES('f g q e l n d m z x q', 'z s i i i m f w w f n g p');
}
do_execsql_test 2.1 {
SELECT rowid, fts5_test_poslist(t2) FROM t2
WHERE t2 MATCH 'm' ORDER BY rowid;
} {
1 {0.0.5 0.1.0 0.1.2}
2 {0.0.7 0.1.5}
}
do_execsql_test 2.2 {
SELECT rowid, fts5_test_poslist(t2) FROM t2
WHERE t2 MATCH 'u OR q' ORDER BY rowid;
} {
1 {0.0.0}
2 {1.0.2 1.0.10}
}
do_execsql_test 2.3 {
SELECT rowid, fts5_test_poslist(t2) FROM t2
WHERE t2 MATCH 'y:o' ORDER BY rowid;
} {
1 {0.1.3 0.1.7}
}
#-------------------------------------------------------------------------
#
do_execsql_test 3.0 {
CREATE VIRTUAL TABLE t3 USING fts5(x, y);
INSERT INTO t3 VALUES( 'j f h o x x a z g b a f a m i b', 'j z c z y x w t');
INSERT INTO t3 VALUES( 'r c', '');
}
do_execsql_test 3.1 {
SELECT rowid, fts5_test_poslist(t3) FROM t3 WHERE t3 MATCH 'NEAR(a b)';
} {
1 {0.0.6 1.0.9 0.0.10 0.0.12 1.0.15}
}
do_execsql_test 3.2 {
SELECT rowid, fts5_test_poslist(t3) FROM t3 WHERE t3 MATCH 'NEAR(r c)';
} {
2 {0.0.0 1.0.1}
}
do_execsql_test 3.3 {
INSERT INTO t3
VALUES('k x j r m a d o i z j', 'r t t t f e b r x i v j v g o');
SELECT rowid, fts5_test_poslist(t3)
FROM t3 WHERE t3 MATCH 'a OR b AND c';
} {
1 {0.0.6 1.0.9 0.0.10 0.0.12 1.0.15 2.1.2}
3 0.0.5
}
#-------------------------------------------------------------------------
#
do_execsql_test 4.0 {
CREATE VIRTUAL TABLE t4 USING fts5(x, y);
INSERT INTO t4
VALUES('k x j r m a d o i z j', 'r t t t f e b r x i v j v g o');
}
do_execsql_test 4.1 {
SELECT rowid, fts5_test_poslist(t4) FROM t4 WHERE t4 MATCH 'a OR b AND c';
} {
1 0.0.5
}
#-------------------------------------------------------------------------
# Test that the xColumnSize() and xColumnAvgsize() APIs work.
#
reset_db
fts5_aux_test_functions db
do_execsql_test 5.1 {
CREATE VIRTUAL TABLE t5 USING fts5(x, y);
INSERT INTO t5 VALUES('a b c d', 'e f g h i j');
INSERT INTO t5 VALUES('', 'a');
INSERT INTO t5 VALUES('a', '');
}
do_execsql_test 5.2 {
SELECT rowid, fts5_test_columnsize(t5) FROM t5 WHERE t5 MATCH 'a'
ORDER BY rowid DESC;
} {
3 {1 0}
2 {0 1}
1 {4 6}
}
do_execsql_test 5.3 {
SELECT rowid, fts5_test_columntext(t5) FROM t5 WHERE t5 MATCH 'a'
ORDER BY rowid DESC;
} {
3 {a {}}
2 {{} a}
1 {{a b c d} {e f g h i j}}
}
do_execsql_test 5.4 {
SELECT rowid, fts5_test_columntotalsize(t5) FROM t5 WHERE t5 MATCH 'a'
ORDER BY rowid DESC;
} {
3 {5 7}
2 {5 7}
1 {5 7}
}
do_execsql_test 5.5 {
INSERT INTO t5 VALUES('x y z', 'v w x y z');
SELECT rowid, fts5_test_columntotalsize(t5) FROM t5 WHERE t5 MATCH 'a'
ORDER BY rowid DESC;
} {
3 {8 12}
2 {8 12}
1 {8 12}
}
#-------------------------------------------------------------------------
# Test the xTokenize() API
#
reset_db
fts5_aux_test_functions db
do_execsql_test 6.1 {
CREATE VIRTUAL TABLE t6 USING fts5(x, y);
INSERT INTO t6 VALUES('There are more', 'things in heaven and earth');
INSERT INTO t6 VALUES(', Horatio, Than are', 'dreamt of in your philosophy.');
}
do_execsql_test 6.2 {
SELECT rowid, fts5_test_tokenize(t6) FROM t6 WHERE t6 MATCH 't*'
} {
1 {{there are more} {things in heaven and earth}}
2 {{horatio than are} {dreamt of in your philosophy}}
}
#-------------------------------------------------------------------------
# Test the xQueryPhrase() API
#
reset_db
fts5_aux_test_functions db
do_execsql_test 7.1 {
CREATE VIRTUAL TABLE t7 USING fts5(x, y);
}
do_test 7.2 {
foreach {x y} {
{q i b w s a a e l o} {i b z a l f p t e u}
{b a z t a l o x d i} {b p a d b f h d w y}
{z m h n p p u i e g} {v h d v b x j j c z}
{a g i m v a u c b i} {p k s o t l r t b m}
{v v c j o d a s c p} {f f v o k p o f o g}
} {
execsql {INSERT INTO t7 VALUES($x, $y)}
}
execsql { SELECT count(*) FROM t7 }
} {5}
foreach {tn q res} {
1 a {{4 2}}
2 b {{3 4}}
3 c {{2 1}}
4 d {{2 2}}
5 {a AND b} {{4 2} {3 4}}
6 {a OR b OR c OR d} {{4 2} {3 4} {2 1} {2 2}}
} {
do_execsql_test 7.3.$tn {
SELECT fts5_test_queryphrase(t7) FROM t7 WHERE t7 MATCH $q LIMIT 1
} [list $res]
}
do_execsql_test 7.4 {
SELECT fts5_test_rowcount(t7) FROM t7 WHERE t7 MATCH 'a';
} {5 5 5 5}
#do_execsql_test 7.4 {
# SELECT rowid, bm25debug(t7) FROM t7 WHERE t7 MATCH 'a';
#} {5 5 5 5}
#
#-------------------------------------------------------------------------
#
do_test 8.1 {
execsql { CREATE VIRTUAL TABLE t8 USING fts5(x, y) }
foreach {rowid x y} {
0 {A o} {o o o C o o o o o o o o}
1 {o o B} {o o o C C o o o o o o o}
2 {A o o} {o o o o D D o o o o o o}
3 {o B} {o o o o o D o o o o o o}
4 {E o G} {H o o o o o o o o o o o}
5 {F o G} {I o J o o o o o o o o o}
6 {E o o} {H o J o o o o o o o o o}
7 {o o o} {o o o o o o o o o o o o}
9 {o o o} {o o o o o o o o o o o o}
} {
execsql { INSERT INTO t8(rowid, x, y) VALUES($rowid, $x, $y) }
}
} {}
foreach {tn q res} {
1 {a} {0 2}
2 {b} {3 1}
3 {c} {1 0}
4 {d} {2 3}
5 {g AND (e OR f)} {5 4}
6 {j AND (h OR i)} {5 6}
} {
do_execsql_test 8.2.$tn.1 {
SELECT rowid FROM t8 WHERE t8 MATCH $q ORDER BY bm25(t8);
} $res
do_execsql_test 8.2.$tn.2 {
SELECT rowid FROM t8 WHERE t8 MATCH $q ORDER BY +rank;
} $res
do_execsql_test 8.2.$tn.3 {
SELECT rowid FROM t8 WHERE t8 MATCH $q ORDER BY rank;
} $res
}
#-------------------------------------------------------------------------
# Test xPhraseCount() for some different queries.
#
do_test 9.1 {
execsql { CREATE VIRTUAL TABLE t9 USING fts5(x) }
foreach x {
"a b c" "d e f"
} {
execsql { INSERT INTO t9 VALUES($x) }
}
} {}
foreach {tn q cnt} {
1 {a AND b} 2
2 {a OR b} 2
3 {a OR b OR c} 3
4 {NEAR(a b)} 2
} {
do_execsql_test 9.2.$tn {
SELECT fts5_test_phrasecount(t9) FROM t9 WHERE t9 MATCH $q LIMIT 1
} $cnt
}
finish_test

144
ext/fts5/test/fts5af.test Normal file
View File

@@ -0,0 +1,144 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS5 module.
#
# More specifically, the tests in this file focus on the built-in
# snippet() function.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5af
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x, y);
}
# Run three snippet() checks for document $doc and MATCH expression $match.
# Each check empties t1, inserts $doc into one column (NULL in the other)
# and expects the query to return exactly one value equal to $res:
#
#   $tn.1 - document stored in the first column
#   $tn.2 - document stored in the second column
#   $tn.3 - as $tn.1, but with "ORDER BY rank DESC" added to the query
#
proc do_snippet_test {tn doc match res} {
# Copy the arguments into the global variables v1 and v2, which are the
# names the SQL statements below refer to.
uplevel #0 [list set v1 $doc]
uplevel #0 [list set v2 $match]
# Document in the first column.
do_execsql_test $tn.1 {
DELETE FROM t1;
INSERT INTO t1 VALUES($v1, NULL);
SELECT snippet(t1, -1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH $v2;
} [list $res]
# Document in the second column.
do_execsql_test $tn.2 {
DELETE FROM t1;
INSERT INTO t1 VALUES(NULL, $v1);
SELECT snippet(t1, -1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH $v2;
} [list $res]
# Document in the first column again, this time sorting by rank.
do_execsql_test $tn.3 {
DELETE FROM t1;
INSERT INTO t1 VALUES($v1, NULL);
SELECT snippet(t1, -1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH $v2
ORDER BY rank DESC;
} [list $res]
}
foreach {tn doc res} {
1.1 {X o o o o o o} {[X] o o o o o o}
1.2 {o X o o o o o} {o [X] o o o o o}
1.3 {o o X o o o o} {o o [X] o o o o}
1.4 {o o o X o o o} {o o o [X] o o o}
1.5 {o o o o X o o} {o o o o [X] o o}
1.6 {o o o o o X o} {o o o o o [X] o}
1.7 {o o o o o o X} {o o o o o o [X]}
2.1 {X o o o o o o o} {[X] o o o o o o...}
2.2 {o X o o o o o o} {o [X] o o o o o...}
2.3 {o o X o o o o o} {o o [X] o o o o...}
2.4 {o o o X o o o o} {o o o [X] o o o...}
2.5 {o o o o X o o o} {...o o o [X] o o o}
2.6 {o o o o o X o o} {...o o o o [X] o o}
2.7 {o o o o o o X o} {...o o o o o [X] o}
2.8 {o o o o o o o X} {...o o o o o o [X]}
3.1 {X o o o o o o o o} {[X] o o o o o o...}
3.2 {o X o o o o o o o} {o [X] o o o o o...}
3.3 {o o X o o o o o o} {o o [X] o o o o...}
3.4 {o o o X o o o o o} {o o o [X] o o o...}
3.5 {o o o o X o o o o} {...o o o [X] o o o...}
3.6 {o o o o o X o o o} {...o o o [X] o o o}
3.7 {o o o o o o X o o} {...o o o o [X] o o}
3.8 {o o o o o o o X o} {...o o o o o [X] o}
3.9 {o o o o o o o o X} {...o o o o o o [X]}
4.1 {X o o o o o X o o} {[X] o o o o o [X]...}
4.2 {o X o o o o o X o} {...[X] o o o o o [X]...}
4.3 {o o X o o o o o X} {...[X] o o o o o [X]}
5.1 {X o o o o X o o o} {[X] o o o o [X] o...}
5.2 {o X o o o o X o o} {...[X] o o o o [X] o...}
5.3 {o o X o o o o X o} {...[X] o o o o [X] o}
5.4 {o o o X o o o o X} {...o [X] o o o o [X]}
6.1 {X o o o X o o o} {[X] o o o [X] o o...}
6.2 {o X o o o X o o o} {o [X] o o o [X] o...}
6.3 {o o X o o o X o o} {...o [X] o o o [X] o...}
6.4 {o o o X o o o X o} {...o [X] o o o [X] o}
6.5 {o o o o X o o o X} {...o o [X] o o o [X]}
7.1 {X o o X o o o o o} {[X] o o [X] o o o...}
7.2 {o X o o X o o o o} {o [X] o o [X] o o...}
7.3 {o o X o o X o o o} {...o [X] o o [X] o o...}
7.4 {o o o X o o X o o} {...o [X] o o [X] o o}
7.5 {o o o o X o o X o} {...o o [X] o o [X] o}
7.6 {o o o o o X o o X} {...o o o [X] o o [X]}
} {
do_snippet_test 1.$tn $doc X $res
}
foreach {tn doc res} {
1.1 {X Y o o o o o} {[X Y] o o o o o}
1.2 {o X Y o o o o} {o [X Y] o o o o}
1.3 {o o X Y o o o} {o o [X Y] o o o}
1.4 {o o o X Y o o} {o o o [X Y] o o}
1.5 {o o o o X Y o} {o o o o [X Y] o}
1.6 {o o o o o X Y} {o o o o o [X Y]}
2.1 {X Y o o o o o o} {[X Y] o o o o o...}
2.2 {o X Y o o o o o} {o [X Y] o o o o...}
2.3 {o o X Y o o o o} {o o [X Y] o o o...}
2.4 {o o o X Y o o o} {...o o [X Y] o o o}
2.5 {o o o o X Y o o} {...o o o [X Y] o o}
2.6 {o o o o o X Y o} {...o o o o [X Y] o}
2.7 {o o o o o o X Y} {...o o o o o [X Y]}
3.1 {X Y o o o o o o o} {[X Y] o o o o o...}
3.2 {o X Y o o o o o o} {o [X Y] o o o o...}
3.3 {o o X Y o o o o o} {o o [X Y] o o o...}
3.4 {o o o X Y o o o o} {...o o [X Y] o o o...}
3.5 {o o o o X Y o o o} {...o o [X Y] o o o}
3.6 {o o o o o X Y o o} {...o o o [X Y] o o}
3.7 {o o o o o o X Y o} {...o o o o [X Y] o}
3.8 {o o o o o o o X Y} {...o o o o o [X Y]}
} {
do_snippet_test 2.$tn $doc "X + Y" $res
}
finish_test

138
ext/fts5/test/fts5ag.test Normal file
View File

@@ -0,0 +1,138 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS5 module.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ag
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
#-------------------------------------------------------------------------
# This file attempts to verify that the extension APIs work with
# "ORDER BY rank" queries. This is done by comparing the results of
# the fts5_test() function when run with queries of the form:
#
# ... WHERE fts MATCH ? ORDER BY bm25(fts) [ASC|DESC]
#
# and
#
# ... WHERE fts MATCH ? ORDER BY rank [ASC|DESC]
#
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x, y, z);
}
do_test 1.1 {
foreach {x y z} {
{j s m y m r n l u k} {z k f u z g h s w g} {r n o s s b v n w w}
{m v g n d x q r r s} {q t d a q a v l h j} {s k l f s i n v q v}
{m f f d h h s o h a} {y e v r q i u m h d} {b c k q m z l z h n}
{j e m v k p e c j m} {m p v z d x l n i a} {v p u p m t p q i f}
{v r w l e e t d z p} {c s b w k m n k o u} {w g y f v w v w v p}
{k d g o u j p z n o} {t g e q l z i g b j} {f i q q j y h b g h}
{j s w x o t j b t m} {v a v v r t x c q a} {r t k x w u l h a g}
{j y b i u d e m d w} {y s o j h i n a u p} {n a g b u c w e b m}
{b c k s c w j p w b} {m o c o w o b d q q} {n t y o y z y r z e}
{p n q l e l h z q c} {n s e i h c v b b u} {m p d i t a o o f f}
{k c o n v e z l b m} {s m n i n s d e s u} {t a u e q d a o u c}
{h d t o i a g b b p} {k x c i g f g b b k} {x f i v n a n n j i}
{f z k r b u s k z e} {n z v z w l e r h t} {t i s v v a v p n s}
{k f e c t z r e f d} {f m g r c w q k b v} {v y s y f r b f e f}
{z r c t d q q h x b} {u c g z n z u v s s} {y t n f f x b f d x}
{u n p n u t i m e j} {p j j d m f k p m z} {d o l v c o e a h w}
{h o q w t f v i c y} {c q u n r z s l l q} {z x a q w s b w s y}
{y m s x k i m n x c} {b i a n v h z n k a} {w l q p b h h g d y}
{z v s j f p v l f w} {c s b i z e k i g c} {x b v d w j f e d z}
{r k k j e o m k g b} {h b d c h m y b t u} {u j s h k z c u d y}
{v h i v s y z i k l} {d t m w q w c a z p} {r s e s x v d w k b}
{u r e q j y h o o s} {x x z r x y t f j s} {k n h x i i u e c v}
{q l f d a p w l q o} {y z q w j o p b o v} {s u h z h f d f n l}
{q o e o x x l g q i} {j g m h q q w c d b} {o m d h w a g b f n}
{m x k t s s y l v a} {j x t c a u w b w g} {n f j b v x y p u t}
{u w k a q b u w k w} {a h j u o w f s k p} {j o f s h y t j h g}
{x v b l m t l m h l} {t p y i y i q b q a} {k o o z w a c h c f}
{j g c d k w b d t v} {a k v c m a v h v p} {i c a i j g h l j h}
{l m v l c z j b p b} {z p z f l n k i b a} {j v q k g i x g i b}
{m c i w u z m i s z} {i z r f n l q z k w} {x n b p b q r g i z}
{d g i o o x l f x d} {r t m f b n q y c b} {i u g k w x n m p o}
{t o s i q d z x d t} {v a k s q z j c o o} {z f n n r l y w v v}
{w k h d t l j g n n} {r z m v y b l n c u} {v b v s c l n k g v}
{m a g r a b u u n z} {u y l h v w v k b f} {x l p g i s j f x v}
{v s g x k z a k a r} {l t g v j q l k p l} {f h n a x t v s t y}
{z u v u x p s j y t} {g b q e e g l n w g} {e n p j i g j f u r}
{q z l t w o l m p e} {t s g h r p r o t z} {y b f a o n u m z g}
{d t w n y b o g f o} {d a j e r l g g s h} {d z e l w q l t h f}
{f l u w q v x j a h} {f n u l l d m h h w} {d x c c e r o d q j}
{b y f q s q f u l g} {u z w l f d b i a g} {m v q b g u o z e z}
{h z p t s e x i v m} {l h q m e o x x x j} {e e d n p r m g j f}
{k h s g o n s d a x} {u d t t s j o v h a} {z r b a e u v o e s}
{m b b g a f c p a t} {w c m j o d b l g e} {f p j p m o s y v j}
{c r n h d w c a b l} {s g e u s d n j b g} {b o n a x a b x y l}
{r h u x f c d z n o} {x y l g u m i i w d} {t f h b z v r s r g}
{t i o r b v g g p a} {d x l u q k m o s u} {j f h t u n z u k m}
{g j t y d c n j y g} {w e s k v c w i g t} {g a h r g v g h r o}
{e j l a q j g i n h} {d z k c u p n u p p} {t u e e v z v r r g}
{l j s g k j k h z l} {p v d a t x d e q u} {r l u z b m g k s j}
{i e y d u x d i n l} {p f z k m m w p u l} {z l p m r q w n d a}
} {
execsql { INSERT INTO t1 VALUES($x, $y, $z) }
}
set {} {}
} {}
fts5_aux_test_functions db
# Verify that sorting by "rank" and sorting by "bm25(t1)" yield the same
# fts5_test_all() output for MATCH expression E.
#
# Two tests are issued: $tn.1 compares the queries as written and $tn.2
# compares them with " DESC" appended. In each case the "ORDER BY rank"
# query is executed first and its result is checked against the result of
# the "ORDER BY bm25(t1)" query.
#
# The parameter must stay named E: the SQL text refers to it as $E.
proc do_fts5ag_test {tn E} {
  set byRank {SELECT fts5_test_all(t1) FROM t1 WHERE t1 MATCH $E ORDER BY rank}
  set byBm25 {SELECT fts5_test_all(t1) FROM t1 WHERE t1 MATCH $E ORDER BY bm25(t1)}

  foreach {sub dir} {1 "" 2 " DESC"} {
    set got  [execsql "$byRank$dir"]
    set want [execsql "$byBm25$dir"]
    uplevel [list do_test $tn.$sub [list set {} $got] $want]
  }
}
foreach {tn expr} {
2.1 a
2.2 b
2.3 c
2.4 d
2.5 {"m m"}
2.6 {e + s}
3.0 {a AND b}
3.1 {a OR b}
3.2 {b OR c AND d}
3.3 {NEAR(c d)}
} {
do_fts5ag_test $tn $expr
if {[set_test_counter errors]} break
}
finish_test

150
ext/fts5/test/fts5ah.test Normal file
View File

@@ -0,0 +1,150 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS5 module.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ah
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
#-------------------------------------------------------------------------
# This file contains tests for very large doclists.
#
do_test 1.0 {
execsql { CREATE VIRTUAL TABLE t1 USING fts5(a) }
execsql { INSERT INTO t1(t1, rank) VALUES('pgsz', 128) }
set v {w w w w w w w w w w w w w w w w w w w w}
execsql { INSERT INTO t1(rowid, a) VALUES(0, $v) }
for {set i 1} {$i <= 10000} {incr i} {
set v {x x x x x x x x x x x x x x x x x x x x}
if {($i % 2139)==0} {lset v 3 Y ; lappend Y $i}
if {($i % 1577)==0} {lset v 5 W ; lappend W $i}
execsql { INSERT INTO t1 VALUES($v) }
}
set v {w w w w w w w w w w w w w w w w w w w w}
execsql { INSERT INTO t1 VALUES($v) }
} {}
do_execsql_test 1.1.1 {
SELECT rowid FROM t1 WHERE t1 MATCH 'x AND w'
} [lsort -integer -incr $W]
do_execsql_test 1.1.2 {
SELECT rowid FROM t1 WHERE t1 MATCH 'x* AND w*'
} [lsort -integer -incr $W]
do_execsql_test 1.2 {
SELECT rowid FROM t1 WHERE t1 MATCH 'y AND x'
} [lsort -integer -incr $Y]
do_execsql_test 1.3 {
INSERT INTO t1(t1) VALUES('integrity-check');
}
# Return the single value produced by running the special '*reads' MATCH
# query against t1. The surrounding tests treat this value as a running
# read counter (presumably pages read by FTS5 - confirm against the
# fts5 test hooks).
proc reads {} {
  set sql {SELECT t1 FROM t1 WHERE t1 MATCH '*reads'}
  return [db one $sql]
}
# Execute $sql and return how much the counter reported by [reads]
# increased while doing so. The result of the SQL itself is discarded.
#
# The expression is braced so that the two counter values are treated
# strictly as operands. Without braces expr re-parses their text, which
# evaluates any embedded script and turns an empty value into a unary
# minus instead of raising an error.
proc execsql_reads {sql} {
  set nRead [reads]
  execsql $sql
  expr {[reads] - $nRead}
}
do_test 1.4 {
set nRead [reads]
execsql { SELECT rowid FROM t1 WHERE t1 MATCH 'x' }
set nReadX [expr [reads] - $nRead]
expr $nReadX>1000
} {1}
do_test 1.5 {
set fwd [execsql_reads {SELECT rowid FROM t1 WHERE t1 MATCH 'x' }]
set bwd [execsql_reads {
SELECT rowid FROM t1 WHERE t1 MATCH 'x' ORDER BY 1 ASC
}]
expr {$bwd < $fwd + 12}
} {1}
foreach {tn q res} "
1 { SELECT rowid FROM t1 WHERE t1 MATCH 'w + x' } [list $W]
2 { SELECT rowid FROM t1 WHERE t1 MATCH 'x + w' } [list $W]
3 { SELECT rowid FROM t1 WHERE t1 MATCH 'x AND w' } [list $W]
4 { SELECT rowid FROM t1 WHERE t1 MATCH 'y AND x' } [list $Y]
" {
do_test 1.6.$tn.1 {
set n [execsql_reads $q]
puts -nonewline "(n=$n nReadX=$nReadX)"
expr {$n < ($nReadX / 8)}
} {1}
do_test 1.6.$tn.2 {
set n [execsql_reads "$q ORDER BY rowid DESC"]
puts -nonewline "(n=$n nReadX=$nReadX)"
expr {$n < ($nReadX / 8)}
} {1}
do_execsql_test 1.6.$tn.3 $q [lsort -int -incr $res]
do_execsql_test 1.6.$tn.4 "$q ORDER BY rowid DESC" [lsort -int -decr $res]
}
#-------------------------------------------------------------------------
# Now test that adding range constraints on the rowid field reduces the
# number of pages loaded from disk.
#
foreach {tn fraction tail cnt} {
1 0.6 {rowid > 5000} 5000
2 0.2 {rowid > 9000} 1000
3 0.2 {rowid < 1000} 999
4 0.2 {rowid BETWEEN 4000 AND 5000} 1001
5 0.6 {rowid >= 5000} 5001
6 0.2 {rowid >= 9000} 1001
7 0.2 {rowid <= 1000} 1000
8 0.6 {rowid > '5000'} 5000
9 0.2 {rowid > '9000'} 1000
10 0.1 {rowid = 444} 1
} {
set q "SELECT rowid FROM t1 WHERE t1 MATCH 'x' AND $tail"
set n [execsql_reads $q]
set ret [llength [execsql $q]]
do_test "1.7.$tn.asc.(n=$n ret=$ret)" {
expr {$n < ($fraction*$nReadX) && $ret==$cnt}
} {1}
set q "SELECT rowid FROM t1 WHERE t1 MATCH 'x' AND $tail ORDER BY rowid DESC"
set n [execsql_reads $q]
set ret [llength [execsql $q]]
do_test "1.7.$tn.desc.(n=$n ret=$ret)" {
expr {$n < 2*$fraction*$nReadX && $ret==$cnt}
} {1}
}
do_execsql_test 1.8.1 {
SELECT count(*) FROM t1 WHERE t1 MATCH 'x' AND +rowid < 'text';
} {10000}
do_execsql_test 1.8.2 {
SELECT count(*) FROM t1 WHERE t1 MATCH 'x' AND rowid < 'text';
} {10000}
#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}
finish_test

55
ext/fts5/test/fts5ai.test Normal file
View File

@@ -0,0 +1,55 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS5 module.
#
# Specifically, it tests transactions and savepoints
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ai
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE t1 USING fts5(a);
} {}
do_execsql_test 1.1 {
BEGIN;
INSERT INTO t1 VALUES('a b c');
INSERT INTO t1 VALUES('d e f');
SAVEPOINT one;
INSERT INTO t1 VALUES('g h i');
SAVEPOINT two;
INSERT INTO t1 VALUES('j k l');
ROLLBACK TO one;
INSERT INTO t1 VALUES('m n o');
SAVEPOINT two;
INSERT INTO t1 VALUES('p q r');
RELEASE one;
SAVEPOINT one;
INSERT INTO t1 VALUES('s t u');
ROLLBACK TO one;
COMMIT;
}
do_execsql_test 1.2 {
INSERT INTO t1(t1) VALUES('integrity-check');
}
finish_test

69
ext/fts5/test/fts5aj.test Normal file
View File

@@ -0,0 +1,69 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS5 module.
#
# Specifically, this tests that, provided the amount of data remains
# constant, the FTS index does not grow indefinitely as rows are inserted
# and deleted.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5aj
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
# Build a pseudo-random document: a list of 20 single-letter tokens, each
# drawn from a..z using rand(). The sequence is reproducible when the
# caller seeds the generator with srand() first.
proc doc {} {
  set alphabet {a b c d e f g h i j k l m n o p q r s t u v w x y z}
  set words [list]
  set n 0
  while {$n < 20} {
    lappend words [lindex $alphabet [expr {int(rand() * 26)}]]
    incr n
  }
  return $words
}
# Summarise the decoded structure record of t1 (row 10 of t1_data, as
# rendered by fts5_decode()). The first element of the decoded list is
# skipped; for every remaining element the result holds its list length
# minus two. NOTE(review): presumably each element describes one level and
# the two leading words are per-level header fields - confirm against the
# fts5_decode() output format.
#
# Returns an empty list when the decoded value has no such elements (the
# result variable is initialised up front so this case no longer raises
# "no such variable").
proc structure {} {
  set val [db one {SELECT fts5_decode(rowid,block) FROM t1_data WHERE rowid=10}]
  set res [list]
  foreach lvl [lrange $val 1 end] {
    lappend res [expr {[llength $lvl]-2}]
  }
  return $res
}
expr srand(0)
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x);
INSERT INTO t1(t1, rank) VALUES('pgsz', 64);
}
for {set iTest 0} {$iTest < 50000} {incr iTest} {
if {$iTest > 1000} { execsql { DELETE FROM t1 WHERE rowid=($iTest-1000) } }
set new [doc]
execsql { INSERT INTO t1 VALUES($new) }
if {$iTest==10000} { set sz1 [db one {SELECT count(*) FROM t1_data}] }
if {0==($iTest % 1000)} {
set sz [db one {SELECT count(*) FROM t1_data}]
set s [structure]
do_execsql_test 1.$iTest.$sz.{$s} {
INSERT INTO t1(t1) VALUES('integrity-check')
}
}
}
do_execsql_test 2.0 { INSERT INTO t1(t1) VALUES('integrity-check') }
finish_test

143
ext/fts5/test/fts5ak.test Normal file
View File

@@ -0,0 +1,143 @@
# 2014 November 24
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS5 module.
#
# Specifically, the auxiliary function "highlight".
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ak
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
do_execsql_test 1.1 {
CREATE VIRTUAL TABLE ft1 USING fts5(x);
INSERT INTO ft1 VALUES('i d d a g i b g d d');
INSERT INTO ft1 VALUES('h d b j c c g a c a');
INSERT INTO ft1 VALUES('e j a e f h b f h h');
INSERT INTO ft1 VALUES('j f h d g h i b d f');
INSERT INTO ft1 VALUES('d c j d c j b c g e');
INSERT INTO ft1 VALUES('i a d e g j g d a a');
INSERT INTO ft1 VALUES('j f c e d a h j d b');
INSERT INTO ft1 VALUES('i c c f a d g h j e');
INSERT INTO ft1 VALUES('i d i g c d c h b f');
INSERT INTO ft1 VALUES('g d a e h a b c f j');
}
do_execsql_test 1.2 {
SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'e';
} {
{[e] j a [e] f h b f h h}
{d c j d c j b c g [e]}
{i a d [e] g j g d a a}
{j f c [e] d a h j d b}
{i c c f a d g h j [e]}
{g d a [e] h a b c f j}
}
do_execsql_test 1.3 {
SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'h + d';
} {
{[h d] b j c c g a c a}
{j f [h d] g h i b d f}
}
do_execsql_test 1.4 {
SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'd + d';
} {
{i [d d] a g i b g [d d]}
}
do_execsql_test 1.5 {
SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'e e e'
} {
{[e] j a [e] f h b f h h}
{d c j d c j b c g [e]}
{i a d [e] g j g d a a}
{j f c [e] d a h j d b}
{i c c f a d g h j [e]}
{g d a [e] h a b c f j}
}
do_execsql_test 1.6 {
SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'd + d d + d';
} {
{i [d d] a g i b g [d d]}
}
do_execsql_test 2.1 {
CREATE VIRTUAL TABLE ft2 USING fts5(x);
INSERT INTO ft2 VALUES('a b c d e f g h i j');
}
do_execsql_test 2.2 {
SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'b+c+d c+d+e'
} {{a [b c d e] f g h i j}}
do_execsql_test 2.3 {
SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'b+c+d e+f+g'
} {
{a [b c d] [e f g] h i j}
}
do_execsql_test 2.4 {
SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'b+c+d c'
} {
{a [b c d] e f g h i j}
}
do_execsql_test 2.5 {
SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'b+c c+d+e'
} {
{a [b c d e] f g h i j}
}
do_execsql_test 2.6.1 {
SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'f d'
} {
{a b c [d] e [f] g h i j}
}
do_execsql_test 2.6.2 {
SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'd f'
} {
{a b c [d] e [f] g h i j}
}
#-------------------------------------------------------------------------
# The example from the docs.
#
do_execsql_test 3.1 {
-- Assuming this:
CREATE VIRTUAL TABLE ft USING fts5(a);
INSERT INTO ft VALUES('a b c x c d e');
INSERT INTO ft VALUES('a b c c d e');
INSERT INTO ft VALUES('a b c d e');
-- The following SELECT statement returns these three rows:
-- '[a b c] x [c d e]'
-- '[a b c] [c d e]'
-- '[a b c d e]'
SELECT highlight(ft, 0, '[', ']') FROM ft WHERE ft MATCH 'a+b+c AND c+d+e';
} {
{[a b c] x [c d e]}
{[a b c] [c d e]}
{[a b c d e]}
}
finish_test

281
ext/fts5/test/fts5al.test Normal file
View File

@@ -0,0 +1,281 @@
# 2014 November 24
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS5 module.
#
# Specifically, this file tests the %_config table.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5al
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
do_execsql_test 1.1 {
CREATE VIRTUAL TABLE ft1 USING fts5(x);
SELECT * FROM ft1_config;
} {version 3}
do_execsql_test 1.2 {
INSERT INTO ft1(ft1, rank) VALUES('pgsz', 32);
SELECT * FROM ft1_config;
} {pgsz 32 version 3}
do_execsql_test 1.3 {
INSERT INTO ft1(ft1, rank) VALUES('pgsz', 64);
SELECT * FROM ft1_config;
} {pgsz 64 version 3}
#--------------------------------------------------------------------------
# Test the logic for parsing the rank() function definition.
#
# Every definition below must be accepted when stored as the table's rank
# function. Test numbers are unique so that a failure report identifies
# exactly one definition (the whitespace-padded variants used to reuse
# numbers 7, 8 and 9).
foreach {tn defn} {
1 "fname()"
2 "fname(1)"
3 "fname(1,2)"
4 "fname(null,NULL,nUlL)"
5 " fname ( null , NULL , nUlL ) "
6 "fname('abc')"
7 "fname('a''bc')"
8 "fname('''abc')"
9 "fname('abc''')"
10 "fname( 'a''bc' )"
11 "fname('''abc' )"
12 "fname( 'abc''' )"
13 "fname(X'1234ab')"
14 "myfunc(1.2)"
15 "myfunc(-1.0)"
16 "myfunc(.01,'abc')"
} {
do_execsql_test 2.1.$tn {
INSERT INTO ft1(ft1, rank) VALUES('rank', $defn);
}
}
foreach {tn defn} {
1 ""
2 "fname"
3 "fname(X'234ab')"
4 "myfunc(-1.,'abc')"
} {
do_test 2.2.$tn {
catchsql { INSERT INTO ft1(ft1, rank) VALUES('rank', $defn) }
} {1 {SQL logic error or missing database}}
}
#-------------------------------------------------------------------------
# Assorted tests of the tcl interface for creating extension functions.
#
do_execsql_test 3.1 {
CREATE VIRTUAL TABLE t1 USING fts5(x);
INSERT INTO t1 VALUES('q w e r t y');
INSERT INTO t1 VALUES('y t r e w q');
}
# Auxiliary-function callback that ignores the API command handle and
# echoes back, as a list, every additional argument it was given.
proc argtest {cmd args} {
  return $args
}
sqlite3_fts5_create_function db argtest argtest
do_execsql_test 3.2.1 {
SELECT argtest(t1, 123) FROM t1 WHERE t1 MATCH 'q'
} {123 123}
do_execsql_test 3.2.2 {
SELECT argtest(t1, 123, 456) FROM t1 WHERE t1 MATCH 'q'
} {{123 456} {123 456}}
# Auxiliary-function callback returning whatever the API command reports
# for xRowid.
proc rowidtest {cmd} {
  return [$cmd xRowid]
}
sqlite3_fts5_create_function db rowidtest rowidtest
do_execsql_test 3.3.1 {
SELECT rowidtest(t1) FROM t1 WHERE t1 MATCH 'q'
} {1 2}
# Auxiliary-function callback that collects the result of "xInst $i" for
# every i from 0 up to (but not including) the value of xInstCount, and
# returns them as a list. xInstCount is re-queried before each iteration.
proc insttest {cmd} {
  set hits [list]
  set i 0
  while {$i < [$cmd xInstCount]} {
    lappend hits [$cmd xInst $i]
    incr i
  }
  return $hits
}
sqlite3_fts5_create_function db insttest insttest
do_execsql_test 3.4.1 {
SELECT insttest(t1) FROM t1 WHERE t1 MATCH 'q'
} {
{{0 0 0}}
{{0 0 5}}
}
do_execsql_test 3.4.2 {
SELECT insttest(t1) FROM t1 WHERE t1 MATCH 'r+e OR w'
} {
{{1 0 1}}
{{0 0 2} {1 0 4}}
}
# Auxiliary-function callback returning a two-element list: the values the
# API command reports for "xColumnSize 0" and "xColumnText 0", in that
# order.
proc coltest {cmd} {
  set size [$cmd xColumnSize 0]
  set text [$cmd xColumnText 0]
  return [list $size $text]
}
sqlite3_fts5_create_function db coltest coltest
do_execsql_test 3.5.1 {
SELECT coltest(t1) FROM t1 WHERE t1 MATCH 'q'
} {
{6 {q w e r t y}}
{6 {y t r e w q}}
}
#-------------------------------------------------------------------------
# Tests for remapping the "rank" column.
#
# 4.1.*: Mapped to a function with no arguments.
# 4.2.*: Mapped to a function with one or more arguments.
#
do_execsql_test 4.0 {
CREATE VIRTUAL TABLE t2 USING fts5(a, b);
INSERT INTO t2 VALUES('a s h g s b j m r h', 's b p a d b b a o e');
INSERT INTO t2 VALUES('r h n t a g r d d i', 'l d n j r c f t o q');
INSERT INTO t2 VALUES('q k n i k c a a e m', 'c h n j p g s c i t');
INSERT INTO t2 VALUES('h j g t r e l s g s', 'k q k c i i c k n s');
INSERT INTO t2 VALUES('b l k h d n n n m i', 'p t i a r b t q o l');
INSERT INTO t2 VALUES('k r i l j b g i p a', 't q c h a i m g n l');
INSERT INTO t2 VALUES('a e c q n m o m d g', 'l c t g i s q g q e');
INSERT INTO t2 VALUES('b o j h f o g b p e', 'r t l h s b g i c p');
INSERT INTO t2 VALUES('s q k f q b j g h f', 'n m a o p e i e k t');
INSERT INTO t2 VALUES('o q g g q c o k a b', 'r t k p t f t h p c');
}
# Auxiliary-function callback that maps the value of "xInst 0" to a single
# integer: second element * 100 + third element. NOTE(review): the three
# elements are presumably {phrase column offset} - confirm against the
# xInst documentation.
#
# The expression is braced so the two values are used strictly as numeric
# operands and are never re-parsed as expression or script text.
proc firstinst {cmd} {
  foreach {iPhrase iCol iOff} [$cmd xInst 0] {}
  return [expr {$iCol*100 + $iOff}]
}
sqlite3_fts5_create_function db firstinst firstinst
do_execsql_test 4.1.1 {
SELECT rowid, firstinst(t2) FROM t2 WHERE t2 MATCH 'a' ORDER BY rowid ASC
} {
1 0 2 4 3 6 5 103
6 9 7 0 9 102 10 8
}
do_execsql_test 4.1.2 {
SELECT rowid, rank FROM t2
WHERE t2 MATCH 'a' AND rank MATCH 'firstinst()'
ORDER BY rowid ASC
} {
1 0 2 4 3 6 5 103
6 9 7 0 9 102 10 8
}
do_execsql_test 4.1.3 {
SELECT rowid, rank FROM t2
WHERE t2 MATCH 'a' AND rank MATCH 'firstinst()'
ORDER BY rank DESC
} {
5 103 9 102 6 9 10 8 3 6 2 4 1 0 7 0
}
do_execsql_test 4.1.4 {
INSERT INTO t2(t2, rank) VALUES('rank', 'firstinst()');
SELECT rowid, rank FROM t2 WHERE t2 MATCH 'a' ORDER BY rowid ASC
} {
1 0 2 4 3 6 5 103
6 9 7 0 9 102 10 8
}
do_execsql_test 4.1.5 {
SELECT rowid, rank FROM t2 WHERE t2 MATCH 'a' ORDER BY rank DESC
} {
5 103 9 102 6 9 10 8 3 6 2 4 1 0 7 0
}
do_execsql_test 4.1.6 {
INSERT INTO t2(t2, rank) VALUES('rank', 'firstinst ( ) ');
SELECT rowid, rank FROM t2 WHERE t2 MATCH 'a' ORDER BY rank DESC
} {
5 103 9 102 6 9 10 8 3 6 2 4 1 0 7 0
}
# Auxiliary-function callback returning the xRowid value plus $ival.
#
# $ival originates from the text of a rank-function definition, so the
# expression is braced: the argument is used only as a numeric operand and
# is never re-parsed by expr as script or expression text.
proc rowidplus {cmd ival} {
  set rowid [$cmd xRowid]
  return [expr {$rowid + $ival}]
}
sqlite3_fts5_create_function db rowidplus rowidplus
do_execsql_test 4.2.1 {
INSERT INTO t2(t2, rank) VALUES('rank', 'rowidplus(100) ');
SELECT rowid, rank FROM t2 WHERE t2 MATCH 'o + q + g'
} {
10 110
}
do_execsql_test 4.2.2 {
INSERT INTO t2(t2, rank) VALUES('rank', 'rowidplus(111) ');
SELECT rowid, rank FROM t2 WHERE t2 MATCH 'o + q + g'
} {
10 121
}
do_execsql_test 4.2.3 {
SELECT rowid, rank FROM t2
WHERE t2 MATCH 'o + q + g' AND rank MATCH 'rowidplus(112)'
} {
10 122
}
# Auxiliary-function callback returning the xRowid value modulo $imod.
#
# $imod originates from the text of a rank-function definition, so the
# expression is braced: the argument is used only as a numeric operand and
# is never re-parsed by expr as script or expression text.
proc rowidmod {cmd imod} {
  set rowid [$cmd xRowid]
  return [expr {$rowid % $imod}]
}
sqlite3_fts5_create_function db rowidmod rowidmod
do_execsql_test 4.3.1 {
CREATE VIRTUAL TABLE t3 USING fts5(x);
INSERT INTO t3 VALUES('a one');
INSERT INTO t3 VALUES('a two');
INSERT INTO t3 VALUES('a three');
INSERT INTO t3 VALUES('a four');
INSERT INTO t3 VALUES('a five');
INSERT INTO t3(t3, rank) VALUES('rank', 'bm25()');
}
breakpoint
do_execsql_test 4.3.2 {
SELECT * FROM t3
WHERE t3 MATCH 'a' AND rank MATCH 'rowidmod(4)'
ORDER BY rank ASC
} {
{a four} {a one} {a five} {a two} {a three}
}
do_execsql_test 4.3.3 {
SELECT *, rank FROM t3
WHERE t3 MATCH 'a' AND rank MATCH 'rowidmod(3)'
ORDER BY rank ASC
} {
{a three} 0 {a one} 1 {a four} 1 {a two} 2 {a five} 2
}
do_catchsql_test 4.4.3 {
SELECT *, rank FROM t3 WHERE t3 MATCH 'a' AND rank MATCH 'xyz(3)'
} {1 {no such function: xyz}}
do_catchsql_test 4.4.4 {
SELECT *, rank FROM t3 WHERE t3 MATCH 'a' AND rank MATCH NULL
} {1 {parse error in rank function: }}
finish_test

View File

@@ -0,0 +1,103 @@
# 2015 Jun 10
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# The tests in this file focus on renaming FTS5 tables using the
# "ALTER TABLE ... RENAME TO ..." command
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5alter
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
#-------------------------------------------------------------------------
# Test renaming regular, contentless and columnsize=0 FTS5 tables.
#
do_execsql_test 1.1.0 {
CREATE VIRTUAL TABLE "a x" USING fts5(a, x);
INSERT INTO "a x" VALUES('a a a', 'x x x');
ALTER TABLE "a x" RENAME TO "x y";
}
do_execsql_test 1.1.1 {
SELECT * FROM "x y";
SELECT rowid FROM "x y" WHERE "x y" MATCH 'a'
} {{a a a} {x x x} 1}
do_execsql_test 1.2.0 {
CREATE VIRTUAL TABLE "one/two" USING fts5(one, columnsize=0);
INSERT INTO "one/two"(rowid, one) VALUES(456, 'd d d');
ALTER TABLE "one/two" RENAME TO "three/four";
}
do_execsql_test 1.2.1 {
SELECT * FROM "three/four";
SELECT rowid FROM "three/four" WHERE "three/four" MATCH 'd'
} {{d d d} 456}
do_execsql_test 1.3.0 {
CREATE VIRTUAL TABLE t1 USING fts5(val, content='');
INSERT INTO t1(rowid, val) VALUES(-1, 'drop table');
INSERT INTO t1(rowid, val) VALUES(-2, 'drop view');
ALTER TABLE t1 RENAME TO t2;
}
do_execsql_test 1.3.1 {
SELECT rowid, * FROM t2;
SELECT rowid FROM t2 WHERE t2 MATCH 'table'
} {-2 {} -1 {} -1}
#-------------------------------------------------------------------------
# Test renaming an FTS5 table within a transaction.
#
do_execsql_test 2.1 {
CREATE VIRTUAL TABLE zz USING fts5(a);
INSERT INTO zz(rowid, a) VALUES(-56, 'a b c');
BEGIN;
INSERT INTO zz(rowid, a) VALUES(-22, 'a b c');
ALTER TABLE zz RENAME TO yy;
SELECT rowid FROM yy WHERE yy MATCH 'a + b + c';
COMMIT;
} {-56 -22}
do_execsql_test 2.2 {
BEGIN;
ALTER TABLE yy RENAME TO ww;
INSERT INTO ww(rowid, a) VALUES(-11, 'a b c');
SELECT rowid FROM ww WHERE ww MATCH 'a + b + c';
} {-56 -22 -11}
do_execsql_test 2.3 {
ROLLBACK;
SELECT rowid FROM yy WHERE yy MATCH 'a + b + c';
} {-56 -22}
#-------------------------------------------------------------------------
do_execsql_test 3.1 {
CREATE VIRTUAL TABLE abc USING fts5(a);
INSERT INTO abc(rowid, a) VALUES(1, 'a');
BEGIN;
INSERT INTO abc(rowid, a) VALUES(2, 'a');
}
breakpoint
do_execsql_test 3.2 {
SELECT rowid FROM abc WHERE abc MATCH 'a';
} {1 2}
do_execsql_test 3.3 {
COMMIT;
SELECT rowid FROM abc WHERE abc MATCH 'a';
} {1 2}
finish_test

379
ext/fts5/test/fts5auto.test Normal file
View File

@@ -0,0 +1,379 @@
# 2015 May 30
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
#
# This file contains automatically generated tests for various types
# of MATCH expressions.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5auto
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
set data {
-4026076
{n x w k b p x b n t t d s} {f j j s p j o}
{w v i y r} {i p y s}
{a o q v e n q r} {q v g u c y a z y}
3995120
{c} {e e w d t}
{x c p f w r s m l r b f d} {g g u e}
{s n u t d v p d} {b k v p m f}
-2913881
{k m} {a}
{w r j z n s l} {m j i w d t w e l}
{z n c} {v f b m}
174082
{j} {q l w u k e q v r i}
{j l} {u v w r s p e l}
{p i k j k q c t g u s} {g u y s m h q k g t e s o r}
3207399
{e t} {}
{p} {y v r b e k h d e v}
{t m w z b g q t s d d h} {o n v u i t o y k j}
182399
{} {m o s o x d y f a x j z}
{x n z r c d} {n r x i r}
{s v s} {a u}
768994
{e u t q v z q k j p u f j p} {y c b}
{p s d} {k n w p m p p}
{u o x s d} {f s g r d b d r m m m z y}
3931037
{c j p x e} {c n k t h z o i}
{} {r r p j k x w q}
{o r d z d} {x}
3105748
{p x r u} {x i s w o t o g x m z i w}
{q x m z} {h c j w b l y w x c o}
{m b k v} {t v q i s a d x}
-2501642
{o u d n w o m o o s n t r h} {k p e u y p e z d j r y g}
{v b b h d d q y j q j} {a m w d t}
{y e f n} {a k x i x}
-1745680
{z u w j f d b f} {j w i c g u d w e}
{m f p v m a s p v c o s} {s c r z o t w l b e a q}
{m k q} {k b a v o}
-932328
{r v i u m q d r} {f z u v h c m r f g}
{r x r} {k p i d h h w h z u a x}
{k m j p} {h l j a e u c i q x x f x g}
-3923818
{t t p b n u i h e c k} {m z}
{v u d c} {v y y j s g}
{o a f k k q p h g x e n z x} {h d w c o l}
-2145922
{z z l f a l g e d c d h} {j b j p k o o u b q}
{d i g q t f d r h k} {n w g j c x r p t y f l c t}
{d o c u k f o} {r y s x z s p p h g t p y c}
4552917
{j w j y h l k u} {n a}
{y h w c n k} {b}
{w} {z l r t s i m v c y}
2292008
{q v q j w y y x u t} {r q z n h a b o}
{d q y} {y v o e j}
{} {a b h c d l p d x}
1407892
{n j j u q d o a u c f} {r d b w o q n g}
{d e v w s} {v d v o u o x s l s j z y}
{j y w h i f g i h m} {v n z b n y}
-4412544
{g h h r s} {h e r e}
{n q s} {o p z r m l l t}
{p} {f s u o b j}
1209110
{o a a z t t u h j} {z z i r k r}
{i c x q w g v o x z i z p} {q o g k i n z x e d v w v}
{p f v b g f e d n p u c y k} {q z z a i p a a s r e z}
3448977
{i v} {l u x t b o k}
{f h u v p} {k a o y j}
{d m k c j} {v c e r u e f i t}
-4703774
{d h v w u z r e h x o l t} {p s f y w y r q d a m w}
{c h g c g j j f t b i c q} {s e}
{c t q j g f} {v n r w y r a g e j d}
2414151
{s o o s d s k q b f q v p e} {j r o b t o p d l o o x}
{d d k t v e} {}
{t v o d w} {w e q w h y c y y i j b a m}
-3342407
{m c h n e p d o c r w n t} {j d k s p q l}
{t g s r w x j l r z r} {h}
{r q v x i r a n h s} {m y p b v w r a u o g q r}
-993951
{l n p u o j d x t u u c o j} {k r n a r e k v i t o e}
{q f t t a a c z v f} {o n m p v f o e n}
{h z h i p s b j z h} {i t w m k c u g n i}
1575251
{} {z s i j d o x j a r t}
{h g j u j n v e n z} {p z j n n f}
{s q q f d w r l y i z d o m} {b a n d h t b y g h d}
4263668
{q g t h f s} {s g x p f q z i s o f l i}
{q k} {w v h a x n a r b}
{m j a h o b i x k r w z q u} {m t r g j o e q t m p u l}
2487819
{m w g x r n e u t s r} {b x a t u u j c r n}
{j} {w f j r e e y l p}
{o u h b} {o c a c a b v}
167966
{o d b s d o a u m o x y} {c}
{r w d o b v} {z e b}
{i n z a f g z o} {m u b a g}
1948599
{n r g q d j s} {n k}
{l b p d v t k h y y} {u m k e c}
{t b n y o t b} {j w c i r x x}
2941631
{l d p l b g f} {e k e}
{p j} {m c s w t b k n l d x}
{f o v y v l} {c w p s w j w c u t y}
3561104
{d r j j r j i g p} {u}
{g r j q} {z l p d s n f c h t d c v z}
{w r c f s x z y} {g f o k g g}
-2223281
{y e t j j z f p o m m z} {h k o g o}
{m x a t} {l q x l}
{r w k d l s y b} {q g k b}
-4502874
{k k b x k l f} {r}
{} {q m z b k h k u n e z}
{z q g y m y u} {}
1757599
{d p z j y u r} {z p l q w j t j}
{n i r x r y j} {}
{h} {w t d q c x z z x e e}
-4809589
{} {z p x u h i i n g}
{w q s u d b f x n} {l y k b b r x t i}
{n d v j q o t o d p z e} {u r y u v u c}
1068408
{y e} {e g s k e w t p v o b k}
{z c m s} {r u r u h n h b p q g b}
{j k b l} {m c d t s r s q a d b o f}
-1972554
{m s w} {d k v s a r k p a r i v}
{g j z k p} {y k c v r e u o q f i b a}
{i p i} {c z w c y b n z i v}
-2052385
{} {x e u f f g n c i x n e i e}
{} {p s w d x p g}
{} {s j a h n}
2805981
{m x g c w o e} {k g u y r y i u e g g}
{f k j v t x p h x k u} {w i}
{b l f z f v t n} {i u d o d p h s m u}
2507621
{} {u b n l x f n j t}
{u r x l h} {h r l m r}
{d y e n b s q v t k n q q} {x l t v w h a s k}
-3138375
{e o f j y x u w v e w z} {r d q g k n n v r c z n e w}
{l y i q z k j p u f q s k} {c i l l i m a a g a z r x f}
{a v k h m q z b y n z} {q g w c y r r o a}
-457971
{j x a w e c s h f l f} {q}
{j f v j u m d q r v v} {x n v a w}
{i e h d h f u w t t z} {v s u l s v o v i k n e}
2265221
{z t c y w n y r t} {n b a x s}
{q w a v} {a b s d x i g w t e z h}
{t l} {j k r w f f y j o k u}
-3941280
{r x t o z} {f j n z k}
{t x e b t d b k w i s} {j t y h i h}
{y q g n g s u v c z j z n g} {n n g t l p h}
2084745
{z d z d} {j}
{o e k t b k a z l w} {o p i h k c x}
{c r b t i j f} {z e n m}
1265843
{} {j s g j j x u y}
{u q t f} {g o g}
{w o j e d} {w q n a c t q x j}
-2941116
{i n c u o} {f b}
{o m s q d o z a q} {f s v o b b}
{o a z c h r} {j e w h b f z}
-1265441
{p g z q v a o a x a} {s t h}
{w i p o c} {s n d g f z w q o d v v l j}
{y f b i a s v} {u m o z k k s t s d p b l p}
-1989158
{r i c n} {r e w w i n z}
{q u s y b w u g y g f o} {y}
{d} {j x i b x u y d c p v a h}
2391989
{b n w x w f q h p i} {e u b b i n a i o c d g}
{v a z o i e n l x l r} {r u f o r k w m d w}
{k s} {r f e j q p w}
}
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE tt USING fts5(a, b, c, d, e, f);
} {}
fts5_aux_test_functions db
# matchdata EXPR TBL COLLIST ?ORDER?
#
# Compute the expected result of running fts5 query EXPR against table TBL
# without using the MATCH operator. EXPR is first converted to a Tcl script
# by the fts5_expr_tcl() SQL function; that script is then evaluated once
# for every row of TBL, visited in rowid order ORDER (ASC or DESC).
#
# Returns a flat list of {rowid rowdata} pairs, one pair for each row for
# which the script produced a non-empty result.
#
proc matchdata {expr tbl collist {order ASC}} {
  # Trailing SQL arguments naming each column: ", 'c1', 'c2', ..."
  set colargs ""
  foreach name $collist {
    append colargs ", '$name'"
  }

  # [subst -novar] expands only the bracketed command below. "$expr" is left
  # in place for SQLite to bind from the Tcl variable, and "$cols" stays as
  # literal text inside the SQL string.
  set tclexpr [db one [subst -novar {
    SELECT fts5_expr_tcl(
      $expr, 'nearset $cols -pc ::pc' [set colargs]
    )
  }]]

  set res [list]
  db eval "SELECT rowid, * FROM $tbl ORDER BY rowid $order" x {
    # Presumably the generated script expands $cols when it is evaluated, so
    # the values of the current row are kept in a local of exactly that name.
    set cols [list]
    foreach col $x(*) {
      if {$col ne "rowid"} { lappend cols $x($col) }
    }

    set ::pc 0
    set rowdata [eval $tclexpr]
    if {$rowdata ne ""} { lappend res $x(rowid) $rowdata }
  }

  return $res
}
# do_auto_test TN TBL COLS EXPR
#
# Run fts5 query EXPR against table TBL twice, once in ascending and once in
# descending rowid order, and check that the rowids and position lists
# returned by the MATCH operator equal the values computed independently by
# [matchdata]. COLS is the list of column names of TBL.
#
# Each test is named "TN.<a|d>.rows=<N>", where N is the number of rows that
# are expected to match.
#
proc do_auto_test {tn tbl cols expr} {
  foreach order {asc desc} {
    set res [matchdata $expr $tbl $cols $order]

    # $res holds {rowid rowdata} pairs, hence the division by two. The
    # expression is braced so that it is parsed once and byte-compiled.
    set nrow [expr {[llength $res] / 2}]
    set testname "$tn.[string range $order 0 0].rows=$nrow"

    # The query text is handed to SQLite through a global variable so that
    # it is bound as a value rather than pasted into the SQL.
    set ::autotest_expr $expr
    do_execsql_test $testname [subst -novar {
      SELECT rowid, fts5_test_poslist([set tbl]) FROM [set tbl]
      WHERE [set tbl] MATCH $::autotest_expr ORDER BY rowid [set order]
    }] $res
  }
}
#-------------------------------------------------------------------------
#
# Run the same set of query expressions three times. On each pass the
# letters in $data are rewritten with [string map] before being loaded into
# table tt: pass 0 applies no mapping, passes 1 and 2 map several letters
# onto the same replacement letter so that fewer distinct tokens remain.
#
# NOTE(review): in both maps the key "g" is listed twice ("m g g g" and
# "m b g b") while "n" is never listed. The second "g" looks like it was
# meant to be "n" - confirm. Map 2 also has no entries for "y" and "z".
#
for {set fold 0} {$fold < 3} {incr fold} {
switch $fold {
0 { set map {} }
1 { set map {
a a b a c b d b e c f c g d h d
i e j e k f l f m g g g o h p h
q i r i s j t j u k v k w l x l
y m z m
}}
2 { set map {
a a b a c a d a e a f a g a h a
i b j b k b l b m b g b o b p b
q c r c s c t c u c v c w c x c
}}
}
# Empty the table and reload it inside a single transaction.
execsql {
BEGIN;
DELETE FROM tt;
}
foreach {rowid a b c d e f} [string map $map $data] {
# NOTE(review): only the record with rowid -4703774 is inserted. This looks
# like a leftover debugging filter - confirm whether every record in $data
# is supposed to be loaded.
if {$rowid==-4703774} {
execsql {
INSERT INTO tt(rowid, a, b, c, d, e, f)
VALUES($rowid, $a, $b, $c, $d, $e, $f)
}
}
}
execsql COMMIT
# Each expression is checked by do_auto_test in both rowid orders.
foreach {tn expr} {
A.1 { {a} : x }
A.2 { {a b} : x }
A.3 { {a b f} : x }
A.4 { {f a b} : x }
A.5 { {f a b} : x y }
A.6 { {f a b} : x + y }
A.7 { {c a b} : x + c }
A.8 { {c d} : "l m" }
A.9 { {c e} : "l m" }
A.10 { {a b c a b c a b c f f e} : "l m" }
B.1 { a NOT b }
B.2 { a NOT a:b }
B.3 { a OR (b AND c) }
B.4 { a OR (b AND {a b c}:c) }
B.5 { a OR "b c" }
B.6 { a OR b OR c }
C.1 { a OR (b AND "b c") }
C.2 { a OR (b AND "z c") }
} {
do_auto_test 3.$fold.$tn tt {a b c d e f} $expr
}
}
# replace_elems LIST ?INDEX VALUE ...?
#
# Return a copy of LIST in which, for each INDEX/VALUE pair given, the
# element at position INDEX has been replaced by VALUE. Pairs are applied
# from left to right.
#
proc replace_elems {list args} {
  foreach {pos value} $args {
    set list [lreplace $list $pos $pos $value]
  }
  return $list
}
#-------------------------------------------------------------------------
#
set bigdoc [string trim [string repeat "a " 1000]]
do_test 4.0 {
set a [replace_elems $bigdoc 50 x 950 x]
set b [replace_elems $bigdoc 20 y 21 x 887 x 888 y]
set c [replace_elems $bigdoc 1 z 444 z 789 z]
execsql {
CREATE VIRTUAL TABLE yy USING fts5(c1, c2, c3);
INSERT INTO yy(rowid, c1, c2, c3) VALUES(-56789, $a, $b, $c);
INSERT INTO yy(rowid, c1, c2, c3) VALUES(250, $a, $b, $c);
}
} {}
# Query the large documents stored in table yy, first without a column
# filter and then restricted to each of the three columns in turn.
foreach {tn expr} {
  1 x
  2 y
  3 z
  4 {c1 : x} 5 {c2 : x} 6 {c3 : x}
  7 {c1 : y} 8 {c2 : y} 9 {c3 : y}
  10 {c1 : z} 11 {c2 : z} 12 {c3 : z}
} {
  do_auto_test 4.$tn yy {c1 c2 c3} $expr
}
finish_test

250
ext/fts5/test/fts5aux.test Normal file
View File

@@ -0,0 +1,250 @@
# 2014 Dec 20
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on the auxiliary function APIs.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5aux
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
# inst CMD I
#
# Auxiliary function body: invoke the xInst method of the fts5 API command
# CMD with argument I and return whatever it returns.
proc inst {cmd i} {
  return [$cmd xInst $i]
}
sqlite3_fts5_create_function db inst inst
# colsize CMD I
#
# Auxiliary function body: invoke the xColumnSize method of the fts5 API
# command CMD with argument I and return whatever it returns.
proc colsize {cmd i} {
  return [$cmd xColumnSize $i]
}
sqlite3_fts5_create_function db colsize colsize
# totalsize CMD I
#
# Auxiliary function body: invoke the xColumnTotalSize method of the fts5
# API command CMD with argument I and return whatever it returns.
proc totalsize {cmd i} {
  return [$cmd xColumnTotalSize $i]
}
sqlite3_fts5_create_function db totalsize totalsize
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE f1 USING fts5(a, b);
INSERT INTO f1 VALUES('one two', 'two one zero');
INSERT INTO f1 VALUES('one one', 'one one one');
}
do_catchsql_test 1.1 {
SELECT inst(f1, -1) FROM f1 WHERE f1 MATCH 'two';
} {1 SQLITE_RANGE}
do_catchsql_test 1.2 {
SELECT inst(f1, 0) FROM f1 WHERE f1 MATCH 'two';
} {0 {{0 0 1}}}
do_catchsql_test 1.3 {
SELECT inst(f1, 1) FROM f1 WHERE f1 MATCH 'two';
} {0 {{0 1 0}}}
do_catchsql_test 1.4 {
SELECT inst(f1, 2) FROM f1 WHERE f1 MATCH 'two';
} {1 SQLITE_RANGE}
do_catchsql_test 2.1 {
SELECT colsize(f1, 2) FROM f1 WHERE f1 MATCH 'two';
} {1 SQLITE_RANGE}
do_execsql_test 2.2 {
SELECT colsize(f1, 0), colsize(f1, 1) FROM f1 WHERE f1 MATCH 'zero';
} {2 3}
do_execsql_test 2.3 {
SELECT colsize(f1, -1) FROM f1 WHERE f1 MATCH 'zero';
} {5}
do_execsql_test 2.4.1 {
SELECT totalsize(f1, -1) FROM f1 WHERE f1 MATCH 'zero';
} {10}
do_execsql_test 2.4.2 {
SELECT totalsize(f1, 0) FROM f1 WHERE f1 MATCH 'zero';
} {4}
do_execsql_test 2.4.3 {
SELECT totalsize(f1, 1) FROM f1 WHERE f1 MATCH 'zero';
} {6}
do_catchsql_test 2.4.4 {
SELECT totalsize(f1, 2) FROM f1 WHERE f1 MATCH 'zero';
} {1 SQLITE_RANGE}
#-------------------------------------------------------------------------
# Test the xSetAuxdata and xGetAuxdata APIs with a NULL destructor.
#
# prevrowid ADD CMD
#
# Auxiliary function body. Reads the integer currently stored as auxiliary
# data on the fts5 API command CMD, replaces it with the rowid of the
# current row, and returns the value that was read plus ADD.
#
# ADD is fixed when the function is registered, so the same body can back
# several SQL functions that differ only in the offset they add.
#
proc prevrowid {add cmd} {
  set res [$cmd xGetAuxdataInt 0]
  set r [$cmd xRowid]
  $cmd xSetAuxdataInt $r

  # Braced so that the operands are substituted once only and the
  # expression can be byte-compiled. Assumes xGetAuxdataInt always returns
  # an integer (the tests below expect 0 when nothing has been stored).
  return [expr {$res + $add}]
}
sqlite3_fts5_create_function db prevrowid [list prevrowid 0]
sqlite3_fts5_create_function db prevrowid1 [list prevrowid 1]
do_execsql_test 3.0 {
CREATE VIRTUAL TABLE e5 USING fts5(x);
INSERT INTO e5 VALUES('a b c');
INSERT INTO e5 VALUES('d e f');
INSERT INTO e5 VALUES('a b c');
INSERT INTO e5 VALUES('d e f');
INSERT INTO e5 VALUES('a b c');
}
do_execsql_test 3.1 {
SELECT prevrowid(e5) || '+' || rowid FROM e5 WHERE e5 MATCH 'c'
} {0+1 1+3 3+5}
do_execsql_test 3.2 {
SELECT prevrowid(e5) || '+' || prevrowid1(e5) || '+' || rowid
FROM e5 WHERE e5 MATCH 'e'
} {0+1+2 2+3+4}
#-------------------------------------------------------------------------
# Test that if the xQueryPhrase callback returns other than SQLITE_OK,
# the query is abandoned. And that if it returns an error code other than
# SQLITE_DONE, the error is propagated back to the caller.
#
do_execsql_test 4.0 {
CREATE VIRTUAL TABLE e7 USING fts5(x);
INSERT INTO e7 VALUES('a x a');
INSERT INTO e7 VALUES('b x b');
INSERT INTO e7 VALUES('c x c');
INSERT INTO e7 VALUES('d x d');
INSERT INTO e7 VALUES('e x e');
}
# xCallback ROWID CODE CMD
#
# Callback used with xQueryPhrase. Appends the rowid of the row being
# visited to the global list ::cb. Returns CODE when that rowid equals
# ROWID and an empty string otherwise.
#
proc xCallback {rowid code cmd} {
  set current [$cmd xRowid]
  lappend ::cb $current
  if {$current != $rowid} {
    return ""
  }
  return $code
}
# phrasequery CMD CODE
#
# Auxiliary function body. Clears the global list ::cb, then calls the
# xQueryPhrase method of CMD with first argument 1 and xCallback as the
# callback. The callback is set up to return CODE when it reaches the row
# this function was invoked for. Returns the contents of ::cb afterwards,
# i.e. the rowids the callback was invoked with.
#
proc phrasequery {cmd code} {
  set ::cb {}
  set callback [list xCallback [$cmd xRowid] $code]
  $cmd xQueryPhrase 1 $callback
  return $::cb
}
sqlite3_fts5_create_function db phrasequery phrasequery
do_execsql_test 4.1 {
SELECT phrasequery(e7, 'SQLITE_OK') FROM e7 WHERE e7 MATCH 'c x'
} {{1 2 3 4 5}}
do_execsql_test 4.2 {
SELECT phrasequery(e7, 'SQLITE_DONE') FROM e7 WHERE e7 MATCH 'c x'
} {{1 2 3}}
do_catchsql_test 4.3 {
SELECT phrasequery(e7, 'SQLITE_ERROR') FROM e7 WHERE e7 MATCH 'c x'
} {1 SQLITE_ERROR}
#-------------------------------------------------------------------------
# Auxiliary function calls with many cursors in the global cursor list.
#
do_execsql_test 5.0 {
CREATE VIRTUAL TABLE e9 USING fts5(y);
INSERT INTO e9(rowid, y) VALUES(1, 'i iii');
INSERT INTO e9(rowid, y) VALUES(2, 'ii iv');
INSERT INTO e9(rowid, y) VALUES(3, 'ii');
INSERT INTO e9(rowid, y) VALUES(4, 'i iv');
INSERT INTO e9(rowid, y) VALUES(5, 'iii');
}
# my_rowid CMD
#
# Auxiliary function body: return the result of the xRowid method of the
# fts5 API command CMD.
proc my_rowid {cmd} {
  return [$cmd xRowid]
}
sqlite3_fts5_create_function db my_rowid my_rowid
foreach {var q} {
s1 i
s2 ii
s3 iii
s4 iv
} {
set sql "SELECT my_rowid(e9) FROM e9 WHERE e9 MATCH '$q'"
set $var [sqlite3_prepare db $sql -1 dummy]
}
do_test 5.1.1 { sqlite3_step $s1 ; sqlite3_column_int $s1 0 } 1
do_test 5.1.2 { sqlite3_step $s2 ; sqlite3_column_int $s2 0 } 2
do_test 5.1.3 { sqlite3_step $s3 ; sqlite3_column_int $s3 0 } 1
do_test 5.1.4 { sqlite3_step $s4 ; sqlite3_column_int $s4 0 } 2
do_test 5.2.1 { sqlite3_step $s1 ; sqlite3_column_int $s1 0 } 4
do_test 5.2.2 { sqlite3_step $s2 ; sqlite3_column_int $s2 0 } 3
do_test 5.2.3 { sqlite3_step $s3 ; sqlite3_column_int $s3 0 } 5
do_test 5.2.4 { sqlite3_step $s4 ; sqlite3_column_int $s4 0 } 4
sqlite3_finalize $s1
sqlite3_finalize $s2
sqlite3_finalize $s3
sqlite3_finalize $s4
#-------------------------------------------------------------------------
# Passing an invalid first argument to an auxiliary function is detected.
#
do_execsql_test 6.0 {
CREATE VIRTUAL TABLE e11 USING fts5(y, z);
INSERT INTO e11(rowid, y, z) VALUES(1, 'a b', 45);
INSERT INTO e11(rowid, y, z) VALUES(2, 'b c', 46);
}
do_catchsql_test 6.1 {
SELECT my_rowid(z) FROM e11 WHERE e11 MATCH 'b'
} {1 {no such cursor: 45}}
do_catchsql_test 6.2 {
SELECT my_rowid(y) FROM e11 WHERE e11 MATCH 'b'
} {1 {no such cursor: 0}}
#-------------------------------------------------------------------------
# Test passing an out-of-range phrase number to xPhraseSize (should
# return 0).
#
# my_phrasesize CMD IPHRASE
#
# Auxiliary function body: return the result of the xPhraseSize method of
# the fts5 API command CMD for phrase number IPHRASE.
proc my_phrasesize {cmd iPhrase} {
  return [$cmd xPhraseSize $iPhrase]
}
sqlite3_fts5_create_function db my_phrasesize my_phrasesize
do_execsql_test 7.1 {
CREATE VIRTUAL TABLE t1 USING fts5(a);
INSERT INTO t1 VALUES('a b c');
}
do_execsql_test 7.2 {
SELECT
my_phrasesize(t1, -1),
my_phrasesize(t1, 0),
my_phrasesize(t1, 1),
my_phrasesize(t1, 2)
FROM t1 WHERE t1 MATCH 'a OR b+c'
} {0 1 2 0}
#-------------------------------------------------------------------------
#
do_execsql_test 8.0 {
CREATE VIRTUAL TABLE x1 USING fts5(a);
}
# For each case, replace the contents of x1 with the rows in lRow and check
# the output of highlight() for the query 'a OR (b AND d)' against res.
foreach {tn lRow res} {
  4 {"a a a" "b" "a d"} {"[a] [a] [a]" "[a] d"}
  1 {"b d" "a b"} {"[b] [d]" "[a] b"}
  2 {"d b" "a d"} {"[d] [b]" "[a] d"}
  3 {"a a d"} {"[a] [a] d"}
} {
  execsql { DELETE FROM x1 }
  foreach row $lRow { execsql { INSERT INTO x1 VALUES($row) } }
  do_execsql_test 8.$tn {
    SELECT highlight(x1, 0, '[', ']') FROM x1 WHERE x1 MATCH 'a OR (b AND d)';
  } $res
}
finish_test

View File

@@ -0,0 +1,115 @@
# 2014 Dec 20
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on the fts5 xSetAuxdata() and xGetAuxdata() APIs.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5auxdata
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE f1 USING fts5(a, b);
INSERT INTO f1(rowid, a, b) VALUES(1, 'a', 'b1');
INSERT INTO f1(rowid, a, b) VALUES(2, 'a', 'b2');
INSERT INTO f1(rowid, a, b) VALUES(3, 'a', 'b3');
INSERT INTO f1(rowid, a, b) VALUES(4, 'a', 'b4');
INSERT INTO f1(rowid, a, b) VALUES(5, 'a', 'b5');
}
# aux_function_1 CMD TN
#
# Auxiliary function body that checks how auxiliary data set with
# xSetAuxdata is carried from one row of a query to the next. It is meant
# to be called once per row, for rowids 1 to 5 in ascending order, and runs
# one test named TN.<rowid> for each row:
#
#   rows 1-2  read the value left by the previous row, then store a new one
#   row 3     reads the value stored by row 2 without changing it
#   row 4     reads the same value again, passing 1 instead of 0
#   row 5     expects to read an empty value
#
# NOTE(review): the argument of xGetAuxdata is presumably a "clear" flag,
# which would explain why row 5 reads nothing after row 4 passed 1 - confirm.
#
proc aux_function_1 {cmd tn} {
switch [$cmd xRowid] {
1 {
do_test $tn.1 [list $cmd xGetAuxdata 0 ] {}
$cmd xSetAuxdata "one"
}
2 {
do_test $tn.2 [list $cmd xGetAuxdata 0 ] {one}
$cmd xSetAuxdata "two"
}
3 {
do_test $tn.3 [list $cmd xGetAuxdata 0 ] {two}
}
4 {
do_test $tn.4 [list $cmd xGetAuxdata 1 ] {two}
}
5 {
do_test $tn.5 [list $cmd xGetAuxdata 0 ] {}
}
}
}
sqlite3_fts5_create_function db aux_function_1 aux_function_1
db eval {
SELECT aux_function_1(f1, 1) FROM f1 WHERE f1 MATCH 'a'
ORDER BY rowid ASC
}
# aux_function_2 CMD TN INST
#
# Variant of aux_function_1 intended to be called twice per row within the
# same query, once with INST set to "A" and once with any other value.
# Tests are named TN.<rowid>.INST.
#
# The "A" call follows the same read/store sequence as aux_function_1, with
# the stored values suffixed by INST. The other call never stores anything;
# it only checks that it reads the values written by the "A" call.
#
# NOTE(review): the expected values in the second branch assume the "A"
# call is evaluated before the other call on each row - confirm.
#
proc aux_function_2 {cmd tn inst} {
if {$inst == "A"} {
switch [$cmd xRowid] {
1 {
do_test $tn.1.$inst [list $cmd xGetAuxdata 0 ] {}
$cmd xSetAuxdata "one $inst"
}
2 {
do_test $tn.2.$inst [list $cmd xGetAuxdata 0 ] "one $inst"
$cmd xSetAuxdata "two $inst"
}
3 {
do_test $tn.3.$inst [list $cmd xGetAuxdata 0 ] "two $inst"
}
4 {
do_test $tn.4.$inst [list $cmd xGetAuxdata 1 ] "two $inst"
}
5 {
do_test $tn.5.$inst [list $cmd xGetAuxdata 0 ] {}
}
}
} else {
switch [$cmd xRowid] {
1 {
do_test $tn.1.$inst [list $cmd xGetAuxdata 0 ] "one A"
}
2 {
do_test $tn.2.$inst [list $cmd xGetAuxdata 0 ] "two A"
}
3 {
do_test $tn.3.$inst [list $cmd xGetAuxdata 0 ] "two A"
}
4 {
do_test $tn.4.$inst [list $cmd xGetAuxdata 0 ] {}
}
5 {
do_test $tn.5.$inst [list $cmd xGetAuxdata 0 ] {}
}
}
}
}
sqlite3_fts5_create_function db aux_function_2 aux_function_2
db eval {
SELECT aux_function_2(f1, 2, 'A'), aux_function_2(f1, 2, 'B')
FROM f1 WHERE f1 MATCH 'a'
ORDER BY rowid ASC
}
finish_test

View File

@@ -0,0 +1,64 @@
# 2015 April 21
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This test is focused on really large position lists. Those that require
# 4 or 5 byte position-list size varints. Because of the amount of memory
# required, these tests only run on 64-bit platforms.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5bigpl
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
if { $tcl_platform(wordSize)<8 } {
finish_test
return
}
do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts5(x) }
do_test 1.1 {
foreach t {a b c d e f g h i j} {
set doc [string repeat "$t " 1200000]
execsql { INSERT INTO t1 VALUES($doc) }
}
execsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {}
do_test 1.2 {
execsql { DELETE FROM t1 }
foreach t {"a b" "b a" "c d" "d c"} {
set doc [string repeat "$t " 600000]
execsql { INSERT INTO t1 VALUES($doc) }
}
execsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {}
# 5-byte varint. This test takes 30 seconds or so on a 2014 workstation.
# The generated database is roughly 635MiB.
#
do_test 2.1...slow {
execsql { DELETE FROM t1 }
foreach t {a} {
set doc [string repeat "$t " 150000000]
execsql { INSERT INTO t1 VALUES($doc) }
}
execsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {}
finish_test

View File

@@ -0,0 +1,138 @@
# 2015 Jun 10
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on fts5 tables with the columnsize=0 option.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5columnsize
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
#-------------------------------------------------------------------------
# Check that the option can be parsed and that the %_docsize table is
# only created if it is set to true.
#
foreach {tn outcome stmt} {
1 0 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize=0) }
2 1 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize=1) }
3 0 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize='0') }
4 1 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize='1') }
5 2 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize='') }
6 2 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize=2) }
7 1 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize=0, columnsize=1) }
8 1 { CREATE VIRTUAL TABLE t1 USING fts5(x) }
9 2 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize=11) }
} {
execsql {
DROP TABLE IF EXISTS t1;
}
if {$outcome==2} {
do_catchsql_test 1.$tn.1 $stmt {1 {malformed columnsize=... directive}}
} else {
do_execsql_test 1.$tn.2 $stmt
do_execsql_test 1.$tn.3 {
SELECT count(*) FROM sqlite_master WHERE name = 't1_docsize'
} $outcome
}
}
#-------------------------------------------------------------------------
# Run tests on a table with no %_content or %_docsize backing store.
#
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE t2 USING fts5(x, columnsize=0, content='');
}
do_catchsql_test 2.1 {
INSERT INTO t2 VALUES('a b c d e f');
} {1 {datatype mismatch}}
do_execsql_test 2.2 {
INSERT INTO t2(rowid, x) VALUES(1, 'c d e f');
INSERT INTO t2(rowid, x) VALUES(2, 'c d e f g h');
INSERT INTO t2(rowid, x) VALUES(3, 'a b c d e f g h');
} {}
do_execsql_test 2.3 {
SELECT rowid FROM t2 WHERE t2 MATCH 'b'; SELECT '::';
SELECT rowid FROM t2 WHERE t2 MATCH 'e'; SELECT '::';
SELECT rowid FROM t2 WHERE t2 MATCH 'h';
} {3 :: 1 2 3 :: 2 3}
do_execsql_test 2.4 {
INSERT INTO t2(t2, rowid, x) VALUES('delete', 2, 'c d e f g h');
SELECT rowid FROM t2 WHERE t2 MATCH 'b'; SELECT '::';
SELECT rowid FROM t2 WHERE t2 MATCH 'e'; SELECT '::';
SELECT rowid FROM t2 WHERE t2 MATCH 'h';
} {3 :: 1 3 :: 3}
do_execsql_test 2.5 {
INSERT INTO t2(t2) VALUES('delete-all');
SELECT rowid FROM t2 WHERE t2 MATCH 'b'; SELECT '::';
SELECT rowid FROM t2 WHERE t2 MATCH 'e'; SELECT '::';
SELECT rowid FROM t2 WHERE t2 MATCH 'h';
} {:: ::}
do_execsql_test 2.6 {
INSERT INTO t2(rowid, x) VALUES(1, 'o t t f');
INSERT INTO t2(rowid, x) VALUES(2, 'f s s e');
INSERT INTO t2(rowid, x) VALUES(3, 'n t e t');
}
do_catchsql_test 2.7.1 {
SELECT rowid FROM t2
} {1 {t2: table does not support scanning}}
do_catchsql_test 2.7.2 {
SELECT rowid FROM t2 WHERE rowid=2
} {1 {t2: table does not support scanning}}
do_catchsql_test 2.7.3 {
SELECT rowid FROM t2 WHERE rowid BETWEEN 1 AND 3
} {1 {t2: table does not support scanning}}
do_execsql_test 2.X {
DROP TABLE t2
}
#-------------------------------------------------------------------------
# Test the xColumnSize() API
#
fts5_aux_test_functions db
do_execsql_test 3.1.0 {
CREATE VIRTUAL TABLE t3 USING fts5(x, y UNINDEXED, z, columnsize=0);
INSERT INTO t3 VALUES('a a', 'b b b', 'c');
INSERT INTO t3 VALUES('x a x', 'b b b y', '');
}
do_execsql_test 3.1.1 {
SELECT rowid, fts5_test_columnsize(t3) FROM t3 WHERE t3 MATCH 'a'
} {
1 {2 0 1} 2 {3 0 0}
}
do_execsql_test 3.1.2 {
INSERT INTO t3 VALUES(NULL, NULL, 'a a a a');
DELETE FROM t3 WHERE rowid = 1;
SELECT rowid, fts5_test_columnsize(t3) FROM t3 WHERE t3 MATCH 'a'
} {
2 {3 0 0} 3 {0 0 4}
}
do_execsql_test 3.2.0 {
CREATE VIRTUAL TABLE t4 USING fts5(x, y UNINDEXED, z, columnsize=0, content='');
INSERT INTO t4(rowid, x, y, z) VALUES(1, 'a a', 'b b b', 'c');
INSERT INTO t4(rowid, x, y, z) VALUES(2, 'x a x', 'b b b y', '');
}
do_execsql_test 3.2.1 {
SELECT rowid, fts5_test_columnsize(t4) FROM t4 WHERE t4 MATCH 'a'
} {
1 {-1 0 -1} 2 {-1 0 -1}
}
finish_test

View File

@@ -0,0 +1,208 @@
# 2015 Jan 13
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file focuses on the code in fts5_config.c, which is largely concerned
# with parsing the various configuration and CREATE TABLE options.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5config
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
#-------------------------------------------------------------------------
# Try different types of quote characters.
#
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE t1 USING fts5('a', "b", [c], `d`);
PRAGMA table_info = t1;
} {
0 a {} 0 {} 0
1 b {} 0 {} 0
2 c {} 0 {} 0
3 d {} 0 {} 0
}
#-------------------------------------------------------------------------
# Syntax errors in the prefix= option.
#
foreach {tn opt} {
1 {prefix=x}
2 {prefix='x'}
3 {prefix='$'}
} {
set res [list 1 {malformed prefix=... directive}]
do_catchsql_test 2.$tn "CREATE VIRTUAL TABLE f1 USING fts5(x, $opt)" $res
}
#-------------------------------------------------------------------------
# Syntax errors in the 'rank' option.
#
foreach {tn val} {
1 "f1(xyz)"
2 "f1(zyx)"
3 "f1(nzz)"
4 "f1(x'!!')"
5 "f1(x':;')"
6 "f1(x'[]')"
7 "f1(x'{}')"
8 "f1('abc)"
} {
do_catchsql_test 3.$tn {
INSERT INTO t1(t1, rank) VALUES('rank', $val);
} {1 {SQL logic error or missing database}}
}
#-------------------------------------------------------------------------
# The parsing of SQL literals specified as part of 'rank' options.
#
do_execsql_test 4.0 {
CREATE VIRTUAL TABLE zzz USING fts5(one);
INSERT INTO zzz VALUES('a b c');
}
# first CMD A
#
# Auxiliary function body that ignores the fts5 API command CMD and
# returns its extra argument A unchanged.
proc first {cmd A} {
  set A
}
sqlite3_fts5_create_function db first first
foreach {tn arg} {
1 "123"
2 "'01234567890ABCDEF'"
3 "x'0123'"
4 "x'ABCD'"
5 "x'0123456789ABCDEF'"
6 "x'0123456789abcdef'"
7 "22.5"
8 "-91.5"
9 "-.5"
10 "''''"
11 "+.5"
} {
set func [string map {' ''} "first($arg)"]
do_execsql_test 4.1.$tn "
INSERT INTO zzz(zzz, rank) VALUES('rank', '$func');
SELECT rank IS $arg FROM zzz WHERE zzz MATCH 'a + b + c'
" 1
}
do_execsql_test 4.2 {
INSERT INTO zzz(zzz, rank) VALUES('rank', 'f1()');
} {}
#-------------------------------------------------------------------------
# Misquoting in tokenize= and other options.
#
# 5.1: unterminated quote inside the tokenize= argument.
# 5.2: a "[" inside a bracket-quoted column name is accepted.
# 5.3: a stray "]" after a bracket-quoted column name is rejected.
do_catchsql_test 5.1 {
  CREATE VIRTUAL TABLE xx USING fts5(x, tokenize="porter 'ascii");
} {1 {parse error in tokenize directive}}
do_catchsql_test 5.2 {
  CREATE VIRTUAL TABLE xx USING fts5(x, [y[]);
} {0 {}}
do_catchsql_test 5.3 {
  CREATE VIRTUAL TABLE yy USING fts5(x, [y]]);
} {1 {unrecognized token: "]"}}
#-------------------------------------------------------------------------
# Errors in prefix= directives.
#
do_catchsql_test 6.1 {
CREATE VIRTUAL TABLE abc USING fts5(a, prefix=1, prefix=2);
} {1 {multiple prefix=... directives}}
do_catchsql_test 6.2 {
CREATE VIRTUAL TABLE abc USING fts5(a, prefix='1, 2, 1001');
} {1 {prefix length out of range: 1001}}
do_catchsql_test 6.3 {
CREATE VIRTUAL TAbLE abc USING fts5(a, prefix='1, 2, 0000');
} {1 {prefix length out of range: 0}}
do_catchsql_test 6.4 {
CREATE VIRTUAL TABLE abc USING fts5(a, prefix='1 , 1000000');
} {1 {malformed prefix=... directive}}
#-------------------------------------------------------------------------
# Duplicate tokenize= and other options.
#
do_catchsql_test 7.1 {
CREATE VIRTUAL TABLE abc USING fts5(a, tokenize=porter, tokenize=ascii);
} {1 {multiple tokenize=... directives}}
do_catchsql_test 7.2 {
CREATE VIRTUAL TABLE abc USING fts5(a, content=porter, content=ascii);
} {1 {multiple content=... directives}}
do_catchsql_test 7.3 {
CREATE VIRTUAL TABLE abc USING fts5(a, content_rowid=porter, content_rowid=a);
} {1 {multiple content_rowid=... directives}}
#-------------------------------------------------------------------------
# Unrecognized option.
#
do_catchsql_test 8.0 {
CREATE VIRTUAL TABLE abc USING fts5(a, nosuchoption=123);
} {1 {unrecognized option: "nosuchoption"}}
do_catchsql_test 8.1 {
CREATE VIRTUAL TABLE abc USING fts5(a, "nosuchoption"=123);
} {1 {parse error in ""nosuchoption"=123"}}
#-------------------------------------------------------------------------
# Errors in:
#
# 9.1.* 'pgsz' options.
# 9.2.* 'automerge' options.
# 9.3.* 'crisismerge' options.
#
do_execsql_test 9.0 {
CREATE VIRTUAL TABLE abc USING fts5(a, b);
} {}
do_catchsql_test 9.1.1 {
INSERT INTO abc(abc, rank) VALUES('pgsz', -5);
} {1 {SQL logic error or missing database}}
do_catchsql_test 9.1.2 {
INSERT INTO abc(abc, rank) VALUES('pgsz', 50000000);
} {1 {SQL logic error or missing database}}
do_catchsql_test 9.1.3 {
INSERT INTO abc(abc, rank) VALUES('pgsz', 66.67);
} {1 {SQL logic error or missing database}}
do_catchsql_test 9.2.1 {
INSERT INTO abc(abc, rank) VALUES('automerge', -5);
} {1 {SQL logic error or missing database}}
do_catchsql_test 9.2.2 {
INSERT INTO abc(abc, rank) VALUES('automerge', 50000000);
} {1 {SQL logic error or missing database}}
do_catchsql_test 9.2.3 {
INSERT INTO abc(abc, rank) VALUES('automerge', 66.67);
} {1 {SQL logic error or missing database}}
do_execsql_test 9.2.4 {
INSERT INTO abc(abc, rank) VALUES('automerge', 1);
} {}
do_catchsql_test 9.3.1 {
INSERT INTO abc(abc, rank) VALUES('crisismerge', -5);
} {1 {SQL logic error or missing database}}
do_catchsql_test 9.3.2 {
INSERT INTO abc(abc, rank) VALUES('crisismerge', 66.67);
} {1 {SQL logic error or missing database}}
do_execsql_test 9.3.3 {
INSERT INTO abc(abc, rank) VALUES('crisismerge', 1);
} {}
do_execsql_test 9.3.4 {
INSERT INTO abc(abc, rank) VALUES('crisismerge', 50000000);
} {}
do_catchsql_test 9.4.1 {
INSERT INTO abc(abc, rank) VALUES('nosuchoption', 1);
} {1 {SQL logic error or missing database}}
finish_test

View File

@@ -0,0 +1,258 @@
# 2014 Dec 20
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file contains tests for the content= and content_rowid= options.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5content
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
#-------------------------------------------------------------------------
# Contentless tables
#
do_execsql_test 1.1 {
CREATE VIRTUAL TABLE f1 USING fts5(a, b, content='');
INSERT INTO f1(rowid, a, b) VALUES(1, 'one', 'o n e');
INSERT INTO f1(rowid, a, b) VALUES(2, 'two', 't w o');
INSERT INTO f1(rowid, a, b) VALUES(3, 'three', 't h r e e');
}
do_execsql_test 1.2 {
SELECT rowid FROM f1 WHERE f1 MATCH 'o';
} {1 2}
do_execsql_test 1.3 {
INSERT INTO f1(a, b) VALUES('four', 'f o u r');
SELECT rowid FROM f1 WHERE f1 MATCH 'o';
} {1 2 4}
do_execsql_test 1.4 {
SELECT rowid, a, b FROM f1 WHERE f1 MATCH 'o';
} {1 {} {} 2 {} {} 4 {} {}}
do_execsql_test 1.5 {
SELECT rowid, highlight(f1, 0, '[', ']') FROM f1 WHERE f1 MATCH 'o';
} {1 {} 2 {} 4 {}}
do_execsql_test 1.6 {
SELECT rowid, highlight(f1, 0, '[', ']') IS NULL FROM f1 WHERE f1 MATCH 'o';
} {1 1 2 1 4 1}
do_execsql_test 1.7 {
SELECT rowid, snippet(f1, -1, '[', ']', '...', 5) IS NULL
FROM f1 WHERE f1 MATCH 'o';
} {1 1 2 1 4 1}
do_execsql_test 1.8 {
SELECT rowid, snippet(f1, 1, '[', ']', '...', 5) IS NULL
FROM f1 WHERE f1 MATCH 'o';
} {1 1 2 1 4 1}
do_execsql_test 1.9 {
SELECT rowid FROM f1;
} {1 2 3 4}
do_execsql_test 1.10 {
SELECT * FROM f1;
} {{} {} {} {} {} {} {} {}}
do_execsql_test 1.11 {
SELECT rowid, a, b FROM f1 ORDER BY rowid ASC;
} {1 {} {} 2 {} {} 3 {} {} 4 {} {}}
do_execsql_test 1.12 {
SELECT a IS NULL FROM f1;
} {1 1 1 1}
do_catchsql_test 1.13 {
DELETE FROM f1 WHERE rowid = 2;
} {1 {cannot DELETE from contentless fts5 table: f1}}
do_catchsql_test 1.14 {
UPDATE f1 SET a = 'a b c' WHERE rowid = 2;
} {1 {cannot UPDATE contentless fts5 table: f1}}
do_execsql_test 1.15 {
INSERT INTO f1(f1, rowid, a, b) VALUES('delete', 2, 'two', 't w o');
} {}
do_execsql_test 1.16 {
SELECT rowid FROM f1 WHERE f1 MATCH 'o';
} {1 4}
do_execsql_test 1.17 {
SELECT rowid FROM f1;
} {1 3 4}
#-------------------------------------------------------------------------
# External content tables
#
reset_db
do_execsql_test 2.1 {
-- Create a table. And an external content fts5 table to index it.
CREATE TABLE tbl(a INTEGER PRIMARY KEY, b, c);
CREATE VIRTUAL TABLE fts_idx USING fts5(b, c, content='tbl', content_rowid='a');
-- Triggers to keep the FTS index up to date.
CREATE TRIGGER tbl_ai AFTER INSERT ON tbl BEGIN
INSERT INTO fts_idx(rowid, b, c) VALUES (new.a, new.b, new.c);
END;
CREATE TRIGGER tbl_ad AFTER DELETE ON tbl BEGIN
INSERT INTO fts_idx(fts_idx, rowid, b, c)
VALUES('delete', old.a, old.b, old.c);
END;
CREATE TRIGGER tbl_au AFTER UPDATE ON tbl BEGIN
INSERT INTO fts_idx(fts_idx, rowid, b, c)
VALUES('delete', old.a, old.b, old.c);
INSERT INTO fts_idx(rowid, b, c) VALUES (new.a, new.b, new.c);
END;
}
do_execsql_test 2.2 {
INSERT INTO tbl VALUES(1, 'one', 'o n e');
INSERT INTO tbl VALUES(NULL, 'two', 't w o');
INSERT INTO tbl VALUES(3, 'three', 't h r e e');
}
do_execsql_test 2.3 {
INSERT INTO fts_idx(fts_idx) VALUES('integrity-check');
}
do_execsql_test 2.4 {
DELETE FROM tbl WHERE rowid=2;
INSERT INTO fts_idx(fts_idx) VALUES('integrity-check');
}
do_execsql_test 2.5 {
UPDATE tbl SET c = c || ' x y z';
INSERT INTO fts_idx(fts_idx) VALUES('integrity-check');
}
do_execsql_test 2.6 {
SELECT * FROM fts_idx WHERE fts_idx MATCH 't AND x';
} {three {t h r e e x y z}}
do_execsql_test 2.7 {
SELECT highlight(fts_idx, 1, '[', ']') FROM fts_idx
WHERE fts_idx MATCH 't AND x';
} {{[t] h r e e [x] y z}}
#-------------------------------------------------------------------------
# Quick tests of the 'delete-all' command.
#
do_execsql_test 3.1 {
CREATE VIRTUAL TABLE t3 USING fts5(x, content='');
INSERT INTO t3 VALUES('a b c');
INSERT INTO t3 VALUES('d e f');
}
do_execsql_test 3.2 {
SELECT count(*) FROM t3_docsize;
SELECT count(*) FROM t3_data;
} {2 4}
do_execsql_test 3.3 {
INSERT INTO t3(t3) VALUES('delete-all');
SELECT count(*) FROM t3_docsize;
SELECT count(*) FROM t3_data;
} {0 2}
do_execsql_test 3.4 {
INSERT INTO t3 VALUES('a b c');
INSERT INTO t3 VALUES('d e f');
SELECT rowid FROM t3 WHERE t3 MATCH 'e';
} {2}
do_execsql_test 3.5 {
SELECT rowid FROM t3 WHERE t3 MATCH 'c';
} {1}
do_execsql_test 3.6 {
SELECT count(*) FROM t3_docsize;
SELECT count(*) FROM t3_data;
} {2 4}
do_execsql_test 3.7 {
CREATE VIRTUAL TABLE t4 USING fts5(x);
} {}
do_catchsql_test 3.8 {
INSERT INTO t4(t4) VALUES('delete-all');
} {1 {'delete-all' may only be used with a contentless or external content fts5 table}}
#-------------------------------------------------------------------------
# Test an external content table with a more interesting schema.
#
do_execsql_test 4.1 {
CREATE TABLE x2(a, "key col" PRIMARY KEY, b, c) WITHOUT ROWID;
INSERT INTO x2 VALUES('a b', 1, 'c d' , 'e f');
INSERT INTO x2 VALUES('x y', -40, 'z z' , 'y x');
CREATE VIRTUAL TABLE t2 USING fts5(a, c, content=x2, content_rowid='key col');
INSERT INTO t2(t2) VALUES('rebuild');
}
do_execsql_test 4.2 { SELECT rowid FROM t2 } {-40 1}
do_execsql_test 4.3 { SELECT rowid FROM t2 WHERE t2 MATCH 'c'} {}
do_execsql_test 4.4 { SELECT rowid FROM t2 WHERE t2 MATCH 'a'} {1}
do_execsql_test 4.5 { SELECT rowid FROM t2 WHERE t2 MATCH 'x'} {-40}
do_execsql_test 4.6 { INSERT INTO t2(t2) VALUES('integrity-check') } {}
do_execsql_test 4.7 {
DELETE FROM x2 WHERE "key col" = 1;
INSERT INTO t2(t2, rowid, a, c) VALUES('delete', 1, 'a b', 'e f');
INSERT INTO t2(t2) VALUES('integrity-check');
}
do_execsql_test 4.8 { SELECT rowid FROM t2 WHERE t2 MATCH 'b'} {}
do_execsql_test 4.9 { SELECT rowid FROM t2 WHERE t2 MATCH 'y'} {-40}
#-------------------------------------------------------------------------
# Test that if the 'rowid' field of a 'delete' is not an integer, no
# changes are made to the FTS index.
#
do_execsql_test 5.0 {
CREATE VIRTUAL TABLE t5 USING fts5(a, b, content=);
INSERT INTO t5(rowid, a, b) VALUES(-1, 'one', 'two');
INSERT INTO t5(rowid, a, b) VALUES( 0, 'three', 'four');
INSERT INTO t5(rowid, a, b) VALUES( 1, 'five', 'six');
}
set ::checksum [execsql {SELECT md5sum(id, block) FROM t5_data}]
do_execsql_test 5.1 {
INSERT INTO t5(t5, rowid, a, b) VALUES('delete', NULL, 'three', 'four');
SELECT md5sum(id, block) FROM t5_data;
} $::checksum
#-------------------------------------------------------------------------
# Check that a contentless table can be dropped.
#
reset_db
do_execsql_test 6.1 {
CREATE VIRTUAL TABLE xx USING fts5(x, y, content="");
SELECT name FROM sqlite_master;
} {xx xx_data xx_idx xx_docsize xx_config}
do_execsql_test 6.2 {
DROP TABLE xx;
SELECT name FROM sqlite_master;
} {}
finish_test

View File

@@ -0,0 +1,99 @@
# 2014 Dec 20
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file tests that the FTS5 'integrity-check' command detects
# inconsistencies (corruption) in the on-disk backing tables.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5corrupt
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}
do_test 1.1 {
db transaction {
for {set i 1} {$i < 200} {incr i} {
set doc [list [string repeat x $i] [string repeat y $i]]
execsql { INSERT INTO t1(rowid, x) VALUES($i, $doc) }
}
}
fts5_level_segs t1
} {1}
db_save
do_execsql_test 1.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
set segid [lindex [fts5_level_segids t1] 0]
do_test 1.3 {
execsql {
DELETE FROM t1_data WHERE rowid = fts5_rowid('segment', $segid, 0, 4);
}
catchsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {1 {database disk image is malformed}}
do_test 1.4 {
db_restore_and_reopen
execsql {
UPDATE t1_data set block = X'00000000' || substr(block, 5) WHERE
rowid = fts5_rowid('segment', $segid, 0, 4);
}
catchsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {1 {database disk image is malformed}}
db_restore_and_reopen
#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}
#--------------------------------------------------------------------
#
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE t2 USING fts5(x);
INSERT INTO t2(t2, rank) VALUES('pgsz', 64);
}
db func rnddoc fts5_rnddoc
do_test 2.1 {
for {set i 0} {$i < 500} {incr i} {
execsql { INSERT INTO t2 VALUES(rnddoc(50)) }
}
execsql { INSERT INTO t2(t2) VALUES('integrity-check') }
} {}
#--------------------------------------------------------------------
# A mundane test - missing row in the %_content table.
#
do_execsql_test 3.0 {
CREATE VIRTUAL TABLE t3 USING fts5(x);
INSERT INTO t3 VALUES('one o');
INSERT INTO t3 VALUES('two e');
INSERT INTO t3 VALUES('three o');
INSERT INTO t3 VALUES('four e');
INSERT INTO t3 VALUES('five o');
}
do_execsql_test 3.1 {
SELECT * FROM t3 WHERE t3 MATCH 'o'
} {{one o} {three o} {five o}}
# Remove one row from the %_content table behind the index, then run a query
# that matches that row; the query is expected to fail with a corruption
# error. Named 3.2 (it previously reused 3.1) so that each test in this file
# has a distinct name and a failure report identifies a single test.
do_catchsql_test 3.2 {
  DELETE FROM t3_content WHERE rowid = 3;
  SELECT * FROM t3 WHERE t3 MATCH 'o';
} {1 {database disk image is malformed}}
finish_test

View File

@@ -0,0 +1,272 @@
# 2015 Apr 24
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file tests that FTS5 handles corrupt databases (i.e. internal
# inconsistencies in the backing tables) correctly. In this case
# "correctly" means without crashing.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5corrupt2
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
sqlite3_fts5_may_be_corrupt 1
# Create a simple FTS5 table containing 100 documents. Each document
# contains 10 terms, each of which starts with the character "x".
#
expr srand(0)
db func rnddoc fts5_rnddoc
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
WITH ii(i) AS (SELECT 1 UNION SELECT i+1 FROM ii WHERE i<100)
INSERT INTO t1 SELECT rnddoc(10) FROM ii;
}
set mask [expr 31 << 31]
if 1 {
# Test 1:
#
# For each page in the t1_data table, open a transaction and DELETE
# the t1_data entry. Then run:
#
# * an integrity-check, and
# * unless the deleted block was a b-tree node, a query for "t1 MATCH 'x*'"
#
# and check that the corruption is detected in both cases. Then
# roll back the transaction.
#
# Test 2:
#
# Same thing, except instead of deleting a row from t1_data, replace its
# blob content with integer value 14.
#
foreach {tno stmt} {
1 { DELETE FROM t1_data WHERE rowid=$rowid }
2 { UPDATE t1_data SET block=14 WHERE rowid=$rowid }
} {
set tn 0
foreach rowid [db eval {SELECT rowid FROM t1_data WHERE rowid>10}] {
incr tn
#if {$tn!=224} continue
do_test 1.$tno.$tn.1.$rowid {
execsql { BEGIN }
execsql $stmt
catchsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {1 {database disk image is malformed}}
if {($rowid & $mask)==0} {
# Node is a leaf node, not a b-tree node.
do_catchsql_test 1.$tno.$tn.2.$rowid {
SELECT rowid FROM t1 WHERE t1 MATCH 'x*'
} {1 {database disk image is malformed}}
}
do_execsql_test 1.$tno.$tn.3.$rowid {
ROLLBACK;
INSERT INTO t1(t1) VALUES('integrity-check');
} {}
}
}
# Using the same database as the 1.* tests.
#
# Run N-1 tests, where N is the number of bytes in the rightmost leaf page
# of the fts index. For test $i, truncate the rightmost leafpage to $i
# bytes. Then test that the integrity-check detects the corruption.
#
# Also tested is that "MATCH 'x*'" does not crash and sometimes reports
# corruption. It may not report the db as corrupt because truncating the
# final leaf to some sizes may create a valid leaf page.
#
set lrowid [db one {SELECT max(rowid) FROM t1_data WHERE (rowid & $mask)=0}]
set nbyte [db one {SELECT length(block) FROM t1_data WHERE rowid=$lrowid}]
set all [db eval {SELECT rowid FROM t1}]
for {set i [expr $nbyte-2]} {$i>=0} {incr i -1} {
do_execsql_test 2.$i.1 {
BEGIN;
UPDATE t1_data SET block = substr(block, 1, $i) WHERE rowid=$lrowid;
}
do_catchsql_test 2.$i.2 {
INSERT INTO t1(t1) VALUES('integrity-check');
} {1 {database disk image is malformed}}
do_test 2.$i.3 {
set res [catchsql {SELECT rowid FROM t1 WHERE t1 MATCH 'x*'}]
expr {
$res=="1 {database disk image is malformed}"
|| $res=="0 {$all}"
}
} 1
do_execsql_test 2.$i.4 {
ROLLBACK;
INSERT INTO t1(t1) VALUES('integrity-check');
} {}
}
#-------------------------------------------------------------------------
# Test that corruption in leaf page headers is detected by queries that use
# doclist-indexes.
#
set doc "A B C D E F G H I J "
do_execsql_test 3.0 {
CREATE VIRTUAL TABLE x3 USING fts5(tt);
INSERT INTO x3(x3, rank) VALUES('pgsz', 32);
WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<1000)
INSERT INTO x3
SELECT ($doc || CASE WHEN (i%50)==0 THEN 'X' ELSE 'Y' END) FROM ii;
}
foreach {tn hdr} {
1 "\x00\x00\x00\x00"
2 "\xFF\xFF\xFF\xFF"
3 "\x44\x45"
} {
set tn2 0
set nCorrupt 0
set nCorrupt2 0
foreach rowid [db eval {SELECT rowid FROM x3_data WHERE rowid>10}] {
if {$rowid & $mask} continue
incr tn2
do_test 3.$tn.$tn2.1 {
execsql BEGIN
set fd [db incrblob main x3_data block $rowid]
fconfigure $fd -encoding binary -translation binary
set existing [read $fd [string length $hdr]]
seek $fd 0
puts -nonewline $fd $hdr
close $fd
set res [catchsql {SELECT rowid FROM x3 WHERE x3 MATCH 'x AND a'}]
if {$res == "1 {database disk image is malformed}"} {incr nCorrupt}
set {} 1
} {1}
if {($tn2 % 10)==0 && $existing != $hdr} {
do_test 3.$tn.$tn2.2 {
catchsql { INSERT INTO x3(x3) VALUES('integrity-check') }
} {1 {database disk image is malformed}}
}
execsql ROLLBACK
}
do_test 3.$tn.x { expr $nCorrupt>0 } 1
}
#--------------------------------------------------------------------
#
set doc "A B C D E F G H I J "
do_execsql_test 4.0 {
CREATE VIRTUAL TABLE x4 USING fts5(tt);
INSERT INTO x4(x4, rank) VALUES('pgsz', 32);
WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<10)
INSERT INTO x4
SELECT ($doc || CASE WHEN (i%50)==0 THEN 'X' ELSE 'Y' END) FROM ii;
}
foreach {tn nCut} {
1 1
2 10
} {
set tn2 0
set nCorrupt 0
foreach rowid [db eval {SELECT rowid FROM x4_data WHERE rowid>10}] {
if {$rowid & $mask} continue
incr tn2
do_test 4.$tn.$tn2 {
execsql {
BEGIN;
UPDATE x4_data SET block = substr(block, 1, length(block)-$nCut)
WHERE id = $rowid;
}
set res [catchsql {
SELECT rowid FROM x4 WHERE x4 MATCH 'a' ORDER BY 1 DESC
}]
if {$res == "1 {database disk image is malformed}"} {incr nCorrupt}
set {} 1
} {1}
execsql ROLLBACK
}
do_test 4.$tn.x { expr $nCorrupt>0 } 1
}
}
#--------------------------------------------------------------------
# Overwrite the first bytes of each x5_data record selected below with a
# fixed header and run the integrity-check. The result of the check is
# deliberately discarded: each test only requires that the check returns
# without crashing.
#
# These tests are numbered 5.* (they previously reused the 4.* names of the
# x4 tests above) so that every test name in this file is distinct.
#
set doc [string repeat "A B C " 1000]
do_execsql_test 5.0 {
  CREATE VIRTUAL TABLE x5 USING fts5(tt);
  INSERT INTO x5(x5, rank) VALUES('pgsz', 32);
  WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<10)
  INSERT INTO x5 SELECT $doc FROM ii;
}

foreach {tn hdr} {
  1 "\x00\x01"
} {
  set tn2 0
  foreach rowid [db eval {SELECT rowid FROM x5_data WHERE rowid>10}] {
    # Skip rowids that have any of the $mask bits set.
    if {$rowid & $mask} continue
    incr tn2
    do_test 5.$tn.$tn2 {
      execsql BEGIN

      # Write $hdr over the start of the record through an incremental
      # blob handle.
      set fd [db incrblob main x5_data block $rowid]
      fconfigure $fd -encoding binary -translation binary
      puts -nonewline $fd $hdr
      close $fd

      # Any outcome is acceptable here, so the result is ignored.
      catchsql { INSERT INTO x5(x5) VALUES('integrity-check') }
      set {} {}
    } {}

    # Undo the overwrite before moving on to the next record.
    execsql ROLLBACK
  }
}
#--------------------------------------------------------------------
reset_db
do_execsql_test 5.1 {
CREATE VIRTUAL TABLE x5 USING fts5(tt);
INSERT INTO x5 VALUES('a');
INSERT INTO x5 VALUES('a a');
INSERT INTO x5 VALUES('a a a');
INSERT INTO x5 VALUES('a a a a');
UPDATE x5_docsize SET sz = X'' WHERE id=3;
}
# Callback registered below with sqlite3_fts5_create_function. Forwards the
# request to the "xColumnSize" method of the command $cmd, passing $i, and
# returns whatever that method returns.
proc colsize {cmd i} {
  return [$cmd xColumnSize $i]
}
sqlite3_fts5_create_function db colsize colsize
do_catchsql_test 5.2 {
SELECT colsize(x5, 0) FROM x5 WHERE x5 MATCH 'a'
} {1 SQLITE_CORRUPT_VTAB}
sqlite3_fts5_may_be_corrupt 0
finish_test

View File

@@ -0,0 +1,80 @@
# 2015 Apr 24
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file tests that FTS5 handles corrupt databases (i.e. internal
# inconsistencies in the backing tables) correctly. In this case
# "correctly" means without crashing.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5corrupt3
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
sqlite3_fts5_may_be_corrupt 1
# Create a simple FTS5 table containing 100 documents. Each document
# contains 10 terms, each of which starts with the character "x".
#
expr srand(0)
db func rnddoc fts5_rnddoc
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x);
INSERT INTO t1(t1, rank) VALUES('pgsz', 64);
WITH ii(i) AS (SELECT 1 UNION SELECT i+1 FROM ii WHERE i<100)
INSERT INTO t1 SELECT rnddoc(10) FROM ii;
}
set mask [expr 31 << 31]
do_test 1.1 {
# Pick out the rowid of the right-most b-tree leaf in the new segment.
set rowid [db one {
SELECT max(rowid) FROM t1_data WHERE ((rowid>>31) & 0x0F)==1
}]
set L [db one {SELECT length(block) FROM t1_data WHERE rowid = $rowid}]
set {} {}
} {}
for {set i 0} {$i < $L} {incr i} {
do_test 1.2.$i {
catchsql {
BEGIN;
UPDATE t1_data SET block = substr(block, 1, $i) WHERE id = $rowid;
INSERT INTO t1(t1) VALUES('integrity-check');
}
} {1 {database disk image is malformed}}
catchsql ROLLBACK
}
#-------------------------------------------------------------------------
# Test that trailing bytes appended to the averages record are ignored.
#
do_execsql_test 2.1 {
CREATE VIRTUAL TABLE t2 USING fts5(x);
INSERT INTO t2 VALUES(rnddoc(10));
INSERT INTO t2 VALUES(rnddoc(10));
SELECT length(block) FROM t2_data WHERE id=1;
} {2}
do_execsql_test 2.2 {
UPDATE t2_data SET block = block || 'abcd' WHERE id=1;
SELECT length(block) FROM t2_data WHERE id=1;
} {6}
# After one more insert, the record with id=1 is expected to be 2 bytes long
# again, i.e. the 4 bytes appended by the previous test are no longer there.
# Named 2.3 (it previously reused 2.2) so that each test name in this file
# is distinct.
do_execsql_test 2.3 {
  INSERT INTO t2 VALUES(rnddoc(10));
  SELECT length(block) FROM t2_data WHERE id=1;
} {2}
sqlite3_fts5_may_be_corrupt 0
finish_test

View File

@@ -0,0 +1,132 @@
# 2015 April 21
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This test is focused on uses of doclist-index records.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5dlidx
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
if { $tcl_platform(wordSize)<8 } {
finish_test
return
}
# Run query $sql twice in the caller's context: once as given (test $tn.1,
# expecting $res) and once with " ORDER BY rowid DESC" appended (test $tn.2,
# expecting the values of $res sorted as integers in decreasing order).
proc do_fb_test {tn sql res} {
  set descending [lsort -integer -decreasing $res]
  set reverseSql "$sql ORDER BY rowid DESC"

  uplevel [list do_execsql_test $tn.1 $sql $res]
  uplevel [list do_execsql_test $tn.2 $reverseSql $descending]
}
# Empty FTS5 table t1, repopulate it with $nEntry rows and run a fixed set
# of queries against it. Rows are numbered from 0 to ($nEntry-1). The rowid
# for row $i is:
#
#     ($iFirst + $i*$nStep)
#
# Each document is of the form "a b c a b c a b c...". If the row number ($i)
# is an integer multiple of $spc1, then an "x" token is appended to the
# document. If it is *also* a multiple of $spc2, a "y" token is also appended.
#
# Arguments:
#   tn      Prefix for the names of the tests that are run.
#   spc1    Spacing (in row numbers) of documents that contain "x".
#   spc2    Spacing (in row numbers) of "x" documents that also contain "y".
#   nEntry  Number of rows to insert.
#   iFirst  Rowid assigned to row 0.
#   nStep   Rowid increment between consecutive rows.
#
# Table t1 must already exist when this proc is called.
#
proc do_dlidx_test1 {tn spc1 spc2 nEntry iFirst nStep} {
  do_execsql_test $tn.0 { DELETE FROM t1 }

  # Rowids of the documents that contain "x" and "y" respectively, in
  # insertion order. These are the expected query results.
  set xdoc [list]
  set ydoc [list]

  # The common part of every document does not depend on the row, so build
  # it once rather than on every iteration.
  set base [string trim [string repeat "a b c " 100]]

  execsql BEGIN
  for {set i 0} {$i < $nEntry} {incr i} {
    # Apply the iFirst offset as documented above. It used to be ignored,
    # which went unnoticed only because every caller passes 0.
    set rowid [expr {$iFirst + $i * $nStep}]
    set doc $base
    if {($i % $spc1)==0} {
      lappend xdoc $rowid
      append doc " x"
      if {($i % $spc2)==0} {
        lappend ydoc $rowid
        append doc " y"
      }
    }
    execsql { INSERT INTO t1(rowid, x) VALUES($rowid, $doc) }
  }
  execsql COMMIT

  # NOTE(review): looks like a leftover debugging hook - confirm before
  # removing.
  breakpoint
  do_test $tn.1 {
    execsql { INSERT INTO t1(t1) VALUES('integrity-check') }
  } {}

  # Each query is checked in both ascending and descending rowid order.
  do_fb_test $tn.3.1 { SELECT rowid FROM t1 WHERE t1 MATCH 'a AND x' } $xdoc
  do_fb_test $tn.3.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'x AND a' } $xdoc

  do_fb_test $tn.4.1 { SELECT rowid FROM t1 WHERE t1 MATCH 'a AND y' } $ydoc
  do_fb_test $tn.4.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'y AND a' } $ydoc

  do_fb_test $tn.5.1 {
    SELECT rowid FROM t1 WHERE t1 MATCH 'a + b + c + x' } $xdoc
  do_fb_test $tn.5.2 {
    SELECT rowid FROM t1 WHERE t1 MATCH 'b + c + x + y' } $ydoc
}
foreach {tn pgsz} {
1 32
2 200
} {
do_execsql_test $tn.0 {
DROP TABLE IF EXISTS t1;
CREATE VIRTUAL TABLE t1 USING fts5(x);
INSERT INTO t1(t1, rank) VALUES('pgsz', $pgsz);
}
do_dlidx_test1 1.$tn.1 10 100 10000 0 1000
do_dlidx_test1 1.$tn.2 10 10 10000 0 128
do_dlidx_test1 1.$tn.3 10 10 66 0 36028797018963970
do_dlidx_test1 1.$tn.4 10 10 50 0 150000000000000000
do_dlidx_test1 1.$tn.5 10 10 200 0 [expr 1<<55]
do_dlidx_test1 1.$tn.6 10 10 30 0 [expr 1<<58]
}
# Recreate FTS5 table t1 with pgsz set to 64, insert one row containing
# 'b a', then insert $nEntry further rows that each contain the token "a"
# repeated 500 times. The further rows are given rowids $iFirst,
# $iFirst+$nStep, $iFirst+2*$nStep and so on.
#
# Tests $tn.1 and $tn.2 then check that the query 'b AND a' returns only
# rowid 1, in default order and with ORDER BY rowid DESC respectively.
#
# The locals str, iFirst, nStep and nEntry are referenced by name from
# within the SQL text below, so they must keep these names.
#
proc do_dlidx_test2 {tn nEntry iFirst nStep} {
set str [string repeat "a " 500]
execsql {
BEGIN;
DROP TABLE IF EXISTS t1;
CREATE VIRTUAL TABLE t1 USING fts5(x);
INSERT INTO t1(t1, rank) VALUES('pgsz', 64);
INSERT INTO t1 VALUES('b a');
WITH iii(ii, i) AS (
SELECT 1, $iFirst UNION ALL
SELECT ii+1, i+$nStep FROM iii WHERE ii<$nEntry
)
INSERT INTO t1(rowid,x) SELECT i, $str FROM iii;
COMMIT;
}
# Only the first row inserted contains "b", so it is the only match.
do_execsql_test $tn.1 {
SELECT rowid FROM t1 WHERE t1 MATCH 'b AND a'
} {1}
# NOTE(review): looks like a leftover debugging hook - confirm before removing.
breakpoint
do_execsql_test $tn.2 {
SELECT rowid FROM t1 WHERE t1 MATCH 'b AND a' ORDER BY rowid DESC
} {1}
}
do_dlidx_test2 2.1 [expr 20] [expr 1<<57] [expr (1<<57) + 128]
finish_test

View File

@@ -0,0 +1,47 @@
# 2015 April 21
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This test is focused on edge cases in the doclist format.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5doclist
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
#-------------------------------------------------------------------------
# Create a table with 900 columns. Then add some large documents to it.
# All text is in the right most column of the table.
#
do_test 1.0 {
set cols [list]
for {set i 0} {$i < 900} {incr i} { lappend cols "x$i" }
execsql "CREATE VIRTUAL TABLE ccc USING fts5([join $cols ,])"
} {}
db func rnddoc fts5_rnddoc
do_execsql_test 1.1 {
WITH ii(i) AS (SELECT 1 UNION SELECT i+1 FROM ii WHERE i<100)
INSERT INTO ccc(x899) SELECT rnddoc(500) FROM ii;
}
do_execsql_test 1.2 {
INSERT INTO ccc(ccc) VALUES('integrity-check');
}
finish_test

93
ext/fts5/test/fts5ea.test Normal file
View File

@@ -0,0 +1,93 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
#
# Test the fts5 expression parser directly using the fts5_expr() SQL
# test function.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ea
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
# Test $tn: pass $expr to the fts5_expr() SQL function and expect the
# statement to fail with error message $err.
#
# The expression is stored in the global variable se_expr and the SQL refers
# to it as $se_expr rather than embedding the text.
# NOTE(review): presumably $se_expr is resolved as a bound SQL variable when
# the statement runs - confirm against the test harness.
proc do_syntax_error_test {tn expr err} {
set ::se_expr $expr
do_catchsql_test $tn {SELECT fts5_expr($se_expr)} [list 1 $err]
}
# Test $tn: pass $expr to the fts5_expr() SQL function and expect the
# statement to return the single value $res. Uses the global variable
# se_expr in the same way as do_syntax_error_test.
proc do_syntax_test {tn expr res} {
set ::se_expr $expr
do_execsql_test $tn {SELECT fts5_expr($se_expr)} [list $res]
}
foreach {tn expr res} {
1 {abc} {"abc"}
2 {abc def} {"abc" AND "def"}
3 {abc*} {"abc" *}
4 {"abc def ghi" *} {"abc" + "def" + "ghi" *}
5 {one AND two} {"one" AND "two"}
6 {one+two} {"one" + "two"}
7 {one AND two OR three} {("one" AND "two") OR "three"}
8 {one OR two AND three} {"one" OR ("two" AND "three")}
9 {NEAR(one two)} {NEAR("one" "two", 10)}
10 {NEAR("one three"* two, 5)} {NEAR("one" + "three" * "two", 5)}
11 {a OR b NOT c} {"a" OR ("b" NOT "c")}
12 "\x20one\x20two\x20three" {"one" AND "two" AND "three"}
13 "\x09one\x0Atwo\x0Dthree" {"one" AND "two" AND "three"}
14 {"abc""def"} {"abc" + "def"}
} {
do_execsql_test 1.$tn {SELECT fts5_expr($expr)} [list $res]
}
foreach {tn expr res} {
1 {c1:abc}
{c1 : "abc"}
2 {c2 : NEAR(one two) c1:"hello world"}
{c2 : NEAR("one" "two", 10) AND c1 : "hello" + "world"}
} {
do_execsql_test 2.$tn {SELECT fts5_expr($expr, 'c1', 'c2')} [list $res]
}
foreach {tn expr err} {
1 {AND} {fts5: syntax error near "AND"}
2 {abc def AND} {fts5: syntax error near ""}
3 {abc OR AND} {fts5: syntax error near "AND"}
4 {(a OR b) abc} {fts5: syntax error near "abc"}
5 {NEaR (a b)} {fts5: syntax error near "NEaR"}
6 {NEa (a b)} {fts5: syntax error near "NEa"}
7 {(a OR b) NOT c)} {fts5: syntax error near ")"}
8 {nosuch: a nosuch2: b} {no such column: nosuch}
9 {addr: a nosuch2: b} {no such column: nosuch2}
10 {NOT} {fts5: syntax error near "NOT"}
11 {a AND "abc} {unterminated string}
12 {NEAR(a b, xyz)} {expected integer, got "xyz"}
13 {NEAR(a b, // )} {fts5: syntax error near "/"}
14 {NEAR(a b, "xyz" )} {expected integer, got ""xyz""}
} {
do_catchsql_test 3.$tn {SELECT fts5_expr($expr, 'name', 'addr')} [list 1 $err]
}
#-------------------------------------------------------------------------
# Experiment with a tokenizer that considers " to be a token character.
#
do_execsql_test 4.0 {
SELECT fts5_expr('a AND """"', 'x', 'tokenize="unicode61 tokenchars ''""''"');
} {{"a" AND """"}}
finish_test

60
ext/fts5/test/fts5eb.test Normal file
View File

@@ -0,0 +1,60 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5eb
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
# Test $tn: pass $expr to the fts5_expr() SQL function and expect the
# statement to fail with error message $err.
#
# The expression is stored in the global variable se_expr and the SQL refers
# to it as $se_expr rather than embedding the text.
# NOTE(review): presumably $se_expr is resolved as a bound SQL variable when
# the statement runs - confirm against the test harness.
proc do_syntax_error_test {tn expr err} {
set ::se_expr $expr
do_catchsql_test $tn {SELECT fts5_expr($se_expr)} [list 1 $err]
}
# Test $tn: pass $expr to the fts5_expr() SQL function and expect the
# statement to return the single value $res. Uses the global variable
# se_expr in the same way as do_syntax_error_test.
proc do_syntax_test {tn expr res} {
set ::se_expr $expr
do_execsql_test $tn {SELECT fts5_expr($se_expr)} [list $res]
}
foreach {tn expr res} {
1 {abc} {"abc"}
2 {abc .} {"abc"}
3 {.} {}
4 {abc OR .} {"abc"}
5 {abc NOT .} {"abc"}
6 {abc AND .} {"abc"}
7 {. OR abc} {"abc"}
8 {. NOT abc} {"abc"}
9 {. AND abc} {"abc"}
10 {abc + . + def} {"abc" + "def"}
11 {abc . def} {"abc" AND "def"}
12 {r+e OR w} {"r" + "e" OR "w"}
} {
do_execsql_test 1.$tn {SELECT fts5_expr($expr)} [list $res]
}
do_catchsql_test 2.1 {
SELECT fts5_expr()
} {1 {wrong number of arguments to function fts5_expr}}
# Calling fts5_expr_tcl() with no arguments must fail with an argument-count
# error. Named 2.2 (it previously reused 2.1) so that each test name in this
# file is distinct.
do_catchsql_test 2.2 {
  SELECT fts5_expr_tcl()
} {1 {wrong number of arguments to function fts5_expr_tcl}}
finish_test

View File

@@ -0,0 +1,353 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS5 module.
#
source [file join [file dirname [info script]] fts5_common.tcl]
source $testdir/malloc_common.tcl
set testprefix fts5fault1
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
# Simple tests:
#
# 1: CREATE VIRTUAL TABLE
# 2: INSERT statement
# 3: DELETE statement
# 4: MATCH expressions
#
#
faultsim_save_and_close
do_faultsim_test 1 -faults ioerr-t* -prep {
faultsim_restore_and_reopen
} -body {
execsql { CREATE VIRTUAL TABLE t1 USING fts5(a, b, prefix='1, 2, 3') }
} -test {
faultsim_test_result {0 {}} {1 {vtable constructor failed: t1}}
}
reset_db
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE t1 USING fts5(a, b, prefix='1, 2, 3');
}
faultsim_save_and_close
do_faultsim_test 2 -prep {
faultsim_restore_and_reopen
} -body {
execsql {
INSERT INTO t1 VALUES('a b c', 'a bc def ghij klmno');
}
} -test {
faultsim_test_result {0 {}}
}
reset_db
do_execsql_test 3.0 {
CREATE VIRTUAL TABLE t1 USING fts5(a, b, prefix='1, 2, 3');
INSERT INTO t1 VALUES('a b c', 'a bc def ghij klmno');
}
faultsim_save_and_close
do_faultsim_test 3 -prep {
faultsim_restore_and_reopen
} -body {
execsql { DELETE FROM t1 }
} -test {
faultsim_test_result {0 {}}
}
reset_db
do_execsql_test 4.0 {
CREATE VIRTUAL TABLE t2 USING fts5(a, b);
INSERT INTO t2 VALUES('m f a jj th q jr ar', 'hj n h h sg j i m');
INSERT INTO t2 VALUES('nr s t g od j kf h', 'sb h aq rg op rb n nl');
INSERT INTO t2 VALUES('do h h pb p p q fr', 'c rj qs or cr a l i');
INSERT INTO t2 VALUES('lk gp t i lq mq qm p', 'h mr g f op ld aj h');
INSERT INTO t2 VALUES('ct d sq kc qi k f j', 'sn gh c of g s qt q');
INSERT INTO t2 VALUES('d ea d d om mp s ab', 'dm hg l df cm ft pa c');
INSERT INTO t2 VALUES('tc dk c jn n t sr ge', 'a a kn bc n i af h');
INSERT INTO t2 VALUES('ie ii d i b sa qo rf', 'a h m aq i b m fn');
INSERT INTO t2 VALUES('gs r fo a er m h li', 'tm c p gl eb ml q r');
INSERT INTO t2 VALUES('k fe fd rd a gi ho kk', 'ng m c r d ml rm r');
}
faultsim_save_and_close
# Each row of the table below is {test-number match-expression expected-rowids}.
# One fault-injection test is run per row against table t2. The -body and
# -test scripts are written in double quotes so that $expr and $res are
# substituted here, when do_faultsim_test is invoked, rather than later when
# the scripts are evaluated.
foreach {tn expr res} {
1 { dk } 7
2 { m f } 1
3 { f* } {1 3 4 5 6 8 9 10}
4 { m OR f } {1 4 5 8 9 10}
5 { sn + gh } {5}
6 { "sn gh" } {5}
7 { NEAR(r a, 5) } {9}
8 { m* f* } {1 4 6 8 9 10}
9 { m* + f* } {1 8}
} {
do_faultsim_test 4.$tn -prep {
faultsim_restore_and_reopen
} -body "
execsql { SELECT rowid FROM t2 WHERE t2 MATCH '$expr' }
" -test "
faultsim_test_result {[list 0 $res]}
"
}
#-------------------------------------------------------------------------
# The following tests use a larger database populated with random data.
#
# The database page size is set to 512 bytes and the FTS5 page size left
# at the default 1000 bytes. This means that reading a node may require
# pulling an overflow page from disk, which is an extra opportunity for
# an error to occur.
#
reset_db
do_execsql_test 5.0.1 {
PRAGMA main.page_size = 512;
CREATE VIRTUAL TABLE x1 USING fts5(a, b);
PRAGMA main.page_size;
} {512}
# Return a list of N pseudo-random tokens.
#
# Each token is built from a random integer in the range 0..999, formatted
# with at least three digits (zero padded), with the digits 0-9 then mapped
# to the letters a-j. For example 7 -> "007" -> "aah".
#
#   n   number of tokens to generate; 0 (or less) yields an empty list
#
proc rnddoc {n} {
  set map [list 0 a 1 b 2 c 3 d 4 e 5 f 6 g 7 h 8 i 9 j]
  set doc [list]
  for {set i 0} {$i < $n} {incr i} {
    # The expression is braced so that expr receives it verbatim instead of
    # a string that has already been through one round of substitution.
    set num [expr {int(rand()*1000)}]
    lappend doc [string map $map [format %.3d $num]]
  }
  return $doc
}
db func rnddoc rnddoc
do_execsql_test 5.0.2 {
WITH r(a, b) AS (
SELECT rnddoc(6), rnddoc(6) UNION ALL
SELECT rnddoc(6), rnddoc(6) FROM r
)
INSERT INTO x1 SELECT * FROM r LIMIT 10000;
}
set res [db one {
SELECT count(*) FROM x1 WHERE x1.a LIKE '%abc%' OR x1.b LIKE '%abc%'}
]
do_faultsim_test 5.1 -faults oom* -body {
execsql { SELECT count(*) FROM x1 WHERE x1 MATCH 'abc' }
} -test {
faultsim_test_result [list 0 $::res]
}
do_faultsim_test 5.2 -faults oom* -body {
execsql { SELECT count(*) FROM x1 WHERE x1 MATCH 'abcd' }
} -test {
faultsim_test_result [list 0 0]
}
# Return 1 if either A or B contains the letter "a" immediately followed by
# two non-space characters, and 0 otherwise.
proc test_astar {a b} {
  set pattern {a[^ ][^ ]}
  foreach text [list $a $b] {
    if {[regexp $pattern $text]} {
      return 1
    }
  }
  return 0
}
db func test_astar test_astar
set res [db one { SELECT count(*) FROM x1 WHERE test_astar(a, b) } ]
do_faultsim_test 5.3 -faults oom* -body {
execsql { SELECT count(*) FROM x1 WHERE x1 MATCH 'a*' }
} -test {
faultsim_test_result [list 0 $::res]
}
do_faultsim_test 5.4 -faults oom* -prep {
db close
sqlite3 db test.db
} -body {
execsql { INSERT INTO x1 VALUES('a b c d', 'e f g h') }
} -test {
faultsim_test_result [list 0 {}]
}
do_faultsim_test 5.5.1 -faults oom* -body {
execsql {
SELECT count(fts5_decode(rowid, block)) FROM x1_data WHERE rowid=1
}
} -test {
faultsim_test_result [list 0 1]
}
do_faultsim_test 5.5.2 -faults oom* -body {
execsql {
SELECT count(fts5_decode(rowid, block)) FROM x1_data WHERE rowid=10
}
} -test {
faultsim_test_result [list 0 1]
}
do_faultsim_test 5.5.3 -faults oom* -body {
execsql {
SELECT count(fts5_decode(rowid, block)) FROM x1_data WHERE rowid = (
SELECT min(rowid) FROM x1_data WHERE rowid>20
)
}
} -test {
faultsim_test_result [list 0 1]
}
do_faultsim_test 5.5.4 -faults oom* -body {
execsql {
SELECT count(fts5_decode(rowid, block)) FROM x1_data WHERE rowid = (
SELECT max(rowid) FROM x1_data
)
}
} -test {
faultsim_test_result [list 0 1]
}
#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 6.0 {
CREATE VIRTUAL TABLE x1 USING fts5(x);
INSERT INTO x1(x1, rank) VALUES('automerge', 0);
INSERT INTO x1 VALUES('a b c'); -- 1
INSERT INTO x1 VALUES('a b c'); -- 2
INSERT INTO x1 VALUES('a b c'); -- 3
INSERT INTO x1 VALUES('a b c'); -- 4
INSERT INTO x1 VALUES('a b c'); -- 5
INSERT INTO x1 VALUES('a b c'); -- 6
INSERT INTO x1 VALUES('a b c'); -- 7
INSERT INTO x1 VALUES('a b c'); -- 8
INSERT INTO x1 VALUES('a b c'); -- 9
INSERT INTO x1 VALUES('a b c'); -- 10
INSERT INTO x1 VALUES('a b c'); -- 11
INSERT INTO x1 VALUES('a b c'); -- 12
INSERT INTO x1 VALUES('a b c'); -- 13
INSERT INTO x1 VALUES('a b c'); -- 14
INSERT INTO x1 VALUES('a b c'); -- 15
SELECT count(*) FROM x1_data;
} {17}
faultsim_save_and_close
do_faultsim_test 6.1 -faults oom* -prep {
faultsim_restore_and_reopen
} -body {
execsql { INSERT INTO x1 VALUES('d e f') }
} -test {
faultsim_test_result [list 0 {}]
if {$testrc==0} {
set nCnt [db one {SELECT count(*) FROM x1_data}]
if {$nCnt!=3} { error "expected 3 entries but there are $nCnt" }
}
}
do_faultsim_test 6.2 -faults oom* -prep {
faultsim_restore_and_reopen
} -body {
execsql { INSERT INTO x1(x1, rank) VALUES('pgsz', 32) }
} -test {
faultsim_test_result [list 0 {}]
}
do_faultsim_test 6.3 -faults oom-* -prep {
faultsim_restore_and_reopen
} -body {
execsql { INSERT INTO x1(x1) VALUES('integrity-check') }
} -test {
faultsim_test_result [list 0 {}]
}
do_faultsim_test 6.4 -faults oom-* -prep {
faultsim_restore_and_reopen
} -body {
execsql { INSERT INTO x1(x1) VALUES('optimize') }
} -test {
faultsim_test_result [list 0 {}]
}
#-------------------------------------------------------------------------
#
do_faultsim_test 7.0 -faults oom* -prep {
catch { db close }
} -body {
sqlite3 db test.db
} -test {
faultsim_test_result [list 0 {}] {1 {}} {1 {initialization of fts5 failed: }}
}
#-------------------------------------------------------------------------
# A prefix query against a large document set.
#
# Return a list of N pseudo-random tokens, every one of which starts with
# the letter "x".
#
# The remainder of each token is built from a random integer in the range
# 0..999, formatted with at least three digits (zero padded), with the
# digits 0-9 then mapped to the letters a-j. For example 7 -> "xaah".
#
#   n   number of tokens to generate; 0 (or less) yields an empty list
#
proc rnddoc {n} {
  set map [list 0 a 1 b 2 c 3 d 4 e 5 f 6 g 7 h 8 i 9 j]
  set doc [list]
  for {set i 0} {$i < $n} {incr i} {
    # The expression is braced so that expr receives it verbatim instead of
    # a string that has already been through one round of substitution.
    set num [expr {int(rand()*1000)}]
    lappend doc "x[string map $map [format %.3d $num]]"
  }
  return $doc
}
reset_db
db func rnddoc rnddoc
do_test 8.0 {
execsql { CREATE VIRTUAL TABLE x1 USING fts5(a) }
set ::res [list]
for {set i 1} {$i<100} {incr i 1} {
execsql { INSERT INTO x1 VALUES( rnddoc(50) ) }
lappend ::res $i
}
} {}
do_faultsim_test 8.1 -faults oom* -prep {
} -body {
execsql {
SELECT rowid FROM x1 WHERE x1 MATCH 'x*'
}
} -test {
faultsim_test_result [list 0 $::res]
}
#-------------------------------------------------------------------------
# Segment promotion.
#
do_test 9.0 {
reset_db
db func rnddoc fts5_rnddoc
execsql {
CREATE VIRTUAL TABLE s2 USING fts5(x);
INSERT INTO s2(s2, rank) VALUES('pgsz', 32);
INSERT INTO s2(s2, rank) VALUES('automerge', 0);
}
for {set i 1} {$i <= 16} {incr i} {
execsql { INSERT INTO s2 VALUES(rnddoc(5)) }
}
fts5_level_segs s2
} {0 1}
set insert_doc [db one {SELECT rnddoc(160)}]
faultsim_save_and_close
do_faultsim_test 9.1 -faults oom-* -prep {
faultsim_restore_and_reopen
} -body {
execsql { INSERT INTO s2 VALUES($::insert_doc) }
} -test {
faultsim_test_result {0 {}}
if {$testrc==0} {
set ls [fts5_level_segs s2]
if {$ls != "2 0"} { error "fts5_level_segs says {$ls}" }
}
}
finish_test

View File

@@ -0,0 +1,140 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
#
# This file is focused on OOM errors.
#
source [file join [file dirname [info script]] fts5_common.tcl]
source $testdir/malloc_common.tcl
set testprefix fts5fault2
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
set doc [string trim [string repeat "x y z " 200]]
do_execsql_test 1.0 {
CREATE TABLE t1(a INTEGER PRIMARY KEY, x);
CREATE VIRTUAL TABLE x1 USING fts5(x, content='t1', content_rowid='a');
INSERT INTO x1(x1, rank) VALUES('pgsz', 32);
WITH input(a,b) AS (
SELECT 1, $doc UNION ALL
SELECT a+1, ($doc || CASE WHEN (a+1)%100 THEN '' ELSE ' xyz' END)
FROM input WHERE a < 1000
)
INSERT INTO t1 SELECT * FROM input;
INSERT INTO x1(x1) VALUES('rebuild');
}
do_faultsim_test 1.1 -faults oom-* -prep {
} -body {
execsql { SELECT rowid FROM x1 WHERE x1 MATCH 'z AND xyz' }
} -test {
faultsim_test_result {0 {100 200 300 400 500 600 700 800 900 1000}}
}
do_faultsim_test 1.2 -faults oom-* -prep {
} -body {
execsql { SELECT rowid FROM x1 WHERE x1 MATCH 'z + xyz' ORDER BY 1 DESC}
} -test {
faultsim_test_result {0 {1000 900 800 700 600 500 400 300 200 100}}
}
#-------------------------------------------------------------------------
# OOM within a query that accesses the in-memory hash table.
#
reset_db
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE "a b c" USING fts5(a, b, c);
INSERT INTO "a b c" VALUES('one two', 'x x x', 'three four');
INSERT INTO "a b c" VALUES('nine ten', 'y y y', 'two two');
}
do_faultsim_test 2.1 -faults oom-trans* -prep {
execsql {
BEGIN;
INSERT INTO "a b c" VALUES('one one', 'z z z', 'nine ten');
}
} -body {
execsql { SELECT rowid FROM "a b c" WHERE "a b c" MATCH 'one' }
} -test {
faultsim_test_result {0 {1 3}}
catchsql { ROLLBACK }
}
#-------------------------------------------------------------------------
# OOM within an 'optimize' operation that writes multiple pages to disk.
#
reset_db
do_execsql_test 3.0 {
CREATE VIRTUAL TABLE zzz USING fts5(z);
INSERT INTO zzz(zzz, rank) VALUES('pgsz', 32);
INSERT INTO zzz VALUES('a b c d');
INSERT INTO zzz SELECT 'c d e f' FROM zzz;
INSERT INTO zzz SELECT 'e f g h' FROM zzz;
INSERT INTO zzz SELECT 'i j k l' FROM zzz;
INSERT INTO zzz SELECT 'l k m n' FROM zzz;
INSERT INTO zzz SELECT 'o p q r' FROM zzz;
}
faultsim_save_and_close
do_faultsim_test 3.1 -faults oom-trans* -prep {
faultsim_restore_and_reopen
execsql { SELECT rowid FROM zzz }
} -body {
execsql { INSERT INTO zzz(zzz) VALUES('optimize') }
} -test {
faultsim_test_result {0 {}}
}
#-------------------------------------------------------------------------
# OOM within an 'integrity-check' operation.
#
reset_db
db func rnddoc fts5_rnddoc
do_execsql_test 4.0 {
CREATE VIRTUAL TABLE zzz USING fts5(z);
INSERT INTO zzz(zzz, rank) VALUES('pgsz', 32);
WITH ii(i) AS (SELECT 1 UNION SELECT i+1 FROM ii WHERE i<10)
INSERT INTO zzz SELECT rnddoc(10) || ' xccc' FROM ii;
}
do_faultsim_test 4.1 -faults oom-trans* -prep {
} -body {
execsql { INSERT INTO zzz(zzz) VALUES('integrity-check') }
} -test {
faultsim_test_result {0 {}}
}
#-------------------------------------------------------------------------
# OOM while parsing a tokenize=option
#
reset_db
faultsim_save_and_close
do_faultsim_test 5.0 -faults oom-* -prep {
faultsim_restore_and_reopen
} -body {
execsql {
CREATE VIRTUAL TABLE uio USING fts5(a, b,
tokenize="porter 'ascii'",
content="another table",
content_rowid="somecolumn"
);
}
} -test {
faultsim_test_result {0 {}}
}
finish_test

View File

@@ -0,0 +1,113 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
#
# This file is focused on OOM errors.
#
source [file join [file dirname [info script]] fts5_common.tcl]
source $testdir/malloc_common.tcl
set testprefix fts5fault3
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
#-------------------------------------------------------------------------
# An OOM while resuming a partially completed segment merge.
#
db func rnddoc fts5_rnddoc
do_test 1.0 {
expr srand(0)
execsql {
CREATE VIRTUAL TABLE xx USING fts5(x);
INSERT INTO xx(xx, rank) VALUES('pgsz', 32);
INSERT INTO xx(xx, rank) VALUES('automerge', 16);
}
for {set i 0} {$i < 10} {incr i} {
execsql {
BEGIN;
INSERT INTO xx(x) VALUES(rnddoc(20));
INSERT INTO xx(x) VALUES(rnddoc(20));
INSERT INTO xx(x) VALUES(rnddoc(20));
COMMIT
}
}
execsql {
INSERT INTO xx(xx, rank) VALUES('automerge', 2);
INSERT INTO xx(xx, rank) VALUES('merge', 50);
}
} {}
faultsim_save_and_close
do_faultsim_test 1 -faults oom-* -prep {
faultsim_restore_and_reopen
} -body {
execsql { INSERT INTO xx(xx, rank) VALUES('merge', 1) }
} -test {
faultsim_test_result [list 0 {}]
}
#-------------------------------------------------------------------------
# An OOM while flushing an unusually large term to disk.
#
reset_db
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE xx USING fts5(x);
INSERT INTO xx(xx, rank) VALUES('pgsz', 32);
}
faultsim_save_and_close
set doc "a long term abcdefghijklmnopqrstuvwxyz "
append doc "and then abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz "
append doc [string repeat "abcdefghijklmnopqrstuvwxyz" 10]
do_faultsim_test 2 -faults oom-* -prep {
faultsim_restore_and_reopen
} -body {
execsql { INSERT INTO xx(x) VALUES ($::doc) }
} -test {
faultsim_test_result [list 0 {}]
}
#-------------------------------------------------------------------------
# An OOM while inserting a document that contains a large number of tokens.
#
reset_db
do_execsql_test 3.0 {
CREATE VIRTUAL TABLE xx USING fts5(x);
}
faultsim_save_and_close
set doc [fts5_rnddoc 1000]
do_faultsim_test 3.1 -faults oom-* -prep {
faultsim_restore_and_reopen
} -body {
execsql { INSERT INTO xx(x) VALUES ($::doc) }
} -test {
faultsim_test_result [list 0 {}]
}
set doc [string repeat "abc " 100]
do_faultsim_test 3.2 -faults oom-* -prep {
faultsim_restore_and_reopen
} -body {
execsql { INSERT INTO xx(x) VALUES ($::doc) }
} -test {
faultsim_test_result [list 0 {}]
}
finish_test

View File

@@ -0,0 +1,419 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
#
# This file is focused on OOM errors.
#
source [file join [file dirname [info script]] fts5_common.tcl]
source $testdir/malloc_common.tcl
set testprefix fts5fault4
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
#-------------------------------------------------------------------------
# An OOM while dropping an fts5 table.
#
db func rnddoc fts5_rnddoc
do_test 1.0 {
execsql { CREATE VIRTUAL TABLE xx USING fts5(x) }
} {}
faultsim_save_and_close
do_faultsim_test 1 -faults oom-* -prep {
faultsim_restore_and_reopen
execsql { SELECT * FROM xx }
} -body {
execsql { DROP TABLE xx }
} -test {
faultsim_test_result [list 0 {}]
}
#-------------------------------------------------------------------------
# An OOM within an "ORDER BY rank" query.
#
db func rnddoc fts5_rnddoc
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE xx USING fts5(x);
INSERT INTO xx VALUES ('abc ' || rnddoc(10));
INSERT INTO xx VALUES ('abc abc' || rnddoc(9));
INSERT INTO xx VALUES ('abc abc abc' || rnddoc(8));
} {}
faultsim_save_and_close
do_faultsim_test 2 -faults oom-* -prep {
faultsim_restore_and_reopen
execsql { SELECT * FROM xx }
} -body {
execsql { SELECT rowid FROM xx WHERE xx MATCH 'abc' ORDER BY rank }
} -test {
faultsim_test_result [list 0 {3 2 1}]
}
#-------------------------------------------------------------------------
# An OOM while "reseeking" an FTS cursor.
#
do_execsql_test 3.0 {
CREATE VIRTUAL TABLE jj USING fts5(j);
INSERT INTO jj(rowid, j) VALUES(101, 'm t w t f s s');
INSERT INTO jj(rowid, j) VALUES(202, 't w t f s');
INSERT INTO jj(rowid, j) VALUES(303, 'w t f');
INSERT INTO jj(rowid, j) VALUES(404, 't');
}
faultsim_save_and_close
do_faultsim_test 3 -faults oom-* -prep {
faultsim_restore_and_reopen
execsql { SELECT * FROM jj }
} -body {
set res [list]
db eval { SELECT rowid FROM jj WHERE jj MATCH 't' } {
lappend res $rowid
if {$rowid==303} {
execsql { DELETE FROM jj WHERE rowid=404 }
}
}
set res
} -test {
faultsim_test_result [list 0 {101 202 303}]
}
#-------------------------------------------------------------------------
# An OOM within a special "*reads" query.
#
reset_db
db func rnddoc fts5_rnddoc
do_execsql_test 4.0 {
CREATE VIRTUAL TABLE x1 USING fts5(x);
INSERT INTO x1(x1, rank) VALUES('pgsz', 32);
WITH ii(i) AS ( SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<10 )
INSERT INTO x1 SELECT rnddoc(5) FROM ii;
}
set ::res [db eval {SELECT rowid, x1 FROM x1 WHERE x1 MATCH '*reads'}]
do_faultsim_test 4 -faults oom-* -body {
db eval {SELECT rowid, x, x1 FROM x1 WHERE x1 MATCH '*reads'}
} -test {
faultsim_test_result {0 {0 {} 3}}
}
#-------------------------------------------------------------------------
# An OOM within a query that uses a custom rank function.
#
reset_db
do_execsql_test 5.0 {
PRAGMA encoding='utf16';
CREATE VIRTUAL TABLE x2 USING fts5(x);
INSERT INTO x2(rowid, x) VALUES(10, 'a b c'); -- 3
INSERT INTO x2(rowid, x) VALUES(20, 'a b c'); -- 6
INSERT INTO x2(rowid, x) VALUES(30, 'a b c'); -- 2
INSERT INTO x2(rowid, x) VALUES(40, 'a b c'); -- 5
INSERT INTO x2(rowid, x) VALUES(50, 'a b c'); -- 1
}
# Return the value obtained from "$cmd xRowid" modulo MOD.
#
#   cmd   command prefix that is invoked with the sub-command xRowid
#   mod   modulus applied to the value that xRowid returns
#
proc rowidmod {cmd mod} {
  set rowid [$cmd xRowid]
  return [expr {$rowid % $mod}]
}
sqlite3_fts5_create_function db rowidmod rowidmod
do_faultsim_test 5.1 -faults oom-* -body {
db eval {
SELECT rowid || '-' || rank FROM x2 WHERE x2 MATCH 'b' AND
rank MATCH "rowidmod('7')" ORDER BY rank
}
} -test {
faultsim_test_result {0 {50-1 30-2 10-3 40-5 20-6}}
}
# Return the string "<rowid>-<prefix>", where <rowid> is the value obtained
# from "$cmd xRowid".
#
#   cmd      command prefix that is invoked with the sub-command xRowid
#   prefix   text appended after the rowid and a "-" separator
#
proc rowidprefix {cmd prefix} {
  set rowid [$cmd xRowid]
  return "${rowid}-${prefix}"
}
sqlite3_fts5_create_function db rowidprefix rowidprefix
set str [string repeat abcdefghijklmnopqrstuvwxyz 10]
do_faultsim_test 5.2 -faults oom-* -body {
db eval "
SELECT rank, x FROM x2 WHERE x2 MATCH 'b' AND
rank MATCH 'rowidprefix(''$::str'')'
LIMIT 1
"
} -test {
faultsim_test_result "0 {10-$::str {a b c}}"
}
#-------------------------------------------------------------------------
# OOM errors within auxiliary functions.
#
reset_db
do_execsql_test 6.0 {
CREATE VIRTUAL TABLE x3 USING fts5(xxx);
INSERT INTO x3 VALUES('a b c d c b a');
INSERT INTO x3 VALUES('a a a a a a a');
INSERT INTO x3 VALUES('a a a a a a a');
}
do_faultsim_test 6.1 -faults oom-t* -body {
db eval { SELECT highlight(x3, 0, '*', '*') FROM x3 WHERE x3 MATCH 'c' }
} -test {
faultsim_test_result {0 {{a b *c* d *c* b a}}}
}
# Fetch the value of "$cmd xInst 0", unpack it into three fields and return
# (second field * 100) + (third field).
#
# NOTE(review): the three fields are presumably the phrase number, column
# number and token offset of the first phrase instance - confirm against the
# xInst API documentation.
#
#   cmd   command prefix that is invoked as "$cmd xInst 0"
#
proc firstinst {cmd} {
  # An empty loop body is used purely to unpack the list into p, c and o.
  foreach {p c o} [$cmd xInst 0] {}
  # Braced so that expr receives the expression verbatim rather than a
  # string that has already been substituted once.
  expr {$c*100 + $o}
}
sqlite3_fts5_create_function db firstinst firstinst
do_faultsim_test 6.2 -faults oom-t* -body {
db eval { SELECT firstinst(x3) FROM x3 WHERE x3 MATCH 'c' }
} -test {
faultsim_test_result {0 2} {1 SQLITE_NOMEM}
}
# Return the integer currently stored via the xGetAuxdataInt/xSetAuxdataInt
# sub-commands of CMD, then replace the stored value with the result of
# "$cmd xInstCount".
#
#   cmd   command prefix providing xGetAuxdataInt, xSetAuxdataInt and
#         xInstCount
#
proc previc {cmd} {
  set prior [$cmd xGetAuxdataInt 0]
  set nInst [$cmd xInstCount]
  $cmd xSetAuxdataInt $nInst
  set prior
}
sqlite3_fts5_create_function db previc previc
# OOM while running the previc() auxiliary function over every row that
# matches 'a'. Without a fault the expected values are 0, 2 and 7: previc()
# returns the value stored by the previous invocation (0 for the first row)
# and stores the instance count of the current row. With a fault the query
# may instead fail with SQLITE_NOMEM.
#
# Numbered 6.3 so that it does not share a test id with the firstinst()
# test (6.2).
do_faultsim_test 6.3 -faults oom-t* -body {
  db eval { SELECT previc(x3) FROM x3 WHERE x3 MATCH 'a' }
} -test {
  faultsim_test_result {0 {0 2 7}} {1 SQLITE_NOMEM}
}
#-------------------------------------------------------------------------
# OOM error when querying for a phrase with many tokens.
#
reset_db
do_execsql_test 7.0 {
CREATE VIRTUAL TABLE tt USING fts5(x, y);
INSERT INTO tt VALUES('f b g b c b', 'f a d c c b'); -- 1
INSERT INTO tt VALUES('d a e f e d', 'f b b d e e'); -- 2
INSERT INTO tt VALUES('f b g a d c', 'e f c f a d'); -- 3
INSERT INTO tt VALUES('f f c d g f', 'f a e b g b'); -- 4
INSERT INTO tt VALUES('a g b d a g', 'e g a e a c'); -- 5
INSERT INTO tt VALUES('c d b d e f', 'f g e g e e'); -- 6
INSERT INTO tt VALUES('e g f f b c', 'f c e f g f'); -- 7
INSERT INTO tt VALUES('e g c f c e', 'f e e a f g'); -- 8
INSERT INTO tt VALUES('e a e b e e', 'd c c f f f'); -- 9
INSERT INTO tt VALUES('f a g g c c', 'e g d g c e'); -- 10
INSERT INTO tt VALUES('c d b a e f', 'f g e h e e'); -- 11
CREATE VIRTUAL TABLE tt2 USING fts5(o);
INSERT INTO tt2(rowid, o) SELECT rowid, x||' '||y FROM tt;
INSERT INTO tt2(rowid, o) VALUES(12, 'a b c d e f g h i j k l');
}
do_faultsim_test 7.2 -faults oom-* -body {
db eval { SELECT rowid FROM tt WHERE tt MATCH 'f+g+e+g+e+e' }
} -test {
faultsim_test_result {0 6} {1 SQLITE_NOMEM}
}
do_faultsim_test 7.3 -faults oom-* -body {
db eval { SELECT rowid FROM tt WHERE tt MATCH 'NEAR(a b c d e f)' }
} -test {
faultsim_test_result {0 11} {1 SQLITE_NOMEM}
}
do_faultsim_test 7.4 -faults oom-t* -body {
db eval { SELECT rowid FROM tt2 WHERE tt2 MATCH '"g c f c e f e e a f"' }
} -test {
faultsim_test_result {0 8} {1 SQLITE_NOMEM}
}
do_faultsim_test 7.5 -faults oom-* -body {
db eval {SELECT rowid FROM tt2 WHERE tt2 MATCH 'NEAR(a b c d e f g h i j k)'}
} -test {
faultsim_test_result {0 12} {1 SQLITE_NOMEM}
}
do_faultsim_test 7.6 -faults oom-* -body {
db eval {SELECT rowid FROM tt WHERE tt MATCH 'y: "c c"'}
} -test {
faultsim_test_result {0 {1 9}} {1 SQLITE_NOMEM}
}
#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 8.0 {
CREATE VIRTUAL TABLE tt USING fts5(x);
INSERT INTO tt(tt, rank) VALUES('pgsz', 32);
BEGIN;
INSERT INTO tt(rowid, x) VALUES(1, 'a b c d x x');
WITH ii(i) AS (SELECT 2 UNION ALL SELECT i+1 FROM ii WHERE i<99)
INSERT INTO tt(rowid, x) SELECT i, 'a b c x x d' FROM ii;
INSERT INTO tt(rowid, x) VALUES(100, 'a b c d x x');
COMMIT;
}
do_faultsim_test 8.1 -faults oom-t* -body {
db eval { SELECT rowid FROM tt WHERE tt MATCH 'NEAR(a b c d, 2)' }
} -test {
faultsim_test_result {0 {1 100}} {1 SQLITE_NOMEM}
}
do_faultsim_test 8.2 -faults oom-t* -body {
db eval { SELECT count(*) FROM tt WHERE tt MATCH 'a OR d' }
} -test {
faultsim_test_result {0 100} {1 SQLITE_NOMEM}
}
#-------------------------------------------------------------------------
# Fault in NOT query.
#
reset_db
do_execsql_test 9.0 {
CREATE VIRTUAL TABLE tt USING fts5(x);
INSERT INTO tt(tt, rank) VALUES('pgsz', 32);
BEGIN;
WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<200)
INSERT INTO tt(rowid, x)
SELECT i, CASE WHEN (i%50)==0 THEN 'a a a a a a' ELSE 'a x a x a x' END
FROM ii;
COMMIT;
}
do_faultsim_test 9.1 -faults oom-* -body {
db eval { SELECT rowid FROM tt WHERE tt MATCH 'a NOT x' }
} -test {
faultsim_test_result {0 {50 100 150 200}} {1 SQLITE_NOMEM}
}
#-------------------------------------------------------------------------
# OOM in fts5_expr() SQL function.
#
do_faultsim_test 10.1 -faults oom-t* -body {
db one { SELECT fts5_expr('a AND b NEAR(a b)') }
} -test {
faultsim_test_result {0 {"a" AND "b" AND NEAR("a" "b", 10)}}
}
do_faultsim_test 10.2 -faults oom-t* -body {
db one { SELECT fts5_expr_tcl('x:"a b c" AND b NEAR(a b)', 'ns', 'x') }
} -test {
set res {AND [ns -col 0 -- {a b c}] [ns -- {b}] [ns -near 10 -- {a} {b}]}
faultsim_test_result [list 0 $res]
}
do_faultsim_test 10.3 -faults oom-t* -body {
db one { SELECT fts5_expr('x:a', 'x') }
} -test {
faultsim_test_result {0 {x : "a"}}
}
#-------------------------------------------------------------------------
# OOM while configuring 'rank' option.
#
reset_db
do_execsql_test 11.0 {
CREATE VIRTUAL TABLE ft USING fts5(x);
}
do_faultsim_test 11.1 -faults oom-t* -body {
db eval { INSERT INTO ft(ft, rank) VALUES('rank', 'bm25(10.0, 5.0)') }
} -test {
faultsim_test_result {0 {}} {1 {disk I/O error}}
}
#-------------------------------------------------------------------------
# OOM while creating an fts5vocab table.
#
reset_db
do_execsql_test 12.0 {
CREATE VIRTUAL TABLE ft USING fts5(x);
}
faultsim_save_and_close
do_faultsim_test 12.1 -faults oom-t* -prep {
faultsim_restore_and_reopen
db eval { SELECT * FROM sqlite_master }
} -body {
db eval { CREATE VIRTUAL TABLE vv USING fts5vocab(ft, 'row') }
} -test {
faultsim_test_result {0 {}}
}
#-------------------------------------------------------------------------
# OOM while querying an fts5vocab table.
#
reset_db
do_execsql_test 13.0 {
CREATE VIRTUAL TABLE ft USING fts5(x);
INSERT INTO ft VALUES('a b');
CREATE VIRTUAL TABLE vv USING fts5vocab(ft, 'row');
}
faultsim_save_and_close
do_faultsim_test 13.1 -faults oom-t* -prep {
faultsim_restore_and_reopen
db eval { SELECT * FROM vv }
} -body {
db eval { SELECT * FROM vv }
} -test {
faultsim_test_result {0 {a 1 1 b 1 1}}
}
#-------------------------------------------------------------------------
# OOM in multi-column token query.
#
reset_db
# Build a three-column table with 'pgsz' set to 32. One row is inserted and
# the table is then doubled four times, giving 16 identical rows
# (1 -> 2 -> 4 -> 8 -> 16).
#
# These tests are numbered 13.2 and 13.3 so that their ids do not collide
# with the fts5vocab tests 13.0 and 13.1.
do_execsql_test 13.2 {
  CREATE VIRTUAL TABLE ft USING fts5(x, y, z);
  INSERT INTO ft(ft, rank) VALUES('pgsz', 32);
  INSERT INTO ft VALUES(
    'x x x x x x x x x x x x x x x x',
    'y y y y y y y y y y y y y y y y',
    'z z z z z z z z x x x x x x x x'
  );
  INSERT INTO ft SELECT * FROM ft;
  INSERT INTO ft SELECT * FROM ft;
  INSERT INTO ft SELECT * FROM ft;
  INSERT INTO ft SELECT * FROM ft;
}
faultsim_save_and_close

# Query for token "x" restricted to columns x and z. Every one of the 16
# rows is expected to match.
do_faultsim_test 13.3 -faults oom-t* -prep {
  faultsim_restore_and_reopen
  db eval { SELECT * FROM ft }
} -body {
  db eval { SELECT rowid FROM ft WHERE ft MATCH '{x z}: x' }
} -test {
  faultsim_test_result {0 {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16}}
}
#-------------------------------------------------------------------------
# OOM in an "ALTER TABLE RENAME TO"
#
reset_db
do_execsql_test 14.0 {
CREATE VIRTUAL TABLE "tbl one" USING fts5(x, y, z);
}
faultsim_save_and_close
do_faultsim_test 14.1 -faults oom-t* -prep {
faultsim_restore_and_reopen
db eval { SELECT * FROM "tbl one" }
} -body {
db eval { ALTER TABLE "tbl one" RENAME TO "tbl two" }
} -test {
faultsim_test_result {0 {}}
}
finish_test

View File

@@ -0,0 +1,96 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
#
# This file is focused on OOM errors.
#
source [file join [file dirname [info script]] fts5_common.tcl]
source $testdir/malloc_common.tcl
set testprefix fts5fault5
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
#-------------------------------------------------------------------------
# OOM while creating an FTS5 table.
#
do_faultsim_test 1.1 -faults oom-t* -prep {
db eval { DROP TABLE IF EXISTS abc }
} -body {
db eval { CREATE VIRTUAL TABLE abc USING fts5(x,y) }
} -test {
faultsim_test_result {0 {}}
}
#-------------------------------------------------------------------------
# OOM while writing a multi-tier doclist-index. And while running
# integrity-check on the same.
#
reset_db
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE tt USING fts5(x);
INSERT INTO tt(tt, rank) VALUES('pgsz', 32);
}
faultsim_save_and_close
do_faultsim_test 2.1 -faults oom-t* -prep {
faultsim_restore_and_reopen
db eval { SELECT * FROM tt }
} -body {
set str [string repeat "abc " 50]
db eval {
WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<100)
INSERT INTO tt(rowid, x) SELECT i, $str FROM ii;
}
} -test {
faultsim_test_result {0 {}}
}
do_faultsim_test 2.2 -faults oom-t* -body {
db eval { INSERT INTO tt(tt) VALUES('integrity-check') }
} -test {
faultsim_test_result {0 {}}
}
#-------------------------------------------------------------------------
# OOM while scanning an fts5vocab table.
#
reset_db
do_test 3.0 {
execsql {
CREATE VIRTUAL TABLE tt USING fts5(x);
CREATE VIRTUAL TABLE tv USING fts5vocab(tt, 'row');
INSERT INTO tt(tt, rank) VALUES('pgsz', 32);
BEGIN;
}
for {set i 0} {$i < 20} {incr i} {
set str [string repeat "$i " 50]
execsql { INSERT INTO tt VALUES($str) }
}
execsql COMMIT
} {}
do_faultsim_test 3.1 -faults oom-t* -body {
db eval {
SELECT term FROM tv;
}
} -test {
faultsim_test_result {0 {0 1 10 11 12 13 14 15 16 17 18 19 2 3 4 5 6 7 8 9}}
}
finish_test

View File

@@ -0,0 +1,152 @@
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
#
# This file is focused on OOM errors.
#
source [file join [file dirname [info script]] fts5_common.tcl]
source $testdir/malloc_common.tcl
set testprefix fts5fault6
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
#-------------------------------------------------------------------------
# OOM while rebuilding an FTS5 table.
#
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE tt USING fts5(a, b);
INSERT INTO tt VALUES('c d c g g f', 'a a a d g a');
INSERT INTO tt VALUES('c d g b f d', 'b g e c g c');
INSERT INTO tt VALUES('c c f d e d', 'c e g d b c');
INSERT INTO tt VALUES('e a f c e f', 'g b a c d g');
INSERT INTO tt VALUES('c g f b b d', 'g c d c f g');
INSERT INTO tt VALUES('d a g a b b', 'g c g g c e');
INSERT INTO tt VALUES('e f a b c e', 'f d c d c c');
INSERT INTO tt VALUES('e c a g c d', 'b b g f f b');
INSERT INTO tt VALUES('g b d d e b', 'f f b d a c');
INSERT INTO tt VALUES('e a d a e d', 'c e a e f g');
}
faultsim_save_and_close
do_faultsim_test 1.1 -faults oom-t* -prep {
faultsim_restore_and_reopen
} -body {
db eval { INSERT INTO tt(tt) VALUES('rebuild') }
} -test {
faultsim_test_result {0 {}}
}
do_faultsim_test 1.2 -faults oom-t* -prep {
faultsim_restore_and_reopen
} -body {
db eval { REPLACE INTO tt(rowid, a, b) VALUES(6, 'x y z', 'l l l'); }
} -test {
faultsim_test_result {0 {}}
}
#-------------------------------------------------------------------------
# OOM within a special delete.
#
reset_db
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE tt USING fts5(a, content="");
INSERT INTO tt VALUES('c d c g g f');
INSERT INTO tt VALUES('c d g b f d');
INSERT INTO tt VALUES('c c f d e d');
INSERT INTO tt VALUES('e a f c e f');
INSERT INTO tt VALUES('c g f b b d');
INSERT INTO tt VALUES('d a g a b b');
INSERT INTO tt VALUES('e f a b c e');
INSERT INTO tt VALUES('e c a g c d');
INSERT INTO tt VALUES('g b d d e b');
INSERT INTO tt VALUES('e a d a e d');
}
faultsim_save_and_close
do_faultsim_test 2.1 -faults oom-t* -prep {
faultsim_restore_and_reopen
} -body {
db eval { INSERT INTO tt(tt, rowid, a) VALUES('delete', 3, 'c d g b f d'); }
} -test {
faultsim_test_result {0 {}}
}
do_faultsim_test 2.2 -faults oom-t* -prep {
faultsim_restore_and_reopen
} -body {
db eval { INSERT INTO tt(tt) VALUES('delete-all') }
} -test {
faultsim_test_result {0 {}}
}
do_faultsim_test 2.3 -faults oom-t* -prep {
faultsim_restore_and_reopen
} -body {
db eval { INSERT INTO tt VALUES('x y z') }
} -test {
faultsim_test_result {0 {}}
}
#-------------------------------------------------------------------------
# OOM in the ASCII tokenizer with very large tokens.
#
# Also the unicode tokenizer.
#
set t1 [string repeat wxyz 20]
set t2 [string repeat wxyz 200]
set t3 [string repeat wxyz 2000]
set doc "$t1 $t2 $t3"
do_execsql_test 3.0 {
CREATE VIRTUAL TABLE xyz USING fts5(c, tokenize=ascii, content="");
CREATE VIRTUAL TABLE xyz2 USING fts5(c, content="");
}
faultsim_save_and_close
do_faultsim_test 3.1 -faults oom-t* -prep {
faultsim_restore_and_reopen
db eval { SELECT * FROM xyz }
} -body {
db eval { INSERT INTO xyz VALUES($::doc) }
} -test {
faultsim_test_result {0 {}}
}
do_faultsim_test 3.2 -faults oom-t* -prep {
faultsim_restore_and_reopen
db eval { SELECT * FROM xyz2 }
} -body {
db eval { INSERT INTO xyz2 VALUES($::doc) }
} -test {
faultsim_test_result {0 {}}
}
#-------------------------------------------------------------------------
# OOM while initializing a unicode61 tokenizer.
#
reset_db
faultsim_save_and_close
do_faultsim_test 4.1 -faults oom-t* -prep {
faultsim_restore_and_reopen
} -body {
db eval {
CREATE VIRTUAL TABLE yu USING fts5(x, tokenize="unicode61 separators abc");
}
} -test {
faultsim_test_result {0 {}}
}
finish_test

View File

@@ -0,0 +1,43 @@
# 2014 Dec 20
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Test that SQLITE_FULL is returned if the FTS5 table cannot find a free
# segid to use. In practice this can only really happen when automerge and
# crisismerge are both disabled.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5full
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
# Configure the table so that segments accumulate: automerge is set to 0 and
# crisismerge to 100000 (per the file header, SQLITE_FULL is only expected
# when both kinds of merge are effectively disabled).
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE x8 USING fts5(i);
INSERT INTO x8(x8, rank) VALUES('automerge', 0);
INSERT INTO x8(x8, rank) VALUES('crisismerge', 100000);
}
# Expose the Tcl command fts5_rnddoc to SQL as rnddoc(). fts5_rnddoc is not
# defined in this file -- presumably it comes from fts5_common.tcl.
db func rnddoc fts5_rnddoc
# Insert up to 2500 documents, one statement each. The loop is expected to
# be interrupted by an error: the test passes only if [catch] returns 1 with
# the message "database or disk is full".
do_test 1.1 {
list [catch {
for {set i 0} {$i < 2500} {incr i} {
execsql { INSERT INTO x8 VALUES( rnddoc(5) ); }
}
} msg] $msg
} {1 {database or disk is full}}
finish_test

108
ext/fts5/test/fts5hash.test Normal file
View File

@@ -0,0 +1,108 @@
# 2015 April 21
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# The tests in this file are focused on the code in fts5_hash.c.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5hash
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
#-------------------------------------------------------------------------
# Return a list of tokens (a vocabulary) that all share the same hash
# key value. This can be used to test hash collisions.
#
# Usage: build_vocab1 ?-nslot N? ?-nword N? ?-hash H? ?-prefix STR?
#
#   -nslot N     Slot count passed to sqlite3_fts5_token_hash (default 1024).
#   -nword N     Number of tokens to return (default 20).
#   -hash H      Hash value every returned token must have (default 88).
#   -prefix STR  String prepended to every candidate token (default "").
#
# Raises "bad args" if the option list has an odd number of elements and
# "bad option: NAME" for an unknown option. Returns a list of -nword tokens.
# Candidates are drawn at random and are not de-duplicated, so the result
# may contain repeats.
#
# NOTE(review): the loop only terminates if some candidate can hash to
# -hash; presumably that requires 0 <= H < N -- confirm against
# sqlite3_fts5_token_hash.
proc build_vocab1 {args} {
  # Defaults; overridden below by any -option value pairs in $args.
  set O(-nslot) 1024
  set O(-nword) 20
  set O(-hash) 88
  set O(-prefix) ""

  if {[llength $args] % 2} { error "bad args" }
  foreach {k v} $args {
    if {![info exists O($k)]} { error "bad option: $k" }
    set O($k) $v
  }

  # Generate random tokens, keeping only those that land in the wanted slot.
  set L [list]
  while {[llength $L] < $O(-nword)} {
    set t "$O(-prefix)[random_token]"
    set h [sqlite3_fts5_token_hash $O(-nslot) $t]
    if {$O(-hash)==$h} { lappend L $t }
  }
  return $L
}
# Return a random token made only of the letters a-j.
#
# A random integer in the range 0..1999999 is generated and each decimal
# digit is replaced by a letter (0->a, 1->b, ... 9->j).
proc random_token {} {
  set map [list 0 a 1 b 2 c 3 d 4 e 5 f 6 g 7 h 8 i 9 j]
  # Braced so that the expression is parsed once by [expr] rather than being
  # substituted by the Tcl parser first.
  set iVal [expr {int(rand() * 2000000)}]
  return [string map $map $iVal]
}
# Return a document (a Tcl list) of $nWord words, each picked uniformly at
# random, with replacement, from the list $vocab.
proc random_doc {vocab nWord} {
  set nVocab [llength $vocab]
  set words ""
  set n 0
  while {$n < $nWord} {
    set pick [expr {int(rand() * $nVocab)}]
    lappend words [lindex $vocab $pick]
    incr n
  }
  return $words
}
# First vocabulary: tokens that all collide on build_vocab1's default hash
# value. random_doc is exposed to SQL as r() so documents can be generated
# from inside the INSERT ... SELECT statements below.
set vocab [build_vocab1]
db func r random_doc
# 1.0: insert 100 rows (the recursive CTE counts i from 1 to 100) whose two
# columns hold 5 and 7 words drawn from $vocab. The integrity-check is run
# once inside the transaction and once more after COMMIT.
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE eee USING fts5(e, ee);
BEGIN;
WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<100)
INSERT INTO eee SELECT r($vocab, 5), r($vocab, 7) FROM ii;
INSERT INTO eee(eee) VALUES('integrity-check');
COMMIT;
INSERT INTO eee(eee) VALUES('integrity-check');
}
# Second vocabulary: tokens that start with "xyz" and share the hash value
# of the token "xyz" itself, which is then appended to the list.
set hash [sqlite3_fts5_token_hash 1024 xyz]
set vocab [build_vocab1 -prefix xyz -hash $hash]
lappend vocab xyz
# 1.1: insert another 100 rows using the new vocabulary. The transaction
# opened here is left open; it is committed by test 1.3.
do_execsql_test 1.1 {
CREATE VIRTUAL TABLE vocab USING fts5vocab(eee, 'row');
BEGIN;
WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<100)
INSERT INTO eee SELECT r($vocab, 5), r($vocab, 7) FROM ii;
INSERT INTO eee(eee) VALUES('integrity-check');
}
# 1.2: while the transaction is still open, check that the "doc" value the
# fts5vocab table reports for every term equals the number of rows returned
# by a MATCH query for that term.
do_test 1.2 {
db eval { SELECT term, doc FROM vocab } {
set nRow [db one {SELECT count(*) FROM eee WHERE eee MATCH $term}]
if {$nRow != $doc} {
error "term=$term fts5vocab=$doc cnt=$nRow"
}
}
# Evaluates to the empty string, so the script's result matches the
# expected value {}.
set {} {}
} {}
# 1.3: commit the transaction opened in 1.1 and re-check integrity.
do_execsql_test 1.3 {
COMMIT;
INSERT INTO eee(eee) VALUES('integrity-check');
}
finish_test

View File

@@ -0,0 +1,107 @@
# 2015 Jan 13
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file contains tests focused on the integrity-check procedure.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5integrity
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
# 1.*: integrity-check on a one-row table with default options.
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE xx USING fts5(x);
INSERT INTO xx VALUES('term');
}
do_execsql_test 1.1 {
INSERT INTO xx(xx) VALUES('integrity-check');
}
# 2.*: the same check on a table created with the prefix=1 option.
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE yy USING fts5(x, prefix=1);
INSERT INTO yy VALUES('term');
}
do_execsql_test 2.1 {
INSERT INTO yy(yy) VALUES('integrity-check');
}
#--------------------------------------------------------------------
#
do_execsql_test 3.0 {
CREATE VIRTUAL TABLE zz USING fts5(z);
INSERT INTO zz(zz, rank) VALUES('pgsz', 32);
INSERT INTO zz VALUES('b b b b b b b b b b b b b b');
INSERT INTO zz SELECT z FROM zz;
INSERT INTO zz SELECT z FROM zz;
INSERT INTO zz SELECT z FROM zz;
INSERT INTO zz SELECT z FROM zz;
INSERT INTO zz SELECT z FROM zz;
INSERT INTO zz SELECT z FROM zz;
INSERT INTO zz(zz) VALUES('optimize');
}
do_execsql_test 3.1 { INSERT INTO zz(zz) VALUES('integrity-check'); }
#--------------------------------------------------------------------
# Mess around with a docsize record. And the averages record. Then
# check that integrity-check picks it up.
#
do_execsql_test 4.0 {
CREATE VIRTUAL TABLE aa USING fts5(zz);
INSERT INTO aa(zz) VALUES('a b c d e');
INSERT INTO aa(zz) VALUES('a b c d');
INSERT INTO aa(zz) VALUES('a b c');
INSERT INTO aa(zz) VALUES('a b');
INSERT INTO aa(zz) VALUES('a');
SELECT length(sz) FROM aa_docsize;
} {1 1 1 1 1}
do_execsql_test 4.1 {
INSERT INTO aa(aa) VALUES('integrity-check');
}
do_catchsql_test 4.2 {
BEGIN;
UPDATE aa_docsize SET sz = X'44' WHERE rowid = 3;
INSERT INTO aa(aa) VALUES('integrity-check');
} {1 {database disk image is malformed}}
do_catchsql_test 4.3 {
ROLLBACK;
BEGIN;
UPDATE aa_data SET block = X'44' WHERE rowid = 1;
INSERT INTO aa(aa) VALUES('integrity-check');
} {1 {database disk image is malformed}}
do_catchsql_test 4.4 {
ROLLBACK;
BEGIN;
INSERT INTO aa_docsize VALUES(23, X'04');
INSERT INTO aa(aa) VALUES('integrity-check');
} {1 {database disk image is malformed}}
do_catchsql_test 4.5 {
ROLLBACK;
BEGIN;
INSERT INTO aa_docsize VALUES(23, X'00');
INSERT INTO aa_content VALUES(23, '');
INSERT INTO aa(aa) VALUES('integrity-check');
} {1 {database disk image is malformed}}
#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM zz_data} {puts $r}
#exit
finish_test

View File

@@ -0,0 +1,457 @@
# 2015 August 05
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5matchinfo
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 { finish_test ; return }
# Decode a matchinfo() blob into a list of integers.
#
# The blob is read as a sequence of 32-bit integers in the byte order of the
# machine running the test ([binary scan] "i*" on little-endian hosts, "I*"
# on big-endian hosts).
proc mit {blob} {
  switch -- $::tcl_platform(byteOrder) {
    littleEndian { set fmt i* }
    bigEndian    { set fmt I* }
  }
  binary scan $blob $fmt ints
  return $ints
}
# Make [mit] callable from SQL as mit(). sqlite3_fts5_register_matchinfo is
# defined outside this file; presumably it registers the matchinfo()
# function used below on connection db -- confirm.
db func mit mit
sqlite3_fts5_register_matchinfo db
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE t1 USING fts5(content);
}
# 1.1: matchinfo() called without a format argument, decoded with mit().
do_execsql_test 1.1 {
INSERT INTO t1(content) VALUES('I wandered lonely as a cloud');
INSERT INTO t1(content) VALUES('That floats on high o''er vales and hills,');
INSERT INTO t1(content) VALUES('When all at once I saw a crowd,');
INSERT INTO t1(content) VALUES('A host, of golden daffodils,');
SELECT mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH 'I';
} {{1 1 1 2 2} {1 1 1 2 2}}
# Now create a second FTS5 table with the same content as t1 and check that
# the same query returns the same matchinfo() values.
#
do_execsql_test 1.2 {
CREATE VIRTUAL TABLE t2 USING fts5(content);
INSERT INTO t2 SELECT * FROM t1;
SELECT mit(matchinfo(t2)) FROM t2 WHERE t2 MATCH 'I';
} {{1 1 1 2 2} {1 1 1 2 2}}
#--------------------------------------------------------------------------
# Proc [do_matchinfo_test] is used to test the FTSX matchinfo() function.
#
# The first argument - $tn - is a test identifier. This may be either a
# full identifier (i.e. "fts3matchinfo-1.1") or, if global var $testprefix
# is set, just the numeric component (i.e. "1.1").
#
# The second argument is the name of an FTSX table. The third is the
# full text of a WHERE/MATCH expression to query the table for
# (i.e. "t1 MATCH 'abc'"). The final argument - $results - should be a
# key-value list (serialized array) with matchinfo() format specifiers
# as keys, and the results of executing the statement:
#
# SELECT matchinfo($tbl, '$key') FROM $tbl WHERE $expr
#
# For example:
#
# CREATE VIRTUAL TABLE t1 USING fts4;
# INSERT INTO t1 VALUES('abc');
# INSERT INTO t1 VALUES('def');
# INSERT INTO t1 VALUES('abc abc');
#
# do_matchinfo_test 1.1 t1 "t1 MATCH 'abc'" {
# n {3 3}
# p {1 1}
# c {1 1}
# x {{1 3 2} {2 3 2}}
# }
#
# If the $results list contains keys mapped to "-" instead of a matchinfo()
# result, then this command computes the expected results based on other
# mappings to test the matchinfo() function. For example, the command above
# could be changed to:
#
# do_matchinfo_test 1.1 t1 "t1 MATCH 'abc'" {
# n {3 3} p {1 1} c {1 1} x {{1 3 2} {2 3 2}}
# pcx -
# }
#
# And this command would compute the expected results for matchinfo(t1, 'pcx')
# based on the results of matchinfo(t1, 'p'), matchinfo(t1, 'c') and
# matchinfo(t1, 'x') in order to test 'pcx'.
#
proc do_matchinfo_test {tn tbl expr results} {
# Collect every explicitly supplied result in resarray, keyed by its
# matchinfo() format string. Entries whose value is "-" are skipped here
# and computed further down.
foreach {fmt res} $results {
if {$res == "-"} continue
set resarray($fmt) $res
}
# nRow = the largest number of rows found in any explicitly supplied result.
set nRow 0
foreach {fmt res} [array get resarray] {
if {[llength $res]>$nRow} { set nRow [llength $res] }
}
# Construct expected results for any formats for which the caller
# supplied result is "-".
#
# For each row, the expected value is the concatenation of that row's
# results for every individual character of the format string, so each of
# those characters must already be a key of resarray.
foreach {fmt res} $results {
if {$res == "-"} {
set res [list]
for {set iRow 0} {$iRow<$nRow} {incr iRow} {
set rowres [list]
foreach c [split $fmt ""] {
set rowres [concat $rowres [lindex $resarray($c) $iRow]]
}
lappend res $rowres
}
set resarray($fmt) $res
}
}
# Test each matchinfo() request individually.
#
foreach {fmt res} [array get resarray] {
set sql "SELECT mit(matchinfo($tbl, '$fmt')) FROM $tbl WHERE $expr"
do_execsql_test $tn.$fmt $sql [normalize2 $res]
}
# Test them all executed together (multiple invocations of matchinfo()).
#
# The column list (exprlist) and the expected values (allres) are built from
# separate [array get resarray] calls; they line up only because the array is
# not modified in between, so both calls are expected to list the formats in
# the same order.
set exprlist [list]
foreach {format res} [array get resarray] {
lappend exprlist "mit(matchinfo($tbl, '$format'))"
}
set allres [list]
for {set iRow 0} {$iRow<$nRow} {incr iRow} {
foreach {format res} [array get resarray] {
lappend allres [lindex $res $iRow]
}
}
set sql "SELECT [join $exprlist ,] FROM $tbl WHERE $expr"
do_execsql_test $tn.multi $sql [normalize2 $allres]
}
# Return a copy of $list_of_lists in which every element has been rebuilt as
# a canonical Tcl list, so that differences in whitespace inside the
# sub-lists do not affect a string comparison of the result.
proc normalize2 {list_of_lists} {
  set out [list]
  foreach sub $list_of_lists {
    set canonical [list {*}$sub]
    lappend out $canonical
  }
  return $out
}
do_execsql_test 4.1.0 {
CREATE VIRTUAL TABLE t4 USING fts5(x, y);
INSERT INTO t4 VALUES('a b c d e', 'f g h i j');
INSERT INTO t4 VALUES('f g h i j', 'a b c d e');
}
# Three queries for 'a b c' against t4, each with a different set of
# matchinfo() format strings. Each invocation has its own test identifier so
# that a failure report (e.g. "4.1.1.3.s") names exactly one test; previously
# all three were called 4.1.1.
do_matchinfo_test 4.1.1.1 t4 {t4 MATCH 'a b c'} {
  s {{3 0} {0 3}}
}
do_matchinfo_test 4.1.1.2 t4 {t4 MATCH 'a b c'} {
  p {3 3}
  x {
    {1 1 1 0 1 1 1 1 1 0 1 1 1 1 1 0 1 1}
    {0 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 1 1}
  }
}
# Formats mapped to "-" have their expected result computed by
# do_matchinfo_test from the single-character results listed here.
do_matchinfo_test 4.1.1.3 t4 {t4 MATCH 'a b c'} {
  p {3 3}
  c {2 2}
  x {
    {1 1 1 0 1 1 1 1 1 0 1 1 1 1 1 0 1 1}
    {0 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 1 1}
  }
  n {2 2}
  l {{5 5} {5 5}}
  a {{5 5} {5 5}}
  s {{3 0} {0 3}}
  xxxxxxxxxxxxxxxxxx - pcx - xpc - ccc - pppxpcpcx - laxnpc -
  xpxsscplax -
}
do_matchinfo_test 4.1.2 t4 {t4 MATCH '"g h i"'} {
p {1 1}
c {2 2}
x {
{0 1 1 1 1 1}
{1 1 1 0 1 1}
}
n {2 2}
l {{5 5} {5 5}}
a {{5 5} {5 5}}
s {{0 1} {1 0}}
xxxxxxxxxxxxxxxxxx - pcx - xpc - ccc - pppxpcpcx - laxnpc -
sxsxs -
}
do_matchinfo_test 4.1.3 t4 {t4 MATCH 'a b'} { s {{2 0} {0 2}} }
do_matchinfo_test 4.1.4 t4 {t4 MATCH '"a b" c'} { s {{2 0} {0 2}} }
do_matchinfo_test 4.1.5 t4 {t4 MATCH 'a "b c"'} { s {{2 0} {0 2}} }
do_matchinfo_test 4.1.6 t4 {t4 MATCH 'd d'} { s {{1 0} {0 1}} }
do_matchinfo_test 4.1.7 t4 {t4 MATCH 'f OR abcd'} {
x {
{0 1 1 1 1 1 0 0 0 0 0 0}
{1 1 1 0 1 1 0 0 0 0 0 0}
}
}
do_matchinfo_test 4.1.8 t4 {t4 MATCH 'f NOT abcd'} {
x {
{0 1 1 1 1 1 0 0 0 0 0 0}
{1 1 1 0 1 1 0 0 0 0 0 0}
}
}
do_execsql_test 4.2.0 {
CREATE VIRTUAL TABLE t5 USING fts5(content);
INSERT INTO t5 VALUES('a a a a a');
INSERT INTO t5 VALUES('a b a b a');
INSERT INTO t5 VALUES('c b c b c');
INSERT INTO t5 VALUES('x x x x x');
}
do_matchinfo_test 4.2.1 t5 {t5 MATCH 'a a'} {
x {{5 8 2 5 8 2} {3 8 2 3 8 2}}
s {2 1}
}
do_matchinfo_test 4.2.2 t5 {t5 MATCH 'a b'} { s {2} }
do_matchinfo_test 4.2.3 t5 {t5 MATCH 'a b a'} { s {3} }
do_matchinfo_test 4.2.4 t5 {t5 MATCH 'a a a'} { s {3 1} }
do_matchinfo_test 4.2.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} }
do_matchinfo_test 4.2.6 t5 {t5 MATCH 'a OR b'} { s {1 2 1} }
do_execsql_test 4.3.0 "INSERT INTO t5 VALUES('x y [string repeat {b } 50000]')";
# It used to be that the second 'a' token would be deferred. That doesn't
# work any longer.
if 0 {
do_matchinfo_test 4.3.1 t5 {t5 MATCH 'a a'} {
x {{5 8 2 5 5 5} {3 8 2 3 5 5}}
s {2 1}
}
}
do_matchinfo_test 4.3.2 t5 {t5 MATCH 'a b'} { s {2} }
do_matchinfo_test 4.3.3 t5 {t5 MATCH 'a b a'} { s {3} }
do_matchinfo_test 4.3.4 t5 {t5 MATCH 'a a a'} { s {3 1} }
do_matchinfo_test 4.3.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} }
do_matchinfo_test 4.3.6 t5 {t5 MATCH 'a OR b'} { s {1 2 1 1} }
# Repeat the 's' checks after running 'optimize' on t5. The first query
# issued after the optimize is numbered 4.4.0.2; it used to share the
# identifier 4.4.2 with the later, identical query, which made a failure
# report ambiguous.
do_execsql_test 4.4.0.1 { INSERT INTO t5(t5) VALUES('optimize') }
do_matchinfo_test 4.4.0.2 t5 {t5 MATCH 'a b'} { s {2} }
do_matchinfo_test 4.4.1 t5 {t5 MATCH 'a a'} { s {2 1} }
do_matchinfo_test 4.4.2 t5 {t5 MATCH 'a b'} { s {2} }
do_matchinfo_test 4.4.3 t5 {t5 MATCH 'a b a'} { s {3} }
do_matchinfo_test 4.4.4 t5 {t5 MATCH 'a a a'} { s {3 1} }
do_matchinfo_test 4.4.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} }
do_execsql_test 4.5.0 {
CREATE VIRTUAL TABLE t6 USING fts5(a, b, c);
INSERT INTO t6 VALUES('a', 'b', 'c');
}
do_matchinfo_test 4.5.1 t6 {t6 MATCH 'a b c'} { s {{1 1 1}} }
#-------------------------------------------------------------------------
# Test the outcome of matchinfo() when used within a query that does not
# use the full-text index (i.e. lookup by rowid or full-table scan).
#
do_execsql_test 7.1 {
CREATE VIRTUAL TABLE t10 USING fts5(content);
INSERT INTO t10 VALUES('first record');
INSERT INTO t10 VALUES('second record');
}
do_execsql_test 7.2 {
SELECT typeof(matchinfo(t10)), length(matchinfo(t10)) FROM t10;
} {blob 8 blob 8}
do_execsql_test 7.3 {
SELECT typeof(matchinfo(t10)), length(matchinfo(t10)) FROM t10 WHERE rowid=1;
} {blob 8}
do_execsql_test 7.4 {
SELECT typeof(matchinfo(t10)), length(matchinfo(t10))
FROM t10 WHERE t10 MATCH 'record'
} {blob 20 blob 20}
#-------------------------------------------------------------------------
# Test a special case - matchinfo('nxa') with many zero length documents.
# Special because "x" internally uses a statement used by both "n" and "a".
# This was causing a problem at one point in the obscure case where the
# total number of bytes of data stored in an fts3 table was greater than
# the number of rows. i.e. when the following query returns true:
#
# SELECT sum(length(content)) < count(*) FROM fts4table;
#
do_execsql_test 8.1 {
CREATE VIRTUAL TABLE t11 USING fts5(content);
INSERT INTO t11(t11, rank) VALUES('pgsz', 32);
INSERT INTO t11 VALUES('quitealongstringoftext');
INSERT INTO t11 VALUES('anotherquitealongstringoftext');
INSERT INTO t11 VALUES('athirdlongstringoftext');
INSERT INTO t11 VALUES('andonemoreforgoodluck');
}
do_test 8.2 {
for {set i 0} {$i < 200} {incr i} {
execsql { INSERT INTO t11 VALUES('') }
}
execsql { INSERT INTO t11(t11) VALUES('optimize') }
} {}
do_execsql_test 8.3 {
SELECT mit(matchinfo(t11, 'nxa')) FROM t11 WHERE t11 MATCH 'a*'
} {{204 1 3 3 0} {204 1 3 3 0} {204 1 3 3 0}}
#-------------------------------------------------------------------------
do_execsql_test 9.1 {
CREATE VIRTUAL TABLE t12 USING fts5(content);
INSERT INTO t12 VALUES('a b c d');
SELECT mit(matchinfo(t12, 'x')) FROM t12 WHERE t12 MATCH 'NEAR(a d, 1) OR a';
} {{0 1 1 0 1 1 1 1 1}}
do_execsql_test 9.2 {
INSERT INTO t12 VALUES('a d c d');
SELECT mit(matchinfo(t12, 'x')) FROM t12 WHERE t12 MATCH 'NEAR(a d, 1) OR a';
} {
{0 2 2 0 3 2 1 2 2} {1 2 2 1 3 2 1 2 2}
}
do_execsql_test 9.3 {
INSERT INTO t12 VALUES('a d d a');
SELECT mit(matchinfo(t12, 'x')) FROM t12 WHERE t12 MATCH 'NEAR(a d, 1) OR a';
} {
{0 4 3 0 5 3 1 4 3} {1 4 3 1 5 3 1 4 3} {2 4 3 2 5 3 2 4 3}
}
#---------------------------------------------------------------------------
# Test for a memory leak
#
do_execsql_test 10.1 {
DROP TABLE t10;
CREATE VIRTUAL TABLE t10 USING fts5(idx, value);
INSERT INTO t10 values (1, 'one'),(2, 'two'),(3, 'three');
SELECT t10.rowid, t10.*
FROM t10
JOIN (SELECT 1 AS idx UNION SELECT 2 UNION SELECT 3) AS x
WHERE t10 MATCH x.idx
AND matchinfo(t10) not null
GROUP BY t10.rowid
ORDER BY 1;
} {1 1 one 2 2 two 3 3 three}
#---------------------------------------------------------------------------
# Test the 'y' matchinfo flag
#
set sqlite_fts3_enable_parentheses 1
reset_db
do_execsql_test 11.0 {
CREATE VIRTUAL TABLE tt USING fts3(x, y);
INSERT INTO tt VALUES('c d a c d d', 'e a g b d a'); -- 1
INSERT INTO tt VALUES('c c g a e b', 'c g d g e c'); -- 2
INSERT INTO tt VALUES('b e f d e g', 'b a c b c g'); -- 3
INSERT INTO tt VALUES('a c f f g d', 'd b f d e g'); -- 4
INSERT INTO tt VALUES('g a c f c f', 'd g g b c c'); -- 5
INSERT INTO tt VALUES('g a c e b b', 'd b f b g g'); -- 6
INSERT INTO tt VALUES('f d a a f c', 'e e a d c f'); -- 7
INSERT INTO tt VALUES('a c b b g f', 'a b a e d f'); -- 8
INSERT INTO tt VALUES('b a f e c c', 'f d b b a b'); -- 9
INSERT INTO tt VALUES('f d c e a c', 'f a f a a f'); -- 10
}
db func mit mit
# Each entry is: test number, MATCH expression, and the expected
# {rowid matchinfo-'y'-values ...} list for that expression.
foreach {tn expr res} {
  1 "a" {
    1 {1 2} 2 {1 0} 3 {0 1} 4 {1 0} 5 {1 0}
    6 {1 0} 7 {2 1} 8 {1 2} 9 {1 1} 10 {1 3}
  }
  2 "b" {
    1 {0 1} 2 {1 0} 3 {1 2} 4 {0 1} 5 {0 1}
    6 {2 2} 8 {2 1} 9 {1 3}
  }
  3 "y:a" {
    1 {0 2} 3 {0 1}
    7 {0 1} 8 {0 2} 9 {0 1} 10 {0 3}
  }
  4 "x:a" {
    1 {1 0} 2 {1 0} 4 {1 0} 5 {1 0}
    6 {1 0} 7 {2 0} 8 {1 0} 9 {1 0} 10 {1 0}
  }
  5 "a OR b" {
    1 {1 2 0 1} 2 {1 0 1 0} 3 {0 1 1 2} 4 {1 0 0 1} 5 {1 0 0 1}
    6 {1 0 2 2} 7 {2 1 0 0} 8 {1 2 2 1} 9 {1 1 1 3} 10 {1 3 0 0}
  }
  6 "a AND b" {
    1 {1 2 0 1} 2 {1 0 1 0} 3 {0 1 1 2} 4 {1 0 0 1} 5 {1 0 0 1}
    6 {1 0 2 2} 8 {1 2 2 1} 9 {1 1 1 3}
  }
  7 "a OR (a AND b)" {
    1 {1 2 1 2 0 1} 2 {1 0 1 0 1 0} 3 {0 1 0 1 1 2} 4 {1 0 1 0 0 1}
    5 {1 0 1 0 0 1} 6 {1 0 1 0 2 2} 7 {2 1 0 0 0 0} 8 {1 2 1 2 2 1}
    9 {1 1 1 1 1 3} 10 {1 3 0 0 0 0}
  }
} {
  do_execsql_test 11.1.$tn.1 {
    SELECT rowid, mit(matchinfo(tt, 'y')) FROM tt WHERE tt MATCH $expr
  } $res

  # Derive the expected 'b' output from the 'y' output: every pair {a b} of
  # 'y' values is folded into one integer, +1 if a is non-zero and +2 if b
  # is non-zero.
  set r2 [list]
  foreach {rowid L} $res {
    lappend r2 $rowid
    set M [list]
    foreach {a b} $L {
      lappend M [expr {($a ? 1 : 0) + ($b ? 2 : 0)}]
    }
    lappend r2 $M
  }

  do_execsql_test 11.1.$tn.2 {
    SELECT rowid, mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH $expr
  } $r2

  # The same query is run a second time; it is numbered .3 so that the two
  # runs can be told apart (both used to be called 11.1.$tn.2).
  do_execsql_test 11.1.$tn.3 {
    SELECT rowid, mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH $expr
  } $r2
}
set sqlite_fts3_enable_parentheses 0
#---------------------------------------------------------------------------
# Test the 'b' matchinfo flag
#
set sqlite_fts3_enable_parentheses 1
reset_db
db func mit mit
do_test 12.0 {
set cols [list]
for {set i 0} {$i < 50} {incr i} { lappend cols "c$i" }
execsql "CREATE VIRTUAL TABLE tt USING fts3([join $cols ,])"
} {}
do_execsql_test 12.1 {
INSERT INTO tt (rowid, c4, c45) VALUES(1, 'abc', 'abc');
SELECT mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH 'abc';
} [list [list [expr 1<<4] [expr 1<<(45-32)]]]
set sqlite_fts3_enable_parentheses 0
finish_test

View File

@@ -0,0 +1,194 @@
# 2014 Dec 20
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests that focus on incremental merges of segments.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5merge
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
db func repeat [list string repeat]
#-------------------------------------------------------------------------
# Create an fts index so that:
#
# * the index consists of two top-level segments
# * each segment contains records related to $nRowPerSeg rows
# * all rows consist of tokens "x" and "y" only.
#
# Then run ('merge', 1) until everything is completely merged.
#
# Usage: do_merge1_test TESTNAME NROWPERSEG
#
# Rebuilds table x8 with two batches of NROWPERSEG rows each, then issues
# ('merge', 1) commands -- each followed by an integrity-check -- for as
# long as [fts5_level_segs x8] reports a non-zero first element. The final
# sub-test TESTNAME.x passes only if the step counter ended up below 5.
proc do_merge1_test {testname nRowPerSeg} {
  # The SQL below reads the row count as $::nRowPerSeg. The unbraced [expr]
  # evaluates the argument itself as an expression before storing it.
  set ::nRowPerSeg [expr $nRowPerSeg]

  do_execsql_test $testname.0 {
    DROP TABLE IF EXISTS x8;
    CREATE VIRTUAL TABLE x8 USING fts5(i);
    INSERT INTO x8(x8, rank) VALUES('pgsz', 32);
    WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<$::nRowPerSeg)
    INSERT INTO x8 SELECT repeat('x y ', i % 16) FROM ii;
    WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<$::nRowPerSeg)
    INSERT INTO x8 SELECT repeat('x y ', i % 16) FROM ii;
    INSERT INTO x8(x8, rank) VALUES('automerge', 2);
  }

  set tn 1
  while {[lindex [fts5_level_segs x8] 0] > 0} {
    do_execsql_test $testname.$tn {
      INSERT INTO x8(x8, rank) VALUES('merge', 1);
      INSERT INTO x8(x8) VALUES('integrity-check');
    }
    # Safety valve: give up after the sixth step instead of looping forever.
    if {$tn > 5} break
    incr tn
  }

  do_test $testname.x [list expr "$tn < 5"] 1
}
do_merge1_test 1.1 1
do_merge1_test 1.2 2
do_merge1_test 1.3 3
do_merge1_test 1.4 4
do_merge1_test 1.5 10
do_merge1_test 1.6 20
do_merge1_test 1.7 100
#-------------------------------------------------------------------------
#
# Usage: do_merge2_test TESTNAME NROW
#
# Creates a fresh table x8 and inserts NROW random documents one at a time.
# After every insert, merge steps are issued until [not_merged x8] returns
# false, with an integrity-check after each step.
proc do_merge2_test {testname nRow} {
# Expose the Tcl command fts5_rnddoc to SQL as rnddoc().
db func rnddoc fts5_rnddoc
do_execsql_test $testname.0 {
DROP TABLE IF EXISTS x8;
CREATE VIRTUAL TABLE x8 USING fts5(i);
INSERT INTO x8(x8, rank) VALUES('pgsz', 32);
}
# The loop below reads the row count through the global $::nRow.
set ::nRow $nRow
do_test $testname.1 {
for {set i 0} {$i < $::nRow} {incr i} {
execsql { INSERT INTO x8 VALUES( rnddoc(($i%16) + 5) ) }
# Each pass sets automerge to 2, issues one ('merge', 1) command, sets
# automerge back to 16 and then runs the integrity-check.
while {[not_merged x8]} {
execsql {
INSERT INTO x8(x8, rank) VALUES('automerge', 2);
INSERT INTO x8(x8, rank) VALUES('merge', 1);
INSERT INTO x8(x8, rank) VALUES('automerge', 16);
INSERT INTO x8(x8) VALUES('integrity-check');
}
}
}
} {}
}
# Return 1 if any element of the list returned by [fts5_level_segs $tbl] is
# greater than 1, or 0 otherwise.
proc not_merged {tbl} {
  foreach nSeg [fts5_level_segs $tbl] {
    if {$nSeg > 1} {
      return 1
    }
  }
  return 0
}
do_merge2_test 2.1 5
do_merge2_test 2.2 10
do_merge2_test 2.3 20
#-------------------------------------------------------------------------
# Test that an auto-merge will complete any merge that has already been
# started, even if the number of input segments is less than the current
# value of the 'automerge' configuration parameter.
#
db func rnddoc fts5_rnddoc
do_execsql_test 3.1 {
DROP TABLE IF EXISTS x8;
CREATE VIRTUAL TABLE x8 USING fts5(i);
INSERT INTO x8(x8, rank) VALUES('pgsz', 32);
INSERT INTO x8 VALUES(rnddoc(100));
INSERT INTO x8 VALUES(rnddoc(100));
}
do_test 3.2 {
execsql {
INSERT INTO x8(x8, rank) VALUES('automerge', 4);
INSERT INTO x8(x8, rank) VALUES('merge', 1);
}
fts5_level_segs x8
} {2}
do_test 3.3 {
execsql {
INSERT INTO x8(x8, rank) VALUES('automerge', 2);
INSERT INTO x8(x8, rank) VALUES('merge', 1);
}
fts5_level_segs x8
} {2 1}
do_test 3.4 {
execsql { INSERT INTO x8(x8, rank) VALUES('automerge', 4) }
while {[not_merged x8]} {
execsql { INSERT INTO x8(x8, rank) VALUES('merge', 1) }
}
fts5_level_segs x8
} {0 1}
#-------------------------------------------------------------------------
#
# Return a document consisting of one randomly chosen letter from a-j,
# repeated 30 times, each occurrence followed by a single space.
proc mydoc {} {
  # Braced so that the expression is parsed once by [expr] rather than being
  # substituted by the Tcl parser first.
  set x [lindex {a b c d e f g h i j} [expr {int(rand()*10)}]]
  return [string repeat "$x " 30]
}
db func mydoc mydoc
# Return a list of ten row counts: the number of rows of x8 matching each of
# the terms a, b, ... j, in that order.
proc mycount {} {
  set counts [list]
  # The loop variable must be called "x": the SQL text refers to it as $x.
  foreach x [list a b c d e f g h i j] {
    set n [db one {SELECT count(*) FROM x8 WHERE x8 MATCH $x}]
    lappend counts $n
  }
  return $counts
}
#1 32
# 4.*: for each {test-number page-size} pair, check that a ('merge', 1)
# command does not change the per-term row counts reported by [mycount].
foreach {tn pgsz} {
2 1000
} {
do_execsql_test 4.$tn.1 {
DROP TABLE IF EXISTS x8;
CREATE VIRTUAL TABLE x8 USING fts5(i);
INSERT INTO x8(x8, rank) VALUES('pgsz', $pgsz);
}
# A merge command issued while the table is still empty.
do_execsql_test 4.$tn.2 {
INSERT INTO x8(x8, rank) VALUES('merge', 1);
}
# Two batches of 100 documents produced by mydoc().
do_execsql_test 4.$tn.3 {
WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<100)
INSERT INTO x8 SELECT mydoc() FROM ii;
WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<100)
INSERT INTO x8 SELECT mydoc() FROM ii;
INSERT INTO x8(x8, rank) VALUES('automerge', 2);
}
# Counts before any further merging; they must be unchanged afterwards.
set expect [mycount]
# NOTE(review): the unconditional "break" below ends the loop after its
# first iteration, so only sub-test 4.$tn.4.0 ever runs although the loop
# header allows for 20 -- confirm whether this is intentional.
for {set i 0} {$i < 20} {incr i} {
do_test 4.$tn.4.$i {
execsql { INSERT INTO x8(x8, rank) VALUES('merge', 1); }
mycount
} $expect
break
}
# db eval {SELECT fts5_decode(rowid, block) AS r FROM x8_data} { puts $r }
}
finish_test

View File

@@ -0,0 +1,71 @@
# 2014 Jan 08
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focused on the NEAR operator.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5near
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
# Usage: do_near_test TN DOC NEAR RES
#
# Replaces the contents of table t1 with the single document DOC, then
# checks that the number of rows matching the query expression NEAR equals
# RES. DOC and NEAR are spliced into the SQL text inside single quotes. The
# underlying do_execsql_test call is made in the caller's context.
proc do_near_test {tn doc near res} {
  # Assemble the script line by line; the leading and trailing empty
  # elements give it a newline at the start and at the end.
  set sql [join [list \
    "" \
    "DELETE FROM t1;" \
    "INSERT INTO t1 VALUES('$doc');" \
    "SELECT count(*) FROM t1 WHERE t1 MATCH '$near';" \
    "" \
  ] "\n"]
  uplevel [list do_execsql_test $tn $sql $res]
}
execsql {
CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = "ascii tokenchars '.'")
}
do_near_test 1.1 ". . a . . . b . ." { NEAR(a b, 5) } 1
do_near_test 1.2 ". . a . . . b . ." { NEAR(a b, 4) } 1
do_near_test 1.3 ". . a . . . b . ." { NEAR(a b, 3) } 1
do_near_test 1.4 ". . a . . . b . ." { NEAR(a b, 2) } 0
do_near_test 1.5 ". . a . . . b . ." { NEAR(b a, 5) } 1
do_near_test 1.6 ". . a . . . b . ." { NEAR(b a, 4) } 1
do_near_test 1.7 ". . a . . . b . ." { NEAR(b a, 3) } 1
do_near_test 1.8 ". . a . . . b . ." { NEAR(b a, 2) } 0
do_near_test 1.9 ". a b . . . c . ." { NEAR("a b" c, 3) } 1
do_near_test 1.10 ". a b . . . c . ." { NEAR("a b" c, 2) } 0
do_near_test 1.11 ". a b . . . c . ." { NEAR(c "a b", 3) } 1
do_near_test 1.12 ". a b . . . c . ." { NEAR(c "a b", 2) } 0
do_near_test 1.13 ". a b . . . c d ." { NEAR(a+b c+d, 3) } 1
do_near_test 1.14 ". a b . . . c d ." { NEAR(a+b c+d, 2) } 0
do_near_test 1.15 ". a b . . . c d ." { NEAR(c+d a+b, 3) } 1
do_near_test 1.16 ". a b . . . c d ." { NEAR(c+d a+b, 2) } 0
do_near_test 1.17 ". a b . . . c d ." { NEAR(a b c d, 5) } 1
do_near_test 1.18 ". a b . . . c d ." { NEAR(a b c d, 4) } 0
do_near_test 1.19 ". a b . . . c d ." { NEAR(a+b c d, 4) } 1
do_near_test 1.20 "a b c d e f g h i" { NEAR(b+c a+b+c+d i, 5) } 1
do_near_test 1.21 "a b c d e f g h i" { NEAR(b+c a+b+c+d i, 4) } 0
do_near_test 1.22 "a b c d e f g h i" { NEAR(a+b+c+d i b+c, 5) } 1
do_near_test 1.23 "a b c d e f g h i" { NEAR(a+b+c+d i b+c, 4) } 0
do_near_test 1.24 "a b c d e f g h i" { NEAR(i a+b+c+d b+c, 5) } 1
do_near_test 1.25 "a b c d e f g h i" { NEAR(i a+b+c+d b+c, 4) } 0
finish_test

View File

@@ -0,0 +1,66 @@
# 2014 Dec 20
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5optimize
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
# Return a list of $nWord words, each a single lower-case letter a-z chosen
# uniformly at random.
proc rnddoc {nWord} {
  set letters [split abcdefghijklmnopqrstuvwxyz ""]
  set nLetter [llength $letters]
  set doc [list]
  set n 0
  while {$n < $nWord} {
    set pick [expr {int(rand() * $nLetter)}]
    lappend doc [lindex $letters $pick]
    incr n
  }
  return $doc
}
# For each {test-number row-count} pair: build a fresh two-column fts5 table
# from nStep separate INSERT statements, then check that the index passes
# the integrity-check both before and after an 'optimize' command.
#
# All four configurations are run. An "if {$tn!=4} continue" statement at
# the top of the loop body used to skip configurations 1-3.
foreach {tn nStep} {
  1 2
  2 10
  3 50
  4 500
} {
  reset_db
  db func rnddoc rnddoc

  do_execsql_test 1.$tn.1 {
    CREATE VIRTUAL TABLE t1 USING fts5(x, y);
  }

  do_test 1.$tn.2 {
    for {set i 0} {$i < $nStep} {incr i} {
      execsql { INSERT INTO t1 VALUES( rnddoc(5), rnddoc(5) ) }
    }
  } {}

  do_execsql_test 1.$tn.3 {
    INSERT INTO t1(t1) VALUES('integrity-check');
  }

  do_execsql_test 1.$tn.4 {
    INSERT INTO t1(t1) VALUES('optimize');
  }

  do_execsql_test 1.$tn.5 {
    INSERT INTO t1(t1) VALUES('integrity-check');
  }
}
finish_test

View File

@@ -0,0 +1,67 @@
# 2014 Dec 20
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file focuses on testing the planner (xBestIndex function).
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5plan
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
do_execsql_test 1.0 {
CREATE TABLE t1(x, y);
CREATE VIRTUAL TABLE f1 USING fts5(ff);
}
do_eqp_test 1.1 {
SELECT * FROM t1, f1 WHERE f1 MATCH t1.x
} {
0 0 0 {SCAN TABLE t1}
0 1 1 {SCAN TABLE f1 VIRTUAL TABLE INDEX 1:}
}
do_eqp_test 1.2 {
SELECT * FROM t1, f1 WHERE f1 > t1.x
} {
0 0 1 {SCAN TABLE f1 VIRTUAL TABLE INDEX 0:}
0 1 0 {SCAN TABLE t1}
}
do_eqp_test 1.3 {
SELECT * FROM f1 WHERE f1 MATCH ? ORDER BY ff
} {
0 0 0 {SCAN TABLE f1 VIRTUAL TABLE INDEX 1:}
0 0 0 {USE TEMP B-TREE FOR ORDER BY}
}
do_eqp_test 1.4 {
SELECT * FROM f1 ORDER BY rank
} {
0 0 0 {SCAN TABLE f1 VIRTUAL TABLE INDEX 0:}
0 0 0 {USE TEMP B-TREE FOR ORDER BY}
}
do_eqp_test 1.5 {
SELECT * FROM f1 WHERE rank MATCH ?
} {
0 0 0 {SCAN TABLE f1 VIRTUAL TABLE INDEX 2:}
}
finish_test

11806
ext/fts5/test/fts5porter.test Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,70 @@
# 2014 Dec 20
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on the fts5 porter stemmer implementation.
#
# These are extra tests added to those in fts5porter.test in order to
# improve test coverage of the porter stemmer implementation.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5porter2
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
set test_vocab {
tion tion
ation ation
vation vation
avation avat
vion vion
ion ion
relational relat
relation relat
relate relat
zzz zzz
ii ii
iiing ii
xtional xtional
xenci xenci
xlogi xlogi
realization realiz
realize realiz
xization xizat
capitalism capit
talism talism
xiveness xive
xfulness xful
xousness xous
xical xical
xicate xicat
xicity xiciti
ies ie
eed e
eing e
s s
}
set i 0
foreach {in out} $test_vocab {
do_test "1.$i.($in -> $out)" {
lindex [sqlite3_fts5_tokenize db porter $in] 0
} $out
incr i
}
finish_test

View File

@@ -0,0 +1,67 @@
# 2015 Jan 13
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file contains tests focused on prefix indexes.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5prefix
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE xx USING fts5(x, prefix=1);
INSERT INTO xx VALUES('one two three');
INSERT INTO xx VALUES('four five six');
INSERT INTO xx VALUES('seven eight nine ten');
}
do_execsql_test 1.1 {
SELECT rowid FROM xx WHERE xx MATCH 't*'
} {1 3}
#-------------------------------------------------------------------------
# Check that prefix indexes really do index n-character prefixes, not
# n-byte prefixes. Use the ascii tokenizer so as not to be confused by
# diacritic removal.
#
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = ascii, prefix = 2)
}
do_test 2.1 {
foreach {rowid string} {
1 "\xCA\xCB\xCC\xCD"
2 "\u1234\u5678\u4321\u8765"
} {
execsql { INSERT INTO t1(rowid, x) VALUES($rowid, $string) }
}
} {}
do_execsql_test 2.2 {
INSERT INTO t1(t1) VALUES('integrity-check');
}
foreach {tn q res} {
1 "SELECT rowid FROM t1 WHERE t1 MATCH '\xCA\xCB*'" 1
2 "SELECT rowid FROM t1 WHERE t1 MATCH '\u1234\u5678*'" 2
} {
do_execsql_test 2.3.$tn $q $res
}
finish_test

View File

@@ -0,0 +1,45 @@
# 2014 Dec 20
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file focuses on testing queries that use the "rank" column.
#
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5rank
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
finish_test
return
}
#-------------------------------------------------------------------------
# "ORDER BY rank" + highlight() + large poslists.
#
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE xyz USING fts5(z);
}
do_test 1.1 {
set doc [string trim [string repeat "x y " 500]]
execsql { INSERT INTO xyz VALUES($doc) }
} {}
do_execsql_test 1.2 {
SELECT highlight(xyz, 0, '[', ']') FROM xyz WHERE xyz MATCH 'x' ORDER BY rank
} [list [string map {x [x]} $doc]]
do_execsql_test 1.3 {
SELECT highlight(xyz, 0, '[', ']') FROM xyz
WHERE xyz MATCH 'x AND y' ORDER BY rank
} [list [string map {x [x] y [y]} $doc]]
finish_test

Some files were not shown because too many files have changed in this diff Show More