mirror of
https://github.com/sqlite/sqlite.git
synced 2025-12-24 14:17:58 +03:00
Enable prefix-search in query-parsing and snippet generation. If the
character immediately after the end of a term is '*', that term is marked for prefix matching. Modify term comparison in snippetOffsetsOfColumn() to respect isPrefix. fts2n.test runs prefix searching through some obvious test cases. (CVS 3893) FossilOrigin-Name: 7c4c65924035d9f260f6b64eb92c5c6cf6c04b7b
This commit is contained in:
@@ -3056,8 +3056,10 @@ static void snippetOffsetsOfColumn(
|
||||
int iCol;
|
||||
iCol = aTerm[i].iColumn;
|
||||
if( iCol>=0 && iCol<nColumn && iCol!=iColumn ) continue;
|
||||
if( aTerm[i].nTerm!=nToken ) continue;
|
||||
if( memcmp(aTerm[i].pTerm, zToken, nToken) ) continue;
|
||||
if( aTerm[i].nTerm>nToken ) continue;
|
||||
if( !aTerm[i].isPrefix && aTerm[i].nTerm<nToken ) continue;
|
||||
assert( aTerm[i].nTerm<=nToken );
|
||||
if( memcmp(aTerm[i].pTerm, zToken, aTerm[i].nTerm) ) continue;
|
||||
if( aTerm[i].iPhrase>1 && (prevMatch & (1<<i))==0 ) continue;
|
||||
match |= 1<<i;
|
||||
if( i==nTerm-1 || aTerm[i+1].iPhrase==1 ){
|
||||
@@ -3500,6 +3502,9 @@ static int tokenizeSegment(
|
||||
if( !inPhrase && iBegin>0 && pSegment[iBegin-1]=='-' ){
|
||||
pQuery->pTerms[pQuery->nTerms-1].isNot = 1;
|
||||
}
|
||||
if( iEnd<nSegment && pSegment[iEnd]=='*' ){
|
||||
pQuery->pTerms[pQuery->nTerms-1].isPrefix = 1;
|
||||
}
|
||||
pQuery->pTerms[pQuery->nTerms-1].iPhrase = nTerm;
|
||||
if( inPhrase ){
|
||||
nTerm++;
|
||||
|
||||
15
manifest
15
manifest
@@ -1,5 +1,5 @@
|
||||
C First\sapproximation\sof\sincremental\sblob\sIO\sAPI.\s(CVS\s3892)
|
||||
D 2007-05-01T17:49:49
|
||||
C Enable\sprefix-search\sin\squery-parsing\sand\ssnippet\sgeneration.\s\sIf\sthe\ncharacter\simmediately\safter\sthe\send\sof\sa\sterm\sis\s'*',\sthat\sterm\sis\nmarked\sfor\sprefix\smatching.\s\sModify\sterm\scomparison\sin\nsnippetOffsetsOfColumn()\sto\srespect\sisPrefix.\s\sfts2n.test\sruns\sprefix\nsearching\sthrough\ssome\sobvious\stest\scases.\s(CVS\s3893)
|
||||
D 2007-05-01T18:25:53
|
||||
F Makefile.in 8cab54f7c9f5af8f22fd97ddf1ecfd1e1860de62
|
||||
F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935
|
||||
F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028
|
||||
@@ -34,7 +34,7 @@ F ext/fts1/fulltext.h 08525a47852d1d62a0be81d3fc3fe2d23b094efd
|
||||
F ext/fts1/simple_tokenizer.c 1844d72f7194c3fd3d7e4173053911bf0661b70d
|
||||
F ext/fts1/tokenizer.h 0c53421b832366d20d720d21ea3e1f6e66a36ef9
|
||||
F ext/fts2/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
|
||||
F ext/fts2/fts2.c a6762b7a6cc173eb83a0aa9506c1b7be66f00786
|
||||
F ext/fts2/fts2.c 9e1f5942fc063f2d6778bc44372eba9b2b43eebd
|
||||
F ext/fts2/fts2.h 591916a822cfb6426518fdbf6069359119bc46eb
|
||||
F ext/fts2/fts2_hash.c b3f22116d4ef0bc8f2da6e3fdc435c86d0951a9b
|
||||
F ext/fts2/fts2_hash.h e283308156018329f042816eb09334df714e105e
|
||||
@@ -236,6 +236,7 @@ F test/fts2j.test f68d7611f76309bc8b94170f3740d9fbbc061d9b
|
||||
F test/fts2k.test 222d0b3bc8667753f18406aaea9906a6098ea016
|
||||
F test/fts2l.test 4c53c89ce3919003765ff4fd8d98ecf724d97dd3
|
||||
F test/fts2m.test 4b30142ead6f3ed076e880a2a464064c5ad58c51
|
||||
F test/fts2n.test a70357e72742681eaebfdbe9007b87ff3b771638
|
||||
F test/func.test 8a3bc8e8365dc0053c826923c0f738645f50f2f5
|
||||
F test/hook.test 7e7645fd9a033f79cce8fdff151e32715e7ec50a
|
||||
F test/in.test 369cb2aa1eab02296b4ec470732fe8c131260b1d
|
||||
@@ -468,7 +469,7 @@ F www/tclsqlite.tcl bb0d1357328a42b1993d78573e587c6dcbc964b9
|
||||
F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0
|
||||
F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b
|
||||
F www/whentouse.tcl fc46eae081251c3c181bd79c5faef8195d7991a5
|
||||
P 72c796307338c2751a91c30f6fb16989afbf3816
|
||||
R 30bd64cea77836fbaf98d3678ccdaa09
|
||||
U danielk1977
|
||||
Z 5cdc2c295a9fcd37c90d369f1b123096
|
||||
P c444836e7b690c16dd6acff571c613a23beb42dc
|
||||
R 15961852c9b829c1a838faae38aeed43
|
||||
U shess
|
||||
Z c808046b1e2602c778234ecda5713f7c
|
||||
|
||||
@@ -1 +1 @@
|
||||
c444836e7b690c16dd6acff571c613a23beb42dc
|
||||
7c4c65924035d9f260f6b64eb92c5c6cf6c04b7b
|
||||
196
test/fts2n.test
Normal file
196
test/fts2n.test
Normal file
@@ -0,0 +1,196 @@
|
||||
# 2007 April 26
|
||||
#
|
||||
# The author disclaims copyright to this source code.
|
||||
#
|
||||
#*************************************************************************
|
||||
# This file implements tests for prefix-searching in the fts2
|
||||
# component of the SQLite library.
|
||||
#
|
||||
# $Id: fts2n.test,v 1.1 2007/05/01 18:25:53 shess Exp $
|
||||
#
|
||||
|
||||
set testdir [file dirname $argv0]
|
||||
source $testdir/tester.tcl
|
||||
|
||||
# If SQLITE_ENABLE_FTS2 is not defined, omit this file.
|
||||
ifcapable !fts2 {
|
||||
finish_test
|
||||
return
|
||||
}
|
||||
|
||||
# A large string to prime the pump with.
|
||||
set text {
|
||||
Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Maecenas
|
||||
iaculis mollis ipsum. Praesent rhoncus placerat justo. Duis non quam
|
||||
sed turpis posuere placerat. Curabitur et lorem in lorem porttitor
|
||||
aliquet. Pellentesque bibendum tincidunt diam. Vestibulum blandit
|
||||
ante nec elit. In sapien diam, facilisis eget, dictum sed, viverra
|
||||
at, felis. Vestibulum magna. Sed magna dolor, vestibulum rhoncus,
|
||||
ornare vel, vulputate sit amet, felis. Integer malesuada, tellus at
|
||||
luctus gravida, diam nunc porta nibh, nec imperdiet massa metus eu
|
||||
lectus. Aliquam nisi. Nunc fringilla nulla at lectus. Suspendisse
|
||||
potenti. Cum sociis natoque penatibus et magnis dis parturient
|
||||
montes, nascetur ridiculus mus. Pellentesque odio nulla, feugiat eu,
|
||||
suscipit nec, consequat quis, risus.
|
||||
}
|
||||
|
||||
db eval {
|
||||
CREATE VIRTUAL TABLE t1 USING fts2(c);
|
||||
|
||||
INSERT INTO t1(rowid, c) VALUES(1, $text);
|
||||
INSERT INTO t1(rowid, c) VALUES(2, 'Another lovely row');
|
||||
}
|
||||
|
||||
# Exact match
|
||||
do_test fts2n-1.1 {
|
||||
execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lorem'"
|
||||
} {1}
|
||||
|
||||
# And a prefix
|
||||
do_test fts2n-1.2 {
|
||||
execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lore*'"
|
||||
} {1}
|
||||
|
||||
# Prefix includes exact match
|
||||
do_test fts2n-1.3 {
|
||||
execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lorem*'"
|
||||
} {1}
|
||||
|
||||
# Make certain everything isn't considered a prefix!
|
||||
do_test fts2n-1.4 {
|
||||
execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lore'"
|
||||
} {}
|
||||
|
||||
# Prefix across multiple rows.
|
||||
do_test fts2n-1.5 {
|
||||
execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lo*'"
|
||||
} {1 2}
|
||||
|
||||
# Likewise, with multiple hits in one document.
|
||||
do_test fts2n-1.6 {
|
||||
execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'l*'"
|
||||
} {1 2}
|
||||
|
||||
# Prefix which should only hit one document.
|
||||
do_test fts2n-1.7 {
|
||||
execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lov*'"
|
||||
} {2}
|
||||
|
||||
# A * that does not immediately follow a term is dropped.
|
||||
do_test fts2n-1.8 {
|
||||
execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lo *'"
|
||||
} {}
|
||||
|
||||
# Stand-alone * is dropped.
|
||||
do_test fts2n-1.9 {
|
||||
execsql "SELECT rowid FROM t1 WHERE t1 MATCH '*'"
|
||||
} {}
|
||||
|
||||
# Phrase-query prefix.
|
||||
do_test fts2n-1.10 {
|
||||
execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"lovely r*\"'"
|
||||
} {2}
|
||||
do_test fts2n-1.11 {
|
||||
execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"lovely r\"'"
|
||||
} {}
|
||||
|
||||
# Phrase query with multiple prefix matches.
|
||||
do_test fts2n-1.12 {
|
||||
execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"a* l*\"'"
|
||||
} {1 2}
|
||||
|
||||
# Phrase query with multiple prefix matches followed by an exact term.
|
||||
do_test fts2n-1.13 {
|
||||
execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"a* l* row\"'"
|
||||
} {2}
|
||||
|
||||
|
||||
|
||||
|
||||
# Test across updates (and, by implication, deletes).
|
||||
|
||||
# Version of text without "lorem".
|
||||
regsub -all {[Ll]orem} $text '' ntext
|
||||
|
||||
db eval {
|
||||
CREATE VIRTUAL TABLE t2 USING fts2(c);
|
||||
|
||||
INSERT INTO t2(rowid, c) VALUES(1, $text);
|
||||
INSERT INTO t2(rowid, c) VALUES(2, 'Another lovely row');
|
||||
UPDATE t2 SET c = $ntext WHERE rowid = 1;
|
||||
}
|
||||
|
||||
# Can't see lorem as an exact match.
|
||||
do_test fts2n-2.1 {
|
||||
execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lorem'"
|
||||
} {}
|
||||
|
||||
# Can't see a prefix of lorem, either.
|
||||
do_test fts2n-2.2 {
|
||||
execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lore*'"
|
||||
} {}
|
||||
|
||||
# Can see lovely in the other document.
|
||||
do_test fts2n-2.3 {
|
||||
execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lo*'"
|
||||
} {2}
|
||||
|
||||
# Can still see other hits.
|
||||
do_test fts2n-2.4 {
|
||||
execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'l*'"
|
||||
} {1 2}
|
||||
|
||||
# Prefix which should only hit one document.
|
||||
do_test fts2n-2.5 {
|
||||
execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lov*'"
|
||||
} {2}
|
||||
|
||||
|
||||
|
||||
# Test with a segment which will have multiple levels in the tree.
|
||||
|
||||
# Build a big document with lots of unique terms.
|
||||
set bigtext $text
|
||||
foreach c {a b c d e} {
|
||||
regsub -all {[A-Za-z]+} $bigtext "&$c" t
|
||||
append bigtext $t
|
||||
}
|
||||
|
||||
# Populate a table with many copies of the big document, so that we
|
||||
# can test the number of hits found. Populate $ret with the expected
|
||||
# hit counts for each row. offsets() returns 4 elements for every
|
||||
# hit. We'll have 6 hits for row 1, 1 for row 2, and 6*(2^5)==192 for
|
||||
# $bigtext.
|
||||
set ret {6 1}
|
||||
db eval {
|
||||
BEGIN;
|
||||
CREATE VIRTUAL TABLE t3 USING fts2(c);
|
||||
|
||||
INSERT INTO t3(rowid, c) VALUES(1, $text);
|
||||
INSERT INTO t3(rowid, c) VALUES(2, 'Another lovely row');
|
||||
}
|
||||
for {set i 0} {$i<100} {incr i} {
|
||||
db eval {INSERT INTO t3(rowid, c) VALUES(3+$i, $bigtext)}
|
||||
lappend ret 192
|
||||
}
|
||||
db eval {COMMIT;}
|
||||
|
||||
# Test that we get the expected number of hits.
|
||||
do_test fts2n-3.1 {
|
||||
set t {}
|
||||
db eval {SELECT offsets(t3) as o FROM t3 WHERE t3 MATCH 'l*'} {
|
||||
set l [llength $o]
|
||||
lappend t [expr {$l/4}]
|
||||
}
|
||||
set t
|
||||
} $ret
|
||||
|
||||
# TODO(shess) It would be useful to test a couple of edge cases, but I
|
||||
# don't know if we have the precision to manage it from here at this
|
||||
# time. Prefix hits can cross leaves, which the code above _should_
|
||||
# hit by virtue of size. There are two variations on this. If the
|
||||
# tree is 2 levels high, the code will find the leaf-node extent
|
||||
# directly, but if it's higher, the code will have to follow two
|
||||
# separate interior branches down the tree. Both should be tested.
|
||||
|
||||
finish_test
|
||||
Reference in New Issue
Block a user