1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-07-30 19:03:16 +03:00

Fix a problem preventing delete markers from ever being removed from the FTS index.

FossilOrigin-Name: 7f47ae5c5ddb1227484ddae7c6960183932a052a
This commit is contained in:
dan
2014-05-13 20:11:37 +00:00
parent 6bb9889ef0
commit c0caea21de
5 changed files with 174 additions and 52 deletions

View File

@ -382,8 +382,9 @@ static int fts3SqlStmt(
"ORDER BY level DESC, idx ASC",
/* Update statements used while promoting segments */
/* 38 */ "UPDATE %Q.'%q_segdir' SET level=-1,idx=? WHERE level=? AND idx=?",
/* 39 */ "UPDATE %Q.'%q_segdir' SET level=? WHERE level=-1"
/* 38 */ "UPDATE OR FAIL %Q.'%q_segdir' SET level=-1,idx=? "
"WHERE level=? AND idx=?",
/* 39 */ "UPDATE OR FAIL %Q.'%q_segdir' SET level=? WHERE level=-1"
};
int rc = SQLITE_OK;
@ -3091,6 +3092,7 @@ static int fts3SegmentMerge(
Fts3SegFilter filter; /* Segment term filter condition */
Fts3MultiSegReader csr; /* Cursor to iterate through level(s) */
int bIgnoreEmpty = 0; /* True to ignore empty segments */
i64 iMaxLevel = 0; /* Max level number for this index/langid */
assert( iLevel==FTS3_SEGCURSOR_ALL
|| iLevel==FTS3_SEGCURSOR_PENDING
@ -3102,6 +3104,11 @@ static int fts3SegmentMerge(
rc = sqlite3Fts3SegReaderCursor(p, iLangid, iIndex, iLevel, 0, 0, 1, 0, &csr);
if( rc!=SQLITE_OK || csr.nSegment==0 ) goto finished;
if( iLevel!=FTS3_SEGCURSOR_PENDING ){
rc = fts3SegmentMaxLevel(p, iLangid, iIndex, &iMaxLevel);
if( rc!=SQLITE_OK ) goto finished;
}
if( iLevel==FTS3_SEGCURSOR_ALL ){
/* This call is to merge all segments in the database to a single
** segment. The level of the new segment is equal to the numerically
@ -3111,21 +3118,21 @@ static int fts3SegmentMerge(
rc = SQLITE_DONE;
goto finished;
}
rc = fts3SegmentMaxLevel(p, iLangid, iIndex, &iNewLevel);
iNewLevel = iMaxLevel;
bIgnoreEmpty = 1;
}else if( iLevel==FTS3_SEGCURSOR_PENDING ){
iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, 0);
rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, 0, &iIdx);
}else{
/* This call is to merge all segments at level iLevel. Find the next
** available segment index at level iLevel+1. The call to
** fts3AllocateSegdirIdx() will merge the segments at level iLevel+1 to
** a single iLevel+2 segment if necessary. */
rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, iLevel+1, &iIdx);
assert( FTS3_SEGCURSOR_PENDING==-1 );
iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, iLevel+1);
rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, iLevel+1, &iIdx);
bIgnoreEmpty = (iLevel!=FTS3_SEGCURSOR_PENDING) && (iNewLevel>iMaxLevel);
}
if( rc!=SQLITE_OK ) goto finished;
assert( csr.nSegment>0 );
assert( iNewLevel>=getAbsoluteLevel(p, iLangid, iIndex, 0) );
assert( iNewLevel<getAbsoluteLevel(p, iLangid, iIndex,FTS3_SEGDIR_MAXLEVEL) );
@ -3142,7 +3149,7 @@ static int fts3SegmentMerge(
csr.zTerm, csr.nTerm, csr.aDoclist, csr.nDoclist);
}
if( rc!=SQLITE_OK ) goto finished;
assert( pWriter );
assert( pWriter || bIgnoreEmpty );
if( iLevel!=FTS3_SEGCURSOR_PENDING ){
rc = fts3DeleteSegdir(
@ -3150,9 +3157,11 @@ static int fts3SegmentMerge(
);
if( rc!=SQLITE_OK ) goto finished;
}
rc = fts3SegWriterFlush(p, pWriter, iNewLevel, iIdx);
if( rc==SQLITE_OK ){
rc = fts3PromoteSegments(p, iNewLevel, pWriter->nLeafData);
if( pWriter ){
rc = fts3SegWriterFlush(p, pWriter, iNewLevel, iIdx);
if( rc==SQLITE_OK ){
rc = fts3PromoteSegments(p, iNewLevel, pWriter->nLeafData);
}
}
finished:
@ -4725,6 +4734,7 @@ int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){
const i64 nMod = FTS3_SEGDIR_MAXLEVEL * p->nIndex;
sqlite3_stmt *pFindLevel = 0; /* SQL used to determine iAbsLevel */
int bUseHint = 0; /* True if attempting to append */
int iIdx = 0; /* Largest idx in level (iAbsLevel+1) */
/* Search the %_segdir table for the absolute level with the smallest
** relative level number that contains at least nMin segments, if any.
@ -4778,6 +4788,12 @@ int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){
** to start work on some other level. */
memset(pWriter, 0, nAlloc);
pFilter->flags = FTS3_SEGMENT_REQUIRE_POS;
if( rc==SQLITE_OK ){
rc = fts3IncrmergeOutputIdx(p, iAbsLevel, &iIdx);
assert( bUseHint==1 || bUseHint==0 );
if( (iIdx-bUseHint)==0 ) pFilter->flags |= FTS3_SEGMENT_IGNORE_EMPTY;
}
if( rc==SQLITE_OK ){
rc = fts3IncrmergeCsr(p, iAbsLevel, nSeg, pCsr);
}
@ -4785,16 +4801,12 @@ int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){
&& SQLITE_OK==(rc = sqlite3Fts3SegReaderStart(p, pCsr, pFilter))
&& SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pCsr))
){
int iIdx = 0; /* Largest idx in level (iAbsLevel+1) */
rc = fts3IncrmergeOutputIdx(p, iAbsLevel, &iIdx);
if( rc==SQLITE_OK ){
if( bUseHint && iIdx>0 ){
const char *zKey = pCsr->zTerm;
int nKey = pCsr->nTerm;
rc = fts3IncrmergeLoad(p, iAbsLevel, iIdx-1, zKey, nKey, pWriter);
}else{
rc = fts3IncrmergeWriter(p, iAbsLevel, iIdx, pCsr, pWriter);
}
if( bUseHint && iIdx>0 ){
const char *zKey = pCsr->zTerm;
int nKey = pCsr->nTerm;
rc = fts3IncrmergeLoad(p, iAbsLevel, iIdx-1, zKey, nKey, pWriter);
}else{
rc = fts3IncrmergeWriter(p, iAbsLevel, iIdx, pCsr, pWriter);
}
if( rc==SQLITE_OK && pWriter->nLeafEst ){

View File

@ -1,5 +1,5 @@
C Experimental\scode\sto\sprevent\sFTS\sindexes\sfrom\sgrowing\sindefinitely\sas\sthe\stable\sis\supdated.
D 2014-05-12T20:04:48.571
C Fix\sa\sproblem\spreventing\sdelete\smarkers\sfrom\sever\sbeing\sremoved\sfrom\sthe\sFTS\sindex.
D 2014-05-13T20:11:37.423
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in dd2b1aba364ff9b05de41086f74407f285c57670
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -96,7 +96,7 @@ F ext/fts3/fts3_tokenizer.h 64c6ef6c5272c51ebe60fc607a896e84288fcbc3
F ext/fts3/fts3_tokenizer1.c 5c98225a53705e5ee34824087478cf477bdb7004
F ext/fts3/fts3_unicode.c 92391b4b4fb043564c6539ea9b8661e3bcba47b9
F ext/fts3/fts3_unicode2.c 0113d3acf13429e6dc38e0647d1bc71211c31a4d
F ext/fts3/fts3_write.c 5fd2aa9d1812387c6254304e20d9ac2b29e16700
F ext/fts3/fts3_write.c 283b24477729129a210d91b48f7c53181583a848
F ext/fts3/fts3speed.tcl b54caf6a18d38174f1a6e84219950d85e98bb1e9
F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100
F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197
@ -539,7 +539,7 @@ F test/fts3conf.test ee8500c86dd58ec075e8831a1e216a79989436de
F test/fts3corrupt.test 2710b77983cc7789295ddbffea52c1d3b7506dbb
F test/fts3corrupt2.test 6d96efae2f8a6af3eeaf283aba437e6d0e5447ba
F test/fts3cov.test e0fb00d8b715ddae4a94c305992dfc3ef70353d7
F test/fts3d.test 597b0b76e41f0d672e2731c4d7b631d628efd13f
F test/fts3d.test 95c17d1b67b33a5eac0bf5a0d11116a0c0ac7a3a
F test/fts3defer.test 0be4440b73a2e651fc1e472066686d6ada4b9963
F test/fts3defer2.test e880e3b65bdf999f4746cdaefa65f14a98b9b724
F test/fts3defer3.test dd53fc13223c6d8264a98244e9b19abd35ed71cd
@ -570,7 +570,7 @@ F test/fts4aa.test 0c3152322c7f0b548cc942ad763eaba0da87ccca
F test/fts4check.test 66fa274cab2b615f2fb338b257713aba8fad88a8
F test/fts4content.test 2e7252557d6d24afa101d9ba1de710d6140e6d06
F test/fts4docid.test e33c383cfbdff0284685604d256f347a18fdbf01
F test/fts4growth.test f7eac9fadfe67765c4a0d6202c85f7272766fb9e
F test/fts4growth.test 3b1f8c98b603b38dc9fe4a266f4f5ddb0c73f092
F test/fts4incr.test 361960ed3550e781f3f313e17e2182ef9cefc0e9
F test/fts4langid.test 24a6e41063b416bbdf371ff6b4476fa41c194aa7
F test/fts4merge.test c424309743fdd203f8e56a1f1cd7872cd66cc0ee
@ -1171,10 +1171,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh d1a6de74685f360ab718efda6265994b99bbea01
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
P 0a4f59676bd0ab33b2c86c9a35a2ebbdbaf09ee7
R de45a14af3a90f6b390273a9a2d8cfa9
T *branch * fts4-experimental
T *sym-fts4-experimental *
T -sym-trunk *
P b3b505a4dd0c679437a4272109f1188175088cd1
R d1a058fc899c4ecbd09fd7fceab935e7
U dan
Z a03d85f9bfa278d813164b1e97a88ff7
Z f678f48fb654227badac5ce98e23d62c

View File

@ -1 +1 @@
b3b505a4dd0c679437a4272109f1188175088cd1
7f47ae5c5ddb1227484ddae7c6960183932a052a

View File

@ -213,16 +213,17 @@ do_test fts3d-4.matches {
{0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4} \
{0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4}]
check_terms_all fts3d-4.1 {a four is one test that this three two was}
puts [db eval {SELECT c FROM t1 } ]
check_terms_all fts3d-4.1 {a four is test that this was}
check_doclist_all fts3d-4.1.1 a {[1 0[2]] [2 0[2]] [3 0[2]]}
check_doclist_all fts3d-4.1.2 four {}
check_doclist_all fts3d-4.1.3 is {[1 0[1]] [3 0[1]]}
check_doclist_all fts3d-4.1.4 one {}
#check_doclist_all fts3d-4.1.4 one {}
check_doclist_all fts3d-4.1.5 test {[1 0[3]] [2 0[3]] [3 0[3]]}
check_doclist_all fts3d-4.1.6 that {[2 0[0]]}
check_doclist_all fts3d-4.1.7 this {[1 0[0]] [3 0[0]]}
check_doclist_all fts3d-4.1.8 three {}
check_doclist_all fts3d-4.1.9 two {}
#check_doclist_all fts3d-4.1.8 three {}
#check_doclist_all fts3d-4.1.9 two {}
check_doclist_all fts3d-4.1.10 was {[2 0[1]]}
check_terms fts3d-4.2 0 0 {a four test that was}
@ -239,16 +240,16 @@ check_doclist fts3d-4.3.3 0 1 is {[3 0[1]]}
check_doclist fts3d-4.3.4 0 1 test {[3 0[3]]}
check_doclist fts3d-4.3.5 0 1 this {[3 0[0]]}
check_terms fts3d-4.4 1 0 {a four is one test that this three two was}
check_terms fts3d-4.4 1 0 {a four is test that this was}
check_doclist fts3d-4.4.1 1 0 a {[1 0[2]] [2 0[2]] [3 0[2]]}
check_doclist fts3d-4.4.2 1 0 four {[1] [2 0[4]] [3 0[4]]}
check_doclist fts3d-4.4.2 1 0 four {[2 0[4]] [3 0[4]]}
check_doclist fts3d-4.4.3 1 0 is {[1 0[1]] [3 0[1]]}
check_doclist fts3d-4.4.4 1 0 one {[1] [2] [3]}
#check_doclist fts3d-4.4.4 1 0 one {[1] [2] [3]}
check_doclist fts3d-4.4.5 1 0 test {[1 0[3]] [2 0[3]] [3 0[3]]}
check_doclist fts3d-4.4.6 1 0 that {[2 0[0]]}
check_doclist fts3d-4.4.7 1 0 this {[1 0[0]] [3 0[0]]}
check_doclist fts3d-4.4.8 1 0 three {[1] [2] [3]}
check_doclist fts3d-4.4.9 1 0 two {[1] [2] [3]}
#check_doclist fts3d-4.4.8 1 0 three {[1] [2] [3]}
#check_doclist fts3d-4.4.9 1 0 two {[1] [2] [3]}
check_doclist fts3d-4.4.10 1 0 was {[2 0[1]]}
# Optimize should leave the result in the level of the highest-level

View File

@ -151,7 +151,118 @@ do_execsql_test 2.8 {
} {86120}
#--------------------------------------------------------------------------
# Test that delete markers are removed from FTS segments when possible.
# It is only possible to remove delete markers when the output of the
# merge operation will become the oldest segment in the index.
#
# 3.1 - when the oldest segment is created by an 'optimize'.
# 3.2 - when the oldest segment is created by an incremental merge.
# 3.3 - by a crisis merge.
#
# insert_doc DOCID ?DOCID ...?
#
# For each DOCID argument, issue a REPLACE INTO x3 that stores one of nine
# fixed lines of text as the content of the row with that docid. The line
# is chosen by (DOCID % 9), so docids that are equal modulo 9 receive the
# same text.
#
# The text of the nine lines must not be altered: the tests that use this
# proc compare against exact values read back from x3_segdir.
#
proc insert_doc {args} {
  foreach iDoc $args {
    # The expression is braced so that it is parsed once by expr rather
    # than being substituted by Tcl first and then re-parsed.
    set L [lindex {
      {In your eagerness to engage the Trojans,}
      {dont any of you charge ahead of others,}
      {trusting in your strength and horsemanship.}
      {And dont lag behind. That will hurt our charge.}
      {Any man whose chariot confronts an enemys}
      {should thrust with his spear at him from there.}
      {Thats the most effective tactic, the way}
      {men wiped out city strongholds long ago —}
      {their chests full of that style and spirit.}
    } [expr {$iDoc % 9}]]
    execsql { REPLACE INTO x3(docid, content) VALUES($iDoc, $L) }
  }
}
# delete_doc DOCID ?DOCID ...?
#
# Delete from table x3 the row with each docid given as an argument, using
# one DELETE statement per docid, in the order the arguments are given.
#
proc delete_doc {args} {
foreach iDoc $args {
# $iDoc sits inside braces, so Tcl does not substitute it here; presumably
# it is resolved as a bound variable when the SQL is evaluated - confirm
# against the test harness's execsql.
execsql { DELETE FROM x3 WHERE docid = $iDoc }
}
}
# second LIST
#
# Return the element at index 1 (the second element) of LIST, or an empty
# string if LIST has fewer than two elements.
#
proc second {x} {
  return [lindex $x 1]
}
db func second second
# 3.0: Create the FTS4 table x3 used by all of the 3.* tests.
do_execsql_test 3.0 { CREATE VIRTUAL TABLE x3 USING fts4 }

# 3.1.1: Insert docs 1-6 inside a single transaction. Expects exactly one
# row in x3_segdir: level 0, idx 0, with second(end_block) equal to 412.
# (second() returns the element at index 1 of end_block; presumably that
# is the size of the segment's data - confirm against the FTS docs.)
do_test 3.1.1 {
db transaction { insert_doc 1 2 3 4 5 6 }
execsql { SELECT level, idx, second(end_block) FROM x3_segdir }
} {0 0 412}

# 3.1.2: Delete all six docs. Expects x3_segdir to be left with no rows.
do_test 3.1.2 {
delete_doc 1 2 3 4 5 6
execsql { SELECT count(*) FROM x3_segdir }
} {0}

# 3.1.3: In one transaction insert docs 1-9 and then delete docs 9, 8 and 7.
# Expects four level-0 segments (idx 0 through 3).
do_test 3.1.3 {
db transaction {
insert_doc 1 2 3 4 5 6 7 8 9
delete_doc 9 8 7
}
execsql { SELECT level, idx, second(end_block) FROM x3_segdir }
} {0 0 591 0 1 65 0 2 72 0 3 76}

# 3.1.4: Run 'optimize'. Expects a single segment whose second(end_block)
# value, 412, is the same as in test 3.1.1, where docs 1-6 were inserted
# and nothing had been deleted.
do_test 3.1.4 {
execsql { INSERT INTO x3(x3) VALUES('optimize') }
execsql { SELECT level, idx, second(end_block) FROM x3_segdir }
} {0 0 412}

# 3.2.1: Empty the table, insert docs 8 down to 1 and delete docs 7 and 8,
# this time without an enclosing transaction. Expects 10 rows in x3_segdir.
do_test 3.2.1 {
execsql { DELETE FROM x3 }
insert_doc 8 7 6 5 4 3 2 1
delete_doc 7 8
execsql { SELECT count(*) FROM x3_segdir }
} {10}

# 3.2.2: Run an incremental merge with 'merge=500,10'. Expects a single
# level-1 segment, again with second(end_block) equal to 412.
do_test 3.2.2 {
execsql { INSERT INTO x3(x3) VALUES('merge=500,10') }
execsql { SELECT level, idx, second(end_block) FROM x3_segdir }
} {1 0 412}
# This assumes the crisis merge happens when there are already 16
# segments and one more is added.
#
# 3.3.1: Empty the table, insert docs 1-11 and delete docs 11 down to 7.
# Expects 16 rows in x3_segdir.
do_test 3.3.1 {
execsql { DELETE FROM x3 }
insert_doc 1 2 3 4 5 6 7 8 9 10 11
delete_doc 11 10 9 8 7
execsql { SELECT count(*) FROM x3_segdir }
} {16}

# 3.3.2: Insert one more doc (12). Expects a single level-1 segment with
# second(end_block) equal to 412.
do_test 3.3.2 {
insert_doc 12
execsql { SELECT level, idx, second(end_block) FROM x3_segdir WHERE level=1 }
} {1 0 412}
#--------------------------------------------------------------------------
# 4.1: Start from a clean slate: drop any existing x4 and t1, then create
# the ordinary table t1(docid, words) and the FTS4 table x4(words).
do_execsql_test 4.1 {
DROP TABLE IF EXISTS x4;
DROP TABLE IF EXISTS t1;
CREATE TABLE t1(docid, words);
CREATE VIRTUAL TABLE x4 USING fts4(words);
}

# 4.2: Call fts_kjv_genesis (presumably this populates t1 - confirm against
# its definition), then copy the words column of t1 into x4 twice, using
# two separate INSERT ... SELECT statements.
do_test 4.2 {
fts_kjv_genesis
execsql { INSERT INTO x4 SELECT words FROM t1 }
execsql { INSERT INTO x4 SELECT words FROM t1 }
} {}

# 4.3: Expects two level-0 segments (idx 0 and idx 1) in x4_segdir.
do_execsql_test 4.3 {
SELECT level, idx, second(end_block) FROM x4_segdir
} {0 0 117483 0 1 118006}

# 4.4: Run an incremental merge with 'merge=10,2'. Expects 3 rows in
# x4_segdir afterwards.
do_execsql_test 4.4 {
INSERT INTO x4(x4) VALUES('merge=10,2');
SELECT count(*) FROM x4_segdir;
} {3}

# NOTE(review): the bare "breakpoint" command below looks like a debugging
# hook left in place - confirm whether it is still wanted.
breakpoint

# 4.5: Run the same 'merge=10,2' command again. Expects x4_segdir to still
# hold 3 rows.
do_execsql_test 4.5 {
INSERT INTO x4(x4) VALUES('merge=10,2');
SELECT count(*) FROM x4_segdir;
} {3}
if 0 {
do_execsql_test 3.1 {
DROP TABLE IF EXISTS x2;
DROP TABLE IF EXISTS t1;
@ -174,16 +285,17 @@ proc t1_to_x2 {} {
#execsql {SELECT level, count(*) FROM x2_segdir GROUP BY level}
#} {0 13 1 15 2 5}
#proc second {x} { lindex $x 1 }
#db func second second
#for {set i 0} {$i <1000} {incr i} {
# t1_to_x2
# db eval {
# SELECT level, group_concat( second(end_block), ' ' ) AS c FROM x2_segdir GROUP BY level;
# } {
# puts "$i.$level: $c"
# }
#}
proc second {x} { lindex $x 1 }
db func second second
for {set i 0} {$i <1000} {incr i} {
t1_to_x2
db eval {
SELECT level, group_concat( second(end_block), ' ' ) AS c FROM x2_segdir GROUP BY level;
} {
puts "$i.$level: $c"
}
}
}
finish_test