diff --git a/ext/fts3/fts3.c b/ext/fts3/fts3.c
index c9beba6adc..fe347ba3ec 100644
--- a/ext/fts3/fts3.c
+++ b/ext/fts3/fts3.c
@@ -1621,10 +1621,58 @@ static int fts3DoclistMerge(
 typedef struct TermSelect TermSelect;
 struct TermSelect {
   int isReqPos;
-  char *aOutput;                  /* Malloc'd output buffer */
-  int nOutput;                    /* Size of output in bytes */
+  char *aaOutput[16];             /* Malloc'd output buffer */
+  int anOutput[16];               /* Size of output in bytes */
 };
 
+/*
+** Merge all doclists in the TermSelect.aaOutput[] array into a single
+** doclist stored in TermSelect.aaOutput[0]. If successful, all other
+** aaOutput[] entries are freed and set to 0, and SQLITE_OK is returned.
+**
+** If an OOM error occurs, return SQLITE_NOMEM. Any doclist this function
+** has taken out of aaOutput[] is freed here; it is the responsibility of
+** the caller to free any doclists left in the aaOutput[] array.
+*/
+static int fts3TermSelectMerge(TermSelect *pTS){
+  int mergetype = (pTS->isReqPos ? MERGE_POS_OR : MERGE_OR);
+  char *aOut = 0;
+  int nOut = 0;
+  int i;
+
+  /* Loop through the doclists in the aaOutput[] array. Merge them all
+  ** into a single doclist. aOut/nOut hold the running merge result.
+  */
+  for(i=0; i<SizeofArray(pTS->aaOutput); i++){
+    if( pTS->aaOutput[i] ){
+      if( !aOut ){
+        aOut = pTS->aaOutput[i];
+        nOut = pTS->anOutput[i];
+        pTS->aaOutput[i] = 0;     /* aOut owns it now; avoids double free */
+      }else{
+        int nNew = nOut + pTS->anOutput[i];
+        char *aNew = sqlite3_malloc(nNew);
+        if( !aNew ){
+          sqlite3_free(aOut);
+          return SQLITE_NOMEM;
+        }
+        fts3DoclistMerge(mergetype, 0, 0,
+            aNew, &nNew, pTS->aaOutput[i], pTS->anOutput[i], aOut, nOut
+        );
+        sqlite3_free(pTS->aaOutput[i]);
+        sqlite3_free(aOut);
+        pTS->aaOutput[i] = 0;
+        aOut = aNew;
+        nOut = nNew;
+      }
+    }
+  }
+
+  pTS->aaOutput[0] = aOut;
+  pTS->anOutput[0] = nOut;
+  return SQLITE_OK;
+}
+
 /*
 ** This function is used as the sqlite3Fts3SegReaderIterate() callback when
 ** querying the full-text index for a doclist associated with a term or
@@ -1639,38 +1687,63 @@ static int fts3TermSelectCb(
   int nDoclist
 ){
   TermSelect *pTS = (TermSelect *)pContext;
-  int nNew = pTS->nOutput + nDoclist;
-  char *aNew = sqlite3_malloc(nNew);
 
   UNUSED_PARAMETER(p);
   UNUSED_PARAMETER(zTerm);
   UNUSED_PARAMETER(nTerm);
 
-  if( !aNew ){
-    return SQLITE_NOMEM;
-  }
-
-  if( pTS->nOutput==0 ){
+  if( pTS->aaOutput[0]==0 ){
     /* If this is the first term selected, copy the doclist to the output
     ** buffer using memcpy(). TODO: Add a way to transfer control of the
     ** aDoclist buffer from the caller so as to avoid the memcpy().
     */
-    memcpy(aNew, aDoclist, nDoclist);
+    pTS->aaOutput[0] = sqlite3_malloc(nDoclist);
+    pTS->anOutput[0] = nDoclist;
+    if( pTS->aaOutput[0] ){
+      memcpy(pTS->aaOutput[0], aDoclist, nDoclist);
+    }else{
+      return SQLITE_NOMEM;
+    }
   }else{
-    /* The output buffer is not empty. Merge doclist aDoclist with the
-    ** existing output. This can only happen with prefix-searches (as
-    ** searches for exact terms return exactly one doclist).
-    */
     int mergetype = (pTS->isReqPos ? MERGE_POS_OR : MERGE_OR);
-    fts3DoclistMerge(mergetype, 0, 0,
-        aNew, &nNew, pTS->aOutput, pTS->nOutput, aDoclist, nDoclist
-    );
+    char *aMerge = aDoclist;
+    int nMerge = nDoclist;
+    int iOut;
+
+    for(iOut=0; iOut<SizeofArray(pTS->aaOutput); iOut++){
+      char *aNew;
+      int nNew;
+      if( pTS->aaOutput[iOut]==0 ){
+        assert( iOut>0 );
+        pTS->aaOutput[iOut] = aMerge;
+        pTS->anOutput[iOut] = nMerge;
+        break;
+      }
+
+      nNew = nMerge + pTS->anOutput[iOut];
+      aNew = sqlite3_malloc(nNew);
+      if( !aNew ){
+        if( aMerge!=aDoclist ){
+          sqlite3_free(aMerge);
+        }
+        return SQLITE_NOMEM;
+      }
+      fts3DoclistMerge(mergetype, 0, 0,
+          aNew, &nNew, pTS->aaOutput[iOut], pTS->anOutput[iOut], aMerge, nMerge
+      );
+
+      if( iOut>0 ) sqlite3_free(aMerge);
+      sqlite3_free(pTS->aaOutput[iOut]);
+      pTS->aaOutput[iOut] = 0;
+
+      aMerge = aNew;
+      nMerge = nNew;
+      if( (iOut+1)==SizeofArray(pTS->aaOutput) ){
+        pTS->aaOutput[iOut] = aMerge;
+        pTS->anOutput[iOut] = nMerge;
+      }
+    }
   }
-
-  sqlite3_free(pTS->aOutput);
-  pTS->aOutput = aNew;
-  pTS->nOutput = nNew;
-
   return SQLITE_OK;
 }
 
@@ -1794,12 +1867,17 @@ static int fts3TermSelect(
   rc = sqlite3Fts3SegReaderIterate(p, apSegment, nSegment, &filter,
       fts3TermSelectCb, (void *)&tsc
   );
+  if( rc==SQLITE_OK ){
+    rc = fts3TermSelectMerge(&tsc);
+  }
 
   if( rc==SQLITE_OK ){
-    *ppOut = tsc.aOutput;
-    *pnOut = tsc.nOutput;
+    *ppOut = tsc.aaOutput[0];
+    *pnOut = tsc.anOutput[0];
   }else{
-    sqlite3_free(tsc.aOutput);
+    for(i=0; i<SizeofArray(tsc.aaOutput); i++){
+      sqlite3_free(tsc.aaOutput[i]);
+    }
   }
[... text lost in extraction: the final two context lines of the hunk above, any file diffs that followed it, and the header and opening lines of the test/fts3.test diff. The cleanup loop above (after "for(i=0; i") and the first removed line below (before "=0} continue") are reconstructed from the surrounding code - TODO confirm against the original patch ...]
-  if {[lsearch -exact $EXCLUDE $tail]>=0} continue
-  if {[llength $INCLUDE]>0 && [lsearch -exact $INCLUDE $tail]<0} continue
-  source $testfile
-  catch {db close}
-  if {$sqlite_open_file_count>0} {
-    puts "$tail did not close all files: $sqlite_open_file_count"
-    fail_test $tail
-    set sqlite_open_file_count 0
-  }
-}
-
-set sqlite_open_file_count 0
-really_finish_test
+finish_test
diff --git a/test/fts3an.test b/test/fts3an.test
index 77ca9e5ad4..5211027767 100644
--- a/test/fts3an.test
+++ b/test/fts3an.test
@@ -185,12 +185,34 @@ do_test fts3an-3.1 {
   set t
 } $ret
 
-# TODO(shess) It would be useful to test a couple edge cases, but I
-# don't know if we have the precision to manage it from here at this
-# time. Prefix hits can cross leaves, which the code above _should_
-# hit by virtue of size. There are two variations on this. If the
-# tree is 2 levels high, the code will find the leaf-node extent
-# directly, but if it is higher, the code will have to follow two
-# separate interior branches down the tree. Both should be tested.
+# Test a boundary condition: More than 2^16 terms that match a searched for
+# prefix in a single segment.
+#
+puts "This next test can take a little while (~ 30 seconds)..."
+do_test fts3an-4.1 {
+  execsql { CREATE VIRTUAL TABLE ft USING fts3(x) }
+  execsql BEGIN
+    execsql { INSERT INTO ft VALUES(NULL) }
+    execsql { INSERT INTO ft SELECT * FROM ft }    ;#      2
+    execsql { INSERT INTO ft SELECT * FROM ft }    ;#      4
+    execsql { INSERT INTO ft SELECT * FROM ft }    ;#      8
+    execsql { INSERT INTO ft SELECT * FROM ft }    ;#     16
+    execsql { INSERT INTO ft SELECT * FROM ft }    ;#     32
+    execsql { INSERT INTO ft SELECT * FROM ft }    ;#     64
+    execsql { INSERT INTO ft SELECT * FROM ft }    ;#    128
+    execsql { INSERT INTO ft SELECT * FROM ft }    ;#    256
+    execsql { INSERT INTO ft SELECT * FROM ft }    ;#    512
+    execsql { INSERT INTO ft SELECT * FROM ft }    ;#   1024
+    execsql { INSERT INTO ft SELECT * FROM ft }    ;#   2048
+    execsql { INSERT INTO ft SELECT * FROM ft }    ;#   4096
+    execsql { INSERT INTO ft SELECT * FROM ft }    ;#   8192
+    execsql { INSERT INTO ft SELECT * FROM ft }    ;#  16384
+    execsql { INSERT INTO ft SELECT * FROM ft }    ;#  32768
+    execsql { INSERT INTO ft SELECT * FROM ft }    ;#  65536
+    execsql { INSERT INTO ft SELECT * FROM ft }    ;# 131072
+  execsql COMMIT
+  execsql { UPDATE ft SET x = 'abc' || rowid }
+  execsql { SELECT count(*) FROM ft WHERE x MATCH 'abc*' }
+} {131072}
 
 finish_test
diff --git a/test/permutations.test b/test/permutations.test
index 4949e13ecc..f1fb74fe61 100644
--- a/test/permutations.test
+++ b/test/permutations.test
@@ -153,6 +153,17 @@ test_suite "threads" -prefix "" -description {
   thread004.test thread005.test walthread.test
 }
 
+test_suite "fts3" -prefix "" -description {
+  All FTS3 tests except fts3malloc.test and fts3rnd.test.
+} -files {
+  fts3aa.test fts3ab.test fts3ac.test fts3ad.test fts3ae.test
+  fts3af.test fts3ag.test fts3ah.test fts3ai.test fts3aj.test
+  fts3ak.test fts3al.test fts3am.test fts3an.test fts3ao.test
+  fts3atoken.test fts3b.test fts3c.test fts3cov.test fts3d.test
+  fts3e.test fts3expr.test fts3expr2.test fts3near.test
+  fts3query.test fts3snippet.test
+}
+
 lappend ::testsuitelist xxx
 #-------------------------------------------------------------------------