From 4f7c5e684a6d4dcb2cbd0db82c8d25140e8fa836 Mon Sep 17 00:00:00 2001 From: dan Date: Tue, 19 Oct 2010 14:07:59 +0000 Subject: [PATCH] Experimental changes to fts4 to try to selectively avoid loading very large doclists. FossilOrigin-Name: 5ae0ba447a561e3b6637b52f9b83a9fc683d2572 --- ext/fts3/fts3.c | 858 +++++++++++++++++++++++++++++++--------- ext/fts3/fts3Int.h | 52 ++- ext/fts3/fts3_expr.c | 6 +- ext/fts3/fts3_snippet.c | 6 +- ext/fts3/fts3_write.c | 403 +++++++++++++++---- ext/fts3/fts3speed.tcl | 123 ++++++ manifest | 46 +-- manifest.uuid | 2 +- test/fts3ah.test | 31 +- test/fts3cov.test | 11 +- test/fts3defer.test | 360 +++++++++++++++++ test/malloc_common.tcl | 8 +- test/permutations.test | 2 +- 13 files changed, 1571 insertions(+), 337 deletions(-) create mode 100644 ext/fts3/fts3speed.tcl create mode 100644 test/fts3defer.test diff --git a/ext/fts3/fts3.c b/ext/fts3/fts3.c index 5323a5c1ae..bfd0f57ee7 100644 --- a/ext/fts3/fts3.c +++ b/ext/fts3/fts3.c @@ -446,11 +446,7 @@ static int fts3DisconnectMethod(sqlite3_vtab *pVtab){ for(i=0; iaStmt); i++){ sqlite3_finalize(p->aStmt[i]); } - for(i=0; inLeavesStmt; i++){ - sqlite3_finalize(p->aLeavesStmt[i]); - } - sqlite3_free(p->zSelectLeaves); - sqlite3_free(p->aLeavesStmt); + sqlite3_free(p->zSegmentsTbl); /* Invoke the tokenizer destructor to free the tokenizer. */ p->pTokenizer->pModule->xDestroy(p->pTokenizer); @@ -461,7 +457,7 @@ static int fts3DisconnectMethod(sqlite3_vtab *pVtab){ /* ** Construct one or more SQL statements from the format string given -** and then evaluate those statements. The success code is writting +** and then evaluate those statements. The success code is written ** into *pRc. ** ** If *pRc is initially non-zero then this routine is a no-op. @@ -513,33 +509,38 @@ static int fts3DestroyMethod(sqlite3_vtab *pVtab){ ** Invoke sqlite3_declare_vtab() to declare the schema for the FTS3 table ** passed as the first argument. This is done as part of the xConnect() ** and xCreate() methods. 
+** +** If *pRc is non-zero when this function is called, it is a no-op. +** Otherwise, if an error occurs, an SQLite error code is stored in *pRc +** before returning. */ -static int fts3DeclareVtab(Fts3Table *p){ - int i; /* Iterator variable */ - int rc; /* Return code */ - char *zSql; /* SQL statement passed to declare_vtab() */ - char *zCols; /* List of user defined columns */ +static void fts3DeclareVtab(int *pRc, Fts3Table *p){ + if( *pRc==SQLITE_OK ){ + int i; /* Iterator variable */ + int rc; /* Return code */ + char *zSql; /* SQL statement passed to declare_vtab() */ + char *zCols; /* List of user defined columns */ - /* Create a list of user columns for the virtual table */ - zCols = sqlite3_mprintf("%Q, ", p->azColumn[0]); - for(i=1; zCols && inColumn; i++){ - zCols = sqlite3_mprintf("%z%Q, ", zCols, p->azColumn[i]); + /* Create a list of user columns for the virtual table */ + zCols = sqlite3_mprintf("%Q, ", p->azColumn[0]); + for(i=1; zCols && inColumn; i++){ + zCols = sqlite3_mprintf("%z%Q, ", zCols, p->azColumn[i]); + } + + /* Create the whole "CREATE TABLE" statement to pass to SQLite */ + zSql = sqlite3_mprintf( + "CREATE TABLE x(%s %Q HIDDEN, docid HIDDEN)", zCols, p->zName + ); + if( !zCols || !zSql ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_declare_vtab(p->db, zSql); + } + + sqlite3_free(zSql); + sqlite3_free(zCols); + *pRc = rc; } - - /* Create the whole "CREATE TABLE" statement to pass to SQLite */ - zSql = sqlite3_mprintf( - "CREATE TABLE x(%s %Q HIDDEN, docid HIDDEN)", zCols, p->zName - ); - - if( !zCols || !zSql ){ - rc = SQLITE_NOMEM; - }else{ - rc = sqlite3_declare_vtab(p->db, zSql); - } - - sqlite3_free(zSql); - sqlite3_free(zCols); - return rc; } /* @@ -639,6 +640,37 @@ static void fts3TableExists( if( rc!=SQLITE_ABORT ) *pRc = rc; } +/* +** Store the current database page-size in bytes in p->nPgsz. +** +** If *pRc is non-zero when this function is called, it is a no-op. 
+** Otherwise, if an error occurs, an SQLite error code is stored in *pRc +** before returning. +*/ +static void fts3DatabasePageSize(int *pRc, Fts3Table *p){ + if( *pRc==SQLITE_OK ){ + int rc; /* Return code */ + char *zSql; /* SQL text "PRAGMA %Q.page_size" */ + sqlite3_stmt *pStmt; /* Compiled "PRAGMA %Q.page_size" statement */ + + zSql = sqlite3_mprintf("PRAGMA %Q.page_size", p->zDb); + if( !zSql ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare(p->db, zSql, -1, &pStmt, 0); + if( rc==SQLITE_OK ){ + if( SQLITE_ROW==sqlite3_step(pStmt) ){ + p->nPgsz = sqlite3_column_int(pStmt, 0); + } + rc = sqlite3_finalize(pStmt); + } + } + assert( p->nPgsz>0 || rc!=SQLITE_OK ); + sqlite3_free(zSql); + *pRc = rc; + } +} + /* ** This function is the implementation of both the xConnect and xCreate ** methods of the FTS3 virtual table. @@ -763,12 +795,15 @@ static int fts3InitVtab( fts3TableExists(&rc, db, argv[1], argv[2], "_content", &p->bHasContent); fts3TableExists(&rc, db, argv[1], argv[2], "_docsize", &p->bHasDocsize); } - if( rc!=SQLITE_OK ) goto fts3_init_out; - rc = fts3DeclareVtab(p); - if( rc!=SQLITE_OK ) goto fts3_init_out; + /* Figure out the page-size for the database. This is required in order to + ** estimate the cost of loading large doclists from the database (see + ** function sqlite3Fts3SegReaderCost() for details). + */ + fts3DatabasePageSize(&rc, p); - *ppVTab = &p->base; + /* Declare the table schema to SQLite. 
*/ + fts3DeclareVtab(&rc, p); fts3_init_out: assert( p || (pTokenizer && rc!=SQLITE_OK) ); @@ -778,6 +813,8 @@ fts3_init_out: }else{ pTokenizer->pModule->xDestroy(pTokenizer); } + }else{ + *ppVTab = &p->base; } return rc; } @@ -893,6 +930,7 @@ static int fulltextClose(sqlite3_vtab_cursor *pCursor){ Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; sqlite3_finalize(pCsr->pStmt); sqlite3Fts3ExprFree(pCsr->pExpr); + sqlite3Fts3FreeDeferredTokens(pCsr); sqlite3_free(pCsr->aDoclist); sqlite3_free(pCsr->aMatchinfo); sqlite3_free(pCsr); @@ -930,36 +968,7 @@ static int fts3CursorSeek(sqlite3_context *pContext, Fts3Cursor *pCsr){ } } -/* -** Advance the cursor to the next row in the %_content table that -** matches the search criteria. For a MATCH search, this will be -** the next row that matches. For a full-table scan, this will be -** simply the next row in the %_content table. For a docid lookup, -** this routine simply sets the EOF flag. -** -** Return SQLITE_OK if nothing goes wrong. SQLITE_OK is returned -** even if we reach end-of-file. The fts3EofMethod() will be called -** subsequently to determine whether or not an EOF was hit. -*/ -static int fts3NextMethod(sqlite3_vtab_cursor *pCursor){ - int rc = SQLITE_OK; /* Return code */ - Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; - if( pCsr->aDoclist==0 ){ - if( SQLITE_ROW!=sqlite3_step(pCsr->pStmt) ){ - pCsr->isEof = 1; - rc = sqlite3_reset(pCsr->pStmt); - } - }else if( pCsr->pNextId>=&pCsr->aDoclist[pCsr->nDoclist] ){ - pCsr->isEof = 1; - }else{ - sqlite3_reset(pCsr->pStmt); - fts3GetDeltaVarint(&pCsr->pNextId, &pCsr->iPrevId); - pCsr->isRequireSeek = 1; - pCsr->isMatchinfoNeeded = 1; - } - return rc; -} /* @@ -1286,20 +1295,44 @@ static void fts3PoslistMerge( /* ** nToken==1 searches for adjacent positions. +** +** This function is used to merge two position lists into one. When it is +** called, *pp1 and *pp2 must both point to position lists. A position-list is +** the part of a doclist that follows each document id. 
For example, if a row +** contains: +** +** 'a b c'|'x y z'|'a b b a' +** +** Then the position list for this row for token 'b' would consist of: +** +** 0x02 0x01 0x02 0x03 0x03 0x00 +** +** When this function returns, both *pp1 and *pp2 are left pointing to the +** byte following the 0x00 terminator of their respective position lists. +** +** If isSaveLeft is 0, an entry is added to the output position list for +** each position in *pp2 for which there exists one or more positions in +** *pp1 so that (pos(*pp2)>pos(*pp1) && pos(*pp2)-pos(*pp1)<=nToken). i.e. +** when the *pp1 token appears before the *pp2 token, but not more than nToken +** slots before it. */ static int fts3PoslistPhraseMerge( - char **pp, /* Output buffer */ + char **pp, /* IN/OUT: Preallocated output buffer */ int nToken, /* Maximum difference in token positions */ int isSaveLeft, /* Save the left position */ - char **pp1, /* Left input list */ - char **pp2 /* Right input list */ + int isExact, /* If *pp1 is exactly nTokens before *pp2 */ + char **pp1, /* IN/OUT: Left input list */ + char **pp2 /* IN/OUT: Right input list */ ){ char *p = (pp ? *pp : 0); char *p1 = *pp1; char *p2 = *pp2; - int iCol1 = 0; int iCol2 = 0; + + /* Never set both isSaveLeft and isExact for the same invocation. 
*/ + assert( isSaveLeft==0 || isExact==0 ); + assert( *p1!=0 && *p2!=0 ); if( *p1==POS_COLUMN ){ p1++; @@ -1328,7 +1361,9 @@ static int fts3PoslistPhraseMerge( fts3GetDeltaVarint(&p2, &iPos2); iPos2 -= 2; while( 1 ){ - if( iPos2>iPos1 && iPos2<=iPos1+nToken ){ + if( iPos2==iPos1+nToken + || (isExact==0 && iPos2>iPos1 && iPos2<=iPos1+nToken) + ){ sqlite3_int64 iSave; if( !pp ){ fts3PoslistCopy(0, &p2); @@ -1411,21 +1446,21 @@ static int fts3PoslistNearMerge( char *p2 = *pp2; if( !pp ){ - if( fts3PoslistPhraseMerge(0, nRight, 0, pp1, pp2) ) return 1; + if( fts3PoslistPhraseMerge(0, nRight, 0, 0, pp1, pp2) ) return 1; *pp1 = p1; *pp2 = p2; - return fts3PoslistPhraseMerge(0, nLeft, 0, pp2, pp1); + return fts3PoslistPhraseMerge(0, nLeft, 0, 0, pp2, pp1); }else{ char *pTmp1 = aTmp; char *pTmp2; char *aTmp2; int res = 1; - fts3PoslistPhraseMerge(&pTmp1, nRight, 0, pp1, pp2); + fts3PoslistPhraseMerge(&pTmp1, nRight, 0, 0, pp1, pp2); aTmp2 = pTmp2 = pTmp1; *pp1 = p1; *pp2 = p2; - fts3PoslistPhraseMerge(&pTmp2, nLeft, 1, pp2, pp1); + fts3PoslistPhraseMerge(&pTmp2, nLeft, 1, 0, pp2, pp1); if( pTmp1!=aTmp && pTmp2!=aTmp2 ){ fts3PoslistMerge(pp, &aTmp, &aTmp2); }else if( pTmp1!=aTmp ){ @@ -1555,7 +1590,7 @@ static int fts3DoclistMerge( char *pSave = p; sqlite3_int64 iPrevSave = iPrev; fts3PutDeltaVarint(&p, &iPrev, i1); - if( 0==fts3PoslistPhraseMerge(ppPos, 1, 0, &p1, &p2) ){ + if( 0==fts3PoslistPhraseMerge(ppPos, nParam1, 0, 1, &p1, &p2) ){ p = pSave; iPrev = iPrevSave; } @@ -1747,43 +1782,106 @@ static int fts3TermSelectCb( return SQLITE_OK; } +static int fts3DeferredTermSelect( + Fts3DeferredToken *pToken, /* Phrase token */ + int isTermPos, /* True to include positions */ + int *pnOut, /* OUT: Size of list */ + char **ppOut /* OUT: Body of list */ +){ + char *aSource; + int nSource; + + aSource = sqlite3Fts3DeferredDoclist(pToken, &nSource); + if( !aSource ){ + *pnOut = 0; + *ppOut = 0; + }else if( isTermPos ){ + *ppOut = sqlite3_malloc(nSource); + if( !*ppOut ) return 
SQLITE_NOMEM; + memcpy(*ppOut, aSource, nSource); + *pnOut = nSource; + }else{ + sqlite3_int64 docid; + *pnOut = sqlite3Fts3GetVarint(aSource, &docid); + *ppOut = sqlite3_malloc(*pnOut); + if( !*ppOut ) return SQLITE_NOMEM; + sqlite3Fts3PutVarint(*ppOut, docid); + } + + return SQLITE_OK; +} + /* -** This function retreives the doclist for the specified term (or term -** prefix) from the database. -** -** The returned doclist may be in one of two formats, depending on the -** value of parameter isReqPos. If isReqPos is zero, then the doclist is -** a sorted list of delta-compressed docids (a bare doclist). If isReqPos -** is non-zero, then the returned list is in the same format as is stored -** in the database without the found length specifier at the start of on-disk -** doclists. +** An Fts3SegReaderArray is used to store an array of Fts3SegReader objects. +** Elements are added to the array using fts3SegReaderArrayAdd(). */ -static int fts3TermSelect( - Fts3Table *p, /* Virtual table handle */ - int iColumn, /* Column to query (or -ve for all columns) */ +struct Fts3SegReaderArray { + int nSegment; /* Number of valid entries in apSegment[] */ + int nAlloc; /* Allocated size of apSegment[] */ + int nCost; /* The cost of executing SegReaderIterate() */ + Fts3SegReader *apSegment[1]; /* Array of seg-reader objects */ +}; + + +/* +** Free an Fts3SegReaderArray object. Also free all seg-readers in the +** array (using sqlite3Fts3SegReaderFree()). +*/ +static void fts3SegReaderArrayFree(Fts3SegReaderArray *pArray){ + if( pArray ){ + int i; + for(i=0; inSegment; i++){ + sqlite3Fts3SegReaderFree(0, pArray->apSegment[i]); + } + sqlite3_free(pArray); + } +} + +static int fts3SegReaderArrayAdd( + Fts3SegReaderArray **ppArray, + Fts3SegReader *pNew +){ + Fts3SegReaderArray *pArray = *ppArray; + + if( !pArray || pArray->nAlloc==pArray->nSegment ){ + int nNew = (pArray ? 
pArray->nAlloc+16 : 16); + pArray = (Fts3SegReaderArray *)sqlite3_realloc(pArray, + sizeof(Fts3SegReaderArray) + (nNew-1) * sizeof(Fts3SegReader*) + ); + if( !pArray ){ + sqlite3Fts3SegReaderFree(0, pNew); + return SQLITE_NOMEM; + } + if( nNew==16 ){ + pArray->nSegment = 0; + pArray->nCost = 0; + } + pArray->nAlloc = nNew; + *ppArray = pArray; + } + + pArray->apSegment[pArray->nSegment++] = pNew; + return SQLITE_OK; +} + +static int fts3TermSegReaderArray( + Fts3Cursor *pCsr, /* Virtual table cursor handle */ const char *zTerm, /* Term to query for */ int nTerm, /* Size of zTerm in bytes */ int isPrefix, /* True for a prefix search */ - int isReqPos, /* True to include position lists in output */ - int *pnOut, /* OUT: Size of buffer at *ppOut */ - char **ppOut /* OUT: Malloced result buffer */ + Fts3SegReaderArray **ppArray /* OUT: Allocated seg-reader array */ ){ - int i; - TermSelect tsc; - Fts3SegFilter filter; /* Segment term filter configuration */ - Fts3SegReader **apSegment; /* Array of segments to read data from */ - int nSegment = 0; /* Size of apSegment array */ - int nAlloc = 16; /* Allocated size of segment array */ + Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; int rc; /* Return code */ + Fts3SegReaderArray *pArray = 0; /* Array object to build */ + Fts3SegReader *pReader = 0; /* Seg-reader to add to pArray */ sqlite3_stmt *pStmt = 0; /* SQL statement to scan %_segdir table */ int iAge = 0; /* Used to assign ages to segments */ - apSegment = (Fts3SegReader **)sqlite3_malloc(sizeof(Fts3SegReader*)*nAlloc); - if( !apSegment ) return SQLITE_NOMEM; - rc = sqlite3Fts3SegReaderPending(p, zTerm, nTerm, isPrefix, &apSegment[0]); - if( rc!=SQLITE_OK ) goto finished; - if( apSegment[0] ){ - nSegment = 1; + /* Allocate a seg-reader to scan the pending terms, if any. 
*/ + rc = sqlite3Fts3SegReaderPending(p, zTerm, nTerm, isPrefix, &pReader); + if( rc==SQLITE_OK && pReader ) { + rc = fts3SegReaderArrayAdd(&pArray, pReader); } /* Loop through the entire %_segdir table. For each segment, create a @@ -1791,12 +1889,10 @@ static int fts3TermSelect( ** that may contain a term that matches zTerm/nTerm. For non-prefix ** searches, this is always a single leaf. For prefix searches, this ** may be a contiguous block of leaves. - ** - ** The code in this loop does not actually load any leaves into memory - ** (unless the root node happens to be a leaf). It simply examines the - ** b-tree structure to determine which leaves need to be inspected. */ - rc = sqlite3Fts3AllSegdirs(p, &pStmt); + if( rc==SQLITE_OK ){ + rc = sqlite3Fts3AllSegdirs(p, &pStmt); + } while( rc==SQLITE_OK && SQLITE_ROW==(rc = sqlite3_step(pStmt)) ){ Fts3SegReader *pNew = 0; int nRoot = sqlite3_column_bytes(pStmt, 4); @@ -1829,43 +1925,69 @@ static int fts3TermSelect( } iAge++; - /* If a new Fts3SegReader was allocated, add it to the apSegment array. */ + /* If a new Fts3SegReader was allocated, add it to the array. 
*/ assert( pNew!=0 || rc!=SQLITE_OK ); - if( pNew ){ - if( nSegment==nAlloc ){ - Fts3SegReader **pArray; - nAlloc += 16; - pArray = (Fts3SegReader **)sqlite3_realloc( - apSegment, nAlloc*sizeof(Fts3SegReader *) - ); - if( !pArray ){ - sqlite3Fts3SegReaderFree(p, pNew); - rc = SQLITE_NOMEM; - goto finished; - } - apSegment = pArray; - } - apSegment[nSegment++] = pNew; + if( rc==SQLITE_OK ){ + rc = fts3SegReaderArrayAdd(&pArray, pNew); + }else{ + sqlite3Fts3SegReaderFree(p, pNew); + } + if( rc==SQLITE_OK ){ + rc = sqlite3Fts3SegReaderCost(pCsr, pNew, &pArray->nCost); } } - if( rc!=SQLITE_DONE ){ - assert( rc!=SQLITE_OK ); - goto finished; - } + if( rc==SQLITE_DONE ){ + rc = sqlite3_reset(pStmt); + }else{ + sqlite3_reset(pStmt); + } + if( rc!=SQLITE_OK ){ + fts3SegReaderArrayFree(pArray); + pArray = 0; + } + *ppArray = pArray; + return rc; +} + +/* +** This function retreives the doclist for the specified term (or term +** prefix) from the database. +** +** The returned doclist may be in one of two formats, depending on the +** value of parameter isReqPos. If isReqPos is zero, then the doclist is +** a sorted list of delta-compressed docids (a bare doclist). If isReqPos +** is non-zero, then the returned list is in the same format as is stored +** in the database without the found length specifier at the start of on-disk +** doclists. 
+*/ +static int fts3TermSelect( + Fts3Table *p, /* Virtual table handle */ + Fts3PhraseToken *pTok, /* Token to query for */ + int iColumn, /* Column to query (or -ve for all columns) */ + int isReqPos, /* True to include position lists in output */ + int *pnOut, /* OUT: Size of buffer at *ppOut */ + char **ppOut /* OUT: Malloced result buffer */ +){ + int rc; /* Return code */ + Fts3SegReaderArray *pArray; /* Seg-reader array for this term */ + TermSelect tsc; /* Context object for fts3TermSelectCb() */ + Fts3SegFilter filter; /* Segment term filter configuration */ + + pArray = pTok->pArray; memset(&tsc, 0, sizeof(TermSelect)); tsc.isReqPos = isReqPos; filter.flags = FTS3_SEGMENT_IGNORE_EMPTY - | (isPrefix ? FTS3_SEGMENT_PREFIX : 0) + | (pTok->isPrefix ? FTS3_SEGMENT_PREFIX : 0) | (isReqPos ? FTS3_SEGMENT_REQUIRE_POS : 0) | (iColumnnColumn ? FTS3_SEGMENT_COLUMN_FILTER : 0); filter.iCol = iColumn; - filter.zTerm = zTerm; - filter.nTerm = nTerm; + filter.zTerm = pTok->z; + filter.nTerm = pTok->n; - rc = sqlite3Fts3SegReaderIterate(p, apSegment, nSegment, &filter, - fts3TermSelectCb, (void *)&tsc + rc = sqlite3Fts3SegReaderIterate(p, pArray->apSegment, pArray->nSegment, + &filter, fts3TermSelectCb, (void *)&tsc ); if( rc==SQLITE_OK ){ rc = fts3TermSelectMerge(&tsc); @@ -1875,26 +1997,88 @@ static int fts3TermSelect( *ppOut = tsc.aaOutput[0]; *pnOut = tsc.anOutput[0]; }else{ + int i; for(i=0; ipArray = 0; return rc; } +static int fts3DoclistCountDocids(int isPoslist, char *aList, int nList){ + int nDoc = 0; /* Return value */ + if( aList ){ + char *aEnd = &aList[nList]; /* Pointer to one byte after EOF */ + char *p = aList; /* Cursor */ + sqlite3_int64 dummy; /* For Fts3GetVarint() */ + + while( ppLeft); + if( rc==SQLITE_OK ){ + rc = fts3DeferExpression(pCsr, pExpr->pRight); + } + if( pExpr->eType==FTSQUERY_PHRASE ){ + int iCol = pExpr->pPhrase->iColumn; + int i; + pExpr->bDeferred = 1; + for(i=0; rc==SQLITE_OK && ipPhrase->nToken; i++){ + Fts3PhraseToken *pToken = 
&pExpr->pPhrase->aToken[i]; + if( pToken->pDeferred==0 ){ + rc = sqlite3Fts3DeferToken(pCsr, pToken, iCol); + } + } + } + } + return rc; +} + +static void fts3DoclistStripPositions(char *aList, int *pnList){ + if( aList ){ + char *aEnd = &aList[*pnList]; /* Pointer to one byte after EOF */ + char *p = aList; /* Input cursor */ + char *pOut = aList; /* Output cursor */ + sqlite3_int64 iPrev = 0; + + while( piColumn; int isTermPos = (pPhrase->nToken>1 || isReqPos); + Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; + + int iPrevTok = 0; + int nDoc = 0; + + /* If this is an xFilter() evaluation, create a segment-reader for each + ** phrase token. Or, if this is an xNest() or snippet/offsets/matchinfo + ** evaluation, only create segment-readers if there are no Fts3DeferredToken + ** objects attached to the phrase-tokens. + */ + for(ii=0; iinToken; ii++){ + Fts3PhraseToken *pTok = &pPhrase->aToken[ii]; + if( pTok->pArray==0 && (pCsr->doDeferred==0 || pTok->pDeferred==0) ){ + rc = fts3TermSegReaderArray( + pCsr, pTok->z, pTok->n, pTok->isPrefix, &pTok->pArray + ); + if( rc!=SQLITE_OK ) return rc; + } + } for(ii=0; iinToken; ii++){ - struct PhraseToken *pTok = &pPhrase->aToken[ii]; - char *z = pTok->z; /* Next token of the phrase */ - int n = pTok->n; /* Size of z in bytes */ - int isPrefix = pTok->isPrefix;/* True if token is a prefix */ + Fts3PhraseToken *pTok; /* Token to find doclist for */ + int iTok; /* The token being queried this iteration */ char *pList; /* Pointer to token doclist */ int nList; /* Size of buffer at pList */ - rc = fts3TermSelect(p, iCol, z, n, isPrefix, isTermPos, &nList, &pList); + /* Select a token to process. If this is an xFilter() call, then tokens + ** are processed in order from least to most costly. Otherwise, tokens + ** are processed in the order in which they occur in the phrase. 
+ */ + if( pCsr->doDeferred || isReqPos ){ + iTok = ii; + pTok = &pPhrase->aToken[iTok]; + }else{ + int nMinCost = 0x7FFFFFFF; + int jj; + + /* Find the remaining token with the lowest cost. */ + for(jj=0; jjnToken; jj++){ + Fts3SegReaderArray *pArray = pPhrase->aToken[jj].pArray; + if( pArray && pArray->nCostnCost; + } + } + pTok = &pPhrase->aToken[iTok]; + + /* This branch is taken if it is determined that loading the doclist + ** for the next token would require more IO than loading all documents + ** currently identified by doclist pOut/nOut. No further doclists will + ** be loaded from the full-text index for this phrase. + */ + if( nMinCost>nDoc && ii>0 ){ + rc = fts3DeferExpression(pCsr, pCsr->pExpr); + break; + } + } + + if( pCsr->doDeferred && pTok->pDeferred ){ + rc = fts3DeferredTermSelect(pTok->pDeferred, isTermPos, &nList, &pList); + }else{ + assert( pTok->pArray ); + rc = fts3TermSelect(p, pTok, iCol, isTermPos, &nList, &pList); + } + assert( rc!=SQLITE_OK || pCsr->doDeferred || pTok->pArray==0 ); if( rc!=SQLITE_OK ) break; if( ii==0 ){ pOut = pList; nOut = nList; }else{ - /* Merge the new term list and the current output. If this is the - ** last term in the phrase, and positions are not required in the - ** output of this function, the positions can be dropped as part - ** of this merge. Either way, the result of this merge will be - ** smaller than nList bytes. The code in fts3DoclistMerge() is written - ** so that it is safe to use pList as the output as well as an input - ** in this case. + /* Merge the new term list and the current output. */ + char *aLeft, *aRight; + int nLeft, nRight; + int nDist; + int mt; + + /* If this is the final token of the phrase, and positions were not + ** requested by the caller, use MERGE_PHRASE instead of POS_PHRASE. + ** This drops the position information from the output list. 
*/ - int mergetype = MERGE_POS_PHRASE; - if( ii==pPhrase->nToken-1 && !isReqPos ){ - mergetype = MERGE_PHRASE; + mt = MERGE_POS_PHRASE; + if( ii==pPhrase->nToken-1 && !isReqPos ) mt = MERGE_PHRASE; + + assert( iPrevTok!=iTok ); + if( iPrevToknToken-1), pOut, nOut); } if( rc==SQLITE_OK ){ + if( ii!=pPhrase->nToken ){ + assert( pCsr->doDeferred==0 && isReqPos==0 ); + fts3DoclistStripPositions(pOut, &nOut); + } *paOut = pOut; *pnOut = nOut; }else{ @@ -2018,14 +2280,99 @@ int sqlite3Fts3ExprNearTrim(Fts3Expr *pLeft, Fts3Expr *pRight, int nNear){ return rc; } +typedef struct ExprAndCost ExprAndCost; +struct ExprAndCost { + Fts3Expr *pExpr; + int nCost; +}; + +int fts3ExprCost(Fts3Expr *pExpr){ + int nCost; /* Return value */ + if( pExpr->eType==FTSQUERY_PHRASE ){ + Fts3Phrase *pPhrase = pExpr->pPhrase; + int ii; + nCost = 0; + for(ii=0; iinToken; ii++){ + nCost += pPhrase->aToken[ii].pArray->nCost; + } + }else{ + nCost = fts3ExprCost(pExpr->pLeft) + fts3ExprCost(pExpr->pRight); + } + return nCost; +} + +static void fts3ExprAssignCosts( + Fts3Expr *pExpr, /* Expression to create seg-readers for */ + ExprAndCost **ppExprCost /* OUT: Write to *ppExprCost */ +){ + if( pExpr->eType==FTSQUERY_AND ){ + fts3ExprAssignCosts(pExpr->pLeft, ppExprCost); + fts3ExprAssignCosts(pExpr->pRight, ppExprCost); + }else{ + (*ppExprCost)->pExpr = pExpr; + (*ppExprCost)->nCost = fts3ExprCost(pExpr);; + (*ppExprCost)++; + } +} + +static int fts3ExprAllocateSegReaders( + Fts3Cursor *pCsr, /* FTS3 table */ + Fts3Expr *pExpr, /* Expression to create seg-readers for */ + int *pnExpr /* OUT: Number of AND'd expressions */ +){ + int rc = SQLITE_OK; /* Return code */ + + if( pCsr->doDeferred ) return SQLITE_OK; + if( pnExpr && pExpr->eType!=FTSQUERY_AND ){ + (*pnExpr)++; + pnExpr = 0; + } + + if( pExpr->eType==FTSQUERY_PHRASE ){ + Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; + Fts3Phrase *pPhrase = pExpr->pPhrase; + int ii; + + for(ii=0; rc==SQLITE_OK && iinToken; ii++){ + Fts3PhraseToken *pTok = 
&pPhrase->aToken[ii]; + if( pTok->pArray==0 ){ + rc = fts3TermSegReaderArray( + pCsr, pTok->z, pTok->n, pTok->isPrefix, &pTok->pArray + ); + } + } + }else{ + rc = fts3ExprAllocateSegReaders(pCsr, pExpr->pLeft, pnExpr); + if( rc==SQLITE_OK ){ + rc = fts3ExprAllocateSegReaders(pCsr, pExpr->pRight, pnExpr); + } + } + return rc; +} + +static void fts3ExprFreeSegReaders(Fts3Expr *pExpr){ + if( pExpr ){ + Fts3Phrase *pPhrase = pExpr->pPhrase; + if( pPhrase ){ + int kk; + for(kk=0; kknToken; kk++){ + fts3SegReaderArrayFree(pPhrase->aToken[kk].pArray); + pPhrase->aToken[kk].pArray = 0; + } + } + fts3ExprFreeSegReaders(pExpr->pLeft); + fts3ExprFreeSegReaders(pExpr->pRight); + } +} + /* ** Evaluate the full-text expression pExpr against fts3 table pTab. Store ** the resulting doclist in *paOut and *pnOut. This routine mallocs for ** the space needed to store the output. The caller is responsible for ** freeing the space when it has finished. */ -static int evalFts3Expr( - Fts3Table *p, /* Virtual table handle */ +static int fts3EvalExpr( + Fts3Cursor *p, /* Virtual table cursor handle */ Fts3Expr *pExpr, /* Parsed fts3 expression */ char **paOut, /* OUT: Pointer to malloc'd result buffer */ int *pnOut, /* OUT: Size of buffer at *paOut */ @@ -2038,27 +2385,94 @@ static int evalFts3Expr( *pnOut = 0; if( pExpr ){ + assert( pExpr->eType==FTSQUERY_NEAR || pExpr->eType==FTSQUERY_OR + || pExpr->eType==FTSQUERY_AND || pExpr->eType==FTSQUERY_NOT + || pExpr->eType==FTSQUERY_PHRASE + ); assert( pExpr->eType==FTSQUERY_PHRASE || pExpr->eType==FTSQUERY_NEAR || isReqPos==0 ); + if( pExpr->eType==FTSQUERY_PHRASE ){ - rc = fts3PhraseSelect(p, pExpr->pPhrase, + rc = fts3PhraseSelect(p, pExpr->pPhrase, isReqPos || (pExpr->pParent && pExpr->pParent->eType==FTSQUERY_NEAR), paOut, pnOut ); + fts3ExprFreeSegReaders(pExpr); + }else if( p->doDeferred==0 && pExpr->eType==FTSQUERY_AND ){ + ExprAndCost *aExpr = 0; /* Array of AND'd expressions and costs */ + int nExpr = 0; /* Size of aExpr[] */ + char 
*aRet = 0; /* Doclist to return to caller */ + int nRet = 0; /* Length of aRet[] in bytes */ + int nDoc = 0x7FFFFFFF; + + assert( !isReqPos ); + + rc = fts3ExprAllocateSegReaders(p, pExpr, &nExpr); + if( rc==SQLITE_OK ){ + aExpr = sqlite3_malloc(sizeof(ExprAndCost) * nExpr); + if( !aExpr ) rc = SQLITE_NOMEM; + } + if( rc==SQLITE_OK ){ + int ii; /* Used to iterate through expressions */ + + fts3ExprAssignCosts(pExpr, &aExpr); + aExpr -= nExpr; + for(ii=0; iipExpr && (pBest==0 || pCand->nCostnCost) ){ + pBest = pCand; + } + } + + if( pBest->nCost>nDoc ){ + rc = fts3DeferExpression(p, p->pExpr); + break; + }else{ + rc = fts3EvalExpr(p, pBest->pExpr, &aNew, &nNew, 0); + if( rc!=SQLITE_OK ) break; + pBest->pExpr = 0; + if( ii==0 ){ + aRet = aNew; + nRet = nNew; + }else{ + fts3DoclistMerge( + MERGE_AND, 0, 0, aRet, &nRet, aRet, nRet, aNew, nNew + ); + sqlite3_free(aNew); + } + nDoc = fts3DoclistCountDocids(0, aRet, nRet); + } + } + } + + *paOut = aRet; + *pnOut = nRet; + sqlite3_free(aExpr); + fts3ExprFreeSegReaders(pExpr); + }else{ char *aLeft; char *aRight; int nLeft; int nRight; - if( 0==(rc = evalFts3Expr(p, pExpr->pRight, &aRight, &nRight, isReqPos)) - && 0==(rc = evalFts3Expr(p, pExpr->pLeft, &aLeft, &nLeft, isReqPos)) + assert( pExpr->eType==FTSQUERY_NEAR + || pExpr->eType==FTSQUERY_OR + || pExpr->eType==FTSQUERY_NOT + || (pExpr->eType==FTSQUERY_AND && p->doDeferred) + ); + + if( 0==(rc = fts3EvalExpr(p, pExpr->pRight, &aRight, &nRight, isReqPos)) + && 0==(rc = fts3EvalExpr(p, pExpr->pLeft, &aLeft, &nLeft, isReqPos)) ){ - assert( pExpr->eType==FTSQUERY_NEAR || pExpr->eType==FTSQUERY_OR - || pExpr->eType==FTSQUERY_AND || pExpr->eType==FTSQUERY_NOT - ); switch( pExpr->eType ){ case FTSQUERY_NEAR: { Fts3Expr *pLeft; @@ -2117,6 +2531,73 @@ static int evalFts3Expr( return rc; } +/* +** +*/ +static int fts3EvalDeferred(Fts3Cursor *pCsr, int *pbRes){ + int rc = SQLITE_OK; + if( pCsr->pDeferred==0 ){ + *pbRes = 1; + }else{ + rc = fts3CursorSeek(0, pCsr); + if( 
rc==SQLITE_OK ){ + sqlite3Fts3FreeDeferredDoclists(pCsr); + rc = sqlite3Fts3CacheDeferredDoclists(pCsr); + } + if( rc==SQLITE_OK ){ + char *a = 0; + int n = 0; + pCsr->doDeferred = 1; + rc = fts3EvalExpr(pCsr, pCsr->pExpr, &a, &n, 0); + pCsr->doDeferred = 0; + assert( n>=0 ); + *pbRes = (n>0); + sqlite3_free(a); + } + } + return rc; +} + +/* +** Advance the cursor to the next row in the %_content table that +** matches the search criteria. For a MATCH search, this will be +** the next row that matches. For a full-table scan, this will be +** simply the next row in the %_content table. For a docid lookup, +** this routine simply sets the EOF flag. +** +** Return SQLITE_OK if nothing goes wrong. SQLITE_OK is returned +** even if we reach end-of-file. The fts3EofMethod() will be called +** subsequently to determine whether or not an EOF was hit. +*/ +static int fts3NextMethod(sqlite3_vtab_cursor *pCursor){ + int res; + int rc = SQLITE_OK; /* Return code */ + Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; + + do { + if( pCsr->aDoclist==0 ){ + if( SQLITE_ROW!=sqlite3_step(pCsr->pStmt) ){ + pCsr->isEof = 1; + rc = sqlite3_reset(pCsr->pStmt); + break; + } + pCsr->iPrevId = sqlite3_column_int64(pCsr->pStmt, 0); + }else{ + if( pCsr->pNextId>=&pCsr->aDoclist[pCsr->nDoclist] ){ + pCsr->isEof = 1; + break; + } + sqlite3_reset(pCsr->pStmt); + fts3GetDeltaVarint(&pCsr->pNextId, &pCsr->iPrevId); + pCsr->isRequireSeek = 1; + pCsr->isMatchinfoNeeded = 1; + } + }while( SQLITE_OK==(rc = fts3EvalDeferred(pCsr, &res)) && res==0 ); + + return rc; +} + + /* ** This is the xFilter interface for the virtual table. See ** the virtual table xFilter method documentation for additional @@ -2167,24 +2648,7 @@ static int fts3FilterMethod( sqlite3Fts3ExprFree(pCsr->pExpr); memset(&pCursor[1], 0, sizeof(Fts3Cursor)-sizeof(sqlite3_vtab_cursor)); - /* Compile a SELECT statement for this cursor. For a full-table-scan, the - ** statement loops through all rows of the %_content table. 
For a - ** full-text query or docid lookup, the statement retrieves a single - ** row by docid. - */ - zSql = sqlite3_mprintf(azSql[idxNum==FTS3_FULLSCAN_SEARCH], p->zDb, p->zName); - if( !zSql ){ - rc = SQLITE_NOMEM; - }else{ - rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0); - sqlite3_free(zSql); - } - if( rc!=SQLITE_OK ) return rc; - pCsr->eSearch = (i16)idxNum; - - if( idxNum==FTS3_DOCID_SEARCH ){ - rc = sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]); - }else if( idxNum!=FTS3_FULLSCAN_SEARCH ){ + if( idxNum!=FTS3_DOCID_SEARCH && idxNum!=FTS3_FULLSCAN_SEARCH ){ int iCol = idxNum-FTS3_FULLTEXT_SEARCH; const char *zQuery = (const char *)sqlite3_value_text(apVal[0]); @@ -2206,11 +2670,33 @@ static int fts3FilterMethod( rc = sqlite3Fts3ReadLock(p); if( rc!=SQLITE_OK ) return rc; - rc = evalFts3Expr(p, pCsr->pExpr, &pCsr->aDoclist, &pCsr->nDoclist, 0); + rc = fts3EvalExpr(pCsr, pCsr->pExpr, &pCsr->aDoclist, &pCsr->nDoclist, 0); + if( rc!=SQLITE_OK ) return rc; pCsr->pNextId = pCsr->aDoclist; pCsr->iPrevId = 0; + if( pCsr->nDoclist<0 ){ + assert( pCsr->aDoclist==0 ); + idxNum = FTS3_FULLSCAN_SEARCH; + } } + /* Compile a SELECT statement for this cursor. For a full-table-scan, the + ** statement loops through all rows of the %_content table. For a + ** full-text query or docid lookup, the statement retrieves a single + ** row by docid. + */ + zSql = sqlite3_mprintf(azSql[idxNum==FTS3_FULLSCAN_SEARCH], p->zDb, p->zName); + if( !zSql ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0); + sqlite3_free(zSql); + } + if( rc==SQLITE_OK && idxNum==FTS3_DOCID_SEARCH ){ + rc = sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]); + } + pCsr->eSearch = (i16)idxNum; + if( rc!=SQLITE_OK ) return rc; return fts3NextMethod(pCursor); } @@ -2334,8 +2820,12 @@ static int fts3RollbackMethod(sqlite3_vtab *pVtab){ ** This is used by the matchinfo(), snippet() and offsets() auxillary ** functions. 
*/ -int sqlite3Fts3ExprLoadDoclist(Fts3Table *pTab, Fts3Expr *pExpr){ - return evalFts3Expr(pTab, pExpr, &pExpr->aDoclist, &pExpr->nDoclist, 1); +int sqlite3Fts3ExprLoadDoclist(Fts3Cursor *pCsr, Fts3Expr *pExpr){ + int rc; + pCsr->doDeferred = 1; + rc = fts3EvalExpr(pCsr, pExpr, &pExpr->aDoclist, &pExpr->nDoclist, 1); + pCsr->doDeferred = 0; + return rc; } /* diff --git a/ext/fts3/fts3Int.h b/ext/fts3/fts3Int.h index 8ed31aedd8..ef00377a7f 100644 --- a/ext/fts3/fts3Int.h +++ b/ext/fts3/fts3Int.h @@ -96,8 +96,12 @@ typedef struct Fts3Table Fts3Table; typedef struct Fts3Cursor Fts3Cursor; typedef struct Fts3Expr Fts3Expr; typedef struct Fts3Phrase Fts3Phrase; -typedef struct Fts3SegReader Fts3SegReader; +typedef struct Fts3PhraseToken Fts3PhraseToken; + typedef struct Fts3SegFilter Fts3SegFilter; +typedef struct Fts3DeferredToken Fts3DeferredToken; +typedef struct Fts3SegReader Fts3SegReader; +typedef struct Fts3SegReaderArray Fts3SegReaderArray; /* ** A connection to a fulltext index is an instance of the following @@ -120,17 +124,8 @@ struct Fts3Table { */ sqlite3_stmt *aStmt[25]; - /* Pointer to string containing the SQL: - ** - ** "SELECT block FROM %_segments WHERE blockid BETWEEN ? AND ? 
- ** ORDER BY blockid" - */ - char *zSelectLeaves; - int nLeavesStmt; /* Valid statements in aLeavesStmt */ - int nLeavesTotal; /* Total number of prepared leaves stmts */ - int nLeavesAlloc; /* Allocated size of aLeavesStmt */ - sqlite3_stmt **aLeavesStmt; /* Array of prepared zSelectLeaves stmts */ - + char *zSegmentsTbl; /* Name of %_segments table */ + int nPgsz; /* Page size for host database */ int nNodeSize; /* Soft limit for node size */ u8 bHasContent; /* True if %_content table exists */ u8 bHasDocsize; /* True if %_docsize table exists */ @@ -160,12 +155,16 @@ struct Fts3Cursor { u8 isRequireSeek; /* True if must seek pStmt to %_content row */ sqlite3_stmt *pStmt; /* Prepared statement in use by the cursor */ Fts3Expr *pExpr; /* Parsed MATCH query string */ + Fts3DeferredToken *pDeferred; /* Deferred search tokens, if any */ sqlite3_int64 iPrevId; /* Previous id read from aDoclist */ char *pNextId; /* Pointer into the body of aDoclist */ char *aDoclist; /* List of docids for full-text queries */ int nDoclist; /* Size of buffer at aDoclist */ int isMatchinfoNeeded; /* True when aMatchinfo[] needs filling in */ u32 *aMatchinfo; /* Information about most recent match */ + + int doDeferred; + int nRowAvg; /* Average size of database rows, in pages */ }; /* @@ -190,18 +189,23 @@ struct Fts3Cursor { /* ** A "phrase" is a sequence of one or more tokens that must match in ** sequence. A single token is the base case and the most common case. -** For a sequence of tokens contained in "...", nToken will be the number -** of tokens in the string. +** For a sequence of tokens contained in double-quotes (i.e. "one two three") +** nToken will be the number of tokens in the string. 
*/ + +struct Fts3PhraseToken { + char *z; /* Text of the token */ + int n; /* Number of bytes in buffer z */ + int isPrefix; /* True if token ends with a "*" character */ + Fts3SegReaderArray *pArray; + Fts3DeferredToken *pDeferred; +}; + struct Fts3Phrase { int nToken; /* Number of tokens in the phrase */ int iColumn; /* Index of column this phrase must match */ int isNot; /* Phrase prefixed by unary not (-) operator */ - struct PhraseToken { - char *z; /* Text of the token */ - int n; /* Number of bytes in buffer pointed to by z */ - int isPrefix; /* True if token ends in with a "*" character */ - } aToken[1]; /* One entry for each token in the phrase */ + Fts3PhraseToken aToken[1]; /* One entry for each token in the phrase */ }; /* @@ -225,6 +229,8 @@ struct Fts3Expr { Fts3Expr *pRight; /* Right operand */ Fts3Phrase *pPhrase; /* Valid if eType==FTSQUERY_PHRASE */ + int bDeferred; + int isLoaded; /* True if aDoclist/nDoclist are initialized. */ char *aDoclist; /* Buffer containing doclist */ int nDoclist; /* Size of aDoclist in bytes */ @@ -275,6 +281,12 @@ int sqlite3Fts3MatchinfoDocsizeLocal(Fts3Cursor*, u32*); int sqlite3Fts3MatchinfoDocsizeGlobal(Fts3Cursor*, u32*); int sqlite3Fts3ReadLock(Fts3Table *); +void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *); +int sqlite3Fts3DeferToken(Fts3Cursor *, Fts3PhraseToken *, int); +int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *); +void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *); +char *sqlite3Fts3DeferredDoclist(Fts3DeferredToken *, int *); + /* Flags allowed as part of the 4th argument to SegmentReaderIterate() */ #define FTS3_SEGMENT_REQUIRE_POS 0x00000001 #define FTS3_SEGMENT_IGNORE_EMPTY 0x00000002 @@ -297,7 +309,7 @@ int sqlite3Fts3VarintLen(sqlite3_uint64); void sqlite3Fts3Dequote(char *); char *sqlite3Fts3FindPositions(Fts3Expr *, sqlite3_int64, int); -int sqlite3Fts3ExprLoadDoclist(Fts3Table *, Fts3Expr *); +int sqlite3Fts3ExprLoadDoclist(Fts3Cursor *, Fts3Expr *); int sqlite3Fts3ExprNearTrim(Fts3Expr *, 
Fts3Expr *, int); /* fts3_tokenizer.c */ diff --git a/ext/fts3/fts3_expr.c b/ext/fts3/fts3_expr.c index 008ba8148c..0f411f097f 100644 --- a/ext/fts3/fts3_expr.c +++ b/ext/fts3/fts3_expr.c @@ -223,7 +223,7 @@ static int getNextString( rc = pModule->xNext(pCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos); if( rc==SQLITE_OK ){ int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase); - p = fts3ReallocOrFree(p, nByte+ii*sizeof(struct PhraseToken)); + p = fts3ReallocOrFree(p, nByte+ii*sizeof(Fts3PhraseToken)); zTemp = fts3ReallocOrFree(zTemp, nTemp + nToken); if( !p || !zTemp ){ goto no_mem; @@ -235,6 +235,8 @@ static int getNextString( p->pPhrase = (Fts3Phrase *)&p[1]; p->pPhrase->nToken = ii+1; p->pPhrase->aToken[ii].n = nToken; + p->pPhrase->aToken[ii].pDeferred = 0; + p->pPhrase->aToken[ii].pArray = 0; memcpy(&zTemp[nTemp], zToken, nToken); nTemp += nToken; if( iEndpPhrase->nToken-1):0) * sizeof(struct PhraseToken); + nByte += (p?(p->pPhrase->nToken-1):0) * sizeof(Fts3PhraseToken); p = fts3ReallocOrFree(p, nByte + nTemp); if( !p ){ goto no_mem; diff --git a/ext/fts3/fts3_snippet.c b/ext/fts3/fts3_snippet.c index d67f7ac098..4f3d9ec3e8 100644 --- a/ext/fts3/fts3_snippet.c +++ b/ext/fts3/fts3_snippet.c @@ -24,7 +24,7 @@ */ typedef struct LoadDoclistCtx LoadDoclistCtx; struct LoadDoclistCtx { - Fts3Table *pTab; /* FTS3 Table */ + Fts3Cursor *pCsr; /* FTS3 Cursor */ int nPhrase; /* Number of phrases seen so far */ int nToken; /* Number of tokens seen so far */ }; @@ -218,7 +218,7 @@ static int fts3ExprLoadDoclistsCb1(Fts3Expr *pExpr, int iPhrase, void *ctx){ p->nToken += pExpr->pPhrase->nToken; if( pExpr->isLoaded==0 ){ - rc = sqlite3Fts3ExprLoadDoclist(p->pTab, pExpr); + rc = sqlite3Fts3ExprLoadDoclist(p->pCsr, pExpr); pExpr->isLoaded = 1; if( rc==SQLITE_OK ){ rc = fts3ExprNearTrim(pExpr); @@ -261,7 +261,7 @@ static int fts3ExprLoadDoclists( ){ int rc; /* Return Code */ LoadDoclistCtx sCtx = {0,0,0}; /* Context for fts3ExprIterate() */ - sCtx.pTab = (Fts3Table 
*)pCsr->base.pVtab; + sCtx.pCsr = pCsr; rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb1, (void *)&sCtx); if( rc==SQLITE_OK ){ (void)fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb2, 0); diff --git a/ext/fts3/fts3_write.c b/ext/fts3/fts3_write.c index e434360953..92eaf4dc93 100644 --- a/ext/fts3/fts3_write.c +++ b/ext/fts3/fts3_write.c @@ -42,6 +42,17 @@ struct PendingList { sqlite3_int64 iLastPos; }; + +/* +** Each cursor has a (possibly empty) linked list of the following objects. +*/ +struct Fts3DeferredToken { + Fts3PhraseToken *pToken; /* Pointer to corresponding expr token */ + int iCol; /* Column token must occur in */ + Fts3DeferredToken *pNext; /* Next in list of deferred tokens */ + PendingList *pList; /* Doclist is assembled here */ +}; + /* ** An instance of this structure is used to iterate through the terms on ** a contiguous set of segment b-tree leaf nodes. Although the details of @@ -51,6 +62,7 @@ struct PendingList { ** ** sqlite3Fts3SegReaderNew() ** sqlite3Fts3SegReaderFree() +** sqlite3Fts3SegReaderCost() ** sqlite3Fts3SegReaderIterate() ** ** Methods used to manipulate Fts3SegReader structures: @@ -61,9 +73,13 @@ struct PendingList { */ struct Fts3SegReader { int iIdx; /* Index within level, or 0x7FFFFFFF for PT */ - sqlite3_int64 iStartBlock; - sqlite3_int64 iEndBlock; - sqlite3_stmt *pStmt; /* SQL Statement to access leaf nodes */ + + sqlite3_int64 iStartBlock; /* Rowid of first leaf block to traverse */ + sqlite3_int64 iLeafEndBlock; /* Rowid of final leaf block to traverse */ + sqlite3_int64 iEndBlock; /* Rowid of final block in segment (or 0) */ + sqlite3_int64 iCurrentBlock; /* Current leaf block (or 0) */ + sqlite3_blob *pBlob; /* Blob open on iStartBlock */ + char *aNode; /* Pointer to node data (or NULL) */ int nNode; /* Size of buffer at aNode (or 0) */ int nTermAlloc; /* Allocated size of zTerm buffer */ @@ -85,6 +101,7 @@ struct Fts3SegReader { }; #define fts3SegReaderIsPending(p) ((p)->ppNextElem!=0) +#define 
fts3SegReaderIsRootOnly(p) ((p)->aNode==(char *)&(p)[1]) /* ** An instance of this structure is used to create a segment b-tree in the @@ -490,10 +507,10 @@ static int fts3PendingListAppend( ** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code. */ static int fts3PendingTermsAdd( - Fts3Table *p, /* FTS table into which text will be inserted */ - const char *zText, /* Text of document to be inseted */ - int iCol, /* Column number into which text is inserted */ - u32 *pnWord /* OUT: Number of tokens inserted */ + Fts3Table *p, /* Table into which text will be inserted */ + const char *zText, /* Text of document to be inserted */ + int iCol, /* Column into which text is being inserted */ + u32 *pnWord /* OUT: Number of tokens inserted */ ){ int rc; int iStart; @@ -786,12 +803,42 @@ static int fts3AllocateSegdirIdx(Fts3Table *p, int iLevel, int *piIdx){ return rc; } +/* +** The %_segments table is declared as follows: +** +** CREATE TABLE %_segments(blockid INTEGER PRIMARY KEY, block BLOB) +** +** This function opens a read-only blob handle on the "block" column of +** row iSegment of the %_segments table associated with FTS3 table p. +** +** If all goes well, SQLITE_OK is returned and *ppBlob set to the +** read-only blob handle. It is the responsibility of the caller to call +** sqlite3_blob_close() on the blob handle. Or, if an error occurs, an +** SQLite error code is returned and *ppBlob is either not modified or +** set to 0. +*/ +static int fts3OpenSegmentsBlob( + Fts3Table *p, /* FTS3 table handle */ + sqlite3_int64 iSegment, /* Rowid in %_segments table */ + sqlite3_blob **ppBlob /* OUT: Read-only blob handle */ +){ + if( 0==p->zSegmentsTbl + && 0==(p->zSegmentsTbl = sqlite3_mprintf("%s_segments", p->zName)) + ) { + return SQLITE_NOMEM; + } + return sqlite3_blob_open( + p->db, p->zDb, p->zSegmentsTbl, "block", iSegment, 0, ppBlob + ); +} + + /* ** Move the iterator passed as the first argument to the next term in the ** segment. 
If successful, SQLITE_OK is returned. If there is no next term, ** SQLITE_DONE. Otherwise, an SQLite error code. */ -static int fts3SegReaderNext(Fts3SegReader *pReader){ +static int fts3SegReaderNext(Fts3Table *p, Fts3SegReader *pReader){ char *pNext; /* Cursor variable */ int nPrefix; /* Number of bytes in term prefix */ int nSuffix; /* Number of bytes in term suffix */ @@ -803,7 +850,9 @@ static int fts3SegReaderNext(Fts3SegReader *pReader){ } if( !pNext || pNext>=&pReader->aNode[pReader->nNode] ){ + sqlite3_blob *pBlob; int rc; + if( fts3SegReaderIsPending(pReader) ){ Fts3HashElem *pElem = *(pReader->ppNextElem); if( pElem==0 ){ @@ -819,17 +868,33 @@ static int fts3SegReaderNext(Fts3SegReader *pReader){ } return SQLITE_OK; } - if( !pReader->pStmt ){ - pReader->aNode = 0; + + if( !fts3SegReaderIsRootOnly(pReader) ){ + sqlite3_free(pReader->aNode); + } + pReader->aNode = 0; + + /* If iCurrentBlock>=iLeafEndBlock, this is an EOF condition. All leaf + ** blocks have already been traversed. */ + if( pReader->iCurrentBlock>=pReader->iLeafEndBlock ){ return SQLITE_OK; } - rc = sqlite3_step(pReader->pStmt); - if( rc!=SQLITE_ROW ){ - pReader->aNode = 0; - return (rc==SQLITE_DONE ? 
SQLITE_OK : rc); + + rc = fts3OpenSegmentsBlob(p, ++pReader->iCurrentBlock, &pBlob); + if( rc==SQLITE_OK ){ + pReader->nNode = sqlite3_blob_bytes(pBlob); + pReader->aNode = (char *)sqlite3_malloc(pReader->nNode); + if( pReader->aNode ){ + rc = sqlite3_blob_read(pBlob, pReader->aNode, pReader->nNode, 0); + }else{ + rc = SQLITE_NOMEM; + } + sqlite3_blob_close(pBlob); + } + + if( rc!=SQLITE_OK ){ + return rc; } - pReader->nNode = sqlite3_column_bytes(pReader->pStmt, 0); - pReader->aNode = (char *)sqlite3_column_blob(pReader->pStmt, 0); pNext = pReader->aNode; } @@ -914,25 +979,104 @@ static void fts3SegReaderNextDocid( } } +/* +** This function is called to estimate the amount of data that will be +** loaded from the disk If SegReaderIterate() is called on this seg-reader, +** in units of average document size. +** +** This can be used as follows: If the caller has a small doclist that +** contains references to N documents, and is considering merging it with +** a large doclist (size X "average documents"), it may opt not to load +** the large doclist if X>N. +*/ +int sqlite3Fts3SegReaderCost( + Fts3Cursor *pCsr, /* FTS3 cursor handle */ + Fts3SegReader *pReader, /* Segment-reader handle */ + int *pnCost /* IN/OUT: Number of bytes read */ +){ + Fts3Table *p = (Fts3Table*)pCsr->base.pVtab; + int rc = SQLITE_OK; /* Return code */ + int nCost = 0; /* Cost in bytes to return */ + sqlite3_int64 iLeaf; /* Used to iterate through required leaves */ + int pgsz = p->nPgsz; /* Database page size */ + + /* If this seg-reader is reading the pending-terms table, or if all data + ** for the segment is stored on the root page of the b-tree, then the cost + ** is zero. In this case all required data is already in main memory. 
+ */ + if( p->bHasDocsize + && !fts3SegReaderIsPending(pReader) + && !fts3SegReaderIsRootOnly(pReader) + ){ + sqlite3_blob *pBlob = 0; + + if( pCsr->nRowAvg==0 ){ + /* The average document size, which is required to calculate the cost + ** of each doclist, has not yet been determined. Read the required + ** data from the %_stat table to calculate it. + ** + ** Entry 0 of the %_stat table is a blob containing (nCol+1) FTS3 + ** varints, where nCol is the number of columns in the FTS3 table. + ** The first varint is the number of documents currently stored in + ** the table. The following nCol varints contain the total amount of + ** data stored in all rows of each column of the table, from left + ** to right. + */ + sqlite3_stmt *pStmt; + rc = fts3SqlStmt(p, SQL_SELECT_DOCTOTAL, &pStmt, 0); + if( rc ) return rc; + if( sqlite3_step(pStmt)==SQLITE_ROW ){ + sqlite3_int64 nDoc = 0; + sqlite3_int64 nByte = 0; + const char *a = sqlite3_column_blob(pStmt, 0); + if( a ){ + const char *pEnd = &a[sqlite3_column_bytes(pStmt, 0)]; + a += sqlite3Fts3GetVarint(a, &nDoc); + while( anRowAvg = (((nByte / nDoc) + pgsz - 1) / pgsz); + } + rc = sqlite3_reset(pStmt); + if( rc!=SQLITE_OK || pCsr->nRowAvg==0 ) return rc; + } + + rc = fts3OpenSegmentsBlob(p, pReader->iStartBlock, &pBlob); + if( rc==SQLITE_OK ){ + /* Assume that a blob flows over onto overflow pages if it is larger + ** than (pgsz-35) bytes in size (the file-format documentation + ** confirms this). + */ + int nBlob = sqlite3_blob_bytes(pBlob); + if( (nBlob+35)>pgsz ){ + int nOvfl = (nBlob + 34)/pgsz; + nCost += ((nOvfl + pCsr->nRowAvg - 1)/pCsr->nRowAvg); + } + } + assert( rc==SQLITE_OK || pBlob==0 ); + sqlite3_blob_close(pBlob); + } + + *pnCost += nCost; + return rc; +} + /* ** Free all allocations associated with the iterator passed as the ** second argument. 
*/ void sqlite3Fts3SegReaderFree(Fts3Table *p, Fts3SegReader *pReader){ - if( pReader ){ - if( pReader->pStmt ){ - /* Move the leaf-range SELECT statement to the aLeavesStmt[] array, - ** so that it can be reused when required by another query. - */ - assert( p->nLeavesStmtnLeavesTotal ); - sqlite3_reset(pReader->pStmt); - p->aLeavesStmt[p->nLeavesStmt++] = pReader->pStmt; + if( pReader && !fts3SegReaderIsPending(pReader) ){ + sqlite3_free(pReader->zTerm); + if( !fts3SegReaderIsRootOnly(pReader) ){ + sqlite3_free(pReader->aNode); } - if( !fts3SegReaderIsPending(pReader) ){ - sqlite3_free(pReader->zTerm); - } - sqlite3_free(pReader); } + sqlite3_free(pReader); } /* @@ -961,8 +1105,9 @@ int sqlite3Fts3SegReaderNew( return SQLITE_NOMEM; } memset(pReader, 0, sizeof(Fts3SegReader)); - pReader->iStartBlock = iStartLeaf; pReader->iIdx = iAge; + pReader->iStartBlock = iStartLeaf; + pReader->iLeafEndBlock = iEndLeaf; pReader->iEndBlock = iEndBlock; if( nExtra ){ @@ -971,52 +1116,9 @@ int sqlite3Fts3SegReaderNew( pReader->nNode = nRoot; memcpy(pReader->aNode, zRoot, nRoot); }else{ - /* If the text of the SQL statement to iterate through a contiguous - ** set of entries in the %_segments table has not yet been composed, - ** compose it now. - */ - if( !p->zSelectLeaves ){ - p->zSelectLeaves = sqlite3_mprintf( - "SELECT block FROM %Q.'%q_segments' WHERE blockid BETWEEN ? AND ? " - "ORDER BY blockid", p->zDb, p->zName - ); - if( !p->zSelectLeaves ){ - rc = SQLITE_NOMEM; - goto finished; - } - } - - /* If there are no free statements in the aLeavesStmt[] array, prepare - ** a new statement now. Otherwise, reuse a prepared statement from - ** aLeavesStmt[]. 
- */ - if( p->nLeavesStmt==0 ){ - if( p->nLeavesTotal==p->nLeavesAlloc ){ - int nNew = p->nLeavesAlloc + 16; - sqlite3_stmt **aNew = (sqlite3_stmt **)sqlite3_realloc( - p->aLeavesStmt, nNew*sizeof(sqlite3_stmt *) - ); - if( !aNew ){ - rc = SQLITE_NOMEM; - goto finished; - } - p->nLeavesAlloc = nNew; - p->aLeavesStmt = aNew; - } - rc = sqlite3_prepare_v2(p->db, p->zSelectLeaves, -1, &pReader->pStmt, 0); - if( rc!=SQLITE_OK ){ - goto finished; - } - p->nLeavesTotal++; - }else{ - pReader->pStmt = p->aLeavesStmt[--p->nLeavesStmt]; - } - - /* Bind the start and end leaf blockids to the prepared SQL statement. */ - sqlite3_bind_int64(pReader->pStmt, 1, iStartLeaf); - sqlite3_bind_int64(pReader->pStmt, 2, iEndLeaf); + pReader->iCurrentBlock = iStartLeaf-1; } - rc = fts3SegReaderNext(pReader); + rc = fts3SegReaderNext(p, pReader); finished: if( rc==SQLITE_OK ){ @@ -1113,7 +1215,7 @@ int sqlite3Fts3SegReaderPending( pReader->iIdx = 0x7FFFFFFF; pReader->ppNextElem = (Fts3HashElem **)&pReader[1]; memcpy(pReader->ppNextElem, aElem, nElem*sizeof(Fts3HashElem *)); - fts3SegReaderNext(pReader); + fts3SegReaderNext(p, pReader); } } @@ -1991,7 +2093,7 @@ int sqlite3Fts3SegReaderIterate( for(i=0; ipList ){ + *pnByte = pDeferred->pList->nData; + return pDeferred->pList->aData; + } + *pnByte = 0; + return 0; +} + +/* +** Helper fucntion for FreeDeferredDoclists(). This function removes all +** references to deferred doclists from within the tree of Fts3Expr +** structures headed by +*/ +static void fts3DeferredDoclistClear(Fts3Expr *pExpr){ + if( pExpr ){ + fts3DeferredDoclistClear(pExpr->pLeft); + fts3DeferredDoclistClear(pExpr->pRight); + if( pExpr->bDeferred && pExpr->isLoaded ){ + sqlite3_free(pExpr->aDoclist); + pExpr->isLoaded = 0; + pExpr->aDoclist = 0; + pExpr->nDoclist = 0; + pExpr->pCurrent = 0; + pExpr->iCurrent = 0; + } + } +} + +/* +** Delete all cached deferred doclists. Deferred doclists are cached +** (allocated) by the sqlite3Fts3CacheDeferredDoclists() function. 
+*/ +void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *pCsr){ + Fts3DeferredToken *pDef; + for(pDef=pCsr->pDeferred; pDef; pDef=pDef->pNext){ + sqlite3_free(pDef->pList); + pDef->pList = 0; + } + fts3DeferredDoclistClear(pCsr->pExpr); +} + +/* +** Free all entries in the pCsr->pDeffered list. Entries are added to +** this list using sqlite3Fts3DeferToken(). +*/ +void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *pCsr){ + Fts3DeferredToken *pDef; + Fts3DeferredToken *pNext; + for(pDef=pCsr->pDeferred; pDef; pDef=pNext){ + pNext = pDef->pNext; + sqlite3_free(pDef->pList); + sqlite3_free(pDef); + } + pCsr->pDeferred = 0; +} + +/* +** Generate deferred-doclists for all tokens in the pCsr->pDeferred list +** based on the row that pCsr currently points to. +** +** A deferred-doclist is like any other doclist with position information +** included, except that it only contains entries for a single row of the +** table, not for all rows. +*/ +int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *pCsr){ + int rc = SQLITE_OK; /* Return code */ + if( pCsr->pDeferred ){ + int i; /* Used to iterate through table columns */ + sqlite3_int64 iDocid; /* Docid of the row pCsr points to */ + Fts3DeferredToken *pDef; /* Used to iterate through deferred tokens */ + + Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; + sqlite3_tokenizer *pT = p->pTokenizer; + sqlite3_tokenizer_module const *pModule = pT->pModule; + + assert( pCsr->isRequireSeek==0 ); + iDocid = sqlite3_column_int64(pCsr->pStmt, 0); + + for(i=0; inColumn && rc==SQLITE_OK; i++){ + const char *zText = sqlite3_column_text(pCsr->pStmt, i+1); + sqlite3_tokenizer_cursor *pTC = 0; + + rc = pModule->xOpen(pT, zText, -1, &pTC); + while( rc==SQLITE_OK ){ + char const *zToken; /* Buffer containing token */ + int nToken; /* Number of bytes in token */ + int iDum1, iDum2; /* Dummy variables */ + int iPos; /* Position of token in zText */ + + pTC->pTokenizer = pT; + rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos); + 
for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){ + Fts3PhraseToken *pPT = pDef->pToken; + if( (pDef->iCol>=p->nColumn || pDef->iCol==i) + && (pPT->n==nToken || (pPT->isPrefix && pPT->nz, pPT->n)) + ){ + fts3PendingListAppend(&pDef->pList, iDocid, i, iPos, &rc); + } + } + } + if( pTC ) pModule->xClose(pTC); + if( rc==SQLITE_DONE ) rc = SQLITE_OK; + } + + for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){ + if( pDef->pList ){ + rc = fts3PendingListAppendVarint(&pDef->pList, 0); + } + } + } + + return rc; +} + +/* +** Add an entry for token pToken to the pCsr->pDeferred list. +*/ +int sqlite3Fts3DeferToken( + Fts3Cursor *pCsr, /* Fts3 table cursor */ + Fts3PhraseToken *pToken, /* Token to defer */ + int iCol /* Column that token must appear in (or -1) */ +){ + Fts3DeferredToken *pDeferred; + pDeferred = sqlite3_malloc(sizeof(*pDeferred)); + if( !pDeferred ){ + return SQLITE_NOMEM; + } + memset(pDeferred, 0, sizeof(*pDeferred)); + pDeferred->pToken = pToken; + pDeferred->pNext = pCsr->pDeferred; + pDeferred->iCol = iCol; + pCsr->pDeferred = pDeferred; + + assert( pToken->pDeferred==0 ); + pToken->pDeferred = pDeferred; + + return SQLITE_OK; +} + + /* ** This function does the work for the xUpdate method of FTS3 virtual ** tables. diff --git a/ext/fts3/fts3speed.tcl b/ext/fts3/fts3speed.tcl new file mode 100644 index 0000000000..bf420aacb9 --- /dev/null +++ b/ext/fts3/fts3speed.tcl @@ -0,0 +1,123 @@ + + +#-------------------------------------------------------------------------- +# This script contains several sub-programs used to test FTS3/FTS4 +# performance. It does not run the queries directly, but generates SQL +# scripts that can be run using the shell tool. +# +# The following cases are tested: +# +# 1. Inserting documents into an FTS3 table. +# 2. Optimizing an FTS3 table (i.e. "INSERT INTO t1 VALUES('optimize')"). +# 3. Deleting documents from an FTS3 table. +# 4. Querying FTS3 tables. +# + +# Number of tokens in vocabulary. 
And number of tokens in each document. +# +set VOCAB_SIZE 2000 +set DOC_SIZE 100 + +set NUM_INSERTS 1000 +set NUM_SELECTS 1000 + +# Force everything in this script to be deterministic. +# +expr {srand(0)} + +proc usage {} { + puts stderr "Usage: $::argv0 " + exit -1 +} + +proc sql {sql} { + puts $::fd $sql +} + + +# Return a list of $nWord randomly generated tokens each between 2 and 10 +# characters in length. +# +proc build_vocab {nWord} { + set ret [list] + set chars [list a b c d e f g h i j k l m n o p q r s t u v w x y z] + for {set i 0} {$i<$nWord} {incr i} { + set len [expr {int((rand()*9.0)+2)}] + set term "" + for {set j 0} {$j<$len} {incr j} { + append term [lindex $chars [expr {int(rand()*[llength $chars])}]] + } + lappend ret $term + } + set ret +} + +proc select_term {} { + set n [llength $::vocab] + set t [expr int(rand()*$n*3)] + if {$t>=2*$n} { set t [expr {($t-2*$n)/100}] } + if {$t>=$n} { set t [expr {($t-$n)/10}] } + lindex $::vocab $t +} + +proc select_doc {nTerm} { + set ret [list] + for {set i 0} {$i<$nTerm} {incr i} { + lappend ret [select_term] + } + set ret +} + +proc test_1 {nInsert} { + sql "PRAGMA synchronous = OFF;" + sql "DROP TABLE IF EXISTS t1;" + sql "CREATE VIRTUAL TABLE t1 USING fts4;" + for {set i 0} {$i < $nInsert} {incr i} { + set doc [select_doc $::DOC_SIZE] + #sql "INSERT INTO t1 VALUES('$doc');" + sql "\"$doc\"" + } +} + +proc test_2 {} { + sql "INSERT INTO t1(t1) VALUES('optimize');" +} + +proc test_3 {nSelect} { + for {set i 0} {$i < $nSelect} {incr i} { + sql "SELECT count(*) FROM t1 WHERE t1 MATCH '[select_term]';" + } +} + +proc test_4 {nSelect} { + for {set i 0} {$i < $nSelect} {incr i} { + sql "SELECT count(*) FROM t1 WHERE t1 MATCH '[select_term] [select_term]';" + } +} + +if {[llength $argv]!=0} usage + +set ::vocab [build_vocab $::VOCAB_SIZE] + +set ::fd [open fts3speed_insert.sql w] +test_1 $NUM_INSERTS +close $::fd + +set ::fd [open fts3speed_select.sql w] +test_3 $NUM_SELECTS +close $::fd + +set ::fd [open 
fts3speed_select2.sql w] +test_4 $NUM_SELECTS +close $::fd + +set ::fd [open fts3speed_optimize.sql w] +test_2 +close $::fd + +puts "Success. Created files:" +puts " fts3speed_insert.sql" +puts " fts3speed_select.sql" +puts " fts3speed_select2.sql" +puts " fts3speed_optimize.sql" + diff --git a/manifest b/manifest index 1a93e47329..3a7cae981d 100644 --- a/manifest +++ b/manifest @@ -1,8 +1,5 @@ ------BEGIN PGP SIGNED MESSAGE----- -Hash: SHA1 - -C Avoid\staking\slocks\son\sunused\sdatabase\sconnections\swhen\scommitting\sa\nread\stransaction. -D 2010-10-14T01:17:30 +C Experimental\schanges\sto\sfts4\sto\stry\sto\sselectively\savoid\sloading\svery\slarge\sdoclists. +D 2010-10-19T14:08:00 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b01fdfcfecf8a0716c29867a67959f6148b79961 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -64,19 +61,20 @@ F ext/fts2/mkfts2amal.tcl 974d5d438cb3f7c4a652639262f82418c1e4cff0 F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9 F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d -F ext/fts3/fts3.c 03be86c59ac1a60d448c6eda460a8975ff2f170d +F ext/fts3/fts3.c 9d4ccf3b7bbfbeeef03dba91377c4d72b757dcb9 F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe -F ext/fts3/fts3Int.h b4f0b05ccafe1e5b4be2f052e9840dbd78a0395f -F ext/fts3/fts3_expr.c 42d5697731cd30fbeabd081bb3e6d3df5531f606 +F ext/fts3/fts3Int.h a640e4fbdb2fcab1457f87993ca3f4ceaa31e776 +F ext/fts3/fts3_expr.c a5aee50edde20e5c9116199bd58be869a3a22c9f F ext/fts3/fts3_hash.c 3c8f6387a4a7f5305588b203fa7c887d753e1f1c F ext/fts3/fts3_hash.h 8331fb2206c609f9fc4c4735b9ab5ad6137c88ec F ext/fts3/fts3_icu.c ac494aed69835008185299315403044664bda295 F ext/fts3/fts3_porter.c 8df6f6efcc4e9e31f8bf73a4007c2e9abca1dfba -F ext/fts3/fts3_snippet.c 2c4c921155e4b6befd272041fb903d999ac07d30 +F ext/fts3/fts3_snippet.c 
474c11e718610cade73e6009f75ffc173d4c42c5 F ext/fts3/fts3_tokenizer.c b4f2d01c24573852755bc92864816785dae39318 F ext/fts3/fts3_tokenizer.h 13ffd9fcb397fec32a05ef5cd9e0fa659bf3dbd3 F ext/fts3/fts3_tokenizer1.c 6e5cbaa588924ac578263a598e4fb9f5c9bb179d -F ext/fts3/fts3_write.c 97a583b9e1d23d5af4278f3ee3c16a37c3e077f4 +F ext/fts3/fts3_write.c 29b63a98de55d4eb34b7fc6fd90b3224d6cdc7ff +F ext/fts3/fts3speed.tcl 71b9cdc8f499822124a9eef42003e31a88f26f16 F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100 F ext/icu/README.txt bf8461d8cdc6b8f514c080e4e10dc3b2bbdfefa9 F ext/icu/icu.c 850e9a36567bbcce6bd85a4b68243cad8e3c2de2 @@ -422,7 +420,7 @@ F test/fts3ad.test e40570cb6f74f059129ad48bcef3d7cbc20dda49 F test/fts3ae.test ce32a13b34b0260928e4213b4481acf801533bda F test/fts3af.test d394978c534eabf22dd0837e718b913fd66b499c F test/fts3ag.test 0b7d303f61ae5d620c4efb5e825713ea34ff9441 -F test/fts3ah.test ba181d6a3dee0c929f0d69df67cac9c47cda6bff +F test/fts3ah.test 3c5a1bd49979d7b5b5ed9fdbcdd14a7bfe5a5ff9 F test/fts3ai.test d29cee6ed653e30de478066881cec8aa766531b2 F test/fts3aj.test 584facbc9ac4381a7ec624bfde677340ffc2a5a4 F test/fts3ak.test bd14deafe9d1586e8e9bf032411026ac4f8c925d @@ -433,8 +431,9 @@ F test/fts3ao.test 8fee868a0e131b98ce3e8907dc69936278e8b29a F test/fts3atoken.test 25c2070e1e8755d414bf9c8200427b277a9f99fa F test/fts3b.test e93bbb653e52afde110ad53bbd793f14fe7a8984 F test/fts3c.test fc723a9cf10b397fdfc2b32e73c53c8b1ec02958 -F test/fts3cov.test 3a9d8618a3107166530c447e808f8992372e0415 +F test/fts3cov.test 6f1ff88ff6b5abcfff6979098cb9d0c68a69202e F test/fts3d.test 95fb3c862cbc4297c93fceb9a635543744e9ef52 +F test/fts3defer.test a9f81bba6e1132dd6a2ad3cf11e4628733975c8c F test/fts3e.test 1f6c6ac9cc8b772ca256e6b22aaeed50c9350851 F test/fts3expr.test 5e745b2b6348499d9ef8d59015de3182072c564c F test/fts3expr2.test 18da930352e5693eaa163a3eacf96233b7290d1a @@ -535,7 +534,7 @@ F test/mallocH.test 79b65aed612c9b3ed2dcdaa727c85895fd1bfbdb F test/mallocI.test 
a88c2b9627c8506bf4703d8397420043a786cdb6 F test/mallocJ.test b5d1839da331d96223e5f458856f8ffe1366f62e F test/mallocK.test d79968641d1b70d88f6c01bdb9a7eb4a55582cc9 -F test/malloc_common.tcl cda732c0d2365a058c2a73778cf6b6da6db54452 +F test/malloc_common.tcl 9dfb33f12173f9a8b029dae0443c569b59b980b6 F test/manydb.test b3d3bc4c25657e7f68d157f031eb4db7b3df0d3c F test/memdb.test 0825155b2290e900264daaaf0334b6dfe69ea498 F test/memleak.test 10b9c6c57e19fc68c32941495e9ba1c50123f6e2 @@ -570,7 +569,7 @@ F test/pageropt.test 8146bf448cf09e87bb1867c2217b921fb5857806 F test/pagesize.test 76aa9f23ecb0741a4ed9d2e16c5fa82671f28efb F test/pcache.test 4118a183908ecaed343a06fcef3ba82e87e0129d F test/pcache2.test 0d85f2ab6963aee28c671d4c71bec038c00a1d16 -F test/permutations.test ca1c985cf68c692096d0325b33c62f2b576446a5 +F test/permutations.test ec9b2ebd52ff43c5a3bec4723098fab1ef29d944 F test/pragma.test fdfc09067ea104a0c247a1a79d8093b56656f850 F test/pragma2.test 5364893491b9231dd170e3459bfc2e2342658b47 F test/printf.test 05970cde31b1a9f54bd75af60597be75a5c54fea @@ -876,14 +875,11 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f -P ea8c2f5f8a890dcb422e9e46298ae6ca378c74b7 -R 03d2b3a92f9647a3db71aa9ca75489f4 -U drh -Z c2cdb52e62b9c570a5e1c8427f4ce5c4 ------BEGIN PGP SIGNATURE----- -Version: GnuPG v1.4.6 (GNU/Linux) - -iD8DBQFMtlotoxKgR168RlERAuf0AJ96F+hVDOt4y4GU2wooqTHtO4kKZgCeKIVj -dUPTQwgWJgqAaN5BweGLucY= -=QYIr ------END PGP SIGNATURE----- +P c0ee614fd988f445c4884a37f494479bdd669185 +R 0c4aebbf44d624104504e37bec992917 +T *bgcolor * #c0ffc0 +T *branch * experimental +T *sym-experimental * +T -sym-trunk * +U dan +Z c2b99a58ccad27405ff8b0fcedef5c33 diff --git a/manifest.uuid b/manifest.uuid index 4e2a5db003..ee8a49e8ae 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ 
-c0ee614fd988f445c4884a37f494479bdd669185 \ No newline at end of file +5ae0ba447a561e3b6637b52f9b83a9fc683d2572 \ No newline at end of file diff --git a/test/fts3ah.test b/test/fts3ah.test index 1a58e49f4b..6e8f2d541a 100644 --- a/test/fts3ah.test +++ b/test/fts3ah.test @@ -1,37 +1,32 @@ -# 2006 October 31 (scaaarey) +# 2006 October 31 # -# The author disclaims copyright to this source code. +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. # #************************************************************************* # This file implements regression tests for SQLite library. The focus -# here is testing correct handling of excessively long terms. -# -# $Id: fts3ah.test,v 1.1 2007/08/20 17:38:42 shess Exp $ +# here is testing correct handling of very long terms. # set testdir [file dirname $argv0] source $testdir/tester.tcl -# If SQLITE_ENABLE_FTS3 is defined, omit this file. +# If SQLITE_ENABLE_FTS3 is not defined, omit this file. ifcapable !fts3 { finish_test return } -# Generate a term of len copies of char. -proc bigterm {char len} { - for {set term ""} {$len>0} {incr len -1} { - append term $char - } - return $term -} - # Generate a document of bigterms based on characters from the list # chars. 
proc bigtermdoc {chars len} { set doc "" foreach char $chars { - append doc " " [bigterm $char $len] + append doc " " [string repeat $char $len] } return $doc } @@ -41,9 +36,9 @@ set doc1 [bigtermdoc {a b c d} $len] set doc2 [bigtermdoc {b d e f} $len] set doc3 [bigtermdoc {a c e} $len] -set aterm [bigterm a $len] -set bterm [bigterm b $len] -set xterm [bigterm x $len] +set aterm [string repeat a $len] +set bterm [string repeat b $len] +set xterm [string repeat x $len] db eval { CREATE VIRTUAL TABLE t1 USING fts3(content); diff --git a/test/fts3cov.test b/test/fts3cov.test index d3fe4fa8c5..92def056cd 100644 --- a/test/fts3cov.test +++ b/test/fts3cov.test @@ -82,27 +82,28 @@ do_test fts3cov-2.1 { INSERT INTO t1 VALUES('And she in the midnight wood will pray'); INSERT INTO t1 VALUES('For the weal of her lover that''s far away.'); COMMIT; - + } + execsql { INSERT INTO t1(t1) VALUES('optimize'); SELECT substr(hex(root), 1, 2) FROM t1_segdir; } } {03} # Test the "missing entry" case: -do_test fts3cov-2.1 { +do_test fts3cov-2.2 { set root [db one {SELECT root FROM t1_segdir}] read_fts3varint [string range $root 1 end] left_child execsql { DELETE FROM t1_segments WHERE blockid = $left_child } } {} -do_error_test fts3cov-2.2 { +do_error_test fts3cov-2.3 { SELECT * FROM t1 WHERE t1 MATCH 'c*' } {database disk image is malformed} # Test the "replaced with NULL" case: -do_test fts3cov-2.3 { +do_test fts3cov-2.4 { execsql { INSERT INTO t1_segments VALUES($left_child, NULL) } } {} -do_error_test fts3cov-2.4 { +do_error_test fts3cov-2.5 { SELECT * FROM t1 WHERE t1 MATCH 'cloud' } {database disk image is malformed} diff --git a/test/fts3defer.test b/test/fts3defer.test new file mode 100644 index 0000000000..dd35be8e1e --- /dev/null +++ b/test/fts3defer.test @@ -0,0 +1,360 @@ +# 2010 October 15 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. 
+# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +source $testdir/malloc_common.tcl + +ifcapable !fts3 { + finish_test + return +} + +set ::testprefix fts3defer + +#-------------------------------------------------------------------------- +# Test cases fts3defer-1.* are the "warm body" cases. The database contains +# one row with 15000 instances of the token "a". This makes the doclist for +# "a" so large that FTS3 will avoid loading it in most cases. +# +# To show this, test cases fts3defer-1.2.* execute a bunch of FTS3 queries +# involving token "a". Then, fts3defer-1.3.* replaces the doclist for token +# "a" with all zeroes and fts3defer-1.4.* repeats the tests from 1.2. If +# the tests still work, we can conclude that the doclist for "a" was not +# used. +# + +set aaa [string repeat "a " 15000] + +do_execsql_test 1.1 { + CREATE VIRTUAL TABLE t1 USING fts4; + BEGIN; + INSERT INTO t1 VALUES('this is a dog'); + INSERT INTO t1 VALUES('an instance of a phrase'); + INSERT INTO t1 VALUES('an instance of a longer phrase'); + INSERT INTO t1 VALUES($aaa); + COMMIT; +} {} + +set tests { + 1 {SELECT rowid FROM t1 WHERE t1 MATCH '"a dog"'} {1} + 2 {SELECT rowid FROM t1 WHERE t1 MATCH '"is a dog"'} {1} + 3 {SELECT rowid FROM t1 WHERE t1 MATCH '"a longer phrase"'} {3} + 4 {SELECT snippet(t1) FROM t1 WHERE t1 MATCH '"a longer phrase"'} + {"an instance of a longer phrase"} + 5 {SELECT rowid FROM t1 WHERE t1 MATCH 'a dog'} {1} +} + +do_select_tests 1.2 $tests + +do_execsql_test 1.3 { + SELECT count(*) FROM t1_segments WHERE length(block)>10000; + UPDATE t1_segments + SET block = zeroblob(length(block)) + WHERE length(block)>10000; +} {1} + +do_select_tests 1.4 $tests + +# Drop the table. It is corrupt now anyhow, so not useful for subsequent tests. 
+# +do_execsql_test 1.5 { DROP TABLE t1 } + +#-------------------------------------------------------------------------- +# These tests - fts3defer-2.* - are more rigorous. They test that for a +# variety of queries, FTS3 and FTS4 return the same results. And that +# zeroing the very large doclists that FTS4 does not load does not change +# the results. +# +# They use the following pseudo-randomly generated document data. The +# tokens "zm" and "jk" are especially common in this dataset. Additionally, +# two documents are added to the pseudo-random data before it is loaded +# into FTS4 containing 100,000 instances of the "zm" and "jk" tokens. This +# makes the doclists for those tokens so large that FTS4 avoids loading them +# into memory if possible. +# +set data [list] +lappend data [string repeat "zm " 100000] +lappend data [string repeat "jk " 100000] +lappend data {*}{ + "zm zm agmckuiu uhzq nsab jk rrkx duszemmzl hyq jk" + "jk uhzq zm zm rgpzmlnmd zm zk jk jk zm" + "duszemmzl zm jk xldlpy zm jk sbptoa xh jk xldlpy" + "zm xh zm xqf azavwm jk jk trqd rgpzmlnmd jk" + "zm vwq urvysbnykk ubwrfqnbjf zk lsz jk doiwavhwwo jk jk" + "jk xduvfhk orpfawpx zkhdvkw jk mjpavjuhw zm jk duszemmzl zm" + "jk igju jk jk zm hmjf xh zm gwdfhwurx zk" + "vgsld jk jk zm hrlipdm jn zm zsmhnf vgsld duszemmzl" + "gtuiexzsu aayxpmve zm zm zm drir scpgna xh azavwm uhzq" + "farlehdhq hkfoudzftq igju duszemmzl xnxhf ewle zm hrlipdm urvysbnykk kn" + "xnxhf jk jk agmckuiu duszemmzl jk zm zm jk vgsld" + "zm zm zm jk jk urvysbnykk ogttbykvt zm zm jk" + "iasrqgqv zm azavwm zidhxhbtv jk jk mjpavjuhw zm zm ajmvcydy" + "rgpzmlnmd tmt mjpavjuhw xh igju jk azavwm fibokdry vgsld ofm" + "zm jk vgsld jk xh jk csjqxhgj drir jk pmrb" + "xh jk jk zm rrkx duszemmzl mjpavjuhw xldlpy igju zm" + "jk hkfoudzftq zf rrkx wdmy jupk jk zm urvysbnykk npywgdvgz" + "zm jk zm zm zhbrzadb uenvbm aayxpmve urvysbnykk duszemmzl jk" + "uenvbm jk zm fxw xh bdilwmjw mjpavjuhw uv jk zm" + "nk jk bnhc pahlds jk igju dzadnqzprr 
jk jk jk" + "uhzq uv zm duszemmzl tlqix jk jk xh jk zm" + "jk zm agmckuiu urvysbnykk jk jk zm zm jk jk" + "azavwm mjpavjuhw lsgshn trqd xldlpy ogyavjvv agmckuiu ryvwwhlbc jk jk" + "tmt jk zk zm azavwm ofm acpgim bvgimjik iasrqgqv wuvajhwqz" + "igju ogyavjvv xrbdak rrkx fibokdry zf ujfhmrllq jk zm hxgwvib" + "zm pahlds jk uenvbm aayxpmve iaf hmjf xph vnlyvtkgx zm" + "jk xnxhf igju jk xh jk nvfasfh zm js jk" + "zm zm rwaj igju xr rrkx xnxhf nvfasfh skxbsqzvmt xatbxeqq" + "vgsld zm ujfhmrllq uhzq ogyavjvv nsab azavwm zm vgsld jmfiqhwnjg" + "ymjoym duszemmzl urvysbnykk azavwm jk jmfiqhwnjg bu qcdziqomqk vnlyvtkgx" + "zm nbilqcnz dzadnqzprr xh bkfgzsxn urvysbnykk xrujfzxqf zm zf agmckuiu" + "jk urvysbnykk nvfasfh zf xh zm zm qcdziqomqk qvxtclg wdmy" + "fibokdry jk urvysbnykk jk xr osff zm cvnnsl zm vgsld" + "jk mjpavjuhw hkfoudzftq jk zm xh xqf urvysbnykk jk iasrqgqv" + "jk csjqxhgj duszemmzl iasrqgqv aayxpmve zm brsuoqww jk qpmhtvl wluvgsw" + "jk mj azavwm jk zm jn dzadnqzprr zm jk uhzq" + "zk xqf jupk fxw nbilqcnz zm jk jcpiwj tznlvbfcv nvfasfh" + "jk jcpiwj zm xnxhf zm mjpavjuhw mj drir pa pvjrjlas" + "duszemmzl dzadnqzprr jk swc duszemmzl tmt jk jk pahlds jk" + "zk zm jk zm zm eczkjblu zm hi pmrb jk" + "azavwm zm iz agmckuiu jk sntk jk duszemmzl duszemmzl zm" + "jk zm jk eczkjblu urvysbnykk sk gnl jk ttvgf hmjf" + "jk bnhc jjrxpjkb mjpavjuhw fibokdry igju jk zm zm xh" + "wxe ogttbykvt uhzq xr iaf zf urvysbnykk aayxpmve oacaxgjoo mjpavjuhw" + "gazrt jk ephknonq myjp uenvbm wuvajhwqz jk zm xnxhf nvfasfh" + "zm aayxpmve csjqxhgj xnxhf xr jk aayxpmve xnxhf zm zm" + "sokcyf zm ogyavjvv jk zm fibokdry zm jk igju igju" + "vgsld bvgimjik xuprtlyle jk akmikrqyt jk aayxpmve hkfoudzftq ddjj ithtir" + "zm uhzq ovkyevlgv zk uenvbm csjqxhgj jk vgsld pgybs jk" + "zm agmckuiu zexh fibokdry jk uhzq bu tugflixoex xnxhf sk" + "zm zf uenvbm jk azavwm zm zm agmckuiu zm jk" + "rrkx jk zf jt zm oacaxgjoo fibokdry wdmy igju csjqxhgj" + "hi igju zm jk zidhxhbtv dzadnqzprr jk jk trqd 
duszemmzl" + "zm zm mjpavjuhw xrbdak qrvbjruc jk qzzqdxq guwq cvnnsl zm" + "ithtir jk jk qcdziqomqk zm farlehdhq zm zm xrbdak jk" + "ixfipk csjqxhgj azavwm sokcyf ttvgf vgsld jk sk xh zk" + "nvfasfh azavwm zm zm zm fxw nvfasfh zk gnl trqd" + "zm fibokdry csjqxhgj ofm dzadnqzprr jk akmikrqyt orpfawpx duszemmzl vwq" + "csjqxhgj jk jk vgsld urvysbnykk jk nxum jk jk nxum" + "zm hkfoudzftq jk ryvwwhlbc mjpavjuhw ephknonq jk zm ogyavjvv zm" + "lwa hi xnxhf qdyerbws zk njtc jk uhzq zm jk" + "trqd zm dzadnqzprr zm urvysbnykk jk lsz jk mjpavjuhw cmnnkna" + "duszemmzl zk jk jk fibokdry jseuhjnzo zm aayxpmve zk jk" + "fibokdry jk sviq qvxtclg wdmy jk doiwavhwwo zexh jk zm" + "jupk zm xh jk mjpavjuhw zm jk nsab npywgdvgz duszemmzl" + "zm igju zm zm nvfasfh eh hkfoudzftq fibokdry fxw xkblf" + "jk zm jk jk zm xh zk abthnzcv zf csjqxhgj" + "zm zm jk nkaotm urvysbnykk sbptoa bq jk ktxdty ubwrfqnbjf" + "nvfasfh aayxpmve xdcuz zm tugflixoex jcpiwj zm mjpavjuhw fibokdry doiwavhwwo" + "iaf jk mjpavjuhw zm duszemmzl jk jk uhzq pahlds fibokdry" + "ddjj zk azavwm jk swc zm gjtexkv jk xh jk" + "igju jk csjqxhgj zm jk dzadnqzprr duszemmzl ulvcbv jk jk" + "jk fibokdry zm csjqxhgj jn zm zm zm zf uhzq" + "duszemmzl jk xkblf zk hrlipdm aayxpmve uenvbm uhzq jk zf" + "dzadnqzprr jk zm zdu nvfasfh zm jk urvysbnykk hmjf jk" + "jk aayxpmve aserrdxm acpgim fibokdry jk drir wxe brsuoqww rrkx" + "uhzq csjqxhgj nvfasfh jk rrkx qbamok trqd uenvbm sntk zm" + "ps azavwm zkhdvkw jk zm jk jk zm csjqxhgj xedlrcfo" + "jk jk ogyavjvv jk zm farlehdhq duszemmzl jk agitgxamxe jk" + "qzzqdxq rwaj jk jk zm xqf jk uenvbm jk zk" + "zm hxgwvib akmikrqyt zf agmckuiu uenvbm bq npywgdvgz azavwm jk" + "zf jmfiqhwnjg js igju zm aayxpmve zm mbxnljomiv csjqxhgj nvfasfh" + "zm jk jk gazrt jk jk lkc jk nvfasfh jk" + "xldlpy orpfawpx zkhdvkw jk zm igju zm urvysbnykk dzadnqzprr mbxnljomiv" + "urvysbnykk jk zk igju zm uenvbm jk zm ithtir jk" + "zm zk zm zf ofm zm xdcuz dzadnqzprr zm vgsld" + "sbptoa jk tugflixoex jk zm zm vgsld 
zm xh zm" + "uhzq jk zk evvivo vgsld vniqnuynvf agmckuiu jk zm zm" + "zm nvfasfh zm zm zm abthnzcv uenvbm jk zk dzadnqzprr" + "zm azavwm igju qzzqdxq jk xnxhf abthnzcv jk nvfasfh zm" + "qbamok fxw vgsld igju cmnnkna xnxhf vniqnuynvf zk xh zm" + "nvfasfh zk zm mjpavjuhw dzadnqzprr jk jk duszemmzl xldlpy nvfasfh" + "xnxhf sviq nsab npywgdvgz osff vgsld farlehdhq fibokdry wjbkhzsa hhac" + "zm azavwm scpgna jk jk bq jk duszemmzl fibokdry ovkyevlgv" + "csjqxhgj zm jk jk duszemmzl zk xh zm jk zf" + "urvysbnykk dzadnqzprr csjqxhgj mjpavjuhw ubwrfqnbjf nkaotm jk jk zm drir" + "nvfasfh xh igju zm wluvgsw jk zm srwwnezqk ewle ovnq" + "jk nvfasfh eh ktxdty urvysbnykk vgsld zm jk eh uenvbm" + "orpfawpx pahlds jk uhzq hi zm zm zf jk dzadnqzprr" + "srwwnezqk csjqxhgj rbwzuf nvfasfh jcpiwj xldlpy nvfasfh jk vgsld wjybxmieki" +} + + + +foreach {tn setup} { + 1 { + set dmt_modes 0 + execsql { CREATE VIRTUAL TABLE t1 USING FTS3 } + foreach doc $data { execsql { INSERT INTO t1 VALUES($doc) } } + } + 2 { + set dmt_modes 0 + execsql { CREATE VIRTUAL TABLE t1 USING FTS4 } + foreach doc $data { execsql { INSERT INTO t1 VALUES($doc) } } + } + 3 { + set dmt_modes {0 1 2} + execsql { CREATE VIRTUAL TABLE t1 USING FTS4 } + foreach doc $data { execsql { INSERT INTO t1 VALUES($doc) } } + execsql { + UPDATE t1_segments + SET block = zeroblob(length(block)) + WHERE length(block)>10000; + } + } +} { + + execsql { DROP TABLE IF EXISTS t1 } + eval $setup + set ::testprefix fts3defer-2.$tn + set DO_MALLOC_TEST 0 + + do_execsql_test 0 { + SELECT count(*) FROM t1_segments WHERE length(block)>10000 + } {2} + + do_select_test 1.1 { + SELECT rowid FROM t1 WHERE t1 MATCH 'jk xnxhf' + } {13 29 40 47 48 52 63 92} + do_select_test 1.2 { + SELECT rowid FROM t1 WHERE t1 MATCH 'jk eh' + } {100} + do_select_test 1.3 { + SELECT rowid FROM t1 WHERE t1 MATCH 'jk ubwrfqnbjf' + } {7 70 98} + do_select_test 1.4 { + SELECT rowid FROM t1 WHERE t1 MATCH 'duszemmzl jk' + } {3 5 8 10 13 18 20 23 32 37 41 43 55 60 65 67 72 
74 76 81 94 96 97} + do_select_test 1.5 { + SELECT rowid FROM t1 WHERE t1 MATCH 'ubwrfqnbjf jk' + } {7 70 98} + do_select_test 1.6 { + SELECT rowid FROM t1 WHERE t1 MATCH 'jk ubwrfqnbjf jk jk jk jk' + } {7 70 98} + do_select_test 1.7 { + SELECT rowid FROM t1 WHERE t1 MATCH 'zm xnxhf' + } {12 13 29 30 40 47 48 52 63 92 93} + do_select_test 1.8 { + SELECT rowid FROM t1 WHERE t1 MATCH 'zm eh' + } {68 100} + do_select_test 1.9 { + SELECT rowid FROM t1 WHERE t1 MATCH 'zm ubwrfqnbjf' + } {7 70 98} + + do_select_test 2.1 { + SELECT rowid FROM t1 WHERE t1 MATCH '"zm agmckuiu"' + } {3 24 52 53} + do_select_test 2.2 { + SELECT rowid FROM t1 WHERE t1 MATCH '"zm zf"' + } {33 53 75 88 101} + do_select_test 2.3 { + SELECT rowid FROM t1 WHERE t1 MATCH '"zm aayxpmve"' + } {48 65 84} + do_select_test 2.4 { + SELECT rowid FROM t1 WHERE t1 MATCH '"aayxpmve zm"' + } {11 37 84} + do_select_test 2.5 { + SELECT rowid FROM t1 WHERE t1 MATCH '"jk azavwm"' + } {16 53} + do_select_test 2.6 { + SELECT rowid FROM t1 WHERE t1 MATCH '"xh jk jk"' + } {18} + do_select_test 2.7 { + SELECT rowid FROM t1 WHERE t1 MATCH '"zm jk vgsld"' + } {13 17} + + do_select_test 3.1 { + SELECT snippet(t1, '[', ']') FROM t1 WHERE t1 MATCH '"zm agmckuiu"' + } { + {zm [zm] [agmckuiu] uhzq nsab jk rrkx duszemmzl hyq jk} + {jk [zm] [agmckuiu] urvysbnykk jk jk zm zm jk jk} + {[zm] [agmckuiu] zexh fibokdry jk uhzq bu tugflixoex xnxhf sk} + {zm zf uenvbm jk azavwm zm [zm] [agmckuiu] zm jk} + } + + do_select_test 3.2 { + SELECT snippet(t1, '[', ']') FROM t1 WHERE t1 MATCH 'xnxhf jk' + } { + {[xnxhf] [jk] [jk] agmckuiu duszemmzl [jk] zm zm [jk] vgsld} + {[jk] [xnxhf] igju [jk] xh [jk] nvfasfh zm js [jk]} + {[jk] jcpiwj zm [xnxhf] zm mjpavjuhw mj drir pa pvjrjlas} + {gazrt [jk] ephknonq myjp uenvbm wuvajhwqz [jk] zm [xnxhf] nvfasfh} + {zm aayxpmve csjqxhgj [xnxhf] xr [jk] aayxpmve [xnxhf] zm zm} + {zm agmckuiu zexh fibokdry [jk] uhzq bu tugflixoex [xnxhf] sk} + {lwa hi [xnxhf] qdyerbws zk njtc [jk] uhzq zm [jk]} + {zm azavwm 
igju qzzqdxq [jk] [xnxhf] abthnzcv [jk] nvfasfh zm} + } + + do_select_test 4.1 { + SELECT offsets(t1) FROM t1 WHERE t1 MATCH '"jk uenvbm"' + } { + {0 0 10 2 0 1 13 6} {0 0 26 2 0 1 29 6} + } + + do_select_test 4.2 { + SELECT offsets(t1) FROM t1 WHERE t1 MATCH 'duszemmzl jk fibokdry' + } { + {0 2 3 8 0 1 36 2 0 0 58 9} + {0 0 0 9 0 1 13 2 0 1 16 2 0 2 19 8 0 1 53 2} + {0 1 4 2 0 0 20 9 0 1 30 2 0 1 33 2 0 2 48 8} + {0 1 17 2 0 1 20 2 0 1 26 2 0 0 29 9 0 2 39 8} + } + + # The following block of tests runs normally with FTS3 or FTS4 without the + # long doclists zeroed. And with OOM-injection for FTS4 with long doclists + # zeroed. Change this by messing with the [set dmt_modes] commands above. + # + foreach DO_MALLOC_TEST $dmt_modes { + + # Phrase search. + do_select_test 5.$DO_MALLOC_TEST.1 { + SELECT rowid FROM t1 WHERE t1 MATCH '"jk mjpavjuhw"' + } {8 15 36 64 67 72} + + # Multiple tokens search. + do_select_test 5.$DO_MALLOC_TEST.2 { + SELECT rowid FROM t1 WHERE t1 MATCH 'duszemmzl zm' + } {3 5 8 10 12 13 18 20 23 37 43 55 60 65 67 72 74 81 94 96 97} + + # snippet() function with phrase. + do_select_test 5.$DO_MALLOC_TEST.3 { + SELECT snippet(t1, '[', ']') FROM t1 WHERE t1 MATCH '"zm aayxpmve"' + } { + {[zm] [aayxpmve] csjqxhgj xnxhf xr jk aayxpmve xnxhf zm zm} + {duszemmzl zk jk jk fibokdry jseuhjnzo [zm] [aayxpmve] zk jk} + {zf jmfiqhwnjg js igju [zm] [aayxpmve] zm mbxnljomiv csjqxhgj nvfasfh} + } + + # snippet() function with multiple tokens. + do_select_test 5.$DO_MALLOC_TEST.4 { + SELECT snippet(t1, '[', ']') FROM t1 WHERE t1 MATCH 'zm zhbrzadb' + } { + {[zm] jk [zm] [zm] [zhbrzadb] uenvbm aayxpmve urvysbnykk duszemmzl jk} + } + + # offsets() function with phrase. + do_select_test 5.$DO_MALLOC_TEST.5 { + SELECT offsets(t1) FROM t1 WHERE t1 MATCH '"zm aayxpmve"' + } { + {0 0 0 2 0 1 3 8} {0 0 38 2 0 1 41 8} {0 0 22 2 0 1 25 8} + } + + # offsets() function with multiple tokens.
+ do_select_test 5.$DO_MALLOC_TEST.6 { + SELECT offsets(t1) FROM t1 WHERE t1 MATCH 'zm zhbrzadb' + } { + {0 0 0 2 0 0 6 2 0 0 9 2 0 1 12 8} + } + } +} + + +finish_test diff --git a/test/malloc_common.tcl b/test/malloc_common.tcl index 6b7869d1da..4b2478abe5 100644 --- a/test/malloc_common.tcl +++ b/test/malloc_common.tcl @@ -526,7 +526,7 @@ proc do_malloc_test {tn args} { # match the expected results passed via parameter $result. # proc do_select_test {name sql result} { - uplevel [list doPassiveTest 0 $name $sql [list 0 $result]] + uplevel [list doPassiveTest 0 $name $sql [list 0 [list {*}$result]]] } proc do_restart_select_test {name sql result} { @@ -540,6 +540,12 @@ proc do_error_test {name sql error} { proc doPassiveTest {isRestart name sql catchres} { if {![info exists ::DO_MALLOC_TEST]} { set ::DO_MALLOC_TEST 1 } + if {[info exists ::testprefix] + && [string is integer [string range $name 0 0]] + } { + set name $::testprefix.$name + } + switch $::DO_MALLOC_TEST { 0 { # No malloc failures. do_test $name [list set {} [uplevel [list catchsql $sql]]] $catchres diff --git a/test/permutations.test b/test/permutations.test index fb1604ca96..95896b6d34 100644 --- a/test/permutations.test +++ b/test/permutations.test @@ -166,7 +166,7 @@ test_suite "fts3" -prefix "" -description { fts3ak.test fts3al.test fts3am.test fts3an.test fts3ao.test fts3atoken.test fts3b.test fts3c.test fts3cov.test fts3d.test fts3e.test fts3expr.test fts3expr2.test fts3near.test - fts3query.test fts3snippet.test + fts3query.test fts3snippet.test fts3defer.test }