1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-07-30 19:03:16 +03:00

Fix various issues to do with deferred tokens, NEAR expressions and matchinfo().

FossilOrigin-Name: 3972a787df5ec253b99b148385655e7b68d851fa
This commit is contained in:
dan
2011-06-08 18:39:07 +00:00
parent 3eabcf5f46
commit abf2545ed9
8 changed files with 265 additions and 474 deletions

View File

@ -1122,9 +1122,7 @@ static int fts3InitVtab(
}
/* Figure out the page-size for the database. This is required in order to
** estimate the cost of loading large doclists from the database (see
** function sqlite3Fts3SegReaderCost() for details).
*/
** estimate the cost of loading large doclists from the database. */
fts3DatabasePageSize(&rc, p);
p->nNodeSize = p->nPgsz-35;
@ -1965,9 +1963,9 @@ static void fts3PutDeltaVarint3(
static int fts3DoclistOrMerge(
int bDescIdx, /* True if arguments are desc */
u8 *a1, int n1, /* First doclist */
u8 *a2, int n2, /* Second doclist */
u8 **paOut, int *pnOut /* OUT: Malloc'd doclist */
char *a1, int n1, /* First doclist */
char *a2, int n2, /* Second doclist */
char **paOut, int *pnOut /* OUT: Malloc'd doclist */
){
sqlite3_int64 i1 = 0;
sqlite3_int64 i2 = 0;
@ -1977,7 +1975,6 @@ static int fts3DoclistOrMerge(
char *p1 = a1;
char *p2 = a2;
char *p;
int nOut;
char *aOut;
int bFirstOut = 0;
@ -2016,8 +2013,8 @@ static int fts3DoclistOrMerge(
static void fts3DoclistPhraseMerge(
int bDescIdx, /* True if arguments are desc */
int nDist, /* Distance from left to right (1=adjacent) */
u8 *aLeft, int nLeft, /* Left doclist */
u8 *aRight, int *pnRight /* IN/OUT: Right/output doclist */
char *aLeft, int nLeft, /* Left doclist */
char *aRight, int *pnRight /* IN/OUT: Right/output doclist */
){
sqlite3_int64 i1 = 0;
sqlite3_int64 i2 = 0;
@ -2063,83 +2060,6 @@ static void fts3DoclistPhraseMerge(
*pnRight = p - aOut;
}
/*
** Merge two doclists (both of which include position lists) according to
** the requirements of a NEAR operator.
**
** Only docids present in both inputs are considered. For each such docid
** the two position lists are handed to fts3PoslistNearMerge(); the docid is
** kept in the output only if that call returns non-zero.
**
** On success SQLITE_OK is returned and *paOut/*pnOut describe a buffer
** obtained from sqlite3_malloc(). If an allocation fails, SQLITE_NOMEM is
** returned and *paOut and *pnOut are both zeroed.
*/
static int fts3DoclistNearMerge(
  int bDescIdx,                   /* Passed to the delta-varint helpers */
  int nNear,                      /* Parameter to NEAR operator */
  int nTokenLeft,                 /* Number of tokens in LHS phrase arg */
  char *aLeft,                    /* Doclist for LHS (incl. positions) */
  int nLeft,                      /* Size of LHS doclist in bytes */
  int nTokenRight,                /* As nTokenLeft, for the RHS */
  char *aRight,                   /* As aLeft, for the RHS */
  int nRight,                     /* As nLeft, for the RHS */
  char **paOut,                   /* OUT: Results of merge (malloced) */
  int *pnOut                      /* OUT: Size of output buffer */
){
  char *pLeftEnd = &aLeft[nLeft];       /* One byte past end of LHS */
  char *pRightEnd = &aRight[nRight];    /* One byte past end of RHS */
  char *pL = aLeft;                     /* Read cursor within LHS */
  char *pR = aRight;                    /* Read cursor within RHS */
  sqlite3_int64 iLeft = 0;              /* Current LHS docid */
  sqlite3_int64 iRight = 0;             /* Current RHS docid */
  sqlite3_int64 iLast = 0;              /* Last docid written to output */
  int bFirst = 0;                       /* State for fts3PutDeltaVarint3() */
  int nParamLeft = nNear + nTokenRight;
  int nParamRight = nNear + nTokenLeft;
  char *aMerged = sqlite3_malloc(nLeft+nRight+1);
  char *aScratch = sqlite3_malloc(2*(nLeft+nRight+1));
  char *pOut = aMerged;                 /* Write cursor within aMerged */

  if( aMerged==0 || aScratch==0 ){
    sqlite3_free(aMerged);
    sqlite3_free(aScratch);
    *paOut = 0;
    *pnOut = 0;
    return SQLITE_NOMEM;
  }

  fts3GetDeltaVarint3(&pL, pLeftEnd, 0, &iLeft);
  fts3GetDeltaVarint3(&pR, pRightEnd, 0, &iRight);

  while( pL!=0 && pR!=0 ){
    sqlite3_int64 iCmp = COMPARE_DOCID(iLeft, iRight);
    if( iCmp<0 ){
      /* Docid only on the left: skip its position list. */
      fts3PoslistCopy(0, &pL);
      fts3GetDeltaVarint3(&pL, pLeftEnd, bDescIdx, &iLeft);
    }else if( iCmp>0 ){
      /* Docid only on the right: skip its position list. */
      fts3PoslistCopy(0, &pR);
      fts3GetDeltaVarint3(&pR, pRightEnd, bDescIdx, &iRight);
    }else{
      /* Same docid on both sides. Write the docid optimistically, then
      ** roll the output state back if the NEAR merge yields nothing. */
      char *pOutSaved = pOut;
      sqlite3_int64 iLastSaved = iLast;
      int bFirstSaved = bFirst;

      fts3PutDeltaVarint3(&pOut, bDescIdx, &iLast, &bFirst, iLeft);
      if( 0==fts3PoslistNearMerge(
            &pOut, aScratch, nParamLeft, nParamRight, &pL, &pR
      )){
        pOut = pOutSaved;
        iLast = iLastSaved;
        bFirst = bFirstSaved;
      }
      fts3GetDeltaVarint3(&pL, pLeftEnd, bDescIdx, &iLeft);
      fts3GetDeltaVarint3(&pR, pRightEnd, bDescIdx, &iRight);
    }
  }

  sqlite3_free(aScratch);
  *paOut = aMerged;
  *pnOut = pOut - aMerged;
  return SQLITE_OK;
}
/*
** Merge all doclists in the TermSelect.aaOutput[] array into a single
@ -2166,7 +2086,7 @@ static int fts3TermSelectMerge(Fts3Table *p, TermSelect *pTS){
pTS->aaOutput[i] = 0;
}else{
int nNew;
u8 *aNew;
char *aNew;
int rc = fts3DoclistOrMerge(p->bDescIdx,
pTS->aaOutput[i], pTS->anOutput[i], aOut, nOut, &aNew, &nNew
@ -2231,7 +2151,7 @@ static int fts3TermSelectCb(
pTS->anOutput[iOut] = nMerge;
break;
}else{
u8 *aNew;
char *aNew;
int nNew;
int rc = fts3DoclistOrMerge(p->bDescIdx, aMerge, nMerge,
@ -2403,7 +2323,6 @@ int sqlite3Fts3TermSegReaderCursor(
pSegcsr = sqlite3_malloc(sizeof(Fts3MultiSegReader));
if( pSegcsr ){
int i;
int nCost = 0;
int bFound = 0; /* True once an index has been found */
Fts3Table *p = (Fts3Table *)pCsr->base.pVtab;
@ -2436,10 +2355,6 @@ int sqlite3Fts3TermSegReaderCursor(
);
pSegcsr->bLookup = !isPrefix;
}
for(i=0; rc==SQLITE_OK && i<pSegcsr->nSegment; i++){
rc = sqlite3Fts3SegReaderCost(pCsr, pSegcsr->apSegment[i], &nCost);
}
pSegcsr->nCost = nCost;
}
*ppSegcsr = pSegcsr;
@ -3053,11 +2968,10 @@ static int fts3RenameMethod(
}
/*
** Savepoint hook for the FTS3 virtual table. The asserts check that a
** transaction is open and that iSavepoint is greater than the recorded
** mxSavepoint value; TESTONLY() then records iSavepoint. The actual work is
** delegated to fts3SyncMethod(), whose return value is returned.
*/
static int fts3SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){
  /* iSavepoint is referenced only inside assert() and TESTONLY(), which
  ** are presumably compiled out in non-debug builds. The table pointer is
  ** cast in place for the same reason: a local variable would otherwise be
  ** unused in such builds. */
  UNUSED_PARAMETER(iSavepoint);
  assert( ((Fts3Table *)pVtab)->inTransaction );
  assert( ((Fts3Table *)pVtab)->mxSavepoint < iSavepoint );
  TESTONLY( ((Fts3Table *)pVtab)->mxSavepoint = iSavepoint );
  return fts3SyncMethod(pVtab);
}
static int fts3ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){
@ -3328,7 +3242,6 @@ static int fts3EvalPhraseLoad(
}
static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
int iToken;
int rc = SQLITE_OK;
@ -3450,12 +3363,10 @@ static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){
*/
static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){
int rc;
Fts3Doclist *pList = &p->doclist;
Fts3PhraseToken *pFirst = &p->aToken[0];
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
assert( pList->aAll==0 );
assert( p->doclist.aAll==0 );
if( pCsr->bDesc==pTab->bDescIdx && bOptOk==1 && p->nToken==1
&& pFirst->pSegcsr && pFirst->pSegcsr->bLookup
){
@ -3565,14 +3476,15 @@ static int fts3EvalPhraseNext(
);
pDL->pList = pDL->pNextDocid;
}else{
char *pIter;
char *pIter; /* Used to iterate through aAll */
char *pEnd = &pDL->aAll[pDL->nAll]; /* 1 byte past end of aAll */
if( pDL->pNextDocid ){
pIter = pDL->pNextDocid;
}else{
pIter = pDL->aAll;
}
if( pIter>=&pDL->aAll[pDL->nAll] ){
if( pIter>=pEnd ){
/* We have already reached the end of this doclist. EOF. */
*pbEof = 1;
}else{
@ -3586,7 +3498,17 @@ static int fts3EvalPhraseNext(
pDL->pList = pIter;
fts3PoslistCopy(0, &pIter);
pDL->nList = (pIter - pDL->pList);
/* pIter now points just past the 0x00 that terminates the position-
** list for document pDL->iDocid. However, if this position-list was
** edited in place by fts3EvalNearTrim2(), then pIter may not actually
** point to the start of the next docid value. The following line deals
** with this case by advancing pIter past the zero-padding added by
** fts3EvalNearTrim2(). */
while( pIter<pEnd && *pIter==0 ) pIter++;
pDL->pNextDocid = pIter;
assert( *pIter || pIter>=&pDL->aAll[pDL->nAll] );
*pbEof = 0;
}
}
@ -3617,90 +3539,6 @@ static void fts3EvalStartReaders(
}
}
/*
** Filter the in-memory doclist of phrase node p2 against that of phrase
** node p1 using fts3DoclistNearMerge() with distance nNear. The merged
** doclist replaces p2's doclist.
**
** If p1 has no doclist, p2's doclist is freed and cleared. If p1 has a
** doclist but p2 does not, nothing is done.
**
** This is a no-op if *pRc is not SQLITE_OK on entry. Any error from the
** merge is stored in *pRc.
*/
static void fts3EvalNearMerge(
  int bDescIdx,                   /* Passed through to fts3DoclistNearMerge() */
  Fts3Expr *p1,                   /* Phrase node supplying the LHS doclist */
  Fts3Expr *p2,                   /* Phrase node whose doclist is replaced */
  int nNear,                      /* Parameter to NEAR operator */
  int *pRc                        /* IN/OUT: Error code */
){
  if( *pRc==SQLITE_OK ){
    Fts3Phrase *pLeft = p1->pPhrase;
    Fts3Phrase *pRight = p2->pPhrase;

    /* Both arguments must be phrase nodes with a phrase object attached. */
    assert( p1->eType==FTSQUERY_PHRASE && pLeft );
    assert( p2->eType==FTSQUERY_PHRASE && pRight );

    if( pLeft->doclist.aAll==0 ){
      sqlite3_free(pRight->doclist.aAll);
      pRight->doclist.aAll = 0;
      pRight->doclist.nAll = 0;
    }else if( pRight->doclist.aAll ){
      char *aOut;                 /* Buffer in which to assemble new doclist */
      int nOut;                   /* Size of buffer aOut in bytes */

      *pRc = fts3DoclistNearMerge(bDescIdx, nNear,
          pLeft->nToken, pLeft->doclist.aAll, pLeft->doclist.nAll,
          pRight->nToken, pRight->doclist.aAll, pRight->doclist.nAll,
          &aOut, &nOut
      );
      /* On failure fts3DoclistNearMerge() zeroes aOut/nOut, so the right-hand
      ** doclist is simply left empty. */
      sqlite3_free(pRight->doclist.aAll);
      pRight->doclist.aAll = aOut;
      pRight->doclist.nAll = nOut;
    }
  }
}
/*
** Apply NEAR filtering to the in-memory doclists of the expression tree
** rooted at pExpr.
**
** If pExpr is an FTSQUERY_NEAR node, the phrase nodes of the NEAR chain are
** gathered into an array (left-most phrase first) and fts3EvalNearMerge()
** is run over each adjacent pair, first left-to-right and then
** right-to-left. For any other node type the function recurses into both
** children.
**
** This is a no-op if pExpr is NULL or *pRc is not SQLITE_OK on entry. If
** the array cannot be allocated *pRc is set to SQLITE_NOMEM.
*/
static void fts3EvalNearTrim(Fts3Cursor *pCsr, Fts3Expr *pExpr, int *pRc){
if( pExpr && SQLITE_OK==*pRc ){
if( pExpr->eType==FTSQUERY_NEAR ){
Fts3Expr *pLeft = pExpr->pLeft;
/* Number of phrases in the chain. Starts at 2 for the two operands of
** pExpr itself; incremented once per nested NEAR found on the left. */
int nPhrase = 2;
Fts3Expr **aPhrase;
assert( pLeft );
assert( pExpr->pRight );
assert( pExpr->pRight->eType==FTSQUERY_PHRASE );
/* Descend the left spine until the left-most phrase node is reached. */
while( pLeft->eType!=FTSQUERY_PHRASE ){
assert( pLeft->eType==FTSQUERY_NEAR );
assert( pLeft->pRight->eType==FTSQUERY_PHRASE );
pLeft = pLeft->pLeft;
nPhrase++;
}
aPhrase = (Fts3Expr **)sqlite3_malloc(sizeof(Fts3Expr *) * nPhrase);
if( !aPhrase ){
*pRc = SQLITE_NOMEM;
}else{
Fts3Table *p = (Fts3Table *)pCsr->base.pVtab;
int i = 1;
/* aPhrase[0] is the left-most phrase. Walk back up via pParent, adding
** the right-hand phrase of each NEAR node, until pExpr is reached. */
aPhrase[0] = pLeft;
do {
pLeft = pLeft->pParent;
aPhrase[i++] = pLeft->pRight;
}while( pLeft!=pExpr );
/* Forward pass: filter each phrase against its left-hand neighbour. The
** nNear value is taken from the NEAR node that is the parent of the
** right-hand phrase of the pair. */
for(i=0; i<(nPhrase-1); i++){
int nNear = aPhrase[i+1]->pParent->nNear;
fts3EvalNearMerge(p->bDescIdx, aPhrase[i], aPhrase[i+1], nNear, pRc);
}
/* Backward pass: filter each phrase against its right-hand neighbour. */
for(i=nPhrase-2; i>=0; i--){
int nNear = aPhrase[i+1]->pParent->nNear;
fts3EvalNearMerge(p->bDescIdx, aPhrase[i+1], aPhrase[i], nNear, pRc);
}
sqlite3_free(aPhrase);
}
}else{
fts3EvalNearTrim(pCsr, pExpr->pLeft, pRc);
fts3EvalNearTrim(pCsr, pExpr->pRight, pRc);
}
}
}
typedef struct Fts3TokenAndCost Fts3TokenAndCost;
struct Fts3TokenAndCost {
@ -3777,6 +3615,7 @@ static int fts3EvalAverageDocsize(Fts3Cursor *pCsr, int *pnPage){
return SQLITE_CORRUPT_VTAB;
}
pCsr->nDoc = nDoc;
pCsr->nRowAvg = (int)(((nByte / nDoc) + p->nPgsz) / p->nPgsz);
assert( pCsr->nRowAvg>0 );
rc = sqlite3_reset(pStmt);
@ -3902,7 +3741,6 @@ int sqlite3Fts3EvalStart(Fts3Cursor *pCsr, Fts3Expr *pExpr, int bOptOk){
rc = SQLITE_NOMEM;
}else{
int ii;
int nDocSize;
Fts3TokenAndCost *pTC = aTC;
Fts3Expr **ppOr = apOr;
@ -3910,55 +3748,12 @@ int sqlite3Fts3EvalStart(Fts3Cursor *pCsr, Fts3Expr *pExpr, int bOptOk){
nToken = pTC-aTC;
nOr = ppOr-apOr;
if( rc==SQLITE_OK ){
rc = fts3EvalSelectDeferred(pCsr, 0, aTC, nToken);
for(ii=0; rc==SQLITE_OK && ii<nOr; ii++){
rc = fts3EvalSelectDeferred(pCsr, apOr[ii], aTC, nToken);
}
#if 0
for(ii=0; rc==SQLITE_OK && ii<nToken; ii++){
int jj;
pTC = 0;
for(jj=0; jj<nToken; jj++){
if( aTC[jj].pToken && (!pTC || aTC[jj].nOvfl<pTC->nOvfl) ){
pTC = &aTC[jj];
}
}
assert( pTC );
/* At this point pTC points to the cheapest remaining token. */
if( ii==0 ){
if( pTC->nOvfl ){
nDocEst = (pTC->nOvfl * pTab->nPgsz + pTab->nPgsz) / 10;
}else{
/* TODO: Fix this so that the doclist need not be read twice. */
Fts3PhraseToken *pToken = pTC->pToken;
int nList = 0;
char *pList = 0;
rc = fts3TermSelect(pTab, pToken, pTC->iCol, 1, &nList, &pList);
if( rc==SQLITE_OK ){
nDocEst = fts3DoclistCountDocids(1, pList, nList);
}
sqlite3_free(pList);
if( rc==SQLITE_OK ){
rc = sqlite3Fts3TermSegReaderCursor(pCsr,
pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr
);
}
}
}else{
if( pTC->nOvfl>=(nDocEst*nDocSize) ){
Fts3PhraseToken *pToken = pTC->pToken;
rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol);
fts3SegReaderCursorFree(pToken->pSegcsr);
pToken->pSegcsr = 0;
}
nDocEst = 1 + (nDocEst/4);
}
pTC->pToken = 0;
}
#endif
sqlite3_free(aTC);
}
@ -3986,6 +3781,7 @@ static int fts3EvalNearTrim2(
){
int nParam1 = nNear + pPhrase->nToken;
int nParam2 = nNear + *pnToken;
int nNew;
char *p2;
char *pOut;
int res;
@ -3994,9 +3790,15 @@ static int fts3EvalNearTrim2(
res = fts3PoslistNearMerge(
&pOut, aTmp, nParam1, nParam2, paPoslist, &p2
);
pPhrase->doclist.nList = pOut - pPhrase->doclist.pList;
if( res ){
nNew = (pOut - pPhrase->doclist.pList) - 1;
assert( pPhrase->doclist.pList[nNew]=='\0' );
assert( nNew<=pPhrase->doclist.nList && nNew>0 );
memset(&pPhrase->doclist.pList[nNew], 0, pPhrase->doclist.nList - nNew);
pPhrase->doclist.nList = nNew;
*paPoslist = pPhrase->doclist.pList;
*pnToken = pPhrase->nToken;
}
return res;
}
@ -4305,75 +4107,202 @@ int sqlite3Fts3EvalNext(Fts3Cursor *pCsr){
return rc;
}
/*
** Return a pointer to the entire doclist, including positions, associated
** with the phrase passed as the second argument. It is illegal to call
** this function if the phrase consists entirely of deferred tokens.
**
** TODO: This function is only used by the code for the matchinfo('x')
** auxiliary function - to obtain the following two values:
**
** 1. The total number of times the phrase appears in each column in all
** rows in the FTS table.
**
** 2. For each column, the total number of rows in the FTS table for which
** the phrase appears at least once in the column.
**
** It would be better if there was an sqlite3Fts3EvalXXX() function
** specifically to retrieve these values. If that were done, the concept
** of which tokens are deferred or incremental would be entirely encapsulated
** within the sqlite3Fts3EvalXXX()/fts3EvalXXX() functions in this file.
*/
int sqlite3Fts3EvalPhraseDoclist(
Fts3Cursor *pCsr, /* FTS3 cursor object */
Fts3Expr *pExpr, /* Phrase to return doclist for */
const char **ppList, /* OUT: Buffer containing doclist */
int *pnList /* OUT: Size of returned buffer, in bytes */
static void fts3EvalRestart(
Fts3Cursor *pCsr,
Fts3Expr *pExpr,
int *pRc
){
int rc = SQLITE_OK;
if( pExpr && *pRc==SQLITE_OK ){
Fts3Phrase *pPhrase = pExpr->pPhrase;
/* It is illegal to call this function if the phrase is entirely deferred
** (it may contain some deferred tokens, but must also contain at least
** one token for which the doclist may be read from the full-text index).
*/
assert( !pExpr->bDeferred );
if( pPhrase ){
fts3EvalFreeDeferredDoclist(pPhrase);
if( pPhrase->bIncr ){
/* This phrase was being loaded from disk incrementally. But the
** matchinfo() function requires that the entire doclist be loaded into
** memory. This block loads the doclist into memory and modifies the
** Fts3Phrase structure so that it does not use the incremental strategy.
*/
TESTONLY( int bEof = pExpr->bEof; )
TESTONLY( int bStart = pExpr->bStart; )
sqlite3_int64 iDocid = pExpr->iDocid;
sqlite3Fts3EvalPhraseCleanup(pPhrase);
memset(&pPhrase->doclist, 0, sizeof(Fts3Doclist));
*pRc = sqlite3Fts3EvalStart(pCsr, pExpr, 0);
}else{
pPhrase->doclist.pNextDocid = 0;
pPhrase->doclist.iDocid = 0;
}
}
pExpr->iDocid = 0;
rc = sqlite3Fts3EvalStart(pCsr, pExpr, 0);
assert( pExpr->bEof==bEof );
assert( pExpr->bStart==bStart );
assert( rc!=SQLITE_OK || pPhrase->bIncr==0 );
if( pExpr->bStart && !pExpr->bEof ){
pExpr->bEof = 0;
pExpr->bStart = 0;
while( rc==SQLITE_OK && (pExpr->bStart==0 || pExpr->iDocid!=iDocid) ){
fts3EvalNext(pCsr, pExpr, &rc);
assert( !pExpr->bEof );
fts3EvalRestart(pCsr, pExpr->pLeft, pRc);
fts3EvalRestart(pCsr, pExpr->pRight, pRc);
}
}
/*
** Walk the expression tree rooted at pExpr. For every node that has a
** phrase whose doclist.pList is non-NULL, scan that position list and add
** the per-column totals to the node's aMI[] array.
**
** This is a no-op if pExpr is NULL or *pRc is not SQLITE_OK on entry. This
** function never modifies *pRc itself; pCsr is only passed on to the
** recursive calls.
*/
static void fts3EvalUpdateCounts(
Fts3Cursor *pCsr,
Fts3Expr *pExpr,
int *pRc
){
if( pExpr && *pRc==SQLITE_OK ){
Fts3Phrase *pPhrase = pExpr->pPhrase;
if( pPhrase && pPhrase->doclist.pList ){
int iCol = 0;
char *p = pPhrase->doclist.pList;
assert( *p );
while( 1 ){
/* c holds the continuation bit (0x80) of the previous byte, or 0 at the
** start of a column's entries. iCnt counts the entries for column iCol. */
u8 c = 0;
int iCnt = 0;
/* Keep scanning while the current byte has any bit other than bit 0
** set, or while the previous byte had its continuation bit set. The
** loop therefore stops on a 0x00 or 0x01 byte that begins a new varint.
** Each byte that begins a new varint increments iCnt. */
while( 0xFE & (*p | c) ){
if( (c&0x80)==0 ) iCnt++;
c = *p++ & 0x80;
}
/* aMI[iCol*3 + 1] = Number of occurrences
** aMI[iCol*3 + 2] = Number of rows containing at least one instance
*/
pExpr->aMI[iCol*3 + 1] += iCnt;
pExpr->aMI[iCol*3 + 2] += (iCnt>0);
/* A 0x00 byte ends the position list. Otherwise skip the stop byte and
** read the column number that follows it as a varint. */
if( *p==0x00 ) break;
p++;
p += sqlite3Fts3GetVarint32(p, &iCol);
}
}
fts3EvalUpdateCounts(pCsr, pExpr->pLeft, pRc);
fts3EvalUpdateCounts(pCsr, pExpr->pRight, pRc);
}
}
/*
** Ensure that the aMI[] array of phrase node pExpr is populated. If
** pExpr->aMI is already allocated this function does nothing.
**
** Otherwise the root of the NEAR expression containing pExpr is located
** (pExpr itself if its parent is not a NEAR node), an aMI[] array is
** allocated for each phrase in that expression, and the expression is
** restarted and stepped through to EOF, with fts3EvalUpdateCounts() called
** for each row visited. Afterwards the cursor fields and the expression
** position that were saved on entry are restored.
**
** Returns SQLITE_OK on success, SQLITE_NOMEM if an aMI[] allocation fails,
** or whatever error code the evaluation routines store in rc.
*/
static int fts3EvalNearStats(
Fts3Cursor *pCsr,
Fts3Expr *pExpr
){
int rc = SQLITE_OK; /* Return code */
assert( pExpr->eType==FTSQUERY_PHRASE );
if( pExpr->aMI==0 ){
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
Fts3Expr *pRoot; /* Root of NEAR expression */
Fts3Expr *p; /* Iterator used for several purposes */
/* State saved here and restored once the full scan is complete. */
sqlite3_int64 iPrevId = pCsr->iPrevId;
sqlite3_int64 iDocid;
u8 bEof;
/* Find the root of the NEAR expression */
pRoot = pExpr;
while( pRoot->pParent && pRoot->pParent->eType==FTSQUERY_NEAR ){
pRoot = pRoot->pParent;
}
iDocid = pRoot->iDocid;
bEof = pRoot->bEof;
/* Allocate space for the aMI[] array of each FTSQUERY_PHRASE node. The
** loop follows the pLeft chain from the root; at each step the phrase is
** either the node itself or, for a non-phrase node, its right-hand child.
** If an allocation fails the function returns immediately; arrays
** allocated by earlier iterations remain attached to their nodes. */
for(p=pRoot; p; p=p->pLeft){
Fts3Expr *pE = (p->eType==FTSQUERY_PHRASE?p:p->pRight);
assert( pE->aMI==0 );
pE->aMI = (u32 *)sqlite3_malloc(pTab->nColumn * 3 * sizeof(u32));
if( !pE->aMI ) return SQLITE_NOMEM;
memset(pE->aMI, 0, pTab->nColumn * 3 * sizeof(u32));
}
fts3EvalRestart(pCsr, pRoot, &rc);
/* Visit every row matched by pRoot, accumulating counts as we go. */
while( pCsr->isEof==0 && rc==SQLITE_OK ){
/* The inner loop keeps advancing while the root is a NEAR node and
** fts3EvalLoadDeferred() returns non-zero - presumably meaning that the
** row is rejected once deferred tokens are considered (TODO confirm). */
do {
/* Ensure the %_content statement is reset. */
if( pCsr->isRequireSeek==0 ) sqlite3_reset(pCsr->pStmt);
assert( sqlite3_data_count(pCsr->pStmt)==0 );
/* Advance to the next document */
fts3EvalNext(pCsr, pRoot, &rc);
pCsr->isEof = pRoot->bEof;
pCsr->isRequireSeek = 1;
pCsr->isMatchinfoNeeded = 1;
pCsr->iPrevId = pRoot->iDocid;
}while( pCsr->isEof==0
&& pRoot->eType==FTSQUERY_NEAR
&& fts3EvalLoadDeferred(pCsr, &rc)
);
if( pCsr->isEof==0 ){
fts3EvalUpdateCounts(pCsr, pRoot, &rc);
}
}
/* Restore the cursor fields modified by the scan above. */
pCsr->isEof = 0;
pCsr->iPrevId = iPrevId;
if( bEof ){
/* The expression was already at EOF on entry; just restore the flag. */
pRoot->bEof = bEof;
}else{
/* Restart and step forward until the saved docid is reached again.
** NOTE(review): the "<" comparison looks like it assumes docids are
** visited in ascending order - confirm for descending cursors. */
fts3EvalRestart(pCsr, pRoot, &rc);
while( pRoot->iDocid<iDocid && rc==SQLITE_OK ){
fts3EvalNext(pCsr, pRoot, &rc);
assert( pRoot->bEof==0 );
}
fts3EvalLoadDeferred(pCsr, &rc);
}
}
return rc;
}
/*
** This function is used by the matchinfo() module to query a phrase
** expression node for the following information:
**
** 1. The total number of occurrences of the phrase in each column of
** the FTS table (considering all rows), and
**
** 2. For each column, the number of rows in the table for which the
** column contains at least one instance of the phrase.
**
** If no error occurs, SQLITE_OK is returned and the values for each column
** written into the array aiOut as follows:
**
** aiOut[iCol*3 + 1] = Number of occurrences
** aiOut[iCol*3 + 2] = Number of rows containing at least one instance
**
** Caveats:
**
** * If a phrase consists entirely of deferred tokens, then all output
** values are set to the number of documents in the table. In other
** words we assume that very common tokens occur exactly once in each
** column of each row of the table.
**
** * If a phrase contains some deferred tokens (and some non-deferred
** tokens), count the potential occurrences identified by considering
** the non-deferred tokens instead of actual phrase occurrences.
**
** * If the phrase is part of a NEAR expression, then only phrase instances
** that meet the NEAR constraint are included in the counts.
*/
int sqlite3Fts3EvalPhraseStats(
  Fts3Cursor *pCsr,               /* FTS cursor handle */
  Fts3Expr *pExpr,                /* Phrase expression */
  u32 *aiOut                      /* Array to write results into (see above) */
){
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  int rc = SQLITE_OK;
  int iCol;

  if( pExpr->bDeferred ){
    /* Entirely deferred phrase: report the document count for every
    ** column instead of reading any doclist. */
    assert( pCsr->nDoc>0 );
    for(iCol=0; iCol<pTab->nColumn; iCol++){
      aiOut[iCol*3 + 1] = pCsr->nDoc;
      aiOut[iCol*3 + 2] = pCsr->nDoc;
    }
  }else{
    /* Make sure pExpr->aMI[] has been populated, then copy the two
    ** defined slots for each column. Slot 0 of each triple is not written. */
    rc = fts3EvalNearStats(pCsr, pExpr);
    if( rc==SQLITE_OK ){
      assert( pExpr->aMI );
      for(iCol=0; iCol<pTab->nColumn; iCol++){
        aiOut[iCol*3 + 1] = pExpr->aMI[iCol*3 + 1];
        aiOut[iCol*3 + 2] = pExpr->aMI[iCol*3 + 2];
      }
    }
  }

  return rc;
}

View File

@ -240,6 +240,7 @@ struct Fts3Cursor {
u8 bDesc; /* True to sort in descending order */
int eEvalmode; /* An FTS3_EVAL_XX constant */
int nRowAvg; /* Average size of database rows, in pages */
int nDoc; /* Documents in table */
int isMatchinfoNeeded; /* True when aMatchinfo[] needs filling in */
u32 *aMatchinfo; /* Information about most recent match */
@ -323,9 +324,17 @@ struct Fts3Phrase {
** "Length" field found in doclists stored on disk is omitted from this
** buffer.
**
** Variable pCurrent always points to the start of a docid field within
** aDoclist. Since the doclist is usually scanned in docid order, this can
** be used to accelerate seeking to the required docid within the doclist.
** Variable aMI is used by FTSQUERY_PHRASE nodes, including phrases that are
** operands of a NEAR expression, to store the global matchinfo data. If it
** is not NULL, it points to an array of size nCol*3,
** where nCol is the number of columns in the queried FTS table. The array
** is populated as follows:
**
** aMI[iCol*3 + 0] = Undefined
** aMI[iCol*3 + 1] = Number of occurrences
** aMI[iCol*3 + 2] = Number of rows containing at least one instance
**
** The aMI array is allocated using sqlite3_malloc(). It should be freed
** when the expression node is.
*/
struct Fts3Expr {
int eType; /* One of the FTSQUERY_XXX values defined below */
@ -340,6 +349,8 @@ struct Fts3Expr {
u8 bEof; /* True this expression is at EOF already */
u8 bStart; /* True if iDocid is valid */
u8 bDeferred; /* True if this expression is entirely deferred */
u32 *aMI;
};
/*
@ -370,7 +381,6 @@ int sqlite3Fts3SegReaderNew(int, sqlite3_int64,
int sqlite3Fts3SegReaderPending(
Fts3Table*,int,const char*,int,int,Fts3SegReader**);
void sqlite3Fts3SegReaderFree(Fts3SegReader *);
int sqlite3Fts3SegReaderCost(Fts3Cursor *, Fts3SegReader *, int *);
int sqlite3Fts3AllSegdirs(Fts3Table*, int, int, sqlite3_stmt **);
int sqlite3Fts3ReadLock(Fts3Table *);
int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*, int*);
@ -382,7 +392,6 @@ void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *);
int sqlite3Fts3DeferToken(Fts3Cursor *, Fts3PhraseToken *, int);
int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *);
void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *);
char *sqlite3Fts3DeferredDoclist(Fts3DeferredToken *, int *);
void sqlite3Fts3SegmentsClose(Fts3Table *);
/* Special values interpreted by sqlite3SegReaderCursor() */
@ -441,8 +450,7 @@ int sqlite3Fts3VarintLen(sqlite3_uint64);
void sqlite3Fts3Dequote(char *);
void sqlite3Fts3DoclistPrev(int,char*,int,char**,sqlite3_int64*,int*,u8*);
int sqlite3Fts3ExprLoadDoclist(Fts3Cursor *, Fts3Expr *);
int sqlite3Fts3ExprNearTrim(Fts3Expr *, Fts3Expr *, int);
int sqlite3Fts3EvalPhraseStats(Fts3Cursor *, Fts3Expr *, u32 *);
/* fts3_tokenizer.c */
const char *sqlite3Fts3NextToken(const char *, int *);
@ -480,9 +488,6 @@ int sqlite3Fts3TermSegReaderCursor(
Fts3MultiSegReader **ppSegcsr /* OUT: Allocated seg-reader cursor */
);
int sqlite3Fts3EvalPhraseCache(Fts3Cursor *, Fts3Phrase *);
sqlite3_int64 sqlite3Fts3EvalDocid(Fts3Cursor *, Fts3Expr *);
int sqlite3Fts3EvalPhraseDoclist(Fts3Cursor*, Fts3Expr*, const char**,int*);
void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *);
int sqlite3Fts3EvalStart(Fts3Cursor *, Fts3Expr *, int);

View File

@ -769,6 +769,7 @@ void sqlite3Fts3ExprFree(Fts3Expr *p){
sqlite3Fts3ExprFree(p->pLeft);
sqlite3Fts3ExprFree(p->pRight);
sqlite3Fts3EvalPhraseCleanup(p->pPhrase);
sqlite3_free(p->aMI);
sqlite3_free(p);
}
}

View File

@ -720,26 +720,6 @@ static int fts3ColumnlistCount(char **ppCollist){
return nEntry;
}
/*
** Scan the position list that *pp points to, one column-list at a time,
** and accumulate hit counts into aOut[]. For column iCol:
**
**   aOut[iCol*3]   += number of entries returned by fts3ColumnlistCount()
**   aOut[iCol*3+1] += 1   (only if isGlobal is non-zero)
**
** A column-list that starts with a 0x01 byte is followed by a varint
** column number; otherwise column 0 is used. On return *pp points one byte
** past the 0x00 byte that terminates the position list.
*/
static void fts3LoadColumnlistCounts(char **pp, u32 *aOut, int isGlobal){
  char *pIter;

  for(pIter=*pp; *pIter!=0; ){
    sqlite3_int64 iColumn = 0;    /* Column number for this column-list */
    u32 *aEntry;                  /* Triple of aOut[] for column iColumn */
    int nCount;                   /* Entries in this column-list */

    if( *pIter==0x01 ){
      pIter++;
      pIter += sqlite3Fts3GetVarint(pIter, &iColumn);
    }
    nCount = fts3ColumnlistCount(&pIter);
    assert( nCount>0 );

    aEntry = &aOut[iColumn*3];
    aEntry[0] += nCount;
    if( isGlobal ) aEntry[1]++;
  }

  /* Step over the terminating 0x00 byte. */
  *pp = pIter + 1;
}
/*
** fts3ExprIterate() callback used to collect the "global" matchinfo stats
** for a single query.
@ -773,32 +753,9 @@ static int fts3ExprGlobalHitsCb(
void *pCtx /* Pointer to MatchInfo structure */
){
MatchInfo *p = (MatchInfo *)pCtx;
u32 *aOut = &p->aMatchinfo[3*iPhrase*p->nCol];
if( pExpr->bDeferred ){
int iCol; /* Column index */
for(iCol=0; iCol<p->nCol; iCol++){
aOut[iCol*3 + 1] = (u32)p->nDoc;
aOut[iCol*3 + 2] = (u32)p->nDoc;
}
}else{
char *pIter;
char *pEnd;
int n;
int rc = sqlite3Fts3EvalPhraseDoclist(
p->pCursor, pExpr, (const char **)&pIter, &n
return sqlite3Fts3EvalPhraseStats(
p->pCursor, pExpr, &p->aMatchinfo[3*iPhrase*p->nCol]
);
if( rc!=SQLITE_OK ) return rc;
pEnd = &pIter[n];
/* Fill in the global hit count matrix row for this phrase. */
while( pIter<pEnd ){
while( *pIter++ & 0x80 ); /* Skip past docid. */
fts3LoadColumnlistCounts(&pIter, &aOut[1], 1);
}
}
return SQLITE_OK;
}
/*

View File

@ -94,7 +94,6 @@ struct Fts3DeferredToken {
**
** sqlite3Fts3SegReaderNew()
** sqlite3Fts3SegReaderFree()
** sqlite3Fts3SegReaderCost()
** sqlite3Fts3SegReaderIterate()
**
** Methods used to manipulate Fts3SegReader structures:
@ -1295,95 +1294,6 @@ static int fts3SegReaderNextDocid(
return SQLITE_OK;
}
/*
** This function is called to estimate the amount of data that will be
** loaded from the disk If SegReaderIterate() is called on this seg-reader,
** in units of average document size.
**
** This can be used as follows: If the caller has a small doclist that
** contains references to N documents, and is considering merging it with
** a large doclist (size X "average documents"), it may opt not to load
** the large doclist if X>N.
*/
int sqlite3Fts3SegReaderCost(
Fts3Cursor *pCsr, /* FTS3 cursor handle */
Fts3SegReader *pReader, /* Segment-reader handle */
int *pnCost /* IN/OUT: Number of bytes read */
){
Fts3Table *p = (Fts3Table*)pCsr->base.pVtab;
int rc = SQLITE_OK; /* Return code */
int nCost = 0; /* Cost in bytes to return */
int pgsz = p->nPgsz; /* Database page size */
assert( pgsz>0 );
/* If this seg-reader is reading the pending-terms table, or if all data
** for the segment is stored on the root page of the b-tree, then the cost
** is zero. In this case all required data is already in main memory.
*/
if( p->bHasStat
&& !fts3SegReaderIsPending(pReader)
&& !fts3SegReaderIsRootOnly(pReader)
){
int nBlob = 0;
sqlite3_int64 iBlock;
if( pCsr->nRowAvg==0 ){
/* The average document size, which is required to calculate the cost
** of each doclist, has not yet been determined. Read the required
** data from the %_stat table to calculate it.
**
** Entry 0 of the %_stat table is a blob containing (nCol+1) FTS3
** varints, where nCol is the number of columns in the FTS3 table.
** The first varint is the number of documents currently stored in
** the table. The following nCol varints contain the total amount of
** data stored in all rows of each column of the table, from left
** to right.
*/
sqlite3_stmt *pStmt;
sqlite3_int64 nDoc = 0;
sqlite3_int64 nByte = 0;
const char *pEnd;
const char *a;
rc = sqlite3Fts3SelectDoctotal(p, &pStmt);
if( rc!=SQLITE_OK ) return rc;
a = sqlite3_column_blob(pStmt, 0);
assert( a );
pEnd = &a[sqlite3_column_bytes(pStmt, 0)];
a += sqlite3Fts3GetVarint(a, &nDoc);
while( a<pEnd ){
a += sqlite3Fts3GetVarint(a, &nByte);
}
if( nDoc==0 || nByte==0 ){
sqlite3_reset(pStmt);
return SQLITE_CORRUPT_VTAB;
}
pCsr->nRowAvg = (int)(((nByte / nDoc) + pgsz) / pgsz);
assert( pCsr->nRowAvg>0 );
rc = sqlite3_reset(pStmt);
if( rc!=SQLITE_OK ) return rc;
}
/* Assume that a blob flows over onto overflow pages if it is larger
** than (pgsz-35) bytes in size (the file-format documentation
** confirms this).
*/
for(iBlock=pReader->iStartBlock; iBlock<=pReader->iLeafEndBlock; iBlock++){
rc = sqlite3Fts3ReadBlock(p, iBlock, 0, &nBlob, 0);
if( rc!=SQLITE_OK ) break;
if( (nBlob+35)>pgsz ){
int nOvfl = (nBlob + 34)/pgsz;
nCost += ((nOvfl + pCsr->nRowAvg - 1)/pCsr->nRowAvg);
}
}
}
*pnCost += nCost;
return rc;
}
int sqlite3Fts3MsrOvfl(
Fts3Cursor *pCsr,
@ -2416,7 +2326,6 @@ int sqlite3Fts3MsrIncrNext(
}
while( 1 ){
int nSort;
Fts3SegReader *pSeg;
pSeg = pMsr->apSegment[0];
@ -2958,20 +2867,6 @@ static int fts3SpecialInsert(Fts3Table *p, sqlite3_value *pVal){
return rc;
}
/*
** Return a pointer to the doclist buffer cached for deferred token
** pDeferred and store its size in bytes in *pnByte. If the token has no
** cached list, NULL is returned and *pnByte is set to zero.
**
** This function assumes that sqlite3Fts3CacheDeferredDoclists() has already
** been called to allocate and populate the doclist.
*/
char *sqlite3Fts3DeferredDoclist(Fts3DeferredToken *pDeferred, int *pnByte){
  char *aList = 0;
  int nList = 0;

  if( pDeferred->pList!=0 ){
    nList = pDeferred->pList->nData;
    aList = pDeferred->pList->aData;
  }
  *pnByte = nList;
  return aList;
}
/*
** Delete all cached deferred doclists. Deferred doclists are cached
** (allocated) by the sqlite3Fts3CacheDeferredDoclists() function.

View File

@ -1,5 +1,5 @@
C Have\sNEAR\squeries\suse\sincremental\smerging.\sFix\sissues\ssurrounding\sthe\sdeferred\stoken\soptimization.
D 2011-06-07T18:35:45.780
C Fix\svarious\sissues\sto\sdo\swith\sdeferred\stokens,\sNEAR\sexpressions\sand\smatchinfo().
D 2011-06-08T18:39:07.487
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in 11dcc00a8d0e5202def00e81732784fb0cc4fe1d
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -61,21 +61,21 @@ F ext/fts2/mkfts2amal.tcl 974d5d438cb3f7c4a652639262f82418c1e4cff0
F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a
F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9
F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
F ext/fts3/fts3.c 9d2d2cab4d64f0769046d88b6740c6e1f229d1e3
F ext/fts3/fts3.c b44083cafb9840be0927f8b9fb2ab4f373167f77
F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe
F ext/fts3/fts3Int.h d76b021d5b7061eff7aa4055b5938eebef2bdb6a
F ext/fts3/fts3Int.h a999cfbf605efec293a88519f74192f5204c84d6
F ext/fts3/fts3_aux.c baed9dab7fb4604ae8cafdb2d7700abe93beffbe
F ext/fts3/fts3_expr.c 0ae554230ada457e61e8184b24faac96aad78f6b
F ext/fts3/fts3_expr.c b95f0d76bcf4507c73a838f3178c4ed8c42dc2bb
F ext/fts3/fts3_hash.c 3c8f6387a4a7f5305588b203fa7c887d753e1f1c
F ext/fts3/fts3_hash.h 8331fb2206c609f9fc4c4735b9ab5ad6137c88ec
F ext/fts3/fts3_icu.c ac494aed69835008185299315403044664bda295
F ext/fts3/fts3_porter.c d61cfd81fb0fd8fbcb25adcaee0ba671aefaa5c2
F ext/fts3/fts3_snippet.c 0485969cce410760b50d587a77186f9c7f7e96be
F ext/fts3/fts3_snippet.c 82e2c1e420c871c02f6e85ea438570118d7105c8
F ext/fts3/fts3_term.c 6c7f33ab732a2a0f281898685650e3a492e1e2f1
F ext/fts3/fts3_tokenizer.c 055f3dc7369585350b28db1ee0f3b214dca6724d
F ext/fts3/fts3_tokenizer.h 13ffd9fcb397fec32a05ef5cd9e0fa659bf3dbd3
F ext/fts3/fts3_tokenizer1.c 6e5cbaa588924ac578263a598e4fb9f5c9bb179d
F ext/fts3/fts3_write.c ed525afd524d713abe7da174d56ad935dfc26008
F ext/fts3/fts3_write.c bc24cec303d86aeb4b40fcbdf9f252f93ef78fc7
F ext/fts3/fts3speed.tcl b54caf6a18d38174f1a6e84219950d85e98bb1e9
F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100
F ext/icu/README.txt bf8461d8cdc6b8f514c080e4e10dc3b2bbdfefa9
@ -471,7 +471,7 @@ F test/fts3expr2.test 18da930352e5693eaa163a3eacf96233b7290d1a
F test/fts3fault.test f83e556465bb69dc8bc676339eca408dce4ca246
F test/fts3fault2.test dc96203af6ba31ce20163fc35460e1556e8edf4d
F test/fts3malloc.test 9c8cc3f885bb4dfc66d0460c52f68f45e4710d1b
F test/fts3matchinfo.test f424597b6843659ecbc2009e8823380233ebf375
F test/fts3matchinfo.test 08a82d18cc08abb28aec41d412b4c2ef25ba6a5f
F test/fts3near.test 2e318ee434d32babd27c167142e2b94ddbab4844
F test/fts3prefix.test 36246609111ec1683f7ea5ed27666ce2cefb5676
F test/fts3query.test ef79d31fdb355d094baec1c1b24b60439a1fb8a2
@ -943,7 +943,7 @@ F tool/split-sqlite3c.tcl d9be87f1c340285a3e081eb19b4a247981ed290c
F tool/symbols.sh bc2a3709940d47c8ac8e0a1fdf17ec801f015a00
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
F tool/warnings.sh 347d974d143cf132f953b565fbc03026f19fcb4d
P 567dd84359218245d4e6887547e2a48881f2c8e0
R 020efe4a51ef4472e0e5c3f4175d0de6
P 9d10a6846b12a9cc8fd4fdc3affd931a27218b5a
R 37e4da2cb9907d0ccf1d8076445165fd
U dan
Z 740d1ddba83232619fca71041707ab60
Z 147c4bbcabf01e6d99dff7a301984a70

View File

@ -1 +1 @@
9d10a6846b12a9cc8fd4fdc3affd931a27218b5a
3972a787df5ec253b99b148385655e7b68d851fa

View File

@ -244,9 +244,13 @@ do_matchinfo_test 4.2.6 t5 {t5 MATCH 'a OR b'} { s {1 2 1} }
do_execsql_test 4.3.0 "INSERT INTO t5 VALUES('x y [string repeat {b } 50000]')";
do_matchinfo_test 4.3.1 t5 {t5 MATCH 'a a'} {
# It used to be that the second 'a' token would be deferred. That doesn't
# work any longer.
if 0 {
do_matchinfo_test 4.3.1 t5 {t5 MATCH 'a a'} {
x {{5 8 2 5 5 5} {3 8 2 3 5 5}}
s {2 1}
}
}
do_matchinfo_test 4.3.2 t5 {t5 MATCH 'a b'} { s {2} }