From 9a67058a63da4e319c3cafb1ba94e228bdef2e88 Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 4 Aug 2014 20:07:40 +0000 Subject: [PATCH] Fix fts5_index.c to use doclist-indexes when possible. Only some cases work so far. FossilOrigin-Name: 90b82d3ef613b2915e0e280dc1d2e5a2b617d59c --- ext/fts5/fts5_index.c | 530 ++++++++++++++++++++++++++++++++---------- manifest | 16 +- manifest.uuid | 2 +- test/fts5aa.test | 3 +- test/fts5ah.test | 37 ++- 5 files changed, 452 insertions(+), 136 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 271caa8a0d..a1735a1104 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -266,6 +266,7 @@ typedef struct Fts5BtreeIter Fts5BtreeIter; typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel; typedef struct Fts5ChunkIter Fts5ChunkIter; typedef struct Fts5Data Fts5Data; +typedef struct Fts5DlidxIter Fts5DlidxIter; typedef struct Fts5MultiSegIter Fts5MultiSegIter; typedef struct Fts5NodeIter Fts5NodeIter; typedef struct Fts5PageWriter Fts5PageWriter; @@ -279,7 +280,6 @@ typedef struct Fts5Structure Fts5Structure; typedef struct Fts5StructureLevel Fts5StructureLevel; typedef struct Fts5StructureSegment Fts5StructureSegment; - /* ** One object per %_data table. */ @@ -321,6 +321,9 @@ struct Fts5DoclistIter { int nPoslist; }; +/* +** Each iterator used by external modules is an instance of this type. +*/ struct Fts5IndexIter { Fts5Index *pIndex; Fts5Structure *pStruct; @@ -489,6 +492,8 @@ struct Fts5SegIter { int nRowidOffset; /* Allocated size of aRowidOffset[] array */ int *aRowidOffset; /* Array of offset to rowid fields */ + Fts5DlidxIter *pDlidx; /* If there is a doclist-index */ + /* Variables populated based on current entry. */ Fts5Buffer term; /* Current term */ i64 iRowid; /* Current rowid */ @@ -540,6 +545,26 @@ struct Fts5NodeIter { int bDlidx; }; +/* +** An instance of the following type is used to iterate through the contents +** of a doclist-index record. 
+** +** pData: +** A reference to the dlidx record. +*/ +struct Fts5DlidxIter { + Fts5Data *pData; /* Data for doclist index, if any */ + int iOff; /* Current offset into pDlidx */ + int bRowidValid; /* iRowid is valid */ + int bEof; /* At EOF already */ + + /* Output variables */ + int iLeafPgno; /* Page number of current leaf page */ + int bZero; /* True if current leaf has no rowids */ + i64 iRowid; /* If bZero==0, first rowid on leaf */ +}; + + /* ** An Fts5BtreeIter object is used to iterate through all entries in the ** b-tree hierarchy belonging to a single fts5 segment. In this case the @@ -577,6 +602,55 @@ struct Fts5BtreeIter { int bDlidx; /* True if there exists a dlidx */ }; + +/* +** Decode a segment-data rowid from the %_data table. This function is +** the opposite of macro FTS5_SEGMENT_ROWID(). +*/ +static void fts5DecodeRowid( + i64 iRowid, /* Rowid from %_data table */ + int *piIdx, /* OUT: Index */ + int *piSegid, /* OUT: Segment id */ + int *piHeight, /* OUT: Height */ + int *piPgno /* OUT: Page number */ +){ + *piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B) - 1)); + iRowid >>= FTS5_DATA_PAGE_B; + + *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1)); + iRowid >>= FTS5_DATA_HEIGHT_B; + + *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1)); + iRowid >>= FTS5_DATA_ID_B; + + *piIdx = (int)(iRowid & (((i64)1 << FTS5_DATA_IDX_B) - 1)); +} + +static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){ + int iIdx,iSegid,iHeight,iPgno; /* Rowid compenents */ + fts5DecodeRowid(iKey, &iIdx, &iSegid, &iHeight, &iPgno); + + if( iSegid==0 ){ + if( iKey==FTS5_AVERAGES_ROWID ){ + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(averages) "); + }else{ + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, + "(structure idx=%d)", (int)(iKey-10) + ); + } + } + else if( iHeight==FTS5_SEGMENT_MAX_HEIGHT ){ + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(dlidx idx=%d segid=%d pgno=%d)", + iIdx, iSegid, iPgno + ); + }else{ + 
sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(idx=%d segid=%d h=%d pgno=%d)", + iIdx, iSegid, iHeight, iPgno + ); + } +} + + static void fts5PutU16(u8 *aOut, u16 iVal){ aOut[0] = (iVal>>8); aOut[1] = (iVal&0xFF); @@ -666,7 +740,15 @@ static Fts5Data *fts5DataReadOrBuffer( ){ Fts5Data *pRet = 0; if( p->rc==SQLITE_OK ){ - int rc; + int rc = SQLITE_OK; + +#if 0 +Fts5Buffer buf = {0,0,0}; +fts5DebugRowid(&rc, &buf, iRowid); +fprintf(stdout, "read: %s\n", buf.p); +fflush(stdout); +sqlite3_free(buf.p); +#endif /* If the blob handle is not yet open, open and seek it. Otherwise, use ** the blob_reopen() API to reseek the existing blob handle. */ @@ -1041,6 +1123,115 @@ static void fts5NodeIterFree(Fts5NodeIter *pIter){ fts5BufferFree(&pIter->term); } +/* +** Return non-zero if EOF is reached. +*/ +static int fts5DlidxIterNext(Fts5DlidxIter *pIter, int bRev){ + if( bRev ){ + i64 iVal; + int iOff = pIter->iOff; + int iLimit; + u8 *a = pIter->pData->p; + + /* Currently iOff points to the first byte of a varint. This block + ** decrements iOff until it points to the first byte of the previous + ** varint. Taking care not to read any memory locations that occur + ** before the buffer in memory. */ + iLimit = (iOff>9 ? 
iOff-9 : 0); + for(iOff--; iOff>iLimit; iOff--){ + if( (a[iOff-1] & 0x80)==0 ) break; + } + pIter->iOff = iOff; + + if( iOff<=0 ){ + pIter->bEof = 1; + return 1; + } + + getVarint(&a[iOff], (u64*)&iVal); + if( iVal==0 ){ + pIter->bZero = 1; + }else if( iOff==0 ){ + pIter->iRowid = iVal; + }else{ + pIter->iRowid += iVal; + } + pIter->iLeafPgno--; + }else{ + i64 iVal; + if( pIter->iOff>=pIter->pData->n ){ + pIter->bEof = 1; + return 1; + } + pIter->iOff += getVarint(&pIter->pData->p[pIter->iOff], (u64*)&iVal); + if( iVal==0 ){ + pIter->bZero = 1; + }else{ + pIter->bZero = 0; + if( pIter->bRowidValid ){ + pIter->iRowid -= iVal; + }else{ + pIter->bRowidValid = 1; + pIter->iRowid = iVal; + } + } + pIter->iLeafPgno++; + } + return 0; +} + +static void fts5DlidxIterLast(Fts5DlidxIter *pIter){ + while( 0==fts5DlidxIterNext(pIter, 0) ); + assert( pIter->iOff==pIter->pData->n && pIter->bEof==1 ); + pIter->bEof = 0; +} + +static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){ + return (p->rc!=SQLITE_OK || pIter->bEof); +} + +static void fts5DlidxIterInit( + Fts5Index *p, /* Fts5 Backend to iterate within */ + int bRev, /* True for ORDER BY ASC */ + int iIdx, int iSegid, /* Segment iSegid within index iIdx */ + int iLeafPgno, /* Leaf page number to load dlidx for */ + Fts5DlidxIter **ppIter /* OUT: Populated iterator */ +){ + Fts5DlidxIter *pIter = *ppIter; + Fts5Data *pDlidx; + + pDlidx = fts5DataRead(p, FTS5_DOCLIST_IDX_ROWID(iIdx, iSegid, iLeafPgno)); + if( pDlidx==0 ) return; + if( pIter==0 ){ + *ppIter = pIter = (Fts5DlidxIter*)fts5IdxMalloc(p, sizeof(Fts5DlidxIter)); + if( pIter==0 ){ + fts5DataRelease(pDlidx); + return; + } + }else{ + memset(pIter, 0, sizeof(Fts5DlidxIter)); + } + + pIter->pData = pDlidx; + + pIter->iLeafPgno = iLeafPgno; + if( bRev==0 ){ + fts5DlidxIterNext(pIter, 0); + }else{ + fts5DlidxIterLast(pIter); + } +} + +/* +** Free a doclist-index iterator object allocated by fts5DlidxIterInit(). 
+*/ +static void fts5DlidxIterFree(Fts5DlidxIter *pIter){ + if( pIter ){ + fts5DataRelease(pIter->pData); + sqlite3_free(pIter); + } +} + /* ** Load the next leaf page into the segment iterator. */ @@ -1175,6 +1366,50 @@ static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){ pIter->iRowidOffset = iRowidOffset; } +/* +** +*/ +static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){ + assert( pIter->flags & FTS5_SEGITER_REVERSE ); + assert( pIter->flags & FTS5_SEGITER_ONETERM ); + + fts5DataRelease(pIter->pLeaf); + pIter->pLeaf = 0; + while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){ + Fts5Data *pNew; + pIter->iLeafPgno--; + pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID( + pIter->iIdx, pIter->pSeg->iSegid, 0, pIter->iLeafPgno + )); + if( pNew ){ + if( pIter->iLeafPgno==pIter->iTermLeafPgno ){ + if( pIter->iTermLeafOffset<pNew->n ){ + pIter->pLeaf = pNew; + pIter->iLeafOffset = pIter->iTermLeafOffset; + } + }else{ + int iRowidOff, dummy; + fts5LeafHeader(pNew, &iRowidOff, &dummy); + if( iRowidOff ){ + pIter->pLeaf = pNew; + pIter->iLeafOffset = iRowidOff; + } + } + + if( pIter->pLeaf ){ + u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset]; + pIter->iLeafOffset += getVarint(a, (u64*)&pIter->iRowid); + break; + }else{ + fts5DataRelease(pNew); + } + } + } + + if( pIter->pLeaf ){ + fts5SegIterReverseInitPage(p, pIter); + } +} /* ** Advance iterator pIter to the next entry.
@@ -1202,6 +1437,8 @@ static void fts5SegIterNext( getVarint(&a[iOff], (u64*)&iDelta); pIter->iRowid += iDelta; }else{ + fts5SegIterReverseNewPage(p, pIter); +#if 0 fts5DataRelease(pIter->pLeaf); pIter->pLeaf = 0; while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){ @@ -1238,6 +1475,7 @@ static void fts5SegIterNext( if( pIter->pLeaf ){ fts5SegIterReverseInitPage(p, pIter); } +#endif } }else{ Fts5Data *pLeaf = pIter->pLeaf; @@ -1380,6 +1618,42 @@ static void fts5SegIterReverse(Fts5Index *p, int iIdx, Fts5SegIter *pIter){ pIter->flags |= FTS5_SEGITER_REVERSE; } +/* +** Iterator pIter currently points to the first rowid of a doclist within +** index iIdx. There is a doclist-index associated with the final term on +** the current page. If the current term is the last term on the page, +** load the doclist-index from disk and initialize an iterator at +** (pIter->pDlidx). +*/ +static void fts5SegIterLoadDlidx(Fts5Index *p, int iIdx, Fts5SegIter *pIter){ + int iSegid = pIter->pSeg->iSegid; + int bRev = (pIter->flags & FTS5_SEGITER_REVERSE); + Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ + int iOff = pIter->iLeafOffset; /* Byte offset within current leaf */ + + assert( pIter->flags & FTS5_SEGITER_ONETERM ); + assert( pIter->pDlidx==0 ); + + /* Check if the current doclist ends on this page. If it does, return + ** early without loading the doclist-index (as it belongs to a different + ** term. */ + while( iOff<pLeaf->n ){ + i64 iDelta; + int nPoslist; + + /* iOff is currently the offset of the size field of a position list. */ + iOff += getVarint32(&pLeaf->p[iOff], nPoslist); + iOff += nPoslist; + + if( iOff<pLeaf->n ){ + iOff += getVarint(&pLeaf->p[iOff], (u64*)&iDelta); + if( iDelta==0 ) return; + } + } + + fts5DlidxIterInit(p, bRev, iIdx, iSegid, pIter->iLeafPgno, &pIter->pDlidx); +} + /* ** Initialize the object pIter to point to term pTerm/nTerm within segment ** pSeg, index iIdx.
If there is no such term in the index, the iterator @@ -1399,6 +1673,7 @@ static void fts5SegIterSeekInit( int iPg = 1; int h; int bGe = ((flags & FTS5INDEX_QUERY_PREFIX) && iIdx==0); + int bDlidx = 0; /* True if there is a doclist-index */ assert( bGe==0 || (flags & FTS5INDEX_QUERY_ASC)==0 ); assert( pTerm && nTerm ); @@ -1418,11 +1693,13 @@ static void fts5SegIterSeekInit( assert( node.term.n==0 ); iPg = node.iChild; + bDlidx = node.bDlidx; for(fts5NodeIterNext(&p->rc, &node); node.aData && fts5BufferCompareBlob(&node.term, pTerm, nTerm)<=0; fts5NodeIterNext(&p->rc, &node) ){ iPg = node.iChild; + bDlidx = node.bDlidx; } fts5NodeIterFree(&node); fts5DataRelease(pNode); @@ -1430,6 +1707,7 @@ static void fts5SegIterSeekInit( if( iPg<pSeg->pgnoFirst ){ iPg = pSeg->pgnoFirst; + bDlidx = 0; } pIter->iLeafPgno = iPg - 1; @@ -1454,8 +1732,13 @@ static void fts5SegIterSeekInit( if( bGe==0 ){ pIter->flags |= FTS5_SEGITER_ONETERM; - if( pIter->pLeaf && (flags & FTS5INDEX_QUERY_ASC) ){ - fts5SegIterReverse(p, iIdx, pIter); + if( pIter->pLeaf ){ + if( bDlidx ){ + fts5SegIterLoadDlidx(p, iIdx, pIter); + } + if( flags & FTS5INDEX_QUERY_ASC ){ + fts5SegIterReverse(p, iIdx, pIter); + } } } } @@ -1466,6 +1749,7 @@ static void fts5SegIterSeekInit( static void fts5SegIterClear(Fts5SegIter *pIter){ fts5BufferFree(&pIter->term); fts5DataRelease(pIter->pLeaf); + fts5DlidxIterFree(pIter->pDlidx); sqlite3_free(pIter->aRowidOffset); memset(pIter, 0, sizeof(Fts5SegIter)); } @@ -1551,6 +1835,84 @@ static void fts5MultiIterAdvanced( } } +/* +** Move the seg-iter so that it points to the first rowid on page iLeafPgno. +** It is an error if leaf iLeafPgno contains no rowid.
+*/ +static void fts5SegIterGotoPage( + Fts5Index *p, /* FTS5 backend object */ + Fts5SegIter *pIter, /* Iterator to advance */ + int iLeafPgno +){ + assert( iLeafPgno>pIter->iLeafPgno ); + if( p->rc==SQLITE_OK ){ + pIter->iLeafPgno = iLeafPgno-1; + fts5SegIterNextPage(p, pIter); + assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno ); + } + + if( p->rc==SQLITE_OK ){ + int iOff; + u8 *a = pIter->pLeaf->p; + int n = pIter->pLeaf->n; + + iOff = fts5GetU16(&a[0]); + if( iOff<4 || iOff>=n ){ + p->rc = FTS5_CORRUPT; + }else{ + iOff += getVarint(&a[iOff], (u64*)&pIter->iRowid); + pIter->iLeafOffset = iOff; + } + } +} + +/* +** Advance the iterator passed as the second argument until it is at or +** past rowid iFrom. Regardless of the value of iFrom, the iterator is +** always advanced at least once. +*/ +static void fts5SegIterNextFrom( + Fts5Index *p, /* FTS5 backend object */ + Fts5SegIter *pIter, /* Iterator to advance */ + i64 iMatch /* Advance iterator at least this far */ +){ + int bRev = (pIter->flags & FTS5_SEGITER_REVERSE); + Fts5DlidxIter *pDlidx = pIter->pDlidx; + int iLeafPgno = pIter->iLeafPgno; + + assert( pIter->flags & FTS5_SEGITER_ONETERM ); + assert( pIter->pDlidx ); + assert( pIter->pLeaf ); + + + if( bRev==0 ){ + while( fts5DlidxIterEof(p, pDlidx)==0 && iMatch<pDlidx->iRowid ){ + if( pDlidx->bZero==0 ) iLeafPgno = pDlidx->iLeafPgno; + fts5DlidxIterNext(pDlidx, 0); + } + assert( iLeafPgno>=pIter->iLeafPgno || p->rc ); + if( iLeafPgno>pIter->iLeafPgno ){ + fts5SegIterGotoPage(p, pIter, iLeafPgno); + } + }else if( 0 ){ + while( fts5DlidxIterEof(p, pDlidx)==0 && iMatch>pDlidx->iRowid ){ + fts5DlidxIterNext(pDlidx, 0); + if( pDlidx->bZero==0 ) iLeafPgno = pDlidx->iLeafPgno; + } + assert( iLeafPgno<=pIter->iLeafPgno || p->rc ); + if( iLeafPgno<pIter->iLeafPgno ){ + fts5SegIterGotoPage(p, pIter, iLeafPgno); + } + } + + while( 1 ){ + fts5SegIterNext(p, pIter); + if( pIter->pLeaf==0 ) break; + if( bRev==0 && pIter->iRowid<=iMatch ) break; + if( bRev!=0 &&
pIter->iRowid>=iMatch ) break; + } +} + /* ** Move the iterator to the next entry. ** @@ -1558,10 +1920,20 @@ static void fts5MultiIterAdvanced( ** considered an error if the iterator reaches EOF, or if it is already at ** EOF when this function is called. */ -static void fts5MultiIterNext(Fts5Index *p, Fts5MultiSegIter *pIter){ +static void fts5MultiIterNext( + Fts5Index *p, + Fts5MultiSegIter *pIter, + int bFrom, /* True if argument iFrom is valid */ + i64 iFrom /* Advance at least as far as this */ +){ if( p->rc==SQLITE_OK ){ int iFirst = pIter->aFirst[1]; - fts5SegIterNext(p, &pIter->aSeg[iFirst]); + Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; + if( bFrom && pSeg->pDlidx ){ + fts5SegIterNextFrom(p, pSeg, iFrom); + }else{ + fts5SegIterNext(p, pSeg); + } fts5MultiIterAdvanced(p, pIter, iFirst, 1); } } @@ -1682,7 +2054,7 @@ static void fts5MultiIterNextFrom( ){ while( 1 ){ i64 iRowid; - fts5MultiIterNext(p, pIter); + fts5MultiIterNext(p, pIter, 1, iMatch); if( fts5MultiIterEof(p, pIter) ) break; iRowid = fts5MultiIterRowid(pIter); if( pIter->bRev==0 && iRowid<=iMatch ) break; @@ -2589,7 +2961,7 @@ fflush(stdout); for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, 0, iLvl, nInput, &pIter); fts5MultiIterEof(p, pIter)==0; - fts5MultiIterNext(p, pIter) + fts5MultiIterNext(p, pIter, 0, 0) ){ Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1] ]; Fts5ChunkIter sPos; /* Used to iterate through position list */ @@ -3005,36 +3377,7 @@ static void fts5BtreeIterFree(Fts5BtreeIter *pIter){ fts5BufferFree(&pIter->term); } -typedef struct DoclistIdxIter DoclistIdxIter; -struct DoclistIdxIter { - Fts5Data *pDlidx; /* Data for doclist index, if any */ - int iOff; /* Current offset into pDlidx */ - int bRowidValid; /* iRowid is valid */ - int bZero; /* True if current leaf has no rowid */ - i64 iRowid; /* If bZero==0, first rowid on leaf */ -}; - -/* -** Return non-zero if EOF is reached. 
-*/ -static int fts5IndexDoclistIterNext(DoclistIdxIter *pIter){ - i64 iVal; - if( pIter->iOff>=pIter->pDlidx->n ) return 1; - pIter->iOff += getVarint(&pIter->pDlidx->p[pIter->iOff], (u64*)&iVal); - if( iVal==0 ){ - pIter->bZero = 1; - }else{ - pIter->bZero = 0; - if( pIter->bRowidValid ){ - pIter->iRowid -= iVal; - }else{ - pIter->bRowidValid = 1; - pIter->iRowid = iVal; - } - } - return 0; -} static void fts5IndexIntegrityCheckSegment( Fts5Index *p, /* FTS5 backend object */ @@ -3052,7 +3395,6 @@ static void fts5IndexIntegrityCheckSegment( Fts5Data *pLeaf; /* Data for this leaf */ int iOff; /* Offset of first term on leaf */ int i; /* Used to iterate through empty leaves */ - DoclistIdxIter dliter; /* For iterating through any doclist index */ /* If the leaf in question has already been trimmed from the segment, ** ignore this b-tree entry. Otherwise, load it into memory. */ @@ -3079,65 +3421,50 @@ static void fts5IndexIntegrityCheckSegment( fts5DataRelease(pLeaf); if( p->rc ) break; - memset(&dliter, 0, sizeof(DoclistIdxIter)); - if( iter.bDlidx ){ - i64 iDlidxRowid = FTS5_DOCLIST_IDX_ROWID(iIdx, pSeg->iSegid, iter.iLeaf); - dliter.pDlidx = fts5DataRead(p, iDlidxRowid); - } /* Now check that the iter.nEmpty leaves following the current leaf ** (a) exist and (b) contain no terms. */ - for(i=1; i<=iter.nEmpty; i++){ + for(i=1; p->rc==SQLITE_OK && i<=iter.nEmpty; i++){ pLeaf = fts5DataRead(p, iRow+i); if( pLeaf && 0!=fts5GetU16(&pLeaf->p[2]) ){ p->rc = FTS5_CORRUPT; } - if( pLeaf && dliter.pDlidx ){ - if( fts5IndexDoclistIterNext(&dliter) ){ - p->rc = FTS5_CORRUPT; - }else{ + fts5DataRelease(pLeaf); + } + + /* If there is a doclist-index, check that it looks right. 
*/ + if( iter.bDlidx ){ + Fts5DlidxIter *pDlidx = 0; /* For iterating through doclist index */ + int nEntry = 0; + int iSegid = pSeg->iSegid; + int bRev = 0; + + for(fts5DlidxIterInit(p, bRev, iIdx, iSegid, iter.iLeaf, &pDlidx); + fts5DlidxIterEof(p, pDlidx)==0; + fts5DlidxIterNext(pDlidx, bRev) + ){ + i64 iKey = FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, pDlidx->iLeafPgno); + pLeaf = fts5DataRead(p, iKey); + if( pLeaf ){ int iRowidOff = fts5GetU16(&pLeaf->p[0]); - if( dliter.bZero ){ + if( pDlidx->bZero ){ if( iRowidOff!=0 ) p->rc = FTS5_CORRUPT; }else{ i64 iRowid; getVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid); - if( iRowid!=dliter.iRowid ) p->rc = FTS5_CORRUPT; - } - } - } - fts5DataRelease(pLeaf); - } - - /* There may (or may not be) a final entry in the doclist. The entry - ** is only present if the page following the nEmpty termless pages - ** (a) exists and (b) contains at least one rowid that is part of - ** the doclist. */ - if( dliter.pDlidx ){ - if( (iter.iLeaf + iter.nEmpty)==pSeg->pgnoLast ){ - /* The next page does not exist. So the iterator should be at EOF. 
*/ - if( fts5IndexDoclistIterNext(&dliter)==0 ) p->rc = FTS5_CORRUPT; - }else{ - Fts5Data *pLeaf = fts5DataRead(p, iRow+i); - if( pLeaf ){ - int iRowidOff = fts5GetU16(&pLeaf->p[0]); - if( iRowidOff==0 ){ - if( fts5IndexDoclistIterNext(&dliter)==0 ) p->rc = FTS5_CORRUPT; - }else{ - if( fts5IndexDoclistIterNext(&dliter) ){ - p->rc = FTS5_CORRUPT; - }else{ - i64 iRowid; - getVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid); - if( iRowid!=dliter.iRowid ) p->rc = FTS5_CORRUPT; - } + if( iRowid!=pDlidx->iRowid ) p->rc = FTS5_CORRUPT; } fts5DataRelease(pLeaf); } + nEntry++; } - } - fts5DataRelease(dliter.pDlidx); + /* Check that the doclist-index was the right length */ + if( p->rc==SQLITE_OK && nEntry!=iter.nEmpty && nEntry!=iter.nEmpty+1 ){ + p->rc = FTS5_CORRUPT; + } + fts5DlidxIterFree(pDlidx); + } } if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){ @@ -3169,7 +3496,7 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ Fts5Structure *pStruct = fts5StructureRead(p, iIdx); for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, 0, -1, 0, &pIter); fts5MultiIterEof(p, pIter)==0; - fts5MultiIterNext(p, pIter) + fts5MultiIterNext(p, pIter, 0, 0) ){ Fts5PosIter sPos; /* Used to iterate through position list */ int n; /* Size of term in bytes */ @@ -3250,29 +3577,6 @@ static void fts5DecodeStructure( fts5StructureRelease(p); } -/* -** Decode a segment-data rowid from the %_data table. This function is -** the opposite of macro FTS5_SEGMENT_ROWID(). 
-*/ -static void fts5DecodeRowid( - i64 iRowid, /* Rowid from %_data table */ - int *piIdx, /* OUT: Index */ - int *piSegid, /* OUT: Segment id */ - int *piHeight, /* OUT: Height */ - int *piPgno /* OUT: Page number */ -){ - *piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B) - 1)); - iRowid >>= FTS5_DATA_PAGE_B; - - *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1)); - iRowid >>= FTS5_DATA_HEIGHT_B; - - *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1)); - iRowid >>= FTS5_DATA_ID_B; - - *piIdx = (int)(iRowid & (((i64)1 << FTS5_DATA_IDX_B) - 1)); -} - /* ** Buffer (a/n) is assumed to contain a list of serialized varints. Read ** each varint and append its string representation to buffer pBuf. Return @@ -3331,7 +3635,7 @@ static void fts5DecodeFunction( sqlite3_value **apVal /* Function arguments */ ){ i64 iRowid; /* Rowid for record being decoded */ - int iIdx,iSegid,iHeight,iPgno; /* Rowid compenents */ + int iIdx,iSegid,iHeight,iPgno; /* Rowid components */ const u8 *a; int n; /* Record to decode */ Fts5Buffer s; /* Build up text to return here */ int rc = SQLITE_OK; /* Return code */ @@ -3343,12 +3647,10 @@ static void fts5DecodeFunction( a = sqlite3_value_blob(apVal[1]); fts5DecodeRowid(iRowid, &iIdx, &iSegid, &iHeight, &iPgno); + fts5DebugRowid(&rc, &s, iRowid); if( iHeight==FTS5_SEGMENT_MAX_HEIGHT ){ int i = 0; i64 iPrev; - sqlite3Fts5BufferAppendPrintf(&rc, &s, "(dlidx idx=%d segid=%d pgno=%d)", - iIdx, iSegid, iPgno - ); if( n>0 ){ i = getVarint(&a[i], (u64*)&iPrev); sqlite3Fts5BufferAppendPrintf(&rc, &s, " %lld", iPrev); @@ -3367,20 +3669,14 @@ static void fts5DecodeFunction( }else if( iSegid==0 ){ if( iRowid==FTS5_AVERAGES_ROWID ){ - sqlite3Fts5BufferAppendPrintf(&rc, &s, "{averages} "); + /* todo */ }else{ - sqlite3Fts5BufferAppendPrintf(&rc, &s, - "{structure idx=%d}", (int)(iRowid-10) - ); fts5DecodeStructure(&rc, &s, a, n); } }else{ Fts5Buffer term; memset(&term, 0, sizeof(Fts5Buffer)); - sqlite3Fts5BufferAppendPrintf(&rc, 
&s, "(idx=%d segid=%d h=%d pgno=%d) ", - iIdx, iSegid, iHeight, iPgno - ); if( iHeight==0 ){ int iTermOff = 0; @@ -3666,7 +3962,7 @@ static void fts5SetupPrefixIter( memset(&doclist, 0, sizeof(doclist)); for(fts5MultiIterNew(p, pStruct, 0, 1, pToken, nToken, -1, 0, &p1); fts5MultiIterEof(p, p1)==0; - fts5MultiIterNext(p, p1) + fts5MultiIterNext(p, p1, 0, 0) ){ i64 iRowid = fts5MultiIterRowid(p1); int nTerm; @@ -3785,7 +4081,7 @@ void sqlite3Fts5IterNext(Fts5IndexIter *pIter){ fts5DoclistIterNext(pIter->pDoclist); }else{ fts5BufferZero(&pIter->poslist); - fts5MultiIterNext(pIter->pIndex, pIter->pMulti); + fts5MultiIterNext(pIter->pIndex, pIter->pMulti, 0, 0); } } diff --git a/manifest b/manifest index 87e8bca504..bd1784eeb1 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Start\schanging\sthings\sto\suse\sdoclist\sindexes\sas\srequired.\scode\sis\snot\sactivated\syet. -D 2014-08-02T20:49:36.405 +C Fix\sfts5_index.c\sto\suse\sdoclist-indexes\swhen\spossible.\sOnly\ssome\scases\swork\sso\sfar. 
+D 2014-08-04T20:07:40.532 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -110,7 +110,7 @@ F ext/fts5/fts5_aux.c 366057c7186bc3615deb5ecc0ff61de50b6d2dbc F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00 F ext/fts5/fts5_config.c f4ebf143e141b8c77355e3b15aba81b7be51d710 F ext/fts5/fts5_expr.c 9402474456732ddb5019f83a77907852f108a96a -F ext/fts5/fts5_index.c 20c905c323d866251e15d7ed2486c309914ceeb9 +F ext/fts5/fts5_index.c 3578823a9a43fcc77ce46c7f6efddfd155544053 F ext/fts5/fts5_storage.c 2866e7e1de9dc851756c3a9c76b6e1d75e0facb7 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 @@ -595,14 +595,14 @@ F test/fts4merge3.test aab02a09f50fe6baaddc2e159c3eabc116d45fc7 F test/fts4merge4.test d895b1057a7798b67e03455d0fa50e9ea836c47b F test/fts4noti.test 524807f0c36d49deea7920cdd4cd687408b58849 F test/fts4unicode.test 01ec3fe2a7c3cfff3b4c0581b83caa11b33efa36 -F test/fts5aa.test ec150ac2778f871550bcdbea34598fba08717a4e +F test/fts5aa.test f54245091fee924030722234070fcba95a493549 F test/fts5ab.test dc04ed48cf93ca957d174406e6c192f2ff4f3397 F test/fts5ac.test 9be418d037763f4cc5d86f4239db41fc86bb4f85 F test/fts5ad.test 2ed38bbc865678cb2905247120d02ebba7f20e07 F test/fts5ae.test cb37b3135a00d3afd5492ec534ecf654be5ff69e F test/fts5af.test 9ebe23aa3875896076952c7bc6e8308813a63c74 F test/fts5ag.test 0747bf3bade16d5165810cf891f875933b28b420 -F test/fts5ah.test 2b01e7d2b3a31b668cba2afad5cb1c651895a255 +F test/fts5ah.test ca1f12b5738992c2edbdeb6c16133d41cfb9c031 F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef @@ -1199,7 +1199,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh 
f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P c98934155cb48adfda57bd0fd1b950226d45f67a -R 0addd1f8d0c92beb67e8a764402a7ad4 +P b8864da95db2c0e611116304d607e35a86c9247d +R 2807aba63fb0a8821d708dc4cbc7f577 U dan -Z f1895ff018d8274d19451cd024daaa99 +Z 6114a7973c3151dffa74bd597b78489f diff --git a/manifest.uuid b/manifest.uuid index 8a1f54c530..a558eb6008 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -b8864da95db2c0e611116304d607e35a86c9247d \ No newline at end of file +90b82d3ef613b2915e0e280dc1d2e5a2b617d59c \ No newline at end of file diff --git a/test/fts5aa.test b/test/fts5aa.test index 87e1494d55..3d1fc99289 100644 --- a/test/fts5aa.test +++ b/test/fts5aa.test @@ -50,7 +50,7 @@ do_execsql_test 2.1 { do_execsql_test 2.2 { SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 } { - {{structure idx=0} {lvl=0 nMerge=0 {id=27723 h=1 leaves=1..1}}} + {(structure idx=0) {lvl=0 nMerge=0 {id=27723 h=1 leaves=1..1}}} } do_execsql_test 2.3 { INSERT INTO t1(t1) VALUES('integrity-check'); @@ -182,7 +182,6 @@ for {set i 1} {$i <= 10} {incr i} { } execsql { INSERT INTO t1(t1) VALUES('integrity-check'); } } {} - if {[set_test_counter errors]} exit } #------------------------------------------------------------------------- diff --git a/test/fts5ah.test b/test/fts5ah.test index e108ec2ddd..3f217d9f86 100644 --- a/test/fts5ah.test +++ b/test/fts5ah.test @@ -55,16 +55,37 @@ proc reads {} { do_test 1.4 { set nRead [reads] - db eval { SELECT rowid FROM t1 WHERE t1 MATCH 'x' } - set a [expr [reads] - $nRead] -} {} + execsql { SELECT rowid FROM t1 WHERE t1 MATCH 'x' } + set nReadX [expr [reads] - $nRead] + expr $nReadX>1000 +} {1} -do_test 1.5 { - set nRead [reads] - db eval { SELECT rowid FROM t1 WHERE t1 MATCH 'x + w' } - set a [expr [reads] - $nRead] -} {} +foreach {tn q res} " + 1 { SELECT rowid FROM t1 WHERE t1 MATCH 'w + x' } [list $W] + 2 { 
SELECT rowid FROM t1 WHERE t1 MATCH 'x + w' } [list $W] + 3 { SELECT rowid FROM t1 WHERE t1 MATCH 'x AND w' } [list $W] + 4 { SELECT rowid FROM t1 WHERE t1 MATCH 'y AND x' } [list $Y] +" { + do_test 1.5.$tn.1 { + set nRead [reads] + execsql $q + set n [expr [reads] - $nRead] + expr {$n < ($nReadX / 10)} + } {1} + + do_test 1.5.$tn.2 { + set nRead [reads] + execsql "$q ORDER BY rowid ASC" + set n [expr [reads] - $nRead] + expr {$n < ($nReadX / 10)} + } {1} + + do_execsql_test 1.5.$tn.3 $q [lsort -int -decr $res] + do_execsql_test 1.5.$tn.4 "$q ORDER BY rowid ASC" [lsort -int -incr $res] +} + +#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r} finish_test