diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index cc6457df6a..c61598e477 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -1021,7 +1021,7 @@ static int fts5SeekCursor(Fts5Cursor *pCsr){ }else{ rc = sqlite3_reset(pCsr->pStmt); if( rc==SQLITE_OK ){ - rc = SQLITE_CORRUPT_VTAB; + rc = FTS5_CORRUPT; } } } diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index d7172e315e..728b6461a1 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -33,6 +33,13 @@ #define FTS5_RANK_NAME "rank" #define FTS5_ROWID_NAME "rowid" +#ifdef SQLITE_DEBUG +# define FTS5_CORRUPT sqlite3Fts5Corrupt() +int sqlite3Fts5Corrupt(void); +#else +# define FTS5_CORRUPT SQLITE_CORRUPT_VTAB +#endif + /************************************************************************** ** Interface to code in fts5.c. */ diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 3cd4892a59..0cb2c2ee31 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -256,10 +256,7 @@ FTS5_SEGMENT_ROWID(idx, segid, FTS5_SEGMENT_MAX_HEIGHT, pgno) #ifdef SQLITE_DEBUG -static int fts5Corrupt() { return SQLITE_CORRUPT_VTAB; } -# define FTS5_CORRUPT fts5Corrupt() -#else -# define FTS5_CORRUPT SQLITE_CORRUPT_VTAB +int sqlite3Fts5Corrupt(void) { return SQLITE_CORRUPT_VTAB; } #endif @@ -373,7 +370,7 @@ struct Fts5Structure { struct Fts5PageWriter { int pgno; /* Page number for this page */ Fts5Buffer buf; /* Buffer containing page data */ - Fts5Buffer term; /* Buffer containing previous term on page */ + Fts5Buffer term; /* Buffer containing previous term on page */ }; struct Fts5SegWriter { int iIdx; /* Index to write to */ @@ -383,6 +380,7 @@ struct Fts5SegWriter { i64 iPrevRowid; /* Previous docid written to current leaf */ u8 bFirstRowidInDoclist; /* True if next rowid is first in doclist */ u8 bFirstRowidInPage; /* True if next rowid is first in page */ + u8 bFirstTermInPage; /* True if next term will be first in leaf */ int nLeafWritten; /* Number of leaf pages written */ int nEmpty; /* Number of 
contiguous term-less nodes */ Fts5Buffer dlidx; /* Doclist index */ @@ -2677,7 +2675,7 @@ static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ Fts5PageWriter *pPage = &pWriter->aWriter[0]; i64 iRowid; - if( pPage->term.n==0 ){ + if( pWriter->bFirstTermInPage ){ /* No term was written to this page. */ assert( 0==fts5GetU16(&pPage->buf.p[2]) ); fts5WriteBtreeNoTerm(p, pWriter); @@ -2689,12 +2687,14 @@ static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ /* Initialize the next page. */ fts5BufferZero(&pPage->buf); - fts5BufferZero(&pPage->term); fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero); pPage->pgno++; /* Increase the leaves written counter */ pWriter->nLeafWritten++; + + /* The new leaf holds no terms */ + pWriter->bFirstTermInPage = 1; } /* @@ -2717,23 +2717,38 @@ static void fts5WriteAppendTerm( /* Zero the first term and first docid fields */ static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 }; fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero); - assert( pPage->term.n==0 ); + assert( pWriter->bFirstTermInPage ); } if( p->rc ) return; - if( pPage->term.n==0 ){ + if( pWriter->bFirstTermInPage ){ /* Update the "first term" field of the page header. */ assert( pPage->buf.p[2]==0 && pPage->buf.p[3]==0 ); fts5PutU16(&pPage->buf.p[2], pPage->buf.n); nPrefix = 0; - if( pWriter->aWriter[0].pgno!=1 ){ - fts5WriteBtreeTerm(p, pWriter, nTerm, pTerm); + if( pPage->pgno!=1 ){ + /* This is the first term on a leaf that is not the leftmost leaf in + ** the segment b-tree. In this case it is necessary to add a term to + ** the b-tree hierarchy that is (a) larger than the largest term + ** already written to the segment and (b) smaller than or equal to + ** this term. In other words, a prefix of (pTerm/nTerm) that is one + ** byte longer than the longest prefix (pTerm/nTerm) shares with the + ** previous term. + ** + ** Usually, the previous term is available in pPage->term. 
The exception + ** is if this is the first term written in an incremental-merge step. + ** In this case the previous term is not available, so just write a + ** copy of (pTerm/nTerm) into the parent node. This is slightly + ** inefficient, but still correct. */ + int n = nTerm; + if( pPage->term.n ){ + n = 1 + fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm); + } + fts5WriteBtreeTerm(p, pWriter, n, pTerm); pPage = &pWriter->aWriter[0]; } }else{ - nPrefix = fts5PrefixCompress( - pPage->term.n, pPage->term.p, nTerm, pTerm - ); + nPrefix = fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm); fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix); } @@ -2744,6 +2759,7 @@ static void fts5WriteAppendTerm( /* Update the Fts5PageWriter.term field. */ fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm); + pWriter->bFirstTermInPage = 0; pWriter->bFirstRowidInPage = 0; pWriter->bFirstRowidInDoclist = 1; @@ -2900,6 +2916,7 @@ static void fts5WriteInit( if( pWriter->aWriter==0 ) return; pWriter->nWriter = 1; pWriter->aWriter[0].pgno = 1; + pWriter->bFirstTermInPage = 1; } static void fts5WriteInitForAppend( @@ -2937,6 +2954,8 @@ static void fts5WriteInitForAppend( pWriter->nEmpty = pSeg->pgnoLast-1; } assert( (pgno+pWriter->nEmpty)==pSeg->pgnoLast ); + pWriter->bFirstTermInPage = 1; + assert( pWriter->aWriter[0].term.n==0 ); } } @@ -3918,6 +3937,21 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ int iIdx; /* Used to iterate through indexes */ u64 cksum2 = 0; /* Checksum based on contents of indexes */ + /* Check that the internal nodes of each segment match the leaves */ + for(iIdx=0; p->rc==SQLITE_OK && iIdx<=pConfig->nPrefix; iIdx++){ + Fts5Structure *pStruct = fts5StructureRead(p, iIdx); + if( pStruct ){ + int iLvl, iSeg; + for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ + for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ + Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg]; + fts5IndexIntegrityCheckSegment(p, iIdx, pSeg); + } + } + } + 
fts5StructureRelease(pStruct); + } + /* Check that the checksum of the index matches the argument checksum */ for(iIdx=0; iIdx<=pConfig->nPrefix; iIdx++){ Fts5MultiSegIter *pIter; @@ -3950,21 +3984,6 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ } if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT; - /* Check that the internal nodes of each segment match the leaves */ - for(iIdx=0; p->rc==SQLITE_OK && iIdx<=pConfig->nPrefix; iIdx++){ - Fts5Structure *pStruct = fts5StructureRead(p, iIdx); - if( pStruct ){ - int iLvl, iSeg; - for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ - for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ - Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg]; - fts5IndexIntegrityCheckSegment(p, iIdx, pSeg); - } - } - } - fts5StructureRelease(pStruct); - } - return fts5IndexReturn(p); } @@ -3990,10 +4009,12 @@ int sqlite3Fts5IndexBeginWrite(Fts5Index *p, i64 iRowid){ if( rc==SQLITE_OK ){ p->apHash = apNew; }else{ - for(i=0; inCol; i++){ - if( p->aTotalSize[i]!=aTotalSize[i] ) rc = SQLITE_CORRUPT_VTAB; + if( p->aTotalSize[i]!=aTotalSize[i] ) rc = FTS5_CORRUPT; } } @@ -812,12 +812,12 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p){ if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){ i64 nRow; rc = fts5StorageCount(p, "content", &nRow); - if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = SQLITE_CORRUPT_VTAB; + if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT; } if( rc==SQLITE_OK ){ i64 nRow; rc = fts5StorageCount(p, "docsize", &nRow); - if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = SQLITE_CORRUPT_VTAB; + if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT; } /* Pass the expected checksum down to the FTS index module. 
It will @@ -913,7 +913,7 @@ int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol){ } rc = sqlite3_reset(pLookup); if( bCorrupt && rc==SQLITE_OK ){ - rc = SQLITE_CORRUPT_VTAB; + rc = FTS5_CORRUPT; } } return rc; diff --git a/ext/fts5/test/fts5_common.tcl b/ext/fts5/test/fts5_common.tcl index 94e82d8684..9c612d202d 100644 --- a/ext/fts5/test/fts5_common.tcl +++ b/ext/fts5/test/fts5_common.tcl @@ -123,6 +123,20 @@ proc fts5_level_segs {tbl} { set ret } +proc fts5_level_segids {tbl} { + set sql "SELECT fts5_decode(rowid,block) aS r FROM ${tbl}_data WHERE rowid=10" + set ret [list] + foreach L [lrange [db one $sql] 1 end] { + set lvl [list] + foreach S [lrange $L 2 end] { + regexp {id=([1234567890]*)} $S -> segid + lappend lvl $segid + } + lappend ret $lvl + } + set ret +} + proc fts5_rnddoc {n} { set map [list 0 a 1 b 2 c 3 d 4 e 5 f 6 g 7 h 8 i 9 j] set doc [list] diff --git a/ext/fts5/test/fts5corrupt.test b/ext/fts5/test/fts5corrupt.test new file mode 100644 index 0000000000..052563f779 --- /dev/null +++ b/ext/fts5/test/fts5corrupt.test @@ -0,0 +1,75 @@ +# 2014 Dec 20 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. 
+# +#*********************************************************************** +# +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5corrupt + +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x); + INSERT INTO t1(t1, rank) VALUES('pgsz', 32); +} + +do_test 1.1 { + db transaction { + for {set i 1} {$i < 200} {incr i} { + set doc [list [string repeat x $i] [string repeat y $i]] + execsql { INSERT INTO t1(rowid, x) VALUES($i, $doc) } + } + } + fts5_level_segs t1 +} {1} +db_save + +do_execsql_test 1.2 { INSERT INTO t1(t1) VALUES('integrity-check') } +set segid [lindex [fts5_level_segids t1] 0] + +do_test 1.3 { + execsql { + DELETE FROM t1_data WHERE rowid = fts5_rowid('segment', 0, $segid, 0, 4); + } + catchsql { INSERT INTO t1(t1) VALUES('integrity-check') } +} {1 {SQL logic error or missing database}} + +do_test 1.4 { + db_restore_and_reopen + execsql { + UPDATE t1_data set block = X'00000000' || substr(block, 5) WHERE + rowid = fts5_rowid('segment', 0, $segid, 0, 4); + } + catchsql { INSERT INTO t1(t1) VALUES('integrity-check') } +} {1 {database disk image is malformed}} + +db_restore_and_reopen +#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r} + + + +#-------------------------------------------------------------------- +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE t2 USING fts5(x); + INSERT INTO t2(t2, rank) VALUES('pgsz', 32); +} +do_test 2.1 { + db transaction { + for {set i 0} {$i < 20} {incr i} { + execsql { INSERT INTO t2 VALUES('xxxxxxxxxx') } + } + for {set i 0} {$i < 20} {incr i} { + execsql { INSERT INTO t2 VALUES('xxxxxxxxxzzzz') } + } + } +} {} +#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t2_data} {puts $r} + +finish_test + diff --git a/manifest b/manifest index 5ed6b12d4c..9fdcf2e5e5 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Remove\ssome\sredundant\scode\sfrom\sfts5. 
-D 2015-01-23T06:50:33.338 +C Fix\scompression\sof\skeys\sstored\son\sinternal\ssegment\sb-tree\snodes\sby\sfts5. +D 2015-01-23T17:43:21.454 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 5407a688f4d77a05c18a8142be8ae5a2829dd610 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,22 +104,22 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl 4199cb887040ee3c3cd59a5171ddb0566904586e F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a -F ext/fts5/fts5.c 0ba5a8f27e1aa4deab82f0fc295d55f67dfe7f34 +F ext/fts5/fts5.c 41b852b654f79f522668bc7ba292755fb261f855 F ext/fts5/fts5.h f931954065693898d26c51f23f1d27200184a69a -F ext/fts5/fts5Int.h da4ad7558c2284fdf3297f907e2c5454a2237e15 +F ext/fts5/fts5Int.h 1d8f968b8ff71de15176acf8f4b14a2bdebcb6e3 F ext/fts5/fts5_aux.c 549aef152b0fd46020f5595d861b1fd60b3f9b4f F ext/fts5/fts5_buffer.c 32dd3c950392346ca69a0f1803501766c5c954f9 F ext/fts5/fts5_config.c e3421a76c2abd33a05ac09df0c97c64952d1e700 F ext/fts5/fts5_expr.c 8a0e643768666dc2bffe74104141274809699808 F ext/fts5/fts5_hash.c 7a87f9f2eae2216c710064821fa0621ac6a8ce7b -F ext/fts5/fts5_index.c 604e346f7a04b87f11090b91a80afa50bc74f88b -F ext/fts5/fts5_storage.c d56722960982d0c48ba1b88d9001fefed8cff1a4 +F ext/fts5/fts5_index.c dda2ed8dab9910aedd8de0169ca029c5336b9e42 +F ext/fts5/fts5_storage.c f7c12c9f454b2a525827b3d85fd222789236f548 F ext/fts5/fts5_tcl.c 1293fac2bb26903fd3d5cdee59c5885ba7e620d5 F ext/fts5/fts5_tokenize.c 7c61d5c35c3449597bdeaa54dd48afe26852c7b0 F ext/fts5/fts5_unicode2.c 9c7dd640d1f014bf5c3ee029759adfbb4d7e95a9 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba -F ext/fts5/test/fts5_common.tcl 7db772d34fa0139d4b58d2b321928c9ccd30f699 +F ext/fts5/test/fts5_common.tcl 
d9ea79fdbc9ecbb3541bf89d13ee0e03a8dc3d32 F ext/fts5/test/fts5aa.test 8ddbbcbedab67101dc9a86fd5c39d78b0e06515f F ext/fts5/test/fts5ab.test 3f3ad2fb9ed60a0df57b626fa6fe6ef41d4deee0 F ext/fts5/test/fts5ac.test 48181b7c873da0e3b4a3316760fcb90d88e7fbd8 @@ -134,6 +134,7 @@ F ext/fts5/test/fts5ak.test dc7bcd087dea0451ec40bba173962a0ba3a1d8ce F ext/fts5/test/fts5al.test 633fdb3d974629d01ba7734d180dbc2ad8ed772a F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b F ext/fts5/test/fts5content.test 4234e0b11e003fe1e80472aa637f70464396fdd0 +F ext/fts5/test/fts5corrupt.test b81ed310018ddffb34da7802f74018d94a2b3961 F ext/fts5/test/fts5ea.test 04695560a444fcc00c3c4f27783bdcfbf71f030c F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e F ext/fts5/test/fts5fault1.test f9bafb61b40061ad19b61d15003c5faeea4a57b5 @@ -1282,7 +1283,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 5b295897153e9b26cd0d2e7ea112a4d461d0a665 -R 02a6ea9937d0b3e40d0a3982c5f888d6 +P 939b7a5de25e064bdf08e03864c35ab718da6f6f +R 7096f8b96e0f85e1680222eb9ee6721b U dan -Z 8b5a907ba25d2b7d394f8c2162c154cb +Z 0515045012673cdccd49d82241057133 diff --git a/manifest.uuid b/manifest.uuid index 63b8637677..324db551da 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -939b7a5de25e064bdf08e03864c35ab718da6f6f \ No newline at end of file +51444f67c0cc58a3023eb1cd78e7cf889da6c80f \ No newline at end of file