mirror of
https://github.com/sqlite/sqlite.git
synced 2025-08-05 15:55:57 +03:00
Fix compression of keys stored on internal segment b-tree nodes by fts5.
FossilOrigin-Name: 51444f67c0cc58a3023eb1cd78e7cf889da6c80f
This commit is contained in:
@@ -1021,7 +1021,7 @@ static int fts5SeekCursor(Fts5Cursor *pCsr){
|
||||
}else{
|
||||
rc = sqlite3_reset(pCsr->pStmt);
|
||||
if( rc==SQLITE_OK ){
|
||||
rc = SQLITE_CORRUPT_VTAB;
|
||||
rc = FTS5_CORRUPT;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -33,6 +33,13 @@
|
||||
#define FTS5_RANK_NAME "rank"
|
||||
#define FTS5_ROWID_NAME "rowid"
|
||||
|
||||
#ifdef SQLITE_DEBUG
|
||||
# define FTS5_CORRUPT sqlite3Fts5Corrupt()
|
||||
int sqlite3Fts5Corrupt(void);
|
||||
#else
|
||||
# define FTS5_CORRUPT SQLITE_CORRUPT_VTAB
|
||||
#endif
|
||||
|
||||
/**************************************************************************
|
||||
** Interface to code in fts5.c.
|
||||
*/
|
||||
|
@@ -256,10 +256,7 @@
|
||||
FTS5_SEGMENT_ROWID(idx, segid, FTS5_SEGMENT_MAX_HEIGHT, pgno)
|
||||
|
||||
#ifdef SQLITE_DEBUG
|
||||
/* Debug-build helper used by the FTS5_CORRUPT macro: returns
** SQLITE_CORRUPT_VTAB. Declared with (void) so that the definition is a
** full prototype rather than an unspecified parameter list. */
static int fts5Corrupt(void) { return SQLITE_CORRUPT_VTAB; }
|
||||
# define FTS5_CORRUPT fts5Corrupt()
|
||||
#else
|
||||
# define FTS5_CORRUPT SQLITE_CORRUPT_VTAB
|
||||
/* Return SQLITE_CORRUPT_VTAB. In SQLITE_DEBUG builds the FTS5_CORRUPT macro
** expands to a call to this function. The (void) parameter list matches the
** prototype declared alongside the macro. */
int sqlite3Fts5Corrupt(void) { return SQLITE_CORRUPT_VTAB; }
|
||||
#endif
|
||||
|
||||
|
||||
@@ -383,6 +380,7 @@ struct Fts5SegWriter {
|
||||
i64 iPrevRowid; /* Previous docid written to current leaf */
|
||||
u8 bFirstRowidInDoclist; /* True if next rowid is first in doclist */
|
||||
u8 bFirstRowidInPage; /* True if next rowid is first in page */
|
||||
u8 bFirstTermInPage; /* True if next term will be first in leaf */
|
||||
int nLeafWritten; /* Number of leaf pages written */
|
||||
int nEmpty; /* Number of contiguous term-less nodes */
|
||||
Fts5Buffer dlidx; /* Doclist index */
|
||||
@@ -2677,7 +2675,7 @@ static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
|
||||
Fts5PageWriter *pPage = &pWriter->aWriter[0];
|
||||
i64 iRowid;
|
||||
|
||||
if( pPage->term.n==0 ){
|
||||
if( pWriter->bFirstTermInPage ){
|
||||
/* No term was written to this page. */
|
||||
assert( 0==fts5GetU16(&pPage->buf.p[2]) );
|
||||
fts5WriteBtreeNoTerm(p, pWriter);
|
||||
@@ -2689,12 +2687,14 @@ static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
|
||||
|
||||
/* Initialize the next page. */
|
||||
fts5BufferZero(&pPage->buf);
|
||||
fts5BufferZero(&pPage->term);
|
||||
fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
|
||||
pPage->pgno++;
|
||||
|
||||
/* Increase the leaves written counter */
|
||||
pWriter->nLeafWritten++;
|
||||
|
||||
/* The new leaf holds no terms */
|
||||
pWriter->bFirstTermInPage = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2717,23 +2717,38 @@ static void fts5WriteAppendTerm(
|
||||
/* Zero the first term and first docid fields */
|
||||
static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
|
||||
fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
|
||||
assert( pPage->term.n==0 );
|
||||
assert( pWriter->bFirstTermInPage );
|
||||
}
|
||||
if( p->rc ) return;
|
||||
|
||||
if( pPage->term.n==0 ){
|
||||
if( pWriter->bFirstTermInPage ){
|
||||
/* Update the "first term" field of the page header. */
|
||||
assert( pPage->buf.p[2]==0 && pPage->buf.p[3]==0 );
|
||||
fts5PutU16(&pPage->buf.p[2], pPage->buf.n);
|
||||
nPrefix = 0;
|
||||
if( pWriter->aWriter[0].pgno!=1 ){
|
||||
fts5WriteBtreeTerm(p, pWriter, nTerm, pTerm);
|
||||
if( pPage->pgno!=1 ){
|
||||
/* This is the first term on a leaf that is not the leftmost leaf in
|
||||
** the segment b-tree. In this case it is necessary to add a term to
|
||||
** the b-tree hierarchy that is (a) larger than the largest term
|
||||
** already written to the segment and (b) smaller than or equal to
|
||||
** this term. In other words, a prefix of (pTerm/nTerm) that is one
|
||||
** byte longer than the longest prefix (pTerm/nTerm) shares with the
|
||||
** previous term.
|
||||
**
|
||||
** Usually, the previous term is available in pPage->term. The exception
|
||||
** is if this is the first term written in an incremental-merge step.
|
||||
** In this case the previous term is not available, so just write a
|
||||
** copy of (pTerm/nTerm) into the parent node. This is slightly
|
||||
** inefficient, but still correct. */
|
||||
int n = nTerm;
|
||||
if( pPage->term.n ){
|
||||
n = 1 + fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm);
|
||||
}
|
||||
fts5WriteBtreeTerm(p, pWriter, n, pTerm);
|
||||
pPage = &pWriter->aWriter[0];
|
||||
}
|
||||
}else{
|
||||
nPrefix = fts5PrefixCompress(
|
||||
pPage->term.n, pPage->term.p, nTerm, pTerm
|
||||
);
|
||||
nPrefix = fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm);
|
||||
fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix);
|
||||
}
|
||||
|
||||
@@ -2744,6 +2759,7 @@ static void fts5WriteAppendTerm(
|
||||
|
||||
/* Update the Fts5PageWriter.term field. */
|
||||
fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
|
||||
pWriter->bFirstTermInPage = 0;
|
||||
|
||||
pWriter->bFirstRowidInPage = 0;
|
||||
pWriter->bFirstRowidInDoclist = 1;
|
||||
@@ -2900,6 +2916,7 @@ static void fts5WriteInit(
|
||||
if( pWriter->aWriter==0 ) return;
|
||||
pWriter->nWriter = 1;
|
||||
pWriter->aWriter[0].pgno = 1;
|
||||
pWriter->bFirstTermInPage = 1;
|
||||
}
|
||||
|
||||
static void fts5WriteInitForAppend(
|
||||
@@ -2937,6 +2954,8 @@ static void fts5WriteInitForAppend(
|
||||
pWriter->nEmpty = pSeg->pgnoLast-1;
|
||||
}
|
||||
assert( (pgno+pWriter->nEmpty)==pSeg->pgnoLast );
|
||||
pWriter->bFirstTermInPage = 1;
|
||||
assert( pWriter->aWriter[0].term.n==0 );
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3918,6 +3937,21 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
|
||||
int iIdx; /* Used to iterate through indexes */
|
||||
u64 cksum2 = 0; /* Checksum based on contents of indexes */
|
||||
|
||||
/* Check that the internal nodes of each segment match the leaves */
|
||||
for(iIdx=0; p->rc==SQLITE_OK && iIdx<=pConfig->nPrefix; iIdx++){
|
||||
Fts5Structure *pStruct = fts5StructureRead(p, iIdx);
|
||||
if( pStruct ){
|
||||
int iLvl, iSeg;
|
||||
for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
|
||||
for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
|
||||
Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
|
||||
fts5IndexIntegrityCheckSegment(p, iIdx, pSeg);
|
||||
}
|
||||
}
|
||||
}
|
||||
fts5StructureRelease(pStruct);
|
||||
}
|
||||
|
||||
/* Check that the checksum of the index matches the argument checksum */
|
||||
for(iIdx=0; iIdx<=pConfig->nPrefix; iIdx++){
|
||||
Fts5MultiSegIter *pIter;
|
||||
@@ -3950,21 +3984,6 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
|
||||
}
|
||||
if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT;
|
||||
|
||||
/* Check that the internal nodes of each segment match the leaves */
|
||||
for(iIdx=0; p->rc==SQLITE_OK && iIdx<=pConfig->nPrefix; iIdx++){
|
||||
Fts5Structure *pStruct = fts5StructureRead(p, iIdx);
|
||||
if( pStruct ){
|
||||
int iLvl, iSeg;
|
||||
for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
|
||||
for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
|
||||
Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
|
||||
fts5IndexIntegrityCheckSegment(p, iIdx, pSeg);
|
||||
}
|
||||
}
|
||||
}
|
||||
fts5StructureRelease(pStruct);
|
||||
}
|
||||
|
||||
return fts5IndexReturn(p);
|
||||
}
|
||||
|
||||
@@ -3990,10 +4009,12 @@ int sqlite3Fts5IndexBeginWrite(Fts5Index *p, i64 iRowid){
|
||||
if( rc==SQLITE_OK ){
|
||||
p->apHash = apNew;
|
||||
}else{
|
||||
if( apNew ){
|
||||
for(i=0; i<nHash; i++){
|
||||
sqlite3Fts5HashFree(apNew[i]);
|
||||
}
|
||||
sqlite3_free(apNew);
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
@@ -789,7 +789,7 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p){
|
||||
(void*)&ctx,
|
||||
fts5StorageIntegrityCallback
|
||||
);
|
||||
if( ctx.szCol!=aColSize[i] ) rc = SQLITE_CORRUPT_VTAB;
|
||||
if( ctx.szCol!=aColSize[i] ) rc = FTS5_CORRUPT;
|
||||
aTotalSize[i] += ctx.szCol;
|
||||
}
|
||||
if( rc!=SQLITE_OK ) break;
|
||||
@@ -803,7 +803,7 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p){
|
||||
int i;
|
||||
rc = fts5StorageLoadTotals(p, 0);
|
||||
for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
|
||||
if( p->aTotalSize[i]!=aTotalSize[i] ) rc = SQLITE_CORRUPT_VTAB;
|
||||
if( p->aTotalSize[i]!=aTotalSize[i] ) rc = FTS5_CORRUPT;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -812,12 +812,12 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p){
|
||||
if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){
|
||||
i64 nRow;
|
||||
rc = fts5StorageCount(p, "content", &nRow);
|
||||
if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = SQLITE_CORRUPT_VTAB;
|
||||
if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT;
|
||||
}
|
||||
if( rc==SQLITE_OK ){
|
||||
i64 nRow;
|
||||
rc = fts5StorageCount(p, "docsize", &nRow);
|
||||
if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = SQLITE_CORRUPT_VTAB;
|
||||
if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT;
|
||||
}
|
||||
|
||||
/* Pass the expected checksum down to the FTS index module. It will
|
||||
@@ -913,7 +913,7 @@ int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol){
|
||||
}
|
||||
rc = sqlite3_reset(pLookup);
|
||||
if( bCorrupt && rc==SQLITE_OK ){
|
||||
rc = SQLITE_CORRUPT_VTAB;
|
||||
rc = FTS5_CORRUPT;
|
||||
}
|
||||
}
|
||||
return rc;
|
||||
|
@@ -123,6 +123,20 @@ proc fts5_level_segs {tbl} {
|
||||
set ret
|
||||
}
|
||||
|
||||
# Return a list of lists of segment ids for fts5 table $tbl.
#
# The record with rowid=10 in ${tbl}_data is decoded with fts5_decode().
# The first element of the decoded list is skipped; each remaining element
# contributes one sub-list to the result. Within such an element the first
# two items are skipped and the number following "id=" is taken from each
# remaining item.
proc fts5_level_segids {tbl} {
|
||||
set sql "SELECT fts5_decode(rowid,block) aS r FROM ${tbl}_data WHERE rowid=10"
|
||||
set ret [list]
|
||||
foreach L [lrange [db one $sql] 1 end] {
|
||||
set lvl [list]
|
||||
foreach S [lrange $L 2 end] {
|
||||
# NOTE(review): if the pattern does not match, segid keeps its previous
# value (or is unset on the first iteration) - confirm that every item
# always carries an "id=" field.
regexp {id=([1234567890]*)} $S -> segid
|
||||
lappend lvl $segid
|
||||
}
|
||||
lappend ret $lvl
|
||||
}
|
||||
set ret
|
||||
}
|
||||
|
||||
proc fts5_rnddoc {n} {
|
||||
set map [list 0 a 1 b 2 c 3 d 4 e 5 f 6 g 7 h 8 i 9 j]
|
||||
set doc [list]
|
||||
|
75
ext/fts5/test/fts5corrupt.test
Normal file
75
ext/fts5/test/fts5corrupt.test
Normal file
@@ -0,0 +1,75 @@
|
||||
# 2014 Dec 20
|
||||
#
|
||||
# The author disclaims copyright to this source code. In place of
|
||||
# a legal notice, here is a blessing:
|
||||
#
|
||||
# May you do good and not evil.
|
||||
# May you find forgiveness for yourself and forgive others.
|
||||
# May you share freely, never taking more than you give.
|
||||
#
|
||||
#***********************************************************************
|
||||
#
|
||||
#
|
||||
|
||||
source [file join [file dirname [info script]] fts5_common.tcl]
|
||||
set testprefix fts5corrupt
|
||||
|
||||
do_execsql_test 1.0 {
|
||||
CREATE VIRTUAL TABLE t1 USING fts5(x);
|
||||
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
|
||||
}
|
||||
|
||||
do_test 1.1 {
|
||||
db transaction {
|
||||
for {set i 1} {$i < 200} {incr i} {
|
||||
set doc [list [string repeat x $i] [string repeat y $i]]
|
||||
execsql { INSERT INTO t1(rowid, x) VALUES($i, $doc) }
|
||||
}
|
||||
}
|
||||
fts5_level_segs t1
|
||||
} {1}
|
||||
db_save
|
||||
|
||||
do_execsql_test 1.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
|
||||
set segid [lindex [fts5_level_segids t1] 0]
|
||||
|
||||
do_test 1.3 {
|
||||
execsql {
|
||||
DELETE FROM t1_data WHERE rowid = fts5_rowid('segment', 0, $segid, 0, 4);
|
||||
}
|
||||
catchsql { INSERT INTO t1(t1) VALUES('integrity-check') }
|
||||
} {1 {SQL logic error or missing database}}
|
||||
|
||||
do_test 1.4 {
|
||||
db_restore_and_reopen
|
||||
execsql {
|
||||
UPDATE t1_data set block = X'00000000' || substr(block, 5) WHERE
|
||||
rowid = fts5_rowid('segment', 0, $segid, 0, 4);
|
||||
}
|
||||
catchsql { INSERT INTO t1(t1) VALUES('integrity-check') }
|
||||
} {1 {database disk image is malformed}}
|
||||
|
||||
db_restore_and_reopen
|
||||
#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}
|
||||
|
||||
|
||||
|
||||
#--------------------------------------------------------------------
|
||||
do_execsql_test 2.0 {
|
||||
CREATE VIRTUAL TABLE t2 USING fts5(x);
|
||||
INSERT INTO t2(t2, rank) VALUES('pgsz', 32);
|
||||
}
|
||||
do_test 2.1 {
|
||||
db transaction {
|
||||
for {set i 0} {$i < 20} {incr i} {
|
||||
execsql { INSERT INTO t2 VALUES('xxxxxxxxxx') }
|
||||
}
|
||||
for {set i 0} {$i < 20} {incr i} {
|
||||
execsql { INSERT INTO t2 VALUES('xxxxxxxxxzzzz') }
|
||||
}
|
||||
}
|
||||
} {}
|
||||
# Run fts5_decode() over every record of t2_data, discarding the decoded
# text instead of printing it, so the test run produces no stray output.
db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t2_data} {}
|
||||
|
||||
finish_test
|
||||
|
21
manifest
21
manifest
@@ -1,5 +1,5 @@
|
||||
C Remove\ssome\sredundant\scode\sfrom\sfts5.
|
||||
D 2015-01-23T06:50:33.338
|
||||
C Fix\scompression\sof\skeys\sstored\son\sinternal\ssegment\sb-tree\snodes\sby\sfts5.
|
||||
D 2015-01-23T17:43:21.454
|
||||
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
|
||||
F Makefile.in 5407a688f4d77a05c18a8142be8ae5a2829dd610
|
||||
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
|
||||
@@ -104,22 +104,22 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c
|
||||
F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7
|
||||
F ext/fts3/unicode/mkunicode.tcl 4199cb887040ee3c3cd59a5171ddb0566904586e
|
||||
F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a
|
||||
F ext/fts5/fts5.c 0ba5a8f27e1aa4deab82f0fc295d55f67dfe7f34
|
||||
F ext/fts5/fts5.c 41b852b654f79f522668bc7ba292755fb261f855
|
||||
F ext/fts5/fts5.h f931954065693898d26c51f23f1d27200184a69a
|
||||
F ext/fts5/fts5Int.h da4ad7558c2284fdf3297f907e2c5454a2237e15
|
||||
F ext/fts5/fts5Int.h 1d8f968b8ff71de15176acf8f4b14a2bdebcb6e3
|
||||
F ext/fts5/fts5_aux.c 549aef152b0fd46020f5595d861b1fd60b3f9b4f
|
||||
F ext/fts5/fts5_buffer.c 32dd3c950392346ca69a0f1803501766c5c954f9
|
||||
F ext/fts5/fts5_config.c e3421a76c2abd33a05ac09df0c97c64952d1e700
|
||||
F ext/fts5/fts5_expr.c 8a0e643768666dc2bffe74104141274809699808
|
||||
F ext/fts5/fts5_hash.c 7a87f9f2eae2216c710064821fa0621ac6a8ce7b
|
||||
F ext/fts5/fts5_index.c 604e346f7a04b87f11090b91a80afa50bc74f88b
|
||||
F ext/fts5/fts5_storage.c d56722960982d0c48ba1b88d9001fefed8cff1a4
|
||||
F ext/fts5/fts5_index.c dda2ed8dab9910aedd8de0169ca029c5336b9e42
|
||||
F ext/fts5/fts5_storage.c f7c12c9f454b2a525827b3d85fd222789236f548
|
||||
F ext/fts5/fts5_tcl.c 1293fac2bb26903fd3d5cdee59c5885ba7e620d5
|
||||
F ext/fts5/fts5_tokenize.c 7c61d5c35c3449597bdeaa54dd48afe26852c7b0
|
||||
F ext/fts5/fts5_unicode2.c 9c7dd640d1f014bf5c3ee029759adfbb4d7e95a9
|
||||
F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9
|
||||
F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba
|
||||
F ext/fts5/test/fts5_common.tcl 7db772d34fa0139d4b58d2b321928c9ccd30f699
|
||||
F ext/fts5/test/fts5_common.tcl d9ea79fdbc9ecbb3541bf89d13ee0e03a8dc3d32
|
||||
F ext/fts5/test/fts5aa.test 8ddbbcbedab67101dc9a86fd5c39d78b0e06515f
|
||||
F ext/fts5/test/fts5ab.test 3f3ad2fb9ed60a0df57b626fa6fe6ef41d4deee0
|
||||
F ext/fts5/test/fts5ac.test 48181b7c873da0e3b4a3316760fcb90d88e7fbd8
|
||||
@@ -134,6 +134,7 @@ F ext/fts5/test/fts5ak.test dc7bcd087dea0451ec40bba173962a0ba3a1d8ce
|
||||
F ext/fts5/test/fts5al.test 633fdb3d974629d01ba7734d180dbc2ad8ed772a
|
||||
F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b
|
||||
F ext/fts5/test/fts5content.test 4234e0b11e003fe1e80472aa637f70464396fdd0
|
||||
F ext/fts5/test/fts5corrupt.test b81ed310018ddffb34da7802f74018d94a2b3961
|
||||
F ext/fts5/test/fts5ea.test 04695560a444fcc00c3c4f27783bdcfbf71f030c
|
||||
F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e
|
||||
F ext/fts5/test/fts5fault1.test f9bafb61b40061ad19b61d15003c5faeea4a57b5
|
||||
@@ -1282,7 +1283,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
|
||||
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
|
||||
F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32
|
||||
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
|
||||
P 5b295897153e9b26cd0d2e7ea112a4d461d0a665
|
||||
R 02a6ea9937d0b3e40d0a3982c5f888d6
|
||||
P 939b7a5de25e064bdf08e03864c35ab718da6f6f
|
||||
R 7096f8b96e0f85e1680222eb9ee6721b
|
||||
U dan
|
||||
Z 8b5a907ba25d2b7d394f8c2162c154cb
|
||||
Z 0515045012673cdccd49d82241057133
|
||||
|
@@ -1 +1 @@
|
||||
939b7a5de25e064bdf08e03864c35ab718da6f6f
|
||||
51444f67c0cc58a3023eb1cd78e7cf889da6c80f
|
Reference in New Issue
Block a user