1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-08-05 15:55:57 +03:00

Fix compression of keys stored on internal segment b-tree nodes by fts5.

FossilOrigin-Name: 51444f67c0cc58a3023eb1cd78e7cf889da6c80f
This commit is contained in:
dan
2015-01-23 17:43:21 +00:00
parent 626d9e3062
commit 641cb4360a
8 changed files with 167 additions and 49 deletions

View File

@@ -1021,7 +1021,7 @@ static int fts5SeekCursor(Fts5Cursor *pCsr){
}else{
rc = sqlite3_reset(pCsr->pStmt);
if( rc==SQLITE_OK ){
rc = SQLITE_CORRUPT_VTAB;
rc = FTS5_CORRUPT;
}
}
}

View File

@@ -33,6 +33,13 @@
#define FTS5_RANK_NAME "rank"
#define FTS5_ROWID_NAME "rowid"
/*
** FTS5_CORRUPT is the value fts5 code assigns to its return code when it
** detects a corrupt database. In SQLITE_DEBUG builds it expands to a call
** to sqlite3Fts5Corrupt() instead of to a constant - presumably so that a
** debugger breakpoint can be set on that function (TODO confirm). In all
** other builds it is simply SQLITE_CORRUPT_VTAB.
*/
#ifdef SQLITE_DEBUG
# define FTS5_CORRUPT sqlite3Fts5Corrupt()
int sqlite3Fts5Corrupt(void);
#else
# define FTS5_CORRUPT SQLITE_CORRUPT_VTAB
#endif
/**************************************************************************
** Interface to code in fts5.c.
*/

View File

@@ -256,10 +256,7 @@
FTS5_SEGMENT_ROWID(idx, segid, FTS5_SEGMENT_MAX_HEIGHT, pgno)
#ifdef SQLITE_DEBUG
static int fts5Corrupt() { return SQLITE_CORRUPT_VTAB; }
# define FTS5_CORRUPT fts5Corrupt()
#else
# define FTS5_CORRUPT SQLITE_CORRUPT_VTAB
int sqlite3Fts5Corrupt() { return SQLITE_CORRUPT_VTAB; }
#endif
@@ -383,6 +380,7 @@ struct Fts5SegWriter {
i64 iPrevRowid; /* Previous docid written to current leaf */
u8 bFirstRowidInDoclist; /* True if next rowid is first in doclist */
u8 bFirstRowidInPage; /* True if next rowid is first in page */
u8 bFirstTermInPage; /* True if next term will be first in leaf */
int nLeafWritten; /* Number of leaf pages written */
int nEmpty; /* Number of contiguous term-less nodes */
Fts5Buffer dlidx; /* Doclist index */
@@ -2677,7 +2675,7 @@ static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
Fts5PageWriter *pPage = &pWriter->aWriter[0];
i64 iRowid;
if( pPage->term.n==0 ){
if( pWriter->bFirstTermInPage ){
/* No term was written to this page. */
assert( 0==fts5GetU16(&pPage->buf.p[2]) );
fts5WriteBtreeNoTerm(p, pWriter);
@@ -2689,12 +2687,14 @@ static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
/* Initialize the next page. */
fts5BufferZero(&pPage->buf);
fts5BufferZero(&pPage->term);
fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
pPage->pgno++;
/* Increase the leaves written counter */
pWriter->nLeafWritten++;
/* The new leaf holds no terms */
pWriter->bFirstTermInPage = 1;
}
/*
@@ -2717,23 +2717,38 @@ static void fts5WriteAppendTerm(
/* Zero the first term and first docid fields */
static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
assert( pPage->term.n==0 );
assert( pWriter->bFirstTermInPage );
}
if( p->rc ) return;
if( pPage->term.n==0 ){
if( pWriter->bFirstTermInPage ){
/* Update the "first term" field of the page header. */
assert( pPage->buf.p[2]==0 && pPage->buf.p[3]==0 );
fts5PutU16(&pPage->buf.p[2], pPage->buf.n);
nPrefix = 0;
if( pWriter->aWriter[0].pgno!=1 ){
fts5WriteBtreeTerm(p, pWriter, nTerm, pTerm);
if( pPage->pgno!=1 ){
/* This is the first term on a leaf that is not the leftmost leaf in
** the segment b-tree. In this case it is necessary to add a term to
** the b-tree hierarchy that is (a) larger than the largest term
** already written to the segment and (b) smaller than or equal to
** this term. In other words, a prefix of (pTerm/nTerm) that is one
** byte longer than the longest prefix (pTerm/nTerm) shares with the
** previous term.
**
** Usually, the previous term is available in pPage->term. The exception
** is if this is the first term written in an incremental-merge step.
** In this case the previous term is not available, so just write a
** copy of (pTerm/nTerm) into the parent node. This is slightly
** inefficient, but still correct. */
int n = nTerm;
if( pPage->term.n ){
n = 1 + fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm);
}
fts5WriteBtreeTerm(p, pWriter, n, pTerm);
pPage = &pWriter->aWriter[0];
}
}else{
nPrefix = fts5PrefixCompress(
pPage->term.n, pPage->term.p, nTerm, pTerm
);
nPrefix = fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm);
fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix);
}
@@ -2744,6 +2759,7 @@ static void fts5WriteAppendTerm(
/* Update the Fts5PageWriter.term field. */
fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
pWriter->bFirstTermInPage = 0;
pWriter->bFirstRowidInPage = 0;
pWriter->bFirstRowidInDoclist = 1;
@@ -2900,6 +2916,7 @@ static void fts5WriteInit(
if( pWriter->aWriter==0 ) return;
pWriter->nWriter = 1;
pWriter->aWriter[0].pgno = 1;
pWriter->bFirstTermInPage = 1;
}
static void fts5WriteInitForAppend(
@@ -2937,6 +2954,8 @@ static void fts5WriteInitForAppend(
pWriter->nEmpty = pSeg->pgnoLast-1;
}
assert( (pgno+pWriter->nEmpty)==pSeg->pgnoLast );
pWriter->bFirstTermInPage = 1;
assert( pWriter->aWriter[0].term.n==0 );
}
}
@@ -3918,6 +3937,21 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
int iIdx; /* Used to iterate through indexes */
u64 cksum2 = 0; /* Checksum based on contents of indexes */
/* Check that the internal nodes of each segment match the leaves */
for(iIdx=0; p->rc==SQLITE_OK && iIdx<=pConfig->nPrefix; iIdx++){
Fts5Structure *pStruct = fts5StructureRead(p, iIdx);
if( pStruct ){
int iLvl, iSeg;
for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
fts5IndexIntegrityCheckSegment(p, iIdx, pSeg);
}
}
}
fts5StructureRelease(pStruct);
}
/* Check that the checksum of the index matches the argument checksum */
for(iIdx=0; iIdx<=pConfig->nPrefix; iIdx++){
Fts5MultiSegIter *pIter;
@@ -3950,21 +3984,6 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
}
if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT;
/* Check that the internal nodes of each segment match the leaves */
for(iIdx=0; p->rc==SQLITE_OK && iIdx<=pConfig->nPrefix; iIdx++){
Fts5Structure *pStruct = fts5StructureRead(p, iIdx);
if( pStruct ){
int iLvl, iSeg;
for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
fts5IndexIntegrityCheckSegment(p, iIdx, pSeg);
}
}
}
fts5StructureRelease(pStruct);
}
return fts5IndexReturn(p);
}
@@ -3990,10 +4009,12 @@ int sqlite3Fts5IndexBeginWrite(Fts5Index *p, i64 iRowid){
if( rc==SQLITE_OK ){
p->apHash = apNew;
}else{
if( apNew ){
for(i=0; i<nHash; i++){
sqlite3Fts5HashFree(apNew[i]);
}
sqlite3_free(apNew);
}
return rc;
}
}

View File

@@ -789,7 +789,7 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p){
(void*)&ctx,
fts5StorageIntegrityCallback
);
if( ctx.szCol!=aColSize[i] ) rc = SQLITE_CORRUPT_VTAB;
if( ctx.szCol!=aColSize[i] ) rc = FTS5_CORRUPT;
aTotalSize[i] += ctx.szCol;
}
if( rc!=SQLITE_OK ) break;
@@ -803,7 +803,7 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p){
int i;
rc = fts5StorageLoadTotals(p, 0);
for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
if( p->aTotalSize[i]!=aTotalSize[i] ) rc = SQLITE_CORRUPT_VTAB;
if( p->aTotalSize[i]!=aTotalSize[i] ) rc = FTS5_CORRUPT;
}
}
@@ -812,12 +812,12 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p){
if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){
i64 nRow;
rc = fts5StorageCount(p, "content", &nRow);
if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = SQLITE_CORRUPT_VTAB;
if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT;
}
if( rc==SQLITE_OK ){
i64 nRow;
rc = fts5StorageCount(p, "docsize", &nRow);
if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = SQLITE_CORRUPT_VTAB;
if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT;
}
/* Pass the expected checksum down to the FTS index module. It will
@@ -913,7 +913,7 @@ int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol){
}
rc = sqlite3_reset(pLookup);
if( bCorrupt && rc==SQLITE_OK ){
rc = SQLITE_CORRUPT_VTAB;
rc = FTS5_CORRUPT;
}
}
return rc;

View File

@@ -123,6 +123,20 @@ proc fts5_level_segs {tbl} {
set ret
}
# Return a list containing one element for each level found in the decoded
# structure record (rowid=10) of table ${tbl}_data. Each element is itself
# a list of the "id=N" values extracted from the segment entries of that
# level.
#
# The first element of the decoded record and the first two elements of
# each level are skipped (presumably header fields - verify against the
# output of fts5_decode()).
#
proc fts5_level_segids {tbl} {
  set query "SELECT fts5_decode(rowid,block) aS r FROM ${tbl}_data WHERE rowid=10"
  set decoded [db one $query]

  set result {}
  foreach level [lrange $decoded 1 end] {
    set ids {}
    foreach seg [lrange $level 2 end] {
      # Pull the numeric segment id out of the "id=N" field.
      regexp {id=([1234567890]*)} $seg -> segid
      lappend ids $segid
    }
    lappend result $ids
  }

  return $result
}
proc fts5_rnddoc {n} {
set map [list 0 a 1 b 2 c 3 d 4 e 5 f 6 g 7 h 8 i 9 j]
set doc [list]

View File

@@ -0,0 +1,75 @@
# 2014 Dec 20
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
#
# This file contains tests for the FTS5 'integrity-check' command. It checks
# that damage done directly to the %_data table is reported as an error,
# and builds an index from terms that share a long common prefix.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5corrupt

#--------------------------------------------------------------------
# Test cases 1.*: create a table with a very small page size (pgsz=32),
# populate it, save a copy of the database, then damage one record of the
# %_data table in two different ways. Each time, 'integrity-check' must
# fail.
#
do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x);
  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}

# Insert 199 rows within a single transaction. Afterwards fts5_level_segs
# is expected to report {1}.
do_test 1.1 {
  db transaction {
    for {set i 1} {$i < 200} {incr i} {
      set doc [list [string repeat x $i] [string repeat y $i]]
      execsql { INSERT INTO t1(rowid, x) VALUES($i, $doc) }
    }
  }
  fts5_level_segs t1
} {1}
db_save

# The undamaged database passes the integrity-check.
do_execsql_test 1.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
set segid [lindex [fts5_level_segids t1] 0]

# Delete one record of the segment from the %_data table. The
# integrity-check must now return an error.
do_test 1.3 {
  execsql {
    DELETE FROM t1_data WHERE rowid = fts5_rowid('segment', 0, $segid, 0, 4);
  }
  catchsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {1 {SQL logic error or missing database}}

# Restore the saved database and overwrite the first four bytes of the same
# record with zeroes. The integrity-check must report corruption.
do_test 1.4 {
  db_restore_and_reopen
  execsql {
    UPDATE t1_data set block = X'00000000' || substr(block, 5) WHERE
      rowid = fts5_rowid('segment', 0, $segid, 0, 4);
  }
  catchsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {1 {database disk image is malformed}}

db_restore_and_reopen
#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}

#--------------------------------------------------------------------
# Test cases 2.*: insert two groups of rows whose terms share a long common
# prefix, again using pgsz=32.
#
do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE t2 USING fts5(x);
  INSERT INTO t2(t2, rank) VALUES('pgsz', 32);
}

do_test 2.1 {
  db transaction {
    for {set i 0} {$i < 20} {incr i} {
      execsql { INSERT INTO t2 VALUES('xxxxxxxxxx') }
    }
    for {set i 0} {$i < 20} {incr i} {
      execsql { INSERT INTO t2 VALUES('xxxxxxxxxzzzz') }
    }
  }
} {}

# Debugging aid. Left disabled so that the test script does not write the
# decoded contents of t2_data to stdout on every run.
#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t2_data} {puts $r}

finish_test

View File

@@ -1,5 +1,5 @@
C Remove\ssome\sredundant\scode\sfrom\sfts5.
D 2015-01-23T06:50:33.338
C Fix\scompression\sof\skeys\sstored\son\sinternal\ssegment\sb-tree\snodes\sby\sfts5.
D 2015-01-23T17:43:21.454
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in 5407a688f4d77a05c18a8142be8ae5a2829dd610
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@@ -104,22 +104,22 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c
F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7
F ext/fts3/unicode/mkunicode.tcl 4199cb887040ee3c3cd59a5171ddb0566904586e
F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a
F ext/fts5/fts5.c 0ba5a8f27e1aa4deab82f0fc295d55f67dfe7f34
F ext/fts5/fts5.c 41b852b654f79f522668bc7ba292755fb261f855
F ext/fts5/fts5.h f931954065693898d26c51f23f1d27200184a69a
F ext/fts5/fts5Int.h da4ad7558c2284fdf3297f907e2c5454a2237e15
F ext/fts5/fts5Int.h 1d8f968b8ff71de15176acf8f4b14a2bdebcb6e3
F ext/fts5/fts5_aux.c 549aef152b0fd46020f5595d861b1fd60b3f9b4f
F ext/fts5/fts5_buffer.c 32dd3c950392346ca69a0f1803501766c5c954f9
F ext/fts5/fts5_config.c e3421a76c2abd33a05ac09df0c97c64952d1e700
F ext/fts5/fts5_expr.c 8a0e643768666dc2bffe74104141274809699808
F ext/fts5/fts5_hash.c 7a87f9f2eae2216c710064821fa0621ac6a8ce7b
F ext/fts5/fts5_index.c 604e346f7a04b87f11090b91a80afa50bc74f88b
F ext/fts5/fts5_storage.c d56722960982d0c48ba1b88d9001fefed8cff1a4
F ext/fts5/fts5_index.c dda2ed8dab9910aedd8de0169ca029c5336b9e42
F ext/fts5/fts5_storage.c f7c12c9f454b2a525827b3d85fd222789236f548
F ext/fts5/fts5_tcl.c 1293fac2bb26903fd3d5cdee59c5885ba7e620d5
F ext/fts5/fts5_tokenize.c 7c61d5c35c3449597bdeaa54dd48afe26852c7b0
F ext/fts5/fts5_unicode2.c 9c7dd640d1f014bf5c3ee029759adfbb4d7e95a9
F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9
F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba
F ext/fts5/test/fts5_common.tcl 7db772d34fa0139d4b58d2b321928c9ccd30f699
F ext/fts5/test/fts5_common.tcl d9ea79fdbc9ecbb3541bf89d13ee0e03a8dc3d32
F ext/fts5/test/fts5aa.test 8ddbbcbedab67101dc9a86fd5c39d78b0e06515f
F ext/fts5/test/fts5ab.test 3f3ad2fb9ed60a0df57b626fa6fe6ef41d4deee0
F ext/fts5/test/fts5ac.test 48181b7c873da0e3b4a3316760fcb90d88e7fbd8
@@ -134,6 +134,7 @@ F ext/fts5/test/fts5ak.test dc7bcd087dea0451ec40bba173962a0ba3a1d8ce
F ext/fts5/test/fts5al.test 633fdb3d974629d01ba7734d180dbc2ad8ed772a
F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b
F ext/fts5/test/fts5content.test 4234e0b11e003fe1e80472aa637f70464396fdd0
F ext/fts5/test/fts5corrupt.test b81ed310018ddffb34da7802f74018d94a2b3961
F ext/fts5/test/fts5ea.test 04695560a444fcc00c3c4f27783bdcfbf71f030c
F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e
F ext/fts5/test/fts5fault1.test f9bafb61b40061ad19b61d15003c5faeea4a57b5
@@ -1282,7 +1283,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
P 5b295897153e9b26cd0d2e7ea112a4d461d0a665
R 02a6ea9937d0b3e40d0a3982c5f888d6
P 939b7a5de25e064bdf08e03864c35ab718da6f6f
R 7096f8b96e0f85e1680222eb9ee6721b
U dan
Z 8b5a907ba25d2b7d394f8c2162c154cb
Z 0515045012673cdccd49d82241057133

View File

@@ -1 +1 @@
939b7a5de25e064bdf08e03864c35ab718da6f6f
51444f67c0cc58a3023eb1cd78e7cf889da6c80f