1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-08-01 06:27:03 +03:00

Fix compression of keys stored on internal segment b-tree nodes by fts5.

FossilOrigin-Name: 51444f67c0cc58a3023eb1cd78e7cf889da6c80f
This commit is contained in:
dan
2015-01-23 17:43:21 +00:00
parent 626d9e3062
commit 641cb4360a
8 changed files with 167 additions and 49 deletions

View File

@ -1021,7 +1021,7 @@ static int fts5SeekCursor(Fts5Cursor *pCsr){
}else{
rc = sqlite3_reset(pCsr->pStmt);
if( rc==SQLITE_OK ){
rc = SQLITE_CORRUPT_VTAB;
rc = FTS5_CORRUPT;
}
}
}

View File

@ -33,6 +33,13 @@
#define FTS5_RANK_NAME "rank"
#define FTS5_ROWID_NAME "rowid"
/*
** FTS5_CORRUPT is the error code that fts5 code assigns to its return
** code when it detects a corrupt record (e.g. "rc = FTS5_CORRUPT;").
**
** In SQLITE_DEBUG builds the macro expands to a call to
** sqlite3Fts5Corrupt(), which is only declared here and must be defined
** in exactly one fts5 source file. In all other builds it expands
** directly to the constant SQLITE_CORRUPT_VTAB.
**
** NOTE(review): routing the debug build through a function call is
** presumably done so that a debugger breakpoint can be placed on
** sqlite3Fts5Corrupt() - confirm with the author.
*/
#ifdef SQLITE_DEBUG
# define FTS5_CORRUPT sqlite3Fts5Corrupt()
int sqlite3Fts5Corrupt(void);
#else
# define FTS5_CORRUPT SQLITE_CORRUPT_VTAB
#endif
/**************************************************************************
** Interface to code in fts5.c.
*/

View File

@ -256,10 +256,7 @@
FTS5_SEGMENT_ROWID(idx, segid, FTS5_SEGMENT_MAX_HEIGHT, pgno)
#ifdef SQLITE_DEBUG
static int fts5Corrupt() { return SQLITE_CORRUPT_VTAB; }
# define FTS5_CORRUPT fts5Corrupt()
#else
# define FTS5_CORRUPT SQLITE_CORRUPT_VTAB
int sqlite3Fts5Corrupt() { return SQLITE_CORRUPT_VTAB; }
#endif
@ -373,7 +370,7 @@ struct Fts5Structure {
struct Fts5PageWriter {
int pgno; /* Page number for this page */
Fts5Buffer buf; /* Buffer containing page data */
Fts5Buffer term; /* Buffer containing previous term on page */
Fts5Buffer term; /* Buffer containing previous term on page */
};
struct Fts5SegWriter {
int iIdx; /* Index to write to */
@ -383,6 +380,7 @@ struct Fts5SegWriter {
i64 iPrevRowid; /* Previous docid written to current leaf */
u8 bFirstRowidInDoclist; /* True if next rowid is first in doclist */
u8 bFirstRowidInPage; /* True if next rowid is first in page */
u8 bFirstTermInPage; /* True if next term will be first in leaf */
int nLeafWritten; /* Number of leaf pages written */
int nEmpty; /* Number of contiguous term-less nodes */
Fts5Buffer dlidx; /* Doclist index */
@ -2677,7 +2675,7 @@ static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
Fts5PageWriter *pPage = &pWriter->aWriter[0];
i64 iRowid;
if( pPage->term.n==0 ){
if( pWriter->bFirstTermInPage ){
/* No term was written to this page. */
assert( 0==fts5GetU16(&pPage->buf.p[2]) );
fts5WriteBtreeNoTerm(p, pWriter);
@ -2689,12 +2687,14 @@ static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
/* Initialize the next page. */
fts5BufferZero(&pPage->buf);
fts5BufferZero(&pPage->term);
fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
pPage->pgno++;
/* Increase the leaves written counter */
pWriter->nLeafWritten++;
/* The new leaf holds no terms */
pWriter->bFirstTermInPage = 1;
}
/*
@ -2717,23 +2717,38 @@ static void fts5WriteAppendTerm(
/* Zero the first term and first docid fields */
static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
assert( pPage->term.n==0 );
assert( pWriter->bFirstTermInPage );
}
if( p->rc ) return;
if( pPage->term.n==0 ){
if( pWriter->bFirstTermInPage ){
/* Update the "first term" field of the page header. */
assert( pPage->buf.p[2]==0 && pPage->buf.p[3]==0 );
fts5PutU16(&pPage->buf.p[2], pPage->buf.n);
nPrefix = 0;
if( pWriter->aWriter[0].pgno!=1 ){
fts5WriteBtreeTerm(p, pWriter, nTerm, pTerm);
if( pPage->pgno!=1 ){
/* This is the first term on a leaf that is not the leftmost leaf in
** the segment b-tree. In this case it is necessary to add a term to
** the b-tree hierarchy that is (a) larger than the largest term
** already written to the segment and (b) smaller than or equal to
** this term. In other words, a prefix of (pTerm/nTerm) that is one
** byte longer than the longest prefix (pTerm/nTerm) shares with the
** previous term.
**
** Usually, the previous term is available in pPage->term. The exception
** is if this is the first term written in an incremental-merge step.
** In this case the previous term is not available, so just write a
** copy of (pTerm/nTerm) into the parent node. This is slightly
** inefficient, but still correct. */
int n = nTerm;
if( pPage->term.n ){
n = 1 + fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm);
}
fts5WriteBtreeTerm(p, pWriter, n, pTerm);
pPage = &pWriter->aWriter[0];
}
}else{
nPrefix = fts5PrefixCompress(
pPage->term.n, pPage->term.p, nTerm, pTerm
);
nPrefix = fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm);
fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix);
}
@ -2744,6 +2759,7 @@ static void fts5WriteAppendTerm(
/* Update the Fts5PageWriter.term field. */
fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
pWriter->bFirstTermInPage = 0;
pWriter->bFirstRowidInPage = 0;
pWriter->bFirstRowidInDoclist = 1;
@ -2900,6 +2916,7 @@ static void fts5WriteInit(
if( pWriter->aWriter==0 ) return;
pWriter->nWriter = 1;
pWriter->aWriter[0].pgno = 1;
pWriter->bFirstTermInPage = 1;
}
static void fts5WriteInitForAppend(
@ -2937,6 +2954,8 @@ static void fts5WriteInitForAppend(
pWriter->nEmpty = pSeg->pgnoLast-1;
}
assert( (pgno+pWriter->nEmpty)==pSeg->pgnoLast );
pWriter->bFirstTermInPage = 1;
assert( pWriter->aWriter[0].term.n==0 );
}
}
@ -3918,6 +3937,21 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
int iIdx; /* Used to iterate through indexes */
u64 cksum2 = 0; /* Checksum based on contents of indexes */
/* Check that the internal nodes of each segment match the leaves */
for(iIdx=0; p->rc==SQLITE_OK && iIdx<=pConfig->nPrefix; iIdx++){
Fts5Structure *pStruct = fts5StructureRead(p, iIdx);
if( pStruct ){
int iLvl, iSeg;
for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
fts5IndexIntegrityCheckSegment(p, iIdx, pSeg);
}
}
}
fts5StructureRelease(pStruct);
}
/* Check that the checksum of the index matches the argument checksum */
for(iIdx=0; iIdx<=pConfig->nPrefix; iIdx++){
Fts5MultiSegIter *pIter;
@ -3950,21 +3984,6 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
}
if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT;
/* Check that the internal nodes of each segment match the leaves */
for(iIdx=0; p->rc==SQLITE_OK && iIdx<=pConfig->nPrefix; iIdx++){
Fts5Structure *pStruct = fts5StructureRead(p, iIdx);
if( pStruct ){
int iLvl, iSeg;
for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
fts5IndexIntegrityCheckSegment(p, iIdx, pSeg);
}
}
}
fts5StructureRelease(pStruct);
}
return fts5IndexReturn(p);
}
@ -3990,10 +4009,12 @@ int sqlite3Fts5IndexBeginWrite(Fts5Index *p, i64 iRowid){
if( rc==SQLITE_OK ){
p->apHash = apNew;
}else{
for(i=0; i<nHash; i++){
sqlite3Fts5HashFree(apNew[i]);
if( apNew ){
for(i=0; i<nHash; i++){
sqlite3Fts5HashFree(apNew[i]);
}
sqlite3_free(apNew);
}
sqlite3_free(apNew);
return rc;
}
}

View File

@ -789,7 +789,7 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p){
(void*)&ctx,
fts5StorageIntegrityCallback
);
if( ctx.szCol!=aColSize[i] ) rc = SQLITE_CORRUPT_VTAB;
if( ctx.szCol!=aColSize[i] ) rc = FTS5_CORRUPT;
aTotalSize[i] += ctx.szCol;
}
if( rc!=SQLITE_OK ) break;
@ -803,7 +803,7 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p){
int i;
rc = fts5StorageLoadTotals(p, 0);
for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
if( p->aTotalSize[i]!=aTotalSize[i] ) rc = SQLITE_CORRUPT_VTAB;
if( p->aTotalSize[i]!=aTotalSize[i] ) rc = FTS5_CORRUPT;
}
}
@ -812,12 +812,12 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p){
if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){
i64 nRow;
rc = fts5StorageCount(p, "content", &nRow);
if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = SQLITE_CORRUPT_VTAB;
if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT;
}
if( rc==SQLITE_OK ){
i64 nRow;
rc = fts5StorageCount(p, "docsize", &nRow);
if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = SQLITE_CORRUPT_VTAB;
if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT;
}
/* Pass the expected checksum down to the FTS index module. It will
@ -913,7 +913,7 @@ int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol){
}
rc = sqlite3_reset(pLookup);
if( bCorrupt && rc==SQLITE_OK ){
rc = SQLITE_CORRUPT_VTAB;
rc = FTS5_CORRUPT;
}
}
return rc;

View File

@ -123,6 +123,20 @@ proc fts5_level_segs {tbl} {
set ret
}
# Return a list containing one element per level of the index belonging
# to fts5 table $tbl. Each element is itself a list of the segment ids
# found on that level.
#
# The information is taken from the fts5_decode() rendering of the
# record stored at rowid 10 of the ${tbl}_data table. The first element
# of the decoded list is skipped; every remaining element describes one
# level. Within a level description the first two elements are skipped
# and each remaining element is scanned for an "id=<digits>" token.
#
# NOTE(review): rowid 10 is presumably the structure record of the main
# (non-prefix) index - confirm against the fts5 rowid layout.
proc fts5_level_segids {tbl} {
  set query "SELECT fts5_decode(rowid,block) aS r FROM ${tbl}_data WHERE rowid=10"
  set decoded [db one $query]

  set allLevels [list]
  foreach levelDesc [lrange $decoded 1 end] {
    set idsOnLevel [list]
    foreach segDesc [lrange $levelDesc 2 end] {
      # "->" receives the full match and is not used afterwards.
      regexp {id=([1234567890]*)} $segDesc -> segid
      lappend idsOnLevel $segid
    }
    lappend allLevels $idsOnLevel
  }
  return $allLevels
}
proc fts5_rnddoc {n} {
set map [list 0 a 1 b 2 c 3 d 4 e 5 f 6 g 7 h 8 i 9 j]
set doc [list]

View File

@ -0,0 +1,75 @@
# 2014 Dec 20
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
#
# Load the shared fts5 test helpers (fts5_level_segs, fts5_level_segids,
# etc.) from the directory containing this script.
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5corrupt
# 1.0: Create fts5 table t1 and set its 'pgsz' option to 32.
# NOTE(review): the small page size is presumably chosen so that the
# data inserted below spans many leaf pages - confirm.
do_execsql_test 1.0 {
CREATE VIRTUAL TABLE t1 USING fts5(x);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}
# 1.1: Insert rows 1..199 inside a single transaction. Row $i holds two
# tokens: $i repetitions of "x" and $i repetitions of "y". Afterwards
# fts5_level_segs is expected to report {1}.
do_test 1.1 {
db transaction {
for {set i 1} {$i < 200} {incr i} {
set doc [list [string repeat x $i] [string repeat y $i]]
execsql { INSERT INTO t1(rowid, x) VALUES($i, $doc) }
}
}
fts5_level_segs t1
} {1}
# Snapshot the database so that the corruption tests below can each
# start again from this state via db_restore_and_reopen.
db_save
# 1.2: The integrity-check must succeed on the unmodified database.
do_execsql_test 1.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
# Id of the first entry returned by fts5_level_segids; used to build the
# rowids of the records that are corrupted below.
set segid [lindex [fts5_level_segids t1] 0]
# 1.3: Delete one segment record from t1_data and verify that the
# integrity-check now fails with a generic SQL error.
# NOTE(review): the fts5_rowid() arguments presumably mean index 0,
# segment $segid, height 0 (leaf), page 4 - confirm.
do_test 1.3 {
execsql {
DELETE FROM t1_data WHERE rowid = fts5_rowid('segment', 0, $segid, 0, 4);
}
catchsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {1 {SQL logic error or missing database}}
# 1.4: Restore the saved database, overwrite the first four bytes of the
# same record with zeroes and verify that the integrity-check reports
# the database as malformed.
do_test 1.4 {
db_restore_and_reopen
execsql {
UPDATE t1_data set block = X'00000000' || substr(block, 5) WHERE
rowid = fts5_rowid('segment', 0, $segid, 0, 4);
}
catchsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {1 {database disk image is malformed}}
# Leave the database in its saved, uncorrupted state for later tests.
db_restore_and_reopen
#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}
#--------------------------------------------------------------------
# 2.0: Create fts5 table t2 and set its 'pgsz' option to 32.
do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE t2 USING fts5(x);
  INSERT INTO t2(t2, rank) VALUES('pgsz', 32);
}

# 2.1: Within one transaction insert 20 rows containing the term
# 'xxxxxxxxxx' followed by 20 rows containing 'xxxxxxxxxzzzz'. The two
# terms share a long common prefix.
# NOTE(review): this presumably exercises the prefix compression of
# terms stored on internal b-tree nodes - confirm.
do_test 2.1 {
  db transaction {
    for {set i 0} {$i < 20} {incr i} {
      execsql { INSERT INTO t2 VALUES('xxxxxxxxxx') }
    }
    for {set i 0} {$i < 20} {incr i} {
      execsql { INSERT INTO t2 VALUES('xxxxxxxxxzzzz') }
    }
  }
} {}

# Debugging aid: dumps the decoded contents of t2_data. It is disabled,
# like the equivalent statement for t1 earlier in this file, so that a
# normal test run does not write the whole table to stdout.
#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t2_data} {puts $r}
finish_test