mirror of
https://github.com/sqlite/sqlite.git
synced 2025-08-01 06:27:03 +03:00
Fix compression of keys stored on internal segment b-tree nodes by fts5.
FossilOrigin-Name: 51444f67c0cc58a3023eb1cd78e7cf889da6c80f
This commit is contained in:
@ -1021,7 +1021,7 @@ static int fts5SeekCursor(Fts5Cursor *pCsr){
|
||||
}else{
|
||||
rc = sqlite3_reset(pCsr->pStmt);
|
||||
if( rc==SQLITE_OK ){
|
||||
rc = SQLITE_CORRUPT_VTAB;
|
||||
rc = FTS5_CORRUPT;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -33,6 +33,13 @@
|
||||
#define FTS5_RANK_NAME "rank"
|
||||
#define FTS5_ROWID_NAME "rowid"
|
||||
|
||||
#ifdef SQLITE_DEBUG
|
||||
# define FTS5_CORRUPT sqlite3Fts5Corrupt()
|
||||
int sqlite3Fts5Corrupt(void);
|
||||
#else
|
||||
# define FTS5_CORRUPT SQLITE_CORRUPT_VTAB
|
||||
#endif
|
||||
|
||||
/**************************************************************************
|
||||
** Interface to code in fts5.c.
|
||||
*/
|
||||
|
@ -256,10 +256,7 @@
|
||||
FTS5_SEGMENT_ROWID(idx, segid, FTS5_SEGMENT_MAX_HEIGHT, pgno)
|
||||
|
||||
#ifdef SQLITE_DEBUG
|
||||
static int fts5Corrupt() { return SQLITE_CORRUPT_VTAB; }
|
||||
# define FTS5_CORRUPT fts5Corrupt()
|
||||
#else
|
||||
# define FTS5_CORRUPT SQLITE_CORRUPT_VTAB
|
||||
int sqlite3Fts5Corrupt() { return SQLITE_CORRUPT_VTAB; }
|
||||
#endif
|
||||
|
||||
|
||||
@ -373,7 +370,7 @@ struct Fts5Structure {
|
||||
struct Fts5PageWriter {
|
||||
int pgno; /* Page number for this page */
|
||||
Fts5Buffer buf; /* Buffer containing page data */
|
||||
Fts5Buffer term; /* Buffer containing previous term on page */
|
||||
Fts5Buffer term; /* Buffer containing previous term on page */
|
||||
};
|
||||
struct Fts5SegWriter {
|
||||
int iIdx; /* Index to write to */
|
||||
@ -383,6 +380,7 @@ struct Fts5SegWriter {
|
||||
i64 iPrevRowid; /* Previous docid written to current leaf */
|
||||
u8 bFirstRowidInDoclist; /* True if next rowid is first in doclist */
|
||||
u8 bFirstRowidInPage; /* True if next rowid is first in page */
|
||||
u8 bFirstTermInPage; /* True if next term will be first in leaf */
|
||||
int nLeafWritten; /* Number of leaf pages written */
|
||||
int nEmpty; /* Number of contiguous term-less nodes */
|
||||
Fts5Buffer dlidx; /* Doclist index */
|
||||
@ -2677,7 +2675,7 @@ static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
|
||||
Fts5PageWriter *pPage = &pWriter->aWriter[0];
|
||||
i64 iRowid;
|
||||
|
||||
if( pPage->term.n==0 ){
|
||||
if( pWriter->bFirstTermInPage ){
|
||||
/* No term was written to this page. */
|
||||
assert( 0==fts5GetU16(&pPage->buf.p[2]) );
|
||||
fts5WriteBtreeNoTerm(p, pWriter);
|
||||
@ -2689,12 +2687,14 @@ static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
|
||||
|
||||
/* Initialize the next page. */
|
||||
fts5BufferZero(&pPage->buf);
|
||||
fts5BufferZero(&pPage->term);
|
||||
fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
|
||||
pPage->pgno++;
|
||||
|
||||
/* Increase the leaves written counter */
|
||||
pWriter->nLeafWritten++;
|
||||
|
||||
/* The new leaf holds no terms */
|
||||
pWriter->bFirstTermInPage = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2717,23 +2717,38 @@ static void fts5WriteAppendTerm(
|
||||
/* Zero the first term and first docid fields */
|
||||
static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
|
||||
fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
|
||||
assert( pPage->term.n==0 );
|
||||
assert( pWriter->bFirstTermInPage );
|
||||
}
|
||||
if( p->rc ) return;
|
||||
|
||||
if( pPage->term.n==0 ){
|
||||
if( pWriter->bFirstTermInPage ){
|
||||
/* Update the "first term" field of the page header. */
|
||||
assert( pPage->buf.p[2]==0 && pPage->buf.p[3]==0 );
|
||||
fts5PutU16(&pPage->buf.p[2], pPage->buf.n);
|
||||
nPrefix = 0;
|
||||
if( pWriter->aWriter[0].pgno!=1 ){
|
||||
fts5WriteBtreeTerm(p, pWriter, nTerm, pTerm);
|
||||
if( pPage->pgno!=1 ){
|
||||
/* This is the first term on a leaf that is not the leftmost leaf in
|
||||
** the segment b-tree. In this case it is necessary to add a term to
|
||||
** the b-tree hierarchy that is (a) larger than the largest term
|
||||
** already written to the segment and (b) smaller than or equal to
|
||||
** this term. In other words, a prefix of (pTerm/nTerm) that is one
|
||||
** byte longer than the longest prefix (pTerm/nTerm) shares with the
|
||||
** previous term.
|
||||
**
|
||||
** Usually, the previous term is available in pPage->term. The exception
|
||||
** is if this is the first term written in an incremental-merge step.
|
||||
** In this case the previous term is not available, so just write a
|
||||
** copy of (pTerm/nTerm) into the parent node. This is slightly
|
||||
** inefficient, but still correct. */
|
||||
int n = nTerm;
|
||||
if( pPage->term.n ){
|
||||
n = 1 + fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm);
|
||||
}
|
||||
fts5WriteBtreeTerm(p, pWriter, n, pTerm);
|
||||
pPage = &pWriter->aWriter[0];
|
||||
}
|
||||
}else{
|
||||
nPrefix = fts5PrefixCompress(
|
||||
pPage->term.n, pPage->term.p, nTerm, pTerm
|
||||
);
|
||||
nPrefix = fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm);
|
||||
fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix);
|
||||
}
|
||||
|
||||
@ -2744,6 +2759,7 @@ static void fts5WriteAppendTerm(
|
||||
|
||||
/* Update the Fts5PageWriter.term field. */
|
||||
fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
|
||||
pWriter->bFirstTermInPage = 0;
|
||||
|
||||
pWriter->bFirstRowidInPage = 0;
|
||||
pWriter->bFirstRowidInDoclist = 1;
|
||||
@ -2900,6 +2916,7 @@ static void fts5WriteInit(
|
||||
if( pWriter->aWriter==0 ) return;
|
||||
pWriter->nWriter = 1;
|
||||
pWriter->aWriter[0].pgno = 1;
|
||||
pWriter->bFirstTermInPage = 1;
|
||||
}
|
||||
|
||||
static void fts5WriteInitForAppend(
|
||||
@ -2937,6 +2954,8 @@ static void fts5WriteInitForAppend(
|
||||
pWriter->nEmpty = pSeg->pgnoLast-1;
|
||||
}
|
||||
assert( (pgno+pWriter->nEmpty)==pSeg->pgnoLast );
|
||||
pWriter->bFirstTermInPage = 1;
|
||||
assert( pWriter->aWriter[0].term.n==0 );
|
||||
}
|
||||
}
|
||||
|
||||
@ -3918,6 +3937,21 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
|
||||
int iIdx; /* Used to iterate through indexes */
|
||||
u64 cksum2 = 0; /* Checksum based on contents of indexes */
|
||||
|
||||
/* Check that the internal nodes of each segment match the leaves */
|
||||
for(iIdx=0; p->rc==SQLITE_OK && iIdx<=pConfig->nPrefix; iIdx++){
|
||||
Fts5Structure *pStruct = fts5StructureRead(p, iIdx);
|
||||
if( pStruct ){
|
||||
int iLvl, iSeg;
|
||||
for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
|
||||
for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
|
||||
Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
|
||||
fts5IndexIntegrityCheckSegment(p, iIdx, pSeg);
|
||||
}
|
||||
}
|
||||
}
|
||||
fts5StructureRelease(pStruct);
|
||||
}
|
||||
|
||||
/* Check that the checksum of the index matches the argument checksum */
|
||||
for(iIdx=0; iIdx<=pConfig->nPrefix; iIdx++){
|
||||
Fts5MultiSegIter *pIter;
|
||||
@ -3950,21 +3984,6 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
|
||||
}
|
||||
if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT;
|
||||
|
||||
/* Check that the internal nodes of each segment match the leaves */
|
||||
for(iIdx=0; p->rc==SQLITE_OK && iIdx<=pConfig->nPrefix; iIdx++){
|
||||
Fts5Structure *pStruct = fts5StructureRead(p, iIdx);
|
||||
if( pStruct ){
|
||||
int iLvl, iSeg;
|
||||
for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
|
||||
for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
|
||||
Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
|
||||
fts5IndexIntegrityCheckSegment(p, iIdx, pSeg);
|
||||
}
|
||||
}
|
||||
}
|
||||
fts5StructureRelease(pStruct);
|
||||
}
|
||||
|
||||
return fts5IndexReturn(p);
|
||||
}
|
||||
|
||||
@ -3990,10 +4009,12 @@ int sqlite3Fts5IndexBeginWrite(Fts5Index *p, i64 iRowid){
|
||||
if( rc==SQLITE_OK ){
|
||||
p->apHash = apNew;
|
||||
}else{
|
||||
for(i=0; i<nHash; i++){
|
||||
sqlite3Fts5HashFree(apNew[i]);
|
||||
if( apNew ){
|
||||
for(i=0; i<nHash; i++){
|
||||
sqlite3Fts5HashFree(apNew[i]);
|
||||
}
|
||||
sqlite3_free(apNew);
|
||||
}
|
||||
sqlite3_free(apNew);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
@ -789,7 +789,7 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p){
|
||||
(void*)&ctx,
|
||||
fts5StorageIntegrityCallback
|
||||
);
|
||||
if( ctx.szCol!=aColSize[i] ) rc = SQLITE_CORRUPT_VTAB;
|
||||
if( ctx.szCol!=aColSize[i] ) rc = FTS5_CORRUPT;
|
||||
aTotalSize[i] += ctx.szCol;
|
||||
}
|
||||
if( rc!=SQLITE_OK ) break;
|
||||
@ -803,7 +803,7 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p){
|
||||
int i;
|
||||
rc = fts5StorageLoadTotals(p, 0);
|
||||
for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
|
||||
if( p->aTotalSize[i]!=aTotalSize[i] ) rc = SQLITE_CORRUPT_VTAB;
|
||||
if( p->aTotalSize[i]!=aTotalSize[i] ) rc = FTS5_CORRUPT;
|
||||
}
|
||||
}
|
||||
|
||||
@ -812,12 +812,12 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p){
|
||||
if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){
|
||||
i64 nRow;
|
||||
rc = fts5StorageCount(p, "content", &nRow);
|
||||
if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = SQLITE_CORRUPT_VTAB;
|
||||
if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT;
|
||||
}
|
||||
if( rc==SQLITE_OK ){
|
||||
i64 nRow;
|
||||
rc = fts5StorageCount(p, "docsize", &nRow);
|
||||
if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = SQLITE_CORRUPT_VTAB;
|
||||
if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT;
|
||||
}
|
||||
|
||||
/* Pass the expected checksum down to the FTS index module. It will
|
||||
@ -913,7 +913,7 @@ int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol){
|
||||
}
|
||||
rc = sqlite3_reset(pLookup);
|
||||
if( bCorrupt && rc==SQLITE_OK ){
|
||||
rc = SQLITE_CORRUPT_VTAB;
|
||||
rc = FTS5_CORRUPT;
|
||||
}
|
||||
}
|
||||
return rc;
|
||||
|
@ -123,6 +123,20 @@ proc fts5_level_segs {tbl} {
|
||||
set ret
|
||||
}
|
||||
|
||||
# fts5_level_segids TBL
#
# Return a list of lists of segment ids for fts5 table TBL. The row with
# rowid=10 of the table's "${tbl}_data" shadow table is decoded with the
# SQL function fts5_decode() and the result is treated as a Tcl list.
# The first element of that list is skipped; each remaining element
# contributes one sub-list to the return value (presumably one per index
# level, given the proc name - confirm against fts5_decode() output).
#
# Within each such element the first two entries are skipped and the digits
# following "id=" are extracted from every remaining entry.
#
# NOTE(review): the return value of [regexp] is not checked. If an entry
# contains no "id=" token, $segid keeps the value from the previous match
# (or is unset on the very first iteration, raising an error).
proc fts5_level_segids {tbl} {
|
||||
set sql "SELECT fts5_decode(rowid,block) aS r FROM ${tbl}_data WHERE rowid=10"
|
||||
set ret [list]
|
||||
foreach L [lrange [db one $sql] 1 end] {
|
||||
set lvl [list]
|
||||
foreach S [lrange $L 2 end] {
|
||||
regexp {id=([1234567890]*)} $S -> segid
|
||||
lappend lvl $segid
|
||||
}
|
||||
lappend ret $lvl
|
||||
}
|
||||
set ret
|
||||
}
|
||||
|
||||
proc fts5_rnddoc {n} {
|
||||
set map [list 0 a 1 b 2 c 3 d 4 e 5 f 6 g 7 h 8 i 9 j]
|
||||
set doc [list]
|
||||
|
75
ext/fts5/test/fts5corrupt.test
Normal file
75
ext/fts5/test/fts5corrupt.test
Normal file
@ -0,0 +1,75 @@
|
||||
# 2014 Dec 20
|
||||
#
|
||||
# The author disclaims copyright to this source code. In place of
|
||||
# a legal notice, here is a blessing:
|
||||
#
|
||||
# May you do good and not evil.
|
||||
# May you find forgiveness for yourself and forgive others.
|
||||
# May you share freely, never taking more than you give.
|
||||
#
|
||||
#***********************************************************************
|
||||
#
|
||||
#
|
||||
|
||||
source [file join [file dirname [info script]] fts5_common.tcl]
|
||||
set testprefix fts5corrupt
|
||||
|
||||
do_execsql_test 1.0 {
|
||||
CREATE VIRTUAL TABLE t1 USING fts5(x);
|
||||
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
|
||||
}
|
||||
|
||||
do_test 1.1 {
|
||||
db transaction {
|
||||
for {set i 1} {$i < 200} {incr i} {
|
||||
set doc [list [string repeat x $i] [string repeat y $i]]
|
||||
execsql { INSERT INTO t1(rowid, x) VALUES($i, $doc) }
|
||||
}
|
||||
}
|
||||
fts5_level_segs t1
|
||||
} {1}
|
||||
db_save
|
||||
|
||||
do_execsql_test 1.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
|
||||
set segid [lindex [fts5_level_segids t1] 0]
|
||||
|
||||
do_test 1.3 {
|
||||
execsql {
|
||||
DELETE FROM t1_data WHERE rowid = fts5_rowid('segment', 0, $segid, 0, 4);
|
||||
}
|
||||
catchsql { INSERT INTO t1(t1) VALUES('integrity-check') }
|
||||
} {1 {SQL logic error or missing database}}
|
||||
|
||||
do_test 1.4 {
|
||||
db_restore_and_reopen
|
||||
execsql {
|
||||
UPDATE t1_data set block = X'00000000' || substr(block, 5) WHERE
|
||||
rowid = fts5_rowid('segment', 0, $segid, 0, 4);
|
||||
}
|
||||
catchsql { INSERT INTO t1(t1) VALUES('integrity-check') }
|
||||
} {1 {database disk image is malformed}}
|
||||
|
||||
db_restore_and_reopen
|
||||
#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}
|
||||
|
||||
|
||||
|
||||
#--------------------------------------------------------------------
|
||||
# Test group 2: create a second fts5 table, t2, and set its 'pgsz' option
# to 32 via the special INSERT on the table-named column.
do_execsql_test 2.0 {
|
||||
CREATE VIRTUAL TABLE t2 USING fts5(x);
|
||||
INSERT INTO t2(t2, rank) VALUES('pgsz', 32);
|
||||
}
|
||||
# Inside a single transaction, insert 20 rows holding one term followed by
# 20 rows holding a second term that shares a long common prefix with the
# first. The test only checks that the script completes with an empty result.
# NOTE(review): presumably intended to exercise the prefix compression of
# keys on internal b-tree nodes mentioned in the commit message - confirm.
do_test 2.1 {
|
||||
db transaction {
|
||||
for {set i 0} {$i < 20} {incr i} {
|
||||
execsql { INSERT INTO t2 VALUES('xxxxxxxxxx') }
|
||||
}
|
||||
for {set i 0} {$i < 20} {incr i} {
|
||||
execsql { INSERT INTO t2 VALUES('xxxxxxxxxzzzz') }
|
||||
}
|
||||
}
|
||||
} {}
|
||||
# Print the decoded form of every row of t2_data to stdout.
# NOTE(review): this runs unconditionally and asserts nothing; it looks like
# leftover debugging output (the equivalent statement for t1 earlier in this
# script is commented out) - consider disabling it.
db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t2_data} {puts $r}
|
||||
|
||||
finish_test
|
||||
|
Reference in New Issue
Block a user