1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-08-01 06:27:03 +03:00

Fix an fts5 integrity-check problem that affects offsets=0 tables with prefix indexes.

FossilOrigin-Name: 609a0bc7f34e6dae74ce756aff920f3df78fe828
This commit is contained in:
dan
2015-12-21 18:45:09 +00:00
parent c58b9eeaaa
commit 159fd77e0f
10 changed files with 133 additions and 89 deletions

View File

@ -297,7 +297,7 @@ int sqlite3Fts5IsBareword(char t);
/* Bucket of terms object used by the integrity-check in offsets=0 mode. */
typedef struct Fts5Termset Fts5Termset;
int sqlite3Fts5TermsetNew(Fts5Termset**);
int sqlite3Fts5TermsetAdd(Fts5Termset*, const char*, int, int *pbPresent);
int sqlite3Fts5TermsetAdd(Fts5Termset*, int, const char*, int, int *pbPresent);
void sqlite3Fts5TermsetFree(Fts5Termset*);
/*

View File

@ -298,6 +298,7 @@ typedef struct Fts5TermsetEntry Fts5TermsetEntry;
/*
** One entry in an Fts5Termset hash table. Each entry records a single
** (index, term) pair that has been added to the set. Entries that hash to
** the same bucket are linked together through pNext.
*/
struct Fts5TermsetEntry {
char *pTerm; /* Term bytes (stored in the same allocation, after the entry) */
int nTerm; /* Size of pTerm in bytes */
int iIdx; /* Index (main or aPrefix[] entry) */
Fts5TermsetEntry *pNext; /* Next entry in the same hash bucket */
};
@ -313,36 +314,44 @@ int sqlite3Fts5TermsetNew(Fts5Termset **pp){
int sqlite3Fts5TermsetAdd(
Fts5Termset *p,
int iIdx,
const char *pTerm, int nTerm,
int *pbPresent
){
int rc = SQLITE_OK;
int i;
int hash = 13;
Fts5TermsetEntry *pEntry;
/* Calculate a hash value for this term */
for(i=0; i<nTerm; i++){
hash += (hash << 3) + (int)pTerm[i];
}
hash = hash % ArraySize(p->apHash);
*pbPresent = 0;
for(pEntry=p->apHash[hash]; pEntry; pEntry=pEntry->pNext){
if( pEntry->nTerm==nTerm && memcmp(pEntry->pTerm, pTerm, nTerm)==0 ){
*pbPresent = 1;
break;
}
}
if( p ){
int i;
int hash;
Fts5TermsetEntry *pEntry;
if( pEntry==0 ){
pEntry = sqlite3Fts5MallocZero(&rc, sizeof(Fts5TermsetEntry) + nTerm);
if( pEntry ){
pEntry->pTerm = (char*)&pEntry[1];
pEntry->nTerm = nTerm;
memcpy(pEntry->pTerm, pTerm, nTerm);
pEntry->pNext = p->apHash[hash];
p->apHash[hash] = pEntry;
/* Calculate a hash value for this term */
hash = 104 + iIdx;
for(i=0; i<nTerm; i++){
hash += (hash << 3) + (int)pTerm[i];
}
hash = hash % ArraySize(p->apHash);
for(pEntry=p->apHash[hash]; pEntry; pEntry=pEntry->pNext){
if( pEntry->iIdx==iIdx
&& pEntry->nTerm==nTerm
&& memcmp(pEntry->pTerm, pTerm, nTerm)==0
){
*pbPresent = 1;
break;
}
}
if( pEntry==0 ){
pEntry = sqlite3Fts5MallocZero(&rc, sizeof(Fts5TermsetEntry) + nTerm);
if( pEntry ){
pEntry->pTerm = (char*)&pEntry[1];
pEntry->nTerm = nTerm;
pEntry->iIdx = iIdx;
memcpy(pEntry->pTerm, pTerm, nTerm);
pEntry->pNext = p->apHash[hash];
p->apHash[hash] = pEntry;
}
}
}

View File

@ -4373,7 +4373,9 @@ static void fts5MergePrefixLists(
sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1,&iPos1);
}
}
p->rc = sqlite3Fts5PoslistWriterAppend(&tmp, &writer, iNew);
if( iNew!=writer.iPrev || tmp.n==0 ){
p->rc = sqlite3Fts5PoslistWriterAppend(&tmp, &writer, iNew);
}
}
/* WRITEPOSLISTSIZE */
@ -4608,7 +4610,11 @@ int sqlite3Fts5IndexClose(Fts5Index *p){
** size. Return the number of bytes in the nChar character prefix of the
** buffer, or 0 if there are fewer than nChar characters in total.
*/
static int fts5IndexCharlenToBytelen(const char *p, int nByte, int nChar){
static int sqlite3Fts5IndexCharlenToBytelen(
const char *p,
int nByte,
int nChar
){
int n = 0;
int i;
for(i=0; i<nChar; i++){
@ -4665,7 +4671,8 @@ int sqlite3Fts5IndexWrite(
);
for(i=0; i<pConfig->nPrefix && rc==SQLITE_OK; i++){
int nByte = fts5IndexCharlenToBytelen(pToken, nToken, pConfig->aPrefix[i]);
const int nChar = pConfig->aPrefix[i];
int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar);
if( nByte ){
rc = sqlite3Fts5HashWrite(p->pHash,
p->iWriteRowid, iCol, iPos, (char)(FTS5_MAIN_PREFIX+i+1), pToken,
@ -4983,7 +4990,7 @@ int sqlite3Fts5IndexLoadConfig(Fts5Index *p){
/*
** Return a simple checksum value based on the arguments.
*/
static u64 fts5IndexEntryCksum(
u64 sqlite3Fts5IndexEntryCksum(
i64 iRowid,
int iCol,
int iPos,
@ -5071,7 +5078,7 @@ static int fts5QueryCksum(
){
int iCol = FTS5_POS2COLUMN(sReader.iPos);
int iOff = FTS5_POS2OFFSET(sReader.iPos);
cksum ^= fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n);
cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n);
}
rc = sqlite3Fts5IterNext(pIdxIter);
}
@ -5370,7 +5377,7 @@ static void fts5IndexIntegrityCheckSegment(
/*
** Run internal checks to ensure that the FTS index (a) is internally
** consistent and (b) contains entries for which the XOR of the checksums
** as calculated by fts5IndexEntryCksum() is cksum.
** as calculated by sqlite3Fts5IndexEntryCksum() is cksum.
**
** Return SQLITE_CORRUPT if any of the internal checks fail, or if the
** checksum does not match. Return SQLITE_OK if all checks pass without
@ -5434,7 +5441,7 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){
int iCol = FTS5_POS2COLUMN(iPos);
int iTokOff = FTS5_POS2OFFSET(iPos);
cksum2 ^= fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n);
cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n);
}
}
fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3);
@ -5450,34 +5457,6 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
return fts5IndexReturn(p);
}
/*
** Compute the combined checksum for one token occurrence. The result is
** the XOR of the entry checksum for the main index (index 0) with the
** entry checksum for every configured prefix index that the token is long
** enough to contribute a prefix to.
**
** The token is identified by (iRowid, iCol, iPos) and the nTerm bytes at
** pTerm. Prefix lengths are read from pConfig->aPrefix[], which holds
** pConfig->nPrefix entries.
*/
u64 sqlite3Fts5IndexCksum(
  Fts5Config *pConfig,            /* Configuration object */
  i64 iRowid,                     /* Document term appears in */
  int iCol,                       /* Column term appears in */
  int iPos,                       /* Position term appears in */
  const char *pTerm, int nTerm    /* Term at iPos */
){
  int iPrefix = 0;                /* Number of prefix indexes visited so far */
  u64 cksum = fts5IndexEntryCksum(iRowid, iCol, iPos, 0, pTerm, nTerm);

  while( iPrefix<pConfig->nPrefix ){
    int nChar = pConfig->aPrefix[iPrefix];
    int nPrefixByte = fts5IndexCharlenToBytelen(pTerm, nTerm, nChar);

    /* Prefix index i is identified by i+1; index 0 is the main index. */
    iPrefix++;

    /* A zero byte length means the token is shorter than this prefix. */
    if( nPrefixByte==0 ) continue;
    cksum ^= fts5IndexEntryCksum(
        iRowid, iCol, iPos, iPrefix, pTerm, nPrefixByte
    );
  }

  return cksum;
}
/*************************************************************************
**************************************************************************
** Below this point is the implementation of the fts5_decode() scalar

View File

@ -829,6 +829,7 @@ struct Fts5IntegrityCtx {
Fts5Config *pConfig;
};
/*
** Tokenization callback used by integrity check.
*/
@ -840,25 +841,41 @@ static int fts5StorageIntegrityCallback(
int iStart, /* Start offset of token */
int iEnd /* End offset of token */
){
int rc = SQLITE_OK;
Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext;
Fts5Termset *pTermset = pCtx->pTermset;
int bPresent;
int ii;
int rc = SQLITE_OK;
int iPos;
int iCol;
if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){
pCtx->szCol++;
}
if( pCtx->pTermset ){
int bPresent = 0;
rc = sqlite3Fts5TermsetAdd(pCtx->pTermset, pToken, nToken, &bPresent);
if( rc==SQLITE_OK && bPresent==0 ){
pCtx->cksum ^= sqlite3Fts5IndexCksum(
pCtx->pConfig, pCtx->iRowid, 0, pCtx->iCol, pToken, nToken
);
}
}else{
pCtx->cksum ^= sqlite3Fts5IndexCksum(
pCtx->pConfig, pCtx->iRowid, pCtx->iCol, pCtx->szCol-1, pToken, nToken
iPos = pTermset ? pCtx->iCol : pCtx->szCol-1;
iCol = pTermset ? 0 : pCtx->iCol;
rc = sqlite3Fts5TermsetAdd(pTermset, 0, pToken, nToken, &bPresent);
if( rc==SQLITE_OK && bPresent==0 ){
pCtx->cksum ^= sqlite3Fts5IndexEntryCksum(
pCtx->iRowid, iCol, iPos, 0, pToken, nToken
);
}
for(ii=0; rc==SQLITE_OK && ii<pCtx->pConfig->nPrefix; ii++){
const int nChar = pCtx->pConfig->aPrefix[ii];
int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar);
if( nByte ){
rc = sqlite3Fts5TermsetAdd(pTermset, ii+1, pToken, nByte, &bPresent);
if( bPresent==0 ){
pCtx->cksum ^= sqlite3Fts5IndexEntryCksum(
pCtx->iRowid, iCol, iPos, ii+1, pToken, nByte
);
}
}
}
return rc;
}

View File

@ -74,6 +74,22 @@ foreach {T create} {
BEGIN;
}
6 {
CREATE VIRTUAL TABLE t1 USING fts5(a, b, offsets=0);
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}
7 {
CREATE VIRTUAL TABLE t1 USING fts5(a, b, offsets=0, prefix="1,2,3,4,5");
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}
8 {
CREATE VIRTUAL TABLE t1 USING fts5(a, b, offsets=0, prefix="1,2,3,4,5");
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
BEGIN;
}
} {
do_test $T.1 {

View File

@ -369,7 +369,6 @@ foreach {tn expr} {
} {
breakpoint
do_auto_test 4.$tn yy {c1 c2 c3} $expr
}

View File

@ -21,7 +21,6 @@ ifcapable !fts5 {
return
}
#--------------------------------------------------------------------------
# Simple tests.
#
@ -65,5 +64,20 @@ do_catchsql_test 1.3.2 {
SELECT rowid FROM t1('NEAR(h d)');
} {1 {fts5: NEAR queries are not supported (offsets=0)}}
#-------------------------------------------------------------------------
# integrity-check with both offsets= and prefix= options.
#
# The single row inserted below holds the text 'aa ab'. Assuming the
# default tokenizer splits this into the tokens "aa" and "ab", both tokens
# share the one-character prefix "a", so the prefix="1" index receives the
# same prefix term more than once for the same row.
#
do_execsql_test 2.0 {
CREATE VIRTUAL TABLE t2 USING fts5(a, offsets=0, prefix="1");
INSERT INTO t2(a) VALUES('aa ab');
}
#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t2_data} {puts $r}
breakpoint
# Run the integrity-check against the table populated by test 2.0.
do_execsql_test 2.1 {
INSERT INTO t2(t2) VALUES('integrity-check');
}
finish_test

View File

@ -18,7 +18,7 @@ ifcapable !fts5 {
finish_test
return
}
#-------------------------------------------------------------------------
#
set doc "x x [string repeat {y } 50]z z"
@ -350,6 +350,16 @@ do_execsql_test 4.1 {
SELECT rowid, x, x1 FROM x1 WHERE x1 MATCH '*reads'
} {0 {} 4}
#-------------------------------------------------------------------------
# Run an integrity-check on a freshly created table that has a
# one-character prefix index (prefix=1) and contains a single row.
#
reset_db
do_execsql_test 15.0 {
CREATE VIRTUAL TABLE x2 USING fts5(x, prefix=1);
INSERT INTO x2 VALUES('ab');
}
do_execsql_test 15.1 {
INSERT INTO x2(x2) VALUES('integrity-check');
}
finish_test