mirror of
https://github.com/sqlite/sqlite.git
synced 2025-08-01 06:27:03 +03:00
Fix an fts5 integrity-check problem that affects offsets=0 tables with prefix indexes.
FossilOrigin-Name: 609a0bc7f34e6dae74ce756aff920f3df78fe828
This commit is contained in:
@ -297,7 +297,7 @@ int sqlite3Fts5IsBareword(char t);
|
||||
/* Bucket of terms object used by the integrity-check in offsets=0 mode. */
|
||||
typedef struct Fts5Termset Fts5Termset;
|
||||
int sqlite3Fts5TermsetNew(Fts5Termset**);
|
||||
int sqlite3Fts5TermsetAdd(Fts5Termset*, const char*, int, int *pbPresent);
|
||||
int sqlite3Fts5TermsetAdd(Fts5Termset*, int, const char*, int, int *pbPresent);
|
||||
void sqlite3Fts5TermsetFree(Fts5Termset*);
|
||||
|
||||
/*
|
||||
|
@ -298,6 +298,7 @@ typedef struct Fts5TermsetEntry Fts5TermsetEntry;
|
||||
struct Fts5TermsetEntry {
|
||||
char *pTerm;
|
||||
int nTerm;
|
||||
int iIdx; /* Index (main or aPrefix[] entry) */
|
||||
Fts5TermsetEntry *pNext;
|
||||
};
|
||||
|
||||
@ -313,36 +314,44 @@ int sqlite3Fts5TermsetNew(Fts5Termset **pp){
|
||||
|
||||
int sqlite3Fts5TermsetAdd(
|
||||
Fts5Termset *p,
|
||||
int iIdx,
|
||||
const char *pTerm, int nTerm,
|
||||
int *pbPresent
|
||||
){
|
||||
int rc = SQLITE_OK;
|
||||
int i;
|
||||
int hash = 13;
|
||||
Fts5TermsetEntry *pEntry;
|
||||
|
||||
/* Calculate a hash value for this term */
|
||||
for(i=0; i<nTerm; i++){
|
||||
hash += (hash << 3) + (int)pTerm[i];
|
||||
}
|
||||
hash = hash % ArraySize(p->apHash);
|
||||
|
||||
*pbPresent = 0;
|
||||
for(pEntry=p->apHash[hash]; pEntry; pEntry=pEntry->pNext){
|
||||
if( pEntry->nTerm==nTerm && memcmp(pEntry->pTerm, pTerm, nTerm)==0 ){
|
||||
*pbPresent = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if( p ){
|
||||
int i;
|
||||
int hash;
|
||||
Fts5TermsetEntry *pEntry;
|
||||
|
||||
if( pEntry==0 ){
|
||||
pEntry = sqlite3Fts5MallocZero(&rc, sizeof(Fts5TermsetEntry) + nTerm);
|
||||
if( pEntry ){
|
||||
pEntry->pTerm = (char*)&pEntry[1];
|
||||
pEntry->nTerm = nTerm;
|
||||
memcpy(pEntry->pTerm, pTerm, nTerm);
|
||||
pEntry->pNext = p->apHash[hash];
|
||||
p->apHash[hash] = pEntry;
|
||||
/* Calculate a hash value for this term */
|
||||
hash = 104 + iIdx;
|
||||
for(i=0; i<nTerm; i++){
|
||||
hash += (hash << 3) + (int)pTerm[i];
|
||||
}
|
||||
hash = hash % ArraySize(p->apHash);
|
||||
|
||||
for(pEntry=p->apHash[hash]; pEntry; pEntry=pEntry->pNext){
|
||||
if( pEntry->iIdx==iIdx
|
||||
&& pEntry->nTerm==nTerm
|
||||
&& memcmp(pEntry->pTerm, pTerm, nTerm)==0
|
||||
){
|
||||
*pbPresent = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if( pEntry==0 ){
|
||||
pEntry = sqlite3Fts5MallocZero(&rc, sizeof(Fts5TermsetEntry) + nTerm);
|
||||
if( pEntry ){
|
||||
pEntry->pTerm = (char*)&pEntry[1];
|
||||
pEntry->nTerm = nTerm;
|
||||
pEntry->iIdx = iIdx;
|
||||
memcpy(pEntry->pTerm, pTerm, nTerm);
|
||||
pEntry->pNext = p->apHash[hash];
|
||||
p->apHash[hash] = pEntry;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4373,7 +4373,9 @@ static void fts5MergePrefixLists(
|
||||
sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1,&iPos1);
|
||||
}
|
||||
}
|
||||
p->rc = sqlite3Fts5PoslistWriterAppend(&tmp, &writer, iNew);
|
||||
if( iNew!=writer.iPrev || tmp.n==0 ){
|
||||
p->rc = sqlite3Fts5PoslistWriterAppend(&tmp, &writer, iNew);
|
||||
}
|
||||
}
|
||||
|
||||
/* WRITEPOSLISTSIZE */
|
||||
@ -4608,7 +4610,11 @@ int sqlite3Fts5IndexClose(Fts5Index *p){
|
||||
** size. Return the number of bytes in the nChar character prefix of the
|
||||
** buffer, or 0 if there are less than nChar characters in total.
|
||||
*/
|
||||
static int fts5IndexCharlenToBytelen(const char *p, int nByte, int nChar){
|
||||
static int sqlite3Fts5IndexCharlenToBytelen(
|
||||
const char *p,
|
||||
int nByte,
|
||||
int nChar
|
||||
){
|
||||
int n = 0;
|
||||
int i;
|
||||
for(i=0; i<nChar; i++){
|
||||
@ -4665,7 +4671,8 @@ int sqlite3Fts5IndexWrite(
|
||||
);
|
||||
|
||||
for(i=0; i<pConfig->nPrefix && rc==SQLITE_OK; i++){
|
||||
int nByte = fts5IndexCharlenToBytelen(pToken, nToken, pConfig->aPrefix[i]);
|
||||
const int nChar = pConfig->aPrefix[i];
|
||||
int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar);
|
||||
if( nByte ){
|
||||
rc = sqlite3Fts5HashWrite(p->pHash,
|
||||
p->iWriteRowid, iCol, iPos, (char)(FTS5_MAIN_PREFIX+i+1), pToken,
|
||||
@ -4983,7 +4990,7 @@ int sqlite3Fts5IndexLoadConfig(Fts5Index *p){
|
||||
/*
|
||||
** Return a simple checksum value based on the arguments.
|
||||
*/
|
||||
static u64 fts5IndexEntryCksum(
|
||||
u64 sqlite3Fts5IndexEntryCksum(
|
||||
i64 iRowid,
|
||||
int iCol,
|
||||
int iPos,
|
||||
@ -5071,7 +5078,7 @@ static int fts5QueryCksum(
|
||||
){
|
||||
int iCol = FTS5_POS2COLUMN(sReader.iPos);
|
||||
int iOff = FTS5_POS2OFFSET(sReader.iPos);
|
||||
cksum ^= fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n);
|
||||
cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n);
|
||||
}
|
||||
rc = sqlite3Fts5IterNext(pIdxIter);
|
||||
}
|
||||
@ -5370,7 +5377,7 @@ static void fts5IndexIntegrityCheckSegment(
|
||||
/*
|
||||
** Run internal checks to ensure that the FTS index (a) is internally
|
||||
** consistent and (b) contains entries for which the XOR of the checksums
|
||||
** as calculated by fts5IndexEntryCksum() is cksum.
|
||||
** as calculated by sqlite3Fts5IndexEntryCksum() is cksum.
|
||||
**
|
||||
** Return SQLITE_CORRUPT if any of the internal checks fail, or if the
|
||||
** checksum does not match. Return SQLITE_OK if all checks pass without
|
||||
@ -5434,7 +5441,7 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
|
||||
while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){
|
||||
int iCol = FTS5_POS2COLUMN(iPos);
|
||||
int iTokOff = FTS5_POS2OFFSET(iPos);
|
||||
cksum2 ^= fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n);
|
||||
cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n);
|
||||
}
|
||||
}
|
||||
fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3);
|
||||
@ -5450,34 +5457,6 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
|
||||
return fts5IndexReturn(p);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Calculate and return a checksum that is the XOR of the index entry
|
||||
** checksum of all entries that would be generated by the token specified
|
||||
** by the final 5 arguments.
|
||||
*/
|
||||
u64 sqlite3Fts5IndexCksum(
|
||||
Fts5Config *pConfig, /* Configuration object */
|
||||
i64 iRowid, /* Document term appears in */
|
||||
int iCol, /* Column term appears in */
|
||||
int iPos, /* Position term appears in */
|
||||
const char *pTerm, int nTerm /* Term at iPos */
|
||||
){
|
||||
u64 ret = 0; /* Return value */
|
||||
int iIdx; /* For iterating through indexes */
|
||||
|
||||
ret = fts5IndexEntryCksum(iRowid, iCol, iPos, 0, pTerm, nTerm);
|
||||
|
||||
for(iIdx=0; iIdx<pConfig->nPrefix; iIdx++){
|
||||
int nByte = fts5IndexCharlenToBytelen(pTerm, nTerm, pConfig->aPrefix[iIdx]);
|
||||
if( nByte ){
|
||||
ret ^= fts5IndexEntryCksum(iRowid, iCol, iPos, iIdx+1, pTerm, nByte);
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*************************************************************************
|
||||
**************************************************************************
|
||||
** Below this point is the implementation of the fts5_decode() scalar
|
||||
|
@ -829,6 +829,7 @@ struct Fts5IntegrityCtx {
|
||||
Fts5Config *pConfig;
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
** Tokenization callback used by integrity check.
|
||||
*/
|
||||
@ -840,25 +841,41 @@ static int fts5StorageIntegrityCallback(
|
||||
int iStart, /* Start offset of token */
|
||||
int iEnd /* End offset of token */
|
||||
){
|
||||
int rc = SQLITE_OK;
|
||||
Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext;
|
||||
Fts5Termset *pTermset = pCtx->pTermset;
|
||||
int bPresent;
|
||||
int ii;
|
||||
int rc = SQLITE_OK;
|
||||
int iPos;
|
||||
int iCol;
|
||||
|
||||
if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){
|
||||
pCtx->szCol++;
|
||||
}
|
||||
|
||||
if( pCtx->pTermset ){
|
||||
int bPresent = 0;
|
||||
rc = sqlite3Fts5TermsetAdd(pCtx->pTermset, pToken, nToken, &bPresent);
|
||||
if( rc==SQLITE_OK && bPresent==0 ){
|
||||
pCtx->cksum ^= sqlite3Fts5IndexCksum(
|
||||
pCtx->pConfig, pCtx->iRowid, 0, pCtx->iCol, pToken, nToken
|
||||
);
|
||||
}
|
||||
}else{
|
||||
pCtx->cksum ^= sqlite3Fts5IndexCksum(
|
||||
pCtx->pConfig, pCtx->iRowid, pCtx->iCol, pCtx->szCol-1, pToken, nToken
|
||||
iPos = pTermset ? pCtx->iCol : pCtx->szCol-1;
|
||||
iCol = pTermset ? 0 : pCtx->iCol;
|
||||
|
||||
rc = sqlite3Fts5TermsetAdd(pTermset, 0, pToken, nToken, &bPresent);
|
||||
if( rc==SQLITE_OK && bPresent==0 ){
|
||||
pCtx->cksum ^= sqlite3Fts5IndexEntryCksum(
|
||||
pCtx->iRowid, iCol, iPos, 0, pToken, nToken
|
||||
);
|
||||
}
|
||||
|
||||
for(ii=0; rc==SQLITE_OK && ii<pCtx->pConfig->nPrefix; ii++){
|
||||
const int nChar = pCtx->pConfig->aPrefix[ii];
|
||||
int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar);
|
||||
if( nByte ){
|
||||
rc = sqlite3Fts5TermsetAdd(pTermset, ii+1, pToken, nByte, &bPresent);
|
||||
if( bPresent==0 ){
|
||||
pCtx->cksum ^= sqlite3Fts5IndexEntryCksum(
|
||||
pCtx->iRowid, iCol, iPos, ii+1, pToken, nByte
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
@ -74,6 +74,22 @@ foreach {T create} {
|
||||
BEGIN;
|
||||
}
|
||||
|
||||
6 {
|
||||
CREATE VIRTUAL TABLE t1 USING fts5(a, b, offsets=0);
|
||||
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
|
||||
}
|
||||
|
||||
7 {
|
||||
CREATE VIRTUAL TABLE t1 USING fts5(a, b, offsets=0, prefix="1,2,3,4,5");
|
||||
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
|
||||
}
|
||||
|
||||
8 {
|
||||
CREATE VIRTUAL TABLE t1 USING fts5(a, b, offsets=0, prefix="1,2,3,4,5");
|
||||
INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
|
||||
BEGIN;
|
||||
}
|
||||
|
||||
} {
|
||||
|
||||
do_test $T.1 {
|
||||
|
@ -369,7 +369,6 @@ foreach {tn expr} {
|
||||
|
||||
|
||||
} {
|
||||
breakpoint
|
||||
do_auto_test 4.$tn yy {c1 c2 c3} $expr
|
||||
}
|
||||
|
||||
|
@ -21,7 +21,6 @@ ifcapable !fts5 {
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
#--------------------------------------------------------------------------
|
||||
# Simple tests.
|
||||
#
|
||||
@ -65,5 +64,20 @@ do_catchsql_test 1.3.2 {
|
||||
SELECT rowid FROM t1('NEAR(h d)');
|
||||
} {1 {fts5: NEAR queries are not supported (offsets=0)}}
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
# integrity-check with both offsets= and prefix= options.
|
||||
#
|
||||
do_execsql_test 2.0 {
|
||||
CREATE VIRTUAL TABLE t2 USING fts5(a, offsets=0, prefix="1");
|
||||
INSERT INTO t2(a) VALUES('aa ab');
|
||||
}
|
||||
|
||||
#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t2_data} {puts $r}
|
||||
|
||||
breakpoint
|
||||
do_execsql_test 2.1 {
|
||||
INSERT INTO t2(t2) VALUES('integrity-check');
|
||||
}
|
||||
|
||||
finish_test
|
||||
|
||||
|
@ -18,7 +18,7 @@ ifcapable !fts5 {
|
||||
finish_test
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
#
|
||||
set doc "x x [string repeat {y } 50]z z"
|
||||
@ -350,6 +350,16 @@ do_execsql_test 4.1 {
|
||||
SELECT rowid, x, x1 FROM x1 WHERE x1 MATCH '*reads'
|
||||
} {0 {} 4}
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
reset_db
|
||||
do_execsql_test 15.0 {
|
||||
CREATE VIRTUAL TABLE x2 USING fts5(x, prefix=1);
|
||||
INSERT INTO x2 VALUES('ab');
|
||||
}
|
||||
|
||||
do_execsql_test 15.1 {
|
||||
INSERT INTO x2(x2) VALUES('integrity-check');
|
||||
}
|
||||
|
||||
finish_test
|
||||
|
||||
|
Reference in New Issue
Block a user