1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-07-30 19:03:16 +03:00

When scanning the full-text index as part of the fts5 integrity-check, also run a point query for every term and verify that these results are consistent with those found by the linear scan.

FossilOrigin-Name: ce972f6aab90f6929d018696f1ab3c2649eca802
This commit is contained in:
dan
2015-03-21 15:37:19 +00:00
parent f5fab92d82
commit 8885708c3c
4 changed files with 56 additions and 17 deletions

View File

@ -242,7 +242,7 @@ int sqlite3Fts5IndexClose(Fts5Index *p, int bDestroy);
** 0==sqlite3Fts5IterEof(pIter);
** sqlite3Fts5IterNext(pIter)
** ){
** i64 iDocid = sqlite3Fts5IndexDocid(pIter);
** i64 iRowid = sqlite3Fts5IterRowid(pIter);
** }
*/

View File

@ -4312,6 +4312,8 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
Fts5Config *pConfig = p->pConfig;
int iIdx; /* Used to iterate through indexes */
u64 cksum2 = 0; /* Checksum based on contents of indexes */
u64 cksum3 = 0; /* Checksum based on contents of indexes */
Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */
/* Check that the internal nodes of each segment match the leaves */
for(iIdx=0; p->rc==SQLITE_OK && iIdx<=pConfig->nPrefix; iIdx++){
@ -4328,7 +4330,19 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
fts5StructureRelease(pStruct);
}
/* Check that the checksum of the index matches the argument checksum */
/* The cksum argument passed to this function is a checksum calculated
** based on all expected entries in the FTS index (including prefix index
** entries). This block checks that a checksum calculated based on the
** actual contents of FTS index is identical.
**
** Two versions of the same checksum are calculated. The first (stack
** variable cksum2) is based on entries extracted from the full-text index
** while doing a linear scan of each individual index in turn.
**
** As each term is visited by the linear scans, a separate query for the
** same term is performed. cksum3 is calculated based on the entries
** extracted by these queries.
*/
for(iIdx=0; iIdx<=pConfig->nPrefix; iIdx++){
Fts5MultiSegIter *pIter;
Fts5Structure *pStruct = fts5StructureRead(p, iIdx);
@ -4341,25 +4355,50 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
i64 iRowid = fts5MultiIterRowid(pIter);
char *z = (char*)fts5MultiIterTerm(pIter, &n);
/* Update cksum2 with the entries associated with the current term
** and rowid. */
for(fts5PosIterInit(p, pIter, &sPos);
fts5PosIterEof(p, &sPos)==0;
fts5PosIterNext(p, &sPos)
){
cksum2 ^= fts5IndexEntryCksum(iRowid, sPos.iCol, sPos.iPos, z, n);
#if 0
fprintf(stdout, "rowid=%d ", (int)iRowid);
fprintf(stdout, "term=%.*s ", n, z);
fprintf(stdout, "col=%d ", sPos.iCol);
fprintf(stdout, "off=%d\n", sPos.iPos);
fflush(stdout);
#endif
}
/* If this is a new term, query for it. Update cksum3 with the results. */
if( p->rc==SQLITE_OK && (term.n!=n || memcmp(term.p, z, n)) ){
Fts5IndexIter *pIdxIter = 0;
int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX);
int rc = sqlite3Fts5IndexQuery(p, z, n, flags, &pIdxIter);
while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIdxIter) ){
const u8 *pPos;
int nPos;
i64 rowid = sqlite3Fts5IterRowid(pIdxIter);
rc = sqlite3Fts5IterPoslist(pIdxIter, &pPos, &nPos);
if( rc==SQLITE_OK ){
Fts5PoslistReader sReader;
for(sqlite3Fts5PoslistReaderInit(-1, pPos, nPos, &sReader);
sReader.bEof==0;
sqlite3Fts5PoslistReaderNext(&sReader)
){
int iCol = FTS5_POS2COLUMN(sReader.iPos);
int iOff = FTS5_POS2OFFSET(sReader.iPos);
cksum3 ^= fts5IndexEntryCksum(rowid, iCol, iOff, z, n);
}
rc = sqlite3Fts5IterNext(pIdxIter);
}
}
sqlite3Fts5IterClose(pIdxIter);
fts5BufferSet(&rc, &term, n, (const u8*)z);
p->rc = rc;
}
}
fts5MultiIterFree(p, pIter);
fts5StructureRelease(pStruct);
}
if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT;
if( p->rc==SQLITE_OK && cksum!=cksum3 ) p->rc = FTS5_CORRUPT;
fts5BufferFree(&term);
return fts5IndexReturn(p);
}

View File

@ -1,5 +1,5 @@
C Add\san\soptimization\sto\sthe\sfts5\sunicode\stokenizer\scode.
D 2015-03-11T14:51:39.375
C When\sscanning\sthe\sfull-text\sindex\sas\spart\sof\sthe\sfts5\sintegrity-check,\salso\srun\sa\spoint\squery\sfor\severy\sterm\sand\sverify\sthat\sthese\sresults\sare\sconsistent\swith\sthose\sfound\sby\sthe\slinear\sscan.
D 2015-03-21T15:37:19.761
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in 5407a688f4d77a05c18a8142be8ae5a2829dd610
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -106,13 +106,13 @@ F ext/fts3/unicode/mkunicode.tcl 159c1194da0bc72f51b3c2eb71022568006dc5ad
F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a
F ext/fts5/fts5.c 1eb8ca073be5222c43e4eee5408764c2cbb4200b
F ext/fts5/fts5.h 24a2cc35b5e76eec57b37ba48c12d9d2cb522b3a
F ext/fts5/fts5Int.h 1dcb02943f3a55d275d5473911a7e991d638c73c
F ext/fts5/fts5Int.h 8d09f7894e83b00a18a7e2149354a153904002df
F ext/fts5/fts5_aux.c fcea18b1a2a3f95a498b52aba2983557d7678a22
F ext/fts5/fts5_buffer.c 29f79841bf6eef5220eef41b122419b1bcb07b06
F ext/fts5/fts5_config.c 0847facc8914f57ea4452c43ce109200dc65e894
F ext/fts5/fts5_expr.c 5215137efab527577d36bdf9e44bfc2ec3e1be98
F ext/fts5/fts5_hash.c 9959b5408f649487d4b0ee081416f37dc3cd8cdd
F ext/fts5/fts5_index.c b00f7147f9660e66d9d1a8149d4faea3a06cd48e
F ext/fts5/fts5_index.c 4adc5e18ab6d0648faeb18f18a89d6aec57d77be
F ext/fts5/fts5_storage.c ac0f0937059c8d4f38a1f13aa5f2c2cd7edf3e0d
F ext/fts5/fts5_tcl.c 617b6bb96545be8d9045de6967c688cd9cd15541
F ext/fts5/fts5_tokenize.c c07f2c2f749282c1dbbf46bde1f6d7095c740b8b
@ -1284,7 +1284,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
P 5c46820d9b4aae791a8704b69145bd81f1e6780d
R 3ab21807b5c69b091448271467250abb
P f5db489250029678fce845dfb2b1109fde46bea5
R 258f390b03b29f1c61f33d36002f03e9
U dan
Z 588a6ddf00bd9c069b244e50951b58e0
Z 6cf104c99eec6cd34913e25fa048b8ab

View File

@ -1 +1 @@
f5db489250029678fce845dfb2b1109fde46bea5
ce972f6aab90f6929d018696f1ab3c2649eca802