From 159fd77e0f6ea30171b64471036d73d721f1271a Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 21 Dec 2015 18:45:09 +0000 Subject: [PATCH] Fix an fts5 integrity-check problem that affects offsets=0 tables with prefix indexes. FossilOrigin-Name: 609a0bc7f34e6dae74ce756aff920f3df78fe828 --- ext/fts5/fts5Int.h | 2 +- ext/fts5/fts5_buffer.c | 57 ++++++++++++++++++++-------------- ext/fts5/fts5_index.c | 49 +++++++++-------------------- ext/fts5/fts5_storage.c | 41 +++++++++++++++++------- ext/fts5/test/fts5ad.test | 16 ++++++++++ ext/fts5/test/fts5auto.test | 1 - ext/fts5/test/fts5offsets.test | 16 +++++++++- ext/fts5/test/fts5simple.test | 12 ++++++- manifest | 26 ++++++++-------- manifest.uuid | 2 +- 10 files changed, 133 insertions(+), 89 deletions(-) diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 436ae0712e..3016f4be04 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -297,7 +297,7 @@ int sqlite3Fts5IsBareword(char t); /* Bucket of terms object used by the integrity-check in offsets=0 mode. */ typedef struct Fts5Termset Fts5Termset; int sqlite3Fts5TermsetNew(Fts5Termset**); -int sqlite3Fts5TermsetAdd(Fts5Termset*, const char*, int, int *pbPresent); +int sqlite3Fts5TermsetAdd(Fts5Termset*, int, const char*, int, int *pbPresent); void sqlite3Fts5TermsetFree(Fts5Termset*); /* diff --git a/ext/fts5/fts5_buffer.c b/ext/fts5/fts5_buffer.c index 7e3e137996..251a543c5a 100644 --- a/ext/fts5/fts5_buffer.c +++ b/ext/fts5/fts5_buffer.c @@ -298,6 +298,7 @@ typedef struct Fts5TermsetEntry Fts5TermsetEntry; struct Fts5TermsetEntry { char *pTerm; int nTerm; + int iIdx; /* Index (main or aPrefix[] entry) */ Fts5TermsetEntry *pNext; }; @@ -313,36 +314,44 @@ int sqlite3Fts5TermsetNew(Fts5Termset **pp){ int sqlite3Fts5TermsetAdd( Fts5Termset *p, + int iIdx, const char *pTerm, int nTerm, int *pbPresent ){ int rc = SQLITE_OK; - int i; - int hash = 13; - Fts5TermsetEntry *pEntry; - - /* Calculate a hash value for this term */ - for(i=0; iapHash); - *pbPresent = 0; - for(pEntry=p->apHash[hash]; pEntry; pEntry=pEntry->pNext){ - if( pEntry->nTerm==nTerm && memcmp(pEntry->pTerm, pTerm, nTerm)==0 ){ - *pbPresent = 1; - break; - } - } + if( p ){ + int i; + int hash; + Fts5TermsetEntry *pEntry; - if( pEntry==0 ){ - pEntry = sqlite3Fts5MallocZero(&rc, sizeof(Fts5TermsetEntry) + nTerm); - if( pEntry ){ - pEntry->pTerm = (char*)&pEntry[1]; - pEntry->nTerm = nTerm; - memcpy(pEntry->pTerm, pTerm, nTerm); - pEntry->pNext = p->apHash[hash]; - p->apHash[hash] = pEntry; + /* Calculate a hash value for this term */ + hash = 104 + iIdx; + for(i=0; iapHash); + + for(pEntry=p->apHash[hash]; pEntry; pEntry=pEntry->pNext){ + if( pEntry->iIdx==iIdx + && pEntry->nTerm==nTerm + && memcmp(pEntry->pTerm, pTerm, nTerm)==0 + ){ + *pbPresent = 1; + break; + } + } + + if( pEntry==0 ){ + pEntry = sqlite3Fts5MallocZero(&rc, sizeof(Fts5TermsetEntry) + nTerm); + if( pEntry ){ + pEntry->pTerm = (char*)&pEntry[1]; + pEntry->nTerm = nTerm; + pEntry->iIdx = iIdx; + memcpy(pEntry->pTerm, pTerm, nTerm); + pEntry->pNext = p->apHash[hash]; + p->apHash[hash] = pEntry; + } } } diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 5aa87d500f..bcd81843b7 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -4373,7 +4373,9 @@ static void fts5MergePrefixLists( sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1,&iPos1); } } - p->rc = sqlite3Fts5PoslistWriterAppend(&tmp, &writer, iNew); + if( iNew!=writer.iPrev || tmp.n==0 ){ + p->rc = sqlite3Fts5PoslistWriterAppend(&tmp, &writer, iNew); + } } /* WRITEPOSLISTSIZE */ @@ -4608,7 +4610,11 @@ int sqlite3Fts5IndexClose(Fts5Index *p){ ** size. Return the number of bytes in the nChar character prefix of the ** buffer, or 0 if there are less than nChar characters in total. */ -static int fts5IndexCharlenToBytelen(const char *p, int nByte, int nChar){ +static int sqlite3Fts5IndexCharlenToBytelen( + const char *p, + int nByte, + int nChar +){ int n = 0; int i; for(i=0; inPrefix && rc==SQLITE_OK; i++){ - int nByte = fts5IndexCharlenToBytelen(pToken, nToken, pConfig->aPrefix[i]); + const int nChar = pConfig->aPrefix[i]; + int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar); if( nByte ){ rc = sqlite3Fts5HashWrite(p->pHash, p->iWriteRowid, iCol, iPos, (char)(FTS5_MAIN_PREFIX+i+1), pToken, @@ -4983,7 +4990,7 @@ int sqlite3Fts5IndexLoadConfig(Fts5Index *p){ /* ** Return a simple checksum value based on the arguments. */ -static u64 fts5IndexEntryCksum( +u64 sqlite3Fts5IndexEntryCksum( i64 iRowid, int iCol, int iPos, @@ -5071,7 +5078,7 @@ static int fts5QueryCksum( ){ int iCol = FTS5_POS2COLUMN(sReader.iPos); int iOff = FTS5_POS2OFFSET(sReader.iPos); - cksum ^= fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n); + cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n); } rc = sqlite3Fts5IterNext(pIdxIter); } @@ -5370,7 +5377,7 @@ static void fts5IndexIntegrityCheckSegment( /* ** Run internal checks to ensure that the FTS index (a) is internally ** consistent and (b) contains entries for which the XOR of the checksums -** as calculated by fts5IndexEntryCksum() is cksum. +** as calculated by sqlite3Fts5IndexEntryCksum() is cksum. ** ** Return SQLITE_CORRUPT if any of the internal checks fail, or if the ** checksum does not match. Return SQLITE_OK if all checks pass without @@ -5434,7 +5441,7 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){ int iCol = FTS5_POS2COLUMN(iPos); int iTokOff = FTS5_POS2OFFSET(iPos); - cksum2 ^= fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n); + cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n); } } fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3); @@ -5450,34 +5457,6 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ return fts5IndexReturn(p); } - -/* -** Calculate and return a checksum that is the XOR of the index entry -** checksum of all entries that would be generated by the token specified -** by the final 5 arguments. -*/ -u64 sqlite3Fts5IndexCksum( - Fts5Config *pConfig, /* Configuration object */ - i64 iRowid, /* Document term appears in */ - int iCol, /* Column term appears in */ - int iPos, /* Position term appears in */ - const char *pTerm, int nTerm /* Term at iPos */ -){ - u64 ret = 0; /* Return value */ - int iIdx; /* For iterating through indexes */ - - ret = fts5IndexEntryCksum(iRowid, iCol, iPos, 0, pTerm, nTerm); - - for(iIdx=0; iIdxnPrefix; iIdx++){ - int nByte = fts5IndexCharlenToBytelen(pTerm, nTerm, pConfig->aPrefix[iIdx]); - if( nByte ){ - ret ^= fts5IndexEntryCksum(iRowid, iCol, iPos, iIdx+1, pTerm, nByte); - } - } - - return ret; -} - /************************************************************************* ************************************************************************** ** Below this point is the implementation of the fts5_decode() scalar diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c index c84b213fdf..65429edbd9 100644 --- a/ext/fts5/fts5_storage.c +++ b/ext/fts5/fts5_storage.c @@ -829,6 +829,7 @@ struct Fts5IntegrityCtx { Fts5Config *pConfig; }; + /* ** Tokenization callback used by integrity check. */ @@ -840,25 +841,41 @@ static int fts5StorageIntegrityCallback( int iStart, /* Start offset of token */ int iEnd /* End offset of token */ ){ - int rc = SQLITE_OK; Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext; + Fts5Termset *pTermset = pCtx->pTermset; + int bPresent; + int ii; + int rc = SQLITE_OK; + int iPos; + int iCol; + if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){ pCtx->szCol++; } - if( pCtx->pTermset ){ - int bPresent = 0; - rc = sqlite3Fts5TermsetAdd(pCtx->pTermset, pToken, nToken, &bPresent); - if( rc==SQLITE_OK && bPresent==0 ){ - pCtx->cksum ^= sqlite3Fts5IndexCksum( - pCtx->pConfig, pCtx->iRowid, 0, pCtx->iCol, pToken, nToken - ); - } - }else{ - pCtx->cksum ^= sqlite3Fts5IndexCksum( - pCtx->pConfig, pCtx->iRowid, pCtx->iCol, pCtx->szCol-1, pToken, nToken + iPos = pTermset ? pCtx->iCol : pCtx->szCol-1; + iCol = pTermset ? 0 : pCtx->iCol; + + rc = sqlite3Fts5TermsetAdd(pTermset, 0, pToken, nToken, &bPresent); + if( rc==SQLITE_OK && bPresent==0 ){ + pCtx->cksum ^= sqlite3Fts5IndexEntryCksum( + pCtx->iRowid, iCol, iPos, 0, pToken, nToken ); } + + for(ii=0; rc==SQLITE_OK && iipConfig->nPrefix; ii++){ + const int nChar = pCtx->pConfig->aPrefix[ii]; + int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar); + if( nByte ){ + rc = sqlite3Fts5TermsetAdd(pTermset, ii+1, pToken, nByte, &bPresent); + if( bPresent==0 ){ + pCtx->cksum ^= sqlite3Fts5IndexEntryCksum( + pCtx->iRowid, iCol, iPos, ii+1, pToken, nByte + ); + } + } + } + return rc; } diff --git a/ext/fts5/test/fts5ad.test b/ext/fts5/test/fts5ad.test index 3881c7e161..a9fc568133 100644 --- a/ext/fts5/test/fts5ad.test +++ b/ext/fts5/test/fts5ad.test @@ -74,6 +74,22 @@ foreach {T create} { BEGIN; } + 6 { + CREATE VIRTUAL TABLE t1 USING fts5(a, b, offsets=0); + INSERT INTO t1(t1, rank) VALUES('pgsz', 32); + } + + 7 { + CREATE VIRTUAL TABLE t1 USING fts5(a, b, offsets=0, prefix="1,2,3,4,5"); + INSERT INTO t1(t1, rank) VALUES('pgsz', 32); + } + + 8 { + CREATE VIRTUAL TABLE t1 USING fts5(a, b, offsets=0, prefix="1,2,3,4,5"); + INSERT INTO t1(t1, rank) VALUES('pgsz', 32); + BEGIN; + } + } { do_test $T.1 { diff --git a/ext/fts5/test/fts5auto.test b/ext/fts5/test/fts5auto.test index 771a0b64d8..6efaa9cb7c 100644 --- a/ext/fts5/test/fts5auto.test +++ b/ext/fts5/test/fts5auto.test @@ -369,7 +369,6 @@ foreach {tn expr} { } { -breakpoint do_auto_test 4.$tn yy {c1 c2 c3} $expr } diff --git a/ext/fts5/test/fts5offsets.test b/ext/fts5/test/fts5offsets.test index 405824abf9..b70bee3b53 100644 --- a/ext/fts5/test/fts5offsets.test +++ b/ext/fts5/test/fts5offsets.test @@ -21,7 +21,6 @@ ifcapable !fts5 { return } - #-------------------------------------------------------------------------- # Simple tests. # @@ -65,5 +64,20 @@ do_catchsql_test 1.3.2 { SELECT rowid FROM t1('NEAR(h d)'); } {1 {fts5: NEAR queries are not supported (offsets=0)}} +#------------------------------------------------------------------------- +# integrity-check with both offsets= and prefix= options. +# +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE t2 USING fts5(a, offsets=0, prefix="1"); + INSERT INTO t2(a) VALUES('aa ab'); +} + +#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t2_data} {puts $r} + +breakpoint +do_execsql_test 2.1 { + INSERT INTO t2(t2) VALUES('integrity-check'); +} + finish_test diff --git a/ext/fts5/test/fts5simple.test b/ext/fts5/test/fts5simple.test index 27a1aee08b..870a912bb1 100644 --- a/ext/fts5/test/fts5simple.test +++ b/ext/fts5/test/fts5simple.test @@ -18,7 +18,7 @@ ifcapable !fts5 { finish_test return } - + #------------------------------------------------------------------------- # set doc "x x [string repeat {y } 50]z z" @@ -350,6 +350,16 @@ do_execsql_test 4.1 { SELECT rowid, x, x1 FROM x1 WHERE x1 MATCH '*reads' } {0 {} 4} +#------------------------------------------------------------------------- +reset_db +do_execsql_test 15.0 { + CREATE VIRTUAL TABLE x2 USING fts5(x, prefix=1); + INSERT INTO x2 VALUES('ab'); +} + +do_execsql_test 15.1 { + INSERT INTO x2(x2) VALUES('integrity-check'); +} finish_test diff --git a/manifest b/manifest index 1beb3af7bd..34b04180c1 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sa\sproblem\swith\sprefix\squeries\son\sfts5\soffsets=0\stables. -D 2015-12-18T19:07:14.984 +C Fix\san\sfts5\sintegrity-check\sproblem\sthat\saffects\soffsets=0\stables\swith\sprefix\sindexes. +D 2015-12-21T18:45:09.329 F Makefile.in 28bcd6149e050dff35d4dcfd97e890cd387a499d F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434 F Makefile.msc 5fff077fcc46de7714ed6eebb6159a4c00eab751 @@ -97,15 +97,15 @@ F ext/fts3/unicode/mkunicode.tcl 95cf7ec186e48d4985e433ff8a1c89090a774252 F ext/fts3/unicode/parseunicode.tcl da577d1384810fb4e2b209bf3313074353193e95 F ext/fts5/extract_api_docs.tcl a36e54ec777172ddd3f9a88daf593b00848368e0 F ext/fts5/fts5.h 8b9a13b309b180e9fb88ea5666c0d8d73c6102d9 -F ext/fts5/fts5Int.h 4e1bb66d8e607bf38e881eb455cdf36cc3fa9e42 +F ext/fts5/fts5Int.h e71739ed0b816758f1f77baaccca1b4a9064c4dc F ext/fts5/fts5_aux.c 1f384972d606375b8fa078319f25ab4b5feb1b35 -F ext/fts5/fts5_buffer.c 389d377d04f6e622644c3343ab5e511f6646de36 +F ext/fts5/fts5_buffer.c 87204c8b3b8bc62b27376eab09b74d6d5acc41f1 F ext/fts5/fts5_config.c ba5248a05c28ec6a6fdf2599a86e9fd67e5c61e2 F ext/fts5/fts5_expr.c 3b2c7ac54e6c03e732751a6a4bf9ced8f408e2a2 F ext/fts5/fts5_hash.c d4a6b52faca0134cc7bcc880f03a257a0dec2636 -F ext/fts5/fts5_index.c d862dc84c54133d902893a6a7992699cd5272c48 +F ext/fts5/fts5_index.c 1d1939afbc434907993d7e9a0c631be630d5b0dc F ext/fts5/fts5_main.c ef04699949ab8e42d590ae30188afef7ad58776e -F ext/fts5/fts5_storage.c 0dc37a6183e1061e255f23971198d8878159d4ef +F ext/fts5/fts5_storage.c 14e0bb6549a66da54adf4fa1564edbf24647cb22 F ext/fts5/fts5_tcl.c 3bf445e66de32137d4693694ff7b1fd6074e32bd F ext/fts5/fts5_test_mi.c e96be827aa8f571031e65e481251dc1981d608bf F ext/fts5/fts5_tokenize.c 618efe033bceb80c521b1e9ddfd9fee85fb5946e @@ -118,7 +118,7 @@ F ext/fts5/test/fts5_common.tcl 51f7ef3af444b89c6f6ce3896a0ac349ff4e996d F ext/fts5/test/fts5aa.test 2c553eea4dab4bc5a75928f56729277c7bc1d206 F ext/fts5/test/fts5ab.test 6fe3a56731d15978afbb74ae51b355fc9310f2ad F ext/fts5/test/fts5ac.test 9737992d08c56bfd4803e933744d2d764e23795c -F ext/fts5/test/fts5ad.test e3dfb150fce971b4fd832498c29f56924d451b63 +F ext/fts5/test/fts5ad.test 21d87b12c7ec83b4ec48816d24503443dffb10a1 F ext/fts5/test/fts5ae.test 0a9984fc3479f89f8c63d9848d6ed0c465dfcebe F ext/fts5/test/fts5af.test c2501ec2b61d6b179c305f5d2b8782ab3d4f832a F ext/fts5/test/fts5ag.test ec3e119b728196620a31507ef503c455a7a73505 @@ -128,7 +128,7 @@ F ext/fts5/test/fts5aj.test 05b569f5c16ea3098fb1984eec5cf50dbdaae5d8 F ext/fts5/test/fts5ak.test 7b8c5df96df599293f920b7e5521ebc79f647592 F ext/fts5/test/fts5al.test a1b7b6393376bc2adc216527a28f5ae5594069df F ext/fts5/test/fts5alter.test 6022c61467a82aa11c70822ccad22b328dcf0d04 -F ext/fts5/test/fts5auto.test caa5bcf917db11944655a2a9bd38c67c520376ca +F ext/fts5/test/fts5auto.test 2a6241673657b340427f521528f7809ddaa02a9e F ext/fts5/test/fts5aux.test 8c687c948cc98e9a94be014df7d518acc1b3b74f F ext/fts5/test/fts5auxdata.test 141a7cbffcceb1bd2799b4b29c183ff8780d586e F ext/fts5/test/fts5bigpl.test 04ee0d7eebbebf17c31f5a0b5c5f9494eac3a0cb @@ -156,7 +156,7 @@ F ext/fts5/test/fts5integrity.test 87db5d4e7da0ce04a1dcba5ba91658673c997a65 F ext/fts5/test/fts5matchinfo.test 2163b0013e824bba65499da9e34ea4da41349cc2 F ext/fts5/test/fts5merge.test 8f3cdba2ec9c5e7e568246e81b700ad37f764367 F ext/fts5/test/fts5near.test b214cddb1c1f1bddf45c75af768f20145f7e71cc -F ext/fts5/test/fts5offsets.test 8410ec485d652da168c138b3d39e7418ff8406bf +F ext/fts5/test/fts5offsets.test 1a2d53c34a896d2038b839df2178410c45977671 F ext/fts5/test/fts5onepass.test 7ed9608e258132cb8d55e7c479b08676ad68810c F ext/fts5/test/fts5optimize.test 42741e7c085ee0a1276140a752d4407d97c2c9f5 F ext/fts5/test/fts5phrase.test f6d1d464da5beb25dc56277aa4f1d6102f0d9a2f @@ -169,7 +169,7 @@ F ext/fts5/test/fts5rank.test 7e9e64eac7245637f6f2033aec4b292aaf611aab F ext/fts5/test/fts5rebuild.test 03935f617ace91ed23a6099c7c74d905227ff29b F ext/fts5/test/fts5restart.test c17728fdea26e7d0f617d22ad5b4b2862b994c17 F ext/fts5/test/fts5rowid.test 400384798349d658eaf06aefa1e364957d5d4821 -F ext/fts5/test/fts5simple.test 9bded45827b4ab8933c87b7b3bcc3cd47f7378a4 +F ext/fts5/test/fts5simple.test a599b7577bc3827a9a678add3b43d8b818b93456 F ext/fts5/test/fts5synonym.test cf88c0a56d5ea9591e3939ef1f6e294f7f2d0671 F ext/fts5/test/fts5tokenizer.test ea4df698b35cc427ebf2ba22829d0e28386d8c89 F ext/fts5/test/fts5unicode.test fbef8d8a3b4b88470536cc57604a82ca52e51841 @@ -1406,7 +1406,7 @@ F tool/vdbe_profile.tcl 246d0da094856d72d2c12efec03250d71639d19f F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 40b5bbf02a824ca73b33aa4ae1c7d5f65b7cda10 -R 3f45bfddac5bb4e5875c9115c2804e36 +P ad0987d83c252dd8d6a69321893629d7be805c28 +R f519655f5c64b0aca45be5b21174cea1 U dan -Z 1ef5385fc9e0304197471eb155fb346c +Z 77d1a35391efdb0fe1eff3fc97431cfb diff --git a/manifest.uuid b/manifest.uuid index af1abb33df..32831e991d 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -ad0987d83c252dd8d6a69321893629d7be805c28 \ No newline at end of file +609a0bc7f34e6dae74ce756aff920f3df78fe828 \ No newline at end of file