diff --git a/ext/fts3/fts3.c b/ext/fts3/fts3.c index 297065dbd7..00a9d7ba6f 100644 --- a/ext/fts3/fts3.c +++ b/ext/fts3/fts3.c @@ -2176,7 +2176,7 @@ static void fts3SnippetFunc( const char *zEnd = ""; const char *zEllipsis = "..."; int iCol = -1; - int nToken = 15; + int nToken = 15; /* Default number of tokens in snippet */ /* There must be at least one argument passed to this function (otherwise ** the non-overloaded version would have been called instead of this one). diff --git a/ext/fts3/fts3_snippet.c b/ext/fts3/fts3_snippet.c index 6ed600afe5..828d59ff50 100644 --- a/ext/fts3/fts3_snippet.c +++ b/ext/fts3/fts3_snippet.c @@ -45,12 +45,10 @@ static int fts3ExprIterate( ){ int rc; int eType = pExpr->eType; - if( eType==FTSQUERY_NOT ){ - rc = SQLITE_OK; - }else if( eType!=FTSQUERY_PHRASE ){ + if( eType!=FTSQUERY_PHRASE ){ assert( pExpr->pLeft && pExpr->pRight ); rc = fts3ExprIterate(pExpr->pLeft, x, pCtx); - if( rc==SQLITE_OK ){ + if( rc==SQLITE_OK && eType!=FTSQUERY_NOT ){ rc = fts3ExprIterate(pExpr->pRight, x, pCtx); } }else{ @@ -108,7 +106,7 @@ static int fts3ExprLoadDoclistsCb1(Fts3Expr *pExpr, void *ctx){ rc = sqlite3Fts3ExprLoadDoclist(p->pTab, pExpr); pExpr->isLoaded = 1; if( rc==SQLITE_OK ){ - fts3ExprNearTrim(pExpr); + rc = fts3ExprNearTrim(pExpr); } } @@ -459,10 +457,10 @@ int fts3SnippetShift( rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent); } pMod->xClose(pC); - if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ - return rc; - } - nShift = iCurrent-nSnippet; + if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; } + + nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet; + assert( nShift<=nDesired ); if( nShift>0 ){ *piPos += nShift; *pHlmask = hlmask >> nShift; @@ -475,6 +473,8 @@ int fts3SnippetShift( static int fts3SnippetText( Fts3Cursor *pCsr, /* FTS3 Cursor */ SnippetFragment *pFragment, /* Snippet to extract */ + int iFragment, /* Fragment number */ + int isLast, /* True for final fragment in snippet */ int nSnippet, /* Number of tokens in extracted snippet */ const char *zOpen, /* String inserted before highlighted term */ const char *zClose, /* String inserted after highlighted term */ @@ -486,7 +486,6 @@ static int fts3SnippetText( const char *zDoc; /* Document text to extract snippet from */ int nDoc; /* Size of zDoc in bytes */ int iCurrent = 0; /* Current token number of document */ - int iStart = 0; /* Byte offset of current token */ int iEnd = 0; /* Byte offset of end of current token */ int isShiftDone = 0; int iPos = pFragment->iPos; @@ -495,7 +494,7 @@ static int fts3SnippetText( sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */ sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */ const char *ZDUMMY; /* Dummy arguments used with tokenizer */ - int DUMMY1, DUMMY2, DUMMY3; /* Dummy arguments used with tokenizer */ + int DUMMY1; /* Dummy arguments used with tokenizer */ zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, pFragment->iCol+1); if( zDoc==0 ){ @@ -506,10 +505,7 @@ static int fts3SnippetText( } nDoc = sqlite3_column_bytes(pCsr->pStmt, pFragment->iCol+1); - /* Open a token cursor on the document. Read all tokens up to and - ** including token iPos (the first token of the snippet). Set variable - ** iStart to the byte offset in zDoc of the start of token iPos. - */ + /* Open a token cursor on the document. */ pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC); if( rc!=SQLITE_OK ){ @@ -518,50 +514,54 @@ static int fts3SnippetText( pC->pTokenizer = pTab->pTokenizer; while( rc==SQLITE_OK ){ - int iBegin; - int iFin; + int iBegin; /* Offset in zDoc of start of token */ + int iFin; /* Offset in zDoc of end of token */ + int isHighlight; + rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent); - - if( rc==SQLITE_OK ){ - if( iCurrent=(iPos+nSnippet) ){ - rc = SQLITE_DONE; - }else{ - iEnd = iFin; - if( hlmask & ((u64)1 << (iCurrent-iPos)) ){ - if( fts3StringAppend(pOut, &zDoc[iStart], iBegin-iStart) - || fts3StringAppend(pOut, zOpen, -1) - || fts3StringAppend(pOut, &zDoc[iBegin], iEnd-iBegin) - || fts3StringAppend(pOut, zClose, -1) - ){ - rc = SQLITE_NOMEM; - } - iStart = iEnd; - } - } - } - } - assert( rc!=SQLITE_OK ); - if( rc==SQLITE_DONE ){ - rc = fts3StringAppend(pOut, &zDoc[iStart], iEnd-iStart); - if( rc==SQLITE_OK ){ - rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent); + if( rc!=SQLITE_OK ){ if( rc==SQLITE_DONE ){ + /* Special case - the last token of the snippet is also the last token + ** of the column. Append any punctuation that occurred between the end + ** of the previous token and the end of the document to the output. + ** Then break out of the loop. */ rc = fts3StringAppend(pOut, &zDoc[iEnd], -1); - }else if( rc==SQLITE_OK && zEllipsis ){ + } + break; + } + if( iCurrent0 || iFragment>0) ){ rc = fts3StringAppend(pOut, zEllipsis, -1); } + if( rc!=SQLITE_OK || iCurrent=(iPos+nSnippet) ){ + if( isLast ){ + rc = fts3StringAppend(pOut, zEllipsis, -1); + } + break; + } + + /* Set isHighlight to true if this term should be highlighted. */ + isHighlight = (hlmask & ((u64)1 << (iCurrent-iPos)))!=0; + + if( iCurrent>iPos ) rc = fts3StringAppend(pOut, &zDoc[iEnd], iBegin-iEnd); + if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zOpen, -1); + if( rc==SQLITE_OK ) rc = fts3StringAppend(pOut, &zDoc[iBegin], iFin-iBegin); + if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zClose, -1); + + iEnd = iFin; } pMod->xClose(pC); @@ -803,13 +803,9 @@ void sqlite3Fts3Snippet( assert( nFToken>0 ); for(i=0; i0 || p->iPos>0 ){ - fts3StringAppend(&res, zEllipsis, -1); - } - rc = fts3SnippetText(pCsr, p, nFToken, zStart, zEnd, zTail, &res); + rc = fts3SnippetText(pCsr, &aSnippet[i], + i, (i==nSnippet-1), nFToken, zStart, zEnd, zEllipsis, &res + ); } snippet_out: @@ -951,7 +947,7 @@ void sqlite3Fts3Offsets( sqlite3_snprintf(sizeof(aBuffer), aBuffer, "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart ); - fts3StringAppend(&res, aBuffer, -1); + rc = fts3StringAppend(&res, aBuffer, -1); } } } diff --git a/manifest b/manifest index d32a664bee..26957b9f36 100644 --- a/manifest +++ b/manifest @@ -1,8 +1,5 @@ ------BEGIN PGP SIGNED MESSAGE----- -Hash: SHA1 - -C Another\sattempt\sat\sfixing\sthe\stable\sgenerator\sin\slemon.\s\sAgain,\sthis\sdoes\nnot\seffect\sthe\sSQLite\sgrammar. -D 2010-01-07T03:53:04 +C Fixes\sto\sproblems\sin\sFTS3\ssnippet()\sfunction\sfound\sby\sth3\stests. +D 2010-01-07T10:54:29 F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0 F Makefile.in c5827ead754ab32b9585487177c93bb00b9497b3 F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654 @@ -59,7 +56,7 @@ F ext/fts2/mkfts2amal.tcl 974d5d438cb3f7c4a652639262f82418c1e4cff0 F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9 F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d -F ext/fts3/fts3.c 04e95afa45789d7a3da59f458d4a8c1879c31446 +F ext/fts3/fts3.c 7b1969f6b958059ab7c6c8450fa4f27cf88681c7 F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe F ext/fts3/fts3Int.h 45bc7e284806042119722c8f4127ee944b77f0dd F ext/fts3/fts3_expr.c f4ff02ebe854e97ac03ff00b38b728a9ab57fd4b @@ -67,7 +64,7 @@ F ext/fts3/fts3_hash.c 3c8f6387a4a7f5305588b203fa7c887d753e1f1c F ext/fts3/fts3_hash.h 8331fb2206c609f9fc4c4735b9ab5ad6137c88ec F ext/fts3/fts3_icu.c ac494aed69835008185299315403044664bda295 F ext/fts3/fts3_porter.c a651e287e02b49b565a6ccf9441959d434489156 -F ext/fts3/fts3_snippet.c f3d4bc931f041d8eddaab7e9a9328381e799a31e +F ext/fts3/fts3_snippet.c 210bd2a9336e25d4d82a2e81c85abd9b9f8f31ca F ext/fts3/fts3_tokenizer.c 1a49ee3d79cbf0b9386250370d9cbfe4bb89c8ff F ext/fts3/fts3_tokenizer.h 13ffd9fcb397fec32a05ef5cd9e0fa659bf3dbd3 F ext/fts3/fts3_tokenizer1.c 11a604a53cff5e8c28882727bf794e5252e5227b @@ -408,7 +405,7 @@ F test/fts3malloc.test d02ee86b21edd2b43044e0d6dfdcd26cb6efddcb F test/fts3near.test 2e318ee434d32babd27c167142e2b94ddbab4844 F test/fts3query.test ca21717993f51caa7e36231dba2499868f3f8a6f F test/fts3rnd.test 153b4214bad6084a348814f3dd651a92e2f31d9b -F test/fts3snippet.test bfbceb2e292ddfdc6bb0b1b252ccea78bd6091be +F test/fts3snippet.test 16a05b313bf85da4d0b5cb683549fc2279430fbe F test/func.test af106ed834001738246d276659406823e35cde7b F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f F test/fuzz.test a4174c3009a3e2c2e14b31b364ebf7ddb49de2c9 @@ -787,14 +784,7 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f -P c7e5966e3b031672f149d0b6e1f75f9bc40868fa -R c71e215e8efe7e980817f2247ed81780 -U drh -Z eaa58ce585624f7f98bd421d59de7fbe ------BEGIN PGP SIGNATURE----- -Version: GnuPG v1.4.6 (GNU/Linux) - -iD8DBQFLRVqjoxKgR168RlERAr98AJ0fYlm+0rcqam2O2GKZRNuoWdSXiwCeKGNO -zIN2JBTaxHsvBtPlPjm5964= -=BXEN ------END PGP SIGNATURE----- +P e22c090f35b3a2bac64781d33aa1123ed765dbbf +R c4c872b237c2eb5c969c1e58ad74f097 +U dan +Z 9b608ddefd032d66f262f5c7928a6a08 diff --git a/manifest.uuid b/manifest.uuid index 527ad918cb..316f7be6c5 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -e22c090f35b3a2bac64781d33aa1123ed765dbbf \ No newline at end of file +3b5ccd2682176929f4da8a3f39a7e8f58b179f18 \ No newline at end of file diff --git a/test/fts3snippet.test b/test/fts3snippet.test index 1d388e86d8..0f848981d2 100644 --- a/test/fts3snippet.test +++ b/test/fts3snippet.test @@ -1,10 +1,30 @@ +# 2010 January 07 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# set testdir [file dirname $argv0] source $testdir/tester.tcl -# If SQLITE_ENABLE_FTS3 is defined, omit this file. +# If SQLITE_ENABLE_FTS3 is not defined, omit this file. ifcapable !fts3 { finish_test ; return } +# Transform the list $L to its "normal" form. So that it can be compared to +# another list with the same set of elements using [string compare]. +# +proc normalize {L} { + set ret [list] + foreach l $L {lappend ret $l} + return $ret +} + do_test fts3snippet-1.1 { execsql { CREATE VIRTUAL TABLE ft USING fts3; @@ -12,12 +32,6 @@ do_test fts3snippet-1.1 { } } {} -proc normalize {L} { - set ret [list] - foreach l $L {lappend ret $l} - return $ret -} - do_test fts3snippet-1.2 { execsql { SELECT offsets(ft) FROM ft WHERE ft MATCH 'xxx' } } {{0 0 0 3 0 0 4 3 0 0 8 3 0 0 12 3}} @@ -64,5 +78,31 @@ do_test fts3snippet-1.5 { 0 2 12 3 }]] +do_test fts3snippet-2.1 { + execsql { + DROP TABLE IF EXISTS ft; + CREATE VIRTUAL TABLE ft USING fts3; + INSERT INTO ft VALUES('one two three four five six seven eight nine ten'); + } +} {} +foreach {tn expr res} { + 1 one "[one] two three four five..." + 2 two "one [two] three four five..." + 3 three "one two [three] four five..." + 4 four "...two three [four] five six..." + 5 five "...three four [five] six seven..." + 6 six "...four five [six] seven eight..." + 7 seven "...five six [seven] eight nine..." + 8 eight "...six seven [eight] nine ten" + 9 nine "...six seven eight [nine] ten" + 10 ten "...six seven eight nine [ten]" +} { + do_test fts3snippet-2.2.$tn { + execsql { + SELECT snippet(ft, '[', ']', '...', 0, 5) FROM ft WHERE ft MATCH $expr + } + } [list $res] +} + finish_test