1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-07-30 19:03:16 +03:00

Fixes to problems in FTS3 snippet() function found by th3 tests.

FossilOrigin-Name: 3b5ccd2682176929f4da8a3f39a7e8f58b179f18
This commit is contained in:
dan
2010-01-07 10:54:28 +00:00
parent 8dc3e8f3e6
commit 3174598a64
5 changed files with 115 additions and 89 deletions

View File

@ -2176,7 +2176,7 @@ static void fts3SnippetFunc(
const char *zEnd = "</b>"; const char *zEnd = "</b>";
const char *zEllipsis = "<b>...</b>"; const char *zEllipsis = "<b>...</b>";
int iCol = -1; int iCol = -1;
int nToken = 15; int nToken = 15; /* Default number of tokens in snippet */
/* There must be at least one argument passed to this function (otherwise /* There must be at least one argument passed to this function (otherwise
** the non-overloaded version would have been called instead of this one). ** the non-overloaded version would have been called instead of this one).

View File

@ -45,12 +45,10 @@ static int fts3ExprIterate(
){ ){
int rc; int rc;
int eType = pExpr->eType; int eType = pExpr->eType;
if( eType==FTSQUERY_NOT ){ if( eType!=FTSQUERY_PHRASE ){
rc = SQLITE_OK;
}else if( eType!=FTSQUERY_PHRASE ){
assert( pExpr->pLeft && pExpr->pRight ); assert( pExpr->pLeft && pExpr->pRight );
rc = fts3ExprIterate(pExpr->pLeft, x, pCtx); rc = fts3ExprIterate(pExpr->pLeft, x, pCtx);
if( rc==SQLITE_OK ){ if( rc==SQLITE_OK && eType!=FTSQUERY_NOT ){
rc = fts3ExprIterate(pExpr->pRight, x, pCtx); rc = fts3ExprIterate(pExpr->pRight, x, pCtx);
} }
}else{ }else{
@ -108,7 +106,7 @@ static int fts3ExprLoadDoclistsCb1(Fts3Expr *pExpr, void *ctx){
rc = sqlite3Fts3ExprLoadDoclist(p->pTab, pExpr); rc = sqlite3Fts3ExprLoadDoclist(p->pTab, pExpr);
pExpr->isLoaded = 1; pExpr->isLoaded = 1;
if( rc==SQLITE_OK ){ if( rc==SQLITE_OK ){
fts3ExprNearTrim(pExpr); rc = fts3ExprNearTrim(pExpr);
} }
} }
@ -459,10 +457,10 @@ int fts3SnippetShift(
rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent); rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent);
} }
pMod->xClose(pC); pMod->xClose(pC);
if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; }
return rc;
} nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet;
nShift = iCurrent-nSnippet; assert( nShift<=nDesired );
if( nShift>0 ){ if( nShift>0 ){
*piPos += nShift; *piPos += nShift;
*pHlmask = hlmask >> nShift; *pHlmask = hlmask >> nShift;
@ -475,6 +473,8 @@ int fts3SnippetShift(
static int fts3SnippetText( static int fts3SnippetText(
Fts3Cursor *pCsr, /* FTS3 Cursor */ Fts3Cursor *pCsr, /* FTS3 Cursor */
SnippetFragment *pFragment, /* Snippet to extract */ SnippetFragment *pFragment, /* Snippet to extract */
int iFragment, /* Fragment number */
int isLast, /* True for final fragment in snippet */
int nSnippet, /* Number of tokens in extracted snippet */ int nSnippet, /* Number of tokens in extracted snippet */
const char *zOpen, /* String inserted before highlighted term */ const char *zOpen, /* String inserted before highlighted term */
const char *zClose, /* String inserted after highlighted term */ const char *zClose, /* String inserted after highlighted term */
@ -486,7 +486,6 @@ static int fts3SnippetText(
const char *zDoc; /* Document text to extract snippet from */ const char *zDoc; /* Document text to extract snippet from */
int nDoc; /* Size of zDoc in bytes */ int nDoc; /* Size of zDoc in bytes */
int iCurrent = 0; /* Current token number of document */ int iCurrent = 0; /* Current token number of document */
int iStart = 0; /* Byte offset of current token */
int iEnd = 0; /* Byte offset of end of current token */ int iEnd = 0; /* Byte offset of end of current token */
int isShiftDone = 0; int isShiftDone = 0;
int iPos = pFragment->iPos; int iPos = pFragment->iPos;
@ -495,7 +494,7 @@ static int fts3SnippetText(
sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */ sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */
sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */ sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */
const char *ZDUMMY; /* Dummy arguments used with tokenizer */ const char *ZDUMMY; /* Dummy arguments used with tokenizer */
int DUMMY1, DUMMY2, DUMMY3; /* Dummy arguments used with tokenizer */ int DUMMY1; /* Dummy arguments used with tokenizer */
zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, pFragment->iCol+1); zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, pFragment->iCol+1);
if( zDoc==0 ){ if( zDoc==0 ){
@ -506,10 +505,7 @@ static int fts3SnippetText(
} }
nDoc = sqlite3_column_bytes(pCsr->pStmt, pFragment->iCol+1); nDoc = sqlite3_column_bytes(pCsr->pStmt, pFragment->iCol+1);
/* Open a token cursor on the document. Read all tokens up to and /* Open a token cursor on the document. */
** including token iPos (the first token of the snippet). Set variable
** iStart to the byte offset in zDoc of the start of token iPos.
*/
pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC); rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);
if( rc!=SQLITE_OK ){ if( rc!=SQLITE_OK ){
@ -518,50 +514,54 @@ static int fts3SnippetText(
pC->pTokenizer = pTab->pTokenizer; pC->pTokenizer = pTab->pTokenizer;
while( rc==SQLITE_OK ){ while( rc==SQLITE_OK ){
int iBegin; int iBegin; /* Offset in zDoc of start of token */
int iFin; int iFin; /* Offset in zDoc of end of token */
int isHighlight;
rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent); rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent);
if( rc!=SQLITE_OK ){
if( rc==SQLITE_OK ){
if( iCurrent<iPos ) continue;
if( !isShiftDone ){
int n = nDoc - iBegin;
rc = fts3SnippetShift(pTab, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask);
if( rc!=SQLITE_OK || iCurrent<iPos ) continue;
}
if( iCurrent==iPos ){
iStart = iEnd = iBegin;
}
if( iCurrent>=(iPos+nSnippet) ){
rc = SQLITE_DONE;
}else{
iEnd = iFin;
if( hlmask & ((u64)1 << (iCurrent-iPos)) ){
if( fts3StringAppend(pOut, &zDoc[iStart], iBegin-iStart)
|| fts3StringAppend(pOut, zOpen, -1)
|| fts3StringAppend(pOut, &zDoc[iBegin], iEnd-iBegin)
|| fts3StringAppend(pOut, zClose, -1)
){
rc = SQLITE_NOMEM;
}
iStart = iEnd;
}
}
}
}
assert( rc!=SQLITE_OK );
if( rc==SQLITE_DONE ){
rc = fts3StringAppend(pOut, &zDoc[iStart], iEnd-iStart);
if( rc==SQLITE_OK ){
rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent);
if( rc==SQLITE_DONE ){ if( rc==SQLITE_DONE ){
/* Special case - the last token of the snippet is also the last token
** of the column. Append any punctuation that occurred between the end
** of the previous token and the end of the document to the output.
** Then break out of the loop. */
rc = fts3StringAppend(pOut, &zDoc[iEnd], -1); rc = fts3StringAppend(pOut, &zDoc[iEnd], -1);
}else if( rc==SQLITE_OK && zEllipsis ){ }
break;
}
if( iCurrent<iPos ){ continue; }
if( !isShiftDone ){
int n = nDoc - iBegin;
rc = fts3SnippetShift(pTab, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask);
isShiftDone = 1;
/* Now that the shift has been done, check if the initial "..." are
** required. They are required if (a) this is not the first fragment,
** or (b) this fragment does not begin at position 0 of its column.
*/
if( rc==SQLITE_OK && (iPos>0 || iFragment>0) ){
rc = fts3StringAppend(pOut, zEllipsis, -1); rc = fts3StringAppend(pOut, zEllipsis, -1);
} }
if( rc!=SQLITE_OK || iCurrent<iPos ) continue;
} }
if( iCurrent>=(iPos+nSnippet) ){
if( isLast ){
rc = fts3StringAppend(pOut, zEllipsis, -1);
}
break;
}
/* Set isHighlight to true if this term should be highlighted. */
isHighlight = (hlmask & ((u64)1 << (iCurrent-iPos)))!=0;
if( iCurrent>iPos ) rc = fts3StringAppend(pOut, &zDoc[iEnd], iBegin-iEnd);
if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zOpen, -1);
if( rc==SQLITE_OK ) rc = fts3StringAppend(pOut, &zDoc[iBegin], iFin-iBegin);
if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zClose, -1);
iEnd = iFin;
} }
pMod->xClose(pC); pMod->xClose(pC);
@ -803,13 +803,9 @@ void sqlite3Fts3Snippet(
assert( nFToken>0 ); assert( nFToken>0 );
for(i=0; i<nSnippet && rc==SQLITE_OK; i++){ for(i=0; i<nSnippet && rc==SQLITE_OK; i++){
SnippetFragment *p = &aSnippet[i]; rc = fts3SnippetText(pCsr, &aSnippet[i],
const char *zTail = ((i==nSnippet-1) ? zEllipsis : 0); i, (i==nSnippet-1), nFToken, zStart, zEnd, zEllipsis, &res
);
if( i>0 || p->iPos>0 ){
fts3StringAppend(&res, zEllipsis, -1);
}
rc = fts3SnippetText(pCsr, p, nFToken, zStart, zEnd, zTail, &res);
} }
snippet_out: snippet_out:
@ -951,7 +947,7 @@ void sqlite3Fts3Offsets(
sqlite3_snprintf(sizeof(aBuffer), aBuffer, sqlite3_snprintf(sizeof(aBuffer), aBuffer,
"%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart
); );
fts3StringAppend(&res, aBuffer, -1); rc = fts3StringAppend(&res, aBuffer, -1);
} }
} }
} }

View File

@ -1,8 +1,5 @@
-----BEGIN PGP SIGNED MESSAGE----- C Fixes\sto\sproblems\sin\sFTS3\ssnippet()\sfunction\sfound\sby\sth3\stests.
Hash: SHA1 D 2010-01-07T10:54:29
C Another\sattempt\sat\sfixing\sthe\stable\sgenerator\sin\slemon.\s\sAgain,\sthis\sdoes\nnot\seffect\sthe\sSQLite\sgrammar.
D 2010-01-07T03:53:04
F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0 F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0
F Makefile.in c5827ead754ab32b9585487177c93bb00b9497b3 F Makefile.in c5827ead754ab32b9585487177c93bb00b9497b3
F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654 F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654
@ -59,7 +56,7 @@ F ext/fts2/mkfts2amal.tcl 974d5d438cb3f7c4a652639262f82418c1e4cff0
F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a
F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9 F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9
F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
F ext/fts3/fts3.c 04e95afa45789d7a3da59f458d4a8c1879c31446 F ext/fts3/fts3.c 7b1969f6b958059ab7c6c8450fa4f27cf88681c7
F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe
F ext/fts3/fts3Int.h 45bc7e284806042119722c8f4127ee944b77f0dd F ext/fts3/fts3Int.h 45bc7e284806042119722c8f4127ee944b77f0dd
F ext/fts3/fts3_expr.c f4ff02ebe854e97ac03ff00b38b728a9ab57fd4b F ext/fts3/fts3_expr.c f4ff02ebe854e97ac03ff00b38b728a9ab57fd4b
@ -67,7 +64,7 @@ F ext/fts3/fts3_hash.c 3c8f6387a4a7f5305588b203fa7c887d753e1f1c
F ext/fts3/fts3_hash.h 8331fb2206c609f9fc4c4735b9ab5ad6137c88ec F ext/fts3/fts3_hash.h 8331fb2206c609f9fc4c4735b9ab5ad6137c88ec
F ext/fts3/fts3_icu.c ac494aed69835008185299315403044664bda295 F ext/fts3/fts3_icu.c ac494aed69835008185299315403044664bda295
F ext/fts3/fts3_porter.c a651e287e02b49b565a6ccf9441959d434489156 F ext/fts3/fts3_porter.c a651e287e02b49b565a6ccf9441959d434489156
F ext/fts3/fts3_snippet.c f3d4bc931f041d8eddaab7e9a9328381e799a31e F ext/fts3/fts3_snippet.c 210bd2a9336e25d4d82a2e81c85abd9b9f8f31ca
F ext/fts3/fts3_tokenizer.c 1a49ee3d79cbf0b9386250370d9cbfe4bb89c8ff F ext/fts3/fts3_tokenizer.c 1a49ee3d79cbf0b9386250370d9cbfe4bb89c8ff
F ext/fts3/fts3_tokenizer.h 13ffd9fcb397fec32a05ef5cd9e0fa659bf3dbd3 F ext/fts3/fts3_tokenizer.h 13ffd9fcb397fec32a05ef5cd9e0fa659bf3dbd3
F ext/fts3/fts3_tokenizer1.c 11a604a53cff5e8c28882727bf794e5252e5227b F ext/fts3/fts3_tokenizer1.c 11a604a53cff5e8c28882727bf794e5252e5227b
@ -408,7 +405,7 @@ F test/fts3malloc.test d02ee86b21edd2b43044e0d6dfdcd26cb6efddcb
F test/fts3near.test 2e318ee434d32babd27c167142e2b94ddbab4844 F test/fts3near.test 2e318ee434d32babd27c167142e2b94ddbab4844
F test/fts3query.test ca21717993f51caa7e36231dba2499868f3f8a6f F test/fts3query.test ca21717993f51caa7e36231dba2499868f3f8a6f
F test/fts3rnd.test 153b4214bad6084a348814f3dd651a92e2f31d9b F test/fts3rnd.test 153b4214bad6084a348814f3dd651a92e2f31d9b
F test/fts3snippet.test bfbceb2e292ddfdc6bb0b1b252ccea78bd6091be F test/fts3snippet.test 16a05b313bf85da4d0b5cb683549fc2279430fbe
F test/func.test af106ed834001738246d276659406823e35cde7b F test/func.test af106ed834001738246d276659406823e35cde7b
F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f
F test/fuzz.test a4174c3009a3e2c2e14b31b364ebf7ddb49de2c9 F test/fuzz.test a4174c3009a3e2c2e14b31b364ebf7ddb49de2c9
@ -787,14 +784,7 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
P c7e5966e3b031672f149d0b6e1f75f9bc40868fa P e22c090f35b3a2bac64781d33aa1123ed765dbbf
R c71e215e8efe7e980817f2247ed81780 R c4c872b237c2eb5c969c1e58ad74f097
U drh U dan
Z eaa58ce585624f7f98bd421d59de7fbe Z 9b608ddefd032d66f262f5c7928a6a08
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.6 (GNU/Linux)
iD8DBQFLRVqjoxKgR168RlERAr98AJ0fYlm+0rcqam2O2GKZRNuoWdSXiwCeKGNO
zIN2JBTaxHsvBtPlPjm5964=
=BXEN
-----END PGP SIGNATURE-----

View File

@ -1 +1 @@
e22c090f35b3a2bac64781d33aa1123ed765dbbf 3b5ccd2682176929f4da8a3f39a7e8f58b179f18

View File

@ -1,10 +1,30 @@
# 2010 January 07
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
#
set testdir [file dirname $argv0] set testdir [file dirname $argv0]
source $testdir/tester.tcl source $testdir/tester.tcl
# If SQLITE_ENABLE_FTS3 is defined, omit this file. # If SQLITE_ENABLE_FTS3 is not defined, omit this file.
ifcapable !fts3 { finish_test ; return } ifcapable !fts3 { finish_test ; return }
# Transform the list $L to its "normal" form. So that it can be compared to
# another list with the same set of elements using [string compare].
#
proc normalize {L} {
set ret [list]
foreach l $L {lappend ret $l}
return $ret
}
do_test fts3snippet-1.1 { do_test fts3snippet-1.1 {
execsql { execsql {
CREATE VIRTUAL TABLE ft USING fts3; CREATE VIRTUAL TABLE ft USING fts3;
@ -12,12 +32,6 @@ do_test fts3snippet-1.1 {
} }
} {} } {}
proc normalize {L} {
set ret [list]
foreach l $L {lappend ret $l}
return $ret
}
do_test fts3snippet-1.2 { do_test fts3snippet-1.2 {
execsql { SELECT offsets(ft) FROM ft WHERE ft MATCH 'xxx' } execsql { SELECT offsets(ft) FROM ft WHERE ft MATCH 'xxx' }
} {{0 0 0 3 0 0 4 3 0 0 8 3 0 0 12 3}} } {{0 0 0 3 0 0 4 3 0 0 8 3 0 0 12 3}}
@ -64,5 +78,31 @@ do_test fts3snippet-1.5 {
0 2 12 3 0 2 12 3
}]] }]]
do_test fts3snippet-2.1 {
execsql {
DROP TABLE IF EXISTS ft;
CREATE VIRTUAL TABLE ft USING fts3;
INSERT INTO ft VALUES('one two three four five six seven eight nine ten');
}
} {}
foreach {tn expr res} {
1 one "[one] two three four five..."
2 two "one [two] three four five..."
3 three "one two [three] four five..."
4 four "...two three [four] five six..."
5 five "...three four [five] six seven..."
6 six "...four five [six] seven eight..."
7 seven "...five six [seven] eight nine..."
8 eight "...six seven [eight] nine ten"
9 nine "...six seven eight [nine] ten"
10 ten "...six seven eight nine [ten]"
} {
do_test fts3snippet-2.2.$tn {
execsql {
SELECT snippet(ft, '[', ']', '...', 0, 5) FROM ft WHERE ft MATCH $expr
}
} [list $res]
}
finish_test finish_test