1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-07-30 19:03:16 +03:00

Fixes to problems in FTS3 snippet() function found by th3 tests.

FossilOrigin-Name: 3b5ccd2682176929f4da8a3f39a7e8f58b179f18
This commit is contained in:
dan
2010-01-07 10:54:28 +00:00
parent 8dc3e8f3e6
commit 3174598a64
5 changed files with 115 additions and 89 deletions

View File

@ -2176,7 +2176,7 @@ static void fts3SnippetFunc(
const char *zEnd = "</b>";
const char *zEllipsis = "<b>...</b>";
int iCol = -1;
int nToken = 15;
int nToken = 15; /* Default number of tokens in snippet */
/* There must be at least one argument passed to this function (otherwise
** the non-overloaded version would have been called instead of this one).

View File

@ -45,12 +45,10 @@ static int fts3ExprIterate(
){
int rc;
int eType = pExpr->eType;
if( eType==FTSQUERY_NOT ){
rc = SQLITE_OK;
}else if( eType!=FTSQUERY_PHRASE ){
if( eType!=FTSQUERY_PHRASE ){
assert( pExpr->pLeft && pExpr->pRight );
rc = fts3ExprIterate(pExpr->pLeft, x, pCtx);
if( rc==SQLITE_OK ){
if( rc==SQLITE_OK && eType!=FTSQUERY_NOT ){
rc = fts3ExprIterate(pExpr->pRight, x, pCtx);
}
}else{
@ -108,7 +106,7 @@ static int fts3ExprLoadDoclistsCb1(Fts3Expr *pExpr, void *ctx){
rc = sqlite3Fts3ExprLoadDoclist(p->pTab, pExpr);
pExpr->isLoaded = 1;
if( rc==SQLITE_OK ){
fts3ExprNearTrim(pExpr);
rc = fts3ExprNearTrim(pExpr);
}
}
@ -459,10 +457,10 @@ int fts3SnippetShift(
rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent);
}
pMod->xClose(pC);
if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){
return rc;
}
nShift = iCurrent-nSnippet;
if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; }
nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet;
assert( nShift<=nDesired );
if( nShift>0 ){
*piPos += nShift;
*pHlmask = hlmask >> nShift;
@ -475,6 +473,8 @@ int fts3SnippetShift(
static int fts3SnippetText(
Fts3Cursor *pCsr, /* FTS3 Cursor */
SnippetFragment *pFragment, /* Snippet to extract */
int iFragment, /* Fragment number */
int isLast, /* True for final fragment in snippet */
int nSnippet, /* Number of tokens in extracted snippet */
const char *zOpen, /* String inserted before highlighted term */
const char *zClose, /* String inserted after highlighted term */
@ -486,7 +486,6 @@ static int fts3SnippetText(
const char *zDoc; /* Document text to extract snippet from */
int nDoc; /* Size of zDoc in bytes */
int iCurrent = 0; /* Current token number of document */
int iStart = 0; /* Byte offset of current token */
int iEnd = 0; /* Byte offset of end of current token */
int isShiftDone = 0;
int iPos = pFragment->iPos;
@ -495,7 +494,7 @@ static int fts3SnippetText(
sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */
sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */
const char *ZDUMMY; /* Dummy arguments used with tokenizer */
int DUMMY1, DUMMY2, DUMMY3; /* Dummy arguments used with tokenizer */
int DUMMY1; /* Dummy arguments used with tokenizer */
zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, pFragment->iCol+1);
if( zDoc==0 ){
@ -506,10 +505,7 @@ static int fts3SnippetText(
}
nDoc = sqlite3_column_bytes(pCsr->pStmt, pFragment->iCol+1);
/* Open a token cursor on the document. Read all tokens up to and
** including token iPos (the first token of the snippet). Set variable
** iStart to the byte offset in zDoc of the start of token iPos.
*/
/* Open a token cursor on the document. */
pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);
if( rc!=SQLITE_OK ){
@ -518,50 +514,54 @@ static int fts3SnippetText(
pC->pTokenizer = pTab->pTokenizer;
while( rc==SQLITE_OK ){
int iBegin;
int iFin;
rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent);
int iBegin; /* Offset in zDoc of start of token */
int iFin; /* Offset in zDoc of end of token */
int isHighlight;
if( rc==SQLITE_OK ){
if( iCurrent<iPos ) continue;
rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent);
if( rc!=SQLITE_OK ){
if( rc==SQLITE_DONE ){
/* Special case - the last token of the snippet is also the last token
** of the column. Append any punctuation that occurred between the end
** of the previous token and the end of the document to the output.
** Then break out of the loop. */
rc = fts3StringAppend(pOut, &zDoc[iEnd], -1);
}
break;
}
if( iCurrent<iPos ){ continue; }
if( !isShiftDone ){
int n = nDoc - iBegin;
rc = fts3SnippetShift(pTab, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask);
if( rc!=SQLITE_OK || iCurrent<iPos ) continue;
isShiftDone = 1;
/* Now that the shift has been done, check if the initial "..." are
** required. They are required if (a) this is not the first fragment,
** or (b) this fragment does not begin at position 0 of its column.
*/
if( rc==SQLITE_OK && (iPos>0 || iFragment>0) ){
rc = fts3StringAppend(pOut, zEllipsis, -1);
}
if( iCurrent==iPos ){
iStart = iEnd = iBegin;
if( rc!=SQLITE_OK || iCurrent<iPos ) continue;
}
if( iCurrent>=(iPos+nSnippet) ){
rc = SQLITE_DONE;
}else{
iEnd = iFin;
if( hlmask & ((u64)1 << (iCurrent-iPos)) ){
if( fts3StringAppend(pOut, &zDoc[iStart], iBegin-iStart)
|| fts3StringAppend(pOut, zOpen, -1)
|| fts3StringAppend(pOut, &zDoc[iBegin], iEnd-iBegin)
|| fts3StringAppend(pOut, zClose, -1)
){
rc = SQLITE_NOMEM;
}
iStart = iEnd;
}
}
}
}
assert( rc!=SQLITE_OK );
if( rc==SQLITE_DONE ){
rc = fts3StringAppend(pOut, &zDoc[iStart], iEnd-iStart);
if( rc==SQLITE_OK ){
rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent);
if( rc==SQLITE_DONE ){
rc = fts3StringAppend(pOut, &zDoc[iEnd], -1);
}else if( rc==SQLITE_OK && zEllipsis ){
if( isLast ){
rc = fts3StringAppend(pOut, zEllipsis, -1);
}
break;
}
/* Set isHighlight to true if this term should be highlighted. */
isHighlight = (hlmask & ((u64)1 << (iCurrent-iPos)))!=0;
if( iCurrent>iPos ) rc = fts3StringAppend(pOut, &zDoc[iEnd], iBegin-iEnd);
if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zOpen, -1);
if( rc==SQLITE_OK ) rc = fts3StringAppend(pOut, &zDoc[iBegin], iFin-iBegin);
if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zClose, -1);
iEnd = iFin;
}
pMod->xClose(pC);
@ -803,13 +803,9 @@ void sqlite3Fts3Snippet(
assert( nFToken>0 );
for(i=0; i<nSnippet && rc==SQLITE_OK; i++){
SnippetFragment *p = &aSnippet[i];
const char *zTail = ((i==nSnippet-1) ? zEllipsis : 0);
if( i>0 || p->iPos>0 ){
fts3StringAppend(&res, zEllipsis, -1);
}
rc = fts3SnippetText(pCsr, p, nFToken, zStart, zEnd, zTail, &res);
rc = fts3SnippetText(pCsr, &aSnippet[i],
i, (i==nSnippet-1), nFToken, zStart, zEnd, zEllipsis, &res
);
}
snippet_out:
@ -951,7 +947,7 @@ void sqlite3Fts3Offsets(
sqlite3_snprintf(sizeof(aBuffer), aBuffer,
"%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart
);
fts3StringAppend(&res, aBuffer, -1);
rc = fts3StringAppend(&res, aBuffer, -1);
}
}
}

View File

@ -1,8 +1,5 @@
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
C Another\sattempt\sat\sfixing\sthe\stable\sgenerator\sin\slemon.\s\sAgain,\sthis\sdoes\nnot\seffect\sthe\sSQLite\sgrammar.
D 2010-01-07T03:53:04
C Fixes\sto\sproblems\sin\sFTS3\ssnippet()\sfunction\sfound\sby\sth3\stests.
D 2010-01-07T10:54:29
F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0
F Makefile.in c5827ead754ab32b9585487177c93bb00b9497b3
F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654
@ -59,7 +56,7 @@ F ext/fts2/mkfts2amal.tcl 974d5d438cb3f7c4a652639262f82418c1e4cff0
F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a
F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9
F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
F ext/fts3/fts3.c 04e95afa45789d7a3da59f458d4a8c1879c31446
F ext/fts3/fts3.c 7b1969f6b958059ab7c6c8450fa4f27cf88681c7
F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe
F ext/fts3/fts3Int.h 45bc7e284806042119722c8f4127ee944b77f0dd
F ext/fts3/fts3_expr.c f4ff02ebe854e97ac03ff00b38b728a9ab57fd4b
@ -67,7 +64,7 @@ F ext/fts3/fts3_hash.c 3c8f6387a4a7f5305588b203fa7c887d753e1f1c
F ext/fts3/fts3_hash.h 8331fb2206c609f9fc4c4735b9ab5ad6137c88ec
F ext/fts3/fts3_icu.c ac494aed69835008185299315403044664bda295
F ext/fts3/fts3_porter.c a651e287e02b49b565a6ccf9441959d434489156
F ext/fts3/fts3_snippet.c f3d4bc931f041d8eddaab7e9a9328381e799a31e
F ext/fts3/fts3_snippet.c 210bd2a9336e25d4d82a2e81c85abd9b9f8f31ca
F ext/fts3/fts3_tokenizer.c 1a49ee3d79cbf0b9386250370d9cbfe4bb89c8ff
F ext/fts3/fts3_tokenizer.h 13ffd9fcb397fec32a05ef5cd9e0fa659bf3dbd3
F ext/fts3/fts3_tokenizer1.c 11a604a53cff5e8c28882727bf794e5252e5227b
@ -408,7 +405,7 @@ F test/fts3malloc.test d02ee86b21edd2b43044e0d6dfdcd26cb6efddcb
F test/fts3near.test 2e318ee434d32babd27c167142e2b94ddbab4844
F test/fts3query.test ca21717993f51caa7e36231dba2499868f3f8a6f
F test/fts3rnd.test 153b4214bad6084a348814f3dd651a92e2f31d9b
F test/fts3snippet.test bfbceb2e292ddfdc6bb0b1b252ccea78bd6091be
F test/fts3snippet.test 16a05b313bf85da4d0b5cb683549fc2279430fbe
F test/func.test af106ed834001738246d276659406823e35cde7b
F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f
F test/fuzz.test a4174c3009a3e2c2e14b31b364ebf7ddb49de2c9
@ -787,14 +784,7 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
P c7e5966e3b031672f149d0b6e1f75f9bc40868fa
R c71e215e8efe7e980817f2247ed81780
U drh
Z eaa58ce585624f7f98bd421d59de7fbe
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.6 (GNU/Linux)
iD8DBQFLRVqjoxKgR168RlERAr98AJ0fYlm+0rcqam2O2GKZRNuoWdSXiwCeKGNO
zIN2JBTaxHsvBtPlPjm5964=
=BXEN
-----END PGP SIGNATURE-----
P e22c090f35b3a2bac64781d33aa1123ed765dbbf
R c4c872b237c2eb5c969c1e58ad74f097
U dan
Z 9b608ddefd032d66f262f5c7928a6a08

View File

@ -1 +1 @@
e22c090f35b3a2bac64781d33aa1123ed765dbbf
3b5ccd2682176929f4da8a3f39a7e8f58b179f18

View File

@ -1,10 +1,30 @@
# 2010 January 07
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
#
set testdir [file dirname $argv0]
source $testdir/tester.tcl
# If SQLITE_ENABLE_FTS3 is defined, omit this file.
# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
ifcapable !fts3 { finish_test ; return }
# Return list $L rebuilt one element at a time. The rebuilt value carries
# Tcl's generated list representation rather than whatever whitespace or
# quoting the caller happened to use, so two lists holding the same
# elements in the same order can be compared with [string compare].
#
proc normalize {L} {
  set rebuilt {}
  foreach element $L {
    lappend rebuilt $element
  }
  return $rebuilt
}
do_test fts3snippet-1.1 {
execsql {
CREATE VIRTUAL TABLE ft USING fts3;
@ -12,12 +32,6 @@ do_test fts3snippet-1.1 {
}
} {}
proc normalize {L} {
set ret [list]
foreach l $L {lappend ret $l}
return $ret
}
# Check the offsets() output for a query on the single term 'xxx'. The
# expected value is one row holding a flat list of 4-integer groups.
# NOTE(review): each group is presumably "column term byte-offset byte-size";
# the rows inserted into ft are not visible here - confirm against the fixture.
do_test fts3snippet-1.2 {
execsql { SELECT offsets(ft) FROM ft WHERE ft MATCH 'xxx' }
} {{0 0 0 3 0 0 4 3 0 0 8 3 0 0 12 3}}
@ -64,5 +78,31 @@ do_test fts3snippet-1.5 {
0 2 12 3
}]]
# Rebuild the fixture for the fts3snippet-2.* cases: drop any existing ft
# table and recreate it with a single row whose text is the ten words
# "one" through "ten". The statements are expected to return no rows.
do_test fts3snippet-2.1 {
execsql {
DROP TABLE IF EXISTS ft;
CREATE VIRTUAL TABLE ft USING fts3;
INSERT INTO ft VALUES('one two three four five six seven eight nine ten');
}
} {}
# Table-driven snippet() checks. Each row is {tn expr res}: the test number,
# the MATCH expression (a single word), and the expected snippet text.
# Every expected snippet holds five words with the matched word wrapped in
# "[" and "]". The "..." marker appears only on a side where the snippet
# stops short of the start or end of the stored text.
# NOTE(review): the trailing arguments 0 and 5 are presumably the column
# index and the number of tokens in the snippet - confirm against snippet().
foreach {tn expr res} {
1 one "[one] two three four five..."
2 two "one [two] three four five..."
3 three "one two [three] four five..."
4 four "...two three [four] five six..."
5 five "...three four [five] six seven..."
6 six "...four five [six] seven eight..."
7 seven "...five six [seven] eight nine..."
8 eight "...six seven [eight] nine ten"
9 nine "...six seven eight [nine] ten"
10 ten "...six seven eight nine [ten]"
} {
# $expr is referenced from inside the SQL text and the single result row
# is compared against $res.
do_test fts3snippet-2.2.$tn {
execsql {
SELECT snippet(ft, '[', ']', '...', 0, 5) FROM ft WHERE ft MATCH $expr
}
} [list $res]
}
finish_test