diff --git a/ext/fts3/fts3.c b/ext/fts3/fts3.c
index 1d52b687d2..297065dbd7 100644
--- a/ext/fts3/fts3.c
+++ b/ext/fts3/fts3.c
@@ -1709,6 +1709,74 @@ static int fts3PhraseSelect(
return rc;
}
+/*
+** Merge two doclists under a NEAR constraint. A buffer of nLeft+nRight+1
+** bytes is obtained from sqlite3_malloc() and handed to fts3DoclistMerge()
+** along with both input doclists. The two distance parameters passed on
+** are (nNear+nTokenRight) and (nNear+nTokenLeft), in that order.
+**
+** On success SQLITE_OK is returned and *paOut points to the new buffer,
+** which the caller must release using sqlite3_free(). *pnOut is written
+** by fts3DoclistMerge(). On failure *paOut is set to 0 and an error code
+** is returned (SQLITE_NOMEM if the allocation failed). Neither input
+** doclist is freed by this function.
+*/
+static int fts3NearMerge(
+  int mergetype,                  /* MERGE_POS_NEAR or MERGE_NEAR */
+  int nNear,                      /* Parameter to NEAR operator */
+  int nTokenLeft,                 /* Number of tokens in LHS phrase arg */
+  char *aLeft,                    /* Doclist for LHS (incl. positions) */
+  int nLeft,                      /* Size of LHS doclist in bytes */
+  int nTokenRight,                /* As nTokenLeft */
+  char *aRight,                   /* As aLeft */
+  int nRight,                     /* As nLeft */
+  char **paOut,                   /* OUT: Results of merge (malloced) */
+  int *pnOut                      /* OUT: Size of output buffer */
+){
+  char *aOut;
+  int rc;
+
+  /* Both alternatives must be compared against mergetype. A bare constant
+  ** on the right of || would make the assert pass for any merge type. */
+  assert( mergetype==MERGE_POS_NEAR || mergetype==MERGE_NEAR );
+
+  aOut = sqlite3_malloc(nLeft+nRight+1);
+  if( aOut==0 ){
+    rc = SQLITE_NOMEM;
+  }else{
+    rc = fts3DoclistMerge(mergetype, nNear+nTokenRight, nNear+nTokenLeft,
+        aOut, pnOut, aLeft, nLeft, aRight, nRight
+    );
+    if( rc!=SQLITE_OK ){
+      /* Never hand a partially written buffer back to the caller. */
+      sqlite3_free(aOut);
+      aOut = 0;
+    }
+  }
+
+  *paOut = aOut;
+  return rc;
+}
+
+/*
+** Trim the doclists of the two phrases on either side of a NEAR operator
+** against each other, using fts3NearMerge() with MERGE_POS_NEAR.
+**
+** If either phrase has no doclist, both doclists are freed and set to 0
+** and SQLITE_OK is returned. Otherwise pRight->aDoclist is replaced first
+** (the left doclist is the LHS of that merge), then pLeft->aDoclist is
+** replaced (the already-trimmed right doclist is the LHS of that merge).
+**
+** If the first merge fails, its error code is returned and neither phrase
+** is modified. If the second merge fails, pLeft->aDoclist is freed and
+** left set to 0, and the error code is returned.
+*/
+int sqlite3Fts3ExprNearTrim(Fts3Expr *pLeft, Fts3Expr *pRight, int nNear){
+  char *aTrim;                    /* Output buffer of the latest merge */
+  int nTrim;                      /* Size of aTrim as reported by the merge */
+  int rc;                         /* Return code */
+
+  /* Nothing can satisfy the NEAR if one side has no entries at all. */
+  if( !pLeft->aDoclist || !pRight->aDoclist ){
+    sqlite3_free(pLeft->aDoclist);
+    sqlite3_free(pRight->aDoclist);
+    pRight->aDoclist = 0;
+    pLeft->aDoclist = 0;
+    return SQLITE_OK;
+  }
+
+  /* Pass 1: rebuild the right-hand doclist. */
+  rc = fts3NearMerge(MERGE_POS_NEAR, nNear,
+      pLeft->pPhrase->nToken, pLeft->aDoclist, pLeft->nDoclist,
+      pRight->pPhrase->nToken, pRight->aDoclist, pRight->nDoclist,
+      &aTrim, &nTrim
+  );
+  if( rc==SQLITE_OK ){
+    sqlite3_free(pRight->aDoclist);
+    pRight->aDoclist = aTrim;
+    pRight->nDoclist = nTrim;
+
+    /* Pass 2: rebuild the left-hand doclist with the operands swapped. */
+    rc = fts3NearMerge(MERGE_POS_NEAR, nNear,
+        pRight->pPhrase->nToken, pRight->aDoclist, pRight->nDoclist,
+        pLeft->pPhrase->nToken, pLeft->aDoclist, pLeft->nDoclist,
+        &aTrim, &nTrim
+    );
+    sqlite3_free(pLeft->aDoclist);
+    pLeft->aDoclist = aTrim;
+    pLeft->nDoclist = nTrim;
+  }
+  return rc;
+}
+
/*
** Evaluate the full-text expression pExpr against fts3 table pTab. Store
** the resulting doclist in *paOut and *pnOut.
@@ -1753,9 +1821,6 @@ static int evalFts3Expr(
Fts3Expr *pLeft;
Fts3Expr *pRight;
int mergetype = isReqPos ? MERGE_POS_NEAR : MERGE_NEAR;
- int nParam1;
- int nParam2;
- char *aBuffer;
if( pExpr->pParent && pExpr->pParent->eType==FTSQUERY_NEAR ){
mergetype = MERGE_POS_NEAR;
@@ -1768,17 +1833,11 @@ static int evalFts3Expr(
assert( pRight->eType==FTSQUERY_PHRASE );
assert( pLeft->eType==FTSQUERY_PHRASE );
- nParam1 = pExpr->nNear+1;
- nParam2 = nParam1+pLeft->pPhrase->nToken+pRight->pPhrase->nToken-2;
- aBuffer = sqlite3_malloc(nLeft+nRight+1);
- rc = fts3DoclistMerge(mergetype, nParam1, nParam2, aBuffer,
- pnOut, aLeft, nLeft, aRight, nRight
+ rc = fts3NearMerge(mergetype, pExpr->nNear,
+ pLeft->pPhrase->nToken, aLeft, nLeft,
+ pRight->pPhrase->nToken, aRight, nRight,
+ paOut, pnOut
);
- if( rc!=SQLITE_OK ){
- sqlite3_free(aBuffer);
- }else{
- *paOut = aBuffer;
- }
sqlite3_free(aLeft);
break;
}
@@ -2064,7 +2123,7 @@ char *sqlite3Fts3FindPositions(
pCsr++;
pCsr += sqlite3Fts3GetVarint32(pCsr, &iThis);
}
- if( iCol==iThis ) return pCsr;
+ if( iCol==iThis && (*pCsr&0xFE) ) return pCsr;
}
return 0;
}
@@ -2116,45 +2175,8 @@ static void fts3SnippetFunc(
const char *zStart = "";
const char *zEnd = "";
const char *zEllipsis = "...";
-
- /* There must be at least one argument passed to this function (otherwise
- ** the non-overloaded version would have been called instead of this one).
- */
- assert( nVal>=1 );
-
- if( nVal>4 ){
- sqlite3_result_error(pContext,
- "wrong number of arguments to function snippet()", -1);
- return;
- }
- if( fts3FunctionArg(pContext, "snippet", apVal[0], &pCsr) ) return;
-
- switch( nVal ){
- case 4: zEllipsis = (const char*)sqlite3_value_text(apVal[3]);
- case 3: zEnd = (const char*)sqlite3_value_text(apVal[2]);
- case 2: zStart = (const char*)sqlite3_value_text(apVal[1]);
- }
- if( !zEllipsis || !zEnd || !zStart ){
- sqlite3_result_error_nomem(pContext);
- }else if( SQLITE_OK==fts3CursorSeek(pContext, pCsr) ){
- sqlite3Fts3Snippet(pContext, pCsr, zStart, zEnd, zEllipsis);
- }
-}
-
-/*
-** Implementation of the snippet2() function for FTS3
-*/
-static void fts3Snippet2Func(
- sqlite3_context *pContext, /* SQLite function call context */
- int nVal, /* Size of apVal[] array */
- sqlite3_value **apVal /* Array of arguments */
-){
- Fts3Cursor *pCsr; /* Cursor handle passed through apVal[0] */
- const char *zStart = "";
- const char *zEnd = "";
- const char *zEllipsis = "...";
int iCol = -1;
- int nToken = 10;
+ int nToken = 15;
/* There must be at least one argument passed to this function (otherwise
** the non-overloaded version would have been called instead of this one).
@@ -2178,7 +2200,7 @@ static void fts3Snippet2Func(
if( !zEllipsis || !zEnd || !zStart ){
sqlite3_result_error_nomem(pContext);
}else if( SQLITE_OK==fts3CursorSeek(pContext, pCsr) ){
- sqlite3Fts3Snippet2(pContext, pCsr, zStart, zEnd, zEllipsis, iCol, nToken);
+ sqlite3Fts3Snippet(pContext, pCsr, zStart, zEnd, zEllipsis, iCol, nToken);
}
}
@@ -2279,7 +2301,6 @@ static int fts3FindFunctionMethod(
void (*xFunc)(sqlite3_context*,int,sqlite3_value**);
} aOverload[] = {
{ "snippet", fts3SnippetFunc },
- { "snippet2", fts3Snippet2Func },
{ "offsets", fts3OffsetsFunc },
{ "optimize", fts3OptimizeFunc },
{ "matchinfo", fts3MatchinfoFunc },
@@ -2429,7 +2450,6 @@ int sqlite3Fts3Init(sqlite3 *db){
if( SQLITE_OK==rc
&& SQLITE_OK==(rc = sqlite3Fts3InitHashTable(db, pHash, "fts3_tokenizer"))
&& SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1))
- && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet2", -1))
&& SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", 1))
&& SQLITE_OK==(rc = sqlite3_overload_function(db, "matchinfo", -1))
&& SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", 1))
diff --git a/ext/fts3/fts3Int.h b/ext/fts3/fts3Int.h
index ceb13ee7d6..22d3885014 100644
--- a/ext/fts3/fts3Int.h
+++ b/ext/fts3/fts3Int.h
@@ -279,6 +279,7 @@ void sqlite3Fts3Dequote(char *);
char *sqlite3Fts3FindPositions(Fts3Expr *, sqlite3_int64, int);
int sqlite3Fts3ExprLoadDoclist(Fts3Table *, Fts3Expr *);
+int sqlite3Fts3ExprNearTrim(Fts3Expr *, Fts3Expr *, int);
/* fts3_tokenizer.c */
const char *sqlite3Fts3NextToken(const char *, int *);
@@ -289,10 +290,7 @@ int sqlite3Fts3InitTokenizer(Fts3Hash *pHash,
/* fts3_snippet.c */
void sqlite3Fts3Offsets(sqlite3_context*, Fts3Cursor*);
-void sqlite3Fts3Snippet(sqlite3_context*, Fts3Cursor*,
- const char *, const char *, const char *
-);
-void sqlite3Fts3Snippet2(sqlite3_context *, Fts3Cursor *, const char *,
+void sqlite3Fts3Snippet(sqlite3_context *, Fts3Cursor *, const char *,
const char *, const char *, int, int
);
void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *);
diff --git a/ext/fts3/fts3_snippet.c b/ext/fts3/fts3_snippet.c
index 287c68b13b..78eb2ecc9f 100644
--- a/ext/fts3/fts3_snippet.c
+++ b/ext/fts3/fts3_snippet.c
@@ -18,723 +18,6 @@
#include
#include
-typedef struct Snippet Snippet;
-
-/*
-** An instance of the following structure keeps track of generated
-** matching-word offset information and snippets.
-*/
-struct Snippet {
- int nMatch; /* Total number of matches */
- int nAlloc; /* Space allocated for aMatch[] */
- struct snippetMatch { /* One entry for each matching term */
- char snStatus; /* Status flag for use while constructing snippets */
- short int nByte; /* Number of bytes in the term */
- short int iCol; /* The column that contains the match */
- short int iTerm; /* The index in Query.pTerms[] of the matching term */
- int iToken; /* The index of the matching document token */
- int iStart; /* The offset to the first character of the term */
- } *aMatch; /* Points to space obtained from malloc */
- char *zOffset; /* Text rendering of aMatch[] */
- int nOffset; /* strlen(zOffset) */
- char *zSnippet; /* Snippet text */
- int nSnippet; /* strlen(zSnippet) */
-};
-
-
-/* It is not safe to call isspace(), tolower(), or isalnum() on
-** hi-bit-set characters. This is the same solution used in the
-** tokenizer.
-*/
-static int fts3snippetIsspace(char c){
- return (c&0x80)==0 ? isspace(c) : 0;
-}
-
-
-/*
-** A StringBuffer object holds a zero-terminated string that grows
-** arbitrarily by appending. Space to hold the string is obtained
-** from sqlite3_malloc(). After any memory allocation failure,
-** StringBuffer.z is set to NULL and no further allocation is attempted.
-*/
-typedef struct StringBuffer {
- char *z; /* Text of the string. Space from malloc. */
- int nUsed; /* Number bytes of z[] used, not counting \000 terminator */
- int nAlloc; /* Bytes allocated for z[] */
-} StringBuffer;
-
-
-/*
-** Initialize a new StringBuffer.
-*/
-static void fts3SnippetSbInit(StringBuffer *p){
- p->nAlloc = 100;
- p->nUsed = 0;
- p->z = sqlite3_malloc( p->nAlloc );
-}
-
-/*
-** Append text to the string buffer.
-*/
-static void fts3SnippetAppend(StringBuffer *p, const char *zNew, int nNew){
- if( p->z==0 ) return;
- if( nNew<0 ) nNew = (int)strlen(zNew);
- if( p->nUsed + nNew >= p->nAlloc ){
- int nAlloc;
- char *zNew;
-
- nAlloc = p->nUsed + nNew + p->nAlloc;
- zNew = sqlite3_realloc(p->z, nAlloc);
- if( zNew==0 ){
- sqlite3_free(p->z);
- p->z = 0;
- return;
- }
- p->z = zNew;
- p->nAlloc = nAlloc;
- }
- memcpy(&p->z[p->nUsed], zNew, nNew);
- p->nUsed += nNew;
- p->z[p->nUsed] = 0;
-}
-
-/* If the StringBuffer ends in something other than white space, add a
-** single space character to the end.
-*/
-static void fts3SnippetAppendWhiteSpace(StringBuffer *p){
- if( p->z && p->nUsed && !fts3snippetIsspace(p->z[p->nUsed-1]) ){
- fts3SnippetAppend(p, " ", 1);
- }
-}
-
-/* Remove white space from the end of the StringBuffer */
-static void fts3SnippetTrimWhiteSpace(StringBuffer *p){
- if( p->z ){
- while( p->nUsed && fts3snippetIsspace(p->z[p->nUsed-1]) ){
- p->nUsed--;
- }
- p->z[p->nUsed] = 0;
- }
-}
-
-/*
-** Release all memory associated with the Snippet structure passed as
-** an argument.
-*/
-static void fts3SnippetFree(Snippet *p){
- if( p ){
- sqlite3_free(p->aMatch);
- sqlite3_free(p->zOffset);
- sqlite3_free(p->zSnippet);
- sqlite3_free(p);
- }
-}
-
-/*
-** Append a single entry to the p->aMatch[] log.
-*/
-static int snippetAppendMatch(
- Snippet *p, /* Append the entry to this snippet */
- int iCol, int iTerm, /* The column and query term */
- int iToken, /* Matching token in document */
- int iStart, int nByte /* Offset and size of the match */
-){
- int i;
- struct snippetMatch *pMatch;
- if( p->nMatch+1>=p->nAlloc ){
- struct snippetMatch *pNew;
- p->nAlloc = p->nAlloc*2 + 10;
- pNew = sqlite3_realloc(p->aMatch, p->nAlloc*sizeof(p->aMatch[0]) );
- if( pNew==0 ){
- p->aMatch = 0;
- p->nMatch = 0;
- p->nAlloc = 0;
- return SQLITE_NOMEM;
- }
- p->aMatch = pNew;
- }
- i = p->nMatch++;
- pMatch = &p->aMatch[i];
- pMatch->iCol = (short)iCol;
- pMatch->iTerm = (short)iTerm;
- pMatch->iToken = iToken;
- pMatch->iStart = iStart;
- pMatch->nByte = (short)nByte;
- return SQLITE_OK;
-}
-
-/*
-** Sizing information for the circular buffer used in snippetOffsetsOfColumn()
-*/
-#define FTS3_ROTOR_SZ (32)
-#define FTS3_ROTOR_MASK (FTS3_ROTOR_SZ-1)
-
-/*
-** Function to iterate through the tokens of a compiled expression.
-**
-** Except, skip all tokens on the right-hand side of a NOT operator.
-** This function is used to find tokens as part of snippet and offset
-** generation and we do nt want snippets and offsets to report matches
-** for tokens on the RHS of a NOT.
-*/
-static int fts3NextExprToken(Fts3Expr **ppExpr, int *piToken){
- Fts3Expr *p = *ppExpr;
- int iToken = *piToken;
- if( iToken<0 ){
- /* In this case the expression p is the root of an expression tree.
- ** Move to the first token in the expression tree.
- */
- while( p->pLeft ){
- p = p->pLeft;
- }
- iToken = 0;
- }else{
- assert(p && p->eType==FTSQUERY_PHRASE );
- if( iToken<(p->pPhrase->nToken-1) ){
- iToken++;
- }else{
- iToken = 0;
- while( p->pParent && p->pParent->pLeft!=p ){
- assert( p->pParent->pRight==p );
- p = p->pParent;
- }
- p = p->pParent;
- if( p ){
- assert( p->pRight!=0 );
- p = p->pRight;
- while( p->pLeft ){
- p = p->pLeft;
- }
- }
- }
- }
-
- *ppExpr = p;
- *piToken = iToken;
- return p?1:0;
-}
-
-/*
-** Return TRUE if the expression node pExpr is located beneath the
-** RHS of a NOT operator.
-*/
-static int fts3ExprBeneathNot(Fts3Expr *p){
- Fts3Expr *pParent;
- while( p ){
- pParent = p->pParent;
- if( pParent && pParent->eType==FTSQUERY_NOT && pParent->pRight==p ){
- return 1;
- }
- p = pParent;
- }
- return 0;
-}
-
-/*
-** Add entries to pSnippet->aMatch[] for every match that occurs against
-** document zDoc[0..nDoc-1] which is stored in column iColumn.
-*/
-static int snippetOffsetsOfColumn(
- Fts3Cursor *pCur, /* The fulltest search cursor */
- Snippet *pSnippet, /* The Snippet object to be filled in */
- int iColumn, /* Index of fulltext table column */
- const char *zDoc, /* Text of the fulltext table column */
- int nDoc /* Length of zDoc in bytes */
-){
- const sqlite3_tokenizer_module *pTModule; /* The tokenizer module */
- sqlite3_tokenizer *pTokenizer; /* The specific tokenizer */
- sqlite3_tokenizer_cursor *pTCursor; /* Tokenizer cursor */
- Fts3Table *pVtab; /* The full text index */
- int nColumn; /* Number of columns in the index */
- int i, j; /* Loop counters */
- int rc; /* Return code */
- unsigned int match, prevMatch; /* Phrase search bitmasks */
- const char *zToken; /* Next token from the tokenizer */
- int nToken; /* Size of zToken */
- int iBegin, iEnd, iPos; /* Offsets of beginning and end */
-
- /* The following variables keep a circular buffer of the last
- ** few tokens */
- unsigned int iRotor = 0; /* Index of current token */
- int iRotorBegin[FTS3_ROTOR_SZ]; /* Beginning offset of token */
- int iRotorLen[FTS3_ROTOR_SZ]; /* Length of token */
-
- pVtab = (Fts3Table *)pCur->base.pVtab;
- nColumn = pVtab->nColumn;
- pTokenizer = pVtab->pTokenizer;
- pTModule = pTokenizer->pModule;
- rc = pTModule->xOpen(pTokenizer, zDoc, nDoc, &pTCursor);
- if( rc ) return rc;
- pTCursor->pTokenizer = pTokenizer;
-
- prevMatch = 0;
- while( (rc = pTModule->xNext(pTCursor, &zToken, &nToken,
- &iBegin, &iEnd, &iPos))==SQLITE_OK ){
- Fts3Expr *pIter = pCur->pExpr;
- int iIter = -1;
- iRotorBegin[iRotor&FTS3_ROTOR_MASK] = iBegin;
- iRotorLen[iRotor&FTS3_ROTOR_MASK] = iEnd-iBegin;
- match = 0;
- for(i=0; i<(FTS3_ROTOR_SZ-1) && fts3NextExprToken(&pIter, &iIter); i++){
- int nPhrase; /* Number of tokens in current phrase */
- struct PhraseToken *pToken; /* Current token */
- int iCol; /* Column index */
-
- if( fts3ExprBeneathNot(pIter) ) continue;
- nPhrase = pIter->pPhrase->nToken;
- pToken = &pIter->pPhrase->aToken[iIter];
- iCol = pIter->pPhrase->iColumn;
- if( iCol>=0 && iColn>nToken ) continue;
- if( !pToken->isPrefix && pToken->nn<=nToken );
- if( memcmp(pToken->z, zToken, pToken->n) ) continue;
- if( iIter>0 && (prevMatch & (1<=0; j--){
- int k = (iRotor-j) & FTS3_ROTOR_MASK;
- rc = snippetAppendMatch(pSnippet, iColumn, i-j, iPos-j,
- iRotorBegin[k], iRotorLen[k]);
- if( rc ) goto end_offsets_of_column;
- }
- }
- }
- prevMatch = match<<1;
- iRotor++;
- }
-end_offsets_of_column:
- pTModule->xClose(pTCursor);
- return rc==SQLITE_DONE ? SQLITE_OK : rc;
-}
-
-/*
-** Remove entries from the pSnippet structure to account for the NEAR
-** operator. When this is called, pSnippet contains the list of token
-** offsets produced by treating all NEAR operators as AND operators.
-** This function removes any entries that should not be present after
-** accounting for the NEAR restriction. For example, if the queried
-** document is:
-**
-** "A B C D E A"
-**
-** and the query is:
-**
-** A NEAR/0 E
-**
-** then when this function is called the Snippet contains token offsets
-** 0, 4 and 5. This function removes the "0" entry (because the first A
-** is not near enough to an E).
-**
-** When this function is called, the value pointed to by parameter piLeft is
-** the integer id of the left-most token in the expression tree headed by
-** pExpr. This function increments *piLeft by the total number of tokens
-** in the expression tree headed by pExpr.
-**
-** Return 1 if any trimming occurs. Return 0 if no trimming is required.
-*/
-static int trimSnippetOffsets(
- Fts3Expr *pExpr, /* The search expression */
- Snippet *pSnippet, /* The set of snippet offsets to be trimmed */
- int *piLeft /* Index of left-most token in pExpr */
-){
- if( pExpr ){
- if( trimSnippetOffsets(pExpr->pLeft, pSnippet, piLeft) ){
- return 1;
- }
-
- switch( pExpr->eType ){
- case FTSQUERY_PHRASE:
- *piLeft += pExpr->pPhrase->nToken;
- break;
- case FTSQUERY_NEAR: {
- /* The right-hand-side of a NEAR operator is always a phrase. The
- ** left-hand-side is either a phrase or an expression tree that is
- ** itself headed by a NEAR operator. The following initializations
- ** set local variable iLeft to the token number of the left-most
- ** token in the right-hand phrase, and iRight to the right most
- ** token in the same phrase. For example, if we had:
- **
- ** MATCH '"abc def" NEAR/2 "ghi jkl"'
- **
- ** then iLeft will be set to 2 (token number of ghi) and nToken will
- ** be set to 4.
- */
- Fts3Expr *pLeft = pExpr->pLeft;
- Fts3Expr *pRight = pExpr->pRight;
- int iLeft = *piLeft;
- int nNear = pExpr->nNear;
- int nToken = pRight->pPhrase->nToken;
- int jj, ii;
- if( pLeft->eType==FTSQUERY_NEAR ){
- pLeft = pLeft->pRight;
- }
- assert( pRight->eType==FTSQUERY_PHRASE );
- assert( pLeft->eType==FTSQUERY_PHRASE );
- nToken += pLeft->pPhrase->nToken;
-
- for(ii=0; iinMatch; ii++){
- struct snippetMatch *p = &pSnippet->aMatch[ii];
- if( p->iTerm==iLeft ){
- int isOk = 0;
- /* Snippet ii is an occurence of query term iLeft in the document.
- ** It occurs at position (p->iToken) of the document. We now
- ** search for an instance of token (iLeft-1) somewhere in the
- ** range (p->iToken - nNear)...(p->iToken + nNear + nToken) within
- ** the set of snippetMatch structures. If one is found, proceed.
- ** If one cannot be found, then remove snippets ii..(ii+N-1)
- ** from the matching snippets, where N is the number of tokens
- ** in phrase pRight->pPhrase.
- */
- for(jj=0; isOk==0 && jjnMatch; jj++){
- struct snippetMatch *p2 = &pSnippet->aMatch[jj];
- if( p2->iTerm==(iLeft-1) ){
- if( p2->iToken>=(p->iToken-nNear-1)
- && p2->iToken<(p->iToken+nNear+nToken)
- ){
- isOk = 1;
- }
- }
- }
- if( !isOk ){
- int kk;
- for(kk=0; kkpPhrase->nToken; kk++){
- pSnippet->aMatch[kk+ii].iTerm = -2;
- }
- return 1;
- }
- }
- if( p->iTerm==(iLeft-1) ){
- int isOk = 0;
- for(jj=0; isOk==0 && jjnMatch; jj++){
- struct snippetMatch *p2 = &pSnippet->aMatch[jj];
- if( p2->iTerm==iLeft ){
- if( p2->iToken<=(p->iToken+nNear+1)
- && p2->iToken>(p->iToken-nNear-nToken)
- ){
- isOk = 1;
- }
- }
- }
- if( !isOk ){
- int kk;
- for(kk=0; kkpPhrase->nToken; kk++){
- pSnippet->aMatch[ii-kk].iTerm = -2;
- }
- return 1;
- }
- }
- }
- break;
- }
- }
-
- if( trimSnippetOffsets(pExpr->pRight, pSnippet, piLeft) ){
- return 1;
- }
- }
- return 0;
-}
-
-/*
-** Compute all offsets for the current row of the query.
-** If the offsets have already been computed, this routine is a no-op.
-*/
-static int snippetAllOffsets(Fts3Cursor *pCsr, Snippet **ppSnippet){
- Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; /* The FTS3 virtual table */
- int nColumn; /* Number of columns. Docid does count */
- int iColumn; /* Index of of a column */
- int i; /* Loop index */
- int iFirst; /* First column to search */
- int iLast; /* Last coumn to search */
- int iTerm = 0;
- Snippet *pSnippet;
- int rc = SQLITE_OK;
-
- if( pCsr->pExpr==0 ){
- return SQLITE_OK;
- }
-
- pSnippet = (Snippet *)sqlite3_malloc(sizeof(Snippet));
- *ppSnippet = pSnippet;
- if( !pSnippet ){
- return SQLITE_NOMEM;
- }
- memset(pSnippet, 0, sizeof(Snippet));
-
- nColumn = p->nColumn;
- iColumn = (pCsr->eSearch - 2);
- if( iColumn<0 || iColumn>=nColumn ){
- /* Look for matches over all columns of the full-text index */
- iFirst = 0;
- iLast = nColumn-1;
- }else{
- /* Look for matches in the iColumn-th column of the index only */
- iFirst = iColumn;
- iLast = iColumn;
- }
- for(i=iFirst; rc==SQLITE_OK && i<=iLast; i++){
- const char *zDoc;
- int nDoc;
- zDoc = (const char*)sqlite3_column_text(pCsr->pStmt, i+1);
- nDoc = sqlite3_column_bytes(pCsr->pStmt, i+1);
- if( zDoc==0 && sqlite3_column_type(pCsr->pStmt, i+1)!=SQLITE_NULL ){
- rc = SQLITE_NOMEM;
- }else{
- rc = snippetOffsetsOfColumn(pCsr, pSnippet, i, zDoc, nDoc);
- }
- }
-
- while( trimSnippetOffsets(pCsr->pExpr, pSnippet, &iTerm) ){
- iTerm = 0;
- }
-
- return rc;
-}
-
-/*
-** Convert the information in the aMatch[] array of the snippet
-** into the string zOffset[0..nOffset-1]. This string is used as
-** the return of the SQL offsets() function.
-*/
-static void snippetOffsetText(Snippet *p){
- int i;
- int cnt = 0;
- StringBuffer sb;
- char zBuf[200];
- if( p->zOffset ) return;
- fts3SnippetSbInit(&sb);
- for(i=0; inMatch; i++){
- struct snippetMatch *pMatch = &p->aMatch[i];
- if( pMatch->iTerm>=0 ){
- /* If snippetMatch.iTerm is less than 0, then the match was
- ** discarded as part of processing the NEAR operator (see the
- ** trimSnippetOffsetsForNear() function for details). Ignore
- ** it in this case
- */
- zBuf[0] = ' ';
- sqlite3_snprintf(sizeof(zBuf)-1, &zBuf[cnt>0], "%d %d %d %d",
- pMatch->iCol, pMatch->iTerm, pMatch->iStart, pMatch->nByte);
- fts3SnippetAppend(&sb, zBuf, -1);
- cnt++;
- }
- }
- p->zOffset = sb.z;
- p->nOffset = sb.z ? sb.nUsed : 0;
-}
-
-/*
-** zDoc[0..nDoc-1] is phrase of text. aMatch[0..nMatch-1] are a set
-** of matching words some of which might be in zDoc. zDoc is column
-** number iCol.
-**
-** iBreak is suggested spot in zDoc where we could begin or end an
-** excerpt. Return a value similar to iBreak but possibly adjusted
-** to be a little left or right so that the break point is better.
-*/
-static int wordBoundary(
- int iBreak, /* The suggested break point */
- const char *zDoc, /* Document text */
- int nDoc, /* Number of bytes in zDoc[] */
- struct snippetMatch *aMatch, /* Matching words */
- int nMatch, /* Number of entries in aMatch[] */
- int iCol /* The column number for zDoc[] */
-){
- int i;
- if( iBreak<=10 ){
- return 0;
- }
- if( iBreak>=nDoc-10 ){
- return nDoc;
- }
- for(i=0; ALWAYS(i0 && aMatch[i-1].iStart+aMatch[i-1].nByte>=iBreak ){
- return aMatch[i-1].iStart;
- }
- }
- for(i=1; i<=10; i++){
- if( fts3snippetIsspace(zDoc[iBreak-i]) ){
- return iBreak - i + 1;
- }
- if( fts3snippetIsspace(zDoc[iBreak+i]) ){
- return iBreak + i + 1;
- }
- }
- return iBreak;
-}
-
-
-
-/*
-** Allowed values for Snippet.aMatch[].snStatus
-*/
-#define SNIPPET_IGNORE 0 /* It is ok to omit this match from the snippet */
-#define SNIPPET_DESIRED 1 /* We want to include this match in the snippet */
-
-/*
-** Generate the text of a snippet.
-*/
-static void snippetText(
- Fts3Cursor *pCursor, /* The cursor we need the snippet for */
- Snippet *pSnippet,
- const char *zStartMark, /* Markup to appear before each match */
- const char *zEndMark, /* Markup to appear after each match */
- const char *zEllipsis /* Ellipsis mark */
-){
- int i, j;
- struct snippetMatch *aMatch;
- int nMatch;
- int nDesired;
- StringBuffer sb;
- int tailCol;
- int tailOffset;
- int iCol;
- int nDoc;
- const char *zDoc;
- int iStart, iEnd;
- int tailEllipsis = 0;
- int iMatch;
-
-
- sqlite3_free(pSnippet->zSnippet);
- pSnippet->zSnippet = 0;
- aMatch = pSnippet->aMatch;
- nMatch = pSnippet->nMatch;
- fts3SnippetSbInit(&sb);
-
- for(i=0; i0; i++){
- if( aMatch[i].snStatus!=SNIPPET_DESIRED ) continue;
- nDesired--;
- iCol = aMatch[i].iCol;
- zDoc = (const char*)sqlite3_column_text(pCursor->pStmt, iCol+1);
- nDoc = sqlite3_column_bytes(pCursor->pStmt, iCol+1);
- iStart = aMatch[i].iStart - 40;
- iStart = wordBoundary(iStart, zDoc, nDoc, aMatch, nMatch, iCol);
- if( iStart<=10 ){
- iStart = 0;
- }
- if( iCol==tailCol && iStart<=tailOffset+20 ){
- iStart = tailOffset;
- }
- if( (iCol!=tailCol && tailCol>=0) || iStart!=tailOffset ){
- fts3SnippetTrimWhiteSpace(&sb);
- fts3SnippetAppendWhiteSpace(&sb);
- fts3SnippetAppend(&sb, zEllipsis, -1);
- fts3SnippetAppendWhiteSpace(&sb);
- }
- iEnd = aMatch[i].iStart + aMatch[i].nByte + 40;
- iEnd = wordBoundary(iEnd, zDoc, nDoc, aMatch, nMatch, iCol);
- if( iEnd>=nDoc-10 ){
- iEnd = nDoc;
- tailEllipsis = 0;
- }else{
- tailEllipsis = 1;
- }
- while( iMatchzSnippet = sb.z;
- pSnippet->nSnippet = sb.z ? sb.nUsed : 0;
-}
-
-void sqlite3Fts3Offsets(
- sqlite3_context *pCtx, /* SQLite function call context */
- Fts3Cursor *pCsr /* Cursor object */
-){
- Snippet *p; /* Snippet structure */
- int rc = snippetAllOffsets(pCsr, &p);
- if( rc==SQLITE_OK ){
- snippetOffsetText(p);
- if( p->zOffset ){
- sqlite3_result_text(pCtx, p->zOffset, p->nOffset, SQLITE_TRANSIENT);
- }else{
- sqlite3_result_error_nomem(pCtx);
- }
- }else{
- sqlite3_result_error_nomem(pCtx);
- }
- fts3SnippetFree(p);
-}
-
-void sqlite3Fts3Snippet(
- sqlite3_context *pCtx, /* SQLite function call context */
- Fts3Cursor *pCsr, /* Cursor object */
- const char *zStart, /* Snippet start text - "" */
- const char *zEnd, /* Snippet end text - "" */
- const char *zEllipsis /* Snippet ellipsis text - "..." */
-){
- Snippet *p; /* Snippet structure */
- int rc = snippetAllOffsets(pCsr, &p);
- if( rc==SQLITE_OK ){
- snippetText(pCsr, p, zStart, zEnd, zEllipsis);
- if( p->zSnippet ){
- sqlite3_result_text(pCtx, p->zSnippet, p->nSnippet, SQLITE_TRANSIENT);
- }else{
- sqlite3_result_error_nomem(pCtx);
- }
- }else{
- sqlite3_result_error_nomem(pCtx);
- }
- fts3SnippetFree(p);
-}
-
-/*************************************************************************
-** Below this point is the alternative, experimental snippet() implementation.
-*/
-
#define SNIPPET_BUFFER_CHUNK 64
#define SNIPPET_BUFFER_SIZE SNIPPET_BUFFER_CHUNK*4
#define SNIPPET_BUFFER_MASK (SNIPPET_BUFFER_SIZE-1)
@@ -780,29 +63,81 @@ typedef struct LoadDoclistCtx LoadDoclistCtx;
struct LoadDoclistCtx {
Fts3Table *pTab; /* FTS3 Table */
int nPhrase; /* Number of phrases so far */
+ int nToken; /* Sum of pPhrase->nToken over all phrases so far */
};
-static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, void *ctx){
+static int fts3ExprNearTrim(Fts3Expr *pExpr){ /* pExpr must be a phrase */
int rc = SQLITE_OK;
- LoadDoclistCtx *p = (LoadDoclistCtx *)ctx;
- p->nPhrase++;
- if( pExpr->isLoaded==0 ){
- rc = sqlite3Fts3ExprLoadDoclist(p->pTab, pExpr);
- pExpr->isLoaded = 1;
- if( rc==SQLITE_OK && pExpr->aDoclist ){
- pExpr->pCurrent = pExpr->aDoclist;
- pExpr->pCurrent += sqlite3Fts3GetVarint(pExpr->pCurrent,&pExpr->iCurrent);
+ Fts3Expr *pParent = pExpr->pParent;
+
+ assert( pExpr->eType==FTSQUERY_PHRASE );
+ while( rc==SQLITE_OK
+ && pExpr->aDoclist && pParent
+ && pParent->eType==FTSQUERY_NEAR
+ && pParent->pRight==pExpr
+ ){
+ /* This expression (pExpr) is the right-hand-side of a NEAR operator.
+ ** Find the expression to the left of the same operator. If the
+ ** left-hand-side is itself a NEAR node rather than a phrase, the
+ ** phrase of interest is that node's right-hand child.
+ **
+ ** Once the two doclists have been trimmed against each other, the
+ ** left-hand phrase becomes pExpr. The loop therefore continues for
+ ** as long as that phrase is itself the right-hand-side of another
+ ** NEAR operator, still has a doclist, and no error has occurred.
+ */
+ int nNear = pParent->nNear;
+ Fts3Expr *pLeft = pParent->pLeft;
+
+ if( pLeft->eType!=FTSQUERY_PHRASE ){
+ assert( pLeft->eType==FTSQUERY_NEAR );
+ assert( pLeft->pRight->eType==FTSQUERY_PHRASE );
+ pLeft = pLeft->pRight;
}
+
+ rc = sqlite3Fts3ExprNearTrim(pLeft, pExpr, nNear);
+
+ pExpr = pLeft;
+ pParent = pExpr->pParent;
}
+
return rc;
}
-static int fts3ExprLoadDoclists(Fts3Cursor *pCsr, int *pnPhrase){
+/*
+** Callback passed to fts3ExprIterate() by fts3ExprLoadDoclists(). The ctx
+** argument points to a LoadDoclistCtx. On every call the phrase count is
+** incremented and the number of tokens in pExpr's phrase is added to the
+** token count.
+**
+** If the doclist for pExpr has not been loaded yet (isLoaded==0), it is
+** loaded with sqlite3Fts3ExprLoadDoclist() and the expression is marked
+** as loaded. If the load succeeded, the doclist is then trimmed by
+** fts3ExprNearTrim().
+**
+** Return SQLITE_OK, or the error code from loading or trimming.
+*/
+static int fts3ExprLoadDoclistsCb1(Fts3Expr *pExpr, void *ctx){
+  int rc = SQLITE_OK;
+  LoadDoclistCtx *p = (LoadDoclistCtx *)ctx;
+
+  p->nPhrase++;
+  p->nToken += pExpr->pPhrase->nToken;
+
+  if( pExpr->isLoaded==0 ){
+    rc = sqlite3Fts3ExprLoadDoclist(p->pTab, pExpr);
+    pExpr->isLoaded = 1;
+    if( rc==SQLITE_OK ){
+      /* Propagate trimming failures (e.g. SQLITE_NOMEM) instead of
+      ** discarding them. */
+      rc = fts3ExprNearTrim(pExpr);
+    }
+  }
+
+  return rc;
+}
+}
+
+/*
+** Callback passed to fts3ExprIterate() by fts3ExprLoadDoclists(). If pExpr
+** has a doclist, set iCurrent to zero, read one varint from the start of
+** the doclist into iCurrent and leave pCurrent pointing just past that
+** varint. The ctx argument is unused. Always returns SQLITE_OK.
+*/
+static int fts3ExprLoadDoclistsCb2(Fts3Expr *pExpr, void *ctx){
+  char *pList = pExpr->aDoclist;  /* Start of doclist, or 0 if none */
+  if( pList ){
+    pExpr->iCurrent = 0;
+    pExpr->pCurrent = pList + sqlite3Fts3GetVarint(pList, &pExpr->iCurrent);
+  }
+  return SQLITE_OK;
+}
+
+static int fts3ExprLoadDoclists(
+ Fts3Cursor *pCsr, /* Cursor whose expression (pCsr->pExpr) is iterated */
+ int *pnPhrase, /* OUT: Number of phrases in query (ignored if NULL) */
+ int *pnToken /* OUT: Number of tokens in query (ignored if NULL) */
+){
int rc;
- LoadDoclistCtx sCtx = {0, 0};
+ LoadDoclistCtx sCtx = {0, 0, 0};
sCtx.pTab = (Fts3Table *)pCsr->base.pVtab;
- rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb, (void *)&sCtx);
- *pnPhrase = sCtx.nPhrase;
+ rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb1, (void *)&sCtx);
+ if( rc==SQLITE_OK ){ /* Second pass runs only if the first one succeeded */
+ (void)fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb2, 0);
+ }
+ if( pnPhrase ) *pnPhrase = sCtx.nPhrase;
+ if( pnToken ) *pnToken = sCtx.nToken;
return rc;
}
@@ -832,20 +167,20 @@ static int fts3LoadSnippetBuffer(
int iPrev = aiPrev[i];
char *pList = apList[i];
- if( !pList ){
+ if( iPrev<0 || !pList ){
nFin++;
continue;
}
while( iPrev<(iPos+SNIPPET_BUFFER_CHUNK) ){
- if( iPrev>=iPos ){
- aBuffer[iPrev&SNIPPET_BUFFER_MASK] = (u8)(i+1);
- }
+ assert( iPrev>=iPos );
+ aBuffer[iPrev&SNIPPET_BUFFER_MASK] = i+1;
if( 0==((*pList)&0xFE) ){
- nFin++;
+ iPrev = -1;
break;
+ }else{
+ fts3GetDeltaPosition(&pList, &iPrev);
}
- fts3GetDeltaPosition(&pList, &iPrev);
}
aiPrev[i] = iPrev;
@@ -892,15 +227,11 @@ static void fts3SnippetCnt(
){
int iSub = (iIdx-1)&SNIPPET_BUFFER_MASK;
int iAdd = (iIdx+nSnippet-1)&SNIPPET_BUFFER_MASK;
- int iSub2 = (iIdx+(nSnippet/3)-1)&SNIPPET_BUFFER_MASK;
- int iAdd2 = (iIdx+(nSnippet*2/3)-1)&SNIPPET_BUFFER_MASK;
u64 h = *pHlmask;
anCnt[ aBuffer[iSub] ]--;
- anCnt[ aBuffer[iSub2] ]--;
anCnt[ aBuffer[iAdd] ]++;
- anCnt[ aBuffer[iAdd2] ]++;
h = h >> 1;
if( aBuffer[iAdd] ){
@@ -912,22 +243,46 @@ static void fts3SnippetCnt(
*pHlmask = h;
}
+/*
+** Return the score of a snippet window whose per-phrase hit counts are in
+** anCnt[1..n]. Every hit is worth 1 point. A phrase that has at least one
+** hit and is not flagged in covmask is worth a further 1000 points.
+** covmask has only 64 bits, so phrases numbered above 64 can never be
+** flagged in it and are always treated as not yet covered.
+*/
-static int fts3SnippetScore(int n, int *anCnt){
+static int fts3SnippetScore(int n, int *anCnt, u64 covmask){
int j;
int iScore = 0;
for(j=1; j<=n; j++){
int nCnt = anCnt[j];
- iScore += nCnt + (nCnt ? 1000 : 0);
+ iScore += nCnt;
+ /* Test j>64 first: shifting a 64-bit value by 64 or more is undefined. */
+ if( nCnt && (j>64 || 0==(covmask & ((u64)1 << (j-1)))) ){
+ iScore += 1000;
+ }
}
return iScore;
}
+/*
+** Return a bitmask in which bit (j-1) is set if anCnt[j] is non-zero, for
+** each j from 1 to n. At most the first 64 entries are examined, since a
+** u64 cannot hold more bits than that.
+*/
+static u64 fts3SnippetMask(int n, int *anCnt){
+  int nBit = (n>64) ? 64 : n;     /* Number of entries to examine */
+  u64 mCovered = 0;               /* Mask under construction */
+  int iBit;                       /* Bit number; its entry is anCnt[iBit+1] */
+
+  for(iBit=0; iBit<nBit; iBit++){
+    if( anCnt[iBit+1] ){
+      mCovered |= ((u64)1) << iBit;
+    }
+  }
+  return mCovered;
+}
+
+typedef struct SnippetFragment SnippetFragment;
+struct SnippetFragment { /* Result record filled in by fts3BestSnippet() */
+ int iCol; /* Column snippet is extracted from */
+ int iPos; /* Index of first token in snippet */
+ u64 covered; /* Mask of query phrases with at least one hit in snippet */
+ u64 hlmask; /* Mask of snippet terms to highlight */
+};
+
static int fts3BestSnippet(
int nSnippet, /* Desired snippet length */
Fts3Cursor *pCsr, /* Cursor to create snippet for */
int iCol, /* Index of column to create snippet from */
- int *piPos, /* OUT: Starting token for best snippet */
- u64 *pHlmask /* OUT: Highlight mask for best snippet */
+ u64 mCovered, /* Mask of phrases already covered */
+ u64 *pmSeen, /* IN/OUT: Mask of phrases seen */
+ SnippetFragment *pFragment, /* OUT: Best snippet found */
+ int *piScore /* OUT: Score of snippet pFragment */
){
int rc; /* Return Code */
u8 aBuffer[SNIPPET_BUFFER_SIZE];/* Circular snippet buffer */
@@ -935,20 +290,21 @@ static int fts3BestSnippet(
int *anToken; /* Number of tokens in each phrase */
char **apList; /* Array of position lists */
int *anCnt; /* Running totals of phrase occurences */
- int nList;
-
- int i;
-
+ int nList; /* Number of phrases in expression */
+ int nByte; /* Bytes of dynamic space required */
+ int i; /* Loop counter */
u64 hlmask = 0; /* Current mask of highlighted terms */
u64 besthlmask = 0; /* Mask of highlighted terms for iBestPos */
+ u64 bestcovmask = 0; /* Mask of terms with at least one hit */
int iBestPos = 0; /* Starting position of 'best' snippet */
int iBestScore = 0; /* Score of best snippet higher->better */
+ int iEnd = 0x7FFFFFFF;
SnippetCtx sCtx;
/* Iterate through the phrases in the expression to count them. The same
** callback makes sure the doclists are loaded for each phrase.
*/
- rc = fts3ExprLoadDoclists(pCsr, &nList);
+ rc = fts3ExprLoadDoclists(pCsr, &nList, 0);
if( rc!=SQLITE_OK ){
return rc;
}
@@ -956,16 +312,15 @@ static int fts3BestSnippet(
/* Now that it is known how many phrases there are, allocate and zero
** the required arrays using malloc().
*/
- apList = sqlite3_malloc(
- sizeof(u8*)*nList + /* apList */
+ nByte = sizeof(u8*)*nList + /* apList */
sizeof(int)*(nList) + /* anToken */
sizeof(int)*nList + /* aiPrev */
- sizeof(int)*(nList+1) /* anCnt */
- );
+ sizeof(int)*(nList+1); /* anCnt */
+ apList = (char **)sqlite3_malloc(nByte);
if( !apList ){
return SQLITE_NOMEM;
}
- memset(apList, 0, sizeof(u8*)*nList+sizeof(int)*nList+sizeof(int)*nList);
+ memset(apList, 0, nByte);
anToken = (int *)&apList[nList];
aiPrev = &anToken[nList];
anCnt = &aiPrev[nList];
@@ -979,6 +334,12 @@ static int fts3BestSnippet(
sCtx.iPhrase = 0;
(void)fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void *)&sCtx);
+ for(i=0; i=0 ){
+ *pmSeen |= (u64)1 << i;
+ }
+ }
+
/* Load the first two chunks of data into the buffer. */
memset(aBuffer, 0, SNIPPET_BUFFER_SIZE);
fts3LoadSnippetBuffer(0, aBuffer, nList, apList, aiPrev);
@@ -988,16 +349,19 @@ static int fts3BestSnippet(
for(i=1-nSnippet; i<=0; i++){
fts3SnippetCnt(i, nSnippet, anCnt, aBuffer, anToken, &hlmask);
}
- iBestScore = fts3SnippetScore(nList, anCnt);
+ iBestScore = fts3SnippetScore(nList, anCnt, mCovered);
besthlmask = hlmask;
iBestPos = 0;
+ bestcovmask = fts3SnippetMask(nList, anCnt);
- for(i=1; 1; i++){
+ for(i=1; iiBestScore ){
iBestPos = i;
iBestScore = iScore;
besthlmask = hlmask;
+ bestcovmask = fts3SnippetMask(nList, anCnt);
}
}
sqlite3_free(apList);
- *piPos = iBestPos;
- *pHlmask = besthlmask;
+
+ pFragment->iPos = iBestPos;
+ pFragment->hlmask = besthlmask;
+ pFragment->iCol = iCol;
+ pFragment->covered = bestcovmask;
+ *piScore = iBestScore;
return SQLITE_OK;
}
@@ -1033,7 +402,7 @@ static int fts3StringAppend(
int nAppend
){
if( nAppend<0 ){
- nAppend = (int)strlen(zAppend);
+ nAppend = strlen(zAppend);
}
if( pStr->n+nAppend+1>=pStr->nAlloc ){
@@ -1053,30 +422,86 @@ static int fts3StringAppend(
return SQLITE_OK;
}
+/*
+** Try to shift a snippet forward in the document so that its highlighted
+** tokens are less biased towards the end of the snippet.
+**
+** On entry *pHlmask is the highlight mask of an nSnippet token snippet and
+** *piPos is the snippet's first token. nLeft is the number of tokens that
+** precede the first highlighted token and nRight the number that follow
+** the last one. If nLeft exceeds nRight by two or more, the ideal shift is
+** nDesired = (nLeft-nRight)/2 tokens. The text zDoc/nDoc is then run
+** through the table's tokenizer to find out how many tokens are really
+** available, and *piPos and *pHlmask are adjusted by the number of tokens
+** the snippet can actually be moved. If hlmask is zero, or no shift is
+** desirable or possible, the outputs are left unchanged.
+**
+** NOTE(review): token positions reported by the tokenizer are compared
+** directly against nSnippet+nDesired, which suggests zDoc is expected to
+** begin at the first token of the snippet - confirm against the caller.
+**
+** Return SQLITE_OK if successful, or the error code returned by the
+** tokenizer xOpen() or xNext() method otherwise.
+*/
+int fts3SnippetShift(
+  Fts3Table *pTab,                /* FTS3 table (supplies the tokenizer) */
+  int nSnippet,                   /* Number of tokens in the snippet */
+  const char *zDoc,               /* Text to tokenize */
+  int nDoc,                       /* Size of zDoc in bytes */
+  int *piPos,                     /* IN/OUT: First token of snippet */
+  u64 *pHlmask                    /* IN/OUT: Mask of tokens to highlight */
+){
+  u64 hlmask = *pHlmask;          /* Local copy of the initial mask */
+
+  if( hlmask ){
+    int nLeft;                    /* Tokens before the first highlight */
+    int nRight;                   /* Tokens after the last highlight */
+    int nDesired;                 /* Ideal number of tokens to shift by */
+
+    for(nLeft=0; !(hlmask & ((u64)1 << nLeft)); nLeft++);
+    for(nRight=0; !(hlmask & ((u64)1 << (nSnippet-1-nRight))); nRight++);
+
+    nDesired = (nLeft-nRight)/2;
+    if( nDesired>0 ){
+      int nShift;                 /* Number of tokens actually shifted by */
+      int iCurrent = 0;           /* Position of token most recently read */
+      int rc;                     /* Return code */
+      sqlite3_tokenizer_module *pMod;
+      sqlite3_tokenizer_cursor *pC;
+
+      pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
+      rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);
+      if( rc!=SQLITE_OK ){
+        /* pC is not valid if xOpen() failed. Do not touch it. */
+        return rc;
+      }
+      pC->pTokenizer = pTab->pTokenizer;
+
+      /* Read tokens until either nSnippet+nDesired of them have been seen
+      ** or the tokenizer runs out of input (SQLITE_DONE). */
+      while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){
+        const char *ZDUMMY; int DUMMY1, DUMMY2, DUMMY3;
+        rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent);
+      }
+      pMod->xClose(pC);
+      if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){
+        return rc;
+      }
+
+      nShift = iCurrent-nSnippet;
+      if( nShift>0 ){
+        *piPos += nShift;
+        *pHlmask = hlmask >> nShift;
+      }
+    }
+  }
+  return SQLITE_OK;
+}
+
static int fts3SnippetText(
Fts3Cursor *pCsr, /* FTS3 Cursor */
- const char *zDoc, /* Document to extract snippet from */
- int nDoc, /* Size of zDoc in bytes */
+ SnippetFragment *pFragment, /* Snippet to extract */
int nSnippet, /* Number of tokens in extracted snippet */
- int iPos, /* Index of first document token in snippet */
- u64 hlmask, /* Bitmask of terms to highlight in snippet */
const char *zOpen, /* String inserted before highlighted term */
const char *zClose, /* String inserted after highlighted term */
const char *zEllipsis,
- char **pzSnippet /* OUT: Snippet text */
+ StrBuffer *pOut
){
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
int rc; /* Return code */
- int iCurrent = 0;
- int iStart = 0;
- int iEnd;
+ const char *zDoc; /* Document text to extract snippet from */
+ int nDoc; /* Size of zDoc in bytes */
+ int iCurrent = 0; /* Current token number of document */
+ int iStart = 0; /* Byte offset of current token */
+ int iEnd = 0; /* Byte offset of end of current token */
+ int isShiftDone = 0;
+ int iPos = pFragment->iPos;
+ u64 hlmask = pFragment->hlmask;
sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */
sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */
const char *ZDUMMY; /* Dummy arguments used with tokenizer */
int DUMMY1, DUMMY2, DUMMY3; /* Dummy arguments used with tokenizer */
-
- StrBuffer res = {0, 0, 0}; /* Result string */
+
+ zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, pFragment->iCol+1);
+ if( zDoc==0 ){
+ if( sqlite3_column_type(pCsr->pStmt, pFragment->iCol+1)!=SQLITE_NULL ){
+ return SQLITE_NOMEM;
+ }
+ return SQLITE_OK;
+ }
+ nDoc = sqlite3_column_bytes(pCsr->pStmt, pFragment->iCol+1);
/* Open a token cursor on the document. Read all tokens up to and
** including token iPos (the first token of the snippet). Set variable
@@ -1084,14 +509,10 @@ static int fts3SnippetText(
*/
pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);
- while( rc==SQLITE_OK && iCurrentxNext(pC, &ZDUMMY, &DUMMY1, &iStart, &DUMMY2, &iCurrent);
- }
- iEnd = iStart;
-
- if( rc==SQLITE_OK && iStart>0 ){
- rc = fts3StringAppend(&res, zEllipsis, -1);
+ if( rc!=SQLITE_OK ){
+ return rc;
}
+ pC->pTokenizer = pTab->pTokenizer;
while( rc==SQLITE_OK ){
int iBegin;
@@ -1099,15 +520,26 @@ static int fts3SnippetText(
rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent);
if( rc==SQLITE_OK ){
+ if( iCurrent=(iPos+nSnippet) ){
rc = SQLITE_DONE;
}else{
iEnd = iFin;
if( hlmask & ((u64)1 << (iCurrent-iPos)) ){
- if( fts3StringAppend(&res, &zDoc[iStart], iBegin-iStart)
- || fts3StringAppend(&res, zOpen, -1)
- || fts3StringAppend(&res, &zDoc[iBegin], iEnd-iBegin)
- || fts3StringAppend(&res, zClose, -1)
+ if( fts3StringAppend(pOut, &zDoc[iStart], iBegin-iStart)
+ || fts3StringAppend(pOut, zOpen, -1)
+ || fts3StringAppend(pOut, &zDoc[iBegin], iEnd-iBegin)
+ || fts3StringAppend(pOut, zClose, -1)
){
rc = SQLITE_NOMEM;
}
@@ -1118,23 +550,18 @@ static int fts3SnippetText(
}
assert( rc!=SQLITE_OK );
if( rc==SQLITE_DONE ){
- rc = fts3StringAppend(&res, &zDoc[iStart], iEnd-iStart);
+ rc = fts3StringAppend(pOut, &zDoc[iStart], iEnd-iStart);
if( rc==SQLITE_OK ){
rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent);
- if( rc==SQLITE_OK ){
- rc = fts3StringAppend(&res, zEllipsis, -1);
- }else if( rc==SQLITE_DONE ){
- rc = fts3StringAppend(&res, &zDoc[iEnd], -1);
+ if( rc==SQLITE_DONE ){
+ rc = fts3StringAppend(pOut, &zDoc[iEnd], -1);
+ }else if( rc==SQLITE_OK && zEllipsis ){
+ rc = fts3StringAppend(pOut, zEllipsis, -1);
}
}
}
pMod->xClose(pC);
- if( rc!=SQLITE_OK ){
- sqlite3_free(res.z);
- }else{
- *pzSnippet = res.z;
- }
return rc;
}
@@ -1264,7 +691,7 @@ static int fts3GetMatchinfo(Fts3Cursor *pCsr){
g.pTab = pTab;
g.nCol = pTab->nColumn;
g.iPhrase = 0;
- rc = fts3ExprLoadDoclists(pCsr, &nPhrase);
+ rc = fts3ExprLoadDoclists(pCsr, &nPhrase, 0);
if( rc!=SQLITE_OK ){
return rc;
}
@@ -1299,7 +726,7 @@ static int fts3GetMatchinfo(Fts3Cursor *pCsr){
return SQLITE_OK;
}
-void sqlite3Fts3Snippet2(
+void sqlite3Fts3Snippet(
sqlite3_context *pCtx, /* SQLite function call context */
Fts3Cursor *pCsr, /* Cursor object */
const char *zStart, /* Snippet start text - "" */
@@ -1308,29 +735,243 @@ void sqlite3Fts3Snippet2(
int iCol, /* Extract snippet from this column */
int nToken /* Approximate number of tokens in snippet */
){
- int rc;
- int iPos = 0;
- u64 hlmask = 0;
- char *z = 0;
- int nDoc;
- const char *zDoc;
+ Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
+ int rc = SQLITE_OK;
+ int i;
+ StrBuffer res = {0, 0, 0};
- rc = fts3BestSnippet(nToken, pCsr, iCol, &iPos, &hlmask);
+ /* The returned text includes up to four fragments of text extracted from
+ ** the data in the current row. The first iteration of the for(...) loop
+ ** below attempts to locate a single fragment of text nToken tokens in
+ ** size that contains at least one instance of all phrases in the query
+ ** expression that appear in the current row. If such a fragment of text
+ ** cannot be found, the second iteration of the loop attempts to locate
+ ** a pair of fragments, and so on.
+ */
+ int nSnippet = 0; /* Number of fragments in this snippet */
+ SnippetFragment aSnippet[4]; /* Maximum of 4 fragments per snippet */
+ int nFToken = -1; /* Number of tokens in each fragment */
- nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1);
- zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1);
+ do {
+ int iSnip; /* Loop counter 0..nSnippet-1 */
+ u64 mCovered = 0; /* Bitmask of phrases covered by snippet */
+ u64 mSeen = 0; /* Bitmask of phrases seen by BestSnippet() */
- if( rc==SQLITE_OK ){
- rc = fts3SnippetText(
- pCsr, zDoc, nDoc, nToken, iPos, hlmask, zStart, zEnd, zEllipsis, &z);
+ nSnippet++;
+ nFToken = (nToken+nSnippet-1) / nSnippet;
+
+ for(iSnip=0; iSnipnColumn; iRead++){
+ SnippetFragment sF;
+ int iS;
+ if( iCol>=0 && iRead!=iCol ) continue;
+
+ /* Find the best snippet of nFToken tokens in column iRead. */
+ rc = fts3BestSnippet(nFToken, pCsr, iRead, mCovered, &mSeen, &sF, &iS);
+ if( rc!=SQLITE_OK ){
+ goto snippet_out;
+ }
+ if( iS>iBestScore ){
+ *pFragment = sF;
+ iBestScore = iS;
+ }
+ }
+
+ mCovered |= pFragment->covered;
+ }
+
+ /* If all query phrases seen by fts3BestSnippet() are present in at least
+ ** one of the nSnippet snippet fragments, break out of the loop.
+ */
+ assert( (mCovered&mSeen)==mCovered );
+ if( mSeen==mCovered ) break;
+ }while( nSnippet0 );
+
+ for(i=0; i0 || p->iPos>0 ){
+ fts3StringAppend(&res, zEllipsis, -1);
+ }
+ rc = fts3SnippetText(pCsr, p, nFToken, zStart, zEnd, zTail, &res);
}
+
+ snippet_out:
if( rc!=SQLITE_OK ){
sqlite3_result_error_code(pCtx, rc);
+ sqlite3_free(res.z);
}else{
- sqlite3_result_text(pCtx, z, -1, sqlite3_free);
+ sqlite3_result_text(pCtx, res.z, -1, sqlite3_free);
}
}
+
+typedef struct TermOffset TermOffset;
+struct TermOffset { /* Iterator state for a single term of a query phrase */
+ char *pList; /* Position-list of the phrase; 0 if there is none or it is exhausted */
+ int iPos; /* Position just read from pList */
+ int iOff; /* Number of phrase tokens after this term; iPos-iOff is the term's position */
+};
+typedef struct TermOffsetCtx TermOffsetCtx;
+
+struct TermOffsetCtx { /* Context object passed to fts3ExprTermOffsetInit() */
+ int iCol; /* Column of table to populate aTerm for */
+ int iTerm; /* Index of the next aTerm[] entry to populate */
+ sqlite3_int64 iDocid; /* Docid passed to sqlite3Fts3FindPositions() */
+ TermOffset *aTerm; /* Array of term iterators, sized by the query token count */
+};
+
+/*
+** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets().
+**
+** The ctx argument points to a TermOffsetCtx. For the phrase pExpr, the
+** position-list for docid p->iDocid and column p->iCol is looked up and
+** its first position read. One TermOffset entry is then populated in
+** p->aTerm[] for each of the nTerm tokens of the phrase, advancing
+** p->iTerm as it goes. All entries of a phrase share the same position
+** list and initial position; they differ only in iOff, the number of
+** phrase tokens that follow the term. If the phrase has no position-list
+** for this column, the entries are populated with pList==0 and iPos==0.
+**
+** Always returns SQLITE_OK.
+*/
+static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, void *ctx){
+  TermOffsetCtx *p = (TermOffsetCtx *)ctx;
+  int nTerm;                      /* Number of tokens in phrase */
+  int iTerm;                      /* For looping through nTerm phrase terms */
+  char *pList;                    /* Pointer to position list for phrase */
+  int iPos = 0;                   /* First position in position-list */
+
+  pList = sqlite3Fts3FindPositions(pExpr, p->iDocid, p->iCol);
+  nTerm = pExpr->pPhrase->nToken;
+  if( pList ){
+    fts3GetDeltaPosition(&pList, &iPos);
+    assert( iPos>=0 );
+  }
+
+  for(iTerm=0; iTerm<nTerm; iTerm++){
+    TermOffset *pT = &p->aTerm[p->iTerm++];
+    pT->iOff = nTerm-iTerm-1;
+    pT->pList = pList;
+    pT->iPos = iPos;
+  }
+
+  return SQLITE_OK;
+}
+
+/*
+** Implementation of offsets() function.
+**
+** For each column of the table in turn, one TermOffset iterator is set up
+** for every token of the query (see fts3ExprTermOffsetInit()), and the
+** column text is run through the table's tokenizer. The iterator with the
+** smallest pending token position is repeatedly selected, the tokenizer is
+** advanced to that position, and the text
+**
+**     "<column> <term-index> <byte-offset> <byte-length> "
+**
+** is appended to the result. The final trailing space is not included in
+** the value returned to SQLite.
+**
+** A column whose value is NULL, or that contains no tokens, contributes
+** nothing to the result. If a position-list refers to a token beyond the
+** end of the column text, SQLITE_ERROR is reported. Other errors (for
+** example SQLITE_NOMEM) are reported via sqlite3_result_error_code().
+*/
+void sqlite3Fts3Offsets(
+  sqlite3_context *pCtx,          /* SQLite function call context */
+  Fts3Cursor *pCsr                /* Cursor object */
+){
+  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
+  sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule;
+  const char *ZDUMMY;             /* Dummy argument used with xNext() */
+  int NDUMMY;                     /* Dummy argument used with xNext() */
+
+  int rc;                         /* Return Code */
+  int nToken;                     /* Number of tokens in query */
+  int iCol;                       /* Column currently being processed */
+  StrBuffer res = {0, 0, 0};      /* Result string */
+
+  TermOffsetCtx sCtx;             /* Context for fts3ExprTermOffsetInit() */
+  memset(&sCtx, 0, sizeof(sCtx));
+
+  assert( pCsr->isRequireSeek==0 );
+
+  /* Count the number of terms in the query */
+  rc = fts3ExprLoadDoclists(pCsr, 0, &nToken);
+  if( rc!=SQLITE_OK ) goto offsets_out;
+
+  /* Allocate the array of TermOffset iterators. */
+  sCtx.aTerm = (TermOffset *)sqlite3_malloc(sizeof(TermOffset)*nToken);
+  if( 0==sCtx.aTerm ){
+    rc = SQLITE_NOMEM;
+    goto offsets_out;
+  }
+  sCtx.iDocid = pCsr->iPrevId;
+
+  for(iCol=0; iCol<pTab->nColumn; iCol++){
+    sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */
+    int iStart;                   /* Byte offset of start of current token */
+    int iEnd;                     /* Byte offset of end of current token */
+    int iCurrent;                 /* Position of current token */
+    const char *zDoc;             /* Text of column iCol */
+    int nDoc;                     /* Size of zDoc in bytes */
+
+    /* Initialize the contents of sCtx.aTerm[] for column iCol. */
+    sCtx.iCol = iCol;
+    sCtx.iTerm = 0;
+    rc = fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void *)&sCtx);
+    if( rc!=SQLITE_OK ) goto offsets_out;
+
+    /* Fetch the column text. sqlite3_column_text() returns NULL either
+    ** because the column value is NULL, in which case there is nothing to
+    ** report for this column, or because a malloc failed. This mirrors the
+    ** handling in fts3SnippetText().
+    */
+    zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1);
+    nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1);
+    if( zDoc==0 ){
+      if( sqlite3_column_type(pCsr->pStmt, iCol+1)==SQLITE_NULL ){
+        continue;
+      }
+      rc = SQLITE_NOMEM;
+      goto offsets_out;
+    }
+
+    /* Initialize a tokenizer iterator to iterate through column iCol. */
+    rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);
+    if( rc!=SQLITE_OK ) goto offsets_out;
+    pC->pTokenizer = pTab->pTokenizer;
+
+    rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
+    while( rc==SQLITE_OK ){
+      int i;                      /* Used to loop through terms */
+      int iMinPos = 0x7FFFFFFF;   /* Position of next token */
+      TermOffset *pTerm = 0;      /* TermOffset associated with next token */
+
+      for(i=0; i<nToken; i++){
+        TermOffset *pT = &sCtx.aTerm[i];
+        if( pT->pList && (pT->iPos-pT->iOff)<iMinPos ){
+          iMinPos = pT->iPos-pT->iOff;
+          pTerm = pT;
+        }
+      }
+
+      if( !pTerm ){
+        /* All offsets for this column have been gathered. */
+        break;
+      }else{
+        assert( iCurrent<=iMinPos );
+        if( 0==(0xFE&*pTerm->pList) ){
+          pTerm->pList = 0;
+        }else{
+          fts3GetDeltaPosition(&pTerm->pList, &pTerm->iPos);
+        }
+        while( rc==SQLITE_OK && iCurrent<iMinPos ){
+          rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
+        }
+        if( rc==SQLITE_OK ){
+          char aBuffer[64];
+          sqlite3_snprintf(sizeof(aBuffer), aBuffer,
+              "%d %d %d %d ", iCol, (int)(pTerm-sCtx.aTerm), iStart, iEnd-iStart
+          );
+          rc = fts3StringAppend(&res, aBuffer, -1);
+        }else if( rc==SQLITE_DONE ){
+          /* The position-list refers to a token that lies beyond the end
+          ** of the column text. */
+          rc = SQLITE_ERROR;
+        }
+      }
+    }
+    if( rc==SQLITE_DONE ){
+      /* The very first xNext() call found no tokens at all (for example
+      ** an empty string). This is not an error. */
+      rc = SQLITE_OK;
+    }
+
+    pMod->xClose(pC);
+    if( rc!=SQLITE_OK ) goto offsets_out;
+  }
+
+ offsets_out:
+  sqlite3_free(sCtx.aTerm);
+  assert( rc!=SQLITE_DONE );
+  if( rc!=SQLITE_OK ){
+    sqlite3_result_error_code(pCtx, rc);
+    sqlite3_free(res.z);
+  }else{
+    sqlite3_result_text(pCtx, res.z, res.n-1, sqlite3_free);
+  }
+  return;
+}
+
void sqlite3Fts3Matchinfo(sqlite3_context *pContext, Fts3Cursor *pCsr){
int rc = fts3GetMatchinfo(pCsr);
if( rc!=SQLITE_OK ){
diff --git a/manifest b/manifest
index 959dc6ca24..002e04be4a 100644
--- a/manifest
+++ b/manifest
@@ -1,8 +1,5 @@
------BEGIN PGP SIGNED MESSAGE-----
-Hash: SHA1
-
-C Fix\san\sissue\swith\slemon\sgenerating\sincorrect\sgrammars.\s\sThis\sissue\sdoes\nnot\seffect\sSQLite.
-D 2010-01-06T13:07:31
+C Change\sthe\sfts3\ssnippet\sfunction\sto\sreturn\s(hopefully)\smore\srelevant\ssnippets\sin\sless\stime.
+D 2010-01-06T17:19:22
F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0
F Makefile.in c5827ead754ab32b9585487177c93bb00b9497b3
F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654
@@ -59,15 +56,15 @@ F ext/fts2/mkfts2amal.tcl 974d5d438cb3f7c4a652639262f82418c1e4cff0
F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a
F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9
F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
-F ext/fts3/fts3.c 15fb87c1f00dfd88c2fbbbd9e50f319ea77834f0
+F ext/fts3/fts3.c 04e95afa45789d7a3da59f458d4a8c1879c31446
F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe
-F ext/fts3/fts3Int.h 9326800fa10e06d8e9d6d519f873b1371252968a
+F ext/fts3/fts3Int.h 45bc7e284806042119722c8f4127ee944b77f0dd
F ext/fts3/fts3_expr.c f4ff02ebe854e97ac03ff00b38b728a9ab57fd4b
F ext/fts3/fts3_hash.c 3c8f6387a4a7f5305588b203fa7c887d753e1f1c
F ext/fts3/fts3_hash.h 8331fb2206c609f9fc4c4735b9ab5ad6137c88ec
F ext/fts3/fts3_icu.c ac494aed69835008185299315403044664bda295
F ext/fts3/fts3_porter.c a651e287e02b49b565a6ccf9441959d434489156
-F ext/fts3/fts3_snippet.c 0e38f76c5992dd08d20fc81e1265763370f9ea4f
+F ext/fts3/fts3_snippet.c 612b3ad63abf2c5c85b6a46aac94bd90280e905a
F ext/fts3/fts3_tokenizer.c 1a49ee3d79cbf0b9386250370d9cbfe4bb89c8ff
F ext/fts3/fts3_tokenizer.h 13ffd9fcb397fec32a05ef5cd9e0fa659bf3dbd3
F ext/fts3/fts3_tokenizer1.c 11a604a53cff5e8c28882727bf794e5252e5227b
@@ -383,7 +380,7 @@ F test/fts3.test ae0433b09b12def08105640e57693726c4949338
F test/fts3_common.tcl 2a2044688ce3addb1dd58d3d846c574cf4b7bbcd
F test/fts3aa.test 5327d4c1d9b6c61021696746cc9a6cdc5bf159c0
F test/fts3ab.test 09aeaa162aee6513d9ff336b6932211008b9d1f9
-F test/fts3ac.test 356280144a2c92aa7b11474afadfe62a437fcd69
+F test/fts3ac.test fc1ac42c33f8a66d48ae41e4728f7ca4b6dfc950
F test/fts3ad.test e40570cb6f74f059129ad48bcef3d7cbc20dda49
F test/fts3ae.test ce32a13b34b0260928e4213b4481acf801533bda
F test/fts3af.test d394978c534eabf22dd0837e718b913fd66b499c
@@ -392,7 +389,7 @@ F test/fts3ah.test ba181d6a3dee0c929f0d69df67cac9c47cda6bff
F test/fts3ai.test d29cee6ed653e30de478066881cec8aa766531b2
F test/fts3aj.test 584facbc9ac4381a7ec624bfde677340ffc2a5a4
F test/fts3ak.test bd14deafe9d1586e8e9bf032411026ac4f8c925d
-F test/fts3al.test 6d19619402d2133773262652fc3f185cdf6be667
+F test/fts3al.test 07d64326e79bbdbab20ee87fc3328fbf01641c9f
F test/fts3am.test 218aa6ba0dfc50c7c16b2022aac5c6be593d08d8
F test/fts3an.test 931fa21bd80641ca594bfa32e105250a8a07918b
F test/fts3ao.test 0aa29dd4fc1c8d46b1f7cfe5926f7ac97551bea9
@@ -405,9 +402,10 @@ F test/fts3e.test 1f6c6ac9cc8b772ca256e6b22aaeed50c9350851
F test/fts3expr.test 05dab77387801e4900009917bb18f556037d82da
F test/fts3expr2.test 18da930352e5693eaa163a3eacf96233b7290d1a
F test/fts3malloc.test d02ee86b21edd2b43044e0d6dfdcd26cb6efddcb
-F test/fts3near.test dc196dd17b4606f440c580d45b3d23aa975fd077
+F test/fts3near.test 2e318ee434d32babd27c167142e2b94ddbab4844
F test/fts3query.test ca21717993f51caa7e36231dba2499868f3f8a6f
F test/fts3rnd.test 153b4214bad6084a348814f3dd651a92e2f31d9b
+F test/fts3snippet.test bfbceb2e292ddfdc6bb0b1b252ccea78bd6091be
F test/func.test af106ed834001738246d276659406823e35cde7b
F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f
F test/fuzz.test a4174c3009a3e2c2e14b31b364ebf7ddb49de2c9
@@ -786,14 +784,7 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
-P 28d0d7710761114a44a1a3a425a6883c661f06e7
-R cb14a38f5906a10fa21936447376b66a
-U drh
-Z 437ad8ccf6b703d6a3f5435217fbeff7
------BEGIN PGP SIGNATURE-----
-Version: GnuPG v1.4.6 (GNU/Linux)
-
-iD8DBQFLRIsWoxKgR168RlERAko3AJ9cRW4W+hFzWCSEF5rdeL83LKknrgCfQKRR
-l/RSoin5yCY/+/3Q1I6oeNA=
-=d16B
------END PGP SIGNATURE-----
+P 077a6bee2dd4668a5b13c37aa7d4c052350ec782
+R 63513c05ce3003328b753382175b1505
+U dan
+Z 7955c05e9b09116e00ebafe15af16394
diff --git a/manifest.uuid b/manifest.uuid
index efd93353cd..a4236dd065 100644
--- a/manifest.uuid
+++ b/manifest.uuid
@@ -1 +1 @@
-077a6bee2dd4668a5b13c37aa7d4c052350ec782
\ No newline at end of file
+8a208223a74d451f60d9cd707d63fb7d157d1737
\ No newline at end of file
diff --git a/test/fts3ac.test b/test/fts3ac.test
index 72e5410c7d..34abce44da 100644
--- a/test/fts3ac.test
+++ b/test/fts3ac.test
@@ -1131,39 +1131,36 @@ do_test fts3ac-4.2 {
SELECT snippet(email) FROM email
WHERE email MATCH 'christmas candlelight'
}
-} {{... place.? What do you think about going here Christmas
-eve?? They have an 11:00 a.m. service and a candlelight service at 5:00 p.m.,
-among others. ...}}
+} {{...here Christmas
+eve?? They have an 11:00 a.m. service and a candlelight service...}}
do_test fts3ac-4.3 {
execsql {
SELECT snippet(email) FROM email
WHERE email MATCH 'deal sheet potential reuse'
}
-} {{EOL-Accenture Deal Sheet ... intent
- Review Enron asset base for potential reuse/ licensing
- Contract negotiations ...}}
+} {{EOL-Accenture Deal Sheet...asset base for potential reuse/ licensing
+ Contract negotiations...}}
do_test fts3ac-4.4 {
execsql {
SELECT snippet(email,'<<<','>>>',' ') FROM email
WHERE email MATCH 'deal sheet potential reuse'
}
-} {{EOL-Accenture <<>> <<>> intent
- Review Enron asset base for <<>> <<>>/ licensing
- Contract negotiations }}
+} {{EOL-Accenture <<>> <<>> asset base for <<>> <<>>/ licensing
+ Contract negotiations }}
do_test fts3ac-4.5 {
execsql {
SELECT snippet(email,'<<<','>>>',' ') FROM email
WHERE email MATCH 'first things'
}
-} {{Re: <<>> Polish Deal! Congrats! <<>> seem to be building rapidly now on the }}
+} {{Re: <<>> Polish Deal! Congrats! <<>> seem to be building rapidly now }}
do_test fts3ac-4.6 {
execsql {
SELECT snippet(email) FROM email
WHERE email MATCH 'chris is here'
}
-} {{chris.germany@enron.com ... Sounds good to me. I bet this is next to the Warick?? Hotel. ... place.? What do you think about going here Christmas
-eve?? They have an 11:00 a.m. ...}}
+} {{...chris.germany@enron.com'" <chris...bet this is next to...about going here Christmas
+eve...}}
do_test fts3ac-4.7 {
execsql {
SELECT snippet(email) FROM email
@@ -1171,19 +1168,15 @@ do_test fts3ac-4.7 {
}
} {{Erin:
-Pursuant to your request, attached are the Schedule to ...}}
+Pursuant to your request, attached are the Schedule to the ISDA Master Agreement, together...}}
do_test fts3ac-4.8 {
execsql {
SELECT snippet(email) FROM email
WHERE email MATCH 'ancillary load davis'
}
-} {{pete.davis@enron.com ... Start Date: 4/22/01; HourAhead hour: 3; No ancillary schedules awarded.
-Variances detected.
-Variances detected in Load schedule.
+} {{pete.davis@enron.com...3; No ancillary schedules awarded...detected in Load schedule.
- LOG MESSAGES:
-
-PARSING ...}}
+ LOG...}}
# Combinations of AND and OR operators:
#
@@ -1192,22 +1185,17 @@ do_test fts3ac-5.1 {
SELECT snippet(email) FROM email
WHERE email MATCH 'questar enron OR com'
}
-} {{matt.smith@enron.com ... six reports:
-
-31 Keystone Receipts
+} {{matt.smith@enron.com...31 Keystone Receipts
15 Questar Pipeline
-40 Rockies Production
-22 West_2 ...}}
+40 Rockies...}}
+
do_test fts3ac-5.2 {
execsql {
SELECT snippet(email) FROM email
WHERE email MATCH 'enron OR com questar'
}
-} {{matt.smith@enron.com ... six reports:
-
-31 Keystone Receipts
+} {{matt.smith@enron.com...31 Keystone Receipts
15 Questar Pipeline
-40 Rockies Production
-22 West_2 ...}}
+40 Rockies...}}
finish_test
diff --git a/test/fts3al.test b/test/fts3al.test
index be01ecb7c0..02cc0d1695 100644
--- a/test/fts3al.test
+++ b/test/fts3al.test
@@ -53,6 +53,10 @@ do_test fts3al-1.3 {
#
# The trailing and leading hi-bit chars help with code which tests for
# isspace() to coalesce multiple spaces.
+#
+# UPDATE: The above is no longer true; there is no such code in fts3.
+# But leave the test in just the same.
+#
set word "\x80xxxxx\x80xxxxx\x80xxxxx\x80xxxxx\x80xxxxx\x80xxxxx\x80"
set phrase1 "$word $word $word target $word $word $word"
@@ -64,6 +68,6 @@ db eval "INSERT INTO t4 (content) VALUES ('$phrase2')"
do_test fts3al-1.4 {
execsql {SELECT rowid, length(snippet(t4)) FROM t4 WHERE t4 MATCH 'target'}
-} {1 111 2 117}
+} {1 241 2 247}
finish_test
diff --git a/test/fts3near.test b/test/fts3near.test
index e824133bb5..9c4409e1ed 100644
--- a/test/fts3near.test
+++ b/test/fts3near.test
@@ -76,6 +76,17 @@ do_test fts3near-1.15 {
execsql {SELECT docid FROM t1 WHERE content MATCH 'one NEAR two NEAR one'}
} {3}
+do_test fts3near-1.16 {
+ execsql {
+ SELECT docid FROM t1 WHERE content MATCH '"one three" NEAR/0 "four five"'
+ }
+} {1}
+do_test fts3near-1.17 {
+ execsql {
+ SELECT docid FROM t1 WHERE content MATCH '"four five" NEAR/0 "one three"'
+ }
+} {1}
+
# Output format of the offsets() function:
#
@@ -154,6 +165,7 @@ do_test fts3near-3.6 {
SELECT offsets(t1) FROM t1 WHERE content MATCH 'three NEAR/0 "two four"'
}
} {{0 0 8 5 0 1 14 3 0 2 18 4}}
+breakpoint
do_test fts3near-3.7 {
execsql {
SELECT offsets(t1) FROM t1 WHERE content MATCH '"two four" NEAR/0 three'}
@@ -170,7 +182,7 @@ do_test fts3near-4.1 {
execsql {
SELECT snippet(t1) FROM t1 WHERE content MATCH 'specification NEAR supports'
}
-} {{... devices, handheld devices, etc. This specification also supports content positioning, downloadable fonts, ...}}
+} {{...braille devices, handheld devices, etc. This specification also supports content positioning, downloadable fonts, table layout...}}
do_test fts3near-5.1 {
execsql {
diff --git a/test/fts3snippet.test b/test/fts3snippet.test
new file mode 100644
index 0000000000..1d388e86d8
--- /dev/null
+++ b/test/fts3snippet.test
@@ -0,0 +1,68 @@
+
+set testdir [file dirname $argv0]
+source $testdir/tester.tcl
+
+# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
+ifcapable !fts3 { finish_test ; return }
+
+do_test fts3snippet-1.1 {
+ execsql {
+ CREATE VIRTUAL TABLE ft USING fts3;
+ INSERT INTO ft VALUES('xxx xxx xxx xxx');
+ }
+} {}
+
+# Return a copy of list $L rebuilt one element at a time with lappend.
+# The tests below pass a multi-line, indented list literal through this
+# proc before comparing it with the text returned by offsets().
+proc normalize {L} {
+ set ret [list]
+ foreach l $L {lappend ret $l}
+ return $ret
+}
+
+do_test fts3snippet-1.2 {
+ execsql { SELECT offsets(ft) FROM ft WHERE ft MATCH 'xxx' }
+} {{0 0 0 3 0 0 4 3 0 0 8 3 0 0 12 3}}
+
+do_test fts3snippet-1.3 {
+ execsql { SELECT offsets(ft) FROM ft WHERE ft MATCH '"xxx xxx"' }
+} [list [normalize {
+ 0 0 0 3
+ 0 0 4 3
+ 0 1 4 3
+ 0 0 8 3
+ 0 1 8 3
+ 0 1 12 3
+}]]
+
+
+do_test fts3snippet-1.4 {
+ execsql { SELECT offsets(ft) FROM ft WHERE ft MATCH '"xxx xxx" xxx' }
+} [list [normalize {
+ 0 0 0 3
+ 0 2 0 3
+ 0 0 4 3
+ 0 1 4 3
+ 0 2 4 3
+ 0 0 8 3
+ 0 1 8 3
+ 0 2 8 3
+ 0 1 12 3
+ 0 2 12 3
+}]]
+
+do_test fts3snippet-1.5 {
+ execsql { SELECT offsets(ft) FROM ft WHERE ft MATCH 'xxx "xxx xxx"' }
+} [list [normalize {
+ 0 0 0 3
+ 0 1 0 3
+ 0 0 4 3
+ 0 1 4 3
+ 0 2 4 3
+ 0 0 8 3
+ 0 1 8 3
+ 0 2 8 3
+ 0 0 12 3
+ 0 2 12 3
+}]]
+
+finish_test
+