mirror of
https://github.com/sqlite/sqlite.git
synced 2025-07-27 20:41:58 +03:00
Update comments in fts3_snippet.c.
FossilOrigin-Name: dd9689b7c31861484f8d8062478ae73d3c940902
This commit is contained in:
@ -18,24 +18,115 @@
|
||||
#include <assert.h>
|
||||
#include <ctype.h>
|
||||
|
||||
/*
** Snippet buffer sizing constants. The buffer size is four chunks (256),
** which is a power of two, so SNIPPET_BUFFER_MASK (size-1) can be used to
** reduce an index modulo the buffer size with a bitwise AND.
**
** The replacement list of SNIPPET_BUFFER_SIZE is parenthesized so that the
** macro expands as a single value inside larger expressions such as
** (x / SNIPPET_BUFFER_SIZE) or (x % SNIPPET_BUFFER_SIZE).
*/
#define SNIPPET_BUFFER_CHUNK  64
#define SNIPPET_BUFFER_SIZE   (SNIPPET_BUFFER_CHUNK*4)
#define SNIPPET_BUFFER_MASK   (SNIPPET_BUFFER_SIZE-1)
|
||||
|
||||
/*
|
||||
** Used as an fts3ExprIterate() context when loading phrase doclists to
|
||||
** Fts3Expr.aDoclist[]/nDoclist.
|
||||
*/
|
||||
typedef struct LoadDoclistCtx LoadDoclistCtx;
|
||||
struct LoadDoclistCtx {
|
||||
Fts3Table *pTab; /* FTS3 Table */
|
||||
int nPhrase; /* Number of phrases seen so far */
|
||||
int nToken; /* Number of tokens seen so far */
|
||||
};
|
||||
|
||||
/*
|
||||
** The following types are used as part of the implementation of the
|
||||
** fts3BestSnippet() routine.
|
||||
*/
|
||||
typedef struct SnippetIter SnippetIter;
|
||||
typedef struct SnippetPhrase SnippetPhrase;
|
||||
typedef struct SnippetFragment SnippetFragment;
|
||||
|
||||
struct SnippetIter {
|
||||
Fts3Cursor *pCsr; /* Cursor snippet is being generated from */
|
||||
int iCol; /* Extract snippet from this column */
|
||||
int nSnippet; /* Requested snippet length (in tokens) */
|
||||
int nPhrase; /* Number of phrases in query */
|
||||
SnippetPhrase *aPhrase; /* Array of size nPhrase */
|
||||
int iCurrent; /* First token of current snippet */
|
||||
};
|
||||
|
||||
/* Per-phrase iteration state: a head and a tail position-list cursor. */
struct SnippetPhrase {
  int nToken;                     /* Token count of the phrase */
  char *pList;                    /* Start of the phrase position list */
  int iHead;                      /* Next value in the position list */
  char *pHead;                    /* Position list data that follows iHead */
  int iTail;                      /* Next value in trailing position list */
  char *pTail;                    /* Position list data that follows iTail */
};
|
||||
|
||||
struct SnippetFragment {
|
||||
int iCol; /* Column snippet is extracted from */
|
||||
int iPos; /* Index of first token in snippet */
|
||||
u64 covered; /* Mask of query phrases covered */
|
||||
u64 hlmask; /* Mask of snippet terms to highlight */
|
||||
};
|
||||
|
||||
/*
|
||||
** This type is used as an fts3ExprIterate() context object while
|
||||
** accumulating the data returned by the matchinfo() function.
|
||||
*/
|
||||
typedef struct MatchInfo MatchInfo;
|
||||
struct MatchInfo {
|
||||
Fts3Cursor *pCursor; /* FTS3 Cursor */
|
||||
int nCol; /* Number of columns in table */
|
||||
u32 *aMatchinfo; /* Pre-allocated buffer */
|
||||
};
|
||||
|
||||
|
||||
|
||||
/*
|
||||
** The snippet() and offsets() functions both return text values. An instance
|
||||
** of the following structure is used to accumulate those values while the
|
||||
** functions are running. See fts3StringAppend() for details.
|
||||
*/
|
||||
typedef struct StrBuffer StrBuffer;
struct StrBuffer {
  char *z;                        /* Buffer holding the string */
  int n;                          /* Bytes used in z, not counting the nul */
  int nAlloc;                     /* Bytes allocated for buffer z */
};
|
||||
|
||||
|
||||
/*
|
||||
** This function is used to help iterate through a position-list. A position
|
||||
** list is a list of unique integers, sorted from smallest to largest. Each
|
||||
** element of the list is represented by an FTS3 varint that takes the value
|
||||
** of the difference between the current element and the previous one plus
|
||||
** two. For example, to store the position-list:
|
||||
**
|
||||
** 4 9 113
|
||||
**
|
||||
** the three varints:
|
||||
**
|
||||
** 6 7 106
|
||||
**
|
||||
** are encoded.
|
||||
**
|
||||
** When this function is called, *pp points to the start of an element of
|
||||
** the list. *piPos contains the value of the previous entry in the list.
|
||||
** After it returns, *piPos contains the value of the next element of the
|
||||
** list and *pp is advanced to the following varint.
|
||||
*/
|
||||
static void fts3GetDeltaPosition(char **pp, int *piPos){
  char *pVarint = *pp;            /* Varint that encodes the next element */
  int iDelta;                     /* Decoded varint: (difference + 2) */
  int nByte;                      /* Bytes consumed by the varint */

  nByte = sqlite3Fts3GetVarint32(pVarint, &iDelta);

  /* Step past the varint, then undo the +2 bias to recover the position. */
  *pp = pVarint + nByte;
  *piPos = *piPos + (iDelta - 2);
}
|
||||
|
||||
/*
|
||||
** Helper function for fts3ExprIterate() (see below).
|
||||
*/
|
||||
static int fts3ExprIterate2(
|
||||
Fts3Expr *pExpr, /* Expression to iterate phrases of */
|
||||
int *piPhrase, /* Pointer to phrase counter */
|
||||
int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */
|
||||
void *pCtx /* Second argument to pass to callback */
|
||||
){
|
||||
int rc;
|
||||
int eType = pExpr->eType;
|
||||
int rc; /* Return code */
|
||||
int eType = pExpr->eType; /* Type of expression node pExpr */
|
||||
|
||||
if( eType!=FTSQUERY_PHRASE ){
|
||||
assert( pExpr->pLeft && pExpr->pRight );
|
||||
rc = fts3ExprIterate2(pExpr->pLeft, piPhrase, x, pCtx);
|
||||
@ -64,17 +155,24 @@ static int fts3ExprIterate(
|
||||
int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */
|
||||
void *pCtx /* Second argument to pass to callback */
|
||||
){
|
||||
int iPhrase = 0;
|
||||
int iPhrase = 0; /* Variable used as the phrase counter */
|
||||
return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx);
|
||||
}
|
||||
|
||||
typedef struct LoadDoclistCtx LoadDoclistCtx;
|
||||
struct LoadDoclistCtx {
|
||||
Fts3Table *pTab; /* FTS3 Table */
|
||||
int nPhrase; /* Number of phrases so far */
|
||||
int nToken; /* Number of tokens so far */
|
||||
};
|
||||
|
||||
/*
|
||||
** The argument to this function is always a phrase node. Its doclist
|
||||
** (Fts3Expr.aDoclist[]) and the doclists associated with all phrase nodes
|
||||
** to the left of this one in the query tree have already been loaded.
|
||||
**
|
||||
** If this phrase node is part of a series of phrase nodes joined by
|
||||
** NEAR operators (and is not the left-most of said series), then elements are
|
||||
** removed from the phrase's doclist consistent with the NEAR restriction. If
|
||||
** required, elements may be removed from the doclists of phrases to the
|
||||
** left of this one that are part of the same series of NEAR operator
|
||||
** connected phrases.
|
||||
**
|
||||
** If an OOM error occurs, SQLITE_NOMEM is returned. Otherwise, SQLITE_OK.
|
||||
*/
|
||||
static int fts3ExprNearTrim(Fts3Expr *pExpr){
|
||||
int rc = SQLITE_OK;
|
||||
Fts3Expr *pParent = pExpr->pParent;
|
||||
@ -106,6 +204,11 @@ static int fts3ExprNearTrim(Fts3Expr *pExpr){
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
** This is an fts3ExprIterate() callback used while loading the doclists
|
||||
** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also
|
||||
** fts3ExprLoadDoclists().
|
||||
*/
|
||||
static int fts3ExprLoadDoclistsCb1(Fts3Expr *pExpr, int iPhrase, void *ctx){
|
||||
int rc = SQLITE_OK;
|
||||
LoadDoclistCtx *p = (LoadDoclistCtx *)ctx;
|
||||
@ -124,6 +227,11 @@ static int fts3ExprLoadDoclistsCb1(Fts3Expr *pExpr, int iPhrase, void *ctx){
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
** This is an fts3ExprIterate() callback used while loading the doclists
|
||||
** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also
|
||||
** fts3ExprLoadDoclists().
|
||||
*/
|
||||
static int fts3ExprLoadDoclistsCb2(Fts3Expr *pExpr, int iPhrase, void *ctx){
|
||||
if( pExpr->aDoclist ){
|
||||
pExpr->pCurrent = pExpr->aDoclist;
|
||||
@ -133,13 +241,23 @@ static int fts3ExprLoadDoclistsCb2(Fts3Expr *pExpr, int iPhrase, void *ctx){
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Load the doclists for each phrase in the query associated with FTS3 cursor
|
||||
** pCsr.
|
||||
**
|
||||
** If pnPhrase is not NULL, then *pnPhrase is set to the number of matchable
|
||||
** phrases in the expression (all phrases except those directly or
|
||||
** indirectly descended from the right-hand-side of a NOT operator). If
|
||||
** pnToken is not NULL, then it is set to the number of tokens in all
|
||||
** matchable phrases of the expression.
|
||||
*/
|
||||
static int fts3ExprLoadDoclists(
|
||||
Fts3Cursor *pCsr,
|
||||
Fts3Cursor *pCsr, /* Fts3 cursor for current query */
|
||||
int *pnPhrase, /* OUT: Number of phrases in query */
|
||||
int *pnToken /* OUT: Number of tokens in query */
|
||||
){
|
||||
int rc;
|
||||
LoadDoclistCtx sCtx = {0, 0, 0};
|
||||
int rc; /* Return Code */
|
||||
LoadDoclistCtx sCtx = {0,0,0}; /* Context for fts3ExprIterate() */
|
||||
sCtx.pTab = (Fts3Table *)pCsr->base.pVtab;
|
||||
rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb1, (void *)&sCtx);
|
||||
if( rc==SQLITE_OK ){
|
||||
@ -150,30 +268,6 @@ static int fts3ExprLoadDoclists(
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
** The following types are used as part of the implementation of the
|
||||
** fts3BestSnippet() routine.
|
||||
*/
|
||||
typedef struct SnippetCtx SnippetCtx;
|
||||
typedef struct SnippetPhrase SnippetPhrase;
|
||||
|
||||
struct SnippetCtx {
|
||||
Fts3Cursor *pCsr; /* Cursor snippet is being generated from */
|
||||
int iCol; /* Extract snippet from this column */
|
||||
int nSnippet; /* Requested snippet length (in tokens) */
|
||||
int nPhrase; /* Number of phrases in query */
|
||||
SnippetPhrase *aPhrase; /* Array of size nPhrase */
|
||||
int iCurrent; /* First token of current snippet */
|
||||
};
|
||||
struct SnippetPhrase {
|
||||
int nToken; /* Number of tokens in phrase */
|
||||
char *pList; /* Pointer to start of phrase position list */
|
||||
int iHead; /* Next value in position list */
|
||||
char *pHead; /* Position list data following iHead */
|
||||
int iTail; /* Next value in trailing position list */
|
||||
char *pTail; /* Position list data following iTail */
|
||||
};
|
||||
|
||||
/*
|
||||
** Advance the position list iterator specified by the first two
|
||||
** arguments so that it points to the first element with a value greater
|
||||
@ -198,11 +292,14 @@ static void fts3SnippetAdvance(char **ppIter, int *piIter, int iNext){
|
||||
}
|
||||
}
|
||||
|
||||
static int fts3SnippetNextCandidate(SnippetCtx *pIter){
|
||||
/*
|
||||
** Advance the snippet iterator to the next candidate snippet.
|
||||
*/
|
||||
static int fts3SnippetNextCandidate(SnippetIter *pIter){
|
||||
int i; /* Loop counter */
|
||||
|
||||
if( pIter->iCurrent<0 ){
|
||||
/* The SnippetCtx object has just been initialized. The first snippet
|
||||
/* The SnippetIter object has just been initialized. The first snippet
|
||||
** candidate always starts at offset 0 (even if this candidate has a
|
||||
** score of 0.0).
|
||||
*/
|
||||
@ -240,8 +337,12 @@ static int fts3SnippetNextCandidate(SnippetCtx *pIter){
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
** Retrieve information about the current candidate snippet of snippet
|
||||
** iterator pIter.
|
||||
*/
|
||||
static void fts3SnippetDetails(
|
||||
SnippetCtx *pIter, /* Snippet iterator */
|
||||
SnippetIter *pIter, /* Snippet iterator */
|
||||
u64 mCovered, /* Bitmask of phrases already covered */
|
||||
int *piToken, /* OUT: First token of proposed snippet */
|
||||
int *piScore, /* OUT: "Score" for this snippet */
|
||||
@ -249,11 +350,10 @@ static void fts3SnippetDetails(
|
||||
u64 *pmHighlight /* OUT: Bitmask of terms to highlight */
|
||||
){
|
||||
int iStart = pIter->iCurrent; /* First token of snippet */
|
||||
|
||||
int iScore = 0;
|
||||
int i;
|
||||
u64 mCover = 0;
|
||||
u64 mHighlight = 0;
|
||||
int iScore = 0; /* Score of this snippet */
|
||||
int i; /* Loop counter */
|
||||
u64 mCover = 0; /* Mask of phrases covered by this snippet */
|
||||
u64 mHighlight = 0; /* Mask of tokens to highlight in snippet */
|
||||
|
||||
for(i=0; i<pIter->nPhrase; i++){
|
||||
SnippetPhrase *pPhrase = &pIter->aPhrase[i];
|
||||
@ -283,6 +383,7 @@ static void fts3SnippetDetails(
|
||||
}
|
||||
}
|
||||
|
||||
/* Set the output variables before returning. */
|
||||
*piToken = iStart;
|
||||
*piScore = iScore;
|
||||
*pmCover = mCover;
|
||||
@ -291,10 +392,10 @@ static void fts3SnippetDetails(
|
||||
|
||||
/*
|
||||
** This function is an fts3ExprIterate() callback used by fts3BestSnippet().
|
||||
** Each invocation populates an element of the SnippetCtx.aPhrase[] array.
|
||||
** Each invocation populates an element of the SnippetIter.aPhrase[] array.
|
||||
*/
|
||||
static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){
|
||||
SnippetCtx *p = (SnippetCtx *)ctx;
|
||||
SnippetIter *p = (SnippetIter *)ctx;
|
||||
SnippetPhrase *pPhrase = &p->aPhrase[iPhrase];
|
||||
char *pCsr;
|
||||
|
||||
@ -316,16 +417,21 @@ static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
#define BITMASK_SIZE 64
|
||||
|
||||
typedef struct SnippetFragment SnippetFragment;
|
||||
struct SnippetFragment {
|
||||
int iCol; /* Column snippet is extracted from */
|
||||
int iPos; /* Index of first token in snippet */
|
||||
u64 covered; /* Mask of query phrases covered */
|
||||
u64 hlmask; /* Mask of snippet terms to highlight */
|
||||
};
|
||||
|
||||
/*
|
||||
** Select the fragment of text consisting of nFragment contiguous tokens
|
||||
** from column iCol that represent the "best" snippet. The best snippet
|
||||
** is the snippet with the highest score, where scores are calculated
|
||||
** by adding:
|
||||
**
|
||||
** (a) +1 point for each occurrence of a matchable phrase in the snippet.
|
||||
**
|
||||
** (b) +1000 points for the first occurrence of each matchable phrase in
|
||||
** the snippet for which the corresponding mCovered bit is not set.
|
||||
**
|
||||
** The selected snippet parameters are stored in structure *pFragment before
|
||||
** returning. The score of the selected snippet is stored in *piScore
|
||||
** before returning.
|
||||
*/
|
||||
static int fts3BestSnippet(
|
||||
int nSnippet, /* Desired snippet length */
|
||||
Fts3Cursor *pCsr, /* Cursor to create snippet for */
|
||||
@ -337,12 +443,12 @@ static int fts3BestSnippet(
|
||||
){
|
||||
int rc; /* Return Code */
|
||||
int nList; /* Number of phrases in expression */
|
||||
SnippetCtx sCtx; /* Snippet context object */
|
||||
SnippetIter sIter; /* Iterates through snippet candidates */
|
||||
int nByte; /* Number of bytes of space to allocate */
|
||||
int iBestScore = -1;
|
||||
int i;
|
||||
int iBestScore = -1; /* Best snippet score found so far */
|
||||
int i; /* Loop counter */
|
||||
|
||||
memset(&sCtx, 0, sizeof(sCtx));
|
||||
memset(&sIter, 0, sizeof(sIter));
|
||||
|
||||
/* Iterate through the phrases in the expression to count them. The same
|
||||
** callback makes sure the doclists are loaded for each phrase.
|
||||
@ -356,36 +462,39 @@ static int fts3BestSnippet(
|
||||
** the required space using malloc().
|
||||
*/
|
||||
nByte = sizeof(SnippetPhrase) * nList;
|
||||
sCtx.aPhrase = (SnippetPhrase *)sqlite3_malloc(nByte);
|
||||
if( !sCtx.aPhrase ){
|
||||
sIter.aPhrase = (SnippetPhrase *)sqlite3_malloc(nByte);
|
||||
if( !sIter.aPhrase ){
|
||||
return SQLITE_NOMEM;
|
||||
}
|
||||
memset(sCtx.aPhrase, 0, nByte);
|
||||
memset(sIter.aPhrase, 0, nByte);
|
||||
|
||||
/* Initialize the contents of the SnippetCtx object. Then iterate through
|
||||
/* Initialize the contents of the SnippetIter object. Then iterate through
|
||||
** the set of phrases in the expression to populate the aPhrase[] array.
|
||||
*/
|
||||
sCtx.pCsr = pCsr;
|
||||
sCtx.iCol = iCol;
|
||||
sCtx.nSnippet = nSnippet;
|
||||
sCtx.nPhrase = nList;
|
||||
sCtx.iCurrent = -1;
|
||||
(void)fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void *)&sCtx);
|
||||
sIter.pCsr = pCsr;
|
||||
sIter.iCol = iCol;
|
||||
sIter.nSnippet = nSnippet;
|
||||
sIter.nPhrase = nList;
|
||||
sIter.iCurrent = -1;
|
||||
(void)fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void *)&sIter);
|
||||
|
||||
/* Set the *pmSeen output variable. */
|
||||
for(i=0; i<nList; i++){
|
||||
if( sCtx.aPhrase[i].pHead ){
|
||||
if( sIter.aPhrase[i].pHead ){
|
||||
*pmSeen |= (u64)1 << i;
|
||||
}
|
||||
}
|
||||
|
||||
/* Loop through all candidate snippets. Store the best snippet in
|
||||
** *pFragment. Store its associated 'score' in iBestScore.
|
||||
*/
|
||||
pFragment->iCol = iCol;
|
||||
while( !fts3SnippetNextCandidate(&sCtx) ){
|
||||
while( !fts3SnippetNextCandidate(&sIter) ){
|
||||
int iPos;
|
||||
int iScore;
|
||||
u64 mCover;
|
||||
u64 mHighlight;
|
||||
fts3SnippetDetails(&sCtx, mCovered, &iPos, &iScore, &mCover, &mHighlight);
|
||||
|
||||
fts3SnippetDetails(&sIter, mCovered, &iPos, &iScore, &mCover, &mHighlight);
|
||||
assert( iScore>=0 );
|
||||
if( iScore>iBestScore ){
|
||||
pFragment->iPos = iPos;
|
||||
@ -395,28 +504,31 @@ static int fts3BestSnippet(
|
||||
}
|
||||
}
|
||||
|
||||
sqlite3_free(sCtx.aPhrase);
|
||||
sqlite3_free(sIter.aPhrase);
|
||||
*piScore = iBestScore;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
|
||||
typedef struct StrBuffer StrBuffer;
|
||||
struct StrBuffer {
|
||||
char *z;
|
||||
int n;
|
||||
int nAlloc;
|
||||
};
|
||||
|
||||
/*
|
||||
** Append a string to the string-buffer passed as the first argument.
|
||||
**
|
||||
** If nAppend is negative, then the length of the string zAppend is
|
||||
** determined using strlen().
|
||||
*/
|
||||
static int fts3StringAppend(
|
||||
StrBuffer *pStr,
|
||||
const char *zAppend,
|
||||
int nAppend
|
||||
StrBuffer *pStr, /* Buffer to append to */
|
||||
const char *zAppend, /* Pointer to data to append to buffer */
|
||||
int nAppend /* Size of zAppend in bytes (or -1) */
|
||||
){
|
||||
if( nAppend<0 ){
|
||||
nAppend = strlen(zAppend);
|
||||
}
|
||||
|
||||
/* If there is insufficient space allocated at StrBuffer.z, use realloc()
|
||||
** to grow the buffer so that it is big enough to accommodate the
|
||||
** appended data.
|
||||
*/
|
||||
if( pStr->n+nAppend+1>=pStr->nAlloc ){
|
||||
int nAlloc = pStr->nAlloc+nAppend+100;
|
||||
char *zNew = sqlite3_realloc(pStr->z, nAlloc);
|
||||
@ -427,6 +539,7 @@ static int fts3StringAppend(
|
||||
pStr->nAlloc = nAlloc;
|
||||
}
|
||||
|
||||
/* Append the data to the string buffer. */
|
||||
memcpy(&pStr->z[pStr->n], zAppend, nAppend);
|
||||
pStr->n += nAppend;
|
||||
pStr->z[pStr->n] = '\0';
|
||||
@ -434,33 +547,63 @@ static int fts3StringAppend(
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** The fts3BestSnippet() function often selects snippets that end with a
|
||||
** query term. That is, the final term of the snippet is always a term
|
||||
** that requires highlighting. For example, if 'X' is a highlighted term
|
||||
** and '.' is a non-highlighted term, BestSnippet() may select:
|
||||
**
|
||||
** ........X.....X
|
||||
**
|
||||
** This function "shifts" the beginning of the snippet forward in the
|
||||
** document so that there are approximately the same number of
|
||||
** non-highlighted terms to the right of the final highlighted term as there
|
||||
** are to the left of the first highlighted term. For example, to this:
|
||||
**
|
||||
** ....X.....X....
|
||||
**
|
||||
** This is done as part of extracting the snippet text, not when selecting
|
||||
** the snippet. Snippet selection is done based on doclists only, so there
|
||||
** is no way for fts3BestSnippet() to know whether or not the document
|
||||
** actually contains terms that follow the final highlighted term.
|
||||
*/
|
||||
int fts3SnippetShift(
|
||||
Fts3Table *pTab,
|
||||
int nSnippet,
|
||||
const char *zDoc,
|
||||
int nDoc,
|
||||
int *piPos,
|
||||
u64 *pHlmask
|
||||
Fts3Table *pTab, /* FTS3 table snippet comes from */
|
||||
int nSnippet, /* Number of tokens desired for snippet */
|
||||
const char *zDoc, /* Document text to extract snippet from */
|
||||
int nDoc, /* Size of buffer zDoc in bytes */
|
||||
int *piPos, /* IN/OUT: First token of snippet */
|
||||
u64 *pHlmask /* IN/OUT: Mask of tokens to highlight */
|
||||
){
|
||||
u64 hlmask = *pHlmask;
|
||||
u64 hlmask = *pHlmask; /* Local copy of initial highlight-mask */
|
||||
|
||||
if( hlmask ){
|
||||
int nLeft;
|
||||
int nRight;
|
||||
int nDesired;
|
||||
int nLeft; /* Tokens to the left of first highlight */
|
||||
int nRight; /* Tokens to the right of last highlight */
|
||||
int nDesired; /* Ideal number of tokens to shift forward */
|
||||
|
||||
for(nLeft=0; !(hlmask & ((u64)1 << nLeft)); nLeft++);
|
||||
for(nRight=0; !(hlmask & ((u64)1 << (nSnippet-1-nRight))); nRight++);
|
||||
|
||||
nDesired = (nLeft-nRight)/2;
|
||||
|
||||
/* Ideally, the start of the snippet should be pushed forward in the
|
||||
** document nDesired tokens. This block checks if there are actually
|
||||
** nDesired tokens to the right of the snippet. If so, *piPos and
|
||||
** *pHlmask are updated to shift the snippet nDesired tokens to the
|
||||
** right. Otherwise, the snippet is shifted by the number of tokens
|
||||
** available.
|
||||
*/
|
||||
if( nDesired>0 ){
|
||||
int nShift;
|
||||
int iCurrent = 0;
|
||||
int rc;
|
||||
int nShift; /* Number of tokens to shift snippet by */
|
||||
int iCurrent = 0; /* Token counter */
|
||||
int rc; /* Return Code */
|
||||
sqlite3_tokenizer_module *pMod;
|
||||
sqlite3_tokenizer_cursor *pC;
|
||||
|
||||
pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
|
||||
|
||||
/* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired)
|
||||
** or more tokens in zDoc/nDoc.
|
||||
*/
|
||||
rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);
|
||||
if( rc!=SQLITE_OK ){
|
||||
return rc;
|
||||
@ -484,6 +627,10 @@ int fts3SnippetShift(
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Extract the snippet text for fragment pFragment from cursor pCsr and
|
||||
** append it to string buffer pOut.
|
||||
*/
|
||||
static int fts3SnippetText(
|
||||
Fts3Cursor *pCsr, /* FTS3 Cursor */
|
||||
SnippetFragment *pFragment, /* Snippet to extract */
|
||||
@ -492,8 +639,8 @@ static int fts3SnippetText(
|
||||
int nSnippet, /* Number of tokens in extracted snippet */
|
||||
const char *zOpen, /* String inserted before highlighted term */
|
||||
const char *zClose, /* String inserted after highlighted term */
|
||||
const char *zEllipsis,
|
||||
StrBuffer *pOut
|
||||
const char *zEllipsis, /* String inserted between snippets */
|
||||
StrBuffer *pOut /* Write output here */
|
||||
){
|
||||
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
|
||||
int rc; /* Return code */
|
||||
@ -501,23 +648,23 @@ static int fts3SnippetText(
|
||||
int nDoc; /* Size of zDoc in bytes */
|
||||
int iCurrent = 0; /* Current token number of document */
|
||||
int iEnd = 0; /* Byte offset of end of current token */
|
||||
int isShiftDone = 0;
|
||||
int iPos = pFragment->iPos;
|
||||
u64 hlmask = pFragment->hlmask;
|
||||
|
||||
int isShiftDone = 0; /* True after snippet is shifted */
|
||||
int iPos = pFragment->iPos; /* First token of snippet */
|
||||
u64 hlmask = pFragment->hlmask; /* Highlight-mask for snippet */
|
||||
int iCol = pFragment->iCol+1; /* Query column to extract text from */
|
||||
sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */
|
||||
sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */
|
||||
const char *ZDUMMY; /* Dummy arguments used with tokenizer */
|
||||
int DUMMY1; /* Dummy arguments used with tokenizer */
|
||||
const char *ZDUMMY; /* Dummy argument used with tokenizer */
|
||||
int DUMMY1; /* Dummy argument used with tokenizer */
|
||||
|
||||
zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, pFragment->iCol+1);
|
||||
zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol);
|
||||
if( zDoc==0 ){
|
||||
if( sqlite3_column_type(pCsr->pStmt, pFragment->iCol+1)!=SQLITE_NULL ){
|
||||
if( sqlite3_column_type(pCsr->pStmt, iCol)!=SQLITE_NULL ){
|
||||
return SQLITE_NOMEM;
|
||||
}
|
||||
return SQLITE_OK;
|
||||
}
|
||||
nDoc = sqlite3_column_bytes(pCsr->pStmt, pFragment->iCol+1);
|
||||
nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol);
|
||||
|
||||
/* Open a token cursor on the document. */
|
||||
pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
|
||||
@ -530,7 +677,7 @@ static int fts3SnippetText(
|
||||
while( rc==SQLITE_OK ){
|
||||
int iBegin; /* Offset in zDoc of start of token */
|
||||
int iFin; /* Offset in zDoc of end of token */
|
||||
int isHighlight;
|
||||
int isHighlight; /* True for highlighted terms */
|
||||
|
||||
rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent);
|
||||
if( rc!=SQLITE_OK ){
|
||||
@ -584,39 +731,17 @@ static int fts3SnippetText(
|
||||
|
||||
|
||||
/*
|
||||
** An instance of this structure is used to collect the 'global' part of
|
||||
** the matchinfo statistics. The 'global' part consists of the following:
|
||||
** This function is used to count the entries in a column-list (a
|
||||
** delta-encoded list of term offsets within a single column of a single
|
||||
** row). When this function is called, *ppCollist should point to the
|
||||
** beginning of the first varint in the column-list (the varint that
|
||||
** contains the position of the first matching term in the column data).
|
||||
** Before returning, *ppCollist is set to point to the first byte after
|
||||
** the last varint in the column-list (either the 0x00 signifying the end
|
||||
** of the position-list, or the 0x01 that precedes the column number of
|
||||
** the next column in the position-list).
|
||||
**
|
||||
** 1. The number of phrases in the query (nPhrase).
|
||||
**
|
||||
** 2. The number of columns in the FTS3 table (nCol).
|
||||
**
|
||||
** 3. A matrix of (nPhrase*nCol) integers containing the sum of the
|
||||
** number of hits for each phrase in each column across all rows
|
||||
** of the table.
|
||||
**
|
||||
** The total size of the global matchinfo array, assuming the number of
|
||||
** columns is N and the number of phrases is P is:
|
||||
**
|
||||
** 2 + P*(N+1)
|
||||
**
|
||||
** The number of hits for the 3rd phrase in the second column is found
|
||||
** using the expression:
|
||||
**
|
||||
** aGlobal[2 + P*(1+2) + 1]
|
||||
*/
|
||||
typedef struct MatchInfo MatchInfo;
|
||||
struct MatchInfo {
|
||||
Fts3Table *pTab; /* FTS3 Table */
|
||||
Fts3Cursor *pCursor; /* FTS3 Cursor */
|
||||
int iPhrase; /* Number of phrases so far */
|
||||
int nCol; /* Number of columns in table */
|
||||
u32 *aGlobal; /* Pre-allocated buffer */
|
||||
};
|
||||
|
||||
/*
|
||||
** This function is used to count the entries in a column-list (delta-encoded
|
||||
** list of term offsets within a single column of a single row).
|
||||
** The number of elements in the column-list is returned.
|
||||
*/
|
||||
static int fts3ColumnlistCount(char **ppCollist){
|
||||
char *pEnd = *ppCollist;
|
||||
@ -633,15 +758,20 @@ static int fts3ColumnlistCount(char **ppCollist){
|
||||
return nEntry;
|
||||
}
|
||||
|
||||
static void fts3LoadColumnlistCounts(char **pp, u32 *aOut){
|
||||
static void fts3LoadColumnlistCounts(char **pp, u32 *aOut, int isGlobal){
|
||||
char *pCsr = *pp;
|
||||
while( *pCsr ){
|
||||
int nHit;
|
||||
sqlite3_int64 iCol = 0;
|
||||
if( *pCsr==0x01 ){
|
||||
pCsr++;
|
||||
pCsr += sqlite3Fts3GetVarint(pCsr, &iCol);
|
||||
}
|
||||
aOut[iCol] += fts3ColumnlistCount(&pCsr);
|
||||
nHit = fts3ColumnlistCount(&pCsr);
|
||||
if( isGlobal ){
|
||||
if( nHit ) aOut[iCol*3+1]++;
|
||||
}
|
||||
aOut[iCol*3] += nHit;
|
||||
}
|
||||
pCsr++;
|
||||
*pp = pCsr;
|
||||
@ -649,17 +779,18 @@ static void fts3LoadColumnlistCounts(char **pp, u32 *aOut){
|
||||
|
||||
/*
|
||||
** fts3ExprIterate() callback used to collect the "global" matchinfo stats
|
||||
** for a single query.
|
||||
** for a single query. The "global" stats are those elements of the matchinfo
|
||||
** array that are constant for all rows returned by the current query.
|
||||
*/
|
||||
static int fts3ExprGlobalMatchinfoCb(
|
||||
Fts3Expr *pExpr, /* Phrase expression node */
|
||||
int iPhrase,
|
||||
int iPhrase, /* Phrase number (numbered from zero) */
|
||||
void *pCtx /* Pointer to MatchInfo structure */
|
||||
){
|
||||
MatchInfo *p = (MatchInfo *)pCtx;
|
||||
char *pCsr;
|
||||
char *pEnd;
|
||||
const int iStart = 2 + p->nCol*p->iPhrase;
|
||||
const int iStart = 2 + (iPhrase * p->nCol * 3) + 1;
|
||||
|
||||
assert( pExpr->isLoaded );
|
||||
|
||||
@ -667,84 +798,96 @@ static int fts3ExprGlobalMatchinfoCb(
|
||||
pCsr = pExpr->aDoclist;
|
||||
pEnd = &pExpr->aDoclist[pExpr->nDoclist];
|
||||
while( pCsr<pEnd ){
|
||||
while( *pCsr++ & 0x80 );
|
||||
fts3LoadColumnlistCounts(&pCsr, &p->aGlobal[iStart]);
|
||||
}
|
||||
|
||||
p->iPhrase++;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
static int fts3ExprLocalMatchinfoCb(
|
||||
Fts3Expr *pExpr, /* Phrase expression node */
|
||||
int iPhrase,
|
||||
void *pCtx /* Pointer to MatchInfo structure */
|
||||
){
|
||||
MatchInfo *p = (MatchInfo *)pCtx;
|
||||
p->iPhrase++;
|
||||
|
||||
if( pExpr->aDoclist ){
|
||||
char *pCsr;
|
||||
int iOffset = 2 + p->nCol*(p->aGlobal[0]+iPhrase);
|
||||
|
||||
memset(&p->aGlobal[iOffset], 0, p->nCol*sizeof(u32));
|
||||
pCsr = sqlite3Fts3FindPositions(pExpr, p->pCursor->iPrevId, -1);
|
||||
if( pCsr ) fts3LoadColumnlistCounts(&pCsr, &p->aGlobal[iOffset]);
|
||||
while( *pCsr++ & 0x80 ); /* Skip past docid. */
|
||||
fts3LoadColumnlistCounts(&pCsr, &p->aMatchinfo[iStart], 1);
|
||||
}
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Populate pCsr->aMatchinfo[] with data for the current row. The 'matchinfo'
|
||||
** data is an array of 32-bit unsigned integers (C type u32).
|
||||
** fts3ExprIterate() callback used to collect the "local" matchinfo stats
|
||||
** for a single query. The "local" stats are those elements of the matchinfo
|
||||
** array that are different for each row returned by the query.
|
||||
*/
|
||||
static int fts3ExprLocalMatchinfoCb(
|
||||
Fts3Expr *pExpr, /* Phrase expression node */
|
||||
int iPhrase, /* Phrase number */
|
||||
void *pCtx /* Pointer to MatchInfo structure */
|
||||
){
|
||||
MatchInfo *p = (MatchInfo *)pCtx;
|
||||
|
||||
if( pExpr->aDoclist ){
|
||||
char *pCsr;
|
||||
int iStart = 2 + (iPhrase * p->nCol * 3);
|
||||
int i;
|
||||
|
||||
for(i=0; i<p->nCol; i++) p->aMatchinfo[iStart+i*3] = 0;
|
||||
|
||||
pCsr = sqlite3Fts3FindPositions(pExpr, p->pCursor->iPrevId, -1);
|
||||
if( pCsr ){
|
||||
fts3LoadColumnlistCounts(&pCsr, &p->aMatchinfo[iStart], 0);
|
||||
}
|
||||
}
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Populate pCsr->aMatchinfo[] with data for the current row. The
|
||||
** 'matchinfo' data is an array of 32-bit unsigned integers (C type u32).
|
||||
*/
|
||||
static int fts3GetMatchinfo(Fts3Cursor *pCsr){
|
||||
MatchInfo g;
|
||||
MatchInfo sInfo;
|
||||
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
|
||||
if( pCsr->aMatchinfo==0 ){
|
||||
int rc;
|
||||
int nPhrase;
|
||||
int nMatchinfo;
|
||||
|
||||
g.pTab = pTab;
|
||||
g.nCol = pTab->nColumn;
|
||||
g.iPhrase = 0;
|
||||
sInfo.pCursor = pCsr;
|
||||
sInfo.nCol = pTab->nColumn;
|
||||
|
||||
if( pCsr->aMatchinfo==0 ){
|
||||
/* If Fts3Cursor.aMatchinfo[] is NULL, then this is the first time the
|
||||
** matchinfo function has been called for this query. In this case
|
||||
** allocate the array used to accumulate the matchinfo data and
|
||||
** initialize those elements that are constant for every row.
|
||||
*/
|
||||
int rc; /* Return Code */
|
||||
int nPhrase; /* Number of phrases */
|
||||
int nMatchinfo; /* Number of u32 elements in match-info */
|
||||
|
||||
/* Load doclists for each phrase in the query. */
|
||||
rc = fts3ExprLoadDoclists(pCsr, &nPhrase, 0);
|
||||
if( rc!=SQLITE_OK ){
|
||||
return rc;
|
||||
}
|
||||
nMatchinfo = 2 + 3*sInfo.nCol*nPhrase;
|
||||
|
||||
nMatchinfo = 2 + 2*g.nCol*nPhrase;
|
||||
|
||||
g.iPhrase = 0;
|
||||
g.aGlobal = (u32 *)sqlite3_malloc(sizeof(u32)*nMatchinfo);
|
||||
if( !g.aGlobal ){
|
||||
sInfo.aMatchinfo = (u32 *)sqlite3_malloc(sizeof(u32)*nMatchinfo);
|
||||
if( !sInfo.aMatchinfo ){
|
||||
return SQLITE_NOMEM;
|
||||
}
|
||||
memset(g.aGlobal, 0, sizeof(u32)*nMatchinfo);
|
||||
memset(sInfo.aMatchinfo, 0, sizeof(u32)*nMatchinfo);
|
||||
|
||||
g.aGlobal[0] = nPhrase;
|
||||
g.aGlobal[1] = g.nCol;
|
||||
(void)fts3ExprIterate(pCsr->pExpr, fts3ExprGlobalMatchinfoCb, (void *)&g);
|
||||
|
||||
pCsr->aMatchinfo = g.aGlobal;
|
||||
/* First element of match-info is the number of phrases in the query */
|
||||
sInfo.aMatchinfo[0] = nPhrase;
|
||||
sInfo.aMatchinfo[1] = sInfo.nCol;
|
||||
(void)fts3ExprIterate(pCsr->pExpr, fts3ExprGlobalMatchinfoCb,(void*)&sInfo);
|
||||
|
||||
pCsr->aMatchinfo = sInfo.aMatchinfo;
|
||||
}
|
||||
|
||||
g.pTab = pTab;
|
||||
g.pCursor = pCsr;
|
||||
g.nCol = pTab->nColumn;
|
||||
g.iPhrase = 0;
|
||||
g.aGlobal = pCsr->aMatchinfo;
|
||||
|
||||
sInfo.aMatchinfo = pCsr->aMatchinfo;
|
||||
if( pCsr->isMatchinfoOk ){
|
||||
(void)fts3ExprIterate(pCsr->pExpr, fts3ExprLocalMatchinfoCb, (void *)&g);
|
||||
(void)fts3ExprIterate(pCsr->pExpr, fts3ExprLocalMatchinfoCb, (void*)&sInfo);
|
||||
pCsr->isMatchinfoOk = 0;
|
||||
}
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Implementation of snippet() function.
|
||||
*/
|
||||
void sqlite3Fts3Snippet(
|
||||
sqlite3_context *pCtx, /* SQLite function call context */
|
||||
Fts3Cursor *pCsr, /* Cursor object */
|
||||
@ -835,12 +978,13 @@ void sqlite3Fts3Snippet(
|
||||
|
||||
|
||||
typedef struct TermOffset TermOffset;
|
||||
typedef struct TermOffsetCtx TermOffsetCtx;
|
||||
|
||||
struct TermOffset {
|
||||
char *pList; /* Position-list */
|
||||
int iPos; /* Position just read from pList */
|
||||
int iOff;
|
||||
int iOff; /* Offset of this term from read positions */
|
||||
};
|
||||
typedef struct TermOffsetCtx TermOffsetCtx;
|
||||
|
||||
struct TermOffsetCtx {
|
||||
int iCol; /* Column of table to populate aTerm for */
|
||||
@ -885,17 +1029,15 @@ void sqlite3Fts3Offsets(
|
||||
){
|
||||
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
|
||||
sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule;
|
||||
const char *ZDUMMY;
|
||||
int NDUMMY;
|
||||
|
||||
const char *ZDUMMY; /* Dummy argument used with xNext() */
|
||||
int NDUMMY; /* Dummy argument used with xNext() */
|
||||
int rc; /* Return Code */
|
||||
int nToken; /* Number of tokens in query */
|
||||
int iCol; /* Column currently being processed */
|
||||
StrBuffer res = {0, 0, 0}; /* Result string */
|
||||
TermOffsetCtx sCtx; /* Context for fts3ExprTermOffsetInit() */
|
||||
|
||||
TermOffsetCtx sCtx;
|
||||
memset(&sCtx, 0, sizeof(sCtx));
|
||||
|
||||
assert( pCsr->isRequireSeek==0 );
|
||||
|
||||
/* Count the number of terms in the query */
|
||||
@ -910,6 +1052,9 @@ void sqlite3Fts3Offsets(
|
||||
}
|
||||
sCtx.iDocid = pCsr->iPrevId;
|
||||
|
||||
/* Loop through the table columns, appending offset information to
|
||||
** string-buffer res for each column.
|
||||
*/
|
||||
for(iCol=0; iCol<pTab->nColumn; iCol++){
|
||||
sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */
|
||||
int iStart;
|
||||
@ -1001,12 +1146,15 @@ void sqlite3Fts3Offsets(
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
** Implementation of matchinfo() function.
|
||||
*/
|
||||
void sqlite3Fts3Matchinfo(sqlite3_context *pContext, Fts3Cursor *pCsr){
|
||||
int rc = fts3GetMatchinfo(pCsr);
|
||||
if( rc!=SQLITE_OK ){
|
||||
sqlite3_result_error_code(pContext, rc);
|
||||
}else{
|
||||
int n = sizeof(u32)*(2+pCsr->aMatchinfo[0]*pCsr->aMatchinfo[1]*2);
|
||||
int n = sizeof(u32)*(2+pCsr->aMatchinfo[0]*pCsr->aMatchinfo[1]*3);
|
||||
sqlite3_result_blob(pContext, pCsr->aMatchinfo, n, SQLITE_TRANSIENT);
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user