mirror of
https://github.com/sqlite/sqlite.git
synced 2025-07-30 19:03:16 +03:00
Fixes and simplifications for the snippet() and highlight() functions.
FossilOrigin-Name: ca5d44042aa7461dcc8b700b0763df4df9d4a891
This commit is contained in:
@ -1300,7 +1300,10 @@ static int fts5ApiSetAuxdata(
|
||||
}
|
||||
}else{
|
||||
pData = (Fts5Auxdata*)sqlite3_malloc(sizeof(Fts5Auxdata));
|
||||
if( pData==0 ) return SQLITE_NOMEM;
|
||||
if( pData==0 ){
|
||||
if( xDelete ) xDelete(pPtr);
|
||||
return SQLITE_NOMEM;
|
||||
}
|
||||
memset(pData, 0, sizeof(Fts5Auxdata));
|
||||
pData->pAux = pCsr->pAux;
|
||||
pData->pNext = pCsr->pAuxdata;
|
||||
|
@ -141,9 +141,10 @@ typedef void (*fts5_extension_function)(
|
||||
** future invocation of the same fts5 extension function made as part of
|
||||
** the same MATCH query using the xGetAuxdata() API.
|
||||
**
|
||||
** Each extension function is allocated a single auxiliary data slot per
|
||||
** query. If the extension function is invoked more than once by the SQL
|
||||
** query, then all invocations share a single auxiliary data context.
|
||||
** Each extension function is allocated a single auxiliary data slot for
|
||||
** each FTS query (MATCH expression). If the extension function is invoked
|
||||
** more than once for a single FTS query, then all invocations share a
|
||||
** single auxiliary data context.
|
||||
**
|
||||
** If there is already an auxiliary data pointer when this function is
|
||||
** invoked, then it is replaced by the new pointer. If an xDelete callback
|
||||
@ -153,6 +154,11 @@ typedef void (*fts5_extension_function)(
|
||||
** The xDelete callback, if one is specified, is also invoked on the
|
||||
** auxiliary data pointer after the FTS5 query has finished.
|
||||
**
|
||||
** If an error (e.g. an OOM condition) occurs within this function,
|
||||
** the auxiliary data is set to NULL and an error code returned. If the
|
||||
** xDelete parameter was not NULL, it is invoked on the auxiliary data
|
||||
** pointer before returning.
|
||||
**
|
||||
**
|
||||
** xGetAuxdata(pFts5, bClear)
|
||||
**
|
||||
|
@ -14,22 +14,113 @@
|
||||
#include "fts5Int.h"
|
||||
#include <math.h>
|
||||
|
||||
/*
|
||||
** Object used to iterate through all "coalesced phrase instances" in
|
||||
** a single column of the current row. If the phrase instances in the
|
||||
** column being considered do not overlap, this object simply iterates
|
||||
** through them. Or, if they do overlap (share one or more tokens in
|
||||
** common), each set of overlapping instances is treated as a single
|
||||
** match. See documentation for the highlight() auxiliary function for
|
||||
** details.
|
||||
**
|
||||
** Usage is:
|
||||
**
|
||||
** for(rc = fts5CInstIterInit(pApi, pFts, iCol, &iter);
|
||||
** (rc==SQLITE_OK && 0==fts5CInstIterEof(&iter));
|
||||
** rc = fts5CInstIterNext(&iter)
|
||||
** ){
|
||||
** printf("instance starts at %d, ends at %d\n", iter.iStart, iter.iEnd);
|
||||
** }
|
||||
**
|
||||
*/
|
||||
typedef struct CInstIter CInstIter;
|
||||
struct CInstIter {
|
||||
const Fts5ExtensionApi *pApi; /* API offered by current FTS version */
|
||||
Fts5Context *pFts; /* First arg to pass to pApi functions */
|
||||
int iCol; /* Column to search */
|
||||
int iInst; /* Index of next phrase instance to read with xInst() */
|
||||
int nInst; /* Total number of phrase instances (from xInstCount()) */
|
||||
|
||||
/* Output variables, refreshed by each call to fts5CInstIterNext() */
|
||||
int iStart; /* First token in coalesced phrase instance; <0 at EOF */
|
||||
int iEnd; /* Last token (inclusive) in coalesced phrase instance */
|
||||
};
|
||||
|
||||
/*
|
||||
** Return non-zero if the iterator is at EOF, or zero otherwise.
|
||||
*/
|
||||
static int fts5CInstIterEof(CInstIter *pIter){
  /* fts5CInstIterNext() leaves iStart negative when no further coalesced
  ** instance exists in the column being scanned. */
  int bEof = (pIter->iStart < 0);
  return bEof;
}
|
||||
|
||||
/*
|
||||
** Advance the iterator to the next coalesced phrase instance. Return
|
||||
** an SQLite error code if an error occurs, or SQLITE_OK otherwise.
|
||||
*/
|
||||
static int fts5CInstIterNext(CInstIter *pIter){
  const Fts5ExtensionApi *pApi = pIter->pApi;
  Fts5Context *pFts = pIter->pFts;
  int rc = SQLITE_OK;

  /* Start from the EOF state. It is overwritten as soon as an instance in
  ** column pIter->iCol is found. */
  pIter->iStart = pIter->iEnd = -1;

  for( ; pIter->iInst<pIter->nInst; pIter->iInst++ ){
    int iPhrase;                  /* Phrase the instance belongs to */
    int iInstCol;                 /* Column the instance appears in */
    int iFirst;                   /* First token of the instance */
    int iLast;                    /* Last token of the instance */

    rc = pApi->xInst(pFts, pIter->iInst, &iPhrase, &iInstCol, &iFirst);
    if( rc!=SQLITE_OK ) break;
    if( iInstCol!=pIter->iCol ) continue;

    iLast = iFirst - 1 + pApi->xPhraseSize(pFts, iPhrase);
    if( pIter->iStart<0 ){
      /* First instance of this coalesced group */
      pIter->iStart = iFirst;
      pIter->iEnd = iLast;
    }else if( iFirst>pIter->iEnd ){
      /* No overlap with the current group. Stop here, leaving iInst
      ** pointing at this instance so the next call starts with it. */
      break;
    }else if( iLast>pIter->iEnd ){
      /* Overlapping instance that extends the current group */
      pIter->iEnd = iLast;
    }
  }

  return rc;
}
|
||||
|
||||
/*
|
||||
** Initialize the iterator object indicated by the final parameter to
|
||||
** iterate through coalesced phrase instances in column iCol.
|
||||
*/
|
||||
static int fts5CInstIterInit(
|
||||
const Fts5ExtensionApi *pApi,
|
||||
Fts5Context *pFts,
|
||||
int iCol,
|
||||
CInstIter *pIter
|
||||
){
|
||||
int rc;
|
||||
|
||||
memset(pIter, 0, sizeof(CInstIter));
|
||||
pIter->pApi = pApi;
|
||||
pIter->pFts = pFts;
|
||||
pIter->iCol = iCol;
|
||||
rc = pApi->xInstCount(pFts, &pIter->nInst);
|
||||
|
||||
if( rc==SQLITE_OK ){
|
||||
rc = fts5CInstIterNext(pIter);
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************************************
|
||||
** Start of highlight() implementation.
|
||||
*/
|
||||
typedef struct HighlightContext HighlightContext;
|
||||
struct HighlightContext {
|
||||
const Fts5ExtensionApi *pApi; /* API offered by current FTS version */
|
||||
Fts5Context *pFts; /* First arg to pass to pApi functions */
|
||||
int nInst; /* Total number of phrase instances */
|
||||
int iInst; /* Current phrase instance index */
|
||||
int iStart; /* First token of current phrase */
|
||||
int iEnd; /* Last token of current phrase */
|
||||
|
||||
CInstIter iter; /* Coalesced Instance Iterator */
|
||||
int iRangeStart;
|
||||
int iRangeEnd;
|
||||
const char *zOpen; /* Opening highlight */
|
||||
const char *zClose; /* Closing highlight */
|
||||
int iCol; /* Column to read from */
|
||||
|
||||
const char *zIn; /* Input text */
|
||||
int nIn; /* Size of input text in bytes */
|
||||
int iOff; /* Current offset within zIn[] */
|
||||
@ -40,6 +131,10 @@ struct HighlightContext {
|
||||
** Append text to the HighlightContext output string - p->zOut. Argument
|
||||
** z points to a buffer containing n bytes of text to append. If n is
|
||||
** negative, everything up until the first '\0' is appended to the output.
|
||||
**
|
||||
** If *pRc is set to any value other than SQLITE_OK when this function is
|
||||
** called, it is a no-op. If an error (i.e. an OOM condition) is encountered,
|
||||
** *pRc is set to an error code before returning.
|
||||
*/
|
||||
static void fts5HighlightAppend(
|
||||
int *pRc,
|
||||
@ -53,6 +148,9 @@ static void fts5HighlightAppend(
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
** Tokenizer callback used by implementation of highlight() function.
|
||||
*/
|
||||
static int fts5HighlightCb(
|
||||
void *pContext, /* Pointer to HighlightContext object */
|
||||
const char *pToken, /* Buffer containing token */
|
||||
@ -64,39 +162,43 @@ static int fts5HighlightCb(
|
||||
HighlightContext *p = (HighlightContext*)pContext;
|
||||
int rc = SQLITE_OK;
|
||||
|
||||
if( iPos==p->iStart ){
|
||||
if( p->iRangeEnd>0 ){
|
||||
if( iPos<p->iRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK;
|
||||
if( iPos==p->iRangeStart ) p->iOff = iStartOff;
|
||||
}
|
||||
|
||||
if( iPos==p->iter.iStart ){
|
||||
fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iStartOff - p->iOff);
|
||||
fts5HighlightAppend(&rc, p, p->zOpen, -1);
|
||||
p->iOff = iStartOff;
|
||||
}
|
||||
|
||||
if( iPos==p->iEnd ){
|
||||
int bClose = 1;
|
||||
for(p->iInst++; rc==SQLITE_OK && p->iInst<p->nInst; p->iInst++){
|
||||
int iP, iPCol, iOff;
|
||||
rc = p->pApi->xInst(p->pFts, p->iInst, &iP, &iPCol, &iOff);
|
||||
if( iPCol!=p->iCol ){
|
||||
p->iStart = p->iEnd = -1;
|
||||
}else{
|
||||
int iEnd = iOff - 1 + p->pApi->xPhraseSize(p->pFts, iP);
|
||||
if( iEnd<=p->iEnd ) continue;
|
||||
if( iOff<=p->iEnd ) bClose = 0;
|
||||
p->iStart = iOff;
|
||||
p->iEnd = iEnd;
|
||||
}
|
||||
break;
|
||||
if( iPos==p->iter.iEnd ){
|
||||
if( p->iRangeEnd && p->iter.iStart<p->iRangeStart ){
|
||||
fts5HighlightAppend(&rc, p, p->zOpen, -1);
|
||||
}
|
||||
fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff);
|
||||
fts5HighlightAppend(&rc, p, p->zClose, -1);
|
||||
p->iOff = iEndOff;
|
||||
if( rc==SQLITE_OK ){
|
||||
rc = fts5CInstIterNext(&p->iter);
|
||||
}
|
||||
}
|
||||
|
||||
if( bClose ){
|
||||
fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff);
|
||||
if( p->iRangeEnd>0 && iPos==p->iRangeEnd ){
|
||||
fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff);
|
||||
p->iOff = iEndOff;
|
||||
if( iPos<p->iter.iEnd ){
|
||||
fts5HighlightAppend(&rc, p, p->zClose, -1);
|
||||
p->iOff = iEndOff;
|
||||
}
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
** Implementation of highlight() function.
|
||||
*/
|
||||
static void fts5HighlightFunction(
|
||||
const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
|
||||
Fts5Context *pFts, /* First arg to pass to pApi functions */
|
||||
@ -106,6 +208,7 @@ static void fts5HighlightFunction(
|
||||
){
|
||||
HighlightContext ctx;
|
||||
int rc;
|
||||
int iCol;
|
||||
|
||||
if( nVal!=3 ){
|
||||
const char *zErr = "wrong number of arguments to function highlight()";
|
||||
@ -113,26 +216,14 @@ static void fts5HighlightFunction(
|
||||
return;
|
||||
}
|
||||
|
||||
iCol = sqlite3_value_int(apVal[0]);
|
||||
memset(&ctx, 0, sizeof(HighlightContext));
|
||||
ctx.iCol = sqlite3_value_int(apVal[0]);
|
||||
ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]);
|
||||
ctx.zClose = (const char*)sqlite3_value_text(apVal[2]);
|
||||
ctx.pApi = pApi;
|
||||
ctx.pFts = pFts;
|
||||
rc = pApi->xColumnText(pFts, ctx.iCol, &ctx.zIn, &ctx.nIn);
|
||||
if( rc==SQLITE_OK ) rc = pApi->xInstCount(pFts, &ctx.nInst);
|
||||
rc = pApi->xColumnText(pFts, iCol, &ctx.zIn, &ctx.nIn);
|
||||
|
||||
/* Find the first phrase instance in the right column. */
|
||||
ctx.iStart = -1;
|
||||
ctx.iEnd = -1;
|
||||
for( ; ctx.iInst<ctx.nInst && rc==SQLITE_OK; ctx.iInst++){
|
||||
int iP, iPCol, iOff;
|
||||
rc = pApi->xInst(pFts, ctx.iInst, &iP, &iPCol, &iOff);
|
||||
if( iPCol==ctx.iCol ){
|
||||
ctx.iStart = iOff;
|
||||
ctx.iEnd = iOff - 1 + pApi->xPhraseSize(pFts, iP);
|
||||
break;
|
||||
}
|
||||
if( rc==SQLITE_OK ){
|
||||
rc = fts5CInstIterInit(pApi, pFts, iCol, &ctx.iter);
|
||||
}
|
||||
|
||||
if( rc==SQLITE_OK ){
|
||||
@ -150,347 +241,7 @@ static void fts5HighlightFunction(
|
||||
/*
|
||||
**************************************************************************/
|
||||
|
||||
typedef struct SnipPhrase SnipPhrase;
|
||||
typedef struct SnipIter SnipIter;
|
||||
typedef struct SnippetCtx SnippetCtx;
|
||||
|
||||
struct SnipPhrase {
|
||||
u64 mask; /* Instance bitmask; bit 0 set by iterator, shifted left on advance */
|
||||
int nToken; /* Tokens in this phrase */
|
||||
int i; /* Current offset in phrase poslist (updated by xPoslist()) */
|
||||
i64 iPos; /* Next position in phrase (-ve -> EOF) */
|
||||
};
|
||||
|
||||
struct SnipIter {
|
||||
i64 iLast; /* Last token position of current snippet (<0 at EOF) */
|
||||
int nScore; /* Score of current snippet */
|
||||
|
||||
const Fts5ExtensionApi *pApi; /* API offered by current FTS version */
|
||||
Fts5Context *pFts; /* First arg to pass to pApi functions */
|
||||
u64 szmask; /* Mask applied to SnipPhrase.mask (low nToken bits set) */
|
||||
int nPhrase; /* Number of phrases */
|
||||
SnipPhrase aPhrase[0]; /* Array of size nPhrase */
|
||||
};
|
||||
|
||||
struct SnippetCtx {
|
||||
int iFirst; /* Offset of first token to record */
|
||||
int nToken; /* Size of aiStart[] and aiEnd[] arrays */
|
||||
int iSeen; /* Set to largest offset seen */
|
||||
int *aiStart; /* Start offset within column text of each recorded token */
|
||||
int *aiEnd; /* End offset within column text of each recorded token */
|
||||
};
|
||||
|
||||
static int fts5SnippetCallback(
|
||||
void *pContext, /* Pointer to Fts5Buffer object */
|
||||
const char *pToken, /* Buffer containing token */
|
||||
int nToken, /* Size of token in bytes */
|
||||
int iStart, /* Start offset of token */
|
||||
int iEnd, /* End offset of token */
|
||||
int iPos /* Position offset of token */
|
||||
){
|
||||
int rc = SQLITE_OK;
|
||||
SnippetCtx *pCtx = (SnippetCtx*)pContext;
|
||||
int iOff = iPos - pCtx->iFirst;
|
||||
|
||||
if( iOff>=0 ){
|
||||
if( iOff < pCtx->nToken ){
|
||||
pCtx->aiStart[iOff] = iStart;
|
||||
pCtx->aiEnd[iOff] = iEnd;
|
||||
}
|
||||
pCtx->iSeen = iPos;
|
||||
if( iOff>=pCtx->nToken ) rc = SQLITE_DONE;
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
** Set pIter->nScore to the score for the current entry.
|
||||
*/
|
||||
static void fts5SnippetCalculateScore(SnipIter *pIter){
  int iPhrase;                    /* Used to iterate through phrases */
  int nTotal = 0;                 /* Score accumulated so far */

  assert( pIter->iLast>=0 );

  for(iPhrase=0; iPhrase<pIter->nPhrase; iPhrase++){
    u64 bits = pIter->aPhrase[iPhrase].mask;
    u64 probe;

    /* Phrases with no instance inside the snippet contribute nothing. */
    if( bits==0 ) continue;

    /* 1000 points for the phrase being present at all, plus one point for
    ** each instance bit that lies within the snippet-size mask. */
    nTotal += 1000;
    probe = 1;
    while( probe & pIter->szmask ){
      nTotal += (bits & probe) ? 1 : 0;
      probe <<= 1;
    }
  }

  pIter->nScore = nTotal;
}
|
||||
|
||||
/*
|
||||
** Allocate a new snippet iter.
|
||||
*/
|
||||
static int fts5SnipIterNew(
|
||||
const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
|
||||
Fts5Context *pFts, /* First arg to pass to pApi functions */
|
||||
int nToken, /* Number of tokens in snippets */
|
||||
SnipIter **ppIter /* OUT: New object */
|
||||
){
|
||||
int i; /* Counter variable */
|
||||
SnipIter *pIter; /* New iterator object */
|
||||
int nByte; /* Bytes of space to allocate */
|
||||
int nPhrase; /* Number of phrases in query */
|
||||
|
||||
*ppIter = 0;
|
||||
nPhrase = pApi->xPhraseCount(pFts);
|
||||
nByte = sizeof(SnipIter) + nPhrase * sizeof(SnipPhrase);
|
||||
pIter = (SnipIter*)sqlite3_malloc(nByte);
|
||||
if( pIter==0 ) return SQLITE_NOMEM;
|
||||
memset(pIter, 0, nByte);
|
||||
|
||||
pIter->nPhrase = nPhrase;
|
||||
pIter->pApi = pApi;
|
||||
pIter->pFts = pFts;
|
||||
pIter->szmask = ((u64)1 << nToken) - 1;
|
||||
assert( nToken<=63 );
|
||||
|
||||
for(i=0; i<nPhrase; i++){
|
||||
pIter->aPhrase[i].nToken = pApi->xPhraseSize(pFts, i);
|
||||
}
|
||||
|
||||
*ppIter = pIter;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Set the iterator to point to the first candidate snippet.
|
||||
*/
|
||||
/*
** Point the iterator at the first candidate snippet: the one ending with
** the earliest phrase instance in the current row. If no phrase has any
** instance, the iterator is left at EOF (pIter->iLast<0), the same
** convention fts5SnipIterNext() uses.
*/
static void fts5SnipIterFirst(SnipIter *pIter){
  const Fts5ExtensionApi *pApi = pIter->pApi;
  Fts5Context *pFts = pIter->pFts;
  int i;                          /* Used to iterate through phrases */
  SnipPhrase *pMin = 0;           /* Phrase with first match */

  memset(pIter->aPhrase, 0, sizeof(SnipPhrase) * pIter->nPhrase);

  for(i=0; i<pIter->nPhrase; i++){
    SnipPhrase *p = &pIter->aPhrase[i];
    p->nToken = pApi->xPhraseSize(pFts, i);
    pApi->xPoslist(pFts, i, &p->i, &p->iPos);
    if( p->iPos>=0 && (pMin==0 || p->iPos<pMin->iPos) ){
      pMin = p;
    }
  }

  /* Callers are expected to invoke this only for rows that contain at
  ** least one phrase instance. The explicit check keeps builds with
  ** assert() compiled out from dereferencing a NULL pointer if that
  ** expectation is ever violated. */
  assert( pMin );
  if( pMin==0 ){
    pIter->iLast = -1;
    return;
  }

  pIter->iLast = pMin->iPos + pMin->nToken - 1;
  pMin->mask = 0x01;
  pApi->xPoslist(pFts, (int)(pMin - pIter->aPhrase), &pMin->i, &pMin->iPos);
  fts5SnippetCalculateScore(pIter);
}
|
||||
|
||||
/*
|
||||
** Advance the snippet iterator to the next candidate snippet.
|
||||
*/
|
||||
static void fts5SnipIterNext(SnipIter *pIter){
  SnipPhrase *aPhrase = pIter->aPhrase;
  SnipPhrase *pEnd = &aPhrase[pIter->nPhrase];
  SnipPhrase *pNext = 0;          /* Phrase owning the smallest next position */
  SnipPhrase *p;                  /* Used to iterate through phrases */
  i64 nAdvance;                   /* Distance the snippet window moves */

  /* Find the phrase whose next instance comes first. Phrases with a
  ** negative iPos have no further instances. */
  for(p=aPhrase; p<pEnd; p++){
    if( p->iPos<0 ) continue;
    if( pNext==0 || p->iPos<pNext->iPos ) pNext = p;
  }

  if( pNext==0 ){
    /* Every phrase is exhausted - the SnipIter is at EOF. */
    pIter->iLast = -1;
    return;
  }

  /* Slide every phrase mask by the distance moved, discarding any bits
  ** that fall outside of the snippet window. */
  nAdvance = pNext->iPos - pIter->iLast;
  assert( nAdvance>=0 );
  for(p=aPhrase; p<pEnd; p++){
    if( nAdvance>=63 ){
      p->mask = 0;
    }else{
      p->mask = (p->mask << (int)nAdvance) & pIter->szmask;
    }
  }

  pIter->iLast = pNext->iPos;
  pNext->mask |= 0x01;
  fts5SnippetCalculateScore(pIter);

  /* Step the chosen phrase on to its following instance. */
  pIter->pApi->xPoslist(pIter->pFts, pNext - aPhrase, &pNext->i, &pNext->iPos);
}
|
||||
|
||||
static void fts5SnipIterFree(SnipIter *pIter){
  /* Release an iterator allocated by fts5SnipIterNew(). sqlite3_free() is
  ** a no-op when passed a NULL pointer, so no separate check is needed. */
  sqlite3_free(pIter);
}
|
||||
|
||||
static int fts5SnippetText(
|
||||
const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
|
||||
Fts5Context *pFts, /* First arg to pass to pApi functions */
|
||||
SnipIter *pIter, /* Snippet to write to buffer */
|
||||
int nToken, /* Size of desired snippet in tokens */
|
||||
const char *zStart,
|
||||
const char *zFinal,
|
||||
const char *zEllip,
|
||||
Fts5Buffer *pBuf /* Write output to this buffer */
|
||||
){
|
||||
SnippetCtx ctx;
|
||||
int i;
|
||||
u64 all = 0;
|
||||
const char *zCol; /* Column text to extract snippet from */
|
||||
int nCol; /* Size of column text in bytes */
|
||||
int rc;
|
||||
int nShift;
|
||||
|
||||
rc = pApi->xColumnText(pFts, FTS5_POS2COLUMN(pIter->iLast), &zCol, &nCol);
|
||||
if( rc!=SQLITE_OK ) return rc;
|
||||
|
||||
/* At this point pIter->iLast is the offset of the last token in the
|
||||
** proposed snippet. However, in all cases pIter->iLast contains the
|
||||
** final token of one of the phrases. This makes the snippet look
|
||||
** unbalanced. For example:
|
||||
**
|
||||
** "...x x x x x <b>term</b>..."
|
||||
**
|
||||
** It is better to increase iLast a little so that the snippet looks
|
||||
** more like:
|
||||
**
|
||||
** "...x x x <b>term</b> y y..."
|
||||
**
|
||||
** The problem is that there is no easy way to discover whether or not
|
||||
** how many tokens are present in the column following "term".
|
||||
*/
|
||||
|
||||
/* Set variable nShift to the number of tokens by which the snippet
|
||||
** should be shifted, assuming there are sufficient tokens to the right
|
||||
** of iLast in the column value. */
|
||||
for(i=0; i<pIter->nPhrase; i++){
|
||||
int iToken;
|
||||
for(iToken=0; iToken<pIter->aPhrase[i].nToken; iToken++){
|
||||
all |= (pIter->aPhrase[i].mask << iToken);
|
||||
}
|
||||
}
|
||||
for(i=nToken-1; i>=0; i--){
|
||||
if( all & ((u64)1 << i) ) break;
|
||||
}
|
||||
assert( i>=0 );
|
||||
nShift = (nToken - i) / 2;
|
||||
|
||||
memset(&ctx, 0, sizeof(SnippetCtx));
|
||||
ctx.nToken = nToken + nShift;
|
||||
ctx.iFirst = FTS5_POS2OFFSET(pIter->iLast) - nToken + 1;
|
||||
if( ctx.iFirst<0 ){
|
||||
nShift += ctx.iFirst;
|
||||
if( nShift<0 ) nShift = 0;
|
||||
ctx.iFirst = 0;
|
||||
}
|
||||
ctx.aiStart = (int*)sqlite3_malloc(sizeof(int) * ctx.nToken * 2);
|
||||
if( ctx.aiStart==0 ) return SQLITE_NOMEM;
|
||||
ctx.aiEnd = &ctx.aiStart[ctx.nToken];
|
||||
|
||||
rc = pApi->xTokenize(pFts, zCol, nCol, (void*)&ctx, fts5SnippetCallback);
|
||||
if( rc==SQLITE_OK ){
|
||||
int i1; /* First token from input to include */
|
||||
int i2; /* Last token from input to include */
|
||||
|
||||
int iPrint;
|
||||
int iMatchto;
|
||||
int iLast;
|
||||
|
||||
int *aiStart = ctx.aiStart - ctx.iFirst;
|
||||
int *aiEnd = ctx.aiEnd - ctx.iFirst;
|
||||
|
||||
/* Ideally we want to start the snippet with token (ctx.iFirst + nShift).
|
||||
** However, this is only possible if there are sufficient tokens within
|
||||
** the column. This block sets variables i1 and i2 to the first and last
|
||||
** input tokens to include in the snippet. */
|
||||
if( (ctx.iFirst + nShift + nToken)<=ctx.iSeen ){
|
||||
i1 = ctx.iFirst + nShift;
|
||||
i2 = i1 + nToken - 1;
|
||||
}else{
|
||||
i2 = ctx.iSeen;
|
||||
i1 = ctx.iSeen - nToken + 1;
|
||||
assert( i1>=0 || ctx.iFirst==0 );
|
||||
if( i1<0 ) i1 = 0;
|
||||
}
|
||||
|
||||
/* If required, append the preceding ellipsis. */
|
||||
if( i1>0 ) sqlite3Fts5BufferAppendPrintf(&rc, pBuf, "%s", zEllip);
|
||||
|
||||
iLast = FTS5_POS2OFFSET(pIter->iLast);
|
||||
iPrint = i1;
|
||||
iMatchto = -1;
|
||||
|
||||
for(i=i1; i<=i2; i++){
|
||||
|
||||
/* Check if this is the first token of any phrase match. */
|
||||
int ip;
|
||||
for(ip=0; ip<pIter->nPhrase; ip++){
|
||||
SnipPhrase *pPhrase = &pIter->aPhrase[ip];
|
||||
u64 m = (1 << (iLast - i - pPhrase->nToken + 1));
|
||||
|
||||
if( i<=iLast && (pPhrase->mask & m) ){
|
||||
if( iMatchto<0 ){
|
||||
sqlite3Fts5BufferAppendPrintf(&rc, pBuf, "%.*s%s",
|
||||
aiStart[i] - aiStart[iPrint],
|
||||
&zCol[aiStart[iPrint]],
|
||||
zStart
|
||||
);
|
||||
iPrint = i;
|
||||
}
|
||||
if( i>iMatchto ) iMatchto = i + pPhrase->nToken - 1;
|
||||
}
|
||||
}
|
||||
|
||||
if( i==iMatchto ){
|
||||
sqlite3Fts5BufferAppendPrintf(&rc, pBuf, "%.*s%s",
|
||||
aiEnd[i] - aiStart[iPrint],
|
||||
&zCol[aiStart[iPrint]],
|
||||
zFinal
|
||||
);
|
||||
iMatchto = -1;
|
||||
iPrint = i+1;
|
||||
|
||||
if( i<i2 ){
|
||||
sqlite3Fts5BufferAppendPrintf(&rc, pBuf, "%.*s",
|
||||
aiStart[i+1] - aiEnd[i],
|
||||
&zCol[aiEnd[i]]
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if( iPrint<=i2 ){
|
||||
sqlite3Fts5BufferAppendPrintf(&rc, pBuf, "%.*s",
|
||||
aiEnd[i2] - aiStart[iPrint],
|
||||
&zCol[aiStart[iPrint]]
|
||||
);
|
||||
if( iMatchto>=0 ){
|
||||
sqlite3Fts5BufferAppendString(&rc, pBuf, zFinal);
|
||||
}
|
||||
}
|
||||
|
||||
/* If required, append the trailing ellipsis. */
|
||||
if( i2<ctx.iSeen ) sqlite3Fts5BufferAppendString(&rc, pBuf, zEllip);
|
||||
}
|
||||
|
||||
sqlite3_free(ctx.aiStart);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
** A default snippet() implementation. This is compatible with the FTS3
|
||||
** snippet() function.
|
||||
*/
|
||||
static void fts5SnippetFunction(
|
||||
const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
|
||||
Fts5Context *pFts, /* First arg to pass to pApi functions */
|
||||
@ -498,55 +249,120 @@ static void fts5SnippetFunction(
|
||||
int nVal, /* Number of values in apVal[] array */
|
||||
sqlite3_value **apVal /* Array of trailing arguments */
|
||||
){
|
||||
const char *zStart = "<b>";
|
||||
const char *zFinal = "</b>";
|
||||
const char *zEllip = "<b>...</b>";
|
||||
int nToken = -15;
|
||||
int nAbs;
|
||||
int rc;
|
||||
SnipIter *pIter = 0;
|
||||
HighlightContext ctx;
|
||||
int rc = SQLITE_OK; /* Return code */
|
||||
int iCol; /* 1st argument to snippet() */
|
||||
const char *zEllips; /* 4th argument to snippet() */
|
||||
int nToken; /* 5th argument to snippet() */
|
||||
int nInst; /* Number of instance matches this row */
|
||||
int i; /* Used to iterate through instances */
|
||||
int nPhrase; /* Number of phrases in query */
|
||||
unsigned char *aSeen; /* Array of "seen instance" flags */
|
||||
int iBestCol; /* Column containing best snippet */
|
||||
int iBestStart = 0; /* First token of best snippet */
|
||||
int iBestLast = nToken; /* Last token of best snippet */
|
||||
int nBestScore = 0; /* Score of best snippet */
|
||||
int nColSize; /* Total size of iBestCol in tokens */
|
||||
|
||||
if( nVal>=1 ) zStart = (const char*)sqlite3_value_text(apVal[0]);
|
||||
if( nVal>=2 ) zFinal = (const char*)sqlite3_value_text(apVal[1]);
|
||||
if( nVal>=3 ) zEllip = (const char*)sqlite3_value_text(apVal[2]);
|
||||
if( nVal>=4 ){
|
||||
nToken = sqlite3_value_int(apVal[3]);
|
||||
if( nToken==0 ) nToken = -15;
|
||||
if( nVal!=5 ){
|
||||
const char *zErr = "wrong number of arguments to function snippet()";
|
||||
sqlite3_result_error(pCtx, zErr, -1);
|
||||
return;
|
||||
}
|
||||
|
||||
memset(&ctx, 0, sizeof(HighlightContext));
|
||||
rc = pApi->xColumnText(pFts, iCol, &ctx.zIn, &ctx.nIn);
|
||||
|
||||
iCol = sqlite3_value_int(apVal[0]);
|
||||
ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]);
|
||||
ctx.zClose = (const char*)sqlite3_value_text(apVal[2]);
|
||||
zEllips = (const char*)sqlite3_value_text(apVal[3]);
|
||||
nToken = sqlite3_value_int(apVal[4]);
|
||||
|
||||
iBestCol = (iCol>=0 ? iCol : 0);
|
||||
nPhrase = pApi->xPhraseCount(pFts);
|
||||
aSeen = sqlite3_malloc(nPhrase);
|
||||
if( aSeen==0 ){
|
||||
rc = SQLITE_NOMEM;
|
||||
}
|
||||
nAbs = nToken * (nToken<0 ? -1 : 1);
|
||||
|
||||
rc = fts5SnipIterNew(pApi, pFts, nAbs, &pIter);
|
||||
if( rc==SQLITE_OK ){
|
||||
Fts5Buffer buf; /* Result buffer */
|
||||
int nBestScore = 0; /* Score of best snippet found */
|
||||
rc = pApi->xInstCount(pFts, &nInst);
|
||||
}
|
||||
for(i=0; rc==SQLITE_OK && i<nInst; i++){
|
||||
int ip, iSnippetCol, iStart;
|
||||
memset(aSeen, 0, nPhrase);
|
||||
rc = pApi->xInst(pFts, i, &ip, &iSnippetCol, &iStart);
|
||||
if( rc==SQLITE_OK && (iCol<0 || iSnippetCol==iCol) ){
|
||||
int nScore = 1000;
|
||||
int iLast = iStart - 1 + pApi->xPhraseSize(pFts, ip);
|
||||
int j;
|
||||
aSeen[ip] = 1;
|
||||
|
||||
for(fts5SnipIterFirst(pIter);
|
||||
pIter->iLast>=0;
|
||||
fts5SnipIterNext(pIter)
|
||||
){
|
||||
if( pIter->nScore>nBestScore ) nBestScore = pIter->nScore;
|
||||
}
|
||||
for(fts5SnipIterFirst(pIter);
|
||||
pIter->iLast>=0;
|
||||
fts5SnipIterNext(pIter)
|
||||
){
|
||||
if( pIter->nScore==nBestScore ) break;
|
||||
}
|
||||
for(j=i+1; rc==SQLITE_OK && j<nInst; j++){
|
||||
int ic; int io; int iFinal;
|
||||
rc = pApi->xInst(pFts, j, &ip, &ic, &io);
|
||||
iFinal = io + pApi->xPhraseSize(pFts, ip) - 1;
|
||||
if( rc==SQLITE_OK && ic==iSnippetCol && iLast<iStart+nToken ){
|
||||
nScore += aSeen[ip] ? 1000 : 1;
|
||||
aSeen[ip] = 1;
|
||||
if( iFinal>iLast ) iLast = iFinal;
|
||||
}
|
||||
}
|
||||
|
||||
memset(&buf, 0, sizeof(Fts5Buffer));
|
||||
rc = fts5SnippetText(pApi, pFts, pIter, nAbs, zStart, zFinal, zEllip, &buf);
|
||||
if( rc==SQLITE_OK ){
|
||||
sqlite3_result_text(pCtx, (const char*)buf.p, buf.n, SQLITE_TRANSIENT);
|
||||
if( rc==SQLITE_OK && nScore>nBestScore ){
|
||||
iBestCol = iSnippetCol;
|
||||
iBestStart = iStart;
|
||||
iBestLast = iLast;
|
||||
nBestScore = nScore;
|
||||
}
|
||||
}
|
||||
sqlite3_free(buf.p);
|
||||
}
|
||||
|
||||
fts5SnipIterFree(pIter);
|
||||
if( rc!=SQLITE_OK ){
|
||||
if( rc==SQLITE_OK ){
|
||||
rc = pApi->xColumnSize(pFts, iBestCol, &nColSize);
|
||||
}
|
||||
if( rc==SQLITE_OK ){
|
||||
rc = pApi->xColumnText(pFts, iBestCol, &ctx.zIn, &ctx.nIn);
|
||||
}
|
||||
if( rc==SQLITE_OK ){
|
||||
rc = fts5CInstIterInit(pApi, pFts, iBestCol, &ctx.iter);
|
||||
}
|
||||
|
||||
if( (iBestStart+nToken-1)>iBestLast ){
|
||||
iBestStart -= (iBestStart+nToken-1-iBestLast) / 2;
|
||||
}
|
||||
if( iBestStart+nToken>nColSize ){
|
||||
iBestStart = nColSize - nToken;
|
||||
}
|
||||
if( iBestStart<0 ) iBestStart = 0;
|
||||
|
||||
ctx.iRangeStart = iBestStart;
|
||||
ctx.iRangeEnd = iBestStart + nToken - 1;
|
||||
|
||||
if( iBestStart>0 ){
|
||||
fts5HighlightAppend(&rc, &ctx, zEllips, -1);
|
||||
}
|
||||
if( rc==SQLITE_OK ){
|
||||
rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx, fts5HighlightCb);
|
||||
}
|
||||
if( ctx.iRangeEnd>=(nColSize-1) ){
|
||||
fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff);
|
||||
}else{
|
||||
fts5HighlightAppend(&rc, &ctx, zEllips, -1);
|
||||
}
|
||||
|
||||
if( rc==SQLITE_OK ){
|
||||
sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT);
|
||||
}else{
|
||||
sqlite3_result_error_code(pCtx, rc);
|
||||
}
|
||||
sqlite3_free(ctx.zOut);
|
||||
sqlite3_free(aSeen);
|
||||
}
|
||||
|
||||
/************************************************************************/
|
||||
|
||||
|
||||
/*
|
||||
** Context object passed by fts5GatherTotals() to xQueryPhrase callback
|
||||
|
@ -57,6 +57,19 @@ struct F5tApi {
|
||||
Fts5Context *pFts;
|
||||
};
|
||||
|
||||
/*
|
||||
** An object of this type is used with the xSetAuxdata() and xGetAuxdata()
|
||||
** API test wrappers. The tcl interface allows a single tcl value to be
|
||||
** saved using xSetAuxdata(). Instead of simply storing a pointer to the
|
||||
** tcl object, the code in this file wraps it in an sqlite3_malloc'd
|
||||
** instance of the following struct so that if the destructor is not
|
||||
** correctly invoked it will be reported as an SQLite memory leak.
|
||||
*/
|
||||
typedef struct F5tAuxData F5tAuxData;
|
||||
struct F5tAuxData {
|
||||
Tcl_Obj *pObj; /* Saved tcl value; ref released by xSetAuxdataDestructor() */
|
||||
};
|
||||
|
||||
static int xTokenizeCb(
|
||||
void *pCtx,
|
||||
const char *zToken, int nToken,
|
||||
@ -108,8 +121,14 @@ static int xQueryPhraseCb(
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void xSetAuxdataDestructor(void *p){
|
||||
F5tAuxData *pData = (F5tAuxData*)p;
|
||||
Tcl_DecrRefCount(pData->pObj);
|
||||
sqlite3_free(pData);
|
||||
}
|
||||
|
||||
/*
|
||||
** api sub-command...
|
||||
** api sub-command...
|
||||
**
|
||||
** Description...
|
||||
*/
|
||||
@ -136,6 +155,8 @@ static int xF5tApi(
|
||||
{ "xColumnText", 1, "COL" },
|
||||
{ "xColumnSize", 1, "COL" },
|
||||
{ "xQueryPhrase", 2, "PHRASE SCRIPT" },
|
||||
{ "xSetAuxdata", 1, "VALUE" },
|
||||
{ "xGetAuxdata", 1, "CLEAR" },
|
||||
{ 0, 0, 0}
|
||||
};
|
||||
|
||||
@ -284,6 +305,34 @@ static int xF5tApi(
|
||||
}
|
||||
break;
|
||||
}
|
||||
CASE(12, "xSetAuxdata") {
|
||||
F5tAuxData *pData = (F5tAuxData*)sqlite3_malloc(sizeof(F5tAuxData));
|
||||
if( pData==0 ){
|
||||
Tcl_AppendResult(interp, "out of memory", 0);
|
||||
return TCL_ERROR;
|
||||
}
|
||||
pData->pObj = objv[2];
|
||||
Tcl_IncrRefCount(pData->pObj);
|
||||
rc = p->pApi->xSetAuxdata(p->pFts, pData, xSetAuxdataDestructor);
|
||||
break;
|
||||
}
|
||||
CASE(13, "xGetAuxdata") {
|
||||
F5tAuxData *pData;
|
||||
int bClear;
|
||||
if( Tcl_GetBooleanFromObj(interp, objv[2], &bClear) ){
|
||||
return TCL_ERROR;
|
||||
}
|
||||
pData = (F5tAuxData*)p->pApi->xGetAuxdata(p->pFts, bClear);
|
||||
if( pData==0 ){
|
||||
Tcl_ResetResult(interp);
|
||||
}else{
|
||||
Tcl_SetObjResult(interp, pData->pObj);
|
||||
if( bClear ){
|
||||
xSetAuxdataDestructor((void*)pData);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
assert( 0 );
|
||||
|
112
ext/fts5/fts5auxdata.test
Normal file
112
ext/fts5/fts5auxdata.test
Normal file
@ -0,0 +1,112 @@
|
||||
# 2014 Dec 20
|
||||
#
|
||||
# The author disclaims copyright to this source code. In place of
|
||||
# a legal notice, here is a blessing:
|
||||
#
|
||||
# May you do good and not evil.
|
||||
# May you find forgiveness for yourself and forgive others.
|
||||
# May you share freely, never taking more than you give.
|
||||
#
|
||||
#***********************************************************************
|
||||
#
|
||||
# Tests focusing on the fts5 xSetAuxdata() and xGetAuxdata() APIs.
|
||||
#
|
||||
|
||||
if {![info exists testdir]} {
|
||||
set testdir [file join [file dirname [info script]] .. .. test]
|
||||
}
|
||||
source $testdir/tester.tcl
|
||||
set testprefix fts5auxdata
|
||||
|
||||
do_execsql_test 1.0 {
|
||||
CREATE VIRTUAL TABLE f1 USING fts5(a, b);
|
||||
INSERT INTO f1(rowid, a, b) VALUES(1, 'a', 'b1');
|
||||
INSERT INTO f1(rowid, a, b) VALUES(2, 'a', 'b2');
|
||||
INSERT INTO f1(rowid, a, b) VALUES(3, 'a', 'b3');
|
||||
INSERT INTO f1(rowid, a, b) VALUES(4, 'a', 'b4');
|
||||
INSERT INTO f1(rowid, a, b) VALUES(5, 'a', 'b5');
|
||||
}
|
||||
|
||||
# Auxiliary function used to check that a value stored with xSetAuxdata
# while processing one row is returned by xGetAuxdata on later rows of the
# same query, and that passing 1 to xGetAuxdata clears the stored value.
proc aux_function_1 {cmd tn} {
  set rowid [$cmd xRowid]
  switch -exact -- $rowid {
    1 {
      # Nothing has been stored yet.
      do_test "$tn.1" [list $cmd xGetAuxdata 0] {}
      $cmd xSetAuxdata one
    }
    2 {
      do_test "$tn.2" [list $cmd xGetAuxdata 0] one
      $cmd xSetAuxdata two
    }
    3 {
      do_test "$tn.3" [list $cmd xGetAuxdata 0] two
    }
    4 {
      # Read the value and clear it in the same call.
      do_test "$tn.4" [list $cmd xGetAuxdata 1] two
    }
    5 {
      do_test "$tn.5" [list $cmd xGetAuxdata 0] {}
    }
  }
}
|
||||
|
||||
sqlite3_fts5_create_function db aux_function_1 aux_function_1
|
||||
db eval {
|
||||
SELECT aux_function_1(f1, 1) FROM f1 WHERE f1 MATCH 'a'
|
||||
ORDER BY rowid ASC
|
||||
}
|
||||
|
||||
# Auxiliary function invoked twice per row by the same query, once with
# $inst set to "A" and once with another tag. Invocation A stores and
# clears the auxiliary data; the other invocation only reads it, checking
# that both invocations share one auxiliary data slot.
proc aux_function_2 {cmd tn inst} {
  set rowid [$cmd xRowid]
  set name "$tn.$rowid.$inst"
  if {$inst ne "A"} {
    # Read-only invocation: sees whatever invocation A left behind.
    switch -exact -- $rowid {
      1 { do_test $name [list $cmd xGetAuxdata 0] "one A" }
      2 { do_test $name [list $cmd xGetAuxdata 0] "two A" }
      3 { do_test $name [list $cmd xGetAuxdata 0] "two A" }
      4 { do_test $name [list $cmd xGetAuxdata 0] {} }
      5 { do_test $name [list $cmd xGetAuxdata 0] {} }
    }
  } else {
    switch -exact -- $rowid {
      1 {
        do_test $name [list $cmd xGetAuxdata 0] {}
        $cmd xSetAuxdata "one $inst"
      }
      2 {
        do_test $name [list $cmd xGetAuxdata 0] "one $inst"
        $cmd xSetAuxdata "two $inst"
      }
      3 { do_test $name [list $cmd xGetAuxdata 0] "two $inst" }
      4 { do_test $name [list $cmd xGetAuxdata 1] "two $inst" }
      5 { do_test $name [list $cmd xGetAuxdata 0] {} }
    }
  }
}
|
||||
|
||||
sqlite3_fts5_create_function db aux_function_2 aux_function_2
|
||||
db eval {
|
||||
SELECT aux_function_2(f1, 2, 'A'), aux_function_2(f1, 2, 'B')
|
||||
FROM f1 WHERE f1 MATCH 'a'
|
||||
ORDER BY rowid ASC
|
||||
}
|
||||
|
||||
finish_test
|
||||
|
Reference in New Issue
Block a user