1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-08-01 06:27:03 +03:00

Add the auxiliary highlight() function to fts5.

FossilOrigin-Name: 059092379f981eb919b500ce447006f9e645fc5a
This commit is contained in:
dan
2014-11-24 16:24:33 +00:00
parent 48d7014067
commit cb62aae034
9 changed files with 434 additions and 29 deletions

View File

@ -165,6 +165,9 @@ struct Fts5Cursor {
Fts5Auxiliary *pAux; /* Currently executing extension function */
Fts5Auxdata *pAuxdata; /* First in linked list of saved aux-data */
int *aColumnSize; /* Values for xColumnSize() */
int nInstCount; /* Number of phrase instances */
int *aInst; /* 3 integers per phrase instance */
};
/*
@ -488,6 +491,18 @@ static int fts5StmtType(int idxNum){
return FTS5_STMT_LOOKUP;
}
/*
** Discard everything the cursor has cached about the row it was
** previously pointing at. Call this each time the cursor is moved to a
** different row (and when it is closed).
**
** The phrase-instance array is released and its count reset, and the
** REQUIRE_CONTENT and REQUIRE_DOCSIZE flags are set on the cursor.
*/
static void fts5CsrNewrow(Fts5Cursor *pCsr){
  int *aOld = pCsr->aInst;

  /* Detach the instance array from the cursor before releasing it. */
  pCsr->nInstCount = 0;
  pCsr->aInst = 0;
  sqlite3_free(aOld);

  CsrFlagSet(pCsr, FTS5CSR_REQUIRE_DOCSIZE | FTS5CSR_REQUIRE_CONTENT);
}
/*
** Close the cursor. For additional information see the documentation
** on the xClose method of the virtual table interface.
@ -499,6 +514,7 @@ static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){
Fts5Auxdata *pData;
Fts5Auxdata *pNext;
fts5CsrNewrow(pCsr);
if( pCsr->pStmt ){
int eStmt = fts5StmtType(pCsr->idxNum);
sqlite3Fts5StorageStmtRelease(pTab->pStorage, eStmt, pCsr->pStmt);
@ -557,7 +573,7 @@ static int fts5SorterNext(Fts5Cursor *pCsr){
pSorter->aIdx[i] = &aBlob[nBlob] - a;
pSorter->aPoslist = a;
CsrFlagSet(pCsr, FTS5CSR_REQUIRE_CONTENT | FTS5CSR_REQUIRE_DOCSIZE );
fts5CsrNewrow(pCsr);
}
return rc;
@ -583,7 +599,7 @@ static int fts5NextMethod(sqlite3_vtab_cursor *pCursor){
if( sqlite3Fts5ExprEof(pCsr->pExpr) ){
CsrFlagSet(pCsr, FTS5CSR_EOF);
}
CsrFlagSet(pCsr, FTS5CSR_REQUIRE_CONTENT | FTS5CSR_REQUIRE_DOCSIZE );
fts5CsrNewrow(pCsr);
break;
case FTS5_PLAN_SPECIAL: {
@ -666,7 +682,7 @@ static int fts5CursorFirst(Fts5Table *pTab, Fts5Cursor *pCsr, int bAsc){
if( sqlite3Fts5ExprEof(pCsr->pExpr) ){
CsrFlagSet(pCsr, FTS5CSR_EOF);
}
CsrFlagSet(pCsr, FTS5CSR_REQUIRE_CONTENT | FTS5CSR_REQUIRE_DOCSIZE );
fts5CsrNewrow(pCsr);
return rc;
}
@ -1044,6 +1060,104 @@ static int fts5ApiPhraseSize(Fts5Context *pCtx, int iPhrase){
return sqlite3Fts5ExprPhraseSize(pCsr->pExpr, iPhrase);
}
/*
** Set *pa to point at the position list for phrase iPhrase of the current
** row and return its size in bytes.
**
** If the cursor has a sorter, the list is a slice of the sorter's
** aPoslist[] buffer: it ends at offset aIdx[iPhrase] and starts at
** aIdx[iPhrase-1] (or at 0 for the first phrase). Otherwise the request is
** forwarded to sqlite3Fts5ExprPoslist().
*/
static int fts5CsrPoslist(Fts5Cursor *pCsr, int iPhrase, const u8 **pa){
  Fts5Sorter *pSorter = pCsr->pSorter;
  int iFrom = 0;                  /* Offset of first byte of the slice */

  if( pSorter==0 ){
    return sqlite3Fts5ExprPoslist(pCsr->pExpr, iPhrase, pa);
  }

  if( iPhrase!=0 ){
    iFrom = pSorter->aIdx[iPhrase-1];
  }
  *pa = &pSorter->aPoslist[iFrom];
  return pSorter->aIdx[iPhrase] - iFrom;
}
/*
** Ensure that the Fts5Cursor.nInstCount and aInst[] variables are populated
** correctly for the current row. Return SQLITE_OK if successful, or an
** SQLite error code otherwise.
**
** aInst[] holds three integers per phrase instance: the phrase number, the
** column number and the token offset within that column. The array is
** built by merging the position lists of all phrases in the query so that
** instances are stored in ascending order of position.
**
** If an error occurs, nothing is cached: Fts5Cursor.aInst is left set to
** NULL so that a later call starts again from scratch instead of returning
** a partially built array.
*/
static int fts5CacheInstArray(Fts5Cursor *pCsr){
  int rc = SQLITE_OK;
  if( pCsr->aInst==0 ){
    Fts5PoslistReader *aIter;     /* One iterator for each phrase */
    int nIter;                    /* Number of iterators/phrases */
    int nByte;                    /* Bytes of space to allocate for aIter[] */

    nIter = sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
    nByte = sizeof(Fts5PoslistReader) * nIter;
    aIter = (Fts5PoslistReader*)sqlite3Fts5MallocZero(&rc, nByte);
    if( aIter ){
      Fts5Buffer buf = {0, 0, 0}; /* Build up aInst[] here */
      int nInst = 0;              /* Number instances stored so far */
      int i;

      /* Initialize all iterators */
      for(i=0; i<nIter; i++){
        const u8 *a;
        int n = fts5CsrPoslist(pCsr, i, &a);
        sqlite3Fts5PoslistReaderInit(-1, a, n, &aIter[i]);
      }

      while( 1 ){
        int *aInst;
        int iBest = -1;

        /* Find the iterator that is not at EOF and currently points at the
        ** smallest position. Positions must be compared with positions -
        ** iBest itself is only an index into aIter[]. */
        for(i=0; i<nIter; i++){
          if( aIter[i].bEof==0
           && (iBest<0 || aIter[i].iPos<aIter[iBest].iPos)
          ){
            iBest = i;
          }
        }
        if( iBest<0 ) break;

        /* Make room for one more 3-integer entry. The entry is counted
        ** only once it has actually been written. */
        if( sqlite3Fts5BufferGrow(&rc, &buf, (nInst+1) * sizeof(int) * 3) ){
          break;
        }
        aInst = &((int*)buf.p)[3 * nInst];
        aInst[0] = iBest;
        aInst[1] = FTS5_POS2COLUMN(aIter[iBest].iPos);
        aInst[2] = FTS5_POS2OFFSET(aIter[iBest].iPos);
        nInst++;
        sqlite3Fts5PoslistReaderNext(&aIter[iBest]);
      }

      if( rc==SQLITE_OK ){
        pCsr->aInst = (int*)buf.p;
        pCsr->nInstCount = nInst;
      }else{
        /* Do not cache a truncated array. */
        sqlite3_free(buf.p);
      }
      sqlite3_free(aIter);
    }
  }
  return rc;
}
/*
** Implementation of the xInstCount() extension API method.
**
** Makes sure the phrase-instance array for the current row has been built
** and, if that succeeds, writes the number of instances to *pnInst.
** Returns SQLITE_OK on success. On failure the error code from
** fts5CacheInstArray() is returned and *pnInst is left untouched.
*/
static int fts5ApiInstCount(Fts5Context *pCtx, int *pnInst){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  int rc = fts5CacheInstArray(pCsr);

  if( rc!=SQLITE_OK ){
    return rc;
  }
  *pnInst = pCsr->nInstCount;
  return SQLITE_OK;
}
/*
** Implementation of the xInst() extension API method.
**
** Looks up phrase instance iIdx of the current row and writes its phrase
** number, column number and token offset to *piPhrase, *piCol and *piOff
** respectively.
**
** Returns SQLITE_OK on success, SQLITE_RANGE if iIdx is negative or not
** smaller than the number of instances, or the error code returned by
** fts5CacheInstArray() if the instance array could not be built. The
** output parameters are only written on success.
*/
static int fts5ApiInst(
  Fts5Context *pCtx,
  int iIdx,
  int *piPhrase,
  int *piCol,
  int *piOff
){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  const int *aEntry;              /* The 3 integers describing instance iIdx */
  int rc = fts5CacheInstArray(pCsr);

  if( rc!=SQLITE_OK ){
    return rc;
  }
  if( iIdx<0 || iIdx>=pCsr->nInstCount ){
    return SQLITE_RANGE;
  }

  aEntry = &pCsr->aInst[iIdx*3];
  *piPhrase = aEntry[0];
  *piCol = aEntry[1];
  *piOff = aEntry[2];
  return SQLITE_OK;
}
/*
** Implementation of the xRowid() extension API method. Returns whatever
** fts5CursorRowid() reports for the cursor behind the context handle.
*/
static sqlite3_int64 fts5ApiRowid(Fts5Context *pCtx){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  return fts5CursorRowid(pCsr);
}
@ -1088,14 +1202,7 @@ static int fts5ApiPoslist(
){
Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
const u8 *a; int n; /* Poslist for phrase iPhrase */
if( pCsr->pSorter ){
Fts5Sorter *pSorter = pCsr->pSorter;
int i1 = (iPhrase==0 ? 0 : pSorter->aIdx[iPhrase-1]);
n = pSorter->aIdx[iPhrase] - i1;
a = &pSorter->aPoslist[i1];
}else{
n = sqlite3Fts5ExprPoslist(pCsr->pExpr, iPhrase, &a);
}
n = fts5CsrPoslist(pCsr, iPhrase, &a);
return sqlite3Fts5PoslistNext64(a, n, pi, piPos);
}
@ -1162,6 +1269,8 @@ static const Fts5ExtensionApi sFts5Api = {
fts5ApiTokenize,
fts5ApiPhraseCount,
fts5ApiPhraseSize,
fts5ApiInstCount,
fts5ApiInst,
fts5ApiRowid,
fts5ApiColumnText,
fts5ApiColumnSize,

View File

@ -70,13 +70,42 @@ typedef void (*fts5_extension_function)(
** Returns the number of tokens in phrase iPhrase of the query. Phrases
** are numbered starting from zero.
**
** xInstCount:
** Set *pnInst to the total number of occurrences of all phrases within
** the query within the current row. Return SQLITE_OK if successful, or
** an error code (e.g. SQLITE_NOMEM) if an error occurs.
**
** xInst:
** Query for the details of phrase match iIdx within the current row.
** Phrase matches are numbered starting from zero, so the iIdx argument
** should be greater than or equal to zero and smaller than the value
** output by xInstCount().
**
** Returns SQLITE_OK if successful, or an error code (e.g. SQLITE_NOMEM)
** if an error occurs.
**
** xRowid:
** Returns the rowid of the current row.
**
** xPoslist:
** Iterate through instances of phrase iPhrase in the current row.
** Iterate through phrase instances in the current row. If the iPhrase
** argument is 0 or greater, then only instances of phrase iPhrase are
** visited. If it is less than 0, instances of all phrases are visited.
**
** At EOF, -1 is returned and output variable iPos set to -1.
**
** <pre>
** sqlite3_int64 iPos;
** int iPhrase;
** int ii = 0;
**
** while( (iPhrase = pFts->xPoslist(pFts, -1, &ii, &iPos)) >= 0 ){
** int iCol = FTS5_POS2COLUMN(iPos);
** int iOff = FTS5_POS2OFFSET(iPos);
** // An instance of phrase iPhrase at offset iOff of column iCol.
** }
** </pre>
**
** At EOF, a non-zero value is returned and output variable iPos set to -1.
**
** xTokenize:
** Tokenize text using the tokenizer belonging to the FTS5 table.
@ -160,6 +189,9 @@ struct Fts5ExtensionApi {
int (*xPhraseCount)(Fts5Context*);
int (*xPhraseSize)(Fts5Context*, int iPhrase);
int (*xInstCount)(Fts5Context*, int *pnInst);
int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff);
sqlite3_int64 (*xRowid)(Fts5Context*);
int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn);
int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken);

View File

@ -301,6 +301,9 @@ void sqlite3Fts5IndexAutomerge(Fts5Index *p, int nMerge);
*/
int sqlite3Fts5IndexReads(Fts5Index *p);
/* Malloc utility */
void *sqlite3Fts5MallocZero(int *pRc, int nByte);
/*
** End of interface to code in fts5_index.c.
**************************************************************************/

View File

@ -14,6 +14,138 @@
#include "fts5Int.h"
#include <math.h>
/*************************************************************************
** Start of highlight() implementation.
*/
/*
** State shared between fts5HighlightFunction() and the tokenizer callback
** fts5HighlightCb() while the text of one column is being copied to the
** output with highlight markers inserted around phrase instances.
**
** iStart and iEnd are token positions, compared against the iPos value
** passed to the tokenizer callback. Both are set to -1 when there is no
** (further) phrase instance to highlight in the column.
**
** zOut is built up with sqlite3_mprintf() and is released by
** fts5HighlightFunction() using sqlite3_free().
*/
typedef struct HighlightContext HighlightContext;
struct HighlightContext {
const Fts5ExtensionApi *pApi; /* API offered by current FTS version */
Fts5Context *pFts; /* First arg to pass to pApi functions */
int iInst; /* Index of current phrase instance, as passed to xInst() */
int iStart; /* Position of first token of current phrase, or -1 */
int iEnd; /* Position of last token of current phrase, or -1 */
const char *zOpen; /* Opening highlight text (2nd function argument) */
const char *zClose; /* Closing highlight text (3rd function argument) */
int iCol; /* Column to read from (1st function argument) */
const char *zIn; /* Input text, as returned by xColumnText() */
int nIn; /* Size of input text in bytes */
int iOff; /* Bytes of zIn[] already copied to zOut */
char *zOut; /* Output value built so far (NULL if nothing appended yet) */
};
/*
** Append the first n bytes of string z to the output being accumulated in
** p->zOut. If n is negative, z must be nul-terminated and the whole string
** is appended.
**
** A NULL value for z is treated as an empty string. This matters because
** the open/close markers come from sqlite3_value_text(), which returns
** NULL for an SQL NULL argument; calling strlen() on that would be
** undefined behaviour.
**
** Returns SQLITE_OK if successful, or SQLITE_NOMEM if the new output string
** could not be allocated. The previous value of p->zOut is handed to
** sqlite3_mprintf() via the %z conversion, so the caller must not use or
** free the old pointer afterwards.
*/
static int fts5HighlightAppend(HighlightContext *p, const char *z, int n){
  if( z==0 ){
    n = 0;
  }else if( n<0 ){
    n = (int)strlen(z);
  }
  p->zOut = sqlite3_mprintf("%z%.*s", p->zOut, n, z);
  if( p->zOut==0 ) return SQLITE_NOMEM;
  return SQLITE_OK;
}
/*
** Tokenizer callback used by the highlight() function. It is invoked once
** for each token in the column text, in order.
**
** The text between the previous token and the end of this one is copied
** to the output. If this token is the first token of the current phrase
** instance, the opening marker is inserted immediately before it. If it is
** the last token of the current phrase instance, the context is advanced
** to the next instance that extends past the current one, and the closing
** marker is appended unless that next instance overlaps the current one
** (in which case the highlighted region simply continues).
**
** Returns SQLITE_OK if successful, or an SQLite error code (from appending
** to the output or from xInst()) otherwise.
*/
static int fts5HighlightCb(
  void *pContext,                 /* Pointer to HighlightContext object */
  const char *pToken,             /* Buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iStart,                     /* Start offset of token */
  int iEnd,                       /* End offset of token */
  int iPos                        /* Position offset of token */
){
  HighlightContext *p = (HighlightContext*)pContext;
  int rc = SQLITE_OK;

  if( iPos==p->iStart ){
    /* Copy any text preceding this token, then open the highlight. */
    rc = fts5HighlightAppend(p, &p->zIn[p->iOff], iStart - p->iOff);
    p->iOff = iStart;
    if( rc==SQLITE_OK ){
      rc = fts5HighlightAppend(p, p->zOpen, -1);
    }
  }

  if( rc==SQLITE_OK ){
    /* Copy everything up to and including the token itself. */
    rc = fts5HighlightAppend(p, &p->zIn[p->iOff], iEnd - p->iOff);
    p->iOff = iEnd;
  }

  if( rc==SQLITE_OK && iPos==p->iEnd ){
    int bClose = 1;               /* True to append the closing marker */
    int bDone = 0;                /* True once the next instance is chosen */

    /* Step through the following phrase instances. Instances that end at
    ** or before the end of the current one are entirely covered by it and
    ** are skipped. A real loop is required here: "continue" inside a
    ** do{...}while(0) block would leave the block instead of moving on to
    ** the next instance. */
    while( rc==SQLITE_OK && bDone==0 ){
      int iP, iPCol, iOff;
      rc = p->pApi->xInst(p->pFts, ++p->iInst, &iP, &iPCol, &iOff);
      if( rc==SQLITE_RANGE || (rc==SQLITE_OK && iPCol!=p->iCol) ){
        /* No more instances in this column. iPCol is only inspected when
        ** xInst() succeeded and therefore wrote to it. */
        p->iStart = -1;
        p->iEnd = -1;
        rc = SQLITE_OK;
        bDone = 1;
      }else if( rc==SQLITE_OK ){
        int iLast = iOff - 1 + p->pApi->xPhraseSize(p->pFts, iP);
        if( iLast>p->iEnd ){
          /* If the next instance begins inside the current one, keep the
          ** highlight open across both. */
          if( iOff<=p->iEnd ) bClose = 0;
          p->iStart = iOff;
          p->iEnd = iLast;
          bDone = 1;
        }
      }
      /* Any other error code is left in rc and terminates the loop. */
    }

    if( rc==SQLITE_OK && bClose ){
      rc = fts5HighlightAppend(p, p->zClose, -1);
    }
  }

  return rc;
}
/*
** Implementation of the highlight() auxiliary function. It expects exactly
** three trailing arguments:
**
**   1. the index of the column to read text from,
**   2. the text to insert before each highlighted phrase, and
**   3. the text to insert after each highlighted phrase.
**
** The column text is run through the table's tokenizer, with
** fts5HighlightCb() copying it to the output and inserting the markers.
** The finished string is returned as the SQL function result. If an error
** occurs, its error code is reported through pCtx instead.
*/
static void fts5HighlightFunction(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  sqlite3_context *pCtx,          /* Context for returning result/error */
  int nVal,                       /* Number of values in apVal[] array */
  sqlite3_value **apVal           /* Array of trailing arguments */
){
  HighlightContext ctx;
  int rc;

  if( nVal!=3 ){
    const char *zErr = "wrong number of arguments to function highlight()";
    sqlite3_result_error(pCtx, zErr, -1);
    return;
  }

  /* Set up the context object shared with the tokenizer callback. */
  memset(&ctx, 0, sizeof(HighlightContext));
  ctx.pApi = pApi;
  ctx.pFts = pFts;
  ctx.iStart = -1;
  ctx.iEnd = -1;
  ctx.iCol = sqlite3_value_int(apVal[0]);
  ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]);
  ctx.zClose = (const char*)sqlite3_value_text(apVal[2]);
  rc = pApi->xColumnText(pFts, ctx.iCol, &ctx.zIn, &ctx.nIn);

  /* Find the first phrase instance in the right column. The scan stops
  ** with rc==SQLITE_OK if one is found, or with the error returned by
  ** xInst() (SQLITE_RANGE once the instances are exhausted) otherwise. */
  for(; rc==SQLITE_OK; ctx.iInst++){
    int iP, iPCol, iOff;
    rc = pApi->xInst(pFts, ctx.iInst, &iP, &iPCol, &iOff);
    if( rc!=SQLITE_OK || iPCol!=ctx.iCol ) continue;
    ctx.iStart = iOff;
    ctx.iEnd = iOff - 1 + pApi->xPhraseSize(pFts, iP);
    break;
  }

  /* Running out of instances is not an error - the text is then copied
  ** through without any markers. */
  if( rc==SQLITE_OK || rc==SQLITE_RANGE ){
    rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx, fts5HighlightCb);
  }

  /* Copy whatever follows the final token. */
  if( rc==SQLITE_OK ){
    rc = fts5HighlightAppend(&ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff);
  }

  if( rc!=SQLITE_OK ){
    sqlite3_result_error_code(pCtx, rc);
  }else{
    sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT);
  }
  sqlite3_free(ctx.zOut);
}
/*
**************************************************************************/
typedef struct SnipPhrase SnipPhrase;
typedef struct SnipIter SnipIter;
typedef struct SnippetCtx SnippetCtx;
@ -796,6 +928,22 @@ static void fts5TestFunction(
}
}
/*
** xInst()
*/
if( zReq==0 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, " inst ");
if( 0==zReq || 0==sqlite3_stricmp(zReq, "inst") ){
int nInst;
rc = pApi->xInstCount(pFts, &nInst);
for(i=0; rc==SQLITE_OK && i<nInst; i++){
int iPhrase, iCol, iOff;
rc = pApi->xInst(pFts, i, &iPhrase, &iCol, &iOff);
sqlite3Fts5BufferAppendPrintf(&rc, &s, "%s%d.%d.%d",
(i==0 ? "" : " "), iPhrase, iCol, iOff
);
}
}
/*
** xPhraseCount()
*/
@ -966,6 +1114,7 @@ int sqlite3Fts5AuxInit(fts5_api *pApi){
{ "bm25debug", (void*)1, fts5Bm25Function, 0 },
{ "snippet", 0, fts5SnippetFunction, 0 },
{ "fts5_test", 0, fts5TestFunction, 0 },
{ "highlight", 0, fts5HighlightFunction, 0 },
{ "bm25", 0, fts5Bm25Function, 0 },
};

View File

@ -692,7 +692,7 @@ static void *fts5IdxMalloc(Fts5Index *p, int nByte){
return pRet;
}
static void *fts5MallocZero(int *pRc, int nByte){
void *sqlite3Fts5MallocZero(int *pRc, int nByte){
void *pRet = 0;
if( *pRc==SQLITE_OK ){
pRet = sqlite3_malloc(nByte);
@ -981,7 +981,7 @@ static int fts5StructureDecode(
sizeof(Fts5Structure) + /* Main structure */
sizeof(Fts5StructureLevel) * (nLevel) /* aLevel[] array */
);
pRet = (Fts5Structure*)fts5MallocZero(&rc, nByte);
pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte);
if( pRet ){
pRet->nLevel = nLevel;
@ -995,7 +995,7 @@ static int fts5StructureDecode(
i += getVarint32(&pData[i], pLvl->nMerge);
i += getVarint32(&pData[i], nTotal);
assert( nTotal>=pLvl->nMerge );
pLvl->aSeg = (Fts5StructureSegment*)fts5MallocZero(&rc,
pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc,
nTotal * sizeof(Fts5StructureSegment)
);