mirror of
https://github.com/sqlite/sqlite.git
synced 2025-08-01 06:27:03 +03:00
Add the auxiliary highlight() function to fts5.
FossilOrigin-Name: 059092379f981eb919b500ce447006f9e645fc5a
This commit is contained in:
131
ext/fts5/fts5.c
131
ext/fts5/fts5.c
@ -165,6 +165,9 @@ struct Fts5Cursor {
|
||||
Fts5Auxiliary *pAux; /* Currently executing extension function */
|
||||
Fts5Auxdata *pAuxdata; /* First in linked list of saved aux-data */
|
||||
int *aColumnSize; /* Values for xColumnSize() */
|
||||
|
||||
int nInstCount; /* Number of phrase instances */
|
||||
int *aInst; /* 3 integers per phrase instance */
|
||||
};
|
||||
|
||||
/*
|
||||
@ -488,6 +491,18 @@ static int fts5StmtType(int idxNum){
|
||||
return FTS5_STMT_LOOKUP;
|
||||
}
|
||||
|
||||
/*
|
||||
** This function is called after the cursor passed as the only argument
|
||||
** is moved to point at a different row. It clears all cached data
|
||||
** specific to the previous row stored by the cursor object.
|
||||
*/
|
||||
static void fts5CsrNewrow(Fts5Cursor *pCsr){
|
||||
CsrFlagSet(pCsr, FTS5CSR_REQUIRE_CONTENT | FTS5CSR_REQUIRE_DOCSIZE );
|
||||
sqlite3_free(pCsr->aInst);
|
||||
pCsr->aInst = 0;
|
||||
pCsr->nInstCount = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
** Close the cursor. For additional information see the documentation
|
||||
** on the xClose method of the virtual table interface.
|
||||
@ -499,6 +514,7 @@ static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){
|
||||
Fts5Auxdata *pData;
|
||||
Fts5Auxdata *pNext;
|
||||
|
||||
fts5CsrNewrow(pCsr);
|
||||
if( pCsr->pStmt ){
|
||||
int eStmt = fts5StmtType(pCsr->idxNum);
|
||||
sqlite3Fts5StorageStmtRelease(pTab->pStorage, eStmt, pCsr->pStmt);
|
||||
@ -557,7 +573,7 @@ static int fts5SorterNext(Fts5Cursor *pCsr){
|
||||
pSorter->aIdx[i] = &aBlob[nBlob] - a;
|
||||
|
||||
pSorter->aPoslist = a;
|
||||
CsrFlagSet(pCsr, FTS5CSR_REQUIRE_CONTENT | FTS5CSR_REQUIRE_DOCSIZE );
|
||||
fts5CsrNewrow(pCsr);
|
||||
}
|
||||
|
||||
return rc;
|
||||
@ -583,7 +599,7 @@ static int fts5NextMethod(sqlite3_vtab_cursor *pCursor){
|
||||
if( sqlite3Fts5ExprEof(pCsr->pExpr) ){
|
||||
CsrFlagSet(pCsr, FTS5CSR_EOF);
|
||||
}
|
||||
CsrFlagSet(pCsr, FTS5CSR_REQUIRE_CONTENT | FTS5CSR_REQUIRE_DOCSIZE );
|
||||
fts5CsrNewrow(pCsr);
|
||||
break;
|
||||
|
||||
case FTS5_PLAN_SPECIAL: {
|
||||
@ -666,7 +682,7 @@ static int fts5CursorFirst(Fts5Table *pTab, Fts5Cursor *pCsr, int bAsc){
|
||||
if( sqlite3Fts5ExprEof(pCsr->pExpr) ){
|
||||
CsrFlagSet(pCsr, FTS5CSR_EOF);
|
||||
}
|
||||
CsrFlagSet(pCsr, FTS5CSR_REQUIRE_CONTENT | FTS5CSR_REQUIRE_DOCSIZE );
|
||||
fts5CsrNewrow(pCsr);
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -1044,6 +1060,104 @@ static int fts5ApiPhraseSize(Fts5Context *pCtx, int iPhrase){
|
||||
return sqlite3Fts5ExprPhraseSize(pCsr->pExpr, iPhrase);
|
||||
}
|
||||
|
||||
/*
** Set *pa to point at the position list for phrase iPhrase of the row
** that cursor pCsr currently points to, and return the length of that
** list.
**
** If the cursor has a sorter, the list is a slice of the sorter's
** aPoslist[] buffer delimited by the aIdx[] array. Otherwise the request
** is forwarded to sqlite3Fts5ExprPoslist().
*/
static int fts5CsrPoslist(Fts5Cursor *pCsr, int iPhrase, const u8 **pa){
  Fts5Sorter *pSorter = pCsr->pSorter;
  int iFirst = 0;                 /* Offset of first byte of the slice */

  if( pSorter==0 ){
    return sqlite3Fts5ExprPoslist(pCsr->pExpr, iPhrase, pa);
  }

  if( iPhrase!=0 ){
    iFirst = pSorter->aIdx[iPhrase-1];
  }
  *pa = &pSorter->aPoslist[iFirst];
  return pSorter->aIdx[iPhrase] - iFirst;
}
|
||||
|
||||
/*
|
||||
** Ensure that the Fts5Cursor.nInstCount and aInst[] variables are populated
|
||||
** correctly for the current view. Return SQLITE_OK if successful, or an
|
||||
** SQLite error code otherwise.
|
||||
*/
|
||||
static int fts5CacheInstArray(Fts5Cursor *pCsr){
|
||||
int rc = SQLITE_OK;
|
||||
if( pCsr->aInst==0 ){
|
||||
Fts5PoslistReader *aIter; /* One iterator for each phrase */
|
||||
int nIter; /* Number of iterators/phrases */
|
||||
int nByte;
|
||||
|
||||
nIter = sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
|
||||
nByte = sizeof(Fts5PoslistReader) * nIter;
|
||||
aIter = (Fts5PoslistReader*)sqlite3Fts5MallocZero(&rc, nByte);
|
||||
if( aIter ){
|
||||
Fts5Buffer buf = {0, 0, 0}; /* Build up aInst[] here */
|
||||
int nInst; /* Number instances seen so far */
|
||||
int i;
|
||||
|
||||
/* Initialize all iterators */
|
||||
for(i=0; i<nIter; i++){
|
||||
const u8 *a;
|
||||
int n = fts5CsrPoslist(pCsr, i, &a);
|
||||
sqlite3Fts5PoslistReaderInit(-1, a, n, &aIter[i]);
|
||||
}
|
||||
|
||||
while( 1 ){
|
||||
int *aInst;
|
||||
int iBest = -1;
|
||||
for(i=0; i<nIter; i++){
|
||||
if( aIter[i].bEof==0 && (iBest<0 || aIter[i].iPos<iBest) ){
|
||||
iBest = i;
|
||||
}
|
||||
}
|
||||
|
||||
if( iBest<0 ) break;
|
||||
nInst++;
|
||||
if( sqlite3Fts5BufferGrow(&rc, &buf, nInst * sizeof(int) * 3) ) break;
|
||||
|
||||
aInst = &((int*)buf.p)[3 * (nInst-1)];
|
||||
aInst[0] = iBest;
|
||||
aInst[1] = FTS5_POS2COLUMN(aIter[iBest].iPos);
|
||||
aInst[2] = FTS5_POS2OFFSET(aIter[iBest].iPos);
|
||||
sqlite3Fts5PoslistReaderNext(&aIter[iBest]);
|
||||
}
|
||||
|
||||
pCsr->aInst = (int*)buf.p;
|
||||
pCsr->nInstCount = nInst;
|
||||
sqlite3_free(aIter);
|
||||
}
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
** Implementation of the xInstCount() extension API method. Populate the
** cursor's phrase-instance cache if required, then write the number of
** instances in the current row to *pnInst. *pnInst is left untouched if
** an error occurs.
**
** Return SQLITE_OK if successful, or an SQLite error code otherwise.
*/
static int fts5ApiInstCount(Fts5Context *pCtx, int *pnInst){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  int rc = fts5CacheInstArray(pCsr);

  if( rc!=SQLITE_OK ) return rc;
  *pnInst = pCsr->nInstCount;
  return SQLITE_OK;
}
|
||||
|
||||
static int fts5ApiInst(
|
||||
Fts5Context *pCtx,
|
||||
int iIdx,
|
||||
int *piPhrase,
|
||||
int *piCol,
|
||||
int *piOff
|
||||
){
|
||||
Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
|
||||
int rc;
|
||||
if( SQLITE_OK==(rc = fts5CacheInstArray(pCsr)) ){
|
||||
if( iIdx<0 || iIdx>=pCsr->nInstCount ){
|
||||
rc = SQLITE_RANGE;
|
||||
}else{
|
||||
*piPhrase = pCsr->aInst[iIdx*3];
|
||||
*piCol = pCsr->aInst[iIdx*3 + 1];
|
||||
*piOff = pCsr->aInst[iIdx*3 + 2];
|
||||
}
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
** Implementation of the xRowid() extension API method. Return the value
** of fts5CursorRowid() for the cursor behind context pCtx.
*/
static sqlite3_int64 fts5ApiRowid(Fts5Context *pCtx){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  return fts5CursorRowid(pCsr);
}
|
||||
@ -1088,14 +1202,7 @@ static int fts5ApiPoslist(
|
||||
){
|
||||
Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
|
||||
const u8 *a; int n; /* Poslist for phrase iPhrase */
|
||||
if( pCsr->pSorter ){
|
||||
Fts5Sorter *pSorter = pCsr->pSorter;
|
||||
int i1 = (iPhrase==0 ? 0 : pSorter->aIdx[iPhrase-1]);
|
||||
n = pSorter->aIdx[iPhrase] - i1;
|
||||
a = &pSorter->aPoslist[i1];
|
||||
}else{
|
||||
n = sqlite3Fts5ExprPoslist(pCsr->pExpr, iPhrase, &a);
|
||||
}
|
||||
n = fts5CsrPoslist(pCsr, iPhrase, &a);
|
||||
return sqlite3Fts5PoslistNext64(a, n, pi, piPos);
|
||||
}
|
||||
|
||||
@ -1162,6 +1269,8 @@ static const Fts5ExtensionApi sFts5Api = {
|
||||
fts5ApiTokenize,
|
||||
fts5ApiPhraseCount,
|
||||
fts5ApiPhraseSize,
|
||||
fts5ApiInstCount,
|
||||
fts5ApiInst,
|
||||
fts5ApiRowid,
|
||||
fts5ApiColumnText,
|
||||
fts5ApiColumnSize,
|
||||
|
@ -70,13 +70,42 @@ typedef void (*fts5_extension_function)(
|
||||
** Returns the number of tokens in phrase iPhrase of the query. Phrases
|
||||
** are numbered starting from zero.
|
||||
**
|
||||
** xInstCount:
|
||||
** Set *pnInst to the total number of occurrences of all phrases within
|
||||
** the query within the current row. Return SQLITE_OK if successful, or
|
||||
** an error code (e.g. SQLITE_NOMEM) if an error occurs.
|
||||
**
|
||||
** xInst:
|
||||
** Query for the details of phrase match iIdx within the current row.
|
||||
** Phrase matches are numbered starting from zero, so the iIdx argument
|
||||
** should be greater than or equal to zero and smaller than the value
|
||||
** output by xInstCount().
|
||||
**
|
||||
** Returns SQLITE_OK if successful, or an error code (e.g. SQLITE_NOMEM)
|
||||
** if an error occurs.
|
||||
**
|
||||
** xRowid:
|
||||
** Returns the rowid of the current row.
|
||||
**
|
||||
** xPoslist:
|
||||
** Iterate through instances of phrase iPhrase in the current row.
|
||||
** Iterate through phrase instances in the current row. If the iPhrase
|
||||
** argument is 0 or greater, then only instances of phrase iPhrase are
|
||||
** visited. If it is less than 0, instances of all phrases are visited.
|
||||
**
|
||||
** At EOF, -1 is returned and output variable iPos set to -1.
|
||||
**
|
||||
** <pre>
|
||||
** sqlite3_int64 iPos;
|
||||
** int iPhrase;
|
||||
** int ii = 0;
|
||||
**
|
||||
** while( (iPhrase = pFts->xPoslist(pFts, -1, &ii, &iPos)) >= 0 ){
|
||||
** int iCol = FTS5_POS2COLUMN(iPos);
|
||||
** int iOff = FTS5_POS2OFFSET(iPos);
|
||||
** // An instance of phrase iPhrase at offset iOff of column iCol.
|
||||
** }
|
||||
** </pre>
|
||||
**
|
||||
** At EOF, a non-zero value is returned and output variable iPos set to -1.
|
||||
**
|
||||
** xTokenize:
|
||||
** Tokenize text using the tokenizer belonging to the FTS5 table.
|
||||
@ -160,6 +189,9 @@ struct Fts5ExtensionApi {
|
||||
int (*xPhraseCount)(Fts5Context*);
|
||||
int (*xPhraseSize)(Fts5Context*, int iPhrase);
|
||||
|
||||
int (*xInstCount)(Fts5Context*, int *pnInst);
|
||||
int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff);
|
||||
|
||||
sqlite3_int64 (*xRowid)(Fts5Context*);
|
||||
int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn);
|
||||
int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken);
|
||||
|
@ -301,6 +301,9 @@ void sqlite3Fts5IndexAutomerge(Fts5Index *p, int nMerge);
|
||||
*/
|
||||
int sqlite3Fts5IndexReads(Fts5Index *p);
|
||||
|
||||
/* Malloc utility */
|
||||
void *sqlite3Fts5MallocZero(int *pRc, int nByte);
|
||||
|
||||
/*
|
||||
** End of interface to code in fts5_index.c.
|
||||
**************************************************************************/
|
||||
|
@ -14,6 +14,138 @@
|
||||
#include "fts5Int.h"
|
||||
#include <math.h>
|
||||
|
||||
/*************************************************************************
|
||||
** Start of highlight() implementation.
|
||||
*/
|
||||
typedef struct HighlightContext HighlightContext;
|
||||
struct HighlightContext {
|
||||
const Fts5ExtensionApi *pApi; /* API offered by current FTS version */
|
||||
Fts5Context *pFts; /* First arg to pass to pApi functions */
|
||||
int iInst; /* Current phrase instance index */
|
||||
int iStart; /* First token of current phrase */
|
||||
int iEnd; /* Last token of current phrase */
|
||||
|
||||
const char *zOpen; /* Opening highlight */
|
||||
const char *zClose; /* Closing highlight */
|
||||
int iCol; /* Column to read from */
|
||||
|
||||
const char *zIn; /* Input text */
|
||||
int nIn; /* Size of input text in bytes */
|
||||
int iOff; /* Current offset within zIn[] */
|
||||
char *zOut; /* Output value */
|
||||
};
|
||||
|
||||
/*
** Append the first n bytes of z to the output string p->zOut. The previous
** value of p->zOut is consumed by the "%z" conversion and replaced with a
** freshly allocated string.
**
** If n is negative, z is treated as a nul-terminated string and appended
** in full. z may be NULL (for example when highlight() is passed an SQL
** NULL as its open or close marker), in which case a negative n is taken
** to mean zero bytes instead of calling strlen() on a NULL pointer.
**
** Return SQLITE_OK if successful, or SQLITE_NOMEM if the allocation fails.
*/
static int fts5HighlightAppend(HighlightContext *p, const char *z, int n){
  if( n<0 ){
    n = (z ? (int)strlen(z) : 0);
  }
  p->zOut = sqlite3_mprintf("%z%.*s", p->zOut, n, z);
  if( p->zOut==0 ) return SQLITE_NOMEM;
  return SQLITE_OK;
}
|
||||
|
||||
static int fts5HighlightCb(
|
||||
void *pContext, /* Pointer to HighlightContext object */
|
||||
const char *pToken, /* Buffer containing token */
|
||||
int nToken, /* Size of token in bytes */
|
||||
int iStart, /* Start offset of token */
|
||||
int iEnd, /* End offset of token */
|
||||
int iPos /* Position offset of token */
|
||||
){
|
||||
HighlightContext *p = (HighlightContext*)pContext;
|
||||
int rc = SQLITE_OK;
|
||||
|
||||
if( iPos==p->iStart ){
|
||||
rc = fts5HighlightAppend(p, &p->zIn[p->iOff], iStart - p->iOff);
|
||||
p->iOff = iStart;
|
||||
if( rc==SQLITE_OK ){
|
||||
rc = fts5HighlightAppend(p, p->zOpen, -1);
|
||||
}
|
||||
}
|
||||
|
||||
if( rc==SQLITE_OK ){
|
||||
rc = fts5HighlightAppend(p, &p->zIn[p->iOff], iEnd - p->iOff);
|
||||
p->iOff = iEnd;
|
||||
}
|
||||
|
||||
if( rc==SQLITE_OK && iPos==p->iEnd ){
|
||||
int bClose = 1;
|
||||
do{
|
||||
int iP, iPCol, iOff;
|
||||
rc = p->pApi->xInst(p->pFts, ++p->iInst, &iP, &iPCol, &iOff);
|
||||
if( rc==SQLITE_RANGE || iPCol!=p->iCol ){
|
||||
p->iStart = -1;
|
||||
p->iEnd = -1;
|
||||
rc = SQLITE_OK;
|
||||
}else{
|
||||
iEnd = iOff - 1 + p->pApi->xPhraseSize(p->pFts, iP);
|
||||
if( iEnd<=p->iEnd ) continue;
|
||||
if( iOff<=p->iEnd ) bClose = 0;
|
||||
p->iStart = iOff;
|
||||
p->iEnd = iEnd;
|
||||
}
|
||||
}while( 0 );
|
||||
|
||||
if( rc==SQLITE_OK && bClose ){
|
||||
rc = fts5HighlightAppend(p, p->zClose, -1);
|
||||
}
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void fts5HighlightFunction(
|
||||
const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
|
||||
Fts5Context *pFts, /* First arg to pass to pApi functions */
|
||||
sqlite3_context *pCtx, /* Context for returning result/error */
|
||||
int nVal, /* Number of values in apVal[] array */
|
||||
sqlite3_value **apVal /* Array of trailing arguments */
|
||||
){
|
||||
HighlightContext ctx;
|
||||
int rc;
|
||||
|
||||
if( nVal!=3 ){
|
||||
const char *zErr = "wrong number of arguments to function highlight()";
|
||||
sqlite3_result_error(pCtx, zErr, -1);
|
||||
return;
|
||||
}
|
||||
memset(&ctx, 0, sizeof(HighlightContext));
|
||||
ctx.iCol = sqlite3_value_int(apVal[0]);
|
||||
ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]);
|
||||
ctx.zClose = (const char*)sqlite3_value_text(apVal[2]);
|
||||
rc = pApi->xColumnText(pFts, ctx.iCol, &ctx.zIn, &ctx.nIn);
|
||||
ctx.pApi = pApi;
|
||||
ctx.pFts = pFts;
|
||||
|
||||
/* Find the first phrase instance in the right column. */
|
||||
ctx.iStart = -1;
|
||||
ctx.iEnd = -1;
|
||||
while( rc==SQLITE_OK ){
|
||||
int iP, iPCol, iOff;
|
||||
rc = pApi->xInst(pFts, ctx.iInst, &iP, &iPCol, &iOff);
|
||||
if( rc==SQLITE_OK && iPCol==ctx.iCol ){
|
||||
ctx.iStart = iOff;
|
||||
ctx.iEnd = iOff - 1 + pApi->xPhraseSize(pFts, iP);
|
||||
break;
|
||||
}
|
||||
ctx.iInst++;
|
||||
}
|
||||
|
||||
if( rc==SQLITE_OK || rc==SQLITE_RANGE ){
|
||||
rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx, fts5HighlightCb);
|
||||
}
|
||||
if( rc==SQLITE_OK ){
|
||||
rc = fts5HighlightAppend(&ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff);
|
||||
}
|
||||
|
||||
if( rc==SQLITE_OK ){
|
||||
sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT);
|
||||
}else{
|
||||
sqlite3_result_error_code(pCtx, rc);
|
||||
}
|
||||
sqlite3_free(ctx.zOut);
|
||||
}
|
||||
/*
|
||||
**************************************************************************/
|
||||
|
||||
typedef struct SnipPhrase SnipPhrase;
|
||||
typedef struct SnipIter SnipIter;
|
||||
typedef struct SnippetCtx SnippetCtx;
|
||||
@ -796,6 +928,22 @@ static void fts5TestFunction(
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
** xInst()
|
||||
*/
|
||||
if( zReq==0 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, " inst ");
|
||||
if( 0==zReq || 0==sqlite3_stricmp(zReq, "inst") ){
|
||||
int nInst;
|
||||
rc = pApi->xInstCount(pFts, &nInst);
|
||||
for(i=0; rc==SQLITE_OK && i<nInst; i++){
|
||||
int iPhrase, iCol, iOff;
|
||||
rc = pApi->xInst(pFts, i, &iPhrase, &iCol, &iOff);
|
||||
sqlite3Fts5BufferAppendPrintf(&rc, &s, "%s%d.%d.%d",
|
||||
(i==0 ? "" : " "), iPhrase, iCol, iOff
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
** xPhraseCount()
|
||||
*/
|
||||
@ -966,6 +1114,7 @@ int sqlite3Fts5AuxInit(fts5_api *pApi){
|
||||
{ "bm25debug", (void*)1, fts5Bm25Function, 0 },
|
||||
{ "snippet", 0, fts5SnippetFunction, 0 },
|
||||
{ "fts5_test", 0, fts5TestFunction, 0 },
|
||||
{ "highlight", 0, fts5HighlightFunction, 0 },
|
||||
{ "bm25", 0, fts5Bm25Function, 0 },
|
||||
};
|
||||
|
||||
|
@ -692,7 +692,7 @@ static void *fts5IdxMalloc(Fts5Index *p, int nByte){
|
||||
return pRet;
|
||||
}
|
||||
|
||||
static void *fts5MallocZero(int *pRc, int nByte){
|
||||
void *sqlite3Fts5MallocZero(int *pRc, int nByte){
|
||||
void *pRet = 0;
|
||||
if( *pRc==SQLITE_OK ){
|
||||
pRet = sqlite3_malloc(nByte);
|
||||
@ -981,7 +981,7 @@ static int fts5StructureDecode(
|
||||
sizeof(Fts5Structure) + /* Main structure */
|
||||
sizeof(Fts5StructureLevel) * (nLevel) /* aLevel[] array */
|
||||
);
|
||||
pRet = (Fts5Structure*)fts5MallocZero(&rc, nByte);
|
||||
pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte);
|
||||
|
||||
if( pRet ){
|
||||
pRet->nLevel = nLevel;
|
||||
@ -995,7 +995,7 @@ static int fts5StructureDecode(
|
||||
i += getVarint32(&pData[i], pLvl->nMerge);
|
||||
i += getVarint32(&pData[i], nTotal);
|
||||
assert( nTotal>=pLvl->nMerge );
|
||||
pLvl->aSeg = (Fts5StructureSegment*)fts5MallocZero(&rc,
|
||||
pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc,
|
||||
nTotal * sizeof(Fts5StructureSegment)
|
||||
);
|
||||
|
||||
|
Reference in New Issue
Block a user