mirror of
https://github.com/sqlite/sqlite.git
synced 2025-07-29 08:01:23 +03:00
Change fts4aux to (additionally) report on term frequency in individual columns of an fts table.
FossilOrigin-Name: 3996f92a9aca9ac2628be003eca83c8f954c71de
This commit is contained in:
@ -31,17 +31,22 @@ struct Fts3auxCursor {
|
||||
Fts3SegReaderCursor csr; /* Must be right after "base" */
|
||||
Fts3SegFilter filter;
|
||||
char *zStop;
|
||||
int nStop;
|
||||
int isEof;
|
||||
sqlite3_int64 iRowid;
|
||||
sqlite3_int64 nDoc;
|
||||
sqlite3_int64 nOcc;
|
||||
int nStop; /* Byte-length of string zStop */
|
||||
int isEof; /* True if cursor is at EOF */
|
||||
sqlite3_int64 iRowid; /* Current rowid */
|
||||
|
||||
int iCol; /* Current value of 'col' column */
|
||||
int nStat; /* Size of aStat[] array */
|
||||
struct Fts3auxColstats {
|
||||
sqlite3_int64 nDoc; /* 'documents' values for current csr row */
|
||||
sqlite3_int64 nOcc; /* 'occurrences' values for current csr row */
|
||||
} *aStat;
|
||||
};
|
||||
|
||||
/*
|
||||
** Schema of the terms table.
|
||||
*/
|
||||
#define FTS3_TERMS_SCHEMA "CREATE TABLE x(term, documents, occurrences)"
|
||||
#define FTS3_TERMS_SCHEMA "CREATE TABLE x(term, col, documents, occurrences)"
|
||||
|
||||
/*
|
||||
** This function does all the work for both the xConnect and xCreate methods.
|
||||
@ -199,10 +204,27 @@ static int fts3auxCloseMethod(sqlite3_vtab_cursor *pCursor){
|
||||
sqlite3Fts3SegReaderFinish(&pCsr->csr);
|
||||
sqlite3_free((void *)pCsr->filter.zTerm);
|
||||
sqlite3_free(pCsr->zStop);
|
||||
sqlite3_free(pCsr->aStat);
|
||||
sqlite3_free(pCsr);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
** Make sure the pCsr->aStat[] array has room for at least nSize entries.
**
** If nSize is not larger than the current array size (pCsr->nStat), this
** function is a no-op. Otherwise the array is reallocated, the newly added
** entries are zeroed, and pCsr->aStat / pCsr->nStat are updated. Existing
** entries are preserved.
**
** Return SQLITE_OK if successful, or SQLITE_NOMEM if the array cannot be
** grown. In the SQLITE_NOMEM case pCsr->aStat and pCsr->nStat are left
** unmodified, so the cursor may still be safely closed.
*/
static int fts3auxGrowStatArray(Fts3auxCursor *pCsr, int nSize){
  if( nSize>pCsr->nStat ){
    struct Fts3auxColstats *aNew;

    /* The byte count handed to sqlite3_realloc() is an int. nSize may be
    ** derived from a column number read out of a doclist, so refuse any
    ** request whose size in bytes would not fit in a positive int. Without
    ** this check a truncated count could produce an undersized buffer (and
    ** the memset() below would overrun it), or a zero/negative count could
    ** cause the existing array to be freed while pCsr->aStat still points
    ** to it. */
    if( nSize>(int)(0x7fffffff / sizeof(struct Fts3auxColstats)) ){
      return SQLITE_NOMEM;
    }

    aNew = (struct Fts3auxColstats *)sqlite3_realloc(pCsr->aStat,
        (int)(sizeof(struct Fts3auxColstats) * nSize)
    );
    if( aNew==0 ) return SQLITE_NOMEM;

    /* Zero only the entries that were just added. */
    memset(&aNew[pCsr->nStat], 0,
        sizeof(struct Fts3auxColstats) * (nSize - pCsr->nStat)
    );
    pCsr->aStat = aNew;
    pCsr->nStat = nSize;
  }
  return SQLITE_OK;
}
|
||||
|
||||
/*
|
||||
** xNext - Advance the cursor to the next row, if any.
|
||||
*/
|
||||
@ -211,12 +233,21 @@ static int fts3auxNextMethod(sqlite3_vtab_cursor *pCursor){
|
||||
Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab;
|
||||
int rc;
|
||||
|
||||
/* Increment our pretend rowid value. */
|
||||
pCsr->iRowid++;
|
||||
|
||||
for(pCsr->iCol++; pCsr->iCol<pCsr->nStat; pCsr->iCol++){
|
||||
if( pCsr->aStat[pCsr->iCol].nDoc>0 ) return SQLITE_OK;
|
||||
}
|
||||
|
||||
rc = sqlite3Fts3SegReaderStep(pFts3, &pCsr->csr);
|
||||
if( rc==SQLITE_ROW ){
|
||||
int i;
|
||||
int isIgnore = 1;
|
||||
int i = 0;
|
||||
int nDoclist = pCsr->csr.nDoclist;
|
||||
char *aDoclist = pCsr->csr.aDoclist;
|
||||
int iCol;
|
||||
|
||||
int eState = 0;
|
||||
|
||||
if( pCsr->zStop ){
|
||||
int n = (pCsr->nStop<pCsr->csr.nTerm) ? pCsr->nStop : pCsr->csr.nTerm;
|
||||
@ -227,27 +258,61 @@ static int fts3auxNextMethod(sqlite3_vtab_cursor *pCursor){
|
||||
}
|
||||
}
|
||||
|
||||
/* Now count the number of documents and positions in the doclist
|
||||
** in pCsr->csr.aDoclist[]. Store the number of documents in pCsr->nDoc
|
||||
** and the number of occurrences in pCsr->nOcc. */
|
||||
pCsr->nDoc = 0;
|
||||
pCsr->nOcc = 0;
|
||||
i = 0;
|
||||
if( fts3auxGrowStatArray(pCsr, 2) ) return SQLITE_NOMEM;
|
||||
memset(pCsr->aStat, 0, sizeof(struct Fts3auxColstats) * pCsr->nStat);
|
||||
iCol = 0;
|
||||
|
||||
while( i<nDoclist ){
|
||||
sqlite3_int64 v = 0;
|
||||
|
||||
i += sqlite3Fts3GetVarint(&aDoclist[i], &v);
|
||||
if( isIgnore ){
|
||||
isIgnore = 0;
|
||||
}else if( v>1 ){
|
||||
pCsr->nOcc++;
|
||||
}else{
|
||||
if( v==0 ) pCsr->nDoc++;
|
||||
isIgnore = 1;
|
||||
switch( eState ){
|
||||
/* State 0. In this state the integer just read was a docid. */
|
||||
case 0:
|
||||
pCsr->aStat[0].nDoc++;
|
||||
eState = 1;
|
||||
iCol = 0;
|
||||
break;
|
||||
|
||||
/* State 1. In this state we are expecting either a 1, indicating
|
||||
** that the following integer will be a column number, or the
|
||||
** start of a position list for column 0.
|
||||
**
|
||||
** The only difference between state 1 and state 2 is that if the
|
||||
** integer encountered in state 1 is not 0 or 1, then we need to
|
||||
** increment the column 0 "nDoc" count for this term.
|
||||
*/
|
||||
case 1:
|
||||
assert( iCol==0 );
|
||||
if( v>1 ){
|
||||
pCsr->aStat[1].nDoc++;
|
||||
}
|
||||
eState = 2;
|
||||
/* fall through */
|
||||
|
||||
case 2:
|
||||
if( v==0 ){ /* 0x00. Next integer will be a docid. */
|
||||
eState = 0;
|
||||
}else if( v==1 ){ /* 0x01. Next integer will be a column number. */
|
||||
eState = 3;
|
||||
}else{ /* 2 or greater. A position. */
|
||||
pCsr->aStat[iCol+1].nOcc++;
|
||||
pCsr->aStat[0].nOcc++;
|
||||
}
|
||||
break;
|
||||
|
||||
/* State 3. The integer just read is a column number. */
|
||||
case 3:
|
||||
iCol = (int)v;
|
||||
if( fts3auxGrowStatArray(pCsr, iCol+2) ) return SQLITE_NOMEM;
|
||||
pCsr->aStat[iCol+1].nDoc++;
|
||||
eState = 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
pCsr->iCol = 0;
|
||||
rc = SQLITE_OK;
|
||||
pCsr->iRowid++;
|
||||
}else{
|
||||
pCsr->isEof = 1;
|
||||
}
|
||||
@ -280,6 +345,7 @@ static int fts3auxFilterMethod(
|
||||
testcase(pCsr->filter.zTerm);
|
||||
sqlite3Fts3SegReaderFinish(&pCsr->csr);
|
||||
sqlite3_free((void *)pCsr->filter.zTerm);
|
||||
sqlite3_free(pCsr->aStat);
|
||||
memset(&pCsr->csr, 0, ((u8*)&pCsr[1]) - (u8*)&pCsr->csr);
|
||||
|
||||
pCsr->filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY;
|
||||
@ -332,10 +398,16 @@ static int fts3auxColumnMethod(
|
||||
assert( p->isEof==0 );
|
||||
if( iCol==0 ){ /* Column "term" */
|
||||
sqlite3_result_text(pContext, p->csr.zTerm, p->csr.nTerm, SQLITE_TRANSIENT);
|
||||
}else if( iCol==1 ){ /* Column "documents" */
|
||||
sqlite3_result_int64(pContext, p->nDoc);
|
||||
}else if( iCol==1 ){ /* Column "col" */
|
||||
if( p->iCol ){
|
||||
sqlite3_result_int(pContext, p->iCol-1);
|
||||
}else{
|
||||
sqlite3_result_text(pContext, "*", -1, SQLITE_STATIC);
|
||||
}
|
||||
}else if( iCol==2 ){ /* Column "documents" */
|
||||
sqlite3_result_int64(pContext, p->aStat[p->iCol].nDoc);
|
||||
}else{ /* Column "occurrences" */
|
||||
sqlite3_result_int64(pContext, p->nOcc);
|
||||
sqlite3_result_int64(pContext, p->aStat[p->iCol].nOcc);
|
||||
}
|
||||
|
||||
return SQLITE_OK;
|
||||
|
@ -2223,7 +2223,7 @@ void sqlite3Fts3SegReaderFinish(
|
||||
*/
|
||||
static int fts3SegmentMerge(Fts3Table *p, int iLevel){
|
||||
int rc; /* Return code */
|
||||
int iIdx; /* Index of new segment */
|
||||
int iIdx = 0; /* Index of new segment */
|
||||
int iNewLevel = 0; /* Level to create new segment at */
|
||||
SegmentWriter *pWriter = 0; /* Used to write the new, merged, segment */
|
||||
Fts3SegFilter filter; /* Segment term filter condition */
|
||||
@ -2242,7 +2242,6 @@ static int fts3SegmentMerge(Fts3Table *p, int iLevel){
|
||||
rc = SQLITE_DONE;
|
||||
goto finished;
|
||||
}
|
||||
iIdx = 0;
|
||||
rc = fts3SegmentCountMax(p, &nDummy, &iNewLevel);
|
||||
}else{
|
||||
/* This call is to merge all segments at level iLevel. Find the next
|
||||
|
Reference in New Issue
Block a user