1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-07-29 08:01:23 +03:00

Change fts4aux to (additionally) report on term frequency in individual columns of an fts table.

FossilOrigin-Name: 3996f92a9aca9ac2628be003eca83c8f954c71de
This commit is contained in:
dan
2011-02-04 18:56:25 +00:00
parent 29362c204f
commit 4c600ac389
6 changed files with 167 additions and 103 deletions

View File

@ -31,17 +31,22 @@ struct Fts3auxCursor {
Fts3SegReaderCursor csr; /* Must be right after "base" */
Fts3SegFilter filter;
char *zStop;
int nStop;
int isEof;
sqlite3_int64 iRowid;
sqlite3_int64 nDoc;
sqlite3_int64 nOcc;
int nStop; /* Byte-length of string zStop */
int isEof; /* True if cursor is at EOF */
sqlite3_int64 iRowid; /* Current rowid */
int iCol; /* Current value of 'col' column */
int nStat; /* Size of aStat[] array */
struct Fts3auxColstats {
sqlite3_int64 nDoc; /* 'documents' values for current csr row */
sqlite3_int64 nOcc; /* 'occurrences' values for current csr row */
} *aStat;
};
/*
** Schema of the terms table.
*/
#define FTS3_TERMS_SCHEMA "CREATE TABLE x(term, documents, occurrences)"
#define FTS3_TERMS_SCHEMA "CREATE TABLE x(term, col, documents, occurrences)"
/*
** This function does all the work for both the xConnect and xCreate methods.
@ -199,10 +204,27 @@ static int fts3auxCloseMethod(sqlite3_vtab_cursor *pCursor){
sqlite3Fts3SegReaderFinish(&pCsr->csr);
sqlite3_free((void *)pCsr->filter.zTerm);
sqlite3_free(pCsr->zStop);
sqlite3_free(pCsr->aStat);
sqlite3_free(pCsr);
return SQLITE_OK;
}
/*
** Ensure that the pCsr->aStat[] array contains at least nSize entries.
** If the array has to grow, the existing entries are preserved and the
** newly added entries are zeroed. If nSize is not larger than the current
** size (pCsr->nStat), this function is a no-op.
**
** Return SQLITE_OK if successful. Return SQLITE_NOMEM if the allocation
** fails, or if the requested array is so large that its size in bytes
** cannot be passed to sqlite3_realloc(). In both failure cases pCsr->aStat
** and pCsr->nStat are left unmodified, so the existing array is still
** valid and still owned by the cursor.
*/
static int fts3auxGrowStatArray(Fts3auxCursor *pCsr, int nSize){
  if( nSize>pCsr->nStat ){
    struct Fts3auxColstats *aNew;

    /* sqlite3_realloc() takes its byte count as an int and treats a zero
    ** or negative count as a request to free the buffer. The value of nSize
    ** may be derived from a column number read out of a (possibly corrupt)
    ** doclist, so refuse any request for which the byte count does not fit
    ** in a signed 32-bit integer. Otherwise the product below could be
    ** truncated to a too-small allocation (overrun by the memset()), or
    ** could cause pCsr->aStat to be freed while the cursor still points
    ** at it. */
    if( nSize>(int)(0x7fffffff / sizeof(struct Fts3auxColstats)) ){
      return SQLITE_NOMEM;
    }

    aNew = (struct Fts3auxColstats *)sqlite3_realloc(pCsr->aStat,
        (int)(sizeof(struct Fts3auxColstats) * nSize)
    );
    if( aNew==0 ) return SQLITE_NOMEM;

    /* Zero only the entries beyond the old end of the array. */
    memset(&aNew[pCsr->nStat], 0,
        sizeof(struct Fts3auxColstats) * (nSize - pCsr->nStat)
    );
    pCsr->aStat = aNew;
    pCsr->nStat = nSize;
  }
  return SQLITE_OK;
}
/*
** xNext - Advance the cursor to the next row, if any.
*/
@ -211,12 +233,21 @@ static int fts3auxNextMethod(sqlite3_vtab_cursor *pCursor){
Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab;
int rc;
/* Increment our pretend rowid value. */
pCsr->iRowid++;
for(pCsr->iCol++; pCsr->iCol<pCsr->nStat; pCsr->iCol++){
if( pCsr->aStat[pCsr->iCol].nDoc>0 ) return SQLITE_OK;
}
rc = sqlite3Fts3SegReaderStep(pFts3, &pCsr->csr);
if( rc==SQLITE_ROW ){
int i;
int isIgnore = 1;
int i = 0;
int nDoclist = pCsr->csr.nDoclist;
char *aDoclist = pCsr->csr.aDoclist;
int iCol;
int eState = 0;
if( pCsr->zStop ){
int n = (pCsr->nStop<pCsr->csr.nTerm) ? pCsr->nStop : pCsr->csr.nTerm;
@ -227,27 +258,61 @@ static int fts3auxNextMethod(sqlite3_vtab_cursor *pCursor){
}
}
/* Now count the number of documents and positions in the doclist
** in pCsr->csr.aDoclist[]. Store the number of documents in pCsr->nDoc
** and the number of occurrences in pCsr->nOcc. */
pCsr->nDoc = 0;
pCsr->nOcc = 0;
i = 0;
if( fts3auxGrowStatArray(pCsr, 2) ) return SQLITE_NOMEM;
memset(pCsr->aStat, 0, sizeof(struct Fts3auxColstats) * pCsr->nStat);
iCol = 0;
while( i<nDoclist ){
sqlite3_int64 v = 0;
i += sqlite3Fts3GetVarint(&aDoclist[i], &v);
if( isIgnore ){
isIgnore = 0;
}else if( v>1 ){
pCsr->nOcc++;
}else{
if( v==0 ) pCsr->nDoc++;
isIgnore = 1;
switch( eState ){
/* State 0. In this state the integer just read was a docid. */
case 0:
pCsr->aStat[0].nDoc++;
eState = 1;
iCol = 0;
break;
/* State 1. In this state we are expecting either a 1, indicating
** that the following integer will be a column number, or the
** start of a position list for column 0.
**
** The only difference between state 1 and state 2 is that if the
** integer encountered in state 1 is not 0 or 1, then we need to
** increment the column 0 "nDoc" count for this term.
*/
case 1:
assert( iCol==0 );
if( v>1 ){
pCsr->aStat[1].nDoc++;
}
eState = 2;
/* fall through */
case 2:
if( v==0 ){ /* 0x00. Next integer will be a docid. */
eState = 0;
}else if( v==1 ){ /* 0x01. Next integer will be a column number. */
eState = 3;
}else{ /* 2 or greater. A position. */
pCsr->aStat[iCol+1].nOcc++;
pCsr->aStat[0].nOcc++;
}
break;
/* State 3. The integer just read is a column number. */
case 3:
iCol = (int)v;
if( fts3auxGrowStatArray(pCsr, iCol+2) ) return SQLITE_NOMEM;
pCsr->aStat[iCol+1].nDoc++;
eState = 2;
break;
}
}
pCsr->iCol = 0;
rc = SQLITE_OK;
pCsr->iRowid++;
}else{
pCsr->isEof = 1;
}
@ -280,6 +345,7 @@ static int fts3auxFilterMethod(
testcase(pCsr->filter.zTerm);
sqlite3Fts3SegReaderFinish(&pCsr->csr);
sqlite3_free((void *)pCsr->filter.zTerm);
sqlite3_free(pCsr->aStat);
memset(&pCsr->csr, 0, ((u8*)&pCsr[1]) - (u8*)&pCsr->csr);
pCsr->filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY;
@ -332,10 +398,16 @@ static int fts3auxColumnMethod(
assert( p->isEof==0 );
if( iCol==0 ){ /* Column "term" */
sqlite3_result_text(pContext, p->csr.zTerm, p->csr.nTerm, SQLITE_TRANSIENT);
}else if( iCol==1 ){ /* Column "documents" */
sqlite3_result_int64(pContext, p->nDoc);
}else if( iCol==1 ){ /* Column "col" */
if( p->iCol ){
sqlite3_result_int(pContext, p->iCol-1);
}else{
sqlite3_result_text(pContext, "*", -1, SQLITE_STATIC);
}
}else if( iCol==2 ){ /* Column "documents" */
sqlite3_result_int64(pContext, p->aStat[p->iCol].nDoc);
}else{ /* Column "occurrences" */
sqlite3_result_int64(pContext, p->nOcc);
sqlite3_result_int64(pContext, p->aStat[p->iCol].nOcc);
}
return SQLITE_OK;

View File

@ -2223,7 +2223,7 @@ void sqlite3Fts3SegReaderFinish(
*/
static int fts3SegmentMerge(Fts3Table *p, int iLevel){
int rc; /* Return code */
int iIdx; /* Index of new segment */
int iIdx = 0; /* Index of new segment */
int iNewLevel = 0; /* Level to create new segment at */
SegmentWriter *pWriter = 0; /* Used to write the new, merged, segment */
Fts3SegFilter filter; /* Segment term filter condition */
@ -2242,7 +2242,6 @@ static int fts3SegmentMerge(Fts3Table *p, int iLevel){
rc = SQLITE_DONE;
goto finished;
}
iIdx = 0;
rc = fts3SegmentCountMax(p, &nDummy, &iNewLevel);
}else{
/* This call is to merge all segments at level iLevel. Find the next