1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-08-01 06:27:03 +03:00

Allow the rank column to be remapped on a per-query basis by including a term similar to "rank match 'bm25(10,2)'" in a where clause.

FossilOrigin-Name: 1cd15a1759004d5d321056905dbb6acff20dc7d9
This commit is contained in:
dan
2015-01-02 14:55:22 +00:00
parent 37db72f1f7
commit ade921c3ad
6 changed files with 152 additions and 35 deletions

View File

@ -160,6 +160,8 @@ struct Fts5Cursor {
char *zSpecial; /* Result of special query */
/* "rank" function. Populated on demand from vtab.xColumn(). */
char *zRank; /* Custom rank function */
char *zRankArgs; /* Custom rank function args */
Fts5Auxiliary *pRank; /* Rank callback (or NULL) */
int nRankArg; /* Number of trailing arguments for rank() */
sqlite3_value **apRankArg; /* Array of trailing arguments */
@ -181,6 +183,7 @@ struct Fts5Cursor {
#define FTS5CSR_REQUIRE_CONTENT 0x01
#define FTS5CSR_REQUIRE_DOCSIZE 0x02
#define FTS5CSR_EOF 0x04
#define FTS5CSR_FREE_ZRANK 0x08
/*
** Macros to Set(), Clear() and Test() cursor flags.
@ -418,6 +421,7 @@ static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
Fts5Config *pConfig = pTab->pConfig;
int iCons;
int ePlan = FTS5_PLAN_SCAN;
int iRankMatch;
iCons = fts5FindConstraint(pInfo,SQLITE_INDEX_CONSTRAINT_MATCH,pConfig->nCol);
if( iCons>=0 ){
@ -453,6 +457,14 @@ static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
ePlan |= pInfo->aOrderBy[0].desc ? FTS5_ORDER_DESC : FTS5_ORDER_ASC;
}
}
iRankMatch = fts5FindConstraint(
pInfo, SQLITE_INDEX_CONSTRAINT_MATCH, pConfig->nCol+1
);
if( iRankMatch>=0 ){
pInfo->aConstraintUsage[iRankMatch].argvIndex = 1 + (iCons>=0);
pInfo->aConstraintUsage[iRankMatch].omit = 1;
}
pInfo->idxNum = ePlan;
return SQLITE_OK;
@ -543,6 +555,10 @@ static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){
sqlite3_free(pCsr->apRankArg);
sqlite3_free(pCsr->zSpecial);
if( CsrFlagTest(pCsr, FTS5CSR_FREE_ZRANK) ){
sqlite3_free(pCsr->zRank);
sqlite3_free(pCsr->zRankArgs);
}
sqlite3_free(pCsr);
return SQLITE_OK;
}
@ -636,7 +652,8 @@ static int fts5CursorFirstSorted(Fts5Table *pTab, Fts5Cursor *pCsr, int bAsc){
int nByte;
int rc = SQLITE_OK;
char *zSql;
const char *zRank = pConfig->zRank ? pConfig->zRank : FTS5_DEFAULT_RANK;
const char *zRank = pCsr->zRank;
const char *zRankArgs = pCsr->zRankArgs;
nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
nByte = sizeof(Fts5Sorter) + sizeof(int) * nPhrase;
@ -654,8 +671,8 @@ static int fts5CursorFirstSorted(Fts5Table *pTab, Fts5Cursor *pCsr, int bAsc){
** If SQLite had a built-in statement cache, this wouldn't be a problem. */
zSql = sqlite3_mprintf("SELECT rowid, rank FROM %Q.%Q ORDER BY %s(%s%s%s) %s",
pConfig->zDb, pConfig->zName, zRank, pConfig->zName,
(pConfig->zRankArgs ? ", " : ""),
(pConfig->zRankArgs ? pConfig->zRankArgs : ""),
(zRankArgs ? ", " : ""),
(zRankArgs ? zRankArgs : ""),
bAsc ? "ASC" : "DESC"
);
if( zSql==0 ){
@ -747,14 +764,13 @@ static Fts5Auxiliary *fts5FindAuxiliary(Fts5Table *pTab, const char *zName){
static int fts5FindRankFunction(Fts5Cursor *pCsr){
Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
Fts5Config *pConfig = pTab->pConfig;
const char *zRank = pConfig->zRank;
int rc = SQLITE_OK;
Fts5Auxiliary *pAux;
const char *zRank = pCsr->zRank;
const char *zRankArgs = pCsr->zRankArgs;
if( zRank==0 ) zRank = FTS5_DEFAULT_RANK;
if( pTab->pConfig->zRankArgs ){
char *zSql = sqlite3_mprintf("SELECT %s", pTab->pConfig->zRankArgs);
if( zRankArgs ){
char *zSql = sqlite3_mprintf("SELECT %s", zRankArgs);
if( zSql==0 ){
rc = SQLITE_NOMEM;
}else{
@ -796,10 +812,50 @@ static int fts5FindRankFunction(Fts5Cursor *pCsr){
return rc;
}
/*
** Configure the rank function (pCsr->zRank) and its argument list
** (pCsr->zRankArgs) for cursor pCsr.
**
** If pRank is not NULL, it is the value bound to a per-query
** "rank MATCH '...'" term. Its text is parsed with
** sqlite3Fts5ConfigParseRank() and, on success, the two resulting strings
** are stored in the cursor and the FTS5CSR_FREE_ZRANK flag is set so that
** fts5CloseMethod() releases them with sqlite3_free().
**
** If pRank is NULL, the cursor borrows the table-wide setting from pConfig
** (or FTS5_DEFAULT_RANK with no arguments if none is configured). The flag
** is left clear in this case, so the borrowed pointers are never freed
** through the cursor.
**
** Returns SQLITE_OK on success. SQLITE_ERROR is returned, and an error
** message left in the vtab object, if the rank specification cannot be
** parsed or is an SQL NULL. SQLITE_NOMEM is returned if the text of pRank
** cannot be obtained for any other reason. Any other error code reported
** by the parser is passed through unchanged.
*/
static int fts5CursorParseRank(
  Fts5Config *pConfig,
  Fts5Cursor *pCsr,
  sqlite3_value *pRank
){
  int rc = SQLITE_OK;
  if( pRank ){
    /* Read the datatype before extracting text, since text extraction
    ** may change the datatype reported for the value. */
    int eType = sqlite3_value_type(pRank);
    const char *z = (const char*)sqlite3_value_text(pRank);
    char *zRank = 0;
    char *zRankArgs = 0;

    if( z==0 ){
      /* sqlite3_value_text() returns NULL for an SQL NULL and also when
      ** the conversion to text fails. Never hand NULL to the parser. */
      rc = (eType==SQLITE_NULL) ? SQLITE_ERROR : SQLITE_NOMEM;
    }else{
      rc = sqlite3Fts5ConfigParseRank(z, &zRank, &zRankArgs);
    }

    if( rc==SQLITE_OK ){
      pCsr->zRank = zRank;
      pCsr->zRankArgs = zRankArgs;
      CsrFlagSet(pCsr, FTS5CSR_FREE_ZRANK);
    }else if( rc==SQLITE_ERROR ){
      sqlite3_vtab *pVtab = pCsr->base.pVtab;
      /* Release any message left over from an earlier call so that it is
      ** not leaked when the pointer is overwritten. */
      sqlite3_free(pVtab->zErrMsg);
      pVtab->zErrMsg = sqlite3_mprintf(
          "parse error in rank function: %s", (z ? z : "")
      );
    }
  }else{
    if( pConfig->zRank ){
      pCsr->zRank = (char*)pConfig->zRank;
      pCsr->zRankArgs = (char*)pConfig->zRankArgs;
    }else{
      pCsr->zRank = (char*)FTS5_DEFAULT_RANK;
      pCsr->zRankArgs = 0;
    }
  }
  return rc;
}
/*
** This is the xFilter interface for the virtual table. See
** the virtual table xFilter method documentation for additional
** information.
**
** There are three possible query strategies:
**
** 1. Full-text search using a MATCH operator.
** 2. A by-rowid lookup.
** 3. A full-table scan.
*/
static int fts5FilterMethod(
sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */
@ -813,10 +869,13 @@ static int fts5FilterMethod(
int bAsc = ((idxNum & FTS5_ORDER_ASC) ? 1 : 0);
int rc = SQLITE_OK;
assert( nVal<=2 );
assert( pCsr->pStmt==0 );
assert( pCsr->pExpr==0 );
assert( pCsr->csrflags==0 );
assert( pCsr->pRank==0 );
assert( pCsr->zRank==0 );
assert( pCsr->zRankArgs==0 );
if( pTab->pSortCsr ){
/* If pSortCsr is non-NULL, then this call is being made as part of
@ -835,19 +894,22 @@ static int fts5FilterMethod(
if( ePlan==FTS5_PLAN_MATCH || ePlan==FTS5_PLAN_SORTED_MATCH ){
const char *zExpr = (const char*)sqlite3_value_text(apVal[0]);
if( zExpr[0]=='*' ){
/* The user has issued a query of the form "MATCH '*...'". This
** indicates that the MATCH expression is not a full text query,
** but a request for an internal parameter. */
rc = fts5SpecialMatch(pTab, pCsr, &zExpr[1]);
}else{
char **pzErr = &pTab->base.zErrMsg;
rc = sqlite3Fts5ExprNew(pTab->pConfig, zExpr, &pCsr->pExpr, pzErr);
if( rc==SQLITE_OK ){
if( ePlan==FTS5_PLAN_MATCH ){
rc = fts5CursorFirst(pTab, pCsr, bAsc);
}else{
rc = fts5CursorFirstSorted(pTab, pCsr, bAsc);
rc = fts5CursorParseRank(pTab->pConfig, pCsr, (nVal==2 ? apVal[1] : 0));
if( rc==SQLITE_OK ){
if( zExpr[0]=='*' ){
/* The user has issued a query of the form "MATCH '*...'". This
** indicates that the MATCH expression is not a full text query,
** but a request for an internal parameter. */
rc = fts5SpecialMatch(pTab, pCsr, &zExpr[1]);
}else{
char **pzErr = &pTab->base.zErrMsg;
rc = sqlite3Fts5ExprNew(pTab->pConfig, zExpr, &pCsr->pExpr, pzErr);
if( rc==SQLITE_OK ){
if( ePlan==FTS5_PLAN_MATCH ){
rc = fts5CursorFirst(pTab, pCsr, bAsc);
}else{
rc = fts5CursorFirstSorted(pTab, pCsr, bAsc);
}
}
}
}

View File

@ -109,6 +109,8 @@ int sqlite3Fts5ConfigLoad(Fts5Config*, int);
/* Set the value of a single config attribute */
int sqlite3Fts5ConfigSetValue(Fts5Config*, const char*, sqlite3_value*, int*);
int sqlite3Fts5ConfigParseRank(const char*, char**, char**);
/*
** End of interface to code in fts5_config.c.
**************************************************************************/

View File

@ -552,7 +552,7 @@ static const char *fts5ConfigSkipArgs(const char *pIn){
** + Zero or more SQL literals in a comma separated list
** + Close parenthesis - ")"
*/
static int fts5ConfigParseRank(
int sqlite3Fts5ConfigParseRank(
const char *zIn, /* Input string */
char **pzRank, /* OUT: Rank function name */
char **pzRankArgs /* OUT: Rank function arguments */
@ -647,7 +647,7 @@ int sqlite3Fts5ConfigSetValue(
const char *zIn = (const char*)sqlite3_value_text(pVal);
char *zRank;
char *zRankArgs;
rc = fts5ConfigParseRank(zIn, &zRank, &zRankArgs);
rc = sqlite3Fts5ConfigParseRank(zIn, &zRank, &zRankArgs);
if( rc==SQLITE_OK ){
sqlite3_free(pConfig->zRank);
sqlite3_free(pConfig->zRankArgs);

View File

@ -178,6 +178,23 @@ do_execsql_test 4.1.1 {
}
do_execsql_test 4.1.2 {
SELECT rowid, rank FROM t2
WHERE t2 MATCH 'a' AND rank MATCH 'firstinst()'
ORDER BY rowid ASC
} {
1 0 2 4 3 6 5 103
6 9 7 0 9 102 10 8
}
do_execsql_test 4.1.3 {
SELECT rowid, rank FROM t2
WHERE t2 MATCH 'a' AND rank MATCH 'firstinst()'
ORDER BY rank DESC
} {
5 103 9 102 6 9 10 8 3 6 2 4 7 0 1 0
}
do_execsql_test 4.1.4 {
INSERT INTO t2(t2, rank) VALUES('rank', 'firstinst()');
SELECT rowid, rank FROM t2 WHERE t2 MATCH 'a' ORDER BY rowid ASC
} {
@ -185,13 +202,13 @@ do_execsql_test 4.1.2 {
6 9 7 0 9 102 10 8
}
do_execsql_test 4.1.3 {
do_execsql_test 4.1.5 {
SELECT rowid, rank FROM t2 WHERE t2 MATCH 'a' ORDER BY rank DESC
} {
5 103 9 102 6 9 10 8 3 6 2 4 7 0 1 0
}
do_execsql_test 4.1.4 {
do_execsql_test 4.1.6 {
INSERT INTO t2(t2, rank) VALUES('rank', 'firstinst ( ) ');
SELECT rowid, rank FROM t2 WHERE t2 MATCH 'a' ORDER BY rank DESC
} {
@ -216,6 +233,42 @@ do_execsql_test 4.2.2 {
10 121
}
do_execsql_test 4.2.3 {
SELECT rowid, rank FROM t2
WHERE t2 MATCH 'o + q + g' AND rank MATCH 'rowidplus(112)'
} {
10 122
}
# Custom auxiliary function used as a per-query rank function by the 4.3.*
# tests. Invokes "$cmd xRowid" and returns that value modulo $imod.
#
#   cmd   - API command handle passed in by the FTS5 test harness
#           (presumably xRowid yields the rowid of the current row)
#   imod  - integer modulus
#
# The expression is braced so that it is substituted exactly once and can
# be byte-compiled.
proc rowidmod {cmd imod} {
  expr {[$cmd xRowid] % $imod}
}
sqlite3_fts5_create_function db rowidmod rowidmod
do_execsql_test 4.3.1 {
CREATE VIRTUAL TABLE t3 USING fts5(x);
INSERT INTO t3 VALUES('a one');
INSERT INTO t3 VALUES('a two');
INSERT INTO t3 VALUES('a three');
INSERT INTO t3 VALUES('a four');
INSERT INTO t3 VALUES('a five');
INSERT INTO t3(t3, rank) VALUES('rank', 'bm25()');
}
breakpoint
do_execsql_test 4.3.2 {
SELECT * FROM t3
WHERE t3 MATCH 'a' AND rank MATCH 'rowidmod(4)'
ORDER BY rank ASC
} {
{a four} {a five} {a one} {a two} {a three}
}
do_execsql_test 4.3.3 {
SELECT *, rank FROM t3
WHERE t3 MATCH 'a' AND rank MATCH 'rowidmod(3)'
ORDER BY rank ASC
} {
{a three} 0 {a four} 1 {a one} 1 {a five} 2 {a two} 2
}
finish_test