From ade921c3ad001b0397eae849975572c07cfa2f96 Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 2 Jan 2015 14:55:22 +0000 Subject: [PATCH] Allow the rank column to be remapped on a per-query basis by including a term similar to "rank match 'bm25(10,2)'" in a where clause. FossilOrigin-Name: 1cd15a1759004d5d321056905dbb6acff20dc7d9 --- ext/fts5/fts5.c | 104 ++++++++++++++++++++++++++++++-------- ext/fts5/fts5Int.h | 2 + ext/fts5/fts5_config.c | 4 +- ext/fts5/test/fts5al.test | 57 ++++++++++++++++++++- manifest | 18 +++---- manifest.uuid | 2 +- 6 files changed, 152 insertions(+), 35 deletions(-) diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index 604d5c7cb6..6c69da97b3 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -160,6 +160,8 @@ struct Fts5Cursor { char *zSpecial; /* Result of special query */ /* "rank" function. Populated on demand from vtab.xColumn(). */ + char *zRank; /* Custom rank function */ + char *zRankArgs; /* Custom rank function args */ Fts5Auxiliary *pRank; /* Rank callback (or NULL) */ int nRankArg; /* Number of trailing arguments for rank() */ sqlite3_value **apRankArg; /* Array of trailing arguments */ @@ -181,6 +183,7 @@ struct Fts5Cursor { #define FTS5CSR_REQUIRE_CONTENT 0x01 #define FTS5CSR_REQUIRE_DOCSIZE 0x02 #define FTS5CSR_EOF 0x04 +#define FTS5CSR_FREE_ZRANK 0x08 /* ** Macros to Set(), Clear() and Test() cursor flags. @@ -418,6 +421,7 @@ static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ Fts5Config *pConfig = pTab->pConfig; int iCons; int ePlan = FTS5_PLAN_SCAN; + int iRankMatch; iCons = fts5FindConstraint(pInfo,SQLITE_INDEX_CONSTRAINT_MATCH,pConfig->nCol); if( iCons>=0 ){ @@ -453,6 +457,14 @@ static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ ePlan |= pInfo->aOrderBy[0].desc ? FTS5_ORDER_DESC : FTS5_ORDER_ASC; } } + + iRankMatch = fts5FindConstraint( + pInfo, SQLITE_INDEX_CONSTRAINT_MATCH, pConfig->nCol+1 + ); + if( iRankMatch>=0 ){ + pInfo->aConstraintUsage[iRankMatch].argvIndex = 1 + (iCons>=0); + pInfo->aConstraintUsage[iRankMatch].omit = 1; + } pInfo->idxNum = ePlan; return SQLITE_OK; @@ -543,6 +555,10 @@ static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){ sqlite3_free(pCsr->apRankArg); sqlite3_free(pCsr->zSpecial); + if( CsrFlagTest(pCsr, FTS5CSR_FREE_ZRANK) ){ + sqlite3_free(pCsr->zRank); + sqlite3_free(pCsr->zRankArgs); + } sqlite3_free(pCsr); return SQLITE_OK; } @@ -636,7 +652,8 @@ static int fts5CursorFirstSorted(Fts5Table *pTab, Fts5Cursor *pCsr, int bAsc){ int nByte; int rc = SQLITE_OK; char *zSql; - const char *zRank = pConfig->zRank ? pConfig->zRank : FTS5_DEFAULT_RANK; + const char *zRank = pCsr->zRank; + const char *zRankArgs = pCsr->zRankArgs; nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr); nByte = sizeof(Fts5Sorter) + sizeof(int) * nPhrase; @@ -654,8 +671,8 @@ static int fts5CursorFirstSorted(Fts5Table *pTab, Fts5Cursor *pCsr, int bAsc){ ** If SQLite a built-in statement cache, this wouldn't be a problem. */ zSql = sqlite3_mprintf("SELECT rowid, rank FROM %Q.%Q ORDER BY %s(%s%s%s) %s", pConfig->zDb, pConfig->zName, zRank, pConfig->zName, - (pConfig->zRankArgs ? ", " : ""), - (pConfig->zRankArgs ? pConfig->zRankArgs : ""), + (zRankArgs ? ", " : ""), + (zRankArgs ? zRankArgs : ""), bAsc ? "ASC" : "DESC" ); if( zSql==0 ){ @@ -747,14 +764,13 @@ static Fts5Auxiliary *fts5FindAuxiliary(Fts5Table *pTab, const char *zName){ static int fts5FindRankFunction(Fts5Cursor *pCsr){ Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); Fts5Config *pConfig = pTab->pConfig; - const char *zRank = pConfig->zRank; int rc = SQLITE_OK; Fts5Auxiliary *pAux; + const char *zRank = pCsr->zRank; + const char *zRankArgs = pCsr->zRankArgs; - if( zRank==0 ) zRank = FTS5_DEFAULT_RANK; - - if( pTab->pConfig->zRankArgs ){ - char *zSql = sqlite3_mprintf("SELECT %s", pTab->pConfig->zRankArgs); + if( zRankArgs ){ + char *zSql = sqlite3_mprintf("SELECT %s", zRankArgs); if( zSql==0 ){ rc = SQLITE_NOMEM; }else{ @@ -796,10 +812,50 @@ static int fts5FindRankFunction(Fts5Cursor *pCsr){ return rc; } + +static int fts5CursorParseRank( + Fts5Config *pConfig, + Fts5Cursor *pCsr, + sqlite3_value *pRank +){ + int rc = SQLITE_OK; + if( pRank ){ + const char *z = (const char*)sqlite3_value_text(pRank); + char *zRank = 0; + char *zRankArgs = 0; + + rc = sqlite3Fts5ConfigParseRank(z, &zRank, &zRankArgs); + if( rc==SQLITE_OK ){ + pCsr->zRank = zRank; + pCsr->zRankArgs = zRankArgs; + CsrFlagSet(pCsr, FTS5CSR_FREE_ZRANK); + }else if( rc==SQLITE_ERROR ){ + pCsr->base.pVtab->zErrMsg = sqlite3_mprintf( + "parse error in rank function: %s", z + ); + } + }else{ + if( pConfig->zRank ){ + pCsr->zRank = (char*)pConfig->zRank; + pCsr->zRankArgs = (char*)pConfig->zRankArgs; + }else{ + pCsr->zRank = (char*)FTS5_DEFAULT_RANK; + pCsr->zRankArgs = 0; + } + } + return rc; +} + /* ** This is the xFilter interface for the virtual table. See ** the virtual table xFilter method documentation for additional ** information. +** +** There are three possible query strategies: +** +** 1. Full-text search using a MATCH operator. +** 2. A by-rowid lookup. +** 3. A full-table scan. */ static int fts5FilterMethod( sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ @@ -813,10 +869,13 @@ static int fts5FilterMethod( int bAsc = ((idxNum & FTS5_ORDER_ASC) ? 1 : 0); int rc = SQLITE_OK; + assert( nVal<=2 ); assert( pCsr->pStmt==0 ); assert( pCsr->pExpr==0 ); assert( pCsr->csrflags==0 ); assert( pCsr->pRank==0 ); + assert( pCsr->zRank==0 ); + assert( pCsr->zRankArgs==0 ); if( pTab->pSortCsr ){ /* If pSortCsr is non-NULL, then this call is being made as part of @@ -835,19 +894,22 @@ static int fts5FilterMethod( if( ePlan==FTS5_PLAN_MATCH || ePlan==FTS5_PLAN_SORTED_MATCH ){ const char *zExpr = (const char*)sqlite3_value_text(apVal[0]); - if( zExpr[0]=='*' ){ - /* The user has issued a query of the form "MATCH '*...'". This - ** indicates that the MATCH expression is not a full text query, - ** but a request for an internal parameter. */ - rc = fts5SpecialMatch(pTab, pCsr, &zExpr[1]); - }else{ - char **pzErr = &pTab->base.zErrMsg; - rc = sqlite3Fts5ExprNew(pTab->pConfig, zExpr, &pCsr->pExpr, pzErr); - if( rc==SQLITE_OK ){ - if( ePlan==FTS5_PLAN_MATCH ){ - rc = fts5CursorFirst(pTab, pCsr, bAsc); - }else{ - rc = fts5CursorFirstSorted(pTab, pCsr, bAsc); + rc = fts5CursorParseRank(pTab->pConfig, pCsr, (nVal==2 ? apVal[1] : 0)); + if( rc==SQLITE_OK ){ + if( zExpr[0]=='*' ){ + /* The user has issued a query of the form "MATCH '*...'". This + ** indicates that the MATCH expression is not a full text query, + ** but a request for an internal parameter. */ + rc = fts5SpecialMatch(pTab, pCsr, &zExpr[1]); + }else{ + char **pzErr = &pTab->base.zErrMsg; + rc = sqlite3Fts5ExprNew(pTab->pConfig, zExpr, &pCsr->pExpr, pzErr); + if( rc==SQLITE_OK ){ + if( ePlan==FTS5_PLAN_MATCH ){ + rc = fts5CursorFirst(pTab, pCsr, bAsc); + }else{ + rc = fts5CursorFirstSorted(pTab, pCsr, bAsc); + } } } } diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 1fffcbfe51..371c99d745 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -109,6 +109,8 @@ int sqlite3Fts5ConfigLoad(Fts5Config*, int); /* Set the value of a single config attribute */ int sqlite3Fts5ConfigSetValue(Fts5Config*, const char*, sqlite3_value*, int*); +int sqlite3Fts5ConfigParseRank(const char*, char**, char**); + /* ** End of interface to code in fts5_config.c. **************************************************************************/ diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index fc3fe73bba..fd6b051a08 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -552,7 +552,7 @@ static const char *fts5ConfigSkipArgs(const char *pIn){ ** + Zero or more SQL literals in a comma separated list ** + Close parenthesis - ")" */ -static int fts5ConfigParseRank( +int sqlite3Fts5ConfigParseRank( const char *zIn, /* Input string */ char **pzRank, /* OUT: Rank function name */ char **pzRankArgs /* OUT: Rank function arguments */ @@ -647,7 +647,7 @@ int sqlite3Fts5ConfigSetValue( const char *zIn = (const char*)sqlite3_value_text(pVal); char *zRank; char *zRankArgs; - rc = fts5ConfigParseRank(zIn, &zRank, &zRankArgs); + rc = sqlite3Fts5ConfigParseRank(zIn, &zRank, &zRankArgs); if( rc==SQLITE_OK ){ sqlite3_free(pConfig->zRank); sqlite3_free(pConfig->zRankArgs); diff --git a/ext/fts5/test/fts5al.test b/ext/fts5/test/fts5al.test index 926ad951cf..2cf291af64 100644 --- a/ext/fts5/test/fts5al.test +++ b/ext/fts5/test/fts5al.test @@ -178,6 +178,23 @@ do_execsql_test 4.1.1 { } do_execsql_test 4.1.2 { + SELECT rowid, rank FROM t2 + WHERE t2 MATCH 'a' AND rank MATCH 'firstinst()' + ORDER BY rowid ASC +} { + 1 0 2 4 3 6 5 103 + 6 9 7 0 9 102 10 8 +} + +do_execsql_test 4.1.3 { + SELECT rowid, rank FROM t2 + WHERE t2 MATCH 'a' AND rank MATCH 'firstinst()' + ORDER BY rank DESC +} { + 5 103 9 102 6 9 10 8 3 6 2 4 7 0 1 0 +} + +do_execsql_test 4.1.4 { INSERT INTO t2(t2, rank) VALUES('rank', 'firstinst()'); SELECT rowid, rank FROM t2 WHERE t2 MATCH 'a' ORDER BY rowid ASC } { @@ -185,13 +202,13 @@ do_execsql_test 4.1.2 { 6 9 7 0 9 102 10 8 } -do_execsql_test 4.1.3 { +do_execsql_test 4.1.5 { SELECT rowid, rank FROM t2 WHERE t2 MATCH 'a' ORDER BY rank DESC } { 5 103 9 102 6 9 10 8 3 6 2 4 7 0 1 0 } -do_execsql_test 4.1.4 { +do_execsql_test 4.1.6 { INSERT INTO t2(t2, rank) VALUES('rank', 'firstinst ( ) '); SELECT rowid, rank FROM t2 WHERE t2 MATCH 'a' ORDER BY rank DESC } { @@ -216,6 +233,42 @@ do_execsql_test 4.2.2 { 10 121 } +do_execsql_test 4.2.3 { + SELECT rowid, rank FROM t2 + WHERE t2 MATCH 'o + q + g' AND rank MATCH 'rowidplus(112)' +} { + 10 122 +} + +proc rowidmod {cmd imod} { + expr [$cmd xRowid] % $imod +} +sqlite3_fts5_create_function db rowidmod rowidmod +do_execsql_test 4.3.1 { + CREATE VIRTUAL TABLE t3 USING fts5(x); + INSERT INTO t3 VALUES('a one'); + INSERT INTO t3 VALUES('a two'); + INSERT INTO t3 VALUES('a three'); + INSERT INTO t3 VALUES('a four'); + INSERT INTO t3 VALUES('a five'); + INSERT INTO t3(t3, rank) VALUES('rank', 'bm25()'); +} +breakpoint + +do_execsql_test 4.3.2 { + SELECT * FROM t3 + WHERE t3 MATCH 'a' AND rank MATCH 'rowidmod(4)' + ORDER BY rank ASC +} { + {a four} {a five} {a one} {a two} {a three} +} +do_execsql_test 4.3.3 { + SELECT *, rank FROM t3 + WHERE t3 MATCH 'a' AND rank MATCH 'rowidmod(3)' + ORDER BY rank ASC +} { + {a three} 0 {a four} 1 {a one} 1 {a five} 2 {a two} 2 +} finish_test diff --git a/manifest b/manifest index 73bbea9d02..c472027698 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Merge\slatest\strunk\schanges\swith\sthis\sbranch. -D 2015-01-01T18:03:49.016 +C Allow\sthe\srank\scolumn\sto\sbe\sremapped\son\sa\sper-query\sbasis\sby\sincluding\sa\sterm\ssimilar\sto\s"rank\smatch\s'bm25(10,2)'"\sin\sa\swhere\sclause. +D 2015-01-02T14:55:22.175 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 7cd23e4fc91004a6bd081623e1bc6932e44828c0 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -104,12 +104,12 @@ F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl 4199cb887040ee3c3cd59a5171ddb0566904586e F ext/fts5/extract_api_docs.tcl 6320db4a1d0722a4e2069e661381ad75e9889786 -F ext/fts5/fts5.c 37e124e24e5860f9842e5f3ee22129a786c0fd74 +F ext/fts5/fts5.c a80283dca24506f1c748fffbba8d87ae4d348b50 F ext/fts5/fts5.h 4f9d2c477c0ee1907164642471329a82cb6b203b -F ext/fts5/fts5Int.h b5dfed6a1b256ff21d11898f14ab337205844469 +F ext/fts5/fts5Int.h b5d7970b851d2b4f1745cd2d5c95216c9847aef2 F ext/fts5/fts5_aux.c 445e54031ff94174673f4f5aac6c064df20a2a6b F ext/fts5/fts5_buffer.c 1bc5c762bb2e9b4a40b2e8a820a31b809e72eec1 -F ext/fts5/fts5_config.c 73774e37a99218833b767f96bb5af35ebe43b77c +F ext/fts5/fts5_config.c 74a860e10c5583831f04d0088c4a49a3c6eca43d F ext/fts5/fts5_expr.c 27d3d2deebae277c34ae2bb3d501dd879c442ba5 F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 F ext/fts5/fts5_index.c 4a8e8535b4303400ddb5f6fb08152da0d88ebf6f @@ -129,7 +129,7 @@ F ext/fts5/test/fts5ah.test e510c741e9833d6335c87bef2e7f93fecfcc7c1d F ext/fts5/test/fts5ai.test 6a22f43776e1612591392721b535ca28d2c1a19f F ext/fts5/test/fts5aj.test 1a64ab4144f54bd12a520683950bf8460dd74fb3 F ext/fts5/test/fts5ak.test df2669fb76684f03d03918dfb2cf692012251b1f -F ext/fts5/test/fts5al.test c055f1d682f931b8ea6c6e6251d90925f2aa55a1 +F ext/fts5/test/fts5al.test bc873766fec3baae05ba6e76b379bc2f5e8eaf75 F ext/fts5/test/fts5auxdata.test fec4c9113176d351e567eab65fe9917e5ea0ab05 F ext/fts5/test/fts5ea.test 0ef2c89e14c6360ad3905fae44409420d6b5a5c8 F ext/fts5/test/fts5fault1.test b95ed600b88bbbce5390f9097a5a5b7b01b3b9f7 @@ -1270,7 +1270,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P d09f7800cf14f73ea86d037107ef80295b2c173a 66269d0d8e49eb3dc7f508714753584f648bb022 -R 547c6b40048a436817ff50668d0f1e7c +P 4b3651677e7132c4c45605bc1f216fc08ef31198 +R 691df06fdaf9c3542bb10cf702e4a0f8 U dan -Z dab366ead758ca084f8be9b0cb1bbb1d +Z 12262406c5f3f18d2ab88add956e21a6 diff --git a/manifest.uuid b/manifest.uuid index 4878666f57..9ca9c0f833 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -4b3651677e7132c4c45605bc1f216fc08ef31198 \ No newline at end of file +1cd15a1759004d5d321056905dbb6acff20dc7d9 \ No newline at end of file