1
0
mirror of https://github.com/sqlite/sqlite.git synced 2026-01-06 08:01:16 +03:00

Add support for the "colname : <nearset>" syntax to fts5.

FossilOrigin-Name: 004667106e552e832a564b77e242b86f183d4441
This commit is contained in:
dan
2014-07-05 07:54:01 +00:00
parent c18a8fe99c
commit c2642d7c1e
4 changed files with 286 additions and 35 deletions

View File

@@ -99,6 +99,7 @@ struct Fts5Parse {
*/
typedef struct Fts5PoslistIter Fts5PoslistIter;
struct Fts5PoslistIter {
int iCol; /* If (iCol>=0), this column only */
const u8 *a; /* Position list to iterate through */
int n; /* Size of buffer at a[] in bytes */
int i; /* Current offset in a[] */
@@ -116,19 +117,31 @@ static int fts5PoslistIterNext(Fts5PoslistIter *pIter){
pIter->i += getVarint32(&pIter->a[pIter->i], iVal);
if( iVal==1 ){
pIter->i += getVarint32(&pIter->a[pIter->i], iVal);
pIter->iPos = ((u64)iVal << 32);
pIter->i += getVarint32(&pIter->a[pIter->i], iVal);
if( pIter->iCol>=0 && iVal>pIter->iCol ){
pIter->bEof = 1;
}else{
pIter->iPos = ((u64)iVal << 32);
pIter->i += getVarint32(&pIter->a[pIter->i], iVal);
}
}
pIter->iPos += (iVal-2);
}
return pIter->bEof;
}
/*
** Initialize iterator pIter so that it walks the n byte position list
** stored at a[], then advance it to its first usable entry.
**
** The object is zeroed, a[]/n/iCol are stored in it, and
** fts5PoslistIterNext() is called repeatedly until either the iterator
** reports EOF or the value held in the upper 32 bits of pIter->iPos is
** no longer less than iCol.
**
** The return value is pIter->bEof: non-zero if the iterator is already
** at EOF when this function returns, zero otherwise.
**
** NOTE(review): this span is taken from a rendered diff. The one-line
** "static void" signature and the first, unconditional call to
** fts5PoslistIterNext() look like the pre-change text that the lines
** following them replace - confirm against the checked-in file.
*/
static void fts5PoslistIterInit(const u8 *a, int n, Fts5PoslistIter *pIter){
static int fts5PoslistIterInit(
int iCol, /* If (iCol>=0), this column only */
const u8 *a, int n, /* Poslist buffer to iterate through */
Fts5PoslistIter *pIter /* Iterator object to initialize */
){
memset(pIter, 0, sizeof(*pIter));
pIter->a = a;
pIter->n = n;
fts5PoslistIterNext(pIter);
pIter->iCol = iCol;
/* Skip forward past entries whose column part (iPos >> 32) is smaller
** than iCol. The loop body always runs at least once, so the iterator
** is positioned on an entry (or at EOF) on exit. */
do {
fts5PoslistIterNext(pIter);
}while( pIter->bEof==0 && (pIter->iPos >> 32)<iCol );
return pIter->bEof;
}
typedef struct Fts5PoslistWriter Fts5PoslistWriter;
@@ -325,6 +338,7 @@ static int fts5ExprNodeTest(Fts5Expr *pExpr, Fts5ExprNode *pNode){
*/
static int fts5ExprPhraseIsMatch(
Fts5Expr *pExpr, /* Expression pPhrase belongs to */
int iCol, /* If >=0, search for matches in iCol only */
Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */
int *pbMatch /* OUT: Set to true if really a match */
){
@@ -334,6 +348,8 @@ static int fts5ExprPhraseIsMatch(
int i;
int rc = SQLITE_OK;
fts5BufferZero(&pPhrase->poslist);
/* If the aStatic[] array is not large enough, allocate a large array
** using sqlite3_malloc(). This approach could be improved upon. */
if( pPhrase->nTerm>(sizeof(aStatic) / sizeof(aStatic[0])) ){
@@ -346,10 +362,9 @@ static int fts5ExprPhraseIsMatch(
for(i=0; i<pPhrase->nTerm; i++){
int n;
const u8 *a = sqlite3Fts5IterPoslist(pPhrase->aTerm[i].pIter, &n);
fts5PoslistIterInit(a, n, &aIter[i]);
if( fts5PoslistIterInit(iCol, a, n, &aIter[i]) ) goto ismatch_out;
}
fts5BufferZero(&pPhrase->poslist);
while( 1 ){
int bMatch;
i64 iPos = aIter[0].iPos;
@@ -384,6 +399,22 @@ static int fts5ExprPhraseIsMatch(
}
/*
** The near-set object passed as the first argument contains more than
** one phrase. All phrases currently point to the same row. The
** Fts5ExprPhrase.poslist buffers are populated accordingly. This function
** tests if the current row contains instances of each phrase sufficiently
** close together to meet the NEAR constraint. Output variable *pbMatch
** is set to true if it does, or false otherwise.
**
** If no error occurs, SQLITE_OK is returned. Or, if an error does occur,
** an SQLite error code. If a value other than SQLITE_OK is returned, the
** final value of *pbMatch is undefined.
**
** TODO: This function should also edit the position lists associated
** with each phrase to remove any phrase instances that are not part of
** a set of instances that collectively matches the NEAR constraint.
*/
static int fts5ExprNearIsMatch(Fts5ExprNearset *pNear, int *pbMatch){
Fts5PoslistIter aStatic[4];
Fts5PoslistIter *aIter = aStatic;
@@ -392,6 +423,8 @@ static int fts5ExprNearIsMatch(Fts5ExprNearset *pNear, int *pbMatch){
int bMatch;
i64 iMax;
assert( pNear->nPhrase>1 );
/* If the aStatic[] array is not large enough, allocate a large array
** using sqlite3_malloc(). This approach could be improved upon. */
if( pNear->nPhrase>(sizeof(aStatic) / sizeof(aStatic[0])) ){
@@ -403,7 +436,7 @@ static int fts5ExprNearIsMatch(Fts5ExprNearset *pNear, int *pbMatch){
/* Initialize a term iterator for each phrase */
for(i=0; i<pNear->nPhrase; i++){
Fts5Buffer *pPoslist = &pNear->apPhrase[i]->poslist;
fts5PoslistIterInit(pPoslist->p, pPoslist->n, &aIter[i]);
fts5PoslistIterInit(-1, pPoslist->p, pPoslist->n, &aIter[i]);
}
iMax = aIter[0].iPos;
@@ -557,14 +590,14 @@ static int fts5ExprNearNextMatch(
for(i=0; i<pNear->nPhrase; i++){
Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
if( pPhrase->nTerm>1 ){
if( pPhrase->nTerm>1 || pNear->iCol>=0 ){
int bMatch = 0;
rc = fts5ExprPhraseIsMatch(pExpr, pPhrase, &bMatch);
rc = fts5ExprPhraseIsMatch(pExpr, pNear->iCol, pPhrase, &bMatch);
if( rc!=SQLITE_OK ) return rc;
if( bMatch==0 ) break;
}else{
int n;
u8 *a = sqlite3Fts5IterPoslist(pPhrase->aTerm[0].pIter, &n);
const u8 *a = sqlite3Fts5IterPoslist(pPhrase->aTerm[0].pIter, &n);
fts5BufferSet(&rc, &pPhrase->poslist, n, a);
}
}
@@ -1033,6 +1066,82 @@ static char *fts5PrintfAppend(char *zApp, const char *zFmt, ...){
return zNew;
}
/*
** Compose a tcl-readable representation of expression pExpr. Return a
** pointer to a buffer containing that representation. It is the
** responsibility of the caller to at some point free the buffer using
** sqlite3_free().
**
** An FTS5_STRING node is rendered as a bracketed command invocation:
**
**     [<zNearsetCmd> ?-col C? ?-near N? -- {phrase1} {phrase2} ...]
**
** "-col C" is included only if the nearset has a column restriction
** (pNear->iCol>=0) and "-near N" only if the nearset contains more than
** one phrase. The terms of each phrase are separated by single spaces.
**
** Any other node is rendered as "<left> OP <right>", where OP is "&&",
** "&& !" or "||" for FTS5_AND, FTS5_NOT and FTS5_OR respectively. An
** operand that is not itself an FTS5_STRING node is wrapped in
** parentheses.
**
** NULL is returned if any part of the string could not be built.
**
** Parameter pConfig is not inspected here; it is only passed through to
** the recursive calls.
*/
static char *fts5ExprPrintTcl(
  Fts5Config *pConfig,
  const char *zNearsetCmd,
  Fts5ExprNode *pExpr
){
  char *zRet = 0;
  if( pExpr->eType==FTS5_STRING ){
    Fts5ExprNearset *pNear = pExpr->pNear;
    int i;
    int iTerm;

    /* Every append is checked before the next one is attempted, so that a
    ** failure can never be followed by an append onto a NULL prefix.
    ** NOTE(review): fts5PrintfAppend() is defined outside this view; it is
    ** assumed to return NULL on failure, as the checks below imply. */
    zRet = fts5PrintfAppend(zRet, "[%s ", zNearsetCmd);
    if( zRet==0 ) return 0;

    if( pNear->iCol>=0 ){
      zRet = fts5PrintfAppend(zRet, "-col %d ", pNear->iCol);
      if( zRet==0 ) return 0;
    }

    if( pNear->nPhrase>1 ){
      zRet = fts5PrintfAppend(zRet, "-near %d ", pNear->nNear);
      if( zRet==0 ) return 0;
    }

    /* "--" separates the options from the list of phrases */
    zRet = fts5PrintfAppend(zRet, "--");
    if( zRet==0 ) return 0;

    for(i=0; i<pNear->nPhrase; i++){
      Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];

      zRet = fts5PrintfAppend(zRet, " {");
      for(iTerm=0; zRet && iTerm<pPhrase->nTerm; iTerm++){
        char *zTerm = pPhrase->aTerm[iTerm].zTerm;
        zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" ", zTerm);
      }

      if( zRet ) zRet = fts5PrintfAppend(zRet, "}");
      if( zRet==0 ) return 0;
    }

    /* zRet is known to be non-NULL here */
    zRet = fts5PrintfAppend(zRet, "]");
    if( zRet==0 ) return 0;

  }else{
    const char *zOp = 0;          /* Operator text (points at a literal) */
    char *z1 = 0;                 /* Rendering of the left-hand operand */
    char *z2 = 0;                 /* Rendering of the right-hand operand */
    switch( pExpr->eType ){
      case FTS5_AND: zOp = "&&"; break;
      case FTS5_NOT: zOp = "&& !"; break;
      case FTS5_OR:  zOp = "||"; break;
      default: assert( 0 );
    }

    z1 = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pLeft);
    z2 = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pRight);
    if( z1 && z2 ){
      /* Parenthesize any operand that is itself a compound expression */
      int b1 = pExpr->pLeft->eType!=FTS5_STRING;
      int b2 = pExpr->pRight->eType!=FTS5_STRING;
      zRet = sqlite3_mprintf("%s%s%s %s %s%s%s",
          b1 ? "(" : "", z1, b1 ? ")" : "",
          zOp,
          b2 ? "(" : "", z2, b2 ? ")" : ""
      );
    }
    /* The operand strings are released whether or not zRet was built */
    sqlite3_free(z1);
    sqlite3_free(z2);
  }

  return zRet;
}
static char *fts5ExprPrint(Fts5Config *pConfig, Fts5ExprNode *pExpr){
char *zRet = 0;
if( pExpr->eType==FTS5_STRING ){
@@ -1117,12 +1226,18 @@ static void fts5ExprFunction(
Fts5Expr *pExpr = 0;
int rc;
int i;
int bTcl = sqlite3_user_data(pCtx)!=0;
const char **azConfig; /* Array of arguments for Fts5Config */
const char *zNearsetCmd = "nearset";
int nConfig; /* Size of azConfig[] */
Fts5Config *pConfig = 0;
nConfig = nArg + 2;
if( bTcl && nArg>1 ){
zNearsetCmd = (const char*)sqlite3_value_text(apVal[1]);
}
nConfig = nArg + 2 - bTcl;
azConfig = (const char**)sqlite3_malloc(sizeof(char*) * nConfig);
if( azConfig==0 ){
sqlite3_result_error_nomem(pCtx);
@@ -1131,8 +1246,8 @@ static void fts5ExprFunction(
azConfig[0] = 0;
azConfig[1] = "main";
azConfig[2] = "tbl";
for(i=1; i<nArg; i++){
azConfig[i+2] = (const char*)sqlite3_value_text(apVal[i]);
for(i=1+bTcl; i<nArg; i++){
azConfig[i+2-bTcl] = (const char*)sqlite3_value_text(apVal[i]);
}
zExpr = (const char*)sqlite3_value_text(apVal[0]);
@@ -1141,7 +1256,12 @@ static void fts5ExprFunction(
rc = sqlite3Fts5ExprNew(pConfig, zExpr, &pExpr, &zErr);
}
if( rc==SQLITE_OK ){
char *zText = fts5ExprPrint(pConfig, pExpr->pRoot);
char *zText;
if( bTcl ){
zText = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pRoot);
}else{
zText = fts5ExprPrint(pConfig, pExpr->pRoot);
}
if( rc==SQLITE_OK ){
sqlite3_result_text(pCtx, zText, -1, SQLITE_TRANSIENT);
sqlite3_free(zText);
@@ -1166,9 +1286,22 @@ static void fts5ExprFunction(
** UDF with the SQLite handle passed as the only argument.
*/
int sqlite3Fts5ExprInit(sqlite3 *db){
/* NOTE(review): this span is taken from a rendered diff. The direct
** sqlite3_create_function() call immediately below looks like the
** pre-change text that the table-driven code after it replaces (both
** declare "rc") - confirm against the checked-in file. */
int rc = sqlite3_create_function(
db, "fts5_expr", -1, SQLITE_UTF8, 0, fts5ExprFunction, 0, 0
);
/* Table of SQL functions to register. For each entry, z is the SQL
** function name, p the user-data pointer and x the implementation, in the
** order they are handed to sqlite3_create_function() below. */
struct Fts5ExprFunc {
const char *z;
void *p;
void (*x)(sqlite3_context*,int,sqlite3_value**);
} aFunc[] = {
{ "fts5_expr", 0, fts5ExprFunction },
/* Same implementation, but with non-NULL user data. fts5ExprFunction()
** tests sqlite3_user_data(pCtx)!=0 to select the Tcl output format. */
{ "fts5_expr_tcl", (void*)1, fts5ExprFunction },
};
int i;
int rc = SQLITE_OK;
/* Register each function in turn, stopping at the first failure. Each is
** registered with nArg==-1 and SQLITE_UTF8. */
for(i=0; rc==SQLITE_OK && i<(sizeof(aFunc) / sizeof(aFunc[0])); i++){
struct Fts5ExprFunc *p = &aFunc[i];
rc = sqlite3_create_function(db, p->z, -1, SQLITE_UTF8, p->p, p->x, 0, 0);
}
/* SQLITE_OK, or the error code of the first registration that failed */
return rc;
}

View File

@@ -1,5 +1,5 @@
C Add\ssupport\sfor\sNEAR\sexpressions\sto\sfts5.
D 2014-07-03T20:39:39.548
C Add\ssupport\sfor\sthe\s"colname\s:\s<nearset>"\ssyntax\sto\sfts5.
D 2014-07-05T07:54:01.680
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@@ -107,7 +107,7 @@ F ext/fts5/fts5.c 1af3184dd9c0e5c1686f71202d6b6cac8f225f05
F ext/fts5/fts5Int.h b7a684ff3508ab24437886f8bc873a16f494a7db
F ext/fts5/fts5_buffer.c f1a26a79e2943fe4388e531fa141941b5eb6d31a
F ext/fts5/fts5_config.c 94f1b4cb4de6a7cd5780c14adb0198e289df8cef
F ext/fts5/fts5_expr.c 84dd8c1f313f795b41f3fc5f73bee013e8301b68
F ext/fts5/fts5_expr.c 618e6641c8dc428c146ec84bf30ff0b3da6b28c7
F ext/fts5/fts5_index.c d8ab9712e38dc1beb9a9145ec89e18dc083c0467
F ext/fts5/fts5_storage.c 7848d8f8528d798bba159900ea310a6d4a279da8
F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43
@@ -594,7 +594,7 @@ F test/fts4noti.test 524807f0c36d49deea7920cdd4cd687408b58849
F test/fts4unicode.test 01ec3fe2a7c3cfff3b4c0581b83caa11b33efa36
F test/fts5aa.test c8d3b9694f6b2864161c7437408464a535d19343
F test/fts5ab.test 4db86a9473ee2a8c2cb30e0d81df21c6022f99b6
F test/fts5ac.test cc4fc45a85fde7fbe8da135aed6b25d2795ba9f6
F test/fts5ac.test c7ca34a477d638195a839c961e1b572890dc5d0d
F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4
F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d
F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef
@@ -1190,7 +1190,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
P 2e5652e6526b8fb3f5c163168d95bc0bb4c93686
R f894350f59d9ccf43dee7a3b5d2aafff
P 250ae8d40115e2e47cc5a1e8a427fa8c0a89124d
R 213fb14ea45e358dcb308401853c4570
U dan
Z be26a54244aa4231a7a300eba9899e25
Z 0b8632fefc20738326985d3c409c9be8

View File

@@ -1 +1 @@
250ae8d40115e2e47cc5a1e8a427fa8c0a89124d
004667106e552e832a564b77e242b86f183d4441

View File

@@ -137,6 +137,89 @@ do_test 1.1 {
}
} {}
# Return 1 if string $phrase occurs somewhere within string $value, as
# reported by [string first], or 0 if it does not.
proc phrasematch {phrase value} {
  return [expr {[string first $phrase $value] >= 0}]
}
# Usage:
#
#   nearmatch nNear phraselist value
#
# Tcl model of a NEAR match against a single column value. $phraselist is
# a list of phrases and $value the text to search; both are folded to
# lower case before they are compared. Returns 1 for a match, else 0.
#
# If $phraselist contains exactly one phrase, the result is simply
# [phrasematch] of that phrase against the whole of $value.
#
# Otherwise $value is treated as a list of tokens. $nNear is clamped to
# one less than the number of tokens. Then, for each index i from $nNear
# up to the last token, every phrase is looked for (using [phrasematch])
# in the token range
#
#   [i - nNear - len(phrase), i - 1 + len(phrase)]
#
# where len(phrase) is the number of tokens in that phrase. The value
# matches as soon as some i satisfies all of the phrases.
#
proc nearmatch {nNear phraselist value} {
  set nPhrase [llength $phraselist]

  set phraselist [string tolower $phraselist]
  set value [string tolower $value]

  if {$nPhrase==1} {
    return [phrasematch [lindex $phraselist 0] $value]
  }

  set nValue [llength $value]
  if {$nNear >= $nValue} {set nNear [expr {$nValue-1}]}

  # The clamp above means the loop body runs at least once for any
  # non-negative nNear. bMatch is given a value up front anyway, so that
  # it is defined whatever the arguments.
  set bMatch 0
  for {set i $nNear} {$i < $nValue} {incr i} {
    set bMatch 1
    foreach phrase $phraselist {
      set nTok [llength $phrase]
      # lrange clips out-of-range indexes, so iMin may be negative and
      # iMax may run past the end of $value.
      set iMin [expr {$i - $nNear - $nTok}]
      set iMax [expr {$i - 1 + $nTok}]
      set subdoc [lrange $value $iMin $iMax]
      if {![phrasematch $phrase $subdoc]} {
        set bMatch 0
        break
      }
    }
    if {$bMatch} break
  }

  return $bMatch
}
# Usage:
#
#   nearset aCol ?-near N? ?-col C? -- phrase1 phrase2...
#
# $aCol is a list of column values. Returns 1 if [nearmatch] succeeds for
# the phrases against at least one eligible column, or 0 otherwise.
#
# Options (all must appear before the mandatory "--"):
#
#   -near N    NEAR distance passed to [nearmatch]. Default 10.
#   -col C     If C>=0, only the column at index C of $aCol (counting
#              from 0) is considered. Default -1 (all columns).
#
# An error is raised if "--" is missing, if an option name is not
# recognized or if an option is not followed by a value.
#
proc nearset {aCol args} {
  set O(-near) 10
  set O(-col)  -1

  set nOpt [lsearch -exact $args --]
  if {$nOpt<0} { error "no -- option" }

  # Options come in name/value pairs, so an odd number of words before
  # "--" means some option is missing its value. Reject that here rather
  # than silently storing an empty string for it.
  if {$nOpt % 2} { error "option without a value before --" }

  foreach {k v} [lrange $args 0 [expr {$nOpt-1}]] {
    if {[info exists O($k)]==0} { error "unrecognized option $k" }
    set O($k) $v
  }

  set phraselist [lrange $args [expr {$nOpt+1}] end]

  set iCol -1
  foreach col $aCol {
    incr iCol
    # Skip columns excluded by the -col filter
    if {$O(-col)>=0 && $O(-col)!=$iCol} continue
    if {[nearmatch $O(-near) $phraselist $col]} {
      return 1
    }
  }

  return 0
}
# Return the list of ids from $::data whose column values satisfy FTS5
# query $expr, according to the Tcl model.
#
# The query is first translated by the fts5_expr_tcl() SQL function into a
# Tcl expression built from "nearset $cols ..." invocations. That
# expression is then evaluated once per {id x y} triple in $::data, with
# local variable "cols" holding the list {x y}. Each matching id is added
# to the front of the result, so ids come back in the reverse of the order
# in which they appear in $::data.
#
# NOTE(review): the "db" command and the ::data variable are created
# outside this view.
#
proc matchdata {expr} {
  set tclexpr [db one {SELECT fts5_expr_tcl($expr, 'nearset $cols', 'x', 'y')}]

  set matched [list]
  foreach {id x y} $::data {
    # The generated expression refers to $cols, so this name is fixed
    set cols [list $x $y]
    if {[expr $tclexpr]} {
      set matched [concat $id $matched]
    }
  }

  return $matched
}
foreach {tn phrase} {
1 "o"
2 "b q"
@@ -149,16 +232,51 @@ foreach {tn phrase} {
9 "no"
10 "L O O L V V K"
} {
set res [list]
foreach {id x y} $data {
set pat [string tolower $phrase]
if {[string first $pat $x]>=0 || [string first $pat $y]>=0} {
set res [concat $id $res]
}
}
set n [llength $res]
do_execsql_test 1.2.$tn.$n {
SELECT rowid FROM xx WHERE xx match '"' || $phrase || '"'
set expr "\"$phrase\""
set res [matchdata $expr]
do_execsql_test 1.2.$tn.[llength $res] {
SELECT rowid FROM xx WHERE xx match $expr
} $res
}
# Test the "nearmatch" command.
#
# Each case calls [nearmatch nNear phraselist value] directly and checks
# the 0/1 result. Covered: phrases in either order (2.0, 2.1), a NEAR
# distance that is too small (2.2), multi-token phrases (2.3, 2.4), a NEAR
# distance larger than the value itself (2.5) and single-phrase lists
# (2.6, 2.7).
#
do_test 2.0 { nearmatch 2 {a b} {a x x b} } 1
do_test 2.1 { nearmatch 2 {b a} {a x x b} } 1
do_test 2.2 { nearmatch 1 {b a} {a x x b} } 0
do_test 2.3 { nearmatch 1 {"a b" "c d"} {x x a b x c d} } 1
do_test 2.4 { nearmatch 1 {"a b" "c d"} {x a b x x c d} } 0
do_test 2.5 { nearmatch 400 {a b} {a x x b} } 1
do_test 2.6 { nearmatch 0 {a} {a x x b} } 1
do_test 2.7 { nearmatch 0 {b} {a x x b} } 1
# Check the text produced by the fts5_expr_tcl() SQL function. Each case
# supplies an FTS5 query ($expr) and the Tcl expression it is expected to
# be rendered as ($tclexpr) when 'N $x' is passed as the second argument,
# which is the text emitted at the start of each bracketed command.
foreach {tn expr tclexpr} {
1 {a b} {[N $x -- {a}] && [N $x -- {b}]}
} {
do_execsql_test 3.$tn {SELECT fts5_expr_tcl($expr, 'N $x')} [list $tclexpr]
}
#-------------------------------------------------------------------------
#
# Run NEAR queries, with and without a "colname :" prefix, against table
# xx and compare the rowids returned by MATCH with those computed by the
# Tcl model in [matchdata].
#
# NOTE(review): these cases are named "2.$tn.N" even though they follow
# the "3." cases and the "2." prefix is already used by the nearmatch
# tests - consider whether a distinct prefix was intended.
foreach {tn expr} {
1 { NEAR(r c) }
2 { NEAR(r c, 5) }
3 { NEAR(r c, 3) }
4 { NEAR(r c, 2) }
5 { NEAR(r c, 0) }
6 { NEAR(a b c) }
7 { NEAR(a b c, 8) }
8 { x : NEAR(r c) }
9 { y : NEAR(r c) }
10 { x : "r c" }
11 { y : "r c" }
} {
# Expected rowids according to the Tcl model. The number of expected rows
# is made part of the test name.
set res [matchdata $expr]
do_execsql_test 2.$tn.[llength $res] {
SELECT rowid FROM xx WHERE xx match $expr
} $res
}