mirror of
https://github.com/sqlite/sqlite.git
synced 2025-07-30 19:03:16 +03:00
Add code for the matchinfo 'longest common substring' feature.
FossilOrigin-Name: 71011a4f9baf09ec6935ad591145252bf3c286ed
This commit is contained in:
@ -970,7 +970,123 @@ static int fts3MatchinfoSelectDoctotal(
|
|||||||
if( paLen ) *paLen = a;
|
if( paLen ) *paLen = a;
|
||||||
return SQLITE_OK;
|
return SQLITE_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
typedef struct LcsIterator LcsIterator;
|
||||||
|
struct LcsIterator {
|
||||||
|
Fts3Expr *pExpr; /* Pointer to phrase expression */
|
||||||
|
char *pRead; /* Cursor used to iterate through aDoclist */
|
||||||
|
int iPosOffset; /* Tokens count up to end of this phrase */
|
||||||
|
int iCol; /* Current column number */
|
||||||
|
int iPos; /* Current position */
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
** Sentinel stored in LcsIterator.iCol once an iterator has no more data.
** No trailing semicolon: the macro must be usable inside expressions
** (comparisons, function arguments) as well as in plain assignments.
*/
#define LCS_ITERATOR_FINISHED 0x7FFFFFFF
|
||||||
|
|
||||||
|
static int fts3MatchinfoLcsCb(
|
||||||
|
Fts3Expr *pExpr, /* Phrase expression node */
|
||||||
|
int iPhrase, /* Phrase number (numbered from zero) */
|
||||||
|
void *pCtx /* Pointer to MatchInfo structure */
|
||||||
|
){
|
||||||
|
LcsIterator *aIter = (LcsIterator *)pCtx;
|
||||||
|
aIter[iPhrase].pExpr = pExpr;
|
||||||
|
return SQLITE_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
** Advance iterator pIter by one step through the data at pIter->pRead.
**
** One varint is decoded per step and interpreted as follows:
**
**   0     - end of data. iCol is set to LCS_ITERATOR_FINISHED.
**   1     - column switch. The next varint is stored in iCol, iPos is
**           reset to iPosOffset, and the varint after that is consumed
**           as the first position delta of the new column.
**   other - a position delta; (value-2) is added to iPos.
**
** Return 1 if the iterator is no longer positioned in the column it was
** in before the call (it moved to another column or ran out of data), or
** 0 if it moved to a further position within the same column.
*/
static int fts3LcsIteratorAdvance(LcsIterator *pIter){
  char *p = pIter->pRead;
  sqlite3_int64 iVal;
  int bLeftCol = 0;

  p += sqlite3Fts3GetVarint(p, &iVal);
  if( iVal==0 ){
    /* Terminator: nothing further to read for this phrase. */
    pIter->iCol = LCS_ITERATOR_FINISHED;
    pIter->pRead = p;
    return 1;
  }

  if( iVal==1 ){
    /* Column marker: pick up the column number, restart the position
    ** counter and fetch the first delta belonging to the new column. */
    p += sqlite3Fts3GetVarint(p, &iVal);
    pIter->iCol = iVal;
    pIter->iPos = pIter->iPosOffset;
    p += sqlite3Fts3GetVarint(p, &iVal);
    bLeftCol = 1;
  }
  pIter->iPos += (iVal-2);

  pIter->pRead = p;
  return bLeftCol;
}
|
||||||
|
|
||||||
|
/*
** Compute the matchinfo 'longest common substring' value for each column
** of the current row and store it in pInfo->aMatchinfo[0..nCol-1].
**
** One LcsIterator is created per query phrase. Each iterator's iPosOffset
** is set to minus the running total of phrase token counts, so iPos values
** are positions shifted by that amount (presumably so that phrases which
** occur back-to-back in the document, in query order, end up with equal
** iPos values - NOTE(review): confirm against the position-list format).
**
** For every column, the iterator with the smallest iPos is advanced
** repeatedly until no iterator remains in that column. Before each
** advance, the phrases are scanned in order and the longest run of
** neighbouring phrases whose iterators share the same iPos is recorded.
**
** Returns SQLITE_OK on success, or SQLITE_NOMEM if the iterator array
** cannot be allocated.
*/
static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){
  LcsIterator *aIter;
  int i;
  int iCol;
  int nToken = 0;

  /* Allocate and populate the array of LcsIterator objects. The array
  ** contains one element for each matchable phrase in the query.
  **/
  aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase);
  if( !aIter ) return SQLITE_NOMEM;
  memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase);
  (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter);

  for(i=0; i<pInfo->nPhrase; i++){
    LcsIterator *pIter = &aIter[i];
    nToken -= pIter->pExpr->pPhrase->nToken;
    pIter->iPosOffset = nToken;
    pIter->pRead = sqlite3Fts3FindPositions(pIter->pExpr, pCsr->iPrevId, -1);
    if( pIter->pRead ){
      /* Position the iterator on its first entry. */
      pIter->iPos = pIter->iPosOffset;
      fts3LcsIteratorAdvance(&aIter[i]);
    }else{
      /* No position data for this phrase in the current row. */
      pIter->iCol = LCS_ITERATOR_FINISHED;
    }
  }

  for(iCol=0; iCol<pInfo->nCol; iCol++){
    int nLcs = 0;                 /* Longest run found in this column */
    int nLive = 0;                /* Iterators still inside column iCol */

    for(i=0; i<pInfo->nPhrase; i++){
      assert( aIter[i].iCol>=iCol );
      if( aIter[i].iCol==iCol ) nLive++;
    }

    while( nLive>0 ){
      LcsIterator *pAdv = 0;      /* Iterator with the smallest iPos */
      int nThisLcs = 0;           /* Length of the run ending at phrase i */

      for(i=0; i<pInfo->nPhrase; i++){
        LcsIterator *pIter = &aIter[i];

        if( iCol!=pIter->iCol ){
          /* This phrase has no entry in column iCol, which breaks any run. */
          nThisLcs = 0;
          continue;
        }

        if( pAdv==0 || pIter->iPos<pAdv->iPos ){
          pAdv = pIter;
        }

        /* pIter[-1] is only examined when nThisLcs!=0, which implies i>0
        ** and that the previous phrase is also in this column. */
        if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){
          nThisLcs++;
        }else{
          nThisLcs = 1;
        }

        if( nThisLcs>nLcs ) nLcs = nThisLcs;
      }

      /* nLive>0 guarantees that at least one iterator matched iCol above,
      ** so pAdv is non-NULL here. */
      if( fts3LcsIteratorAdvance(pAdv) ) nLive--;
    }

    pInfo->aMatchinfo[iCol] = nLcs;
  }

  sqlite3_free(aIter);
  return SQLITE_OK;
}
|
||||||
|
|
||||||
static int fts3MatchinfoValues(
|
static int fts3MatchinfoValues(
|
||||||
Fts3Cursor *pCsr, /* FTS3 cursor object */
|
Fts3Cursor *pCsr, /* FTS3 cursor object */
|
||||||
@ -1048,9 +1164,12 @@ static int fts3MatchinfoValues(
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case FTS3_MATCHINFO_LCS:
|
||||||
|
fts3MatchinfoLcs(pCsr, pInfo);
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
assert( zArg[i]==FTS3_MATCHINFO_LCS );
|
assert( !"this cannot happen" );
|
||||||
}
|
}
|
||||||
|
|
||||||
pInfo->aMatchinfo += fts3MatchinfoSize(pInfo, zArg[i]);
|
pInfo->aMatchinfo += fts3MatchinfoSize(pInfo, zArg[i]);
|
||||||
|
@ -328,6 +328,9 @@ int sqlite3Fts3SelectDocsize(
|
|||||||
return fts3SelectDocsize(pTab, SQL_SELECT_DOCSIZE, iDocid, ppStmt);
|
return fts3SelectDocsize(pTab, SQL_SELECT_DOCSIZE, iDocid, ppStmt);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
** Placeholder with an empty implementation: neither argument is read and
** nothing is written to aOut.
*/
void sqlite3Fts3MatchinfoLcs(Fts3Expr *pExpr, u32 *aOut){
  (void)pExpr;
  (void)aOut;
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
** Similar to fts3SqlStmt(). Except, after binding the parameters in
|
** Similar to fts3SqlStmt(). Except, after binding the parameters in
|
||||||
** array apVal[] to the SQL statement identified by eStmt, the statement
|
** array apVal[] to the SQL statement identified by eStmt, the statement
|
||||||
|
18
manifest
18
manifest
@ -1,5 +1,5 @@
|
|||||||
C Fix\scrashes\sthat\scan\soccur\swhen\squeries\sare\srun\son\san\sFTS4\stable\scontaining\szero\srows.
|
C Add\scode\sfor\sthe\smatchinfo\s'longest\scommon\ssubstring'\sfeature.
|
||||||
D 2010-11-24T15:02:23
|
D 2010-11-24T19:26:19
|
||||||
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
|
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
|
||||||
F Makefile.in e7a59672eaeb04408d1fa8501618d7501a3c5e39
|
F Makefile.in e7a59672eaeb04408d1fa8501618d7501a3c5e39
|
||||||
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
|
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
|
||||||
@ -69,11 +69,11 @@ F ext/fts3/fts3_hash.c 3c8f6387a4a7f5305588b203fa7c887d753e1f1c
|
|||||||
F ext/fts3/fts3_hash.h 8331fb2206c609f9fc4c4735b9ab5ad6137c88ec
|
F ext/fts3/fts3_hash.h 8331fb2206c609f9fc4c4735b9ab5ad6137c88ec
|
||||||
F ext/fts3/fts3_icu.c ac494aed69835008185299315403044664bda295
|
F ext/fts3/fts3_icu.c ac494aed69835008185299315403044664bda295
|
||||||
F ext/fts3/fts3_porter.c 8df6f6efcc4e9e31f8bf73a4007c2e9abca1dfba
|
F ext/fts3/fts3_porter.c 8df6f6efcc4e9e31f8bf73a4007c2e9abca1dfba
|
||||||
F ext/fts3/fts3_snippet.c 967ca2d3201fd6555062c7e929bcc2b89ef8dcb8
|
F ext/fts3/fts3_snippet.c be2648ff61a18af2d4a33eadbb26c0a6f06a6e26
|
||||||
F ext/fts3/fts3_tokenizer.c 1301b0ee3ef414caae3257a702215925cc48cd9c
|
F ext/fts3/fts3_tokenizer.c 1301b0ee3ef414caae3257a702215925cc48cd9c
|
||||||
F ext/fts3/fts3_tokenizer.h 13ffd9fcb397fec32a05ef5cd9e0fa659bf3dbd3
|
F ext/fts3/fts3_tokenizer.h 13ffd9fcb397fec32a05ef5cd9e0fa659bf3dbd3
|
||||||
F ext/fts3/fts3_tokenizer1.c 6e5cbaa588924ac578263a598e4fb9f5c9bb179d
|
F ext/fts3/fts3_tokenizer1.c 6e5cbaa588924ac578263a598e4fb9f5c9bb179d
|
||||||
F ext/fts3/fts3_write.c 9b2db92b815fdd50b5531eb6db912c71feca6a70
|
F ext/fts3/fts3_write.c b4e5b4c74f755a6f494dab9c131ad9bb04bab50c
|
||||||
F ext/fts3/fts3speed.tcl b54caf6a18d38174f1a6e84219950d85e98bb1e9
|
F ext/fts3/fts3speed.tcl b54caf6a18d38174f1a6e84219950d85e98bb1e9
|
||||||
F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100
|
F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100
|
||||||
F ext/icu/README.txt bf8461d8cdc6b8f514c080e4e10dc3b2bbdfefa9
|
F ext/icu/README.txt bf8461d8cdc6b8f514c080e4e10dc3b2bbdfefa9
|
||||||
@ -440,13 +440,13 @@ F test/fts3corrupt2.test 6d96efae2f8a6af3eeaf283aba437e6d0e5447ba
|
|||||||
F test/fts3cov.test e0fb00d8b715ddae4a94c305992dfc3ef70353d7
|
F test/fts3cov.test e0fb00d8b715ddae4a94c305992dfc3ef70353d7
|
||||||
F test/fts3d.test 95fb3c862cbc4297c93fceb9a635543744e9ef52
|
F test/fts3d.test 95fb3c862cbc4297c93fceb9a635543744e9ef52
|
||||||
F test/fts3defer.test d6cb0db9b5997ecf863d96ff419f83f8f2c87f4f
|
F test/fts3defer.test d6cb0db9b5997ecf863d96ff419f83f8f2c87f4f
|
||||||
F test/fts3defer2.test 548eb2ca7e6a1515a7bc151721e223be14c51f9d
|
F test/fts3defer2.test da840efaedebfdd54293d04b36098e2d9872caa6
|
||||||
F test/fts3e.test 1f6c6ac9cc8b772ca256e6b22aaeed50c9350851
|
F test/fts3e.test 1f6c6ac9cc8b772ca256e6b22aaeed50c9350851
|
||||||
F test/fts3expr.test 5e745b2b6348499d9ef8d59015de3182072c564c
|
F test/fts3expr.test 5e745b2b6348499d9ef8d59015de3182072c564c
|
||||||
F test/fts3expr2.test 18da930352e5693eaa163a3eacf96233b7290d1a
|
F test/fts3expr2.test 18da930352e5693eaa163a3eacf96233b7290d1a
|
||||||
F test/fts3fault.test 81fd40ceb12f33f9d16c5637d0f8d95d4556c456
|
F test/fts3fault.test 81fd40ceb12f33f9d16c5637d0f8d95d4556c456
|
||||||
F test/fts3malloc.test 9c8cc3f885bb4dfc66d0460c52f68f45e4710d1b
|
F test/fts3malloc.test 9c8cc3f885bb4dfc66d0460c52f68f45e4710d1b
|
||||||
F test/fts3matchinfo.test 2dfdf80a927e3dc02f2e42337e5aa0b835994f6e
|
F test/fts3matchinfo.test 41991bd810c6896a07c19a236ba3b489b16ba970
|
||||||
F test/fts3near.test 2e318ee434d32babd27c167142e2b94ddbab4844
|
F test/fts3near.test 2e318ee434d32babd27c167142e2b94ddbab4844
|
||||||
F test/fts3query.test ef79d31fdb355d094baec1c1b24b60439a1fb8a2
|
F test/fts3query.test ef79d31fdb355d094baec1c1b24b60439a1fb8a2
|
||||||
F test/fts3rnd.test 707533ce943f490443ce5e696236bb1675a37635
|
F test/fts3rnd.test 707533ce943f490443ce5e696236bb1675a37635
|
||||||
@ -889,7 +889,7 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
|
|||||||
F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
|
F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
|
||||||
F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
|
F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
|
||||||
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
|
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
|
||||||
P ae40b34cf7c24c9601bdfb5cbe5b20f05a376ea8
|
P ed61fd20adef44d0f6b2345e0205b25f3641a15e
|
||||||
R bf9b07886b7dc4c93620c54d1a7703f7
|
R c6aa2ef350d272455977bd9187b17fdc
|
||||||
U dan
|
U dan
|
||||||
Z c507e6c3579e8a599c5f904476d64b95
|
Z a65c681ffac166b61fab0e3fae2a5af0
|
||||||
|
@ -1 +1 @@
|
|||||||
ed61fd20adef44d0f6b2345e0205b25f3641a15e
|
71011a4f9baf09ec6935ad591145252bf3c286ed
|
@ -91,6 +91,7 @@ foreach {tn sql} {
|
|||||||
}
|
}
|
||||||
} {
|
} {
|
||||||
execsql $sql
|
execsql $sql
|
||||||
|
|
||||||
do_execsql_test 2.2.$tn {
|
do_execsql_test 2.2.$tn {
|
||||||
SELECT mit(matchinfo(t2, 'pcxnal')) FROM t2 WHERE t2 MATCH 'a b';
|
SELECT mit(matchinfo(t2, 'pcxnal')) FROM t2 WHERE t2 MATCH 'a b';
|
||||||
} [list \
|
} [list \
|
||||||
|
@ -181,6 +181,7 @@ do_execsql_test 4.1.0 {
|
|||||||
INSERT INTO t4 VALUES('a b c d e', 'f g h i j');
|
INSERT INTO t4 VALUES('a b c d e', 'f g h i j');
|
||||||
INSERT INTO t4 VALUES('f g h i j', 'a b c d e');
|
INSERT INTO t4 VALUES('f g h i j', 'a b c d e');
|
||||||
}
|
}
|
||||||
|
|
||||||
do_matchinfo_test 4.1.1 t4 {t4 MATCH 'a b c'} {
|
do_matchinfo_test 4.1.1 t4 {t4 MATCH 'a b c'} {
|
||||||
p {3 3}
|
p {3 3}
|
||||||
c {2 2}
|
c {2 2}
|
||||||
@ -192,8 +193,12 @@ do_matchinfo_test 4.1.1 t4 {t4 MATCH 'a b c'} {
|
|||||||
l {{5 5} {5 5}}
|
l {{5 5} {5 5}}
|
||||||
a {{5 5} {5 5}}
|
a {{5 5} {5 5}}
|
||||||
|
|
||||||
|
s {{3 0} {0 3}}
|
||||||
|
|
||||||
xxxxxxxxxxxxxxxxxx - pcx - xpc - ccc - pppxpcpcx - laxnpc -
|
xxxxxxxxxxxxxxxxxx - pcx - xpc - ccc - pppxpcpcx - laxnpc -
|
||||||
|
xpxsscplax -
|
||||||
}
|
}
|
||||||
|
|
||||||
do_matchinfo_test 4.1.2 t4 {t4 MATCH '"g h i"'} {
|
do_matchinfo_test 4.1.2 t4 {t4 MATCH '"g h i"'} {
|
||||||
p {1 1}
|
p {1 1}
|
||||||
c {2 2}
|
c {2 2}
|
||||||
@ -205,8 +210,53 @@ do_matchinfo_test 4.1.2 t4 {t4 MATCH '"g h i"'} {
|
|||||||
l {{5 5} {5 5}}
|
l {{5 5} {5 5}}
|
||||||
a {{5 5} {5 5}}
|
a {{5 5} {5 5}}
|
||||||
|
|
||||||
|
s {{0 1} {1 0}}
|
||||||
|
|
||||||
xxxxxxxxxxxxxxxxxx - pcx - xpc - ccc - pppxpcpcx - laxnpc -
|
xxxxxxxxxxxxxxxxxx - pcx - xpc - ccc - pppxpcpcx - laxnpc -
|
||||||
|
sxsxs -
|
||||||
}
|
}
|
||||||
|
|
||||||
|
do_matchinfo_test 4.1.3 t4 {t4 MATCH 'a b'} { s {{2 0} {0 2}} }
|
||||||
|
do_matchinfo_test 4.1.4 t4 {t4 MATCH '"a b" c'} { s {{2 0} {0 2}} }
|
||||||
|
do_matchinfo_test 4.1.5 t4 {t4 MATCH 'a "b c"'} { s {{2 0} {0 2}} }
|
||||||
|
do_matchinfo_test 4.1.6 t4 {t4 MATCH 'd d'} { s {{1 0} {0 1}} }
|
||||||
|
|
||||||
|
do_execsql_test 4.2.0 {
|
||||||
|
CREATE VIRTUAL TABLE t5 USING fts4;
|
||||||
|
INSERT INTO t5 VALUES('a a a a a');
|
||||||
|
INSERT INTO t5 VALUES('a b a b a');
|
||||||
|
INSERT INTO t5 VALUES('c b c b c');
|
||||||
|
}
|
||||||
|
do_matchinfo_test 4.2.1 t5 {t5 MATCH 'a a'} { s {2 1} }
|
||||||
|
do_matchinfo_test 4.2.2 t5 {t5 MATCH 'a b'} { s {2} }
|
||||||
|
do_matchinfo_test 4.2.3 t5 {t5 MATCH 'a b a'} { s {3} }
|
||||||
|
do_matchinfo_test 4.2.4 t5 {t5 MATCH 'a a a'} { s {3 1} }
|
||||||
|
do_matchinfo_test 4.2.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} }
|
||||||
|
do_matchinfo_test 4.2.6 t5 {t5 MATCH 'a OR b'} { s {1 2 1} }
|
||||||
|
|
||||||
|
do_execsql_test 4.3.0 "INSERT INTO t5 VALUES('x y [string repeat {b } 50000]')";
|
||||||
|
do_execsql_test 4.3.0 "INSERT INTO t5 VALUES('x y [string repeat {x } 50000]')";
|
||||||
|
|
||||||
|
#do_matchinfo_test 4.3.1 t5 {t5 MATCH 'a a'} { s {2 1} }
|
||||||
|
#do_matchinfo_test 4.3.2 t5 {t5 MATCH 'a b'} { s {2} }
|
||||||
|
#do_matchinfo_test 4.3.3 t5 {t5 MATCH 'a b a'} { s {3} }
|
||||||
|
#do_matchinfo_test 4.3.4 t5 {t5 MATCH 'a a a'} { s {3 1} }
|
||||||
|
#do_matchinfo_test 4.3.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} }
|
||||||
|
#do_matchinfo_test 4.3.6 t5 {t5 MATCH 'a OR b'} { s {1 2 1 1} }
|
||||||
|
#
|
||||||
|
#do_execsql_test 4.4.0 {
|
||||||
|
# UPDATE t5_segments
|
||||||
|
# SET block = zeroblob(length(block))
|
||||||
|
# WHERE length(block)>10000;
|
||||||
|
#}
|
||||||
|
#
|
||||||
|
#do_matchinfo_test 4.4.1 t5 {t5 MATCH 'a a'} { s {2 1} }
|
||||||
|
#do_matchinfo_test 4.4.2 t5 {t5 MATCH 'a b'} { s {2} }
|
||||||
|
#do_matchinfo_test 4.4.3 t5 {t5 MATCH 'a b a'} { s {3} }
|
||||||
|
#do_matchinfo_test 4.4.4 t5 {t5 MATCH 'a a a'} { s {3 1} }
|
||||||
|
#do_matchinfo_test 4.4.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} }
|
||||||
|
#do_matchinfo_test 4.4.6 t5 {t5 MATCH 'a OR b'} { s {1 2 1 1} }
|
||||||
|
|
||||||
|
|
||||||
finish_test
|
finish_test
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user