1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-07-30 19:03:16 +03:00

Add code for the matchinfo 'longest common substring' feature.

FossilOrigin-Name: 71011a4f9baf09ec6935ad591145252bf3c286ed
This commit is contained in:
dan
2010-11-24 19:26:18 +00:00
parent d6e12384bd
commit f9376540fa
6 changed files with 185 additions and 12 deletions

View File

@ -970,7 +970,123 @@ static int fts3MatchinfoSelectDoctotal(
if( paLen ) *paLen = a; if( paLen ) *paLen = a;
return SQLITE_OK; return SQLITE_OK;
} }
/*
** Iteration state for one phrase of the query, used by fts3MatchinfoLcs()
** and advanced by fts3LcsIteratorAdvance(). One object is allocated for
** each phrase.
*/
typedef struct LcsIterator {
  Fts3Expr *pExpr;                /* Expression node of the phrase */
  char *pRead;                    /* Read cursor into the phrase's position data */
  int iPosOffset;                 /* Negated token count of this and all earlier
                                  ** phrases; iPos restarts from this value at
                                  ** the start of every column */
  int iCol;                       /* Column being read, or LCS_ITERATOR_FINISHED */
  int iPos;                       /* Current position, adjusted by iPosOffset */
} LcsIterator;
/*
** Value stored in LcsIterator.iCol once an iterator has consumed its whole
** position list. It is the largest 32-bit signed value, so a finished
** iterator never compares equal to (or less than) a real column number.
**
** The definition deliberately has no trailing semi-colon so that the macro
** can be used inside expressions as well as on the right of an assignment.
*/
#define LCS_ITERATOR_FINISHED 0x7FFFFFFF
/*
** Callback passed to fts3ExprIterate() by fts3MatchinfoLcs(). It stores
** the expression node of phrase number iPhrase in the pExpr field of the
** corresponding entry of the LcsIterator array that pCtx points to.
** Always returns SQLITE_OK.
*/
static int fts3MatchinfoLcsCb(
  Fts3Expr *pExpr,                /* Phrase expression node */
  int iPhrase,                    /* Phrase number (numbered from zero) */
  void *pCtx                      /* Pointer to array of LcsIterator objects */
){
  LcsIterator *pSlot = &((LcsIterator *)pCtx)[iPhrase];
  pSlot->pExpr = pExpr;
  return SQLITE_OK;
}
/*
** Move iterator pIter on to the next entry of the data that pIter->pRead
** points at. The data is consumed as a series of varints:
**
**   0      End of data. iCol is set to LCS_ITERATOR_FINISHED.
**   1      Start of a new column. The next varint is the column number,
**          iPos is reset to iPosOffset, and the varint after that is the
**          first position value for the new column.
**   other  A position value; (value-2) is added to iPos.
**
** Return 1 if the iterator moved on to a new column or reached the end of
** its data, or 0 if it stepped to another position in the same column.
*/
static int fts3LcsIteratorAdvance(LcsIterator *pIter){
  char *pCsr = pIter->pRead;      /* Local copy of the read cursor */
  sqlite3_int64 iVal;             /* Most recently decoded varint */
  int bLeftCol = 0;               /* True if the current column was left */

  pCsr += sqlite3Fts3GetVarint(pCsr, &iVal);
  if( iVal==0 ){
    /* Terminator: nothing further to read for this phrase. */
    pIter->iCol = LCS_ITERATOR_FINISHED;
    pIter->pRead = pCsr;
    return 1;
  }

  if( iVal==1 ){
    /* Column marker: load the column number, restart the position counter
    ** and fetch the first position value of the new column. */
    pCsr += sqlite3Fts3GetVarint(pCsr, &iVal);
    pIter->iCol = iVal;
    pIter->iPos = pIter->iPosOffset;
    pCsr += sqlite3Fts3GetVarint(pCsr, &iVal);
    bLeftCol = 1;
  }
  pIter->iPos += (iVal-2);

  pIter->pRead = pCsr;
  return bLeftCol;
}
/*
** Compute one value per table column for the row the cursor currently
** points at and store it in pInfo->aMatchinfo[0..nCol-1]. For column iCol
** the value is the largest number of consecutive phrases (in query order)
** whose offset-adjusted positions within that column were found to be equal
** while stepping through the position data of all phrases.
**
** Return SQLITE_OK if successful, or SQLITE_NOMEM if the temporary array
** of iterators cannot be allocated.
*/
static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){
  LcsIterator *aIter;             /* One iterator per phrase */
  int i;                          /* Used to iterate through phrases */
  int iCol;                       /* Used to iterate through columns */
  int nToken = 0;                 /* Negated running total of phrase tokens */

  /* Allocate and populate the array of LcsIterator objects. The array
  ** contains one element for each matchable phrase in the query.
  **
  ** NOTE(review): the array is sized from pCsr->nPhrase while the loops
  ** below are bounded by pInfo->nPhrase. The two are assumed to be equal;
  ** confirm against the code that populates MatchInfo.
  **/
  aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase);
  if( !aIter ) return SQLITE_NOMEM;
  memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase);
  (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter);

  /* Point each iterator at the position data of its phrase for the current
  ** row. Because iPosOffset grows more negative by the phrase's token count
  ** for each successive phrase, the adjusted positions of two neighbouring
  ** phrases can be compared directly for equality further down.
  */
  for(i=0; i<pInfo->nPhrase; i++){
    LcsIterator *pIter = &aIter[i];
    nToken -= pIter->pExpr->pPhrase->nToken;
    pIter->iPosOffset = nToken;
    pIter->pRead = sqlite3Fts3FindPositions(pIter->pExpr, pCsr->iPrevId, -1);
    if( pIter->pRead ){
      pIter->iPos = pIter->iPosOffset;
      fts3LcsIteratorAdvance(&aIter[i]);
    }else{
      /* No position data for this phrase in the current row. */
      pIter->iCol = LCS_ITERATOR_FINISHED;
    }
  }

  for(iCol=0; iCol<pInfo->nCol; iCol++){
    int nLcs = 0;                 /* Largest run found in this column */
    int nLive = 0;                /* Iterators still positioned in iCol */

    for(i=0; i<pInfo->nPhrase; i++){
      assert( aIter[i].iCol>=iCol );
      if( aIter[i].iCol==iCol ) nLive++;
    }

    while( nLive>0 ){
      LcsIterator *pAdv = 0;      /* Iterator with the smallest iPos */
      int nThisLcs = 0;           /* Length of the run ending at phrase i */

      for(i=0; i<pInfo->nPhrase; i++){
        LcsIterator *pIter = &aIter[i];
        if( iCol!=pIter->iCol ){
          /* This phrase is not in the current column: any run is broken. */
          nThisLcs = 0;
          continue;
        }
        if( pAdv==0 || pIter->iPos<pAdv->iPos ){
          pAdv = pIter;
        }
        /* pIter[-1] is only read when nThisLcs!=0, which requires that
        ** the previous phrase (i-1) was processed in this same pass. */
        if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){
          nThisLcs++;
        }else{
          nThisLcs = 1;
        }
        if( nThisLcs>nLcs ) nLcs = nThisLcs;
      }

      /* nLive>0 means at least one iterator had iCol==iCol above. */
      assert( pAdv!=0 );
      if( fts3LcsIteratorAdvance(pAdv) ) nLive--;
    }

    pInfo->aMatchinfo[iCol] = nLcs;
  }

  sqlite3_free(aIter);
  return SQLITE_OK;
}
static int fts3MatchinfoValues( static int fts3MatchinfoValues(
Fts3Cursor *pCsr, /* FTS3 cursor object */ Fts3Cursor *pCsr, /* FTS3 cursor object */
@ -1048,9 +1164,12 @@ static int fts3MatchinfoValues(
break; break;
} }
case FTS3_MATCHINFO_LCS:
fts3MatchinfoLcs(pCsr, pInfo);
break;
default: default:
assert( zArg[i]==FTS3_MATCHINFO_LCS ); assert( !"this cannot happen" );
} }
pInfo->aMatchinfo += fts3MatchinfoSize(pInfo, zArg[i]); pInfo->aMatchinfo += fts3MatchinfoSize(pInfo, zArg[i]);

View File

@ -328,6 +328,9 @@ int sqlite3Fts3SelectDocsize(
return fts3SelectDocsize(pTab, SQL_SELECT_DOCSIZE, iDocid, ppStmt); return fts3SelectDocsize(pTab, SQL_SELECT_DOCSIZE, iDocid, ppStmt);
} }
/*
** Placeholder. The body is intentionally empty: neither pExpr nor aOut is
** read or written by this function.
*/
void sqlite3Fts3MatchinfoLcs(Fts3Expr *pExpr, u32 *aOut){
  (void)pExpr;
  (void)aOut;
}
/* /*
** Similar to fts3SqlStmt(). Except, after binding the parameters in ** Similar to fts3SqlStmt(). Except, after binding the parameters in
** array apVal[] to the SQL statement identified by eStmt, the statement ** array apVal[] to the SQL statement identified by eStmt, the statement

View File

@ -1,5 +1,5 @@
C Fix\scrashes\sthat\scan\soccur\swhen\squeries\sare\srun\son\san\sFTS4\stable\scontaining\szero\srows. C Add\scode\sfor\sthe\smatchinfo\s'longest\scommon\ssubstring'\sfeature.
D 2010-11-24T15:02:23 D 2010-11-24T19:26:19
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in e7a59672eaeb04408d1fa8501618d7501a3c5e39 F Makefile.in e7a59672eaeb04408d1fa8501618d7501a3c5e39
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -69,11 +69,11 @@ F ext/fts3/fts3_hash.c 3c8f6387a4a7f5305588b203fa7c887d753e1f1c
F ext/fts3/fts3_hash.h 8331fb2206c609f9fc4c4735b9ab5ad6137c88ec F ext/fts3/fts3_hash.h 8331fb2206c609f9fc4c4735b9ab5ad6137c88ec
F ext/fts3/fts3_icu.c ac494aed69835008185299315403044664bda295 F ext/fts3/fts3_icu.c ac494aed69835008185299315403044664bda295
F ext/fts3/fts3_porter.c 8df6f6efcc4e9e31f8bf73a4007c2e9abca1dfba F ext/fts3/fts3_porter.c 8df6f6efcc4e9e31f8bf73a4007c2e9abca1dfba
F ext/fts3/fts3_snippet.c 967ca2d3201fd6555062c7e929bcc2b89ef8dcb8 F ext/fts3/fts3_snippet.c be2648ff61a18af2d4a33eadbb26c0a6f06a6e26
F ext/fts3/fts3_tokenizer.c 1301b0ee3ef414caae3257a702215925cc48cd9c F ext/fts3/fts3_tokenizer.c 1301b0ee3ef414caae3257a702215925cc48cd9c
F ext/fts3/fts3_tokenizer.h 13ffd9fcb397fec32a05ef5cd9e0fa659bf3dbd3 F ext/fts3/fts3_tokenizer.h 13ffd9fcb397fec32a05ef5cd9e0fa659bf3dbd3
F ext/fts3/fts3_tokenizer1.c 6e5cbaa588924ac578263a598e4fb9f5c9bb179d F ext/fts3/fts3_tokenizer1.c 6e5cbaa588924ac578263a598e4fb9f5c9bb179d
F ext/fts3/fts3_write.c 9b2db92b815fdd50b5531eb6db912c71feca6a70 F ext/fts3/fts3_write.c b4e5b4c74f755a6f494dab9c131ad9bb04bab50c
F ext/fts3/fts3speed.tcl b54caf6a18d38174f1a6e84219950d85e98bb1e9 F ext/fts3/fts3speed.tcl b54caf6a18d38174f1a6e84219950d85e98bb1e9
F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100 F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100
F ext/icu/README.txt bf8461d8cdc6b8f514c080e4e10dc3b2bbdfefa9 F ext/icu/README.txt bf8461d8cdc6b8f514c080e4e10dc3b2bbdfefa9
@ -440,13 +440,13 @@ F test/fts3corrupt2.test 6d96efae2f8a6af3eeaf283aba437e6d0e5447ba
F test/fts3cov.test e0fb00d8b715ddae4a94c305992dfc3ef70353d7 F test/fts3cov.test e0fb00d8b715ddae4a94c305992dfc3ef70353d7
F test/fts3d.test 95fb3c862cbc4297c93fceb9a635543744e9ef52 F test/fts3d.test 95fb3c862cbc4297c93fceb9a635543744e9ef52
F test/fts3defer.test d6cb0db9b5997ecf863d96ff419f83f8f2c87f4f F test/fts3defer.test d6cb0db9b5997ecf863d96ff419f83f8f2c87f4f
F test/fts3defer2.test 548eb2ca7e6a1515a7bc151721e223be14c51f9d F test/fts3defer2.test da840efaedebfdd54293d04b36098e2d9872caa6
F test/fts3e.test 1f6c6ac9cc8b772ca256e6b22aaeed50c9350851 F test/fts3e.test 1f6c6ac9cc8b772ca256e6b22aaeed50c9350851
F test/fts3expr.test 5e745b2b6348499d9ef8d59015de3182072c564c F test/fts3expr.test 5e745b2b6348499d9ef8d59015de3182072c564c
F test/fts3expr2.test 18da930352e5693eaa163a3eacf96233b7290d1a F test/fts3expr2.test 18da930352e5693eaa163a3eacf96233b7290d1a
F test/fts3fault.test 81fd40ceb12f33f9d16c5637d0f8d95d4556c456 F test/fts3fault.test 81fd40ceb12f33f9d16c5637d0f8d95d4556c456
F test/fts3malloc.test 9c8cc3f885bb4dfc66d0460c52f68f45e4710d1b F test/fts3malloc.test 9c8cc3f885bb4dfc66d0460c52f68f45e4710d1b
F test/fts3matchinfo.test 2dfdf80a927e3dc02f2e42337e5aa0b835994f6e F test/fts3matchinfo.test 41991bd810c6896a07c19a236ba3b489b16ba970
F test/fts3near.test 2e318ee434d32babd27c167142e2b94ddbab4844 F test/fts3near.test 2e318ee434d32babd27c167142e2b94ddbab4844
F test/fts3query.test ef79d31fdb355d094baec1c1b24b60439a1fb8a2 F test/fts3query.test ef79d31fdb355d094baec1c1b24b60439a1fb8a2
F test/fts3rnd.test 707533ce943f490443ce5e696236bb1675a37635 F test/fts3rnd.test 707533ce943f490443ce5e696236bb1675a37635
@ -889,7 +889,7 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
P ae40b34cf7c24c9601bdfb5cbe5b20f05a376ea8 P ed61fd20adef44d0f6b2345e0205b25f3641a15e
R bf9b07886b7dc4c93620c54d1a7703f7 R c6aa2ef350d272455977bd9187b17fdc
U dan U dan
Z c507e6c3579e8a599c5f904476d64b95 Z a65c681ffac166b61fab0e3fae2a5af0

View File

@ -1 +1 @@
ed61fd20adef44d0f6b2345e0205b25f3641a15e 71011a4f9baf09ec6935ad591145252bf3c286ed

View File

@ -91,6 +91,7 @@ foreach {tn sql} {
} }
} { } {
execsql $sql execsql $sql
do_execsql_test 2.2.$tn { do_execsql_test 2.2.$tn {
SELECT mit(matchinfo(t2, 'pcxnal')) FROM t2 WHERE t2 MATCH 'a b'; SELECT mit(matchinfo(t2, 'pcxnal')) FROM t2 WHERE t2 MATCH 'a b';
} [list \ } [list \

View File

@ -181,6 +181,7 @@ do_execsql_test 4.1.0 {
INSERT INTO t4 VALUES('a b c d e', 'f g h i j'); INSERT INTO t4 VALUES('a b c d e', 'f g h i j');
INSERT INTO t4 VALUES('f g h i j', 'a b c d e'); INSERT INTO t4 VALUES('f g h i j', 'a b c d e');
} }
do_matchinfo_test 4.1.1 t4 {t4 MATCH 'a b c'} { do_matchinfo_test 4.1.1 t4 {t4 MATCH 'a b c'} {
p {3 3} p {3 3}
c {2 2} c {2 2}
@ -192,8 +193,12 @@ do_matchinfo_test 4.1.1 t4 {t4 MATCH 'a b c'} {
l {{5 5} {5 5}} l {{5 5} {5 5}}
a {{5 5} {5 5}} a {{5 5} {5 5}}
s {{3 0} {0 3}}
xxxxxxxxxxxxxxxxxx - pcx - xpc - ccc - pppxpcpcx - laxnpc - xxxxxxxxxxxxxxxxxx - pcx - xpc - ccc - pppxpcpcx - laxnpc -
xpxsscplax -
} }
do_matchinfo_test 4.1.2 t4 {t4 MATCH '"g h i"'} { do_matchinfo_test 4.1.2 t4 {t4 MATCH '"g h i"'} {
p {1 1} p {1 1}
c {2 2} c {2 2}
@ -205,8 +210,53 @@ do_matchinfo_test 4.1.2 t4 {t4 MATCH '"g h i"'} {
l {{5 5} {5 5}} l {{5 5} {5 5}}
a {{5 5} {5 5}} a {{5 5} {5 5}}
s {{0 1} {1 0}}
xxxxxxxxxxxxxxxxxx - pcx - xpc - ccc - pppxpcpcx - laxnpc - xxxxxxxxxxxxxxxxxx - pcx - xpc - ccc - pppxpcpcx - laxnpc -
sxsxs -
} }
# Further checks of the matchinfo 's' value against table t4, using
# multi-term queries, quoted phrases mixed with single terms, and a query
# that repeats the same term.
do_matchinfo_test 4.1.3 t4 {t4 MATCH 'a b'} { s {{2 0} {0 2}} }
do_matchinfo_test 4.1.4 t4 {t4 MATCH '"a b" c'} { s {{2 0} {0 2}} }
do_matchinfo_test 4.1.5 t4 {t4 MATCH 'a "b c"'} { s {{2 0} {0 2}} }
do_matchinfo_test 4.1.6 t4 {t4 MATCH 'd d'} { s {{1 0} {0 1}} }
# Create table t5. Each row is inserted as a single value made up of
# repeated or alternating one-letter tokens.
do_execsql_test 4.2.0 {
CREATE VIRTUAL TABLE t5 USING fts4;
INSERT INTO t5 VALUES('a a a a a');
INSERT INTO t5 VALUES('a b a b a');
INSERT INTO t5 VALUES('c b c b c');
}
# Expected 's' results for t5. Each expected list holds one value per
# matching row (e.g. three values for 'a OR b', which matches every row).
do_matchinfo_test 4.2.1 t5 {t5 MATCH 'a a'} { s {2 1} }
do_matchinfo_test 4.2.2 t5 {t5 MATCH 'a b'} { s {2} }
do_matchinfo_test 4.2.3 t5 {t5 MATCH 'a b a'} { s {3} }
do_matchinfo_test 4.2.4 t5 {t5 MATCH 'a a a'} { s {3 1} }
do_matchinfo_test 4.2.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} }
do_matchinfo_test 4.2.6 t5 {t5 MATCH 'a OR b'} { s {1 2 1} }
# Add two rows to t5 that each contain 50000 repetitions of a single token
# after the leading 'x y'. Each statement has its own test name so that a
# failure report identifies exactly one of them.
do_execsql_test 4.3.0.1 "INSERT INTO t5 VALUES('x y [string repeat {b } 50000]')"
do_execsql_test 4.3.0.2 "INSERT INTO t5 VALUES('x y [string repeat {x } 50000]')"
#do_matchinfo_test 4.3.1 t5 {t5 MATCH 'a a'} { s {2 1} }
#do_matchinfo_test 4.3.2 t5 {t5 MATCH 'a b'} { s {2} }
#do_matchinfo_test 4.3.3 t5 {t5 MATCH 'a b a'} { s {3} }
#do_matchinfo_test 4.3.4 t5 {t5 MATCH 'a a a'} { s {3 1} }
#do_matchinfo_test 4.3.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} }
#do_matchinfo_test 4.3.6 t5 {t5 MATCH 'a OR b'} { s {1 2 1 1} }
#
#do_execsql_test 4.4.0 {
# UPDATE t5_segments
# SET block = zeroblob(length(block))
# WHERE length(block)>10000;
#}
#
#do_matchinfo_test 4.4.1 t5 {t5 MATCH 'a a'} { s {2 1} }
#do_matchinfo_test 4.4.2 t5 {t5 MATCH 'a b'} { s {2} }
#do_matchinfo_test 4.4.3 t5 {t5 MATCH 'a b a'} { s {3} }
#do_matchinfo_test 4.4.4 t5 {t5 MATCH 'a a a'} { s {3 1} }
#do_matchinfo_test 4.4.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} }
#do_matchinfo_test 4.4.6 t5 {t5 MATCH 'a OR b'} { s {1 2 1 1} }
finish_test finish_test