mirror of
https://github.com/sqlite/sqlite.git
synced 2025-07-29 08:01:23 +03:00
Merge branches "fix-onerow-opt", "faster-openread", "fts5-delay-tokenizer" and "enhanced-raise", each containing minor enhancements prepared for 3.47, into this branch.
FossilOrigin-Name: 6dc6472175bccbed15ebf6811c209d1a0b5fad60158fb32040210f2cdae916a6
This commit is contained in:
@ -142,6 +142,15 @@ struct Fts5Colset {
|
||||
*/
|
||||
|
||||
typedef struct Fts5Config Fts5Config;
|
||||
typedef struct Fts5TokenizerConfig Fts5TokenizerConfig;
|
||||
|
||||
struct Fts5TokenizerConfig {
|
||||
Fts5Tokenizer *pTok; /* Tokenizer instance, or NULL if not yet loaded */
|
||||
fts5_tokenizer *pTokApi; /* Methods (xTokenize, xDelete) used with pTok */
|
||||
const char **azArg; /* Arguments parsed from the tokenize=... directive */
|
||||
int nArg; /* Number of entries in azArg[] */
|
||||
int ePattern; /* FTS5_PATTERN_XXX constant */
|
||||
};
|
||||
|
||||
/*
|
||||
** An instance of the following structure encodes all information that can
|
||||
@ -184,6 +193,7 @@ typedef struct Fts5Config Fts5Config;
|
||||
*/
|
||||
struct Fts5Config {
|
||||
sqlite3 *db; /* Database handle */
|
||||
Fts5Global *pGlobal; /* Global fts5 object for handle db */
|
||||
char *zDb; /* Database holding FTS index (e.g. "main") */
|
||||
char *zName; /* Name of FTS index */
|
||||
int nCol; /* Number of columns */
|
||||
@ -199,10 +209,8 @@ struct Fts5Config {
|
||||
int bTokendata; /* "tokendata=" option value (dflt==0) */
|
||||
int eDetail; /* FTS5_DETAIL_XXX value */
|
||||
char *zContentExprlist;
|
||||
Fts5Tokenizer *pTok;
|
||||
fts5_tokenizer *pTokApi;
|
||||
Fts5TokenizerConfig t;
|
||||
int bLock; /* True when table is preparing statement */
|
||||
int ePattern; /* FTS_PATTERN_XXX constant */
|
||||
|
||||
/* Values loaded from the %_config table */
|
||||
int iVersion; /* fts5 file format 'version' */
|
||||
@ -597,13 +605,7 @@ struct Fts5Table {
|
||||
Fts5Index *pIndex; /* Full-text index */
|
||||
};
|
||||
|
||||
int sqlite3Fts5GetTokenizer(
|
||||
Fts5Global*,
|
||||
const char **azArg,
|
||||
int nArg,
|
||||
Fts5Config*,
|
||||
char **pzErr
|
||||
);
|
||||
int sqlite3Fts5LoadTokenizer(Fts5Config *pConfig);
|
||||
|
||||
Fts5Table *sqlite3Fts5TableFromCsrid(Fts5Global*, i64);
|
||||
|
||||
@ -866,6 +868,7 @@ int sqlite3Fts5TokenizerPattern(
|
||||
int (*xCreate)(void*, const char**, int, Fts5Tokenizer**),
|
||||
Fts5Tokenizer *pTok
|
||||
);
|
||||
int sqlite3Fts5TokenizerPreload(Fts5TokenizerConfig*);
|
||||
/*
|
||||
** End of interface to code in fts5_tokenizer.c.
|
||||
**************************************************************************/
|
||||
|
@ -298,12 +298,11 @@ static int fts5ConfigParseSpecial(
|
||||
if( sqlite3_strnicmp("tokenize", zCmd, nCmd)==0 ){
|
||||
const char *p = (const char*)zArg;
|
||||
sqlite3_int64 nArg = strlen(zArg) + 1;
|
||||
char **azArg = sqlite3Fts5MallocZero(&rc, sizeof(char*) * nArg);
|
||||
char *pDel = sqlite3Fts5MallocZero(&rc, nArg * 2);
|
||||
char *pSpace = pDel;
|
||||
char **azArg = sqlite3Fts5MallocZero(&rc, (sizeof(char*) + 2) * nArg);
|
||||
|
||||
if( azArg && pSpace ){
|
||||
if( pConfig->pTok ){
|
||||
if( azArg ){
|
||||
char *pSpace = (char*)&azArg[nArg];
|
||||
if( pConfig->t.azArg ){
|
||||
*pzErr = sqlite3_mprintf("multiple tokenize=... directives");
|
||||
rc = SQLITE_ERROR;
|
||||
}else{
|
||||
@ -326,16 +325,14 @@ static int fts5ConfigParseSpecial(
|
||||
*pzErr = sqlite3_mprintf("parse error in tokenize directive");
|
||||
rc = SQLITE_ERROR;
|
||||
}else{
|
||||
rc = sqlite3Fts5GetTokenizer(pGlobal,
|
||||
(const char**)azArg, (int)nArg, pConfig,
|
||||
pzErr
|
||||
);
|
||||
pConfig->t.azArg = (const char**)azArg;
|
||||
pConfig->t.nArg = nArg;
|
||||
azArg = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sqlite3_free(azArg);
|
||||
sqlite3_free(pDel);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -412,16 +409,6 @@ static int fts5ConfigParseSpecial(
|
||||
return SQLITE_ERROR;
|
||||
}
|
||||
|
||||
/*
|
||||
** Allocate an instance of the default tokenizer ("simple") at
|
||||
** Fts5Config.pTokenizer. Return SQLITE_OK if successful, or an SQLite error
|
||||
** code if an error occurs.
|
||||
*/
|
||||
static int fts5ConfigDefaultTokenizer(Fts5Global *pGlobal, Fts5Config *pConfig){
|
||||
assert( pConfig->pTok==0 && pConfig->pTokApi==0 );
|
||||
return sqlite3Fts5GetTokenizer(pGlobal, 0, 0, pConfig, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
** Gobble up the first bareword or quoted word from the input buffer zIn.
|
||||
** Return a pointer to the character immediately following the last in
|
||||
@ -554,6 +541,7 @@ int sqlite3Fts5ConfigParse(
|
||||
*ppOut = pRet = (Fts5Config*)sqlite3_malloc(sizeof(Fts5Config));
|
||||
if( pRet==0 ) return SQLITE_NOMEM;
|
||||
memset(pRet, 0, sizeof(Fts5Config));
|
||||
pRet->pGlobal = pGlobal;
|
||||
pRet->db = db;
|
||||
pRet->iCookie = -1;
|
||||
|
||||
@ -640,13 +628,6 @@ int sqlite3Fts5ConfigParse(
|
||||
rc = SQLITE_ERROR;
|
||||
}
|
||||
|
||||
/* If a tokenizer= option was successfully parsed, the tokenizer has
|
||||
** already been allocated. Otherwise, allocate an instance of the default
|
||||
** tokenizer (unicode61) now. */
|
||||
if( rc==SQLITE_OK && pRet->pTok==0 ){
|
||||
rc = fts5ConfigDefaultTokenizer(pGlobal, pRet);
|
||||
}
|
||||
|
||||
/* If no zContent option was specified, fill in the default values. */
|
||||
if( rc==SQLITE_OK && pRet->zContent==0 ){
|
||||
const char *zTail = 0;
|
||||
@ -688,9 +669,10 @@ int sqlite3Fts5ConfigParse(
|
||||
void sqlite3Fts5ConfigFree(Fts5Config *pConfig){
|
||||
if( pConfig ){
|
||||
int i;
|
||||
if( pConfig->pTok ){
|
||||
pConfig->pTokApi->xDelete(pConfig->pTok);
|
||||
if( pConfig->t.pTok ){
|
||||
pConfig->t.pTokApi->xDelete(pConfig->t.pTok);
|
||||
}
|
||||
sqlite3_free(pConfig->t.azArg);
|
||||
sqlite3_free(pConfig->zDb);
|
||||
sqlite3_free(pConfig->zName);
|
||||
for(i=0; i<pConfig->nCol; i++){
|
||||
@ -765,10 +747,18 @@ int sqlite3Fts5Tokenize(
|
||||
void *pCtx, /* Context passed to xToken() */
|
||||
int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
|
||||
){
|
||||
if( pText==0 ) return SQLITE_OK;
|
||||
return pConfig->pTokApi->xTokenize(
|
||||
pConfig->pTok, pCtx, flags, pText, nText, xToken
|
||||
);
|
||||
int rc = SQLITE_OK;
|
||||
if( pText ){
|
||||
if( pConfig->t.pTok==0 ){
|
||||
rc = sqlite3Fts5LoadTokenizer(pConfig);
|
||||
}
|
||||
if( rc==SQLITE_OK ){
|
||||
rc = pConfig->t.pTokApi->xTokenize(
|
||||
pConfig->t.pTok, pCtx, flags, pText, nText, xToken
|
||||
);
|
||||
}
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -324,7 +324,7 @@ int sqlite3Fts5ExprNew(
|
||||
}
|
||||
|
||||
sqlite3_free(sParse.apPhrase);
|
||||
*pzErr = sParse.zErr;
|
||||
if( 0==*pzErr ) *pzErr = sParse.zErr;
|
||||
return sParse.rc;
|
||||
}
|
||||
|
||||
|
@ -377,8 +377,12 @@ static int fts5InitVtab(
|
||||
assert( (rc==SQLITE_OK && *pzErr==0) || pConfig==0 );
|
||||
}
|
||||
if( rc==SQLITE_OK ){
|
||||
pConfig->pzErrmsg = pzErr;
|
||||
pTab->p.pConfig = pConfig;
|
||||
pTab->pGlobal = pGlobal;
|
||||
if( bCreate || sqlite3Fts5TokenizerPreload(&pConfig->t) ){
|
||||
rc = sqlite3Fts5LoadTokenizer(pConfig);
|
||||
}
|
||||
}
|
||||
|
||||
/* Open the index sub-system */
|
||||
@ -400,11 +404,8 @@ static int fts5InitVtab(
|
||||
|
||||
/* Load the initial configuration */
|
||||
if( rc==SQLITE_OK ){
|
||||
assert( pConfig->pzErrmsg==0 );
|
||||
pConfig->pzErrmsg = pzErr;
|
||||
rc = sqlite3Fts5IndexLoadConfig(pTab->p.pIndex);
|
||||
sqlite3Fts5IndexRollback(pTab->p.pIndex);
|
||||
pConfig->pzErrmsg = 0;
|
||||
}
|
||||
|
||||
if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){
|
||||
@ -414,6 +415,7 @@ static int fts5InitVtab(
|
||||
rc = sqlite3_vtab_config(db, SQLITE_VTAB_INNOCUOUS);
|
||||
}
|
||||
|
||||
if( pConfig ) pConfig->pzErrmsg = 0;
|
||||
if( rc!=SQLITE_OK ){
|
||||
fts5FreeVtab(pTab);
|
||||
pTab = 0;
|
||||
@ -481,10 +483,10 @@ static int fts5UsePatternMatch(
|
||||
){
|
||||
assert( FTS5_PATTERN_GLOB==SQLITE_INDEX_CONSTRAINT_GLOB );
|
||||
assert( FTS5_PATTERN_LIKE==SQLITE_INDEX_CONSTRAINT_LIKE );
|
||||
if( pConfig->ePattern==FTS5_PATTERN_GLOB && p->op==FTS5_PATTERN_GLOB ){
|
||||
if( pConfig->t.ePattern==FTS5_PATTERN_GLOB && p->op==FTS5_PATTERN_GLOB ){
|
||||
return 1;
|
||||
}
|
||||
if( pConfig->ePattern==FTS5_PATTERN_LIKE
|
||||
if( pConfig->t.ePattern==FTS5_PATTERN_LIKE
|
||||
&& (p->op==FTS5_PATTERN_LIKE || p->op==FTS5_PATTERN_GLOB)
|
||||
){
|
||||
return 1;
|
||||
@ -626,6 +628,7 @@ static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
|
||||
idxStr += strlen(&idxStr[iIdxStr]);
|
||||
pInfo->aConstraintUsage[i].argvIndex = ++iCons;
|
||||
assert( idxStr[iIdxStr]=='\0' );
|
||||
bSeenMatch = 1;
|
||||
}else if( bSeenEq==0 && p->op==SQLITE_INDEX_CONSTRAINT_EQ && iCol<0 ){
|
||||
idxStr[iIdxStr++] = '=';
|
||||
bSeenEq = 1;
|
||||
@ -2860,7 +2863,7 @@ static int fts5FindTokenizer(
|
||||
return rc;
|
||||
}
|
||||
|
||||
int sqlite3Fts5GetTokenizer(
|
||||
int fts5GetTokenizer(
|
||||
Fts5Global *pGlobal,
|
||||
const char **azArg,
|
||||
int nArg,
|
||||
@ -2877,26 +2880,37 @@ int sqlite3Fts5GetTokenizer(
|
||||
*pzErr = sqlite3_mprintf("no such tokenizer: %s", azArg[0]);
|
||||
}else{
|
||||
rc = pMod->x.xCreate(
|
||||
pMod->pUserData, (azArg?&azArg[1]:0), (nArg?nArg-1:0), &pConfig->pTok
|
||||
pMod->pUserData, (azArg?&azArg[1]:0), (nArg?nArg-1:0), &pConfig->t.pTok
|
||||
);
|
||||
pConfig->pTokApi = &pMod->x;
|
||||
pConfig->t.pTokApi = &pMod->x;
|
||||
if( rc!=SQLITE_OK ){
|
||||
if( pzErr ) *pzErr = sqlite3_mprintf("error in tokenizer constructor");
|
||||
}else{
|
||||
pConfig->ePattern = sqlite3Fts5TokenizerPattern(
|
||||
pMod->x.xCreate, pConfig->pTok
|
||||
pConfig->t.ePattern = sqlite3Fts5TokenizerPattern(
|
||||
pMod->x.xCreate, pConfig->t.pTok
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if( rc!=SQLITE_OK ){
|
||||
pConfig->pTokApi = 0;
|
||||
pConfig->pTok = 0;
|
||||
pConfig->t.pTokApi = 0;
|
||||
pConfig->t.pTok = 0;
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
** Attempt to instantiate the tokenizer for pConfig by passing the saved
|
||||
** tokenize= arguments (pConfig->t.azArg[], pConfig->t.nArg) to
|
||||
** fts5GetTokenizer(). The result code of fts5GetTokenizer() is returned
|
||||
** unchanged, and pConfig->pzErrmsg is handed to it as the destination
|
||||
** for any error message.
|
||||
**
|
||||
** NOTE(review): pConfig->pzErrmsg is reset to 0 elsewhere in this change,
|
||||
** so it may be NULL when this is called lazily - confirm that every
|
||||
** error path in fts5GetTokenizer() checks pzErr before writing to it.
|
||||
*/
|
||||
int sqlite3Fts5LoadTokenizer(Fts5Config *pConfig){
|
||||
return fts5GetTokenizer(
|
||||
pConfig->pGlobal, pConfig->t.azArg, pConfig->t.nArg,
|
||||
pConfig, pConfig->pzErrmsg
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
static void fts5ModuleDestroy(void *pCtx){
|
||||
Fts5TokenizerModule *pTok, *pNextTok;
|
||||
Fts5Auxiliary *pAux, *pNextAux;
|
||||
|
@ -1428,6 +1428,16 @@ int sqlite3Fts5TokenizerPattern(
|
||||
return FTS5_PATTERN_NONE;
|
||||
}
|
||||
|
||||
/*
|
||||
** Return true if the tokenizer described by p->azArg[] is the trigram
|
||||
** tokenizer. This tokenizer needs to be loaded before xBestIndex is
|
||||
** called for the first time in order to correctly handle LIKE/GLOB.
|
||||
**
|
||||
** Only p->azArg[0] (the tokenizer name) is examined, using
|
||||
** sqlite3_stricmp(). If p->nArg is less than 1, p->azArg[] is not
|
||||
** read and false (0) is returned.
|
||||
*/
|
||||
int sqlite3Fts5TokenizerPreload(Fts5TokenizerConfig *p){
|
||||
return (p->nArg>=1 && 0==sqlite3_stricmp(p->azArg[0], "trigram"));
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** Register all built-in tokenizers with FTS5.
|
||||
*/
|
||||
|
@ -64,6 +64,7 @@ struct Fts5VocabCursor {
|
||||
|
||||
int nLeTerm; /* Size of zLeTerm in bytes */
|
||||
char *zLeTerm; /* (term <= $zLeTerm) parameter, or NULL */
|
||||
int colUsed; /* Copy of sqlite3_index_info.colUsed */
|
||||
|
||||
/* These are used by 'col' tables only */
|
||||
int iCol;
|
||||
@ -90,9 +91,11 @@ struct Fts5VocabCursor {
|
||||
/*
|
||||
** Bits for the mask used as the idxNum value by xBestIndex/xFilter.
|
||||
*/
|
||||
#define FTS5_VOCAB_TERM_EQ 0x01
|
||||
#define FTS5_VOCAB_TERM_GE 0x02
|
||||
#define FTS5_VOCAB_TERM_LE 0x04
|
||||
#define FTS5_VOCAB_TERM_EQ 0x0100
|
||||
#define FTS5_VOCAB_TERM_GE 0x0200
|
||||
#define FTS5_VOCAB_TERM_LE 0x0400
|
||||
|
||||
#define FTS5_VOCAB_COLUSED_MASK 0xFF
|
||||
|
||||
|
||||
/*
|
||||
@ -269,11 +272,13 @@ static int fts5VocabBestIndexMethod(
|
||||
int iTermEq = -1;
|
||||
int iTermGe = -1;
|
||||
int iTermLe = -1;
|
||||
int idxNum = 0;
|
||||
int idxNum = (int)pInfo->colUsed;
|
||||
int nArg = 0;
|
||||
|
||||
UNUSED_PARAM(pUnused);
|
||||
|
||||
assert( (pInfo->colUsed & FTS5_VOCAB_COLUSED_MASK)==pInfo->colUsed );
|
||||
|
||||
for(i=0; i<pInfo->nConstraint; i++){
|
||||
struct sqlite3_index_constraint *p = &pInfo->aConstraint[i];
|
||||
if( p->usable==0 ) continue;
|
||||
@ -365,7 +370,7 @@ static int fts5VocabOpenMethod(
|
||||
if( rc==SQLITE_OK ){
|
||||
pVTab->zErrMsg = sqlite3_mprintf(
|
||||
"no such fts5 table: %s.%s", pTab->zFts5Db, pTab->zFts5Tbl
|
||||
);
|
||||
);
|
||||
rc = SQLITE_ERROR;
|
||||
}
|
||||
}else{
|
||||
@ -525,9 +530,19 @@ static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){
|
||||
|
||||
switch( pTab->eType ){
|
||||
case FTS5_VOCAB_ROW:
|
||||
if( eDetail==FTS5_DETAIL_FULL ){
|
||||
while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){
|
||||
pCsr->aCnt[0]++;
|
||||
/* Do not bother counting the number of instances if the "cnt"
|
||||
** column is not being read (according to colUsed). */
|
||||
if( eDetail==FTS5_DETAIL_FULL && (pCsr->colUsed & 0x04) ){
|
||||
while( iPos<nPos ){
|
||||
u32 ii;
|
||||
fts5FastGetVarint32(pPos, iPos, ii);
|
||||
if( ii==1 ){
|
||||
/* New column in the position list */
|
||||
fts5FastGetVarint32(pPos, iPos, ii);
|
||||
}else{
|
||||
/* An instance - increment pCsr->aCnt[] */
|
||||
pCsr->aCnt[0]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
pCsr->aDoc[0]++;
|
||||
@ -625,6 +640,7 @@ static int fts5VocabFilterMethod(
|
||||
if( idxNum & FTS5_VOCAB_TERM_EQ ) pEq = apVal[iVal++];
|
||||
if( idxNum & FTS5_VOCAB_TERM_GE ) pGe = apVal[iVal++];
|
||||
if( idxNum & FTS5_VOCAB_TERM_LE ) pLe = apVal[iVal++];
|
||||
pCsr->colUsed = (idxNum & FTS5_VOCAB_COLUSED_MASK);
|
||||
|
||||
if( pEq ){
|
||||
zTerm = (const char *)sqlite3_value_text(pEq);
|
||||
|
@ -300,5 +300,75 @@ set ::flags [list]
|
||||
do_execsql_test 9.5.1 { SELECT * FROM t1('"abc xyz*"'); } {}
|
||||
do_test 9.5.2 { set ::flags } {query}
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
#
|
||||
reset_db
|
||||
do_execsql_test 10.1 {
|
||||
CREATE VIRTUAL TABLE x1 USING fts5(x, tokenize=unicode61);
|
||||
PRAGMA writable_schema = 1;
|
||||
UPDATE sqlite_schema
|
||||
SET sql = 'CREATE VIRTUAL TABLE x1 USING fts5(x, tokenize="unicode61 error");'
|
||||
WHERE name = 'x1';
|
||||
}
|
||||
|
||||
db close
|
||||
sqlite3 db test.db
|
||||
|
||||
do_catchsql_test 10.2 {
|
||||
SELECT * FROM x1('abc');
|
||||
} {1 {error in tokenizer constructor}}
|
||||
|
||||
do_catchsql_test 10.3 {
|
||||
INSERT INTO x1 VALUES('abc');
|
||||
} {1 {error in tokenizer constructor}}
|
||||
|
||||
do_execsql_test 10.4 {
|
||||
PRAGMA writable_schema = 1;
|
||||
UPDATE sqlite_schema
|
||||
SET sql = 'CREATE VIRTUAL TABLE x1 USING fts5(x, tokenize="nosuch error");'
|
||||
WHERE name = 'x1';
|
||||
}
|
||||
|
||||
db close
|
||||
sqlite3 db test.db
|
||||
|
||||
do_catchsql_test 10.5 {
|
||||
SELECT * FROM x1('abc');
|
||||
} {1 {no such tokenizer: nosuch}}
|
||||
do_catchsql_test 10.6 {
|
||||
INSERT INTO x1 VALUES('abc');
|
||||
} {1 {no such tokenizer: nosuch}}
|
||||
|
||||
do_execsql_test 10.7 {
|
||||
DROP TABLE x1;
|
||||
SELECT * FROM sqlite_schema;
|
||||
}
|
||||
|
||||
reset_db
|
||||
do_execsql_test 10.8 {
|
||||
CREATE VIRTUAL TABLE x1 USING fts5(x, tokenize=unicode61);
|
||||
INSERT INTO x1 VALUES('a b c'), ('d e f'), ('a b c');
|
||||
CREATE VIRTUAL TABLE x1v USING fts5vocab(x1, row);
|
||||
|
||||
PRAGMA writable_schema = 1;
|
||||
UPDATE sqlite_schema
|
||||
SET sql = 'CREATE VIRTUAL TABLE x1 USING fts5(x, tokenize=simplify);'
|
||||
WHERE name = 'x1';
|
||||
}
|
||||
|
||||
do_execsql_test 10.9 {
|
||||
SELECT * FROM x1v
|
||||
} {
|
||||
a 2 2 b 2 2 c 2 2 d 1 1 e 1 1 f 1 1
|
||||
}
|
||||
|
||||
db close
|
||||
sqlite3 db test.db
|
||||
|
||||
do_execsql_test 10.10 {
|
||||
SELECT * FROM x1v
|
||||
} {
|
||||
a 2 2 b 2 2 c 2 2 d 1 1 e 1 1 f 1 1
|
||||
}
|
||||
|
||||
finish_test
|
||||
|
@ -101,9 +101,20 @@ do_execsql_test 4.0 {
|
||||
CREATE VIRTUAL TABLE t4 USING fts5(z, tokenize=trigram);
|
||||
} {}
|
||||
|
||||
breakpoint
|
||||
do_execsql_test 4.1 {
|
||||
INSERT INTO t4 VALUES('ABCD');
|
||||
INSERT INTO t4 VALUES('DEFG');
|
||||
} {}
|
||||
|
||||
db close
|
||||
sqlite3 db test.db
|
||||
|
||||
do_eqp_test 4.1 {
|
||||
SELECT rowid FROM t4 WHERE z LIKE '%abc%'
|
||||
} {VIRTUAL TABLE INDEX 0:L0}
|
||||
|
||||
do_execsql_test 4.2 {
|
||||
SELECT rowid FROM t4 WHERE z LIKE '%abc%'
|
||||
} {1}
|
||||
|
||||
finish_test
|
||||
|
Reference in New Issue
Block a user