1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-07-29 08:01:23 +03:00

Merge branches "fix-onerow-opt", "faster-openread", "fts5-delay-tokenizer" and "enhanced-raise", each containing minor enhancements prepared for 3.47, into this branch.

FossilOrigin-Name: 6dc6472175bccbed15ebf6811c209d1a0b5fad60158fb32040210f2cdae916a6
This commit is contained in:
dan
2024-05-17 14:26:32 +00:00
18 changed files with 268 additions and 121 deletions

View File

@ -142,6 +142,15 @@ struct Fts5Colset {
*/
typedef struct Fts5Config Fts5Config;
typedef struct Fts5TokenizerConfig Fts5TokenizerConfig;
/*
** Tokenizer settings for one FTS5 table. An instance is embedded in
** Fts5Config as member "t". The azArg[]/nArg pair records the arguments of
** the tokenize=... directive; pTok/pTokApi are filled in only once the
** tokenizer has actually been instantiated, and are both NULL before that
** (or after a failed attempt to instantiate it).
*/
struct Fts5TokenizerConfig {
Fts5Tokenizer *pTok; /* Tokenizer instance, or NULL if not yet created */
fts5_tokenizer *pTokApi; /* Methods (xCreate/xDelete/xTokenize) for pTok */
const char **azArg; /* Tokenizer name (azArg[0]) followed by its arguments */
int nArg; /* Number of entries in azArg[] (0 if no tokenize= option) */
int ePattern; /* FTS5_PATTERN_XXX constant */
};
/*
** An instance of the following structure encodes all information that can
@ -184,6 +193,7 @@ typedef struct Fts5Config Fts5Config;
*/
struct Fts5Config {
sqlite3 *db; /* Database handle */
Fts5Global *pGlobal; /* Global fts5 object for handle db */
char *zDb; /* Database holding FTS index (e.g. "main") */
char *zName; /* Name of FTS index */
int nCol; /* Number of columns */
@ -199,10 +209,8 @@ struct Fts5Config {
int bTokendata; /* "tokendata=" option value (dflt==0) */
int eDetail; /* FTS5_DETAIL_XXX value */
char *zContentExprlist;
Fts5Tokenizer *pTok;
fts5_tokenizer *pTokApi;
Fts5TokenizerConfig t;
int bLock; /* True when table is preparing statement */
int ePattern; /* FTS_PATTERN_XXX constant */
/* Values loaded from the %_config table */
int iVersion; /* fts5 file format 'version' */
@ -597,13 +605,7 @@ struct Fts5Table {
Fts5Index *pIndex; /* Full-text index */
};
int sqlite3Fts5GetTokenizer(
Fts5Global*,
const char **azArg,
int nArg,
Fts5Config*,
char **pzErr
);
int sqlite3Fts5LoadTokenizer(Fts5Config *pConfig);
Fts5Table *sqlite3Fts5TableFromCsrid(Fts5Global*, i64);
@ -866,6 +868,7 @@ int sqlite3Fts5TokenizerPattern(
int (*xCreate)(void*, const char**, int, Fts5Tokenizer**),
Fts5Tokenizer *pTok
);
int sqlite3Fts5TokenizerPreload(Fts5TokenizerConfig*);
/*
** End of interface to code in fts5_tokenizer.c.
**************************************************************************/

View File

@ -298,12 +298,11 @@ static int fts5ConfigParseSpecial(
if( sqlite3_strnicmp("tokenize", zCmd, nCmd)==0 ){
const char *p = (const char*)zArg;
sqlite3_int64 nArg = strlen(zArg) + 1;
char **azArg = sqlite3Fts5MallocZero(&rc, sizeof(char*) * nArg);
char *pDel = sqlite3Fts5MallocZero(&rc, nArg * 2);
char *pSpace = pDel;
char **azArg = sqlite3Fts5MallocZero(&rc, (sizeof(char*) + 2) * nArg);
if( azArg && pSpace ){
if( pConfig->pTok ){
if( azArg ){
char *pSpace = (char*)&azArg[nArg];
if( pConfig->t.azArg ){
*pzErr = sqlite3_mprintf("multiple tokenize=... directives");
rc = SQLITE_ERROR;
}else{
@ -326,16 +325,14 @@ static int fts5ConfigParseSpecial(
*pzErr = sqlite3_mprintf("parse error in tokenize directive");
rc = SQLITE_ERROR;
}else{
rc = sqlite3Fts5GetTokenizer(pGlobal,
(const char**)azArg, (int)nArg, pConfig,
pzErr
);
pConfig->t.azArg = (const char**)azArg;
pConfig->t.nArg = nArg;
azArg = 0;
}
}
}
sqlite3_free(azArg);
sqlite3_free(pDel);
return rc;
}
@ -412,16 +409,6 @@ static int fts5ConfigParseSpecial(
return SQLITE_ERROR;
}
/*
** Create the default tokenizer for configuration pConfig. This is done by
** invoking sqlite3Fts5GetTokenizer() with no tokenizer name and no
** arguments. It is an error to call this function if a tokenizer has
** already been attached to pConfig.
**
** Return SQLITE_OK if successful, or an SQLite error code otherwise.
*/
static int fts5ConfigDefaultTokenizer(Fts5Global *pGlobal, Fts5Config *pConfig){
  int rc;
  assert( pConfig->pTok==0 );
  assert( pConfig->pTokApi==0 );
  rc = sqlite3Fts5GetTokenizer(pGlobal, 0, 0, pConfig, 0);
  return rc;
}
/*
** Gobble up the first bareword or quoted word from the input buffer zIn.
** Return a pointer to the character immediately following the last in
@ -554,6 +541,7 @@ int sqlite3Fts5ConfigParse(
*ppOut = pRet = (Fts5Config*)sqlite3_malloc(sizeof(Fts5Config));
if( pRet==0 ) return SQLITE_NOMEM;
memset(pRet, 0, sizeof(Fts5Config));
pRet->pGlobal = pGlobal;
pRet->db = db;
pRet->iCookie = -1;
@ -640,13 +628,6 @@ int sqlite3Fts5ConfigParse(
rc = SQLITE_ERROR;
}
/* If a tokenizer= option was successfully parsed, the tokenizer has
** already been allocated. Otherwise, allocate an instance of the default
** tokenizer (unicode61) now. */
if( rc==SQLITE_OK && pRet->pTok==0 ){
rc = fts5ConfigDefaultTokenizer(pGlobal, pRet);
}
/* If no zContent option was specified, fill in the default values. */
if( rc==SQLITE_OK && pRet->zContent==0 ){
const char *zTail = 0;
@ -688,9 +669,10 @@ int sqlite3Fts5ConfigParse(
void sqlite3Fts5ConfigFree(Fts5Config *pConfig){
if( pConfig ){
int i;
if( pConfig->pTok ){
pConfig->pTokApi->xDelete(pConfig->pTok);
if( pConfig->t.pTok ){
pConfig->t.pTokApi->xDelete(pConfig->t.pTok);
}
sqlite3_free(pConfig->t.azArg);
sqlite3_free(pConfig->zDb);
sqlite3_free(pConfig->zName);
for(i=0; i<pConfig->nCol; i++){
@ -765,10 +747,18 @@ int sqlite3Fts5Tokenize(
void *pCtx, /* Context passed to xToken() */
int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
){
if( pText==0 ) return SQLITE_OK;
return pConfig->pTokApi->xTokenize(
pConfig->pTok, pCtx, flags, pText, nText, xToken
);
int rc = SQLITE_OK;
if( pText ){
if( pConfig->t.pTok==0 ){
rc = sqlite3Fts5LoadTokenizer(pConfig);
}
if( rc==SQLITE_OK ){
rc = pConfig->t.pTokApi->xTokenize(
pConfig->t.pTok, pCtx, flags, pText, nText, xToken
);
}
}
return rc;
}
/*

View File

@ -324,7 +324,7 @@ int sqlite3Fts5ExprNew(
}
sqlite3_free(sParse.apPhrase);
*pzErr = sParse.zErr;
if( 0==*pzErr ) *pzErr = sParse.zErr;
return sParse.rc;
}

View File

@ -377,8 +377,12 @@ static int fts5InitVtab(
assert( (rc==SQLITE_OK && *pzErr==0) || pConfig==0 );
}
if( rc==SQLITE_OK ){
pConfig->pzErrmsg = pzErr;
pTab->p.pConfig = pConfig;
pTab->pGlobal = pGlobal;
if( bCreate || sqlite3Fts5TokenizerPreload(&pConfig->t) ){
rc = sqlite3Fts5LoadTokenizer(pConfig);
}
}
/* Open the index sub-system */
@ -400,11 +404,8 @@ static int fts5InitVtab(
/* Load the initial configuration */
if( rc==SQLITE_OK ){
assert( pConfig->pzErrmsg==0 );
pConfig->pzErrmsg = pzErr;
rc = sqlite3Fts5IndexLoadConfig(pTab->p.pIndex);
sqlite3Fts5IndexRollback(pTab->p.pIndex);
pConfig->pzErrmsg = 0;
}
if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){
@ -414,6 +415,7 @@ static int fts5InitVtab(
rc = sqlite3_vtab_config(db, SQLITE_VTAB_INNOCUOUS);
}
if( pConfig ) pConfig->pzErrmsg = 0;
if( rc!=SQLITE_OK ){
fts5FreeVtab(pTab);
pTab = 0;
@ -481,10 +483,10 @@ static int fts5UsePatternMatch(
){
assert( FTS5_PATTERN_GLOB==SQLITE_INDEX_CONSTRAINT_GLOB );
assert( FTS5_PATTERN_LIKE==SQLITE_INDEX_CONSTRAINT_LIKE );
if( pConfig->ePattern==FTS5_PATTERN_GLOB && p->op==FTS5_PATTERN_GLOB ){
if( pConfig->t.ePattern==FTS5_PATTERN_GLOB && p->op==FTS5_PATTERN_GLOB ){
return 1;
}
if( pConfig->ePattern==FTS5_PATTERN_LIKE
if( pConfig->t.ePattern==FTS5_PATTERN_LIKE
&& (p->op==FTS5_PATTERN_LIKE || p->op==FTS5_PATTERN_GLOB)
){
return 1;
@ -626,6 +628,7 @@ static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
idxStr += strlen(&idxStr[iIdxStr]);
pInfo->aConstraintUsage[i].argvIndex = ++iCons;
assert( idxStr[iIdxStr]=='\0' );
bSeenMatch = 1;
}else if( bSeenEq==0 && p->op==SQLITE_INDEX_CONSTRAINT_EQ && iCol<0 ){
idxStr[iIdxStr++] = '=';
bSeenEq = 1;
@ -2860,7 +2863,7 @@ static int fts5FindTokenizer(
return rc;
}
int sqlite3Fts5GetTokenizer(
int fts5GetTokenizer(
Fts5Global *pGlobal,
const char **azArg,
int nArg,
@ -2877,26 +2880,37 @@ int sqlite3Fts5GetTokenizer(
*pzErr = sqlite3_mprintf("no such tokenizer: %s", azArg[0]);
}else{
rc = pMod->x.xCreate(
pMod->pUserData, (azArg?&azArg[1]:0), (nArg?nArg-1:0), &pConfig->pTok
pMod->pUserData, (azArg?&azArg[1]:0), (nArg?nArg-1:0), &pConfig->t.pTok
);
pConfig->pTokApi = &pMod->x;
pConfig->t.pTokApi = &pMod->x;
if( rc!=SQLITE_OK ){
if( pzErr ) *pzErr = sqlite3_mprintf("error in tokenizer constructor");
}else{
pConfig->ePattern = sqlite3Fts5TokenizerPattern(
pMod->x.xCreate, pConfig->pTok
pConfig->t.ePattern = sqlite3Fts5TokenizerPattern(
pMod->x.xCreate, pConfig->t.pTok
);
}
}
if( rc!=SQLITE_OK ){
pConfig->pTokApi = 0;
pConfig->pTok = 0;
pConfig->t.pTokApi = 0;
pConfig->t.pTok = 0;
}
return rc;
}
/*
** Instantiate the tokenizer for pConfig. The tokenizer name and arguments
** are taken from pConfig->t.azArg[] / pConfig->t.nArg, and any error
** message is written via pConfig->pzErrmsg. The value returned is the
** return code of fts5GetTokenizer().
*/
int sqlite3Fts5LoadTokenizer(Fts5Config *pConfig){
  int rc;
  rc = fts5GetTokenizer(
      pConfig->pGlobal,
      pConfig->t.azArg,
      pConfig->t.nArg,
      pConfig,
      pConfig->pzErrmsg
  );
  return rc;
}
static void fts5ModuleDestroy(void *pCtx){
Fts5TokenizerModule *pTok, *pNextTok;
Fts5Auxiliary *pAux, *pNextAux;

View File

@ -1428,6 +1428,16 @@ int sqlite3Fts5TokenizerPattern(
return FTS5_PATTERN_NONE;
}
/*
** Report whether the tokenizer described by p must be instantiated eagerly.
** Returns 1 if the first tokenizer argument (p->azArg[0]) is "trigram",
** compared case-insensitively, and 0 otherwise - including when there are
** no tokenizer arguments at all. The trigram tokenizer has to be loaded
** before xBestIndex is first invoked so that LIKE/GLOB are handled
** correctly.
*/
int sqlite3Fts5TokenizerPreload(Fts5TokenizerConfig *p){
  if( p->nArg<1 ){
    return 0;
  }
  return sqlite3_stricmp(p->azArg[0], "trigram")==0;
}
/*
** Register all built-in tokenizers with FTS5.
*/

View File

@ -64,6 +64,7 @@ struct Fts5VocabCursor {
int nLeTerm; /* Size of zLeTerm in bytes */
char *zLeTerm; /* (term <= $zLeTerm) parameter, or NULL */
int colUsed; /* Copy of sqlite3_index_info.colUsed */
/* These are used by 'col' tables only */
int iCol;
@ -90,9 +91,11 @@ struct Fts5VocabCursor {
/*
** Bits for the mask used as the idxNum value by xBestIndex/xFilter.
*/
#define FTS5_VOCAB_TERM_EQ 0x01
#define FTS5_VOCAB_TERM_GE 0x02
#define FTS5_VOCAB_TERM_LE 0x04
#define FTS5_VOCAB_TERM_EQ 0x0100
#define FTS5_VOCAB_TERM_GE 0x0200
#define FTS5_VOCAB_TERM_LE 0x0400
#define FTS5_VOCAB_COLUSED_MASK 0xFF
/*
@ -269,11 +272,13 @@ static int fts5VocabBestIndexMethod(
int iTermEq = -1;
int iTermGe = -1;
int iTermLe = -1;
int idxNum = 0;
int idxNum = (int)pInfo->colUsed;
int nArg = 0;
UNUSED_PARAM(pUnused);
assert( (pInfo->colUsed & FTS5_VOCAB_COLUSED_MASK)==pInfo->colUsed );
for(i=0; i<pInfo->nConstraint; i++){
struct sqlite3_index_constraint *p = &pInfo->aConstraint[i];
if( p->usable==0 ) continue;
@ -365,7 +370,7 @@ static int fts5VocabOpenMethod(
if( rc==SQLITE_OK ){
pVTab->zErrMsg = sqlite3_mprintf(
"no such fts5 table: %s.%s", pTab->zFts5Db, pTab->zFts5Tbl
);
);
rc = SQLITE_ERROR;
}
}else{
@ -525,9 +530,19 @@ static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){
switch( pTab->eType ){
case FTS5_VOCAB_ROW:
if( eDetail==FTS5_DETAIL_FULL ){
while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){
pCsr->aCnt[0]++;
/* Do not bother counting the number of instances if the "cnt"
** column is not being read (according to colUsed). */
if( eDetail==FTS5_DETAIL_FULL && (pCsr->colUsed & 0x04) ){
while( iPos<nPos ){
u32 ii;
fts5FastGetVarint32(pPos, iPos, ii);
if( ii==1 ){
/* New column in the position list */
fts5FastGetVarint32(pPos, iPos, ii);
}else{
/* An instance - increment pCsr->aCnt[] */
pCsr->aCnt[0]++;
}
}
}
pCsr->aDoc[0]++;
@ -625,6 +640,7 @@ static int fts5VocabFilterMethod(
if( idxNum & FTS5_VOCAB_TERM_EQ ) pEq = apVal[iVal++];
if( idxNum & FTS5_VOCAB_TERM_GE ) pGe = apVal[iVal++];
if( idxNum & FTS5_VOCAB_TERM_LE ) pLe = apVal[iVal++];
pCsr->colUsed = (idxNum & FTS5_VOCAB_COLUSED_MASK);
if( pEq ){
zTerm = (const char *)sqlite3_value_text(pEq);

View File

@ -300,5 +300,75 @@ set ::flags [list]
do_execsql_test 9.5.1 { SELECT * FROM t1('"abc xyz*"'); } {}
do_test 9.5.2 { set ::flags } {query}
#-------------------------------------------------------------------------
# Tests 10.*: behaviour when the tokenize=... option stored in the schema
# is invalid. The stored CREATE VIRTUAL TABLE statement is rewritten using
# "PRAGMA writable_schema" and the database is then reopened.
#
#   10.1-10.3  Bad argument to an existing tokenizer: both a full-text
#              query and an INSERT fail with "error in tokenizer
#              constructor".
#   10.4-10.6  Unknown tokenizer name: both fail with "no such tokenizer".
#   10.7       The table with the broken tokenizer is dropped.
#   10.8-10.10 An fts5vocab table on an fts5 table whose schema names an
#              unknown tokenizer returns the same rows before and after
#              the database is reopened.
#
reset_db
do_execsql_test 10.1 {
CREATE VIRTUAL TABLE x1 USING fts5(x, tokenize=unicode61);
PRAGMA writable_schema = 1;
UPDATE sqlite_schema
SET sql = 'CREATE VIRTUAL TABLE x1 USING fts5(x, tokenize="unicode61 error");'
WHERE name = 'x1';
}
# Reopen so that the modified schema is what the new connection reads.
db close
sqlite3 db test.db
do_catchsql_test 10.2 {
SELECT * FROM x1('abc');
} {1 {error in tokenizer constructor}}
do_catchsql_test 10.3 {
INSERT INTO x1 VALUES('abc');
} {1 {error in tokenizer constructor}}
do_execsql_test 10.4 {
PRAGMA writable_schema = 1;
UPDATE sqlite_schema
SET sql = 'CREATE VIRTUAL TABLE x1 USING fts5(x, tokenize="nosuch error");'
WHERE name = 'x1';
}
db close
sqlite3 db test.db
do_catchsql_test 10.5 {
SELECT * FROM x1('abc');
} {1 {no such tokenizer: nosuch}}
do_catchsql_test 10.6 {
INSERT INTO x1 VALUES('abc');
} {1 {no such tokenizer: nosuch}}
# NOTE(review): no expected result is given here; presumably the harness
# treats that as "expect no rows", i.e. the schema is empty after the DROP.
do_execsql_test 10.7 {
DROP TABLE x1;
SELECT * FROM sqlite_schema;
}
reset_db
do_execsql_test 10.8 {
CREATE VIRTUAL TABLE x1 USING fts5(x, tokenize=unicode61);
INSERT INTO x1 VALUES('a b c'), ('d e f'), ('a b c');
CREATE VIRTUAL TABLE x1v USING fts5vocab(x1, row);
PRAGMA writable_schema = 1;
UPDATE sqlite_schema
SET sql = 'CREATE VIRTUAL TABLE x1 USING fts5(x, tokenize=simplify);'
WHERE name = 'x1';
}
# Query the vocab table on the connection that performed the schema edit...
do_execsql_test 10.9 {
SELECT * FROM x1v
} {
a 2 2 b 2 2 c 2 2 d 1 1 e 1 1 f 1 1
}
# ...and again on a fresh connection.
db close
sqlite3 db test.db
do_execsql_test 10.10 {
SELECT * FROM x1v
} {
a 2 2 b 2 2 c 2 2 d 1 1 e 1 1 f 1 1
}
finish_test

View File

@ -101,9 +101,20 @@ do_execsql_test 4.0 {
CREATE VIRTUAL TABLE t4 USING fts5(z, tokenize=trigram);
} {}
# Populate t4, then close and reopen the database so that the LIKE query
# below is planned on a connection that has not used the table before.
do_execsql_test 4.1 {
INSERT INTO t4 VALUES('ABCD');
INSERT INTO t4 VALUES('DEFG');
} {}
db close
sqlite3 db test.db
# The LIKE constraint should be passed to the FTS5 table on the fresh
# connection (expected plan string "0:L0"), and the query should match the
# row containing 'ABCD'.
do_eqp_test 4.2 {
SELECT rowid FROM t4 WHERE z LIKE '%abc%'
} {VIRTUAL TABLE INDEX 0:L0}
do_execsql_test 4.3 {
SELECT rowid FROM t4 WHERE z LIKE '%abc%'
} {1}
finish_test