diff --git a/ext/fts5/extract_api_docs.tcl b/ext/fts5/extract_api_docs.tcl index e0f3191d61..81fe4cde53 100644 --- a/ext/fts5/extract_api_docs.tcl +++ b/ext/fts5/extract_api_docs.tcl @@ -15,6 +15,17 @@ # is included in the documentation on the web. # +set ::fts5_docs_output "" +if {[info commands hd_putsnl]==""} { + proc output {text} { + puts $text + } +} else { + proc output {text} { + append ::fts5_docs_output "$text\n" + } +} + set input_file [file join [file dir [info script]] fts5.h] set fd [open $input_file] set data [read $fd] @@ -102,28 +113,28 @@ foreach {hdr docs} $D { if {[info exists M($hdr)]} { set hdr $M($hdr) } - puts "

  $hdr

" + output "
  $hdr
" set mode "" set bEmpty 1 foreach line [split [string trim $docs] "\n"] { if {[string trim $line]==""} { - if {$mode != ""} {puts ""} + if {$mode != ""} {output ""} set mode "" } elseif {$mode == ""} { if {[regexp {^ } $line]} { - set mode code + set mode codeblock } else { set mode p } - puts "<$mode>" + output "<$mode>" } - puts $line + output $line } - if {$mode != ""} {puts ""} + if {$mode != ""} {output ""} } - +set ::fts5_docs_output diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index ade9c87ceb..368a0f88c9 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -23,6 +23,8 @@ typedef struct Fts5Global Fts5Global; typedef struct Fts5Auxiliary Fts5Auxiliary; typedef struct Fts5Auxdata Fts5Auxdata; +typedef struct Fts5TokenizerModule Fts5TokenizerModule; + /* ** NOTES ON TRANSACTIONS: ** @@ -65,9 +67,11 @@ struct Fts5TransactionState { ** all registered FTS5 extensions - tokenizers and auxiliary functions. */ struct Fts5Global { + fts5_api api; /* User visible part of object (see fts5.h) */ sqlite3 *db; /* Associated database connection */ i64 iNextId; /* Used to allocate unique cursor ids */ Fts5Auxiliary *pAux; /* First in list of all aux. functions */ + Fts5TokenizerModule *pTok; /* First in list of all tokenizer modules */ Fts5Cursor *pCsr; /* First in list of all open cursors */ }; @@ -85,6 +89,19 @@ struct Fts5Auxiliary { Fts5Auxiliary *pNext; /* Next registered auxiliary function */ }; +/* +** Each tokenizer module registered with the FTS5 module is represented +** by an object of the following type. All such objects are stored as part +** of the Fts5Global.pTok list. +*/ +struct Fts5TokenizerModule { + char *zName; /* Name of tokenizer */ + void *pUserData; /* User pointer passed to xCreate() */ + fts5_tokenizer x; /* Tokenizer functions */ + void (*xDestroy)(void*); /* Destructor function */ + Fts5TokenizerModule *pNext; /* Next registered tokenizer module */ +}; + /* ** Virtual-table object. 
*/ @@ -281,12 +298,14 @@ static int fts5InitVtab( sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */ char **pzErr /* Write any error message here */ ){ + Fts5Global *pGlobal = (Fts5Global*)pAux; + const char **azConfig = (const char**)argv; int rc; /* Return code */ Fts5Config *pConfig; /* Results of parsing argc/argv */ Fts5Table *pTab = 0; /* New virtual table object */ /* Parse the arguments */ - rc = sqlite3Fts5ConfigParse(db, argc, (const char**)argv, &pConfig, pzErr); + rc = sqlite3Fts5ConfigParse(pGlobal, db, argc, azConfig, &pConfig, pzErr); assert( (rc==SQLITE_OK && *pzErr==0) || pConfig==0 ); /* Allocate the new vtab object */ @@ -297,7 +316,7 @@ static int fts5InitVtab( }else{ memset(pTab, 0, sizeof(Fts5Table)); pTab->pConfig = pConfig; - pTab->pGlobal = (Fts5Global*)pAux; + pTab->pGlobal = pGlobal; } } @@ -857,6 +876,10 @@ static int fts5SeekCursor(Fts5Cursor *pCsr){ ** Argument pVal is the value assigned to column "fts" by the INSERT ** statement. This function returns SQLITE_OK if successful, or an SQLite ** error code if an error occurs. +** +** The commands implemented by this function are documented in the "Special +** INSERT Directives" section of the documentation. It should be updated if +** more commands are added to this function. */ static int fts5SpecialCommand(Fts5Table *pTab, sqlite3_value *pVal){ const char *z = (const char*)sqlite3_value_text(pVal); @@ -1387,13 +1410,14 @@ static int fts5RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){ /* ** Register a new auxiliary function with global context pGlobal. */ -int sqlite3Fts5CreateAux( - Fts5Global *pGlobal, /* Global context (one per db handle) */ +static int fts5CreateAux( + fts5_api *pApi, /* Global context (one per db handle) */ const char *zName, /* Name of new function */ void *pUserData, /* User data for aux. function */ fts5_extension_function xFunc, /* Aux. 
function implementation */ void(*xDestroy)(void*) /* Destructor for pUserData */ ){ + Fts5Global *pGlobal = (Fts5Global*)pApi; int rc = sqlite3_overload_function(pGlobal->db, zName, -1); if( rc==SQLITE_OK ){ Fts5Auxiliary *pAux; @@ -1419,20 +1443,131 @@ int sqlite3Fts5CreateAux( return rc; } -static void fts5ModuleDestroy(void *pCtx){ - Fts5Auxiliary *pAux; - Fts5Auxiliary *pNext; - Fts5Global *pGlobal = (Fts5Global*)pCtx; - for(pAux=pGlobal->pAux; pAux; pAux=pNext){ - pNext = pAux->pNext; - if( pAux->xDestroy ){ - pAux->xDestroy(pAux->pUserData); +/* +** Register a new tokenizer. This is the implementation of the +** fts5_api.xCreateTokenizer() method. +*/ +static int fts5CreateTokenizer( + fts5_api *pApi, /* Global context (one per db handle) */ + const char *zName, /* Name of new tokenizer */ + void *pUserData, /* User pointer passed to xCreate() */ + fts5_tokenizer *pTokenizer, /* Tokenizer implementation */ + void(*xDestroy)(void*) /* Destructor for pUserData */ +){ + Fts5Global *pGlobal = (Fts5Global*)pApi; + Fts5TokenizerModule *pNew; + int nByte; /* Bytes of space to allocate */ + int rc = SQLITE_OK; + + nByte = sizeof(Fts5TokenizerModule) + strlen(zName) + 1; + pNew = (Fts5TokenizerModule*)sqlite3_malloc(nByte); + if( pNew ){ + memset(pNew, 0, nByte); + pNew->zName = (char*)&pNew[1]; + strcpy(pNew->zName, zName); + pNew->pUserData = pUserData; + pNew->x = *pTokenizer; + pNew->xDestroy = xDestroy; + pNew->pNext = pGlobal->pTok; + pGlobal->pTok = pNew; + }else{ + rc = SQLITE_NOMEM; + } + + return rc; +} + +/* +** Find a tokenizer. This is the implementation of the +** fts5_api.xFindTokenizer() method. 
+*/ +static int fts5FindTokenizer( + fts5_api *pApi, /* Global context (one per db handle) */ + const char *zName, /* Name of tokenizer to find */ + fts5_tokenizer *pTokenizer /* Populate this object */ +){ + Fts5Global *pGlobal = (Fts5Global*)pApi; + int rc = SQLITE_OK; + Fts5TokenizerModule *pTok; + + for(pTok=pGlobal->pTok; pTok; pTok=pTok->pNext){ + if( sqlite3_stricmp(zName, pTok->zName)==0 ) break; + } + + if( pTok ){ + *pTokenizer = pTok->x; + }else{ + memset(pTokenizer, 0, sizeof(fts5_tokenizer)); + rc = SQLITE_ERROR; + } + + return rc; +} + +int sqlite3Fts5GetTokenizer( + Fts5Global *pGlobal, + const char **azArg, + int nArg, + Fts5Tokenizer **ppTok, + fts5_tokenizer **ppTokApi +){ + Fts5TokenizerModule *pMod = 0; + int rc = SQLITE_OK; + if( nArg==0 ){ + pMod = pGlobal->pTok; + }else{ + for(pMod=pGlobal->pTok; pMod; pMod=pMod->pNext){ + if( sqlite3_stricmp(azArg[0], pMod->zName)==0 ) break; } + } + + if( pMod==0 ){ + rc = SQLITE_ERROR; + }else{ + rc = pMod->x.xCreate(pMod->pUserData, (nArg?&azArg[1]:0), (nArg?nArg-1:0), ppTok); + *ppTokApi = &pMod->x; + } + + if( rc!=SQLITE_OK ){ + *ppTokApi = 0; + *ppTok = 0; + } + + return rc; +} + +static void fts5ModuleDestroy(void *pCtx){ + Fts5TokenizerModule *pTok, *pNextTok; + Fts5Auxiliary *pAux, *pNextAux; + Fts5Global *pGlobal = (Fts5Global*)pCtx; + + for(pAux=pGlobal->pAux; pAux; pAux=pNextAux){ + pNextAux = pAux->pNext; + if( pAux->xDestroy ) pAux->xDestroy(pAux->pUserData); sqlite3_free(pAux); } + + for(pTok=pGlobal->pTok; pTok; pTok=pNextTok){ + pNextTok = pTok->pNext; + if( pTok->xDestroy ) pTok->xDestroy(pTok->pUserData); + sqlite3_free(pTok); + } + sqlite3_free(pGlobal); } +static void fts5Fts5Func( + sqlite3_context *pCtx, /* Function call context */ + int nArg, /* Number of args */ + sqlite3_value **apVal /* Function arguments */ +){ + Fts5Global *pGlobal = (Fts5Global*)sqlite3_user_data(pCtx); + char buf[8]; + assert( nArg==0 ); + assert( sizeof(buf)>=sizeof(pGlobal) ); + memcpy(buf, (void*)&pGlobal, sizeof(pGlobal)); /* copy the pointer value itself, not the struct it points to */ +
sqlite3_result_blob(pCtx, buf, sizeof(pGlobal), SQLITE_TRANSIENT); +} int sqlite3Fts5Init(sqlite3 *db){ static const sqlite3_module fts5Mod = { @@ -1471,10 +1606,20 @@ int sqlite3Fts5Init(sqlite3 *db){ void *p = (void*)pGlobal; memset(pGlobal, 0, sizeof(Fts5Global)); pGlobal->db = db; + pGlobal->api.iVersion = 1; + pGlobal->api.xCreateFunction = fts5CreateAux; + pGlobal->api.xCreateTokenizer = fts5CreateTokenizer; + pGlobal->api.xFindTokenizer = fts5FindTokenizer; rc = sqlite3_create_module_v2(db, "fts5", &fts5Mod, p, fts5ModuleDestroy); if( rc==SQLITE_OK ) rc = sqlite3Fts5IndexInit(db); - if( rc==SQLITE_OK ) rc = sqlite3Fts5ExprInit(db); - if( rc==SQLITE_OK ) rc = sqlite3Fts5AuxInit(pGlobal); + if( rc==SQLITE_OK ) rc = sqlite3Fts5ExprInit(pGlobal, db); + if( rc==SQLITE_OK ) rc = sqlite3Fts5AuxInit(&pGlobal->api); + if( rc==SQLITE_OK ) rc = sqlite3Fts5TokenizerInit(&pGlobal->api); + if( rc==SQLITE_OK ){ + rc = sqlite3_create_function( + db, "fts5", 0, SQLITE_UTF8, p, fts5Fts5Func, 0, 0 + ); + } } return rc; } diff --git a/ext/fts5/fts5.h b/ext/fts5/fts5.h index d3db15cc36..248459ac8a 100644 --- a/ext/fts5/fts5.h +++ b/ext/fts5/fts5.h @@ -261,5 +261,42 @@ struct fts5_tokenizer { ** END OF CUSTOM TOKENIZERS *************************************************************************/ +/************************************************************************* +** FTS5 EXTENSION REGISTRATION API +*/ +typedef struct fts5_api fts5_api; +struct fts5_api { + int iVersion; /* Currently always set to 1 */ + + /* Create a new tokenizer */ + int (*xCreateTokenizer)( + fts5_api *pApi, + const char *zName, + void *pContext, + fts5_tokenizer *pTokenizer, + void (*xDestroy)(void*) + ); + + /* Find an existing tokenizer */ + int (*xFindTokenizer)( + fts5_api *pApi, + const char *zName, + fts5_tokenizer *pTokenizer + ); + + /* Create a new auxiliary function */ + int (*xCreateFunction)( + fts5_api *pApi, + const char *zName, + void *pContext, + fts5_extension_function xFunction, + void 
(*xDestroy)(void*) + ); +}; + +/* +** END OF REGISTRATION API +*************************************************************************/ + #endif /* _FTS5_H */ diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 9512d3505f..3808d31617 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -31,6 +31,23 @@ /* Name of rank column */ #define FTS5_RANK_NAME "rank" +/************************************************************************** +** Interface to code in fts5.c. +*/ +typedef struct Fts5Global Fts5Global; + +int sqlite3Fts5GetTokenizer( + Fts5Global*, + const char **azArg, + int nArg, + Fts5Tokenizer**, + fts5_tokenizer** +); + +/* +** End of interface to code in fts5.c. +**************************************************************************/ + /************************************************************************** ** Interface to code in fts5_config.c. fts5_config.c contains contains code ** to parse the arguments passed to the CREATE VIRTUAL TABLE statement. @@ -50,10 +67,13 @@ struct Fts5Config { char **azCol; /* Column names */ int nPrefix; /* Number of prefix indexes */ int *aPrefix; /* Sizes in bytes of nPrefix prefix indexes */ - sqlite3_tokenizer *pTokenizer; /* Tokenizer instance for this table */ + Fts5Tokenizer *pTok; + fts5_tokenizer *pTokApi; }; -int sqlite3Fts5ConfigParse(sqlite3*, int, const char**, Fts5Config**, char**); +int sqlite3Fts5ConfigParse( + Fts5Global*, sqlite3*, int, const char **, Fts5Config**, char** +); void sqlite3Fts5ConfigFree(Fts5Config*); int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig); @@ -403,7 +423,7 @@ i64 sqlite3Fts5ExprRowid(Fts5Expr*); void sqlite3Fts5ExprFree(Fts5Expr*); /* Called during startup to register a UDF with SQLite */ -int sqlite3Fts5ExprInit(sqlite3*); +int sqlite3Fts5ExprInit(Fts5Global*, sqlite3*); int sqlite3Fts5ExprPhraseCount(Fts5Expr*); int sqlite3Fts5ExprPhraseSize(Fts5Expr*, int iPhrase); @@ -453,32 +473,25 @@ void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token*); 
**************************************************************************/ -/************************************************************************** -** Interface to code in fts5.c. -*/ -typedef struct Fts5Global Fts5Global; - -int sqlite3Fts5CreateAux( - Fts5Global*, - const char*, - void*, - fts5_extension_function, - void(*)(void*) -); -/* -** End of interface to code in fts5.c. -**************************************************************************/ - /************************************************************************** ** Interface to code in fts5_aux.c. */ -int sqlite3Fts5AuxInit(Fts5Global*); +int sqlite3Fts5AuxInit(fts5_api*); /* ** End of interface to code in fts5_aux.c. **************************************************************************/ +/************************************************************************** +** Interface to code in fts5_tokenizer.c. +*/ + +int sqlite3Fts5TokenizerInit(fts5_api*); +/* +** End of interface to code in fts5_tokenizer.c. +**************************************************************************/ + /************************************************************************** ** Interface to code in fts5_sorter.c. 
*/ diff --git a/ext/fts5/fts5_aux.c b/ext/fts5/fts5_aux.c index 85bad5c496..186b43c166 100644 --- a/ext/fts5/fts5_aux.c +++ b/ext/fts5/fts5_aux.c @@ -956,7 +956,7 @@ static void fts5TestFunction( sqlite3Fts5BufferFree(&s); } -int sqlite3Fts5AuxInit(Fts5Global *pGlobal){ +int sqlite3Fts5AuxInit(fts5_api *pApi){ struct Builtin { const char *zFunc; /* Function name (nul-terminated) */ void *pUserData; /* User-data pointer */ @@ -973,7 +973,7 @@ int sqlite3Fts5AuxInit(Fts5Global *pGlobal){ int i; /* To iterate through builtin functions */ for(i=0; rc==SQLITE_OK && ixCreateFunction(pApi, aBuiltin[i].zFunc, aBuiltin[i].pUserData, aBuiltin[i].xFunc, diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index d326f60cc8..68c340a48f 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -113,26 +113,15 @@ static char *fts5Strdup(const char *z){ return sqlite3_mprintf("%s", z); } -void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**); - /* ** Allocate an instance of the default tokenizer ("simple") at ** Fts5Config.pTokenizer. Return SQLITE_OK if successful, or an SQLite error ** code if an error occurs. */ -static int fts5ConfigDefaultTokenizer(Fts5Config *pConfig){ - const sqlite3_tokenizer_module *pMod; /* Tokenizer module "simple" */ - sqlite3_tokenizer *pTokenizer; /* Tokenizer instance */ - int rc; /* Return code */ - - sqlite3Fts3SimpleTokenizerModule(&pMod); - rc = pMod->xCreate(0, 0, &pTokenizer); - if( rc==SQLITE_OK ){ - pTokenizer->pModule = pMod; - pConfig->pTokenizer = pTokenizer; - } - - return rc; +static int fts5ConfigDefaultTokenizer(Fts5Global *pGlobal, Fts5Config *pConfig){ + return sqlite3Fts5GetTokenizer( + pGlobal, 0, 0, &pConfig->pTok, &pConfig->pTokApi + ); } /* @@ -148,6 +137,7 @@ static int fts5ConfigDefaultTokenizer(Fts5Config *pConfig){ ** such error message using sqlite3_free(). 
*/ int sqlite3Fts5ConfigParse( + Fts5Global *pGlobal, sqlite3 *db, int nArg, /* Number of arguments */ const char **azArg, /* Array of nArg CREATE VIRTUAL TABLE args */ @@ -206,8 +196,8 @@ int sqlite3Fts5ConfigParse( } } - if( rc==SQLITE_OK && pRet->pTokenizer==0 ){ - rc = fts5ConfigDefaultTokenizer(pRet); + if( rc==SQLITE_OK && pRet->pTok==0 ){ + rc = fts5ConfigDefaultTokenizer(pGlobal, pRet); } if( rc!=SQLITE_OK ){ @@ -223,8 +213,8 @@ int sqlite3Fts5ConfigParse( void sqlite3Fts5ConfigFree(Fts5Config *pConfig){ if( pConfig ){ int i; - if( pConfig->pTokenizer ){ - pConfig->pTokenizer->pModule->xDestroy(pConfig->pTokenizer); + if( pConfig->pTok && pConfig->pTokApi->xDelete ){ + pConfig->pTokApi->xDelete(pConfig->pTok); } sqlite3_free(pConfig->zDb); sqlite3_free(pConfig->zName); @@ -302,27 +292,7 @@ int sqlite3Fts5Tokenize( void *pCtx, /* Context passed to xToken() */ int (*xToken)(void*, const char*, int, int, int, int) /* Callback */ ){ - const sqlite3_tokenizer_module *pMod = pConfig->pTokenizer->pModule; - sqlite3_tokenizer_cursor *pCsr = 0; - int rc; - - rc = pMod->xOpen(pConfig->pTokenizer, pText, nText, &pCsr); - assert( rc==SQLITE_OK || pCsr==0 ); - if( rc==SQLITE_OK ){ - const char *pToken; /* Pointer to token buffer */ - int nToken; /* Size of token in bytes */ - int iStart, iEnd, iPos; /* Start, end and position of token */ - pCsr->pTokenizer = pConfig->pTokenizer; - for(rc = pMod->xNext(pCsr, &pToken, &nToken, &iStart, &iEnd, &iPos); - rc==SQLITE_OK; - rc = pMod->xNext(pCsr, &pToken, &nToken, &iStart, &iEnd, &iPos) - ){ - if( (rc = xToken(pCtx, pToken, nToken, iStart, iEnd, iPos)) ) break; - } - if( rc==SQLITE_DONE ) rc = SQLITE_OK; - pMod->xClose(pCsr); - } - return rc; + return pConfig->pTokApi->xTokenize(pConfig->pTok, pCtx, pText, nText, xToken); } diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index b1fbe9ea97..efb91dadb6 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -1520,20 +1520,22 @@ static char 
*fts5ExprPrint(Fts5Config *pConfig, Fts5ExprNode *pExpr){ } /* -** The implementation of user-defined scalar function fts5_expr(). +** The implementation of user-defined scalar functions fts5_expr() (bTcl==0) +** and fts5_expr_tcl() (bTcl!=0). */ static void fts5ExprFunction( sqlite3_context *pCtx, /* Function call context */ int nArg, /* Number of args */ - sqlite3_value **apVal /* Function arguments */ + sqlite3_value **apVal, /* Function arguments */ + int bTcl ){ + Fts5Global *pGlobal = (Fts5Global*)sqlite3_user_data(pCtx); sqlite3 *db = sqlite3_context_db_handle(pCtx); const char *zExpr = 0; char *zErr = 0; Fts5Expr *pExpr = 0; int rc; int i; - int bTcl = sqlite3_user_data(pCtx)!=0; const char **azConfig; /* Array of arguments for Fts5Config */ const char *zNearsetCmd = "nearset"; @@ -1558,7 +1560,7 @@ static void fts5ExprFunction( } zExpr = (const char*)sqlite3_value_text(apVal[0]); - rc = sqlite3Fts5ConfigParse(db, nConfig, azConfig, &pConfig, &zErr); + rc = sqlite3Fts5ConfigParse(pGlobal, db, nConfig, azConfig, &pConfig, &zErr); if( rc==SQLITE_OK ){ rc = sqlite3Fts5ExprNew(pConfig, zExpr, &pExpr, &zErr); } @@ -1588,25 +1590,40 @@ static void fts5ExprFunction( sqlite3Fts5ExprFree(pExpr); } +static void fts5ExprFunctionHr( + sqlite3_context *pCtx, /* Function call context */ + int nArg, /* Number of args */ + sqlite3_value **apVal /* Function arguments */ +){ + fts5ExprFunction(pCtx, nArg, apVal, 0); +} +static void fts5ExprFunctionTcl( + sqlite3_context *pCtx, /* Function call context */ + int nArg, /* Number of args */ + sqlite3_value **apVal /* Function arguments */ +){ + fts5ExprFunction(pCtx, nArg, apVal, 1); +} + /* ** This is called during initialization to register the fts5_expr() scalar ** UDF with the SQLite handle passed as the only argument. 
*/ -int sqlite3Fts5ExprInit(sqlite3 *db){ +int sqlite3Fts5ExprInit(Fts5Global *pGlobal, sqlite3 *db){ struct Fts5ExprFunc { const char *z; - void *p; void (*x)(sqlite3_context*,int,sqlite3_value**); } aFunc[] = { - { "fts5_expr", 0, fts5ExprFunction }, - { "fts5_expr_tcl", (void*)1, fts5ExprFunction }, + { "fts5_expr", fts5ExprFunctionHr }, + { "fts5_expr_tcl", fts5ExprFunctionTcl }, }; int i; int rc = SQLITE_OK; + void *pCtx = (void*)pGlobal; for(i=0; rc==SQLITE_OK && i<(sizeof(aFunc) / sizeof(aFunc[0])); i++){ struct Fts5ExprFunc *p = &aFunc[i]; - rc = sqlite3_create_function(db, p->z, -1, SQLITE_UTF8, p->p, p->x, 0, 0); + rc = sqlite3_create_function(db, p->z, -1, SQLITE_UTF8, pCtx, p->x, 0, 0); } return rc; diff --git a/ext/fts5/fts5_tokenize.c b/ext/fts5/fts5_tokenize.c new file mode 100644 index 0000000000..ef7c767544 --- /dev/null +++ b/ext/fts5/fts5_tokenize.c @@ -0,0 +1,145 @@ +/* +** 2014 May 31 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +*/ + +#include "fts5.h" + + +/* +** Create a "simple" tokenizer. +*/ +static int fts5SimpleCreate( + void *pCtx, + const char **azArg, int nArg, + Fts5Tokenizer **ppOut +){ + *ppOut = 0; + return SQLITE_OK; +} + +/* +** Delete a "simple" tokenizer. +*/ +static void fts5SimpleDelete(Fts5Tokenizer *p){ + return; +} + +/* +** For tokenizers with no "unicode" modifier, the set of token characters +** is the same as the set of ASCII range alphanumeric characters. 
+*/ +static unsigned char aSimpleTokenChar[128] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00..0x0F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10..0x1F */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20..0x2F */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 0x30..0x3F */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40..0x4F */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x50..0x5F */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60..0x6F */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x70..0x7F */ +}; + + +static void simpleFold(char *aOut, const char *aIn, int nByte){ + int i; + for(i=0; i='A' && c<='Z' ) c += 32; + aOut[i] = c; + } +} + +/* +** Tokenize some text using the simple tokenizer. +*/ +static int fts5SimpleTokenize( + Fts5Tokenizer *pTokenizer, + void *pCtx, + const char *pText, int nText, + int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd, int iPos) +){ + int rc; + int ie; + int is = 0; + int iPos = 0; + + char aFold[64]; + int nFold = sizeof(aFold); + char *pFold = aFold; + + do { + int nByte; + + /* Skip any leading divider characters. 
*/ + while( isnFold ){ + if( pFold!=aFold ) sqlite3_free(pFold); + pFold = sqlite3_malloc(nByte*2); + if( pFold==0 ){ + rc = SQLITE_NOMEM; + break; + } + nFold = nByte*2; + } + simpleFold(pFold, &pText[is], nByte); + + /* Invoke the token callback */ + rc = xToken(pCtx, pFold, nByte, is, ie, iPos); + iPos++; + is = ie+1; + }while( isxCreateTokenizer(pApi, + aBuiltin[i].zName, + &aBuiltin[i].pUserData, + &aBuiltin[i].x, + 0 + ); + } + + return SQLITE_OK; +} + + diff --git a/main.mk b/main.mk index 5ac76de2d8..e30bb92c82 100644 --- a/main.mk +++ b/main.mk @@ -80,6 +80,7 @@ LIBOBJ += fts5_expr.o LIBOBJ += fts5_hash.o LIBOBJ += fts5_index.o LIBOBJ += fts5_storage.o +LIBOBJ += fts5_tokenize.o LIBOBJ += fts5parse.o @@ -236,7 +237,8 @@ SRC += \ $(TOP)/ext/fts5/fts5_hash.c \ $(TOP)/ext/fts5/fts5_index.c \ fts5parse.c \ - $(TOP)/ext/fts5/fts5_storage.c + $(TOP)/ext/fts5/fts5_storage.c \ + $(TOP)/ext/fts5/fts5_tokenize.c # Generated source code files @@ -610,10 +612,15 @@ fts5_index.o: $(TOP)/ext/fts5/fts5_index.c $(HDR) $(EXTHDR) fts5_storage.o: $(TOP)/ext/fts5/fts5_storage.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_storage.c +fts5_tokenize.o: $(TOP)/ext/fts5/fts5_tokenize.c $(HDR) $(EXTHDR) + $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_tokenize.c + fts5parse.c: $(TOP)/ext/fts5/fts5parse.y lemon cp $(TOP)/ext/fts5/fts5parse.y . rm -f fts5parse.h ./lemon $(OPTS) fts5parse.y + mv fts5parse.c fts5parse.c.orig + cat fts5parse.c.orig | sed 's/yy/fts5yy/g' | sed 's/YY/fts5YY/g' > fts5parse.c # Rules for building test programs and for running tests diff --git a/manifest b/manifest index 10e421f7a2..3c7f49ac42 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sdocumentation\sfor\stokenizer\sapi\sto\sfts5.h.\sAlso\sadd\sa\sscript\sto\sextract\sextension\sAPI\sdocs\sand\sformat\sthem\sas\shtml. -D 2014-08-25T19:58:54.559 +C Fix\sthe\scustomization\sinterfaces\sso\sthat\sthey\smatch\sthe\sdocumentation. 
+D 2014-11-15T20:07:31.166 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -103,17 +103,18 @@ F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197 F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 -F ext/fts5/extract_api_docs.tcl c30b9aa60260b3a208b0b89df3d8dbf92c6d460c -F ext/fts5/fts5.c dd56525d45b354218b86c9accab2ed12ea4b4f4f -F ext/fts5/fts5.h 064f9bf705e59d23abaa2191b3950604dad98b9f -F ext/fts5/fts5Int.h bc6fa374a42c6121ae8276b20f141d6cd6d8d9f9 -F ext/fts5/fts5_aux.c 31e581413ecab0962ce2b37468f9f658f36f4b0e +F ext/fts5/extract_api_docs.tcl 6320db4a1d0722a4e2069e661381ad75e9889786 +F ext/fts5/fts5.c cc3f0e4bac499c81d1311199f929dcad5e40ee8e +F ext/fts5/fts5.h a77cad780eec8f10850fdba0f44079a92561b790 +F ext/fts5/fts5Int.h a3c46f9dae13277de6fc3a6f8863d337ca660d6a +F ext/fts5/fts5_aux.c 6b0612e4312ca27264f7dacb0c97abc723a4b472 F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00 -F ext/fts5/fts5_config.c f4ebf143e141b8c77355e3b15aba81b7be51d710 -F ext/fts5/fts5_expr.c 7b8e380233176053841904a86006696ee8f6cd24 +F ext/fts5/fts5_config.c a292fe73864086e51e7974d842cc09f6379fbae0 +F ext/fts5/fts5_expr.c d317be07d70223a6865444f17982570260b690a5 F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279 F ext/fts5/fts5_index.c 3f4d84a1762e4284319739d4672b90b18b91060a F ext/fts5/fts5_storage.c 5913aa01a1dada1c5e1a39e4cbb44e84c5f7f350 +F ext/fts5/fts5_tokenize.c 8360c0d1ae0d4696f3cc13f7c67a2db6011cdc5b F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb @@ -158,7 +159,7 @@ F 
ext/rtree/viewrtree.tcl eea6224b3553599ae665b239bd827e182b466024 F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8 F magic.txt 8273bf49ba3b0c8559cb2774495390c31fd61c60 -F main.mk c4fff232b880b91bf665cd2951465de61178e444 +F main.mk 8a02fddafc05159c4b7d65200e912cf549f978c1 F mkopcodec.awk c2ff431854d702cdd2d779c9c0d1f58fa16fa4ea F mkopcodeh.awk c6b3fa301db6ef7ac916b14c60868aeaec1337b5 F mkso.sh fd21c06b063bb16a5d25deea1752c2da6ac3ed83 @@ -201,7 +202,7 @@ F src/journal.c b4124532212b6952f42eb2c12fa3c25701d8ba8d F src/legacy.c 0df0b1550b9cc1f58229644735e317ac89131f12 F src/lempar.c cdf0a000315332fc9b50b62f3b5e22e080a0952b F src/loadext.c 867c7b330b740c6c917af9956b13b81d0a048303 -F src/main.c e777879ad7c431f5b3b5d49c8419727b61d7c1be +F src/main.c afc0ae834a8abca9079908f6193b3886564164a0 F src/malloc.c 0203ebce9152c6a0e5de520140b8ba65187350be F src/mem0.c 6a55ebe57c46ca1a7d98da93aaa07f99f1059645 F src/mem1.c c0c990fcaddff810ea277b4fb5d9138603dd5d4b @@ -257,7 +258,7 @@ F src/test_async.c 21e11293a2f72080eda70e1124e9102044531cd8 F src/test_autoext.c dea8a01a7153b9adc97bd26161e4226329546e12 F src/test_backup.c 3875e899222b651e18b662f86e0e50daa946344e F src/test_btree.c 2e9978eca99a9a4bfa8cae949efb00886860a64f -F src/test_config.c dabaa32868974e1ae39770cc17d7e066a9c38e6d +F src/test_config.c 9acba5c44c1562159104096e6e2ed5d293d4b86d F src/test_demovfs.c 69b2085076654ebc18014cbc6386f04409c959a9 F src/test_devsym.c e7498904e72ba7491d142d5c83b476c4e76993bc F src/test_fs.c ced436e3d4b8e4681328409b8081051ce614e28f @@ -597,17 +598,17 @@ F test/fts4merge3.test aab02a09f50fe6baaddc2e159c3eabc116d45fc7 F test/fts4merge4.test d895b1057a7798b67e03455d0fa50e9ea836c47b F test/fts4noti.test 524807f0c36d49deea7920cdd4cd687408b58849 F test/fts4unicode.test 01ec3fe2a7c3cfff3b4c0581b83caa11b33efa36 -F test/fts5aa.test 4c7cbf1d38d30e7aaa8febf44958dd13bbb53bf8 -F test/fts5ab.test dc04ed48cf93ca957d174406e6c192f2ff4f3397 -F 
test/fts5ac.test 399533fe52b7383053368ab8ba01ae182391e5d7 -F test/fts5ad.test 2ed38bbc865678cb2905247120d02ebba7f20e07 -F test/fts5ae.test cb37b3135a00d3afd5492ec534ecf654be5ff69e -F test/fts5af.test 9ebe23aa3875896076952c7bc6e8308813a63c74 -F test/fts5ag.test 0747bf3bade16d5165810cf891f875933b28b420 -F test/fts5ah.test 009b993a9b7ebc43f84c10e53bd778b1dc8ffbe7 -F test/fts5ai.test 4dee71c23ddbcf2b0fc5d5586f241002b883c10e -F test/fts5aj.test 67014e9fc7c069425d67d549b133742b67755047 -F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4 +F test/fts5aa.test 16bf1dbb92d4d63c7c357b480b1a47309f654ad1 +F test/fts5ab.test 657d6dc5ddc57bfea4af1bb85204d4f3539cd3e8 +F test/fts5ac.test f38ceca8a43fa0ff86122bec72428a4067b17bc4 +F test/fts5ad.test d29ff407c70df470c9a8fcbfe5bc80efd662f2c4 +F test/fts5ae.test d4141786d817e0198f89f8c66749af38359839a7 +F test/fts5af.test d24e3b0f879998ef5f60087272f8ab7b3a8fd4dc +F test/fts5ag.test 1c6c188d1bdc41b2277db3f4ddfea7d90bf44ceb +F test/fts5ah.test af9274cdb58a69780c7e57e61581990665ac0fb6 +F test/fts5ai.test aa2b5fd0f8d2cf59ac0211111e63cbca3b40ed7d +F test/fts5aj.test fe5c40216cac8072f29e454ee0540c7b89d17ccd +F test/fts5ea.test afaf3497b43add578384dc1fd26b0342738abe87 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f @@ -1172,7 +1173,7 @@ F tool/mkopts.tcl 66ac10d240cc6e86abd37dc908d50382f84ff46e F tool/mkpragmatab.tcl 78a77b2c554d534c6f2dc903130186ed15715460 F tool/mkspeedsql.tcl a1a334d288f7adfe6e996f2e712becf076745c97 F tool/mksqlite3c-noext.tcl 1712d3d71256ca1f297046619c89e77a4d7c8f6d -F tool/mksqlite3c.tcl becaa9d5617dfe137e73dddda9dab8f58bc71e8c +F tool/mksqlite3c.tcl 5be4e6b3ecf563c1ec6d579dc03ea2839e7fbc8b F tool/mksqlite3h.tcl ba24038056f51fde07c0079c41885ab85e2cff12 F tool/mksqlite3internalh.tcl b6514145a7d5321b47e64e19b8116cc44f973eb1 F tool/mkvsix.tcl 52a4c613707ac34ae9c226e5ccc69cb948556105 @@ 
-1203,7 +1204,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 2397404e152b908d838e6491294b263b05943b3f -R f1a35566903c71a22822fa6dd6758208 +P e240d467e60b7755486aae5e8b0824f7c741f852 +R 28baa98ae078d2f041a83a26b4550455 U dan -Z 8c301746cf7784949ad4603ff5681e4e +Z 1c1b566687b968f39cda6c2d32a692b6 diff --git a/manifest.uuid b/manifest.uuid index 3372bdccc8..ccae749918 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -e240d467e60b7755486aae5e8b0824f7c741f852 \ No newline at end of file +fba0b5fc7eead07a4853e78e02d788e7c714f6cd \ No newline at end of file diff --git a/src/main.c b/src/main.c index 5d894b2ea9..d0ecbb56b8 100644 --- a/src/main.c +++ b/src/main.c @@ -19,6 +19,9 @@ #ifdef SQLITE_ENABLE_FTS3 # include "fts3.h" #endif +#ifdef SQLITE_ENABLE_FTS5 +int sqlite3Fts5Init(sqlite3*); +#endif #ifdef SQLITE_ENABLE_RTREE # include "rtree.h" #endif @@ -2609,7 +2612,12 @@ static int openDatabase( #ifdef SQLITE_ENABLE_FTS3 if( !db->mallocFailed && rc==SQLITE_OK ){ rc = sqlite3Fts3Init(db); - if( rc==SQLITE_OK ) rc = sqlite3Fts5Init(db); + } +#endif + +#ifdef SQLITE_ENABLE_FTS5 + if( !db->mallocFailed && rc==SQLITE_OK ){ + rc = sqlite3Fts5Init(db); } #endif diff --git a/src/test_config.c b/src/test_config.c index bf8afd8e67..2f8bed4477 100644 --- a/src/test_config.c +++ b/src/test_config.c @@ -330,6 +330,12 @@ static void set_options(Tcl_Interp *interp){ Tcl_SetVar2(interp, "sqlite_options", "fts3", "0", TCL_GLOBAL_ONLY); #endif +#ifdef SQLITE_ENABLE_FTS5 + Tcl_SetVar2(interp, "sqlite_options", "fts5", "1", TCL_GLOBAL_ONLY); +#else + Tcl_SetVar2(interp, "sqlite_options", "fts5", "0", TCL_GLOBAL_ONLY); +#endif + #if defined(SQLITE_ENABLE_FTS3) && defined(SQLITE_ENABLE_FTS4_UNICODE61) Tcl_SetVar2(interp, "sqlite_options", "fts3_unicode", "1", 
TCL_GLOBAL_ONLY); #else diff --git a/test/fts5aa.test b/test/fts5aa.test index f5c1977f20..4126034687 100644 --- a/test/fts5aa.test +++ b/test/fts5aa.test @@ -17,7 +17,7 @@ source $testdir/tester.tcl set testprefix fts5aa # If SQLITE_ENABLE_FTS3 is defined, omit this file. -ifcapable !fts3 { +ifcapable !fts5 { finish_test return } diff --git a/test/fts5ab.test b/test/fts5ab.test index 1f6b7171da..4e2e9b13d0 100644 --- a/test/fts5ab.test +++ b/test/fts5ab.test @@ -17,8 +17,8 @@ set testdir [file dirname $argv0] source $testdir/tester.tcl set testprefix fts5ab -# If SQLITE_ENABLE_FTS3 is defined, omit this file. -ifcapable !fts3 { +# If SQLITE_ENABLE_FTS5 is not defined, omit this file. +ifcapable !fts5 { finish_test return } diff --git a/test/fts5ac.test b/test/fts5ac.test index b137e3a938..be6177705e 100644 --- a/test/fts5ac.test +++ b/test/fts5ac.test @@ -17,8 +17,8 @@ set testdir [file dirname $argv0] source $testdir/tester.tcl set testprefix fts5ac -# If SQLITE_ENABLE_FTS3 is defined, omit this file. -ifcapable !fts3 { +# If SQLITE_ENABLE_FTS5 is not defined, omit this file. +ifcapable !fts5 { finish_test return } diff --git a/test/fts5ad.test b/test/fts5ad.test index 70349388ee..824444a867 100644 --- a/test/fts5ad.test +++ b/test/fts5ad.test @@ -17,8 +17,8 @@ set testdir [file dirname $argv0] source $testdir/tester.tcl set testprefix fts5ad -# If SQLITE_ENABLE_FTS3 is defined, omit this file. -ifcapable !fts3 { +# If SQLITE_ENABLE_FTS5 is not defined, omit this file. +ifcapable !fts5 { finish_test return } diff --git a/test/fts5ae.test b/test/fts5ae.test index 4a5e4d041e..32d75616ae 100644 --- a/test/fts5ae.test +++ b/test/fts5ae.test @@ -17,8 +17,8 @@ set testdir [file dirname $argv0] source $testdir/tester.tcl set testprefix fts5ae -# If SQLITE_ENABLE_FTS3 is defined, omit this file. -ifcapable !fts3 { +# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
+ifcapable !fts5 { finish_test return } diff --git a/test/fts5af.test b/test/fts5af.test index d59df77291..da70dc7ae6 100644 --- a/test/fts5af.test +++ b/test/fts5af.test @@ -19,8 +19,8 @@ set testdir [file dirname $argv0] source $testdir/tester.tcl set testprefix fts5af -# If SQLITE_ENABLE_FTS3 is defined, omit this file. -ifcapable !fts3 { +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { finish_test return } @@ -138,7 +138,7 @@ foreach {tn doc res} { 3.8 {o o o o o o o X Y} {...o o o o o [X Y]} } { - do_snippet_test 1.$tn $doc "X + Y" $res + do_snippet_test 2.$tn $doc "X + Y" $res } finish_test diff --git a/test/fts5ag.test b/test/fts5ag.test index 14063669f4..647604ef64 100644 --- a/test/fts5ag.test +++ b/test/fts5ag.test @@ -16,8 +16,8 @@ set testdir [file dirname $argv0] source $testdir/tester.tcl set testprefix fts5ag -# If SQLITE_ENABLE_FTS3 is defined, omit this file. -ifcapable !fts3 { +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { finish_test return } diff --git a/test/fts5ah.test b/test/fts5ah.test index f5e25848ab..fd78d23577 100644 --- a/test/fts5ah.test +++ b/test/fts5ah.test @@ -16,8 +16,8 @@ set testdir [file dirname $argv0] source $testdir/tester.tcl set testprefix fts5ah -# If SQLITE_ENABLE_FTS3 is defined, omit this file. -ifcapable !fts3 { +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { finish_test return } diff --git a/test/fts5ai.test b/test/fts5ai.test index 705ca15988..eba9d09d71 100644 --- a/test/fts5ai.test +++ b/test/fts5ai.test @@ -18,8 +18,8 @@ set testdir [file dirname $argv0] source $testdir/tester.tcl set testprefix fts5ai -# If SQLITE_ENABLE_FTS3 is defined, omit this file. -ifcapable !fts3 { +# If SQLITE_ENABLE_FTS5 is defined, omit this file. 
+ifcapable !fts5 { finish_test return } diff --git a/test/fts5aj.test b/test/fts5aj.test index cb8e2d2a2f..8b333ae562 100644 --- a/test/fts5aj.test +++ b/test/fts5aj.test @@ -20,8 +20,8 @@ set testdir [file dirname $argv0] source $testdir/tester.tcl set testprefix fts5aj -# If SQLITE_ENABLE_FTS3 is defined, omit this file. -ifcapable !fts3 { +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { finish_test return } diff --git a/test/fts5ea.test b/test/fts5ea.test index a76f901d06..1518b8892c 100644 --- a/test/fts5ea.test +++ b/test/fts5ea.test @@ -14,8 +14,8 @@ set testdir [file dirname $argv0] source $testdir/tester.tcl set testprefix fts5ea -# If SQLITE_ENABLE_FTS3 is defined, omit this file. -ifcapable !fts3 { +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { finish_test return } diff --git a/tool/mksqlite3c.tcl b/tool/mksqlite3c.tcl index 0d3120ce1a..073d67a5dd 100644 --- a/tool/mksqlite3c.tcl +++ b/tool/mksqlite3c.tcl @@ -335,9 +335,11 @@ foreach file { fts5.c fts5_config.c fts5_expr.c + fts5_hash.c fts5_index.c fts5parse.c fts5_storage.c + fts5_tokenize.c rtree.c icu.c