From 832a58a68cb62aca4c2e089194f23ecfb066fb95 Mon Sep 17 00:00:00 2001 From: danielk1977 Date: Fri, 22 Jun 2007 15:21:15 +0000 Subject: [PATCH] Extend fts2 so that user defined tokenizers may be added. Add a tokenizer that uses the ICU library if available. Documentation and tests to come. (CVS 4108) FossilOrigin-Name: 68677e420c744b39ea9d7399819e0f376748886d --- ext/fts2/fts2.c | 116 ++++++++++++++--- ext/fts2/fts2_icu.c | 257 ++++++++++++++++++++++++++++++++++++++ ext/fts2/fts2_tokenizer.c | 230 ++++++++++++++++++++++++++++++++++ ext/fts2/fts2_tokenizer.h | 12 -- ext/icu/README.txt | 8 +- ext/icu/icu.c | 7 +- main.mk | 2 + manifest | 39 +++--- manifest.uuid | 2 +- src/func.c | 10 +- src/loadext.c | 1 + src/main.c | 33 +++-- src/sqlite.h.in | 15 ++- src/sqlite3ext.h | 4 +- src/sqliteInt.h | 3 +- src/vtab.c | 56 ++++++--- test/fts2token.test | 107 ++++++++++++++++ 17 files changed, 821 insertions(+), 81 deletions(-) create mode 100644 ext/fts2/fts2_icu.c create mode 100644 ext/fts2/fts2_tokenizer.c create mode 100644 test/fts2token.test diff --git a/ext/fts2/fts2.c b/ext/fts2/fts2.c index 3a5ae4565a..549adc2eef 100644 --- a/ext/fts2/fts2.c +++ b/ext/fts2/fts2.c @@ -1847,7 +1847,7 @@ static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){ return (fulltext_vtab *) c->base.pVtab; } -static const sqlite3_module fulltextModule; /* forward declaration */ +static const sqlite3_module fts2Module; /* forward declaration */ /* Return a dynamically generated statement of the form * insert into %_content (rowid, ...) values (?, ...) 
@@ -2761,6 +2761,7 @@ static char *fulltextSchema( */ static int constructVtab( sqlite3 *db, /* The SQLite database connection */ + fts2Hash *pHash, /* Hash table containing tokenizers */ TableSpec *spec, /* Parsed spec information from parseSpec() */ sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */ char **pzErr /* Write any error message here */ @@ -2771,6 +2772,9 @@ static int constructVtab( const sqlite3_tokenizer_module *m = NULL; char *schema; + char const *zTok; /* Name of tokenizer to use for this fts table */ + int nTok; /* Length of zTok, including nul terminator */ + v = (fulltext_vtab *) malloc(sizeof(fulltext_vtab)); if( v==0 ) return SQLITE_NOMEM; CLEAR(v); @@ -2787,16 +2791,20 @@ static int constructVtab( if( spec->azTokenizer==0 ){ return SQLITE_NOMEM; } - /* TODO(shess) For now, add new tokenizers as else if clauses. */ - if( spec->azTokenizer[0]==0 || startsWith(spec->azTokenizer[0], "simple") ){ - sqlite3Fts2SimpleTokenizerModule(&m); - }else if( startsWith(spec->azTokenizer[0], "porter") ){ - sqlite3Fts2PorterTokenizerModule(&m); - }else{ + + zTok = spec->azTokenizer[0]; + if( !zTok ){ + zTok = "simple"; + } + nTok = strlen(zTok)+1; + + m = (sqlite3_tokenizer_module *)sqlite3Fts2HashFind(pHash, zTok, nTok); + if( !m ){ *pzErr = sqlite3_mprintf("unknown tokenizer: %s", spec->azTokenizer[0]); rc = SQLITE_ERROR; goto err; } + for(n=0; spec->azTokenizer[n]; n++){} if( n ){ rc = m->xCreate(n-1, (const char*const*)&spec->azTokenizer[1], @@ -2841,7 +2849,7 @@ static int fulltextConnect( int rc = parseSpec(&spec, argc, argv, pzErr); if( rc!=SQLITE_OK ) return rc; - rc = constructVtab(db, &spec, ppVTab, pzErr); + rc = constructVtab(db, (fts2Hash *)pAux, &spec, ppVTab, pzErr); clearTableSpec(&spec); return rc; } @@ -2887,7 +2895,7 @@ static int fulltextCreate(sqlite3 *db, void *pAux, ");"); if( rc!=SQLITE_OK ) goto out; - rc = constructVtab(db, &spec, ppVTab, pzErr); + rc = constructVtab(db, (fts2Hash *)pAux, &spec, ppVTab, pzErr); out: 
clearTableSpec(&spec); @@ -5833,7 +5841,7 @@ static int fulltextFindFunction( return 0; } -static const sqlite3_module fulltextModule = { +static const sqlite3_module fts2Module = { /* iVersion */ 0, /* xCreate */ fulltextCreate, /* xConnect */ fulltextConnect, @@ -5855,15 +5863,93 @@ static const sqlite3_module fulltextModule = { /* xFindFunction */ fulltextFindFunction, }; +static void hashDestroy(void *p){ + fts2Hash *pHash = (fts2Hash *)p; + sqlite3Fts2HashClear(pHash); + sqlite3_free(pHash); +} + +/* +** The fts2 built-in tokenizers - "simple" and "porter" - are implemented +** in files fts2_tokenizer1.c and fts2_porter.c respectively. The following +** two forward declarations are for functions declared in these files +** used to retrieve the respective implementations. +** +** Calling sqlite3Fts2SimpleTokenizerModule() sets the value pointed +** to by the argument to point at the "simple" tokenizer implementation. +** Function ...PorterTokenizerModule() sets *ppModule to point to the +** porter tokenizer/stemmer implementation. +*/ +void sqlite3Fts2SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule); +void sqlite3Fts2PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule); +void sqlite3Fts2IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule); + +/* +** Initialise the fts2 extension. If this extension is built as part +** of the sqlite library, then this function is called directly by +** SQLite. If fts2 is built as a dynamically loadable extension, this +** function is called by the sqlite3_extension_init() entry point. 
+*/ int sqlite3Fts2Init(sqlite3 *db){ - sqlite3_overload_function(db, "snippet", -1); - sqlite3_overload_function(db, "offsets", -1); - return sqlite3_create_module(db, "fts2", &fulltextModule, 0); + int rc = SQLITE_OK; + fts2Hash *pHash = 0; + const sqlite3_tokenizer_module *pSimple = 0; + const sqlite3_tokenizer_module *pPorter = 0; + const sqlite3_tokenizer_module *pIcu = 0; + + sqlite3Fts2SimpleTokenizerModule(&pSimple); + sqlite3Fts2PorterTokenizerModule(&pPorter); +#ifdef SQLITE_ENABLE_ICU + sqlite3Fts2IcuTokenizerModule(&pIcu); +#endif + + /* Allocate and initialise the hash-table used to store tokenizers. */ + pHash = sqlite3_malloc(sizeof(fts2Hash)); + if( !pHash ){ + rc = SQLITE_NOMEM; + }else{ + sqlite3Fts2HashInit(pHash, FTS2_HASH_STRING, 1); + } + + /* Load the built-in tokenizers into the hash table */ + if( rc==SQLITE_OK ){ + if( sqlite3Fts2HashInsert(pHash, "simple", 7, (void *)pSimple) + || sqlite3Fts2HashInsert(pHash, "porter", 7, (void *)pPorter) + || (pIcu && sqlite3Fts2HashInsert(pHash, "icu", 4, (void *)pIcu)) + ){ + rc = SQLITE_NOMEM; + } + } + + /* Create the virtual table wrapper around the hash-table and overload + ** the two scalar functions. If this is successful, register the + ** module with sqlite. + */ + if( SQLITE_OK==rc + && SQLITE_OK==(rc = sqlite3Fts2InitHashTable(db, pHash, "fts2_tokenizer")) + && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1)) + && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", -1)) + ){ + return sqlite3_create_module_v2( + db, "fts2", &fts2Module, (void *)pHash, hashDestroy + ); + } + + /* An error has occurred. Delete the hash table and return the error code. 
*/ + assert( rc!=SQLITE_OK ); + if( pHash ){ + sqlite3Fts2HashClear(pHash); + sqlite3_free(pHash); + } + return rc; } #if !SQLITE_CORE -int sqlite3_extension_init(sqlite3 *db, char **pzErrMsg, - const sqlite3_api_routines *pApi){ +int sqlite3_extension_init( + sqlite3 *db, + char **pzErrMsg, + const sqlite3_api_routines *pApi +){ SQLITE_EXTENSION_INIT2(pApi) return sqlite3Fts2Init(db); } diff --git a/ext/fts2/fts2_icu.c b/ext/fts2/fts2_icu.c new file mode 100644 index 0000000000..ed15f333d6 --- /dev/null +++ b/ext/fts2/fts2_icu.c @@ -0,0 +1,257 @@ +/* +** 2007 June 22 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file implements a tokenizer for fts2 based on the ICU library. +** +** $Id: fts2_icu.c,v 1.1 2007/06/22 15:21:16 danielk1977 Exp $ +*/ + +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) +#ifdef SQLITE_ENABLE_ICU + +#include +#include +#include "fts2_tokenizer.h" + +#include +#include +#include +#include + +typedef struct IcuTokenizer IcuTokenizer; +typedef struct IcuCursor IcuCursor; + +struct IcuTokenizer { + sqlite3_tokenizer base; + char *zLocale; +}; + +struct IcuCursor { + sqlite3_tokenizer_cursor base; + + UBreakIterator *pIter; /* ICU break-iterator object */ + int nChar; /* Number of UChar elements in pInput */ + UChar *aChar; /* Copy of input using utf-16 encoding */ + int *aOffset; /* Offsets of each character in utf-8 input */ + + int nBuffer; + char *zBuffer; + + int iToken; +}; + +/* +** Create a new tokenizer instance. 
+*/ +static int icuCreate( + int argc, /* Number of entries in argv[] */ + const char * const *argv, /* Tokenizer creation arguments */ + sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */ +){ + IcuTokenizer *p; + int n = 0; + + if( argc>0 ){ + n = strlen(argv[0])+1; + } + p = (IcuTokenizer *)sqlite3_malloc(sizeof(IcuTokenizer)+n); + if( !p ){ + return SQLITE_NOMEM; + } + memset(p, 0, sizeof(IcuTokenizer)); + + if( n ){ + p->zLocale = (char *)&p[1]; + memcpy(p->zLocale, argv[0], n); + } + + *ppTokenizer = (sqlite3_tokenizer *)p; + + return SQLITE_OK; +} + +/* +** Destroy a tokenizer +*/ +static int icuDestroy(sqlite3_tokenizer *pTokenizer){ + IcuTokenizer *p = (IcuTokenizer *)pTokenizer; + sqlite3_free(p); + return SQLITE_OK; +} + +/* +** Prepare to begin tokenizing a particular string. The input +** string to be tokenized is pInput[0..nBytes-1]. A cursor +** used to incrementally tokenize this string is returned in +** *ppCursor. +*/ +static int icuOpen( + sqlite3_tokenizer *pTokenizer, /* The tokenizer */ + const char *zInput, /* Input string */ + int nInput, /* Length of zInput in bytes */ + sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */ +){ + IcuTokenizer *p = (IcuTokenizer *)pTokenizer; + IcuCursor *pCsr; + + const int32_t opt = U_FOLD_CASE_DEFAULT; + UErrorCode status = U_ZERO_ERROR; + int nChar; + + UChar32 c; + int iInput = 0; + int iOut = 0; + + *ppCursor = 0; + + nChar = nInput+1; + pCsr = (IcuCursor *)sqlite3_malloc( + sizeof(IcuCursor) + /* IcuCursor */ + nChar * sizeof(UChar) + /* IcuCursor.aChar[] */ + (nChar+1) * sizeof(int) /* IcuCursor.aOffset[] */ + ); + if( !pCsr ){ + return SQLITE_NOMEM; + } + memset(pCsr, 0, sizeof(IcuCursor)); + pCsr->aChar = (UChar *)&pCsr[1]; + pCsr->aOffset = (int *)&pCsr->aChar[nChar]; + + pCsr->aOffset[iOut] = iInput; + U8_NEXT(zInput, iInput, nInput, c); + while( c>0 ){ + int isError = 0; + c = u_foldCase(c, opt); + U16_APPEND(pCsr->aChar, iOut, nChar, c, isError); + if( isError ){ + 
sqlite3_free(pCsr); + return SQLITE_ERROR; + } + pCsr->aOffset[iOut] = iInput; + + if( iInputpIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status); + if( !U_SUCCESS(status) ){ + sqlite3_free(pCsr); + return SQLITE_ERROR; + } + pCsr->nChar = iOut; + + ubrk_first(pCsr->pIter); + *ppCursor = (sqlite3_tokenizer_cursor *)pCsr; + return SQLITE_OK; +} + +/* +** Close a tokenization cursor previously opened by a call to icuOpen(). +*/ +static int icuClose(sqlite3_tokenizer_cursor *pCursor){ + IcuCursor *pCsr = (IcuCursor *)pCursor; + ubrk_close(pCsr->pIter); + sqlite3_free(pCsr->zBuffer); + sqlite3_free(pCsr); + return SQLITE_OK; +} + +/* +** Extract the next token from a tokenization cursor. +*/ +static int icuNext( + sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */ + const char **ppToken, /* OUT: *ppToken is the token text */ + int *pnBytes, /* OUT: Number of bytes in token */ + int *piStartOffset, /* OUT: Starting offset of token */ + int *piEndOffset, /* OUT: Ending offset of token */ + int *piPosition /* OUT: Position integer of token */ +){ + IcuCursor *pCsr = (IcuCursor *)pCursor; + + int iStart = 0; + int iEnd = 0; + int nByte = 0; + + while( iStart==iEnd ){ + UChar32 c; + + iStart = ubrk_current(pCsr->pIter); + iEnd = ubrk_next(pCsr->pIter); + if( iEnd==UBRK_DONE ){ + return SQLITE_DONE; + } + + while( iStartaChar, iWhite, pCsr->nChar, c); + if( u_isspace(c) ){ + iStart = iWhite; + }else{ + break; + } + } + assert(iStart<=iEnd); + } + + do { + UErrorCode status = U_ZERO_ERROR; + if( nByte ){ + char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte); + if( !zNew ){ + return SQLITE_NOMEM; + } + pCsr->zBuffer = zNew; + pCsr->nBuffer = nByte; + } + + u_strToUTF8( + pCsr->zBuffer, pCsr->nBuffer, &nByte, /* Output vars */ + &pCsr->aChar[iStart], iEnd-iStart, /* Input vars */ + &status /* Output success/failure */ + ); + } while( nByte>pCsr->nBuffer ); + + *ppToken = pCsr->zBuffer; + *pnBytes = nByte; + *piStartOffset = 
pCsr->aOffset[iStart]; + *piEndOffset = pCsr->aOffset[iEnd]; + *piPosition = pCsr->iToken++; + + return SQLITE_OK; +} + +/* +** The set of routines that implement the ICU tokenizer +*/ +static const sqlite3_tokenizer_module icuTokenizerModule = { + 0, /* iVersion */ + icuCreate, /* xCreate */ + icuDestroy, /* xDestroy */ + icuOpen, /* xOpen */ + icuClose, /* xClose */ + icuNext, /* xNext */ +}; + +/* +** Set *ppModule to point at the implementation of the ICU tokenizer. +*/ +void sqlite3Fts2IcuTokenizerModule( + sqlite3_tokenizer_module const**ppModule +){ + *ppModule = &icuTokenizerModule; +} + +#endif /* defined(SQLITE_ENABLE_ICU) */ +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */ diff --git a/ext/fts2/fts2_tokenizer.c b/ext/fts2/fts2_tokenizer.c new file mode 100644 index 0000000000..95ab370fbf --- /dev/null +++ b/ext/fts2/fts2_tokenizer.c @@ -0,0 +1,230 @@ + +#include "sqlite3.h" +#include "sqlite3ext.h" +SQLITE_EXTENSION_INIT1 + +#include "fts2_hash.h" +#include "fts2_tokenizer.h" +#include + +/* +** Implementation of the SQL scalar function for accessing the underlying +** hash table. This function may be called as follows: +** +** SELECT <function-name>(<key-name>); +** SELECT <function-name>(<key-name>, <pointer>); +** +** where <function-name> is the name passed as the third argument +** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer'). +** +** If the <pointer> argument is specified, it must be a blob value +** containing a pointer to be stored as the hash data corresponding +** to the string <key-name>. If <pointer> is not specified, then +** the string <key-name> must already exist in the hash table. Otherwise, +** an error is returned. +** +** Whether or not the <pointer> argument is specified, the value returned +** is a blob containing the pointer stored as the hash data corresponding +** to string <key-name> (after the hash-table is updated, if applicable). 
+*/ +static void scalarFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + fts2Hash *pHash; + void *pPtr = 0; + const unsigned char *zName; + int nName; + + assert( argc==1 || argc==2 ); + + pHash = (fts2Hash *)sqlite3_user_data(context); + + zName = sqlite3_value_text(argv[0]); + nName = sqlite3_value_bytes(argv[0])+1; + + if( argc==2 ){ + void *pOld; + int n = sqlite3_value_bytes(argv[1]); + if( n!=sizeof(pPtr) ){ + sqlite3_result_error(context, "argument type mismatch", -1); + return; + } + pPtr = *(void **)sqlite3_value_blob(argv[1]); + pOld = sqlite3Fts2HashInsert(pHash, (void *)zName, nName, pPtr); + if( pOld==pPtr ){ + sqlite3_result_error(context, "out of memory", -1); + return; + } + }else{ + pPtr = sqlite3Fts2HashFind(pHash, zName, nName); + if( !pPtr ){ + char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName); + sqlite3_result_error(context, zErr, -1); + sqlite3_free(zErr); + return; + } + } + + sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT); +} + +#ifdef SQLITE_TEST + +#include + +/* +** Implementation of a special SQL scalar function for testing tokenizers +** designed to be used in concert with the Tcl testing framework. This +** function must be called with two arguments: +** +** SELECT (, ); +** SELECT (, ); +** +** where is the name passed as the second argument +** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer') +** concatenated with the string '_test' (e.g. 'fts2_tokenizer_test'). +** +** The return value is a string that may be interpreted as a Tcl +** list. For each token in the , three elements are +** added to the returned list. The first is the token position, the +** second is the token text (folded, stemmed, etc.) and the third is the +** substring of associated with the token. 
For example, +** using the built-in "simple" tokenizer: +** +** SELECT fts_tokenizer_test('simple', 'I don't see how'); +** +** will return the string: +** +** "{0 i I 1 dont don't 2 see see 3 how how}" +** +*/ +static void testFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + fts2Hash *pHash; + sqlite3_tokenizer_module *p; + sqlite3_tokenizer *pTokenizer = 0; + sqlite3_tokenizer_cursor *pCsr = 0; + + const char *zErr = 0; + + const char *zName; + int nName; + const char *zInput; + int nInput; + + const char *zToken; + int nToken; + int iStart; + int iEnd; + int iPos; + + Tcl_Obj *pRet; + + assert( argc==2 ); + + nName = sqlite3_value_bytes(argv[0]); + zName = (const char *)sqlite3_value_text(argv[0]); + nInput = sqlite3_value_bytes(argv[1]); + zInput = (const char *)sqlite3_value_text(argv[1]); + + pHash = (fts2Hash *)sqlite3_user_data(context); + p = (sqlite3_tokenizer_module *)sqlite3Fts2HashFind(pHash, zName, nName+1); + + if( !p ){ + char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName); + sqlite3_result_error(context, zErr, -1); + sqlite3_free(zErr); + return; + } + + pRet = Tcl_NewObj(); + Tcl_IncrRefCount(pRet); + + if( SQLITE_OK!=p->xCreate(0, 0, &pTokenizer) ){ + zErr = "error in xCreate()"; + goto finish; + } + pTokenizer->pModule = p; + if( SQLITE_OK!=p->xOpen(pTokenizer, zInput, nInput, &pCsr) ){ + zErr = "error in xOpen()"; + goto finish; + } + pCsr->pTokenizer = pTokenizer; + + while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){ + Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos)); + Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken)); + zToken = &zInput[iStart]; + nToken = iEnd-iStart; + Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken)); + } + + if( SQLITE_OK!=p->xClose(pCsr) ){ + zErr = "error in xClose()"; + goto finish; + } + if( SQLITE_OK!=p->xDestroy(pTokenizer) ){ + zErr = "error in xDestroy()"; + goto finish; + } + +finish: + if( zErr ){ + 
sqlite3_result_error(context, zErr, -1); + }else{ + sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT); + } + Tcl_DecrRefCount(pRet); +} +#endif + +/* +** Set up SQL objects in database db used to access the contents of +** the hash table pointed to by argument pHash. The hash table must +** have been initialised to use string keys, and to take a private copy +** of the key when a value is inserted. i.e. by a call similar to: +** +** sqlite3Fts2HashInit(pHash, FTS2_HASH_STRING, 1); +** +** This function adds a scalar function (see header comment above +** scalarFunc() in this file for details) and, if ENABLE_TABLE is +** defined at compilation time, a temporary virtual table (see header +** comment above struct HashTableVtab) to the database schema. Both +** provide read/write access to the contents of *pHash. +** +** The third argument to this function, zName, is used as the name +** of both the scalar and, if created, the virtual table. +*/ +int sqlite3Fts2InitHashTable( + sqlite3 *db, + fts2Hash *pHash, + const char *zName +){ + int rc; + void *p = (void *)pHash; + const int any = SQLITE_ANY; + char *zTest = 0; + +#ifdef SQLITE_TEST + zTest = sqlite3_mprintf("%s_test", zName); + if( !zTest ){ + return SQLITE_NOMEM; + } +#endif + + if( (rc = sqlite3_create_function(db, zName, 1, any, p, scalarFunc, 0, 0)) + || (rc = sqlite3_create_function(db, zName, 2, any, p, scalarFunc, 0, 0)) +#ifdef SQLITE_TEST + || (rc = sqlite3_create_function(db, zTest, 2, any, p, testFunc, 0, 0)) +#endif + ); + + sqlite3_free(zTest); + return rc; +} + diff --git a/ext/fts2/fts2_tokenizer.h b/ext/fts2/fts2_tokenizer.h index 35f8238b20..9b482abf00 100644 --- a/ext/fts2/fts2_tokenizer.h +++ b/ext/fts2/fts2_tokenizer.h @@ -75,16 +75,4 @@ struct sqlite3_tokenizer_cursor { /* Tokenizer implementations will typically add additional fields */ }; -/* -** Get the module for a tokenizer which generates tokens based on a -** set of non-token characters. 
The default is to break tokens at any -** non-alnum character, though the set of delimiters can also be -** specified by the first argv argument to xCreate(). -*/ -/* TODO(shess) This doesn't belong here. Need some sort of -** registration process. -*/ -void sqlite3Fts2SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule); -void sqlite3Fts2PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule); - #endif /* _FTS2_TOKENIZER_H_ */ diff --git a/ext/icu/README.txt b/ext/icu/README.txt index 7073413d34..5c995ccb4a 100644 --- a/ext/icu/README.txt +++ b/ext/icu/README.txt @@ -113,9 +113,15 @@ SQLite. Documentation follows. 2 COMPILATION AND USAGE The easiest way to compile and use the ICU extension is to build - and use it as a dynamically loadable SQLite extension. + and use it as a dynamically loadable SQLite extension. To do this + using gcc on *nix: + gcc -shared icu.c `icu-config --ldflags` -o libSqliteIcu.so + You may need to add "-I" flags so that gcc can find sqlite3ext.h + and sqlite3.h. The resulting shared lib, libSqliteIcu.so, may be + loaded into sqlite in the same way as any other dynamically loadable + extension. 3 BUGS, PROBLEMS AND SECURITY ISSUES diff --git a/ext/icu/icu.c b/ext/icu/icu.c index 0be817eaac..11bb116d8d 100644 --- a/ext/icu/icu.c +++ b/ext/icu/icu.c @@ -9,7 +9,7 @@ ** May you share freely, never taking more than you give. 
** ************************************************************************* -** $Id: icu.c,v 1.5 2007/06/11 08:00:00 danielk1977 Exp $ +** $Id: icu.c,v 1.6 2007/06/22 15:21:16 danielk1977 Exp $ ** ** This file implements an integration between the ICU library ** ("International Components for Unicode", an open-source library @@ -37,11 +37,12 @@ #include #include -#include "sqlite3.h" #ifndef SQLITE_CORE #include "sqlite3ext.h" SQLITE_EXTENSION_INIT1 +#else + #include "sqlite3.h" #endif /* @@ -342,7 +343,7 @@ static void icuCaseFunc16(sqlite3_context *p, int nArg, sqlite3_value **apArg){ nInput = sqlite3_value_bytes16(apArg[0]); nOutput = nInput * 2 + 2; - zOutput = sqlite3_malloc(nInput*2+2); + zOutput = sqlite3_malloc(nOutput); if( !zOutput ){ return; } diff --git a/main.mk b/main.mk index 138e9a29f4..d65ba052e0 100644 --- a/main.mk +++ b/main.mk @@ -148,7 +148,9 @@ EXTSRC += -DSQLITE_CORE=1 \ $(TOP)/ext/icu/icu.c \ $(TOP)/ext/fts2/fts2.c \ $(TOP)/ext/fts2/fts2_hash.c \ + $(TOP)/ext/fts2/fts2_icu.c \ $(TOP)/ext/fts2/fts2_porter.c \ + $(TOP)/ext/fts2/fts2_tokenizer.c \ $(TOP)/ext/fts2/fts2_tokenizer1.c # Generated source code files diff --git a/manifest b/manifest index d6e3bd825a..943c9da09b 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Clarify\sdocumentation\sof\sthe\scolumn\smetadata\sAPIs.\s\sMake\ssure\sthat\sthe\nnew\sdocumentation\sclaims\sare\stested.\s(CVS\s4107) -D 2007-06-21T15:25:05 +C Extend\sfts2\sso\sthat\suser\sdefined\stokenizers\smay\sbe\sadded.\sAdd\sa\stokenizer\sthat\suses\sthe\sICU\slibrary\sif\savailable.\sDocumentation\sand\stests\sto\scome.\s(CVS\s4108) +D 2007-06-22T15:21:16 F Makefile.in 7f7485a4cc039476a42e534b3f26ec90e2f9753e F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935 F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028 @@ -36,19 +36,21 @@ F ext/fts1/fulltext.h 08525a47852d1d62a0be81d3fc3fe2d23b094efd F ext/fts1/simple_tokenizer.c 1844d72f7194c3fd3d7e4173053911bf0661b70d F ext/fts1/tokenizer.h 
0c53421b832366d20d720d21ea3e1f6e66a36ef9 F ext/fts2/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d -F ext/fts2/fts2.c 8f9bd5fce1a6900072ad9b65dd41fe8ba010f099 +F ext/fts2/fts2.c 841766f2f14d68e623404f9531d98afa0f7cbf05 F ext/fts2/fts2.h 591916a822cfb6426518fdbf6069359119bc46eb F ext/fts2/fts2_hash.c b3f22116d4ef0bc8f2da6e3fdc435c86d0951a9b F ext/fts2/fts2_hash.h e283308156018329f042816eb09334df714e105e +F ext/fts2/fts2_icu.c 45b54d1e075020b35db20f69d829f95ca0651111 F ext/fts2/fts2_porter.c 991a45463553c7318063fe7773368a6c0f39e35d -F ext/fts2/fts2_tokenizer.h 4c5ffe31d63622869eb6eec1503df7f6996fd1bd +F ext/fts2/fts2_tokenizer.c 836373ee0fab4f8288a7815496529f25e4504881 +F ext/fts2/fts2_tokenizer.h 6d151c51382e8f6cf689c616bb697fe780478089 F ext/fts2/fts2_tokenizer1.c 5c979fe8815f95396beb22b627571da895a025af F ext/fts2/mkfts2amal.tcl 2a9ec76b0760fe7f3669dca5bc0d60728bc1c977 -F ext/icu/README.txt a470afe5adf6534cc0bdafca31e6cf4d88c321fa -F ext/icu/icu.c daab19e2c5221685688ecff2bb75bf9e0eea361d +F ext/icu/README.txt 3b130aa66e7a681136f6add198b076a2f90d1e33 +F ext/icu/icu.c 61a345d8126686aa3487aa8d2d0f68abd655f7a4 F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 F ltmain.sh 56abb507100ed2d4261f6dd1653dec3cf4066387 -F main.mk 522c81a818f2f81eb5e904ce983710449c5d76ad +F main.mk 9007943b573ddccc1bb39f8fcb7b2812f6cc8792 F mkdll.sh 37fa8a7412e51b5ab2bc6d4276135f022a0feffb F mkopcodec.awk bd46ad001c98dfbab07b1713cb8e692fa0e5415d F mkopcodeh.awk cde995d269aa06c94adbf6455bea0acedb913fa5 @@ -73,14 +75,14 @@ F src/date.c 6049db7d5a8fdf2c677ff7d58fa31d4f6593c988 F src/delete.c 5c0d89b3ef7d48fe1f5124bfe8341f982747fe29 F src/experimental.c 1b2d1a6cd62ecc39610e97670332ca073c50792b F src/expr.c 763ca2b39fe551a6dc7d37ba40544311622eee32 -F src/func.c 4668843e6f0d27653939e12fc32276fb8e38d21a +F src/func.c 6b45261aa2c514f642201b90493af68469c04af6 F src/hash.c 67b23e14f0257b69a3e8aa663e4eeadc1a2b6fd5 F src/hash.h 1b3f7e2609141fd571f62199fc38687d262e9564 F src/insert.c 
e595ca26805dfb3a9ebaabc28e7947c479f3b14d F src/legacy.c 388c71ad7fbcd898ba1bcbfc98a3ac954bfa5d01 F src/limits.h 71ab25f17e35e0a9f3f6f234b8ed49cc56731d35 -F src/loadext.c afe4f4755dc49c36ef505748bbdddecb9f1d02a2 -F src/main.c 797dc983716c1480f6af78a36be3add8806211a1 +F src/loadext.c b85b4e777cda9bf95475152ed240b6dfd2a0ecd9 +F src/main.c 65fc7de0b3c2e5b637c000ecf419c35de2525ef9 F src/malloc.c fa9bbccc4e6d099cd04c2518d238a1669c9d1020 F src/md5.c c5fdfa5c2593eaee2e32a5ce6c6927c986eaf217 F src/os.c 1f10b47acc1177fb9225edb4f5f0d25ed716f9cb @@ -104,9 +106,9 @@ F src/random.c 6119474a6f6917f708c1dee25b9a8e519a620e88 F src/select.c 33a258fc9c9dccb28ae2d3a02f1e1148d6433148 F src/server.c 087b92a39d883e3fa113cae259d64e4c7438bc96 F src/shell.c 4b0fc3c76a9f23a1c963e01703c0fbbca1b5c34d -F src/sqlite.h.in 475e0e3dbd34c151ca89423c97d878c99575c71a -F src/sqlite3ext.h 7d0d363ea7327e817ef0dfe1b7eee1f171b72890 -F src/sqliteInt.h ed31d1a0311c1ffc018fa2e9035a6cf7985049c8 +F src/sqlite.h.in 6f290b660b2e7c3359968bb4b344ec31a1178746 +F src/sqlite3ext.h 95575e0d175a0271fe2c3232c0d11e8720ed6887 +F src/sqliteInt.h 6503239d26b1943227031aa005320ef09b9b92b7 F src/sqliteLimit.h f14609c27636ebc217c9603ade26dbdd7d0f6afa F src/table.c a8de75bcedf84d4060d804264b067ab3b1a3561d F src/tclsqlite.c 4bffe56752d2c24ade23340e46a91fd92c316e08 @@ -143,7 +145,7 @@ F src/vdbeaux.c c580d3605edc2c24ba9bd26fa7aa8b4fff10daa4 F src/vdbeblob.c 96f3572fdc45eda5be06e6372b612bc30742d9f0 F src/vdbefifo.c 3ca8049c561d5d67cbcb94dc909ae9bb68c0bf8f F src/vdbemem.c ca4d3994507cb0a9504820293af69f5c778b4abd -F src/vtab.c c5ebebf615b2f29499fbe97a584c4bb342632aa0 +F src/vtab.c 51d43cda45d25e6f3a15d19fe32992b7756e74db F src/where.c 12387641659605318ae03d87f0687f223dfc9568 F tclinstaller.tcl 4356d9d94d2b5ed5e68f9f0c80c4df3048dd7617 F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2 @@ -253,6 +255,7 @@ F test/fts2l.test 4c53c89ce3919003765ff4fd8d98ecf724d97dd3 F test/fts2m.test 4b30142ead6f3ed076e880a2a464064c5ad58c51 F 
test/fts2n.test a70357e72742681eaebfdbe9007b87ff3b771638 F test/fts2o.test 05ce2ac9111c29998418a584de02136a0ded471b +F test/fts2token.test 8cfc9ee33361b93fa175197f25fefdd13dfb442e F test/func.test 605989453d1b42cec1d05c17aa232dc98e3e04e6 F test/fuzz.test 62fc19dd36a427777fd671b569df07166548628a F test/fuzz2.test ea38692ce2da99ad79fe0be5eb1a452c1c4d37bb @@ -509,7 +512,7 @@ F www/tclsqlite.tcl 8be95ee6dba05eabcd27a9d91331c803f2ce2130 F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0 F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b F www/whentouse.tcl fc46eae081251c3c181bd79c5faef8195d7991a5 -P af4b914a2152ce021401b6f78bb88a5323aa6fc2 -R 9c91b2e377cf682d8693c478c169d4ef -U drh -Z 97256b78e966a6731710de3bdea5bd11 +P 2dafe08a91b5328a9d0df5ab29c3ff2d94ad5f6f +R c1366eff2872139d79721716582129d6 +U danielk1977 +Z 5d0863f4b162f005704998d90939d28f diff --git a/manifest.uuid b/manifest.uuid index 0538e7dfd3..7366ee1df0 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -2dafe08a91b5328a9d0df5ab29c3ff2d94ad5f6f \ No newline at end of file +68677e420c744b39ea9d7399819e0f376748886d \ No newline at end of file diff --git a/src/func.c b/src/func.c index c836ebe5bd..d76b38d7bb 100644 --- a/src/func.c +++ b/src/func.c @@ -16,7 +16,7 @@ ** sqliteRegisterBuildinFunctions() found at the bottom of the file. ** All other code has file scope. 
** -** $Id: func.c,v 1.160 2007/06/07 19:08:33 drh Exp $ +** $Id: func.c,v 1.161 2007/06/22 15:21:16 danielk1977 Exp $ */ #include "sqliteInt.h" #include @@ -1410,7 +1410,13 @@ void sqlite3RegisterBuiltinFunctions(sqlite3 *db){ } } sqlite3RegisterDateTimeFunctions(db); - sqlite3_overload_function(db, "MATCH", 2); + if( !sqlite3MallocFailed() ){ + int rc = sqlite3_overload_function(db, "MATCH", 2); + assert( rc==SQLITE_NOMEM || rc==SQLITE_OK ); + if( rc==SQLITE_NOMEM ){ + sqlite3FailedMalloc(); + } + } #ifdef SQLITE_SSE (void)sqlite3SseFunctions(db); #endif diff --git a/src/loadext.c b/src/loadext.c index b435de21dc..9ecadd422a 100644 --- a/src/loadext.c +++ b/src/loadext.c @@ -157,6 +157,7 @@ const sqlite3_api_routines sqlite3_apis = { sqlite3_create_function, sqlite3_create_function16, sqlite3_create_module, + sqlite3_create_module_v2, sqlite3_data_count, sqlite3_db_handle, sqlite3_declare_vtab, diff --git a/src/main.c b/src/main.c index e4eb7fc127..c99073525b 100644 --- a/src/main.c +++ b/src/main.c @@ -14,7 +14,7 @@ ** other files are for internal use by SQLite and should not be ** accessed by users of the library. ** -** $Id: main.c,v 1.376 2007/05/08 20:37:39 drh Exp $ +** $Id: main.c,v 1.377 2007/06/22 15:21:16 danielk1977 Exp $ */ #include "sqliteInt.h" #include "os.h" @@ -194,6 +194,9 @@ int sqlite3_close(sqlite3 *db){ #ifndef SQLITE_OMIT_VIRTUALTABLE for(i=sqliteHashFirst(&db->aModule); i; i=sqliteHashNext(i)){ Module *pMod = (Module *)sqliteHashData(i); + if( pMod->xDestroy ){ + pMod->xDestroy(pMod->pAux); + } sqliteFree(pMod); } sqlite3HashClear(&db->aModule); @@ -986,41 +989,47 @@ static int openDatabase( db->aDb[1].safety_level = 1; #endif + db->magic = SQLITE_MAGIC_OPEN; + if( sqlite3MallocFailed() ){ + goto opendb_out; + } + /* Register all built-in functions, but do not attempt to read the ** database schema yet. This is delayed until the first time the database ** is accessed. 
*/ - if( !sqlite3MallocFailed() ){ - sqlite3Error(db, SQLITE_OK, 0); - sqlite3RegisterBuiltinFunctions(db); - } - db->magic = SQLITE_MAGIC_OPEN; + sqlite3Error(db, SQLITE_OK, 0); + sqlite3RegisterBuiltinFunctions(db); /* Load automatic extensions - extensions that have been registered ** using the sqlite3_automatic_extension() API. */ (void)sqlite3AutoLoadExtensions(db); + if( sqlite3_errcode(db)!=SQLITE_OK ){ + goto opendb_out; + } #ifdef SQLITE_ENABLE_FTS1 - { + if( !sqlite3MallocFailed() ){ extern int sqlite3Fts1Init(sqlite3*); - sqlite3Fts1Init(db); + rc = sqlite3Fts1Init(db); } #endif #ifdef SQLITE_ENABLE_FTS2 - { + if( !sqlite3MallocFailed() && rc==SQLITE_OK ){ extern int sqlite3Fts2Init(sqlite3*); - sqlite3Fts2Init(db); + rc = sqlite3Fts2Init(db); } #endif #ifdef SQLITE_ENABLE_ICU - if( !sqlite3MallocFailed() ){ + if( !sqlite3MallocFailed() && rc==SQLITE_OK ){ extern int sqlite3IcuInit(sqlite3*); - sqlite3IcuInit(db); + rc = sqlite3IcuInit(db); } #endif + sqlite3Error(db, rc, 0); /* -DSQLITE_DEFAULT_LOCKING_MODE=1 makes EXCLUSIVE the default locking ** mode. -DSQLITE_DEFAULT_LOCKING_MODE=0 make NORMAL the default locking diff --git a/src/sqlite.h.in b/src/sqlite.h.in index 59c1061129..75ab6f55d0 100644 --- a/src/sqlite.h.in +++ b/src/sqlite.h.in @@ -30,7 +30,7 @@ ** the version number) and changes its name to "sqlite3.h" as ** part of the build process. ** -** @(#) $Id: sqlite.h.in,v 1.214 2007/06/21 15:25:05 drh Exp $ +** @(#) $Id: sqlite.h.in,v 1.215 2007/06/22 15:21:16 danielk1977 Exp $ */ #ifndef _SQLITE3_H_ #define _SQLITE3_H_ @@ -2497,6 +2497,19 @@ int sqlite3_create_module( void * /* Client data for xCreate/xConnect */ ); +/* +** This routine is identical to the sqlite3_create_module() method above, +** except that it allows a destructor function to be specified. It is +** even more experimental than the rest of the virtual tables API. 
+*/ +int sqlite3_create_module_v2( + sqlite3 *db, /* SQLite connection to register module with */ + const char *zName, /* Name of the module */ + const sqlite3_module *, /* Methods for the module */ + void *, /* Client data for xCreate/xConnect */ + void(*xDestroy)(void*) /* Module destructor function */ +); + /* ** Every module implementation uses a subclass of the following structure ** to describe a particular instance of the module. Each subclass will diff --git a/src/sqlite3ext.h b/src/sqlite3ext.h index 0d70e64d75..2915aadc12 100644 --- a/src/sqlite3ext.h +++ b/src/sqlite3ext.h @@ -15,7 +15,7 @@ ** as extensions by SQLite should #include this file instead of ** sqlite3.h. ** -** @(#) $Id: sqlite3ext.h,v 1.10 2007/03/29 18:46:01 drh Exp $ +** @(#) $Id: sqlite3ext.h,v 1.11 2007/06/22 15:21:16 danielk1977 Exp $ */ #ifndef _SQLITE3EXT_H_ #define _SQLITE3EXT_H_ @@ -76,6 +76,7 @@ struct sqlite3_api_routines { int (*create_function)(sqlite3*,const char*,int,int,void*,void (*xFunc)(sqlite3_context*,int,sqlite3_value**),void (*xStep)(sqlite3_context*,int,sqlite3_value**),void (*xFinal)(sqlite3_context*)); int (*create_function16)(sqlite3*,const void*,int,int,void*,void (*xFunc)(sqlite3_context*,int,sqlite3_value**),void (*xStep)(sqlite3_context*,int,sqlite3_value**),void (*xFinal)(sqlite3_context*)); int (*create_module)(sqlite3*,const char*,const sqlite3_module*,void*); + int (*create_module_v2)(sqlite3*,const char*,const sqlite3_module*,void*,void (*xDestroy)(void *)); int (*data_count)(sqlite3_stmt*pStmt); sqlite3 * (*db_handle)(sqlite3_stmt*); int (*declare_vtab)(sqlite3*,const char*); @@ -209,6 +210,7 @@ struct sqlite3_api_routines { #define sqlite3_create_function sqlite3_api->create_function #define sqlite3_create_function16 sqlite3_api->create_function16 #define sqlite3_create_module sqlite3_api->create_module +#define sqlite3_create_module_v2 sqlite3_api->create_module_v2 #define sqlite3_data_count sqlite3_api->data_count #define sqlite3_db_handle 
sqlite3_api->db_handle #define sqlite3_declare_vtab sqlite3_api->declare_vtab diff --git a/src/sqliteInt.h b/src/sqliteInt.h index 38cad64d99..5fd3674711 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -11,7 +11,7 @@ ************************************************************************* ** Internal interface definitions for SQLite. ** -** @(#) $Id: sqliteInt.h,v 1.574 2007/06/20 15:29:25 drh Exp $ +** @(#) $Id: sqliteInt.h,v 1.575 2007/06/22 15:21:16 danielk1977 Exp $ */ #ifndef _SQLITEINT_H_ #define _SQLITEINT_H_ @@ -584,6 +584,7 @@ struct Module { const sqlite3_module *pModule; /* Callback pointers */ const char *zName; /* Name passed to create_module() */ void *pAux; /* pAux passed to create_module() */ + void (*xDestroy)(void *); /* Module destructor function */ }; /* diff --git a/src/vtab.c b/src/vtab.c index 940c86e30b..5b9fbc4722 100644 --- a/src/vtab.c +++ b/src/vtab.c @@ -11,11 +11,38 @@ ************************************************************************* ** This file contains code used to help implement virtual tables. 
** -** $Id: vtab.c,v 1.46 2007/05/04 13:15:57 drh Exp $ +** $Id: vtab.c,v 1.47 2007/06/22 15:21:16 danielk1977 Exp $ */ #ifndef SQLITE_OMIT_VIRTUALTABLE #include "sqliteInt.h" +static int createModule( + sqlite3 *db, /* Database in which module is registered */ + const char *zName, /* Name assigned to this module */ + const sqlite3_module *pModule, /* The definition of the module */ + void *pAux, /* Context pointer for xCreate/xConnect */ + void (*xDestroy)(void *) /* Module destructor function */ +) { + int nName = strlen(zName); + Module *pMod = (Module *)sqliteMallocRaw(sizeof(Module) + nName + 1); + if( pMod ){ + char *zCopy = (char *)(&pMod[1]); + memcpy(zCopy, zName, nName+1); + pMod->zName = zCopy; + pMod->pModule = pModule; + pMod->pAux = pAux; + pMod->xDestroy = xDestroy; + pMod = (Module *)sqlite3HashInsert(&db->aModule, zCopy, nName, (void*)pMod); + if( pMod && pMod->xDestroy ){ + pMod->xDestroy(pMod->pAux); + } + sqliteFree(pMod); + sqlite3ResetInternalSchema(db, 0); + } + return sqlite3ApiExit(db, SQLITE_OK); +} + + /* ** External API function used to create a new virtual-table module. */ @@ -25,19 +52,20 @@ int sqlite3_create_module( const sqlite3_module *pModule, /* The definition of the module */ void *pAux /* Context pointer for xCreate/xConnect */ ){ - int nName = strlen(zName); - Module *pMod = (Module *)sqliteMallocRaw(sizeof(Module) + nName + 1); - if( pMod ){ - char *zCopy = (char *)(&pMod[1]); - memcpy(zCopy, zName, nName+1); - pMod->zName = zCopy; - pMod->pModule = pModule; - pMod->pAux = pAux; - pMod = (Module *)sqlite3HashInsert(&db->aModule, zCopy, nName, (void*)pMod); - sqliteFree(pMod); - sqlite3ResetInternalSchema(db, 0); - } - return sqlite3ApiExit(db, SQLITE_OK); + return createModule(db, zName, pModule, pAux, 0); +} + +/* +** External API function used to create a new virtual-table module. 
+*/ +int sqlite3_create_module_v2( + sqlite3 *db, /* Database in which module is registered */ + const char *zName, /* Name assigned to this module */ + const sqlite3_module *pModule, /* The definition of the module */ + void *pAux, /* Context pointer for xCreate/xConnect */ + void (*xDestroy)(void *) /* Module destructor function */ +){ + return createModule(db, zName, pModule, pAux, xDestroy); } /* diff --git a/test/fts2token.test b/test/fts2token.test new file mode 100644 index 0000000000..641aedab7a --- /dev/null +++ b/test/fts2token.test @@ -0,0 +1,107 @@ +# 2007 June 21 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# This file implements regression tests for the SQLite library. The focus +# of this script is testing the pluggable tokenizer feature of the +# FTS2 module. +# +# $Id: fts2token.test,v 1.1 2007/06/22 15:21:16 danielk1977 Exp $ +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +# If SQLITE_ENABLE_FTS2 is not defined, omit this file. +ifcapable !fts2 { + finish_test + return +} + +#-------------------------------------------------------------------------- +# Test cases fts2token-1.* are the warm-body test for the SQL scalar +# function fts2_tokenizer(). The procedure is as follows: +# +# 1: Verify that there is no such fts2 tokenizer as 'blah'. +# +# 2: Query for the built-in tokenizer 'simple'. Insert a copy of the +# retrieved value as tokenizer 'blah'. +# +# 3: Test that the value returned for tokenizer 'blah' is now the +# same as that retrieved for 'simple'. +# +# 4: Test that it is now possible to create an fts2 table using +# tokenizer 'blah' (it was not possible in step 1).
+# +# 5: Test that the table created to use tokenizer 'blah' is usable. +# +do_test fts2token-1.1 { + catchsql { + CREATE VIRTUAL TABLE t1 USING fts2(content, tokenize blah); + } +} {1 {unknown tokenizer: blah}} +do_test fts2token-1.2 { + execsql { + SELECT fts2_tokenizer('blah', fts2_tokenizer('simple')) IS NULL; + } +} {0} +do_test fts2token-1.3 { + execsql { + SELECT fts2_tokenizer('blah') == fts2_tokenizer('simple'); + } +} {1} +do_test fts2token-1.4 { + catchsql { + CREATE VIRTUAL TABLE t1 USING fts2(content, tokenize blah); + } +} {0 {}} +do_test fts2token-1.5 { + execsql { + INSERT INTO t1(content) VALUES('There was movement at the station'); + INSERT INTO t1(content) VALUES('For the word has passed around'); + INSERT INTO t1(content) VALUES('That the colt from ol regret had got away'); + SELECT content FROM t1 WHERE content MATCH 'movement' + } +} {{There was movement at the station}} + +#-------------------------------------------------------------------------- +# Test cases fts2token-2.* test error cases in the scalar function based +# API for getting and setting tokenizers. +# +do_test fts2token-2.1 { + catchsql { + SELECT fts2_tokenizer('nosuchtokenizer'); + } +} {1 {unknown tokenizer: nosuchtokenizer}} + +#-------------------------------------------------------------------------- +# Test cases fts2token-3.* test the three built-in tokenizers with a +# simple input string via the built-in test function. This is as much +# to test the test function as the tokenizer implementations. 
+# +do_test fts2token-3.1 { + execsql { + SELECT fts2_tokenizer_test('simple', 'I don''t see how'); + } +} {{0 i I 1 don don 2 t t 3 see see 4 how how}} +do_test fts2token-3.2 { + execsql { + SELECT fts2_tokenizer_test('porter', 'I don''t see how'); + } +} {{0 i I 1 don don 2 t t 3 see see 4 how how}} + +ifcapable icu { + do_test fts2token-3.3 { + execsql { + SELECT fts2_tokenizer_test('icu', 'I don''t see how'); + } + } {{0 i I 1 don't don't 2 see see 3 how how}} +} + +finish_test