1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-08-08 14:02:16 +03:00

Extend fts2 so that user defined tokenizers may be added. Add a tokenizer that uses the ICU library if available. Documentation and tests to come. (CVS 4108)

FossilOrigin-Name: 68677e420c744b39ea9d7399819e0f376748886d
This commit is contained in:
danielk1977
2007-06-22 15:21:15 +00:00
parent bf2564f612
commit 832a58a68c
17 changed files with 821 additions and 81 deletions

View File

@@ -1847,7 +1847,7 @@ static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){
return (fulltext_vtab *) c->base.pVtab; return (fulltext_vtab *) c->base.pVtab;
} }
static const sqlite3_module fulltextModule; /* forward declaration */ static const sqlite3_module fts2Module; /* forward declaration */
/* Return a dynamically generated statement of the form /* Return a dynamically generated statement of the form
* insert into %_content (rowid, ...) values (?, ...) * insert into %_content (rowid, ...) values (?, ...)
@@ -2761,6 +2761,7 @@ static char *fulltextSchema(
*/ */
static int constructVtab( static int constructVtab(
sqlite3 *db, /* The SQLite database connection */ sqlite3 *db, /* The SQLite database connection */
fts2Hash *pHash, /* Hash table containing tokenizers */
TableSpec *spec, /* Parsed spec information from parseSpec() */ TableSpec *spec, /* Parsed spec information from parseSpec() */
sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */ sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */
char **pzErr /* Write any error message here */ char **pzErr /* Write any error message here */
@@ -2771,6 +2772,9 @@ static int constructVtab(
const sqlite3_tokenizer_module *m = NULL; const sqlite3_tokenizer_module *m = NULL;
char *schema; char *schema;
char const *zTok; /* Name of tokenizer to use for this fts table */
int nTok; /* Length of zTok, including nul terminator */
v = (fulltext_vtab *) malloc(sizeof(fulltext_vtab)); v = (fulltext_vtab *) malloc(sizeof(fulltext_vtab));
if( v==0 ) return SQLITE_NOMEM; if( v==0 ) return SQLITE_NOMEM;
CLEAR(v); CLEAR(v);
@@ -2787,16 +2791,20 @@ static int constructVtab(
if( spec->azTokenizer==0 ){ if( spec->azTokenizer==0 ){
return SQLITE_NOMEM; return SQLITE_NOMEM;
} }
/* TODO(shess) For now, add new tokenizers as else if clauses. */
if( spec->azTokenizer[0]==0 || startsWith(spec->azTokenizer[0], "simple") ){ zTok = spec->azTokenizer[0];
sqlite3Fts2SimpleTokenizerModule(&m); if( !zTok ){
}else if( startsWith(spec->azTokenizer[0], "porter") ){ zTok = "simple";
sqlite3Fts2PorterTokenizerModule(&m); }
}else{ nTok = strlen(zTok)+1;
m = (sqlite3_tokenizer_module *)sqlite3Fts2HashFind(pHash, zTok, nTok);
if( !m ){
*pzErr = sqlite3_mprintf("unknown tokenizer: %s", spec->azTokenizer[0]); *pzErr = sqlite3_mprintf("unknown tokenizer: %s", spec->azTokenizer[0]);
rc = SQLITE_ERROR; rc = SQLITE_ERROR;
goto err; goto err;
} }
for(n=0; spec->azTokenizer[n]; n++){} for(n=0; spec->azTokenizer[n]; n++){}
if( n ){ if( n ){
rc = m->xCreate(n-1, (const char*const*)&spec->azTokenizer[1], rc = m->xCreate(n-1, (const char*const*)&spec->azTokenizer[1],
@@ -2841,7 +2849,7 @@ static int fulltextConnect(
int rc = parseSpec(&spec, argc, argv, pzErr); int rc = parseSpec(&spec, argc, argv, pzErr);
if( rc!=SQLITE_OK ) return rc; if( rc!=SQLITE_OK ) return rc;
rc = constructVtab(db, &spec, ppVTab, pzErr); rc = constructVtab(db, (fts2Hash *)pAux, &spec, ppVTab, pzErr);
clearTableSpec(&spec); clearTableSpec(&spec);
return rc; return rc;
} }
@@ -2887,7 +2895,7 @@ static int fulltextCreate(sqlite3 *db, void *pAux,
");"); ");");
if( rc!=SQLITE_OK ) goto out; if( rc!=SQLITE_OK ) goto out;
rc = constructVtab(db, &spec, ppVTab, pzErr); rc = constructVtab(db, (fts2Hash *)pAux, &spec, ppVTab, pzErr);
out: out:
clearTableSpec(&spec); clearTableSpec(&spec);
@@ -5833,7 +5841,7 @@ static int fulltextFindFunction(
return 0; return 0;
} }
static const sqlite3_module fulltextModule = { static const sqlite3_module fts2Module = {
/* iVersion */ 0, /* iVersion */ 0,
/* xCreate */ fulltextCreate, /* xCreate */ fulltextCreate,
/* xConnect */ fulltextConnect, /* xConnect */ fulltextConnect,
@@ -5855,15 +5863,93 @@ static const sqlite3_module fulltextModule = {
/* xFindFunction */ fulltextFindFunction, /* xFindFunction */ fulltextFindFunction,
}; };
/*
** Destructor for the tokenizer hash table. The argument is the fts2Hash
** pointer that was handed to sqlite3_create_module_v2() as the module's
** auxiliary data: empty the table, then release the table object itself.
*/
static void hashDestroy(void *p){
  fts2Hash *pTokenizers = (fts2Hash *)p;

  sqlite3Fts2HashClear(pTokenizers);
  sqlite3_free(p);
}
/*
** The fts2 built-in tokenizers - "simple" and "porter" - are implemented
** in files fts2_tokenizer1.c and fts2_porter.c respectively. The optional
** "icu" tokenizer is implemented in fts2_icu.c. The following three
** forward declarations are for functions defined in these files and
** used to retrieve the respective implementations.
**
** Calling sqlite3Fts2SimpleTokenizerModule() sets the value pointed
** to by the argument to point at the "simple" tokenizer implementation.
** Function ...PorterTokenizerModule() sets *ppModule to point to the
** porter tokenizer/stemmer implementation.
*/
void sqlite3Fts2SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
void sqlite3Fts2PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule);
void sqlite3Fts2IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule);
/*
** Initialise the fts2 extension. If this extension is built as part
** of the sqlite library, then this function is called directly by
** SQLite. If fts2 is built as a dynamically loadable extension, this
** function is called by the sqlite3_extension_init() entry point.
*/
int sqlite3Fts2Init(sqlite3 *db){ int sqlite3Fts2Init(sqlite3 *db){
sqlite3_overload_function(db, "snippet", -1); int rc = SQLITE_OK;
sqlite3_overload_function(db, "offsets", -1); fts2Hash *pHash = 0;
return sqlite3_create_module(db, "fts2", &fulltextModule, 0); const sqlite3_tokenizer_module *pSimple = 0;
const sqlite3_tokenizer_module *pPorter = 0;
const sqlite3_tokenizer_module *pIcu = 0;
sqlite3Fts2SimpleTokenizerModule(&pSimple);
sqlite3Fts2PorterTokenizerModule(&pPorter);
#ifdef SQLITE_ENABLE_ICU
sqlite3Fts2IcuTokenizerModule(&pIcu);
#endif
/* Allocate and initialise the hash-table used to store tokenizers. */
pHash = sqlite3_malloc(sizeof(fts2Hash));
if( !pHash ){
rc = SQLITE_NOMEM;
}else{
sqlite3Fts2HashInit(pHash, FTS2_HASH_STRING, 1);
}
/* Load the built-in tokenizers into the hash table */
if( rc==SQLITE_OK ){
if( sqlite3Fts2HashInsert(pHash, "simple", 7, (void *)pSimple)
|| sqlite3Fts2HashInsert(pHash, "porter", 7, (void *)pPorter)
|| (pIcu && sqlite3Fts2HashInsert(pHash, "icu", 4, (void *)pIcu))
){
rc = SQLITE_NOMEM;
}
}
/* Create the virtual table wrapper around the hash-table and overload
** the two scalar functions. If this is successful, register the
** module with sqlite.
*/
if( SQLITE_OK==rc
&& SQLITE_OK==(rc = sqlite3Fts2InitHashTable(db, pHash, "fts2_tokenizer"))
&& SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1))
&& SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", -1))
){
return sqlite3_create_module_v2(
db, "fts2", &fts2Module, (void *)pHash, hashDestroy
);
}
/* An error has occurred. Delete the hash table and return the error code. */
assert( rc!=SQLITE_OK );
if( pHash ){
sqlite3Fts2HashClear(pHash);
sqlite3_free(pHash);
}
return rc;
} }
#if !SQLITE_CORE #if !SQLITE_CORE
int sqlite3_extension_init(sqlite3 *db, char **pzErrMsg, int sqlite3_extension_init(
const sqlite3_api_routines *pApi){ sqlite3 *db,
char **pzErrMsg,
const sqlite3_api_routines *pApi
){
SQLITE_EXTENSION_INIT2(pApi) SQLITE_EXTENSION_INIT2(pApi)
return sqlite3Fts2Init(db); return sqlite3Fts2Init(db);
} }

257
ext/fts2/fts2_icu.c Normal file
View File

@@ -0,0 +1,257 @@
/*
** 2007 June 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** This file implements a tokenizer for fts2 based on the ICU library.
**
** $Id: fts2_icu.c,v 1.1 2007/06/22 15:21:16 danielk1977 Exp $
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
#ifdef SQLITE_ENABLE_ICU
#include <assert.h>
#include <string.h>
#include "fts2_tokenizer.h"
#include <unicode/ubrk.h>
#include <unicode/ucol.h>
#include <unicode/ustring.h>
#include <unicode/utf16.h>
typedef struct IcuTokenizer IcuTokenizer;
typedef struct IcuCursor IcuCursor;
/*
** An ICU tokenizer instance. icuCreate() allocates this structure and the
** locale string (if any) in a single block, so icuDestroy() releases both
** with one sqlite3_free().
*/
struct IcuTokenizer {
sqlite3_tokenizer base; /* Generic header; first so the struct can be cast to sqlite3_tokenizer* */
char *zLocale; /* Locale name handed to ubrk_open(); NULL if xCreate got no arguments */
};
/*
** Tokenization cursor for the ICU tokenizer. icuOpen() allocates the
** structure together with the aChar[] and aOffset[] arrays in one block;
** zBuffer is a separate allocation managed by icuNext() and released by
** icuClose().
*/
struct IcuCursor {
sqlite3_tokenizer_cursor base; /* Generic header; first so the struct can be cast to sqlite3_tokenizer_cursor* */
UBreakIterator *pIter; /* ICU break-iterator object */
int nChar; /* Number of UChar elements stored in aChar[] */
UChar *aChar; /* Case-folded copy of input using utf-16 encoding */
int *aOffset; /* aOffset[i] is the byte offset in the utf-8 input matching aChar[i] */
int nBuffer; /* Current size of zBuffer in bytes */
char *zBuffer; /* Buffer holding the utf-8 text of the most recent token */
int iToken; /* Position value to report for the next token */
};
/*
** xCreate method: build a new ICU tokenizer.
**
** If at least one argument is supplied, argv[0] is taken to be the locale
** name. A private copy of it is stored immediately after the IcuTokenizer
** structure, inside the same allocation. With no arguments the locale is
** left NULL.
**
** Returns SQLITE_OK and writes the new object to *ppTokenizer, or returns
** SQLITE_NOMEM if the allocation fails.
*/
static int icuCreate(
  int argc,                            /* Number of entries in argv[] */
  const char * const *argv,            /* Tokenizer creation arguments */
  sqlite3_tokenizer **ppTokenizer      /* OUT: Created tokenizer */
){
  int nLocale = (argc>0) ? (int)(strlen(argv[0])+1) : 0;
  IcuTokenizer *pNew;

  pNew = (IcuTokenizer *)sqlite3_malloc(sizeof(IcuTokenizer)+nLocale);
  if( pNew==0 ) return SQLITE_NOMEM;
  memset(pNew, 0, sizeof(IcuTokenizer));

  if( nLocale!=0 ){
    /* The locale string (nul terminator included) lives in the tail of
    ** the allocation, directly behind the structure. */
    char *zCopy = (char *)&pNew[1];
    memcpy(zCopy, argv[0], nLocale);
    pNew->zLocale = zCopy;
  }

  *ppTokenizer = (sqlite3_tokenizer *)pNew;
  return SQLITE_OK;
}
/*
** xDestroy method: release a tokenizer created by icuCreate(). The locale
** string shares the tokenizer's allocation, so a single free suffices.
** Always returns SQLITE_OK.
*/
static int icuDestroy(sqlite3_tokenizer *pTokenizer){
  sqlite3_free((IcuTokenizer *)pTokenizer);
  return SQLITE_OK;
}
/*
** xOpen method: prepare to begin tokenizing a particular string. The
** input string to be tokenized is zInput[0..nInput-1]. A cursor used to
** incrementally tokenize this string is returned in *ppCursor.
**
** A NULL zInput is treated as an empty string, and a negative nInput
** means that zInput is nul-terminated.
**
** The input is converted to case-folded UTF-16 (stopping at the first
** embedded nul or malformed UTF-8 sequence) and a word break-iterator
** for the tokenizer's locale is opened over the converted text.
**
** Returns SQLITE_OK on success, SQLITE_NOMEM if the cursor cannot be
** allocated, or SQLITE_ERROR if the conversion or ubrk_open() fails.
** *ppCursor is set to NULL on any failure.
*/
static int icuOpen(
  sqlite3_tokenizer *pTokenizer,         /* The tokenizer */
  const char *zInput,                    /* Input string */
  int nInput,                            /* Length of zInput in bytes */
  sqlite3_tokenizer_cursor **ppCursor    /* OUT: Tokenization cursor */
){
  IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
  IcuCursor *pCsr;

  const int32_t opt = U_FOLD_CASE_DEFAULT;
  UErrorCode status = U_ZERO_ERROR;
  int nChar;                  /* Capacity of aChar[] used for conversion */
  int nCharAligned;           /* nChar rounded up to a multiple of 4 */

  UChar32 c;
  int iInput = 0;
  int iOut = 0;

  *ppCursor = 0;

  if( zInput==0 ){
    zInput = "";
    nInput = 0;
  }else if( nInput<0 ){
    nInput = (int)strlen(zInput);
  }

  /* aOffset[] is carved out of the same allocation, directly after
  ** aChar[]. UChar is only 2 bytes wide, so the number of UChar slots is
  ** rounded up to keep the int array that follows properly aligned.
  */
  nChar = nInput+1;
  nCharAligned = (nChar+3)&~3;
  pCsr = (IcuCursor *)sqlite3_malloc(
      sizeof(IcuCursor) +                /* IcuCursor */
      nCharAligned * sizeof(UChar) +     /* IcuCursor.aChar[] */
      (nChar+1) * sizeof(int)            /* IcuCursor.aOffset[] */
  );
  if( !pCsr ){
    return SQLITE_NOMEM;
  }
  memset(pCsr, 0, sizeof(IcuCursor));
  pCsr->aChar = (UChar *)&pCsr[1];
  pCsr->aOffset = (int *)&pCsr->aChar[nCharAligned];

  /* Convert the input one code point at a time. After each code point is
  ** appended, aOffset[iOut] records the byte offset in zInput at which
  ** the next UTF-16 unit begins. The iInput<nInput guard ensures that no
  ** byte is read from an input of length zero.
  */
  pCsr->aOffset[0] = 0;
  while( iInput<nInput ){
    int isError = 0;

    U8_NEXT(zInput, iInput, nInput, c);
    if( c<=0 ) break;         /* Embedded nul or malformed UTF-8 */

    c = u_foldCase(c, opt);
    U16_APPEND(pCsr->aChar, iOut, nChar, c, isError);
    if( isError ){
      sqlite3_free(pCsr);
      return SQLITE_ERROR;
    }
    pCsr->aOffset[iOut] = iInput;
  }

  pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status);
  if( !U_SUCCESS(status) ){
    sqlite3_free(pCsr);
    return SQLITE_ERROR;
  }
  pCsr->nChar = iOut;

  ubrk_first(pCsr->pIter);
  *ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
  return SQLITE_OK;
}
/*
** xClose method: dispose of a cursor obtained from icuOpen(). The
** break-iterator is closed first, then the token buffer and finally the
** cursor allocation itself (which also contains the aChar[] and aOffset[]
** arrays) are freed. Always returns SQLITE_OK.
*/
static int icuClose(sqlite3_tokenizer_cursor *pCursor){
  IcuCursor *pIcu = (IcuCursor *)pCursor;
  char *zTokenBuf = pIcu->zBuffer;

  ubrk_close(pIcu->pIter);
  sqlite3_free(zTokenBuf);
  sqlite3_free(pIcu);
  return SQLITE_OK;
}
/*
** xNext method: extract the next token from a tokenization cursor.
**
** The break-iterator is advanced until a segment is found that is not
** empty once leading white-space has been stripped. That segment is
** converted back to UTF-8 in the cursor's private buffer and returned
** together with its byte offsets in the original input and its ordinal
** position.
**
** Returns SQLITE_OK when a token is returned, SQLITE_DONE when the input
** is exhausted, SQLITE_NOMEM if the token buffer cannot be enlarged, or
** SQLITE_ERROR if the UTF-16 to UTF-8 conversion fails.
*/
static int icuNext(
  sqlite3_tokenizer_cursor *pCursor,  /* Cursor returned by icuOpen */
  const char **ppToken,               /* OUT: *ppToken is the token text */
  int *pnBytes,                       /* OUT: Number of bytes in token */
  int *piStartOffset,                 /* OUT: Starting offset of token */
  int *piEndOffset,                   /* OUT: Ending offset of token */
  int *piPosition                     /* OUT: Position integer of token */
){
  IcuCursor *pCsr = (IcuCursor *)pCursor;

  int iStart = 0;
  int iEnd = 0;
  int nByte = 0;

  while( iStart==iEnd ){
    UChar32 c;

    iStart = ubrk_current(pCsr->pIter);
    iEnd = ubrk_next(pCsr->pIter);
    if( iEnd==UBRK_DONE ){
      return SQLITE_DONE;
    }

    /* Skip leading white-space. aChar[] holds UTF-16 code units, so it
    ** must be decoded with U16_NEXT (the UTF-8 macro would misread it).
    */
    while( iStart<iEnd ){
      int iWhite = iStart;
      U16_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c);
      if( u_isspace(c) ){
        iStart = iWhite;
      }else{
        break;
      }
    }
    assert(iStart<=iEnd);
  }

  /* Convert the token to UTF-8. The first pass may only discover the
  ** required size (reported via nByte together with a buffer-overflow
  ** status), in which case the buffer is grown and the conversion is
  ** repeated.
  */
  do {
    UErrorCode status = U_ZERO_ERROR;
    if( nByte ){
      char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte);
      if( !zNew ){
        return SQLITE_NOMEM;
      }
      pCsr->zBuffer = zNew;
      pCsr->nBuffer = nByte;
    }

    u_strToUTF8(
        pCsr->zBuffer, pCsr->nBuffer, &nByte,    /* Output vars */
        &pCsr->aChar[iStart], iEnd-iStart,       /* Input vars */
        &status                                  /* Output success/failure */
    );

    /* Any failure other than "buffer too small" is fatal: returning now
    ** avoids handing stale buffer contents back to the caller. */
    if( U_FAILURE(status) && status!=U_BUFFER_OVERFLOW_ERROR ){
      return SQLITE_ERROR;
    }
  } while( nByte>pCsr->nBuffer );

  *ppToken = pCsr->zBuffer;
  *pnBytes = nByte;
  *piStartOffset = pCsr->aOffset[iStart];
  *piEndOffset = pCsr->aOffset[iEnd];
  *piPosition = pCsr->iToken++;

  return SQLITE_OK;
}
/*
** The set of routines that implement the ICU tokenizer. A pointer to this
** table is handed out by sqlite3Fts2IcuTokenizerModule().
*/
static const sqlite3_tokenizer_module icuTokenizerModule = {
0, /* iVersion */
icuCreate, /* xCreate */
icuDestroy, /* xDestroy */
icuOpen, /* xOpen */
icuClose, /* xClose */
icuNext, /* xNext */
};
/*
** Accessor for the ICU tokenizer implementation: store the address of
** the static method table defined in this file in *ppModule.
*/
void sqlite3Fts2IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule){
  const sqlite3_tokenizer_module *pIcuModule = &icuTokenizerModule;
  *ppModule = pIcuModule;
}
#endif /* defined(SQLITE_ENABLE_ICU) */
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */

230
ext/fts2/fts2_tokenizer.c Normal file
View File

@@ -0,0 +1,230 @@
#include "sqlite3.h"
#include "sqlite3ext.h"
SQLITE_EXTENSION_INIT1
#include "fts2_hash.h"
#include "fts2_tokenizer.h"
#include <assert.h>
/*
** Implementation of the SQL scalar function for accessing the underlying
** hash table. This function may be called as follows:
**
**   SELECT <function-name>(<key-name>);
**   SELECT <function-name>(<key-name>, <pointer>);
**
** where <function-name> is the name passed as the second argument
** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer').
**
** If the <pointer> argument is specified, it must be a blob value
** containing a pointer to be stored as the hash data corresponding
** to the string <key-name>. If <pointer> is not specified, then
** the string <key-name> must already exist in the hash table. Otherwise,
** an error is returned.
**
** Whether or not the <pointer> argument is specified, the value returned
** is a blob containing the pointer stored as the hash data corresponding
** to string <key-name> (after the hash-table is updated, if applicable).
**
** A <key-name> for which sqlite3_value_text() yields NULL (an SQL NULL
** argument, for example) is never passed to the hash table: the
** two-argument form reports "argument type mismatch" and the one-argument
** form reports "unknown tokenizer".
*/
static void scalarFunc(
  sqlite3_context *context,
  int argc,
  sqlite3_value **argv
){
  fts2Hash *pHash;
  void *pPtr = 0;
  const unsigned char *zName;
  int nName;

  assert( argc==1 || argc==2 );

  pHash = (fts2Hash *)sqlite3_user_data(context);

  /* Hash keys include the nul terminator, hence the +1. */
  zName = sqlite3_value_text(argv[0]);
  nName = sqlite3_value_bytes(argv[0])+1;

  if( argc==2 ){
    void *pOld;
    int n = sqlite3_value_bytes(argv[1]);
    if( zName==0 || n!=sizeof(pPtr) ){
      sqlite3_result_error(context, "argument type mismatch", -1);
      return;
    }
    pPtr = *(void **)sqlite3_value_blob(argv[1]);
    pOld = sqlite3Fts2HashInsert(pHash, (void *)zName, nName, pPtr);
    /* NOTE(review): getting the new pointer back is treated as an
    ** allocation failure inside the hash table - presumably re-registering
    ** an identical pointer under the same name is reported the same way;
    ** confirm against fts2_hash.c. */
    if( pOld==pPtr ){
      sqlite3_result_error(context, "out of memory", -1);
      return;
    }
  }else{
    if( zName ){
      pPtr = sqlite3Fts2HashFind(pHash, zName, nName);
    }
    if( !pPtr ){
      char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
      sqlite3_result_error(context, zErr, -1);
      sqlite3_free(zErr);
      return;
    }
  }

  /* Return the raw pointer value, copied into a blob owned by SQLite. */
  sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT);
}
#ifdef SQLITE_TEST
#include <tcl.h>
/*
** Implementation of a special SQL scalar function for testing tokenizers
** designed to be used in concert with the Tcl testing framework. This
** function must be called with two arguments:
**
**   SELECT <function-name>(<key-name>, <input-string>);
**
** where <function-name> is the name passed as the second argument
** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer')
** concatenated with the string '_test' (e.g. 'fts2_tokenizer_test').
**
** The return value is a string that may be interpreted as a Tcl
** list. For each token in the <input-string>, three elements are
** added to the returned list. The first is the token position, the
** second is the token text (folded, stemmed, etc.) and the third is the
** substring of <input-string> associated with the token. For example,
** using the built-in "simple" tokenizer:
**
**   SELECT fts2_tokenizer_test('simple', 'I don''t see how');
**
** will return the string:
**
**   "{0 i I 1 dont don't 2 see see 3 how how}"
**
** The cursor and the tokenizer created for the call are always released
** before returning, including when one of the tokenizer methods fails.
** The first failure encountered is the one reported.
*/
static void testFunc(
  sqlite3_context *context,
  int argc,
  sqlite3_value **argv
){
  fts2Hash *pHash;
  sqlite3_tokenizer_module *p = 0;
  sqlite3_tokenizer *pTokenizer = 0;
  sqlite3_tokenizer_cursor *pCsr = 0;

  const char *zErr = 0;

  const char *zName;
  int nName;
  const char *zInput;
  int nInput;

  const char *zToken;
  int nToken;
  int iStart;
  int iEnd;
  int iPos;

  Tcl_Obj *pRet;

  assert( argc==2 );

  /* Fetch the text first, then its length, as the SQLite documentation
  ** recommends. */
  zName = (const char *)sqlite3_value_text(argv[0]);
  nName = sqlite3_value_bytes(argv[0]);
  zInput = (const char *)sqlite3_value_text(argv[1]);
  nInput = sqlite3_value_bytes(argv[1]);

  pHash = (fts2Hash *)sqlite3_user_data(context);
  if( zName ){
    /* Hash keys include the nul terminator, hence the +1. */
    p = (sqlite3_tokenizer_module *)sqlite3Fts2HashFind(pHash, zName, nName+1);
  }
  if( !p ){
    char *zMsg = sqlite3_mprintf("unknown tokenizer: %s", zName);
    sqlite3_result_error(context, zMsg, -1);
    sqlite3_free(zMsg);
    return;
  }

  pRet = Tcl_NewObj();
  Tcl_IncrRefCount(pRet);

  if( SQLITE_OK!=p->xCreate(0, 0, &pTokenizer) ){
    zErr = "error in xCreate()";
    pTokenizer = 0;
    goto finish;
  }
  pTokenizer->pModule = p;
  if( SQLITE_OK!=p->xOpen(pTokenizer, zInput, nInput, &pCsr) ){
    zErr = "error in xOpen()";
    pCsr = 0;
    goto finish;
  }
  pCsr->pTokenizer = pTokenizer;

  while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){
    Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos));
    Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
    zToken = &zInput[iStart];
    nToken = iEnd-iStart;
    Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
  }

finish:
  /* Release whatever was acquired, in reverse order. A failure here is
  ** only reported if no earlier error has been recorded. */
  if( pCsr && SQLITE_OK!=p->xClose(pCsr) && zErr==0 ){
    zErr = "error in xClose()";
  }
  if( pTokenizer && SQLITE_OK!=p->xDestroy(pTokenizer) && zErr==0 ){
    zErr = "error in xDestroy()";
  }

  if( zErr ){
    sqlite3_result_error(context, zErr, -1);
  }else{
    sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT);
  }
  Tcl_DecrRefCount(pRet);
}
#endif
/*
** Register the SQL functions that expose hash table pHash on database
** connection db. The hash table must have been initialised to use string
** keys and to take a private copy of each key, i.e. by a call similar to:
**
**   sqlite3Fts2HashInit(pHash, FTS2_HASH_STRING, 1);
**
** Scalar function zName is registered in both its one- and two-argument
** forms (see the header comment above scalarFunc() for details). When
** compiled with SQLITE_TEST, a two-argument function named "<zName>_test"
** implemented by testFunc() is registered as well. Each function receives
** pHash as its user-data pointer.
**
** Registration stops at the first failure. The return value is SQLITE_OK,
** the error code of the failed registration, or SQLITE_NOMEM if the name
** of the test function cannot be allocated.
*/
int sqlite3Fts2InitHashTable(
  sqlite3 *db,
  fts2Hash *pHash,
  const char *zName
){
  void *pCtx = (void *)pHash;
  char *zTest = 0;
  int rc;

#ifdef SQLITE_TEST
  zTest = sqlite3_mprintf("%s_test", zName);
  if( zTest==0 ){
    return SQLITE_NOMEM;
  }
#endif

  rc = sqlite3_create_function(db, zName, 1, SQLITE_ANY, pCtx, scalarFunc, 0, 0);
  if( rc==SQLITE_OK ){
    rc = sqlite3_create_function(
        db, zName, 2, SQLITE_ANY, pCtx, scalarFunc, 0, 0
    );
  }
#ifdef SQLITE_TEST
  if( rc==SQLITE_OK ){
    rc = sqlite3_create_function(
        db, zTest, 2, SQLITE_ANY, pCtx, testFunc, 0, 0
    );
  }
#endif

  /* zTest is NULL unless SQLITE_TEST is defined; freeing NULL is a no-op. */
  sqlite3_free(zTest);
  return rc;
}

View File

@@ -75,16 +75,4 @@ struct sqlite3_tokenizer_cursor {
/* Tokenizer implementations will typically add additional fields */ /* Tokenizer implementations will typically add additional fields */
}; };
/*
** Get the module for a tokenizer which generates tokens based on a
** set of non-token characters. The default is to break tokens at any
** non-alnum character, though the set of delimiters can also be
** specified by the first argv argument to xCreate().
*/
/* TODO(shess) This doesn't belong here. Need some sort of
** registration process.
*/
void sqlite3Fts2SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
void sqlite3Fts2PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule);
#endif /* _FTS2_TOKENIZER_H_ */ #endif /* _FTS2_TOKENIZER_H_ */

View File

@@ -113,9 +113,15 @@ SQLite. Documentation follows.
2 COMPILATION AND USAGE 2 COMPILATION AND USAGE
The easiest way to compile and use the ICU extension is to build The easiest way to compile and use the ICU extension is to build
and use it as a dynamically loadable SQLite extension. and use it as a dynamically loadable SQLite extension. To do this
using gcc on *nix:
gcc -shared icu.c `icu-config --ldflags` -o libSqliteIcu.so
You may need to add "-I" flags so that gcc can find sqlite3ext.h
and sqlite3.h. The resulting shared lib, libSqliteIcu.so, may be
loaded into sqlite in the same way as any other dynamically loadable
extension.
3 BUGS, PROBLEMS AND SECURITY ISSUES 3 BUGS, PROBLEMS AND SECURITY ISSUES

View File

@@ -9,7 +9,7 @@
** May you share freely, never taking more than you give. ** May you share freely, never taking more than you give.
** **
************************************************************************* *************************************************************************
** $Id: icu.c,v 1.5 2007/06/11 08:00:00 danielk1977 Exp $ ** $Id: icu.c,v 1.6 2007/06/22 15:21:16 danielk1977 Exp $
** **
** This file implements an integration between the ICU library ** This file implements an integration between the ICU library
** ("International Components for Unicode", an open-source library ** ("International Components for Unicode", an open-source library
@@ -37,11 +37,12 @@
#include <unicode/ucol.h> #include <unicode/ucol.h>
#include <assert.h> #include <assert.h>
#include "sqlite3.h"
#ifndef SQLITE_CORE #ifndef SQLITE_CORE
#include "sqlite3ext.h" #include "sqlite3ext.h"
SQLITE_EXTENSION_INIT1 SQLITE_EXTENSION_INIT1
#else
#include "sqlite3.h"
#endif #endif
/* /*
@@ -342,7 +343,7 @@ static void icuCaseFunc16(sqlite3_context *p, int nArg, sqlite3_value **apArg){
nInput = sqlite3_value_bytes16(apArg[0]); nInput = sqlite3_value_bytes16(apArg[0]);
nOutput = nInput * 2 + 2; nOutput = nInput * 2 + 2;
zOutput = sqlite3_malloc(nInput*2+2); zOutput = sqlite3_malloc(nOutput);
if( !zOutput ){ if( !zOutput ){
return; return;
} }

View File

@@ -148,7 +148,9 @@ EXTSRC += -DSQLITE_CORE=1 \
$(TOP)/ext/icu/icu.c \ $(TOP)/ext/icu/icu.c \
$(TOP)/ext/fts2/fts2.c \ $(TOP)/ext/fts2/fts2.c \
$(TOP)/ext/fts2/fts2_hash.c \ $(TOP)/ext/fts2/fts2_hash.c \
$(TOP)/ext/fts2/fts2_icu.c \
$(TOP)/ext/fts2/fts2_porter.c \ $(TOP)/ext/fts2/fts2_porter.c \
$(TOP)/ext/fts2/fts2_tokenizer.c \
$(TOP)/ext/fts2/fts2_tokenizer1.c $(TOP)/ext/fts2/fts2_tokenizer1.c
# Generated source code files # Generated source code files

View File

@@ -1,5 +1,5 @@
C Clarify\sdocumentation\sof\sthe\scolumn\smetadata\sAPIs.\s\sMake\ssure\sthat\sthe\nnew\sdocumentation\sclaims\sare\stested.\s(CVS\s4107) C Extend\sfts2\sso\sthat\suser\sdefined\stokenizers\smay\sbe\sadded.\sAdd\sa\stokenizer\sthat\suses\sthe\sICU\slibrary\sif\savailable.\sDocumentation\sand\stests\sto\scome.\s(CVS\s4108)
D 2007-06-21T15:25:05 D 2007-06-22T15:21:16
F Makefile.in 7f7485a4cc039476a42e534b3f26ec90e2f9753e F Makefile.in 7f7485a4cc039476a42e534b3f26ec90e2f9753e
F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935 F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935
F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028 F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028
@@ -36,19 +36,21 @@ F ext/fts1/fulltext.h 08525a47852d1d62a0be81d3fc3fe2d23b094efd
F ext/fts1/simple_tokenizer.c 1844d72f7194c3fd3d7e4173053911bf0661b70d F ext/fts1/simple_tokenizer.c 1844d72f7194c3fd3d7e4173053911bf0661b70d
F ext/fts1/tokenizer.h 0c53421b832366d20d720d21ea3e1f6e66a36ef9 F ext/fts1/tokenizer.h 0c53421b832366d20d720d21ea3e1f6e66a36ef9
F ext/fts2/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d F ext/fts2/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
F ext/fts2/fts2.c 8f9bd5fce1a6900072ad9b65dd41fe8ba010f099 F ext/fts2/fts2.c 841766f2f14d68e623404f9531d98afa0f7cbf05
F ext/fts2/fts2.h 591916a822cfb6426518fdbf6069359119bc46eb F ext/fts2/fts2.h 591916a822cfb6426518fdbf6069359119bc46eb
F ext/fts2/fts2_hash.c b3f22116d4ef0bc8f2da6e3fdc435c86d0951a9b F ext/fts2/fts2_hash.c b3f22116d4ef0bc8f2da6e3fdc435c86d0951a9b
F ext/fts2/fts2_hash.h e283308156018329f042816eb09334df714e105e F ext/fts2/fts2_hash.h e283308156018329f042816eb09334df714e105e
F ext/fts2/fts2_icu.c 45b54d1e075020b35db20f69d829f95ca0651111
F ext/fts2/fts2_porter.c 991a45463553c7318063fe7773368a6c0f39e35d F ext/fts2/fts2_porter.c 991a45463553c7318063fe7773368a6c0f39e35d
F ext/fts2/fts2_tokenizer.h 4c5ffe31d63622869eb6eec1503df7f6996fd1bd F ext/fts2/fts2_tokenizer.c 836373ee0fab4f8288a7815496529f25e4504881
F ext/fts2/fts2_tokenizer.h 6d151c51382e8f6cf689c616bb697fe780478089
F ext/fts2/fts2_tokenizer1.c 5c979fe8815f95396beb22b627571da895a025af F ext/fts2/fts2_tokenizer1.c 5c979fe8815f95396beb22b627571da895a025af
F ext/fts2/mkfts2amal.tcl 2a9ec76b0760fe7f3669dca5bc0d60728bc1c977 F ext/fts2/mkfts2amal.tcl 2a9ec76b0760fe7f3669dca5bc0d60728bc1c977
F ext/icu/README.txt a470afe5adf6534cc0bdafca31e6cf4d88c321fa F ext/icu/README.txt 3b130aa66e7a681136f6add198b076a2f90d1e33
F ext/icu/icu.c daab19e2c5221685688ecff2bb75bf9e0eea361d F ext/icu/icu.c 61a345d8126686aa3487aa8d2d0f68abd655f7a4
F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895
F ltmain.sh 56abb507100ed2d4261f6dd1653dec3cf4066387 F ltmain.sh 56abb507100ed2d4261f6dd1653dec3cf4066387
F main.mk 522c81a818f2f81eb5e904ce983710449c5d76ad F main.mk 9007943b573ddccc1bb39f8fcb7b2812f6cc8792
F mkdll.sh 37fa8a7412e51b5ab2bc6d4276135f022a0feffb F mkdll.sh 37fa8a7412e51b5ab2bc6d4276135f022a0feffb
F mkopcodec.awk bd46ad001c98dfbab07b1713cb8e692fa0e5415d F mkopcodec.awk bd46ad001c98dfbab07b1713cb8e692fa0e5415d
F mkopcodeh.awk cde995d269aa06c94adbf6455bea0acedb913fa5 F mkopcodeh.awk cde995d269aa06c94adbf6455bea0acedb913fa5
@@ -73,14 +75,14 @@ F src/date.c 6049db7d5a8fdf2c677ff7d58fa31d4f6593c988
F src/delete.c 5c0d89b3ef7d48fe1f5124bfe8341f982747fe29 F src/delete.c 5c0d89b3ef7d48fe1f5124bfe8341f982747fe29
F src/experimental.c 1b2d1a6cd62ecc39610e97670332ca073c50792b F src/experimental.c 1b2d1a6cd62ecc39610e97670332ca073c50792b
F src/expr.c 763ca2b39fe551a6dc7d37ba40544311622eee32 F src/expr.c 763ca2b39fe551a6dc7d37ba40544311622eee32
F src/func.c 4668843e6f0d27653939e12fc32276fb8e38d21a F src/func.c 6b45261aa2c514f642201b90493af68469c04af6
F src/hash.c 67b23e14f0257b69a3e8aa663e4eeadc1a2b6fd5 F src/hash.c 67b23e14f0257b69a3e8aa663e4eeadc1a2b6fd5
F src/hash.h 1b3f7e2609141fd571f62199fc38687d262e9564 F src/hash.h 1b3f7e2609141fd571f62199fc38687d262e9564
F src/insert.c e595ca26805dfb3a9ebaabc28e7947c479f3b14d F src/insert.c e595ca26805dfb3a9ebaabc28e7947c479f3b14d
F src/legacy.c 388c71ad7fbcd898ba1bcbfc98a3ac954bfa5d01 F src/legacy.c 388c71ad7fbcd898ba1bcbfc98a3ac954bfa5d01
F src/limits.h 71ab25f17e35e0a9f3f6f234b8ed49cc56731d35 F src/limits.h 71ab25f17e35e0a9f3f6f234b8ed49cc56731d35
F src/loadext.c afe4f4755dc49c36ef505748bbdddecb9f1d02a2 F src/loadext.c b85b4e777cda9bf95475152ed240b6dfd2a0ecd9
F src/main.c 797dc983716c1480f6af78a36be3add8806211a1 F src/main.c 65fc7de0b3c2e5b637c000ecf419c35de2525ef9
F src/malloc.c fa9bbccc4e6d099cd04c2518d238a1669c9d1020 F src/malloc.c fa9bbccc4e6d099cd04c2518d238a1669c9d1020
F src/md5.c c5fdfa5c2593eaee2e32a5ce6c6927c986eaf217 F src/md5.c c5fdfa5c2593eaee2e32a5ce6c6927c986eaf217
F src/os.c 1f10b47acc1177fb9225edb4f5f0d25ed716f9cb F src/os.c 1f10b47acc1177fb9225edb4f5f0d25ed716f9cb
@@ -104,9 +106,9 @@ F src/random.c 6119474a6f6917f708c1dee25b9a8e519a620e88
F src/select.c 33a258fc9c9dccb28ae2d3a02f1e1148d6433148 F src/select.c 33a258fc9c9dccb28ae2d3a02f1e1148d6433148
F src/server.c 087b92a39d883e3fa113cae259d64e4c7438bc96 F src/server.c 087b92a39d883e3fa113cae259d64e4c7438bc96
F src/shell.c 4b0fc3c76a9f23a1c963e01703c0fbbca1b5c34d F src/shell.c 4b0fc3c76a9f23a1c963e01703c0fbbca1b5c34d
F src/sqlite.h.in 475e0e3dbd34c151ca89423c97d878c99575c71a F src/sqlite.h.in 6f290b660b2e7c3359968bb4b344ec31a1178746
F src/sqlite3ext.h 7d0d363ea7327e817ef0dfe1b7eee1f171b72890 F src/sqlite3ext.h 95575e0d175a0271fe2c3232c0d11e8720ed6887
F src/sqliteInt.h ed31d1a0311c1ffc018fa2e9035a6cf7985049c8 F src/sqliteInt.h 6503239d26b1943227031aa005320ef09b9b92b7
F src/sqliteLimit.h f14609c27636ebc217c9603ade26dbdd7d0f6afa F src/sqliteLimit.h f14609c27636ebc217c9603ade26dbdd7d0f6afa
F src/table.c a8de75bcedf84d4060d804264b067ab3b1a3561d F src/table.c a8de75bcedf84d4060d804264b067ab3b1a3561d
F src/tclsqlite.c 4bffe56752d2c24ade23340e46a91fd92c316e08 F src/tclsqlite.c 4bffe56752d2c24ade23340e46a91fd92c316e08
@@ -143,7 +145,7 @@ F src/vdbeaux.c c580d3605edc2c24ba9bd26fa7aa8b4fff10daa4
F src/vdbeblob.c 96f3572fdc45eda5be06e6372b612bc30742d9f0 F src/vdbeblob.c 96f3572fdc45eda5be06e6372b612bc30742d9f0
F src/vdbefifo.c 3ca8049c561d5d67cbcb94dc909ae9bb68c0bf8f F src/vdbefifo.c 3ca8049c561d5d67cbcb94dc909ae9bb68c0bf8f
F src/vdbemem.c ca4d3994507cb0a9504820293af69f5c778b4abd F src/vdbemem.c ca4d3994507cb0a9504820293af69f5c778b4abd
F src/vtab.c c5ebebf615b2f29499fbe97a584c4bb342632aa0 F src/vtab.c 51d43cda45d25e6f3a15d19fe32992b7756e74db
F src/where.c 12387641659605318ae03d87f0687f223dfc9568 F src/where.c 12387641659605318ae03d87f0687f223dfc9568
F tclinstaller.tcl 4356d9d94d2b5ed5e68f9f0c80c4df3048dd7617 F tclinstaller.tcl 4356d9d94d2b5ed5e68f9f0c80c4df3048dd7617
F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2 F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2
@@ -253,6 +255,7 @@ F test/fts2l.test 4c53c89ce3919003765ff4fd8d98ecf724d97dd3
F test/fts2m.test 4b30142ead6f3ed076e880a2a464064c5ad58c51 F test/fts2m.test 4b30142ead6f3ed076e880a2a464064c5ad58c51
F test/fts2n.test a70357e72742681eaebfdbe9007b87ff3b771638 F test/fts2n.test a70357e72742681eaebfdbe9007b87ff3b771638
F test/fts2o.test 05ce2ac9111c29998418a584de02136a0ded471b F test/fts2o.test 05ce2ac9111c29998418a584de02136a0ded471b
F test/fts2token.test 8cfc9ee33361b93fa175197f25fefdd13dfb442e
F test/func.test 605989453d1b42cec1d05c17aa232dc98e3e04e6 F test/func.test 605989453d1b42cec1d05c17aa232dc98e3e04e6
F test/fuzz.test 62fc19dd36a427777fd671b569df07166548628a F test/fuzz.test 62fc19dd36a427777fd671b569df07166548628a
F test/fuzz2.test ea38692ce2da99ad79fe0be5eb1a452c1c4d37bb F test/fuzz2.test ea38692ce2da99ad79fe0be5eb1a452c1c4d37bb
@@ -509,7 +512,7 @@ F www/tclsqlite.tcl 8be95ee6dba05eabcd27a9d91331c803f2ce2130
F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0 F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0
F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b
F www/whentouse.tcl fc46eae081251c3c181bd79c5faef8195d7991a5 F www/whentouse.tcl fc46eae081251c3c181bd79c5faef8195d7991a5
P af4b914a2152ce021401b6f78bb88a5323aa6fc2 P 2dafe08a91b5328a9d0df5ab29c3ff2d94ad5f6f
R 9c91b2e377cf682d8693c478c169d4ef R c1366eff2872139d79721716582129d6
U drh U danielk1977
Z 97256b78e966a6731710de3bdea5bd11 Z 5d0863f4b162f005704998d90939d28f

View File

@@ -1 +1 @@
2dafe08a91b5328a9d0df5ab29c3ff2d94ad5f6f 68677e420c744b39ea9d7399819e0f376748886d

View File

@@ -16,7 +16,7 @@
** sqliteRegisterBuildinFunctions() found at the bottom of the file. ** sqliteRegisterBuildinFunctions() found at the bottom of the file.
** All other code has file scope. ** All other code has file scope.
** **
** $Id: func.c,v 1.160 2007/06/07 19:08:33 drh Exp $ ** $Id: func.c,v 1.161 2007/06/22 15:21:16 danielk1977 Exp $
*/ */
#include "sqliteInt.h" #include "sqliteInt.h"
#include <ctype.h> #include <ctype.h>
@@ -1410,7 +1410,13 @@ void sqlite3RegisterBuiltinFunctions(sqlite3 *db){
} }
} }
sqlite3RegisterDateTimeFunctions(db); sqlite3RegisterDateTimeFunctions(db);
sqlite3_overload_function(db, "MATCH", 2); if( !sqlite3MallocFailed() ){
int rc = sqlite3_overload_function(db, "MATCH", 2);
assert( rc==SQLITE_NOMEM || rc==SQLITE_OK );
if( rc==SQLITE_NOMEM ){
sqlite3FailedMalloc();
}
}
#ifdef SQLITE_SSE #ifdef SQLITE_SSE
(void)sqlite3SseFunctions(db); (void)sqlite3SseFunctions(db);
#endif #endif

View File

@@ -157,6 +157,7 @@ const sqlite3_api_routines sqlite3_apis = {
sqlite3_create_function, sqlite3_create_function,
sqlite3_create_function16, sqlite3_create_function16,
sqlite3_create_module, sqlite3_create_module,
sqlite3_create_module_v2,
sqlite3_data_count, sqlite3_data_count,
sqlite3_db_handle, sqlite3_db_handle,
sqlite3_declare_vtab, sqlite3_declare_vtab,

View File

@@ -14,7 +14,7 @@
** other files are for internal use by SQLite and should not be ** other files are for internal use by SQLite and should not be
** accessed by users of the library. ** accessed by users of the library.
** **
** $Id: main.c,v 1.376 2007/05/08 20:37:39 drh Exp $ ** $Id: main.c,v 1.377 2007/06/22 15:21:16 danielk1977 Exp $
*/ */
#include "sqliteInt.h" #include "sqliteInt.h"
#include "os.h" #include "os.h"
@@ -194,6 +194,9 @@ int sqlite3_close(sqlite3 *db){
#ifndef SQLITE_OMIT_VIRTUALTABLE #ifndef SQLITE_OMIT_VIRTUALTABLE
for(i=sqliteHashFirst(&db->aModule); i; i=sqliteHashNext(i)){ for(i=sqliteHashFirst(&db->aModule); i; i=sqliteHashNext(i)){
Module *pMod = (Module *)sqliteHashData(i); Module *pMod = (Module *)sqliteHashData(i);
if( pMod->xDestroy ){
pMod->xDestroy(pMod->pAux);
}
sqliteFree(pMod); sqliteFree(pMod);
} }
sqlite3HashClear(&db->aModule); sqlite3HashClear(&db->aModule);
@@ -986,41 +989,47 @@ static int openDatabase(
db->aDb[1].safety_level = 1; db->aDb[1].safety_level = 1;
#endif #endif
db->magic = SQLITE_MAGIC_OPEN;
if( sqlite3MallocFailed() ){
goto opendb_out;
}
/* Register all built-in functions, but do not attempt to read the /* Register all built-in functions, but do not attempt to read the
** database schema yet. This is delayed until the first time the database ** database schema yet. This is delayed until the first time the database
** is accessed. ** is accessed.
*/ */
if( !sqlite3MallocFailed() ){ sqlite3Error(db, SQLITE_OK, 0);
sqlite3Error(db, SQLITE_OK, 0); sqlite3RegisterBuiltinFunctions(db);
sqlite3RegisterBuiltinFunctions(db);
}
db->magic = SQLITE_MAGIC_OPEN;
/* Load automatic extensions - extensions that have been registered /* Load automatic extensions - extensions that have been registered
** using the sqlite3_automatic_extension() API. ** using the sqlite3_automatic_extension() API.
*/ */
(void)sqlite3AutoLoadExtensions(db); (void)sqlite3AutoLoadExtensions(db);
if( sqlite3_errcode(db)!=SQLITE_OK ){
goto opendb_out;
}
#ifdef SQLITE_ENABLE_FTS1 #ifdef SQLITE_ENABLE_FTS1
{ if( !sqlite3MallocFailed() ){
extern int sqlite3Fts1Init(sqlite3*); extern int sqlite3Fts1Init(sqlite3*);
sqlite3Fts1Init(db); rc = sqlite3Fts1Init(db);
} }
#endif #endif
#ifdef SQLITE_ENABLE_FTS2 #ifdef SQLITE_ENABLE_FTS2
{ if( !sqlite3MallocFailed() && rc==SQLITE_OK ){
extern int sqlite3Fts2Init(sqlite3*); extern int sqlite3Fts2Init(sqlite3*);
sqlite3Fts2Init(db); rc = sqlite3Fts2Init(db);
} }
#endif #endif
#ifdef SQLITE_ENABLE_ICU #ifdef SQLITE_ENABLE_ICU
if( !sqlite3MallocFailed() ){ if( !sqlite3MallocFailed() && rc==SQLITE_OK ){
extern int sqlite3IcuInit(sqlite3*); extern int sqlite3IcuInit(sqlite3*);
sqlite3IcuInit(db); rc = sqlite3IcuInit(db);
} }
#endif #endif
sqlite3Error(db, rc, 0);
/* -DSQLITE_DEFAULT_LOCKING_MODE=1 makes EXCLUSIVE the default locking /* -DSQLITE_DEFAULT_LOCKING_MODE=1 makes EXCLUSIVE the default locking
** mode. -DSQLITE_DEFAULT_LOCKING_MODE=0 make NORMAL the default locking ** mode. -DSQLITE_DEFAULT_LOCKING_MODE=0 make NORMAL the default locking

View File

@@ -30,7 +30,7 @@
** the version number) and changes its name to "sqlite3.h" as ** the version number) and changes its name to "sqlite3.h" as
** part of the build process. ** part of the build process.
** **
** @(#) $Id: sqlite.h.in,v 1.214 2007/06/21 15:25:05 drh Exp $ ** @(#) $Id: sqlite.h.in,v 1.215 2007/06/22 15:21:16 danielk1977 Exp $
*/ */
#ifndef _SQLITE3_H_ #ifndef _SQLITE3_H_
#define _SQLITE3_H_ #define _SQLITE3_H_
@@ -2497,6 +2497,19 @@ int sqlite3_create_module(
void * /* Client data for xCreate/xConnect */ void * /* Client data for xCreate/xConnect */
); );
/*
** This routine is identical to the sqlite3_create_module() method above,
** except that it allows a destructor function to be specified. It is
** even more experimental than the rest of the virtual tables API.
**
** If xDestroy is not NULL, the library invokes it with the module's
** client data pointer as its only argument once the module registration
** is discarded, for example when the database connection is closed.
** Passing NULL for xDestroy registers the module without a destructor,
** which is what sqlite3_create_module() does.
*/
int sqlite3_create_module_v2(
sqlite3 *db, /* SQLite connection to register module with */
const char *zName, /* Name of the module */
const sqlite3_module *, /* Methods for the module */
void *, /* Client data for xCreate/xConnect */
void(*xDestroy)(void*) /* Module destructor function */
);
/* /*
** Every module implementation uses a subclass of the following structure ** Every module implementation uses a subclass of the following structure
** to describe a particular instance of the module. Each subclass will ** to describe a particular instance of the module. Each subclass will

View File

@@ -15,7 +15,7 @@
** as extensions by SQLite should #include this file instead of ** as extensions by SQLite should #include this file instead of
** sqlite3.h. ** sqlite3.h.
** **
** @(#) $Id: sqlite3ext.h,v 1.10 2007/03/29 18:46:01 drh Exp $ ** @(#) $Id: sqlite3ext.h,v 1.11 2007/06/22 15:21:16 danielk1977 Exp $
*/ */
#ifndef _SQLITE3EXT_H_ #ifndef _SQLITE3EXT_H_
#define _SQLITE3EXT_H_ #define _SQLITE3EXT_H_
@@ -76,6 +76,7 @@ struct sqlite3_api_routines {
int (*create_function)(sqlite3*,const char*,int,int,void*,void (*xFunc)(sqlite3_context*,int,sqlite3_value**),void (*xStep)(sqlite3_context*,int,sqlite3_value**),void (*xFinal)(sqlite3_context*)); int (*create_function)(sqlite3*,const char*,int,int,void*,void (*xFunc)(sqlite3_context*,int,sqlite3_value**),void (*xStep)(sqlite3_context*,int,sqlite3_value**),void (*xFinal)(sqlite3_context*));
int (*create_function16)(sqlite3*,const void*,int,int,void*,void (*xFunc)(sqlite3_context*,int,sqlite3_value**),void (*xStep)(sqlite3_context*,int,sqlite3_value**),void (*xFinal)(sqlite3_context*)); int (*create_function16)(sqlite3*,const void*,int,int,void*,void (*xFunc)(sqlite3_context*,int,sqlite3_value**),void (*xStep)(sqlite3_context*,int,sqlite3_value**),void (*xFinal)(sqlite3_context*));
int (*create_module)(sqlite3*,const char*,const sqlite3_module*,void*); int (*create_module)(sqlite3*,const char*,const sqlite3_module*,void*);
int (*create_module_v2)(sqlite3*,const char*,const sqlite3_module*,void*,void (*xDestroy)(void *));
int (*data_count)(sqlite3_stmt*pStmt); int (*data_count)(sqlite3_stmt*pStmt);
sqlite3 * (*db_handle)(sqlite3_stmt*); sqlite3 * (*db_handle)(sqlite3_stmt*);
int (*declare_vtab)(sqlite3*,const char*); int (*declare_vtab)(sqlite3*,const char*);
@@ -209,6 +210,7 @@ struct sqlite3_api_routines {
#define sqlite3_create_function sqlite3_api->create_function #define sqlite3_create_function sqlite3_api->create_function
#define sqlite3_create_function16 sqlite3_api->create_function16 #define sqlite3_create_function16 sqlite3_api->create_function16
#define sqlite3_create_module sqlite3_api->create_module #define sqlite3_create_module sqlite3_api->create_module
#define sqlite3_create_module_v2 sqlite3_api->create_module_v2
#define sqlite3_data_count sqlite3_api->data_count #define sqlite3_data_count sqlite3_api->data_count
#define sqlite3_db_handle sqlite3_api->db_handle #define sqlite3_db_handle sqlite3_api->db_handle
#define sqlite3_declare_vtab sqlite3_api->declare_vtab #define sqlite3_declare_vtab sqlite3_api->declare_vtab

View File

@@ -11,7 +11,7 @@
************************************************************************* *************************************************************************
** Internal interface definitions for SQLite. ** Internal interface definitions for SQLite.
** **
** @(#) $Id: sqliteInt.h,v 1.574 2007/06/20 15:29:25 drh Exp $ ** @(#) $Id: sqliteInt.h,v 1.575 2007/06/22 15:21:16 danielk1977 Exp $
*/ */
#ifndef _SQLITEINT_H_ #ifndef _SQLITEINT_H_
#define _SQLITEINT_H_ #define _SQLITEINT_H_
@@ -584,6 +584,7 @@ struct Module {
const sqlite3_module *pModule; /* Callback pointers */ const sqlite3_module *pModule; /* Callback pointers */
const char *zName; /* Name passed to create_module() */ const char *zName; /* Name passed to create_module() */
void *pAux; /* pAux passed to create_module() */ void *pAux; /* pAux passed to create_module() */
void (*xDestroy)(void *); /* Module destructor function */
}; };
/* /*

View File

@@ -11,11 +11,38 @@
************************************************************************* *************************************************************************
** This file contains code used to help implement virtual tables. ** This file contains code used to help implement virtual tables.
** **
** $Id: vtab.c,v 1.46 2007/05/04 13:15:57 drh Exp $ ** $Id: vtab.c,v 1.47 2007/06/22 15:21:16 danielk1977 Exp $
*/ */
#ifndef SQLITE_OMIT_VIRTUALTABLE #ifndef SQLITE_OMIT_VIRTUALTABLE
#include "sqliteInt.h" #include "sqliteInt.h"
/*
** Shared implementation behind the public module-registration entry
** points. Builds a Module record for zName and stores it in db->aModule.
**
** The record and a private copy of the name are carved out of a single
** allocation: the name bytes live immediately after the Module struct,
** so releasing the record releases the name as well.
**
** Whatever record sqlite3HashInsert() hands back is disposed of here:
** its destructor (if any) is run on its pAux pointer and the record is
** freed. Presumably that is a previously registered module of the same
** name being displaced -- confirm against the hash table implementation.
**
** NOTE(review): if the initial allocation fails, xDestroy is never
** invoked on pAux. Callers relying on the destructor for cleanup should
** confirm who owns pAux on that path.
**
** The return value is whatever sqlite3ApiExit(db, SQLITE_OK) yields.
*/
static int createModule(
  sqlite3 *db,                    /* Database in which module is registered */
  const char *zName,              /* Name assigned to this module */
  const sqlite3_module *pModule,  /* The definition of the module */
  void *pAux,                     /* Context pointer for xCreate/xConnect */
  void (*xDestroy)(void *)        /* Module destructor function */
){
  Module *pNew;     /* Newly allocated registration record */
  Module *pDel;     /* Record returned by the hash table, to be released */
  char *zKey;       /* Copy of zName stored directly after *pNew */
  int nName;        /* Length of zName, not counting the nul terminator */

  nName = strlen(zName);
  pNew = (Module *)sqliteMallocRaw(sizeof(Module) + nName + 1);
  if( pNew==0 ){
    /* Nothing was registered; let sqlite3ApiExit() produce the result. */
    return sqlite3ApiExit(db, SQLITE_OK);
  }

  /* The name copy (including its terminator) sits right behind the struct. */
  zKey = (char *)(&pNew[1]);
  memcpy(zKey, zName, nName+1);

  pNew->pModule = pModule;
  pNew->zName = zKey;
  pNew->pAux = pAux;
  pNew->xDestroy = xDestroy;

  /* Install the record, then dispose of whatever the hash table returns. */
  pDel = (Module *)sqlite3HashInsert(&db->aModule, zKey, nName, (void*)pNew);
  if( pDel!=0 && pDel->xDestroy!=0 ){
    pDel->xDestroy(pDel->pAux);
  }
  sqliteFree(pDel);
  sqlite3ResetInternalSchema(db, 0);

  return sqlite3ApiExit(db, SQLITE_OK);
}
/* /*
** External API function used to create a new virtual-table module. ** External API function used to create a new virtual-table module.
*/ */
@@ -25,19 +52,20 @@ int sqlite3_create_module(
const sqlite3_module *pModule, /* The definition of the module */ const sqlite3_module *pModule, /* The definition of the module */
void *pAux /* Context pointer for xCreate/xConnect */ void *pAux /* Context pointer for xCreate/xConnect */
){ ){
int nName = strlen(zName); return createModule(db, zName, pModule, pAux, 0);
Module *pMod = (Module *)sqliteMallocRaw(sizeof(Module) + nName + 1); }
if( pMod ){
char *zCopy = (char *)(&pMod[1]); /*
memcpy(zCopy, zName, nName+1); ** External API function used to create a new virtual-table module.
pMod->zName = zCopy; */
pMod->pModule = pModule; int sqlite3_create_module_v2(
pMod->pAux = pAux; sqlite3 *db, /* Database in which module is registered */
pMod = (Module *)sqlite3HashInsert(&db->aModule, zCopy, nName, (void*)pMod); const char *zName, /* Name assigned to this module */
sqliteFree(pMod); const sqlite3_module *pModule, /* The definition of the module */
sqlite3ResetInternalSchema(db, 0); void *pAux, /* Context pointer for xCreate/xConnect */
} void (*xDestroy)(void *) /* Module destructor function */
return sqlite3ApiExit(db, SQLITE_OK); ){
return createModule(db, zName, pModule, pAux, xDestroy);
} }
/* /*

107
test/fts2token.test Normal file
View File

@@ -0,0 +1,107 @@
# 2007 June 21
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The focus
# of this script is testing the pluggable tokeniser feature of the
# FTS2 module.
#
# $Id: fts2token.test,v 1.1 2007/06/22 15:21:16 danielk1977 Exp $
#
set testdir [file dirname $argv0]
source $testdir/tester.tcl
# If SQLITE_ENABLE_FTS2 is not defined, omit this file.
ifcapable !fts2 {
finish_test
return
}
#--------------------------------------------------------------------------
# Test cases fts2token-1.* are the warm-body test for the SQL scalar
# function fts2_tokenizer(). The procedure is as follows:
#
# 1: Verify that there is no such fts2 tokenizer as 'blah'.
#
# 2: Query for the built-in tokenizer 'simple'. Insert a copy of the
# retrieved value as tokenizer 'blah'.
#
# 3: Test that the value returned for tokenizer 'blah' is now the
# same as that retrieved for 'simple'.
#
# 4: Test that it is now possible to create an fts2 table using
# tokenizer 'blah' (it was not possible in step 1).
#
# 5: Test that the table created to use tokenizer 'blah' is usable.
#
do_test fts2token-1.1 {
catchsql {
CREATE VIRTUAL TABLE t1 USING fts2(content, tokenize blah);
}
} {1 {unknown tokenizer: blah}}
do_test fts2token-1.2 {
execsql {
SELECT fts2_tokenizer('blah', fts2_tokenizer('simple')) IS NULL;
}
} {0}
do_test fts2token-1.3 {
execsql {
SELECT fts2_tokenizer('blah') == fts2_tokenizer('simple');
}
} {1}
do_test fts2token-1.4 {
catchsql {
CREATE VIRTUAL TABLE t1 USING fts2(content, tokenize blah);
}
} {0 {}}
do_test fts2token-1.5 {
execsql {
INSERT INTO t1(content) VALUES('There was movement at the station');
INSERT INTO t1(content) VALUES('For the word has passed around');
INSERT INTO t1(content) VALUES('That the colt from ol regret had got away');
SELECT content FROM t1 WHERE content MATCH 'movement'
}
} {{There was movement at the station}}
#--------------------------------------------------------------------------
# Test cases fts2token-2.* test error cases in the scalar function based
# API for getting and setting tokenizers.
#
do_test fts2token-2.1 {
catchsql {
SELECT fts2_tokenizer('nosuchtokenizer');
}
} {1 {unknown tokenizer: nosuchtokenizer}}
#--------------------------------------------------------------------------
# Test cases fts2token-3.* test the three built-in tokenizers with a
# simple input string via the built-in test function. This is as much
# to test the test function as the tokenizer implementations.
#
do_test fts2token-3.1 {
execsql {
SELECT fts2_tokenizer_test('simple', 'I don''t see how');
}
} {{0 i I 1 don don 2 t t 3 see see 4 how how}}
do_test fts2token-3.2 {
execsql {
SELECT fts2_tokenizer_test('porter', 'I don''t see how');
}
} {{0 i I 1 don don 2 t t 3 see see 4 how how}}
ifcapable icu {
do_test fts2token-3.3 {
execsql {
SELECT fts2_tokenizer_test('icu', 'I don''t see how');
}
} {{0 i I 1 don't don't 2 see see 3 how how}}
}
finish_test