1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-08-08 14:02:16 +03:00

Extend fts2 so that user defined tokenizers may be added. Add a tokenizer that uses the ICU library if available. Documentation and tests to come. (CVS 4108)

FossilOrigin-Name: 68677e420c744b39ea9d7399819e0f376748886d
This commit is contained in:
danielk1977
2007-06-22 15:21:15 +00:00
parent bf2564f612
commit 832a58a68c
17 changed files with 821 additions and 81 deletions

View File

@@ -1847,7 +1847,7 @@ static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){
return (fulltext_vtab *) c->base.pVtab;
}
static const sqlite3_module fulltextModule; /* forward declaration */
static const sqlite3_module fts2Module; /* forward declaration */
/* Return a dynamically generated statement of the form
* insert into %_content (rowid, ...) values (?, ...)
@@ -2761,6 +2761,7 @@ static char *fulltextSchema(
*/
static int constructVtab(
sqlite3 *db, /* The SQLite database connection */
fts2Hash *pHash, /* Hash table containing tokenizers */
TableSpec *spec, /* Parsed spec information from parseSpec() */
sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */
char **pzErr /* Write any error message here */
@@ -2771,6 +2772,9 @@ static int constructVtab(
const sqlite3_tokenizer_module *m = NULL;
char *schema;
char const *zTok; /* Name of tokenizer to use for this fts table */
int nTok; /* Length of zTok, including nul terminator */
v = (fulltext_vtab *) malloc(sizeof(fulltext_vtab));
if( v==0 ) return SQLITE_NOMEM;
CLEAR(v);
@@ -2787,16 +2791,20 @@ static int constructVtab(
if( spec->azTokenizer==0 ){
return SQLITE_NOMEM;
}
/* TODO(shess) For now, add new tokenizers as else if clauses. */
if( spec->azTokenizer[0]==0 || startsWith(spec->azTokenizer[0], "simple") ){
sqlite3Fts2SimpleTokenizerModule(&m);
}else if( startsWith(spec->azTokenizer[0], "porter") ){
sqlite3Fts2PorterTokenizerModule(&m);
}else{
zTok = spec->azTokenizer[0];
if( !zTok ){
zTok = "simple";
}
nTok = strlen(zTok)+1;
m = (sqlite3_tokenizer_module *)sqlite3Fts2HashFind(pHash, zTok, nTok);
if( !m ){
*pzErr = sqlite3_mprintf("unknown tokenizer: %s", spec->azTokenizer[0]);
rc = SQLITE_ERROR;
goto err;
}
for(n=0; spec->azTokenizer[n]; n++){}
if( n ){
rc = m->xCreate(n-1, (const char*const*)&spec->azTokenizer[1],
@@ -2841,7 +2849,7 @@ static int fulltextConnect(
int rc = parseSpec(&spec, argc, argv, pzErr);
if( rc!=SQLITE_OK ) return rc;
rc = constructVtab(db, &spec, ppVTab, pzErr);
rc = constructVtab(db, (fts2Hash *)pAux, &spec, ppVTab, pzErr);
clearTableSpec(&spec);
return rc;
}
@@ -2887,7 +2895,7 @@ static int fulltextCreate(sqlite3 *db, void *pAux,
");");
if( rc!=SQLITE_OK ) goto out;
rc = constructVtab(db, &spec, ppVTab, pzErr);
rc = constructVtab(db, (fts2Hash *)pAux, &spec, ppVTab, pzErr);
out:
clearTableSpec(&spec);
@@ -5833,7 +5841,7 @@ static int fulltextFindFunction(
return 0;
}
static const sqlite3_module fulltextModule = {
static const sqlite3_module fts2Module = {
/* iVersion */ 0,
/* xCreate */ fulltextCreate,
/* xConnect */ fulltextConnect,
@@ -5855,15 +5863,93 @@ static const sqlite3_module fulltextModule = {
/* xFindFunction */ fulltextFindFunction,
};
/*
** Module destructor handed to sqlite3_create_module_v2(). The argument
** is the tokenizer hash table: empty it, then release the table itself.
*/
static void hashDestroy(void *p){
  sqlite3Fts2HashClear((fts2Hash *)p);
  sqlite3_free(p);
}
/*
** The fts2 built-in tokenizers - "simple" and "porter" - are implemented
** in files fts2_tokenizer1.c and fts2_porter.c respectively. The following
** two forward declarations are for functions declared in these files
** used to retrieve the respective implementations.
**
** Calling sqlite3Fts2SimpleTokenizerModule() sets the value pointed
** to by the argument to point at the "simple" tokenizer implementation.
** Function ...PorterTokenizerModule() sets *pModule to point to the
** porter tokenizer/stemmer implementation.
*/
void sqlite3Fts2SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
void sqlite3Fts2PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule);
void sqlite3Fts2IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule);
/*
** Initialise the fts2 extension. If this extension is built as part
** of the sqlite library, then this function is called directly by
** SQLite. If fts2 is built as a dynamically loadable extension, this
** function is called by the sqlite3_extension_init() entry point.
*/
int sqlite3Fts2Init(sqlite3 *db){
sqlite3_overload_function(db, "snippet", -1);
sqlite3_overload_function(db, "offsets", -1);
return sqlite3_create_module(db, "fts2", &fulltextModule, 0);
int rc = SQLITE_OK;
fts2Hash *pHash = 0;
const sqlite3_tokenizer_module *pSimple = 0;
const sqlite3_tokenizer_module *pPorter = 0;
const sqlite3_tokenizer_module *pIcu = 0;
sqlite3Fts2SimpleTokenizerModule(&pSimple);
sqlite3Fts2PorterTokenizerModule(&pPorter);
#ifdef SQLITE_ENABLE_ICU
sqlite3Fts2IcuTokenizerModule(&pIcu);
#endif
/* Allocate and initialise the hash-table used to store tokenizers. */
pHash = sqlite3_malloc(sizeof(fts2Hash));
if( !pHash ){
rc = SQLITE_NOMEM;
}else{
sqlite3Fts2HashInit(pHash, FTS2_HASH_STRING, 1);
}
/* Load the built-in tokenizers into the hash table */
if( rc==SQLITE_OK ){
if( sqlite3Fts2HashInsert(pHash, "simple", 7, (void *)pSimple)
|| sqlite3Fts2HashInsert(pHash, "porter", 7, (void *)pPorter)
|| (pIcu && sqlite3Fts2HashInsert(pHash, "icu", 4, (void *)pIcu))
){
rc = SQLITE_NOMEM;
}
}
/* Create the virtual table wrapper around the hash-table and overload
** the two scalar functions. If this is successful, register the
** module with sqlite.
*/
if( SQLITE_OK==rc
&& SQLITE_OK==(rc = sqlite3Fts2InitHashTable(db, pHash, "fts2_tokenizer"))
&& SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1))
&& SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", -1))
){
return sqlite3_create_module_v2(
db, "fts2", &fts2Module, (void *)pHash, hashDestroy
);
}
/* An error has occurred. Delete the hash table and return the error code. */
assert( rc!=SQLITE_OK );
if( pHash ){
sqlite3Fts2HashClear(pHash);
sqlite3_free(pHash);
}
return rc;
}
#if !SQLITE_CORE
int sqlite3_extension_init(sqlite3 *db, char **pzErrMsg,
const sqlite3_api_routines *pApi){
int sqlite3_extension_init(
sqlite3 *db,
char **pzErrMsg,
const sqlite3_api_routines *pApi
){
SQLITE_EXTENSION_INIT2(pApi)
return sqlite3Fts2Init(db);
}

257
ext/fts2/fts2_icu.c Normal file
View File

@@ -0,0 +1,257 @@
/*
** 2007 June 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** This file implements a tokenizer for fts2 based on the ICU library.
**
** $Id: fts2_icu.c,v 1.1 2007/06/22 15:21:16 danielk1977 Exp $
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
#ifdef SQLITE_ENABLE_ICU
#include <assert.h>
#include <string.h>
#include "fts2_tokenizer.h"
#include <unicode/ubrk.h>
#include <unicode/ucol.h>
#include <unicode/ustring.h>
#include <unicode/utf16.h>
typedef struct IcuTokenizer IcuTokenizer;
typedef struct IcuCursor IcuCursor;
/*
** An ICU tokenizer object: the generic tokenizer base followed by the
** locale name captured by icuCreate().
*/
struct IcuTokenizer {
sqlite3_tokenizer base; /* Base class. Pointers are cast to/from sqlite3_tokenizer* */
char *zLocale; /* Locale passed to ubrk_open(), or NULL if none was given */
};
/*
** Cursor used to iterate through the tokens of one input string.
** aChar[] and aOffset[] live in the same allocation as the cursor
** itself (see icuOpen()); zBuffer is a separate allocation that is
** grown on demand by icuNext() and released by icuClose().
*/
struct IcuCursor {
sqlite3_tokenizer_cursor base; /* Base class */
UBreakIterator *pIter; /* ICU break-iterator object */
int nChar; /* Number of UChar elements in aChar[] */
UChar *aChar; /* Copy of input using utf-16 encoding */
int *aOffset; /* Offsets of each character in utf-8 input */
int nBuffer; /* Allocated size of zBuffer in bytes */
char *zBuffer; /* utf-8 text of the most recently returned token */
int iToken; /* Position value to report for the next token */
};
/*
** xCreate method of the ICU tokenizer: allocate a new tokenizer object.
**
** If at least one creation argument is supplied, argv[0] is kept as the
** locale name. The copy is stored in the same allocation as the
** IcuTokenizer structure, immediately after it. With no arguments the
** zLocale field is left NULL.
**
** Returns SQLITE_OK and stores the new object in *ppTokenizer, or
** returns SQLITE_NOMEM if the allocation fails.
*/
static int icuCreate(
  int argc,                            /* Number of entries in argv[] */
  const char * const *argv,            /* Tokenizer creation arguments */
  sqlite3_tokenizer **ppTokenizer      /* OUT: Created tokenizer */
){
  int nLocale = 0;                     /* Bytes in locale name, incl. nul */
  IcuTokenizer *pNew;

  if( argc>0 ){
    nLocale = strlen(argv[0])+1;
  }

  pNew = (IcuTokenizer *)sqlite3_malloc(sizeof(IcuTokenizer)+nLocale);
  if( pNew==0 ) return SQLITE_NOMEM;
  memset(pNew, 0, sizeof(IcuTokenizer));

  if( nLocale!=0 ){
    /* The locale string occupies the space trailing the structure. */
    char *zCopy = (char *)&pNew[1];
    memcpy(zCopy, argv[0], nLocale);
    pNew->zLocale = zCopy;
  }

  *ppTokenizer = (sqlite3_tokenizer *)pNew;
  return SQLITE_OK;
}
/*
** xDestroy method of the ICU tokenizer. The tokenizer and its locale
** string share a single allocation (see icuCreate()), so one call to
** sqlite3_free() releases everything. Always returns SQLITE_OK.
*/
static int icuDestroy(sqlite3_tokenizer *pTokenizer){
  sqlite3_free((IcuTokenizer *)pTokenizer);
  return SQLITE_OK;
}
/*
** Prepare to begin tokenizing a particular string.  The input
** string to be tokenized is zInput[0..nInput-1].  A cursor
** used to incrementally tokenize this string is returned in
** *ppCursor.
**
** The input is decoded from utf-8, case-folded one code point at a
** time and stored as utf-16 in IcuCursor.aChar[].  For every utf-16
** index at which a code point starts, and for the index one past the
** last code point, IcuCursor.aOffset[] records the corresponding byte
** offset in zInput.  Decoding stops at the first nul or malformed
** sequence.
**
** A NULL zInput is treated as an empty string and a negative nInput
** as "zInput is nul-terminated".
**
** Returns SQLITE_OK on success, SQLITE_NOMEM if the cursor cannot be
** allocated, or SQLITE_ERROR if the utf-16 conversion or ubrk_open()
** fails.  *ppCursor is set to NULL on any failure.
*/
static int icuOpen(
  sqlite3_tokenizer *pTokenizer,         /* The tokenizer */
  const char *zInput,                    /* Input string */
  int nInput,                            /* Length of zInput in bytes */
  sqlite3_tokenizer_cursor **ppCursor    /* OUT: Tokenization cursor */
){
  IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
  IcuCursor *pCsr;

  const int32_t opt = U_FOLD_CASE_DEFAULT;
  UErrorCode status = U_ZERO_ERROR;
  int nChar;                    /* Capacity of aChar[] in UChar elements */
  int nCharAlloc;               /* nChar rounded up to a multiple of 4 */

  UChar32 c;
  int iInput = 0;
  int iOut = 0;

  *ppCursor = 0;

  if( zInput==0 ){
    zInput = "";
    nInput = 0;
  }else if( nInput<0 ){
    nInput = (int)strlen(zInput);
  }

  /* Each utf-8 byte yields at most one utf-16 element, so nInput+1
  ** elements are always sufficient.  The space reserved for aChar[] is
  ** rounded up so that aOffset[], which follows it in the same
  ** allocation, is correctly aligned for int access.
  */
  nChar = nInput+1;
  nCharAlloc = (nChar+3)&~3;
  pCsr = (IcuCursor *)sqlite3_malloc(
      sizeof(IcuCursor) +                /* IcuCursor */
      nCharAlloc * sizeof(UChar) +       /* IcuCursor.aChar[] */
      (nChar+1) * sizeof(int)            /* IcuCursor.aOffset[] */
  );
  if( !pCsr ){
    return SQLITE_NOMEM;
  }
  memset(pCsr, 0, sizeof(IcuCursor));
  pCsr->aChar = (UChar *)&pCsr[1];
  pCsr->aOffset = (int *)&pCsr->aChar[nCharAlloc];

  pCsr->aOffset[iOut] = iInput;
  while( iInput<nInput ){
    int isError = 0;

    /* Only read from zInput while bytes remain; U8_NEXT does not
    ** bounds-check the first byte it fetches. */
    U8_NEXT(zInput, iInput, nInput, c);
    if( c<=0 ) break;

    c = u_foldCase(c, opt);
    U16_APPEND(pCsr->aChar, iOut, nChar, c, isError);
    if( isError ){
      sqlite3_free(pCsr);
      return SQLITE_ERROR;
    }
    pCsr->aOffset[iOut] = iInput;
  }

  pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status);
  if( !U_SUCCESS(status) ){
    sqlite3_free(pCsr);
    return SQLITE_ERROR;
  }
  pCsr->nChar = iOut;

  ubrk_first(pCsr->pIter);
  *ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
  return SQLITE_OK;
}
/*
** xClose method: dispose of a cursor created by icuOpen(). The ICU
** break iterator is closed, the token buffer grown by icuNext() is
** freed, and finally the cursor allocation itself (which also holds
** aChar[] and aOffset[]) is released. Always returns SQLITE_OK.
*/
static int icuClose(sqlite3_tokenizer_cursor *pCursor){
  IcuCursor *pIcu = (IcuCursor *)pCursor;
  UBreakIterator *pIter = pIcu->pIter;
  char *zToken = pIcu->zBuffer;

  ubrk_close(pIter);
  sqlite3_free(zToken);
  sqlite3_free(pIcu);
  return SQLITE_OK;
}
/*
** Extract the next token from a tokenization cursor.
**
** The break iterator is advanced one segment at a time.  Leading
** white-space is stripped from each segment and segments that are
** entirely white-space are skipped.  The first non-empty segment is
** converted back to utf-8 in IcuCursor.zBuffer and returned.
**
** Returns SQLITE_OK when a token is returned, SQLITE_DONE when the
** input is exhausted, SQLITE_NOMEM if the token buffer cannot be
** grown, or SQLITE_ERROR if the utf-8 conversion fails.  *ppToken
** points into the cursor's buffer and is overwritten by the next call.
*/
static int icuNext(
  sqlite3_tokenizer_cursor *pCursor,  /* Cursor returned by icuOpen */
  const char **ppToken,               /* OUT: *ppToken is the token text */
  int *pnBytes,                       /* OUT: Number of bytes in token */
  int *piStartOffset,                 /* OUT: Starting offset of token */
  int *piEndOffset,                   /* OUT: Ending offset of token */
  int *piPosition                     /* OUT: Position integer of token */
){
  IcuCursor *pCsr = (IcuCursor *)pCursor;

  int iStart = 0;
  int iEnd = 0;
  int nByte = 0;

  while( iStart==iEnd ){
    UChar32 c;

    iStart = ubrk_current(pCsr->pIter);
    iEnd = ubrk_next(pCsr->pIter);
    if( iEnd==UBRK_DONE ){
      return SQLITE_DONE;
    }

    while( iStart<iEnd ){
      int iWhite = iStart;
      /* aChar[] holds utf-16, so it must be decoded with U16_NEXT. */
      U16_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c);
      if( u_isspace(c) ){
        iStart = iWhite;
      }else{
        break;
      }
    }
    assert(iStart<=iEnd);
  }

  /* Convert the token to utf-8.  The first pass may find the buffer
  ** too small, in which case u_strToUTF8() reports the required size
  ** in nByte and the loop runs again with an enlarged buffer.
  */
  do {
    UErrorCode status = U_ZERO_ERROR;
    if( nByte ){
      char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte);
      if( !zNew ){
        return SQLITE_NOMEM;
      }
      pCsr->zBuffer = zNew;
      pCsr->nBuffer = nByte;
    }

    u_strToUTF8(
        pCsr->zBuffer, pCsr->nBuffer, &nByte,    /* Output vars */
        &pCsr->aChar[iStart], iEnd-iStart,       /* Input vars */
        &status                                  /* Output success/failure */
    );
    if( U_FAILURE(status) && status!=U_BUFFER_OVERFLOW_ERROR ){
      return SQLITE_ERROR;
    }
  } while( nByte>pCsr->nBuffer );

  *ppToken = pCsr->zBuffer;
  *pnBytes = nByte;
  *piStartOffset = pCsr->aOffset[iStart];
  *piEndOffset = pCsr->aOffset[iEnd];
  *piPosition = pCsr->iToken++;

  return SQLITE_OK;
}
/*
** The set of routines that implement the ICU tokenizer
*/
static const sqlite3_tokenizer_module icuTokenizerModule = {
0, /* iVersion */
icuCreate, /* xCreate */
icuDestroy, /* xDestroy */
icuOpen, /* xOpen */
icuClose, /* xClose */
icuNext, /* xNext */
};
/*
** Hand the caller a pointer to the ICU tokenizer's method table by
** storing its address in *ppModule.
*/
void sqlite3Fts2IcuTokenizerModule(sqlite3_tokenizer_module const **ppModule){
  const sqlite3_tokenizer_module *pIcuModule = &icuTokenizerModule;
  *ppModule = pIcuModule;
}
#endif /* defined(SQLITE_ENABLE_ICU) */
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */

230
ext/fts2/fts2_tokenizer.c Normal file
View File

@@ -0,0 +1,230 @@
#include "sqlite3.h"
#include "sqlite3ext.h"
SQLITE_EXTENSION_INIT1
#include "fts2_hash.h"
#include "fts2_tokenizer.h"
#include <assert.h>
/*
** Implementation of the SQL scalar function for accessing the underlying
** hash table. This function may be called as follows:
**
**   SELECT <function-name>(<key-name>);
**   SELECT <function-name>(<key-name>, <pointer>);
**
** where <function-name> is the name passed as the third argument
** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer').
**
** If the <pointer> argument is specified, it must be a blob value
** containing a pointer to be stored as the hash data corresponding
** to the string <key-name>. If <pointer> is not specified, then
** the string <key-name> must already exist in the hash table. Otherwise,
** an error is returned.
**
** Whether or not the <pointer> argument is specified, the value returned
** is a blob containing the pointer stored as the hash data corresponding
** to string <key-name> (after the hash-table is updated, if applicable).
**
** A NULL <key-name> is rejected: with "argument type mismatch" in the
** two-argument form and with "unknown tokenizer" in the one-argument
** form.
**
** NOTE(review): the two-argument form stores whatever pointer value the
** SQL caller supplies, with no validation beyond its size. Anything able
** to run SQL on this connection can therefore register an arbitrary
** address as a tokenizer module. Consider restricting it if untrusted
** SQL may be executed.
*/
static void scalarFunc(
  sqlite3_context *context,
  int argc,
  sqlite3_value **argv
){
  fts2Hash *pHash;
  void *pPtr = 0;
  const unsigned char *zName;
  int nName;

  assert( argc==1 || argc==2 );

  pHash = (fts2Hash *)sqlite3_user_data(context);

  /* The hash key includes the nul terminator, hence the +1. */
  zName = sqlite3_value_text(argv[0]);
  nName = sqlite3_value_bytes(argv[0])+1;

  if( argc==2 ){
    void *pOld;
    const void *pBlob = sqlite3_value_blob(argv[1]);
    int n = sqlite3_value_bytes(argv[1]);
    if( zName==0 || pBlob==0 || n!=sizeof(pPtr) ){
      sqlite3_result_error(context, "argument type mismatch", -1);
      return;
    }
    pPtr = *(void **)pBlob;
    /* The insert returns the new data pointer if an allocation fails. */
    pOld = sqlite3Fts2HashInsert(pHash, (void *)zName, nName, pPtr);
    if( pOld==pPtr ){
      sqlite3_result_error(context, "out of memory", -1);
      return;
    }
  }else{
    if( zName ){
      pPtr = sqlite3Fts2HashFind(pHash, zName, nName);
    }
    if( !pPtr ){
      char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
      if( zErr ){
        sqlite3_result_error(context, zErr, -1);
        sqlite3_free(zErr);
      }else{
        sqlite3_result_error(context, "out of memory", -1);
      }
      return;
    }
  }

  sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT);
}
#ifdef SQLITE_TEST
#include <tcl.h>
/*
** Implementation of a special SQL scalar function for testing tokenizers
** designed to be used in concert with the Tcl testing framework. This
** function must be called with two arguments:
**
**   SELECT <function-name>(<key-name>, <input-string>);
**
** where <function-name> is the name passed as the third argument
** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer')
** concatenated with the string '_test' (e.g. 'fts2_tokenizer_test').
**
** The return value is a string that may be interpreted as a Tcl
** list. For each token in the <input-string>, three elements are
** added to the returned list. The first is the token position, the
** second is the token text (folded, stemmed, etc.) and the third is the
** substring of <input-string> associated with the token. For example,
** using the built-in "simple" tokenizer:
**
**   SELECT fts2_tokenizer_test('simple', 'I don''t see how');
**
** will return the string:
**
**   "{0 i I 1 dont don't 2 see see 3 how how}"
**
** The tokenizer is created with no arguments. If any tokenizer method
** fails, an error naming the first method that failed is returned
** instead of the list. A tokenizer that was created successfully is
** always destroyed before this function returns.
*/
static void testFunc(
  sqlite3_context *context,
  int argc,
  sqlite3_value **argv
){
  fts2Hash *pHash;
  sqlite3_tokenizer_module *p = 0;
  sqlite3_tokenizer *pTokenizer = 0;
  sqlite3_tokenizer_cursor *pCsr = 0;

  const char *zErr = 0;

  const char *zName;
  int nName;
  const char *zInput;
  int nInput;

  const char *zToken;
  int nToken;
  int iStart;
  int iEnd;
  int iPos;

  Tcl_Obj *pRet;

  assert( argc==2 );

  zName = (const char *)sqlite3_value_text(argv[0]);
  nName = sqlite3_value_bytes(argv[0]);
  zInput = (const char *)sqlite3_value_text(argv[1]);
  nInput = sqlite3_value_bytes(argv[1]);

  /* Hash keys include the nul terminator, hence nName+1. A NULL name
  ** can never match and must not be passed to the hash lookup. */
  pHash = (fts2Hash *)sqlite3_user_data(context);
  if( zName ){
    p = (sqlite3_tokenizer_module *)sqlite3Fts2HashFind(pHash, zName, nName+1);
  }

  if( !p ){
    char *zMsg = sqlite3_mprintf("unknown tokenizer: %s", zName);
    if( zMsg ){
      sqlite3_result_error(context, zMsg, -1);
      sqlite3_free(zMsg);
    }else{
      sqlite3_result_error(context, "out of memory", -1);
    }
    return;
  }

  pRet = Tcl_NewObj();
  Tcl_IncrRefCount(pRet);

  if( SQLITE_OK!=p->xCreate(0, 0, &pTokenizer) ){
    zErr = "error in xCreate()";
    goto finish;
  }
  pTokenizer->pModule = p;
  if( SQLITE_OK!=p->xOpen(pTokenizer, zInput, nInput, &pCsr) ){
    zErr = "error in xOpen()";
    goto destroy_tokenizer;
  }
  pCsr->pTokenizer = pTokenizer;

  while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){
    Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos));
    Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
    /* Third element: the slice of the original input for this token. */
    zToken = &zInput[iStart];
    nToken = iEnd-iStart;
    Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
  }

  if( SQLITE_OK!=p->xClose(pCsr) ){
    zErr = "error in xClose()";
  }

destroy_tokenizer:
  /* Always release the tokenizer; report the earliest failure only. */
  if( SQLITE_OK!=p->xDestroy(pTokenizer) && zErr==0 ){
    zErr = "error in xDestroy()";
  }

finish:
  if( zErr ){
    sqlite3_result_error(context, zErr, -1);
  }else{
    sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT);
  }
  Tcl_DecrRefCount(pRet);
}
#endif
/*
** Register the SQL functions that expose hash table pHash on database
** connection db. The hash table must have been initialised to use
** string keys and to take a private copy of each key on insert, i.e.
** by a call similar to:
**
**    sqlite3Fts2HashInit(pHash, FTS2_HASH_STRING, 1);
**
** Two overloads of the scalar function zName are created, taking one
** and two arguments respectively (see the header comment above
** scalarFunc() in this file for details). When SQLITE_TEST is defined,
** a two-argument function named "<zName>_test", implemented by
** testFunc(), is registered as well. All of them receive pHash as
** their user-data pointer.
**
** Registration stops at the first failure. The return value is
** SQLITE_OK, SQLITE_NOMEM if the test function name cannot be
** allocated, or the first error code from sqlite3_create_function().
*/
int sqlite3Fts2InitHashTable(
  sqlite3 *db,
  fts2Hash *pHash,
  const char *zName
){
  int rc;
  char *zTest = 0;

#ifdef SQLITE_TEST
  zTest = sqlite3_mprintf("%s_test", zName);
  if( zTest==0 ) return SQLITE_NOMEM;
#endif

  rc = sqlite3_create_function(
      db, zName, 1, SQLITE_ANY, (void *)pHash, scalarFunc, 0, 0
  );
  if( rc==SQLITE_OK ){
    rc = sqlite3_create_function(
        db, zName, 2, SQLITE_ANY, (void *)pHash, scalarFunc, 0, 0
    );
  }
#ifdef SQLITE_TEST
  if( rc==SQLITE_OK ){
    rc = sqlite3_create_function(
        db, zTest, 2, SQLITE_ANY, (void *)pHash, testFunc, 0, 0
    );
  }
#endif

  sqlite3_free(zTest);
  return rc;
}

View File

@@ -75,16 +75,4 @@ struct sqlite3_tokenizer_cursor {
/* Tokenizer implementations will typically add additional fields */
};
/*
** Get the module for a tokenizer which generates tokens based on a
** set of non-token characters. The default is to break tokens at any
** non-alnum character, though the set of delimiters can also be
** specified by the first argv argument to xCreate().
*/
/* TODO(shess) This doesn't belong here. Need some sort of
** registration process.
*/
void sqlite3Fts2SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
void sqlite3Fts2PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule);
#endif /* _FTS2_TOKENIZER_H_ */

View File

@@ -113,9 +113,15 @@ SQLite. Documentation follows.
2 COMPILATION AND USAGE
The easiest way to compile and use the ICU extension is to build
and use it as a dynamically loadable SQLite extension.
and use it as a dynamically loadable SQLite extension. To do this
using gcc on *nix:
gcc -shared icu.c `icu-config --ldflags` -o libSqliteIcu.so
You may need to add "-I" flags so that gcc can find sqlite3ext.h
and sqlite3.h. The resulting shared lib, libSqliteIcu.so, may be
loaded into sqlite in the same way as any other dynamically loadable
extension.
3 BUGS, PROBLEMS AND SECURITY ISSUES

View File

@@ -9,7 +9,7 @@
** May you share freely, never taking more than you give.
**
*************************************************************************
** $Id: icu.c,v 1.5 2007/06/11 08:00:00 danielk1977 Exp $
** $Id: icu.c,v 1.6 2007/06/22 15:21:16 danielk1977 Exp $
**
** This file implements an integration between the ICU library
** ("International Components for Unicode", an open-source library
@@ -37,11 +37,12 @@
#include <unicode/ucol.h>
#include <assert.h>
#include "sqlite3.h"
#ifndef SQLITE_CORE
#include "sqlite3ext.h"
SQLITE_EXTENSION_INIT1
#else
#include "sqlite3.h"
#endif
/*
@@ -342,7 +343,7 @@ static void icuCaseFunc16(sqlite3_context *p, int nArg, sqlite3_value **apArg){
nInput = sqlite3_value_bytes16(apArg[0]);
nOutput = nInput * 2 + 2;
zOutput = sqlite3_malloc(nInput*2+2);
zOutput = sqlite3_malloc(nOutput);
if( !zOutput ){
return;
}

View File

@@ -148,7 +148,9 @@ EXTSRC += -DSQLITE_CORE=1 \
$(TOP)/ext/icu/icu.c \
$(TOP)/ext/fts2/fts2.c \
$(TOP)/ext/fts2/fts2_hash.c \
$(TOP)/ext/fts2/fts2_icu.c \
$(TOP)/ext/fts2/fts2_porter.c \
$(TOP)/ext/fts2/fts2_tokenizer.c \
$(TOP)/ext/fts2/fts2_tokenizer1.c
# Generated source code files

View File

@@ -1,5 +1,5 @@
C Clarify\sdocumentation\sof\sthe\scolumn\smetadata\sAPIs.\s\sMake\ssure\sthat\sthe\nnew\sdocumentation\sclaims\sare\stested.\s(CVS\s4107)
D 2007-06-21T15:25:05
C Extend\sfts2\sso\sthat\suser\sdefined\stokenizers\smay\sbe\sadded.\sAdd\sa\stokenizer\sthat\suses\sthe\sICU\slibrary\sif\savailable.\sDocumentation\sand\stests\sto\scome.\s(CVS\s4108)
D 2007-06-22T15:21:16
F Makefile.in 7f7485a4cc039476a42e534b3f26ec90e2f9753e
F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935
F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028
@@ -36,19 +36,21 @@ F ext/fts1/fulltext.h 08525a47852d1d62a0be81d3fc3fe2d23b094efd
F ext/fts1/simple_tokenizer.c 1844d72f7194c3fd3d7e4173053911bf0661b70d
F ext/fts1/tokenizer.h 0c53421b832366d20d720d21ea3e1f6e66a36ef9
F ext/fts2/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
F ext/fts2/fts2.c 8f9bd5fce1a6900072ad9b65dd41fe8ba010f099
F ext/fts2/fts2.c 841766f2f14d68e623404f9531d98afa0f7cbf05
F ext/fts2/fts2.h 591916a822cfb6426518fdbf6069359119bc46eb
F ext/fts2/fts2_hash.c b3f22116d4ef0bc8f2da6e3fdc435c86d0951a9b
F ext/fts2/fts2_hash.h e283308156018329f042816eb09334df714e105e
F ext/fts2/fts2_icu.c 45b54d1e075020b35db20f69d829f95ca0651111
F ext/fts2/fts2_porter.c 991a45463553c7318063fe7773368a6c0f39e35d
F ext/fts2/fts2_tokenizer.h 4c5ffe31d63622869eb6eec1503df7f6996fd1bd
F ext/fts2/fts2_tokenizer.c 836373ee0fab4f8288a7815496529f25e4504881
F ext/fts2/fts2_tokenizer.h 6d151c51382e8f6cf689c616bb697fe780478089
F ext/fts2/fts2_tokenizer1.c 5c979fe8815f95396beb22b627571da895a025af
F ext/fts2/mkfts2amal.tcl 2a9ec76b0760fe7f3669dca5bc0d60728bc1c977
F ext/icu/README.txt a470afe5adf6534cc0bdafca31e6cf4d88c321fa
F ext/icu/icu.c daab19e2c5221685688ecff2bb75bf9e0eea361d
F ext/icu/README.txt 3b130aa66e7a681136f6add198b076a2f90d1e33
F ext/icu/icu.c 61a345d8126686aa3487aa8d2d0f68abd655f7a4
F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895
F ltmain.sh 56abb507100ed2d4261f6dd1653dec3cf4066387
F main.mk 522c81a818f2f81eb5e904ce983710449c5d76ad
F main.mk 9007943b573ddccc1bb39f8fcb7b2812f6cc8792
F mkdll.sh 37fa8a7412e51b5ab2bc6d4276135f022a0feffb
F mkopcodec.awk bd46ad001c98dfbab07b1713cb8e692fa0e5415d
F mkopcodeh.awk cde995d269aa06c94adbf6455bea0acedb913fa5
@@ -73,14 +75,14 @@ F src/date.c 6049db7d5a8fdf2c677ff7d58fa31d4f6593c988
F src/delete.c 5c0d89b3ef7d48fe1f5124bfe8341f982747fe29
F src/experimental.c 1b2d1a6cd62ecc39610e97670332ca073c50792b
F src/expr.c 763ca2b39fe551a6dc7d37ba40544311622eee32
F src/func.c 4668843e6f0d27653939e12fc32276fb8e38d21a
F src/func.c 6b45261aa2c514f642201b90493af68469c04af6
F src/hash.c 67b23e14f0257b69a3e8aa663e4eeadc1a2b6fd5
F src/hash.h 1b3f7e2609141fd571f62199fc38687d262e9564
F src/insert.c e595ca26805dfb3a9ebaabc28e7947c479f3b14d
F src/legacy.c 388c71ad7fbcd898ba1bcbfc98a3ac954bfa5d01
F src/limits.h 71ab25f17e35e0a9f3f6f234b8ed49cc56731d35
F src/loadext.c afe4f4755dc49c36ef505748bbdddecb9f1d02a2
F src/main.c 797dc983716c1480f6af78a36be3add8806211a1
F src/loadext.c b85b4e777cda9bf95475152ed240b6dfd2a0ecd9
F src/main.c 65fc7de0b3c2e5b637c000ecf419c35de2525ef9
F src/malloc.c fa9bbccc4e6d099cd04c2518d238a1669c9d1020
F src/md5.c c5fdfa5c2593eaee2e32a5ce6c6927c986eaf217
F src/os.c 1f10b47acc1177fb9225edb4f5f0d25ed716f9cb
@@ -104,9 +106,9 @@ F src/random.c 6119474a6f6917f708c1dee25b9a8e519a620e88
F src/select.c 33a258fc9c9dccb28ae2d3a02f1e1148d6433148
F src/server.c 087b92a39d883e3fa113cae259d64e4c7438bc96
F src/shell.c 4b0fc3c76a9f23a1c963e01703c0fbbca1b5c34d
F src/sqlite.h.in 475e0e3dbd34c151ca89423c97d878c99575c71a
F src/sqlite3ext.h 7d0d363ea7327e817ef0dfe1b7eee1f171b72890
F src/sqliteInt.h ed31d1a0311c1ffc018fa2e9035a6cf7985049c8
F src/sqlite.h.in 6f290b660b2e7c3359968bb4b344ec31a1178746
F src/sqlite3ext.h 95575e0d175a0271fe2c3232c0d11e8720ed6887
F src/sqliteInt.h 6503239d26b1943227031aa005320ef09b9b92b7
F src/sqliteLimit.h f14609c27636ebc217c9603ade26dbdd7d0f6afa
F src/table.c a8de75bcedf84d4060d804264b067ab3b1a3561d
F src/tclsqlite.c 4bffe56752d2c24ade23340e46a91fd92c316e08
@@ -143,7 +145,7 @@ F src/vdbeaux.c c580d3605edc2c24ba9bd26fa7aa8b4fff10daa4
F src/vdbeblob.c 96f3572fdc45eda5be06e6372b612bc30742d9f0
F src/vdbefifo.c 3ca8049c561d5d67cbcb94dc909ae9bb68c0bf8f
F src/vdbemem.c ca4d3994507cb0a9504820293af69f5c778b4abd
F src/vtab.c c5ebebf615b2f29499fbe97a584c4bb342632aa0
F src/vtab.c 51d43cda45d25e6f3a15d19fe32992b7756e74db
F src/where.c 12387641659605318ae03d87f0687f223dfc9568
F tclinstaller.tcl 4356d9d94d2b5ed5e68f9f0c80c4df3048dd7617
F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2
@@ -253,6 +255,7 @@ F test/fts2l.test 4c53c89ce3919003765ff4fd8d98ecf724d97dd3
F test/fts2m.test 4b30142ead6f3ed076e880a2a464064c5ad58c51
F test/fts2n.test a70357e72742681eaebfdbe9007b87ff3b771638
F test/fts2o.test 05ce2ac9111c29998418a584de02136a0ded471b
F test/fts2token.test 8cfc9ee33361b93fa175197f25fefdd13dfb442e
F test/func.test 605989453d1b42cec1d05c17aa232dc98e3e04e6
F test/fuzz.test 62fc19dd36a427777fd671b569df07166548628a
F test/fuzz2.test ea38692ce2da99ad79fe0be5eb1a452c1c4d37bb
@@ -509,7 +512,7 @@ F www/tclsqlite.tcl 8be95ee6dba05eabcd27a9d91331c803f2ce2130
F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0
F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b
F www/whentouse.tcl fc46eae081251c3c181bd79c5faef8195d7991a5
P af4b914a2152ce021401b6f78bb88a5323aa6fc2
R 9c91b2e377cf682d8693c478c169d4ef
U drh
Z 97256b78e966a6731710de3bdea5bd11
P 2dafe08a91b5328a9d0df5ab29c3ff2d94ad5f6f
R c1366eff2872139d79721716582129d6
U danielk1977
Z 5d0863f4b162f005704998d90939d28f

View File

@@ -1 +1 @@
2dafe08a91b5328a9d0df5ab29c3ff2d94ad5f6f
68677e420c744b39ea9d7399819e0f376748886d

View File

@@ -16,7 +16,7 @@
** sqliteRegisterBuildinFunctions() found at the bottom of the file.
** All other code has file scope.
**
** $Id: func.c,v 1.160 2007/06/07 19:08:33 drh Exp $
** $Id: func.c,v 1.161 2007/06/22 15:21:16 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include <ctype.h>
@@ -1410,7 +1410,13 @@ void sqlite3RegisterBuiltinFunctions(sqlite3 *db){
}
}
sqlite3RegisterDateTimeFunctions(db);
sqlite3_overload_function(db, "MATCH", 2);
if( !sqlite3MallocFailed() ){
int rc = sqlite3_overload_function(db, "MATCH", 2);
assert( rc==SQLITE_NOMEM || rc==SQLITE_OK );
if( rc==SQLITE_NOMEM ){
sqlite3FailedMalloc();
}
}
#ifdef SQLITE_SSE
(void)sqlite3SseFunctions(db);
#endif

View File

@@ -157,6 +157,7 @@ const sqlite3_api_routines sqlite3_apis = {
sqlite3_create_function,
sqlite3_create_function16,
sqlite3_create_module,
sqlite3_create_module_v2,
sqlite3_data_count,
sqlite3_db_handle,
sqlite3_declare_vtab,

View File

@@ -14,7 +14,7 @@
** other files are for internal use by SQLite and should not be
** accessed by users of the library.
**
** $Id: main.c,v 1.376 2007/05/08 20:37:39 drh Exp $
** $Id: main.c,v 1.377 2007/06/22 15:21:16 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "os.h"
@@ -194,6 +194,9 @@ int sqlite3_close(sqlite3 *db){
#ifndef SQLITE_OMIT_VIRTUALTABLE
for(i=sqliteHashFirst(&db->aModule); i; i=sqliteHashNext(i)){
Module *pMod = (Module *)sqliteHashData(i);
if( pMod->xDestroy ){
pMod->xDestroy(pMod->pAux);
}
sqliteFree(pMod);
}
sqlite3HashClear(&db->aModule);
@@ -986,41 +989,47 @@ static int openDatabase(
db->aDb[1].safety_level = 1;
#endif
db->magic = SQLITE_MAGIC_OPEN;
if( sqlite3MallocFailed() ){
goto opendb_out;
}
/* Register all built-in functions, but do not attempt to read the
** database schema yet. This is delayed until the first time the database
** is accessed.
*/
if( !sqlite3MallocFailed() ){
sqlite3Error(db, SQLITE_OK, 0);
sqlite3RegisterBuiltinFunctions(db);
}
db->magic = SQLITE_MAGIC_OPEN;
sqlite3Error(db, SQLITE_OK, 0);
sqlite3RegisterBuiltinFunctions(db);
/* Load automatic extensions - extensions that have been registered
** using the sqlite3_automatic_extension() API.
*/
(void)sqlite3AutoLoadExtensions(db);
if( sqlite3_errcode(db)!=SQLITE_OK ){
goto opendb_out;
}
#ifdef SQLITE_ENABLE_FTS1
{
if( !sqlite3MallocFailed() ){
extern int sqlite3Fts1Init(sqlite3*);
sqlite3Fts1Init(db);
rc = sqlite3Fts1Init(db);
}
#endif
#ifdef SQLITE_ENABLE_FTS2
{
if( !sqlite3MallocFailed() && rc==SQLITE_OK ){
extern int sqlite3Fts2Init(sqlite3*);
sqlite3Fts2Init(db);
rc = sqlite3Fts2Init(db);
}
#endif
#ifdef SQLITE_ENABLE_ICU
if( !sqlite3MallocFailed() ){
if( !sqlite3MallocFailed() && rc==SQLITE_OK ){
extern int sqlite3IcuInit(sqlite3*);
sqlite3IcuInit(db);
rc = sqlite3IcuInit(db);
}
#endif
sqlite3Error(db, rc, 0);
/* -DSQLITE_DEFAULT_LOCKING_MODE=1 makes EXCLUSIVE the default locking
** mode. -DSQLITE_DEFAULT_LOCKING_MODE=0 make NORMAL the default locking

View File

@@ -30,7 +30,7 @@
** the version number) and changes its name to "sqlite3.h" as
** part of the build process.
**
** @(#) $Id: sqlite.h.in,v 1.214 2007/06/21 15:25:05 drh Exp $
** @(#) $Id: sqlite.h.in,v 1.215 2007/06/22 15:21:16 danielk1977 Exp $
*/
#ifndef _SQLITE3_H_
#define _SQLITE3_H_
@@ -2497,6 +2497,19 @@ int sqlite3_create_module(
void * /* Client data for xCreate/xConnect */
);
/*
** This routine is identical to the sqlite3_create_module() method above,
** except that it allows a destructor function to be specified. It is
** even more experimental than the rest of the virtual tables API.
*/
int sqlite3_create_module_v2(
sqlite3 *db, /* SQLite connection to register module with */
const char *zName, /* Name of the module */
const sqlite3_module *, /* Methods for the module */
void *, /* Client data for xCreate/xConnect */
void(*xDestroy)(void*) /* Module destructor function */
);
/*
** Every module implementation uses a subclass of the following structure
** to describe a particular instance of the module. Each subclass will

View File

@@ -15,7 +15,7 @@
** as extensions by SQLite should #include this file instead of
** sqlite3.h.
**
** @(#) $Id: sqlite3ext.h,v 1.10 2007/03/29 18:46:01 drh Exp $
** @(#) $Id: sqlite3ext.h,v 1.11 2007/06/22 15:21:16 danielk1977 Exp $
*/
#ifndef _SQLITE3EXT_H_
#define _SQLITE3EXT_H_
@@ -76,6 +76,7 @@ struct sqlite3_api_routines {
int (*create_function)(sqlite3*,const char*,int,int,void*,void (*xFunc)(sqlite3_context*,int,sqlite3_value**),void (*xStep)(sqlite3_context*,int,sqlite3_value**),void (*xFinal)(sqlite3_context*));
int (*create_function16)(sqlite3*,const void*,int,int,void*,void (*xFunc)(sqlite3_context*,int,sqlite3_value**),void (*xStep)(sqlite3_context*,int,sqlite3_value**),void (*xFinal)(sqlite3_context*));
int (*create_module)(sqlite3*,const char*,const sqlite3_module*,void*);
int (*create_module_v2)(sqlite3*,const char*,const sqlite3_module*,void*,void (*xDestroy)(void *));
int (*data_count)(sqlite3_stmt*pStmt);
sqlite3 * (*db_handle)(sqlite3_stmt*);
int (*declare_vtab)(sqlite3*,const char*);
@@ -209,6 +210,7 @@ struct sqlite3_api_routines {
#define sqlite3_create_function sqlite3_api->create_function
#define sqlite3_create_function16 sqlite3_api->create_function16
#define sqlite3_create_module sqlite3_api->create_module
#define sqlite3_create_module_v2 sqlite3_api->create_module_v2
#define sqlite3_data_count sqlite3_api->data_count
#define sqlite3_db_handle sqlite3_api->db_handle
#define sqlite3_declare_vtab sqlite3_api->declare_vtab

View File

@@ -11,7 +11,7 @@
*************************************************************************
** Internal interface definitions for SQLite.
**
** @(#) $Id: sqliteInt.h,v 1.574 2007/06/20 15:29:25 drh Exp $
** @(#) $Id: sqliteInt.h,v 1.575 2007/06/22 15:21:16 danielk1977 Exp $
*/
#ifndef _SQLITEINT_H_
#define _SQLITEINT_H_
@@ -584,6 +584,7 @@ struct Module {
const sqlite3_module *pModule; /* Callback pointers */
const char *zName; /* Name passed to create_module() */
void *pAux; /* pAux passed to create_module() */
void (*xDestroy)(void *); /* Module destructor function */
};
/*

View File

@@ -11,11 +11,38 @@
*************************************************************************
** This file contains code used to help implement virtual tables.
**
** $Id: vtab.c,v 1.46 2007/05/04 13:15:57 drh Exp $
** $Id: vtab.c,v 1.47 2007/06/22 15:21:16 danielk1977 Exp $
*/
#ifndef SQLITE_OMIT_VIRTUALTABLE
#include "sqliteInt.h"
/*
** Register the virtual-table module pModule under the name zName on
** database connection db.  This is the shared implementation behind
** sqlite3_create_module() and sqlite3_create_module_v2().
**
** The Module record and a private copy of zName are carved out of a
** single allocation, so freeing the record also releases the name.
**
** xDestroy may be NULL.  When it is not NULL it is invoked on pAux:
**
**   (1) for whatever Module entry sqlite3HashInsert() hands back
**       (presumably the registration displaced by this one, or the new
**       entry itself if the insert failed - confirm against hash.c), and
**
**   (2) if the Module record cannot be allocated at all, so that the
**       client data is not leaked when registration never takes place.
**
** The return value is whatever sqlite3ApiExit(db, SQLITE_OK) reports.
*/
static int createModule(
  sqlite3 *db,                    /* Database in which module is registered */
  const char *zName,              /* Name assigned to this module */
  const sqlite3_module *pModule,  /* The definition of the module */
  void *pAux,                     /* Context pointer for xCreate/xConnect */
  void (*xDestroy)(void *)        /* Module destructor function */
){
  int nName = strlen(zName);
  Module *pMod = (Module *)sqliteMallocRaw(sizeof(Module) + nName + 1);
  if( pMod ){
    Module *pDel;                       /* Entry returned by the hash table */
    char *zCopy = (char *)(&pMod[1]);   /* Name lives just past the struct */
    memcpy(zCopy, zName, nName+1);
    pMod->zName = zCopy;
    pMod->pModule = pModule;
    pMod->pAux = pAux;
    pMod->xDestroy = xDestroy;
    pDel = (Module *)sqlite3HashInsert(&db->aModule, zCopy, nName, (void*)pMod);
    if( pDel && pDel->xDestroy ){
      pDel->xDestroy(pDel->pAux);
    }
    sqliteFree(pDel);
    sqlite3ResetInternalSchema(db, 0);
  }else if( xDestroy ){
    /* Allocation failed: the module was never registered, so release the
    ** client data here rather than leaking it. */
    xDestroy(pAux);
  }
  return sqlite3ApiExit(db, SQLITE_OK);
}
/*
** External API function used to create a new virtual-table module.
*/
@@ -25,19 +52,20 @@ int sqlite3_create_module(
const sqlite3_module *pModule, /* The definition of the module */
void *pAux /* Context pointer for xCreate/xConnect */
){
int nName = strlen(zName);
Module *pMod = (Module *)sqliteMallocRaw(sizeof(Module) + nName + 1);
if( pMod ){
char *zCopy = (char *)(&pMod[1]);
memcpy(zCopy, zName, nName+1);
pMod->zName = zCopy;
pMod->pModule = pModule;
pMod->pAux = pAux;
pMod = (Module *)sqlite3HashInsert(&db->aModule, zCopy, nName, (void*)pMod);
sqliteFree(pMod);
sqlite3ResetInternalSchema(db, 0);
}
return sqlite3ApiExit(db, SQLITE_OK);
return createModule(db, zName, pModule, pAux, 0);
}
/*
** External API: register a virtual-table module together with a
** destructor for its client data.
**
** Behaves like sqlite3_create_module() except that the caller supplies
** xDestroy, which is passed on to createModule() along with pAux
** (sqlite3_create_module() passes 0 in that position).
*/
int sqlite3_create_module_v2(
  sqlite3 *db,                    /* Database in which module is registered */
  const char *zName,              /* Name assigned to this module */
  const sqlite3_module *pModule,  /* The definition of the module */
  void *pAux,                     /* Context pointer for xCreate/xConnect */
  void (*xDestroy)(void *)        /* Module destructor function */
){
  int rc;                         /* Result of the shared implementation */
  rc = createModule(db, zName, pModule, pAux, xDestroy);
  return rc;
}
/*

107
test/fts2token.test Normal file
View File

@@ -0,0 +1,107 @@
# 2007 June 21
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for the SQLite library. The focus
# of this script is testing the pluggable tokenizer feature of the
# FTS2 module.
#
# $Id: fts2token.test,v 1.1 2007/06/22 15:21:16 danielk1977 Exp $
#
# Derive the test directory from the path of this script and load
# tester.tcl from it.
set testdir [file dirname $argv0]
source $testdir/tester.tcl
# If the fts2 capability is not available (SQLITE_ENABLE_FTS2 is not
# defined), omit this file.
ifcapable !fts2 {
finish_test
return
}
#--------------------------------------------------------------------------
# Test cases fts2token-1.* are the warm-body test for the SQL scalar
# function fts2_tokenizer(). The procedure is as follows:
#
# 1: Verify that there is no such fts2 tokenizer as 'blah'.
#
# 2: Query for the built-in tokenizer 'simple'. Insert a copy of the
# retrieved value as tokenizer 'blah'.
#
# 3: Test that the value returned for tokenizer 'blah' is now the
# same as that retrieved for 'simple'.
#
# 4: Test that it is now possible to create an fts2 table using
# tokenizer 'blah' (it was not possible in step 1).
#
# 5: Test that the table created to use tokenizer 'blah' is usable.
#
# Step 1: tokenizer 'blah' has not been registered yet, so creating a
# table that names it must fail with "unknown tokenizer".
do_test fts2token-1.1 {
catchsql {
CREATE VIRTUAL TABLE t1 USING fts2(content, tokenize blah);
}
} {1 {unknown tokenizer: blah}}
# Step 2: register 'blah' using the value fetched for 'simple'. The
# two-argument call is expected to return a non-NULL value.
do_test fts2token-1.2 {
execsql {
SELECT fts2_tokenizer('blah', fts2_tokenizer('simple')) IS NULL;
}
} {0}
# Step 3: 'blah' must now resolve to the same value as 'simple'.
do_test fts2token-1.3 {
execsql {
SELECT fts2_tokenizer('blah') == fts2_tokenizer('simple');
}
} {1}
# Step 4: the CREATE statement that failed in step 1 must now succeed.
do_test fts2token-1.4 {
catchsql {
CREATE VIRTUAL TABLE t1 USING fts2(content, tokenize blah);
}
} {0 {}}
# Step 5: insert three rows and check that a MATCH query returns only the
# row containing the search term.
do_test fts2token-1.5 {
execsql {
INSERT INTO t1(content) VALUES('There was movement at the station');
INSERT INTO t1(content) VALUES('For the word has passed around');
INSERT INTO t1(content) VALUES('That the colt from ol regret had got away');
SELECT content FROM t1 WHERE content MATCH 'movement'
}
} {{There was movement at the station}}
#--------------------------------------------------------------------------
# Test cases fts2token-2.* test error cases in the scalar function based
# API for getting and setting tokenizers.
#
# Looking up a tokenizer name that was never registered must raise an
# "unknown tokenizer" error.
do_test fts2token-2.1 {
catchsql {
SELECT fts2_tokenizer('nosuchtokenizer');
}
} {1 {unknown tokenizer: nosuchtokenizer}}
#--------------------------------------------------------------------------
# Test cases fts2token-3.* test the three built-in tokenizers with a
# simple input string via the built-in test function. This is as much
# to test the test function as the tokenizer implementations.
#
# The expected results below are flat lists of triples, presumably
# (position, token, original text) - confirm against the implementation
# of fts2_tokenizer_test().
#
# 'simple': "don't" is split at the apostrophe into "don" and "t", and
# "I" is reported as token "i".
do_test fts2token-3.1 {
execsql {
SELECT fts2_tokenizer_test('simple', 'I don''t see how');
}
} {{0 i I 1 don don 2 t t 3 see see 4 how how}}
# 'porter': for this input the expected output is the same as for 'simple'.
do_test fts2token-3.2 {
execsql {
SELECT fts2_tokenizer_test('porter', 'I don''t see how');
}
} {{0 i I 1 don don 2 t t 3 see see 4 how how}}
# 'icu': only exercised when the icu capability is available. Here
# "don't" is expected to stay a single token.
ifcapable icu {
do_test fts2token-3.3 {
execsql {
SELECT fts2_tokenizer_test('icu', 'I don''t see how');
}
} {{0 i I 1 don't don't 2 see see 3 how how}}
}
finish_test