mirror of
https://github.com/sqlite/sqlite.git
synced 2025-08-07 02:42:48 +03:00
Merge latest enhancements into this branch from branch wal2.
FossilOrigin-Name: 5a22010c35789c6d12e4dd45c81c10b203f4662f206cb636bd0c2781f1cd9571
This commit is contained in:
49
Makefile.in
49
Makefile.in
@@ -315,24 +315,6 @@ SRC = \
|
||||
|
||||
# Source code for extensions
|
||||
#
|
||||
SRC += \
|
||||
$(TOP)/ext/fts1/fts1.c \
|
||||
$(TOP)/ext/fts1/fts1.h \
|
||||
$(TOP)/ext/fts1/fts1_hash.c \
|
||||
$(TOP)/ext/fts1/fts1_hash.h \
|
||||
$(TOP)/ext/fts1/fts1_porter.c \
|
||||
$(TOP)/ext/fts1/fts1_tokenizer.h \
|
||||
$(TOP)/ext/fts1/fts1_tokenizer1.c
|
||||
SRC += \
|
||||
$(TOP)/ext/fts2/fts2.c \
|
||||
$(TOP)/ext/fts2/fts2.h \
|
||||
$(TOP)/ext/fts2/fts2_hash.c \
|
||||
$(TOP)/ext/fts2/fts2_hash.h \
|
||||
$(TOP)/ext/fts2/fts2_icu.c \
|
||||
$(TOP)/ext/fts2/fts2_porter.c \
|
||||
$(TOP)/ext/fts2/fts2_tokenizer.h \
|
||||
$(TOP)/ext/fts2/fts2_tokenizer.c \
|
||||
$(TOP)/ext/fts2/fts2_tokenizer1.c
|
||||
SRC += \
|
||||
$(TOP)/ext/fts3/fts3.c \
|
||||
$(TOP)/ext/fts3/fts3.h \
|
||||
@@ -438,7 +420,7 @@ TESTSRC = \
|
||||
$(TOP)/ext/recover/sqlite3recover.c \
|
||||
$(TOP)/ext/recover/dbdata.c \
|
||||
$(TOP)/ext/recover/test_recover.c \
|
||||
$(TOP)/ext/rbu/test_rbu.c
|
||||
$(TOP)/ext/rbu/test_rbu.c
|
||||
|
||||
# Statically linked extensions
|
||||
#
|
||||
@@ -567,14 +549,6 @@ HDR = \
|
||||
|
||||
# Header files used by extensions
|
||||
#
|
||||
EXTHDR += \
|
||||
$(TOP)/ext/fts1/fts1.h \
|
||||
$(TOP)/ext/fts1/fts1_hash.h \
|
||||
$(TOP)/ext/fts1/fts1_tokenizer.h
|
||||
EXTHDR += \
|
||||
$(TOP)/ext/fts2/fts2.h \
|
||||
$(TOP)/ext/fts2/fts2_hash.h \
|
||||
$(TOP)/ext/fts2/fts2_tokenizer.h
|
||||
EXTHDR += \
|
||||
$(TOP)/ext/fts3/fts3.h \
|
||||
$(TOP)/ext/fts3/fts3Int.h \
|
||||
@@ -1120,6 +1094,9 @@ SHELL_SRC = \
|
||||
$(TOP)/ext/misc/appendvfs.c \
|
||||
$(TOP)/ext/misc/completion.c \
|
||||
$(TOP)/ext/misc/decimal.c \
|
||||
$(TOP)/ext/misc/basexx.c \
|
||||
$(TOP)/ext/misc/base64.c \
|
||||
$(TOP)/ext/misc/base85.c \
|
||||
$(TOP)/ext/misc/fileio.c \
|
||||
$(TOP)/ext/misc/ieee754.c \
|
||||
$(TOP)/ext/misc/regexp.c \
|
||||
@@ -1147,24 +1124,6 @@ shell.c: $(SHELL_SRC) $(TOP)/tool/mkshellc.tcl
|
||||
icu.lo: $(TOP)/ext/icu/icu.c $(HDR) $(EXTHDR)
|
||||
$(LTCOMPILE) -DSQLITE_CORE -c $(TOP)/ext/icu/icu.c
|
||||
|
||||
fts2.lo: $(TOP)/ext/fts2/fts2.c $(HDR) $(EXTHDR)
|
||||
$(LTCOMPILE) -DSQLITE_CORE -c $(TOP)/ext/fts2/fts2.c
|
||||
|
||||
fts2_hash.lo: $(TOP)/ext/fts2/fts2_hash.c $(HDR) $(EXTHDR)
|
||||
$(LTCOMPILE) -DSQLITE_CORE -c $(TOP)/ext/fts2/fts2_hash.c
|
||||
|
||||
fts2_icu.lo: $(TOP)/ext/fts2/fts2_icu.c $(HDR) $(EXTHDR)
|
||||
$(LTCOMPILE) -DSQLITE_CORE -c $(TOP)/ext/fts2/fts2_icu.c
|
||||
|
||||
fts2_porter.lo: $(TOP)/ext/fts2/fts2_porter.c $(HDR) $(EXTHDR)
|
||||
$(LTCOMPILE) -DSQLITE_CORE -c $(TOP)/ext/fts2/fts2_porter.c
|
||||
|
||||
fts2_tokenizer.lo: $(TOP)/ext/fts2/fts2_tokenizer.c $(HDR) $(EXTHDR)
|
||||
$(LTCOMPILE) -DSQLITE_CORE -c $(TOP)/ext/fts2/fts2_tokenizer.c
|
||||
|
||||
fts2_tokenizer1.lo: $(TOP)/ext/fts2/fts2_tokenizer1.c $(HDR) $(EXTHDR)
|
||||
$(LTCOMPILE) -DSQLITE_CORE -c $(TOP)/ext/fts2/fts2_tokenizer1.c
|
||||
|
||||
fts3.lo: $(TOP)/ext/fts3/fts3.c $(HDR) $(EXTHDR)
|
||||
$(LTCOMPILE) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3.c
|
||||
|
||||
|
57
Makefile.msc
57
Makefile.msc
@@ -1412,20 +1412,6 @@ SRC05 = \
|
||||
$(TOP)\src\wal.h \
|
||||
$(TOP)\src\whereInt.h
|
||||
|
||||
# Extension source code files, part 1.
|
||||
#
|
||||
SRC06 = \
|
||||
$(TOP)\ext\fts1\fts1.c \
|
||||
$(TOP)\ext\fts1\fts1_hash.c \
|
||||
$(TOP)\ext\fts1\fts1_porter.c \
|
||||
$(TOP)\ext\fts1\fts1_tokenizer1.c \
|
||||
$(TOP)\ext\fts2\fts2.c \
|
||||
$(TOP)\ext\fts2\fts2_hash.c \
|
||||
$(TOP)\ext\fts2\fts2_icu.c \
|
||||
$(TOP)\ext\fts2\fts2_porter.c \
|
||||
$(TOP)\ext\fts2\fts2_tokenizer.c \
|
||||
$(TOP)\ext\fts2\fts2_tokenizer1.c
|
||||
|
||||
# Extension source code files, part 2.
|
||||
#
|
||||
SRC07 = \
|
||||
@@ -1448,16 +1434,6 @@ SRC07 = \
|
||||
$(TOP)\ext\rbu\sqlite3rbu.c \
|
||||
$(TOP)\ext\misc\stmt.c
|
||||
|
||||
# Extension header files, part 1.
|
||||
#
|
||||
SRC08 = \
|
||||
$(TOP)\ext\fts1\fts1.h \
|
||||
$(TOP)\ext\fts1\fts1_hash.h \
|
||||
$(TOP)\ext\fts1\fts1_tokenizer.h \
|
||||
$(TOP)\ext\fts2\fts2.h \
|
||||
$(TOP)\ext\fts2\fts2_hash.h \
|
||||
$(TOP)\ext\fts2\fts2_tokenizer.h
|
||||
|
||||
# Extension header files, part 2.
|
||||
#
|
||||
SRC09 = \
|
||||
@@ -1498,7 +1474,7 @@ SRC12 =
|
||||
|
||||
# All source code files.
|
||||
#
|
||||
SRC = $(SRC00) $(SRC01) $(SRC03) $(SRC04) $(SRC05) $(SRC06) $(SRC07) $(SRC08) $(SRC09) $(SRC10) $(SRC11) $(SRC12)
|
||||
SRC = $(SRC00) $(SRC01) $(SRC03) $(SRC04) $(SRC05) $(SRC07) $(SRC09) $(SRC10) $(SRC11) $(SRC12)
|
||||
|
||||
# Source code to the test files.
|
||||
#
|
||||
@@ -1607,7 +1583,6 @@ TESTEXT = $(TESTEXT) $(TOP)\ext\misc\zipfile.c
|
||||
TESTSRC2 = \
|
||||
$(SRC00) \
|
||||
$(SRC01) \
|
||||
$(SRC06) \
|
||||
$(SRC07) \
|
||||
$(SRC10) \
|
||||
$(TOP)\ext\async\sqlite3async.c
|
||||
@@ -1642,14 +1617,6 @@ HDR = \
|
||||
|
||||
# Header files used by extensions
|
||||
#
|
||||
EXTHDR = $(EXTHDR) \
|
||||
$(TOP)\ext\fts1\fts1.h \
|
||||
$(TOP)\ext\fts1\fts1_hash.h \
|
||||
$(TOP)\ext\fts1\fts1_tokenizer.h
|
||||
EXTHDR = $(EXTHDR) \
|
||||
$(TOP)\ext\fts2\fts2.h \
|
||||
$(TOP)\ext\fts2\fts2_hash.h \
|
||||
$(TOP)\ext\fts2\fts2_tokenizer.h
|
||||
EXTHDR = $(EXTHDR) \
|
||||
$(TOP)\ext\fts3\fts3.h \
|
||||
$(TOP)\ext\fts3\fts3Int.h \
|
||||
@@ -1861,9 +1828,7 @@ mptest: mptester.exe
|
||||
for %i in ($(SRC03)) do copy /Y %i tsrc
|
||||
for %i in ($(SRC04)) do copy /Y %i tsrc
|
||||
for %i in ($(SRC05)) do copy /Y %i tsrc
|
||||
for %i in ($(SRC06)) do copy /Y %i tsrc
|
||||
for %i in ($(SRC07)) do copy /Y %i tsrc
|
||||
for %i in ($(SRC08)) do copy /Y %i tsrc
|
||||
for %i in ($(SRC09)) do copy /Y %i tsrc
|
||||
for %i in ($(SRC10)) do copy /Y %i tsrc
|
||||
for %i in ($(SRC11)) do copy /Y %i tsrc
|
||||
@@ -2233,6 +2198,8 @@ SHELL_SRC = \
|
||||
$(TOP)\src\shell.c.in \
|
||||
$(TOP)\ext\misc\appendvfs.c \
|
||||
$(TOP)\ext\misc\completion.c \
|
||||
$(TOP)\ext\misc\base64.c \
|
||||
$(TOP)\ext\misc\base85.c \
|
||||
$(TOP)\ext\misc\decimal.c \
|
||||
$(TOP)\ext\misc\fileio.c \
|
||||
$(TOP)\ext\misc\ieee754.c \
|
||||
@@ -2266,24 +2233,6 @@ zlib:
|
||||
icu.lo: $(TOP)\ext\icu\icu.c $(HDR) $(EXTHDR)
|
||||
$(LTCOMPILE) $(CORE_COMPILE_OPTS) $(NO_WARN) -DSQLITE_CORE -c $(TOP)\ext\icu\icu.c
|
||||
|
||||
fts2.lo: $(TOP)\ext\fts2\fts2.c $(HDR) $(EXTHDR)
|
||||
$(LTCOMPILE) $(CORE_COMPILE_OPTS) $(NO_WARN) -DSQLITE_CORE -c $(TOP)\ext\fts2\fts2.c
|
||||
|
||||
fts2_hash.lo: $(TOP)\ext\fts2\fts2_hash.c $(HDR) $(EXTHDR)
|
||||
$(LTCOMPILE) $(CORE_COMPILE_OPTS) $(NO_WARN) -DSQLITE_CORE -c $(TOP)\ext\fts2\fts2_hash.c
|
||||
|
||||
fts2_icu.lo: $(TOP)\ext\fts2\fts2_icu.c $(HDR) $(EXTHDR)
|
||||
$(LTCOMPILE) $(CORE_COMPILE_OPTS) $(NO_WARN) -DSQLITE_CORE -c $(TOP)\ext\fts2\fts2_icu.c
|
||||
|
||||
fts2_porter.lo: $(TOP)\ext\fts2\fts2_porter.c $(HDR) $(EXTHDR)
|
||||
$(LTCOMPILE) $(CORE_COMPILE_OPTS) $(NO_WARN) -DSQLITE_CORE -c $(TOP)\ext\fts2\fts2_porter.c
|
||||
|
||||
fts2_tokenizer.lo: $(TOP)\ext\fts2\fts2_tokenizer.c $(HDR) $(EXTHDR)
|
||||
$(LTCOMPILE) $(CORE_COMPILE_OPTS) $(NO_WARN) -DSQLITE_CORE -c $(TOP)\ext\fts2\fts2_tokenizer.c
|
||||
|
||||
fts2_tokenizer1.lo: $(TOP)\ext\fts2\fts2_tokenizer1.c $(HDR) $(EXTHDR)
|
||||
$(LTCOMPILE) $(CORE_COMPILE_OPTS) $(NO_WARN) -DSQLITE_CORE -c $(TOP)\ext\fts2\fts2_tokenizer1.c
|
||||
|
||||
fts3.lo: $(TOP)\ext\fts3\fts3.c $(HDR) $(EXTHDR)
|
||||
$(LTCOMPILE) $(CORE_COMPILE_OPTS) $(NO_WARN) -DSQLITE_CORE -c $(TOP)\ext\fts3\fts3.c
|
||||
|
||||
|
@@ -1,2 +0,0 @@
|
||||
This folder contains source code to the first full-text search
|
||||
extension for SQLite.
|
@@ -1,404 +0,0 @@
|
||||
/*
|
||||
** 2001 September 22
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
** This is the implementation of generic hash-tables used in SQLite.
|
||||
** We've modified it slightly to serve as a standalone hash table
|
||||
** implementation for the full-text indexing module.
|
||||
*/
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "ft_hash.h"
|
||||
|
||||
/*
** Allocate n bytes of zero-filled memory.
**
** Returns a pointer to the new allocation, or NULL if n is negative or
** the allocation fails.  The memory is released with free().
*/
void *malloc_and_zero(int n){
  /* A negative size would be converted to an enormous size_t value. */
  if( n<0 ) return NULL;
  /* calloc() returns memory that is already zero-filled. */
  return calloc(1, (size_t)n);
}
|
||||
|
||||
/* Turn bulk memory into a hash table object by initializing the
|
||||
** fields of the Hash structure.
|
||||
**
|
||||
** "pNew" is a pointer to the hash table that is to be initialized.
|
||||
** keyClass is one of the constants HASH_INT, HASH_POINTER,
|
||||
** HASH_BINARY, or HASH_STRING. The value of keyClass
|
||||
** determines what kind of key the hash table will use. "copyKey" is
|
||||
** true if the hash table should make its own private copy of keys and
|
||||
** false if it should just use the supplied pointer. CopyKey only makes
|
||||
** sense for HASH_STRING and HASH_BINARY and is ignored
|
||||
** for other key classes.
|
||||
*/
|
||||
void HashInit(Hash *pNew, int keyClass, int copyKey){
|
||||
assert( pNew!=0 );
|
||||
assert( keyClass>=HASH_STRING && keyClass<=HASH_BINARY );
|
||||
pNew->keyClass = keyClass;
|
||||
#if 0
|
||||
if( keyClass==HASH_POINTER || keyClass==HASH_INT ) copyKey = 0;
|
||||
#endif
|
||||
pNew->copyKey = copyKey;
|
||||
pNew->first = 0;
|
||||
pNew->count = 0;
|
||||
pNew->htsize = 0;
|
||||
pNew->ht = 0;
|
||||
pNew->xMalloc = malloc_and_zero;
|
||||
pNew->xFree = free;
|
||||
}
|
||||
|
||||
/* Remove all entries from a hash table. Reclaim all memory.
|
||||
** Call this routine to delete a hash table or to reset a hash table
|
||||
** to the empty state.
|
||||
*/
|
||||
void HashClear(Hash *pH){
|
||||
HashElem *elem; /* For looping over all elements of the table */
|
||||
|
||||
assert( pH!=0 );
|
||||
elem = pH->first;
|
||||
pH->first = 0;
|
||||
if( pH->ht ) pH->xFree(pH->ht);
|
||||
pH->ht = 0;
|
||||
pH->htsize = 0;
|
||||
while( elem ){
|
||||
HashElem *next_elem = elem->next;
|
||||
if( pH->copyKey && elem->pKey ){
|
||||
pH->xFree(elem->pKey);
|
||||
}
|
||||
pH->xFree(elem);
|
||||
elem = next_elem;
|
||||
}
|
||||
pH->count = 0;
|
||||
}
|
||||
|
||||
#if 0 /* NOT USED */
|
||||
/*
|
||||
** Hash and comparison functions when the mode is HASH_INT
|
||||
*/
|
||||
static int intHash(const void *pKey, int nKey){
|
||||
return nKey ^ (nKey<<8) ^ (nKey>>8);
|
||||
}
|
||||
static int intCompare(const void *pKey1, int n1, const void *pKey2, int n2){
|
||||
return n2 - n1;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if 0 /* NOT USED */
|
||||
/*
|
||||
** Hash and comparison functions when the mode is HASH_POINTER
|
||||
*/
|
||||
static int ptrHash(const void *pKey, int nKey){
|
||||
uptr x = Addr(pKey);
|
||||
return x ^ (x<<8) ^ (x>>8);
|
||||
}
|
||||
static int ptrCompare(const void *pKey1, int n1, const void *pKey2, int n2){
|
||||
if( pKey1==pKey2 ) return 0;
|
||||
if( pKey1<pKey2 ) return -1;
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
** Hash function for HASH_STRING keys.
**
** pKey points to the key text and nKey is its length in bytes.  When
** nKey is zero or negative the length is taken from strlen(), so pKey
** must then be NUL-terminated.  The result is in the range 0..0x7fffffff.
*/
static int strHash(const void *pKey, int nKey){
  const char *z = (const char *)pKey;
  /* Accumulate in an unsigned int.  Left-shifting a signed int so that
  ** the result is out of range, or left-shifting a negative value, is
  ** undefined behavior; unsigned arithmetic simply wraps. */
  unsigned int h = 0;
  int i;

  if( nKey<=0 ) nKey = (int)strlen(z);
  for(i=0; i<nKey; i++){
    /* The byte is read as plain char, so where char is signed a byte
    ** with the high bit set contributes its sign-extended value. */
    h = (h<<3) ^ h ^ (unsigned int)z[i];
  }
  return (int)(h & 0x7fffffff);
}
|
||||
/*
** Comparison function for HASH_STRING keys.  Keys of different length
** never match (1 is returned); keys of equal length are compared with
** strncmp() over that length.  Zero means the keys are equal.
*/
static int strCompare(const void *pKey1, int n1, const void *pKey2, int n2){
  const char *zLeft = (const char*)pKey1;
  const char *zRight = (const char*)pKey2;
  return n1==n2 ? strncmp(zLeft, zRight, n1) : 1;
}
|
||||
|
||||
/*
** Hash function for HASH_BINARY keys.
**
** pKey points to nKey bytes of key data.  A zero or negative nKey hashes
** no bytes and yields 0.  The result is in the range 0..0x7fffffff.
*/
static int binHash(const void *pKey, int nKey){
  const char *z = (const char *)pKey;
  /* Accumulate in an unsigned int.  Left-shifting a signed int so that
  ** the result is out of range, or left-shifting a negative value, is
  ** undefined behavior; unsigned arithmetic simply wraps. */
  unsigned int h = 0;
  int i;

  for(i=0; i<nKey; i++){
    /* The byte is read as plain char, so where char is signed a byte
    ** with the high bit set contributes its sign-extended value. */
    h = (h<<3) ^ h ^ (unsigned int)z[i];
  }
  return (int)(h & 0x7fffffff);
}
|
||||
/*
** Comparison function for HASH_BINARY keys.  Keys of different length
** never match (1 is returned); keys of equal length are compared with
** memcmp().  Zero means the keys are equal.
*/
static int binCompare(const void *pKey1, int n1, const void *pKey2, int n2){
  return n1==n2 ? memcmp(pKey1, pKey2, n1) : 1;
}
|
||||
|
||||
/*
|
||||
** Return a pointer to the appropriate hash function given the key class.
|
||||
**
|
||||
** The C syntax in this function definition may be unfamilar to some
|
||||
** programmers, so we provide the following additional explanation:
|
||||
**
|
||||
** The name of the function is "hashFunction". The function takes a
|
||||
** single parameter "keyClass". The return value of hashFunction()
|
||||
** is a pointer to another function. Specifically, the return value
|
||||
** of hashFunction() is a pointer to a function that takes two parameters
|
||||
** with types "const void*" and "int" and returns an "int".
|
||||
*/
|
||||
static int (*hashFunction(int keyClass))(const void*,int){
|
||||
#if 0 /* HASH_INT and HASH_POINTER are never used */
|
||||
switch( keyClass ){
|
||||
case HASH_INT: return &intHash;
|
||||
case HASH_POINTER: return &ptrHash;
|
||||
case HASH_STRING: return &strHash;
|
||||
case HASH_BINARY: return &binHash;;
|
||||
default: break;
|
||||
}
|
||||
return 0;
|
||||
#else
|
||||
if( keyClass==HASH_STRING ){
|
||||
return &strHash;
|
||||
}else{
|
||||
assert( keyClass==HASH_BINARY );
|
||||
return &binHash;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
** Return a pointer to the appropriate hash function given the key class.
|
||||
**
|
||||
** For help in interpreted the obscure C code in the function definition,
|
||||
** see the header comment on the previous function.
|
||||
*/
|
||||
static int (*compareFunction(int keyClass))(const void*,int,const void*,int){
|
||||
#if 0 /* HASH_INT and HASH_POINTER are never used */
|
||||
switch( keyClass ){
|
||||
case HASH_INT: return &intCompare;
|
||||
case HASH_POINTER: return &ptrCompare;
|
||||
case HASH_STRING: return &strCompare;
|
||||
case HASH_BINARY: return &binCompare;
|
||||
default: break;
|
||||
}
|
||||
return 0;
|
||||
#else
|
||||
if( keyClass==HASH_STRING ){
|
||||
return &strCompare;
|
||||
}else{
|
||||
assert( keyClass==HASH_BINARY );
|
||||
return &binCompare;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Link an element into the hash table
|
||||
*/
|
||||
static void insertElement(
|
||||
Hash *pH, /* The complete hash table */
|
||||
struct _ht *pEntry, /* The entry into which pNew is inserted */
|
||||
HashElem *pNew /* The element to be inserted */
|
||||
){
|
||||
HashElem *pHead; /* First element already in pEntry */
|
||||
pHead = pEntry->chain;
|
||||
if( pHead ){
|
||||
pNew->next = pHead;
|
||||
pNew->prev = pHead->prev;
|
||||
if( pHead->prev ){ pHead->prev->next = pNew; }
|
||||
else { pH->first = pNew; }
|
||||
pHead->prev = pNew;
|
||||
}else{
|
||||
pNew->next = pH->first;
|
||||
if( pH->first ){ pH->first->prev = pNew; }
|
||||
pNew->prev = 0;
|
||||
pH->first = pNew;
|
||||
}
|
||||
pEntry->count++;
|
||||
pEntry->chain = pNew;
|
||||
}
|
||||
|
||||
|
||||
/* Resize the hash table so that it cantains "new_size" buckets.
|
||||
** "new_size" must be a power of 2. The hash table might fail
|
||||
** to resize if sqliteMalloc() fails.
|
||||
*/
|
||||
static void rehash(Hash *pH, int new_size){
|
||||
struct _ht *new_ht; /* The new hash table */
|
||||
HashElem *elem, *next_elem; /* For looping over existing elements */
|
||||
int (*xHash)(const void*,int); /* The hash function */
|
||||
|
||||
assert( (new_size & (new_size-1))==0 );
|
||||
new_ht = (struct _ht *)pH->xMalloc( new_size*sizeof(struct _ht) );
|
||||
if( new_ht==0 ) return;
|
||||
if( pH->ht ) pH->xFree(pH->ht);
|
||||
pH->ht = new_ht;
|
||||
pH->htsize = new_size;
|
||||
xHash = hashFunction(pH->keyClass);
|
||||
for(elem=pH->first, pH->first=0; elem; elem = next_elem){
|
||||
int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1);
|
||||
next_elem = elem->next;
|
||||
insertElement(pH, &new_ht[h], elem);
|
||||
}
|
||||
}
|
||||
|
||||
/* This function (for internal use only) locates an element in an
|
||||
** hash table that matches the given key. The hash for this key has
|
||||
** already been computed and is passed as the 4th parameter.
|
||||
*/
|
||||
static HashElem *findElementGivenHash(
|
||||
const Hash *pH, /* The pH to be searched */
|
||||
const void *pKey, /* The key we are searching for */
|
||||
int nKey,
|
||||
int h /* The hash for this key. */
|
||||
){
|
||||
HashElem *elem; /* Used to loop thru the element list */
|
||||
int count; /* Number of elements left to test */
|
||||
int (*xCompare)(const void*,int,const void*,int); /* comparison function */
|
||||
|
||||
if( pH->ht ){
|
||||
struct _ht *pEntry = &pH->ht[h];
|
||||
elem = pEntry->chain;
|
||||
count = pEntry->count;
|
||||
xCompare = compareFunction(pH->keyClass);
|
||||
while( count-- && elem ){
|
||||
if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){
|
||||
return elem;
|
||||
}
|
||||
elem = elem->next;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Remove a single entry from the hash table given a pointer to that
|
||||
** element and a hash on the element's key.
|
||||
*/
|
||||
static void removeElementGivenHash(
|
||||
Hash *pH, /* The pH containing "elem" */
|
||||
HashElem* elem, /* The element to be removed from the pH */
|
||||
int h /* Hash value for the element */
|
||||
){
|
||||
struct _ht *pEntry;
|
||||
if( elem->prev ){
|
||||
elem->prev->next = elem->next;
|
||||
}else{
|
||||
pH->first = elem->next;
|
||||
}
|
||||
if( elem->next ){
|
||||
elem->next->prev = elem->prev;
|
||||
}
|
||||
pEntry = &pH->ht[h];
|
||||
if( pEntry->chain==elem ){
|
||||
pEntry->chain = elem->next;
|
||||
}
|
||||
pEntry->count--;
|
||||
if( pEntry->count<=0 ){
|
||||
pEntry->chain = 0;
|
||||
}
|
||||
if( pH->copyKey && elem->pKey ){
|
||||
pH->xFree(elem->pKey);
|
||||
}
|
||||
pH->xFree( elem );
|
||||
pH->count--;
|
||||
if( pH->count<=0 ){
|
||||
assert( pH->first==0 );
|
||||
assert( pH->count==0 );
|
||||
HashClear(pH);
|
||||
}
|
||||
}
|
||||
|
||||
/* Attempt to locate an element of the hash table pH with a key
|
||||
** that matches pKey,nKey. Return the data for this element if it is
|
||||
** found, or NULL if there is no match.
|
||||
*/
|
||||
void *HashFind(const Hash *pH, const void *pKey, int nKey){
|
||||
int h; /* A hash on key */
|
||||
HashElem *elem; /* The element that matches key */
|
||||
int (*xHash)(const void*,int); /* The hash function */
|
||||
|
||||
if( pH==0 || pH->ht==0 ) return 0;
|
||||
xHash = hashFunction(pH->keyClass);
|
||||
assert( xHash!=0 );
|
||||
h = (*xHash)(pKey,nKey);
|
||||
assert( (pH->htsize & (pH->htsize-1))==0 );
|
||||
elem = findElementGivenHash(pH,pKey,nKey, h & (pH->htsize-1));
|
||||
return elem ? elem->data : 0;
|
||||
}
|
||||
|
||||
/* Insert an element into the hash table pH. The key is pKey,nKey
|
||||
** and the data is "data".
|
||||
**
|
||||
** If no element exists with a matching key, then a new
|
||||
** element is created. A copy of the key is made if the copyKey
|
||||
** flag is set. NULL is returned.
|
||||
**
|
||||
** If another element already exists with the same key, then the
|
||||
** new data replaces the old data and the old data is returned.
|
||||
** The key is not copied in this instance. If a malloc fails, then
|
||||
** the new data is returned and the hash table is unchanged.
|
||||
**
|
||||
** If the "data" parameter to this function is NULL, then the
|
||||
** element corresponding to "key" is removed from the hash table.
|
||||
*/
|
||||
void *HashInsert(Hash *pH, const void *pKey, int nKey, void *data){
|
||||
int hraw; /* Raw hash value of the key */
|
||||
int h; /* the hash of the key modulo hash table size */
|
||||
HashElem *elem; /* Used to loop thru the element list */
|
||||
HashElem *new_elem; /* New element added to the pH */
|
||||
int (*xHash)(const void*,int); /* The hash function */
|
||||
|
||||
assert( pH!=0 );
|
||||
xHash = hashFunction(pH->keyClass);
|
||||
assert( xHash!=0 );
|
||||
hraw = (*xHash)(pKey, nKey);
|
||||
assert( (pH->htsize & (pH->htsize-1))==0 );
|
||||
h = hraw & (pH->htsize-1);
|
||||
elem = findElementGivenHash(pH,pKey,nKey,h);
|
||||
if( elem ){
|
||||
void *old_data = elem->data;
|
||||
if( data==0 ){
|
||||
removeElementGivenHash(pH,elem,h);
|
||||
}else{
|
||||
elem->data = data;
|
||||
}
|
||||
return old_data;
|
||||
}
|
||||
if( data==0 ) return 0;
|
||||
new_elem = (HashElem*)pH->xMalloc( sizeof(HashElem) );
|
||||
if( new_elem==0 ) return data;
|
||||
if( pH->copyKey && pKey!=0 ){
|
||||
new_elem->pKey = pH->xMalloc( nKey );
|
||||
if( new_elem->pKey==0 ){
|
||||
pH->xFree(new_elem);
|
||||
return data;
|
||||
}
|
||||
memcpy((void*)new_elem->pKey, pKey, nKey);
|
||||
}else{
|
||||
new_elem->pKey = (void*)pKey;
|
||||
}
|
||||
new_elem->nKey = nKey;
|
||||
pH->count++;
|
||||
if( pH->htsize==0 ){
|
||||
rehash(pH,8);
|
||||
if( pH->htsize==0 ){
|
||||
pH->count = 0;
|
||||
pH->xFree(new_elem);
|
||||
return data;
|
||||
}
|
||||
}
|
||||
if( pH->count > pH->htsize ){
|
||||
rehash(pH,pH->htsize*2);
|
||||
}
|
||||
assert( pH->htsize>0 );
|
||||
assert( (pH->htsize & (pH->htsize-1))==0 );
|
||||
h = hraw & (pH->htsize-1);
|
||||
insertElement(pH, &pH->ht[h], new_elem);
|
||||
new_elem->data = data;
|
||||
return 0;
|
||||
}
|
@@ -1,111 +0,0 @@
|
||||
/*
|
||||
** 2001 September 22
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
** This is the header file for the generic hash-table implementation
|
||||
** used in SQLite. We've modified it slightly to serve as a standalone
|
||||
** hash table implementation for the full-text indexing module.
|
||||
**
|
||||
*/
|
||||
#ifndef _HASH_H_
|
||||
#define _HASH_H_
|
||||
|
||||
/* Forward declarations of structures. */
|
||||
typedef struct Hash Hash;
|
||||
typedef struct HashElem HashElem;
|
||||
|
||||
/* A complete hash table is an instance of the following structure.
|
||||
** The internals of this structure are intended to be opaque -- client
|
||||
** code should not attempt to access or modify the fields of this structure
|
||||
** directly. Change this structure only by using the routines below.
|
||||
** However, many of the "procedures" and "functions" for modifying and
|
||||
** accessing this structure are really macros, so we can't really make
|
||||
** this structure opaque.
|
||||
*/
|
||||
struct Hash {
|
||||
char keyClass; /* HASH_INT, _POINTER, _STRING, _BINARY */
|
||||
char copyKey; /* True if copy of key made on insert */
|
||||
int count; /* Number of entries in this table */
|
||||
HashElem *first; /* The first element of the array */
|
||||
void *(*xMalloc)(int); /* malloc() function to use */
|
||||
void (*xFree)(void *); /* free() function to use */
|
||||
int htsize; /* Number of buckets in the hash table */
|
||||
struct _ht { /* the hash table */
|
||||
int count; /* Number of entries with this hash */
|
||||
HashElem *chain; /* Pointer to first entry with this hash */
|
||||
} *ht;
|
||||
};
|
||||
|
||||
/* Each element in the hash table is an instance of the following
|
||||
** structure. All elements are stored on a single doubly-linked list.
|
||||
**
|
||||
** Again, this structure is intended to be opaque, but it can't really
|
||||
** be opaque because it is used by macros.
|
||||
*/
|
||||
struct HashElem {
|
||||
HashElem *next, *prev; /* Next and previous elements in the table */
|
||||
void *data; /* Data associated with this element */
|
||||
void *pKey; int nKey; /* Key associated with this element */
|
||||
};
|
||||
|
||||
/*
|
||||
** There are 4 different modes of operation for a hash table:
|
||||
**
|
||||
** HASH_INT nKey is used as the key and pKey is ignored.
|
||||
**
|
||||
** HASH_POINTER pKey is used as the key and nKey is ignored.
|
||||
**
|
||||
** HASH_STRING pKey points to a string that is nKey bytes long
|
||||
** (including the null-terminator, if any). Case
|
||||
** is respected in comparisons.
|
||||
**
|
||||
** HASH_BINARY pKey points to binary data nKey bytes long.
|
||||
** memcmp() is used to compare keys.
|
||||
**
|
||||
** A copy of the key is made for HASH_STRING and HASH_BINARY
|
||||
** if the copyKey parameter to HashInit is 1.
|
||||
*/
|
||||
/* #define HASH_INT 1 // NOT USED */
|
||||
/* #define HASH_POINTER 2 // NOT USED */
|
||||
#define HASH_STRING 3
|
||||
#define HASH_BINARY 4
|
||||
|
||||
/*
|
||||
** Access routines. To delete, insert a NULL pointer.
|
||||
*/
|
||||
void HashInit(Hash*, int keytype, int copyKey);
|
||||
void *HashInsert(Hash*, const void *pKey, int nKey, void *pData);
|
||||
void *HashFind(const Hash*, const void *pKey, int nKey);
|
||||
void HashClear(Hash*);
|
||||
|
||||
/*
|
||||
** Macros for looping over all elements of a hash table. The idiom is
|
||||
** like this:
|
||||
**
|
||||
** Hash h;
|
||||
** HashElem *p;
|
||||
** ...
|
||||
** for(p=HashFirst(&h); p; p=HashNext(p)){
|
||||
** SomeStructure *pData = HashData(p);
|
||||
** // do something with pData
|
||||
** }
|
||||
*/
|
||||
#define HashFirst(H) ((H)->first)
|
||||
#define HashNext(E) ((E)->next)
|
||||
#define HashData(E) ((E)->data)
|
||||
#define HashKey(E) ((E)->pKey)
|
||||
#define HashKeysize(E) ((E)->nKey)
|
||||
|
||||
/*
|
||||
** Number of entries in a hash table
|
||||
*/
|
||||
#define HashCount(H) ((H)->count)
|
||||
|
||||
#endif /* _HASH_H_ */
|
3348
ext/fts1/fts1.c
3348
ext/fts1/fts1.c
File diff suppressed because it is too large
Load Diff
@@ -1,11 +0,0 @@
|
||||
#include "sqlite3.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
int sqlite3Fts1Init(sqlite3 *db);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif /* __cplusplus */
|
@@ -1,369 +0,0 @@
|
||||
/*
|
||||
** 2001 September 22
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
** This is the implementation of generic hash-tables used in SQLite.
|
||||
** We've modified it slightly to serve as a standalone hash table
|
||||
** implementation for the full-text indexing module.
|
||||
*/
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
|
||||
** The code in this file is only compiled if:
|
||||
**
|
||||
** * The FTS1 module is being built as an extension
|
||||
** (in which case SQLITE_CORE is not defined), or
|
||||
**
|
||||
** * The FTS1 module is being built into the core of
|
||||
** SQLite (in which case SQLITE_ENABLE_FTS1 is defined).
|
||||
*/
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)
|
||||
|
||||
|
||||
#include "fts1_hash.h"
|
||||
|
||||
/*
** Allocate n bytes of zero-filled memory.
**
** Returns a pointer to the new allocation, or NULL if n is negative or
** the allocation fails.  The memory is released with free().
*/
static void *malloc_and_zero(int n){
  /* A negative size would be converted to an enormous size_t value. */
  if( n<0 ) return NULL;
  /* calloc() returns memory that is already zero-filled. */
  return calloc(1, (size_t)n);
}
|
||||
|
||||
/* Turn bulk memory into a hash table object by initializing the
|
||||
** fields of the Hash structure.
|
||||
**
|
||||
** "pNew" is a pointer to the hash table that is to be initialized.
|
||||
** keyClass is one of the constants
|
||||
** FTS1_HASH_BINARY or FTS1_HASH_STRING. The value of keyClass
|
||||
** determines what kind of key the hash table will use. "copyKey" is
|
||||
** true if the hash table should make its own private copy of keys and
|
||||
** false if it should just use the supplied pointer.
|
||||
*/
|
||||
void sqlite3Fts1HashInit(fts1Hash *pNew, int keyClass, int copyKey){
|
||||
assert( pNew!=0 );
|
||||
assert( keyClass>=FTS1_HASH_STRING && keyClass<=FTS1_HASH_BINARY );
|
||||
pNew->keyClass = keyClass;
|
||||
pNew->copyKey = copyKey;
|
||||
pNew->first = 0;
|
||||
pNew->count = 0;
|
||||
pNew->htsize = 0;
|
||||
pNew->ht = 0;
|
||||
pNew->xMalloc = malloc_and_zero;
|
||||
pNew->xFree = free;
|
||||
}
|
||||
|
||||
/* Remove all entries from a hash table. Reclaim all memory.
|
||||
** Call this routine to delete a hash table or to reset a hash table
|
||||
** to the empty state.
|
||||
*/
|
||||
void sqlite3Fts1HashClear(fts1Hash *pH){
|
||||
fts1HashElem *elem; /* For looping over all elements of the table */
|
||||
|
||||
assert( pH!=0 );
|
||||
elem = pH->first;
|
||||
pH->first = 0;
|
||||
if( pH->ht ) pH->xFree(pH->ht);
|
||||
pH->ht = 0;
|
||||
pH->htsize = 0;
|
||||
while( elem ){
|
||||
fts1HashElem *next_elem = elem->next;
|
||||
if( pH->copyKey && elem->pKey ){
|
||||
pH->xFree(elem->pKey);
|
||||
}
|
||||
pH->xFree(elem);
|
||||
elem = next_elem;
|
||||
}
|
||||
pH->count = 0;
|
||||
}
|
||||
|
||||
/*
** Hash function for FTS1_HASH_STRING keys.
**
** pKey points to the key text and nKey is its length in bytes.  When
** nKey is zero or negative the length is taken from strlen(), so pKey
** must then be NUL-terminated.  The result is in the range 0..0x7fffffff.
*/
static int strHash(const void *pKey, int nKey){
  const char *z = (const char *)pKey;
  /* Accumulate in an unsigned int.  Left-shifting a signed int so that
  ** the result is out of range, or left-shifting a negative value, is
  ** undefined behavior; unsigned arithmetic simply wraps. */
  unsigned int h = 0;
  int i;

  if( nKey<=0 ) nKey = (int)strlen(z);
  for(i=0; i<nKey; i++){
    /* The byte is read as plain char, so where char is signed a byte
    ** with the high bit set contributes its sign-extended value. */
    h = (h<<3) ^ h ^ (unsigned int)z[i];
  }
  return (int)(h & 0x7fffffff);
}
|
||||
/*
** Comparison function for FTS1_HASH_STRING keys.  Keys of different
** length never match (1 is returned); keys of equal length are compared
** with strncmp() over that length.  Zero means the keys are equal.
*/
static int strCompare(const void *pKey1, int n1, const void *pKey2, int n2){
  const char *zLeft = (const char*)pKey1;
  const char *zRight = (const char*)pKey2;
  return n1==n2 ? strncmp(zLeft, zRight, n1) : 1;
}
|
||||
|
||||
/*
** Hash function for FTS1_HASH_BINARY keys.
**
** pKey points to nKey bytes of key data.  A zero or negative nKey hashes
** no bytes and yields 0.  The result is in the range 0..0x7fffffff.
*/
static int binHash(const void *pKey, int nKey){
  const char *z = (const char *)pKey;
  /* Accumulate in an unsigned int.  Left-shifting a signed int so that
  ** the result is out of range, or left-shifting a negative value, is
  ** undefined behavior; unsigned arithmetic simply wraps. */
  unsigned int h = 0;
  int i;

  for(i=0; i<nKey; i++){
    /* The byte is read as plain char, so where char is signed a byte
    ** with the high bit set contributes its sign-extended value. */
    h = (h<<3) ^ h ^ (unsigned int)z[i];
  }
  return (int)(h & 0x7fffffff);
}
|
||||
/*
** Comparison function for FTS1_HASH_BINARY keys.  Keys of different
** length never match (1 is returned); keys of equal length are compared
** with memcmp().  Zero means the keys are equal.
*/
static int binCompare(const void *pKey1, int n1, const void *pKey2, int n2){
  return n1==n2 ? memcmp(pKey1, pKey2, n1) : 1;
}
|
||||
|
||||
/*
|
||||
** Return a pointer to the appropriate hash function given the key class.
|
||||
**
|
||||
** The C syntax in this function definition may be unfamilar to some
|
||||
** programmers, so we provide the following additional explanation:
|
||||
**
|
||||
** The name of the function is "hashFunction". The function takes a
|
||||
** single parameter "keyClass". The return value of hashFunction()
|
||||
** is a pointer to another function. Specifically, the return value
|
||||
** of hashFunction() is a pointer to a function that takes two parameters
|
||||
** with types "const void*" and "int" and returns an "int".
|
||||
*/
|
||||
static int (*hashFunction(int keyClass))(const void*,int){
|
||||
if( keyClass==FTS1_HASH_STRING ){
|
||||
return &strHash;
|
||||
}else{
|
||||
assert( keyClass==FTS1_HASH_BINARY );
|
||||
return &binHash;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
** Return a pointer to the appropriate hash function given the key class.
|
||||
**
|
||||
** For help in interpreted the obscure C code in the function definition,
|
||||
** see the header comment on the previous function.
|
||||
*/
|
||||
static int (*compareFunction(int keyClass))(const void*,int,const void*,int){
|
||||
if( keyClass==FTS1_HASH_STRING ){
|
||||
return &strCompare;
|
||||
}else{
|
||||
assert( keyClass==FTS1_HASH_BINARY );
|
||||
return &binCompare;
|
||||
}
|
||||
}
|
||||
|
||||
/* Link an element into the hash table
|
||||
*/
|
||||
static void insertElement(
|
||||
fts1Hash *pH, /* The complete hash table */
|
||||
struct _fts1ht *pEntry, /* The entry into which pNew is inserted */
|
||||
fts1HashElem *pNew /* The element to be inserted */
|
||||
){
|
||||
fts1HashElem *pHead; /* First element already in pEntry */
|
||||
pHead = pEntry->chain;
|
||||
if( pHead ){
|
||||
pNew->next = pHead;
|
||||
pNew->prev = pHead->prev;
|
||||
if( pHead->prev ){ pHead->prev->next = pNew; }
|
||||
else { pH->first = pNew; }
|
||||
pHead->prev = pNew;
|
||||
}else{
|
||||
pNew->next = pH->first;
|
||||
if( pH->first ){ pH->first->prev = pNew; }
|
||||
pNew->prev = 0;
|
||||
pH->first = pNew;
|
||||
}
|
||||
pEntry->count++;
|
||||
pEntry->chain = pNew;
|
||||
}
|
||||
|
||||
|
||||
/* Resize the hash table so that it contains "new_size" buckets.
|
||||
** "new_size" must be a power of 2. The hash table might fail
|
||||
** to resize if the table's xMalloc() allocator fails.
|
||||
*/
|
||||
static void rehash(fts1Hash *pH, int new_size){
  struct _fts1ht *aBucket;         /* The new bucket array */
  fts1HashElem *pElem;             /* Element currently being re-linked */
  int (*xHash)(const void*,int);   /* The hash function */

  assert( (new_size & (new_size-1))==0 );
  /* NOTE(review): the bucket fields are not initialized here, so this
  ** presumably relies on pH->xMalloc returning zeroed memory -- confirm. */
  aBucket = (struct _fts1ht *)pH->xMalloc( new_size*sizeof(struct _fts1ht) );
  if( aBucket==0 ) return;         /* Keep the old table on failure */
  if( pH->ht ) pH->xFree(pH->ht);
  pH->ht = aBucket;
  pH->htsize = new_size;
  xHash = hashFunction(pH->keyClass);

  /* Detach the whole element list, then link each element back in
  ** through its new bucket. */
  pElem = pH->first;
  pH->first = 0;
  while( pElem ){
    fts1HashElem *pNext = pElem->next;
    int iBucket = (*xHash)(pElem->pKey, pElem->nKey) & (new_size-1);
    insertElement(pH, &aBucket[iBucket], pElem);
    pElem = pNext;
  }
}
|
||||
|
||||
/* This function (for internal use only) locates an element in an
|
||||
** hash table that matches the given key. The hash for this key has
|
||||
** already been computed and is passed as the 4th parameter.
|
||||
*/
|
||||
static fts1HashElem *findElementGivenHash(
|
||||
const fts1Hash *pH, /* The pH to be searched */
|
||||
const void *pKey, /* The key we are searching for */
|
||||
int nKey,
|
||||
int h /* The hash for this key. */
|
||||
){
|
||||
fts1HashElem *elem; /* Used to loop thru the element list */
|
||||
int count; /* Number of elements left to test */
|
||||
int (*xCompare)(const void*,int,const void*,int); /* comparison function */
|
||||
|
||||
if( pH->ht ){
|
||||
struct _fts1ht *pEntry = &pH->ht[h];
|
||||
elem = pEntry->chain;
|
||||
count = pEntry->count;
|
||||
xCompare = compareFunction(pH->keyClass);
|
||||
while( count-- && elem ){
|
||||
if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){
|
||||
return elem;
|
||||
}
|
||||
elem = elem->next;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Remove a single entry from the hash table given a pointer to that
|
||||
** element and a hash on the element's key.
|
||||
*/
|
||||
static void removeElementGivenHash(
|
||||
fts1Hash *pH, /* The pH containing "elem" */
|
||||
fts1HashElem* elem, /* The element to be removed from the pH */
|
||||
int h /* Hash value for the element */
|
||||
){
|
||||
struct _fts1ht *pEntry;
|
||||
if( elem->prev ){
|
||||
elem->prev->next = elem->next;
|
||||
}else{
|
||||
pH->first = elem->next;
|
||||
}
|
||||
if( elem->next ){
|
||||
elem->next->prev = elem->prev;
|
||||
}
|
||||
pEntry = &pH->ht[h];
|
||||
if( pEntry->chain==elem ){
|
||||
pEntry->chain = elem->next;
|
||||
}
|
||||
pEntry->count--;
|
||||
if( pEntry->count<=0 ){
|
||||
pEntry->chain = 0;
|
||||
}
|
||||
if( pH->copyKey && elem->pKey ){
|
||||
pH->xFree(elem->pKey);
|
||||
}
|
||||
pH->xFree( elem );
|
||||
pH->count--;
|
||||
if( pH->count<=0 ){
|
||||
assert( pH->first==0 );
|
||||
assert( pH->count==0 );
|
||||
fts1HashClear(pH);
|
||||
}
|
||||
}
|
||||
|
||||
/* Attempt to locate an element of the hash table pH with a key
|
||||
** that matches pKey,nKey. Return the data for this element if it is
|
||||
** found, or NULL if there is no match.
|
||||
*/
|
||||
void *sqlite3Fts1HashFind(const fts1Hash *pH, const void *pKey, int nKey){
|
||||
int h; /* A hash on key */
|
||||
fts1HashElem *elem; /* The element that matches key */
|
||||
int (*xHash)(const void*,int); /* The hash function */
|
||||
|
||||
if( pH==0 || pH->ht==0 ) return 0;
|
||||
xHash = hashFunction(pH->keyClass);
|
||||
assert( xHash!=0 );
|
||||
h = (*xHash)(pKey,nKey);
|
||||
assert( (pH->htsize & (pH->htsize-1))==0 );
|
||||
elem = findElementGivenHash(pH,pKey,nKey, h & (pH->htsize-1));
|
||||
return elem ? elem->data : 0;
|
||||
}
|
||||
|
||||
/* Insert an element into the hash table pH. The key is pKey,nKey
|
||||
** and the data is "data".
|
||||
**
|
||||
** If no element exists with a matching key, then a new
|
||||
** element is created. A copy of the key is made if the copyKey
|
||||
** flag is set. NULL is returned.
|
||||
**
|
||||
** If another element already exists with the same key, then the
|
||||
** new data replaces the old data and the old data is returned.
|
||||
** The key is not copied in this instance. If a malloc fails, then
|
||||
** the new data is returned and the hash table is unchanged.
|
||||
**
|
||||
** If the "data" parameter to this function is NULL, then the
|
||||
** element corresponding to "key" is removed from the hash table.
|
||||
*/
|
||||
void *sqlite3Fts1HashInsert(
|
||||
fts1Hash *pH, /* The hash table to insert into */
|
||||
const void *pKey, /* The key */
|
||||
int nKey, /* Number of bytes in the key */
|
||||
void *data /* The data */
|
||||
){
|
||||
int hraw; /* Raw hash value of the key */
|
||||
int h; /* the hash of the key modulo hash table size */
|
||||
fts1HashElem *elem; /* Used to loop thru the element list */
|
||||
fts1HashElem *new_elem; /* New element added to the pH */
|
||||
int (*xHash)(const void*,int); /* The hash function */
|
||||
|
||||
assert( pH!=0 );
|
||||
xHash = hashFunction(pH->keyClass);
|
||||
assert( xHash!=0 );
|
||||
hraw = (*xHash)(pKey, nKey);
|
||||
assert( (pH->htsize & (pH->htsize-1))==0 );
|
||||
h = hraw & (pH->htsize-1);
|
||||
elem = findElementGivenHash(pH,pKey,nKey,h);
|
||||
if( elem ){
|
||||
void *old_data = elem->data;
|
||||
if( data==0 ){
|
||||
removeElementGivenHash(pH,elem,h);
|
||||
}else{
|
||||
elem->data = data;
|
||||
}
|
||||
return old_data;
|
||||
}
|
||||
if( data==0 ) return 0;
|
||||
new_elem = (fts1HashElem*)pH->xMalloc( sizeof(fts1HashElem) );
|
||||
if( new_elem==0 ) return data;
|
||||
if( pH->copyKey && pKey!=0 ){
|
||||
new_elem->pKey = pH->xMalloc( nKey );
|
||||
if( new_elem->pKey==0 ){
|
||||
pH->xFree(new_elem);
|
||||
return data;
|
||||
}
|
||||
memcpy((void*)new_elem->pKey, pKey, nKey);
|
||||
}else{
|
||||
new_elem->pKey = (void*)pKey;
|
||||
}
|
||||
new_elem->nKey = nKey;
|
||||
pH->count++;
|
||||
if( pH->htsize==0 ){
|
||||
rehash(pH,8);
|
||||
if( pH->htsize==0 ){
|
||||
pH->count = 0;
|
||||
pH->xFree(new_elem);
|
||||
return data;
|
||||
}
|
||||
}
|
||||
if( pH->count > pH->htsize ){
|
||||
rehash(pH,pH->htsize*2);
|
||||
}
|
||||
assert( pH->htsize>0 );
|
||||
assert( (pH->htsize & (pH->htsize-1))==0 );
|
||||
h = hraw & (pH->htsize-1);
|
||||
insertElement(pH, &pH->ht[h], new_elem);
|
||||
new_elem->data = data;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) */
|
@@ -1,112 +0,0 @@
|
||||
/*
|
||||
** 2001 September 22
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
** This is the header file for the generic hash-table implementation
|
||||
** used in SQLite. We've modified it slightly to serve as a standalone
|
||||
** hash table implementation for the full-text indexing module.
|
||||
**
|
||||
*/
|
||||
#ifndef _FTS1_HASH_H_
|
||||
#define _FTS1_HASH_H_
|
||||
|
||||
/* Forward declarations of structures. */
|
||||
typedef struct fts1Hash fts1Hash;
|
||||
typedef struct fts1HashElem fts1HashElem;
|
||||
|
||||
/* A complete hash table is an instance of the following structure.
|
||||
** The internals of this structure are intended to be opaque -- client
|
||||
** code should not attempt to access or modify the fields of this structure
|
||||
** directly. Change this structure only by using the routines below.
|
||||
** However, many of the "procedures" and "functions" for modifying and
|
||||
** accessing this structure are really macros, so we can't really make
|
||||
** this structure opaque.
|
||||
*/
|
||||
struct fts1Hash {
|
||||
char keyClass; /* HASH_INT, _POINTER, _STRING, _BINARY */
|
||||
char copyKey; /* True if copy of key made on insert */
|
||||
int count; /* Number of entries in this table */
|
||||
fts1HashElem *first; /* The first element of the array */
|
||||
void *(*xMalloc)(int); /* malloc() function to use */
|
||||
void (*xFree)(void *); /* free() function to use */
|
||||
int htsize; /* Number of buckets in the hash table */
|
||||
struct _fts1ht { /* the hash table */
|
||||
int count; /* Number of entries with this hash */
|
||||
fts1HashElem *chain; /* Pointer to first entry with this hash */
|
||||
} *ht;
|
||||
};
|
||||
|
||||
/* Each element in the hash table is an instance of the following
|
||||
** structure. All elements are stored on a single doubly-linked list.
|
||||
**
|
||||
** Again, this structure is intended to be opaque, but it can't really
|
||||
** be opaque because it is used by macros.
|
||||
*/
|
||||
struct fts1HashElem {
|
||||
fts1HashElem *next, *prev; /* Next and previous elements in the table */
|
||||
void *data; /* Data associated with this element */
|
||||
void *pKey; int nKey; /* Key associated with this element */
|
||||
};
|
||||
|
||||
/*
|
||||
** There are 2 different modes of operation for a hash table:
|
||||
**
|
||||
** FTS1_HASH_STRING pKey points to a string that is nKey bytes long
|
||||
** (including the null-terminator, if any). Case
|
||||
** is respected in comparisons.
|
||||
**
|
||||
** FTS1_HASH_BINARY pKey points to binary data nKey bytes long.
|
||||
** memcmp() is used to compare keys.
|
||||
**
|
||||
** A copy of the key is made if the copyKey parameter to fts1HashInit is 1.
|
||||
*/
|
||||
#define FTS1_HASH_STRING    1   /* Keys are strings */
#define FTS1_HASH_BINARY    2   /* Keys are raw bytes compared with memcmp() */
|
||||
|
||||
/*
|
||||
** Access routines. To delete, insert a NULL pointer.
|
||||
*/
|
||||
void sqlite3Fts1HashInit(fts1Hash*, int keytype, int copyKey);
|
||||
void *sqlite3Fts1HashInsert(fts1Hash*, const void *pKey, int nKey, void *pData);
|
||||
void *sqlite3Fts1HashFind(const fts1Hash*, const void *pKey, int nKey);
|
||||
void sqlite3Fts1HashClear(fts1Hash*);
|
||||
|
||||
/*
|
||||
** Shorthand for the functions above
|
||||
*/
|
||||
#define fts1HashInit     sqlite3Fts1HashInit     /* set up a table */
#define fts1HashInsert   sqlite3Fts1HashInsert   /* add, replace or delete */
#define fts1HashFind     sqlite3Fts1HashFind     /* look up a key */
#define fts1HashClear    sqlite3Fts1HashClear    /* discard all content */
|
||||
|
||||
/*
|
||||
** Macros for looping over all elements of a hash table. The idiom is
|
||||
** like this:
|
||||
**
|
||||
** fts1Hash h;
|
||||
** fts1HashElem *p;
|
||||
** ...
|
||||
** for(p=fts1HashFirst(&h); p; p=fts1HashNext(p)){
|
||||
** SomeStructure *pData = fts1HashData(p);
|
||||
** // do something with pData
|
||||
** }
|
||||
*/
|
||||
#define fts1HashFirst(H)    ((H)->first)   /* first element of table H */
#define fts1HashNext(E)     ((E)->next)    /* element after E */
#define fts1HashData(E)     ((E)->data)    /* data stored in element E */
#define fts1HashKey(E)      ((E)->pKey)    /* key of element E */
#define fts1HashKeysize(E)  ((E)->nKey)    /* size in bytes of E's key */
|
||||
|
||||
/*
|
||||
** Number of entries in a hash table
|
||||
*/
|
||||
#define fts1HashCount(H)    ((H)->count)   /* elements currently in table H */
|
||||
|
||||
#endif /* _FTS1_HASH_H_ */
|
@@ -1,643 +0,0 @@
|
||||
/*
|
||||
** 2006 September 30
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
** Implementation of the full-text-search tokenizer that implements
|
||||
** a Porter stemmer.
|
||||
*/
|
||||
|
||||
/*
|
||||
** The code in this file is only compiled if:
|
||||
**
|
||||
** * The FTS1 module is being built as an extension
|
||||
** (in which case SQLITE_CORE is not defined), or
|
||||
**
|
||||
** * The FTS1 module is being built into the core of
|
||||
** SQLite (in which case SQLITE_ENABLE_FTS1 is defined).
|
||||
*/
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)
|
||||
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "fts1_tokenizer.h"
|
||||
|
||||
/*
|
||||
** Class derived from sqlite3_tokenizer
|
||||
*/
|
||||
typedef struct porter_tokenizer {
|
||||
sqlite3_tokenizer base; /* Base class */
|
||||
} porter_tokenizer;
|
||||
|
||||
/*
|
||||
** Class derived from sqlite3_tokenizer_cursor
|
||||
*/
|
||||
typedef struct porter_tokenizer_cursor {
|
||||
sqlite3_tokenizer_cursor base;
|
||||
const char *zInput; /* input we are tokenizing */
|
||||
int nInput; /* size of the input */
|
||||
int iOffset; /* current position in zInput */
|
||||
int iToken; /* index of next token to be returned */
|
||||
char *zToken; /* storage for current token */
|
||||
int nAllocated; /* space allocated to zToken buffer */
|
||||
} porter_tokenizer_cursor;
|
||||
|
||||
|
||||
/* Forward declaration */
|
||||
static const sqlite3_tokenizer_module porterTokenizerModule;
|
||||
|
||||
|
||||
/*
|
||||
** Create a new tokenizer instance.
|
||||
*/
|
||||
static int porterCreate(
|
||||
int argc, const char * const *argv,
|
||||
sqlite3_tokenizer **ppTokenizer
|
||||
){
|
||||
porter_tokenizer *t;
|
||||
t = (porter_tokenizer *) calloc(sizeof(*t), 1);
|
||||
if( t==NULL ) return SQLITE_NOMEM;
|
||||
|
||||
*ppTokenizer = &t->base;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Destroy a tokenizer
|
||||
*/
|
||||
static int porterDestroy(sqlite3_tokenizer *pTokenizer){
  /* The tokenizer is a single allocation made by porterCreate() */
  free(pTokenizer);
  return SQLITE_OK;
}
|
||||
|
||||
/*
|
||||
** Prepare to begin tokenizing a particular string. The input
|
||||
** string to be tokenized is zInput[0..nInput-1]. A cursor
|
||||
** used to incrementally tokenize this string is returned in
|
||||
** *ppCursor.
|
||||
*/
|
||||
static int porterOpen(
|
||||
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
|
||||
const char *zInput, int nInput, /* String to be tokenized */
|
||||
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
|
||||
){
|
||||
porter_tokenizer_cursor *c;
|
||||
|
||||
c = (porter_tokenizer_cursor *) malloc(sizeof(*c));
|
||||
if( c==NULL ) return SQLITE_NOMEM;
|
||||
|
||||
c->zInput = zInput;
|
||||
if( zInput==0 ){
|
||||
c->nInput = 0;
|
||||
}else if( nInput<0 ){
|
||||
c->nInput = (int)strlen(zInput);
|
||||
}else{
|
||||
c->nInput = nInput;
|
||||
}
|
||||
c->iOffset = 0; /* start tokenizing at the beginning */
|
||||
c->iToken = 0;
|
||||
c->zToken = NULL; /* no space allocated, yet. */
|
||||
c->nAllocated = 0;
|
||||
|
||||
*ppCursor = &c->base;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Close a tokenization cursor previously opened by a call to
|
||||
** porterOpen() above.
|
||||
*/
|
||||
static int porterClose(sqlite3_tokenizer_cursor *pCursor){
  porter_tokenizer_cursor *pCsr = (porter_tokenizer_cursor *) pCursor;
  free(pCsr->zToken);      /* Token buffer.  May still be NULL */
  free(pCsr);              /* The cursor itself */
  return SQLITE_OK;
}
|
||||
/*
|
||||
** Vowel or consonant
|
||||
*/
|
||||
static const char cType[] = {
  /* a  b  c  d  e  f  g  h  i  j  k  l  m */
     0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1,
  /* n  o  p  q  r  s  t  u  v  w  x  y  z */
     1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 2, 1
};
|
||||
|
||||
/*
|
||||
** isConsonant() and isVowel() determine if their first character in
|
||||
** the string they point to is a consonant or a vowel, according
|
||||
** to Porter rules.
|
||||
**
|
||||
** A consonant is any letter other than 'a', 'e', 'i', 'o', or 'u'.
|
||||
** 'Y' is a consonant unless it follows another consonant,
|
||||
** in which case it is a vowel.
|
||||
**
|
||||
** In these routines, the letters are in reverse order. So the 'y' rule
|
||||
** is that 'y' is a consonant unless it is followed by another
|
||||
** consonant.
|
||||
*/
|
||||
static int isVowel(const char*);
|
||||
static int isConsonant(const char *z){
|
||||
int j;
|
||||
char x = *z;
|
||||
if( x==0 ) return 0;
|
||||
assert( x>='a' && x<='z' );
|
||||
j = cType[x-'a'];
|
||||
if( j<2 ) return j;
|
||||
return z[1]==0 || isVowel(z + 1);
|
||||
}
|
||||
static int isVowel(const char *z){
|
||||
int j;
|
||||
char x = *z;
|
||||
if( x==0 ) return 0;
|
||||
assert( x>='a' && x<='z' );
|
||||
j = cType[x-'a'];
|
||||
if( j<2 ) return 1-j;
|
||||
return isConsonant(z + 1);
|
||||
}
|
||||
|
||||
/*
|
||||
** Let any sequence of one or more vowels be represented by V and let
|
||||
** C be sequence of one or more consonants. Then every word can be
|
||||
** represented as:
|
||||
**
|
||||
** [C] (VC){m} [V]
|
||||
**
|
||||
** In prose: A word is an optional consonant followed by zero or more
|
||||
** vowel-consonant pairs followed by an optional vowel. "m" is the
|
||||
** number of vowel consonant pairs. This routine computes the value
|
||||
** of m for the first i bytes of a word.
|
||||
**
|
||||
** Return true if the m-value for z is 1 or more. In other words,
|
||||
** return true if z contains at least one vowel that is followed
|
||||
** by a consonant.
|
||||
**
|
||||
** In this routine z[] is in reverse order. So we are really looking
|
||||
** for an instance of a consonant followed by a vowel.
|
||||
*/
|
||||
static int m_gt_0(const char *z){
  for(; isVowel(z); z++){}        /* Skip the optional vowel run */
  if( z[0]==0 ) return 0;
  for(; isConsonant(z); z++){}    /* Skip a consonant run */
  return z[0]!=0;                 /* True if another letter follows it */
}
|
||||
|
||||
/* Like m_gt_0 above except we are looking for a value of m which is
|
||||
** exactly 1
|
||||
*/
|
||||
static int m_eq_1(const char *z){
  for(; isVowel(z); z++){}        /* Optional vowel run */
  if( z[0]==0 ) return 0;
  for(; isConsonant(z); z++){}    /* Consonant run */
  if( z[0]==0 ) return 0;         /* No vowel after it, so m is 0 */
  for(; isVowel(z); z++){}        /* The vowel run that makes m at least 1 */
  if( z[0]==0 ) return 1;
  for(; isConsonant(z); z++){}    /* Optional consonant run */
  return z[0]==0;                 /* Anything beyond this means m>1 */
}
|
||||
|
||||
/* Like mgt0 above except we are looking for a value of m>1 instead
|
||||
** of m>0
|
||||
*/
|
||||
static int m_gt_1(const char *z){
  for(; isVowel(z); z++){}        /* Optional vowel run */
  if( z[0]==0 ) return 0;
  for(; isConsonant(z); z++){}    /* First consonant run */
  if( z[0]==0 ) return 0;
  for(; isVowel(z); z++){}        /* Vowel run */
  if( z[0]==0 ) return 0;
  for(; isConsonant(z); z++){}    /* Second consonant run */
  return z[0]!=0;                 /* True if another letter follows it */
}
|
||||
|
||||
/*
|
||||
** Return TRUE if there is a vowel anywhere within the string z[]
|
||||
*/
|
||||
static int hasVowel(const char *z){
  /* Skip consonants.  Stopping before the terminator means that the
  ** scan hit a letter which is not a consonant. */
  for(; isConsonant(z); z++){}
  return z[0]!=0;
}
|
||||
|
||||
/*
|
||||
** Return TRUE if the word ends in a double consonant.
|
||||
**
|
||||
** The text is reversed here. So we are really looking at
|
||||
** the first two characters of z[].
|
||||
*/
|
||||
static int doubleConsonant(const char *z){
  if( !isConsonant(z) ) return 0;    /* First stored letter is a consonant, */
  if( z[0]!=z[1] ) return 0;         /* the second letter is the same one, */
  return isConsonant(z+1);           /* and it too counts as a consonant */
}
|
||||
|
||||
/*
|
||||
** Return TRUE if the word ends with three letters which
|
||||
** are consonant-vowel-consonant and where the final consonant
|
||||
** is not 'w', 'x', or 'y'.
|
||||
**
|
||||
** The word is reversed here. So we are really checking the
|
||||
** first three letters and the first one cannot be in [wxy].
|
||||
*/
|
||||
static int star_oh(const char *z){
  /* First stored letter: a consonant other than w, x or y */
  if( z[0]==0 || !isConsonant(z) ) return 0;
  if( z[0]=='w' || z[0]=='x' || z[0]=='y' ) return 0;
  /* Second stored letter: a vowel */
  if( z[1]==0 || !isVowel(z+1) ) return 0;
  /* Third stored letter: a consonant */
  return z[2]!=0 && isConsonant(z+2);
}
|
||||
|
||||
/*
|
||||
** If the word ends with zFrom and xCond() is true for the stem
|
||||
** of the word that precedes the zFrom ending, then change the
|
||||
** ending to zTo.
|
||||
**
|
||||
** The input word *pz and zFrom are both in reverse order. zTo
|
||||
** is in normal order.
|
||||
**
|
||||
** Return TRUE if zFrom matches. Return FALSE if zFrom does not
|
||||
** match. Note that TRUE is returned even if xCond() fails and
|
||||
** no substitution occurs.
|
||||
*/
|
||||
static int stem(
  char **pz,                  /* The word being stemmed (Reversed) */
  const char *zFrom,          /* If the ending matches this... (Reversed) */
  const char *zTo,            /* ... change the ending to this (not reversed) */
  int (*xCond)(const char*)   /* Condition that must be true, or NULL */
){
  char *zWord = *pz;

  /* Match zFrom against the front of the reversed word */
  for(; *zFrom; zFrom++, zWord++){
    if( *zFrom!=*zWord ) return 0;
  }

  /* The ending matched.  zWord now points at the remaining stem.  If
  ** the stem fails the condition, report the match but change nothing. */
  if( xCond!=0 && xCond(zWord)==0 ) return 1;

  /* Write the replacement ending in front of the stem.  zTo is in
  ** forward order, so writing it back to front stores it reversed. */
  for(; *zTo; zTo++){
    *(--zWord) = *zTo;
  }
  *pz = zWord;
  return 1;
}
|
||||
|
||||
/*
|
||||
** This is the fallback stemmer used when the porter stemmer is
|
||||
** inappropriate. The input word is copied into the output with
|
||||
** US-ASCII case folding. If the input word is too long (more
|
||||
** than 20 bytes if it contains no digits or more than 6 bytes if
|
||||
** it contains digits) then word is truncated to 20 or 6 bytes
|
||||
** by taking 10 or 3 bytes from the beginning and end.
|
||||
*/
|
||||
static void copy_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
  int iIn;                 /* Index into zIn[] */
  int nOut;                /* Number of bytes kept in zOut[] */
  int nKeep;               /* Bytes kept from each end of an over-long word */
  int sawDigit = 0;        /* True once a digit has been seen */

  /* Copy the whole word, folding [A-Z] to lower case */
  for(iIn=0; iIn<nIn; iIn++){
    int ch = zIn[iIn];
    if( ch>='A' && ch<='Z' ){
      ch += 'a' - 'A';
    }else if( ch>='0' && ch<='9' ){
      sawDigit = 1;
    }
    zOut[iIn] = ch;
  }
  nOut = iIn;

  /* An over-long word is cut down to its first and last nKeep bytes */
  nKeep = sawDigit ? 3 : 10;
  if( nIn>nKeep*2 ){
    const char *zTail = &zOut[nIn-nKeep];
    int k;
    for(k=0; k<nKeep; k++){
      zOut[nKeep+k] = zTail[k];
    }
    nOut = nKeep*2;
  }

  zOut[nOut] = 0;
  *pnOut = nOut;
}
|
||||
|
||||
|
||||
/*
|
||||
** Stem the input word zIn[0..nIn-1]. Store the output in zOut.
|
||||
** zOut is at least big enough to hold nIn bytes. Write the actual
|
||||
** size of the output word (exclusive of the '\0' terminator) into *pnOut.
|
||||
**
|
||||
** Any upper-case characters in the US-ASCII character set ([A-Z])
|
||||
** are converted to lower case. Upper-case UTF characters are
|
||||
** unchanged.
|
||||
**
|
||||
** Words that are longer than about 20 bytes are stemmed by retaining
|
||||
** a few bytes from the beginning and the end of the word. If the
|
||||
** word contains digits, 3 bytes are taken from the beginning and
|
||||
** 3 bytes from the end. For long words without digits, 10 bytes
|
||||
** are taken from each end. US-ASCII case folding still applies.
|
||||
**
|
||||
** If the input word contains no digits but does contain characters not
** in [a-zA-Z] then no stemming is attempted and this routine just
** copies the input into the output with US-ASCII
** case folding.
|
||||
**
|
||||
** Stemming never increases the length of the word. So there is
|
||||
** no chance of overflowing the zOut buffer.
|
||||
*/
|
||||
static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
|
||||
int i, j, c;
|
||||
char zReverse[28];
|
||||
char *z, *z2;
|
||||
if( nIn<3 || nIn>=sizeof(zReverse)-7 ){
|
||||
/* The word is too big or too small for the porter stemmer.
|
||||
** Fallback to the copy stemmer */
|
||||
copy_stemmer(zIn, nIn, zOut, pnOut);
|
||||
return;
|
||||
}
|
||||
for(i=0, j=sizeof(zReverse)-6; i<nIn; i++, j--){
|
||||
c = zIn[i];
|
||||
if( c>='A' && c<='Z' ){
|
||||
zReverse[j] = c + 'a' - 'A';
|
||||
}else if( c>='a' && c<='z' ){
|
||||
zReverse[j] = c;
|
||||
}else{
|
||||
/* The use of a character not in [a-zA-Z] means that we fallback
|
||||
** to the copy stemmer */
|
||||
copy_stemmer(zIn, nIn, zOut, pnOut);
|
||||
return;
|
||||
}
|
||||
}
|
||||
memset(&zReverse[sizeof(zReverse)-5], 0, 5);
|
||||
z = &zReverse[j+1];
|
||||
|
||||
|
||||
/* Step 1a */
|
||||
if( z[0]=='s' ){
|
||||
if(
|
||||
!stem(&z, "sess", "ss", 0) &&
|
||||
!stem(&z, "sei", "i", 0) &&
|
||||
!stem(&z, "ss", "ss", 0)
|
||||
){
|
||||
z++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Step 1b */
|
||||
z2 = z;
|
||||
if( stem(&z, "dee", "ee", m_gt_0) ){
|
||||
/* Do nothing. The work was all in the test */
|
||||
}else if(
|
||||
(stem(&z, "gni", "", hasVowel) || stem(&z, "de", "", hasVowel))
|
||||
&& z!=z2
|
||||
){
|
||||
if( stem(&z, "ta", "ate", 0) ||
|
||||
stem(&z, "lb", "ble", 0) ||
|
||||
stem(&z, "zi", "ize", 0) ){
|
||||
/* Do nothing. The work was all in the test */
|
||||
}else if( doubleConsonant(z) && (*z!='l' && *z!='s' && *z!='z') ){
|
||||
z++;
|
||||
}else if( m_eq_1(z) && star_oh(z) ){
|
||||
*(--z) = 'e';
|
||||
}
|
||||
}
|
||||
|
||||
/* Step 1c */
|
||||
if( z[0]=='y' && hasVowel(z+1) ){
|
||||
z[0] = 'i';
|
||||
}
|
||||
|
||||
/* Step 2 */
|
||||
switch( z[1] ){
|
||||
case 'a':
|
||||
stem(&z, "lanoita", "ate", m_gt_0) ||
|
||||
stem(&z, "lanoit", "tion", m_gt_0);
|
||||
break;
|
||||
case 'c':
|
||||
stem(&z, "icne", "ence", m_gt_0) ||
|
||||
stem(&z, "icna", "ance", m_gt_0);
|
||||
break;
|
||||
case 'e':
|
||||
stem(&z, "rezi", "ize", m_gt_0);
|
||||
break;
|
||||
case 'g':
|
||||
stem(&z, "igol", "log", m_gt_0);
|
||||
break;
|
||||
case 'l':
|
||||
stem(&z, "ilb", "ble", m_gt_0) ||
|
||||
stem(&z, "illa", "al", m_gt_0) ||
|
||||
stem(&z, "iltne", "ent", m_gt_0) ||
|
||||
stem(&z, "ile", "e", m_gt_0) ||
|
||||
stem(&z, "ilsuo", "ous", m_gt_0);
|
||||
break;
|
||||
case 'o':
|
||||
stem(&z, "noitazi", "ize", m_gt_0) ||
|
||||
stem(&z, "noita", "ate", m_gt_0) ||
|
||||
stem(&z, "rota", "ate", m_gt_0);
|
||||
break;
|
||||
case 's':
|
||||
stem(&z, "msila", "al", m_gt_0) ||
|
||||
stem(&z, "ssenevi", "ive", m_gt_0) ||
|
||||
stem(&z, "ssenluf", "ful", m_gt_0) ||
|
||||
stem(&z, "ssensuo", "ous", m_gt_0);
|
||||
break;
|
||||
case 't':
|
||||
stem(&z, "itila", "al", m_gt_0) ||
|
||||
stem(&z, "itivi", "ive", m_gt_0) ||
|
||||
stem(&z, "itilib", "ble", m_gt_0);
|
||||
break;
|
||||
}
|
||||
|
||||
/* Step 3 */
|
||||
switch( z[0] ){
|
||||
case 'e':
|
||||
stem(&z, "etaci", "ic", m_gt_0) ||
|
||||
stem(&z, "evita", "", m_gt_0) ||
|
||||
stem(&z, "ezila", "al", m_gt_0);
|
||||
break;
|
||||
case 'i':
|
||||
stem(&z, "itici", "ic", m_gt_0);
|
||||
break;
|
||||
case 'l':
|
||||
stem(&z, "laci", "ic", m_gt_0) ||
|
||||
stem(&z, "luf", "", m_gt_0);
|
||||
break;
|
||||
case 's':
|
||||
stem(&z, "ssen", "", m_gt_0);
|
||||
break;
|
||||
}
|
||||
|
||||
/* Step 4 */
|
||||
switch( z[1] ){
|
||||
case 'a':
|
||||
if( z[0]=='l' && m_gt_1(z+2) ){
|
||||
z += 2;
|
||||
}
|
||||
break;
|
||||
case 'c':
|
||||
if( z[0]=='e' && z[2]=='n' && (z[3]=='a' || z[3]=='e') && m_gt_1(z+4) ){
|
||||
z += 4;
|
||||
}
|
||||
break;
|
||||
case 'e':
|
||||
if( z[0]=='r' && m_gt_1(z+2) ){
|
||||
z += 2;
|
||||
}
|
||||
break;
|
||||
case 'i':
|
||||
if( z[0]=='c' && m_gt_1(z+2) ){
|
||||
z += 2;
|
||||
}
|
||||
break;
|
||||
case 'l':
|
||||
if( z[0]=='e' && z[2]=='b' && (z[3]=='a' || z[3]=='i') && m_gt_1(z+4) ){
|
||||
z += 4;
|
||||
}
|
||||
break;
|
||||
case 'n':
|
||||
if( z[0]=='t' ){
|
||||
if( z[2]=='a' ){
|
||||
if( m_gt_1(z+3) ){
|
||||
z += 3;
|
||||
}
|
||||
}else if( z[2]=='e' ){
|
||||
stem(&z, "tneme", "", m_gt_1) ||
|
||||
stem(&z, "tnem", "", m_gt_1) ||
|
||||
stem(&z, "tne", "", m_gt_1);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 'o':
|
||||
if( z[0]=='u' ){
|
||||
if( m_gt_1(z+2) ){
|
||||
z += 2;
|
||||
}
|
||||
}else if( z[3]=='s' || z[3]=='t' ){
|
||||
stem(&z, "noi", "", m_gt_1);
|
||||
}
|
||||
break;
|
||||
case 's':
|
||||
if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){
|
||||
z += 3;
|
||||
}
|
||||
break;
|
||||
case 't':
|
||||
stem(&z, "eta", "", m_gt_1) ||
|
||||
stem(&z, "iti", "", m_gt_1);
|
||||
break;
|
||||
case 'u':
|
||||
if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){
|
||||
z += 3;
|
||||
}
|
||||
break;
|
||||
case 'v':
|
||||
case 'z':
|
||||
if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){
|
||||
z += 3;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
/* Step 5a */
|
||||
if( z[0]=='e' ){
|
||||
if( m_gt_1(z+1) ){
|
||||
z++;
|
||||
}else if( m_eq_1(z+1) && !star_oh(z+1) ){
|
||||
z++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Step 5b */
|
||||
if( m_gt_1(z) && z[0]=='l' && z[1]=='l' ){
|
||||
z++;
|
||||
}
|
||||
|
||||
/* z[] is now the stemmed word in reverse order. Flip it back
|
||||
** around into forward order and return.
|
||||
*/
|
||||
*pnOut = i = strlen(z);
|
||||
zOut[i] = 0;
|
||||
while( *z ){
|
||||
zOut[--i] = *(z++);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
** Characters that can be part of a token. We assume any character
|
||||
** whose value is 0x80 or greater (any non-ASCII UTF byte) can be
|
||||
** part of a token. In other words, delimiters all must have
|
||||
** values of 0x7f or lower.
|
||||
*/
|
||||
/* Lookup table for characters 0x30 through 0x7f.  A 1 means that the
** character can be part of a token.  Index the table by (ch - 0x30). */
static const char isIdChar[] = {
  /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x: digits */
      0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x: A-O */
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x: P-Z and '_' */
      0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x: a-o */
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x: p-z */
};

/* Both macros assign their argument to a variable named "ch" that must
** be declared by the code that uses them.  Any character with the 0x80
** bit set counts as a token character. */
#define idChar(C)  (((ch=C)&0x80)!=0 || (ch>0x2f && isIdChar[ch-0x30]))
#define isDelim(C) (((ch=C)&0x80)==0 && (ch<0x30 || !isIdChar[ch-0x30]))
|
||||
|
||||
/*
|
||||
** Extract the next token from a tokenization cursor. The cursor must
|
||||
** have been opened by a prior call to porterOpen().
|
||||
*/
|
||||
static int porterNext(
|
||||
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by porterOpen */
|
||||
const char **pzToken, /* OUT: *pzToken is the token text */
|
||||
int *pnBytes, /* OUT: Number of bytes in token */
|
||||
int *piStartOffset, /* OUT: Starting offset of token */
|
||||
int *piEndOffset, /* OUT: Ending offset of token */
|
||||
int *piPosition /* OUT: Position integer of token */
|
||||
){
|
||||
porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
|
||||
const char *z = c->zInput;
|
||||
|
||||
while( c->iOffset<c->nInput ){
|
||||
int iStartOffset, ch;
|
||||
|
||||
/* Scan past delimiter characters */
|
||||
while( c->iOffset<c->nInput && isDelim(z[c->iOffset]) ){
|
||||
c->iOffset++;
|
||||
}
|
||||
|
||||
/* Count non-delimiter characters. */
|
||||
iStartOffset = c->iOffset;
|
||||
while( c->iOffset<c->nInput && !isDelim(z[c->iOffset]) ){
|
||||
c->iOffset++;
|
||||
}
|
||||
|
||||
if( c->iOffset>iStartOffset ){
|
||||
int n = c->iOffset-iStartOffset;
|
||||
if( n>c->nAllocated ){
|
||||
c->nAllocated = n+20;
|
||||
c->zToken = realloc(c->zToken, c->nAllocated);
|
||||
if( c->zToken==NULL ) return SQLITE_NOMEM;
|
||||
}
|
||||
porter_stemmer(&z[iStartOffset], n, c->zToken, pnBytes);
|
||||
*pzToken = c->zToken;
|
||||
*piStartOffset = iStartOffset;
|
||||
*piEndOffset = c->iOffset;
|
||||
*piPosition = c->iToken++;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
}
|
||||
return SQLITE_DONE;
|
||||
}
|
||||
|
||||
/*
|
||||
** The set of routines that implement the porter-stemmer tokenizer
|
||||
*/
|
||||
static const sqlite3_tokenizer_module porterTokenizerModule = {
|
||||
0,
|
||||
porterCreate,
|
||||
porterDestroy,
|
||||
porterOpen,
|
||||
porterClose,
|
||||
porterNext,
|
||||
};
|
||||
|
||||
/*
|
||||
** Allocate a new porter tokenizer. Return a pointer to the new
|
||||
** tokenizer in *ppModule
|
||||
*/
|
||||
void sqlite3Fts1PorterTokenizerModule(
|
||||
sqlite3_tokenizer_module const**ppModule
|
||||
){
|
||||
*ppModule = &porterTokenizerModule;
|
||||
}
|
||||
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) */
|
@@ -1,90 +0,0 @@
|
||||
/*
|
||||
** 2006 July 10
|
||||
**
|
||||
** The author disclaims copyright to this source code.
|
||||
**
|
||||
*************************************************************************
|
||||
** Defines the interface to tokenizers used by fulltext-search. There
|
||||
** are three basic components:
|
||||
**
|
||||
** sqlite3_tokenizer_module is a singleton defining the tokenizer
|
||||
** interface functions. This is essentially the class structure for
|
||||
** tokenizers.
|
||||
**
|
||||
** sqlite3_tokenizer is used to define a particular tokenizer, perhaps
|
||||
** including customization information defined at creation time.
|
||||
**
|
||||
** sqlite3_tokenizer_cursor is generated by a tokenizer to generate
|
||||
** tokens from a particular input.
|
||||
*/
|
||||
#ifndef _FTS1_TOKENIZER_H_
|
||||
#define _FTS1_TOKENIZER_H_
|
||||
|
||||
/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time.
|
||||
** If tokenizers are to be allowed to call sqlite3_*() functions, then
|
||||
** we will need a way to register the API consistently.
|
||||
*/
|
||||
#include "sqlite3.h"
|
||||
|
||||
/*
|
||||
** Structures used by the tokenizer interface.
|
||||
*/
|
||||
typedef struct sqlite3_tokenizer sqlite3_tokenizer;
|
||||
typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
|
||||
typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
|
||||
|
||||
struct sqlite3_tokenizer_module {
|
||||
int iVersion; /* currently 0 */
|
||||
|
||||
/*
|
||||
** Create and destroy a tokenizer. argc/argv are passed down from
|
||||
** the fulltext virtual table creation to allow customization.
|
||||
*/
|
||||
int (*xCreate)(int argc, const char *const*argv,
|
||||
sqlite3_tokenizer **ppTokenizer);
|
||||
int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
|
||||
|
||||
/*
|
||||
** Tokenize a particular input. Call xOpen() to prepare to
|
||||
** tokenize, xNext() repeatedly until it returns SQLITE_DONE, then
|
||||
** xClose() to free any internal state. The pInput passed to
|
||||
** xOpen() must exist until the cursor is closed. The ppToken
|
||||
** result from xNext() is only valid until the next call to xNext()
|
||||
** or until xClose() is called.
|
||||
*/
|
||||
/* TODO(shess) current implementation requires pInput to be
|
||||
** nul-terminated. This should either be fixed, or pInput/nBytes
|
||||
** should be converted to zInput.
|
||||
*/
|
||||
int (*xOpen)(sqlite3_tokenizer *pTokenizer,
|
||||
const char *pInput, int nBytes,
|
||||
sqlite3_tokenizer_cursor **ppCursor);
|
||||
int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
|
||||
int (*xNext)(sqlite3_tokenizer_cursor *pCursor,
|
||||
const char **ppToken, int *pnBytes,
|
||||
int *piStartOffset, int *piEndOffset, int *piPosition);
|
||||
};
|
||||
|
||||
struct sqlite3_tokenizer {
|
||||
const sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */
|
||||
/* Tokenizer implementations will typically add additional fields */
|
||||
};
|
||||
|
||||
struct sqlite3_tokenizer_cursor {
|
||||
sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */
|
||||
/* Tokenizer implementations will typically add additional fields */
|
||||
};
|
||||
|
||||
/*
|
||||
** Get the module for a tokenizer which generates tokens based on a
|
||||
** set of non-token characters. The default is to break tokens at any
|
||||
** non-alnum character, though the set of delimiters can also be
|
||||
** specified by the first argv argument to xCreate().
|
||||
*/
|
||||
/* TODO(shess) This doesn't belong here. Need some sort of
|
||||
** registration process.
|
||||
*/
|
||||
void sqlite3Fts1SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
|
||||
void sqlite3Fts1PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule);
|
||||
|
||||
#endif /* _FTS1_TOKENIZER_H_ */
|
@@ -1,221 +0,0 @@
|
||||
/*
|
||||
** The author disclaims copyright to this source code.
|
||||
**
|
||||
*************************************************************************
|
||||
** Implementation of the "simple" full-text-search tokenizer.
|
||||
*/
|
||||
|
||||
/*
|
||||
** The code in this file is only compiled if:
|
||||
**
|
||||
** * The FTS1 module is being built as an extension
|
||||
** (in which case SQLITE_CORE is not defined), or
|
||||
**
|
||||
** * The FTS1 module is being built into the core of
|
||||
** SQLite (in which case SQLITE_ENABLE_FTS1 is defined).
|
||||
*/
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)
|
||||
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "fts1_tokenizer.h"
|
||||
|
||||
typedef struct simple_tokenizer {
|
||||
sqlite3_tokenizer base;
|
||||
char delim[128]; /* flag ASCII delimiters */
|
||||
} simple_tokenizer;
|
||||
|
||||
typedef struct simple_tokenizer_cursor {
|
||||
sqlite3_tokenizer_cursor base;
|
||||
const char *pInput; /* input we are tokenizing */
|
||||
int nBytes; /* size of the input */
|
||||
int iOffset; /* current position in pInput */
|
||||
int iToken; /* index of next token to be returned */
|
||||
char *pToken; /* storage for current token */
|
||||
int nTokenAllocated; /* space allocated to zToken buffer */
|
||||
} simple_tokenizer_cursor;
|
||||
|
||||
|
||||
/* Forward declaration */
|
||||
static const sqlite3_tokenizer_module simpleTokenizerModule;
|
||||
|
||||
/*
** Return 1 if byte c is flagged as a delimiter in tokenizer t, else 0.
** Bytes 0x80 and above are never treated as delimiters.
*/
static int isDelim(simple_tokenizer *t, unsigned char c){
  if( c>=0x80 ) return 0;
  return t->delim[c]!=0;
}
|
||||
|
||||
/*
|
||||
** Create a new tokenizer instance.
|
||||
*/
|
||||
static int simpleCreate(
|
||||
int argc, const char * const *argv,
|
||||
sqlite3_tokenizer **ppTokenizer
|
||||
){
|
||||
simple_tokenizer *t;
|
||||
|
||||
t = (simple_tokenizer *) calloc(sizeof(*t), 1);
|
||||
if( t==NULL ) return SQLITE_NOMEM;
|
||||
|
||||
/* TODO(shess) Delimiters need to remain the same from run to run,
|
||||
** else we need to reindex. One solution would be a meta-table to
|
||||
** track such information in the database, then we'd only want this
|
||||
** information on the initial create.
|
||||
*/
|
||||
if( argc>1 ){
|
||||
int i, n = strlen(argv[1]);
|
||||
for(i=0; i<n; i++){
|
||||
unsigned char ch = argv[1][i];
|
||||
/* We explicitly don't support UTF-8 delimiters for now. */
|
||||
if( ch>=0x80 ){
|
||||
free(t);
|
||||
return SQLITE_ERROR;
|
||||
}
|
||||
t->delim[ch] = 1;
|
||||
}
|
||||
} else {
|
||||
/* Mark non-alphanumeric ASCII characters as delimiters */
|
||||
int i;
|
||||
for(i=1; i<0x80; i++){
|
||||
t->delim[i] = !isalnum(i);
|
||||
}
|
||||
}
|
||||
|
||||
*ppTokenizer = &t->base;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Destroy a tokenizer
|
||||
*/
|
||||
static int simpleDestroy(sqlite3_tokenizer *pTokenizer){
  /* The tokenizer is one heap block whose first member is the base
  ** object, so releasing the base pointer releases all of it. */
  free((void *)pTokenizer);
  return SQLITE_OK;
}
|
||||
|
||||
/*
|
||||
** Prepare to begin tokenizing a particular string. The input
|
||||
** string to be tokenized is pInput[0..nBytes-1]. A cursor
|
||||
** used to incrementally tokenize this string is returned in
|
||||
** *ppCursor.
|
||||
*/
|
||||
static int simpleOpen(
|
||||
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
|
||||
const char *pInput, int nBytes, /* String to be tokenized */
|
||||
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
|
||||
){
|
||||
simple_tokenizer_cursor *c;
|
||||
|
||||
c = (simple_tokenizer_cursor *) malloc(sizeof(*c));
|
||||
if( c==NULL ) return SQLITE_NOMEM;
|
||||
|
||||
c->pInput = pInput;
|
||||
if( pInput==0 ){
|
||||
c->nBytes = 0;
|
||||
}else if( nBytes<0 ){
|
||||
c->nBytes = (int)strlen(pInput);
|
||||
}else{
|
||||
c->nBytes = nBytes;
|
||||
}
|
||||
c->iOffset = 0; /* start tokenizing at the beginning */
|
||||
c->iToken = 0;
|
||||
c->pToken = NULL; /* no space allocated, yet. */
|
||||
c->nTokenAllocated = 0;
|
||||
|
||||
*ppCursor = &c->base;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Close a tokenization cursor previously opened by a call to
|
||||
** simpleOpen() above.
|
||||
*/
|
||||
static int simpleClose(sqlite3_tokenizer_cursor *pCursor){
  simple_tokenizer_cursor *pCsr = (simple_tokenizer_cursor *) pCursor;

  /* pToken is NULL if no token was ever produced; free(NULL) is a no-op. */
  free(pCsr->pToken);
  free(pCsr);
  return SQLITE_OK;
}
|
||||
|
||||
/*
|
||||
** Extract the next token from a tokenization cursor. The cursor must
|
||||
** have been opened by a prior call to simpleOpen().
|
||||
*/
|
||||
static int simpleNext(
|
||||
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */
|
||||
const char **ppToken, /* OUT: *ppToken is the token text */
|
||||
int *pnBytes, /* OUT: Number of bytes in token */
|
||||
int *piStartOffset, /* OUT: Starting offset of token */
|
||||
int *piEndOffset, /* OUT: Ending offset of token */
|
||||
int *piPosition /* OUT: Position integer of token */
|
||||
){
|
||||
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
|
||||
simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer;
|
||||
unsigned char *p = (unsigned char *)c->pInput;
|
||||
|
||||
while( c->iOffset<c->nBytes ){
|
||||
int iStartOffset;
|
||||
|
||||
/* Scan past delimiter characters */
|
||||
while( c->iOffset<c->nBytes && isDelim(t, p[c->iOffset]) ){
|
||||
c->iOffset++;
|
||||
}
|
||||
|
||||
/* Count non-delimiter characters. */
|
||||
iStartOffset = c->iOffset;
|
||||
while( c->iOffset<c->nBytes && !isDelim(t, p[c->iOffset]) ){
|
||||
c->iOffset++;
|
||||
}
|
||||
|
||||
if( c->iOffset>iStartOffset ){
|
||||
int i, n = c->iOffset-iStartOffset;
|
||||
if( n>c->nTokenAllocated ){
|
||||
c->nTokenAllocated = n+20;
|
||||
c->pToken = realloc(c->pToken, c->nTokenAllocated);
|
||||
if( c->pToken==NULL ) return SQLITE_NOMEM;
|
||||
}
|
||||
for(i=0; i<n; i++){
|
||||
/* TODO(shess) This needs expansion to handle UTF-8
|
||||
** case-insensitivity.
|
||||
*/
|
||||
unsigned char ch = p[iStartOffset+i];
|
||||
c->pToken[i] = ch<0x80 ? tolower(ch) : ch;
|
||||
}
|
||||
*ppToken = c->pToken;
|
||||
*pnBytes = n;
|
||||
*piStartOffset = iStartOffset;
|
||||
*piEndOffset = c->iOffset;
|
||||
*piPosition = c->iToken++;
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
}
|
||||
return SQLITE_DONE;
|
||||
}
|
||||
|
||||
/*
|
||||
** The set of routines that implement the simple tokenizer
|
||||
*/
|
||||
static const sqlite3_tokenizer_module simpleTokenizerModule = {
|
||||
0,
|
||||
simpleCreate,
|
||||
simpleDestroy,
|
||||
simpleOpen,
|
||||
simpleClose,
|
||||
simpleNext,
|
||||
};
|
||||
|
||||
/*
|
||||
** Return a pointer to the simple tokenizer module in *ppModule.
** The module is a static object; nothing is allocated here.
|
||||
*/
|
||||
void sqlite3Fts1SimpleTokenizerModule(
|
||||
sqlite3_tokenizer_module const**ppModule
|
||||
){
|
||||
*ppModule = &simpleTokenizerModule;
|
||||
}
|
||||
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) */
|
1511
ext/fts1/fulltext.c
1511
ext/fts1/fulltext.c
File diff suppressed because it is too large
Load Diff
@@ -1,11 +0,0 @@
|
||||
#include "sqlite3.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
int fulltext_init(sqlite3 *db);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif /* __cplusplus */
|
@@ -1,174 +0,0 @@
|
||||
/*
|
||||
** The author disclaims copyright to this source code.
|
||||
**
|
||||
*************************************************************************
|
||||
** Implementation of the "simple" full-text-search tokenizer.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#if !defined(__APPLE__)
|
||||
#include <malloc.h>
|
||||
#else
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "tokenizer.h"
|
||||
|
||||
/* Duplicate a string; the caller must free() the returned string.
|
||||
* (We don't use strdup() since it's not part of the standard C library and
|
||||
* may not be available everywhere.) */
|
||||
/* TODO(shess) Copied from fulltext.c, consider util.c for such
|
||||
** things. */
|
||||
/*
** Return a heap-allocated copy of the nul-terminated string s, or NULL
** if the allocation fails.  The caller owns the result and must free() it.
*/
static char *string_dup(const char *s){
  size_t nByte = strlen(s) + 1;      /* include the nul terminator */
  char *str = malloc(nByte);
  if( str!=NULL ){
    memcpy(str, s, nByte);
  }
  return str;
}
|
||||
|
||||
typedef struct simple_tokenizer {
|
||||
sqlite3_tokenizer base;
|
||||
const char *zDelim; /* token delimiters */
|
||||
} simple_tokenizer;
|
||||
|
||||
typedef struct simple_tokenizer_cursor {
|
||||
sqlite3_tokenizer_cursor base;
|
||||
const char *pInput; /* input we are tokenizing */
|
||||
int nBytes; /* size of the input */
|
||||
const char *pCurrent; /* current position in pInput */
|
||||
int iToken; /* index of next token to be returned */
|
||||
char *zToken; /* storage for current token */
|
||||
int nTokenBytes; /* actual size of current token */
|
||||
int nTokenAllocated; /* space allocated to zToken buffer */
|
||||
} simple_tokenizer_cursor;
|
||||
|
||||
static sqlite3_tokenizer_module simpleTokenizerModule;/* forward declaration */
|
||||
|
||||
static int simpleCreate(
|
||||
int argc, const char **argv,
|
||||
sqlite3_tokenizer **ppTokenizer
|
||||
){
|
||||
simple_tokenizer *t;
|
||||
|
||||
t = (simple_tokenizer *) malloc(sizeof(simple_tokenizer));
|
||||
/* TODO(shess) Delimiters need to remain the same from run to run,
|
||||
** else we need to reindex. One solution would be a meta-table to
|
||||
** track such information in the database, then we'd only want this
|
||||
** information on the initial create.
|
||||
*/
|
||||
if( argc>1 ){
|
||||
t->zDelim = string_dup(argv[1]);
|
||||
} else {
|
||||
/* Build a string excluding alphanumeric ASCII characters */
|
||||
char zDelim[0x80]; /* nul-terminated, so nul not a member */
|
||||
int i, j;
|
||||
for(i=1, j=0; i<0x80; i++){
|
||||
if( !isalnum(i) ){
|
||||
zDelim[j++] = i;
|
||||
}
|
||||
}
|
||||
zDelim[j++] = '\0';
|
||||
assert( j<=sizeof(zDelim) );
|
||||
t->zDelim = string_dup(zDelim);
|
||||
}
|
||||
|
||||
*ppTokenizer = &t->base;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
** Release a tokenizer created by simpleCreate(): first the delimiter
** string it owns, then the tokenizer object itself.
*/
static int simpleDestroy(sqlite3_tokenizer *pTokenizer){
  simple_tokenizer *pSimple = (simple_tokenizer *) pTokenizer;
  void *pDelim = (void *) pSimple->zDelim;   /* cast drops the const qualifier */

  free(pDelim);
  free(pSimple);
  return SQLITE_OK;
}
|
||||
|
||||
static int simpleOpen(
|
||||
sqlite3_tokenizer *pTokenizer,
|
||||
const char *pInput, int nBytes,
|
||||
sqlite3_tokenizer_cursor **ppCursor
|
||||
){
|
||||
simple_tokenizer_cursor *c;
|
||||
|
||||
c = (simple_tokenizer_cursor *) malloc(sizeof(simple_tokenizer_cursor));
|
||||
c->pInput = pInput;
|
||||
c->nBytes = nBytes<0 ? (int) strlen(pInput) : nBytes;
|
||||
c->pCurrent = c->pInput; /* start tokenizing at the beginning */
|
||||
c->iToken = 0;
|
||||
c->zToken = NULL; /* no space allocated, yet. */
|
||||
c->nTokenBytes = 0;
|
||||
c->nTokenAllocated = 0;
|
||||
|
||||
*ppCursor = &c->base;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
** Close a cursor opened by simpleOpen(), releasing its token buffer
** and the cursor itself.
*/
static int simpleClose(sqlite3_tokenizer_cursor *pCursor){
  simple_tokenizer_cursor *pCsr = (simple_tokenizer_cursor *) pCursor;

  /* free(NULL) is a no-op, so the buffer needs no NULL guard. */
  free(pCsr->zToken);
  free(pCsr);
  return SQLITE_OK;
}
|
||||
|
||||
static int simpleNext(
|
||||
sqlite3_tokenizer_cursor *pCursor,
|
||||
const char **ppToken, int *pnBytes,
|
||||
int *piStartOffset, int *piEndOffset, int *piPosition
|
||||
){
|
||||
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
|
||||
simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer;
|
||||
int ii;
|
||||
|
||||
while( c->pCurrent-c->pInput<c->nBytes ){
|
||||
int n = (int) strcspn(c->pCurrent, t->zDelim);
|
||||
if( n>0 ){
|
||||
if( n+1>c->nTokenAllocated ){
|
||||
c->zToken = realloc(c->zToken, n+1);
|
||||
}
|
||||
for(ii=0; ii<n; ii++){
|
||||
/* TODO(shess) This needs expansion to handle UTF-8
|
||||
** case-insensitivity.
|
||||
*/
|
||||
char ch = c->pCurrent[ii];
|
||||
c->zToken[ii] = (unsigned char)ch<0x80 ? tolower((unsigned char)ch):ch;
|
||||
}
|
||||
c->zToken[n] = '\0';
|
||||
*ppToken = c->zToken;
|
||||
*pnBytes = n;
|
||||
*piStartOffset = (int) (c->pCurrent-c->pInput);
|
||||
*piEndOffset = *piStartOffset+n;
|
||||
*piPosition = c->iToken++;
|
||||
c->pCurrent += n + 1;
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
c->pCurrent += n + 1;
|
||||
/* TODO(shess) could strspn() to skip delimiters en masse. Needs
|
||||
** to happen in two places, though, which is annoying.
|
||||
*/
|
||||
}
|
||||
return SQLITE_DONE;
|
||||
}
|
||||
|
||||
static sqlite3_tokenizer_module simpleTokenizerModule = {
|
||||
0,
|
||||
simpleCreate,
|
||||
simpleDestroy,
|
||||
simpleOpen,
|
||||
simpleClose,
|
||||
simpleNext,
|
||||
};
|
||||
|
||||
void get_simple_tokenizer_module(
|
||||
sqlite3_tokenizer_module **ppModule
|
||||
){
|
||||
*ppModule = &simpleTokenizerModule;
|
||||
}
|
@@ -1,89 +0,0 @@
|
||||
/*
|
||||
** 2006 July 10
|
||||
**
|
||||
** The author disclaims copyright to this source code.
|
||||
**
|
||||
*************************************************************************
|
||||
** Defines the interface to tokenizers used by fulltext-search. There
|
||||
** are three basic components:
|
||||
**
|
||||
** sqlite3_tokenizer_module is a singleton defining the tokenizer
|
||||
** interface functions. This is essentially the class structure for
|
||||
** tokenizers.
|
||||
**
|
||||
** sqlite3_tokenizer is used to define a particular tokenizer, perhaps
|
||||
** including customization information defined at creation time.
|
||||
**
|
||||
** sqlite3_tokenizer_cursor is generated by a tokenizer to generate
|
||||
** tokens from a particular input.
|
||||
*/
|
||||
#ifndef _TOKENIZER_H_
|
||||
#define _TOKENIZER_H_
|
||||
|
||||
/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time.
|
||||
** If tokenizers are to be allowed to call sqlite3_*() functions, then
|
||||
** we will need a way to register the API consistently.
|
||||
*/
|
||||
#include "sqlite3.h"
|
||||
|
||||
/*
|
||||
** Structures used by the tokenizer interface.
|
||||
*/
|
||||
typedef struct sqlite3_tokenizer sqlite3_tokenizer;
|
||||
typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
|
||||
typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
|
||||
|
||||
struct sqlite3_tokenizer_module {
|
||||
int iVersion; /* currently 0 */
|
||||
|
||||
/*
|
||||
** Create and destroy a tokenizer. argc/argv are passed down from
|
||||
** the fulltext virtual table creation to allow customization.
|
||||
*/
|
||||
int (*xCreate)(int argc, const char **argv,
|
||||
sqlite3_tokenizer **ppTokenizer);
|
||||
int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
|
||||
|
||||
/*
|
||||
** Tokenize a particular input. Call xOpen() to prepare to
|
||||
** tokenize, xNext() repeatedly until it returns SQLITE_DONE, then
|
||||
** xClose() to free any internal state. The pInput passed to
|
||||
** xOpen() must exist until the cursor is closed. The ppToken
|
||||
** result from xNext() is only valid until the next call to xNext()
|
||||
** or until xClose() is called.
|
||||
*/
|
||||
/* TODO(shess) current implementation requires pInput to be
|
||||
** nul-terminated. This should either be fixed, or pInput/nBytes
|
||||
** should be converted to zInput.
|
||||
*/
|
||||
int (*xOpen)(sqlite3_tokenizer *pTokenizer,
|
||||
const char *pInput, int nBytes,
|
||||
sqlite3_tokenizer_cursor **ppCursor);
|
||||
int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
|
||||
int (*xNext)(sqlite3_tokenizer_cursor *pCursor,
|
||||
const char **ppToken, int *pnBytes,
|
||||
int *piStartOffset, int *piEndOffset, int *piPosition);
|
||||
};
|
||||
|
||||
struct sqlite3_tokenizer {
|
||||
sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */
|
||||
/* Tokenizer implementations will typically add additional fields */
|
||||
};
|
||||
|
||||
struct sqlite3_tokenizer_cursor {
|
||||
sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */
|
||||
/* Tokenizer implementations will typically add additional fields */
|
||||
};
|
||||
|
||||
/*
|
||||
** Get the module for a tokenizer which generates tokens based on a
|
||||
** set of non-token characters. The default is to break tokens at any
|
||||
** non-alnum character, though the set of delimiters can also be
|
||||
** specified by the first argv argument to xCreate().
|
||||
*/
|
||||
/* TODO(shess) This doesn't belong here. Need some sort of
|
||||
** registration process.
|
||||
*/
|
||||
void get_simple_tokenizer_module(sqlite3_tokenizer_module **ppModule);
|
||||
|
||||
#endif /* _TOKENIZER_H_ */
|
@@ -1,133 +0,0 @@
|
||||
|
||||
1. FTS2 Tokenizers
|
||||
|
||||
When creating a new full-text table, FTS2 allows the user to select
|
||||
the text tokenizer implementation to be used when indexing text
|
||||
by specifying a "tokenizer" clause as part of the CREATE VIRTUAL TABLE
|
||||
statement:
|
||||
|
||||
CREATE VIRTUAL TABLE <table-name> USING fts2(
|
||||
<columns ...> [, tokenizer <tokenizer-name> [<tokenizer-args>]]
|
||||
);
|
||||
|
||||
The built-in tokenizers (valid values to pass as <tokenizer name>) are
|
||||
"simple" and "porter".
|
||||
|
||||
<tokenizer-args> should consist of zero or more white-space separated
|
||||
arguments to pass to the selected tokenizer implementation. The
|
||||
interpretation of the arguments, if any, depends on the individual
|
||||
tokenizer.
|
||||
|
||||
2. Custom Tokenizers
|
||||
|
||||
FTS2 allows users to provide custom tokenizer implementations. The
|
||||
interface used to create a new tokenizer is defined and described in
|
||||
the fts2_tokenizer.h source file.
|
||||
|
||||
Registering a new FTS2 tokenizer is similar to registering a new
|
||||
virtual table module with SQLite. The user passes a pointer to a
|
||||
structure containing pointers to various callback functions that
|
||||
make up the implementation of the new tokenizer type. For tokenizers,
|
||||
the structure (defined in fts2_tokenizer.h) is called
|
||||
"sqlite3_tokenizer_module".
|
||||
|
||||
FTS2 does not expose a C-function that users call to register new
|
||||
tokenizer types with a database handle. Instead, the pointer must
|
||||
be encoded as an SQL blob value and passed to FTS2 through the SQL
|
||||
engine by evaluating a special scalar function, "fts2_tokenizer()".
|
||||
The fts2_tokenizer() function may be called with one or two arguments,
|
||||
as follows:
|
||||
|
||||
SELECT fts2_tokenizer(<tokenizer-name>);
|
||||
SELECT fts2_tokenizer(<tokenizer-name>, <sqlite3_tokenizer_module ptr>);
|
||||
|
||||
Where <tokenizer-name> is a string identifying the tokenizer and
|
||||
<sqlite3_tokenizer_module ptr> is a pointer to an sqlite3_tokenizer_module
|
||||
structure encoded as an SQL blob. If the second argument is present,
|
||||
it is registered as tokenizer <tokenizer-name> and a copy of it
|
||||
returned. If only one argument is passed, a pointer to the tokenizer
|
||||
implementation currently registered as <tokenizer-name> is returned,
|
||||
encoded as a blob. Or, if no such tokenizer exists, an SQL exception
|
||||
(error) is raised.
|
||||
|
||||
SECURITY: If the fts2 extension is used in an environment where potentially
|
||||
malicious users may execute arbitrary SQL (e.g. Gears), they should be
|
||||
prevented from invoking the fts2_tokenizer() function, possibly using the
|
||||
authorisation callback.
|
||||
|
||||
See "Sample code" below for an example of calling the fts2_tokenizer()
|
||||
function from C code.
|
||||
|
||||
3. ICU Library Tokenizers
|
||||
|
||||
If this extension is compiled with the SQLITE_ENABLE_ICU pre-processor
|
||||
symbol defined, then there exists a built-in tokenizer named "icu"
|
||||
implemented using the ICU library. The first argument passed to the
|
||||
xCreate() method (see fts2_tokenizer.h) of this tokenizer may be
|
||||
an ICU locale identifier. For example "tr_TR" for Turkish as used
|
||||
in Turkey, or "en_AU" for English as used in Australia. For example:
|
||||
|
||||
"CREATE VIRTUAL TABLE thai_text USING fts2(text, tokenizer icu th_TH)"
|
||||
|
||||
The ICU tokenizer implementation is very simple. It splits the input
|
||||
text according to the ICU rules for finding word boundaries and discards
|
||||
any tokens that consist entirely of white-space. This may be suitable
|
||||
for some applications in some locales, but not all. If more complex
|
||||
processing is required, for example to implement stemming or
|
||||
discard punctuation, this can be done by creating a tokenizer
|
||||
implementation that uses the ICU tokenizer as part of its implementation.
|
||||
|
||||
When using the ICU tokenizer this way, it is safe to overwrite the
|
||||
contents of the strings returned by the xNext() method (see
|
||||
fts2_tokenizer.h).
|
||||
|
||||
4. Sample code.
|
||||
|
||||
The following two code samples illustrate the way C code should invoke
|
||||
the fts2_tokenizer() scalar function:
|
||||
|
||||
int registerTokenizer(
|
||||
sqlite3 *db,
|
||||
char *zName,
|
||||
const sqlite3_tokenizer_module *p
|
||||
){
|
||||
int rc;
|
||||
sqlite3_stmt *pStmt;
|
||||
const char zSql[] = "SELECT fts2_tokenizer(?, ?)";
|
||||
|
||||
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
|
||||
if( rc!=SQLITE_OK ){
|
||||
return rc;
|
||||
}
|
||||
|
||||
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
|
||||
sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
|
||||
sqlite3_step(pStmt);
|
||||
|
||||
return sqlite3_finalize(pStmt);
|
||||
}
|
||||
|
||||
int queryTokenizer(
|
||||
sqlite3 *db,
|
||||
char *zName,
|
||||
const sqlite3_tokenizer_module **pp
|
||||
){
|
||||
int rc;
|
||||
sqlite3_stmt *pStmt;
|
||||
const char zSql[] = "SELECT fts2_tokenizer(?)";
|
||||
|
||||
*pp = 0;
|
||||
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
|
||||
if( rc!=SQLITE_OK ){
|
||||
return rc;
|
||||
}
|
||||
|
||||
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
|
||||
if( SQLITE_ROW==sqlite3_step(pStmt) ){
|
||||
if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
|
||||
memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
|
||||
}
|
||||
}
|
||||
|
||||
return sqlite3_finalize(pStmt);
|
||||
}
|
@@ -1,4 +0,0 @@
|
||||
This folder contains source code to the second full-text search
|
||||
extension for SQLite. While the API is the same, this version uses a
|
||||
substantially different storage schema from fts1, so tables will need
|
||||
to be rebuilt.
|
6860
ext/fts2/fts2.c
6860
ext/fts2/fts2.c
File diff suppressed because it is too large
Load Diff
@@ -1,26 +0,0 @@
|
||||
/*
|
||||
** 2006 Oct 10
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
******************************************************************************
|
||||
**
|
||||
** This header file is used by programs that want to link against the
|
||||
** FTS2 library. All it does is declare the sqlite3Fts2Init() interface.
|
||||
*/
|
||||
#include "sqlite3.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
int sqlite3Fts2Init(sqlite3 *db);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif /* __cplusplus */
|
@@ -1,376 +0,0 @@
|
||||
/*
|
||||
** 2001 September 22
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
** This is the implementation of generic hash-tables used in SQLite.
|
||||
** We've modified it slightly to serve as a standalone hash table
|
||||
** implementation for the full-text indexing module.
|
||||
*/
|
||||
|
||||
/*
|
||||
** The code in this file is only compiled if:
|
||||
**
|
||||
** * The FTS2 module is being built as an extension
|
||||
** (in which case SQLITE_CORE is not defined), or
|
||||
**
|
||||
** * The FTS2 module is being built into the core of
|
||||
** SQLite (in which case SQLITE_ENABLE_FTS2 is defined).
|
||||
*/
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "sqlite3.h"
|
||||
#include "sqlite3ext.h"
|
||||
SQLITE_EXTENSION_INIT3
|
||||
#include "fts2_hash.h"
|
||||
|
||||
/*
|
||||
** Malloc and Free functions
|
||||
*/
|
||||
/* Allocate n bytes with sqlite3_malloc() and zero them.  Returns NULL
** if the allocation fails. */
static void *fts2HashMalloc(int n){
  void *pBuf = sqlite3_malloc(n);
  if( pBuf==0 ) return 0;
  memset(pBuf, 0, n);
  return pBuf;
}
|
||||
/* Release memory obtained from fts2HashMalloc(). */
static void fts2HashFree(void *p){
  sqlite3_free(p);
}
|
||||
|
||||
/* Turn bulk memory into a hash table object by initializing the
|
||||
** fields of the Hash structure.
|
||||
**
|
||||
** "pNew" is a pointer to the hash table that is to be initialized.
|
||||
** keyClass is one of the constants
|
||||
** FTS2_HASH_BINARY or FTS2_HASH_STRING. The value of keyClass
|
||||
** determines what kind of key the hash table will use. "copyKey" is
|
||||
** true if the hash table should make its own private copy of keys and
|
||||
** false if it should just use the supplied pointer.
|
||||
*/
|
||||
void sqlite3Fts2HashInit(fts2Hash *pNew, int keyClass, int copyKey){
  assert( pNew!=0 );
  assert( keyClass>=FTS2_HASH_STRING && keyClass<=FTS2_HASH_BINARY );

  /* Start with an empty table: no buckets and no elements. */
  pNew->ht = 0;
  pNew->htsize = 0;
  pNew->first = 0;
  pNew->count = 0;

  /* Remember how keys are hashed/compared and whether they are copied. */
  pNew->keyClass = keyClass;
  pNew->copyKey = copyKey;
}
|
||||
|
||||
/* Remove all entries from a hash table. Reclaim all memory.
|
||||
** Call this routine to delete a hash table or to reset a hash table
|
||||
** to the empty state.
|
||||
*/
|
||||
void sqlite3Fts2HashClear(fts2Hash *pH){
  fts2HashElem *pElem;       /* Element currently being freed */
  fts2HashElem *pNext;       /* Successor, saved before pElem is freed */

  assert( pH!=0 );

  /* Detach the element list and drop the bucket array first. */
  pElem = pH->first;
  pH->first = 0;
  fts2HashFree(pH->ht);
  pH->ht = 0;
  pH->htsize = 0;

  /* Free every element, plus its key when the table owns key copies. */
  for(; pElem; pElem=pNext){
    pNext = pElem->next;
    if( pH->copyKey && elem_has_key_guard_unused_placeholder ){
    }
  }
  pH->count = 0;
}
|
||||
|
||||
/*
|
||||
** Hash and comparison functions when the mode is FTS2_HASH_STRING
|
||||
*/
|
||||
/*
** Hash a string key.  If nKey<=0 the length is taken from strlen().
** The accumulator is unsigned so that the left shift is well defined
** even after the value outgrows 31 bits; the bit pattern produced is
** the one a two's-complement signed accumulator would yield.  The result
** is masked to be non-negative.
*/
static int strHash(const void *pKey, int nKey){
  const char *z = (const char *)pKey;
  unsigned int h = 0;
  if( nKey<=0 ) nKey = (int) strlen(z);
  while( nKey > 0 ){
    h = (h<<3) ^ h ^ (unsigned int)*z++;
    nKey--;
  }
  return (int)(h & 0x7fffffff);
}
|
||||
/* Compare two string keys.  Keys of different length never match (the
** result is 1); otherwise the result is that of strncmp() over n1 bytes. */
static int strCompare(const void *pKey1, int n1, const void *pKey2, int n2){
  const char *zLeft = (const char*)pKey1;
  const char *zRight = (const char*)pKey2;
  return n1==n2 ? strncmp(zLeft, zRight, n1) : 1;
}
|
||||
|
||||
/*
|
||||
** Hash and comparison functions when the mode is FTS2_HASH_BINARY
|
||||
*/
|
||||
/*
** Hash a binary key of exactly nKey bytes (nKey<=0 hashes nothing and
** yields 0).  The accumulator is unsigned so that the left shift is well
** defined even after the value outgrows 31 bits; the bit pattern produced
** is the one a two's-complement signed accumulator would yield.  The
** result is masked to be non-negative.
*/
static int binHash(const void *pKey, int nKey){
  unsigned int h = 0;
  const char *z = (const char *)pKey;
  while( nKey-- > 0 ){
    h = (h<<3) ^ h ^ (unsigned int)*(z++);
  }
  return (int)(h & 0x7fffffff);
}
|
||||
/* Compare two binary keys.  Keys of different length never match (the
** result is 1); otherwise the result is that of memcmp() over n1 bytes. */
static int binCompare(const void *pKey1, int n1, const void *pKey2, int n2){
  return n1==n2 ? memcmp(pKey1, pKey2, n1) : 1;
}
|
||||
|
||||
/*
|
||||
** Return a pointer to the appropriate hash function given the key class.
|
||||
**
|
||||
** The C syntax in this function definition may be unfamiliar to some
|
||||
** programmers, so we provide the following additional explanation:
|
||||
**
|
||||
** The name of the function is "hashFunction". The function takes a
|
||||
** single parameter "keyClass". The return value of hashFunction()
|
||||
** is a pointer to another function. Specifically, the return value
|
||||
** of hashFunction() is a pointer to a function that takes two parameters
|
||||
** with types "const void*" and "int" and returns an "int".
|
||||
*/
|
||||
static int (*hashFunction(int keyClass))(const void*,int){
|
||||
if( keyClass==FTS2_HASH_STRING ){
|
||||
return &strHash;
|
||||
}else{
|
||||
assert( keyClass==FTS2_HASH_BINARY );
|
||||
return &binHash;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
** Return a pointer to the appropriate hash function given the key class.
|
||||
**
|
||||
** For help in interpreted the obscure C code in the function definition,
|
||||
** see the header comment on the previous function.
|
||||
*/
|
||||
static int (*compareFunction(int keyClass))(const void*,int,const void*,int){
|
||||
if( keyClass==FTS2_HASH_STRING ){
|
||||
return &strCompare;
|
||||
}else{
|
||||
assert( keyClass==FTS2_HASH_BINARY );
|
||||
return &binCompare;
|
||||
}
|
||||
}
|
||||
|
||||
/* Link an element into the hash table
|
||||
*/
|
||||
static void insertElement(
|
||||
fts2Hash *pH, /* The complete hash table */
|
||||
struct _fts2ht *pEntry, /* The entry into which pNew is inserted */
|
||||
fts2HashElem *pNew /* The element to be inserted */
|
||||
){
|
||||
fts2HashElem *pHead; /* First element already in pEntry */
|
||||
pHead = pEntry->chain;
|
||||
if( pHead ){
|
||||
pNew->next = pHead;
|
||||
pNew->prev = pHead->prev;
|
||||
if( pHead->prev ){ pHead->prev->next = pNew; }
|
||||
else { pH->first = pNew; }
|
||||
pHead->prev = pNew;
|
||||
}else{
|
||||
pNew->next = pH->first;
|
||||
if( pH->first ){ pH->first->prev = pNew; }
|
||||
pNew->prev = 0;
|
||||
pH->first = pNew;
|
||||
}
|
||||
pEntry->count++;
|
||||
pEntry->chain = pNew;
|
||||
}
|
||||
|
||||
|
||||
/* Resize the hash table so that it cantains "new_size" buckets.
|
||||
** "new_size" must be a power of 2. The hash table might fail
|
||||
** to resize if sqliteMalloc() fails.
|
||||
*/
|
||||
static void rehash(fts2Hash *pH, int new_size){
|
||||
struct _fts2ht *new_ht; /* The new hash table */
|
||||
fts2HashElem *elem, *next_elem; /* For looping over existing elements */
|
||||
int (*xHash)(const void*,int); /* The hash function */
|
||||
|
||||
assert( (new_size & (new_size-1))==0 );
|
||||
new_ht = (struct _fts2ht *)fts2HashMalloc( new_size*sizeof(struct _fts2ht) );
|
||||
if( new_ht==0 ) return;
|
||||
fts2HashFree(pH->ht);
|
||||
pH->ht = new_ht;
|
||||
pH->htsize = new_size;
|
||||
xHash = hashFunction(pH->keyClass);
|
||||
for(elem=pH->first, pH->first=0; elem; elem = next_elem){
|
||||
int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1);
|
||||
next_elem = elem->next;
|
||||
insertElement(pH, &new_ht[h], elem);
|
||||
}
|
||||
}
|
||||
|
||||
/* This function (for internal use only) locates an element in an
|
||||
** hash table that matches the given key. The hash for this key has
|
||||
** already been computed and is passed as the 4th parameter.
|
||||
*/
|
||||
static fts2HashElem *findElementGivenHash(
|
||||
const fts2Hash *pH, /* The pH to be searched */
|
||||
const void *pKey, /* The key we are searching for */
|
||||
int nKey,
|
||||
int h /* The hash for this key. */
|
||||
){
|
||||
fts2HashElem *elem; /* Used to loop thru the element list */
|
||||
int count; /* Number of elements left to test */
|
||||
int (*xCompare)(const void*,int,const void*,int); /* comparison function */
|
||||
|
||||
if( pH->ht ){
|
||||
struct _fts2ht *pEntry = &pH->ht[h];
|
||||
elem = pEntry->chain;
|
||||
count = pEntry->count;
|
||||
xCompare = compareFunction(pH->keyClass);
|
||||
while( count-- && elem ){
|
||||
if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){
|
||||
return elem;
|
||||
}
|
||||
elem = elem->next;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Remove a single entry from the hash table given a pointer to that
|
||||
** element and a hash on the element's key.
|
||||
*/
|
||||
static void removeElementGivenHash(
|
||||
fts2Hash *pH, /* The pH containing "elem" */
|
||||
fts2HashElem* elem, /* The element to be removed from the pH */
|
||||
int h /* Hash value for the element */
|
||||
){
|
||||
struct _fts2ht *pEntry;
|
||||
if( elem->prev ){
|
||||
elem->prev->next = elem->next;
|
||||
}else{
|
||||
pH->first = elem->next;
|
||||
}
|
||||
if( elem->next ){
|
||||
elem->next->prev = elem->prev;
|
||||
}
|
||||
pEntry = &pH->ht[h];
|
||||
if( pEntry->chain==elem ){
|
||||
pEntry->chain = elem->next;
|
||||
}
|
||||
pEntry->count--;
|
||||
if( pEntry->count<=0 ){
|
||||
pEntry->chain = 0;
|
||||
}
|
||||
if( pH->copyKey && elem->pKey ){
|
||||
fts2HashFree(elem->pKey);
|
||||
}
|
||||
fts2HashFree( elem );
|
||||
pH->count--;
|
||||
if( pH->count<=0 ){
|
||||
assert( pH->first==0 );
|
||||
assert( pH->count==0 );
|
||||
fts2HashClear(pH);
|
||||
}
|
||||
}
|
||||
|
||||
/* Attempt to locate an element of the hash table pH with a key
|
||||
** that matches pKey,nKey. Return the data for this element if it is
|
||||
** found, or NULL if there is no match.
|
||||
*/
|
||||
void *sqlite3Fts2HashFind(const fts2Hash *pH, const void *pKey, int nKey){
|
||||
int h; /* A hash on key */
|
||||
fts2HashElem *elem; /* The element that matches key */
|
||||
int (*xHash)(const void*,int); /* The hash function */
|
||||
|
||||
if( pH==0 || pH->ht==0 ) return 0;
|
||||
xHash = hashFunction(pH->keyClass);
|
||||
assert( xHash!=0 );
|
||||
h = (*xHash)(pKey,nKey);
|
||||
assert( (pH->htsize & (pH->htsize-1))==0 );
|
||||
elem = findElementGivenHash(pH,pKey,nKey, h & (pH->htsize-1));
|
||||
return elem ? elem->data : 0;
|
||||
}
|
||||
|
||||
/* Insert an element into the hash table pH. The key is pKey,nKey
|
||||
** and the data is "data".
|
||||
**
|
||||
** If no element exists with a matching key, then a new
|
||||
** element is created. A copy of the key is made if the copyKey
|
||||
** flag is set. NULL is returned.
|
||||
**
|
||||
** If another element already exists with the same key, then the
|
||||
** new data replaces the old data and the old data is returned.
|
||||
** The key is not copied in this instance. If a malloc fails, then
|
||||
** the new data is returned and the hash table is unchanged.
|
||||
**
|
||||
** If the "data" parameter to this function is NULL, then the
|
||||
** element corresponding to "key" is removed from the hash table.
|
||||
*/
|
||||
void *sqlite3Fts2HashInsert(
|
||||
fts2Hash *pH, /* The hash table to insert into */
|
||||
const void *pKey, /* The key */
|
||||
int nKey, /* Number of bytes in the key */
|
||||
void *data /* The data */
|
||||
){
|
||||
int hraw; /* Raw hash value of the key */
|
||||
int h; /* the hash of the key modulo hash table size */
|
||||
fts2HashElem *elem; /* Used to loop thru the element list */
|
||||
fts2HashElem *new_elem; /* New element added to the pH */
|
||||
int (*xHash)(const void*,int); /* The hash function */
|
||||
|
||||
assert( pH!=0 );
|
||||
xHash = hashFunction(pH->keyClass);
|
||||
assert( xHash!=0 );
|
||||
hraw = (*xHash)(pKey, nKey);
|
||||
assert( (pH->htsize & (pH->htsize-1))==0 );
|
||||
h = hraw & (pH->htsize-1);
|
||||
elem = findElementGivenHash(pH,pKey,nKey,h);
|
||||
if( elem ){
|
||||
void *old_data = elem->data;
|
||||
if( data==0 ){
|
||||
removeElementGivenHash(pH,elem,h);
|
||||
}else{
|
||||
elem->data = data;
|
||||
}
|
||||
return old_data;
|
||||
}
|
||||
if( data==0 ) return 0;
|
||||
new_elem = (fts2HashElem*)fts2HashMalloc( sizeof(fts2HashElem) );
|
||||
if( new_elem==0 ) return data;
|
||||
if( pH->copyKey && pKey!=0 ){
|
||||
new_elem->pKey = fts2HashMalloc( nKey );
|
||||
if( new_elem->pKey==0 ){
|
||||
fts2HashFree(new_elem);
|
||||
return data;
|
||||
}
|
||||
memcpy((void*)new_elem->pKey, pKey, nKey);
|
||||
}else{
|
||||
new_elem->pKey = (void*)pKey;
|
||||
}
|
||||
new_elem->nKey = nKey;
|
||||
pH->count++;
|
||||
if( pH->htsize==0 ){
|
||||
rehash(pH,8);
|
||||
if( pH->htsize==0 ){
|
||||
pH->count = 0;
|
||||
fts2HashFree(new_elem);
|
||||
return data;
|
||||
}
|
||||
}
|
||||
if( pH->count > pH->htsize ){
|
||||
rehash(pH,pH->htsize*2);
|
||||
}
|
||||
assert( pH->htsize>0 );
|
||||
assert( (pH->htsize & (pH->htsize-1))==0 );
|
||||
h = hraw & (pH->htsize-1);
|
||||
insertElement(pH, &pH->ht[h], new_elem);
|
||||
new_elem->data = data;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */
|
@@ -1,110 +0,0 @@
|
||||
/*
|
||||
** 2001 September 22
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
** This is the header file for the generic hash-table implementation
|
||||
** used in SQLite. We've modified it slightly to serve as a standalone
|
||||
** hash table implementation for the full-text indexing module.
|
||||
**
|
||||
*/
|
||||
#ifndef _FTS2_HASH_H_
|
||||
#define _FTS2_HASH_H_
|
||||
|
||||
/* Forward declarations of structures. */
|
||||
typedef struct fts2Hash fts2Hash;
|
||||
typedef struct fts2HashElem fts2HashElem;
|
||||
|
||||
/* A complete hash table is an instance of the following structure.
|
||||
** The internals of this structure are intended to be opaque -- client
|
||||
** code should not attempt to access or modify the fields of this structure
|
||||
** directly. Change this structure only by using the routines below.
|
||||
** However, many of the "procedures" and "functions" for modifying and
|
||||
** accessing this structure are really macros, so we can't really make
|
||||
** this structure opaque.
|
||||
*/
|
||||
struct fts2Hash {
|
||||
char keyClass; /* HASH_INT, _POINTER, _STRING, _BINARY */
|
||||
char copyKey; /* True if copy of key made on insert */
|
||||
int count; /* Number of entries in this table */
|
||||
fts2HashElem *first; /* The first element of the array */
|
||||
int htsize; /* Number of buckets in the hash table */
|
||||
struct _fts2ht { /* the hash table */
|
||||
int count; /* Number of entries with this hash */
|
||||
fts2HashElem *chain; /* Pointer to first entry with this hash */
|
||||
} *ht;
|
||||
};
|
||||
|
||||
/* Each element in the hash table is an instance of the following
|
||||
** structure. All elements are stored on a single doubly-linked list.
|
||||
**
|
||||
** Again, this structure is intended to be opaque, but it can't really
|
||||
** be opaque because it is used by macros.
|
||||
*/
|
||||
struct fts2HashElem {
|
||||
fts2HashElem *next, *prev; /* Next and previous elements in the table */
|
||||
void *data; /* Data associated with this element */
|
||||
void *pKey; int nKey; /* Key associated with this element */
|
||||
};
|
||||
|
||||
/*
** A hash table operates in one of two key classes:
**
**   FTS2_HASH_STRING        pKey points to a string that is nKey bytes long
**                           (including the null-terminator, if any).  Case
**                           is respected in comparisons.
**
**   FTS2_HASH_BINARY        pKey points to binary data nKey bytes long.
**                           memcmp() is used to compare keys.
**
** Whichever class is chosen, the table stores its own copy of each key
** when the copyKey parameter to fts2HashInit is 1.
*/
#define FTS2_HASH_STRING    1
#define FTS2_HASH_BINARY    2
|
||||
|
||||
/*
|
||||
** Access routines. To delete, insert a NULL pointer.
|
||||
*/
|
||||
void sqlite3Fts2HashInit(fts2Hash*, int keytype, int copyKey);
|
||||
void *sqlite3Fts2HashInsert(fts2Hash*, const void *pKey, int nKey, void *pData);
|
||||
void *sqlite3Fts2HashFind(const fts2Hash*, const void *pKey, int nKey);
|
||||
void sqlite3Fts2HashClear(fts2Hash*);
|
||||
|
||||
/*
|
||||
** Shorthand for the functions above
|
||||
*/
|
||||
#define fts2HashInit sqlite3Fts2HashInit
|
||||
#define fts2HashInsert sqlite3Fts2HashInsert
|
||||
#define fts2HashFind sqlite3Fts2HashFind
|
||||
#define fts2HashClear sqlite3Fts2HashClear
|
||||
|
||||
/*
** Macros for visiting every element of a hash table.  Typical use:
**
**   fts2Hash h;
**   fts2HashElem *p;
**   ...
**   for(p=fts2HashFirst(&h); p; p=fts2HashNext(p)){
**     SomeStructure *pData = fts2HashData(p);
**     // do something with pData
**   }
**
** fts2HashKey() and fts2HashKeysize() give the key pointer and key
** length recorded for an element.
*/
#define fts2HashFirst(H)    ((H)->first)
#define fts2HashNext(E)     ((E)->next)
#define fts2HashData(E)     ((E)->data)
#define fts2HashKey(E)      ((E)->pKey)
#define fts2HashKeysize(E)  ((E)->nKey)

/*
** Number of entries currently stored in a hash table
*/
#define fts2HashCount(H)    ((H)->count)
|
||||
|
||||
#endif /* _FTS2_HASH_H_ */
|
@@ -1,260 +0,0 @@
|
||||
/*
|
||||
** 2007 June 22
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
** This file implements a tokenizer for fts2 based on the ICU library.
|
||||
**
|
||||
** $Id: fts2_icu.c,v 1.3 2008/12/18 05:30:26 danielk1977 Exp $
|
||||
*/
|
||||
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
|
||||
#ifdef SQLITE_ENABLE_ICU
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include "fts2_tokenizer.h"
|
||||
|
||||
#include <unicode/ubrk.h>
|
||||
#include <unicode/ucol.h>
|
||||
#include <unicode/ustring.h>
|
||||
#include <unicode/utf16.h>
|
||||
|
||||
typedef struct IcuTokenizer IcuTokenizer;
|
||||
typedef struct IcuCursor IcuCursor;
|
||||
|
||||
struct IcuTokenizer {
|
||||
sqlite3_tokenizer base;
|
||||
char *zLocale;
|
||||
};
|
||||
|
||||
struct IcuCursor {
|
||||
sqlite3_tokenizer_cursor base;
|
||||
|
||||
UBreakIterator *pIter; /* ICU break-iterator object */
|
||||
int nChar; /* Number of UChar elements in pInput */
|
||||
UChar *aChar; /* Copy of input using utf-16 encoding */
|
||||
int *aOffset; /* Offsets of each character in utf-8 input */
|
||||
|
||||
int nBuffer;
|
||||
char *zBuffer;
|
||||
|
||||
int iToken;
|
||||
};
|
||||
|
||||
/*
|
||||
** Create a new tokenizer instance.
|
||||
*/
|
||||
static int icuCreate(
|
||||
int argc, /* Number of entries in argv[] */
|
||||
const char * const *argv, /* Tokenizer creation arguments */
|
||||
sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */
|
||||
){
|
||||
IcuTokenizer *p;
|
||||
int n = 0;
|
||||
|
||||
if( argc>0 ){
|
||||
n = strlen(argv[0])+1;
|
||||
}
|
||||
p = (IcuTokenizer *)sqlite3_malloc(sizeof(IcuTokenizer)+n);
|
||||
if( !p ){
|
||||
return SQLITE_NOMEM;
|
||||
}
|
||||
memset(p, 0, sizeof(IcuTokenizer));
|
||||
|
||||
if( n ){
|
||||
p->zLocale = (char *)&p[1];
|
||||
memcpy(p->zLocale, argv[0], n);
|
||||
}
|
||||
|
||||
*ppTokenizer = (sqlite3_tokenizer *)p;
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Destroy a tokenizer
|
||||
*/
|
||||
static int icuDestroy(sqlite3_tokenizer *pTokenizer){
|
||||
IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
|
||||
sqlite3_free(p);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Prepare to begin tokenizing a particular string. The input
|
||||
** string to be tokenized is pInput[0..nBytes-1]. A cursor
|
||||
** used to incrementally tokenize this string is returned in
|
||||
** *ppCursor.
|
||||
*/
|
||||
static int icuOpen(
|
||||
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
|
||||
const char *zInput, /* Input string */
|
||||
int nInput, /* Length of zInput in bytes */
|
||||
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
|
||||
){
|
||||
IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
|
||||
IcuCursor *pCsr;
|
||||
|
||||
const int32_t opt = U_FOLD_CASE_DEFAULT;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
int nChar;
|
||||
|
||||
UChar32 c;
|
||||
int iInput = 0;
|
||||
int iOut = 0;
|
||||
|
||||
*ppCursor = 0;
|
||||
|
||||
if( nInput<0 ){
|
||||
nInput = strlen(zInput);
|
||||
}
|
||||
nChar = nInput+1;
|
||||
pCsr = (IcuCursor *)sqlite3_malloc(
|
||||
sizeof(IcuCursor) + /* IcuCursor */
|
||||
((nChar+3)&~3) * sizeof(UChar) + /* IcuCursor.aChar[] */
|
||||
(nChar+1) * sizeof(int) /* IcuCursor.aOffset[] */
|
||||
);
|
||||
if( !pCsr ){
|
||||
return SQLITE_NOMEM;
|
||||
}
|
||||
memset(pCsr, 0, sizeof(IcuCursor));
|
||||
pCsr->aChar = (UChar *)&pCsr[1];
|
||||
pCsr->aOffset = (int *)&pCsr->aChar[(nChar+3)&~3];
|
||||
|
||||
pCsr->aOffset[iOut] = iInput;
|
||||
U8_NEXT(zInput, iInput, nInput, c);
|
||||
while( c>0 ){
|
||||
int isError = 0;
|
||||
c = u_foldCase(c, opt);
|
||||
U16_APPEND(pCsr->aChar, iOut, nChar, c, isError);
|
||||
if( isError ){
|
||||
sqlite3_free(pCsr);
|
||||
return SQLITE_ERROR;
|
||||
}
|
||||
pCsr->aOffset[iOut] = iInput;
|
||||
|
||||
if( iInput<nInput ){
|
||||
U8_NEXT(zInput, iInput, nInput, c);
|
||||
}else{
|
||||
c = 0;
|
||||
}
|
||||
}
|
||||
|
||||
pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status);
|
||||
if( !U_SUCCESS(status) ){
|
||||
sqlite3_free(pCsr);
|
||||
return SQLITE_ERROR;
|
||||
}
|
||||
pCsr->nChar = iOut;
|
||||
|
||||
ubrk_first(pCsr->pIter);
|
||||
*ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Close a tokenization cursor previously opened by a call to icuOpen().
|
||||
*/
|
||||
static int icuClose(sqlite3_tokenizer_cursor *pCursor){
|
||||
IcuCursor *pCsr = (IcuCursor *)pCursor;
|
||||
ubrk_close(pCsr->pIter);
|
||||
sqlite3_free(pCsr->zBuffer);
|
||||
sqlite3_free(pCsr);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Extract the next token from a tokenization cursor.
|
||||
*/
|
||||
static int icuNext(
|
||||
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */
|
||||
const char **ppToken, /* OUT: *ppToken is the token text */
|
||||
int *pnBytes, /* OUT: Number of bytes in token */
|
||||
int *piStartOffset, /* OUT: Starting offset of token */
|
||||
int *piEndOffset, /* OUT: Ending offset of token */
|
||||
int *piPosition /* OUT: Position integer of token */
|
||||
){
|
||||
IcuCursor *pCsr = (IcuCursor *)pCursor;
|
||||
|
||||
int iStart = 0;
|
||||
int iEnd = 0;
|
||||
int nByte = 0;
|
||||
|
||||
while( iStart==iEnd ){
|
||||
UChar32 c;
|
||||
|
||||
iStart = ubrk_current(pCsr->pIter);
|
||||
iEnd = ubrk_next(pCsr->pIter);
|
||||
if( iEnd==UBRK_DONE ){
|
||||
return SQLITE_DONE;
|
||||
}
|
||||
|
||||
while( iStart<iEnd ){
|
||||
int iWhite = iStart;
|
||||
U8_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c);
|
||||
if( u_isspace(c) ){
|
||||
iStart = iWhite;
|
||||
}else{
|
||||
break;
|
||||
}
|
||||
}
|
||||
assert(iStart<=iEnd);
|
||||
}
|
||||
|
||||
do {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
if( nByte ){
|
||||
char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte);
|
||||
if( !zNew ){
|
||||
return SQLITE_NOMEM;
|
||||
}
|
||||
pCsr->zBuffer = zNew;
|
||||
pCsr->nBuffer = nByte;
|
||||
}
|
||||
|
||||
u_strToUTF8(
|
||||
pCsr->zBuffer, pCsr->nBuffer, &nByte, /* Output vars */
|
||||
&pCsr->aChar[iStart], iEnd-iStart, /* Input vars */
|
||||
&status /* Output success/failure */
|
||||
);
|
||||
} while( nByte>pCsr->nBuffer );
|
||||
|
||||
*ppToken = pCsr->zBuffer;
|
||||
*pnBytes = nByte;
|
||||
*piStartOffset = pCsr->aOffset[iStart];
|
||||
*piEndOffset = pCsr->aOffset[iEnd];
|
||||
*piPosition = pCsr->iToken++;
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** The set of routines that implement the simple tokenizer
|
||||
*/
|
||||
static const sqlite3_tokenizer_module icuTokenizerModule = {
|
||||
0, /* iVersion */
|
||||
icuCreate, /* xCreate */
|
||||
icuDestroy, /* xCreate */
|
||||
icuOpen, /* xOpen */
|
||||
icuClose, /* xClose */
|
||||
icuNext, /* xNext */
|
||||
};
|
||||
|
||||
/*
|
||||
** Set *ppModule to point at the implementation of the ICU tokenizer.
|
||||
*/
|
||||
void sqlite3Fts2IcuTokenizerModule(
|
||||
sqlite3_tokenizer_module const**ppModule
|
||||
){
|
||||
*ppModule = &icuTokenizerModule;
|
||||
}
|
||||
|
||||
#endif /* defined(SQLITE_ENABLE_ICU) */
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */
|
@@ -1,644 +0,0 @@
|
||||
/*
|
||||
** 2006 September 30
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
*************************************************************************
|
||||
** Implementation of the full-text-search tokenizer that implements
|
||||
** a Porter stemmer.
|
||||
*/
|
||||
|
||||
/*
|
||||
** The code in this file is only compiled if:
|
||||
**
|
||||
** * The FTS2 module is being built as an extension
|
||||
** (in which case SQLITE_CORE is not defined), or
|
||||
**
|
||||
** * The FTS2 module is being built into the core of
|
||||
** SQLite (in which case SQLITE_ENABLE_FTS2 is defined).
|
||||
*/
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
|
||||
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "sqlite3.h"
|
||||
#include "sqlite3ext.h"
|
||||
SQLITE_EXTENSION_INIT3
|
||||
#include "fts2_tokenizer.h"
|
||||
|
||||
/*
|
||||
** Class derived from sqlite3_tokenizer
|
||||
*/
|
||||
typedef struct porter_tokenizer {
|
||||
sqlite3_tokenizer base; /* Base class */
|
||||
} porter_tokenizer;
|
||||
|
||||
/*
|
||||
** Class derived from sqlit3_tokenizer_cursor
|
||||
*/
|
||||
typedef struct porter_tokenizer_cursor {
|
||||
sqlite3_tokenizer_cursor base;
|
||||
const char *zInput; /* input we are tokenizing */
|
||||
int nInput; /* size of the input */
|
||||
int iOffset; /* current position in zInput */
|
||||
int iToken; /* index of next token to be returned */
|
||||
char *zToken; /* storage for current token */
|
||||
int nAllocated; /* space allocated to zToken buffer */
|
||||
} porter_tokenizer_cursor;
|
||||
|
||||
|
||||
/* Forward declaration */
|
||||
static const sqlite3_tokenizer_module porterTokenizerModule;
|
||||
|
||||
|
||||
/*
|
||||
** Create a new tokenizer instance.
|
||||
*/
|
||||
static int porterCreate(
|
||||
int argc, const char * const *argv,
|
||||
sqlite3_tokenizer **ppTokenizer
|
||||
){
|
||||
porter_tokenizer *t;
|
||||
t = (porter_tokenizer *) sqlite3_malloc(sizeof(*t));
|
||||
if( t==NULL ) return SQLITE_NOMEM;
|
||||
memset(t, 0, sizeof(*t));
|
||||
*ppTokenizer = &t->base;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Destroy a tokenizer
|
||||
*/
|
||||
static int porterDestroy(sqlite3_tokenizer *pTokenizer){
|
||||
sqlite3_free(pTokenizer);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Prepare to begin tokenizing a particular string. The input
|
||||
** string to be tokenized is zInput[0..nInput-1]. A cursor
|
||||
** used to incrementally tokenize this string is returned in
|
||||
** *ppCursor.
|
||||
*/
|
||||
static int porterOpen(
|
||||
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
|
||||
const char *zInput, int nInput, /* String to be tokenized */
|
||||
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
|
||||
){
|
||||
porter_tokenizer_cursor *c;
|
||||
|
||||
c = (porter_tokenizer_cursor *) sqlite3_malloc(sizeof(*c));
|
||||
if( c==NULL ) return SQLITE_NOMEM;
|
||||
|
||||
c->zInput = zInput;
|
||||
if( zInput==0 ){
|
||||
c->nInput = 0;
|
||||
}else if( nInput<0 ){
|
||||
c->nInput = (int)strlen(zInput);
|
||||
}else{
|
||||
c->nInput = nInput;
|
||||
}
|
||||
c->iOffset = 0; /* start tokenizing at the beginning */
|
||||
c->iToken = 0;
|
||||
c->zToken = NULL; /* no space allocated, yet. */
|
||||
c->nAllocated = 0;
|
||||
|
||||
*ppCursor = &c->base;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Close a tokenization cursor previously opened by a call to
|
||||
** porterOpen() above.
|
||||
*/
|
||||
static int porterClose(sqlite3_tokenizer_cursor *pCursor){
|
||||
porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
|
||||
sqlite3_free(c->zToken);
|
||||
sqlite3_free(c);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
/*
** Letter classification, indexed by (letter - 'a'):
**
**    0   vowel
**    1   consonant
**    2   'y', which may be either depending on its neighbor
*/
static const char cType[] = {
  /* a  b  c  d  e  f  g  h  i  j  k  l  m */
     0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1,
  /* n  o  p  q  r  s  t  u  v  w  x  y  z */
     1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 2, 1
};
|
||||
|
||||
/*
|
||||
** isConsonant() and isVowel() determine if their first character in
|
||||
** the string they point to is a consonant or a vowel, according
|
||||
** to Porter ruls.
|
||||
**
|
||||
** A consonate is any letter other than 'a', 'e', 'i', 'o', or 'u'.
|
||||
** 'Y' is a consonant unless it follows another consonant,
|
||||
** in which case it is a vowel.
|
||||
**
|
||||
** In these routine, the letters are in reverse order. So the 'y' rule
|
||||
** is that 'y' is a consonant unless it is followed by another
|
||||
** consonent.
|
||||
*/
|
||||
static int isVowel(const char*);
|
||||
static int isConsonant(const char *z){
|
||||
int j;
|
||||
char x = *z;
|
||||
if( x==0 ) return 0;
|
||||
assert( x>='a' && x<='z' );
|
||||
j = cType[x-'a'];
|
||||
if( j<2 ) return j;
|
||||
return z[1]==0 || isVowel(z + 1);
|
||||
}
|
||||
static int isVowel(const char *z){
|
||||
int j;
|
||||
char x = *z;
|
||||
if( x==0 ) return 0;
|
||||
assert( x>='a' && x<='z' );
|
||||
j = cType[x-'a'];
|
||||
if( j<2 ) return 1-j;
|
||||
return isConsonant(z + 1);
|
||||
}
|
||||
|
||||
/*
** Let V stand for a run of one or more vowels and C for a run of one
** or more consonants.  Every word then has the shape
**
**           [C] (VC){m} [V]
**
** that is, an optional consonant run, then m vowel-consonant pairs,
** then an optional vowel run.  "m" is called the measure of the word.
**
** Return true if the measure of z is 1 or more.  In other words,
** return true if z contains at least one vowel that is followed
** by a consonant.
**
** z[] is stored in reverse order, so the test actually made is for a
** consonant that is followed by a vowel.
*/
static int m_gt_0(const char *z){
  for(; isVowel(z); z++){}
  if( *z ){
    for(; isConsonant(z); z++){}
    return *z!=0;
  }
  return 0;
}
|
||||
|
||||
/* Like m_gt_0() above, except that this routine returns true only when
** the measure m of the (reversed) word z is exactly 1.
*/
static int m_eq_1(const char *z){
  for(; isVowel(z); z++){}
  if( *z==0 ) return 0;          /* vowels only: m==0 */
  for(; isConsonant(z); z++){}
  if( *z==0 ) return 0;          /* no vowel before the consonants: m==0 */
  for(; isVowel(z); z++){}
  if( *z==0 ) return 1;          /* a single VC pair */
  for(; isConsonant(z); z++){}
  return *z==0;                  /* anything left over means m>1 */
}
|
||||
|
||||
/* Like m_gt_0() above, except that this routine returns true only when
** the measure m of the (reversed) word z is greater than 1.
*/
static int m_gt_1(const char *z){
  int nPass;
  /* Three times over: skip a vowel run or a consonant run, giving up if
  ** the word ends.  The order is vowels, consonants, vowels. */
  for(nPass=0; nPass<3; nPass++){
    if( nPass==1 ){
      for(; isConsonant(z); z++){}
    }else{
      for(; isVowel(z); z++){}
    }
    if( *z==0 ) return 0;
  }
  for(; isConsonant(z); z++){}
  return *z!=0;
}
|
||||
|
||||
/*
** Return TRUE if there is a vowel anywhere within the NUL-terminated
** string z.
*/
static int hasVowel(const char *z){
  for(; isConsonant(z); z++){}
  /* Stopped either on a vowel or on the terminator */
  return *z!=0;
}
|
||||
|
||||
/*
** Return TRUE if the word ends in a double consonant.
**
** The text is stored reversed, so the check is made on the first two
** characters of z[].
*/
static int doubleConsonant(const char *z){
  if( !isConsonant(z) ) return 0;
  if( z[0]!=z[1] ) return 0;
  return isConsonant(z+1)!=0;
}
|
||||
|
||||
/*
** Return TRUE if the word ends with three letters which
** are consonant-vowel-consonant and where the final consonant
** is not 'w', 'x', or 'y'.
**
** The word is stored reversed, so the check is made on the first three
** letters, and it is the first of them that must not be in [wxy].
*/
static int star_oh(const char *z){
  if( z[0]==0 || !isConsonant(z) ) return 0;
  if( z[0]=='w' || z[0]=='x' || z[0]=='y' ) return 0;
  if( z[1]==0 || !isVowel(z+1) ) return 0;
  if( z[2]==0 ) return 0;
  return isConsonant(z+2)!=0;
}
|
||||
|
||||
/*
** If the word ends with zFrom and xCond() is true for the stem
** of the word that precedes the zFrom ending, then change the
** ending to zTo.
**
** The input word *pz and zFrom are both in reverse order.  zTo
** is in normal order.  The replacement is written backwards into the
** bytes immediately before the stem, and *pz is moved to the new start
** of the reversed word.
**
** Return TRUE if zFrom matches.  Return FALSE if zFrom does not
** match.  Note that TRUE is returned even if xCond() fails and
** no substitution occurs.
*/
static int stem(
  char **pz,             /* The word being stemmed (Reversed) */
  const char *zFrom,     /* If the ending matches this... (Reversed) */
  const char *zTo,       /* ... change the ending to this (not reversed) */
  int (*xCond)(const char*)   /* Condition that must be true */
){
  char *zWord = *pz;

  /* Walk the ending and the word together; any mismatch means no match */
  for(; *zFrom; zFrom++, zWord++){
    if( *zFrom!=*zWord ) return 0;
  }

  /* zWord now points at the stem.  Substitute only if the condition,
  ** when there is one, accepts it. */
  if( xCond==0 || xCond(zWord) ){
    for(; *zTo; zTo++){
      *(--zWord) = *zTo;
    }
    *pz = zWord;
  }
  return 1;
}
|
||||
|
||||
/*
** This is the fallback stemmer used when the porter stemmer is
** inappropriate.  The input word is copied into the output with
** US-ASCII case folding.  If the input word is too long (more
** than 20 bytes if it contains no digits or more than 6 bytes if
** it contains digits) then the word is truncated to 20 or 6 bytes
** by taking 10 or 3 bytes from the beginning and end.
**
** zOut must have room for nIn bytes plus a NUL terminator.  The length
** of the result, not counting the terminator, is written to *pnOut.
*/
static void copy_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
  int nOut = 0;          /* Bytes written to zOut so far */
  int nKeep;             /* Bytes kept from each end of a long word */
  int sawDigit = 0;      /* True once a digit has been seen */

  /* Copy the whole word, folding A-Z to a-z and noting any digit */
  while( nOut<nIn ){
    int c = zIn[nOut];
    if( c>='0' && c<='9' ) sawDigit = 1;
    zOut[nOut] = (c>='A' && c<='Z') ? c - 'A' + 'a' : c;
    nOut++;
  }

  /* For an over-long word, slide the last nKeep bytes down so that they
  ** directly follow the first nKeep bytes */
  nKeep = sawDigit ? 3 : 10;
  if( nIn>nKeep*2 ){
    int iSrc = nIn - nKeep;
    nOut = nKeep;
    while( iSrc<nIn ){
      zOut[nOut++] = zOut[iSrc++];
    }
  }

  zOut[nOut] = 0;
  *pnOut = nOut;
}
|
||||
|
||||
|
||||
/*
|
||||
** Stem the input word zIn[0..nIn-1]. Store the output in zOut.
|
||||
** zOut is at least big enough to hold nIn bytes. Write the actual
|
||||
** size of the output word (exclusive of the '\0' terminator) into *pnOut.
|
||||
**
|
||||
** Any upper-case characters in the US-ASCII character set ([A-Z])
|
||||
** are converted to lower case. Upper-case UTF characters are
|
||||
** unchanged.
|
||||
**
|
||||
** Words that are longer than about 20 bytes are stemmed by retaining
|
||||
** a few bytes from the beginning and the end of the word. If the
|
||||
** word contains digits, 3 bytes are taken from the beginning and
|
||||
** 3 bytes from the end. For long words without digits, 10 bytes
|
||||
** are taken from each end. US-ASCII case folding still applies.
|
||||
**
|
||||
** If the input word contains no digits but does contain characters not
|
||||
** in [a-zA-Z] then no stemming is attempted and this routine just
|
||||
** copies the input into the output with US-ASCII
|
||||
** case folding.
|
||||
**
|
||||
** Stemming never increases the length of the word. So there is
|
||||
** no chance of overflowing the zOut buffer.
|
||||
*/
|
||||
static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
|
||||
int i, j, c;
|
||||
char zReverse[28];
|
||||
char *z, *z2;
|
||||
if( nIn<3 || nIn>=sizeof(zReverse)-7 ){
|
||||
/* The word is too big or too small for the porter stemmer.
|
||||
** Fallback to the copy stemmer */
|
||||
copy_stemmer(zIn, nIn, zOut, pnOut);
|
||||
return;
|
||||
}
|
||||
for(i=0, j=sizeof(zReverse)-6; i<nIn; i++, j--){
|
||||
c = zIn[i];
|
||||
if( c>='A' && c<='Z' ){
|
||||
zReverse[j] = c + 'a' - 'A';
|
||||
}else if( c>='a' && c<='z' ){
|
||||
zReverse[j] = c;
|
||||
}else{
|
||||
/* The use of a character not in [a-zA-Z] means that we fallback
|
||||
** to the copy stemmer */
|
||||
copy_stemmer(zIn, nIn, zOut, pnOut);
|
||||
return;
|
||||
}
|
||||
}
|
||||
memset(&zReverse[sizeof(zReverse)-5], 0, 5);
|
||||
z = &zReverse[j+1];
|
||||
|
||||
|
||||
/* Step 1a */
|
||||
if( z[0]=='s' ){
|
||||
if(
|
||||
!stem(&z, "sess", "ss", 0) &&
|
||||
!stem(&z, "sei", "i", 0) &&
|
||||
!stem(&z, "ss", "ss", 0)
|
||||
){
|
||||
z++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Step 1b */
|
||||
z2 = z;
|
||||
if( stem(&z, "dee", "ee", m_gt_0) ){
|
||||
/* Do nothing. The work was all in the test */
|
||||
}else if(
|
||||
(stem(&z, "gni", "", hasVowel) || stem(&z, "de", "", hasVowel))
|
||||
&& z!=z2
|
||||
){
|
||||
if( stem(&z, "ta", "ate", 0) ||
|
||||
stem(&z, "lb", "ble", 0) ||
|
||||
stem(&z, "zi", "ize", 0) ){
|
||||
/* Do nothing. The work was all in the test */
|
||||
}else if( doubleConsonant(z) && (*z!='l' && *z!='s' && *z!='z') ){
|
||||
z++;
|
||||
}else if( m_eq_1(z) && star_oh(z) ){
|
||||
*(--z) = 'e';
|
||||
}
|
||||
}
|
||||
|
||||
/* Step 1c */
|
||||
if( z[0]=='y' && hasVowel(z+1) ){
|
||||
z[0] = 'i';
|
||||
}
|
||||
|
||||
/* Step 2 */
|
||||
switch( z[1] ){
|
||||
case 'a':
|
||||
stem(&z, "lanoita", "ate", m_gt_0) ||
|
||||
stem(&z, "lanoit", "tion", m_gt_0);
|
||||
break;
|
||||
case 'c':
|
||||
stem(&z, "icne", "ence", m_gt_0) ||
|
||||
stem(&z, "icna", "ance", m_gt_0);
|
||||
break;
|
||||
case 'e':
|
||||
stem(&z, "rezi", "ize", m_gt_0);
|
||||
break;
|
||||
case 'g':
|
||||
stem(&z, "igol", "log", m_gt_0);
|
||||
break;
|
||||
case 'l':
|
||||
stem(&z, "ilb", "ble", m_gt_0) ||
|
||||
stem(&z, "illa", "al", m_gt_0) ||
|
||||
stem(&z, "iltne", "ent", m_gt_0) ||
|
||||
stem(&z, "ile", "e", m_gt_0) ||
|
||||
stem(&z, "ilsuo", "ous", m_gt_0);
|
||||
break;
|
||||
case 'o':
|
||||
stem(&z, "noitazi", "ize", m_gt_0) ||
|
||||
stem(&z, "noita", "ate", m_gt_0) ||
|
||||
stem(&z, "rota", "ate", m_gt_0);
|
||||
break;
|
||||
case 's':
|
||||
stem(&z, "msila", "al", m_gt_0) ||
|
||||
stem(&z, "ssenevi", "ive", m_gt_0) ||
|
||||
stem(&z, "ssenluf", "ful", m_gt_0) ||
|
||||
stem(&z, "ssensuo", "ous", m_gt_0);
|
||||
break;
|
||||
case 't':
|
||||
stem(&z, "itila", "al", m_gt_0) ||
|
||||
stem(&z, "itivi", "ive", m_gt_0) ||
|
||||
stem(&z, "itilib", "ble", m_gt_0);
|
||||
break;
|
||||
}
|
||||
|
||||
/* Step 3 */
|
||||
switch( z[0] ){
|
||||
case 'e':
|
||||
stem(&z, "etaci", "ic", m_gt_0) ||
|
||||
stem(&z, "evita", "", m_gt_0) ||
|
||||
stem(&z, "ezila", "al", m_gt_0);
|
||||
break;
|
||||
case 'i':
|
||||
stem(&z, "itici", "ic", m_gt_0);
|
||||
break;
|
||||
case 'l':
|
||||
stem(&z, "laci", "ic", m_gt_0) ||
|
||||
stem(&z, "luf", "", m_gt_0);
|
||||
break;
|
||||
case 's':
|
||||
stem(&z, "ssen", "", m_gt_0);
|
||||
break;
|
||||
}
|
||||
|
||||
/* Step 4 */
|
||||
switch( z[1] ){
|
||||
case 'a':
|
||||
if( z[0]=='l' && m_gt_1(z+2) ){
|
||||
z += 2;
|
||||
}
|
||||
break;
|
||||
case 'c':
|
||||
if( z[0]=='e' && z[2]=='n' && (z[3]=='a' || z[3]=='e') && m_gt_1(z+4) ){
|
||||
z += 4;
|
||||
}
|
||||
break;
|
||||
case 'e':
|
||||
if( z[0]=='r' && m_gt_1(z+2) ){
|
||||
z += 2;
|
||||
}
|
||||
break;
|
||||
case 'i':
|
||||
if( z[0]=='c' && m_gt_1(z+2) ){
|
||||
z += 2;
|
||||
}
|
||||
break;
|
||||
case 'l':
|
||||
if( z[0]=='e' && z[2]=='b' && (z[3]=='a' || z[3]=='i') && m_gt_1(z+4) ){
|
||||
z += 4;
|
||||
}
|
||||
break;
|
||||
case 'n':
|
||||
if( z[0]=='t' ){
|
||||
if( z[2]=='a' ){
|
||||
if( m_gt_1(z+3) ){
|
||||
z += 3;
|
||||
}
|
||||
}else if( z[2]=='e' ){
|
||||
stem(&z, "tneme", "", m_gt_1) ||
|
||||
stem(&z, "tnem", "", m_gt_1) ||
|
||||
stem(&z, "tne", "", m_gt_1);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 'o':
|
||||
if( z[0]=='u' ){
|
||||
if( m_gt_1(z+2) ){
|
||||
z += 2;
|
||||
}
|
||||
}else if( z[3]=='s' || z[3]=='t' ){
|
||||
stem(&z, "noi", "", m_gt_1);
|
||||
}
|
||||
break;
|
||||
case 's':
|
||||
if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){
|
||||
z += 3;
|
||||
}
|
||||
break;
|
||||
case 't':
|
||||
stem(&z, "eta", "", m_gt_1) ||
|
||||
stem(&z, "iti", "", m_gt_1);
|
||||
break;
|
||||
case 'u':
|
||||
if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){
|
||||
z += 3;
|
||||
}
|
||||
break;
|
||||
case 'v':
|
||||
case 'z':
|
||||
if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){
|
||||
z += 3;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
/* Step 5a */
|
||||
if( z[0]=='e' ){
|
||||
if( m_gt_1(z+1) ){
|
||||
z++;
|
||||
}else if( m_eq_1(z+1) && !star_oh(z+1) ){
|
||||
z++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Step 5b */
|
||||
if( m_gt_1(z) && z[0]=='l' && z[1]=='l' ){
|
||||
z++;
|
||||
}
|
||||
|
||||
/* z[] is now the stemmed word in reverse order. Flip it back
|
||||
** around into forward order and return.
|
||||
*/
|
||||
*pnOut = i = strlen(z);
|
||||
zOut[i] = 0;
|
||||
while( *z ){
|
||||
zOut[--i] = *(z++);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
** Characters that can be part of a token. We assume any character
|
||||
** whose value is 0x80 or greater (any UTF character) can be
|
||||
** part of a token. In other words, delimiters all must have
|
||||
** values of 0x7f or lower.
|
||||
*/
|
||||
/*
** Lookup table indexed by (character - 0x30) covering 0x30..0x7f.  An
** entry of 1 marks a token character: the digits, the ASCII letters
** and '_'.
*/
static const char porterIdChar[] = {
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */
};

/*
** True if C is a delimiter.  Bytes with the 0x80 bit set are never
** delimiters.  The macro stores C in an int variable named "ch" that the
** caller must have in scope.  The argument is parenthesized so that any
** expression can be passed safely.
*/
#define isDelim(C) (((ch=(C))&0x80)==0 && (ch<0x30 || !porterIdChar[ch-0x30]))
|
||||
|
||||
/*
|
||||
** Extract the next token from a tokenization cursor. The cursor must
|
||||
** have been opened by a prior call to porterOpen().
|
||||
*/
|
||||
static int porterNext(
|
||||
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by porterOpen */
|
||||
const char **pzToken, /* OUT: *pzToken is the token text */
|
||||
int *pnBytes, /* OUT: Number of bytes in token */
|
||||
int *piStartOffset, /* OUT: Starting offset of token */
|
||||
int *piEndOffset, /* OUT: Ending offset of token */
|
||||
int *piPosition /* OUT: Position integer of token */
|
||||
){
|
||||
porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
|
||||
const char *z = c->zInput;
|
||||
|
||||
while( c->iOffset<c->nInput ){
|
||||
int iStartOffset, ch;
|
||||
|
||||
/* Scan past delimiter characters */
|
||||
while( c->iOffset<c->nInput && isDelim(z[c->iOffset]) ){
|
||||
c->iOffset++;
|
||||
}
|
||||
|
||||
/* Count non-delimiter characters. */
|
||||
iStartOffset = c->iOffset;
|
||||
while( c->iOffset<c->nInput && !isDelim(z[c->iOffset]) ){
|
||||
c->iOffset++;
|
||||
}
|
||||
|
||||
if( c->iOffset>iStartOffset ){
|
||||
int n = c->iOffset-iStartOffset;
|
||||
if( n>c->nAllocated ){
|
||||
c->nAllocated = n+20;
|
||||
c->zToken = sqlite3_realloc(c->zToken, c->nAllocated);
|
||||
if( c->zToken==NULL ) return SQLITE_NOMEM;
|
||||
}
|
||||
porter_stemmer(&z[iStartOffset], n, c->zToken, pnBytes);
|
||||
*pzToken = c->zToken;
|
||||
*piStartOffset = iStartOffset;
|
||||
*piEndOffset = c->iOffset;
|
||||
*piPosition = c->iToken++;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
}
|
||||
return SQLITE_DONE;
|
||||
}
|
||||
|
||||
/*
|
||||
** The set of routines that implement the porter-stemmer tokenizer
|
||||
*/
|
||||
static const sqlite3_tokenizer_module porterTokenizerModule = {
|
||||
0,
|
||||
porterCreate,
|
||||
porterDestroy,
|
||||
porterOpen,
|
||||
porterClose,
|
||||
porterNext,
|
||||
};
|
||||
|
||||
/*
|
||||
** Allocate a new porter tokenizer. Return a pointer to the new
|
||||
** tokenizer in *ppModule
|
||||
*/
|
||||
void sqlite3Fts2PorterTokenizerModule(
|
||||
sqlite3_tokenizer_module const**ppModule
|
||||
){
|
||||
*ppModule = &porterTokenizerModule;
|
||||
}
|
||||
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */
|
@@ -1,375 +0,0 @@
|
||||
/*
|
||||
** 2007 June 22
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
******************************************************************************
|
||||
**
|
||||
** This is part of an SQLite module implementing full-text search.
|
||||
** This particular file implements the generic tokenizer interface.
|
||||
*/
|
||||
|
||||
/*
|
||||
** The code in this file is only compiled if:
|
||||
**
|
||||
** * The FTS2 module is being built as an extension
|
||||
** (in which case SQLITE_CORE is not defined), or
|
||||
**
|
||||
** * The FTS2 module is being built into the core of
|
||||
** SQLite (in which case SQLITE_ENABLE_FTS2 is defined).
|
||||
*/
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
|
||||
|
||||
|
||||
#include "sqlite3.h"
|
||||
#include "sqlite3ext.h"
|
||||
SQLITE_EXTENSION_INIT3
|
||||
|
||||
#include "fts2_hash.h"
|
||||
#include "fts2_tokenizer.h"
|
||||
#include <assert.h>
|
||||
|
||||
/*
|
||||
** Implementation of the SQL scalar function for accessing the underlying
|
||||
** hash table. This function may be called as follows:
|
||||
**
|
||||
** SELECT <function-name>(<key-name>);
|
||||
** SELECT <function-name>(<key-name>, <pointer>);
|
||||
**
|
||||
** where <function-name> is the name passed as the second argument
|
||||
** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer').
|
||||
**
|
||||
** If the <pointer> argument is specified, it must be a blob value
|
||||
** containing a pointer to be stored as the hash data corresponding
|
||||
** to the string <key-name>. If <pointer> is not specified, then
|
||||
** the string <key-name> must already exist in the hash table. Otherwise,
|
||||
** an error is returned.
|
||||
**
|
||||
** Whether or not the <pointer> argument is specified, the value returned
|
||||
** is a blob containing the pointer stored as the hash data corresponding
|
||||
** to string <key-name> (after the hash-table is updated, if applicable).
|
||||
*/
|
||||
static void scalarFunc(
|
||||
sqlite3_context *context,
|
||||
int argc,
|
||||
sqlite3_value **argv
|
||||
){
|
||||
fts2Hash *pHash;
|
||||
void *pPtr = 0;
|
||||
const unsigned char *zName;
|
||||
int nName;
|
||||
|
||||
assert( argc==1 || argc==2 );
|
||||
|
||||
pHash = (fts2Hash *)sqlite3_user_data(context);
|
||||
|
||||
zName = sqlite3_value_text(argv[0]);
|
||||
nName = sqlite3_value_bytes(argv[0])+1;
|
||||
|
||||
if( argc==2 ){
|
||||
void *pOld;
|
||||
int n = sqlite3_value_bytes(argv[1]);
|
||||
if( n!=sizeof(pPtr) ){
|
||||
sqlite3_result_error(context, "argument type mismatch", -1);
|
||||
return;
|
||||
}
|
||||
pPtr = *(void **)sqlite3_value_blob(argv[1]);
|
||||
pOld = sqlite3Fts2HashInsert(pHash, (void *)zName, nName, pPtr);
|
||||
if( pOld==pPtr ){
|
||||
sqlite3_result_error(context, "out of memory", -1);
|
||||
return;
|
||||
}
|
||||
}else{
|
||||
pPtr = sqlite3Fts2HashFind(pHash, zName, nName);
|
||||
if( !pPtr ){
|
||||
char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
|
||||
sqlite3_result_error(context, zErr, -1);
|
||||
sqlite3_free(zErr);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT);
|
||||
}
|
||||
|
||||
#ifdef SQLITE_TEST
|
||||
|
||||
#if defined(INCLUDE_SQLITE_TCL_H)
|
||||
# include "sqlite_tcl.h"
|
||||
#else
|
||||
# include "tcl.h"
|
||||
#endif
|
||||
#include <string.h>
|
||||
|
||||
/*
|
||||
** Implementation of a special SQL scalar function for testing tokenizers
|
||||
** designed to be used in concert with the Tcl testing framework. This
|
||||
** function must be called with two arguments:
|
||||
**
|
||||
** SELECT <function-name>(<key-name>, <input-string>);
|
||||
** SELECT <function-name>(<key-name>, <pointer>);
|
||||
**
|
||||
** where <function-name> is the name passed as the second argument
|
||||
** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer')
|
||||
** concatenated with the string '_test' (e.g. 'fts2_tokenizer_test').
|
||||
**
|
||||
** The return value is a string that may be interpreted as a Tcl
|
||||
** list. For each token in the <input-string>, three elements are
|
||||
** added to the returned list. The first is the token position, the
|
||||
** second is the token text (folded, stemmed, etc.) and the third is the
|
||||
** substring of <input-string> associated with the token. For example,
|
||||
** using the built-in "simple" tokenizer:
|
||||
**
|
||||
** SELECT fts2_tokenizer_test('simple', 'I don''t see how');
|
||||
**
|
||||
** will return the string:
|
||||
**
|
||||
** "{0 i I 1 dont don't 2 see see 3 how how}"
|
||||
**
|
||||
*/
|
||||
static void testFunc(
|
||||
sqlite3_context *context,
|
||||
int argc,
|
||||
sqlite3_value **argv
|
||||
){
|
||||
fts2Hash *pHash;
|
||||
sqlite3_tokenizer_module *p;
|
||||
sqlite3_tokenizer *pTokenizer = 0;
|
||||
sqlite3_tokenizer_cursor *pCsr = 0;
|
||||
|
||||
const char *zErr = 0;
|
||||
|
||||
const char *zName;
|
||||
int nName;
|
||||
const char *zInput;
|
||||
int nInput;
|
||||
|
||||
const char *zArg = 0;
|
||||
|
||||
const char *zToken;
|
||||
int nToken;
|
||||
int iStart;
|
||||
int iEnd;
|
||||
int iPos;
|
||||
|
||||
Tcl_Obj *pRet;
|
||||
|
||||
assert( argc==2 || argc==3 );
|
||||
|
||||
nName = sqlite3_value_bytes(argv[0]);
|
||||
zName = (const char *)sqlite3_value_text(argv[0]);
|
||||
nInput = sqlite3_value_bytes(argv[argc-1]);
|
||||
zInput = (const char *)sqlite3_value_text(argv[argc-1]);
|
||||
|
||||
if( argc==3 ){
|
||||
zArg = (const char *)sqlite3_value_text(argv[1]);
|
||||
}
|
||||
|
||||
pHash = (fts2Hash *)sqlite3_user_data(context);
|
||||
p = (sqlite3_tokenizer_module *)sqlite3Fts2HashFind(pHash, zName, nName+1);
|
||||
|
||||
if( !p ){
|
||||
char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
|
||||
sqlite3_result_error(context, zErr, -1);
|
||||
sqlite3_free(zErr);
|
||||
return;
|
||||
}
|
||||
|
||||
pRet = Tcl_NewObj();
|
||||
Tcl_IncrRefCount(pRet);
|
||||
|
||||
if( SQLITE_OK!=p->xCreate(zArg ? 1 : 0, &zArg, &pTokenizer) ){
|
||||
zErr = "error in xCreate()";
|
||||
goto finish;
|
||||
}
|
||||
pTokenizer->pModule = p;
|
||||
if( SQLITE_OK!=p->xOpen(pTokenizer, zInput, nInput, &pCsr) ){
|
||||
zErr = "error in xOpen()";
|
||||
goto finish;
|
||||
}
|
||||
pCsr->pTokenizer = pTokenizer;
|
||||
|
||||
while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){
|
||||
Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos));
|
||||
Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
|
||||
zToken = &zInput[iStart];
|
||||
nToken = iEnd-iStart;
|
||||
Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
|
||||
}
|
||||
|
||||
if( SQLITE_OK!=p->xClose(pCsr) ){
|
||||
zErr = "error in xClose()";
|
||||
goto finish;
|
||||
}
|
||||
if( SQLITE_OK!=p->xDestroy(pTokenizer) ){
|
||||
zErr = "error in xDestroy()";
|
||||
goto finish;
|
||||
}
|
||||
|
||||
finish:
|
||||
if( zErr ){
|
||||
sqlite3_result_error(context, zErr, -1);
|
||||
}else{
|
||||
sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT);
|
||||
}
|
||||
Tcl_DecrRefCount(pRet);
|
||||
}
|
||||
|
||||
static
|
||||
int registerTokenizer(
|
||||
sqlite3 *db,
|
||||
char *zName,
|
||||
const sqlite3_tokenizer_module *p
|
||||
){
|
||||
int rc;
|
||||
sqlite3_stmt *pStmt;
|
||||
const char zSql[] = "SELECT fts2_tokenizer(?, ?)";
|
||||
|
||||
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
|
||||
if( rc!=SQLITE_OK ){
|
||||
return rc;
|
||||
}
|
||||
|
||||
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
|
||||
sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
|
||||
sqlite3_step(pStmt);
|
||||
|
||||
return sqlite3_finalize(pStmt);
|
||||
}
|
||||
|
||||
static
|
||||
int queryFts2Tokenizer(
|
||||
sqlite3 *db,
|
||||
char *zName,
|
||||
const sqlite3_tokenizer_module **pp
|
||||
){
|
||||
int rc;
|
||||
sqlite3_stmt *pStmt;
|
||||
const char zSql[] = "SELECT fts2_tokenizer(?)";
|
||||
|
||||
*pp = 0;
|
||||
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
|
||||
if( rc!=SQLITE_OK ){
|
||||
return rc;
|
||||
}
|
||||
|
||||
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
|
||||
if( SQLITE_ROW==sqlite3_step(pStmt) ){
|
||||
if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
|
||||
memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
|
||||
}
|
||||
}
|
||||
|
||||
return sqlite3_finalize(pStmt);
|
||||
}
|
||||
|
||||
void sqlite3Fts2SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
|
||||
|
||||
/*
|
||||
** Implementation of the scalar function fts2_tokenizer_internal_test().
|
||||
** This function is used for testing only, it is not included in the
|
||||
** build unless SQLITE_TEST is defined.
|
||||
**
|
||||
** The purpose of this is to test that the fts2_tokenizer() function
|
||||
** can be used as designed by the C-code in the queryFts2Tokenizer and
|
||||
** registerTokenizer() functions above. These two functions are repeated
|
||||
** in the README.tokenizer file as an example, so it is important to
|
||||
** test them.
|
||||
**
|
||||
** To run the tests, evaluate the fts2_tokenizer_internal_test() scalar
|
||||
** function with no arguments. An assert() will fail if a problem is
|
||||
** detected. i.e.:
|
||||
**
|
||||
** SELECT fts2_tokenizer_internal_test();
|
||||
**
|
||||
*/
|
||||
static void intTestFunc(
|
||||
sqlite3_context *context,
|
||||
int argc,
|
||||
sqlite3_value **argv
|
||||
){
|
||||
int rc;
|
||||
const sqlite3_tokenizer_module *p1;
|
||||
const sqlite3_tokenizer_module *p2;
|
||||
sqlite3 *db = (sqlite3 *)sqlite3_user_data(context);
|
||||
|
||||
/* Test the query function */
|
||||
sqlite3Fts2SimpleTokenizerModule(&p1);
|
||||
rc = queryFts2Tokenizer(db, "simple", &p2);
|
||||
assert( rc==SQLITE_OK );
|
||||
assert( p1==p2 );
|
||||
rc = queryFts2Tokenizer(db, "nosuchtokenizer", &p2);
|
||||
assert( rc==SQLITE_ERROR );
|
||||
assert( p2==0 );
|
||||
assert( 0==strcmp(sqlite3_errmsg(db), "unknown tokenizer: nosuchtokenizer") );
|
||||
|
||||
/* Test the storage function */
|
||||
rc = registerTokenizer(db, "nosuchtokenizer", p1);
|
||||
assert( rc==SQLITE_OK );
|
||||
rc = queryFts2Tokenizer(db, "nosuchtokenizer", &p2);
|
||||
assert( rc==SQLITE_OK );
|
||||
assert( p2==p1 );
|
||||
|
||||
sqlite3_result_text(context, "ok", -1, SQLITE_STATIC);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
** Set up SQL objects in database db used to access the contents of
|
||||
** the hash table pointed to by argument pHash. The hash table must
|
||||
** have been initialized to use string keys, and to take a private copy
|
||||
** of the key when a value is inserted. i.e. by a call similar to:
|
||||
**
|
||||
** sqlite3Fts2HashInit(pHash, FTS2_HASH_STRING, 1);
|
||||
**
|
||||
** This function adds a scalar function (see header comment above
|
||||
** scalarFunc() in this file for details) and, if ENABLE_TABLE is
|
||||
** defined at compilation time, a temporary virtual table (see header
|
||||
** comment above struct HashTableVtab) to the database schema. Both
|
||||
** provide read/write access to the contents of *pHash.
|
||||
**
|
||||
** The third argument to this function, zName, is used as the name
|
||||
** of both the scalar and, if created, the virtual table.
|
||||
*/
|
||||
int sqlite3Fts2InitHashTable(
|
||||
sqlite3 *db,
|
||||
fts2Hash *pHash,
|
||||
const char *zName
|
||||
){
|
||||
int rc = SQLITE_OK;
|
||||
void *p = (void *)pHash;
|
||||
const int any = SQLITE_ANY;
|
||||
char *zTest = 0;
|
||||
char *zTest2 = 0;
|
||||
|
||||
#ifdef SQLITE_TEST
|
||||
void *pdb = (void *)db;
|
||||
zTest = sqlite3_mprintf("%s_test", zName);
|
||||
zTest2 = sqlite3_mprintf("%s_internal_test", zName);
|
||||
if( !zTest || !zTest2 ){
|
||||
rc = SQLITE_NOMEM;
|
||||
}
|
||||
#endif
|
||||
|
||||
if( rc!=SQLITE_OK
|
||||
|| (rc = sqlite3_create_function(db, zName, 1, any, p, scalarFunc, 0, 0))
|
||||
|| (rc = sqlite3_create_function(db, zName, 2, any, p, scalarFunc, 0, 0))
|
||||
#ifdef SQLITE_TEST
|
||||
|| (rc = sqlite3_create_function(db, zTest, 2, any, p, testFunc, 0, 0))
|
||||
|| (rc = sqlite3_create_function(db, zTest, 3, any, p, testFunc, 0, 0))
|
||||
|| (rc = sqlite3_create_function(db, zTest2, 0, any, pdb, intTestFunc, 0, 0))
|
||||
#endif
|
||||
);
|
||||
|
||||
sqlite3_free(zTest);
|
||||
sqlite3_free(zTest2);
|
||||
return rc;
|
||||
}
|
||||
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */
|
@@ -1,145 +0,0 @@
|
||||
/*
|
||||
** 2006 July 10
|
||||
**
|
||||
** The author disclaims copyright to this source code.
|
||||
**
|
||||
*************************************************************************
|
||||
** Defines the interface to tokenizers used by fulltext-search. There
|
||||
** are three basic components:
|
||||
**
|
||||
** sqlite3_tokenizer_module is a singleton defining the tokenizer
|
||||
** interface functions. This is essentially the class structure for
|
||||
** tokenizers.
|
||||
**
|
||||
** sqlite3_tokenizer is used to define a particular tokenizer, perhaps
|
||||
** including customization information defined at creation time.
|
||||
**
|
||||
** sqlite3_tokenizer_cursor is generated by a tokenizer to generate
|
||||
** tokens from a particular input.
|
||||
*/
|
||||
#ifndef _FTS2_TOKENIZER_H_
|
||||
#define _FTS2_TOKENIZER_H_
|
||||
|
||||
/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time.
|
||||
** If tokenizers are to be allowed to call sqlite3_*() functions, then
|
||||
** we will need a way to register the API consistently.
|
||||
*/
|
||||
#include "sqlite3.h"
|
||||
|
||||
/*
|
||||
** Structures used by the tokenizer interface. When a new tokenizer
|
||||
** implementation is registered, the caller provides a pointer to
|
||||
** an sqlite3_tokenizer_module containing pointers to the callback
|
||||
** functions that make up an implementation.
|
||||
**
|
||||
** When an fts2 table is created, it passes any arguments passed to
|
||||
** the tokenizer clause of the CREATE VIRTUAL TABLE statement to the
|
||||
** sqlite3_tokenizer_module.xCreate() function of the requested tokenizer
|
||||
** implementation. The xCreate() function in turn returns an
|
||||
** sqlite3_tokenizer structure representing the specific tokenizer to
|
||||
** be used for the fts2 table (customized by the tokenizer clause arguments).
|
||||
**
|
||||
** To tokenize an input buffer, the sqlite3_tokenizer_module.xOpen()
|
||||
** method is called. It returns an sqlite3_tokenizer_cursor object
|
||||
** that may be used to tokenize a specific input buffer based on
|
||||
** the tokenization rules supplied by a specific sqlite3_tokenizer
|
||||
** object.
|
||||
*/
|
||||
typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
|
||||
typedef struct sqlite3_tokenizer sqlite3_tokenizer;
|
||||
typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
|
||||
|
||||
struct sqlite3_tokenizer_module {
|
||||
|
||||
/*
|
||||
** Structure version. Should always be set to 0.
|
||||
*/
|
||||
int iVersion;
|
||||
|
||||
/*
|
||||
** Create a new tokenizer. The values in the argv[] array are the
|
||||
** arguments passed to the "tokenizer" clause of the CREATE VIRTUAL
|
||||
** TABLE statement that created the fts2 table. For example, if
|
||||
** the following SQL is executed:
|
||||
**
|
||||
** CREATE .. USING fts2( ... , tokenizer <tokenizer-name> arg1 arg2)
|
||||
**
|
||||
** then argc is set to 2, and the argv[] array contains pointers
|
||||
** to the strings "arg1" and "arg2".
|
||||
**
|
||||
** This method should return either SQLITE_OK (0), or an SQLite error
|
||||
** code. If SQLITE_OK is returned, then *ppTokenizer should be set
|
||||
** to point at the newly created tokenizer structure. The generic
|
||||
** sqlite3_tokenizer.pModule variable should not be initialized by
|
||||
** this callback. The caller will do so.
|
||||
*/
|
||||
int (*xCreate)(
|
||||
int argc, /* Size of argv array */
|
||||
const char *const*argv, /* Tokenizer argument strings */
|
||||
sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */
|
||||
);
|
||||
|
||||
/*
|
||||
** Destroy an existing tokenizer. The fts2 module calls this method
|
||||
** exactly once for each successful call to xCreate().
|
||||
*/
|
||||
int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
|
||||
|
||||
/*
|
||||
** Create a tokenizer cursor to tokenize an input buffer. The caller
|
||||
** is responsible for ensuring that the input buffer remains valid
|
||||
** until the cursor is closed (using the xClose() method).
|
||||
*/
|
||||
int (*xOpen)(
|
||||
sqlite3_tokenizer *pTokenizer, /* Tokenizer object */
|
||||
const char *pInput, int nBytes, /* Input buffer */
|
||||
sqlite3_tokenizer_cursor **ppCursor /* OUT: Created tokenizer cursor */
|
||||
);
|
||||
|
||||
/*
|
||||
** Destroy an existing tokenizer cursor. The fts2 module calls this
|
||||
** method exactly once for each successful call to xOpen().
|
||||
*/
|
||||
int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
|
||||
|
||||
/*
|
||||
** Retrieve the next token from the tokenizer cursor pCursor. This
|
||||
** method should either return SQLITE_OK and set the values of the
|
||||
** "OUT" variables identified below, or SQLITE_DONE to indicate that
|
||||
** the end of the buffer has been reached, or an SQLite error code.
|
||||
**
|
||||
** *ppToken should be set to point at a buffer containing the
|
||||
** normalized version of the token (i.e. after any case-folding and/or
|
||||
** stemming has been performed). *pnBytes should be set to the length
|
||||
** of this buffer in bytes. The input text that generated the token is
|
||||
** identified by the byte offsets returned in *piStartOffset and
|
||||
** *piEndOffset.
|
||||
**
|
||||
** The buffer *ppToken is set to point at is managed by the tokenizer
|
||||
** implementation. It is only required to be valid until the next call
|
||||
** to xNext() or xClose().
|
||||
*/
|
||||
/* TODO(shess) current implementation requires pInput to be
|
||||
** nul-terminated. This should either be fixed, or pInput/nBytes
|
||||
** should be converted to zInput.
|
||||
*/
|
||||
int (*xNext)(
|
||||
sqlite3_tokenizer_cursor *pCursor, /* Tokenizer cursor */
|
||||
const char **ppToken, int *pnBytes, /* OUT: Normalized text for token */
|
||||
int *piStartOffset, /* OUT: Byte offset of token in input buffer */
|
||||
int *piEndOffset, /* OUT: Byte offset of end of token in input buffer */
|
||||
int *piPosition /* OUT: Number of tokens returned before this one */
|
||||
);
|
||||
};
|
||||
|
||||
struct sqlite3_tokenizer {
|
||||
const sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */
|
||||
/* Tokenizer implementations will typically add additional fields */
|
||||
};
|
||||
|
||||
struct sqlite3_tokenizer_cursor {
|
||||
sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */
|
||||
/* Tokenizer implementations will typically add additional fields */
|
||||
};
|
||||
|
||||
#endif /* _FTS2_TOKENIZER_H_ */
|
@@ -1,233 +0,0 @@
|
||||
/*
|
||||
** 2006 Oct 10
|
||||
**
|
||||
** The author disclaims copyright to this source code. In place of
|
||||
** a legal notice, here is a blessing:
|
||||
**
|
||||
** May you do good and not evil.
|
||||
** May you find forgiveness for yourself and forgive others.
|
||||
** May you share freely, never taking more than you give.
|
||||
**
|
||||
******************************************************************************
|
||||
**
|
||||
** Implementation of the "simple" full-text-search tokenizer.
|
||||
*/
|
||||
|
||||
/*
|
||||
** The code in this file is only compiled if:
|
||||
**
|
||||
** * The FTS2 module is being built as an extension
|
||||
** (in which case SQLITE_CORE is not defined), or
|
||||
**
|
||||
** * The FTS2 module is being built into the core of
|
||||
** SQLite (in which case SQLITE_ENABLE_FTS2 is defined).
|
||||
*/
|
||||
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
|
||||
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "sqlite3.h"
|
||||
#include "sqlite3ext.h"
|
||||
SQLITE_EXTENSION_INIT3
|
||||
#include "fts2_tokenizer.h"
|
||||
|
||||
typedef struct simple_tokenizer {
|
||||
sqlite3_tokenizer base;
|
||||
char delim[128]; /* flag ASCII delimiters */
|
||||
} simple_tokenizer;
|
||||
|
||||
typedef struct simple_tokenizer_cursor {
|
||||
sqlite3_tokenizer_cursor base;
|
||||
const char *pInput; /* input we are tokenizing */
|
||||
int nBytes; /* size of the input */
|
||||
int iOffset; /* current position in pInput */
|
||||
int iToken; /* index of next token to be returned */
|
||||
char *pToken; /* storage for current token */
|
||||
int nTokenAllocated; /* space allocated to zToken buffer */
|
||||
} simple_tokenizer_cursor;
|
||||
|
||||
|
||||
/* Forward declaration */
|
||||
static const sqlite3_tokenizer_module simpleTokenizerModule;
|
||||
|
||||
/*
** Return non-zero if c is an ASCII byte flagged as a delimiter in t.
** Bytes of 0x80 and above are never delimiters.
*/
static int simpleDelim(simple_tokenizer *t, unsigned char c){
  if( c>=0x80 ){
    return 0;
  }
  return t->delim[c]!=0;
}
|
||||
|
||||
/*
|
||||
** Create a new tokenizer instance.
|
||||
*/
|
||||
static int simpleCreate(
|
||||
int argc, const char * const *argv,
|
||||
sqlite3_tokenizer **ppTokenizer
|
||||
){
|
||||
simple_tokenizer *t;
|
||||
|
||||
t = (simple_tokenizer *) sqlite3_malloc(sizeof(*t));
|
||||
if( t==NULL ) return SQLITE_NOMEM;
|
||||
memset(t, 0, sizeof(*t));
|
||||
|
||||
/* TODO(shess) Delimiters need to remain the same from run to run,
|
||||
** else we need to reindex. One solution would be a meta-table to
|
||||
** track such information in the database, then we'd only want this
|
||||
** information on the initial create.
|
||||
*/
|
||||
if( argc>1 ){
|
||||
int i, n = strlen(argv[1]);
|
||||
for(i=0; i<n; i++){
|
||||
unsigned char ch = argv[1][i];
|
||||
/* We explicitly don't support UTF-8 delimiters for now. */
|
||||
if( ch>=0x80 ){
|
||||
sqlite3_free(t);
|
||||
return SQLITE_ERROR;
|
||||
}
|
||||
t->delim[ch] = 1;
|
||||
}
|
||||
} else {
|
||||
/* Mark non-alphanumeric ASCII characters as delimiters */
|
||||
int i;
|
||||
for(i=1; i<0x80; i++){
|
||||
t->delim[i] = !((i>='0' && i<='9') || (i>='A' && i<='Z') ||
|
||||
(i>='a' && i<='z'));
|
||||
}
|
||||
}
|
||||
|
||||
*ppTokenizer = &t->base;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Destroy a tokenizer
|
||||
*/
|
||||
static int simpleDestroy(sqlite3_tokenizer *pTokenizer){
  /* Release the tokenizer object with sqlite3_free(); always succeeds. */
  sqlite3_free(pTokenizer);
  return SQLITE_OK;
}
|
||||
|
||||
/*
|
||||
** Prepare to begin tokenizing a particular string. The input
|
||||
** string to be tokenized is pInput[0..nBytes-1]. A cursor
|
||||
** used to incrementally tokenize this string is returned in
|
||||
** *ppCursor.
|
||||
*/
|
||||
static int simpleOpen(
|
||||
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
|
||||
const char *pInput, int nBytes, /* String to be tokenized */
|
||||
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
|
||||
){
|
||||
simple_tokenizer_cursor *c;
|
||||
|
||||
c = (simple_tokenizer_cursor *) sqlite3_malloc(sizeof(*c));
|
||||
if( c==NULL ) return SQLITE_NOMEM;
|
||||
|
||||
c->pInput = pInput;
|
||||
if( pInput==0 ){
|
||||
c->nBytes = 0;
|
||||
}else if( nBytes<0 ){
|
||||
c->nBytes = (int)strlen(pInput);
|
||||
}else{
|
||||
c->nBytes = nBytes;
|
||||
}
|
||||
c->iOffset = 0; /* start tokenizing at the beginning */
|
||||
c->iToken = 0;
|
||||
c->pToken = NULL; /* no space allocated, yet. */
|
||||
c->nTokenAllocated = 0;
|
||||
|
||||
*ppCursor = &c->base;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Close a tokenization cursor previously opened by a call to
|
||||
** simpleOpen() above.
|
||||
*/
|
||||
static int simpleClose(sqlite3_tokenizer_cursor *pCursor){
|
||||
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
|
||||
sqlite3_free(c->pToken);
|
||||
sqlite3_free(c);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Extract the next token from a tokenization cursor. The cursor must
|
||||
** have been opened by a prior call to simpleOpen().
|
||||
*/
|
||||
static int simpleNext(
|
||||
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */
|
||||
const char **ppToken, /* OUT: *ppToken is the token text */
|
||||
int *pnBytes, /* OUT: Number of bytes in token */
|
||||
int *piStartOffset, /* OUT: Starting offset of token */
|
||||
int *piEndOffset, /* OUT: Ending offset of token */
|
||||
int *piPosition /* OUT: Position integer of token */
|
||||
){
|
||||
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
|
||||
simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer;
|
||||
unsigned char *p = (unsigned char *)c->pInput;
|
||||
|
||||
while( c->iOffset<c->nBytes ){
|
||||
int iStartOffset;
|
||||
|
||||
/* Scan past delimiter characters */
|
||||
while( c->iOffset<c->nBytes && simpleDelim(t, p[c->iOffset]) ){
|
||||
c->iOffset++;
|
||||
}
|
||||
|
||||
/* Count non-delimiter characters. */
|
||||
iStartOffset = c->iOffset;
|
||||
while( c->iOffset<c->nBytes && !simpleDelim(t, p[c->iOffset]) ){
|
||||
c->iOffset++;
|
||||
}
|
||||
|
||||
if( c->iOffset>iStartOffset ){
|
||||
int i, n = c->iOffset-iStartOffset;
|
||||
if( n>c->nTokenAllocated ){
|
||||
c->nTokenAllocated = n+20;
|
||||
c->pToken = sqlite3_realloc(c->pToken, c->nTokenAllocated);
|
||||
if( c->pToken==NULL ) return SQLITE_NOMEM;
|
||||
}
|
||||
for(i=0; i<n; i++){
|
||||
/* TODO(shess) This needs expansion to handle UTF-8
|
||||
** case-insensitivity.
|
||||
*/
|
||||
unsigned char ch = p[iStartOffset+i];
|
||||
c->pToken[i] = (ch>='A' && ch<='Z') ? (ch - 'A' + 'a') : ch;
|
||||
}
|
||||
*ppToken = c->pToken;
|
||||
*pnBytes = n;
|
||||
*piStartOffset = iStartOffset;
|
||||
*piEndOffset = c->iOffset;
|
||||
*piPosition = c->iToken++;
|
||||
|
||||
return SQLITE_OK;
|
||||
}
|
||||
}
|
||||
return SQLITE_DONE;
|
||||
}
|
||||
|
||||
/*
|
||||
** The set of routines that implement the simple tokenizer
|
||||
*/
|
||||
static const sqlite3_tokenizer_module simpleTokenizerModule = {
|
||||
0,
|
||||
simpleCreate,
|
||||
simpleDestroy,
|
||||
simpleOpen,
|
||||
simpleClose,
|
||||
simpleNext,
|
||||
};
|
||||
|
||||
/*
|
||||
** Allocate a new simple tokenizer. Return a pointer to the new
|
||||
** tokenizer in *ppModule
|
||||
*/
|
||||
void sqlite3Fts2SimpleTokenizerModule(
|
||||
sqlite3_tokenizer_module const**ppModule
|
||||
){
|
||||
*ppModule = &simpleTokenizerModule;
|
||||
}
|
||||
|
||||
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */
|
@@ -1,116 +0,0 @@
|
||||
#!/usr/bin/tclsh
|
||||
#
|
||||
# This script builds a single C code file holding all of FTS2 code.
|
||||
# The name of the output file is fts2amal.c. To build this file,
|
||||
# first do:
|
||||
#
|
||||
# make target_source
|
||||
#
|
||||
# The make target above moves all of the source code files into
|
||||
# a subdirectory named "tsrc". (This script expects to find the files
|
||||
# there and will not work if they are not found.)
|
||||
#
|
||||
# After the "tsrc" directory has been created and populated, run
|
||||
# this script:
|
||||
#
|
||||
# tclsh mkfts2amal.tcl
|
||||
#
|
||||
# The amalgamated FTS2 code will be written into fts2amal.c
|
||||
#
|
||||
|
||||
# Create the output file fts2amal.c and start it with a banner comment
# that records the UTC time at which the amalgamation was generated.
#
set out [open fts2amal.c w]
set now [clock seconds]
set today [clock format $now -format "%Y-%m-%d %H:%M:%S UTC" -gmt 1]
set banner [subst \
{/******************************************************************************
** This file is an amalgamation of separate C source files from the SQLite
** Full Text Search extension 2 (fts2). By combining all the individual C
** code files into this single large file, the entire code can be compiled
** as a one translation unit. This allows many compilers to do optimizations
** that would not be possible if the files were compiled separately. It also
** makes the code easier to import into other projects.
**
** This amalgamation was generated on $today.
*/}]
puts $out $banner
|
||||
|
||||
# Header files that are copied in-line into the amalgamation.  Each name
# is recorded in the available_hdr() array with the value 1; copy_file
# consults this array whenever it meets a #include line.
#
set inline_headers {
   fts2.h
   fts2_hash.h
   fts2_tokenizer.h
   sqlite3.h
   sqlite3ext.h
}
foreach hdr $inline_headers {
  set available_hdr($hdr) 1
}
|
||||
|
||||
# A run of stars from which section_comment cuts its padding.
set s78 \
{*****************************************************************************}

# Write a one-line C comment of the form "/************** TEXT ****.../"
# to the output channel $out.  The star padding after TEXT is the slice of
# $s78 from index 0 through index (60 - length of TEXT), so longer text
# gets fewer stars.
#
proc section_comment {text} {
  global out s78
  set last [expr {60 - [string length $text]}]
  puts $out "/************** $text [string range $s78 0 $last]/"
}
|
||||
|
||||
# Read the source file named $filename and append its text to the
# amalgamation output channel $out, bracketed by "Begin file" and "End of"
# section comments.  Lines are copied through unchanged except:
#
#   * "#include" of a header listed in available_hdr(): the header is
#     copied in-line (recursively) the first time it is seen.  Its flag is
#     then cleared so every later #include of it is dropped.
#   * "#include" of any other header: the line is copied the first time
#     only; seen_hdr() remembers which headers were already emitted.
#   * Lines starting with "#ifdef __cplusplus" are replaced by "#if 0".
#   * "#line" directives are dropped.
#
proc copy_file {filename} {
  global seen_hdr available_hdr out
  set tail [file tail $filename]
  section_comment "Begin file $tail"
  set in [open $filename r]
  # Loop on the result of [gets] rather than on [eof]: end-of-file is only
  # reported after a read has failed, so an [eof]-driven loop would emit
  # one spurious empty line at the end of every file.
  while {[gets $in line] >= 0} {
    if {[regexp {^#\s*include\s+["<]([^">]+)[">]} $line all hdr]} {
      if {[info exists available_hdr($hdr)]} {
        if {$available_hdr($hdr)} {
          # Clear the flag before recursing so the header is in-lined once
          # only, even if it is (directly or indirectly) included again.
          set available_hdr($hdr) 0
          section_comment "Include $hdr in the middle of $tail"
          copy_file tsrc/$hdr
          section_comment "Continuing where we left off in $tail"
        }
      } elseif {![info exists seen_hdr($hdr)]} {
        set seen_hdr($hdr) 1
        puts $out $line
      }
    } elseif {[regexp {^#ifdef __cplusplus} $line]} {
      puts $out "#if 0"
    } elseif {[regexp {^#line} $line]} {
      # Skip #line directives.
    } else {
      puts $out $line
    }
  }
  close $in
  section_comment "End of $tail"
}
|
||||
|
||||
|
||||
# Copy each FTS2 source file from the tsrc directory into the
# amalgamation, in the order listed, then close the output file.  Files
# holding commonly used subroutines come first to help the compiler find
# inlining opportunities.
#
set source_files {
   fts2.c
   fts2_hash.c
   fts2_porter.c
   fts2_tokenizer.c
   fts2_tokenizer1.c
   fts2_icu.c
}
foreach file $source_files {
  copy_file tsrc/$file
}

close $out
|
@@ -5287,9 +5287,8 @@ static void fts3EvalNextRow(
|
||||
Fts3Expr *pExpr, /* Expr. to advance to next matching row */
|
||||
int *pRc /* IN/OUT: Error code */
|
||||
){
|
||||
if( *pRc==SQLITE_OK ){
|
||||
if( *pRc==SQLITE_OK && pExpr->bEof==0 ){
|
||||
int bDescDoclist = pCsr->bDesc; /* Used by DOCID_CMP() macro */
|
||||
assert( pExpr->bEof==0 );
|
||||
pExpr->bStart = 1;
|
||||
|
||||
switch( pExpr->eType ){
|
||||
@@ -5765,6 +5764,22 @@ static void fts3EvalUpdateCounts(Fts3Expr *pExpr, int nCol){
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
** This is an sqlite3Fts3ExprIterate() callback. If the Fts3Expr.aMI[] array
|
||||
** has not yet been allocated, allocate and zero it. Otherwise, just zero
|
||||
** it.
|
||||
*/
|
||||
static int fts3AllocateMSI(Fts3Expr *pExpr, int iPhrase, void *pCtx){
|
||||
Fts3Table *pTab = (Fts3Table*)pCtx;
|
||||
UNUSED_PARAMETER(iPhrase);
|
||||
if( pExpr->aMI==0 ){
|
||||
pExpr->aMI = (u32 *)sqlite3_malloc64(pTab->nColumn * 3 * sizeof(u32));
|
||||
if( pExpr->aMI==0 ) return SQLITE_NOMEM;
|
||||
}
|
||||
memset(pExpr->aMI, 0, pTab->nColumn * 3 * sizeof(u32));
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
** Expression pExpr must be of type FTSQUERY_PHRASE.
|
||||
**
|
||||
@@ -5786,7 +5801,6 @@ static int fts3EvalGatherStats(
|
||||
if( pExpr->aMI==0 ){
|
||||
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
|
||||
Fts3Expr *pRoot; /* Root of NEAR expression */
|
||||
Fts3Expr *p; /* Iterator used for several purposes */
|
||||
|
||||
sqlite3_int64 iPrevId = pCsr->iPrevId;
|
||||
sqlite3_int64 iDocid;
|
||||
@@ -5794,7 +5808,9 @@ static int fts3EvalGatherStats(
|
||||
|
||||
/* Find the root of the NEAR expression */
|
||||
pRoot = pExpr;
|
||||
while( pRoot->pParent && pRoot->pParent->eType==FTSQUERY_NEAR ){
|
||||
while( pRoot->pParent
|
||||
&& (pRoot->pParent->eType==FTSQUERY_NEAR || pRoot->bDeferred)
|
||||
){
|
||||
pRoot = pRoot->pParent;
|
||||
}
|
||||
iDocid = pRoot->iDocid;
|
||||
@@ -5802,14 +5818,8 @@ static int fts3EvalGatherStats(
|
||||
assert( pRoot->bStart );
|
||||
|
||||
/* Allocate space for the aMSI[] array of each FTSQUERY_PHRASE node */
|
||||
for(p=pRoot; p; p=p->pLeft){
|
||||
Fts3Expr *pE = (p->eType==FTSQUERY_PHRASE?p:p->pRight);
|
||||
assert( pE->aMI==0 );
|
||||
pE->aMI = (u32 *)sqlite3_malloc64(pTab->nColumn * 3 * sizeof(u32));
|
||||
if( !pE->aMI ) return SQLITE_NOMEM;
|
||||
memset(pE->aMI, 0, pTab->nColumn * 3 * sizeof(u32));
|
||||
}
|
||||
|
||||
rc = sqlite3Fts3ExprIterate(pRoot, fts3AllocateMSI, (void*)pTab);
|
||||
if( rc!=SQLITE_OK ) return rc;
|
||||
fts3EvalRestart(pCsr, pRoot, &rc);
|
||||
|
||||
while( pCsr->isEof==0 && rc==SQLITE_OK ){
|
||||
@@ -5965,6 +5975,7 @@ int sqlite3Fts3EvalPhrasePoslist(
|
||||
u8 bTreeEof = 0;
|
||||
Fts3Expr *p; /* Used to iterate from pExpr to root */
|
||||
Fts3Expr *pNear; /* Most senior NEAR ancestor (or pExpr) */
|
||||
Fts3Expr *pRun; /* Closest non-deferred ancestor of pNear */
|
||||
int bMatch;
|
||||
|
||||
/* Check if this phrase descends from an OR expression node. If not,
|
||||
@@ -5979,25 +5990,30 @@ int sqlite3Fts3EvalPhrasePoslist(
|
||||
if( p->bEof ) bTreeEof = 1;
|
||||
}
|
||||
if( bOr==0 ) return SQLITE_OK;
|
||||
pRun = pNear;
|
||||
while( pRun->bDeferred ){
|
||||
assert( pRun->pParent );
|
||||
pRun = pRun->pParent;
|
||||
}
|
||||
|
||||
/* This is the descendent of an OR node. In this case we cannot use
|
||||
** an incremental phrase. Load the entire doclist for the phrase
|
||||
** into memory in this case. */
|
||||
if( pPhrase->bIncr ){
|
||||
int bEofSave = pNear->bEof;
|
||||
fts3EvalRestart(pCsr, pNear, &rc);
|
||||
while( rc==SQLITE_OK && !pNear->bEof ){
|
||||
fts3EvalNextRow(pCsr, pNear, &rc);
|
||||
if( bEofSave==0 && pNear->iDocid==iDocid ) break;
|
||||
int bEofSave = pRun->bEof;
|
||||
fts3EvalRestart(pCsr, pRun, &rc);
|
||||
while( rc==SQLITE_OK && !pRun->bEof ){
|
||||
fts3EvalNextRow(pCsr, pRun, &rc);
|
||||
if( bEofSave==0 && pRun->iDocid==iDocid ) break;
|
||||
}
|
||||
assert( rc!=SQLITE_OK || pPhrase->bIncr==0 );
|
||||
if( rc==SQLITE_OK && pNear->bEof!=bEofSave ){
|
||||
if( rc==SQLITE_OK && pRun->bEof!=bEofSave ){
|
||||
rc = FTS_CORRUPT_VTAB;
|
||||
}
|
||||
}
|
||||
if( bTreeEof ){
|
||||
while( rc==SQLITE_OK && !pNear->bEof ){
|
||||
fts3EvalNextRow(pCsr, pNear, &rc);
|
||||
while( rc==SQLITE_OK && !pRun->bEof ){
|
||||
fts3EvalNextRow(pCsr, pRun, &rc);
|
||||
}
|
||||
}
|
||||
if( rc!=SQLITE_OK ) return rc;
|
||||
|
@@ -650,5 +650,7 @@ int sqlite3FtsUnicodeIsalnum(int);
|
||||
int sqlite3FtsUnicodeIsdiacritic(int);
|
||||
#endif
|
||||
|
||||
int sqlite3Fts3ExprIterate(Fts3Expr*, int (*x)(Fts3Expr*,int,void*), void*);
|
||||
|
||||
#endif /* !SQLITE_CORE || SQLITE_ENABLE_FTS3 */
|
||||
#endif /* _FTSINT_H */
|
||||
|
@@ -41,7 +41,7 @@ typedef sqlite3_int64 i64;
|
||||
|
||||
|
||||
/*
|
||||
** Used as an fts3ExprIterate() context when loading phrase doclists to
|
||||
** Used as an sqlite3Fts3ExprIterate() context when loading phrase doclists to
|
||||
** Fts3Expr.aDoclist[]/nDoclist.
|
||||
*/
|
||||
typedef struct LoadDoclistCtx LoadDoclistCtx;
|
||||
@@ -85,7 +85,7 @@ struct SnippetFragment {
|
||||
};
|
||||
|
||||
/*
|
||||
** This type is used as an fts3ExprIterate() context object while
|
||||
** This type is used as an sqlite3Fts3ExprIterate() context object while
|
||||
** accumulating the data returned by the matchinfo() function.
|
||||
*/
|
||||
typedef struct MatchInfo MatchInfo;
|
||||
@@ -244,7 +244,7 @@ static void fts3GetDeltaPosition(char **pp, i64 *piPos){
|
||||
}
|
||||
|
||||
/*
|
||||
** Helper function for fts3ExprIterate() (see below).
|
||||
** Helper function for sqlite3Fts3ExprIterate() (see below).
|
||||
*/
|
||||
static int fts3ExprIterate2(
|
||||
Fts3Expr *pExpr, /* Expression to iterate phrases of */
|
||||
@@ -278,7 +278,7 @@ static int fts3ExprIterate2(
|
||||
** Otherwise, SQLITE_OK is returned after a callback has been made for
|
||||
** all eligible phrase nodes.
|
||||
*/
|
||||
static int fts3ExprIterate(
|
||||
int sqlite3Fts3ExprIterate(
|
||||
Fts3Expr *pExpr, /* Expression to iterate phrases of */
|
||||
int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */
|
||||
void *pCtx /* Second argument to pass to callback */
|
||||
@@ -287,10 +287,9 @@ static int fts3ExprIterate(
|
||||
return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
** This is an fts3ExprIterate() callback used while loading the doclists
|
||||
** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also
|
||||
** This is an sqlite3Fts3ExprIterate() callback used while loading the
|
||||
** doclists for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also
|
||||
** fts3ExprLoadDoclists().
|
||||
*/
|
||||
static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){
|
||||
@@ -322,9 +321,9 @@ static int fts3ExprLoadDoclists(
|
||||
int *pnToken /* OUT: Number of tokens in query */
|
||||
){
|
||||
int rc; /* Return Code */
|
||||
LoadDoclistCtx sCtx = {0,0,0}; /* Context for fts3ExprIterate() */
|
||||
LoadDoclistCtx sCtx = {0,0,0}; /* Context for sqlite3Fts3ExprIterate() */
|
||||
sCtx.pCsr = pCsr;
|
||||
rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb, (void *)&sCtx);
|
||||
rc = sqlite3Fts3ExprIterate(pCsr->pExpr,fts3ExprLoadDoclistsCb,(void*)&sCtx);
|
||||
if( pnPhrase ) *pnPhrase = sCtx.nPhrase;
|
||||
if( pnToken ) *pnToken = sCtx.nToken;
|
||||
return rc;
|
||||
@@ -337,7 +336,7 @@ static int fts3ExprPhraseCountCb(Fts3Expr *pExpr, int iPhrase, void *ctx){
|
||||
}
|
||||
static int fts3ExprPhraseCount(Fts3Expr *pExpr){
|
||||
int nPhrase = 0;
|
||||
(void)fts3ExprIterate(pExpr, fts3ExprPhraseCountCb, (void *)&nPhrase);
|
||||
(void)sqlite3Fts3ExprIterate(pExpr, fts3ExprPhraseCountCb, (void *)&nPhrase);
|
||||
return nPhrase;
|
||||
}
|
||||
|
||||
@@ -465,8 +464,9 @@ static void fts3SnippetDetails(
|
||||
}
|
||||
|
||||
/*
|
||||
** This function is an fts3ExprIterate() callback used by fts3BestSnippet().
|
||||
** Each invocation populates an element of the SnippetIter.aPhrase[] array.
|
||||
** This function is an sqlite3Fts3ExprIterate() callback used by
|
||||
** fts3BestSnippet(). Each invocation populates an element of the
|
||||
** SnippetIter.aPhrase[] array.
|
||||
*/
|
||||
static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){
|
||||
SnippetIter *p = (SnippetIter *)ctx;
|
||||
@@ -556,7 +556,9 @@ static int fts3BestSnippet(
|
||||
sIter.nSnippet = nSnippet;
|
||||
sIter.nPhrase = nList;
|
||||
sIter.iCurrent = -1;
|
||||
rc = fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void*)&sIter);
|
||||
rc = sqlite3Fts3ExprIterate(
|
||||
pCsr->pExpr, fts3SnippetFindPositions, (void*)&sIter
|
||||
);
|
||||
if( rc==SQLITE_OK ){
|
||||
|
||||
/* Set the *pmSeen output variable. */
|
||||
@@ -917,10 +919,10 @@ static int fts3ExprLHitGather(
|
||||
}
|
||||
|
||||
/*
|
||||
** fts3ExprIterate() callback used to collect the "global" matchinfo stats
|
||||
** for a single query.
|
||||
** sqlite3Fts3ExprIterate() callback used to collect the "global" matchinfo
|
||||
** stats for a single query.
|
||||
**
|
||||
** fts3ExprIterate() callback to load the 'global' elements of a
|
||||
** sqlite3Fts3ExprIterate() callback to load the 'global' elements of a
|
||||
** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements
|
||||
** of the matchinfo array that are constant for all rows returned by the
|
||||
** current query.
|
||||
@@ -955,7 +957,7 @@ static int fts3ExprGlobalHitsCb(
|
||||
}
|
||||
|
||||
/*
|
||||
** fts3ExprIterate() callback used to collect the "local" part of the
|
||||
** sqlite3Fts3ExprIterate() callback used to collect the "local" part of the
|
||||
** FTS3_MATCHINFO_HITS array. The local stats are those elements of the
|
||||
** array that are different for each row returned by the query.
|
||||
*/
|
||||
@@ -1151,7 +1153,7 @@ static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){
|
||||
**/
|
||||
aIter = sqlite3Fts3MallocZero(sizeof(LcsIterator) * pCsr->nPhrase);
|
||||
if( !aIter ) return SQLITE_NOMEM;
|
||||
(void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter);
|
||||
(void)sqlite3Fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter);
|
||||
|
||||
for(i=0; i<pInfo->nPhrase; i++){
|
||||
LcsIterator *pIter = &aIter[i];
|
||||
@@ -1328,11 +1330,11 @@ static int fts3MatchinfoValues(
|
||||
rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &pInfo->nDoc,0,0);
|
||||
if( rc!=SQLITE_OK ) break;
|
||||
}
|
||||
rc = fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo);
|
||||
rc = sqlite3Fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo);
|
||||
sqlite3Fts3EvalTestDeferred(pCsr, &rc);
|
||||
if( rc!=SQLITE_OK ) break;
|
||||
}
|
||||
(void)fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo);
|
||||
(void)sqlite3Fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -1555,7 +1557,7 @@ struct TermOffsetCtx {
|
||||
};
|
||||
|
||||
/*
|
||||
** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets().
|
||||
** This function is an sqlite3Fts3ExprIterate() callback used by sqlite3Fts3Offsets().
|
||||
*/
|
||||
static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){
|
||||
TermOffsetCtx *p = (TermOffsetCtx *)ctx;
|
||||
@@ -1637,7 +1639,9 @@ void sqlite3Fts3Offsets(
|
||||
*/
|
||||
sCtx.iCol = iCol;
|
||||
sCtx.iTerm = 0;
|
||||
rc = fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void*)&sCtx);
|
||||
rc = sqlite3Fts3ExprIterate(
|
||||
pCsr->pExpr, fts3ExprTermOffsetInit, (void*)&sCtx
|
||||
);
|
||||
if( rc!=SQLITE_OK ) goto offsets_out;
|
||||
|
||||
/* Retrieve the text stored in column iCol. If an SQL NULL is stored
|
||||
|
@@ -5073,7 +5073,7 @@ static void fts5MergePrefixLists(
|
||||
/* Initialize a doclist-iterator for each input buffer. Arrange them in
|
||||
** a linked-list starting at pHead in ascending order of rowid. Avoid
|
||||
** linking any iterators already at EOF into the linked list at all. */
|
||||
assert( nBuf+1<=sizeof(aMerger)/sizeof(aMerger[0]) );
|
||||
assert( nBuf+1<=(int)(sizeof(aMerger)/sizeof(aMerger[0])) );
|
||||
memset(aMerger, 0, sizeof(PrefixMerger)*(nBuf+1));
|
||||
pHead = &aMerger[nBuf];
|
||||
fts5DoclistIterInit(p1, &pHead->iter);
|
||||
|
@@ -1625,7 +1625,7 @@ static int fts5UpdateMethod(
|
||||
int rc = SQLITE_OK; /* Return code */
|
||||
|
||||
/* A transaction must be open when this is called. */
|
||||
assert( pTab->ts.eState==1 );
|
||||
assert( pTab->ts.eState==1 || pTab->ts.eState==2 );
|
||||
|
||||
assert( pVtab->zErrMsg==0 );
|
||||
assert( nArg==1 || nArg==(2+pConfig->nCol+2) );
|
||||
@@ -2867,7 +2867,9 @@ static int fts5Init(sqlite3 *db){
|
||||
}
|
||||
if( rc==SQLITE_OK ){
|
||||
rc = sqlite3_create_function(
|
||||
db, "fts5_source_id", 0, SQLITE_UTF8, p, fts5SourceIdFunc, 0, 0
|
||||
db, "fts5_source_id", 0,
|
||||
SQLITE_UTF8|SQLITE_DETERMINISTIC|SQLITE_INNOCUOUS,
|
||||
p, fts5SourceIdFunc, 0, 0
|
||||
);
|
||||
}
|
||||
}
|
||||
|
@@ -402,5 +402,46 @@ do_test 15.4 {
|
||||
list [catch { db2 eval COMMIT } msg] $msg
|
||||
} {0 {}}
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
reset_db
|
||||
forcedelete test.db2
|
||||
sqlite3 db2 test.db
|
||||
do_execsql_test 16.0 {
|
||||
|
||||
ATTACH 'test.db2' AS aux;
|
||||
CREATE TABLE aux.t2(x,y);
|
||||
INSERT INTO t2 VALUES(1, 2);
|
||||
CREATE VIRTUAL TABLE x1 USING fts5(a);
|
||||
BEGIN;
|
||||
INSERT INTO x1 VALUES('abc');
|
||||
INSERT INTO t2 VALUES(3, 4);
|
||||
}
|
||||
|
||||
do_execsql_test -db db2 16.1 {
|
||||
ATTACH 'test.db2' AS aux;
|
||||
BEGIN;
|
||||
SELECT * FROM t2
|
||||
} {1 2}
|
||||
|
||||
do_catchsql_test 16.2 {
|
||||
COMMIT;
|
||||
} {1 {database is locked}}
|
||||
|
||||
do_execsql_test 16.3 {
|
||||
INSERT INTO x1 VALUES('def');
|
||||
}
|
||||
|
||||
do_execsql_test -db db2 16.4 {
|
||||
END
|
||||
}
|
||||
|
||||
do_execsql_test 16.5 {
|
||||
COMMIT
|
||||
}
|
||||
|
||||
do_execsql_test -db db2 16.6 {
|
||||
SELECT * FROM x1
|
||||
} {abc def}
|
||||
|
||||
finish_test
|
||||
|
||||
|
@@ -44,6 +44,8 @@
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* #define LSM_DEBUG_EXPENSIVE 1 */
|
||||
|
||||
/*
|
||||
** Default values for various data structure parameters. These may be
|
||||
** overridden by calls to lsm_config().
|
||||
@@ -405,7 +407,7 @@ struct Segment {
|
||||
LsmPgno iFirst; /* First page of this run */
|
||||
LsmPgno iLastPg; /* Last page of this run */
|
||||
LsmPgno iRoot; /* Root page number (if any) */
|
||||
int nSize; /* Size of this run in pages */
|
||||
LsmPgno nSize; /* Size of this run in pages */
|
||||
|
||||
Redirect *pRedirect; /* Block redirects (or NULL) */
|
||||
};
|
||||
@@ -853,6 +855,8 @@ int lsmVarintGet32(u8 *, int *);
|
||||
int lsmVarintPut64(u8 *aData, i64 iVal);
|
||||
int lsmVarintGet64(const u8 *aData, i64 *piVal);
|
||||
|
||||
int lsmVarintLen64(i64);
|
||||
|
||||
int lsmVarintLen32(int);
|
||||
int lsmVarintSize(u8 c);
|
||||
|
||||
|
@@ -511,7 +511,7 @@ static void ckptNewSegment(
|
||||
pSegment->iFirst = ckptGobble64(aIn, piIn);
|
||||
pSegment->iLastPg = ckptGobble64(aIn, piIn);
|
||||
pSegment->iRoot = ckptGobble64(aIn, piIn);
|
||||
pSegment->nSize = (int)ckptGobble64(aIn, piIn);
|
||||
pSegment->nSize = ckptGobble64(aIn, piIn);
|
||||
assert( pSegment->iFirst );
|
||||
}
|
||||
|
||||
|
@@ -215,8 +215,8 @@ struct FileSystem {
|
||||
char *zLog; /* Database file name */
|
||||
int nMetasize; /* Size of meta pages in bytes */
|
||||
int nMetaRwSize; /* Read/written size of meta pages in bytes */
|
||||
int nPagesize; /* Database page-size in bytes */
|
||||
int nBlocksize; /* Database block-size in bytes */
|
||||
i64 nPagesize; /* Database page-size in bytes */
|
||||
i64 nBlocksize; /* Database block-size in bytes */
|
||||
|
||||
/* r/w file descriptors for both files. */
|
||||
LsmFile *pLsmFile; /* Used after lsm_close() to link into list */
|
||||
@@ -889,7 +889,7 @@ static LsmPgno fsFirstPageOnBlock(FileSystem *pFS, int iBlock){
|
||||
iPg = pFS->nBlocksize * (LsmPgno)(iBlock-1) + 4;
|
||||
}
|
||||
}else{
|
||||
const int nPagePerBlock = (pFS->nBlocksize / pFS->nPagesize);
|
||||
const i64 nPagePerBlock = (pFS->nBlocksize / pFS->nPagesize);
|
||||
if( iBlock==1 ){
|
||||
iPg = 1 + ((pFS->nMetasize*2 + pFS->nPagesize - 1) / pFS->nPagesize);
|
||||
}else{
|
||||
@@ -1131,7 +1131,6 @@ static void fsGrowMapping(
|
||||
i64 iSz, /* Minimum size to extend mapping to */
|
||||
int *pRc /* IN/OUT: Error code */
|
||||
){
|
||||
assert( pFS->pCompress==0 );
|
||||
assert( PAGE_HASPREV==4 );
|
||||
|
||||
if( *pRc==LSM_OK && iSz>pFS->nMap ){
|
||||
@@ -1872,7 +1871,7 @@ void lsmFsGobble(
|
||||
assert( nPgno>0 && 0==fsPageRedirects(pFS, pRun, aPgno[0]) );
|
||||
|
||||
iBlk = fsPageToBlock(pFS, pRun->iFirst);
|
||||
pRun->nSize += (int)(pRun->iFirst - fsFirstPageOnBlock(pFS, iBlk));
|
||||
pRun->nSize += (pRun->iFirst - fsFirstPageOnBlock(pFS, iBlk));
|
||||
|
||||
while( rc==LSM_OK ){
|
||||
int iNext = 0;
|
||||
@@ -1883,13 +1882,13 @@ void lsmFsGobble(
|
||||
}
|
||||
rc = fsBlockNext(pFS, pRun, iBlk, &iNext);
|
||||
if( rc==LSM_OK ) rc = fsFreeBlock(pFS, pSnapshot, pRun, iBlk);
|
||||
pRun->nSize -= (int)(
|
||||
pRun->nSize -= (
|
||||
1 + fsLastPageOnBlock(pFS, iBlk) - fsFirstPageOnBlock(pFS, iBlk)
|
||||
);
|
||||
iBlk = iNext;
|
||||
}
|
||||
|
||||
pRun->nSize -= (int)(pRun->iFirst - fsFirstPageOnBlock(pFS, iBlk));
|
||||
pRun->nSize -= (pRun->iFirst - fsFirstPageOnBlock(pFS, iBlk));
|
||||
assert( pRun->nSize>0 );
|
||||
}
|
||||
|
||||
|
@@ -432,7 +432,7 @@ int lsm_config(lsm_db *pDb, int eParam, ...){
|
||||
}
|
||||
|
||||
void lsmAppendSegmentList(LsmString *pStr, char *zPre, Segment *pSeg){
|
||||
lsmStringAppendf(pStr, "%s{%d %d %d %d}", zPre,
|
||||
lsmStringAppendf(pStr, "%s{%lld %lld %lld %lld}", zPre,
|
||||
pSeg->iFirst, pSeg->iLastPg, pSeg->iRoot, pSeg->nSize
|
||||
);
|
||||
}
|
||||
|
@@ -1306,6 +1306,24 @@ int lsmBeginRoTrans(lsm_db *db){
|
||||
}
|
||||
}
|
||||
|
||||
/* In 'lsm_open()' we don't update the page and block sizes in the
|
||||
** Filesystem for 'readonly' connection. Because member 'db->pShmhdr' is a
|
||||
** null pointer, this prevents loading a checkpoint. Now that the system is
|
||||
** live this member should be set. So we can update both values in
|
||||
** the Filesystem.
|
||||
**
|
||||
** Configure the file-system connection with the page-size and block-size
|
||||
** of this database. Even if the database file is zero bytes in size
|
||||
** on disk, these values have been set in shared-memory by now, and so
|
||||
** are guaranteed not to change during the lifetime of this connection. */
|
||||
if( LSM_OK==rc
|
||||
&& 0==lsmCheckpointClientCacheOk(db)
|
||||
&& LSM_OK==(rc=lsmCheckpointLoad(db, 0))
|
||||
){
|
||||
lsmFsSetPageSize(db->pFS, lsmCheckpointPgsz(db->aSnapshot));
|
||||
lsmFsSetBlockSize(db->pFS, lsmCheckpointBlksz(db->aSnapshot));
|
||||
}
|
||||
|
||||
if( rc==LSM_OK ){
|
||||
rc = lsmBeginReadTrans(db);
|
||||
}
|
||||
|
@@ -525,7 +525,7 @@ static u8 *pageGetKey(
|
||||
LsmBlob *pBlob /* If required, use this for dynamic memory */
|
||||
){
|
||||
u8 *pKey;
|
||||
int nDummy;
|
||||
i64 nDummy;
|
||||
int eType;
|
||||
u8 *aData;
|
||||
int nData;
|
||||
@@ -537,10 +537,10 @@ static u8 *pageGetKey(
|
||||
|
||||
pKey = pageGetCell(aData, nData, iCell);
|
||||
eType = *pKey++;
|
||||
pKey += lsmVarintGet32(pKey, &nDummy);
|
||||
pKey += lsmVarintGet64(pKey, &nDummy);
|
||||
pKey += lsmVarintGet32(pKey, pnKey);
|
||||
if( rtIsWrite(eType) ){
|
||||
pKey += lsmVarintGet32(pKey, &nDummy);
|
||||
pKey += lsmVarintGet64(pKey, &nDummy);
|
||||
}
|
||||
*piTopic = rtTopic(eType);
|
||||
|
||||
@@ -657,14 +657,14 @@ static int btreeCursorLoadKey(BtreeCursor *pCsr){
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int btreeCursorPtr(u8 *aData, int nData, int iCell){
|
||||
static LsmPgno btreeCursorPtr(u8 *aData, int nData, int iCell){
|
||||
int nCell;
|
||||
|
||||
nCell = pageGetNRec(aData, nData);
|
||||
if( iCell>=nCell ){
|
||||
return (int)pageGetPtr(aData, nData);
|
||||
return pageGetPtr(aData, nData);
|
||||
}
|
||||
return (int)pageGetRecordPtr(aData, nData, iCell);
|
||||
return pageGetRecordPtr(aData, nData, iCell);
|
||||
}
|
||||
|
||||
static int btreeCursorNext(BtreeCursor *pCsr){
|
||||
@@ -751,7 +751,7 @@ static int btreeCursorFirst(BtreeCursor *pCsr){
|
||||
|
||||
Page *pPg = 0;
|
||||
FileSystem *pFS = pCsr->pFS;
|
||||
int iPg = (int)pCsr->pSeg->iRoot;
|
||||
LsmPgno iPg = pCsr->pSeg->iRoot;
|
||||
|
||||
do {
|
||||
rc = lsmFsDbPageGet(pFS, pCsr->pSeg, iPg, &pPg);
|
||||
@@ -779,7 +779,7 @@ static int btreeCursorFirst(BtreeCursor *pCsr){
|
||||
assert( pCsr->aPg[pCsr->nDepth].iCell==0 );
|
||||
pCsr->aPg[pCsr->nDepth].pPage = pPg;
|
||||
pCsr->nDepth++;
|
||||
iPg = (int)pageGetRecordPtr(aData, nData, 0);
|
||||
iPg = pageGetRecordPtr(aData, nData, 0);
|
||||
}
|
||||
}
|
||||
}while( rc==LSM_OK );
|
||||
@@ -871,7 +871,7 @@ static int btreeCursorRestore(
|
||||
int nSeek;
|
||||
int iTopicSeek;
|
||||
int iPg = 0;
|
||||
int iLoad = (int)pSeg->iRoot;
|
||||
LsmPgno iLoad = pSeg->iRoot;
|
||||
Page *pPg = pCsr->aPg[nDepth-1].pPage;
|
||||
|
||||
if( pageObjGetNRec(pPg)==0 ){
|
||||
@@ -903,7 +903,7 @@ static int btreeCursorRestore(
|
||||
aData = fsPageData(pPg2, &nData);
|
||||
assert( (pageGetFlags(aData, nData) & SEGMENT_BTREE_FLAG) );
|
||||
|
||||
iLoad = (int)pageGetPtr(aData, nData);
|
||||
iLoad = pageGetPtr(aData, nData);
|
||||
iCell2 = pageGetNRec(aData, nData);
|
||||
iMax = iCell2-1;
|
||||
iMin = 0;
|
||||
@@ -926,7 +926,7 @@ static int btreeCursorRestore(
|
||||
assert( res!=0 );
|
||||
|
||||
if( res<0 ){
|
||||
iLoad = (int)iPtr;
|
||||
iLoad = iPtr;
|
||||
iCell2 = iTry;
|
||||
iMax = iTry-1;
|
||||
}else{
|
||||
@@ -1013,7 +1013,7 @@ static void segmentPtrSetPage(SegmentPtr *pPtr, Page *pNext){
|
||||
static int segmentPtrLoadPage(
|
||||
FileSystem *pFS,
|
||||
SegmentPtr *pPtr, /* Load page into this SegmentPtr object */
|
||||
int iNew /* Page number of new page */
|
||||
LsmPgno iNew /* Page number of new page */
|
||||
){
|
||||
Page *pPg = 0; /* The new page */
|
||||
int rc; /* Return Code */
|
||||
@@ -1633,7 +1633,7 @@ static int segmentPtrSeek(
|
||||
int iTopic, /* Key topic to seek to */
|
||||
void *pKey, int nKey, /* Key to seek to */
|
||||
int eSeek, /* Search bias - see above */
|
||||
int *piPtr, /* OUT: FC pointer */
|
||||
LsmPgno *piPtr, /* OUT: FC pointer */
|
||||
int *pbStop
|
||||
){
|
||||
int (*xCmp)(void *, int, void *, int) = pCsr->pDb->xCmp;
|
||||
@@ -1759,7 +1759,7 @@ static int segmentPtrSeek(
|
||||
}
|
||||
|
||||
assert( rc!=LSM_OK || assertSeekResult(pCsr,pPtr,iTopic,pKey,nKey,eSeek) );
|
||||
*piPtr = (int)iPtrOut;
|
||||
*piPtr = iPtrOut;
|
||||
return rc;
|
||||
}
|
||||
|
||||
@@ -1773,11 +1773,11 @@ static int seekInBtree(
|
||||
){
|
||||
int i = 0;
|
||||
int rc;
|
||||
int iPg;
|
||||
LsmPgno iPg;
|
||||
Page *pPg = 0;
|
||||
LsmBlob blob = {0, 0, 0};
|
||||
|
||||
iPg = (int)pSeg->iRoot;
|
||||
iPg = pSeg->iRoot;
|
||||
do {
|
||||
LsmPgno *piFirst = 0;
|
||||
if( aPg ){
|
||||
@@ -1799,7 +1799,7 @@ static int seekInBtree(
|
||||
flags = pageGetFlags(aData, nData);
|
||||
if( (flags & SEGMENT_BTREE_FLAG)==0 ) break;
|
||||
|
||||
iPg = (int)pageGetPtr(aData, nData);
|
||||
iPg = pageGetPtr(aData, nData);
|
||||
nRec = pageGetNRec(aData, nData);
|
||||
|
||||
iMin = 0;
|
||||
@@ -1825,7 +1825,7 @@ static int seekInBtree(
|
||||
pCsr->pDb->xCmp, iTopic, pKey, nKey, iTopicT, pKeyT, nKeyT
|
||||
);
|
||||
if( res<0 ){
|
||||
iPg = (int)iPtr;
|
||||
iPg = iPtr;
|
||||
iMax = iTry-1;
|
||||
}else{
|
||||
iMin = iTry+1;
|
||||
@@ -1851,12 +1851,12 @@ static int seekInSegment(
|
||||
SegmentPtr *pPtr,
|
||||
int iTopic,
|
||||
void *pKey, int nKey,
|
||||
int iPg, /* Page to search */
|
||||
LsmPgno iPg, /* Page to search */
|
||||
int eSeek, /* Search bias - see above */
|
||||
int *piPtr, /* OUT: FC pointer */
|
||||
LsmPgno *piPtr, /* OUT: FC pointer */
|
||||
int *pbStop /* OUT: Stop search flag */
|
||||
){
|
||||
int iPtr = iPg;
|
||||
LsmPgno iPtr = iPg;
|
||||
int rc = LSM_OK;
|
||||
|
||||
if( pPtr->pSeg->iRoot ){
|
||||
@@ -1866,7 +1866,7 @@ static int seekInSegment(
|
||||
if( rc==LSM_OK ) segmentPtrSetPage(pPtr, pPg);
|
||||
}else{
|
||||
if( iPtr==0 ){
|
||||
iPtr = (int)pPtr->pSeg->iFirst;
|
||||
iPtr = pPtr->pSeg->iFirst;
|
||||
}
|
||||
if( rc==LSM_OK ){
|
||||
rc = segmentPtrLoadPage(pCsr->pDb->pFS, pPtr, iPtr);
|
||||
@@ -1904,7 +1904,7 @@ static int seekInLevel(
|
||||
){
|
||||
Level *pLvl = aPtr[0].pLevel; /* Level to seek within */
|
||||
int rc = LSM_OK; /* Return code */
|
||||
int iOut = 0; /* Pointer to return to caller */
|
||||
LsmPgno iOut = 0; /* Pointer to return to caller */
|
||||
int res = -1; /* Result of xCmp(pKey, split) */
|
||||
int nRhs = pLvl->nRight; /* Number of right-hand-side segments */
|
||||
int bStop = 0;
|
||||
@@ -1923,8 +1923,8 @@ static int seekInLevel(
|
||||
** left-hand-side of the level in this case. */
|
||||
if( res<0 ){
|
||||
int i;
|
||||
int iPtr = 0;
|
||||
if( nRhs==0 ) iPtr = (int)*piPgno;
|
||||
LsmPgno iPtr = 0;
|
||||
if( nRhs==0 ) iPtr = *piPgno;
|
||||
|
||||
rc = seekInSegment(
|
||||
pCsr, &aPtr[0], iTopic, pKey, nKey, iPtr, eSeek, &iOut, &bStop
|
||||
@@ -1939,7 +1939,7 @@ static int seekInLevel(
|
||||
|
||||
if( res>=0 ){
|
||||
int bHit = 0; /* True if at least one rhs is not EOF */
|
||||
int iPtr = (int)*piPgno;
|
||||
LsmPgno iPtr = *piPgno;
|
||||
int i;
|
||||
segmentPtrReset(&aPtr[0], LSM_SEGMENTPTR_FREE_THRESHOLD);
|
||||
for(i=1; rc==LSM_OK && i<=nRhs && bStop==0; i++){
|
||||
@@ -3361,6 +3361,8 @@ static int mergeWorkerPageOffset(u8 *aData, int nData){
|
||||
int iOff;
|
||||
int nKey;
|
||||
int eType;
|
||||
i64 nDummy;
|
||||
|
||||
|
||||
nRec = lsmGetU16(&aData[SEGMENT_NRECORD_OFFSET(nData)]);
|
||||
iOff = lsmGetU16(&aData[SEGMENT_CELLPTR_OFFSET(nData, nRec-1)]);
|
||||
@@ -3370,7 +3372,7 @@ static int mergeWorkerPageOffset(u8 *aData, int nData){
|
||||
|| eType==(LSM_SEPARATOR)
|
||||
);
|
||||
|
||||
iOff += lsmVarintGet32(&aData[iOff], &nKey);
|
||||
iOff += lsmVarintGet64(&aData[iOff], &nDummy);
|
||||
iOff += lsmVarintGet32(&aData[iOff], &nKey);
|
||||
|
||||
return iOff + (eType ? nKey : 0);
|
||||
@@ -3480,7 +3482,7 @@ static int mergeWorkerLoadHierarchy(MergeWorker *pMW){
|
||||
lsm_env *pEnv = pMW->pDb->pEnv;
|
||||
Page **apHier = 0;
|
||||
int nHier = 0;
|
||||
int iPg = (int)pSeg->iRoot;
|
||||
LsmPgno iPg = pSeg->iRoot;
|
||||
|
||||
do {
|
||||
Page *pPg = 0;
|
||||
@@ -3506,7 +3508,7 @@ static int mergeWorkerLoadHierarchy(MergeWorker *pMW){
|
||||
nHier++;
|
||||
|
||||
apHier[0] = pPg;
|
||||
iPg = (int)pageGetPtr(aData, nData);
|
||||
iPg = pageGetPtr(aData, nData);
|
||||
}else{
|
||||
lsmFsPageRelease(pPg);
|
||||
break;
|
||||
@@ -3625,10 +3627,11 @@ static int mergeWorkerBtreeWrite(
|
||||
assert( lsmFsPageWritable(pOld) );
|
||||
aData = fsPageData(pOld, &nData);
|
||||
if( eType==0 ){
|
||||
nByte = 2 + 1 + lsmVarintLen32((int)iPtr) + lsmVarintLen32((int)iKeyPg);
|
||||
nByte = 2 + 1 + lsmVarintLen64(iPtr) + lsmVarintLen64(iKeyPg);
|
||||
}else{
|
||||
nByte = 2 + 1 + lsmVarintLen32((int)iPtr) + lsmVarintLen32(nKey) + nKey;
|
||||
nByte = 2 + 1 + lsmVarintLen64(iPtr) + lsmVarintLen32(nKey) + nKey;
|
||||
}
|
||||
|
||||
nRec = pageGetNRec(aData, nData);
|
||||
nFree = SEGMENT_EOF(nData, nRec) - mergeWorkerPageOffset(aData, nData);
|
||||
if( nByte<=nFree ) break;
|
||||
@@ -3672,11 +3675,11 @@ static int mergeWorkerBtreeWrite(
|
||||
lsmPutU16(&aData[SEGMENT_NRECORD_OFFSET(nData)], (u16)(nRec+1));
|
||||
if( eType==0 ){
|
||||
aData[iOff++] = 0x00;
|
||||
iOff += lsmVarintPut32(&aData[iOff], (int)iPtr);
|
||||
iOff += lsmVarintPut32(&aData[iOff], (int)iKeyPg);
|
||||
iOff += lsmVarintPut64(&aData[iOff], iPtr);
|
||||
iOff += lsmVarintPut64(&aData[iOff], iKeyPg);
|
||||
}else{
|
||||
aData[iOff++] = eType;
|
||||
iOff += lsmVarintPut32(&aData[iOff], (int)iPtr);
|
||||
iOff += lsmVarintPut64(&aData[iOff], iPtr);
|
||||
iOff += lsmVarintPut32(&aData[iOff], nKey);
|
||||
memcpy(&aData[iOff], pKey, nKey);
|
||||
}
|
||||
@@ -3872,7 +3875,7 @@ static int mergeWorkerNextPage(
|
||||
static int mergeWorkerData(
|
||||
MergeWorker *pMW, /* Merge worker object */
|
||||
int bSep, /* True to write to separators run */
|
||||
int iFPtr, /* Footer ptr for new pages */
|
||||
LsmPgno iFPtr, /* Footer ptr for new pages */
|
||||
u8 *aWrite, /* Write data from this buffer */
|
||||
int nWrite /* Size of aWrite[] in bytes */
|
||||
){
|
||||
@@ -3916,14 +3919,14 @@ static int mergeWorkerData(
|
||||
static int mergeWorkerFirstPage(MergeWorker *pMW){
|
||||
int rc = LSM_OK; /* Return code */
|
||||
Page *pPg = 0; /* First page of run pSeg */
|
||||
int iFPtr = 0; /* Pointer value read from footer of pPg */
|
||||
LsmPgno iFPtr = 0; /* Pointer value read from footer of pPg */
|
||||
MultiCursor *pCsr = pMW->pCsr;
|
||||
|
||||
assert( pMW->pPage==0 );
|
||||
|
||||
if( pCsr->pBtCsr ){
|
||||
rc = LSM_OK;
|
||||
iFPtr = (int)pMW->pLevel->pNext->lhs.iFirst;
|
||||
iFPtr = pMW->pLevel->pNext->lhs.iFirst;
|
||||
}else if( pCsr->nPtr>0 ){
|
||||
Segment *pSeg;
|
||||
pSeg = pCsr->aPtr[pCsr->nPtr-1].pSeg;
|
||||
@@ -3932,7 +3935,7 @@ static int mergeWorkerFirstPage(MergeWorker *pMW){
|
||||
u8 *aData; /* Buffer for page pPg */
|
||||
int nData; /* Size of aData[] in bytes */
|
||||
aData = fsPageData(pPg, &nData);
|
||||
iFPtr = (int)pageGetPtr(aData, nData);
|
||||
iFPtr = pageGetPtr(aData, nData);
|
||||
lsmFsPageRelease(pPg);
|
||||
}
|
||||
}
|
||||
@@ -3951,7 +3954,7 @@ static int mergeWorkerWrite(
|
||||
int eType, /* One of SORTED_SEPARATOR, WRITE or DELETE */
|
||||
void *pKey, int nKey, /* Key value */
|
||||
void *pVal, int nVal, /* Value value */
|
||||
int iPtr /* Absolute value of page pointer, or 0 */
|
||||
LsmPgno iPtr /* Absolute value of page pointer, or 0 */
|
||||
){
|
||||
int rc = LSM_OK; /* Return code */
|
||||
Merge *pMerge; /* Persistent part of level merge state */
|
||||
@@ -3960,8 +3963,8 @@ static int mergeWorkerWrite(
|
||||
u8 *aData; /* Data buffer for page pWriter->pPage */
|
||||
int nData = 0; /* Size of buffer aData[] in bytes */
|
||||
int nRec = 0; /* Number of records on page pPg */
|
||||
int iFPtr = 0; /* Value of pointer in footer of pPg */
|
||||
int iRPtr = 0; /* Value of pointer written into record */
|
||||
LsmPgno iFPtr = 0; /* Value of pointer in footer of pPg */
|
||||
LsmPgno iRPtr = 0; /* Value of pointer written into record */
|
||||
int iOff = 0; /* Current write offset within page pPg */
|
||||
Segment *pSeg; /* Segment being written */
|
||||
int flags = 0; /* If != 0, flags value for page footer */
|
||||
@@ -3978,8 +3981,8 @@ static int mergeWorkerWrite(
|
||||
if( pPg ){
|
||||
aData = fsPageData(pPg, &nData);
|
||||
nRec = pageGetNRec(aData, nData);
|
||||
iFPtr = (int)pageGetPtr(aData, nData);
|
||||
iRPtr = iPtr - iFPtr;
|
||||
iFPtr = pageGetPtr(aData, nData);
|
||||
iRPtr = iPtr ? (iPtr - iFPtr) : 0;
|
||||
}
|
||||
|
||||
/* Figure out how much space is required by the new record. The space
|
||||
@@ -3997,7 +4000,7 @@ static int mergeWorkerWrite(
|
||||
** 4) Value size - 1 varint (only if LSM_INSERT flag is set)
|
||||
*/
|
||||
if( rc==LSM_OK ){
|
||||
nHdr = 1 + lsmVarintLen32(iRPtr) + lsmVarintLen32(nKey);
|
||||
nHdr = 1 + lsmVarintLen64(iRPtr) + lsmVarintLen32(nKey);
|
||||
if( rtIsWrite(eType) ) nHdr += lsmVarintLen32(nVal);
|
||||
|
||||
/* If the entire header will not fit on page pPg, or if page pPg is
|
||||
@@ -4009,8 +4012,8 @@ static int mergeWorkerWrite(
|
||||
assert( aData );
|
||||
memset(&aData[iOff], 0, SEGMENT_EOF(nData, nRec)-iOff);
|
||||
}
|
||||
iFPtr = (int)*pMW->pCsr->pPrevMergePtr;
|
||||
iRPtr = iPtr - iFPtr;
|
||||
iFPtr = *pMW->pCsr->pPrevMergePtr;
|
||||
iRPtr = iPtr ? (iPtr - iFPtr) : 0;
|
||||
iOff = 0;
|
||||
nRec = 0;
|
||||
rc = mergeWorkerNextPage(pMW, iFPtr);
|
||||
@@ -4049,7 +4052,7 @@ static int mergeWorkerWrite(
|
||||
|
||||
/* Write the entry header into the current page. */
|
||||
aData[iOff++] = (u8)eType; /* 1 */
|
||||
iOff += lsmVarintPut32(&aData[iOff], iRPtr); /* 2 */
|
||||
iOff += lsmVarintPut64(&aData[iOff], iRPtr); /* 2 */
|
||||
iOff += lsmVarintPut32(&aData[iOff], nKey); /* 3 */
|
||||
if( rtIsWrite(eType) ) iOff += lsmVarintPut32(&aData[iOff], nVal); /* 4 */
|
||||
pMerge->iOutputOff = iOff;
|
||||
@@ -4283,7 +4286,7 @@ static int mergeWorkerStep(MergeWorker *pMW){
|
||||
pVal = pCsr->val.pData;
|
||||
}
|
||||
if( rc==LSM_OK ){
|
||||
rc = mergeWorkerWrite(pMW, eType, pKey, nKey, pVal, nVal, (int)iPtr);
|
||||
rc = mergeWorkerWrite(pMW, eType, pKey, nKey, pVal, nVal, iPtr);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -4604,7 +4607,7 @@ static int mergeWorkerInit(
|
||||
SegmentPtr *pPtr;
|
||||
assert( pCsr->aPtr[i].pPg==0 );
|
||||
pPtr = &pCsr->aPtr[i];
|
||||
rc = segmentPtrLoadPage(pDb->pFS, pPtr, (int)pInput->iPg);
|
||||
rc = segmentPtrLoadPage(pDb->pFS, pPtr, pInput->iPg);
|
||||
if( rc==LSM_OK && pPtr->nCell>0 ){
|
||||
rc = segmentPtrLoadCell(pPtr, pInput->iCell);
|
||||
}
|
||||
@@ -5469,7 +5472,7 @@ int lsmFlushTreeToDisk(lsm_db *pDb){
|
||||
** be freed by the caller using lsmFree().
|
||||
*/
|
||||
static char *segToString(lsm_env *pEnv, Segment *pSeg, int nMin){
|
||||
int nSize = pSeg->nSize;
|
||||
LsmPgno nSize = pSeg->nSize;
|
||||
LsmPgno iRoot = pSeg->iRoot;
|
||||
LsmPgno iFirst = pSeg->iFirst;
|
||||
LsmPgno iLast = pSeg->iLastPg;
|
||||
@@ -5481,9 +5484,9 @@ static char *segToString(lsm_env *pEnv, Segment *pSeg, int nMin){
|
||||
|
||||
z1 = lsmMallocPrintf(pEnv, "%d.%d", iFirst, iLast);
|
||||
if( iRoot ){
|
||||
z2 = lsmMallocPrintf(pEnv, "root=%d", iRoot);
|
||||
z2 = lsmMallocPrintf(pEnv, "root=%lld", iRoot);
|
||||
}else{
|
||||
z2 = lsmMallocPrintf(pEnv, "size=%d", nSize);
|
||||
z2 = lsmMallocPrintf(pEnv, "size=%lld", nSize);
|
||||
}
|
||||
|
||||
nPad = nMin - 2 - strlen(z1) - 1 - strlen(z2);
|
||||
@@ -5536,7 +5539,7 @@ void sortedDumpPage(lsm_db *pDb, Segment *pRun, Page *pPg, int bVals){
|
||||
int i;
|
||||
|
||||
int nRec;
|
||||
int iPtr;
|
||||
LsmPgno iPtr;
|
||||
int flags;
|
||||
u8 *aData;
|
||||
int nData;
|
||||
@@ -5544,11 +5547,11 @@ void sortedDumpPage(lsm_db *pDb, Segment *pRun, Page *pPg, int bVals){
|
||||
aData = fsPageData(pPg, &nData);
|
||||
|
||||
nRec = pageGetNRec(aData, nData);
|
||||
iPtr = (int)pageGetPtr(aData, nData);
|
||||
iPtr = pageGetPtr(aData, nData);
|
||||
flags = pageGetFlags(aData, nData);
|
||||
|
||||
lsmStringInit(&s, pDb->pEnv);
|
||||
lsmStringAppendf(&s,"nCell=%d iPtr=%d flags=%d {", nRec, iPtr, flags);
|
||||
lsmStringAppendf(&s,"nCell=%d iPtr=%lld flags=%d {", nRec, iPtr, flags);
|
||||
if( flags&SEGMENT_BTREE_FLAG ) iPtr = 0;
|
||||
|
||||
for(i=0; i<nRec; i++){
|
||||
@@ -5558,13 +5561,13 @@ void sortedDumpPage(lsm_db *pDb, Segment *pRun, Page *pPg, int bVals){
|
||||
u8 *aVal = 0; int nVal = 0; /* Value */
|
||||
int iTopic;
|
||||
u8 *aCell;
|
||||
int iPgPtr;
|
||||
i64 iPgPtr;
|
||||
int eType;
|
||||
|
||||
aCell = pageGetCell(aData, nData, i);
|
||||
eType = *aCell++;
|
||||
assert( (flags & SEGMENT_BTREE_FLAG) || eType!=0 );
|
||||
aCell += lsmVarintGet32(aCell, &iPgPtr);
|
||||
aCell += lsmVarintGet64(aCell, &iPgPtr);
|
||||
|
||||
if( eType==0 ){
|
||||
LsmPgno iRef; /* Page number of referenced page */
|
||||
@@ -5590,7 +5593,7 @@ void sortedDumpPage(lsm_db *pDb, Segment *pRun, Page *pPg, int bVals){
|
||||
}
|
||||
}
|
||||
|
||||
lsmStringAppendf(&s, " %d", iPgPtr+iPtr);
|
||||
lsmStringAppendf(&s, " %lld", iPgPtr+iPtr);
|
||||
lsmFsPageRelease(pRef);
|
||||
}
|
||||
lsmStringAppend(&s, "}", 1);
|
||||
@@ -5720,20 +5723,20 @@ static int infoPageDump(
|
||||
int nKeyWidth = 0;
|
||||
LsmString str;
|
||||
int nRec;
|
||||
int iPtr;
|
||||
LsmPgno iPtr;
|
||||
int flags2;
|
||||
int iCell;
|
||||
u8 *aData; int nData; /* Page data and size thereof */
|
||||
|
||||
aData = fsPageData(pPg, &nData);
|
||||
nRec = pageGetNRec(aData, nData);
|
||||
iPtr = (int)pageGetPtr(aData, nData);
|
||||
iPtr = pageGetPtr(aData, nData);
|
||||
flags2 = pageGetFlags(aData, nData);
|
||||
|
||||
lsmStringInit(&str, pDb->pEnv);
|
||||
lsmStringAppendf(&str, "Page : %lld (%d bytes)\n", iPg, nData);
|
||||
lsmStringAppendf(&str, "nRec : %d\n", nRec);
|
||||
lsmStringAppendf(&str, "iPtr : %d\n", iPtr);
|
||||
lsmStringAppendf(&str, "iPtr : %lld\n", iPtr);
|
||||
lsmStringAppendf(&str, "flags: %04x\n", flags2);
|
||||
lsmStringAppendf(&str, "\n");
|
||||
|
||||
|
@@ -185,6 +185,11 @@ int lsmVarintLen32(int n){
|
||||
return lsmVarintPut32(aData, n);
|
||||
}
|
||||
|
||||
int lsmVarintLen64(i64 n){
|
||||
u8 aData[9];
|
||||
return lsmVarintPut64(aData, n);
|
||||
}
|
||||
|
||||
/*
|
||||
** The argument is the first byte of a varint. This function returns the
|
||||
** total number of bytes in the entire varint (including the first byte).
|
||||
|
@@ -53,8 +53,15 @@
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#ifndef SQLITE_SHELL_EXTFUNCS /* Guard for #include as built-in extension. */
|
||||
#include "sqlite3ext.h"
|
||||
|
||||
#ifndef deliberate_fall_through
|
||||
/* Quiet some compilers about some of our intentional code. */
|
||||
# if GCC_VERSION>=7000000
|
||||
# define deliberate_fall_through __attribute__((fallthrough));
|
||||
# else
|
||||
# define deliberate_fall_through
|
||||
# endif
|
||||
#endif
|
||||
|
||||
SQLITE_EXTENSION_INIT1;
|
||||
@@ -64,12 +71,12 @@ SQLITE_EXTENSION_INIT1;
|
||||
#define ND 0x82 /* Not above or digit-value */
|
||||
#define PAD_CHAR '='
|
||||
|
||||
#ifndef UBYTE_TYPEDEF
|
||||
typedef unsigned char ubyte;
|
||||
# define UBYTE_TYPEDEF
|
||||
#ifndef U8_TYPEDEF
|
||||
typedef unsigned char u8;
|
||||
#define U8_TYPEDEF
|
||||
#endif
|
||||
|
||||
static const ubyte b64DigitValues[128] = {
|
||||
static const u8 b64DigitValues[128] = {
|
||||
/* HT LF VT FF CR */
|
||||
ND,ND,ND,ND, ND,ND,ND,ND, ND,WS,WS,WS, WS,WS,ND,ND,
|
||||
/* US */
|
||||
@@ -92,18 +99,18 @@ static const char b64Numerals[64+1]
|
||||
= "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
|
||||
|
||||
#define BX_DV_PROTO(c) \
|
||||
((((ubyte)(c))<0x80)? (ubyte)(b64DigitValues[(ubyte)(c)]) : 0x80)
|
||||
#define IS_BX_DIGIT(bdp) (((ubyte)(bdp))<0x80)
|
||||
((((u8)(c))<0x80)? (u8)(b64DigitValues[(u8)(c)]) : 0x80)
|
||||
#define IS_BX_DIGIT(bdp) (((u8)(bdp))<0x80)
|
||||
#define IS_BX_WS(bdp) ((bdp)==WS)
|
||||
#define IS_BX_PAD(bdp) ((bdp)==PC)
|
||||
#define BX_NUMERAL(dv) (b64Numerals[(ubyte)(dv)])
|
||||
#define BX_NUMERAL(dv) (b64Numerals[(u8)(dv)])
|
||||
/* Width of base64 lines. Should be an integer multiple of 4. */
|
||||
#define B64_DARK_MAX 72
|
||||
|
||||
/* Encode a byte buffer into base64 text with linefeeds appended to limit
|
||||
** encoded group lengths to B64_DARK_MAX or to terminate the last group.
|
||||
*/
|
||||
static char* toBase64( ubyte *pIn, int nbIn, char *pOut ){
|
||||
static char* toBase64( u8 *pIn, int nbIn, char *pOut ){
|
||||
int nCol = 0;
|
||||
while( nbIn >= 3 ){
|
||||
/* Do the bit-shuffle, exploiting unsigned input to avoid masking. */
|
||||
@@ -128,7 +135,7 @@ static char* toBase64( ubyte *pIn, int nbIn, char *pOut ){
|
||||
if( nbe<nbIn ) qv |= *pIn++;
|
||||
}
|
||||
for( nbe=3; nbe>=0; --nbe ){
|
||||
char ce = (nbe<nco)? BX_NUMERAL((ubyte)(qv & 0x3f)) : PAD_CHAR;
|
||||
char ce = (nbe<nco)? BX_NUMERAL((u8)(qv & 0x3f)) : PAD_CHAR;
|
||||
qv >>= 6;
|
||||
pOut[nbe] = ce;
|
||||
}
|
||||
@@ -147,7 +154,7 @@ static char * skipNonB64( char *s ){
|
||||
}
|
||||
|
||||
/* Decode base64 text into a byte buffer. */
|
||||
static ubyte* fromBase64( char *pIn, int ncIn, ubyte *pOut ){
|
||||
static u8* fromBase64( char *pIn, int ncIn, u8 *pOut ){
|
||||
if( ncIn>0 && pIn[ncIn-1]=='\n' ) --ncIn;
|
||||
while( ncIn>0 && *pIn!=PAD_CHAR ){
|
||||
static signed char nboi[] = { 0, 0, 1, 2, 3 };
|
||||
@@ -162,20 +169,20 @@ static ubyte* fromBase64( char *pIn, int ncIn, ubyte *pOut ){
|
||||
if( nbo==0 ) break;
|
||||
for( nac=0; nac<4; ++nac ){
|
||||
char c = (nac<nti)? *pIn++ : b64Numerals[0];
|
||||
ubyte bdp = BX_DV_PROTO(c);
|
||||
u8 bdp = BX_DV_PROTO(c);
|
||||
switch( bdp ){
|
||||
case ND:
|
||||
/* Treat dark non-digits as pad, but they terminate decode too. */
|
||||
ncIn = 0;
|
||||
/* fall thru */
|
||||
deliberate_fall_through;
|
||||
case WS:
|
||||
/* Treat whitespace as pad and terminate this group.*/
|
||||
nti = nac;
|
||||
/* fall thru */
|
||||
deliberate_fall_through;
|
||||
case PC:
|
||||
bdp = 0;
|
||||
--nbo;
|
||||
/* fall thru */
|
||||
deliberate_fall_through;
|
||||
default: /* bdp is the digit value. */
|
||||
qv = qv<<6 | bdp;
|
||||
break;
|
||||
@@ -200,7 +207,7 @@ static void base64(sqlite3_context *context, int na, sqlite3_value *av[]){
|
||||
int nvMax = sqlite3_limit(sqlite3_context_db_handle(context),
|
||||
SQLITE_LIMIT_LENGTH, -1);
|
||||
char *cBuf;
|
||||
ubyte *bBuf;
|
||||
u8 *bBuf;
|
||||
assert(na==1);
|
||||
switch( sqlite3_value_type(av[0]) ){
|
||||
case SQLITE_BLOB:
|
||||
@@ -213,7 +220,7 @@ static void base64(sqlite3_context *context, int na, sqlite3_value *av[]){
|
||||
}
|
||||
cBuf = sqlite3_malloc(nc);
|
||||
if( !cBuf ) goto memFail;
|
||||
bBuf = (ubyte*)sqlite3_value_blob(av[0]);
|
||||
bBuf = (u8*)sqlite3_value_blob(av[0]);
|
||||
nc = (int)(toBase64(bBuf, nb, cBuf) - cBuf);
|
||||
sqlite3_result_text(context, cBuf, nc, sqlite3_free);
|
||||
break;
|
||||
|
@@ -85,9 +85,7 @@
|
||||
|
||||
#ifndef BASE85_STANDALONE
|
||||
|
||||
#ifndef SQLITE_SHELL_EXTFUNCS /* Guard for #include as built-in extension. */
|
||||
# include "sqlite3ext.h"
|
||||
#endif
|
||||
|
||||
SQLITE_EXTENSION_INIT1;
|
||||
|
||||
@@ -113,9 +111,9 @@ static void sayHelp(){
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef UBYTE_TYPEDEF
|
||||
typedef unsigned char ubyte;
|
||||
# define UBYTE_TYPEDEF
|
||||
#ifndef U8_TYPEDEF
|
||||
typedef unsigned char u8;
|
||||
#define U8_TYPEDEF
|
||||
#endif
|
||||
|
||||
/* Classify c according to interval within USASCII set w.r.t. base85
|
||||
@@ -124,15 +122,15 @@ typedef unsigned char ubyte;
|
||||
#define B85_CLASS( c ) (((c)>='#')+((c)>'&')+((c)>='*')+((c)>'z'))
|
||||
|
||||
/* Provide digitValue to b85Numeral offset as a function of above class. */
|
||||
static ubyte b85_cOffset[] = { 0, '#', 0, '*'-4, 0 };
|
||||
static u8 b85_cOffset[] = { 0, '#', 0, '*'-4, 0 };
|
||||
#define B85_DNOS( c ) b85_cOffset[B85_CLASS(c)]
|
||||
|
||||
/* Say whether c is a base85 numeral. */
|
||||
#define IS_B85( c ) (B85_CLASS(c) & 1)
|
||||
|
||||
#if 0 /* Not used, */
|
||||
static ubyte base85DigitValue( char c ){
|
||||
ubyte dv = (ubyte)(c - '#');
|
||||
static u8 base85DigitValue( char c ){
|
||||
u8 dv = (u8)(c - '#');
|
||||
if( dv>87 ) return 0xff;
|
||||
return (dv > 3)? dv-3 : dv;
|
||||
}
|
||||
@@ -151,7 +149,7 @@ static char * skipNonB85( char *s ){
|
||||
/* Convert small integer, known to be in 0..84 inclusive, to base85 numeral.
|
||||
* Do not use the macro form with argument expression having a side-effect.*/
|
||||
#if 0
|
||||
static char base85Numeral( ubyte b ){
|
||||
static char base85Numeral( u8 b ){
|
||||
return (b < 4)? (char)(b + '#') : (char)(b - 4 + '*');
|
||||
}
|
||||
#else
|
||||
@@ -169,11 +167,12 @@ static char *putcs(char *pc, char *s){
|
||||
** to be appended to encoded groups to limit their length to B85_DARK_MAX
|
||||
** or to terminate the last group (to aid concatenation.)
|
||||
*/
|
||||
static char* toBase85( ubyte *pIn, int nbIn, char *pOut, char *pSep ){
|
||||
static char* toBase85( u8 *pIn, int nbIn, char *pOut, char *pSep ){
|
||||
int nCol = 0;
|
||||
while( nbIn >= 4 ){
|
||||
int nco = 5;
|
||||
unsigned long qbv = (pIn[0]<<24)|(pIn[1]<<16)|(pIn[2]<<8)|pIn[3];
|
||||
unsigned long qbv = (((unsigned long)pIn[0])<<24) |
|
||||
(pIn[1]<<16) | (pIn[2]<<8) | pIn[3];
|
||||
while( nco > 0 ){
|
||||
unsigned nqv = (unsigned)(qbv/85UL);
|
||||
unsigned char dv = qbv - 85UL*nqv;
|
||||
@@ -197,7 +196,7 @@ static char* toBase85( ubyte *pIn, int nbIn, char *pOut, char *pSep ){
|
||||
}
|
||||
nCol += nco;
|
||||
while( nco > 0 ){
|
||||
ubyte dv = (ubyte)(qv % 85);
|
||||
u8 dv = (u8)(qv % 85);
|
||||
qv /= 85;
|
||||
pOut[--nco] = base85Numeral(dv);
|
||||
}
|
||||
@@ -209,7 +208,7 @@ static char* toBase85( ubyte *pIn, int nbIn, char *pOut, char *pSep ){
|
||||
}
|
||||
|
||||
/* Decode base85 text into a byte buffer. */
|
||||
static ubyte* fromBase85( char *pIn, int ncIn, ubyte *pOut ){
|
||||
static u8* fromBase85( char *pIn, int ncIn, u8 *pOut ){
|
||||
if( ncIn>0 && pIn[ncIn-1]=='\n' ) --ncIn;
|
||||
while( ncIn>0 ){
|
||||
static signed char nboi[] = { 0, 0, 1, 2, 3, 4 };
|
||||
@@ -223,7 +222,7 @@ static ubyte* fromBase85( char *pIn, int ncIn, ubyte *pOut ){
|
||||
if( nbo==0 ) break;
|
||||
while( nti>0 ){
|
||||
char c = *pIn++;
|
||||
ubyte cdo = B85_DNOS(c);
|
||||
u8 cdo = B85_DNOS(c);
|
||||
--ncIn;
|
||||
if( cdo==0 ) break;
|
||||
qv = 85 * qv + (c - cdo);
|
||||
@@ -287,7 +286,7 @@ static void base85(sqlite3_context *context, int na, sqlite3_value *av[]){
|
||||
int nvMax = sqlite3_limit(sqlite3_context_db_handle(context),
|
||||
SQLITE_LIMIT_LENGTH, -1);
|
||||
char *cBuf;
|
||||
ubyte *bBuf;
|
||||
u8 *bBuf;
|
||||
assert(na==1);
|
||||
switch( sqlite3_value_type(av[0]) ){
|
||||
case SQLITE_BLOB:
|
||||
@@ -300,7 +299,7 @@ static void base85(sqlite3_context *context, int na, sqlite3_value *av[]){
|
||||
}
|
||||
cBuf = sqlite3_malloc(nc);
|
||||
if( !cBuf ) goto memFail;
|
||||
bBuf = (ubyte*)sqlite3_value_blob(av[0]);
|
||||
bBuf = (u8*)sqlite3_value_blob(av[0]);
|
||||
nc = (int)(toBase85(bBuf, nb, cBuf, "\n") - cBuf);
|
||||
sqlite3_result_text(context, cBuf, nc, sqlite3_free);
|
||||
break;
|
||||
@@ -370,7 +369,7 @@ static int sqlite3_base85_init
|
||||
int main(int na, char *av[]){
|
||||
int cin;
|
||||
int rc = 0;
|
||||
ubyte bBuf[4*(B85_DARK_MAX/5)];
|
||||
u8 bBuf[4*(B85_DARK_MAX/5)];
|
||||
char cBuf[5*(sizeof(bBuf)/4)+2];
|
||||
size_t nio;
|
||||
# ifndef OMIT_BASE85_CHECKER
|
||||
|
@@ -49,6 +49,9 @@ static void init_api_ptr(const sqlite3_api_routines *pApi){
|
||||
#undef SQLITE_EXTENSION_INIT2
|
||||
#define SQLITE_EXTENSION_INIT2(v) (void)v
|
||||
|
||||
typedef unsigned char u8;
|
||||
#define U8_TYPEDEF
|
||||
|
||||
/* These next 2 undef's are only needed because the entry point names
|
||||
* collide when formulated per the rules stated for loadable extension
|
||||
* entry point names that will be deduced from the file basenames.
|
||||
|
@@ -28,7 +28,7 @@
|
||||
**
|
||||
** There is an optional third parameter to determine the datatype of
|
||||
** the C-language array. Allowed values of the third parameter are
|
||||
** 'int32', 'int64', 'double', 'char*'. Example:
|
||||
** 'int32', 'int64', 'double', 'char*', 'struct iovec'. Example:
|
||||
**
|
||||
** SELECT * FROM carray($ptr,10,'char*');
|
||||
**
|
||||
@@ -56,6 +56,14 @@
|
||||
SQLITE_EXTENSION_INIT1
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#ifdef _WIN32
|
||||
struct iovec {
|
||||
void *iov_base;
|
||||
size_t iov_len;
|
||||
};
|
||||
#else
|
||||
# include <sys/uio.h>
|
||||
#endif
|
||||
|
||||
/* Allowed values for the mFlags parameter to sqlite3_carray_bind().
|
||||
** Must exactly match the definitions in carray.h.
|
||||
@@ -65,6 +73,7 @@ SQLITE_EXTENSION_INIT1
|
||||
# define CARRAY_INT64 1 /* Data is 64-bit signed integers */
|
||||
# define CARRAY_DOUBLE 2 /* Data is doubles */
|
||||
# define CARRAY_TEXT 3 /* Data is char* */
|
||||
# define CARRAY_BLOB 4 /* Data is struct iovec* */
|
||||
#endif
|
||||
|
||||
#ifndef SQLITE_API
|
||||
@@ -80,7 +89,8 @@ SQLITE_EXTENSION_INIT1
|
||||
/*
|
||||
** Names of allowed datatypes
|
||||
*/
|
||||
static const char *azType[] = { "int32", "int64", "double", "char*" };
|
||||
static const char *azType[] = { "int32", "int64", "double", "char*",
|
||||
"struct iovec" };
|
||||
|
||||
/*
|
||||
** Structure used to hold the sqlite3_carray_bind() information
|
||||
@@ -224,6 +234,12 @@ static int carrayColumn(
|
||||
sqlite3_result_text(ctx, p[pCur->iRowid-1], -1, SQLITE_TRANSIENT);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
case CARRAY_BLOB: {
|
||||
const struct iovec *p = (struct iovec*)pCur->pPtr;
|
||||
sqlite3_result_blob(ctx, p[pCur->iRowid-1].iov_base,
|
||||
(int)p[pCur->iRowid-1].iov_len, SQLITE_TRANSIENT);
|
||||
return SQLITE_OK;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -268,7 +284,7 @@ static int carrayFilter(
|
||||
if( pBind==0 ) break;
|
||||
pCur->pPtr = pBind->aData;
|
||||
pCur->iCnt = pBind->nData;
|
||||
pCur->eType = pBind->mFlags & 0x03;
|
||||
pCur->eType = pBind->mFlags & 0x07;
|
||||
break;
|
||||
}
|
||||
case 2:
|
||||
@@ -431,24 +447,29 @@ SQLITE_API int sqlite3_carray_bind(
|
||||
pNew->mFlags = mFlags;
|
||||
if( xDestroy==SQLITE_TRANSIENT ){
|
||||
sqlite3_int64 sz = nData;
|
||||
switch( mFlags & 0x03 ){
|
||||
case CARRAY_INT32: sz *= 4; break;
|
||||
case CARRAY_INT64: sz *= 8; break;
|
||||
case CARRAY_DOUBLE: sz *= 8; break;
|
||||
case CARRAY_TEXT: sz *= sizeof(char*); break;
|
||||
switch( mFlags & 0x07 ){
|
||||
case CARRAY_INT32: sz *= 4; break;
|
||||
case CARRAY_INT64: sz *= 8; break;
|
||||
case CARRAY_DOUBLE: sz *= 8; break;
|
||||
case CARRAY_TEXT: sz *= sizeof(char*); break;
|
||||
case CARRAY_BLOB: sz *= sizeof(struct iovec); break;
|
||||
}
|
||||
if( (mFlags & 0x03)==CARRAY_TEXT ){
|
||||
if( (mFlags & 0x07)==CARRAY_TEXT ){
|
||||
for(i=0; i<nData; i++){
|
||||
const char *z = ((char**)aData)[i];
|
||||
if( z ) sz += strlen(z) + 1;
|
||||
}
|
||||
}else if( (mFlags & 0x07)==CARRAY_BLOB ){
|
||||
for(i=0; i<nData; i++){
|
||||
sz += ((struct iovec*)aData)[i].iov_len;
|
||||
}
|
||||
}
|
||||
pNew->aData = sqlite3_malloc64( sz );
|
||||
if( pNew->aData==0 ){
|
||||
sqlite3_free(pNew);
|
||||
return SQLITE_NOMEM;
|
||||
}
|
||||
if( (mFlags & 0x03)==CARRAY_TEXT ){
|
||||
if( (mFlags & 0x07)==CARRAY_TEXT ){
|
||||
char **az = (char**)pNew->aData;
|
||||
char *z = (char*)&az[nData];
|
||||
for(i=0; i<nData; i++){
|
||||
@@ -463,6 +484,16 @@ SQLITE_API int sqlite3_carray_bind(
|
||||
memcpy(z, zData, n+1);
|
||||
z += n+1;
|
||||
}
|
||||
}else if( (mFlags & 0x07)==CARRAY_BLOB ){
|
||||
struct iovec *p = (struct iovec*)pNew->aData;
|
||||
unsigned char *z = (unsigned char*)&p[nData];
|
||||
for(i=0; i<nData; i++){
|
||||
size_t n = ((struct iovec*)aData)[i].iov_len;
|
||||
p[i].iov_len = n;
|
||||
p[i].iov_base = z;
|
||||
z += n;
|
||||
memcpy(p[i].iov_base, ((struct iovec*)aData)[i].iov_base, n);
|
||||
}
|
||||
}else{
|
||||
memcpy(pNew->aData, aData, sz);
|
||||
}
|
||||
|
@@ -41,10 +41,10 @@ SQLITE_API int sqlite3_carray_bind(
|
||||
#define CARRAY_INT64 1 /* Data is 64-bit signed integers */
|
||||
#define CARRAY_DOUBLE 2 /* Data is doubles */
|
||||
#define CARRAY_TEXT 3 /* Data is char* */
|
||||
#define CARRAY_BLOB 4 /* Data is struct iovec */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* end of the 'extern "C"' block */
|
||||
#endif
|
||||
|
||||
#endif /* ifndef _CARRAY_H */
|
||||
|
||||
|
@@ -616,7 +616,7 @@ int sqlite3_decimal_init(
|
||||
|
||||
SQLITE_EXTENSION_INIT2(pApi);
|
||||
|
||||
for(i=0; i<sizeof(aFunc)/sizeof(aFunc[0]) && rc==SQLITE_OK; i++){
|
||||
for(i=0; i<(int)(sizeof(aFunc)/sizeof(aFunc[0])) && rc==SQLITE_OK; i++){
|
||||
rc = sqlite3_create_function(db, aFunc[i].zFuncName, aFunc[i].nArg,
|
||||
SQLITE_UTF8|SQLITE_INNOCUOUS|SQLITE_DETERMINISTIC,
|
||||
0, aFunc[i].xFunc, 0, 0);
|
||||
|
@@ -803,6 +803,7 @@ static void re_bytecode_func(
|
||||
int i;
|
||||
int n;
|
||||
char *z;
|
||||
(void)argc;
|
||||
|
||||
zPattern = (const char*)sqlite3_value_text(argv[0]);
|
||||
if( zPattern==0 ) return;
|
||||
|
@@ -91,7 +91,9 @@ static void sqlarUncompressFunc(
|
||||
}else{
|
||||
const Bytef *pData= sqlite3_value_blob(argv[0]);
|
||||
Bytef *pOut = sqlite3_malloc(sz);
|
||||
if( Z_OK!=uncompress(pOut, &sz, pData, nData) ){
|
||||
if( pOut==0 ){
|
||||
sqlite3_result_error_nomem(context);
|
||||
}else if( Z_OK!=uncompress(pOut, &sz, pData, nData) ){
|
||||
sqlite3_result_error(context, "error in uncompress()", -1);
|
||||
}else{
|
||||
sqlite3_result_blob(context, pOut, sz, SQLITE_TRANSIENT);
|
||||
@@ -100,7 +102,6 @@ static void sqlarUncompressFunc(
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#ifdef _WIN32
|
||||
__declspec(dllexport)
|
||||
#endif
|
||||
|
@@ -97,6 +97,10 @@ static int stmtConnect(
|
||||
#define STMT_COLUMN_MEM 10 /* SQLITE_STMTSTATUS_MEMUSED */
|
||||
|
||||
|
||||
(void)pAux;
|
||||
(void)argc;
|
||||
(void)argv;
|
||||
(void)pzErr;
|
||||
rc = sqlite3_declare_vtab(db,
|
||||
"CREATE TABLE x(sql,ncol,ro,busy,nscan,nsort,naidx,nstep,"
|
||||
"reprep,run,mem)");
|
||||
@@ -216,6 +220,10 @@ static int stmtFilter(
|
||||
sqlite3_int64 iRowid = 1;
|
||||
StmtRow **ppRow = 0;
|
||||
|
||||
(void)idxNum;
|
||||
(void)idxStr;
|
||||
(void)argc;
|
||||
(void)argv;
|
||||
stmtCsrReset(pCur);
|
||||
ppRow = &pCur->pRow;
|
||||
for(p=sqlite3_next_stmt(pCur->db, 0); p; p=sqlite3_next_stmt(pCur->db, p)){
|
||||
@@ -271,6 +279,7 @@ static int stmtBestIndex(
|
||||
sqlite3_vtab *tab,
|
||||
sqlite3_index_info *pIdxInfo
|
||||
){
|
||||
(void)tab;
|
||||
pIdxInfo->estimatedCost = (double)500;
|
||||
pIdxInfo->estimatedRows = 500;
|
||||
return SQLITE_OK;
|
||||
|
@@ -352,6 +352,7 @@ static int zipfileConnect(
|
||||
const char *zFile = 0;
|
||||
ZipfileTab *pNew = 0;
|
||||
int rc;
|
||||
(void)pAux;
|
||||
|
||||
/* If the table name is not "zipfile", require that the argument be
|
||||
** specified. This stops zipfile tables from being created as:
|
||||
@@ -808,6 +809,7 @@ static int zipfileGetEntry(
|
||||
u8 *aRead;
|
||||
char **pzErr = &pTab->base.zErrMsg;
|
||||
int rc = SQLITE_OK;
|
||||
(void)nBlob;
|
||||
|
||||
if( aBlob==0 ){
|
||||
aRead = pTab->aBuffer;
|
||||
@@ -1254,6 +1256,9 @@ static int zipfileFilter(
|
||||
int rc = SQLITE_OK; /* Return Code */
|
||||
int bInMemory = 0; /* True for an in-memory zipfile */
|
||||
|
||||
(void)idxStr;
|
||||
(void)argc;
|
||||
|
||||
zipfileResetCursor(pCsr);
|
||||
|
||||
if( pTab->zFile ){
|
||||
@@ -1280,7 +1285,7 @@ static int zipfileFilter(
|
||||
}
|
||||
|
||||
if( 0==pTab->pWriteFd && 0==bInMemory ){
|
||||
pCsr->pFile = fopen(zFile, "rb");
|
||||
pCsr->pFile = zFile ? fopen(zFile, "rb") : 0;
|
||||
if( pCsr->pFile==0 ){
|
||||
zipfileCursorErr(pCsr, "cannot open file: %s", zFile);
|
||||
rc = SQLITE_ERROR;
|
||||
@@ -1314,6 +1319,7 @@ static int zipfileBestIndex(
|
||||
int i;
|
||||
int idx = -1;
|
||||
int unusable = 0;
|
||||
(void)tab;
|
||||
|
||||
for(i=0; i<pIdxInfo->nConstraint; i++){
|
||||
const struct sqlite3_index_constraint *pCons = &pIdxInfo->aConstraint[i];
|
||||
@@ -1564,6 +1570,8 @@ static int zipfileUpdate(
|
||||
int bIsDir = 0;
|
||||
u32 iCrc32 = 0;
|
||||
|
||||
(void)pRowid;
|
||||
|
||||
if( pTab->pWriteFd==0 ){
|
||||
rc = zipfileBegin(pVtab);
|
||||
if( rc!=SQLITE_OK ) return rc;
|
||||
@@ -1898,6 +1906,7 @@ static int zipfileFindFunction(
|
||||
void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), /* OUT: Result */
|
||||
void **ppArg /* OUT: User data for *pxFunc */
|
||||
){
|
||||
(void)nArg;
|
||||
if( sqlite3_stricmp("zipfile_cds", zName)==0 ){
|
||||
*pxFunc = zipfileFunctionCds;
|
||||
*ppArg = (void*)pVtab;
|
||||
|
@@ -11,6 +11,7 @@
|
||||
#
|
||||
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
set ::testprefix rbu1
|
||||
|
||||
db close
|
||||
|
@@ -10,13 +10,10 @@
|
||||
#***********************************************************************
|
||||
#
|
||||
|
||||
if {![info exists testdir]} {
|
||||
set testdir [file join [file dirname [info script]] .. .. test]
|
||||
}
|
||||
source $testdir/tester.tcl
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
set ::testprefix rbu10
|
||||
|
||||
|
||||
#--------------------------------------------------------------------
|
||||
# Test that UPDATE commands work even if the input columns are in a
|
||||
# different order to the output columns.
|
||||
|
@@ -10,10 +10,8 @@
|
||||
#***********************************************************************
|
||||
#
|
||||
|
||||
if {![info exists testdir]} {
|
||||
set testdir [file join [file dirname [info script]] .. .. test]
|
||||
}
|
||||
source $testdir/tester.tcl
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
set ::testprefix rbu11
|
||||
|
||||
|
||||
|
@@ -10,10 +10,8 @@
|
||||
#***********************************************************************
|
||||
#
|
||||
|
||||
if {![info exists testdir]} {
|
||||
set testdir [file join [file dirname [info script]] .. .. test]
|
||||
}
|
||||
source $testdir/tester.tcl
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
source $testdir/lock_common.tcl
|
||||
set ::testprefix rbu12
|
||||
|
||||
|
@@ -13,10 +13,8 @@
|
||||
# for UPDATE statements. This tests RBU's internal UPDATE statement cache.
|
||||
#
|
||||
|
||||
if {![info exists testdir]} {
|
||||
set testdir [file join [file dirname [info script]] .. .. test]
|
||||
}
|
||||
source $testdir/tester.tcl
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
source $testdir/lock_common.tcl
|
||||
set ::testprefix rbu13
|
||||
|
||||
|
@@ -13,10 +13,8 @@
|
||||
# table.
|
||||
#
|
||||
|
||||
if {![info exists testdir]} {
|
||||
set testdir [file join [file dirname [info script]] .. .. test]
|
||||
}
|
||||
source $testdir/tester.tcl
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
source $testdir/lock_common.tcl
|
||||
set ::testprefix rbu14
|
||||
|
||||
|
@@ -10,10 +10,8 @@
|
||||
#***********************************************************************
|
||||
#
|
||||
|
||||
if {![info exists testdir]} {
|
||||
set testdir [file join [file dirname [info script]] .. .. test]
|
||||
}
|
||||
source $testdir/tester.tcl
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
set ::testprefix rbu3
|
||||
|
||||
|
||||
|
@@ -13,6 +13,7 @@
|
||||
#
|
||||
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
set ::testprefix rbu5
|
||||
|
||||
|
||||
|
@@ -13,10 +13,8 @@
|
||||
# outcome of some other client writing to the database while an RBU update
|
||||
# is being applied.
|
||||
|
||||
if {![info exists testdir]} {
|
||||
set testdir [file join [file dirname [info script]] .. .. test]
|
||||
}
|
||||
source $testdir/tester.tcl
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
set ::testprefix rbu6
|
||||
|
||||
proc setup_test {} {
|
||||
|
@@ -13,10 +13,8 @@
|
||||
#
|
||||
|
||||
|
||||
if {![info exists testdir]} {
|
||||
set testdir [file join [file dirname [info script]] .. .. test]
|
||||
}
|
||||
source $testdir/tester.tcl
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
set ::testprefix rbu7
|
||||
|
||||
# Test index:
|
||||
|
@@ -12,10 +12,8 @@
|
||||
# Test the rbu_delta() feature.
|
||||
#
|
||||
|
||||
if {![info exists testdir]} {
|
||||
set testdir [file join [file dirname [info script]] .. .. test]
|
||||
}
|
||||
source $testdir/tester.tcl
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
set ::testprefix rbu8
|
||||
|
||||
do_execsql_test 1.0 {
|
||||
|
@@ -12,10 +12,8 @@
|
||||
# Test RBU with virtual tables. And tables with no PRIMARY KEY declarations.
|
||||
#
|
||||
|
||||
if {![info exists testdir]} {
|
||||
set testdir [file join [file dirname [info script]] .. .. test]
|
||||
}
|
||||
source $testdir/tester.tcl
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
set ::testprefix rbu9
|
||||
|
||||
ifcapable !fts3 {
|
||||
|
@@ -14,10 +14,8 @@
|
||||
# a wal mode database via RBU.
|
||||
#
|
||||
|
||||
if {![info exists testdir]} {
|
||||
set testdir [file join [file dirname [info script]] .. .. test]
|
||||
}
|
||||
source $testdir/tester.tcl
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
set ::testprefix rbuA
|
||||
|
||||
set db_sql {
|
||||
|
@@ -12,6 +12,7 @@
|
||||
#
|
||||
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
set ::testprefix rbuB
|
||||
|
||||
db close
|
||||
|
@@ -13,6 +13,7 @@
|
||||
#
|
||||
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
set ::testprefix rbuC
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
|
@@ -12,6 +12,7 @@
|
||||
#
|
||||
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
set ::testprefix rbubusy
|
||||
|
||||
db close
|
||||
|
@@ -11,6 +11,7 @@
|
||||
#
|
||||
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
set ::testprefix rbucollate
|
||||
|
||||
ifcapable !icu_collations {
|
||||
|
@@ -10,10 +10,8 @@
|
||||
#***********************************************************************
|
||||
#
|
||||
|
||||
if {![info exists testdir]} {
|
||||
set testdir [file join [file dirname [info script]] .. .. test]
|
||||
}
|
||||
source $testdir/tester.tcl
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
set ::testprefix rbucrash
|
||||
|
||||
db close
|
||||
|
@@ -10,10 +10,8 @@
|
||||
#***********************************************************************
|
||||
#
|
||||
|
||||
if {![info exists testdir]} {
|
||||
set testdir [file join [file dirname [info script]] .. .. test]
|
||||
}
|
||||
source $testdir/tester.tcl
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
set ::testprefix rbucrash2
|
||||
|
||||
db close
|
||||
|
@@ -12,10 +12,9 @@
|
||||
# Tests for the [sqldiff --rbu] command.
|
||||
#
|
||||
#
|
||||
if {![info exists testdir]} {
|
||||
set testdir [file join [file dirname [info script]] .. .. test]
|
||||
}
|
||||
source $testdir/tester.tcl
|
||||
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
set testprefix rbudiff
|
||||
|
||||
set PROG [test_find_sqldiff]
|
||||
|
@@ -13,10 +13,8 @@
|
||||
# enabled by SQLITE_DIRECT_OVERFLOW_READ - Direct Overflow Read.
|
||||
#
|
||||
|
||||
if {![info exists testdir]} {
|
||||
set testdir [file join [file dirname [info script]] .. .. test]
|
||||
}
|
||||
source $testdir/tester.tcl
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
set ::testprefix rbudor
|
||||
|
||||
set bigA [string repeat a 5000]
|
||||
|
@@ -11,6 +11,7 @@
|
||||
#
|
||||
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
set ::testprefix rbuexlock
|
||||
|
||||
db close
|
||||
|
@@ -11,6 +11,7 @@
|
||||
#
|
||||
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
set ::testprefix rbuexpr
|
||||
|
||||
db close
|
||||
|
@@ -10,10 +10,8 @@
|
||||
#***********************************************************************
|
||||
#
|
||||
|
||||
if {![info exists testdir]} {
|
||||
set testdir [file join [file dirname [info script]] .. .. test]
|
||||
}
|
||||
source $testdir/tester.tcl
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
source $testdir/malloc_common.tcl
|
||||
set ::testprefix rbufault
|
||||
|
||||
|
@@ -10,10 +10,8 @@
|
||||
#***********************************************************************
|
||||
#
|
||||
|
||||
if {![info exists testdir]} {
|
||||
set testdir [file join [file dirname [info script]] .. .. test]
|
||||
}
|
||||
source $testdir/tester.tcl
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
source $testdir/malloc_common.tcl
|
||||
set ::testprefix rbufault2
|
||||
|
||||
|
@@ -13,6 +13,7 @@
|
||||
#
|
||||
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
source $testdir/malloc_common.tcl
|
||||
set ::testprefix rbufault3
|
||||
|
||||
|
@@ -10,10 +10,8 @@
|
||||
#***********************************************************************
|
||||
#
|
||||
|
||||
if {![info exists testdir]} {
|
||||
set testdir [file join [file dirname [info script]] .. .. test]
|
||||
}
|
||||
source $testdir/tester.tcl
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
source $testdir/malloc_common.tcl
|
||||
set ::testprefix rbufault4
|
||||
|
||||
|
@@ -13,10 +13,8 @@
|
||||
# contains tests to ensure that RBU works with FTS tables.
|
||||
#
|
||||
|
||||
if {![info exists testdir]} {
|
||||
set testdir [file join [file dirname [info script]] .. .. test]
|
||||
}
|
||||
source $testdir/tester.tcl
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
set ::testprefix rbufts
|
||||
|
||||
ifcapable !fts3 {
|
||||
|
@@ -11,6 +11,7 @@
|
||||
#
|
||||
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
set ::testprefix rbumisc
|
||||
|
||||
db close
|
||||
|
@@ -9,11 +9,14 @@
|
||||
#
|
||||
#***********************************************************************
|
||||
#
|
||||
# TESTRUNNER: slow
|
||||
#
|
||||
# This file contains tests of multiple RBU operations running
|
||||
# concurrently within the same process.
|
||||
#
|
||||
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
set ::testprefix rbumulti
|
||||
|
||||
db close
|
||||
|
@@ -11,6 +11,7 @@
|
||||
#
|
||||
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
set ::testprefix rbupartial
|
||||
|
||||
db close
|
||||
|
81
ext/rbu/rbupass.test
Normal file
81
ext/rbu/rbupass.test
Normal file
@@ -0,0 +1,81 @@
|
||||
# 2023 January 13
|
||||
#
|
||||
# The author disclaims copyright to this source code. In place of
|
||||
# a legal notice, here is a blessing:
|
||||
|
||||
#
|
||||
# May you do good and not evil.
|
||||
# May you find forgiveness for yourself and forgive others.
|
||||
# May you share freely, never taking more than you give.
|
||||
#
|
||||
#***********************************************************************
|
||||
#
# This file exercises RBU when the target database is opened through an
# RBU VFS ("myvfs") created on top of the "demo" VFS that the test
# harness registers with register_demovfs. With that VFS stack, both an
# RBU update and an RBU vacuum of test.db are expected to fail with an
# error, and the contents of the target database must be left unchanged.
#
|
||||
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
set ::testprefix rbupass
|
||||
|
||||
# Skip the whole file when the register_demovfs command is not available
# in this test build.
if {[info commands register_demovfs]==""} {
|
||||
finish_test
|
||||
return
|
||||
}
|
||||
|
||||
# Close the default handle and shut the library down so that it can be
# reconfigured; sqlite3_config_uri 1 presumably turns on URI filename
# handling, which the file:...?vfs= name used below relies on.
db close
|
||||
sqlite3_shutdown
|
||||
sqlite3_config_uri 1
|
||||
|
||||
# Register the demo VFS, then create the RBU VFS "myvfs" from it
# (NOTE(review): assumes the arguments are NAME then PARENT - confirm).
register_demovfs
|
||||
sqlite3rbu_create_vfs myvfs demo
|
||||
|
||||
# Open test.db through the stacked VFS and check basic reads and writes.
sqlite3 db file:test.db?vfs=myvfs
|
||||
do_execsql_test 1.0 {
|
||||
CREATE TABLE t1(a INTEGER PRIMARY KEY, b);
|
||||
INSERT INTO t1 VALUES(1, 2);
|
||||
SELECT * FROM t1;
|
||||
} {1 2}
|
||||
|
||||
# Requesting WAL mode is expected to report "delete", i.e. the journal
# mode does not change (presumably because the demo VFS offers no
# shared-memory support - confirm against the demo VFS implementation).
do_execsql_test 1.1 {
|
||||
PRAGMA journal_mode = wal;
|
||||
} {delete}
|
||||
|
||||
# The table is still readable after the journal_mode request.
do_execsql_test 1.2 {
|
||||
SELECT * FROM t1;
|
||||
} {1 2}
|
||||
|
||||
# Build an RBU database holding a single row to insert into t1
# (rbu_control value 0).
do_test 1.3 {
|
||||
forcedelete rbu.db
|
||||
sqlite3 rbu rbu.db
|
||||
rbu eval {
|
||||
CREATE TABLE data_t1(a, b, rbu_control);
|
||||
INSERT INTO data_t1 VALUES(2, 4, 0);
|
||||
}
|
||||
rbu close
|
||||
} {}
|
||||
|
||||
# Creating the RBU update handle itself succeeds ...
do_test 1.4 {
|
||||
sqlite3rbu rbu test.db rbu.db
|
||||
} {rbu}
|
||||
# ... but the first step is expected to fail with SQLITE_CANTOPEN ...
do_test 1.5 {
|
||||
rbu step
|
||||
} {SQLITE_CANTOPEN}
|
||||
# ... and closing the handle reports the same error code and message.
do_test 1.6 {
|
||||
list [catch { rbu close } msg] $msg
|
||||
} {1 {SQLITE_CANTOPEN - unable to open database file}}
|
||||
|
||||
# An RBU vacuum handle can likewise be created ...
do_test 1.7 {
|
||||
sqlite3rbu_vacuum rbu test.db
|
||||
} {rbu}
|
||||
# ... but after one step, closing it is expected to raise an error
# (catch returns 1). The result of the step itself is not checked.
do_test 1.8 {
|
||||
rbu step
|
||||
catch { rbu close }
|
||||
} {1}
|
||||
|
||||
# Neither failed RBU operation may have modified t1: the row from the
# RBU database (2, 4) must not be present.
do_execsql_test 1.9 {
|
||||
SELECT * FROM t1;
|
||||
} {1 2}
|
||||
|
||||
# Tear down in reverse order of setup: close the handle, destroy the RBU
# VFS, unregister the demo VFS and shut the library down again.
db close
|
||||
sqlite3rbu_destroy_vfs myvfs
|
||||
unregister_demovfs
|
||||
sqlite3_shutdown
|
||||
finish_test
|
||||
|
@@ -11,6 +11,7 @@
|
||||
#
|
||||
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
set ::testprefix rbuprogress
|
||||
|
||||
|
||||
|
@@ -12,6 +12,7 @@
|
||||
#
|
||||
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
set ::testprefix rburename
|
||||
|
||||
|
||||
|
@@ -9,11 +9,14 @@
|
||||
#
|
||||
#***********************************************************************
|
||||
#
|
||||
# TESTRUNNER: slow
|
||||
#
|
||||
# This file contains tests for resumption of RBU operations in the
|
||||
# case where the previous RBU process crashed.
|
||||
#
|
||||
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
set ::testprefix rburesume
|
||||
|
||||
forcedelete test.db-shm test.db-oal
|
||||
|
@@ -10,10 +10,8 @@
|
||||
#***********************************************************************
|
||||
#
|
||||
|
||||
if {![info exists testdir]} {
|
||||
set testdir [file join [file dirname [info script]] .. .. test]
|
||||
}
|
||||
source $testdir/tester.tcl
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
set ::testprefix rbusave
|
||||
|
||||
do_execsql_test 1.0 {
|
||||
|
@@ -12,6 +12,7 @@
|
||||
#
|
||||
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
set ::testprefix rbusplit
|
||||
|
||||
db close
|
||||
|
@@ -9,8 +9,10 @@
|
||||
#
|
||||
#***********************************************************************
|
||||
#
|
||||
# TESTRUNNER: slow
|
||||
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
set ::testprefix rbutemplimit
|
||||
|
||||
db close
|
||||
|
@@ -15,6 +15,7 @@
|
||||
#
|
||||
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
set ::testprefix rbuvacuum
|
||||
|
||||
foreach step {0 1} {
|
||||
|
@@ -15,6 +15,7 @@
|
||||
#
|
||||
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
|
||||
foreach {step} {0 1} {
|
||||
foreach {ttt state} {
|
||||
|
@@ -15,6 +15,7 @@
|
||||
#
|
||||
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
set testprefix rbuvacuum3
|
||||
|
||||
do_execsql_test 1.0 {
|
||||
|
@@ -15,6 +15,7 @@
|
||||
#
|
||||
|
||||
source [file join [file dirname [info script]] rbu_common.tcl]
|
||||
ifcapable !rbu { finish_test ; return }
|
||||
set testprefix rbuvacuum4
|
||||
|
||||
set step 1
|
||||
|
@@ -3830,7 +3830,8 @@ static void rbuSetupOal(sqlite3rbu *p, RbuState *pState){
|
||||
static void rbuDeleteOalFile(sqlite3rbu *p){
|
||||
char *zOal = rbuMPrintf(p, "%s-oal", p->zTarget);
|
||||
if( zOal ){
|
||||
sqlite3_vfs *pVfs = sqlite3_vfs_find(0);
|
||||
sqlite3_vfs *pVfs = 0;
|
||||
sqlite3_file_control(p->dbMain, "main", SQLITE_FCNTL_VFS_POINTER, &pVfs);
|
||||
assert( pVfs && p->rc==SQLITE_OK && p->zErrmsg==0 );
|
||||
pVfs->xDelete(pVfs, zOal, 0);
|
||||
sqlite3_free(zOal);
|
||||
@@ -4587,9 +4588,12 @@ static int rbuVfsClose(sqlite3_file *pFile){
|
||||
sqlite3_free(p->zDel);
|
||||
|
||||
if( p->openFlags & SQLITE_OPEN_MAIN_DB ){
|
||||
const sqlite3_io_methods *pMeth = p->pReal->pMethods;
|
||||
rbuMainlistRemove(p);
|
||||
rbuUnlockShm(p);
|
||||
p->pReal->pMethods->xShmUnmap(p->pReal, 0);
|
||||
if( pMeth->iVersion>1 && pMeth->xShmUnmap ){
|
||||
pMeth->xShmUnmap(p->pReal, 0);
|
||||
}
|
||||
}
|
||||
else if( (p->openFlags & SQLITE_OPEN_DELETEONCLOSE) && p->pRbu ){
|
||||
rbuUpdateTempSize(p, 0);
|
||||
@@ -5048,6 +5052,25 @@ static int rbuVfsOpen(
|
||||
rbuVfsShmUnmap, /* xShmUnmap */
|
||||
0, 0 /* xFetch, xUnfetch */
|
||||
};
|
||||
static sqlite3_io_methods rbuvfs_io_methods1 = {
|
||||
1, /* iVersion */
|
||||
rbuVfsClose, /* xClose */
|
||||
rbuVfsRead, /* xRead */
|
||||
rbuVfsWrite, /* xWrite */
|
||||
rbuVfsTruncate, /* xTruncate */
|
||||
rbuVfsSync, /* xSync */
|
||||
rbuVfsFileSize, /* xFileSize */
|
||||
rbuVfsLock, /* xLock */
|
||||
rbuVfsUnlock, /* xUnlock */
|
||||
rbuVfsCheckReservedLock, /* xCheckReservedLock */
|
||||
rbuVfsFileControl, /* xFileControl */
|
||||
rbuVfsSectorSize, /* xSectorSize */
|
||||
rbuVfsDeviceCharacteristics, /* xDeviceCharacteristics */
|
||||
0, 0, 0, 0, 0, 0
|
||||
};
|
||||
|
||||
|
||||
|
||||
rbu_vfs *pRbuVfs = (rbu_vfs*)pVfs;
|
||||
sqlite3_vfs *pRealVfs = pRbuVfs->pRealVfs;
|
||||
rbu_file *pFd = (rbu_file *)pFile;
|
||||
@@ -5102,10 +5125,15 @@ static int rbuVfsOpen(
|
||||
rc = pRealVfs->xOpen(pRealVfs, zOpen, pFd->pReal, oflags, pOutFlags);
|
||||
}
|
||||
if( pFd->pReal->pMethods ){
|
||||
const sqlite3_io_methods *pMeth = pFd->pReal->pMethods;
|
||||
/* The xOpen() operation has succeeded. Set the sqlite3_file.pMethods
|
||||
** pointer and, if the file is a main database file, link it into the
|
||||
** mutex protected linked list of all such files. */
|
||||
pFile->pMethods = &rbuvfs_io_methods;
|
||||
if( pMeth->iVersion<2 || pMeth->xShmLock==0 ){
|
||||
pFile->pMethods = &rbuvfs_io_methods1;
|
||||
}else{
|
||||
pFile->pMethods = &rbuvfs_io_methods;
|
||||
}
|
||||
if( flags & SQLITE_OPEN_MAIN_DB ){
|
||||
rbuMainlistAdd(pFd);
|
||||
}
|
||||
|
@@ -10,42 +10,42 @@
|
||||
**
|
||||
*************************************************************************
|
||||
**
|
||||
** This file contains the public interface for the RBU extension.
|
||||
** This file contains the public interface for the RBU extension.
|
||||
*/
|
||||
|
||||
/*
|
||||
** SUMMARY
|
||||
**
|
||||
** Writing a transaction containing a large number of operations on
|
||||
** Writing a transaction containing a large number of operations on
|
||||
** b-tree indexes that are collectively larger than the available cache
|
||||
** memory can be very inefficient.
|
||||
** memory can be very inefficient.
|
||||
**
|
||||
** The problem is that in order to update a b-tree, the leaf page (at least)
|
||||
** containing the entry being inserted or deleted must be modified. If the
|
||||
** working set of leaves is larger than the available cache memory, then a
|
||||
** single leaf that is modified more than once as part of the transaction
|
||||
** working set of leaves is larger than the available cache memory, then a
|
||||
** single leaf that is modified more than once as part of the transaction
|
||||
** may be loaded from or written to the persistent media multiple times.
|
||||
** Additionally, because the index updates are likely to be applied in
|
||||
** random order, access to pages within the database is also likely to be in
|
||||
** random order, access to pages within the database is also likely to be in
|
||||
** random order, which is itself quite inefficient.
|
||||
**
|
||||
** One way to improve the situation is to sort the operations on each index
|
||||
** by index key before applying them to the b-tree. This leads to an IO
|
||||
** pattern that resembles a single linear scan through the index b-tree,
|
||||
** and all but guarantees each modified leaf page is loaded and stored
|
||||
** and all but guarantees each modified leaf page is loaded and stored
|
||||
** exactly once. SQLite uses this trick to improve the performance of
|
||||
** CREATE INDEX commands. This extension allows it to be used to improve
|
||||
** the performance of large transactions on existing databases.
|
||||
**
|
||||
** Additionally, this extension allows the work involved in writing the
|
||||
** large transaction to be broken down into sub-transactions performed
|
||||
** sequentially by separate processes. This is useful if the system cannot
|
||||
** guarantee that a single update process will run for long enough to apply
|
||||
** the entire update, for example because the update is being applied on a
|
||||
** mobile device that is frequently rebooted. Even after the writer process
|
||||
** Additionally, this extension allows the work involved in writing the
|
||||
** large transaction to be broken down into sub-transactions performed
|
||||
** sequentially by separate processes. This is useful if the system cannot
|
||||
** guarantee that a single update process will run for long enough to apply
|
||||
** the entire update, for example because the update is being applied on a
|
||||
** mobile device that is frequently rebooted. Even after the writer process
|
||||
** has committed one or more sub-transactions, other database clients continue
|
||||
** to read from the original database snapshot. In other words, partially
|
||||
** applied transactions are not visible to other clients.
|
||||
** to read from the original database snapshot. In other words, partially
|
||||
** applied transactions are not visible to other clients.
|
||||
**
|
||||
** "RBU" stands for "Resumable Bulk Update". As in a large database update
|
||||
** transmitted via a wireless network to a mobile device. A transaction
|
||||
@@ -61,9 +61,9 @@
|
||||
**
|
||||
** * INSERT statements may not use any default values.
|
||||
**
|
||||
** * UPDATE and DELETE statements must identify their target rows by
|
||||
** * UPDATE and DELETE statements must identify their target rows by
|
||||
** non-NULL PRIMARY KEY values. Rows with NULL values stored in PRIMARY
|
||||
** KEY fields may not be updated or deleted. If the table being written
|
||||
** KEY fields may not be updated or deleted. If the table being written
|
||||
** has no PRIMARY KEY, affected rows must be identified by rowid.
|
||||
**
|
||||
** * UPDATE statements may not modify PRIMARY KEY columns.
|
||||
@@ -80,10 +80,10 @@
|
||||
** PREPARATION
|
||||
**
|
||||
** An "RBU update" is stored as a separate SQLite database. A database
|
||||
** containing an RBU update is an "RBU database". For each table in the
|
||||
** containing an RBU update is an "RBU database". For each table in the
|
||||
** target database to be updated, the RBU database should contain a table
|
||||
** named "data_<target name>" containing the same set of columns as the
|
||||
** target table, and one more - "rbu_control". The data_% table should
|
||||
** target table, and one more - "rbu_control". The data_% table should
|
||||
** have no PRIMARY KEY or UNIQUE constraints, but each column should have
|
||||
** the same type as the corresponding column in the target database.
|
||||
** The "rbu_control" column should have no type at all. For example, if
|
||||
@@ -98,22 +98,22 @@
|
||||
** The order of the columns in the data_% table does not matter.
|
||||
**
|
||||
** Instead of a regular table, the RBU database may also contain virtual
|
||||
** tables or view named using the data_<target> naming scheme.
|
||||
** tables or views named using the data_<target> naming scheme.
|
||||
**
|
||||
** Instead of the plain data_<target> naming scheme, RBU database tables
|
||||
** Instead of the plain data_<target> naming scheme, RBU database tables
|
||||
** may also be named data<integer>_<target>, where <integer> is any sequence
|
||||
** of zero or more numeric characters (0-9). This can be significant because
|
||||
** tables within the RBU database are always processed in order sorted by
|
||||
** tables within the RBU database are always processed in order sorted by
|
||||
** name. By judicious selection of the <integer> portion of the names
|
||||
** of the RBU tables the user can therefore control the order in which they
|
||||
** are processed. This can be useful, for example, to ensure that "external
|
||||
** content" FTS4 tables are updated before their underlying content tables.
|
||||
**
|
||||
** If the target database table is a virtual table or a table that has no
|
||||
** PRIMARY KEY declaration, the data_% table must also contain a column
|
||||
** named "rbu_rowid". This column is mapped to the tables implicit primary
|
||||
** key column - "rowid". Virtual tables for which the "rowid" column does
|
||||
** not function like a primary key value cannot be updated using RBU. For
|
||||
** PRIMARY KEY declaration, the data_% table must also contain a column
|
||||
** named "rbu_rowid". This column is mapped to the table's implicit primary
|
||||
** key column - "rowid". Virtual tables for which the "rowid" column does
|
||||
** not function like a primary key value cannot be updated using RBU. For
|
||||
** example, if the target db contains either of the following:
|
||||
**
|
||||
** CREATE VIRTUAL TABLE x1 USING fts3(a, b);
|
||||
@@ -136,35 +136,35 @@
|
||||
** CREATE TABLE data_ft1(a, b, langid, rbu_rowid, rbu_control);
|
||||
** CREATE TABLE data_ft1(a, b, rbu_rowid, rbu_control);
|
||||
**
|
||||
** For each row to INSERT into the target database as part of the RBU
|
||||
** For each row to INSERT into the target database as part of the RBU
|
||||
** update, the corresponding data_% table should contain a single record
|
||||
** with the "rbu_control" column set to contain integer value 0. The
|
||||
** other columns should be set to the values that make up the new record
|
||||
** to insert.
|
||||
** other columns should be set to the values that make up the new record
|
||||
** to insert.
|
||||
**
|
||||
** If the target database table has an INTEGER PRIMARY KEY, it is not
|
||||
** possible to insert a NULL value into the IPK column. Attempting to
|
||||
** If the target database table has an INTEGER PRIMARY KEY, it is not
|
||||
** possible to insert a NULL value into the IPK column. Attempting to
|
||||
** do so results in an SQLITE_MISMATCH error.
|
||||
**
|
||||
** For each row to DELETE from the target database as part of the RBU
|
||||
** For each row to DELETE from the target database as part of the RBU
|
||||
** update, the corresponding data_% table should contain a single record
|
||||
** with the "rbu_control" column set to contain integer value 1. The
|
||||
** real primary key values of the row to delete should be stored in the
|
||||
** corresponding columns of the data_% table. The values stored in the
|
||||
** other columns are not used.
|
||||
**
|
||||
** For each row to UPDATE from the target database as part of the RBU
|
||||
** For each row to UPDATE from the target database as part of the RBU
|
||||
** update, the corresponding data_% table should contain a single record
|
||||
** with the "rbu_control" column set to contain a value of type text.
|
||||
** The real primary key values identifying the row to update should be
|
||||
** The real primary key values identifying the row to update should be
|
||||
** stored in the corresponding columns of the data_% table row, as should
|
||||
** the new values of all columns being update. The text value in the
|
||||
** the new values of all columns being updated. The text value in the
|
||||
** "rbu_control" column must contain the same number of characters as
|
||||
** there are columns in the target database table, and must consist entirely
|
||||
** of 'x' and '.' characters (or in some special cases 'd' - see below). For
|
||||
** of 'x' and '.' characters (or in some special cases 'd' - see below). For
|
||||
** each column that is being updated, the corresponding character is set to
|
||||
** 'x'. For those that remain as they are, the corresponding character of the
|
||||
** rbu_control value should be set to '.'. For example, given the tables
|
||||
** rbu_control value should be set to '.'. For example, given the tables
|
||||
** above, the update statement:
|
||||
**
|
||||
** UPDATE t1 SET c = 'usa' WHERE a = 4;
|
||||
@@ -178,30 +178,30 @@
|
||||
** target table with the value stored in the corresponding data_% column, the
|
||||
** user-defined SQL function "rbu_delta()" is invoked and the result stored in
|
||||
** the target table column. rbu_delta() is invoked with two arguments - the
|
||||
** original value currently stored in the target table column and the
|
||||
** original value currently stored in the target table column and the
|
||||
** value specified in the data_xxx table.
|
||||
**
|
||||
** For example, this row:
|
||||
**
|
||||
** INSERT INTO data_t1(a, b, c, rbu_control) VALUES(4, NULL, 'usa', '..d');
|
||||
**
|
||||
** is similar to an UPDATE statement such as:
|
||||
** is similar to an UPDATE statement such as:
|
||||
**
|
||||
** UPDATE t1 SET c = rbu_delta(c, 'usa') WHERE a = 4;
|
||||
**
|
||||
** Finally, if an 'f' character appears in place of a 'd' or 's' in an
|
||||
** Finally, if an 'f' character appears in place of a 'd' or 's' in an
|
||||
** rbu_control string, the contents of the data_xxx table column is assumed
|
||||
** to be a "fossil delta" - a patch to be applied to a blob value in the
|
||||
** format used by the fossil source-code management system. In this case
|
||||
** the existing value within the target database table must be of type BLOB.
|
||||
** the existing value within the target database table must be of type BLOB.
|
||||
** It is replaced by the result of applying the specified fossil delta to
|
||||
** itself.
|
||||
**
|
||||
** If the target database table is a virtual table or a table with no PRIMARY
|
||||
** KEY, the rbu_control value should not include a character corresponding
|
||||
** KEY, the rbu_control value should not include a character corresponding
|
||||
** to the rbu_rowid value. For example, this:
|
||||
**
|
||||
** INSERT INTO data_ft1(a, b, rbu_rowid, rbu_control)
|
||||
** INSERT INTO data_ft1(a, b, rbu_rowid, rbu_control)
|
||||
** VALUES(NULL, 'usa', 12, '.x');
|
||||
**
|
||||
** causes a result similar to:
|
||||
|
@@ -164,6 +164,9 @@ static int dbdataConnect(
|
||||
DbdataTable *pTab = 0;
|
||||
int rc = sqlite3_declare_vtab(db, pAux ? DBPTR_SCHEMA : DBDATA_SCHEMA);
|
||||
|
||||
(void)argc;
|
||||
(void)argv;
|
||||
(void)pzErr;
|
||||
if( rc==SQLITE_OK ){
|
||||
pTab = (DbdataTable*)sqlite3_malloc64(sizeof(DbdataTable));
|
||||
if( pTab==0 ){
|
||||
@@ -768,6 +771,8 @@ static int dbdataFilter(
|
||||
DbdataTable *pTab = (DbdataTable*)pCursor->pVtab;
|
||||
int rc = SQLITE_OK;
|
||||
const char *zSchema = "main";
|
||||
(void)idxStr;
|
||||
(void)argc;
|
||||
|
||||
dbdataResetCursor(pCsr);
|
||||
assert( pCsr->iPgno==1 );
|
||||
@@ -936,6 +941,7 @@ int sqlite3_dbdata_init(
|
||||
const sqlite3_api_routines *pApi
|
||||
){
|
||||
SQLITE_EXTENSION_INIT2(pApi);
|
||||
(void)pzErrMsg;
|
||||
return sqlite3DbdataRegister(db);
|
||||
}
|
||||
|
||||
|
@@ -761,6 +761,7 @@ static void recoverEscapeCrnl(
|
||||
sqlite3_value **argv
|
||||
){
|
||||
const char *zText = (const char*)sqlite3_value_text(argv[0]);
|
||||
(void)argc;
|
||||
if( zText && zText[0]=='\'' ){
|
||||
int nText = sqlite3_value_bytes(argv[0]);
|
||||
int i;
|
||||
@@ -913,7 +914,7 @@ static void recoverTransferSettings(sqlite3_recover *p){
|
||||
return;
|
||||
}
|
||||
|
||||
for(ii=0; ii<sizeof(aPragma)/sizeof(aPragma[0]); ii++){
|
||||
for(ii=0; ii<(int)(sizeof(aPragma)/sizeof(aPragma[0])); ii++){
|
||||
const char *zPrag = aPragma[ii];
|
||||
sqlite3_stmt *p1 = 0;
|
||||
p1 = recoverPreparePrintf(p, p->dbIn, "PRAGMA %Q.%s", p->zDb, zPrag);
|
||||
@@ -991,7 +992,9 @@ static int recoverOpenOutput(sqlite3_recover *p){
|
||||
}
|
||||
|
||||
/* Register the custom user-functions with the output handle. */
|
||||
for(ii=0; p->errCode==SQLITE_OK && ii<sizeof(aFunc)/sizeof(aFunc[0]); ii++){
|
||||
for(ii=0;
|
||||
p->errCode==SQLITE_OK && ii<(int)(sizeof(aFunc)/sizeof(aFunc[0]));
|
||||
ii++){
|
||||
p->errCode = sqlite3_create_function(db, aFunc[ii].zName,
|
||||
aFunc[ii].nArg, SQLITE_UTF8, (void*)p, aFunc[ii].xFunc, 0, 0
|
||||
);
|
||||
@@ -2388,7 +2391,7 @@ static int recoverVfsRead(sqlite3_file *pFd, void *aBuf, int nByte, i64 iOff){
|
||||
if( pgsz==65536 ) pgsz = 1;
|
||||
recoverPutU16(&aHdr[16], pgsz);
|
||||
aHdr[20] = nReserve;
|
||||
for(ii=0; ii<sizeof(aPreserve)/sizeof(aPreserve[0]); ii++){
|
||||
for(ii=0; ii<(int)(sizeof(aPreserve)/sizeof(aPreserve[0])); ii++){
|
||||
memcpy(&aHdr[aPreserve[ii]], &a[aPreserve[ii]], 4);
|
||||
}
|
||||
memcpy(aBuf, aHdr, sizeof(aHdr));
|
||||
@@ -2512,10 +2515,16 @@ static int recoverVfsFetch(
|
||||
int iAmt,
|
||||
void **pp
|
||||
){
|
||||
(void)pFd;
|
||||
(void)iOff;
|
||||
(void)iAmt;
|
||||
*pp = 0;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
static int recoverVfsUnfetch(sqlite3_file *pFd, sqlite3_int64 iOff, void *p){
|
||||
(void)pFd;
|
||||
(void)iOff;
|
||||
(void)p;
|
||||
return SQLITE_OK;
|
||||
}
|
||||
|
||||
|
@@ -304,7 +304,7 @@ static GeoPoly *geopolyFuncParam(
|
||||
int nByte;
|
||||
testcase( pCtx==0 );
|
||||
if( sqlite3_value_type(pVal)==SQLITE_BLOB
|
||||
&& (nByte = sqlite3_value_bytes(pVal))>=(4+6*sizeof(GeoCoord))
|
||||
&& (nByte = sqlite3_value_bytes(pVal))>=(int)(4+6*sizeof(GeoCoord))
|
||||
){
|
||||
const unsigned char *a = sqlite3_value_blob(pVal);
|
||||
int nVertex;
|
||||
@@ -362,6 +362,7 @@ static void geopolyBlobFunc(
|
||||
sqlite3_value **argv
|
||||
){
|
||||
GeoPoly *p = geopolyFuncParam(context, argv[0], 0);
|
||||
(void)argc;
|
||||
if( p ){
|
||||
sqlite3_result_blob(context, p->hdr,
|
||||
4+8*p->nVertex, SQLITE_TRANSIENT);
|
||||
@@ -381,6 +382,7 @@ static void geopolyJsonFunc(
|
||||
sqlite3_value **argv
|
||||
){
|
||||
GeoPoly *p = geopolyFuncParam(context, argv[0], 0);
|
||||
(void)argc;
|
||||
if( p ){
|
||||
sqlite3 *db = sqlite3_context_db_handle(context);
|
||||
sqlite3_str *x = sqlite3_str_new(db);
|
||||
@@ -462,6 +464,7 @@ static void geopolyXformFunc(
|
||||
double F = sqlite3_value_double(argv[6]);
|
||||
GeoCoord x1, y1, x0, y0;
|
||||
int ii;
|
||||
(void)argc;
|
||||
if( p ){
|
||||
for(ii=0; ii<p->nVertex; ii++){
|
||||
x0 = GeoX(p,ii);
|
||||
@@ -512,6 +515,7 @@ static void geopolyAreaFunc(
|
||||
sqlite3_value **argv
|
||||
){
|
||||
GeoPoly *p = geopolyFuncParam(context, argv[0], 0);
|
||||
(void)argc;
|
||||
if( p ){
|
||||
sqlite3_result_double(context, geopolyArea(p));
|
||||
sqlite3_free(p);
|
||||
@@ -537,6 +541,7 @@ static void geopolyCcwFunc(
|
||||
sqlite3_value **argv
|
||||
){
|
||||
GeoPoly *p = geopolyFuncParam(context, argv[0], 0);
|
||||
(void)argc;
|
||||
if( p ){
|
||||
if( geopolyArea(p)<0.0 ){
|
||||
int ii, jj;
|
||||
@@ -591,6 +596,7 @@ static void geopolyRegularFunc(
|
||||
int n = sqlite3_value_int(argv[3]);
|
||||
int i;
|
||||
GeoPoly *p;
|
||||
(void)argc;
|
||||
|
||||
if( n<3 || r<=0.0 ) return;
|
||||
if( n>1000 ) n = 1000;
|
||||
@@ -700,6 +706,7 @@ static void geopolyBBoxFunc(
|
||||
sqlite3_value **argv
|
||||
){
|
||||
GeoPoly *p = geopolyBBox(context, argv[0], 0, 0);
|
||||
(void)argc;
|
||||
if( p ){
|
||||
sqlite3_result_blob(context, p->hdr,
|
||||
4+8*p->nVertex, SQLITE_TRANSIENT);
|
||||
@@ -727,6 +734,7 @@ static void geopolyBBoxStep(
|
||||
){
|
||||
RtreeCoord a[4];
|
||||
int rc = SQLITE_OK;
|
||||
(void)argc;
|
||||
(void)geopolyBBox(context, argv[0], a, &rc);
|
||||
if( rc==SQLITE_OK ){
|
||||
GeoBBox *pBBox;
|
||||
@@ -815,6 +823,8 @@ static void geopolyContainsPointFunc(
|
||||
int v = 0;
|
||||
int cnt = 0;
|
||||
int ii;
|
||||
(void)argc;
|
||||
|
||||
if( p1==0 ) return;
|
||||
for(ii=0; ii<p1->nVertex-1; ii++){
|
||||
v = pointBeneathLine(x0,y0,GeoX(p1,ii), GeoY(p1,ii),
|
||||
@@ -854,6 +864,7 @@ static void geopolyWithinFunc(
|
||||
){
|
||||
GeoPoly *p1 = geopolyFuncParam(context, argv[0], 0);
|
||||
GeoPoly *p2 = geopolyFuncParam(context, argv[1], 0);
|
||||
(void)argc;
|
||||
if( p1 && p2 ){
|
||||
int x = geopolyOverlap(p1, p2);
|
||||
if( x<0 ){
|
||||
@@ -1184,6 +1195,7 @@ static void geopolyOverlapFunc(
|
||||
){
|
||||
GeoPoly *p1 = geopolyFuncParam(context, argv[0], 0);
|
||||
GeoPoly *p2 = geopolyFuncParam(context, argv[1], 0);
|
||||
(void)argc;
|
||||
if( p1 && p2 ){
|
||||
int x = geopolyOverlap(p1, p2);
|
||||
if( x<0 ){
|
||||
@@ -1204,8 +1216,12 @@ static void geopolyDebugFunc(
|
||||
int argc,
|
||||
sqlite3_value **argv
|
||||
){
|
||||
(void)context;
|
||||
(void)argc;
|
||||
#ifdef GEOPOLY_ENABLE_DEBUG
|
||||
geo_debug = sqlite3_value_int(argv[0]);
|
||||
#else
|
||||
(void)argv;
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -1233,6 +1249,7 @@ static int geopolyInit(
|
||||
sqlite3_str *pSql;
|
||||
char *zSql;
|
||||
int ii;
|
||||
(void)pAux;
|
||||
|
||||
sqlite3_vtab_config(db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1);
|
||||
|
||||
@@ -1349,6 +1366,7 @@ static int geopolyFilter(
|
||||
RtreeNode *pRoot = 0;
|
||||
int rc = SQLITE_OK;
|
||||
int iCell = 0;
|
||||
(void)idxStr;
|
||||
|
||||
rtreeReference(pRtree);
|
||||
|
||||
@@ -1475,6 +1493,7 @@ static int geopolyBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){
|
||||
int iRowidTerm = -1;
|
||||
int iFuncTerm = -1;
|
||||
int idxNum = 0;
|
||||
(void)tab;
|
||||
|
||||
for(ii=0; ii<pIdxInfo->nConstraint; ii++){
|
||||
struct sqlite3_index_constraint *p = &pIdxInfo->aConstraint[ii];
|
||||
@@ -1721,6 +1740,8 @@ static int geopolyFindFunction(
|
||||
void (**pxFunc)(sqlite3_context*,int,sqlite3_value**),
|
||||
void **ppArg
|
||||
){
|
||||
(void)pVtab;
|
||||
(void)nArg;
|
||||
if( sqlite3_stricmp(zName, "geopoly_overlap")==0 ){
|
||||
*pxFunc = geopolyOverlapFunc;
|
||||
*ppArg = 0;
|
||||
@@ -1790,7 +1811,7 @@ static int sqlite3_geopoly_init(sqlite3 *db){
|
||||
} aAgg[] = {
|
||||
{ geopolyBBoxStep, geopolyBBoxFinal, "geopoly_group_bbox" },
|
||||
};
|
||||
int i;
|
||||
unsigned int i;
|
||||
for(i=0; i<sizeof(aFunc)/sizeof(aFunc[0]) && rc==SQLITE_OK; i++){
|
||||
int enc;
|
||||
if( aFunc[i].bPure ){
|
||||
|
@@ -501,7 +501,7 @@ static int readInt16(u8 *p){
|
||||
return (p[0]<<8) + p[1];
|
||||
}
|
||||
static void readCoord(u8 *p, RtreeCoord *pCoord){
|
||||
assert( ((((char*)p) - (char*)0)&3)==0 ); /* p is always 4-byte aligned */
|
||||
assert( (((sqlite3_uint64)p)&3)==0 ); /* p is always 4-byte aligned */
|
||||
#if SQLITE_BYTEORDER==1234 && MSVC_VERSION>=1300
|
||||
pCoord->u = _byteswap_ulong(*(u32*)p);
|
||||
#elif SQLITE_BYTEORDER==1234 && GCC_VERSION>=4003000
|
||||
@@ -555,7 +555,7 @@ static void writeInt16(u8 *p, int i){
|
||||
}
|
||||
static int writeCoord(u8 *p, RtreeCoord *pCoord){
|
||||
u32 i;
|
||||
assert( ((((char*)p) - (char*)0)&3)==0 ); /* p is always 4-byte aligned */
|
||||
assert( (((sqlite3_uint64)p)&3)==0 ); /* p is always 4-byte aligned */
|
||||
assert( sizeof(RtreeCoord)==4 );
|
||||
assert( sizeof(u32)==4 );
|
||||
#if SQLITE_BYTEORDER==1234 && GCC_VERSION>=4003000
|
||||
@@ -1283,7 +1283,7 @@ static void rtreeNonleafConstraint(
|
||||
assert(p->op==RTREE_LE || p->op==RTREE_LT || p->op==RTREE_GE
|
||||
|| p->op==RTREE_GT || p->op==RTREE_EQ || p->op==RTREE_TRUE
|
||||
|| p->op==RTREE_FALSE );
|
||||
assert( ((((char*)pCellData) - (char*)0)&3)==0 ); /* 4-byte aligned */
|
||||
assert( (((sqlite3_uint64)pCellData)&3)==0 ); /* 4-byte aligned */
|
||||
switch( p->op ){
|
||||
case RTREE_TRUE: return; /* Always satisfied */
|
||||
case RTREE_FALSE: break; /* Never satisfied */
|
||||
@@ -1336,7 +1336,7 @@ static void rtreeLeafConstraint(
|
||||
|| p->op==RTREE_GT || p->op==RTREE_EQ || p->op==RTREE_TRUE
|
||||
|| p->op==RTREE_FALSE );
|
||||
pCellData += 8 + p->iCoord*4;
|
||||
assert( ((((char*)pCellData) - (char*)0)&3)==0 ); /* 4-byte aligned */
|
||||
assert( (((sqlite3_uint64)pCellData)&3)==0 ); /* 4-byte aligned */
|
||||
RTREE_DECODE_COORD(eInt, pCellData, xN);
|
||||
switch( p->op ){
|
||||
case RTREE_TRUE: return; /* Always satisfied */
|
||||
|
@@ -157,4 +157,19 @@ do_execsql_test 5.2 {
|
||||
SELECT rtreecheck('r3')=='ok'
|
||||
} 0
|
||||
|
||||
#-------------------------------------------------------------------------
|
||||
# dbsqlfuzz 4a1399d39bf9feccbf6b290da51d3b30103a4bf6
|
||||
#
|
||||
reset_db
|
||||
do_execsql_test 6.0 {
|
||||
PRAGMA encoding = 'utf16';
|
||||
CREATE VIRTUAL TABLE t1 USING rtree(id, x, y);
|
||||
}
|
||||
db close
|
||||
sqlite3 db test.db
|
||||
do_catchsql_test 6.1 {
|
||||
SELECT ( 'elvis' IN(SELECT rtreecheck('t1')) ) FROM (SELECT 1) GROUP BY 1;
|
||||
} {1 {database table is locked}}
|
||||
|
||||
finish_test
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user