1
0
mirror of https://github.com/sqlite/sqlite.git synced 2025-08-07 02:42:48 +03:00

Merge latest enhancments into this branch from branch wal2.

FossilOrigin-Name: 5a22010c35789c6d12e4dd45c81c10b203f4662f206cb636bd0c2781f1cd9571
This commit is contained in:
dan
2023-02-02 14:27:56 +00:00
203 changed files with 5449 additions and 18456 deletions

View File

@@ -315,24 +315,6 @@ SRC = \
# Source code for extensions
#
SRC += \
$(TOP)/ext/fts1/fts1.c \
$(TOP)/ext/fts1/fts1.h \
$(TOP)/ext/fts1/fts1_hash.c \
$(TOP)/ext/fts1/fts1_hash.h \
$(TOP)/ext/fts1/fts1_porter.c \
$(TOP)/ext/fts1/fts1_tokenizer.h \
$(TOP)/ext/fts1/fts1_tokenizer1.c
SRC += \
$(TOP)/ext/fts2/fts2.c \
$(TOP)/ext/fts2/fts2.h \
$(TOP)/ext/fts2/fts2_hash.c \
$(TOP)/ext/fts2/fts2_hash.h \
$(TOP)/ext/fts2/fts2_icu.c \
$(TOP)/ext/fts2/fts2_porter.c \
$(TOP)/ext/fts2/fts2_tokenizer.h \
$(TOP)/ext/fts2/fts2_tokenizer.c \
$(TOP)/ext/fts2/fts2_tokenizer1.c
SRC += \
$(TOP)/ext/fts3/fts3.c \
$(TOP)/ext/fts3/fts3.h \
@@ -567,14 +549,6 @@ HDR = \
# Header files used by extensions
#
EXTHDR += \
$(TOP)/ext/fts1/fts1.h \
$(TOP)/ext/fts1/fts1_hash.h \
$(TOP)/ext/fts1/fts1_tokenizer.h
EXTHDR += \
$(TOP)/ext/fts2/fts2.h \
$(TOP)/ext/fts2/fts2_hash.h \
$(TOP)/ext/fts2/fts2_tokenizer.h
EXTHDR += \
$(TOP)/ext/fts3/fts3.h \
$(TOP)/ext/fts3/fts3Int.h \
@@ -1120,6 +1094,9 @@ SHELL_SRC = \
$(TOP)/ext/misc/appendvfs.c \
$(TOP)/ext/misc/completion.c \
$(TOP)/ext/misc/decimal.c \
$(TOP)/ext/misc/basexx.c \
$(TOP)/ext/misc/base64.c \
$(TOP)/ext/misc/base85.c \
$(TOP)/ext/misc/fileio.c \
$(TOP)/ext/misc/ieee754.c \
$(TOP)/ext/misc/regexp.c \
@@ -1147,24 +1124,6 @@ shell.c: $(SHELL_SRC) $(TOP)/tool/mkshellc.tcl
icu.lo: $(TOP)/ext/icu/icu.c $(HDR) $(EXTHDR)
$(LTCOMPILE) -DSQLITE_CORE -c $(TOP)/ext/icu/icu.c
fts2.lo: $(TOP)/ext/fts2/fts2.c $(HDR) $(EXTHDR)
$(LTCOMPILE) -DSQLITE_CORE -c $(TOP)/ext/fts2/fts2.c
fts2_hash.lo: $(TOP)/ext/fts2/fts2_hash.c $(HDR) $(EXTHDR)
$(LTCOMPILE) -DSQLITE_CORE -c $(TOP)/ext/fts2/fts2_hash.c
fts2_icu.lo: $(TOP)/ext/fts2/fts2_icu.c $(HDR) $(EXTHDR)
$(LTCOMPILE) -DSQLITE_CORE -c $(TOP)/ext/fts2/fts2_icu.c
fts2_porter.lo: $(TOP)/ext/fts2/fts2_porter.c $(HDR) $(EXTHDR)
$(LTCOMPILE) -DSQLITE_CORE -c $(TOP)/ext/fts2/fts2_porter.c
fts2_tokenizer.lo: $(TOP)/ext/fts2/fts2_tokenizer.c $(HDR) $(EXTHDR)
$(LTCOMPILE) -DSQLITE_CORE -c $(TOP)/ext/fts2/fts2_tokenizer.c
fts2_tokenizer1.lo: $(TOP)/ext/fts2/fts2_tokenizer1.c $(HDR) $(EXTHDR)
$(LTCOMPILE) -DSQLITE_CORE -c $(TOP)/ext/fts2/fts2_tokenizer1.c
fts3.lo: $(TOP)/ext/fts3/fts3.c $(HDR) $(EXTHDR)
$(LTCOMPILE) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3.c

View File

@@ -1412,20 +1412,6 @@ SRC05 = \
$(TOP)\src\wal.h \
$(TOP)\src\whereInt.h
# Extension source code files, part 1.
#
SRC06 = \
$(TOP)\ext\fts1\fts1.c \
$(TOP)\ext\fts1\fts1_hash.c \
$(TOP)\ext\fts1\fts1_porter.c \
$(TOP)\ext\fts1\fts1_tokenizer1.c \
$(TOP)\ext\fts2\fts2.c \
$(TOP)\ext\fts2\fts2_hash.c \
$(TOP)\ext\fts2\fts2_icu.c \
$(TOP)\ext\fts2\fts2_porter.c \
$(TOP)\ext\fts2\fts2_tokenizer.c \
$(TOP)\ext\fts2\fts2_tokenizer1.c
# Extension source code files, part 2.
#
SRC07 = \
@@ -1448,16 +1434,6 @@ SRC07 = \
$(TOP)\ext\rbu\sqlite3rbu.c \
$(TOP)\ext\misc\stmt.c
# Extension header files, part 1.
#
SRC08 = \
$(TOP)\ext\fts1\fts1.h \
$(TOP)\ext\fts1\fts1_hash.h \
$(TOP)\ext\fts1\fts1_tokenizer.h \
$(TOP)\ext\fts2\fts2.h \
$(TOP)\ext\fts2\fts2_hash.h \
$(TOP)\ext\fts2\fts2_tokenizer.h
# Extension header files, part 2.
#
SRC09 = \
@@ -1498,7 +1474,7 @@ SRC12 =
# All source code files.
#
SRC = $(SRC00) $(SRC01) $(SRC03) $(SRC04) $(SRC05) $(SRC06) $(SRC07) $(SRC08) $(SRC09) $(SRC10) $(SRC11) $(SRC12)
SRC = $(SRC00) $(SRC01) $(SRC03) $(SRC04) $(SRC05) $(SRC07) $(SRC09) $(SRC10) $(SRC11) $(SRC12)
# Source code to the test files.
#
@@ -1607,7 +1583,6 @@ TESTEXT = $(TESTEXT) $(TOP)\ext\misc\zipfile.c
TESTSRC2 = \
$(SRC00) \
$(SRC01) \
$(SRC06) \
$(SRC07) \
$(SRC10) \
$(TOP)\ext\async\sqlite3async.c
@@ -1642,14 +1617,6 @@ HDR = \
# Header files used by extensions
#
EXTHDR = $(EXTHDR) \
$(TOP)\ext\fts1\fts1.h \
$(TOP)\ext\fts1\fts1_hash.h \
$(TOP)\ext\fts1\fts1_tokenizer.h
EXTHDR = $(EXTHDR) \
$(TOP)\ext\fts2\fts2.h \
$(TOP)\ext\fts2\fts2_hash.h \
$(TOP)\ext\fts2\fts2_tokenizer.h
EXTHDR = $(EXTHDR) \
$(TOP)\ext\fts3\fts3.h \
$(TOP)\ext\fts3\fts3Int.h \
@@ -1861,9 +1828,7 @@ mptest: mptester.exe
for %i in ($(SRC03)) do copy /Y %i tsrc
for %i in ($(SRC04)) do copy /Y %i tsrc
for %i in ($(SRC05)) do copy /Y %i tsrc
for %i in ($(SRC06)) do copy /Y %i tsrc
for %i in ($(SRC07)) do copy /Y %i tsrc
for %i in ($(SRC08)) do copy /Y %i tsrc
for %i in ($(SRC09)) do copy /Y %i tsrc
for %i in ($(SRC10)) do copy /Y %i tsrc
for %i in ($(SRC11)) do copy /Y %i tsrc
@@ -2233,6 +2198,8 @@ SHELL_SRC = \
$(TOP)\src\shell.c.in \
$(TOP)\ext\misc\appendvfs.c \
$(TOP)\ext\misc\completion.c \
$(TOP)\ext\misc\base64.c \
$(TOP)\ext\misc\base85.c \
$(TOP)\ext\misc\decimal.c \
$(TOP)\ext\misc\fileio.c \
$(TOP)\ext\misc\ieee754.c \
@@ -2266,24 +2233,6 @@ zlib:
icu.lo: $(TOP)\ext\icu\icu.c $(HDR) $(EXTHDR)
$(LTCOMPILE) $(CORE_COMPILE_OPTS) $(NO_WARN) -DSQLITE_CORE -c $(TOP)\ext\icu\icu.c
fts2.lo: $(TOP)\ext\fts2\fts2.c $(HDR) $(EXTHDR)
$(LTCOMPILE) $(CORE_COMPILE_OPTS) $(NO_WARN) -DSQLITE_CORE -c $(TOP)\ext\fts2\fts2.c
fts2_hash.lo: $(TOP)\ext\fts2\fts2_hash.c $(HDR) $(EXTHDR)
$(LTCOMPILE) $(CORE_COMPILE_OPTS) $(NO_WARN) -DSQLITE_CORE -c $(TOP)\ext\fts2\fts2_hash.c
fts2_icu.lo: $(TOP)\ext\fts2\fts2_icu.c $(HDR) $(EXTHDR)
$(LTCOMPILE) $(CORE_COMPILE_OPTS) $(NO_WARN) -DSQLITE_CORE -c $(TOP)\ext\fts2\fts2_icu.c
fts2_porter.lo: $(TOP)\ext\fts2\fts2_porter.c $(HDR) $(EXTHDR)
$(LTCOMPILE) $(CORE_COMPILE_OPTS) $(NO_WARN) -DSQLITE_CORE -c $(TOP)\ext\fts2\fts2_porter.c
fts2_tokenizer.lo: $(TOP)\ext\fts2\fts2_tokenizer.c $(HDR) $(EXTHDR)
$(LTCOMPILE) $(CORE_COMPILE_OPTS) $(NO_WARN) -DSQLITE_CORE -c $(TOP)\ext\fts2\fts2_tokenizer.c
fts2_tokenizer1.lo: $(TOP)\ext\fts2\fts2_tokenizer1.c $(HDR) $(EXTHDR)
$(LTCOMPILE) $(CORE_COMPILE_OPTS) $(NO_WARN) -DSQLITE_CORE -c $(TOP)\ext\fts2\fts2_tokenizer1.c
fts3.lo: $(TOP)\ext\fts3\fts3.c $(HDR) $(EXTHDR)
$(LTCOMPILE) $(CORE_COMPILE_OPTS) $(NO_WARN) -DSQLITE_CORE -c $(TOP)\ext\fts3\fts3.c

View File

@@ -1,2 +0,0 @@
This folder contains source code to the first full-text search
extension for SQLite.

View File

@@ -1,404 +0,0 @@
/*
** 2001 September 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** This is the implementation of generic hash-tables used in SQLite.
** We've modified it slightly to serve as a standalone hash table
** implementation for the full-text indexing module.
*/
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "ft_hash.h"
void *malloc_and_zero(int n){
void *p = malloc(n);
if( p ){
memset(p, 0, n);
}
return p;
}
/* Turn bulk memory into a hash table object by initializing the
** fields of the Hash structure.
**
** "pNew" is a pointer to the hash table that is to be initialized.
** keyClass is one of the constants HASH_INT, HASH_POINTER,
** HASH_BINARY, or HASH_STRING. The value of keyClass
** determines what kind of key the hash table will use. "copyKey" is
** true if the hash table should make its own private copy of keys and
** false if it should just use the supplied pointer. CopyKey only makes
** sense for HASH_STRING and HASH_BINARY and is ignored
** for other key classes.
*/
void HashInit(Hash *pNew, int keyClass, int copyKey){
assert( pNew!=0 );
assert( keyClass>=HASH_STRING && keyClass<=HASH_BINARY );
pNew->keyClass = keyClass;
#if 0
if( keyClass==HASH_POINTER || keyClass==HASH_INT ) copyKey = 0;
#endif
pNew->copyKey = copyKey;
pNew->first = 0;
pNew->count = 0;
pNew->htsize = 0;
pNew->ht = 0;
pNew->xMalloc = malloc_and_zero;
pNew->xFree = free;
}
/* Remove all entries from a hash table. Reclaim all memory.
** Call this routine to delete a hash table or to reset a hash table
** to the empty state.
*/
void HashClear(Hash *pH){
HashElem *elem; /* For looping over all elements of the table */
assert( pH!=0 );
elem = pH->first;
pH->first = 0;
if( pH->ht ) pH->xFree(pH->ht);
pH->ht = 0;
pH->htsize = 0;
while( elem ){
HashElem *next_elem = elem->next;
if( pH->copyKey && elem->pKey ){
pH->xFree(elem->pKey);
}
pH->xFree(elem);
elem = next_elem;
}
pH->count = 0;
}
#if 0 /* NOT USED */
/*
** Hash and comparison functions when the mode is HASH_INT
*/
static int intHash(const void *pKey, int nKey){
return nKey ^ (nKey<<8) ^ (nKey>>8);
}
static int intCompare(const void *pKey1, int n1, const void *pKey2, int n2){
return n2 - n1;
}
#endif
#if 0 /* NOT USED */
/*
** Hash and comparison functions when the mode is HASH_POINTER
*/
static int ptrHash(const void *pKey, int nKey){
uptr x = Addr(pKey);
return x ^ (x<<8) ^ (x>>8);
}
static int ptrCompare(const void *pKey1, int n1, const void *pKey2, int n2){
if( pKey1==pKey2 ) return 0;
if( pKey1<pKey2 ) return -1;
return 1;
}
#endif
/*
** Hash and comparison functions when the mode is HASH_STRING
*/
static int strHash(const void *pKey, int nKey){
const char *z = (const char *)pKey;
int h = 0;
if( nKey<=0 ) nKey = (int) strlen(z);
while( nKey > 0 ){
h = (h<<3) ^ h ^ *z++;
nKey--;
}
return h & 0x7fffffff;
}
static int strCompare(const void *pKey1, int n1, const void *pKey2, int n2){
if( n1!=n2 ) return 1;
return strncmp((const char*)pKey1,(const char*)pKey2,n1);
}
/*
** Hash and comparison functions when the mode is HASH_BINARY
*/
static int binHash(const void *pKey, int nKey){
int h = 0;
const char *z = (const char *)pKey;
while( nKey-- > 0 ){
h = (h<<3) ^ h ^ *(z++);
}
return h & 0x7fffffff;
}
static int binCompare(const void *pKey1, int n1, const void *pKey2, int n2){
if( n1!=n2 ) return 1;
return memcmp(pKey1,pKey2,n1);
}
/*
** Return a pointer to the appropriate hash function given the key class.
**
** The C syntax in this function definition may be unfamilar to some
** programmers, so we provide the following additional explanation:
**
** The name of the function is "hashFunction". The function takes a
** single parameter "keyClass". The return value of hashFunction()
** is a pointer to another function. Specifically, the return value
** of hashFunction() is a pointer to a function that takes two parameters
** with types "const void*" and "int" and returns an "int".
*/
static int (*hashFunction(int keyClass))(const void*,int){
#if 0 /* HASH_INT and HASH_POINTER are never used */
switch( keyClass ){
case HASH_INT: return &intHash;
case HASH_POINTER: return &ptrHash;
case HASH_STRING: return &strHash;
case HASH_BINARY: return &binHash;;
default: break;
}
return 0;
#else
if( keyClass==HASH_STRING ){
return &strHash;
}else{
assert( keyClass==HASH_BINARY );
return &binHash;
}
#endif
}
/*
** Return a pointer to the appropriate hash function given the key class.
**
** For help in interpreted the obscure C code in the function definition,
** see the header comment on the previous function.
*/
static int (*compareFunction(int keyClass))(const void*,int,const void*,int){
#if 0 /* HASH_INT and HASH_POINTER are never used */
switch( keyClass ){
case HASH_INT: return &intCompare;
case HASH_POINTER: return &ptrCompare;
case HASH_STRING: return &strCompare;
case HASH_BINARY: return &binCompare;
default: break;
}
return 0;
#else
if( keyClass==HASH_STRING ){
return &strCompare;
}else{
assert( keyClass==HASH_BINARY );
return &binCompare;
}
#endif
}
/* Link an element into the hash table
*/
static void insertElement(
Hash *pH, /* The complete hash table */
struct _ht *pEntry, /* The entry into which pNew is inserted */
HashElem *pNew /* The element to be inserted */
){
HashElem *pHead; /* First element already in pEntry */
pHead = pEntry->chain;
if( pHead ){
pNew->next = pHead;
pNew->prev = pHead->prev;
if( pHead->prev ){ pHead->prev->next = pNew; }
else { pH->first = pNew; }
pHead->prev = pNew;
}else{
pNew->next = pH->first;
if( pH->first ){ pH->first->prev = pNew; }
pNew->prev = 0;
pH->first = pNew;
}
pEntry->count++;
pEntry->chain = pNew;
}
/* Resize the hash table so that it cantains "new_size" buckets.
** "new_size" must be a power of 2. The hash table might fail
** to resize if sqliteMalloc() fails.
*/
static void rehash(Hash *pH, int new_size){
struct _ht *new_ht; /* The new hash table */
HashElem *elem, *next_elem; /* For looping over existing elements */
int (*xHash)(const void*,int); /* The hash function */
assert( (new_size & (new_size-1))==0 );
new_ht = (struct _ht *)pH->xMalloc( new_size*sizeof(struct _ht) );
if( new_ht==0 ) return;
if( pH->ht ) pH->xFree(pH->ht);
pH->ht = new_ht;
pH->htsize = new_size;
xHash = hashFunction(pH->keyClass);
for(elem=pH->first, pH->first=0; elem; elem = next_elem){
int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1);
next_elem = elem->next;
insertElement(pH, &new_ht[h], elem);
}
}
/* This function (for internal use only) locates an element in an
** hash table that matches the given key. The hash for this key has
** already been computed and is passed as the 4th parameter.
*/
static HashElem *findElementGivenHash(
const Hash *pH, /* The pH to be searched */
const void *pKey, /* The key we are searching for */
int nKey,
int h /* The hash for this key. */
){
HashElem *elem; /* Used to loop thru the element list */
int count; /* Number of elements left to test */
int (*xCompare)(const void*,int,const void*,int); /* comparison function */
if( pH->ht ){
struct _ht *pEntry = &pH->ht[h];
elem = pEntry->chain;
count = pEntry->count;
xCompare = compareFunction(pH->keyClass);
while( count-- && elem ){
if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){
return elem;
}
elem = elem->next;
}
}
return 0;
}
/* Remove a single entry from the hash table given a pointer to that
** element and a hash on the element's key.
*/
static void removeElementGivenHash(
Hash *pH, /* The pH containing "elem" */
HashElem* elem, /* The element to be removed from the pH */
int h /* Hash value for the element */
){
struct _ht *pEntry;
if( elem->prev ){
elem->prev->next = elem->next;
}else{
pH->first = elem->next;
}
if( elem->next ){
elem->next->prev = elem->prev;
}
pEntry = &pH->ht[h];
if( pEntry->chain==elem ){
pEntry->chain = elem->next;
}
pEntry->count--;
if( pEntry->count<=0 ){
pEntry->chain = 0;
}
if( pH->copyKey && elem->pKey ){
pH->xFree(elem->pKey);
}
pH->xFree( elem );
pH->count--;
if( pH->count<=0 ){
assert( pH->first==0 );
assert( pH->count==0 );
HashClear(pH);
}
}
/* Attempt to locate an element of the hash table pH with a key
** that matches pKey,nKey. Return the data for this element if it is
** found, or NULL if there is no match.
*/
void *HashFind(const Hash *pH, const void *pKey, int nKey){
int h; /* A hash on key */
HashElem *elem; /* The element that matches key */
int (*xHash)(const void*,int); /* The hash function */
if( pH==0 || pH->ht==0 ) return 0;
xHash = hashFunction(pH->keyClass);
assert( xHash!=0 );
h = (*xHash)(pKey,nKey);
assert( (pH->htsize & (pH->htsize-1))==0 );
elem = findElementGivenHash(pH,pKey,nKey, h & (pH->htsize-1));
return elem ? elem->data : 0;
}
/* Insert an element into the hash table pH. The key is pKey,nKey
** and the data is "data".
**
** If no element exists with a matching key, then a new
** element is created. A copy of the key is made if the copyKey
** flag is set. NULL is returned.
**
** If another element already exists with the same key, then the
** new data replaces the old data and the old data is returned.
** The key is not copied in this instance. If a malloc fails, then
** the new data is returned and the hash table is unchanged.
**
** If the "data" parameter to this function is NULL, then the
** element corresponding to "key" is removed from the hash table.
*/
void *HashInsert(Hash *pH, const void *pKey, int nKey, void *data){
int hraw; /* Raw hash value of the key */
int h; /* the hash of the key modulo hash table size */
HashElem *elem; /* Used to loop thru the element list */
HashElem *new_elem; /* New element added to the pH */
int (*xHash)(const void*,int); /* The hash function */
assert( pH!=0 );
xHash = hashFunction(pH->keyClass);
assert( xHash!=0 );
hraw = (*xHash)(pKey, nKey);
assert( (pH->htsize & (pH->htsize-1))==0 );
h = hraw & (pH->htsize-1);
elem = findElementGivenHash(pH,pKey,nKey,h);
if( elem ){
void *old_data = elem->data;
if( data==0 ){
removeElementGivenHash(pH,elem,h);
}else{
elem->data = data;
}
return old_data;
}
if( data==0 ) return 0;
new_elem = (HashElem*)pH->xMalloc( sizeof(HashElem) );
if( new_elem==0 ) return data;
if( pH->copyKey && pKey!=0 ){
new_elem->pKey = pH->xMalloc( nKey );
if( new_elem->pKey==0 ){
pH->xFree(new_elem);
return data;
}
memcpy((void*)new_elem->pKey, pKey, nKey);
}else{
new_elem->pKey = (void*)pKey;
}
new_elem->nKey = nKey;
pH->count++;
if( pH->htsize==0 ){
rehash(pH,8);
if( pH->htsize==0 ){
pH->count = 0;
pH->xFree(new_elem);
return data;
}
}
if( pH->count > pH->htsize ){
rehash(pH,pH->htsize*2);
}
assert( pH->htsize>0 );
assert( (pH->htsize & (pH->htsize-1))==0 );
h = hraw & (pH->htsize-1);
insertElement(pH, &pH->ht[h], new_elem);
new_elem->data = data;
return 0;
}

View File

@@ -1,111 +0,0 @@
/*
** 2001 September 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** This is the header file for the generic hash-table implementation
** used in SQLite. We've modified it slightly to serve as a standalone
** hash table implementation for the full-text indexing module.
**
*/
#ifndef _HASH_H_
#define _HASH_H_
/* Forward declarations of structures. */
typedef struct Hash Hash;
typedef struct HashElem HashElem;
/* A complete hash table is an instance of the following structure.
** The internals of this structure are intended to be opaque -- client
** code should not attempt to access or modify the fields of this structure
** directly. Change this structure only by using the routines below.
** However, many of the "procedures" and "functions" for modifying and
** accessing this structure are really macros, so we can't really make
** this structure opaque.
*/
struct Hash {
char keyClass; /* HASH_INT, _POINTER, _STRING, _BINARY */
char copyKey; /* True if copy of key made on insert */
int count; /* Number of entries in this table */
HashElem *first; /* The first element of the array */
void *(*xMalloc)(int); /* malloc() function to use */
void (*xFree)(void *); /* free() function to use */
int htsize; /* Number of buckets in the hash table */
struct _ht { /* the hash table */
int count; /* Number of entries with this hash */
HashElem *chain; /* Pointer to first entry with this hash */
} *ht;
};
/* Each element in the hash table is an instance of the following
** structure. All elements are stored on a single doubly-linked list.
**
** Again, this structure is intended to be opaque, but it can't really
** be opaque because it is used by macros.
*/
struct HashElem {
HashElem *next, *prev; /* Next and previous elements in the table */
void *data; /* Data associated with this element */
void *pKey; int nKey; /* Key associated with this element */
};
/*
** There are 4 different modes of operation for a hash table:
**
** HASH_INT nKey is used as the key and pKey is ignored.
**
** HASH_POINTER pKey is used as the key and nKey is ignored.
**
** HASH_STRING pKey points to a string that is nKey bytes long
** (including the null-terminator, if any). Case
** is respected in comparisons.
**
** HASH_BINARY pKey points to binary data nKey bytes long.
** memcmp() is used to compare keys.
**
** A copy of the key is made for HASH_STRING and HASH_BINARY
** if the copyKey parameter to HashInit is 1.
*/
/* #define HASH_INT 1 // NOT USED */
/* #define HASH_POINTER 2 // NOT USED */
#define HASH_STRING 3
#define HASH_BINARY 4
/*
** Access routines. To delete, insert a NULL pointer.
*/
void HashInit(Hash*, int keytype, int copyKey);
void *HashInsert(Hash*, const void *pKey, int nKey, void *pData);
void *HashFind(const Hash*, const void *pKey, int nKey);
void HashClear(Hash*);
/*
** Macros for looping over all elements of a hash table. The idiom is
** like this:
**
** Hash h;
** HashElem *p;
** ...
** for(p=HashFirst(&h); p; p=HashNext(p)){
** SomeStructure *pData = HashData(p);
** // do something with pData
** }
*/
#define HashFirst(H) ((H)->first)
#define HashNext(E) ((E)->next)
#define HashData(E) ((E)->data)
#define HashKey(E) ((E)->pKey)
#define HashKeysize(E) ((E)->nKey)
/*
** Number of entries in a hash table
*/
#define HashCount(H) ((H)->count)
#endif /* _HASH_H_ */

File diff suppressed because it is too large Load Diff

View File

@@ -1,11 +0,0 @@
#include "sqlite3.h"
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
int sqlite3Fts1Init(sqlite3 *db);
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */

View File

@@ -1,369 +0,0 @@
/*
** 2001 September 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** This is the implementation of generic hash-tables used in SQLite.
** We've modified it slightly to serve as a standalone hash table
** implementation for the full-text indexing module.
*/
#include <assert.h>
#include <stdlib.h>
#include <string.h>
/*
** The code in this file is only compiled if:
**
** * The FTS1 module is being built as an extension
** (in which case SQLITE_CORE is not defined), or
**
** * The FTS1 module is being built into the core of
** SQLite (in which case SQLITE_ENABLE_FTS1 is defined).
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)
#include "fts1_hash.h"
static void *malloc_and_zero(int n){
void *p = malloc(n);
if( p ){
memset(p, 0, n);
}
return p;
}
/* Turn bulk memory into a hash table object by initializing the
** fields of the Hash structure.
**
** "pNew" is a pointer to the hash table that is to be initialized.
** keyClass is one of the constants
** FTS1_HASH_BINARY or FTS1_HASH_STRING. The value of keyClass
** determines what kind of key the hash table will use. "copyKey" is
** true if the hash table should make its own private copy of keys and
** false if it should just use the supplied pointer.
*/
void sqlite3Fts1HashInit(fts1Hash *pNew, int keyClass, int copyKey){
assert( pNew!=0 );
assert( keyClass>=FTS1_HASH_STRING && keyClass<=FTS1_HASH_BINARY );
pNew->keyClass = keyClass;
pNew->copyKey = copyKey;
pNew->first = 0;
pNew->count = 0;
pNew->htsize = 0;
pNew->ht = 0;
pNew->xMalloc = malloc_and_zero;
pNew->xFree = free;
}
/* Remove all entries from a hash table. Reclaim all memory.
** Call this routine to delete a hash table or to reset a hash table
** to the empty state.
*/
void sqlite3Fts1HashClear(fts1Hash *pH){
fts1HashElem *elem; /* For looping over all elements of the table */
assert( pH!=0 );
elem = pH->first;
pH->first = 0;
if( pH->ht ) pH->xFree(pH->ht);
pH->ht = 0;
pH->htsize = 0;
while( elem ){
fts1HashElem *next_elem = elem->next;
if( pH->copyKey && elem->pKey ){
pH->xFree(elem->pKey);
}
pH->xFree(elem);
elem = next_elem;
}
pH->count = 0;
}
/*
** Hash and comparison functions when the mode is FTS1_HASH_STRING
*/
static int strHash(const void *pKey, int nKey){
const char *z = (const char *)pKey;
int h = 0;
if( nKey<=0 ) nKey = (int) strlen(z);
while( nKey > 0 ){
h = (h<<3) ^ h ^ *z++;
nKey--;
}
return h & 0x7fffffff;
}
static int strCompare(const void *pKey1, int n1, const void *pKey2, int n2){
if( n1!=n2 ) return 1;
return strncmp((const char*)pKey1,(const char*)pKey2,n1);
}
/*
** Hash and comparison functions when the mode is FTS1_HASH_BINARY
*/
static int binHash(const void *pKey, int nKey){
int h = 0;
const char *z = (const char *)pKey;
while( nKey-- > 0 ){
h = (h<<3) ^ h ^ *(z++);
}
return h & 0x7fffffff;
}
static int binCompare(const void *pKey1, int n1, const void *pKey2, int n2){
if( n1!=n2 ) return 1;
return memcmp(pKey1,pKey2,n1);
}
/*
** Return a pointer to the appropriate hash function given the key class.
**
** The C syntax in this function definition may be unfamilar to some
** programmers, so we provide the following additional explanation:
**
** The name of the function is "hashFunction". The function takes a
** single parameter "keyClass". The return value of hashFunction()
** is a pointer to another function. Specifically, the return value
** of hashFunction() is a pointer to a function that takes two parameters
** with types "const void*" and "int" and returns an "int".
*/
static int (*hashFunction(int keyClass))(const void*,int){
if( keyClass==FTS1_HASH_STRING ){
return &strHash;
}else{
assert( keyClass==FTS1_HASH_BINARY );
return &binHash;
}
}
/*
** Return a pointer to the appropriate hash function given the key class.
**
** For help in interpreted the obscure C code in the function definition,
** see the header comment on the previous function.
*/
static int (*compareFunction(int keyClass))(const void*,int,const void*,int){
if( keyClass==FTS1_HASH_STRING ){
return &strCompare;
}else{
assert( keyClass==FTS1_HASH_BINARY );
return &binCompare;
}
}
/* Link an element into the hash table
*/
static void insertElement(
fts1Hash *pH, /* The complete hash table */
struct _fts1ht *pEntry, /* The entry into which pNew is inserted */
fts1HashElem *pNew /* The element to be inserted */
){
fts1HashElem *pHead; /* First element already in pEntry */
pHead = pEntry->chain;
if( pHead ){
pNew->next = pHead;
pNew->prev = pHead->prev;
if( pHead->prev ){ pHead->prev->next = pNew; }
else { pH->first = pNew; }
pHead->prev = pNew;
}else{
pNew->next = pH->first;
if( pH->first ){ pH->first->prev = pNew; }
pNew->prev = 0;
pH->first = pNew;
}
pEntry->count++;
pEntry->chain = pNew;
}
/* Resize the hash table so that it cantains "new_size" buckets.
** "new_size" must be a power of 2. The hash table might fail
** to resize if sqliteMalloc() fails.
*/
static void rehash(fts1Hash *pH, int new_size){
struct _fts1ht *new_ht; /* The new hash table */
fts1HashElem *elem, *next_elem; /* For looping over existing elements */
int (*xHash)(const void*,int); /* The hash function */
assert( (new_size & (new_size-1))==0 );
new_ht = (struct _fts1ht *)pH->xMalloc( new_size*sizeof(struct _fts1ht) );
if( new_ht==0 ) return;
if( pH->ht ) pH->xFree(pH->ht);
pH->ht = new_ht;
pH->htsize = new_size;
xHash = hashFunction(pH->keyClass);
for(elem=pH->first, pH->first=0; elem; elem = next_elem){
int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1);
next_elem = elem->next;
insertElement(pH, &new_ht[h], elem);
}
}
/* This function (for internal use only) locates an element in an
** hash table that matches the given key. The hash for this key has
** already been computed and is passed as the 4th parameter.
*/
static fts1HashElem *findElementGivenHash(
const fts1Hash *pH, /* The pH to be searched */
const void *pKey, /* The key we are searching for */
int nKey,
int h /* The hash for this key. */
){
fts1HashElem *elem; /* Used to loop thru the element list */
int count; /* Number of elements left to test */
int (*xCompare)(const void*,int,const void*,int); /* comparison function */
if( pH->ht ){
struct _fts1ht *pEntry = &pH->ht[h];
elem = pEntry->chain;
count = pEntry->count;
xCompare = compareFunction(pH->keyClass);
while( count-- && elem ){
if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){
return elem;
}
elem = elem->next;
}
}
return 0;
}
/* Remove a single entry from the hash table given a pointer to that
** element and a hash on the element's key.
*/
static void removeElementGivenHash(
fts1Hash *pH, /* The pH containing "elem" */
fts1HashElem* elem, /* The element to be removed from the pH */
int h /* Hash value for the element */
){
struct _fts1ht *pEntry;
if( elem->prev ){
elem->prev->next = elem->next;
}else{
pH->first = elem->next;
}
if( elem->next ){
elem->next->prev = elem->prev;
}
pEntry = &pH->ht[h];
if( pEntry->chain==elem ){
pEntry->chain = elem->next;
}
pEntry->count--;
if( pEntry->count<=0 ){
pEntry->chain = 0;
}
if( pH->copyKey && elem->pKey ){
pH->xFree(elem->pKey);
}
pH->xFree( elem );
pH->count--;
if( pH->count<=0 ){
assert( pH->first==0 );
assert( pH->count==0 );
fts1HashClear(pH);
}
}
/* Attempt to locate an element of the hash table pH with a key
** that matches pKey,nKey. Return the data for this element if it is
** found, or NULL if there is no match.
*/
void *sqlite3Fts1HashFind(const fts1Hash *pH, const void *pKey, int nKey){
int h; /* A hash on key */
fts1HashElem *elem; /* The element that matches key */
int (*xHash)(const void*,int); /* The hash function */
if( pH==0 || pH->ht==0 ) return 0;
xHash = hashFunction(pH->keyClass);
assert( xHash!=0 );
h = (*xHash)(pKey,nKey);
assert( (pH->htsize & (pH->htsize-1))==0 );
elem = findElementGivenHash(pH,pKey,nKey, h & (pH->htsize-1));
return elem ? elem->data : 0;
}
/* Insert an element into the hash table pH. The key is pKey,nKey
** and the data is "data".
**
** If no element exists with a matching key, then a new
** element is created. A copy of the key is made if the copyKey
** flag is set. NULL is returned.
**
** If another element already exists with the same key, then the
** new data replaces the old data and the old data is returned.
** The key is not copied in this instance. If a malloc fails, then
** the new data is returned and the hash table is unchanged.
**
** If the "data" parameter to this function is NULL, then the
** element corresponding to "key" is removed from the hash table.
*/
void *sqlite3Fts1HashInsert(
fts1Hash *pH, /* The hash table to insert into */
const void *pKey, /* The key */
int nKey, /* Number of bytes in the key */
void *data /* The data */
){
int hraw; /* Raw hash value of the key */
int h; /* the hash of the key modulo hash table size */
fts1HashElem *elem; /* Used to loop thru the element list */
fts1HashElem *new_elem; /* New element added to the pH */
int (*xHash)(const void*,int); /* The hash function */
assert( pH!=0 );
xHash = hashFunction(pH->keyClass);
assert( xHash!=0 );
hraw = (*xHash)(pKey, nKey);
assert( (pH->htsize & (pH->htsize-1))==0 );
h = hraw & (pH->htsize-1);
elem = findElementGivenHash(pH,pKey,nKey,h);
if( elem ){
void *old_data = elem->data;
if( data==0 ){
removeElementGivenHash(pH,elem,h);
}else{
elem->data = data;
}
return old_data;
}
if( data==0 ) return 0;
new_elem = (fts1HashElem*)pH->xMalloc( sizeof(fts1HashElem) );
if( new_elem==0 ) return data;
if( pH->copyKey && pKey!=0 ){
new_elem->pKey = pH->xMalloc( nKey );
if( new_elem->pKey==0 ){
pH->xFree(new_elem);
return data;
}
memcpy((void*)new_elem->pKey, pKey, nKey);
}else{
new_elem->pKey = (void*)pKey;
}
new_elem->nKey = nKey;
pH->count++;
if( pH->htsize==0 ){
rehash(pH,8);
if( pH->htsize==0 ){
pH->count = 0;
pH->xFree(new_elem);
return data;
}
}
if( pH->count > pH->htsize ){
rehash(pH,pH->htsize*2);
}
assert( pH->htsize>0 );
assert( (pH->htsize & (pH->htsize-1))==0 );
h = hraw & (pH->htsize-1);
insertElement(pH, &pH->ht[h], new_elem);
new_elem->data = data;
return 0;
}
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) */

View File

@@ -1,112 +0,0 @@
/*
** 2001 September 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** This is the header file for the generic hash-table implementation
** used in SQLite. We've modified it slightly to serve as a standalone
** hash table implementation for the full-text indexing module.
**
*/
#ifndef _FTS1_HASH_H_
#define _FTS1_HASH_H_
/* Forward declarations of structures. */
typedef struct fts1Hash fts1Hash;
typedef struct fts1HashElem fts1HashElem;
/* A complete hash table is an instance of the following structure.
** The internals of this structure are intended to be opaque -- client
** code should not attempt to access or modify the fields of this structure
** directly. Change this structure only by using the routines below.
** However, many of the "procedures" and "functions" for modifying and
** accessing this structure are really macros, so we can't really make
** this structure opaque.
*/
struct fts1Hash {
char keyClass; /* HASH_INT, _POINTER, _STRING, _BINARY */
char copyKey; /* True if copy of key made on insert */
int count; /* Number of entries in this table */
fts1HashElem *first; /* The first element of the array */
void *(*xMalloc)(int); /* malloc() function to use */
void (*xFree)(void *); /* free() function to use */
int htsize; /* Number of buckets in the hash table */
struct _fts1ht { /* the hash table */
int count; /* Number of entries with this hash */
fts1HashElem *chain; /* Pointer to first entry with this hash */
} *ht;
};
/* Each element in the hash table is an instance of the following
** structure. All elements are stored on a single doubly-linked list.
**
** Again, this structure is intended to be opaque, but it can't really
** be opaque because it is used by macros.
*/
struct fts1HashElem {
fts1HashElem *next, *prev; /* Next and previous elements in the table */
void *data; /* Data associated with this element */
void *pKey; int nKey; /* Key associated with this element */
};
/*
** There are 2 different modes of operation for a hash table:
**
** FTS1_HASH_STRING pKey points to a string that is nKey bytes long
** (including the null-terminator, if any). Case
** is respected in comparisons.
**
** FTS1_HASH_BINARY pKey points to binary data nKey bytes long.
** memcmp() is used to compare keys.
**
** A copy of the key is made if the copyKey parameter to fts1HashInit is 1.
*/
#define FTS1_HASH_STRING 1
#define FTS1_HASH_BINARY 2
/*
** Access routines. To delete, insert a NULL pointer.
*/
void sqlite3Fts1HashInit(fts1Hash*, int keytype, int copyKey);
void *sqlite3Fts1HashInsert(fts1Hash*, const void *pKey, int nKey, void *pData);
void *sqlite3Fts1HashFind(const fts1Hash*, const void *pKey, int nKey);
void sqlite3Fts1HashClear(fts1Hash*);
/*
** Shorthand for the functions above
*/
#define fts1HashInit sqlite3Fts1HashInit
#define fts1HashInsert sqlite3Fts1HashInsert
#define fts1HashFind sqlite3Fts1HashFind
#define fts1HashClear sqlite3Fts1HashClear
/*
** Macros for looping over all elements of a hash table. The idiom is
** like this:
**
** fts1Hash h;
** fts1HashElem *p;
** ...
** for(p=fts1HashFirst(&h); p; p=fts1HashNext(p)){
** SomeStructure *pData = fts1HashData(p);
** // do something with pData
** }
*/
#define fts1HashFirst(H) ((H)->first)
#define fts1HashNext(E) ((E)->next)
#define fts1HashData(E) ((E)->data)
#define fts1HashKey(E) ((E)->pKey)
#define fts1HashKeysize(E) ((E)->nKey)
/*
** Number of entries in a hash table
*/
#define fts1HashCount(H) ((H)->count)
#endif /* _FTS1_HASH_H_ */

View File

@@ -1,643 +0,0 @@
/*
** 2006 September 30
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** Implementation of the full-text-search tokenizer that implements
** a Porter stemmer.
*/
/*
** The code in this file is only compiled if:
**
** * The FTS1 module is being built as an extension
** (in which case SQLITE_CORE is not defined), or
**
** * The FTS1 module is being built into the core of
** SQLite (in which case SQLITE_ENABLE_FTS1 is defined).
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "fts1_tokenizer.h"
/*
** Class derived from sqlite3_tokenizer
*/
typedef struct porter_tokenizer {
sqlite3_tokenizer base; /* Base class */
} porter_tokenizer;
/*
** Class derived from sqlit3_tokenizer_cursor
*/
typedef struct porter_tokenizer_cursor {
sqlite3_tokenizer_cursor base;
const char *zInput; /* input we are tokenizing */
int nInput; /* size of the input */
int iOffset; /* current position in zInput */
int iToken; /* index of next token to be returned */
char *zToken; /* storage for current token */
int nAllocated; /* space allocated to zToken buffer */
} porter_tokenizer_cursor;
/* Forward declaration */
static const sqlite3_tokenizer_module porterTokenizerModule;
/*
** Create a new tokenizer instance.
*/
static int porterCreate(
int argc, const char * const *argv,
sqlite3_tokenizer **ppTokenizer
){
porter_tokenizer *t;
t = (porter_tokenizer *) calloc(sizeof(*t), 1);
if( t==NULL ) return SQLITE_NOMEM;
*ppTokenizer = &t->base;
return SQLITE_OK;
}
/*
** Destroy a tokenizer
*/
static int porterDestroy(sqlite3_tokenizer *pTokenizer){
free(pTokenizer);
return SQLITE_OK;
}
/*
** Prepare to begin tokenizing a particular string. The input
** string to be tokenized is zInput[0..nInput-1]. A cursor
** used to incrementally tokenize this string is returned in
** *ppCursor.
*/
static int porterOpen(
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
const char *zInput, int nInput, /* String to be tokenized */
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
){
porter_tokenizer_cursor *c;
c = (porter_tokenizer_cursor *) malloc(sizeof(*c));
if( c==NULL ) return SQLITE_NOMEM;
c->zInput = zInput;
if( zInput==0 ){
c->nInput = 0;
}else if( nInput<0 ){
c->nInput = (int)strlen(zInput);
}else{
c->nInput = nInput;
}
c->iOffset = 0; /* start tokenizing at the beginning */
c->iToken = 0;
c->zToken = NULL; /* no space allocated, yet. */
c->nAllocated = 0;
*ppCursor = &c->base;
return SQLITE_OK;
}
/*
** Close a tokenization cursor previously opened by a call to
** porterOpen() above.
*/
static int porterClose(sqlite3_tokenizer_cursor *pCursor){
porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
free(c->zToken);
free(c);
return SQLITE_OK;
}
/*
** Vowel or consonant
*/
static const char cType[] = {
0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0,
1, 1, 1, 2, 1
};
/*
** isConsonant() and isVowel() determine if their first character in
** the string they point to is a consonant or a vowel, according
** to Porter ruls.
**
** A consonate is any letter other than 'a', 'e', 'i', 'o', or 'u'.
** 'Y' is a consonant unless it follows another consonant,
** in which case it is a vowel.
**
** In these routine, the letters are in reverse order. So the 'y' rule
** is that 'y' is a consonant unless it is followed by another
** consonent.
*/
static int isVowel(const char*);
static int isConsonant(const char *z){
int j;
char x = *z;
if( x==0 ) return 0;
assert( x>='a' && x<='z' );
j = cType[x-'a'];
if( j<2 ) return j;
return z[1]==0 || isVowel(z + 1);
}
static int isVowel(const char *z){
int j;
char x = *z;
if( x==0 ) return 0;
assert( x>='a' && x<='z' );
j = cType[x-'a'];
if( j<2 ) return 1-j;
return isConsonant(z + 1);
}
/*
** Let any sequence of one or more vowels be represented by V and let
** C be sequence of one or more consonants. Then every word can be
** represented as:
**
** [C] (VC){m} [V]
**
** In prose: A word is an optional consonant followed by zero or
** vowel-consonant pairs followed by an optional vowel. "m" is the
** number of vowel consonant pairs. This routine computes the value
** of m for the first i bytes of a word.
**
** Return true if the m-value for z is 1 or more. In other words,
** return true if z contains at least one vowel that is followed
** by a consonant.
**
** In this routine z[] is in reverse order. So we are really looking
** for an instance of of a consonant followed by a vowel.
*/
static int m_gt_0(const char *z){
while( isVowel(z) ){ z++; }
if( *z==0 ) return 0;
while( isConsonant(z) ){ z++; }
return *z!=0;
}
/* Like mgt0 above except we are looking for a value of m which is
** exactly 1
*/
static int m_eq_1(const char *z){
while( isVowel(z) ){ z++; }
if( *z==0 ) return 0;
while( isConsonant(z) ){ z++; }
if( *z==0 ) return 0;
while( isVowel(z) ){ z++; }
if( *z==0 ) return 1;
while( isConsonant(z) ){ z++; }
return *z==0;
}
/* Like mgt0 above except we are looking for a value of m>1 instead
** or m>0
*/
static int m_gt_1(const char *z){
while( isVowel(z) ){ z++; }
if( *z==0 ) return 0;
while( isConsonant(z) ){ z++; }
if( *z==0 ) return 0;
while( isVowel(z) ){ z++; }
if( *z==0 ) return 0;
while( isConsonant(z) ){ z++; }
return *z!=0;
}
/*
** Return TRUE if there is a vowel anywhere within z[0..n-1]
*/
static int hasVowel(const char *z){
while( isConsonant(z) ){ z++; }
return *z!=0;
}
/*
** Return TRUE if the word ends in a double consonant.
**
** The text is reversed here. So we are really looking at
** the first two characters of z[].
*/
static int doubleConsonant(const char *z){
return isConsonant(z) && z[0]==z[1] && isConsonant(z+1);
}
/*
** Return TRUE if the word ends with three letters which
** are consonant-vowel-consonent and where the final consonant
** is not 'w', 'x', or 'y'.
**
** The word is reversed here. So we are really checking the
** first three letters and the first one cannot be in [wxy].
*/
static int star_oh(const char *z){
return
z[0]!=0 && isConsonant(z) &&
z[0]!='w' && z[0]!='x' && z[0]!='y' &&
z[1]!=0 && isVowel(z+1) &&
z[2]!=0 && isConsonant(z+2);
}
/*
** If the word ends with zFrom and xCond() is true for the stem
** of the word that preceeds the zFrom ending, then change the
** ending to zTo.
**
** The input word *pz and zFrom are both in reverse order. zTo
** is in normal order.
**
** Return TRUE if zFrom matches. Return FALSE if zFrom does not
** match. Not that TRUE is returned even if xCond() fails and
** no substitution occurs.
*/
static int stem(
char **pz, /* The word being stemmed (Reversed) */
const char *zFrom, /* If the ending matches this... (Reversed) */
const char *zTo, /* ... change the ending to this (not reversed) */
int (*xCond)(const char*) /* Condition that must be true */
){
char *z = *pz;
while( *zFrom && *zFrom==*z ){ z++; zFrom++; }
if( *zFrom!=0 ) return 0;
if( xCond && !xCond(z) ) return 1;
while( *zTo ){
*(--z) = *(zTo++);
}
*pz = z;
return 1;
}
/*
** This is the fallback stemmer used when the porter stemmer is
** inappropriate. The input word is copied into the output with
** US-ASCII case folding. If the input word is too long (more
** than 20 bytes if it contains no digits or more than 6 bytes if
** it contains digits) then word is truncated to 20 or 6 bytes
** by taking 10 or 3 bytes from the beginning and end.
*/
static void copy_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
int i, mx, j;
int hasDigit = 0;
for(i=0; i<nIn; i++){
int c = zIn[i];
if( c>='A' && c<='Z' ){
zOut[i] = c - 'A' + 'a';
}else{
if( c>='0' && c<='9' ) hasDigit = 1;
zOut[i] = c;
}
}
mx = hasDigit ? 3 : 10;
if( nIn>mx*2 ){
for(j=mx, i=nIn-mx; i<nIn; i++, j++){
zOut[j] = zOut[i];
}
i = j;
}
zOut[i] = 0;
*pnOut = i;
}
/*
** Stem the input word zIn[0..nIn-1]. Store the output in zOut.
** zOut is at least big enough to hold nIn bytes. Write the actual
** size of the output word (exclusive of the '\0' terminator) into *pnOut.
**
** Any upper-case characters in the US-ASCII character set ([A-Z])
** are converted to lower case. Upper-case UTF characters are
** unchanged.
**
** Words that are longer than about 20 bytes are stemmed by retaining
** a few bytes from the beginning and the end of the word. If the
** word contains digits, 3 bytes are taken from the beginning and
** 3 bytes from the end. For long words without digits, 10 bytes
** are taken from each end. US-ASCII case folding still applies.
**
** If the input word contains not digits but does characters not
** in [a-zA-Z] then no stemming is attempted and this routine just
** copies the input into the input into the output with US-ASCII
** case folding.
**
** Stemming never increases the length of the word. So there is
** no chance of overflowing the zOut buffer.
*/
static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
int i, j, c;
char zReverse[28];
char *z, *z2;
if( nIn<3 || nIn>=sizeof(zReverse)-7 ){
/* The word is too big or too small for the porter stemmer.
** Fallback to the copy stemmer */
copy_stemmer(zIn, nIn, zOut, pnOut);
return;
}
for(i=0, j=sizeof(zReverse)-6; i<nIn; i++, j--){
c = zIn[i];
if( c>='A' && c<='Z' ){
zReverse[j] = c + 'a' - 'A';
}else if( c>='a' && c<='z' ){
zReverse[j] = c;
}else{
/* The use of a character not in [a-zA-Z] means that we fallback
** to the copy stemmer */
copy_stemmer(zIn, nIn, zOut, pnOut);
return;
}
}
memset(&zReverse[sizeof(zReverse)-5], 0, 5);
z = &zReverse[j+1];
/* Step 1a */
if( z[0]=='s' ){
if(
!stem(&z, "sess", "ss", 0) &&
!stem(&z, "sei", "i", 0) &&
!stem(&z, "ss", "ss", 0)
){
z++;
}
}
/* Step 1b */
z2 = z;
if( stem(&z, "dee", "ee", m_gt_0) ){
/* Do nothing. The work was all in the test */
}else if(
(stem(&z, "gni", "", hasVowel) || stem(&z, "de", "", hasVowel))
&& z!=z2
){
if( stem(&z, "ta", "ate", 0) ||
stem(&z, "lb", "ble", 0) ||
stem(&z, "zi", "ize", 0) ){
/* Do nothing. The work was all in the test */
}else if( doubleConsonant(z) && (*z!='l' && *z!='s' && *z!='z') ){
z++;
}else if( m_eq_1(z) && star_oh(z) ){
*(--z) = 'e';
}
}
/* Step 1c */
if( z[0]=='y' && hasVowel(z+1) ){
z[0] = 'i';
}
/* Step 2 */
switch( z[1] ){
case 'a':
stem(&z, "lanoita", "ate", m_gt_0) ||
stem(&z, "lanoit", "tion", m_gt_0);
break;
case 'c':
stem(&z, "icne", "ence", m_gt_0) ||
stem(&z, "icna", "ance", m_gt_0);
break;
case 'e':
stem(&z, "rezi", "ize", m_gt_0);
break;
case 'g':
stem(&z, "igol", "log", m_gt_0);
break;
case 'l':
stem(&z, "ilb", "ble", m_gt_0) ||
stem(&z, "illa", "al", m_gt_0) ||
stem(&z, "iltne", "ent", m_gt_0) ||
stem(&z, "ile", "e", m_gt_0) ||
stem(&z, "ilsuo", "ous", m_gt_0);
break;
case 'o':
stem(&z, "noitazi", "ize", m_gt_0) ||
stem(&z, "noita", "ate", m_gt_0) ||
stem(&z, "rota", "ate", m_gt_0);
break;
case 's':
stem(&z, "msila", "al", m_gt_0) ||
stem(&z, "ssenevi", "ive", m_gt_0) ||
stem(&z, "ssenluf", "ful", m_gt_0) ||
stem(&z, "ssensuo", "ous", m_gt_0);
break;
case 't':
stem(&z, "itila", "al", m_gt_0) ||
stem(&z, "itivi", "ive", m_gt_0) ||
stem(&z, "itilib", "ble", m_gt_0);
break;
}
/* Step 3 */
switch( z[0] ){
case 'e':
stem(&z, "etaci", "ic", m_gt_0) ||
stem(&z, "evita", "", m_gt_0) ||
stem(&z, "ezila", "al", m_gt_0);
break;
case 'i':
stem(&z, "itici", "ic", m_gt_0);
break;
case 'l':
stem(&z, "laci", "ic", m_gt_0) ||
stem(&z, "luf", "", m_gt_0);
break;
case 's':
stem(&z, "ssen", "", m_gt_0);
break;
}
/* Step 4 */
switch( z[1] ){
case 'a':
if( z[0]=='l' && m_gt_1(z+2) ){
z += 2;
}
break;
case 'c':
if( z[0]=='e' && z[2]=='n' && (z[3]=='a' || z[3]=='e') && m_gt_1(z+4) ){
z += 4;
}
break;
case 'e':
if( z[0]=='r' && m_gt_1(z+2) ){
z += 2;
}
break;
case 'i':
if( z[0]=='c' && m_gt_1(z+2) ){
z += 2;
}
break;
case 'l':
if( z[0]=='e' && z[2]=='b' && (z[3]=='a' || z[3]=='i') && m_gt_1(z+4) ){
z += 4;
}
break;
case 'n':
if( z[0]=='t' ){
if( z[2]=='a' ){
if( m_gt_1(z+3) ){
z += 3;
}
}else if( z[2]=='e' ){
stem(&z, "tneme", "", m_gt_1) ||
stem(&z, "tnem", "", m_gt_1) ||
stem(&z, "tne", "", m_gt_1);
}
}
break;
case 'o':
if( z[0]=='u' ){
if( m_gt_1(z+2) ){
z += 2;
}
}else if( z[3]=='s' || z[3]=='t' ){
stem(&z, "noi", "", m_gt_1);
}
break;
case 's':
if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){
z += 3;
}
break;
case 't':
stem(&z, "eta", "", m_gt_1) ||
stem(&z, "iti", "", m_gt_1);
break;
case 'u':
if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){
z += 3;
}
break;
case 'v':
case 'z':
if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){
z += 3;
}
break;
}
/* Step 5a */
if( z[0]=='e' ){
if( m_gt_1(z+1) ){
z++;
}else if( m_eq_1(z+1) && !star_oh(z+1) ){
z++;
}
}
/* Step 5b */
if( m_gt_1(z) && z[0]=='l' && z[1]=='l' ){
z++;
}
/* z[] is now the stemmed word in reverse order. Flip it back
** around into forward order and return.
*/
*pnOut = i = strlen(z);
zOut[i] = 0;
while( *z ){
zOut[--i] = *(z++);
}
}
/*
** Characters that can be part of a token. We assume any character
** whose value is greater than 0x80 (any UTF character) can be
** part of a token. In other words, delimiters all must have
** values of 0x7f or lower.
*/
static const char isIdChar[] = {
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
};
#define idChar(C) (((ch=C)&0x80)!=0 || (ch>0x2f && isIdChar[ch-0x30]))
#define isDelim(C) (((ch=C)&0x80)==0 && (ch<0x30 || !isIdChar[ch-0x30]))
/*
** Extract the next token from a tokenization cursor. The cursor must
** have been opened by a prior call to porterOpen().
*/
static int porterNext(
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by porterOpen */
const char **pzToken, /* OUT: *pzToken is the token text */
int *pnBytes, /* OUT: Number of bytes in token */
int *piStartOffset, /* OUT: Starting offset of token */
int *piEndOffset, /* OUT: Ending offset of token */
int *piPosition /* OUT: Position integer of token */
){
porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
const char *z = c->zInput;
while( c->iOffset<c->nInput ){
int iStartOffset, ch;
/* Scan past delimiter characters */
while( c->iOffset<c->nInput && isDelim(z[c->iOffset]) ){
c->iOffset++;
}
/* Count non-delimiter characters. */
iStartOffset = c->iOffset;
while( c->iOffset<c->nInput && !isDelim(z[c->iOffset]) ){
c->iOffset++;
}
if( c->iOffset>iStartOffset ){
int n = c->iOffset-iStartOffset;
if( n>c->nAllocated ){
c->nAllocated = n+20;
c->zToken = realloc(c->zToken, c->nAllocated);
if( c->zToken==NULL ) return SQLITE_NOMEM;
}
porter_stemmer(&z[iStartOffset], n, c->zToken, pnBytes);
*pzToken = c->zToken;
*piStartOffset = iStartOffset;
*piEndOffset = c->iOffset;
*piPosition = c->iToken++;
return SQLITE_OK;
}
}
return SQLITE_DONE;
}
/*
** The set of routines that implement the porter-stemmer tokenizer
*/
static const sqlite3_tokenizer_module porterTokenizerModule = {
0,
porterCreate,
porterDestroy,
porterOpen,
porterClose,
porterNext,
};
/*
** Allocate a new porter tokenizer. Return a pointer to the new
** tokenizer in *ppModule
*/
void sqlite3Fts1PorterTokenizerModule(
sqlite3_tokenizer_module const**ppModule
){
*ppModule = &porterTokenizerModule;
}
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) */

View File

@@ -1,90 +0,0 @@
/*
** 2006 July 10
**
** The author disclaims copyright to this source code.
**
*************************************************************************
** Defines the interface to tokenizers used by fulltext-search. There
** are three basic components:
**
** sqlite3_tokenizer_module is a singleton defining the tokenizer
** interface functions. This is essentially the class structure for
** tokenizers.
**
** sqlite3_tokenizer is used to define a particular tokenizer, perhaps
** including customization information defined at creation time.
**
** sqlite3_tokenizer_cursor is generated by a tokenizer to generate
** tokens from a particular input.
*/
#ifndef _FTS1_TOKENIZER_H_
#define _FTS1_TOKENIZER_H_
/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time.
** If tokenizers are to be allowed to call sqlite3_*() functions, then
** we will need a way to register the API consistently.
*/
#include "sqlite3.h"
/*
** Structures used by the tokenizer interface.
*/
typedef struct sqlite3_tokenizer sqlite3_tokenizer;
typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
struct sqlite3_tokenizer_module {
int iVersion; /* currently 0 */
/*
** Create and destroy a tokenizer. argc/argv are passed down from
** the fulltext virtual table creation to allow customization.
*/
int (*xCreate)(int argc, const char *const*argv,
sqlite3_tokenizer **ppTokenizer);
int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
/*
** Tokenize a particular input. Call xOpen() to prepare to
** tokenize, xNext() repeatedly until it returns SQLITE_DONE, then
** xClose() to free any internal state. The pInput passed to
** xOpen() must exist until the cursor is closed. The ppToken
** result from xNext() is only valid until the next call to xNext()
** or until xClose() is called.
*/
/* TODO(shess) current implementation requires pInput to be
** nul-terminated. This should either be fixed, or pInput/nBytes
** should be converted to zInput.
*/
int (*xOpen)(sqlite3_tokenizer *pTokenizer,
const char *pInput, int nBytes,
sqlite3_tokenizer_cursor **ppCursor);
int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
int (*xNext)(sqlite3_tokenizer_cursor *pCursor,
const char **ppToken, int *pnBytes,
int *piStartOffset, int *piEndOffset, int *piPosition);
};
struct sqlite3_tokenizer {
const sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */
/* Tokenizer implementations will typically add additional fields */
};
struct sqlite3_tokenizer_cursor {
sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */
/* Tokenizer implementations will typically add additional fields */
};
/*
** Get the module for a tokenizer which generates tokens based on a
** set of non-token characters. The default is to break tokens at any
** non-alnum character, though the set of delimiters can also be
** specified by the first argv argument to xCreate().
*/
/* TODO(shess) This doesn't belong here. Need some sort of
** registration process.
*/
void sqlite3Fts1SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
void sqlite3Fts1PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule);
#endif /* _FTS1_TOKENIZER_H_ */

View File

@@ -1,221 +0,0 @@
/*
** The author disclaims copyright to this source code.
**
*************************************************************************
** Implementation of the "simple" full-text-search tokenizer.
*/
/*
** The code in this file is only compiled if:
**
** * The FTS1 module is being built as an extension
** (in which case SQLITE_CORE is not defined), or
**
** * The FTS1 module is being built into the core of
** SQLite (in which case SQLITE_ENABLE_FTS1 is defined).
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "fts1_tokenizer.h"
typedef struct simple_tokenizer {
sqlite3_tokenizer base;
char delim[128]; /* flag ASCII delimiters */
} simple_tokenizer;
typedef struct simple_tokenizer_cursor {
sqlite3_tokenizer_cursor base;
const char *pInput; /* input we are tokenizing */
int nBytes; /* size of the input */
int iOffset; /* current position in pInput */
int iToken; /* index of next token to be returned */
char *pToken; /* storage for current token */
int nTokenAllocated; /* space allocated to zToken buffer */
} simple_tokenizer_cursor;
/* Forward declaration */
static const sqlite3_tokenizer_module simpleTokenizerModule;
static int isDelim(simple_tokenizer *t, unsigned char c){
return c<0x80 && t->delim[c];
}
/*
** Create a new tokenizer instance.
*/
static int simpleCreate(
int argc, const char * const *argv,
sqlite3_tokenizer **ppTokenizer
){
simple_tokenizer *t;
t = (simple_tokenizer *) calloc(sizeof(*t), 1);
if( t==NULL ) return SQLITE_NOMEM;
/* TODO(shess) Delimiters need to remain the same from run to run,
** else we need to reindex. One solution would be a meta-table to
** track such information in the database, then we'd only want this
** information on the initial create.
*/
if( argc>1 ){
int i, n = strlen(argv[1]);
for(i=0; i<n; i++){
unsigned char ch = argv[1][i];
/* We explicitly don't support UTF-8 delimiters for now. */
if( ch>=0x80 ){
free(t);
return SQLITE_ERROR;
}
t->delim[ch] = 1;
}
} else {
/* Mark non-alphanumeric ASCII characters as delimiters */
int i;
for(i=1; i<0x80; i++){
t->delim[i] = !isalnum(i);
}
}
*ppTokenizer = &t->base;
return SQLITE_OK;
}
/*
** Destroy a tokenizer
*/
static int simpleDestroy(sqlite3_tokenizer *pTokenizer){
free(pTokenizer);
return SQLITE_OK;
}
/*
** Prepare to begin tokenizing a particular string. The input
** string to be tokenized is pInput[0..nBytes-1]. A cursor
** used to incrementally tokenize this string is returned in
** *ppCursor.
*/
static int simpleOpen(
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
const char *pInput, int nBytes, /* String to be tokenized */
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
){
simple_tokenizer_cursor *c;
c = (simple_tokenizer_cursor *) malloc(sizeof(*c));
if( c==NULL ) return SQLITE_NOMEM;
c->pInput = pInput;
if( pInput==0 ){
c->nBytes = 0;
}else if( nBytes<0 ){
c->nBytes = (int)strlen(pInput);
}else{
c->nBytes = nBytes;
}
c->iOffset = 0; /* start tokenizing at the beginning */
c->iToken = 0;
c->pToken = NULL; /* no space allocated, yet. */
c->nTokenAllocated = 0;
*ppCursor = &c->base;
return SQLITE_OK;
}
/*
** Close a tokenization cursor previously opened by a call to
** simpleOpen() above.
*/
static int simpleClose(sqlite3_tokenizer_cursor *pCursor){
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
free(c->pToken);
free(c);
return SQLITE_OK;
}
/*
** Extract the next token from a tokenization cursor. The cursor must
** have been opened by a prior call to simpleOpen().
*/
static int simpleNext(
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */
const char **ppToken, /* OUT: *ppToken is the token text */
int *pnBytes, /* OUT: Number of bytes in token */
int *piStartOffset, /* OUT: Starting offset of token */
int *piEndOffset, /* OUT: Ending offset of token */
int *piPosition /* OUT: Position integer of token */
){
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer;
unsigned char *p = (unsigned char *)c->pInput;
while( c->iOffset<c->nBytes ){
int iStartOffset;
/* Scan past delimiter characters */
while( c->iOffset<c->nBytes && isDelim(t, p[c->iOffset]) ){
c->iOffset++;
}
/* Count non-delimiter characters. */
iStartOffset = c->iOffset;
while( c->iOffset<c->nBytes && !isDelim(t, p[c->iOffset]) ){
c->iOffset++;
}
if( c->iOffset>iStartOffset ){
int i, n = c->iOffset-iStartOffset;
if( n>c->nTokenAllocated ){
c->nTokenAllocated = n+20;
c->pToken = realloc(c->pToken, c->nTokenAllocated);
if( c->pToken==NULL ) return SQLITE_NOMEM;
}
for(i=0; i<n; i++){
/* TODO(shess) This needs expansion to handle UTF-8
** case-insensitivity.
*/
unsigned char ch = p[iStartOffset+i];
c->pToken[i] = ch<0x80 ? tolower(ch) : ch;
}
*ppToken = c->pToken;
*pnBytes = n;
*piStartOffset = iStartOffset;
*piEndOffset = c->iOffset;
*piPosition = c->iToken++;
return SQLITE_OK;
}
}
return SQLITE_DONE;
}
/*
** The set of routines that implement the simple tokenizer
*/
static const sqlite3_tokenizer_module simpleTokenizerModule = {
0,
simpleCreate,
simpleDestroy,
simpleOpen,
simpleClose,
simpleNext,
};
/*
** Allocate a new simple tokenizer. Return a pointer to the new
** tokenizer in *ppModule
*/
void sqlite3Fts1SimpleTokenizerModule(
sqlite3_tokenizer_module const**ppModule
){
*ppModule = &simpleTokenizerModule;
}
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) */

File diff suppressed because it is too large Load Diff

View File

@@ -1,11 +0,0 @@
#include "sqlite3.h"
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
int fulltext_init(sqlite3 *db);
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */

View File

@@ -1,174 +0,0 @@
/*
** The author disclaims copyright to this source code.
**
*************************************************************************
** Implementation of the "simple" full-text-search tokenizer.
*/
#include <assert.h>
#if !defined(__APPLE__)
#include <malloc.h>
#else
#include <stdlib.h>
#endif
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "tokenizer.h"
/* Duplicate a string; the caller must free() the returned string.
* (We don't use strdup() since it's not part of the standard C library and
* may not be available everywhere.) */
/* TODO(shess) Copied from fulltext.c, consider util.c for such
** things. */
static char *string_dup(const char *s){
char *str = malloc(strlen(s) + 1);
strcpy(str, s);
return str;
}
typedef struct simple_tokenizer {
sqlite3_tokenizer base;
const char *zDelim; /* token delimiters */
} simple_tokenizer;
typedef struct simple_tokenizer_cursor {
sqlite3_tokenizer_cursor base;
const char *pInput; /* input we are tokenizing */
int nBytes; /* size of the input */
const char *pCurrent; /* current position in pInput */
int iToken; /* index of next token to be returned */
char *zToken; /* storage for current token */
int nTokenBytes; /* actual size of current token */
int nTokenAllocated; /* space allocated to zToken buffer */
} simple_tokenizer_cursor;
static sqlite3_tokenizer_module simpleTokenizerModule;/* forward declaration */
static int simpleCreate(
int argc, const char **argv,
sqlite3_tokenizer **ppTokenizer
){
simple_tokenizer *t;
t = (simple_tokenizer *) malloc(sizeof(simple_tokenizer));
/* TODO(shess) Delimiters need to remain the same from run to run,
** else we need to reindex. One solution would be a meta-table to
** track such information in the database, then we'd only want this
** information on the initial create.
*/
if( argc>1 ){
t->zDelim = string_dup(argv[1]);
} else {
/* Build a string excluding alphanumeric ASCII characters */
char zDelim[0x80]; /* nul-terminated, so nul not a member */
int i, j;
for(i=1, j=0; i<0x80; i++){
if( !isalnum(i) ){
zDelim[j++] = i;
}
}
zDelim[j++] = '\0';
assert( j<=sizeof(zDelim) );
t->zDelim = string_dup(zDelim);
}
*ppTokenizer = &t->base;
return SQLITE_OK;
}
static int simpleDestroy(sqlite3_tokenizer *pTokenizer){
simple_tokenizer *t = (simple_tokenizer *) pTokenizer;
free((void *) t->zDelim);
free(t);
return SQLITE_OK;
}
static int simpleOpen(
sqlite3_tokenizer *pTokenizer,
const char *pInput, int nBytes,
sqlite3_tokenizer_cursor **ppCursor
){
simple_tokenizer_cursor *c;
c = (simple_tokenizer_cursor *) malloc(sizeof(simple_tokenizer_cursor));
c->pInput = pInput;
c->nBytes = nBytes<0 ? (int) strlen(pInput) : nBytes;
c->pCurrent = c->pInput; /* start tokenizing at the beginning */
c->iToken = 0;
c->zToken = NULL; /* no space allocated, yet. */
c->nTokenBytes = 0;
c->nTokenAllocated = 0;
*ppCursor = &c->base;
return SQLITE_OK;
}
static int simpleClose(sqlite3_tokenizer_cursor *pCursor){
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
if( NULL!=c->zToken ){
free(c->zToken);
}
free(c);
return SQLITE_OK;
}
static int simpleNext(
sqlite3_tokenizer_cursor *pCursor,
const char **ppToken, int *pnBytes,
int *piStartOffset, int *piEndOffset, int *piPosition
){
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer;
int ii;
while( c->pCurrent-c->pInput<c->nBytes ){
int n = (int) strcspn(c->pCurrent, t->zDelim);
if( n>0 ){
if( n+1>c->nTokenAllocated ){
c->zToken = realloc(c->zToken, n+1);
}
for(ii=0; ii<n; ii++){
/* TODO(shess) This needs expansion to handle UTF-8
** case-insensitivity.
*/
char ch = c->pCurrent[ii];
c->zToken[ii] = (unsigned char)ch<0x80 ? tolower((unsigned char)ch):ch;
}
c->zToken[n] = '\0';
*ppToken = c->zToken;
*pnBytes = n;
*piStartOffset = (int) (c->pCurrent-c->pInput);
*piEndOffset = *piStartOffset+n;
*piPosition = c->iToken++;
c->pCurrent += n + 1;
return SQLITE_OK;
}
c->pCurrent += n + 1;
/* TODO(shess) could strspn() to skip delimiters en masse. Needs
** to happen in two places, though, which is annoying.
*/
}
return SQLITE_DONE;
}
static sqlite3_tokenizer_module simpleTokenizerModule = {
0,
simpleCreate,
simpleDestroy,
simpleOpen,
simpleClose,
simpleNext,
};
void get_simple_tokenizer_module(
sqlite3_tokenizer_module **ppModule
){
*ppModule = &simpleTokenizerModule;
}

View File

@@ -1,89 +0,0 @@
/*
** 2006 July 10
**
** The author disclaims copyright to this source code.
**
*************************************************************************
** Defines the interface to tokenizers used by fulltext-search. There
** are three basic components:
**
** sqlite3_tokenizer_module is a singleton defining the tokenizer
** interface functions. This is essentially the class structure for
** tokenizers.
**
** sqlite3_tokenizer is used to define a particular tokenizer, perhaps
** including customization information defined at creation time.
**
** sqlite3_tokenizer_cursor is generated by a tokenizer to generate
** tokens from a particular input.
*/
#ifndef _TOKENIZER_H_
#define _TOKENIZER_H_
/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time.
** If tokenizers are to be allowed to call sqlite3_*() functions, then
** we will need a way to register the API consistently.
*/
#include "sqlite3.h"
/*
** Structures used by the tokenizer interface.
*/
typedef struct sqlite3_tokenizer sqlite3_tokenizer;
typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
struct sqlite3_tokenizer_module {
int iVersion; /* currently 0 */
/*
** Create and destroy a tokenizer. argc/argv are passed down from
** the fulltext virtual table creation to allow customization.
*/
int (*xCreate)(int argc, const char **argv,
sqlite3_tokenizer **ppTokenizer);
int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
/*
** Tokenize a particular input. Call xOpen() to prepare to
** tokenize, xNext() repeatedly until it returns SQLITE_DONE, then
** xClose() to free any internal state. The pInput passed to
** xOpen() must exist until the cursor is closed. The ppToken
** result from xNext() is only valid until the next call to xNext()
** or until xClose() is called.
*/
/* TODO(shess) current implementation requires pInput to be
** nul-terminated. This should either be fixed, or pInput/nBytes
** should be converted to zInput.
*/
int (*xOpen)(sqlite3_tokenizer *pTokenizer,
const char *pInput, int nBytes,
sqlite3_tokenizer_cursor **ppCursor);
int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
int (*xNext)(sqlite3_tokenizer_cursor *pCursor,
const char **ppToken, int *pnBytes,
int *piStartOffset, int *piEndOffset, int *piPosition);
};
struct sqlite3_tokenizer {
sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */
/* Tokenizer implementations will typically add additional fields */
};
struct sqlite3_tokenizer_cursor {
sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */
/* Tokenizer implementations will typically add additional fields */
};
/*
** Get the module for a tokenizer which generates tokens based on a
** set of non-token characters. The default is to break tokens at any
** non-alnum character, though the set of delimiters can also be
** specified by the first argv argument to xCreate().
*/
/* TODO(shess) This doesn't belong here. Need some sort of
** registration process.
*/
void get_simple_tokenizer_module(sqlite3_tokenizer_module **ppModule);
#endif /* _TOKENIZER_H_ */

View File

@@ -1,133 +0,0 @@
1. FTS2 Tokenizers
When creating a new full-text table, FTS2 allows the user to select
the text tokenizer implementation to be used when indexing text
by specifying a "tokenizer" clause as part of the CREATE VIRTUAL TABLE
statement:
CREATE VIRTUAL TABLE <table-name> USING fts2(
<columns ...> [, tokenizer <tokenizer-name> [<tokenizer-args>]]
);
The built-in tokenizers (valid values to pass as <tokenizer name>) are
"simple" and "porter".
<tokenizer-args> should consist of zero or more white-space separated
arguments to pass to the selected tokenizer implementation. The
interpretation of the arguments, if any, depends on the individual
tokenizer.
2. Custom Tokenizers
FTS2 allows users to provide custom tokenizer implementations. The
interface used to create a new tokenizer is defined and described in
the fts2_tokenizer.h source file.
Registering a new FTS2 tokenizer is similar to registering a new
virtual table module with SQLite. The user passes a pointer to a
structure containing pointers to various callback functions that
make up the implementation of the new tokenizer type. For tokenizers,
the structure (defined in fts2_tokenizer.h) is called
"sqlite3_tokenizer_module".
FTS2 does not expose a C-function that users call to register new
tokenizer types with a database handle. Instead, the pointer must
be encoded as an SQL blob value and passed to FTS2 through the SQL
engine by evaluating a special scalar function, "fts2_tokenizer()".
The fts2_tokenizer() function may be called with one or two arguments,
as follows:
SELECT fts2_tokenizer(<tokenizer-name>);
SELECT fts2_tokenizer(<tokenizer-name>, <sqlite3_tokenizer_module ptr>);
Where <tokenizer-name> is a string identifying the tokenizer and
<sqlite3_tokenizer_module ptr> is a pointer to an sqlite3_tokenizer_module
structure encoded as an SQL blob. If the second argument is present,
it is registered as tokenizer <tokenizer-name> and a copy of it
returned. If only one argument is passed, a pointer to the tokenizer
implementation currently registered as <tokenizer-name> is returned,
encoded as a blob. Or, if no such tokenizer exists, an SQL exception
(error) is raised.
SECURITY: If the fts2 extension is used in an environment where potentially
malicious users may execute arbitrary SQL (i.e. gears), they should be
prevented from invoking the fts2_tokenizer() function, possibly using the
authorisation callback.
See "Sample code" below for an example of calling the fts2_tokenizer()
function from C code.
3. ICU Library Tokenizers
If this extension is compiled with the SQLITE_ENABLE_ICU pre-processor
symbol defined, then there exists a built-in tokenizer named "icu"
implemented using the ICU library. The first argument passed to the
xCreate() method (see fts2_tokenizer.h) of this tokenizer may be
an ICU locale identifier. For example "tr_TR" for Turkish as used
in Turkey, or "en_AU" for English as used in Australia. For example:
"CREATE VIRTUAL TABLE thai_text USING fts2(text, tokenizer icu th_TH)"
The ICU tokenizer implementation is very simple. It splits the input
text according to the ICU rules for finding word boundaries and discards
any tokens that consist entirely of white-space. This may be suitable
for some applications in some locales, but not all. If more complex
processing is required, for example to implement stemming or
discard punctuation, this can be done by creating a tokenizer
implementation that uses the ICU tokenizer as part of its implementation.
When using the ICU tokenizer this way, it is safe to overwrite the
contents of the strings returned by the xNext() method (see
fts2_tokenizer.h).
4. Sample code.
The following two code samples illustrate the way C code should invoke
the fts2_tokenizer() scalar function:
int registerTokenizer(
sqlite3 *db,
char *zName,
const sqlite3_tokenizer_module *p
){
int rc;
sqlite3_stmt *pStmt;
const char zSql[] = "SELECT fts2_tokenizer(?, ?)";
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
if( rc!=SQLITE_OK ){
return rc;
}
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
sqlite3_step(pStmt);
return sqlite3_finalize(pStmt);
}
int queryTokenizer(
sqlite3 *db,
char *zName,
const sqlite3_tokenizer_module **pp
){
int rc;
sqlite3_stmt *pStmt;
const char zSql[] = "SELECT fts2_tokenizer(?)";
*pp = 0;
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
if( rc!=SQLITE_OK ){
return rc;
}
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
if( SQLITE_ROW==sqlite3_step(pStmt) ){
if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
}
}
return sqlite3_finalize(pStmt);
}

View File

@@ -1,4 +0,0 @@
This folder contains source code to the second full-text search
extension for SQLite. While the API is the same, this version uses a
substantially different storage schema from fts1, so tables will need
to be rebuilt.

File diff suppressed because it is too large Load Diff

View File

@@ -1,26 +0,0 @@
/*
** 2006 Oct 10
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
** This header file is used by programs that want to link against the
** FTS2 library. All it does is declare the sqlite3Fts2Init() interface.
*/
#include "sqlite3.h"
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
int sqlite3Fts2Init(sqlite3 *db);
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */

View File

@@ -1,376 +0,0 @@
/*
** 2001 September 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** This is the implementation of generic hash-tables used in SQLite.
** We've modified it slightly to serve as a standalone hash table
** implementation for the full-text indexing module.
*/
/*
** The code in this file is only compiled if:
**
** * The FTS2 module is being built as an extension
** (in which case SQLITE_CORE is not defined), or
**
** * The FTS2 module is being built into the core of
** SQLite (in which case SQLITE_ENABLE_FTS2 is defined).
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "sqlite3.h"
#include "sqlite3ext.h"
SQLITE_EXTENSION_INIT3
#include "fts2_hash.h"
/*
** Malloc and Free functions
*/
static void *fts2HashMalloc(int n){
void *p = sqlite3_malloc(n);
if( p ){
memset(p, 0, n);
}
return p;
}
static void fts2HashFree(void *p){
sqlite3_free(p);
}
/* Turn bulk memory into a hash table object by initializing the
** fields of the Hash structure.
**
** "pNew" is a pointer to the hash table that is to be initialized.
** keyClass is one of the constants
** FTS2_HASH_BINARY or FTS2_HASH_STRING. The value of keyClass
** determines what kind of key the hash table will use. "copyKey" is
** true if the hash table should make its own private copy of keys and
** false if it should just use the supplied pointer.
*/
void sqlite3Fts2HashInit(fts2Hash *pNew, int keyClass, int copyKey){
assert( pNew!=0 );
assert( keyClass>=FTS2_HASH_STRING && keyClass<=FTS2_HASH_BINARY );
pNew->keyClass = keyClass;
pNew->copyKey = copyKey;
pNew->first = 0;
pNew->count = 0;
pNew->htsize = 0;
pNew->ht = 0;
}
/* Remove all entries from a hash table. Reclaim all memory.
** Call this routine to delete a hash table or to reset a hash table
** to the empty state.
*/
void sqlite3Fts2HashClear(fts2Hash *pH){
fts2HashElem *elem; /* For looping over all elements of the table */
assert( pH!=0 );
elem = pH->first;
pH->first = 0;
fts2HashFree(pH->ht);
pH->ht = 0;
pH->htsize = 0;
while( elem ){
fts2HashElem *next_elem = elem->next;
if( pH->copyKey && elem->pKey ){
fts2HashFree(elem->pKey);
}
fts2HashFree(elem);
elem = next_elem;
}
pH->count = 0;
}
/*
** Hash and comparison functions when the mode is FTS2_HASH_STRING
*/
static int strHash(const void *pKey, int nKey){
const char *z = (const char *)pKey;
int h = 0;
if( nKey<=0 ) nKey = (int) strlen(z);
while( nKey > 0 ){
h = (h<<3) ^ h ^ *z++;
nKey--;
}
return h & 0x7fffffff;
}
static int strCompare(const void *pKey1, int n1, const void *pKey2, int n2){
if( n1!=n2 ) return 1;
return strncmp((const char*)pKey1,(const char*)pKey2,n1);
}
/*
** Hash and comparison functions when the mode is FTS2_HASH_BINARY
*/
static int binHash(const void *pKey, int nKey){
int h = 0;
const char *z = (const char *)pKey;
while( nKey-- > 0 ){
h = (h<<3) ^ h ^ *(z++);
}
return h & 0x7fffffff;
}
static int binCompare(const void *pKey1, int n1, const void *pKey2, int n2){
if( n1!=n2 ) return 1;
return memcmp(pKey1,pKey2,n1);
}
/*
** Return a pointer to the appropriate hash function given the key class.
**
** The C syntax in this function definition may be unfamilar to some
** programmers, so we provide the following additional explanation:
**
** The name of the function is "hashFunction". The function takes a
** single parameter "keyClass". The return value of hashFunction()
** is a pointer to another function. Specifically, the return value
** of hashFunction() is a pointer to a function that takes two parameters
** with types "const void*" and "int" and returns an "int".
*/
static int (*hashFunction(int keyClass))(const void*,int){
if( keyClass==FTS2_HASH_STRING ){
return &strHash;
}else{
assert( keyClass==FTS2_HASH_BINARY );
return &binHash;
}
}
/*
** Return a pointer to the appropriate hash function given the key class.
**
** For help in interpreted the obscure C code in the function definition,
** see the header comment on the previous function.
*/
static int (*compareFunction(int keyClass))(const void*,int,const void*,int){
if( keyClass==FTS2_HASH_STRING ){
return &strCompare;
}else{
assert( keyClass==FTS2_HASH_BINARY );
return &binCompare;
}
}
/* Link an element into the hash table
*/
static void insertElement(
fts2Hash *pH, /* The complete hash table */
struct _fts2ht *pEntry, /* The entry into which pNew is inserted */
fts2HashElem *pNew /* The element to be inserted */
){
fts2HashElem *pHead; /* First element already in pEntry */
pHead = pEntry->chain;
if( pHead ){
pNew->next = pHead;
pNew->prev = pHead->prev;
if( pHead->prev ){ pHead->prev->next = pNew; }
else { pH->first = pNew; }
pHead->prev = pNew;
}else{
pNew->next = pH->first;
if( pH->first ){ pH->first->prev = pNew; }
pNew->prev = 0;
pH->first = pNew;
}
pEntry->count++;
pEntry->chain = pNew;
}
/* Resize the hash table so that it cantains "new_size" buckets.
** "new_size" must be a power of 2. The hash table might fail
** to resize if sqliteMalloc() fails.
*/
static void rehash(fts2Hash *pH, int new_size){
struct _fts2ht *new_ht; /* The new hash table */
fts2HashElem *elem, *next_elem; /* For looping over existing elements */
int (*xHash)(const void*,int); /* The hash function */
assert( (new_size & (new_size-1))==0 );
new_ht = (struct _fts2ht *)fts2HashMalloc( new_size*sizeof(struct _fts2ht) );
if( new_ht==0 ) return;
fts2HashFree(pH->ht);
pH->ht = new_ht;
pH->htsize = new_size;
xHash = hashFunction(pH->keyClass);
for(elem=pH->first, pH->first=0; elem; elem = next_elem){
int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1);
next_elem = elem->next;
insertElement(pH, &new_ht[h], elem);
}
}
/* This function (for internal use only) locates an element in an
** hash table that matches the given key. The hash for this key has
** already been computed and is passed as the 4th parameter.
*/
static fts2HashElem *findElementGivenHash(
const fts2Hash *pH, /* The pH to be searched */
const void *pKey, /* The key we are searching for */
int nKey,
int h /* The hash for this key. */
){
fts2HashElem *elem; /* Used to loop thru the element list */
int count; /* Number of elements left to test */
int (*xCompare)(const void*,int,const void*,int); /* comparison function */
if( pH->ht ){
struct _fts2ht *pEntry = &pH->ht[h];
elem = pEntry->chain;
count = pEntry->count;
xCompare = compareFunction(pH->keyClass);
while( count-- && elem ){
if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){
return elem;
}
elem = elem->next;
}
}
return 0;
}
/* Remove a single entry from the hash table given a pointer to that
** element and a hash on the element's key.
*/
static void removeElementGivenHash(
fts2Hash *pH, /* The pH containing "elem" */
fts2HashElem* elem, /* The element to be removed from the pH */
int h /* Hash value for the element */
){
struct _fts2ht *pEntry;
if( elem->prev ){
elem->prev->next = elem->next;
}else{
pH->first = elem->next;
}
if( elem->next ){
elem->next->prev = elem->prev;
}
pEntry = &pH->ht[h];
if( pEntry->chain==elem ){
pEntry->chain = elem->next;
}
pEntry->count--;
if( pEntry->count<=0 ){
pEntry->chain = 0;
}
if( pH->copyKey && elem->pKey ){
fts2HashFree(elem->pKey);
}
fts2HashFree( elem );
pH->count--;
if( pH->count<=0 ){
assert( pH->first==0 );
assert( pH->count==0 );
fts2HashClear(pH);
}
}
/* Attempt to locate an element of the hash table pH with a key
** that matches pKey,nKey. Return the data for this element if it is
** found, or NULL if there is no match.
*/
void *sqlite3Fts2HashFind(const fts2Hash *pH, const void *pKey, int nKey){
int h; /* A hash on key */
fts2HashElem *elem; /* The element that matches key */
int (*xHash)(const void*,int); /* The hash function */
if( pH==0 || pH->ht==0 ) return 0;
xHash = hashFunction(pH->keyClass);
assert( xHash!=0 );
h = (*xHash)(pKey,nKey);
assert( (pH->htsize & (pH->htsize-1))==0 );
elem = findElementGivenHash(pH,pKey,nKey, h & (pH->htsize-1));
return elem ? elem->data : 0;
}
/* Insert an element into the hash table pH. The key is pKey,nKey
** and the data is "data".
**
** If no element exists with a matching key, then a new
** element is created. A copy of the key is made if the copyKey
** flag is set. NULL is returned.
**
** If another element already exists with the same key, then the
** new data replaces the old data and the old data is returned.
** The key is not copied in this instance. If a malloc fails, then
** the new data is returned and the hash table is unchanged.
**
** If the "data" parameter to this function is NULL, then the
** element corresponding to "key" is removed from the hash table.
*/
void *sqlite3Fts2HashInsert(
fts2Hash *pH, /* The hash table to insert into */
const void *pKey, /* The key */
int nKey, /* Number of bytes in the key */
void *data /* The data */
){
int hraw; /* Raw hash value of the key */
int h; /* the hash of the key modulo hash table size */
fts2HashElem *elem; /* Used to loop thru the element list */
fts2HashElem *new_elem; /* New element added to the pH */
int (*xHash)(const void*,int); /* The hash function */
assert( pH!=0 );
xHash = hashFunction(pH->keyClass);
assert( xHash!=0 );
hraw = (*xHash)(pKey, nKey);
assert( (pH->htsize & (pH->htsize-1))==0 );
h = hraw & (pH->htsize-1);
elem = findElementGivenHash(pH,pKey,nKey,h);
if( elem ){
void *old_data = elem->data;
if( data==0 ){
removeElementGivenHash(pH,elem,h);
}else{
elem->data = data;
}
return old_data;
}
if( data==0 ) return 0;
new_elem = (fts2HashElem*)fts2HashMalloc( sizeof(fts2HashElem) );
if( new_elem==0 ) return data;
if( pH->copyKey && pKey!=0 ){
new_elem->pKey = fts2HashMalloc( nKey );
if( new_elem->pKey==0 ){
fts2HashFree(new_elem);
return data;
}
memcpy((void*)new_elem->pKey, pKey, nKey);
}else{
new_elem->pKey = (void*)pKey;
}
new_elem->nKey = nKey;
pH->count++;
if( pH->htsize==0 ){
rehash(pH,8);
if( pH->htsize==0 ){
pH->count = 0;
fts2HashFree(new_elem);
return data;
}
}
if( pH->count > pH->htsize ){
rehash(pH,pH->htsize*2);
}
assert( pH->htsize>0 );
assert( (pH->htsize & (pH->htsize-1))==0 );
h = hraw & (pH->htsize-1);
insertElement(pH, &pH->ht[h], new_elem);
new_elem->data = data;
return 0;
}
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */

View File

@@ -1,110 +0,0 @@
/*
** 2001 September 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** This is the header file for the generic hash-table implementation
** used in SQLite. We've modified it slightly to serve as a standalone
** hash table implementation for the full-text indexing module.
**
*/
#ifndef _FTS2_HASH_H_
#define _FTS2_HASH_H_
/* Forward declarations of structures. */
typedef struct fts2Hash fts2Hash;
typedef struct fts2HashElem fts2HashElem;
/* A complete hash table is an instance of the following structure.
** The internals of this structure are intended to be opaque -- client
** code should not attempt to access or modify the fields of this structure
** directly. Change this structure only by using the routines below.
** However, many of the "procedures" and "functions" for modifying and
** accessing this structure are really macros, so we can't really make
** this structure opaque.
*/
struct fts2Hash {
char keyClass; /* HASH_INT, _POINTER, _STRING, _BINARY */
char copyKey; /* True if copy of key made on insert */
int count; /* Number of entries in this table */
fts2HashElem *first; /* The first element of the array */
int htsize; /* Number of buckets in the hash table */
struct _fts2ht { /* the hash table */
int count; /* Number of entries with this hash */
fts2HashElem *chain; /* Pointer to first entry with this hash */
} *ht;
};
/* Each element in the hash table is an instance of the following
** structure. All elements are stored on a single doubly-linked list.
**
** Again, this structure is intended to be opaque, but it can't really
** be opaque because it is used by macros.
*/
struct fts2HashElem {
fts2HashElem *next, *prev; /* Next and previous elements in the table */
void *data; /* Data associated with this element */
void *pKey; int nKey; /* Key associated with this element */
};
/*
** There are 2 different modes of operation for a hash table:
**
** FTS2_HASH_STRING pKey points to a string that is nKey bytes long
** (including the null-terminator, if any). Case
** is respected in comparisons.
**
** FTS2_HASH_BINARY pKey points to binary data nKey bytes long.
** memcmp() is used to compare keys.
**
** A copy of the key is made if the copyKey parameter to fts2HashInit is 1.
*/
#define FTS2_HASH_STRING 1
#define FTS2_HASH_BINARY 2
/*
** Access routines. To delete, insert a NULL pointer.
*/
void sqlite3Fts2HashInit(fts2Hash*, int keytype, int copyKey);
void *sqlite3Fts2HashInsert(fts2Hash*, const void *pKey, int nKey, void *pData);
void *sqlite3Fts2HashFind(const fts2Hash*, const void *pKey, int nKey);
void sqlite3Fts2HashClear(fts2Hash*);
/*
** Shorthand for the functions above
*/
#define fts2HashInit sqlite3Fts2HashInit
#define fts2HashInsert sqlite3Fts2HashInsert
#define fts2HashFind sqlite3Fts2HashFind
#define fts2HashClear sqlite3Fts2HashClear
/*
** Macros for looping over all elements of a hash table. The idiom is
** like this:
**
** fts2Hash h;
** fts2HashElem *p;
** ...
** for(p=fts2HashFirst(&h); p; p=fts2HashNext(p)){
** SomeStructure *pData = fts2HashData(p);
** // do something with pData
** }
*/
#define fts2HashFirst(H) ((H)->first)
#define fts2HashNext(E) ((E)->next)
#define fts2HashData(E) ((E)->data)
#define fts2HashKey(E) ((E)->pKey)
#define fts2HashKeysize(E) ((E)->nKey)
/*
** Number of entries in a hash table
*/
#define fts2HashCount(H) ((H)->count)
#endif /* _FTS2_HASH_H_ */

View File

@@ -1,260 +0,0 @@
/*
** 2007 June 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** This file implements a tokenizer for fts2 based on the ICU library.
**
** $Id: fts2_icu.c,v 1.3 2008/12/18 05:30:26 danielk1977 Exp $
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
#ifdef SQLITE_ENABLE_ICU
#include <assert.h>
#include <string.h>
#include "fts2_tokenizer.h"
#include <unicode/ubrk.h>
#include <unicode/ucol.h>
#include <unicode/ustring.h>
#include <unicode/utf16.h>
typedef struct IcuTokenizer IcuTokenizer;
typedef struct IcuCursor IcuCursor;
struct IcuTokenizer {
sqlite3_tokenizer base;
char *zLocale;
};
struct IcuCursor {
sqlite3_tokenizer_cursor base;
UBreakIterator *pIter; /* ICU break-iterator object */
int nChar; /* Number of UChar elements in pInput */
UChar *aChar; /* Copy of input using utf-16 encoding */
int *aOffset; /* Offsets of each character in utf-8 input */
int nBuffer;
char *zBuffer;
int iToken;
};
/*
** Create a new tokenizer instance.
*/
static int icuCreate(
int argc, /* Number of entries in argv[] */
const char * const *argv, /* Tokenizer creation arguments */
sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */
){
IcuTokenizer *p;
int n = 0;
if( argc>0 ){
n = strlen(argv[0])+1;
}
p = (IcuTokenizer *)sqlite3_malloc(sizeof(IcuTokenizer)+n);
if( !p ){
return SQLITE_NOMEM;
}
memset(p, 0, sizeof(IcuTokenizer));
if( n ){
p->zLocale = (char *)&p[1];
memcpy(p->zLocale, argv[0], n);
}
*ppTokenizer = (sqlite3_tokenizer *)p;
return SQLITE_OK;
}
/*
** Destroy a tokenizer
*/
static int icuDestroy(sqlite3_tokenizer *pTokenizer){
IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
sqlite3_free(p);
return SQLITE_OK;
}
/*
** Prepare to begin tokenizing a particular string. The input
** string to be tokenized is pInput[0..nBytes-1]. A cursor
** used to incrementally tokenize this string is returned in
** *ppCursor.
*/
static int icuOpen(
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
const char *zInput, /* Input string */
int nInput, /* Length of zInput in bytes */
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
){
IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
IcuCursor *pCsr;
const int32_t opt = U_FOLD_CASE_DEFAULT;
UErrorCode status = U_ZERO_ERROR;
int nChar;
UChar32 c;
int iInput = 0;
int iOut = 0;
*ppCursor = 0;
if( nInput<0 ){
nInput = strlen(zInput);
}
nChar = nInput+1;
pCsr = (IcuCursor *)sqlite3_malloc(
sizeof(IcuCursor) + /* IcuCursor */
((nChar+3)&~3) * sizeof(UChar) + /* IcuCursor.aChar[] */
(nChar+1) * sizeof(int) /* IcuCursor.aOffset[] */
);
if( !pCsr ){
return SQLITE_NOMEM;
}
memset(pCsr, 0, sizeof(IcuCursor));
pCsr->aChar = (UChar *)&pCsr[1];
pCsr->aOffset = (int *)&pCsr->aChar[(nChar+3)&~3];
pCsr->aOffset[iOut] = iInput;
U8_NEXT(zInput, iInput, nInput, c);
while( c>0 ){
int isError = 0;
c = u_foldCase(c, opt);
U16_APPEND(pCsr->aChar, iOut, nChar, c, isError);
if( isError ){
sqlite3_free(pCsr);
return SQLITE_ERROR;
}
pCsr->aOffset[iOut] = iInput;
if( iInput<nInput ){
U8_NEXT(zInput, iInput, nInput, c);
}else{
c = 0;
}
}
pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status);
if( !U_SUCCESS(status) ){
sqlite3_free(pCsr);
return SQLITE_ERROR;
}
pCsr->nChar = iOut;
ubrk_first(pCsr->pIter);
*ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
return SQLITE_OK;
}
/*
** Close a tokenization cursor previously opened by a call to icuOpen().
*/
static int icuClose(sqlite3_tokenizer_cursor *pCursor){
IcuCursor *pCsr = (IcuCursor *)pCursor;
ubrk_close(pCsr->pIter);
sqlite3_free(pCsr->zBuffer);
sqlite3_free(pCsr);
return SQLITE_OK;
}
/*
** Extract the next token from a tokenization cursor.
*/
static int icuNext(
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */
const char **ppToken, /* OUT: *ppToken is the token text */
int *pnBytes, /* OUT: Number of bytes in token */
int *piStartOffset, /* OUT: Starting offset of token */
int *piEndOffset, /* OUT: Ending offset of token */
int *piPosition /* OUT: Position integer of token */
){
IcuCursor *pCsr = (IcuCursor *)pCursor;
int iStart = 0;
int iEnd = 0;
int nByte = 0;
while( iStart==iEnd ){
UChar32 c;
iStart = ubrk_current(pCsr->pIter);
iEnd = ubrk_next(pCsr->pIter);
if( iEnd==UBRK_DONE ){
return SQLITE_DONE;
}
while( iStart<iEnd ){
int iWhite = iStart;
U8_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c);
if( u_isspace(c) ){
iStart = iWhite;
}else{
break;
}
}
assert(iStart<=iEnd);
}
do {
UErrorCode status = U_ZERO_ERROR;
if( nByte ){
char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte);
if( !zNew ){
return SQLITE_NOMEM;
}
pCsr->zBuffer = zNew;
pCsr->nBuffer = nByte;
}
u_strToUTF8(
pCsr->zBuffer, pCsr->nBuffer, &nByte, /* Output vars */
&pCsr->aChar[iStart], iEnd-iStart, /* Input vars */
&status /* Output success/failure */
);
} while( nByte>pCsr->nBuffer );
*ppToken = pCsr->zBuffer;
*pnBytes = nByte;
*piStartOffset = pCsr->aOffset[iStart];
*piEndOffset = pCsr->aOffset[iEnd];
*piPosition = pCsr->iToken++;
return SQLITE_OK;
}
/*
** The set of routines that implement the simple tokenizer
*/
static const sqlite3_tokenizer_module icuTokenizerModule = {
0, /* iVersion */
icuCreate, /* xCreate */
icuDestroy, /* xCreate */
icuOpen, /* xOpen */
icuClose, /* xClose */
icuNext, /* xNext */
};
/*
** Set *ppModule to point at the implementation of the ICU tokenizer.
*/
void sqlite3Fts2IcuTokenizerModule(
sqlite3_tokenizer_module const**ppModule
){
*ppModule = &icuTokenizerModule;
}
#endif /* defined(SQLITE_ENABLE_ICU) */
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */

View File

@@ -1,644 +0,0 @@
/*
** 2006 September 30
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** Implementation of the full-text-search tokenizer that implements
** a Porter stemmer.
*/
/*
** The code in this file is only compiled if:
**
** * The FTS2 module is being built as an extension
** (in which case SQLITE_CORE is not defined), or
**
** * The FTS2 module is being built into the core of
** SQLite (in which case SQLITE_ENABLE_FTS2 is defined).
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "sqlite3.h"
#include "sqlite3ext.h"
SQLITE_EXTENSION_INIT3
#include "fts2_tokenizer.h"
/*
** Class derived from sqlite3_tokenizer
*/
typedef struct porter_tokenizer {
sqlite3_tokenizer base; /* Base class */
} porter_tokenizer;
/*
** Class derived from sqlit3_tokenizer_cursor
*/
typedef struct porter_tokenizer_cursor {
sqlite3_tokenizer_cursor base;
const char *zInput; /* input we are tokenizing */
int nInput; /* size of the input */
int iOffset; /* current position in zInput */
int iToken; /* index of next token to be returned */
char *zToken; /* storage for current token */
int nAllocated; /* space allocated to zToken buffer */
} porter_tokenizer_cursor;
/* Forward declaration */
static const sqlite3_tokenizer_module porterTokenizerModule;
/*
** Create a new tokenizer instance.
*/
static int porterCreate(
int argc, const char * const *argv,
sqlite3_tokenizer **ppTokenizer
){
porter_tokenizer *t;
t = (porter_tokenizer *) sqlite3_malloc(sizeof(*t));
if( t==NULL ) return SQLITE_NOMEM;
memset(t, 0, sizeof(*t));
*ppTokenizer = &t->base;
return SQLITE_OK;
}
/*
** Destroy a tokenizer
*/
static int porterDestroy(sqlite3_tokenizer *pTokenizer){
sqlite3_free(pTokenizer);
return SQLITE_OK;
}
/*
** Prepare to begin tokenizing a particular string. The input
** string to be tokenized is zInput[0..nInput-1]. A cursor
** used to incrementally tokenize this string is returned in
** *ppCursor.
*/
static int porterOpen(
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
const char *zInput, int nInput, /* String to be tokenized */
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
){
porter_tokenizer_cursor *c;
c = (porter_tokenizer_cursor *) sqlite3_malloc(sizeof(*c));
if( c==NULL ) return SQLITE_NOMEM;
c->zInput = zInput;
if( zInput==0 ){
c->nInput = 0;
}else if( nInput<0 ){
c->nInput = (int)strlen(zInput);
}else{
c->nInput = nInput;
}
c->iOffset = 0; /* start tokenizing at the beginning */
c->iToken = 0;
c->zToken = NULL; /* no space allocated, yet. */
c->nAllocated = 0;
*ppCursor = &c->base;
return SQLITE_OK;
}
/*
** Close a tokenization cursor previously opened by a call to
** porterOpen() above.
*/
static int porterClose(sqlite3_tokenizer_cursor *pCursor){
porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
sqlite3_free(c->zToken);
sqlite3_free(c);
return SQLITE_OK;
}
/*
** Vowel or consonant
*/
static const char cType[] = {
0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0,
1, 1, 1, 2, 1
};
/*
** isConsonant() and isVowel() determine if their first character in
** the string they point to is a consonant or a vowel, according
** to Porter ruls.
**
** A consonate is any letter other than 'a', 'e', 'i', 'o', or 'u'.
** 'Y' is a consonant unless it follows another consonant,
** in which case it is a vowel.
**
** In these routine, the letters are in reverse order. So the 'y' rule
** is that 'y' is a consonant unless it is followed by another
** consonent.
*/
static int isVowel(const char*);
static int isConsonant(const char *z){
int j;
char x = *z;
if( x==0 ) return 0;
assert( x>='a' && x<='z' );
j = cType[x-'a'];
if( j<2 ) return j;
return z[1]==0 || isVowel(z + 1);
}
static int isVowel(const char *z){
int j;
char x = *z;
if( x==0 ) return 0;
assert( x>='a' && x<='z' );
j = cType[x-'a'];
if( j<2 ) return 1-j;
return isConsonant(z + 1);
}
/*
** Let any sequence of one or more vowels be represented by V and let
** C be sequence of one or more consonants. Then every word can be
** represented as:
**
** [C] (VC){m} [V]
**
** In prose: A word is an optional consonant followed by zero or
** vowel-consonant pairs followed by an optional vowel. "m" is the
** number of vowel consonant pairs. This routine computes the value
** of m for the first i bytes of a word.
**
** Return true if the m-value for z is 1 or more. In other words,
** return true if z contains at least one vowel that is followed
** by a consonant.
**
** In this routine z[] is in reverse order. So we are really looking
** for an instance of of a consonant followed by a vowel.
*/
static int m_gt_0(const char *z){
while( isVowel(z) ){ z++; }
if( *z==0 ) return 0;
while( isConsonant(z) ){ z++; }
return *z!=0;
}
/* Like mgt0 above except we are looking for a value of m which is
** exactly 1
*/
static int m_eq_1(const char *z){
while( isVowel(z) ){ z++; }
if( *z==0 ) return 0;
while( isConsonant(z) ){ z++; }
if( *z==0 ) return 0;
while( isVowel(z) ){ z++; }
if( *z==0 ) return 1;
while( isConsonant(z) ){ z++; }
return *z==0;
}
/* Like mgt0 above except we are looking for a value of m>1 instead
** or m>0
*/
static int m_gt_1(const char *z){
while( isVowel(z) ){ z++; }
if( *z==0 ) return 0;
while( isConsonant(z) ){ z++; }
if( *z==0 ) return 0;
while( isVowel(z) ){ z++; }
if( *z==0 ) return 0;
while( isConsonant(z) ){ z++; }
return *z!=0;
}
/*
** Return TRUE if there is a vowel anywhere within z[0..n-1]
*/
static int hasVowel(const char *z){
while( isConsonant(z) ){ z++; }
return *z!=0;
}
/*
** Return TRUE if the word ends in a double consonant.
**
** The text is reversed here. So we are really looking at
** the first two characters of z[].
*/
static int doubleConsonant(const char *z){
return isConsonant(z) && z[0]==z[1] && isConsonant(z+1);
}
/*
** Return TRUE if the word ends with three letters which
** are consonant-vowel-consonent and where the final consonant
** is not 'w', 'x', or 'y'.
**
** The word is reversed here. So we are really checking the
** first three letters and the first one cannot be in [wxy].
*/
static int star_oh(const char *z){
return
z[0]!=0 && isConsonant(z) &&
z[0]!='w' && z[0]!='x' && z[0]!='y' &&
z[1]!=0 && isVowel(z+1) &&
z[2]!=0 && isConsonant(z+2);
}
/*
** If the word ends with zFrom and xCond() is true for the stem
** of the word that preceeds the zFrom ending, then change the
** ending to zTo.
**
** The input word *pz and zFrom are both in reverse order. zTo
** is in normal order.
**
** Return TRUE if zFrom matches. Return FALSE if zFrom does not
** match. Not that TRUE is returned even if xCond() fails and
** no substitution occurs.
*/
static int stem(
char **pz, /* The word being stemmed (Reversed) */
const char *zFrom, /* If the ending matches this... (Reversed) */
const char *zTo, /* ... change the ending to this (not reversed) */
int (*xCond)(const char*) /* Condition that must be true */
){
char *z = *pz;
while( *zFrom && *zFrom==*z ){ z++; zFrom++; }
if( *zFrom!=0 ) return 0;
if( xCond && !xCond(z) ) return 1;
while( *zTo ){
*(--z) = *(zTo++);
}
*pz = z;
return 1;
}
/*
** This is the fallback stemmer used when the porter stemmer is
** inappropriate. The input word is copied into the output with
** US-ASCII case folding. If the input word is too long (more
** than 20 bytes if it contains no digits or more than 6 bytes if
** it contains digits) then word is truncated to 20 or 6 bytes
** by taking 10 or 3 bytes from the beginning and end.
*/
static void copy_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
int i, mx, j;
int hasDigit = 0;
for(i=0; i<nIn; i++){
int c = zIn[i];
if( c>='A' && c<='Z' ){
zOut[i] = c - 'A' + 'a';
}else{
if( c>='0' && c<='9' ) hasDigit = 1;
zOut[i] = c;
}
}
mx = hasDigit ? 3 : 10;
if( nIn>mx*2 ){
for(j=mx, i=nIn-mx; i<nIn; i++, j++){
zOut[j] = zOut[i];
}
i = j;
}
zOut[i] = 0;
*pnOut = i;
}
/*
** Stem the input word zIn[0..nIn-1]. Store the output in zOut.
** zOut is at least big enough to hold nIn bytes. Write the actual
** size of the output word (exclusive of the '\0' terminator) into *pnOut.
**
** Any upper-case characters in the US-ASCII character set ([A-Z])
** are converted to lower case. Upper-case UTF characters are
** unchanged.
**
** Words that are longer than about 20 bytes are stemmed by retaining
** a few bytes from the beginning and the end of the word. If the
** word contains digits, 3 bytes are taken from the beginning and
** 3 bytes from the end. For long words without digits, 10 bytes
** are taken from each end. US-ASCII case folding still applies.
**
** If the input word contains not digits but does characters not
** in [a-zA-Z] then no stemming is attempted and this routine just
** copies the input into the input into the output with US-ASCII
** case folding.
**
** Stemming never increases the length of the word. So there is
** no chance of overflowing the zOut buffer.
*/
static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){
int i, j, c;
char zReverse[28];
char *z, *z2;
if( nIn<3 || nIn>=sizeof(zReverse)-7 ){
/* The word is too big or too small for the porter stemmer.
** Fallback to the copy stemmer */
copy_stemmer(zIn, nIn, zOut, pnOut);
return;
}
for(i=0, j=sizeof(zReverse)-6; i<nIn; i++, j--){
c = zIn[i];
if( c>='A' && c<='Z' ){
zReverse[j] = c + 'a' - 'A';
}else if( c>='a' && c<='z' ){
zReverse[j] = c;
}else{
/* The use of a character not in [a-zA-Z] means that we fallback
** to the copy stemmer */
copy_stemmer(zIn, nIn, zOut, pnOut);
return;
}
}
memset(&zReverse[sizeof(zReverse)-5], 0, 5);
z = &zReverse[j+1];
/* Step 1a */
if( z[0]=='s' ){
if(
!stem(&z, "sess", "ss", 0) &&
!stem(&z, "sei", "i", 0) &&
!stem(&z, "ss", "ss", 0)
){
z++;
}
}
/* Step 1b */
z2 = z;
if( stem(&z, "dee", "ee", m_gt_0) ){
/* Do nothing. The work was all in the test */
}else if(
(stem(&z, "gni", "", hasVowel) || stem(&z, "de", "", hasVowel))
&& z!=z2
){
if( stem(&z, "ta", "ate", 0) ||
stem(&z, "lb", "ble", 0) ||
stem(&z, "zi", "ize", 0) ){
/* Do nothing. The work was all in the test */
}else if( doubleConsonant(z) && (*z!='l' && *z!='s' && *z!='z') ){
z++;
}else if( m_eq_1(z) && star_oh(z) ){
*(--z) = 'e';
}
}
/* Step 1c */
if( z[0]=='y' && hasVowel(z+1) ){
z[0] = 'i';
}
/* Step 2 */
switch( z[1] ){
case 'a':
stem(&z, "lanoita", "ate", m_gt_0) ||
stem(&z, "lanoit", "tion", m_gt_0);
break;
case 'c':
stem(&z, "icne", "ence", m_gt_0) ||
stem(&z, "icna", "ance", m_gt_0);
break;
case 'e':
stem(&z, "rezi", "ize", m_gt_0);
break;
case 'g':
stem(&z, "igol", "log", m_gt_0);
break;
case 'l':
stem(&z, "ilb", "ble", m_gt_0) ||
stem(&z, "illa", "al", m_gt_0) ||
stem(&z, "iltne", "ent", m_gt_0) ||
stem(&z, "ile", "e", m_gt_0) ||
stem(&z, "ilsuo", "ous", m_gt_0);
break;
case 'o':
stem(&z, "noitazi", "ize", m_gt_0) ||
stem(&z, "noita", "ate", m_gt_0) ||
stem(&z, "rota", "ate", m_gt_0);
break;
case 's':
stem(&z, "msila", "al", m_gt_0) ||
stem(&z, "ssenevi", "ive", m_gt_0) ||
stem(&z, "ssenluf", "ful", m_gt_0) ||
stem(&z, "ssensuo", "ous", m_gt_0);
break;
case 't':
stem(&z, "itila", "al", m_gt_0) ||
stem(&z, "itivi", "ive", m_gt_0) ||
stem(&z, "itilib", "ble", m_gt_0);
break;
}
/* Step 3 */
switch( z[0] ){
case 'e':
stem(&z, "etaci", "ic", m_gt_0) ||
stem(&z, "evita", "", m_gt_0) ||
stem(&z, "ezila", "al", m_gt_0);
break;
case 'i':
stem(&z, "itici", "ic", m_gt_0);
break;
case 'l':
stem(&z, "laci", "ic", m_gt_0) ||
stem(&z, "luf", "", m_gt_0);
break;
case 's':
stem(&z, "ssen", "", m_gt_0);
break;
}
/* Step 4 */
switch( z[1] ){
case 'a':
if( z[0]=='l' && m_gt_1(z+2) ){
z += 2;
}
break;
case 'c':
if( z[0]=='e' && z[2]=='n' && (z[3]=='a' || z[3]=='e') && m_gt_1(z+4) ){
z += 4;
}
break;
case 'e':
if( z[0]=='r' && m_gt_1(z+2) ){
z += 2;
}
break;
case 'i':
if( z[0]=='c' && m_gt_1(z+2) ){
z += 2;
}
break;
case 'l':
if( z[0]=='e' && z[2]=='b' && (z[3]=='a' || z[3]=='i') && m_gt_1(z+4) ){
z += 4;
}
break;
case 'n':
if( z[0]=='t' ){
if( z[2]=='a' ){
if( m_gt_1(z+3) ){
z += 3;
}
}else if( z[2]=='e' ){
stem(&z, "tneme", "", m_gt_1) ||
stem(&z, "tnem", "", m_gt_1) ||
stem(&z, "tne", "", m_gt_1);
}
}
break;
case 'o':
if( z[0]=='u' ){
if( m_gt_1(z+2) ){
z += 2;
}
}else if( z[3]=='s' || z[3]=='t' ){
stem(&z, "noi", "", m_gt_1);
}
break;
case 's':
if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){
z += 3;
}
break;
case 't':
stem(&z, "eta", "", m_gt_1) ||
stem(&z, "iti", "", m_gt_1);
break;
case 'u':
if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){
z += 3;
}
break;
case 'v':
case 'z':
if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){
z += 3;
}
break;
}
/* Step 5a */
if( z[0]=='e' ){
if( m_gt_1(z+1) ){
z++;
}else if( m_eq_1(z+1) && !star_oh(z+1) ){
z++;
}
}
/* Step 5b */
if( m_gt_1(z) && z[0]=='l' && z[1]=='l' ){
z++;
}
/* z[] is now the stemmed word in reverse order. Flip it back
** around into forward order and return.
*/
*pnOut = i = strlen(z);
zOut[i] = 0;
while( *z ){
zOut[--i] = *(z++);
}
}
/*
** Characters that can be part of a token. We assume any character
** whose value is greater than 0x80 (any UTF character) can be
** part of a token. In other words, delimiters all must have
** values of 0x7f or lower.
*/
static const char porterIdChar[] = {
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
};
#define isDelim(C) (((ch=C)&0x80)==0 && (ch<0x30 || !porterIdChar[ch-0x30]))
/*
** Extract the next token from a tokenization cursor. The cursor must
** have been opened by a prior call to porterOpen().
*/
static int porterNext(
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by porterOpen */
const char **pzToken, /* OUT: *pzToken is the token text */
int *pnBytes, /* OUT: Number of bytes in token */
int *piStartOffset, /* OUT: Starting offset of token */
int *piEndOffset, /* OUT: Ending offset of token */
int *piPosition /* OUT: Position integer of token */
){
porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor;
const char *z = c->zInput;
while( c->iOffset<c->nInput ){
int iStartOffset, ch;
/* Scan past delimiter characters */
while( c->iOffset<c->nInput && isDelim(z[c->iOffset]) ){
c->iOffset++;
}
/* Count non-delimiter characters. */
iStartOffset = c->iOffset;
while( c->iOffset<c->nInput && !isDelim(z[c->iOffset]) ){
c->iOffset++;
}
if( c->iOffset>iStartOffset ){
int n = c->iOffset-iStartOffset;
if( n>c->nAllocated ){
c->nAllocated = n+20;
c->zToken = sqlite3_realloc(c->zToken, c->nAllocated);
if( c->zToken==NULL ) return SQLITE_NOMEM;
}
porter_stemmer(&z[iStartOffset], n, c->zToken, pnBytes);
*pzToken = c->zToken;
*piStartOffset = iStartOffset;
*piEndOffset = c->iOffset;
*piPosition = c->iToken++;
return SQLITE_OK;
}
}
return SQLITE_DONE;
}
/*
** The set of routines that implement the porter-stemmer tokenizer
*/
static const sqlite3_tokenizer_module porterTokenizerModule = {
0,
porterCreate,
porterDestroy,
porterOpen,
porterClose,
porterNext,
};
/*
** Allocate a new porter tokenizer. Return a pointer to the new
** tokenizer in *ppModule
*/
void sqlite3Fts2PorterTokenizerModule(
sqlite3_tokenizer_module const**ppModule
){
*ppModule = &porterTokenizerModule;
}
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */

View File

@@ -1,375 +0,0 @@
/*
** 2007 June 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
** This is part of an SQLite module implementing full-text search.
** This particular file implements the generic tokenizer interface.
*/
/*
** The code in this file is only compiled if:
**
** * The FTS2 module is being built as an extension
** (in which case SQLITE_CORE is not defined), or
**
** * The FTS2 module is being built into the core of
** SQLite (in which case SQLITE_ENABLE_FTS2 is defined).
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
#include "sqlite3.h"
#include "sqlite3ext.h"
SQLITE_EXTENSION_INIT3
#include "fts2_hash.h"
#include "fts2_tokenizer.h"
#include <assert.h>
/*
** Implementation of the SQL scalar function for accessing the underlying
** hash table. This function may be called as follows:
**
** SELECT <function-name>(<key-name>);
** SELECT <function-name>(<key-name>, <pointer>);
**
** where <function-name> is the name passed as the second argument
** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer').
**
** If the <pointer> argument is specified, it must be a blob value
** containing a pointer to be stored as the hash data corresponding
** to the string <key-name>. If <pointer> is not specified, then
** the string <key-name> must already exist in the has table. Otherwise,
** an error is returned.
**
** Whether or not the <pointer> argument is specified, the value returned
** is a blob containing the pointer stored as the hash data corresponding
** to string <key-name> (after the hash-table is updated, if applicable).
*/
static void scalarFunc(
sqlite3_context *context,
int argc,
sqlite3_value **argv
){
fts2Hash *pHash;
void *pPtr = 0;
const unsigned char *zName;
int nName;
assert( argc==1 || argc==2 );
pHash = (fts2Hash *)sqlite3_user_data(context);
zName = sqlite3_value_text(argv[0]);
nName = sqlite3_value_bytes(argv[0])+1;
if( argc==2 ){
void *pOld;
int n = sqlite3_value_bytes(argv[1]);
if( n!=sizeof(pPtr) ){
sqlite3_result_error(context, "argument type mismatch", -1);
return;
}
pPtr = *(void **)sqlite3_value_blob(argv[1]);
pOld = sqlite3Fts2HashInsert(pHash, (void *)zName, nName, pPtr);
if( pOld==pPtr ){
sqlite3_result_error(context, "out of memory", -1);
return;
}
}else{
pPtr = sqlite3Fts2HashFind(pHash, zName, nName);
if( !pPtr ){
char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
sqlite3_result_error(context, zErr, -1);
sqlite3_free(zErr);
return;
}
}
sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT);
}
#ifdef SQLITE_TEST
#if defined(INCLUDE_SQLITE_TCL_H)
# include "sqlite_tcl.h"
#else
# include "tcl.h"
#endif
#include <string.h>
/*
** Implementation of a special SQL scalar function for testing tokenizers
** designed to be used in concert with the Tcl testing framework. This
** function must be called with two arguments:
**
** SELECT <function-name>(<key-name>, <input-string>);
** SELECT <function-name>(<key-name>, <pointer>);
**
** where <function-name> is the name passed as the second argument
** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer')
** concatenated with the string '_test' (e.g. 'fts2_tokenizer_test').
**
** The return value is a string that may be interpreted as a Tcl
** list. For each token in the <input-string>, three elements are
** added to the returned list. The first is the token position, the
** second is the token text (folded, stemmed, etc.) and the third is the
** substring of <input-string> associated with the token. For example,
** using the built-in "simple" tokenizer:
**
** SELECT fts_tokenizer_test('simple', 'I don't see how');
**
** will return the string:
**
** "{0 i I 1 dont don't 2 see see 3 how how}"
**
*/
static void testFunc(
sqlite3_context *context,
int argc,
sqlite3_value **argv
){
fts2Hash *pHash;
sqlite3_tokenizer_module *p;
sqlite3_tokenizer *pTokenizer = 0;
sqlite3_tokenizer_cursor *pCsr = 0;
const char *zErr = 0;
const char *zName;
int nName;
const char *zInput;
int nInput;
const char *zArg = 0;
const char *zToken;
int nToken;
int iStart;
int iEnd;
int iPos;
Tcl_Obj *pRet;
assert( argc==2 || argc==3 );
nName = sqlite3_value_bytes(argv[0]);
zName = (const char *)sqlite3_value_text(argv[0]);
nInput = sqlite3_value_bytes(argv[argc-1]);
zInput = (const char *)sqlite3_value_text(argv[argc-1]);
if( argc==3 ){
zArg = (const char *)sqlite3_value_text(argv[1]);
}
pHash = (fts2Hash *)sqlite3_user_data(context);
p = (sqlite3_tokenizer_module *)sqlite3Fts2HashFind(pHash, zName, nName+1);
if( !p ){
char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
sqlite3_result_error(context, zErr, -1);
sqlite3_free(zErr);
return;
}
pRet = Tcl_NewObj();
Tcl_IncrRefCount(pRet);
if( SQLITE_OK!=p->xCreate(zArg ? 1 : 0, &zArg, &pTokenizer) ){
zErr = "error in xCreate()";
goto finish;
}
pTokenizer->pModule = p;
if( SQLITE_OK!=p->xOpen(pTokenizer, zInput, nInput, &pCsr) ){
zErr = "error in xOpen()";
goto finish;
}
pCsr->pTokenizer = pTokenizer;
while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){
Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos));
Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
zToken = &zInput[iStart];
nToken = iEnd-iStart;
Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
}
if( SQLITE_OK!=p->xClose(pCsr) ){
zErr = "error in xClose()";
goto finish;
}
if( SQLITE_OK!=p->xDestroy(pTokenizer) ){
zErr = "error in xDestroy()";
goto finish;
}
finish:
if( zErr ){
sqlite3_result_error(context, zErr, -1);
}else{
sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT);
}
Tcl_DecrRefCount(pRet);
}
static
int registerTokenizer(
sqlite3 *db,
char *zName,
const sqlite3_tokenizer_module *p
){
int rc;
sqlite3_stmt *pStmt;
const char zSql[] = "SELECT fts2_tokenizer(?, ?)";
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
if( rc!=SQLITE_OK ){
return rc;
}
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
sqlite3_step(pStmt);
return sqlite3_finalize(pStmt);
}
static
int queryFts2Tokenizer(
sqlite3 *db,
char *zName,
const sqlite3_tokenizer_module **pp
){
int rc;
sqlite3_stmt *pStmt;
const char zSql[] = "SELECT fts2_tokenizer(?)";
*pp = 0;
rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
if( rc!=SQLITE_OK ){
return rc;
}
sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
if( SQLITE_ROW==sqlite3_step(pStmt) ){
if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
}
}
return sqlite3_finalize(pStmt);
}
void sqlite3Fts2SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
/*
** Implementation of the scalar function fts2_tokenizer_internal_test().
** This function is used for testing only, it is not included in the
** build unless SQLITE_TEST is defined.
**
** The purpose of this is to test that the fts2_tokenizer() function
** can be used as designed by the C-code in the queryFts2Tokenizer and
** registerTokenizer() functions above. These two functions are repeated
** in the README.tokenizer file as an example, so it is important to
** test them.
**
** To run the tests, evaluate the fts2_tokenizer_internal_test() scalar
** function with no arguments. An assert() will fail if a problem is
** detected. i.e.:
**
** SELECT fts2_tokenizer_internal_test();
**
*/
static void intTestFunc(
sqlite3_context *context,
int argc,
sqlite3_value **argv
){
int rc;
const sqlite3_tokenizer_module *p1;
const sqlite3_tokenizer_module *p2;
sqlite3 *db = (sqlite3 *)sqlite3_user_data(context);
/* Test the query function */
sqlite3Fts2SimpleTokenizerModule(&p1);
rc = queryFts2Tokenizer(db, "simple", &p2);
assert( rc==SQLITE_OK );
assert( p1==p2 );
rc = queryFts2Tokenizer(db, "nosuchtokenizer", &p2);
assert( rc==SQLITE_ERROR );
assert( p2==0 );
assert( 0==strcmp(sqlite3_errmsg(db), "unknown tokenizer: nosuchtokenizer") );
/* Test the storage function */
rc = registerTokenizer(db, "nosuchtokenizer", p1);
assert( rc==SQLITE_OK );
rc = queryFts2Tokenizer(db, "nosuchtokenizer", &p2);
assert( rc==SQLITE_OK );
assert( p2==p1 );
sqlite3_result_text(context, "ok", -1, SQLITE_STATIC);
}
#endif
/*
** Set up SQL objects in database db used to access the contents of
** the hash table pointed to by argument pHash. The hash table must
** been initialized to use string keys, and to take a private copy
** of the key when a value is inserted. i.e. by a call similar to:
**
** sqlite3Fts2HashInit(pHash, FTS2_HASH_STRING, 1);
**
** This function adds a scalar function (see header comment above
** scalarFunc() in this file for details) and, if ENABLE_TABLE is
** defined at compilation time, a temporary virtual table (see header
** comment above struct HashTableVtab) to the database schema. Both
** provide read/write access to the contents of *pHash.
**
** The third argument to this function, zName, is used as the name
** of both the scalar and, if created, the virtual table.
*/
int sqlite3Fts2InitHashTable(
sqlite3 *db,
fts2Hash *pHash,
const char *zName
){
int rc = SQLITE_OK;
void *p = (void *)pHash;
const int any = SQLITE_ANY;
char *zTest = 0;
char *zTest2 = 0;
#ifdef SQLITE_TEST
void *pdb = (void *)db;
zTest = sqlite3_mprintf("%s_test", zName);
zTest2 = sqlite3_mprintf("%s_internal_test", zName);
if( !zTest || !zTest2 ){
rc = SQLITE_NOMEM;
}
#endif
if( rc!=SQLITE_OK
|| (rc = sqlite3_create_function(db, zName, 1, any, p, scalarFunc, 0, 0))
|| (rc = sqlite3_create_function(db, zName, 2, any, p, scalarFunc, 0, 0))
#ifdef SQLITE_TEST
|| (rc = sqlite3_create_function(db, zTest, 2, any, p, testFunc, 0, 0))
|| (rc = sqlite3_create_function(db, zTest, 3, any, p, testFunc, 0, 0))
|| (rc = sqlite3_create_function(db, zTest2, 0, any, pdb, intTestFunc, 0, 0))
#endif
);
sqlite3_free(zTest);
sqlite3_free(zTest2);
return rc;
}
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */

View File

@@ -1,145 +0,0 @@
/*
** 2006 July 10
**
** The author disclaims copyright to this source code.
**
*************************************************************************
** Defines the interface to tokenizers used by fulltext-search. There
** are three basic components:
**
** sqlite3_tokenizer_module is a singleton defining the tokenizer
** interface functions. This is essentially the class structure for
** tokenizers.
**
** sqlite3_tokenizer is used to define a particular tokenizer, perhaps
** including customization information defined at creation time.
**
** sqlite3_tokenizer_cursor is generated by a tokenizer to generate
** tokens from a particular input.
*/
#ifndef _FTS2_TOKENIZER_H_
#define _FTS2_TOKENIZER_H_
/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time.
** If tokenizers are to be allowed to call sqlite3_*() functions, then
** we will need a way to register the API consistently.
*/
#include "sqlite3.h"
/*
** Structures used by the tokenizer interface. When a new tokenizer
** implementation is registered, the caller provides a pointer to
** an sqlite3_tokenizer_module containing pointers to the callback
** functions that make up an implementation.
**
** When an fts2 table is created, it passes any arguments passed to
** the tokenizer clause of the CREATE VIRTUAL TABLE statement to the
** sqlite3_tokenizer_module.xCreate() function of the requested tokenizer
** implementation. The xCreate() function in turn returns an
** sqlite3_tokenizer structure representing the specific tokenizer to
** be used for the fts2 table (customized by the tokenizer clause arguments).
**
** To tokenize an input buffer, the sqlite3_tokenizer_module.xOpen()
** method is called. It returns an sqlite3_tokenizer_cursor object
** that may be used to tokenize a specific input buffer based on
** the tokenization rules supplied by a specific sqlite3_tokenizer
** object.
*/
typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
typedef struct sqlite3_tokenizer sqlite3_tokenizer;
typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
struct sqlite3_tokenizer_module {
/*
** Structure version. Should always be set to 0.
*/
int iVersion;
/*
** Create a new tokenizer. The values in the argv[] array are the
** arguments passed to the "tokenizer" clause of the CREATE VIRTUAL
** TABLE statement that created the fts2 table. For example, if
** the following SQL is executed:
**
** CREATE .. USING fts2( ... , tokenizer <tokenizer-name> arg1 arg2)
**
** then argc is set to 2, and the argv[] array contains pointers
** to the strings "arg1" and "arg2".
**
** This method should return either SQLITE_OK (0), or an SQLite error
** code. If SQLITE_OK is returned, then *ppTokenizer should be set
** to point at the newly created tokenizer structure. The generic
** sqlite3_tokenizer.pModule variable should not be initialized by
** this callback. The caller will do so.
*/
int (*xCreate)(
int argc, /* Size of argv array */
const char *const*argv, /* Tokenizer argument strings */
sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */
);
/*
** Destroy an existing tokenizer. The fts2 module calls this method
** exactly once for each successful call to xCreate().
*/
int (*xDestroy)(sqlite3_tokenizer *pTokenizer);
/*
** Create a tokenizer cursor to tokenize an input buffer. The caller
** is responsible for ensuring that the input buffer remains valid
** until the cursor is closed (using the xClose() method).
*/
int (*xOpen)(
sqlite3_tokenizer *pTokenizer, /* Tokenizer object */
const char *pInput, int nBytes, /* Input buffer */
sqlite3_tokenizer_cursor **ppCursor /* OUT: Created tokenizer cursor */
);
/*
** Destroy an existing tokenizer cursor. The fts2 module calls this
** method exactly once for each successful call to xOpen().
*/
int (*xClose)(sqlite3_tokenizer_cursor *pCursor);
/*
** Retrieve the next token from the tokenizer cursor pCursor. This
** method should either return SQLITE_OK and set the values of the
** "OUT" variables identified below, or SQLITE_DONE to indicate that
** the end of the buffer has been reached, or an SQLite error code.
**
** *ppToken should be set to point at a buffer containing the
** normalized version of the token (i.e. after any case-folding and/or
** stemming has been performed). *pnBytes should be set to the length
** of this buffer in bytes. The input text that generated the token is
** identified by the byte offsets returned in *piStartOffset and
** *piEndOffset.
**
** The buffer *ppToken is set to point at is managed by the tokenizer
** implementation. It is only required to be valid until the next call
** to xNext() or xClose().
*/
/* TODO(shess) current implementation requires pInput to be
** nul-terminated. This should either be fixed, or pInput/nBytes
** should be converted to zInput.
*/
int (*xNext)(
sqlite3_tokenizer_cursor *pCursor, /* Tokenizer cursor */
const char **ppToken, int *pnBytes, /* OUT: Normalized text for token */
int *piStartOffset, /* OUT: Byte offset of token in input buffer */
int *piEndOffset, /* OUT: Byte offset of end of token in input buffer */
int *piPosition /* OUT: Number of tokens returned before this one */
);
};
struct sqlite3_tokenizer {
const sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */
/* Tokenizer implementations will typically add additional fields */
};
struct sqlite3_tokenizer_cursor {
sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */
/* Tokenizer implementations will typically add additional fields */
};
#endif /* _FTS2_TOKENIZER_H_ */

View File

@@ -1,233 +0,0 @@
/*
** 2006 Oct 10
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
** Implementation of the "simple" full-text-search tokenizer.
*/
/*
** The code in this file is only compiled if:
**
** * The FTS2 module is being built as an extension
** (in which case SQLITE_CORE is not defined), or
**
** * The FTS2 module is being built into the core of
** SQLite (in which case SQLITE_ENABLE_FTS2 is defined).
*/
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "sqlite3.h"
#include "sqlite3ext.h"
SQLITE_EXTENSION_INIT3
#include "fts2_tokenizer.h"
typedef struct simple_tokenizer {
sqlite3_tokenizer base;
char delim[128]; /* flag ASCII delimiters */
} simple_tokenizer;
typedef struct simple_tokenizer_cursor {
sqlite3_tokenizer_cursor base;
const char *pInput; /* input we are tokenizing */
int nBytes; /* size of the input */
int iOffset; /* current position in pInput */
int iToken; /* index of next token to be returned */
char *pToken; /* storage for current token */
int nTokenAllocated; /* space allocated to zToken buffer */
} simple_tokenizer_cursor;
/* Forward declaration */
static const sqlite3_tokenizer_module simpleTokenizerModule;
static int simpleDelim(simple_tokenizer *t, unsigned char c){
return c<0x80 && t->delim[c];
}
/*
** Create a new tokenizer instance.
*/
static int simpleCreate(
int argc, const char * const *argv,
sqlite3_tokenizer **ppTokenizer
){
simple_tokenizer *t;
t = (simple_tokenizer *) sqlite3_malloc(sizeof(*t));
if( t==NULL ) return SQLITE_NOMEM;
memset(t, 0, sizeof(*t));
/* TODO(shess) Delimiters need to remain the same from run to run,
** else we need to reindex. One solution would be a meta-table to
** track such information in the database, then we'd only want this
** information on the initial create.
*/
if( argc>1 ){
int i, n = strlen(argv[1]);
for(i=0; i<n; i++){
unsigned char ch = argv[1][i];
/* We explicitly don't support UTF-8 delimiters for now. */
if( ch>=0x80 ){
sqlite3_free(t);
return SQLITE_ERROR;
}
t->delim[ch] = 1;
}
} else {
/* Mark non-alphanumeric ASCII characters as delimiters */
int i;
for(i=1; i<0x80; i++){
t->delim[i] = !((i>='0' && i<='9') || (i>='A' && i<='Z') ||
(i>='a' && i<='z'));
}
}
*ppTokenizer = &t->base;
return SQLITE_OK;
}
/*
** Destroy a tokenizer
*/
static int simpleDestroy(sqlite3_tokenizer *pTokenizer){
sqlite3_free(pTokenizer);
return SQLITE_OK;
}
/*
** Prepare to begin tokenizing a particular string. The input
** string to be tokenized is pInput[0..nBytes-1]. A cursor
** used to incrementally tokenize this string is returned in
** *ppCursor.
*/
static int simpleOpen(
sqlite3_tokenizer *pTokenizer, /* The tokenizer */
const char *pInput, int nBytes, /* String to be tokenized */
sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
){
simple_tokenizer_cursor *c;
c = (simple_tokenizer_cursor *) sqlite3_malloc(sizeof(*c));
if( c==NULL ) return SQLITE_NOMEM;
c->pInput = pInput;
if( pInput==0 ){
c->nBytes = 0;
}else if( nBytes<0 ){
c->nBytes = (int)strlen(pInput);
}else{
c->nBytes = nBytes;
}
c->iOffset = 0; /* start tokenizing at the beginning */
c->iToken = 0;
c->pToken = NULL; /* no space allocated, yet. */
c->nTokenAllocated = 0;
*ppCursor = &c->base;
return SQLITE_OK;
}
/*
** Close a tokenization cursor previously opened by a call to
** simpleOpen() above.
*/
static int simpleClose(sqlite3_tokenizer_cursor *pCursor){
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
sqlite3_free(c->pToken);
sqlite3_free(c);
return SQLITE_OK;
}
/*
** Extract the next token from a tokenization cursor. The cursor must
** have been opened by a prior call to simpleOpen().
*/
static int simpleNext(
sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */
const char **ppToken, /* OUT: *ppToken is the token text */
int *pnBytes, /* OUT: Number of bytes in token */
int *piStartOffset, /* OUT: Starting offset of token */
int *piEndOffset, /* OUT: Ending offset of token */
int *piPosition /* OUT: Position integer of token */
){
simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor;
simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer;
unsigned char *p = (unsigned char *)c->pInput;
while( c->iOffset<c->nBytes ){
int iStartOffset;
/* Scan past delimiter characters */
while( c->iOffset<c->nBytes && simpleDelim(t, p[c->iOffset]) ){
c->iOffset++;
}
/* Count non-delimiter characters. */
iStartOffset = c->iOffset;
while( c->iOffset<c->nBytes && !simpleDelim(t, p[c->iOffset]) ){
c->iOffset++;
}
if( c->iOffset>iStartOffset ){
int i, n = c->iOffset-iStartOffset;
if( n>c->nTokenAllocated ){
c->nTokenAllocated = n+20;
c->pToken = sqlite3_realloc(c->pToken, c->nTokenAllocated);
if( c->pToken==NULL ) return SQLITE_NOMEM;
}
for(i=0; i<n; i++){
/* TODO(shess) This needs expansion to handle UTF-8
** case-insensitivity.
*/
unsigned char ch = p[iStartOffset+i];
c->pToken[i] = (ch>='A' && ch<='Z') ? (ch - 'A' + 'a') : ch;
}
*ppToken = c->pToken;
*pnBytes = n;
*piStartOffset = iStartOffset;
*piEndOffset = c->iOffset;
*piPosition = c->iToken++;
return SQLITE_OK;
}
}
return SQLITE_DONE;
}
/*
** The set of routines that implement the simple tokenizer
*/
static const sqlite3_tokenizer_module simpleTokenizerModule = {
0,
simpleCreate,
simpleDestroy,
simpleOpen,
simpleClose,
simpleNext,
};
/*
** Allocate a new simple tokenizer. Return a pointer to the new
** tokenizer in *ppModule
*/
void sqlite3Fts2SimpleTokenizerModule(
sqlite3_tokenizer_module const**ppModule
){
*ppModule = &simpleTokenizerModule;
}
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */

View File

@@ -1,116 +0,0 @@
#!/usr/bin/tclsh
#
# This script builds a single C code file holding all of FTS2 code.
# The name of the output file is fts2amal.c. To build this file,
# first do:
#
# make target_source
#
# The make target above moves all of the source code files into
# a subdirectory named "tsrc". (This script expects to find the files
# there and will not work if they are not found.)
#
# After the "tsrc" directory has been created and populated, run
# this script:
#
# tclsh mkfts2amal.tcl
#
# The amalgamated FTS2 code will be written into fts2amal.c
#
# Open the output file and write a header comment at the beginning
# of the file.
#
set out [open fts2amal.c w]
set today [clock format [clock seconds] -format "%Y-%m-%d %H:%M:%S UTC" -gmt 1]
puts $out [subst \
{/******************************************************************************
** This file is an amalgamation of separate C source files from the SQLite
** Full Text Search extension 2 (fts2). By combining all the individual C
** code files into this single large file, the entire code can be compiled
** as a one translation unit. This allows many compilers to do optimizations
** that would not be possible if the files were compiled separately. It also
** makes the code easier to import into other projects.
**
** This amalgamation was generated on $today.
*/}]
# These are the header files used by FTS2. The first time any of these
# files are seen in a #include statement in the C code, include the complete
# text of the file in-line. The file only needs to be included once.
#
foreach hdr {
fts2.h
fts2_hash.h
fts2_tokenizer.h
sqlite3.h
sqlite3ext.h
} {
set available_hdr($hdr) 1
}
# 78 stars used for comment formatting.
set s78 \
{*****************************************************************************}
# Insert a comment into the code
#
proc section_comment {text} {
global out s78
set n [string length $text]
set nstar [expr {60 - $n}]
set stars [string range $s78 0 $nstar]
puts $out "/************** $text $stars/"
}
# Read the source file named $filename and write it into the
# sqlite3.c output file. If any #include statements are seen,
# process them approprately.
#
proc copy_file {filename} {
global seen_hdr available_hdr out
set tail [file tail $filename]
section_comment "Begin file $tail"
set in [open $filename r]
while {![eof $in]} {
set line [gets $in]
if {[regexp {^#\s*include\s+["<]([^">]+)[">]} $line all hdr]} {
if {[info exists available_hdr($hdr)]} {
if {$available_hdr($hdr)} {
section_comment "Include $hdr in the middle of $tail"
copy_file tsrc/$hdr
section_comment "Continuing where we left off in $tail"
}
} elseif {![info exists seen_hdr($hdr)]} {
set seen_hdr($hdr) 1
puts $out $line
}
} elseif {[regexp {^#ifdef __cplusplus} $line]} {
puts $out "#if 0"
} elseif {[regexp {^#line} $line]} {
# Skip #line directives.
} else {
puts $out $line
}
}
close $in
section_comment "End of $tail"
}
# Process the source files. Process files containing commonly
# used subroutines first in order to help the compiler find
# inlining opportunities.
#
foreach file {
fts2.c
fts2_hash.c
fts2_porter.c
fts2_tokenizer.c
fts2_tokenizer1.c
fts2_icu.c
} {
copy_file tsrc/$file
}
close $out

View File

@@ -5287,9 +5287,8 @@ static void fts3EvalNextRow(
Fts3Expr *pExpr, /* Expr. to advance to next matching row */
int *pRc /* IN/OUT: Error code */
){
if( *pRc==SQLITE_OK ){
if( *pRc==SQLITE_OK && pExpr->bEof==0 ){
int bDescDoclist = pCsr->bDesc; /* Used by DOCID_CMP() macro */
assert( pExpr->bEof==0 );
pExpr->bStart = 1;
switch( pExpr->eType ){
@@ -5765,6 +5764,22 @@ static void fts3EvalUpdateCounts(Fts3Expr *pExpr, int nCol){
}
}
/*
** This is an sqlite3Fts3ExprIterate() callback. If the Fts3Expr.aMI[] array
** has not yet been allocated, allocate and zero it. Otherwise, just zero
** it.
*/
static int fts3AllocateMSI(Fts3Expr *pExpr, int iPhrase, void *pCtx){
Fts3Table *pTab = (Fts3Table*)pCtx;
UNUSED_PARAMETER(iPhrase);
if( pExpr->aMI==0 ){
pExpr->aMI = (u32 *)sqlite3_malloc64(pTab->nColumn * 3 * sizeof(u32));
if( pExpr->aMI==0 ) return SQLITE_NOMEM;
}
memset(pExpr->aMI, 0, pTab->nColumn * 3 * sizeof(u32));
return SQLITE_OK;
}
/*
** Expression pExpr must be of type FTSQUERY_PHRASE.
**
@@ -5786,7 +5801,6 @@ static int fts3EvalGatherStats(
if( pExpr->aMI==0 ){
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
Fts3Expr *pRoot; /* Root of NEAR expression */
Fts3Expr *p; /* Iterator used for several purposes */
sqlite3_int64 iPrevId = pCsr->iPrevId;
sqlite3_int64 iDocid;
@@ -5794,7 +5808,9 @@ static int fts3EvalGatherStats(
/* Find the root of the NEAR expression */
pRoot = pExpr;
while( pRoot->pParent && pRoot->pParent->eType==FTSQUERY_NEAR ){
while( pRoot->pParent
&& (pRoot->pParent->eType==FTSQUERY_NEAR || pRoot->bDeferred)
){
pRoot = pRoot->pParent;
}
iDocid = pRoot->iDocid;
@@ -5802,14 +5818,8 @@ static int fts3EvalGatherStats(
assert( pRoot->bStart );
/* Allocate space for the aMSI[] array of each FTSQUERY_PHRASE node */
for(p=pRoot; p; p=p->pLeft){
Fts3Expr *pE = (p->eType==FTSQUERY_PHRASE?p:p->pRight);
assert( pE->aMI==0 );
pE->aMI = (u32 *)sqlite3_malloc64(pTab->nColumn * 3 * sizeof(u32));
if( !pE->aMI ) return SQLITE_NOMEM;
memset(pE->aMI, 0, pTab->nColumn * 3 * sizeof(u32));
}
rc = sqlite3Fts3ExprIterate(pRoot, fts3AllocateMSI, (void*)pTab);
if( rc!=SQLITE_OK ) return rc;
fts3EvalRestart(pCsr, pRoot, &rc);
while( pCsr->isEof==0 && rc==SQLITE_OK ){
@@ -5965,6 +5975,7 @@ int sqlite3Fts3EvalPhrasePoslist(
u8 bTreeEof = 0;
Fts3Expr *p; /* Used to iterate from pExpr to root */
Fts3Expr *pNear; /* Most senior NEAR ancestor (or pExpr) */
Fts3Expr *pRun; /* Closest non-deferred ancestor of pNear */
int bMatch;
/* Check if this phrase descends from an OR expression node. If not,
@@ -5979,25 +5990,30 @@ int sqlite3Fts3EvalPhrasePoslist(
if( p->bEof ) bTreeEof = 1;
}
if( bOr==0 ) return SQLITE_OK;
pRun = pNear;
while( pRun->bDeferred ){
assert( pRun->pParent );
pRun = pRun->pParent;
}
/* This is the descendent of an OR node. In this case we cannot use
** an incremental phrase. Load the entire doclist for the phrase
** into memory in this case. */
if( pPhrase->bIncr ){
int bEofSave = pNear->bEof;
fts3EvalRestart(pCsr, pNear, &rc);
while( rc==SQLITE_OK && !pNear->bEof ){
fts3EvalNextRow(pCsr, pNear, &rc);
if( bEofSave==0 && pNear->iDocid==iDocid ) break;
int bEofSave = pRun->bEof;
fts3EvalRestart(pCsr, pRun, &rc);
while( rc==SQLITE_OK && !pRun->bEof ){
fts3EvalNextRow(pCsr, pRun, &rc);
if( bEofSave==0 && pRun->iDocid==iDocid ) break;
}
assert( rc!=SQLITE_OK || pPhrase->bIncr==0 );
if( rc==SQLITE_OK && pNear->bEof!=bEofSave ){
if( rc==SQLITE_OK && pRun->bEof!=bEofSave ){
rc = FTS_CORRUPT_VTAB;
}
}
if( bTreeEof ){
while( rc==SQLITE_OK && !pNear->bEof ){
fts3EvalNextRow(pCsr, pNear, &rc);
while( rc==SQLITE_OK && !pRun->bEof ){
fts3EvalNextRow(pCsr, pRun, &rc);
}
}
if( rc!=SQLITE_OK ) return rc;

View File

@@ -650,5 +650,7 @@ int sqlite3FtsUnicodeIsalnum(int);
int sqlite3FtsUnicodeIsdiacritic(int);
#endif
int sqlite3Fts3ExprIterate(Fts3Expr*, int (*x)(Fts3Expr*,int,void*), void*);
#endif /* !SQLITE_CORE || SQLITE_ENABLE_FTS3 */
#endif /* _FTSINT_H */

View File

@@ -41,7 +41,7 @@ typedef sqlite3_int64 i64;
/*
** Used as an fts3ExprIterate() context when loading phrase doclists to
** Used as an sqlite3Fts3ExprIterate() context when loading phrase doclists to
** Fts3Expr.aDoclist[]/nDoclist.
*/
typedef struct LoadDoclistCtx LoadDoclistCtx;
@@ -85,7 +85,7 @@ struct SnippetFragment {
};
/*
** This type is used as an fts3ExprIterate() context object while
** This type is used as an sqlite3Fts3ExprIterate() context object while
** accumulating the data returned by the matchinfo() function.
*/
typedef struct MatchInfo MatchInfo;
@@ -244,7 +244,7 @@ static void fts3GetDeltaPosition(char **pp, i64 *piPos){
}
/*
** Helper function for fts3ExprIterate() (see below).
** Helper function for sqlite3Fts3ExprIterate() (see below).
*/
static int fts3ExprIterate2(
Fts3Expr *pExpr, /* Expression to iterate phrases of */
@@ -278,7 +278,7 @@ static int fts3ExprIterate2(
** Otherwise, SQLITE_OK is returned after a callback has been made for
** all eligible phrase nodes.
*/
static int fts3ExprIterate(
int sqlite3Fts3ExprIterate(
Fts3Expr *pExpr, /* Expression to iterate phrases of */
int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */
void *pCtx /* Second argument to pass to callback */
@@ -287,10 +287,9 @@ static int fts3ExprIterate(
return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx);
}
/*
** This is an fts3ExprIterate() callback used while loading the doclists
** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also
** This is an sqlite3Fts3ExprIterate() callback used while loading the
** doclists for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also
** fts3ExprLoadDoclists().
*/
static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){
@@ -322,9 +321,9 @@ static int fts3ExprLoadDoclists(
int *pnToken /* OUT: Number of tokens in query */
){
int rc; /* Return Code */
LoadDoclistCtx sCtx = {0,0,0}; /* Context for fts3ExprIterate() */
LoadDoclistCtx sCtx = {0,0,0}; /* Context for sqlite3Fts3ExprIterate() */
sCtx.pCsr = pCsr;
rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb, (void *)&sCtx);
rc = sqlite3Fts3ExprIterate(pCsr->pExpr,fts3ExprLoadDoclistsCb,(void*)&sCtx);
if( pnPhrase ) *pnPhrase = sCtx.nPhrase;
if( pnToken ) *pnToken = sCtx.nToken;
return rc;
@@ -337,7 +336,7 @@ static int fts3ExprPhraseCountCb(Fts3Expr *pExpr, int iPhrase, void *ctx){
}
static int fts3ExprPhraseCount(Fts3Expr *pExpr){
int nPhrase = 0;
(void)fts3ExprIterate(pExpr, fts3ExprPhraseCountCb, (void *)&nPhrase);
(void)sqlite3Fts3ExprIterate(pExpr, fts3ExprPhraseCountCb, (void *)&nPhrase);
return nPhrase;
}
@@ -465,8 +464,9 @@ static void fts3SnippetDetails(
}
/*
** This function is an fts3ExprIterate() callback used by fts3BestSnippet().
** Each invocation populates an element of the SnippetIter.aPhrase[] array.
** This function is an sqlite3Fts3ExprIterate() callback used by
** fts3BestSnippet(). Each invocation populates an element of the
** SnippetIter.aPhrase[] array.
*/
static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){
SnippetIter *p = (SnippetIter *)ctx;
@@ -556,7 +556,9 @@ static int fts3BestSnippet(
sIter.nSnippet = nSnippet;
sIter.nPhrase = nList;
sIter.iCurrent = -1;
rc = fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void*)&sIter);
rc = sqlite3Fts3ExprIterate(
pCsr->pExpr, fts3SnippetFindPositions, (void*)&sIter
);
if( rc==SQLITE_OK ){
/* Set the *pmSeen output variable. */
@@ -917,10 +919,10 @@ static int fts3ExprLHitGather(
}
/*
** fts3ExprIterate() callback used to collect the "global" matchinfo stats
** for a single query.
** sqlite3Fts3ExprIterate() callback used to collect the "global" matchinfo
** stats for a single query.
**
** fts3ExprIterate() callback to load the 'global' elements of a
** sqlite3Fts3ExprIterate() callback to load the 'global' elements of a
** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements
** of the matchinfo array that are constant for all rows returned by the
** current query.
@@ -955,7 +957,7 @@ static int fts3ExprGlobalHitsCb(
}
/*
** fts3ExprIterate() callback used to collect the "local" part of the
** sqlite3Fts3ExprIterate() callback used to collect the "local" part of the
** FTS3_MATCHINFO_HITS array. The local stats are those elements of the
** array that are different for each row returned by the query.
*/
@@ -1151,7 +1153,7 @@ static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){
**/
aIter = sqlite3Fts3MallocZero(sizeof(LcsIterator) * pCsr->nPhrase);
if( !aIter ) return SQLITE_NOMEM;
(void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter);
(void)sqlite3Fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter);
for(i=0; i<pInfo->nPhrase; i++){
LcsIterator *pIter = &aIter[i];
@@ -1328,11 +1330,11 @@ static int fts3MatchinfoValues(
rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &pInfo->nDoc,0,0);
if( rc!=SQLITE_OK ) break;
}
rc = fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo);
rc = sqlite3Fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo);
sqlite3Fts3EvalTestDeferred(pCsr, &rc);
if( rc!=SQLITE_OK ) break;
}
(void)fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo);
(void)sqlite3Fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo);
break;
}
}
@@ -1555,7 +1557,7 @@ struct TermOffsetCtx {
};
/*
** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets().
** This function is an sqlite3Fts3ExprIterate() callback used by sqlite3Fts3Offsets().
*/
static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){
TermOffsetCtx *p = (TermOffsetCtx *)ctx;
@@ -1637,7 +1639,9 @@ void sqlite3Fts3Offsets(
*/
sCtx.iCol = iCol;
sCtx.iTerm = 0;
rc = fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void*)&sCtx);
rc = sqlite3Fts3ExprIterate(
pCsr->pExpr, fts3ExprTermOffsetInit, (void*)&sCtx
);
if( rc!=SQLITE_OK ) goto offsets_out;
/* Retreive the text stored in column iCol. If an SQL NULL is stored

View File

@@ -5073,7 +5073,7 @@ static void fts5MergePrefixLists(
/* Initialize a doclist-iterator for each input buffer. Arrange them in
** a linked-list starting at pHead in ascending order of rowid. Avoid
** linking any iterators already at EOF into the linked list at all. */
assert( nBuf+1<=sizeof(aMerger)/sizeof(aMerger[0]) );
assert( nBuf+1<=(int)(sizeof(aMerger)/sizeof(aMerger[0])) );
memset(aMerger, 0, sizeof(PrefixMerger)*(nBuf+1));
pHead = &aMerger[nBuf];
fts5DoclistIterInit(p1, &pHead->iter);

View File

@@ -1625,7 +1625,7 @@ static int fts5UpdateMethod(
int rc = SQLITE_OK; /* Return code */
/* A transaction must be open when this is called. */
assert( pTab->ts.eState==1 );
assert( pTab->ts.eState==1 || pTab->ts.eState==2 );
assert( pVtab->zErrMsg==0 );
assert( nArg==1 || nArg==(2+pConfig->nCol+2) );
@@ -2867,7 +2867,9 @@ static int fts5Init(sqlite3 *db){
}
if( rc==SQLITE_OK ){
rc = sqlite3_create_function(
db, "fts5_source_id", 0, SQLITE_UTF8, p, fts5SourceIdFunc, 0, 0
db, "fts5_source_id", 0,
SQLITE_UTF8|SQLITE_DETERMINISTIC|SQLITE_INNOCUOUS,
p, fts5SourceIdFunc, 0, 0
);
}
}

View File

@@ -402,5 +402,46 @@ do_test 15.4 {
list [catch { db2 eval COMMIT } msg] $msg
} {0 {}}
#-------------------------------------------------------------------------
reset_db
forcedelete test.db2
sqlite3 db2 test.db
do_execsql_test 16.0 {
ATTACH 'test.db2' AS aux;
CREATE TABLE aux.t2(x,y);
INSERT INTO t2 VALUES(1, 2);
CREATE VIRTUAL TABLE x1 USING fts5(a);
BEGIN;
INSERT INTO x1 VALUES('abc');
INSERT INTO t2 VALUES(3, 4);
}
do_execsql_test -db db2 16.1 {
ATTACH 'test.db2' AS aux;
BEGIN;
SELECT * FROM t2
} {1 2}
do_catchsql_test 16.2 {
COMMIT;
} {1 {database is locked}}
do_execsql_test 16.3 {
INSERT INTO x1 VALUES('def');
}
do_execsql_test -db db2 16.4 {
END
}
do_execsql_test 16.5 {
COMMIT
}
do_execsql_test -db db2 16.6 {
SELECT * FROM x1
} {abc def}
finish_test

View File

@@ -44,6 +44,8 @@
# endif
#endif
/* #define LSM_DEBUG_EXPENSIVE 1 */
/*
** Default values for various data structure parameters. These may be
** overridden by calls to lsm_config().
@@ -405,7 +407,7 @@ struct Segment {
LsmPgno iFirst; /* First page of this run */
LsmPgno iLastPg; /* Last page of this run */
LsmPgno iRoot; /* Root page number (if any) */
int nSize; /* Size of this run in pages */
LsmPgno nSize; /* Size of this run in pages */
Redirect *pRedirect; /* Block redirects (or NULL) */
};
@@ -853,6 +855,8 @@ int lsmVarintGet32(u8 *, int *);
int lsmVarintPut64(u8 *aData, i64 iVal);
int lsmVarintGet64(const u8 *aData, i64 *piVal);
int lsmVarintLen64(i64);
int lsmVarintLen32(int);
int lsmVarintSize(u8 c);

View File

@@ -511,7 +511,7 @@ static void ckptNewSegment(
pSegment->iFirst = ckptGobble64(aIn, piIn);
pSegment->iLastPg = ckptGobble64(aIn, piIn);
pSegment->iRoot = ckptGobble64(aIn, piIn);
pSegment->nSize = (int)ckptGobble64(aIn, piIn);
pSegment->nSize = ckptGobble64(aIn, piIn);
assert( pSegment->iFirst );
}

View File

@@ -215,8 +215,8 @@ struct FileSystem {
char *zLog; /* Database file name */
int nMetasize; /* Size of meta pages in bytes */
int nMetaRwSize; /* Read/written size of meta pages in bytes */
int nPagesize; /* Database page-size in bytes */
int nBlocksize; /* Database block-size in bytes */
i64 nPagesize; /* Database page-size in bytes */
i64 nBlocksize; /* Database block-size in bytes */
/* r/w file descriptors for both files. */
LsmFile *pLsmFile; /* Used after lsm_close() to link into list */
@@ -889,7 +889,7 @@ static LsmPgno fsFirstPageOnBlock(FileSystem *pFS, int iBlock){
iPg = pFS->nBlocksize * (LsmPgno)(iBlock-1) + 4;
}
}else{
const int nPagePerBlock = (pFS->nBlocksize / pFS->nPagesize);
const i64 nPagePerBlock = (pFS->nBlocksize / pFS->nPagesize);
if( iBlock==1 ){
iPg = 1 + ((pFS->nMetasize*2 + pFS->nPagesize - 1) / pFS->nPagesize);
}else{
@@ -1131,7 +1131,6 @@ static void fsGrowMapping(
i64 iSz, /* Minimum size to extend mapping to */
int *pRc /* IN/OUT: Error code */
){
assert( pFS->pCompress==0 );
assert( PAGE_HASPREV==4 );
if( *pRc==LSM_OK && iSz>pFS->nMap ){
@@ -1872,7 +1871,7 @@ void lsmFsGobble(
assert( nPgno>0 && 0==fsPageRedirects(pFS, pRun, aPgno[0]) );
iBlk = fsPageToBlock(pFS, pRun->iFirst);
pRun->nSize += (int)(pRun->iFirst - fsFirstPageOnBlock(pFS, iBlk));
pRun->nSize += (pRun->iFirst - fsFirstPageOnBlock(pFS, iBlk));
while( rc==LSM_OK ){
int iNext = 0;
@@ -1883,13 +1882,13 @@ void lsmFsGobble(
}
rc = fsBlockNext(pFS, pRun, iBlk, &iNext);
if( rc==LSM_OK ) rc = fsFreeBlock(pFS, pSnapshot, pRun, iBlk);
pRun->nSize -= (int)(
pRun->nSize -= (
1 + fsLastPageOnBlock(pFS, iBlk) - fsFirstPageOnBlock(pFS, iBlk)
);
iBlk = iNext;
}
pRun->nSize -= (int)(pRun->iFirst - fsFirstPageOnBlock(pFS, iBlk));
pRun->nSize -= (pRun->iFirst - fsFirstPageOnBlock(pFS, iBlk));
assert( pRun->nSize>0 );
}

View File

@@ -432,7 +432,7 @@ int lsm_config(lsm_db *pDb, int eParam, ...){
}
void lsmAppendSegmentList(LsmString *pStr, char *zPre, Segment *pSeg){
lsmStringAppendf(pStr, "%s{%d %d %d %d}", zPre,
lsmStringAppendf(pStr, "%s{%lld %lld %lld %lld}", zPre,
pSeg->iFirst, pSeg->iLastPg, pSeg->iRoot, pSeg->nSize
);
}

View File

@@ -1306,6 +1306,24 @@ int lsmBeginRoTrans(lsm_db *db){
}
}
/* In 'lsm_open()' we don't update the page and block sizes in the
** Filesystem for 'readonly' connection. Because member 'db->pShmhdr' is a
** nullpointer, this prevents loading a checkpoint. Now that the system is
** live this member should be set. So we can update both values in
** the Filesystem.
**
** Configure the file-system connection with the page-size and block-size
** of this database. Even if the database file is zero bytes in size
** on disk, these values have been set in shared-memory by now, and so
** are guaranteed not to change during the lifetime of this connection. */
if( LSM_OK==rc
&& 0==lsmCheckpointClientCacheOk(db)
&& LSM_OK==(rc=lsmCheckpointLoad(db, 0))
){
lsmFsSetPageSize(db->pFS, lsmCheckpointPgsz(db->aSnapshot));
lsmFsSetBlockSize(db->pFS, lsmCheckpointBlksz(db->aSnapshot));
}
if( rc==LSM_OK ){
rc = lsmBeginReadTrans(db);
}

View File

@@ -525,7 +525,7 @@ static u8 *pageGetKey(
LsmBlob *pBlob /* If required, use this for dynamic memory */
){
u8 *pKey;
int nDummy;
i64 nDummy;
int eType;
u8 *aData;
int nData;
@@ -537,10 +537,10 @@ static u8 *pageGetKey(
pKey = pageGetCell(aData, nData, iCell);
eType = *pKey++;
pKey += lsmVarintGet32(pKey, &nDummy);
pKey += lsmVarintGet64(pKey, &nDummy);
pKey += lsmVarintGet32(pKey, pnKey);
if( rtIsWrite(eType) ){
pKey += lsmVarintGet32(pKey, &nDummy);
pKey += lsmVarintGet64(pKey, &nDummy);
}
*piTopic = rtTopic(eType);
@@ -657,14 +657,14 @@ static int btreeCursorLoadKey(BtreeCursor *pCsr){
return rc;
}
static int btreeCursorPtr(u8 *aData, int nData, int iCell){
static LsmPgno btreeCursorPtr(u8 *aData, int nData, int iCell){
int nCell;
nCell = pageGetNRec(aData, nData);
if( iCell>=nCell ){
return (int)pageGetPtr(aData, nData);
return pageGetPtr(aData, nData);
}
return (int)pageGetRecordPtr(aData, nData, iCell);
return pageGetRecordPtr(aData, nData, iCell);
}
static int btreeCursorNext(BtreeCursor *pCsr){
@@ -751,7 +751,7 @@ static int btreeCursorFirst(BtreeCursor *pCsr){
Page *pPg = 0;
FileSystem *pFS = pCsr->pFS;
int iPg = (int)pCsr->pSeg->iRoot;
LsmPgno iPg = pCsr->pSeg->iRoot;
do {
rc = lsmFsDbPageGet(pFS, pCsr->pSeg, iPg, &pPg);
@@ -779,7 +779,7 @@ static int btreeCursorFirst(BtreeCursor *pCsr){
assert( pCsr->aPg[pCsr->nDepth].iCell==0 );
pCsr->aPg[pCsr->nDepth].pPage = pPg;
pCsr->nDepth++;
iPg = (int)pageGetRecordPtr(aData, nData, 0);
iPg = pageGetRecordPtr(aData, nData, 0);
}
}
}while( rc==LSM_OK );
@@ -871,7 +871,7 @@ static int btreeCursorRestore(
int nSeek;
int iTopicSeek;
int iPg = 0;
int iLoad = (int)pSeg->iRoot;
LsmPgno iLoad = pSeg->iRoot;
Page *pPg = pCsr->aPg[nDepth-1].pPage;
if( pageObjGetNRec(pPg)==0 ){
@@ -903,7 +903,7 @@ static int btreeCursorRestore(
aData = fsPageData(pPg2, &nData);
assert( (pageGetFlags(aData, nData) & SEGMENT_BTREE_FLAG) );
iLoad = (int)pageGetPtr(aData, nData);
iLoad = pageGetPtr(aData, nData);
iCell2 = pageGetNRec(aData, nData);
iMax = iCell2-1;
iMin = 0;
@@ -926,7 +926,7 @@ static int btreeCursorRestore(
assert( res!=0 );
if( res<0 ){
iLoad = (int)iPtr;
iLoad = iPtr;
iCell2 = iTry;
iMax = iTry-1;
}else{
@@ -1013,7 +1013,7 @@ static void segmentPtrSetPage(SegmentPtr *pPtr, Page *pNext){
static int segmentPtrLoadPage(
FileSystem *pFS,
SegmentPtr *pPtr, /* Load page into this SegmentPtr object */
int iNew /* Page number of new page */
LsmPgno iNew /* Page number of new page */
){
Page *pPg = 0; /* The new page */
int rc; /* Return Code */
@@ -1633,7 +1633,7 @@ static int segmentPtrSeek(
int iTopic, /* Key topic to seek to */
void *pKey, int nKey, /* Key to seek to */
int eSeek, /* Search bias - see above */
int *piPtr, /* OUT: FC pointer */
LsmPgno *piPtr, /* OUT: FC pointer */
int *pbStop
){
int (*xCmp)(void *, int, void *, int) = pCsr->pDb->xCmp;
@@ -1759,7 +1759,7 @@ static int segmentPtrSeek(
}
assert( rc!=LSM_OK || assertSeekResult(pCsr,pPtr,iTopic,pKey,nKey,eSeek) );
*piPtr = (int)iPtrOut;
*piPtr = iPtrOut;
return rc;
}
@@ -1773,11 +1773,11 @@ static int seekInBtree(
){
int i = 0;
int rc;
int iPg;
LsmPgno iPg;
Page *pPg = 0;
LsmBlob blob = {0, 0, 0};
iPg = (int)pSeg->iRoot;
iPg = pSeg->iRoot;
do {
LsmPgno *piFirst = 0;
if( aPg ){
@@ -1799,7 +1799,7 @@ static int seekInBtree(
flags = pageGetFlags(aData, nData);
if( (flags & SEGMENT_BTREE_FLAG)==0 ) break;
iPg = (int)pageGetPtr(aData, nData);
iPg = pageGetPtr(aData, nData);
nRec = pageGetNRec(aData, nData);
iMin = 0;
@@ -1825,7 +1825,7 @@ static int seekInBtree(
pCsr->pDb->xCmp, iTopic, pKey, nKey, iTopicT, pKeyT, nKeyT
);
if( res<0 ){
iPg = (int)iPtr;
iPg = iPtr;
iMax = iTry-1;
}else{
iMin = iTry+1;
@@ -1851,12 +1851,12 @@ static int seekInSegment(
SegmentPtr *pPtr,
int iTopic,
void *pKey, int nKey,
int iPg, /* Page to search */
LsmPgno iPg, /* Page to search */
int eSeek, /* Search bias - see above */
int *piPtr, /* OUT: FC pointer */
LsmPgno *piPtr, /* OUT: FC pointer */
int *pbStop /* OUT: Stop search flag */
){
int iPtr = iPg;
LsmPgno iPtr = iPg;
int rc = LSM_OK;
if( pPtr->pSeg->iRoot ){
@@ -1866,7 +1866,7 @@ static int seekInSegment(
if( rc==LSM_OK ) segmentPtrSetPage(pPtr, pPg);
}else{
if( iPtr==0 ){
iPtr = (int)pPtr->pSeg->iFirst;
iPtr = pPtr->pSeg->iFirst;
}
if( rc==LSM_OK ){
rc = segmentPtrLoadPage(pCsr->pDb->pFS, pPtr, iPtr);
@@ -1904,7 +1904,7 @@ static int seekInLevel(
){
Level *pLvl = aPtr[0].pLevel; /* Level to seek within */
int rc = LSM_OK; /* Return code */
int iOut = 0; /* Pointer to return to caller */
LsmPgno iOut = 0; /* Pointer to return to caller */
int res = -1; /* Result of xCmp(pKey, split) */
int nRhs = pLvl->nRight; /* Number of right-hand-side segments */
int bStop = 0;
@@ -1923,8 +1923,8 @@ static int seekInLevel(
** left-hand-side of the level in this case. */
if( res<0 ){
int i;
int iPtr = 0;
if( nRhs==0 ) iPtr = (int)*piPgno;
LsmPgno iPtr = 0;
if( nRhs==0 ) iPtr = *piPgno;
rc = seekInSegment(
pCsr, &aPtr[0], iTopic, pKey, nKey, iPtr, eSeek, &iOut, &bStop
@@ -1939,7 +1939,7 @@ static int seekInLevel(
if( res>=0 ){
int bHit = 0; /* True if at least one rhs is not EOF */
int iPtr = (int)*piPgno;
LsmPgno iPtr = *piPgno;
int i;
segmentPtrReset(&aPtr[0], LSM_SEGMENTPTR_FREE_THRESHOLD);
for(i=1; rc==LSM_OK && i<=nRhs && bStop==0; i++){
@@ -3361,6 +3361,8 @@ static int mergeWorkerPageOffset(u8 *aData, int nData){
int iOff;
int nKey;
int eType;
i64 nDummy;
nRec = lsmGetU16(&aData[SEGMENT_NRECORD_OFFSET(nData)]);
iOff = lsmGetU16(&aData[SEGMENT_CELLPTR_OFFSET(nData, nRec-1)]);
@@ -3370,7 +3372,7 @@ static int mergeWorkerPageOffset(u8 *aData, int nData){
|| eType==(LSM_SEPARATOR)
);
iOff += lsmVarintGet32(&aData[iOff], &nKey);
iOff += lsmVarintGet64(&aData[iOff], &nDummy);
iOff += lsmVarintGet32(&aData[iOff], &nKey);
return iOff + (eType ? nKey : 0);
@@ -3480,7 +3482,7 @@ static int mergeWorkerLoadHierarchy(MergeWorker *pMW){
lsm_env *pEnv = pMW->pDb->pEnv;
Page **apHier = 0;
int nHier = 0;
int iPg = (int)pSeg->iRoot;
LsmPgno iPg = pSeg->iRoot;
do {
Page *pPg = 0;
@@ -3506,7 +3508,7 @@ static int mergeWorkerLoadHierarchy(MergeWorker *pMW){
nHier++;
apHier[0] = pPg;
iPg = (int)pageGetPtr(aData, nData);
iPg = pageGetPtr(aData, nData);
}else{
lsmFsPageRelease(pPg);
break;
@@ -3625,10 +3627,11 @@ static int mergeWorkerBtreeWrite(
assert( lsmFsPageWritable(pOld) );
aData = fsPageData(pOld, &nData);
if( eType==0 ){
nByte = 2 + 1 + lsmVarintLen32((int)iPtr) + lsmVarintLen32((int)iKeyPg);
nByte = 2 + 1 + lsmVarintLen64(iPtr) + lsmVarintLen64(iKeyPg);
}else{
nByte = 2 + 1 + lsmVarintLen32((int)iPtr) + lsmVarintLen32(nKey) + nKey;
nByte = 2 + 1 + lsmVarintLen64(iPtr) + lsmVarintLen32(nKey) + nKey;
}
nRec = pageGetNRec(aData, nData);
nFree = SEGMENT_EOF(nData, nRec) - mergeWorkerPageOffset(aData, nData);
if( nByte<=nFree ) break;
@@ -3672,11 +3675,11 @@ static int mergeWorkerBtreeWrite(
lsmPutU16(&aData[SEGMENT_NRECORD_OFFSET(nData)], (u16)(nRec+1));
if( eType==0 ){
aData[iOff++] = 0x00;
iOff += lsmVarintPut32(&aData[iOff], (int)iPtr);
iOff += lsmVarintPut32(&aData[iOff], (int)iKeyPg);
iOff += lsmVarintPut64(&aData[iOff], iPtr);
iOff += lsmVarintPut64(&aData[iOff], iKeyPg);
}else{
aData[iOff++] = eType;
iOff += lsmVarintPut32(&aData[iOff], (int)iPtr);
iOff += lsmVarintPut64(&aData[iOff], iPtr);
iOff += lsmVarintPut32(&aData[iOff], nKey);
memcpy(&aData[iOff], pKey, nKey);
}
@@ -3872,7 +3875,7 @@ static int mergeWorkerNextPage(
static int mergeWorkerData(
MergeWorker *pMW, /* Merge worker object */
int bSep, /* True to write to separators run */
int iFPtr, /* Footer ptr for new pages */
LsmPgno iFPtr, /* Footer ptr for new pages */
u8 *aWrite, /* Write data from this buffer */
int nWrite /* Size of aWrite[] in bytes */
){
@@ -3916,14 +3919,14 @@ static int mergeWorkerData(
static int mergeWorkerFirstPage(MergeWorker *pMW){
int rc = LSM_OK; /* Return code */
Page *pPg = 0; /* First page of run pSeg */
int iFPtr = 0; /* Pointer value read from footer of pPg */
LsmPgno iFPtr = 0; /* Pointer value read from footer of pPg */
MultiCursor *pCsr = pMW->pCsr;
assert( pMW->pPage==0 );
if( pCsr->pBtCsr ){
rc = LSM_OK;
iFPtr = (int)pMW->pLevel->pNext->lhs.iFirst;
iFPtr = pMW->pLevel->pNext->lhs.iFirst;
}else if( pCsr->nPtr>0 ){
Segment *pSeg;
pSeg = pCsr->aPtr[pCsr->nPtr-1].pSeg;
@@ -3932,7 +3935,7 @@ static int mergeWorkerFirstPage(MergeWorker *pMW){
u8 *aData; /* Buffer for page pPg */
int nData; /* Size of aData[] in bytes */
aData = fsPageData(pPg, &nData);
iFPtr = (int)pageGetPtr(aData, nData);
iFPtr = pageGetPtr(aData, nData);
lsmFsPageRelease(pPg);
}
}
@@ -3951,7 +3954,7 @@ static int mergeWorkerWrite(
int eType, /* One of SORTED_SEPARATOR, WRITE or DELETE */
void *pKey, int nKey, /* Key value */
void *pVal, int nVal, /* Value value */
int iPtr /* Absolute value of page pointer, or 0 */
LsmPgno iPtr /* Absolute value of page pointer, or 0 */
){
int rc = LSM_OK; /* Return code */
Merge *pMerge; /* Persistent part of level merge state */
@@ -3960,8 +3963,8 @@ static int mergeWorkerWrite(
u8 *aData; /* Data buffer for page pWriter->pPage */
int nData = 0; /* Size of buffer aData[] in bytes */
int nRec = 0; /* Number of records on page pPg */
int iFPtr = 0; /* Value of pointer in footer of pPg */
int iRPtr = 0; /* Value of pointer written into record */
LsmPgno iFPtr = 0; /* Value of pointer in footer of pPg */
LsmPgno iRPtr = 0; /* Value of pointer written into record */
int iOff = 0; /* Current write offset within page pPg */
Segment *pSeg; /* Segment being written */
int flags = 0; /* If != 0, flags value for page footer */
@@ -3978,8 +3981,8 @@ static int mergeWorkerWrite(
if( pPg ){
aData = fsPageData(pPg, &nData);
nRec = pageGetNRec(aData, nData);
iFPtr = (int)pageGetPtr(aData, nData);
iRPtr = iPtr - iFPtr;
iFPtr = pageGetPtr(aData, nData);
iRPtr = iPtr ? (iPtr - iFPtr) : 0;
}
/* Figure out how much space is required by the new record. The space
@@ -3997,7 +4000,7 @@ static int mergeWorkerWrite(
** 4) Value size - 1 varint (only if LSM_INSERT flag is set)
*/
if( rc==LSM_OK ){
nHdr = 1 + lsmVarintLen32(iRPtr) + lsmVarintLen32(nKey);
nHdr = 1 + lsmVarintLen64(iRPtr) + lsmVarintLen32(nKey);
if( rtIsWrite(eType) ) nHdr += lsmVarintLen32(nVal);
/* If the entire header will not fit on page pPg, or if page pPg is
@@ -4009,8 +4012,8 @@ static int mergeWorkerWrite(
assert( aData );
memset(&aData[iOff], 0, SEGMENT_EOF(nData, nRec)-iOff);
}
iFPtr = (int)*pMW->pCsr->pPrevMergePtr;
iRPtr = iPtr - iFPtr;
iFPtr = *pMW->pCsr->pPrevMergePtr;
iRPtr = iPtr ? (iPtr - iFPtr) : 0;
iOff = 0;
nRec = 0;
rc = mergeWorkerNextPage(pMW, iFPtr);
@@ -4049,7 +4052,7 @@ static int mergeWorkerWrite(
/* Write the entry header into the current page. */
aData[iOff++] = (u8)eType; /* 1 */
iOff += lsmVarintPut32(&aData[iOff], iRPtr); /* 2 */
iOff += lsmVarintPut64(&aData[iOff], iRPtr); /* 2 */
iOff += lsmVarintPut32(&aData[iOff], nKey); /* 3 */
if( rtIsWrite(eType) ) iOff += lsmVarintPut32(&aData[iOff], nVal); /* 4 */
pMerge->iOutputOff = iOff;
@@ -4283,7 +4286,7 @@ static int mergeWorkerStep(MergeWorker *pMW){
pVal = pCsr->val.pData;
}
if( rc==LSM_OK ){
rc = mergeWorkerWrite(pMW, eType, pKey, nKey, pVal, nVal, (int)iPtr);
rc = mergeWorkerWrite(pMW, eType, pKey, nKey, pVal, nVal, iPtr);
}
}
}
@@ -4604,7 +4607,7 @@ static int mergeWorkerInit(
SegmentPtr *pPtr;
assert( pCsr->aPtr[i].pPg==0 );
pPtr = &pCsr->aPtr[i];
rc = segmentPtrLoadPage(pDb->pFS, pPtr, (int)pInput->iPg);
rc = segmentPtrLoadPage(pDb->pFS, pPtr, pInput->iPg);
if( rc==LSM_OK && pPtr->nCell>0 ){
rc = segmentPtrLoadCell(pPtr, pInput->iCell);
}
@@ -5469,7 +5472,7 @@ int lsmFlushTreeToDisk(lsm_db *pDb){
** be freed by the caller using lsmFree().
*/
static char *segToString(lsm_env *pEnv, Segment *pSeg, int nMin){
int nSize = pSeg->nSize;
LsmPgno nSize = pSeg->nSize;
LsmPgno iRoot = pSeg->iRoot;
LsmPgno iFirst = pSeg->iFirst;
LsmPgno iLast = pSeg->iLastPg;
@@ -5481,9 +5484,9 @@ static char *segToString(lsm_env *pEnv, Segment *pSeg, int nMin){
z1 = lsmMallocPrintf(pEnv, "%d.%d", iFirst, iLast);
if( iRoot ){
z2 = lsmMallocPrintf(pEnv, "root=%d", iRoot);
z2 = lsmMallocPrintf(pEnv, "root=%lld", iRoot);
}else{
z2 = lsmMallocPrintf(pEnv, "size=%d", nSize);
z2 = lsmMallocPrintf(pEnv, "size=%lld", nSize);
}
nPad = nMin - 2 - strlen(z1) - 1 - strlen(z2);
@@ -5536,7 +5539,7 @@ void sortedDumpPage(lsm_db *pDb, Segment *pRun, Page *pPg, int bVals){
int i;
int nRec;
int iPtr;
LsmPgno iPtr;
int flags;
u8 *aData;
int nData;
@@ -5544,11 +5547,11 @@ void sortedDumpPage(lsm_db *pDb, Segment *pRun, Page *pPg, int bVals){
aData = fsPageData(pPg, &nData);
nRec = pageGetNRec(aData, nData);
iPtr = (int)pageGetPtr(aData, nData);
iPtr = pageGetPtr(aData, nData);
flags = pageGetFlags(aData, nData);
lsmStringInit(&s, pDb->pEnv);
lsmStringAppendf(&s,"nCell=%d iPtr=%d flags=%d {", nRec, iPtr, flags);
lsmStringAppendf(&s,"nCell=%d iPtr=%lld flags=%d {", nRec, iPtr, flags);
if( flags&SEGMENT_BTREE_FLAG ) iPtr = 0;
for(i=0; i<nRec; i++){
@@ -5558,13 +5561,13 @@ void sortedDumpPage(lsm_db *pDb, Segment *pRun, Page *pPg, int bVals){
u8 *aVal = 0; int nVal = 0; /* Value */
int iTopic;
u8 *aCell;
int iPgPtr;
i64 iPgPtr;
int eType;
aCell = pageGetCell(aData, nData, i);
eType = *aCell++;
assert( (flags & SEGMENT_BTREE_FLAG) || eType!=0 );
aCell += lsmVarintGet32(aCell, &iPgPtr);
aCell += lsmVarintGet64(aCell, &iPgPtr);
if( eType==0 ){
LsmPgno iRef; /* Page number of referenced page */
@@ -5590,7 +5593,7 @@ void sortedDumpPage(lsm_db *pDb, Segment *pRun, Page *pPg, int bVals){
}
}
lsmStringAppendf(&s, " %d", iPgPtr+iPtr);
lsmStringAppendf(&s, " %lld", iPgPtr+iPtr);
lsmFsPageRelease(pRef);
}
lsmStringAppend(&s, "}", 1);
@@ -5720,20 +5723,20 @@ static int infoPageDump(
int nKeyWidth = 0;
LsmString str;
int nRec;
int iPtr;
LsmPgno iPtr;
int flags2;
int iCell;
u8 *aData; int nData; /* Page data and size thereof */
aData = fsPageData(pPg, &nData);
nRec = pageGetNRec(aData, nData);
iPtr = (int)pageGetPtr(aData, nData);
iPtr = pageGetPtr(aData, nData);
flags2 = pageGetFlags(aData, nData);
lsmStringInit(&str, pDb->pEnv);
lsmStringAppendf(&str, "Page : %lld (%d bytes)\n", iPg, nData);
lsmStringAppendf(&str, "nRec : %d\n", nRec);
lsmStringAppendf(&str, "iPtr : %d\n", iPtr);
lsmStringAppendf(&str, "iPtr : %lld\n", iPtr);
lsmStringAppendf(&str, "flags: %04x\n", flags2);
lsmStringAppendf(&str, "\n");

View File

@@ -185,6 +185,11 @@ int lsmVarintLen32(int n){
return lsmVarintPut32(aData, n);
}
int lsmVarintLen64(i64 n){
u8 aData[9];
return lsmVarintPut64(aData, n);
}
/*
** The argument is the first byte of a varint. This function returns the
** total number of bytes in the entire varint (including the first byte).

View File

@@ -53,8 +53,15 @@
#include <assert.h>
#ifndef SQLITE_SHELL_EXTFUNCS /* Guard for #include as built-in extension. */
#include "sqlite3ext.h"
#ifndef deliberate_fall_through
/* Quiet some compilers about some of our intentional code. */
# if GCC_VERSION>=7000000
# define deliberate_fall_through __attribute__((fallthrough));
# else
# define deliberate_fall_through
# endif
#endif
SQLITE_EXTENSION_INIT1;
@@ -64,12 +71,12 @@ SQLITE_EXTENSION_INIT1;
#define ND 0x82 /* Not above or digit-value */
#define PAD_CHAR '='
#ifndef UBYTE_TYPEDEF
typedef unsigned char ubyte;
# define UBYTE_TYPEDEF
#ifndef U8_TYPEDEF
typedef unsigned char u8;
#define U8_TYPEDEF
#endif
static const ubyte b64DigitValues[128] = {
static const u8 b64DigitValues[128] = {
/* HT LF VT FF CR */
ND,ND,ND,ND, ND,ND,ND,ND, ND,WS,WS,WS, WS,WS,ND,ND,
/* US */
@@ -92,18 +99,18 @@ static const char b64Numerals[64+1]
= "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
#define BX_DV_PROTO(c) \
((((ubyte)(c))<0x80)? (ubyte)(b64DigitValues[(ubyte)(c)]) : 0x80)
#define IS_BX_DIGIT(bdp) (((ubyte)(bdp))<0x80)
((((u8)(c))<0x80)? (u8)(b64DigitValues[(u8)(c)]) : 0x80)
#define IS_BX_DIGIT(bdp) (((u8)(bdp))<0x80)
#define IS_BX_WS(bdp) ((bdp)==WS)
#define IS_BX_PAD(bdp) ((bdp)==PC)
#define BX_NUMERAL(dv) (b64Numerals[(ubyte)(dv)])
#define BX_NUMERAL(dv) (b64Numerals[(u8)(dv)])
/* Width of base64 lines. Should be an integer multiple of 4. */
#define B64_DARK_MAX 72
/* Encode a byte buffer into base64 text with linefeeds appended to limit
** encoded group lengths to B64_DARK_MAX or to terminate the last group.
*/
static char* toBase64( ubyte *pIn, int nbIn, char *pOut ){
static char* toBase64( u8 *pIn, int nbIn, char *pOut ){
int nCol = 0;
while( nbIn >= 3 ){
/* Do the bit-shuffle, exploiting unsigned input to avoid masking. */
@@ -128,7 +135,7 @@ static char* toBase64( ubyte *pIn, int nbIn, char *pOut ){
if( nbe<nbIn ) qv |= *pIn++;
}
for( nbe=3; nbe>=0; --nbe ){
char ce = (nbe<nco)? BX_NUMERAL((ubyte)(qv & 0x3f)) : PAD_CHAR;
char ce = (nbe<nco)? BX_NUMERAL((u8)(qv & 0x3f)) : PAD_CHAR;
qv >>= 6;
pOut[nbe] = ce;
}
@@ -147,7 +154,7 @@ static char * skipNonB64( char *s ){
}
/* Decode base64 text into a byte buffer. */
static ubyte* fromBase64( char *pIn, int ncIn, ubyte *pOut ){
static u8* fromBase64( char *pIn, int ncIn, u8 *pOut ){
if( ncIn>0 && pIn[ncIn-1]=='\n' ) --ncIn;
while( ncIn>0 && *pIn!=PAD_CHAR ){
static signed char nboi[] = { 0, 0, 1, 2, 3 };
@@ -162,20 +169,20 @@ static ubyte* fromBase64( char *pIn, int ncIn, ubyte *pOut ){
if( nbo==0 ) break;
for( nac=0; nac<4; ++nac ){
char c = (nac<nti)? *pIn++ : b64Numerals[0];
ubyte bdp = BX_DV_PROTO(c);
u8 bdp = BX_DV_PROTO(c);
switch( bdp ){
case ND:
/* Treat dark non-digits as pad, but they terminate decode too. */
ncIn = 0;
/* fall thru */
deliberate_fall_through;
case WS:
/* Treat whitespace as pad and terminate this group.*/
nti = nac;
/* fall thru */
deliberate_fall_through;
case PC:
bdp = 0;
--nbo;
/* fall thru */
deliberate_fall_through;
default: /* bdp is the digit value. */
qv = qv<<6 | bdp;
break;
@@ -200,7 +207,7 @@ static void base64(sqlite3_context *context, int na, sqlite3_value *av[]){
int nvMax = sqlite3_limit(sqlite3_context_db_handle(context),
SQLITE_LIMIT_LENGTH, -1);
char *cBuf;
ubyte *bBuf;
u8 *bBuf;
assert(na==1);
switch( sqlite3_value_type(av[0]) ){
case SQLITE_BLOB:
@@ -213,7 +220,7 @@ static void base64(sqlite3_context *context, int na, sqlite3_value *av[]){
}
cBuf = sqlite3_malloc(nc);
if( !cBuf ) goto memFail;
bBuf = (ubyte*)sqlite3_value_blob(av[0]);
bBuf = (u8*)sqlite3_value_blob(av[0]);
nc = (int)(toBase64(bBuf, nb, cBuf) - cBuf);
sqlite3_result_text(context, cBuf, nc, sqlite3_free);
break;

View File

@@ -85,9 +85,7 @@
#ifndef BASE85_STANDALONE
#ifndef SQLITE_SHELL_EXTFUNCS /* Guard for #include as built-in extension. */
# include "sqlite3ext.h"
#endif
SQLITE_EXTENSION_INIT1;
@@ -113,9 +111,9 @@ static void sayHelp(){
}
#endif
#ifndef UBYTE_TYPEDEF
typedef unsigned char ubyte;
# define UBYTE_TYPEDEF
#ifndef U8_TYPEDEF
typedef unsigned char u8;
#define U8_TYPEDEF
#endif
/* Classify c according to interval within USASCII set w.r.t. base85
@@ -124,15 +122,15 @@ typedef unsigned char ubyte;
#define B85_CLASS( c ) (((c)>='#')+((c)>'&')+((c)>='*')+((c)>'z'))
/* Provide digitValue to b85Numeral offset as a function of above class. */
static ubyte b85_cOffset[] = { 0, '#', 0, '*'-4, 0 };
static u8 b85_cOffset[] = { 0, '#', 0, '*'-4, 0 };
#define B85_DNOS( c ) b85_cOffset[B85_CLASS(c)]
/* Say whether c is a base85 numeral. */
#define IS_B85( c ) (B85_CLASS(c) & 1)
#if 0 /* Not used, */
static ubyte base85DigitValue( char c ){
ubyte dv = (ubyte)(c - '#');
static u8 base85DigitValue( char c ){
u8 dv = (u8)(c - '#');
if( dv>87 ) return 0xff;
return (dv > 3)? dv-3 : dv;
}
@@ -151,7 +149,7 @@ static char * skipNonB85( char *s ){
/* Convert small integer, known to be in 0..84 inclusive, to base85 numeral.
* Do not use the macro form with argument expression having a side-effect.*/
#if 0
static char base85Numeral( ubyte b ){
static char base85Numeral( u8 b ){
return (b < 4)? (char)(b + '#') : (char)(b - 4 + '*');
}
#else
@@ -169,11 +167,12 @@ static char *putcs(char *pc, char *s){
** to be appended to encoded groups to limit their length to B85_DARK_MAX
** or to terminate the last group (to aid concatenation.)
*/
static char* toBase85( ubyte *pIn, int nbIn, char *pOut, char *pSep ){
static char* toBase85( u8 *pIn, int nbIn, char *pOut, char *pSep ){
int nCol = 0;
while( nbIn >= 4 ){
int nco = 5;
unsigned long qbv = (pIn[0]<<24)|(pIn[1]<<16)|(pIn[2]<<8)|pIn[3];
unsigned long qbv = (((unsigned long)pIn[0])<<24) |
(pIn[1]<<16) | (pIn[2]<<8) | pIn[3];
while( nco > 0 ){
unsigned nqv = (unsigned)(qbv/85UL);
unsigned char dv = qbv - 85UL*nqv;
@@ -197,7 +196,7 @@ static char* toBase85( ubyte *pIn, int nbIn, char *pOut, char *pSep ){
}
nCol += nco;
while( nco > 0 ){
ubyte dv = (ubyte)(qv % 85);
u8 dv = (u8)(qv % 85);
qv /= 85;
pOut[--nco] = base85Numeral(dv);
}
@@ -209,7 +208,7 @@ static char* toBase85( ubyte *pIn, int nbIn, char *pOut, char *pSep ){
}
/* Decode base85 text into a byte buffer. */
static ubyte* fromBase85( char *pIn, int ncIn, ubyte *pOut ){
static u8* fromBase85( char *pIn, int ncIn, u8 *pOut ){
if( ncIn>0 && pIn[ncIn-1]=='\n' ) --ncIn;
while( ncIn>0 ){
static signed char nboi[] = { 0, 0, 1, 2, 3, 4 };
@@ -223,7 +222,7 @@ static ubyte* fromBase85( char *pIn, int ncIn, ubyte *pOut ){
if( nbo==0 ) break;
while( nti>0 ){
char c = *pIn++;
ubyte cdo = B85_DNOS(c);
u8 cdo = B85_DNOS(c);
--ncIn;
if( cdo==0 ) break;
qv = 85 * qv + (c - cdo);
@@ -287,7 +286,7 @@ static void base85(sqlite3_context *context, int na, sqlite3_value *av[]){
int nvMax = sqlite3_limit(sqlite3_context_db_handle(context),
SQLITE_LIMIT_LENGTH, -1);
char *cBuf;
ubyte *bBuf;
u8 *bBuf;
assert(na==1);
switch( sqlite3_value_type(av[0]) ){
case SQLITE_BLOB:
@@ -300,7 +299,7 @@ static void base85(sqlite3_context *context, int na, sqlite3_value *av[]){
}
cBuf = sqlite3_malloc(nc);
if( !cBuf ) goto memFail;
bBuf = (ubyte*)sqlite3_value_blob(av[0]);
bBuf = (u8*)sqlite3_value_blob(av[0]);
nc = (int)(toBase85(bBuf, nb, cBuf, "\n") - cBuf);
sqlite3_result_text(context, cBuf, nc, sqlite3_free);
break;
@@ -370,7 +369,7 @@ static int sqlite3_base85_init
int main(int na, char *av[]){
int cin;
int rc = 0;
ubyte bBuf[4*(B85_DARK_MAX/5)];
u8 bBuf[4*(B85_DARK_MAX/5)];
char cBuf[5*(sizeof(bBuf)/4)+2];
size_t nio;
# ifndef OMIT_BASE85_CHECKER

View File

@@ -49,6 +49,9 @@ static void init_api_ptr(const sqlite3_api_routines *pApi){
#undef SQLITE_EXTENSION_INIT2
#define SQLITE_EXTENSION_INIT2(v) (void)v
typedef unsigned char u8;
#define U8_TYPEDEF
/* These next 2 undef's are only needed because the entry point names
* collide when formulated per the rules stated for loadable extension
* entry point names that will be deduced from the file basenames.

View File

@@ -28,7 +28,7 @@
**
** There is an optional third parameter to determine the datatype of
** the C-language array. Allowed values of the third parameter are
** 'int32', 'int64', 'double', 'char*'. Example:
** 'int32', 'int64', 'double', 'char*', 'struct iovec'. Example:
**
** SELECT * FROM carray($ptr,10,'char*');
**
@@ -56,6 +56,14 @@
SQLITE_EXTENSION_INIT1
#include <assert.h>
#include <string.h>
#ifdef _WIN32
struct iovec {
void *iov_base;
size_t iov_len;
};
#else
# include <sys/uio.h>
#endif
/* Allowed values for the mFlags parameter to sqlite3_carray_bind().
** Must exactly match the definitions in carray.h.
@@ -65,6 +73,7 @@ SQLITE_EXTENSION_INIT1
# define CARRAY_INT64 1 /* Data is 64-bit signed integers */
# define CARRAY_DOUBLE 2 /* Data is doubles */
# define CARRAY_TEXT 3 /* Data is char* */
# define CARRAY_BLOB 4 /* Data is struct iovec* */
#endif
#ifndef SQLITE_API
@@ -80,7 +89,8 @@ SQLITE_EXTENSION_INIT1
/*
** Names of allowed datatypes
*/
static const char *azType[] = { "int32", "int64", "double", "char*" };
static const char *azType[] = { "int32", "int64", "double", "char*",
"struct iovec" };
/*
** Structure used to hold the sqlite3_carray_bind() information
@@ -224,6 +234,12 @@ static int carrayColumn(
sqlite3_result_text(ctx, p[pCur->iRowid-1], -1, SQLITE_TRANSIENT);
return SQLITE_OK;
}
case CARRAY_BLOB: {
const struct iovec *p = (struct iovec*)pCur->pPtr;
sqlite3_result_blob(ctx, p[pCur->iRowid-1].iov_base,
(int)p[pCur->iRowid-1].iov_len, SQLITE_TRANSIENT);
return SQLITE_OK;
}
}
}
}
@@ -268,7 +284,7 @@ static int carrayFilter(
if( pBind==0 ) break;
pCur->pPtr = pBind->aData;
pCur->iCnt = pBind->nData;
pCur->eType = pBind->mFlags & 0x03;
pCur->eType = pBind->mFlags & 0x07;
break;
}
case 2:
@@ -431,24 +447,29 @@ SQLITE_API int sqlite3_carray_bind(
pNew->mFlags = mFlags;
if( xDestroy==SQLITE_TRANSIENT ){
sqlite3_int64 sz = nData;
switch( mFlags & 0x03 ){
switch( mFlags & 0x07 ){
case CARRAY_INT32: sz *= 4; break;
case CARRAY_INT64: sz *= 8; break;
case CARRAY_DOUBLE: sz *= 8; break;
case CARRAY_TEXT: sz *= sizeof(char*); break;
case CARRAY_BLOB: sz *= sizeof(struct iovec); break;
}
if( (mFlags & 0x03)==CARRAY_TEXT ){
if( (mFlags & 0x07)==CARRAY_TEXT ){
for(i=0; i<nData; i++){
const char *z = ((char**)aData)[i];
if( z ) sz += strlen(z) + 1;
}
}else if( (mFlags & 0x07)==CARRAY_BLOB ){
for(i=0; i<nData; i++){
sz += ((struct iovec*)aData)[i].iov_len;
}
}
pNew->aData = sqlite3_malloc64( sz );
if( pNew->aData==0 ){
sqlite3_free(pNew);
return SQLITE_NOMEM;
}
if( (mFlags & 0x03)==CARRAY_TEXT ){
if( (mFlags & 0x07)==CARRAY_TEXT ){
char **az = (char**)pNew->aData;
char *z = (char*)&az[nData];
for(i=0; i<nData; i++){
@@ -463,6 +484,16 @@ SQLITE_API int sqlite3_carray_bind(
memcpy(z, zData, n+1);
z += n+1;
}
}else if( (mFlags & 0x07)==CARRAY_BLOB ){
struct iovec *p = (struct iovec*)pNew->aData;
unsigned char *z = (unsigned char*)&p[nData];
for(i=0; i<nData; i++){
size_t n = ((struct iovec*)aData)[i].iov_len;
p[i].iov_len = n;
p[i].iov_base = z;
z += n;
memcpy(p[i].iov_base, ((struct iovec*)aData)[i].iov_base, n);
}
}else{
memcpy(pNew->aData, aData, sz);
}

View File

@@ -41,10 +41,10 @@ SQLITE_API int sqlite3_carray_bind(
#define CARRAY_INT64 1 /* Data is 64-bit signed integers */
#define CARRAY_DOUBLE 2 /* Data is doubles */
#define CARRAY_TEXT 3 /* Data is char* */
#define CARRAY_BLOB 4 /* Data is struct iovec */
#ifdef __cplusplus
} /* end of the 'extern "C"' block */
#endif
#endif /* ifndef _CARRAY_H */

View File

@@ -616,7 +616,7 @@ int sqlite3_decimal_init(
SQLITE_EXTENSION_INIT2(pApi);
for(i=0; i<sizeof(aFunc)/sizeof(aFunc[0]) && rc==SQLITE_OK; i++){
for(i=0; i<(int)(sizeof(aFunc)/sizeof(aFunc[0])) && rc==SQLITE_OK; i++){
rc = sqlite3_create_function(db, aFunc[i].zFuncName, aFunc[i].nArg,
SQLITE_UTF8|SQLITE_INNOCUOUS|SQLITE_DETERMINISTIC,
0, aFunc[i].xFunc, 0, 0);

View File

@@ -803,6 +803,7 @@ static void re_bytecode_func(
int i;
int n;
char *z;
(void)argc;
zPattern = (const char*)sqlite3_value_text(argv[0]);
if( zPattern==0 ) return;

View File

@@ -91,7 +91,9 @@ static void sqlarUncompressFunc(
}else{
const Bytef *pData= sqlite3_value_blob(argv[0]);
Bytef *pOut = sqlite3_malloc(sz);
if( Z_OK!=uncompress(pOut, &sz, pData, nData) ){
if( pOut==0 ){
sqlite3_result_error_nomem(context);
}else if( Z_OK!=uncompress(pOut, &sz, pData, nData) ){
sqlite3_result_error(context, "error in uncompress()", -1);
}else{
sqlite3_result_blob(context, pOut, sz, SQLITE_TRANSIENT);
@@ -100,7 +102,6 @@ static void sqlarUncompressFunc(
}
}
#ifdef _WIN32
__declspec(dllexport)
#endif

View File

@@ -97,6 +97,10 @@ static int stmtConnect(
#define STMT_COLUMN_MEM 10 /* SQLITE_STMTSTATUS_MEMUSED */
(void)pAux;
(void)argc;
(void)argv;
(void)pzErr;
rc = sqlite3_declare_vtab(db,
"CREATE TABLE x(sql,ncol,ro,busy,nscan,nsort,naidx,nstep,"
"reprep,run,mem)");
@@ -216,6 +220,10 @@ static int stmtFilter(
sqlite3_int64 iRowid = 1;
StmtRow **ppRow = 0;
(void)idxNum;
(void)idxStr;
(void)argc;
(void)argv;
stmtCsrReset(pCur);
ppRow = &pCur->pRow;
for(p=sqlite3_next_stmt(pCur->db, 0); p; p=sqlite3_next_stmt(pCur->db, p)){
@@ -271,6 +279,7 @@ static int stmtBestIndex(
sqlite3_vtab *tab,
sqlite3_index_info *pIdxInfo
){
(void)tab;
pIdxInfo->estimatedCost = (double)500;
pIdxInfo->estimatedRows = 500;
return SQLITE_OK;

View File

@@ -352,6 +352,7 @@ static int zipfileConnect(
const char *zFile = 0;
ZipfileTab *pNew = 0;
int rc;
(void)pAux;
/* If the table name is not "zipfile", require that the argument be
** specified. This stops zipfile tables from being created as:
@@ -808,6 +809,7 @@ static int zipfileGetEntry(
u8 *aRead;
char **pzErr = &pTab->base.zErrMsg;
int rc = SQLITE_OK;
(void)nBlob;
if( aBlob==0 ){
aRead = pTab->aBuffer;
@@ -1254,6 +1256,9 @@ static int zipfileFilter(
int rc = SQLITE_OK; /* Return Code */
int bInMemory = 0; /* True for an in-memory zipfile */
(void)idxStr;
(void)argc;
zipfileResetCursor(pCsr);
if( pTab->zFile ){
@@ -1280,7 +1285,7 @@ static int zipfileFilter(
}
if( 0==pTab->pWriteFd && 0==bInMemory ){
pCsr->pFile = fopen(zFile, "rb");
pCsr->pFile = zFile ? fopen(zFile, "rb") : 0;
if( pCsr->pFile==0 ){
zipfileCursorErr(pCsr, "cannot open file: %s", zFile);
rc = SQLITE_ERROR;
@@ -1314,6 +1319,7 @@ static int zipfileBestIndex(
int i;
int idx = -1;
int unusable = 0;
(void)tab;
for(i=0; i<pIdxInfo->nConstraint; i++){
const struct sqlite3_index_constraint *pCons = &pIdxInfo->aConstraint[i];
@@ -1564,6 +1570,8 @@ static int zipfileUpdate(
int bIsDir = 0;
u32 iCrc32 = 0;
(void)pRowid;
if( pTab->pWriteFd==0 ){
rc = zipfileBegin(pVtab);
if( rc!=SQLITE_OK ) return rc;
@@ -1898,6 +1906,7 @@ static int zipfileFindFunction(
void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), /* OUT: Result */
void **ppArg /* OUT: User data for *pxFunc */
){
(void)nArg;
if( sqlite3_stricmp("zipfile_cds", zName)==0 ){
*pxFunc = zipfileFunctionCds;
*ppArg = (void*)pVtab;

View File

@@ -11,6 +11,7 @@
#
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
set ::testprefix rbu1
db close

View File

@@ -10,13 +10,10 @@
#***********************************************************************
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
set ::testprefix rbu10
#--------------------------------------------------------------------
# Test that UPDATE commands work even if the input columns are in a
# different order to the output columns.

View File

@@ -10,10 +10,8 @@
#***********************************************************************
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
set ::testprefix rbu11

View File

@@ -10,10 +10,8 @@
#***********************************************************************
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
source $testdir/lock_common.tcl
set ::testprefix rbu12

View File

@@ -13,10 +13,8 @@
# for UPDATE statements. This tests RBU's internal UPDATE statement cache.
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
source $testdir/lock_common.tcl
set ::testprefix rbu13

View File

@@ -13,10 +13,8 @@
# table.
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
source $testdir/lock_common.tcl
set ::testprefix rbu14

View File

@@ -10,10 +10,8 @@
#***********************************************************************
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
set ::testprefix rbu3

View File

@@ -13,6 +13,7 @@
#
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
set ::testprefix rbu5

View File

@@ -13,10 +13,8 @@
# outcome of some other client writing to the database while an RBU update
# is being applied.
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
set ::testprefix rbu6
proc setup_test {} {

View File

@@ -13,10 +13,8 @@
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
set ::testprefix rbu7
# Test index:

View File

@@ -12,10 +12,8 @@
# Test the rbu_delta() feature.
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
set ::testprefix rbu8
do_execsql_test 1.0 {

View File

@@ -12,10 +12,8 @@
# Test RBU with virtual tables. And tables with no PRIMARY KEY declarations.
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
set ::testprefix rbu9
ifcapable !fts3 {

View File

@@ -14,10 +14,8 @@
# a wal mode database via RBU.
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
set ::testprefix rbuA
set db_sql {

View File

@@ -12,6 +12,7 @@
#
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
set ::testprefix rbuB
db close

View File

@@ -13,6 +13,7 @@
#
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
set ::testprefix rbuC
#-------------------------------------------------------------------------

View File

@@ -12,6 +12,7 @@
#
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
set ::testprefix rbubusy
db close

View File

@@ -11,6 +11,7 @@
#
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
set ::testprefix rbucollate
ifcapable !icu_collations {

View File

@@ -10,10 +10,8 @@
#***********************************************************************
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
set ::testprefix rbucrash
db close

View File

@@ -10,10 +10,8 @@
#***********************************************************************
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
set ::testprefix rbucrash2
db close

View File

@@ -12,10 +12,9 @@
# Tests for the [sqldiff --rbu] command.
#
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
set testprefix rbudiff
set PROG [test_find_sqldiff]

View File

@@ -13,10 +13,8 @@
# enabled by SQLITE_DIRECT_OVERFLOW_READ - Direct Overflow Read.
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
set ::testprefix rbudor
set bigA [string repeat a 5000]

View File

@@ -11,6 +11,7 @@
#
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
set ::testprefix rbuexlock
db close

View File

@@ -11,6 +11,7 @@
#
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
set ::testprefix rbuexpr
db close

View File

@@ -10,10 +10,8 @@
#***********************************************************************
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
source $testdir/malloc_common.tcl
set ::testprefix rbufault

View File

@@ -10,10 +10,8 @@
#***********************************************************************
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
source $testdir/malloc_common.tcl
set ::testprefix rbufault2

View File

@@ -13,6 +13,7 @@
#
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
source $testdir/malloc_common.tcl
set ::testprefix rbufault3

View File

@@ -10,10 +10,8 @@
#***********************************************************************
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
source $testdir/malloc_common.tcl
set ::testprefix rbufault4

View File

@@ -13,10 +13,8 @@
# contains tests to ensure that RBU works with FTS tables.
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
set ::testprefix rbufts
ifcapable !fts3 {

View File

@@ -11,6 +11,7 @@
#
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
set ::testprefix rbumisc
db close

View File

@@ -9,11 +9,14 @@
#
#***********************************************************************
#
# TESTRUNNER: slow
#
# This file contains tests of multiple RBU operations running
# concurrently within the same process.
#
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
set ::testprefix rbumulti
db close

View File

@@ -11,6 +11,7 @@
#
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
set ::testprefix rbupartial
db close

81
ext/rbu/rbupass.test Normal file
View File

@@ -0,0 +1,81 @@
# 2023 January 13
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
set ::testprefix rbupass
if {[info commands register_demovfs]==""} {
finish_test
return
}
db close
sqlite3_shutdown
sqlite3_config_uri 1
register_demovfs
sqlite3rbu_create_vfs myvfs demo
sqlite3 db file:test.db?vfs=myvfs
do_execsql_test 1.0 {
CREATE TABLE t1(a INTEGER PRIMARY KEY, b);
INSERT INTO t1 VALUES(1, 2);
SELECT * FROM t1;
} {1 2}
do_execsql_test 1.1 {
PRAGMA journal_mode = wal;
} {delete}
do_execsql_test 1.2 {
SELECT * FROM t1;
} {1 2}
do_test 1.3 {
forcedelete rbu.db
sqlite3 rbu rbu.db
rbu eval {
CREATE TABLE data_t1(a, b, rbu_control);
INSERT INTO data_t1 VALUES(2, 4, 0);
}
rbu close
} {}
do_test 1.4 {
sqlite3rbu rbu test.db rbu.db
} {rbu}
do_test 1.5 {
rbu step
} {SQLITE_CANTOPEN}
do_test 1.6 {
list [catch { rbu close } msg] $msg
} {1 {SQLITE_CANTOPEN - unable to open database file}}
do_test 1.7 {
sqlite3rbu_vacuum rbu test.db
} {rbu}
do_test 1.8 {
rbu step
catch { rbu close }
} {1}
do_execsql_test 1.9 {
SELECT * FROM t1;
} {1 2}
db close
sqlite3rbu_destroy_vfs myvfs
unregister_demovfs
sqlite3_shutdown
finish_test

View File

@@ -11,6 +11,7 @@
#
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
set ::testprefix rbuprogress

View File

@@ -12,6 +12,7 @@
#
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
set ::testprefix rburename

View File

@@ -9,11 +9,14 @@
#
#***********************************************************************
#
# TESTRUNNER: slow
#
# This file contains tests for resumption of RBU operations in the
# case where the previous RBU process crashed.
#
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
set ::testprefix rburesume
forcedelete test.db-shm test.db-oal

View File

@@ -10,10 +10,8 @@
#***********************************************************************
#
if {![info exists testdir]} {
set testdir [file join [file dirname [info script]] .. .. test]
}
source $testdir/tester.tcl
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
set ::testprefix rbusave
do_execsql_test 1.0 {

View File

@@ -12,6 +12,7 @@
#
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
set ::testprefix rbusplit
db close

View File

@@ -9,8 +9,10 @@
#
#***********************************************************************
#
# TESTRUNNER: slow
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
set ::testprefix rbutemplimit
db close

View File

@@ -15,6 +15,7 @@
#
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
set ::testprefix rbuvacuum
foreach step {0 1} {

View File

@@ -15,6 +15,7 @@
#
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
foreach {step} {0 1} {
foreach {ttt state} {

View File

@@ -15,6 +15,7 @@
#
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
set testprefix rbuvacuum3
do_execsql_test 1.0 {

View File

@@ -15,6 +15,7 @@
#
source [file join [file dirname [info script]] rbu_common.tcl]
ifcapable !rbu { finish_test ; return }
set testprefix rbuvacuum4
set step 1

View File

@@ -3830,7 +3830,8 @@ static void rbuSetupOal(sqlite3rbu *p, RbuState *pState){
static void rbuDeleteOalFile(sqlite3rbu *p){
char *zOal = rbuMPrintf(p, "%s-oal", p->zTarget);
if( zOal ){
sqlite3_vfs *pVfs = sqlite3_vfs_find(0);
sqlite3_vfs *pVfs = 0;
sqlite3_file_control(p->dbMain, "main", SQLITE_FCNTL_VFS_POINTER, &pVfs);
assert( pVfs && p->rc==SQLITE_OK && p->zErrmsg==0 );
pVfs->xDelete(pVfs, zOal, 0);
sqlite3_free(zOal);
@@ -4587,9 +4588,12 @@ static int rbuVfsClose(sqlite3_file *pFile){
sqlite3_free(p->zDel);
if( p->openFlags & SQLITE_OPEN_MAIN_DB ){
const sqlite3_io_methods *pMeth = p->pReal->pMethods;
rbuMainlistRemove(p);
rbuUnlockShm(p);
p->pReal->pMethods->xShmUnmap(p->pReal, 0);
if( pMeth->iVersion>1 && pMeth->xShmUnmap ){
pMeth->xShmUnmap(p->pReal, 0);
}
}
else if( (p->openFlags & SQLITE_OPEN_DELETEONCLOSE) && p->pRbu ){
rbuUpdateTempSize(p, 0);
@@ -5048,6 +5052,25 @@ static int rbuVfsOpen(
rbuVfsShmUnmap, /* xShmUnmap */
0, 0 /* xFetch, xUnfetch */
};
static sqlite3_io_methods rbuvfs_io_methods1 = {
1, /* iVersion */
rbuVfsClose, /* xClose */
rbuVfsRead, /* xRead */
rbuVfsWrite, /* xWrite */
rbuVfsTruncate, /* xTruncate */
rbuVfsSync, /* xSync */
rbuVfsFileSize, /* xFileSize */
rbuVfsLock, /* xLock */
rbuVfsUnlock, /* xUnlock */
rbuVfsCheckReservedLock, /* xCheckReservedLock */
rbuVfsFileControl, /* xFileControl */
rbuVfsSectorSize, /* xSectorSize */
rbuVfsDeviceCharacteristics, /* xDeviceCharacteristics */
0, 0, 0, 0, 0, 0
};
rbu_vfs *pRbuVfs = (rbu_vfs*)pVfs;
sqlite3_vfs *pRealVfs = pRbuVfs->pRealVfs;
rbu_file *pFd = (rbu_file *)pFile;
@@ -5102,10 +5125,15 @@ static int rbuVfsOpen(
rc = pRealVfs->xOpen(pRealVfs, zOpen, pFd->pReal, oflags, pOutFlags);
}
if( pFd->pReal->pMethods ){
const sqlite3_io_methods *pMeth = pFd->pReal->pMethods;
/* The xOpen() operation has succeeded. Set the sqlite3_file.pMethods
** pointer and, if the file is a main database file, link it into the
** mutex protected linked list of all such files. */
if( pMeth->iVersion<2 || pMeth->xShmLock==0 ){
pFile->pMethods = &rbuvfs_io_methods1;
}else{
pFile->pMethods = &rbuvfs_io_methods;
}
if( flags & SQLITE_OPEN_MAIN_DB ){
rbuMainlistAdd(pFd);
}

View File

@@ -98,7 +98,7 @@
** The order of the columns in the data_% table does not matter.
**
** Instead of a regular table, the RBU database may also contain virtual
** tables or view named using the data_<target> naming scheme.
** tables or views named using the data_<target> naming scheme.
**
** Instead of the plain data_<target> naming scheme, RBU database tables
** may also be named data<integer>_<target>, where <integer> is any sequence
@@ -111,7 +111,7 @@
**
** If the target database table is a virtual table or a table that has no
** PRIMARY KEY declaration, the data_% table must also contain a column
** named "rbu_rowid". This column is mapped to the tables implicit primary
** named "rbu_rowid". This column is mapped to the table's implicit primary
** key column - "rowid". Virtual tables for which the "rowid" column does
** not function like a primary key value cannot be updated using RBU. For
** example, if the target db contains either of the following:

View File

@@ -164,6 +164,9 @@ static int dbdataConnect(
DbdataTable *pTab = 0;
int rc = sqlite3_declare_vtab(db, pAux ? DBPTR_SCHEMA : DBDATA_SCHEMA);
(void)argc;
(void)argv;
(void)pzErr;
if( rc==SQLITE_OK ){
pTab = (DbdataTable*)sqlite3_malloc64(sizeof(DbdataTable));
if( pTab==0 ){
@@ -768,6 +771,8 @@ static int dbdataFilter(
DbdataTable *pTab = (DbdataTable*)pCursor->pVtab;
int rc = SQLITE_OK;
const char *zSchema = "main";
(void)idxStr;
(void)argc;
dbdataResetCursor(pCsr);
assert( pCsr->iPgno==1 );
@@ -936,6 +941,7 @@ int sqlite3_dbdata_init(
const sqlite3_api_routines *pApi
){
SQLITE_EXTENSION_INIT2(pApi);
(void)pzErrMsg;
return sqlite3DbdataRegister(db);
}

View File

@@ -761,6 +761,7 @@ static void recoverEscapeCrnl(
sqlite3_value **argv
){
const char *zText = (const char*)sqlite3_value_text(argv[0]);
(void)argc;
if( zText && zText[0]=='\'' ){
int nText = sqlite3_value_bytes(argv[0]);
int i;
@@ -913,7 +914,7 @@ static void recoverTransferSettings(sqlite3_recover *p){
return;
}
for(ii=0; ii<sizeof(aPragma)/sizeof(aPragma[0]); ii++){
for(ii=0; ii<(int)(sizeof(aPragma)/sizeof(aPragma[0])); ii++){
const char *zPrag = aPragma[ii];
sqlite3_stmt *p1 = 0;
p1 = recoverPreparePrintf(p, p->dbIn, "PRAGMA %Q.%s", p->zDb, zPrag);
@@ -991,7 +992,9 @@ static int recoverOpenOutput(sqlite3_recover *p){
}
/* Register the custom user-functions with the output handle. */
for(ii=0; p->errCode==SQLITE_OK && ii<sizeof(aFunc)/sizeof(aFunc[0]); ii++){
for(ii=0;
p->errCode==SQLITE_OK && ii<(int)(sizeof(aFunc)/sizeof(aFunc[0]));
ii++){
p->errCode = sqlite3_create_function(db, aFunc[ii].zName,
aFunc[ii].nArg, SQLITE_UTF8, (void*)p, aFunc[ii].xFunc, 0, 0
);
@@ -2388,7 +2391,7 @@ static int recoverVfsRead(sqlite3_file *pFd, void *aBuf, int nByte, i64 iOff){
if( pgsz==65536 ) pgsz = 1;
recoverPutU16(&aHdr[16], pgsz);
aHdr[20] = nReserve;
for(ii=0; ii<sizeof(aPreserve)/sizeof(aPreserve[0]); ii++){
for(ii=0; ii<(int)(sizeof(aPreserve)/sizeof(aPreserve[0])); ii++){
memcpy(&aHdr[aPreserve[ii]], &a[aPreserve[ii]], 4);
}
memcpy(aBuf, aHdr, sizeof(aHdr));
@@ -2512,10 +2515,16 @@ static int recoverVfsFetch(
int iAmt,
void **pp
){
(void)pFd;
(void)iOff;
(void)iAmt;
*pp = 0;
return SQLITE_OK;
}
static int recoverVfsUnfetch(sqlite3_file *pFd, sqlite3_int64 iOff, void *p){
(void)pFd;
(void)iOff;
(void)p;
return SQLITE_OK;
}

View File

@@ -304,7 +304,7 @@ static GeoPoly *geopolyFuncParam(
int nByte;
testcase( pCtx==0 );
if( sqlite3_value_type(pVal)==SQLITE_BLOB
&& (nByte = sqlite3_value_bytes(pVal))>=(4+6*sizeof(GeoCoord))
&& (nByte = sqlite3_value_bytes(pVal))>=(int)(4+6*sizeof(GeoCoord))
){
const unsigned char *a = sqlite3_value_blob(pVal);
int nVertex;
@@ -362,6 +362,7 @@ static void geopolyBlobFunc(
sqlite3_value **argv
){
GeoPoly *p = geopolyFuncParam(context, argv[0], 0);
(void)argc;
if( p ){
sqlite3_result_blob(context, p->hdr,
4+8*p->nVertex, SQLITE_TRANSIENT);
@@ -381,6 +382,7 @@ static void geopolyJsonFunc(
sqlite3_value **argv
){
GeoPoly *p = geopolyFuncParam(context, argv[0], 0);
(void)argc;
if( p ){
sqlite3 *db = sqlite3_context_db_handle(context);
sqlite3_str *x = sqlite3_str_new(db);
@@ -462,6 +464,7 @@ static void geopolyXformFunc(
double F = sqlite3_value_double(argv[6]);
GeoCoord x1, y1, x0, y0;
int ii;
(void)argc;
if( p ){
for(ii=0; ii<p->nVertex; ii++){
x0 = GeoX(p,ii);
@@ -512,6 +515,7 @@ static void geopolyAreaFunc(
sqlite3_value **argv
){
GeoPoly *p = geopolyFuncParam(context, argv[0], 0);
(void)argc;
if( p ){
sqlite3_result_double(context, geopolyArea(p));
sqlite3_free(p);
@@ -537,6 +541,7 @@ static void geopolyCcwFunc(
sqlite3_value **argv
){
GeoPoly *p = geopolyFuncParam(context, argv[0], 0);
(void)argc;
if( p ){
if( geopolyArea(p)<0.0 ){
int ii, jj;
@@ -591,6 +596,7 @@ static void geopolyRegularFunc(
int n = sqlite3_value_int(argv[3]);
int i;
GeoPoly *p;
(void)argc;
if( n<3 || r<=0.0 ) return;
if( n>1000 ) n = 1000;
@@ -700,6 +706,7 @@ static void geopolyBBoxFunc(
sqlite3_value **argv
){
GeoPoly *p = geopolyBBox(context, argv[0], 0, 0);
(void)argc;
if( p ){
sqlite3_result_blob(context, p->hdr,
4+8*p->nVertex, SQLITE_TRANSIENT);
@@ -727,6 +734,7 @@ static void geopolyBBoxStep(
){
RtreeCoord a[4];
int rc = SQLITE_OK;
(void)argc;
(void)geopolyBBox(context, argv[0], a, &rc);
if( rc==SQLITE_OK ){
GeoBBox *pBBox;
@@ -815,6 +823,8 @@ static void geopolyContainsPointFunc(
int v = 0;
int cnt = 0;
int ii;
(void)argc;
if( p1==0 ) return;
for(ii=0; ii<p1->nVertex-1; ii++){
v = pointBeneathLine(x0,y0,GeoX(p1,ii), GeoY(p1,ii),
@@ -854,6 +864,7 @@ static void geopolyWithinFunc(
){
GeoPoly *p1 = geopolyFuncParam(context, argv[0], 0);
GeoPoly *p2 = geopolyFuncParam(context, argv[1], 0);
(void)argc;
if( p1 && p2 ){
int x = geopolyOverlap(p1, p2);
if( x<0 ){
@@ -1184,6 +1195,7 @@ static void geopolyOverlapFunc(
){
GeoPoly *p1 = geopolyFuncParam(context, argv[0], 0);
GeoPoly *p2 = geopolyFuncParam(context, argv[1], 0);
(void)argc;
if( p1 && p2 ){
int x = geopolyOverlap(p1, p2);
if( x<0 ){
@@ -1204,8 +1216,12 @@ static void geopolyDebugFunc(
int argc,
sqlite3_value **argv
){
(void)context;
(void)argc;
#ifdef GEOPOLY_ENABLE_DEBUG
geo_debug = sqlite3_value_int(argv[0]);
#else
(void)argv;
#endif
}
@@ -1233,6 +1249,7 @@ static int geopolyInit(
sqlite3_str *pSql;
char *zSql;
int ii;
(void)pAux;
sqlite3_vtab_config(db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1);
@@ -1349,6 +1366,7 @@ static int geopolyFilter(
RtreeNode *pRoot = 0;
int rc = SQLITE_OK;
int iCell = 0;
(void)idxStr;
rtreeReference(pRtree);
@@ -1475,6 +1493,7 @@ static int geopolyBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){
int iRowidTerm = -1;
int iFuncTerm = -1;
int idxNum = 0;
(void)tab;
for(ii=0; ii<pIdxInfo->nConstraint; ii++){
struct sqlite3_index_constraint *p = &pIdxInfo->aConstraint[ii];
@@ -1721,6 +1740,8 @@ static int geopolyFindFunction(
void (**pxFunc)(sqlite3_context*,int,sqlite3_value**),
void **ppArg
){
(void)pVtab;
(void)nArg;
if( sqlite3_stricmp(zName, "geopoly_overlap")==0 ){
*pxFunc = geopolyOverlapFunc;
*ppArg = 0;
@@ -1790,7 +1811,7 @@ static int sqlite3_geopoly_init(sqlite3 *db){
} aAgg[] = {
{ geopolyBBoxStep, geopolyBBoxFinal, "geopoly_group_bbox" },
};
int i;
unsigned int i;
for(i=0; i<sizeof(aFunc)/sizeof(aFunc[0]) && rc==SQLITE_OK; i++){
int enc;
if( aFunc[i].bPure ){

View File

@@ -501,7 +501,7 @@ static int readInt16(u8 *p){
return (p[0]<<8) + p[1];
}
static void readCoord(u8 *p, RtreeCoord *pCoord){
assert( ((((char*)p) - (char*)0)&3)==0 ); /* p is always 4-byte aligned */
assert( (((sqlite3_uint64)p)&3)==0 ); /* p is always 4-byte aligned */
#if SQLITE_BYTEORDER==1234 && MSVC_VERSION>=1300
pCoord->u = _byteswap_ulong(*(u32*)p);
#elif SQLITE_BYTEORDER==1234 && GCC_VERSION>=4003000
@@ -555,7 +555,7 @@ static void writeInt16(u8 *p, int i){
}
static int writeCoord(u8 *p, RtreeCoord *pCoord){
u32 i;
assert( ((((char*)p) - (char*)0)&3)==0 ); /* p is always 4-byte aligned */
assert( (((sqlite3_uint64)p)&3)==0 ); /* p is always 4-byte aligned */
assert( sizeof(RtreeCoord)==4 );
assert( sizeof(u32)==4 );
#if SQLITE_BYTEORDER==1234 && GCC_VERSION>=4003000
@@ -1283,7 +1283,7 @@ static void rtreeNonleafConstraint(
assert(p->op==RTREE_LE || p->op==RTREE_LT || p->op==RTREE_GE
|| p->op==RTREE_GT || p->op==RTREE_EQ || p->op==RTREE_TRUE
|| p->op==RTREE_FALSE );
assert( ((((char*)pCellData) - (char*)0)&3)==0 ); /* 4-byte aligned */
assert( (((sqlite3_uint64)pCellData)&3)==0 ); /* 4-byte aligned */
switch( p->op ){
case RTREE_TRUE: return; /* Always satisfied */
case RTREE_FALSE: break; /* Never satisfied */
@@ -1336,7 +1336,7 @@ static void rtreeLeafConstraint(
|| p->op==RTREE_GT || p->op==RTREE_EQ || p->op==RTREE_TRUE
|| p->op==RTREE_FALSE );
pCellData += 8 + p->iCoord*4;
assert( ((((char*)pCellData) - (char*)0)&3)==0 ); /* 4-byte aligned */
assert( (((sqlite3_uint64)pCellData)&3)==0 ); /* 4-byte aligned */
RTREE_DECODE_COORD(eInt, pCellData, xN);
switch( p->op ){
case RTREE_TRUE: return; /* Always satisfied */

View File

@@ -157,4 +157,19 @@ do_execsql_test 5.2 {
SELECT rtreecheck('r3')=='ok'
} 0
#-------------------------------------------------------------------------
# dbsqlfuzz 4a1399d39bf9feccbf6b290da51d3b30103a4bf6
#
reset_db
do_execsql_test 6.0 {
PRAGMA encoding = 'utf16';
CREATE VIRTUAL TABLE t1 USING rtree(id, x, y);
}
db close
sqlite3 db test.db
do_catchsql_test 6.1 {
SELECT ( 'elvis' IN(SELECT rtreecheck('t1')) ) FROM (SELECT 1) GROUP BY 1;
} {1 {database table is locked}}
finish_test

Some files were not shown because too many files have changed in this diff Show More