diff --git a/Makefile.in b/Makefile.in index b7b0fa5f6a..7bc60e45fe 100644 --- a/Makefile.in +++ b/Makefile.in @@ -315,24 +315,6 @@ SRC = \ # Source code for extensions # -SRC += \ - $(TOP)/ext/fts1/fts1.c \ - $(TOP)/ext/fts1/fts1.h \ - $(TOP)/ext/fts1/fts1_hash.c \ - $(TOP)/ext/fts1/fts1_hash.h \ - $(TOP)/ext/fts1/fts1_porter.c \ - $(TOP)/ext/fts1/fts1_tokenizer.h \ - $(TOP)/ext/fts1/fts1_tokenizer1.c -SRC += \ - $(TOP)/ext/fts2/fts2.c \ - $(TOP)/ext/fts2/fts2.h \ - $(TOP)/ext/fts2/fts2_hash.c \ - $(TOP)/ext/fts2/fts2_hash.h \ - $(TOP)/ext/fts2/fts2_icu.c \ - $(TOP)/ext/fts2/fts2_porter.c \ - $(TOP)/ext/fts2/fts2_tokenizer.h \ - $(TOP)/ext/fts2/fts2_tokenizer.c \ - $(TOP)/ext/fts2/fts2_tokenizer1.c SRC += \ $(TOP)/ext/fts3/fts3.c \ $(TOP)/ext/fts3/fts3.h \ @@ -438,7 +420,7 @@ TESTSRC = \ $(TOP)/ext/recover/sqlite3recover.c \ $(TOP)/ext/recover/dbdata.c \ $(TOP)/ext/recover/test_recover.c \ - $(TOP)/ext/rbu/test_rbu.c + $(TOP)/ext/rbu/test_rbu.c # Statically linked extensions # @@ -567,14 +549,6 @@ HDR = \ # Header files used by extensions # -EXTHDR += \ - $(TOP)/ext/fts1/fts1.h \ - $(TOP)/ext/fts1/fts1_hash.h \ - $(TOP)/ext/fts1/fts1_tokenizer.h -EXTHDR += \ - $(TOP)/ext/fts2/fts2.h \ - $(TOP)/ext/fts2/fts2_hash.h \ - $(TOP)/ext/fts2/fts2_tokenizer.h EXTHDR += \ $(TOP)/ext/fts3/fts3.h \ $(TOP)/ext/fts3/fts3Int.h \ @@ -1120,6 +1094,9 @@ SHELL_SRC = \ $(TOP)/ext/misc/appendvfs.c \ $(TOP)/ext/misc/completion.c \ $(TOP)/ext/misc/decimal.c \ + $(TOP)/ext/misc/basexx.c \ + $(TOP)/ext/misc/base64.c \ + $(TOP)/ext/misc/base85.c \ $(TOP)/ext/misc/fileio.c \ $(TOP)/ext/misc/ieee754.c \ $(TOP)/ext/misc/regexp.c \ @@ -1147,24 +1124,6 @@ shell.c: $(SHELL_SRC) $(TOP)/tool/mkshellc.tcl icu.lo: $(TOP)/ext/icu/icu.c $(HDR) $(EXTHDR) $(LTCOMPILE) -DSQLITE_CORE -c $(TOP)/ext/icu/icu.c -fts2.lo: $(TOP)/ext/fts2/fts2.c $(HDR) $(EXTHDR) - $(LTCOMPILE) -DSQLITE_CORE -c $(TOP)/ext/fts2/fts2.c - -fts2_hash.lo: $(TOP)/ext/fts2/fts2_hash.c $(HDR) $(EXTHDR) - $(LTCOMPILE) -DSQLITE_CORE -c $(TOP)/ext/fts2/fts2_hash.c - -fts2_icu.lo: $(TOP)/ext/fts2/fts2_icu.c $(HDR) $(EXTHDR) - $(LTCOMPILE) -DSQLITE_CORE -c $(TOP)/ext/fts2/fts2_icu.c - -fts2_porter.lo: $(TOP)/ext/fts2/fts2_porter.c $(HDR) $(EXTHDR) - $(LTCOMPILE) -DSQLITE_CORE -c $(TOP)/ext/fts2/fts2_porter.c - -fts2_tokenizer.lo: $(TOP)/ext/fts2/fts2_tokenizer.c $(HDR) $(EXTHDR) - $(LTCOMPILE) -DSQLITE_CORE -c $(TOP)/ext/fts2/fts2_tokenizer.c - -fts2_tokenizer1.lo: $(TOP)/ext/fts2/fts2_tokenizer1.c $(HDR) $(EXTHDR) - $(LTCOMPILE) -DSQLITE_CORE -c $(TOP)/ext/fts2/fts2_tokenizer1.c - fts3.lo: $(TOP)/ext/fts3/fts3.c $(HDR) $(EXTHDR) $(LTCOMPILE) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3.c diff --git a/Makefile.msc b/Makefile.msc index 80a67cd99e..cc7d3d854f 100644 --- a/Makefile.msc +++ b/Makefile.msc @@ -1412,20 +1412,6 @@ SRC05 = \ $(TOP)\src\wal.h \ $(TOP)\src\whereInt.h -# Extension source code files, part 1. -# -SRC06 = \ - $(TOP)\ext\fts1\fts1.c \ - $(TOP)\ext\fts1\fts1_hash.c \ - $(TOP)\ext\fts1\fts1_porter.c \ - $(TOP)\ext\fts1\fts1_tokenizer1.c \ - $(TOP)\ext\fts2\fts2.c \ - $(TOP)\ext\fts2\fts2_hash.c \ - $(TOP)\ext\fts2\fts2_icu.c \ - $(TOP)\ext\fts2\fts2_porter.c \ - $(TOP)\ext\fts2\fts2_tokenizer.c \ - $(TOP)\ext\fts2\fts2_tokenizer1.c - # Extension source code files, part 2. # SRC07 = \ @@ -1448,16 +1434,6 @@ SRC07 = \ $(TOP)\ext\rbu\sqlite3rbu.c \ $(TOP)\ext\misc\stmt.c -# Extension header files, part 1. -# -SRC08 = \ - $(TOP)\ext\fts1\fts1.h \ - $(TOP)\ext\fts1\fts1_hash.h \ - $(TOP)\ext\fts1\fts1_tokenizer.h \ - $(TOP)\ext\fts2\fts2.h \ - $(TOP)\ext\fts2\fts2_hash.h \ - $(TOP)\ext\fts2\fts2_tokenizer.h - # Extension header files, part 2. # SRC09 = \ @@ -1498,7 +1474,7 @@ SRC12 = # All source code files. # -SRC = $(SRC00) $(SRC01) $(SRC03) $(SRC04) $(SRC05) $(SRC06) $(SRC07) $(SRC08) $(SRC09) $(SRC10) $(SRC11) $(SRC12) +SRC = $(SRC00) $(SRC01) $(SRC03) $(SRC04) $(SRC05) $(SRC07) $(SRC09) $(SRC10) $(SRC11) $(SRC12) # Source code to the test files. # @@ -1607,7 +1583,6 @@ TESTEXT = $(TESTEXT) $(TOP)\ext\misc\zipfile.c TESTSRC2 = \ $(SRC00) \ $(SRC01) \ - $(SRC06) \ $(SRC07) \ $(SRC10) \ $(TOP)\ext\async\sqlite3async.c @@ -1642,14 +1617,6 @@ HDR = \ # Header files used by extensions # -EXTHDR = $(EXTHDR) \ - $(TOP)\ext\fts1\fts1.h \ - $(TOP)\ext\fts1\fts1_hash.h \ - $(TOP)\ext\fts1\fts1_tokenizer.h -EXTHDR = $(EXTHDR) \ - $(TOP)\ext\fts2\fts2.h \ - $(TOP)\ext\fts2\fts2_hash.h \ - $(TOP)\ext\fts2\fts2_tokenizer.h EXTHDR = $(EXTHDR) \ $(TOP)\ext\fts3\fts3.h \ $(TOP)\ext\fts3\fts3Int.h \ @@ -1861,9 +1828,7 @@ mptest: mptester.exe for %i in ($(SRC03)) do copy /Y %i tsrc for %i in ($(SRC04)) do copy /Y %i tsrc for %i in ($(SRC05)) do copy /Y %i tsrc - for %i in ($(SRC06)) do copy /Y %i tsrc for %i in ($(SRC07)) do copy /Y %i tsrc - for %i in ($(SRC08)) do copy /Y %i tsrc for %i in ($(SRC09)) do copy /Y %i tsrc for %i in ($(SRC10)) do copy /Y %i tsrc for %i in ($(SRC11)) do copy /Y %i tsrc @@ -2233,6 +2198,8 @@ SHELL_SRC = \ $(TOP)\src\shell.c.in \ $(TOP)\ext\misc\appendvfs.c \ $(TOP)\ext\misc\completion.c \ + $(TOP)\ext\misc\base64.c \ + $(TOP)\ext\misc\base85.c \ $(TOP)\ext\misc\decimal.c \ $(TOP)\ext\misc\fileio.c \ $(TOP)\ext\misc\ieee754.c \ @@ -2266,24 +2233,6 @@ zlib: icu.lo: $(TOP)\ext\icu\icu.c $(HDR) $(EXTHDR) $(LTCOMPILE) $(CORE_COMPILE_OPTS) $(NO_WARN) -DSQLITE_CORE -c $(TOP)\ext\icu\icu.c -fts2.lo: $(TOP)\ext\fts2\fts2.c $(HDR) $(EXTHDR) - $(LTCOMPILE) $(CORE_COMPILE_OPTS) $(NO_WARN) -DSQLITE_CORE -c $(TOP)\ext\fts2\fts2.c - -fts2_hash.lo: $(TOP)\ext\fts2\fts2_hash.c $(HDR) $(EXTHDR) - $(LTCOMPILE) $(CORE_COMPILE_OPTS) $(NO_WARN) -DSQLITE_CORE -c $(TOP)\ext\fts2\fts2_hash.c - -fts2_icu.lo: $(TOP)\ext\fts2\fts2_icu.c $(HDR) $(EXTHDR) - $(LTCOMPILE) $(CORE_COMPILE_OPTS) $(NO_WARN) -DSQLITE_CORE -c $(TOP)\ext\fts2\fts2_icu.c - -fts2_porter.lo: $(TOP)\ext\fts2\fts2_porter.c $(HDR) $(EXTHDR) - $(LTCOMPILE) $(CORE_COMPILE_OPTS) $(NO_WARN) -DSQLITE_CORE -c $(TOP)\ext\fts2\fts2_porter.c - -fts2_tokenizer.lo: $(TOP)\ext\fts2\fts2_tokenizer.c $(HDR) $(EXTHDR) - $(LTCOMPILE) $(CORE_COMPILE_OPTS) $(NO_WARN) -DSQLITE_CORE -c $(TOP)\ext\fts2\fts2_tokenizer.c - -fts2_tokenizer1.lo: $(TOP)\ext\fts2\fts2_tokenizer1.c $(HDR) $(EXTHDR) - $(LTCOMPILE) $(CORE_COMPILE_OPTS) $(NO_WARN) -DSQLITE_CORE -c $(TOP)\ext\fts2\fts2_tokenizer1.c - fts3.lo: $(TOP)\ext\fts3\fts3.c $(HDR) $(EXTHDR) $(LTCOMPILE) $(CORE_COMPILE_OPTS) $(NO_WARN) -DSQLITE_CORE -c $(TOP)\ext\fts3\fts3.c diff --git a/ext/fts1/README.txt b/ext/fts1/README.txt deleted file mode 100644 index 292b7daa0b..0000000000 --- a/ext/fts1/README.txt +++ /dev/null @@ -1,2 +0,0 @@ -This folder contains source code to the first full-text search -extension for SQLite. diff --git a/ext/fts1/ft_hash.c b/ext/fts1/ft_hash.c deleted file mode 100644 index 8b3a7064ee..0000000000 --- a/ext/fts1/ft_hash.c +++ /dev/null @@ -1,404 +0,0 @@ -/* -** 2001 September 22 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** This is the implementation of generic hash-tables used in SQLite. -** We've modified it slightly to serve as a standalone hash table -** implementation for the full-text indexing module. -*/ -#include -#include -#include - -#include "ft_hash.h" - -void *malloc_and_zero(int n){ - void *p = malloc(n); - if( p ){ - memset(p, 0, n); - } - return p; -} - -/* Turn bulk memory into a hash table object by initializing the -** fields of the Hash structure. -** -** "pNew" is a pointer to the hash table that is to be initialized. -** keyClass is one of the constants HASH_INT, HASH_POINTER, -** HASH_BINARY, or HASH_STRING. The value of keyClass -** determines what kind of key the hash table will use. "copyKey" is -** true if the hash table should make its own private copy of keys and -** false if it should just use the supplied pointer. CopyKey only makes -** sense for HASH_STRING and HASH_BINARY and is ignored -** for other key classes. -*/ -void HashInit(Hash *pNew, int keyClass, int copyKey){ - assert( pNew!=0 ); - assert( keyClass>=HASH_STRING && keyClass<=HASH_BINARY ); - pNew->keyClass = keyClass; -#if 0 - if( keyClass==HASH_POINTER || keyClass==HASH_INT ) copyKey = 0; -#endif - pNew->copyKey = copyKey; - pNew->first = 0; - pNew->count = 0; - pNew->htsize = 0; - pNew->ht = 0; - pNew->xMalloc = malloc_and_zero; - pNew->xFree = free; -} - -/* Remove all entries from a hash table. Reclaim all memory. -** Call this routine to delete a hash table or to reset a hash table -** to the empty state. -*/ -void HashClear(Hash *pH){ - HashElem *elem; /* For looping over all elements of the table */ - - assert( pH!=0 ); - elem = pH->first; - pH->first = 0; - if( pH->ht ) pH->xFree(pH->ht); - pH->ht = 0; - pH->htsize = 0; - while( elem ){ - HashElem *next_elem = elem->next; - if( pH->copyKey && elem->pKey ){ - pH->xFree(elem->pKey); - } - pH->xFree(elem); - elem = next_elem; - } - pH->count = 0; -} - -#if 0 /* NOT USED */ -/* -** Hash and comparison functions when the mode is HASH_INT -*/ -static int intHash(const void *pKey, int nKey){ - return nKey ^ (nKey<<8) ^ (nKey>>8); -} -static int intCompare(const void *pKey1, int n1, const void *pKey2, int n2){ - return n2 - n1; -} -#endif - -#if 0 /* NOT USED */ -/* -** Hash and comparison functions when the mode is HASH_POINTER -*/ -static int ptrHash(const void *pKey, int nKey){ - uptr x = Addr(pKey); - return x ^ (x<<8) ^ (x>>8); -} -static int ptrCompare(const void *pKey1, int n1, const void *pKey2, int n2){ - if( pKey1==pKey2 ) return 0; - if( pKey1 0 ){ - h = (h<<3) ^ h ^ *z++; - nKey--; - } - return h & 0x7fffffff; -} -static int strCompare(const void *pKey1, int n1, const void *pKey2, int n2){ - if( n1!=n2 ) return 1; - return strncmp((const char*)pKey1,(const char*)pKey2,n1); -} - -/* -** Hash and comparison functions when the mode is HASH_BINARY -*/ -static int binHash(const void *pKey, int nKey){ - int h = 0; - const char *z = (const char *)pKey; - while( nKey-- > 0 ){ - h = (h<<3) ^ h ^ *(z++); - } - return h & 0x7fffffff; -} -static int binCompare(const void *pKey1, int n1, const void *pKey2, int n2){ - if( n1!=n2 ) return 1; - return memcmp(pKey1,pKey2,n1); -} - -/* -** Return a pointer to the appropriate hash function given the key class. -** -** The C syntax in this function definition may be unfamilar to some -** programmers, so we provide the following additional explanation: -** -** The name of the function is "hashFunction". The function takes a -** single parameter "keyClass". The return value of hashFunction() -** is a pointer to another function. Specifically, the return value -** of hashFunction() is a pointer to a function that takes two parameters -** with types "const void*" and "int" and returns an "int". -*/ -static int (*hashFunction(int keyClass))(const void*,int){ -#if 0 /* HASH_INT and HASH_POINTER are never used */ - switch( keyClass ){ - case HASH_INT: return &intHash; - case HASH_POINTER: return &ptrHash; - case HASH_STRING: return &strHash; - case HASH_BINARY: return &binHash;; - default: break; - } - return 0; -#else - if( keyClass==HASH_STRING ){ - return &strHash; - }else{ - assert( keyClass==HASH_BINARY ); - return &binHash; - } -#endif -} - -/* -** Return a pointer to the appropriate hash function given the key class. -** -** For help in interpreted the obscure C code in the function definition, -** see the header comment on the previous function. -*/ -static int (*compareFunction(int keyClass))(const void*,int,const void*,int){ -#if 0 /* HASH_INT and HASH_POINTER are never used */ - switch( keyClass ){ - case HASH_INT: return &intCompare; - case HASH_POINTER: return &ptrCompare; - case HASH_STRING: return &strCompare; - case HASH_BINARY: return &binCompare; - default: break; - } - return 0; -#else - if( keyClass==HASH_STRING ){ - return &strCompare; - }else{ - assert( keyClass==HASH_BINARY ); - return &binCompare; - } -#endif -} - -/* Link an element into the hash table -*/ -static void insertElement( - Hash *pH, /* The complete hash table */ - struct _ht *pEntry, /* The entry into which pNew is inserted */ - HashElem *pNew /* The element to be inserted */ -){ - HashElem *pHead; /* First element already in pEntry */ - pHead = pEntry->chain; - if( pHead ){ - pNew->next = pHead; - pNew->prev = pHead->prev; - if( pHead->prev ){ pHead->prev->next = pNew; } - else { pH->first = pNew; } - pHead->prev = pNew; - }else{ - pNew->next = pH->first; - if( pH->first ){ pH->first->prev = pNew; } - pNew->prev = 0; - pH->first = pNew; - } - pEntry->count++; - pEntry->chain = pNew; -} - - -/* Resize the hash table so that it cantains "new_size" buckets. -** "new_size" must be a power of 2. The hash table might fail -** to resize if sqliteMalloc() fails. -*/ -static void rehash(Hash *pH, int new_size){ - struct _ht *new_ht; /* The new hash table */ - HashElem *elem, *next_elem; /* For looping over existing elements */ - int (*xHash)(const void*,int); /* The hash function */ - - assert( (new_size & (new_size-1))==0 ); - new_ht = (struct _ht *)pH->xMalloc( new_size*sizeof(struct _ht) ); - if( new_ht==0 ) return; - if( pH->ht ) pH->xFree(pH->ht); - pH->ht = new_ht; - pH->htsize = new_size; - xHash = hashFunction(pH->keyClass); - for(elem=pH->first, pH->first=0; elem; elem = next_elem){ - int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1); - next_elem = elem->next; - insertElement(pH, &new_ht[h], elem); - } -} - -/* This function (for internal use only) locates an element in an -** hash table that matches the given key. The hash for this key has -** already been computed and is passed as the 4th parameter. -*/ -static HashElem *findElementGivenHash( - const Hash *pH, /* The pH to be searched */ - const void *pKey, /* The key we are searching for */ - int nKey, - int h /* The hash for this key. */ -){ - HashElem *elem; /* Used to loop thru the element list */ - int count; /* Number of elements left to test */ - int (*xCompare)(const void*,int,const void*,int); /* comparison function */ - - if( pH->ht ){ - struct _ht *pEntry = &pH->ht[h]; - elem = pEntry->chain; - count = pEntry->count; - xCompare = compareFunction(pH->keyClass); - while( count-- && elem ){ - if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){ - return elem; - } - elem = elem->next; - } - } - return 0; -} - -/* Remove a single entry from the hash table given a pointer to that -** element and a hash on the element's key. -*/ -static void removeElementGivenHash( - Hash *pH, /* The pH containing "elem" */ - HashElem* elem, /* The element to be removed from the pH */ - int h /* Hash value for the element */ -){ - struct _ht *pEntry; - if( elem->prev ){ - elem->prev->next = elem->next; - }else{ - pH->first = elem->next; - } - if( elem->next ){ - elem->next->prev = elem->prev; - } - pEntry = &pH->ht[h]; - if( pEntry->chain==elem ){ - pEntry->chain = elem->next; - } - pEntry->count--; - if( pEntry->count<=0 ){ - pEntry->chain = 0; - } - if( pH->copyKey && elem->pKey ){ - pH->xFree(elem->pKey); - } - pH->xFree( elem ); - pH->count--; - if( pH->count<=0 ){ - assert( pH->first==0 ); - assert( pH->count==0 ); - HashClear(pH); - } -} - -/* Attempt to locate an element of the hash table pH with a key -** that matches pKey,nKey. Return the data for this element if it is -** found, or NULL if there is no match. -*/ -void *HashFind(const Hash *pH, const void *pKey, int nKey){ - int h; /* A hash on key */ - HashElem *elem; /* The element that matches key */ - int (*xHash)(const void*,int); /* The hash function */ - - if( pH==0 || pH->ht==0 ) return 0; - xHash = hashFunction(pH->keyClass); - assert( xHash!=0 ); - h = (*xHash)(pKey,nKey); - assert( (pH->htsize & (pH->htsize-1))==0 ); - elem = findElementGivenHash(pH,pKey,nKey, h & (pH->htsize-1)); - return elem ? elem->data : 0; -} - -/* Insert an element into the hash table pH. The key is pKey,nKey -** and the data is "data". -** -** If no element exists with a matching key, then a new -** element is created. A copy of the key is made if the copyKey -** flag is set. NULL is returned. -** -** If another element already exists with the same key, then the -** new data replaces the old data and the old data is returned. -** The key is not copied in this instance. If a malloc fails, then -** the new data is returned and the hash table is unchanged. -** -** If the "data" parameter to this function is NULL, then the -** element corresponding to "key" is removed from the hash table. -*/ -void *HashInsert(Hash *pH, const void *pKey, int nKey, void *data){ - int hraw; /* Raw hash value of the key */ - int h; /* the hash of the key modulo hash table size */ - HashElem *elem; /* Used to loop thru the element list */ - HashElem *new_elem; /* New element added to the pH */ - int (*xHash)(const void*,int); /* The hash function */ - - assert( pH!=0 ); - xHash = hashFunction(pH->keyClass); - assert( xHash!=0 ); - hraw = (*xHash)(pKey, nKey); - assert( (pH->htsize & (pH->htsize-1))==0 ); - h = hraw & (pH->htsize-1); - elem = findElementGivenHash(pH,pKey,nKey,h); - if( elem ){ - void *old_data = elem->data; - if( data==0 ){ - removeElementGivenHash(pH,elem,h); - }else{ - elem->data = data; - } - return old_data; - } - if( data==0 ) return 0; - new_elem = (HashElem*)pH->xMalloc( sizeof(HashElem) ); - if( new_elem==0 ) return data; - if( pH->copyKey && pKey!=0 ){ - new_elem->pKey = pH->xMalloc( nKey ); - if( new_elem->pKey==0 ){ - pH->xFree(new_elem); - return data; - } - memcpy((void*)new_elem->pKey, pKey, nKey); - }else{ - new_elem->pKey = (void*)pKey; - } - new_elem->nKey = nKey; - pH->count++; - if( pH->htsize==0 ){ - rehash(pH,8); - if( pH->htsize==0 ){ - pH->count = 0; - pH->xFree(new_elem); - return data; - } - } - if( pH->count > pH->htsize ){ - rehash(pH,pH->htsize*2); - } - assert( pH->htsize>0 ); - assert( (pH->htsize & (pH->htsize-1))==0 ); - h = hraw & (pH->htsize-1); - insertElement(pH, &pH->ht[h], new_elem); - new_elem->data = data; - return 0; -} diff --git a/ext/fts1/ft_hash.h b/ext/fts1/ft_hash.h deleted file mode 100644 index 95871a4590..0000000000 --- a/ext/fts1/ft_hash.h +++ /dev/null @@ -1,111 +0,0 @@ -/* -** 2001 September 22 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** This is the header file for the generic hash-table implementation -** used in SQLite. We've modified it slightly to serve as a standalone -** hash table implementation for the full-text indexing module. -** -*/ -#ifndef _HASH_H_ -#define _HASH_H_ - -/* Forward declarations of structures. */ -typedef struct Hash Hash; -typedef struct HashElem HashElem; - -/* A complete hash table is an instance of the following structure. -** The internals of this structure are intended to be opaque -- client -** code should not attempt to access or modify the fields of this structure -** directly. Change this structure only by using the routines below. -** However, many of the "procedures" and "functions" for modifying and -** accessing this structure are really macros, so we can't really make -** this structure opaque. -*/ -struct Hash { - char keyClass; /* HASH_INT, _POINTER, _STRING, _BINARY */ - char copyKey; /* True if copy of key made on insert */ - int count; /* Number of entries in this table */ - HashElem *first; /* The first element of the array */ - void *(*xMalloc)(int); /* malloc() function to use */ - void (*xFree)(void *); /* free() function to use */ - int htsize; /* Number of buckets in the hash table */ - struct _ht { /* the hash table */ - int count; /* Number of entries with this hash */ - HashElem *chain; /* Pointer to first entry with this hash */ - } *ht; -}; - -/* Each element in the hash table is an instance of the following -** structure. All elements are stored on a single doubly-linked list. -** -** Again, this structure is intended to be opaque, but it can't really -** be opaque because it is used by macros. -*/ -struct HashElem { - HashElem *next, *prev; /* Next and previous elements in the table */ - void *data; /* Data associated with this element */ - void *pKey; int nKey; /* Key associated with this element */ -}; - -/* -** There are 4 different modes of operation for a hash table: -** -** HASH_INT nKey is used as the key and pKey is ignored. -** -** HASH_POINTER pKey is used as the key and nKey is ignored. -** -** HASH_STRING pKey points to a string that is nKey bytes long -** (including the null-terminator, if any). Case -** is respected in comparisons. -** -** HASH_BINARY pKey points to binary data nKey bytes long. -** memcmp() is used to compare keys. -** -** A copy of the key is made for HASH_STRING and HASH_BINARY -** if the copyKey parameter to HashInit is 1. -*/ -/* #define HASH_INT 1 // NOT USED */ -/* #define HASH_POINTER 2 // NOT USED */ -#define HASH_STRING 3 -#define HASH_BINARY 4 - -/* -** Access routines. To delete, insert a NULL pointer. -*/ -void HashInit(Hash*, int keytype, int copyKey); -void *HashInsert(Hash*, const void *pKey, int nKey, void *pData); -void *HashFind(const Hash*, const void *pKey, int nKey); -void HashClear(Hash*); - -/* -** Macros for looping over all elements of a hash table. The idiom is -** like this: -** -** Hash h; -** HashElem *p; -** ... -** for(p=HashFirst(&h); p; p=HashNext(p)){ -** SomeStructure *pData = HashData(p); -** // do something with pData -** } -*/ -#define HashFirst(H) ((H)->first) -#define HashNext(E) ((E)->next) -#define HashData(E) ((E)->data) -#define HashKey(E) ((E)->pKey) -#define HashKeysize(E) ((E)->nKey) - -/* -** Number of entries in a hash table -*/ -#define HashCount(H) ((H)->count) - -#endif /* _HASH_H_ */ diff --git a/ext/fts1/fts1.c b/ext/fts1/fts1.c deleted file mode 100644 index 77fa9e23f5..0000000000 --- a/ext/fts1/fts1.c +++ /dev/null @@ -1,3348 +0,0 @@ -/* fts1 has a design flaw which can lead to database corruption (see -** below). It is recommended not to use it any longer, instead use -** fts3 (or higher). If you believe that your use of fts1 is safe, -** add -DSQLITE_ENABLE_BROKEN_FTS1=1 to your CFLAGS. -*/ -#if (!defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)) \ - && !defined(SQLITE_ENABLE_BROKEN_FTS1) -#error fts1 has a design flaw and has been deprecated. -#endif -/* The flaw is that fts1 uses the content table's unaliased rowid as -** the unique docid. fts1 embeds the rowid in the index it builds, -** and expects the rowid to not change. The SQLite VACUUM operation -** will renumber such rowids, thereby breaking fts1. If you are using -** fts1 in a system which has disabled VACUUM, then you can continue -** to use it safely. Note that PRAGMA auto_vacuum does NOT disable -** VACUUM, though systems using auto_vacuum are unlikely to invoke -** VACUUM. -** -** fts1 should be safe even across VACUUM if you only insert documents -** and never delete. -*/ - -/* The author disclaims copyright to this source code. - * - * This is an SQLite module implementing full-text search. - */ - -/* -** The code in this file is only compiled if: -** -** * The FTS1 module is being built as an extension -** (in which case SQLITE_CORE is not defined), or -** -** * The FTS1 module is being built into the core of -** SQLite (in which case SQLITE_ENABLE_FTS1 is defined). -*/ -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) - -#if defined(SQLITE_ENABLE_FTS1) && !defined(SQLITE_CORE) -# define SQLITE_CORE 1 -#endif - -#include -#include -#include -#include -#include - -#include "fts1.h" -#include "fts1_hash.h" -#include "fts1_tokenizer.h" -#include "sqlite3.h" -#include "sqlite3ext.h" -SQLITE_EXTENSION_INIT1 - - -#if 0 -# define TRACE(A) printf A; fflush(stdout) -#else -# define TRACE(A) -#endif - -/* utility functions */ - -typedef struct StringBuffer { - int len; /* length, not including null terminator */ - int alloced; /* Space allocated for s[] */ - char *s; /* Content of the string */ -} StringBuffer; - -static void initStringBuffer(StringBuffer *sb){ - sb->len = 0; - sb->alloced = 100; - sb->s = malloc(100); - sb->s[0] = '\0'; -} - -static void nappend(StringBuffer *sb, const char *zFrom, int nFrom){ - if( sb->len + nFrom >= sb->alloced ){ - sb->alloced = sb->len + nFrom + 100; - sb->s = realloc(sb->s, sb->alloced+1); - if( sb->s==0 ){ - initStringBuffer(sb); - return; - } - } - memcpy(sb->s + sb->len, zFrom, nFrom); - sb->len += nFrom; - sb->s[sb->len] = 0; -} -static void append(StringBuffer *sb, const char *zFrom){ - nappend(sb, zFrom, strlen(zFrom)); -} - -/* We encode variable-length integers in little-endian order using seven bits - * per byte as follows: -** -** KEY: -** A = 0xxxxxxx 7 bits of data and one flag bit -** B = 1xxxxxxx 7 bits of data and one flag bit -** -** 7 bits - A -** 14 bits - BA -** 21 bits - BBA -** and so on. -*/ - -/* We may need up to VARINT_MAX bytes to store an encoded 64-bit integer. */ -#define VARINT_MAX 10 - -/* Write a 64-bit variable-length integer to memory starting at p[0]. - * The length of data written will be between 1 and VARINT_MAX bytes. - * The number of bytes written is returned. */ -static int putVarint(char *p, sqlite_int64 v){ - unsigned char *q = (unsigned char *) p; - sqlite_uint64 vu = v; - do{ - *q++ = (unsigned char) ((vu & 0x7f) | 0x80); - vu >>= 7; - }while( vu!=0 ); - q[-1] &= 0x7f; /* turn off high bit in final byte */ - assert( q - (unsigned char *)p <= VARINT_MAX ); - return (int) (q - (unsigned char *)p); -} - -/* Read a 64-bit variable-length integer from memory starting at p[0]. - * Return the number of bytes read, or 0 on error. - * The value is stored in *v. */ -static int getVarint(const char *p, sqlite_int64 *v){ - const unsigned char *q = (const unsigned char *) p; - sqlite_uint64 x = 0, y = 1; - while( (*q & 0x80) == 0x80 ){ - x += y * (*q++ & 0x7f); - y <<= 7; - if( q - (unsigned char *)p >= VARINT_MAX ){ /* bad data */ - assert( 0 ); - return 0; - } - } - x += y * (*q++); - *v = (sqlite_int64) x; - return (int) (q - (unsigned char *)p); -} - -static int getVarint32(const char *p, int *pi){ - sqlite_int64 i; - int ret = getVarint(p, &i); - *pi = (int) i; - assert( *pi==i ); - return ret; -} - -/*** Document lists *** - * - * A document list holds a sorted list of varint-encoded document IDs. - * - * A doclist with type DL_POSITIONS_OFFSETS is stored like this: - * - * array { - * varint docid; - * array { - * varint position; (delta from previous position plus POS_BASE) - * varint startOffset; (delta from previous startOffset) - * varint endOffset; (delta from startOffset) - * } - * } - * - * Here, array { X } means zero or more occurrences of X, adjacent in memory. - * - * A position list may hold positions for text in multiple columns. A position - * POS_COLUMN is followed by a varint containing the index of the column for - * following positions in the list. Any positions appearing before any - * occurrences of POS_COLUMN are for column 0. - * - * A doclist with type DL_POSITIONS is like the above, but holds only docids - * and positions without offset information. - * - * A doclist with type DL_DOCIDS is like the above, but holds only docids - * without positions or offset information. - * - * On disk, every document list has positions and offsets, so we don't bother - * to serialize a doclist's type. - * - * We don't yet delta-encode document IDs; doing so will probably be a - * modest win. - * - * NOTE(shess) I've thought of a slightly (1%) better offset encoding. - * After the first offset, estimate the next offset by using the - * current token position and the previous token position and offset, - * offset to handle some variance. So the estimate would be - * (iPosition*w->iStartOffset/w->iPosition-64), which is delta-encoded - * as normal. Offsets more than 64 chars from the estimate are - * encoded as the delta to the previous start offset + 128. An - * additional tiny increment can be gained by using the end offset of - * the previous token to make the estimate a tiny bit more precise. -*/ - -/* It is not safe to call isspace(), tolower(), or isalnum() on -** hi-bit-set characters. This is the same solution used in the -** tokenizer. -*/ -/* TODO(shess) The snippet-generation code should be using the -** tokenizer-generated tokens rather than doing its own local -** tokenization. -*/ -/* TODO(shess) Is __isascii() a portable version of (c&0x80)==0? */ -static int safe_isspace(char c){ - return (c&0x80)==0 ? isspace((unsigned char)c) : 0; -} -static int safe_tolower(char c){ - return (c&0x80)==0 ? tolower((unsigned char)c) : c; -} -static int safe_isalnum(char c){ - return (c&0x80)==0 ? isalnum((unsigned char)c) : 0; -} - -typedef enum DocListType { - DL_DOCIDS, /* docids only */ - DL_POSITIONS, /* docids + positions */ - DL_POSITIONS_OFFSETS /* docids + positions + offsets */ -} DocListType; - -/* -** By default, only positions and not offsets are stored in the doclists. -** To change this so that offsets are stored too, compile with -** -** -DDL_DEFAULT=DL_POSITIONS_OFFSETS -** -*/ -#ifndef DL_DEFAULT -# define DL_DEFAULT DL_POSITIONS -#endif - -typedef struct DocList { - char *pData; - int nData; - DocListType iType; - int iLastColumn; /* the last column written */ - int iLastPos; /* the last position written */ - int iLastOffset; /* the last start offset written */ -} DocList; - -enum { - POS_END = 0, /* end of this position list */ - POS_COLUMN, /* followed by new column number */ - POS_BASE -}; - -/* Initialize a new DocList to hold the given data. */ -static void docListInit(DocList *d, DocListType iType, - const char *pData, int nData){ - d->nData = nData; - if( nData>0 ){ - d->pData = malloc(nData); - memcpy(d->pData, pData, nData); - } else { - d->pData = NULL; - } - d->iType = iType; - d->iLastColumn = 0; - d->iLastPos = d->iLastOffset = 0; -} - -/* Create a new dynamically-allocated DocList. */ -static DocList *docListNew(DocListType iType){ - DocList *d = (DocList *) malloc(sizeof(DocList)); - docListInit(d, iType, 0, 0); - return d; -} - -static void docListDestroy(DocList *d){ - free(d->pData); -#ifndef NDEBUG - memset(d, 0x55, sizeof(*d)); -#endif -} - -static void docListDelete(DocList *d){ - docListDestroy(d); - free(d); -} - -static char *docListEnd(DocList *d){ - return d->pData + d->nData; -} - -/* Append a varint to a DocList's data. */ -static void appendVarint(DocList *d, sqlite_int64 i){ - char c[VARINT_MAX]; - int n = putVarint(c, i); - d->pData = realloc(d->pData, d->nData + n); - memcpy(d->pData + d->nData, c, n); - d->nData += n; -} - -static void docListAddDocid(DocList *d, sqlite_int64 iDocid){ - appendVarint(d, iDocid); - if( d->iType>=DL_POSITIONS ){ - appendVarint(d, POS_END); /* initially empty position list */ - d->iLastColumn = 0; - d->iLastPos = d->iLastOffset = 0; - } -} - -/* helper function for docListAddPos and docListAddPosOffset */ -static void addPos(DocList *d, int iColumn, int iPos){ - assert( d->nData>0 ); - --d->nData; /* remove previous terminator */ - if( iColumn!=d->iLastColumn ){ - assert( iColumn>d->iLastColumn ); - appendVarint(d, POS_COLUMN); - appendVarint(d, iColumn); - d->iLastColumn = iColumn; - d->iLastPos = d->iLastOffset = 0; - } - assert( iPos>=d->iLastPos ); - appendVarint(d, iPos-d->iLastPos+POS_BASE); - d->iLastPos = iPos; -} - -/* Add a position to the last position list in a doclist. */ -static void docListAddPos(DocList *d, int iColumn, int iPos){ - assert( d->iType==DL_POSITIONS ); - addPos(d, iColumn, iPos); - appendVarint(d, POS_END); /* add new terminator */ -} - -/* -** Add a position and starting and ending offsets to a doclist. -** -** If the doclist is setup to handle only positions, then insert -** the position only and ignore the offsets. -*/ -static void docListAddPosOffset( - DocList *d, /* Doclist under construction */ - int iColumn, /* Column the inserted term is part of */ - int iPos, /* Position of the inserted term */ - int iStartOffset, /* Starting offset of inserted term */ - int iEndOffset /* Ending offset of inserted term */ -){ - assert( d->iType>=DL_POSITIONS ); - addPos(d, iColumn, iPos); - if( d->iType==DL_POSITIONS_OFFSETS ){ - assert( iStartOffset>=d->iLastOffset ); - appendVarint(d, iStartOffset-d->iLastOffset); - d->iLastOffset = iStartOffset; - assert( iEndOffset>=iStartOffset ); - appendVarint(d, iEndOffset-iStartOffset); - } - appendVarint(d, POS_END); /* add new terminator */ -} - -/* -** A DocListReader object is a cursor into a doclist. Initialize -** the cursor to the beginning of the doclist by calling readerInit(). -** Then use routines -** -** peekDocid() -** readDocid() -** readPosition() -** skipPositionList() -** and so forth... -** -** to read information out of the doclist. When we reach the end -** of the doclist, atEnd() returns TRUE. -*/ -typedef struct DocListReader { - DocList *pDoclist; /* The document list we are stepping through */ - char *p; /* Pointer to next unread byte in the doclist */ - int iLastColumn; - int iLastPos; /* the last position read, or -1 when not in a position list */ -} DocListReader; - -/* -** Initialize the DocListReader r to point to the beginning of pDoclist. -*/ -static void readerInit(DocListReader *r, DocList *pDoclist){ - r->pDoclist = pDoclist; - if( pDoclist!=NULL ){ - r->p = pDoclist->pData; - } - r->iLastColumn = -1; - r->iLastPos = -1; -} - -/* -** Return TRUE if we have reached then end of pReader and there is -** nothing else left to read. -*/ -static int atEnd(DocListReader *pReader){ - return pReader->pDoclist==0 || (pReader->p >= docListEnd(pReader->pDoclist)); -} - -/* Peek at the next docid without advancing the read pointer. -*/ -static sqlite_int64 peekDocid(DocListReader *pReader){ - sqlite_int64 ret; - assert( !atEnd(pReader) ); - assert( pReader->iLastPos==-1 ); - getVarint(pReader->p, &ret); - return ret; -} - -/* Read the next docid. See also nextDocid(). -*/ -static sqlite_int64 readDocid(DocListReader *pReader){ - sqlite_int64 ret; - assert( !atEnd(pReader) ); - assert( pReader->iLastPos==-1 ); - pReader->p += getVarint(pReader->p, &ret); - if( pReader->pDoclist->iType>=DL_POSITIONS ){ - pReader->iLastColumn = 0; - pReader->iLastPos = 0; - } - return ret; -} - -/* Read the next position and column index from a position list. - * Returns the position, or -1 at the end of the list. */ -static int readPosition(DocListReader *pReader, int *iColumn){ - int i; - int iType = pReader->pDoclist->iType; - - if( pReader->iLastPos==-1 ){ - return -1; - } - assert( !atEnd(pReader) ); - - if( iTypep += getVarint32(pReader->p, &i); - if( i==POS_END ){ - pReader->iLastColumn = pReader->iLastPos = -1; - *iColumn = -1; - return -1; - } - if( i==POS_COLUMN ){ - pReader->p += getVarint32(pReader->p, &pReader->iLastColumn); - pReader->iLastPos = 0; - pReader->p += getVarint32(pReader->p, &i); - assert( i>=POS_BASE ); - } - pReader->iLastPos += ((int) i)-POS_BASE; - if( iType>=DL_POSITIONS_OFFSETS ){ - /* Skip over offsets, ignoring them for now. */ - int iStart, iEnd; - pReader->p += getVarint32(pReader->p, &iStart); - pReader->p += getVarint32(pReader->p, &iEnd); - } - *iColumn = pReader->iLastColumn; - return pReader->iLastPos; -} - -/* Skip past the end of a position list. */ -static void skipPositionList(DocListReader *pReader){ - DocList *p = pReader->pDoclist; - if( p && p->iType>=DL_POSITIONS ){ - int iColumn; - while( readPosition(pReader, &iColumn)!=-1 ){} - } -} - -/* Skip over a docid, including its position list if the doclist has - * positions. */ -static void skipDocument(DocListReader *pReader){ - readDocid(pReader); - skipPositionList(pReader); -} - -/* Skip past all docids which are less than [iDocid]. Returns 1 if a docid - * matching [iDocid] was found. */ -static int skipToDocid(DocListReader *pReader, sqlite_int64 iDocid){ - sqlite_int64 d = 0; - while( !atEnd(pReader) && (d=peekDocid(pReader))iType>=DL_POSITIONS ){ - int iPos, iCol; - const char *zDiv = ""; - printf("("); - while( (iPos = readPosition(&r, &iCol))>=0 ){ - printf("%s%d:%d", zDiv, iCol, iPos); - zDiv = ":"; - } - printf(")"); - } - } - printf("\n"); - fflush(stdout); -} -#endif /* SQLITE_DEBUG */ - -/* Trim the given doclist to contain only positions in column - * [iRestrictColumn]. */ -static void docListRestrictColumn(DocList *in, int iRestrictColumn){ - DocListReader r; - DocList out; - - assert( in->iType>=DL_POSITIONS ); - readerInit(&r, in); - docListInit(&out, DL_POSITIONS, NULL, 0); - - while( !atEnd(&r) ){ - sqlite_int64 iDocid = readDocid(&r); - int iPos, iColumn; - - docListAddDocid(&out, iDocid); - while( (iPos = readPosition(&r, &iColumn)) != -1 ){ - if( iColumn==iRestrictColumn ){ - docListAddPos(&out, iColumn, iPos); - } - } - } - - docListDestroy(in); - *in = out; -} - -/* Trim the given doclist by discarding any docids without any remaining - * positions. */ -static void docListDiscardEmpty(DocList *in) { - DocListReader r; - DocList out; - - /* TODO: It would be nice to implement this operation in place; that - * could save a significant amount of memory in queries with long doclists. */ - assert( in->iType>=DL_POSITIONS ); - readerInit(&r, in); - docListInit(&out, DL_POSITIONS, NULL, 0); - - while( !atEnd(&r) ){ - sqlite_int64 iDocid = readDocid(&r); - int match = 0; - int iPos, iColumn; - while( (iPos = readPosition(&r, &iColumn)) != -1 ){ - if( !match ){ - docListAddDocid(&out, iDocid); - match = 1; - } - docListAddPos(&out, iColumn, iPos); - } - } - - docListDestroy(in); - *in = out; -} - -/* Helper function for docListUpdate() and docListAccumulate(). -** Splices a doclist element into the doclist represented by r, -** leaving r pointing after the newly spliced element. -*/ -static void docListSpliceElement(DocListReader *r, sqlite_int64 iDocid, - const char *pSource, int nSource){ - DocList *d = r->pDoclist; - char *pTarget; - int nTarget, found; - - found = skipToDocid(r, iDocid); - - /* Describe slice in d to place pSource/nSource. */ - pTarget = r->p; - if( found ){ - skipDocument(r); - nTarget = r->p-pTarget; - }else{ - nTarget = 0; - } - - /* The sense of the following is that there are three possibilities. - ** If nTarget==nSource, we should not move any memory nor realloc. - ** If nTarget>nSource, trim target and realloc. - ** If nTargetnSource ){ - memmove(pTarget+nSource, pTarget+nTarget, docListEnd(d)-(pTarget+nTarget)); - } - if( nTarget!=nSource ){ - int iDoclist = pTarget-d->pData; - d->pData = realloc(d->pData, d->nData+nSource-nTarget); - pTarget = d->pData+iDoclist; - } - if( nTargetnData += nSource-nTarget; - r->p = pTarget+nSource; -} - -/* Insert/update pUpdate into the doclist. */ -static void docListUpdate(DocList *d, DocList *pUpdate){ - DocListReader reader; - - assert( d!=NULL && pUpdate!=NULL ); - assert( d->iType==pUpdate->iType); - - readerInit(&reader, d); - docListSpliceElement(&reader, firstDocid(pUpdate), - pUpdate->pData, pUpdate->nData); -} - -/* Propagate elements from pUpdate to pAcc, overwriting elements with -** matching docids. -*/ -static void docListAccumulate(DocList *pAcc, DocList *pUpdate){ - DocListReader accReader, updateReader; - - /* Handle edge cases where one doclist is empty. */ - assert( pAcc!=NULL ); - if( pUpdate==NULL || pUpdate->nData==0 ) return; - if( pAcc->nData==0 ){ - pAcc->pData = malloc(pUpdate->nData); - memcpy(pAcc->pData, pUpdate->pData, pUpdate->nData); - pAcc->nData = pUpdate->nData; - return; - } - - readerInit(&accReader, pAcc); - readerInit(&updateReader, pUpdate); - - while( !atEnd(&updateReader) ){ - char *pSource = updateReader.p; - sqlite_int64 iDocid = readDocid(&updateReader); - skipPositionList(&updateReader); - docListSpliceElement(&accReader, iDocid, pSource, updateReader.p-pSource); - } -} - -/* -** Read the next docid off of pIn. Return 0 if we reach the end. -* -* TODO: This assumes that docids are never 0, but they may actually be 0 since -* users can choose docids when inserting into a full-text table. Fix this. -*/ -static sqlite_int64 nextDocid(DocListReader *pIn){ - skipPositionList(pIn); - return atEnd(pIn) ? 0 : readDocid(pIn); -} - -/* -** pLeft and pRight are two DocListReaders that are pointing to -** positions lists of the same document: iDocid. -** -** If there are no instances in pLeft or pRight where the position -** of pLeft is one less than the position of pRight, then this -** routine adds nothing to pOut. -** -** If there are one or more instances where positions from pLeft -** are exactly one less than positions from pRight, then add a new -** document record to pOut. If pOut wants to hold positions, then -** include the positions from pRight that are one more than a -** position in pLeft. In other words: pRight.iPos==pLeft.iPos+1. -** -** pLeft and pRight are left pointing at the next document record. -*/ -static void mergePosList( - DocListReader *pLeft, /* Left position list */ - DocListReader *pRight, /* Right position list */ - sqlite_int64 iDocid, /* The docid from pLeft and pRight */ - DocList *pOut /* Write the merged document record here */ -){ - int iLeftCol, iLeftPos = readPosition(pLeft, &iLeftCol); - int iRightCol, iRightPos = readPosition(pRight, &iRightCol); - int match = 0; - - /* Loop until we've reached the end of both position lists. */ - while( iLeftPos!=-1 && iRightPos!=-1 ){ - if( iLeftCol==iRightCol && iLeftPos+1==iRightPos ){ - if( !match ){ - docListAddDocid(pOut, iDocid); - match = 1; - } - if( pOut->iType>=DL_POSITIONS ){ - docListAddPos(pOut, iRightCol, iRightPos); - } - iLeftPos = readPosition(pLeft, &iLeftCol); - iRightPos = readPosition(pRight, &iRightCol); - }else if( iRightCol=0 ) skipPositionList(pLeft); - if( iRightPos>=0 ) skipPositionList(pRight); -} - -/* We have two doclists: pLeft and pRight. -** Write the phrase intersection of these two doclists into pOut. -** -** A phrase intersection means that two documents only match -** if pLeft.iPos+1==pRight.iPos. -** -** The output pOut may or may not contain positions. If pOut -** does contain positions, they are the positions of pRight. -*/ -static void docListPhraseMerge( - DocList *pLeft, /* Doclist resulting from the words on the left */ - DocList *pRight, /* Doclist for the next word to the right */ - DocList *pOut /* Write the combined doclist here */ -){ - DocListReader left, right; - sqlite_int64 docidLeft, docidRight; - - readerInit(&left, pLeft); - readerInit(&right, pRight); - docidLeft = nextDocid(&left); - docidRight = nextDocid(&right); - - while( docidLeft>0 && docidRight>0 ){ - if( docidLeftiType0 && docidRight>0 ){ - if( docidLeft0 && docidRight>0 ){ - if( docidLeft<=docidRight ){ - docListAddDocid(pOut, docidLeft); - }else{ - docListAddDocid(pOut, docidRight); - } - priorLeft = docidLeft; - if( docidLeft<=docidRight ){ - docidLeft = nextDocid(&left); - } - if( docidRight>0 && docidRight<=priorLeft ){ - docidRight = nextDocid(&right); - } - } - while( docidLeft>0 ){ - docListAddDocid(pOut, docidLeft); - docidLeft = nextDocid(&left); - } - while( docidRight>0 ){ - docListAddDocid(pOut, docidRight); - docidRight = nextDocid(&right); - } -} - -/* We have two doclists: pLeft and pRight. -** Write into pOut all documents that occur in pLeft but not -** in pRight. -** -** Only docids are matched. Position information is ignored. -** -** The output pOut never holds positions. -*/ -static void docListExceptMerge( - DocList *pLeft, /* Doclist resulting from the words on the left */ - DocList *pRight, /* Doclist for the next word to the right */ - DocList *pOut /* Write the combined doclist here */ -){ - DocListReader left, right; - sqlite_int64 docidLeft, docidRight, priorLeft; - - readerInit(&left, pLeft); - readerInit(&right, pRight); - docidLeft = nextDocid(&left); - docidRight = nextDocid(&right); - - while( docidLeft>0 && docidRight>0 ){ - priorLeft = docidLeft; - if( docidLeft0 && docidRight<=priorLeft ){ - docidRight = nextDocid(&right); - } - } - while( docidLeft>0 ){ - docListAddDocid(pOut, docidLeft); - docidLeft = nextDocid(&left); - } -} - -static char *string_dup_n(const char *s, int n){ - char *str = malloc(n + 1); - memcpy(str, s, n); - str[n] = '\0'; - return str; -} - -/* Duplicate a string; the caller must free() the returned string. - * (We don't use strdup() since it is not part of the standard C library and - * may not be available everywhere.) */ -static char *string_dup(const char *s){ - return string_dup_n(s, strlen(s)); -} - -/* Format a string, replacing each occurrence of the % character with - * zDb.zName. This may be more convenient than sqlite_mprintf() - * when one string is used repeatedly in a format string. - * The caller must free() the returned string. */ -static char *string_format(const char *zFormat, - const char *zDb, const char *zName){ - const char *p; - size_t len = 0; - size_t nDb = strlen(zDb); - size_t nName = strlen(zName); - size_t nFullTableName = nDb+1+nName; - char *result; - char *r; - - /* first compute length needed */ - for(p = zFormat ; *p ; ++p){ - len += (*p=='%' ? nFullTableName : 1); - } - len += 1; /* for null terminator */ - - r = result = malloc(len); - for(p = zFormat; *p; ++p){ - if( *p=='%' ){ - memcpy(r, zDb, nDb); - r += nDb; - *r++ = '.'; - memcpy(r, zName, nName); - r += nName; - } else { - *r++ = *p; - } - } - *r++ = '\0'; - assert( r == result + len ); - return result; -} - -static int sql_exec(sqlite3 *db, const char *zDb, const char *zName, - const char *zFormat){ - char *zCommand = string_format(zFormat, zDb, zName); - int rc; - TRACE(("FTS1 sql: %s\n", zCommand)); - rc = sqlite3_exec(db, zCommand, NULL, 0, NULL); - free(zCommand); - return rc; -} - -static int sql_prepare(sqlite3 *db, const char *zDb, const char *zName, - sqlite3_stmt **ppStmt, const char *zFormat){ - char *zCommand = string_format(zFormat, zDb, zName); - int rc; - TRACE(("FTS1 prepare: %s\n", zCommand)); - rc = sqlite3_prepare(db, zCommand, -1, ppStmt, NULL); - free(zCommand); - return rc; -} - -/* end utility functions */ - -/* Forward reference */ -typedef struct fulltext_vtab fulltext_vtab; - -/* A single term in a query is represented by an instances of -** the following structure. -*/ -typedef struct QueryTerm { - short int nPhrase; /* How many following terms are part of the same phrase */ - short int iPhrase; /* This is the i-th term of a phrase. */ - short int iColumn; /* Column of the index that must match this term */ - signed char isOr; /* this term is preceded by "OR" */ - signed char isNot; /* this term is preceded by "-" */ - char *pTerm; /* text of the term. '\000' terminated. malloced */ - int nTerm; /* Number of bytes in pTerm[] */ -} QueryTerm; - - -/* A query string is parsed into a Query structure. - * - * We could, in theory, allow query strings to be complicated - * nested expressions with precedence determined by parentheses. - * But none of the major search engines do this. (Perhaps the - * feeling is that an parenthesized expression is two complex of - * an idea for the average user to grasp.) Taking our lead from - * the major search engines, we will allow queries to be a list - * of terms (with an implied AND operator) or phrases in double-quotes, - * with a single optional "-" before each non-phrase term to designate - * negation and an optional OR connector. - * - * OR binds more tightly than the implied AND, which is what the - * major search engines seem to do. So, for example: - * - * [one two OR three] ==> one AND (two OR three) - * [one OR two three] ==> (one OR two) AND three - * - * A "-" before a term matches all entries that lack that term. - * The "-" must occur immediately before the term with in intervening - * space. This is how the search engines do it. - * - * A NOT term cannot be the right-hand operand of an OR. If this - * occurs in the query string, the NOT is ignored: - * - * [one OR -two] ==> one OR two - * - */ -typedef struct Query { - fulltext_vtab *pFts; /* The full text index */ - int nTerms; /* Number of terms in the query */ - QueryTerm *pTerms; /* Array of terms. Space obtained from malloc() */ - int nextIsOr; /* Set the isOr flag on the next inserted term */ - int nextColumn; /* Next word parsed must be in this column */ - int dfltColumn; /* The default column */ -} Query; - - -/* -** An instance of the following structure keeps track of generated -** matching-word offset information and snippets. -*/ -typedef struct Snippet { - int nMatch; /* Total number of matches */ - int nAlloc; /* Space allocated for aMatch[] */ - struct snippetMatch { /* One entry for each matching term */ - char snStatus; /* Status flag for use while constructing snippets */ - short int iCol; /* The column that contains the match */ - short int iTerm; /* The index in Query.pTerms[] of the matching term */ - short int nByte; /* Number of bytes in the term */ - int iStart; /* The offset to the first character of the term */ - } *aMatch; /* Points to space obtained from malloc */ - char *zOffset; /* Text rendering of aMatch[] */ - int nOffset; /* strlen(zOffset) */ - char *zSnippet; /* Snippet text */ - int nSnippet; /* strlen(zSnippet) */ -} Snippet; - - -typedef enum QueryType { - QUERY_GENERIC, /* table scan */ - QUERY_ROWID, /* lookup by rowid */ - QUERY_FULLTEXT /* QUERY_FULLTEXT + [i] is a full-text search for column i*/ -} QueryType; - -/* TODO(shess) CHUNK_MAX controls how much data we allow in segment 0 -** before we start aggregating into larger segments. Lower CHUNK_MAX -** means that for a given input we have more individual segments per -** term, which means more rows in the table and a bigger index (due to -** both more rows and bigger rowids). But it also reduces the average -** cost of adding new elements to the segment 0 doclist, and it seems -** to reduce the number of pages read and written during inserts. 256 -** was chosen by measuring insertion times for a certain input (first -** 10k documents of Enron corpus), though including query performance -** in the decision may argue for a larger value. -*/ -#define CHUNK_MAX 256 - -typedef enum fulltext_statement { - CONTENT_INSERT_STMT, - CONTENT_SELECT_STMT, - CONTENT_UPDATE_STMT, - CONTENT_DELETE_STMT, - - TERM_SELECT_STMT, - TERM_SELECT_ALL_STMT, - TERM_INSERT_STMT, - TERM_UPDATE_STMT, - TERM_DELETE_STMT, - - MAX_STMT /* Always at end! */ -} fulltext_statement; - -/* These must exactly match the enum above. */ -/* TODO(adam): Is there some risk that a statement (in particular, -** pTermSelectStmt) will be used in two cursors at once, e.g. if a -** query joins a virtual table to itself? If so perhaps we should -** move some of these to the cursor object. -*/ -static const char *const fulltext_zStatement[MAX_STMT] = { - /* CONTENT_INSERT */ NULL, /* generated in contentInsertStatement() */ - /* CONTENT_SELECT */ "select * from %_content where rowid = ?", - /* CONTENT_UPDATE */ NULL, /* generated in contentUpdateStatement() */ - /* CONTENT_DELETE */ "delete from %_content where rowid = ?", - - /* TERM_SELECT */ - "select rowid, doclist from %_term where term = ? and segment = ?", - /* TERM_SELECT_ALL */ - "select doclist from %_term where term = ? order by segment", - /* TERM_INSERT */ - "insert into %_term (rowid, term, segment, doclist) values (?, ?, ?, ?)", - /* TERM_UPDATE */ "update %_term set doclist = ? where rowid = ?", - /* TERM_DELETE */ "delete from %_term where rowid = ?", -}; - -/* -** A connection to a fulltext index is an instance of the following -** structure. The xCreate and xConnect methods create an instance -** of this structure and xDestroy and xDisconnect free that instance. -** All other methods receive a pointer to the structure as one of their -** arguments. -*/ -struct fulltext_vtab { - sqlite3_vtab base; /* Base class used by SQLite core */ - sqlite3 *db; /* The database connection */ - const char *zDb; /* logical database name */ - const char *zName; /* virtual table name */ - int nColumn; /* number of columns in virtual table */ - char **azColumn; /* column names. malloced */ - char **azContentColumn; /* column names in content table; malloced */ - sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */ - - /* Precompiled statements which we keep as long as the table is - ** open. - */ - sqlite3_stmt *pFulltextStatements[MAX_STMT]; -}; - -/* -** When the core wants to do a query, it create a cursor using a -** call to xOpen. This structure is an instance of a cursor. It -** is destroyed by xClose. -*/ -typedef struct fulltext_cursor { - sqlite3_vtab_cursor base; /* Base class used by SQLite core */ - QueryType iCursorType; /* Copy of sqlite3_index_info.idxNum */ - sqlite3_stmt *pStmt; /* Prepared statement in use by the cursor */ - int eof; /* True if at End Of Results */ - Query q; /* Parsed query string */ - Snippet snippet; /* Cached snippet for the current row */ - int iColumn; /* Column being searched */ - DocListReader result; /* used when iCursorType == QUERY_FULLTEXT */ -} fulltext_cursor; - -static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){ - return (fulltext_vtab *) c->base.pVtab; -} - -static const sqlite3_module fulltextModule; /* forward declaration */ - -/* Append a list of strings separated by commas to a StringBuffer. */ -static void appendList(StringBuffer *sb, int nString, char **azString){ - int i; - for(i=0; i0 ) append(sb, ", "); - append(sb, azString[i]); - } -} - -/* Return a dynamically generated statement of the form - * insert into %_content (rowid, ...) values (?, ...) - */ -static const char *contentInsertStatement(fulltext_vtab *v){ - StringBuffer sb; - int i; - - initStringBuffer(&sb); - append(&sb, "insert into %_content (rowid, "); - appendList(&sb, v->nColumn, v->azContentColumn); - append(&sb, ") values (?"); - for(i=0; inColumn; ++i) - append(&sb, ", ?"); - append(&sb, ")"); - return sb.s; -} - -/* Return a dynamically generated statement of the form - * update %_content set [col_0] = ?, [col_1] = ?, ... - * where rowid = ? - */ -static const char *contentUpdateStatement(fulltext_vtab *v){ - StringBuffer sb; - int i; - - initStringBuffer(&sb); - append(&sb, "update %_content set "); - for(i=0; inColumn; ++i) { - if( i>0 ){ - append(&sb, ", "); - } - append(&sb, v->azContentColumn[i]); - append(&sb, " = ?"); - } - append(&sb, " where rowid = ?"); - return sb.s; -} - -/* Puts a freshly-prepared statement determined by iStmt in *ppStmt. -** If the indicated statement has never been prepared, it is prepared -** and cached, otherwise the cached version is reset. -*/ -static int sql_get_statement(fulltext_vtab *v, fulltext_statement iStmt, - sqlite3_stmt **ppStmt){ - assert( iStmtpFulltextStatements[iStmt]==NULL ){ - const char *zStmt; - int rc; - switch( iStmt ){ - case CONTENT_INSERT_STMT: - zStmt = contentInsertStatement(v); break; - case CONTENT_UPDATE_STMT: - zStmt = contentUpdateStatement(v); break; - default: - zStmt = fulltext_zStatement[iStmt]; - } - rc = sql_prepare(v->db, v->zDb, v->zName, &v->pFulltextStatements[iStmt], - zStmt); - if( zStmt != fulltext_zStatement[iStmt]) free((void *) zStmt); - if( rc!=SQLITE_OK ) return rc; - } else { - int rc = sqlite3_reset(v->pFulltextStatements[iStmt]); - if( rc!=SQLITE_OK ) return rc; - } - - *ppStmt = v->pFulltextStatements[iStmt]; - return SQLITE_OK; -} - -/* Step the indicated statement, handling errors SQLITE_BUSY (by -** retrying) and SQLITE_SCHEMA (by re-preparing and transferring -** bindings to the new statement). -** TODO(adam): We should extend this function so that it can work with -** statements declared locally, not only globally cached statements. -*/ -static int sql_step_statement(fulltext_vtab *v, fulltext_statement iStmt, - sqlite3_stmt **ppStmt){ - int rc; - sqlite3_stmt *s = *ppStmt; - assert( iStmtpFulltextStatements[iStmt] ); - - while( (rc=sqlite3_step(s))!=SQLITE_DONE && rc!=SQLITE_ROW ){ - if( rc==SQLITE_BUSY ) continue; - if( rc!=SQLITE_ERROR ) return rc; - - /* If an SQLITE_SCHEMA error has occurred, then finalizing this - * statement is going to delete the fulltext_vtab structure. If - * the statement just executed is in the pFulltextStatements[] - * array, it will be finalized twice. So remove it before - * calling sqlite3_finalize(). - */ - v->pFulltextStatements[iStmt] = NULL; - rc = sqlite3_finalize(s); - break; - } - return rc; - - err: - sqlite3_finalize(s); - return rc; -} - -/* Like sql_step_statement(), but convert SQLITE_DONE to SQLITE_OK. -** Useful for statements like UPDATE, where we expect no results. -*/ -static int sql_single_step_statement(fulltext_vtab *v, - fulltext_statement iStmt, - sqlite3_stmt **ppStmt){ - int rc = sql_step_statement(v, iStmt, ppStmt); - return (rc==SQLITE_DONE) ? SQLITE_OK : rc; -} - -/* insert into %_content (rowid, ...) values ([rowid], [pValues]) */ -static int content_insert(fulltext_vtab *v, sqlite3_value *rowid, - sqlite3_value **pValues){ - sqlite3_stmt *s; - int i; - int rc = sql_get_statement(v, CONTENT_INSERT_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_value(s, 1, rowid); - if( rc!=SQLITE_OK ) return rc; - - for(i=0; inColumn; ++i){ - rc = sqlite3_bind_value(s, 2+i, pValues[i]); - if( rc!=SQLITE_OK ) return rc; - } - - return sql_single_step_statement(v, CONTENT_INSERT_STMT, &s); -} - -/* update %_content set col0 = pValues[0], col1 = pValues[1], ... - * where rowid = [iRowid] */ -static int content_update(fulltext_vtab *v, sqlite3_value **pValues, - sqlite_int64 iRowid){ - sqlite3_stmt *s; - int i; - int rc = sql_get_statement(v, CONTENT_UPDATE_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - for(i=0; inColumn; ++i){ - rc = sqlite3_bind_value(s, 1+i, pValues[i]); - if( rc!=SQLITE_OK ) return rc; - } - - rc = sqlite3_bind_int64(s, 1+v->nColumn, iRowid); - if( rc!=SQLITE_OK ) return rc; - - return sql_single_step_statement(v, CONTENT_UPDATE_STMT, &s); -} - -static void freeStringArray(int nString, const char **pString){ - int i; - - for (i=0 ; i < nString ; ++i) { - if( pString[i]!=NULL ) free((void *) pString[i]); - } - free((void *) pString); -} - -/* select * from %_content where rowid = [iRow] - * The caller must delete the returned array and all strings in it. - * null fields will be NULL in the returned array. - * - * TODO: Perhaps we should return pointer/length strings here for consistency - * with other code which uses pointer/length. */ -static int content_select(fulltext_vtab *v, sqlite_int64 iRow, - const char ***pValues){ - sqlite3_stmt *s; - const char **values; - int i; - int rc; - - *pValues = NULL; - - rc = sql_get_statement(v, CONTENT_SELECT_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 1, iRow); - if( rc!=SQLITE_OK ) return rc; - - rc = sql_step_statement(v, CONTENT_SELECT_STMT, &s); - if( rc!=SQLITE_ROW ) return rc; - - values = (const char **) malloc(v->nColumn * sizeof(const char *)); - for(i=0; inColumn; ++i){ - if( sqlite3_column_type(s, i)==SQLITE_NULL ){ - values[i] = NULL; - }else{ - values[i] = string_dup((char*)sqlite3_column_text(s, i)); - } - } - - /* We expect only one row. We must execute another sqlite3_step() - * to complete the iteration; otherwise the table will remain locked. */ - rc = sqlite3_step(s); - if( rc==SQLITE_DONE ){ - *pValues = values; - return SQLITE_OK; - } - - freeStringArray(v->nColumn, values); - return rc; -} - -/* delete from %_content where rowid = [iRow ] */ -static int content_delete(fulltext_vtab *v, sqlite_int64 iRow){ - sqlite3_stmt *s; - int rc = sql_get_statement(v, CONTENT_DELETE_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 1, iRow); - if( rc!=SQLITE_OK ) return rc; - - return sql_single_step_statement(v, CONTENT_DELETE_STMT, &s); -} - -/* select rowid, doclist from %_term - * where term = [pTerm] and segment = [iSegment] - * If found, returns SQLITE_ROW; the caller must free the - * returned doclist. If no rows found, returns SQLITE_DONE. */ -static int term_select(fulltext_vtab *v, const char *pTerm, int nTerm, - int iSegment, - sqlite_int64 *rowid, DocList *out){ - sqlite3_stmt *s; - int rc = sql_get_statement(v, TERM_SELECT_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_text(s, 1, pTerm, nTerm, SQLITE_STATIC); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int(s, 2, iSegment); - if( rc!=SQLITE_OK ) return rc; - - rc = sql_step_statement(v, TERM_SELECT_STMT, &s); - if( rc!=SQLITE_ROW ) return rc; - - *rowid = sqlite3_column_int64(s, 0); - docListInit(out, DL_DEFAULT, - sqlite3_column_blob(s, 1), sqlite3_column_bytes(s, 1)); - - /* We expect only one row. We must execute another sqlite3_step() - * to complete the iteration; otherwise the table will remain locked. */ - rc = sqlite3_step(s); - return rc==SQLITE_DONE ? SQLITE_ROW : rc; -} - -/* Load the segment doclists for term pTerm and merge them in -** appropriate order into out. Returns SQLITE_OK if successful. If -** there are no segments for pTerm, successfully returns an empty -** doclist in out. -** -** Each document consists of 1 or more "columns". The number of -** columns is v->nColumn. If iColumn==v->nColumn, then return -** position information about all columns. If iColumnnColumn, -** then only return position information about the iColumn-th column -** (where the first column is 0). -*/ -static int term_select_all( - fulltext_vtab *v, /* The fulltext index we are querying against */ - int iColumn, /* If nColumn ){ /* querying a single column */ - docListRestrictColumn(&old, iColumn); - } - - /* doclist contains the newer data, so write it over old. Then - ** steal accumulated result for doclist. - */ - docListAccumulate(&old, &doclist); - docListDestroy(&doclist); - doclist = old; - } - if( rc!=SQLITE_DONE ){ - docListDestroy(&doclist); - return rc; - } - - docListDiscardEmpty(&doclist); - *out = doclist; - return SQLITE_OK; -} - -/* insert into %_term (rowid, term, segment, doclist) - values ([piRowid], [pTerm], [iSegment], [doclist]) -** Lets sqlite select rowid if piRowid is NULL, else uses *piRowid. -** -** NOTE(shess) piRowid is IN, with values of "space of int64" plus -** null, it is not used to pass data back to the caller. -*/ -static int term_insert(fulltext_vtab *v, sqlite_int64 *piRowid, - const char *pTerm, int nTerm, - int iSegment, DocList *doclist){ - sqlite3_stmt *s; - int rc = sql_get_statement(v, TERM_INSERT_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - if( piRowid==NULL ){ - rc = sqlite3_bind_null(s, 1); - }else{ - rc = sqlite3_bind_int64(s, 1, *piRowid); - } - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_text(s, 2, pTerm, nTerm, SQLITE_STATIC); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int(s, 3, iSegment); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_blob(s, 4, doclist->pData, doclist->nData, SQLITE_STATIC); - if( rc!=SQLITE_OK ) return rc; - - return sql_single_step_statement(v, TERM_INSERT_STMT, &s); -} - -/* update %_term set doclist = [doclist] where rowid = [rowid] */ -static int term_update(fulltext_vtab *v, sqlite_int64 rowid, - DocList *doclist){ - sqlite3_stmt *s; - int rc = sql_get_statement(v, TERM_UPDATE_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_blob(s, 1, doclist->pData, doclist->nData, SQLITE_STATIC); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 2, rowid); - if( rc!=SQLITE_OK ) return rc; - - return sql_single_step_statement(v, TERM_UPDATE_STMT, &s); -} - -static int term_delete(fulltext_vtab *v, sqlite_int64 rowid){ - sqlite3_stmt *s; - int rc = sql_get_statement(v, TERM_DELETE_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 1, rowid); - if( rc!=SQLITE_OK ) return rc; - - return sql_single_step_statement(v, TERM_DELETE_STMT, &s); -} - -/* -** Free the memory used to contain a fulltext_vtab structure. -*/ -static void fulltext_vtab_destroy(fulltext_vtab *v){ - int iStmt, i; - - TRACE(("FTS1 Destroy %p\n", v)); - for( iStmt=0; iStmtpFulltextStatements[iStmt]!=NULL ){ - sqlite3_finalize(v->pFulltextStatements[iStmt]); - v->pFulltextStatements[iStmt] = NULL; - } - } - - if( v->pTokenizer!=NULL ){ - v->pTokenizer->pModule->xDestroy(v->pTokenizer); - v->pTokenizer = NULL; - } - - free(v->azColumn); - for(i = 0; i < v->nColumn; ++i) { - sqlite3_free(v->azContentColumn[i]); - } - free(v->azContentColumn); - free(v); -} - -/* -** Token types for parsing the arguments to xConnect or xCreate. -*/ -#define TOKEN_EOF 0 /* End of file */ -#define TOKEN_SPACE 1 /* Any kind of whitespace */ -#define TOKEN_ID 2 /* An identifier */ -#define TOKEN_STRING 3 /* A string literal */ -#define TOKEN_PUNCT 4 /* A single punctuation character */ - -/* -** If X is a character that can be used in an identifier then -** IdChar(X) will be true. Otherwise it is false. -** -** For ASCII, any character with the high-order bit set is -** allowed in an identifier. For 7-bit characters, -** sqlite3IsIdChar[X] must be 1. -** -** Ticket #1066. the SQL standard does not allow '$' in the -** middle of identfiers. But many SQL implementations do. -** SQLite will allow '$' in identifiers for compatibility. -** But the feature is undocumented. -*/ -static const char isIdChar[] = { -/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ - 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */ -}; -#define IdChar(C) (((c=C)&0x80)!=0 || (c>0x1f && isIdChar[c-0x20])) - - -/* -** Return the length of the token that begins at z[0]. -** Store the token type in *tokenType before returning. -*/ -static int getToken(const char *z, int *tokenType){ - int i, c; - switch( *z ){ - case 0: { - *tokenType = TOKEN_EOF; - return 0; - } - case ' ': case '\t': case '\n': case '\f': case '\r': { - for(i=1; safe_isspace(z[i]); i++){} - *tokenType = TOKEN_SPACE; - return i; - } - case '`': - case '\'': - case '"': { - int delim = z[0]; - for(i=1; (c=z[i])!=0; i++){ - if( c==delim ){ - if( z[i+1]==delim ){ - i++; - }else{ - break; - } - } - } - *tokenType = TOKEN_STRING; - return i + (c!=0); - } - case '[': { - for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){} - *tokenType = TOKEN_ID; - return i; - } - default: { - if( !IdChar(*z) ){ - break; - } - for(i=1; IdChar(z[i]); i++){} - *tokenType = TOKEN_ID; - return i; - } - } - *tokenType = TOKEN_PUNCT; - return 1; -} - -/* -** A token extracted from a string is an instance of the following -** structure. -*/ -typedef struct Token { - const char *z; /* Pointer to token text. Not '\000' terminated */ - short int n; /* Length of the token text in bytes. */ -} Token; - -/* -** Given a input string (which is really one of the argv[] parameters -** passed into xConnect or xCreate) split the string up into tokens. -** Return an array of pointers to '\000' terminated strings, one string -** for each non-whitespace token. -** -** The returned array is terminated by a single NULL pointer. -** -** Space to hold the returned array is obtained from a single -** malloc and should be freed by passing the return value to free(). -** The individual strings within the token list are all a part of -** the single memory allocation and will all be freed at once. -*/ -static char **tokenizeString(const char *z, int *pnToken){ - int nToken = 0; - Token *aToken = malloc( strlen(z) * sizeof(aToken[0]) ); - int n = 1; - int e, i; - int totalSize = 0; - char **azToken; - char *zCopy; - while( n>0 ){ - n = getToken(z, &e); - if( e!=TOKEN_SPACE ){ - aToken[nToken].z = z; - aToken[nToken].n = n; - nToken++; - totalSize += n+1; - } - z += n; - } - azToken = (char**)malloc( nToken*sizeof(char*) + totalSize ); - zCopy = (char*)&azToken[nToken]; - nToken--; - for(i=0; i=0 ){ - azIn[j] = azIn[i]; - } - j++; - } - } - azIn[j] = 0; - } -} - - -/* -** Find the first alphanumeric token in the string zIn. Null-terminate -** this token. Remove any quotation marks. And return a pointer to -** the result. -*/ -static char *firstToken(char *zIn, char **pzTail){ - int n, ttype; - while(1){ - n = getToken(zIn, &ttype); - if( ttype==TOKEN_SPACE ){ - zIn += n; - }else if( ttype==TOKEN_EOF ){ - *pzTail = zIn; - return 0; - }else{ - zIn[n] = 0; - *pzTail = &zIn[1]; - dequoteString(zIn); - return zIn; - } - } - /*NOTREACHED*/ -} - -/* Return true if... -** -** * s begins with the string t, ignoring case -** * s is longer than t -** * The first character of s beyond t is not a alphanumeric -** -** Ignore leading space in *s. -** -** To put it another way, return true if the first token of -** s[] is t[]. -*/ -static int startsWith(const char *s, const char *t){ - while( safe_isspace(*s) ){ s++; } - while( *t ){ - if( safe_tolower(*s++)!=safe_tolower(*t++) ) return 0; - } - return *s!='_' && !safe_isalnum(*s); -} - -/* -** An instance of this structure defines the "spec" of a -** full text index. This structure is populated by parseSpec -** and use by fulltextConnect and fulltextCreate. -*/ -typedef struct TableSpec { - const char *zDb; /* Logical database name */ - const char *zName; /* Name of the full-text index */ - int nColumn; /* Number of columns to be indexed */ - char **azColumn; /* Original names of columns to be indexed */ - char **azContentColumn; /* Column names for %_content */ - char **azTokenizer; /* Name of tokenizer and its arguments */ -} TableSpec; - -/* -** Reclaim all of the memory used by a TableSpec -*/ -static void clearTableSpec(TableSpec *p) { - free(p->azColumn); - free(p->azContentColumn); - free(p->azTokenizer); -} - -/* Parse a CREATE VIRTUAL TABLE statement, which looks like this: - * - * CREATE VIRTUAL TABLE email - * USING fts1(subject, body, tokenize mytokenizer(myarg)) - * - * We return parsed information in a TableSpec structure. - * - */ -static int parseSpec(TableSpec *pSpec, int argc, const char *const*argv, - char**pzErr){ - int i, n; - char *z, *zDummy; - char **azArg; - const char *zTokenizer = 0; /* argv[] entry describing the tokenizer */ - - assert( argc>=3 ); - /* Current interface: - ** argv[0] - module name - ** argv[1] - database name - ** argv[2] - table name - ** argv[3..] - columns, optionally followed by tokenizer specification - ** and snippet delimiters specification. - */ - - /* Make a copy of the complete argv[][] array in a single allocation. - ** The argv[][] array is read-only and transient. We can write to the - ** copy in order to modify things and the copy is persistent. - */ - memset(pSpec, 0, sizeof(*pSpec)); - for(i=n=0; izDb = azArg[1]; - pSpec->zName = azArg[2]; - pSpec->nColumn = 0; - pSpec->azColumn = azArg; - zTokenizer = "tokenize simple"; - for(i=3; inColumn] = firstToken(azArg[i], &zDummy); - pSpec->nColumn++; - } - } - if( pSpec->nColumn==0 ){ - azArg[0] = "content"; - pSpec->nColumn = 1; - } - - /* - ** Construct the list of content column names. - ** - ** Each content column name will be of the form cNNAAAA - ** where NN is the column number and AAAA is the sanitized - ** column name. "sanitized" means that special characters are - ** converted to "_". The cNN prefix guarantees that all column - ** names are unique. - ** - ** The AAAA suffix is not strictly necessary. It is included - ** for the convenience of people who might examine the generated - ** %_content table and wonder what the columns are used for. - */ - pSpec->azContentColumn = malloc( pSpec->nColumn * sizeof(char *) ); - if( pSpec->azContentColumn==0 ){ - clearTableSpec(pSpec); - return SQLITE_NOMEM; - } - for(i=0; inColumn; i++){ - char *p; - pSpec->azContentColumn[i] = sqlite3_mprintf("c%d%s", i, azArg[i]); - for (p = pSpec->azContentColumn[i]; *p ; ++p) { - if( !safe_isalnum(*p) ) *p = '_'; - } - } - - /* - ** Parse the tokenizer specification string. - */ - pSpec->azTokenizer = tokenizeString(zTokenizer, &n); - tokenListToIdList(pSpec->azTokenizer); - - return SQLITE_OK; -} - -/* -** Generate a CREATE TABLE statement that describes the schema of -** the virtual table. Return a pointer to this schema string. -** -** Space is obtained from sqlite3_mprintf() and should be freed -** using sqlite3_free(). -*/ -static char *fulltextSchema( - int nColumn, /* Number of columns */ - const char *const* azColumn, /* List of columns */ - const char *zTableName /* Name of the table */ -){ - int i; - char *zSchema, *zNext; - const char *zSep = "("; - zSchema = sqlite3_mprintf("CREATE TABLE x"); - for(i=0; ibase */ - v->db = db; - v->zDb = spec->zDb; /* Freed when azColumn is freed */ - v->zName = spec->zName; /* Freed when azColumn is freed */ - v->nColumn = spec->nColumn; - v->azContentColumn = spec->azContentColumn; - spec->azContentColumn = 0; - v->azColumn = spec->azColumn; - spec->azColumn = 0; - - if( spec->azTokenizer==0 ){ - return SQLITE_NOMEM; - } - /* TODO(shess) For now, add new tokenizers as else if clauses. */ - if( spec->azTokenizer[0]==0 || startsWith(spec->azTokenizer[0], "simple") ){ - sqlite3Fts1SimpleTokenizerModule(&m); - }else if( startsWith(spec->azTokenizer[0], "porter") ){ - sqlite3Fts1PorterTokenizerModule(&m); - }else{ - *pzErr = sqlite3_mprintf("unknown tokenizer: %s", spec->azTokenizer[0]); - rc = SQLITE_ERROR; - goto err; - } - for(n=0; spec->azTokenizer[n]; n++){} - if( n ){ - rc = m->xCreate(n-1, (const char*const*)&spec->azTokenizer[1], - &v->pTokenizer); - }else{ - rc = m->xCreate(0, 0, &v->pTokenizer); - } - if( rc!=SQLITE_OK ) goto err; - v->pTokenizer->pModule = m; - - /* TODO: verify the existence of backing tables foo_content, foo_term */ - - schema = fulltextSchema(v->nColumn, (const char*const*)v->azColumn, - spec->zName); - rc = sqlite3_declare_vtab(db, schema); - sqlite3_free(schema); - if( rc!=SQLITE_OK ) goto err; - - memset(v->pFulltextStatements, 0, sizeof(v->pFulltextStatements)); - - *ppVTab = &v->base; - TRACE(("FTS1 Connect %p\n", v)); - - return rc; - -err: - fulltext_vtab_destroy(v); - return rc; -} - -static int fulltextConnect( - sqlite3 *db, - void *pAux, - int argc, const char *const*argv, - sqlite3_vtab **ppVTab, - char **pzErr -){ - TableSpec spec; - int rc = parseSpec(&spec, argc, argv, pzErr); - if( rc!=SQLITE_OK ) return rc; - - rc = constructVtab(db, &spec, ppVTab, pzErr); - clearTableSpec(&spec); - return rc; -} - - /* The %_content table holds the text of each document, with - ** the rowid used as the docid. - ** - ** The %_term table maps each term to a document list blob - ** containing elements sorted by ascending docid, each element - ** encoded as: - ** - ** docid varint-encoded - ** token elements: - ** position+1 varint-encoded as delta from previous position - ** start offset varint-encoded as delta from previous start offset - ** end offset varint-encoded as delta from start offset - ** - ** The sentinel position of 0 indicates the end of the token list. - ** - ** Additionally, doclist blobs are chunked into multiple segments, - ** using segment to order the segments. New elements are added to - ** the segment at segment 0, until it exceeds CHUNK_MAX. Then - ** segment 0 is deleted, and the doclist is inserted at segment 1. - ** If there is already a doclist at segment 1, the segment 0 doclist - ** is merged with it, the segment 1 doclist is deleted, and the - ** merged doclist is inserted at segment 2, repeating those - ** operations until an insert succeeds. - ** - ** Since this structure doesn't allow us to update elements in place - ** in case of deletion or update, these are simply written to - ** segment 0 (with an empty token list in case of deletion), with - ** docListAccumulate() taking care to retain lower-segment - ** information in preference to higher-segment information. - */ - /* TODO(shess) Provide a VACUUM type operation which both removes - ** deleted elements which are no longer necessary, and duplicated - ** elements. I suspect this will probably not be necessary in - ** practice, though. - */ -static int fulltextCreate(sqlite3 *db, void *pAux, - int argc, const char * const *argv, - sqlite3_vtab **ppVTab, char **pzErr){ - int rc; - TableSpec spec; - StringBuffer schema; - TRACE(("FTS1 Create\n")); - - rc = parseSpec(&spec, argc, argv, pzErr); - if( rc!=SQLITE_OK ) return rc; - - initStringBuffer(&schema); - append(&schema, "CREATE TABLE %_content("); - appendList(&schema, spec.nColumn, spec.azContentColumn); - append(&schema, ")"); - rc = sql_exec(db, spec.zDb, spec.zName, schema.s); - free(schema.s); - if( rc!=SQLITE_OK ) goto out; - - rc = sql_exec(db, spec.zDb, spec.zName, - "create table %_term(term text, segment integer, doclist blob, " - "primary key(term, segment));"); - if( rc!=SQLITE_OK ) goto out; - - rc = constructVtab(db, &spec, ppVTab, pzErr); - -out: - clearTableSpec(&spec); - return rc; -} - -/* Decide how to handle an SQL query. */ -static int fulltextBestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ - int i; - TRACE(("FTS1 BestIndex\n")); - - for(i=0; inConstraint; ++i){ - const struct sqlite3_index_constraint *pConstraint; - pConstraint = &pInfo->aConstraint[i]; - if( pConstraint->usable ) { - if( pConstraint->iColumn==-1 && - pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ ){ - pInfo->idxNum = QUERY_ROWID; /* lookup by rowid */ - TRACE(("FTS1 QUERY_ROWID\n")); - } else if( pConstraint->iColumn>=0 && - pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH ){ - /* full-text search */ - pInfo->idxNum = QUERY_FULLTEXT + pConstraint->iColumn; - TRACE(("FTS1 QUERY_FULLTEXT %d\n", pConstraint->iColumn)); - } else continue; - - pInfo->aConstraintUsage[i].argvIndex = 1; - pInfo->aConstraintUsage[i].omit = 1; - - /* An arbitrary value for now. - * TODO: Perhaps rowid matches should be considered cheaper than - * full-text searches. */ - pInfo->estimatedCost = 1.0; - - return SQLITE_OK; - } - } - pInfo->idxNum = QUERY_GENERIC; - return SQLITE_OK; -} - -static int fulltextDisconnect(sqlite3_vtab *pVTab){ - TRACE(("FTS1 Disconnect %p\n", pVTab)); - fulltext_vtab_destroy((fulltext_vtab *)pVTab); - return SQLITE_OK; -} - -static int fulltextDestroy(sqlite3_vtab *pVTab){ - fulltext_vtab *v = (fulltext_vtab *)pVTab; - int rc; - - TRACE(("FTS1 Destroy %p\n", pVTab)); - rc = sql_exec(v->db, v->zDb, v->zName, - "drop table if exists %_content;" - "drop table if exists %_term;" - ); - if( rc!=SQLITE_OK ) return rc; - - fulltext_vtab_destroy((fulltext_vtab *)pVTab); - return SQLITE_OK; -} - -static int fulltextOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ - fulltext_cursor *c; - - c = (fulltext_cursor *) calloc(sizeof(fulltext_cursor), 1); - /* sqlite will initialize c->base */ - *ppCursor = &c->base; - TRACE(("FTS1 Open %p: %p\n", pVTab, c)); - - return SQLITE_OK; -} - - -/* Free all of the dynamically allocated memory held by *q -*/ -static void queryClear(Query *q){ - int i; - for(i = 0; i < q->nTerms; ++i){ - free(q->pTerms[i].pTerm); - } - free(q->pTerms); - memset(q, 0, sizeof(*q)); -} - -/* Free all of the dynamically allocated memory held by the -** Snippet -*/ -static void snippetClear(Snippet *p){ - free(p->aMatch); - free(p->zOffset); - free(p->zSnippet); - memset(p, 0, sizeof(*p)); -} -/* -** Append a single entry to the p->aMatch[] log. -*/ -static void snippetAppendMatch( - Snippet *p, /* Append the entry to this snippet */ - int iCol, int iTerm, /* The column and query term */ - int iStart, int nByte /* Offset and size of the match */ -){ - int i; - struct snippetMatch *pMatch; - if( p->nMatch+1>=p->nAlloc ){ - p->nAlloc = p->nAlloc*2 + 10; - p->aMatch = realloc(p->aMatch, p->nAlloc*sizeof(p->aMatch[0]) ); - if( p->aMatch==0 ){ - p->nMatch = 0; - p->nAlloc = 0; - return; - } - } - i = p->nMatch++; - pMatch = &p->aMatch[i]; - pMatch->iCol = iCol; - pMatch->iTerm = iTerm; - pMatch->iStart = iStart; - pMatch->nByte = nByte; -} - -/* -** Sizing information for the circular buffer used in snippetOffsetsOfColumn() -*/ -#define FTS1_ROTOR_SZ (32) -#define FTS1_ROTOR_MASK (FTS1_ROTOR_SZ-1) - -/* -** Add entries to pSnippet->aMatch[] for every match that occurs against -** document zDoc[0..nDoc-1] which is stored in column iColumn. -*/ -static void snippetOffsetsOfColumn( - Query *pQuery, - Snippet *pSnippet, - int iColumn, - const char *zDoc, - int nDoc -){ - const sqlite3_tokenizer_module *pTModule; /* The tokenizer module */ - sqlite3_tokenizer *pTokenizer; /* The specific tokenizer */ - sqlite3_tokenizer_cursor *pTCursor; /* Tokenizer cursor */ - fulltext_vtab *pVtab; /* The full text index */ - int nColumn; /* Number of columns in the index */ - const QueryTerm *aTerm; /* Query string terms */ - int nTerm; /* Number of query string terms */ - int i, j; /* Loop counters */ - int rc; /* Return code */ - unsigned int match, prevMatch; /* Phrase search bitmasks */ - const char *zToken; /* Next token from the tokenizer */ - int nToken; /* Size of zToken */ - int iBegin, iEnd, iPos; /* Offsets of beginning and end */ - - /* The following variables keep a circular buffer of the last - ** few tokens */ - unsigned int iRotor = 0; /* Index of current token */ - int iRotorBegin[FTS1_ROTOR_SZ]; /* Beginning offset of token */ - int iRotorLen[FTS1_ROTOR_SZ]; /* Length of token */ - - pVtab = pQuery->pFts; - nColumn = pVtab->nColumn; - pTokenizer = pVtab->pTokenizer; - pTModule = pTokenizer->pModule; - rc = pTModule->xOpen(pTokenizer, zDoc, nDoc, &pTCursor); - if( rc ) return; - pTCursor->pTokenizer = pTokenizer; - aTerm = pQuery->pTerms; - nTerm = pQuery->nTerms; - if( nTerm>=FTS1_ROTOR_SZ ){ - nTerm = FTS1_ROTOR_SZ - 1; - } - prevMatch = 0; - while(1){ - rc = pTModule->xNext(pTCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos); - if( rc ) break; - iRotorBegin[iRotor&FTS1_ROTOR_MASK] = iBegin; - iRotorLen[iRotor&FTS1_ROTOR_MASK] = iEnd-iBegin; - match = 0; - for(i=0; i=0 && iCol1 && (prevMatch & (1<=0; j--){ - int k = (iRotor-j) & FTS1_ROTOR_MASK; - snippetAppendMatch(pSnippet, iColumn, i-j, - iRotorBegin[k], iRotorLen[k]); - } - } - } - prevMatch = match<<1; - iRotor++; - } - pTModule->xClose(pTCursor); -} - - -/* -** Compute all offsets for the current row of the query. -** If the offsets have already been computed, this routine is a no-op. -*/ -static void snippetAllOffsets(fulltext_cursor *p){ - int nColumn; - int iColumn, i; - int iFirst, iLast; - fulltext_vtab *pFts; - - if( p->snippet.nMatch ) return; - if( p->q.nTerms==0 ) return; - pFts = p->q.pFts; - nColumn = pFts->nColumn; - iColumn = p->iCursorType - QUERY_FULLTEXT; - if( iColumn<0 || iColumn>=nColumn ){ - iFirst = 0; - iLast = nColumn-1; - }else{ - iFirst = iColumn; - iLast = iColumn; - } - for(i=iFirst; i<=iLast; i++){ - const char *zDoc; - int nDoc; - zDoc = (const char*)sqlite3_column_text(p->pStmt, i+1); - nDoc = sqlite3_column_bytes(p->pStmt, i+1); - snippetOffsetsOfColumn(&p->q, &p->snippet, i, zDoc, nDoc); - } -} - -/* -** Convert the information in the aMatch[] array of the snippet -** into the string zOffset[0..nOffset-1]. -*/ -static void snippetOffsetText(Snippet *p){ - int i; - int cnt = 0; - StringBuffer sb; - char zBuf[200]; - if( p->zOffset ) return; - initStringBuffer(&sb); - for(i=0; inMatch; i++){ - struct snippetMatch *pMatch = &p->aMatch[i]; - zBuf[0] = ' '; - sqlite3_snprintf(sizeof(zBuf)-1, &zBuf[cnt>0], "%d %d %d %d", - pMatch->iCol, pMatch->iTerm, pMatch->iStart, pMatch->nByte); - append(&sb, zBuf); - cnt++; - } - p->zOffset = sb.s; - p->nOffset = sb.len; -} - -/* -** zDoc[0..nDoc-1] is phrase of text. aMatch[0..nMatch-1] are a set -** of matching words some of which might be in zDoc. zDoc is column -** number iCol. -** -** iBreak is suggested spot in zDoc where we could begin or end an -** excerpt. Return a value similar to iBreak but possibly adjusted -** to be a little left or right so that the break point is better. -*/ -static int wordBoundary( - int iBreak, /* The suggested break point */ - const char *zDoc, /* Document text */ - int nDoc, /* Number of bytes in zDoc[] */ - struct snippetMatch *aMatch, /* Matching words */ - int nMatch, /* Number of entries in aMatch[] */ - int iCol /* The column number for zDoc[] */ -){ - int i; - if( iBreak<=10 ){ - return 0; - } - if( iBreak>=nDoc-10 ){ - return nDoc; - } - for(i=0; i0 && aMatch[i-1].iStart+aMatch[i-1].nByte>=iBreak ){ - return aMatch[i-1].iStart; - } - } - for(i=1; i<=10; i++){ - if( safe_isspace(zDoc[iBreak-i]) ){ - return iBreak - i + 1; - } - if( safe_isspace(zDoc[iBreak+i]) ){ - return iBreak + i + 1; - } - } - return iBreak; -} - -/* -** If the StringBuffer does not end in white space, add a single -** space character to the end. -*/ -static void appendWhiteSpace(StringBuffer *p){ - if( p->len==0 ) return; - if( safe_isspace(p->s[p->len-1]) ) return; - append(p, " "); -} - -/* -** Remove white space from teh end of the StringBuffer -*/ -static void trimWhiteSpace(StringBuffer *p){ - while( p->len>0 && safe_isspace(p->s[p->len-1]) ){ - p->len--; - } -} - - - -/* -** Allowed values for Snippet.aMatch[].snStatus -*/ -#define SNIPPET_IGNORE 0 /* It is ok to omit this match from the snippet */ -#define SNIPPET_DESIRED 1 /* We want to include this match in the snippet */ - -/* -** Generate the text of a snippet. -*/ -static void snippetText( - fulltext_cursor *pCursor, /* The cursor we need the snippet for */ - const char *zStartMark, /* Markup to appear before each match */ - const char *zEndMark, /* Markup to appear after each match */ - const char *zEllipsis /* Ellipsis mark */ -){ - int i, j; - struct snippetMatch *aMatch; - int nMatch; - int nDesired; - StringBuffer sb; - int tailCol; - int tailOffset; - int iCol; - int nDoc; - const char *zDoc; - int iStart, iEnd; - int tailEllipsis = 0; - int iMatch; - - - free(pCursor->snippet.zSnippet); - pCursor->snippet.zSnippet = 0; - aMatch = pCursor->snippet.aMatch; - nMatch = pCursor->snippet.nMatch; - initStringBuffer(&sb); - - for(i=0; iq.nTerms; i++){ - for(j=0; j0; i++){ - if( aMatch[i].snStatus!=SNIPPET_DESIRED ) continue; - nDesired--; - iCol = aMatch[i].iCol; - zDoc = (const char*)sqlite3_column_text(pCursor->pStmt, iCol+1); - nDoc = sqlite3_column_bytes(pCursor->pStmt, iCol+1); - iStart = aMatch[i].iStart - 40; - iStart = wordBoundary(iStart, zDoc, nDoc, aMatch, nMatch, iCol); - if( iStart<=10 ){ - iStart = 0; - } - if( iCol==tailCol && iStart<=tailOffset+20 ){ - iStart = tailOffset; - } - if( (iCol!=tailCol && tailCol>=0) || iStart!=tailOffset ){ - trimWhiteSpace(&sb); - appendWhiteSpace(&sb); - append(&sb, zEllipsis); - appendWhiteSpace(&sb); - } - iEnd = aMatch[i].iStart + aMatch[i].nByte + 40; - iEnd = wordBoundary(iEnd, zDoc, nDoc, aMatch, nMatch, iCol); - if( iEnd>=nDoc-10 ){ - iEnd = nDoc; - tailEllipsis = 0; - }else{ - tailEllipsis = 1; - } - while( iMatchsnippet.zSnippet = sb.s; - pCursor->snippet.nSnippet = sb.len; -} - - -/* -** Close the cursor. For additional information see the documentation -** on the xClose method of the virtual table interface. -*/ -static int fulltextClose(sqlite3_vtab_cursor *pCursor){ - fulltext_cursor *c = (fulltext_cursor *) pCursor; - TRACE(("FTS1 Close %p\n", c)); - sqlite3_finalize(c->pStmt); - queryClear(&c->q); - snippetClear(&c->snippet); - if( c->result.pDoclist!=NULL ){ - docListDelete(c->result.pDoclist); - } - free(c); - return SQLITE_OK; -} - -static int fulltextNext(sqlite3_vtab_cursor *pCursor){ - fulltext_cursor *c = (fulltext_cursor *) pCursor; - sqlite_int64 iDocid; - int rc; - - TRACE(("FTS1 Next %p\n", pCursor)); - snippetClear(&c->snippet); - if( c->iCursorType < QUERY_FULLTEXT ){ - /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */ - rc = sqlite3_step(c->pStmt); - switch( rc ){ - case SQLITE_ROW: - c->eof = 0; - return SQLITE_OK; - case SQLITE_DONE: - c->eof = 1; - return SQLITE_OK; - default: - c->eof = 1; - return rc; - } - } else { /* full-text query */ - rc = sqlite3_reset(c->pStmt); - if( rc!=SQLITE_OK ) return rc; - - iDocid = nextDocid(&c->result); - if( iDocid==0 ){ - c->eof = 1; - return SQLITE_OK; - } - rc = sqlite3_bind_int64(c->pStmt, 1, iDocid); - if( rc!=SQLITE_OK ) return rc; - /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */ - rc = sqlite3_step(c->pStmt); - if( rc==SQLITE_ROW ){ /* the case we expect */ - c->eof = 0; - return SQLITE_OK; - } - /* an error occurred; abort */ - return rc==SQLITE_DONE ? SQLITE_ERROR : rc; - } -} - - -/* Return a DocList corresponding to the query term *pTerm. If *pTerm -** is the first term of a phrase query, go ahead and evaluate the phrase -** query and return the doclist for the entire phrase query. -** -** The result is stored in pTerm->doclist. -*/ -static int docListOfTerm( - fulltext_vtab *v, /* The full text index */ - int iColumn, /* column to restrict to. No restrition if >=nColumn */ - QueryTerm *pQTerm, /* Term we are looking for, or 1st term of a phrase */ - DocList **ppResult /* Write the result here */ -){ - DocList *pLeft, *pRight, *pNew; - int i, rc; - - pLeft = docListNew(DL_POSITIONS); - rc = term_select_all(v, iColumn, pQTerm->pTerm, pQTerm->nTerm, pLeft); - if( rc ){ - docListDelete(pLeft); - return rc; - } - for(i=1; i<=pQTerm->nPhrase; i++){ - pRight = docListNew(DL_POSITIONS); - rc = term_select_all(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm, pRight); - if( rc ){ - docListDelete(pLeft); - return rc; - } - pNew = docListNew(inPhrase ? DL_POSITIONS : DL_DOCIDS); - docListPhraseMerge(pLeft, pRight, pNew); - docListDelete(pLeft); - docListDelete(pRight); - pLeft = pNew; - } - *ppResult = pLeft; - return SQLITE_OK; -} - -/* Add a new term pTerm[0..nTerm-1] to the query *q. -*/ -static void queryAdd(Query *q, const char *pTerm, int nTerm){ - QueryTerm *t; - ++q->nTerms; - q->pTerms = realloc(q->pTerms, q->nTerms * sizeof(q->pTerms[0])); - if( q->pTerms==0 ){ - q->nTerms = 0; - return; - } - t = &q->pTerms[q->nTerms - 1]; - memset(t, 0, sizeof(*t)); - t->pTerm = malloc(nTerm+1); - memcpy(t->pTerm, pTerm, nTerm); - t->pTerm[nTerm] = 0; - t->nTerm = nTerm; - t->isOr = q->nextIsOr; - q->nextIsOr = 0; - t->iColumn = q->nextColumn; - q->nextColumn = q->dfltColumn; -} - -/* -** Check to see if the string zToken[0...nToken-1] matches any -** column name in the virtual table. If it does, -** return the zero-indexed column number. If not, return -1. -*/ -static int checkColumnSpecifier( - fulltext_vtab *pVtab, /* The virtual table */ - const char *zToken, /* Text of the token */ - int nToken /* Number of characters in the token */ -){ - int i; - for(i=0; inColumn; i++){ - if( memcmp(pVtab->azColumn[i], zToken, nToken)==0 - && pVtab->azColumn[i][nToken]==0 ){ - return i; - } - } - return -1; -} - -/* -** Parse the text at pSegment[0..nSegment-1]. Add additional terms -** to the query being assemblied in pQuery. -** -** inPhrase is true if pSegment[0..nSegement-1] is contained within -** double-quotes. If inPhrase is true, then the first term -** is marked with the number of terms in the phrase less one and -** OR and "-" syntax is ignored. If inPhrase is false, then every -** term found is marked with nPhrase=0 and OR and "-" syntax is significant. -*/ -static int tokenizeSegment( - sqlite3_tokenizer *pTokenizer, /* The tokenizer to use */ - const char *pSegment, int nSegment, /* Query expression being parsed */ - int inPhrase, /* True if within "..." */ - Query *pQuery /* Append results here */ -){ - const sqlite3_tokenizer_module *pModule = pTokenizer->pModule; - sqlite3_tokenizer_cursor *pCursor; - int firstIndex = pQuery->nTerms; - int iCol; - int nTerm = 1; - - int rc = pModule->xOpen(pTokenizer, pSegment, nSegment, &pCursor); - if( rc!=SQLITE_OK ) return rc; - pCursor->pTokenizer = pTokenizer; - - while( 1 ){ - const char *pToken; - int nToken, iBegin, iEnd, iPos; - - rc = pModule->xNext(pCursor, - &pToken, &nToken, - &iBegin, &iEnd, &iPos); - if( rc!=SQLITE_OK ) break; - if( !inPhrase && - pSegment[iEnd]==':' && - (iCol = checkColumnSpecifier(pQuery->pFts, pToken, nToken))>=0 ){ - pQuery->nextColumn = iCol; - continue; - } - if( !inPhrase && pQuery->nTerms>0 && nToken==2 - && pSegment[iBegin]=='O' && pSegment[iBegin+1]=='R' ){ - pQuery->nextIsOr = 1; - continue; - } - queryAdd(pQuery, pToken, nToken); - if( !inPhrase && iBegin>0 && pSegment[iBegin-1]=='-' ){ - pQuery->pTerms[pQuery->nTerms-1].isNot = 1; - } - pQuery->pTerms[pQuery->nTerms-1].iPhrase = nTerm; - if( inPhrase ){ - nTerm++; - } - } - - if( inPhrase && pQuery->nTerms>firstIndex ){ - pQuery->pTerms[firstIndex].nPhrase = pQuery->nTerms - firstIndex - 1; - } - - return pModule->xClose(pCursor); -} - -/* Parse a query string, yielding a Query object pQuery. -** -** The calling function will need to queryClear() to clean up -** the dynamically allocated memory held by pQuery. -*/ -static int parseQuery( - fulltext_vtab *v, /* The fulltext index */ - const char *zInput, /* Input text of the query string */ - int nInput, /* Size of the input text */ - int dfltColumn, /* Default column of the index to match against */ - Query *pQuery /* Write the parse results here. */ -){ - int iInput, inPhrase = 0; - - if( zInput==0 ) nInput = 0; - if( nInput<0 ) nInput = strlen(zInput); - pQuery->nTerms = 0; - pQuery->pTerms = NULL; - pQuery->nextIsOr = 0; - pQuery->nextColumn = dfltColumn; - pQuery->dfltColumn = dfltColumn; - pQuery->pFts = v; - - for(iInput=0; iInputiInput ){ - tokenizeSegment(v->pTokenizer, zInput+iInput, i-iInput, inPhrase, - pQuery); - } - iInput = i; - if( i=nColumn -** they are allowed to match against any column. -*/ -static int fulltextQuery( - fulltext_vtab *v, /* The full text index */ - int iColumn, /* Match against this column by default */ - const char *zInput, /* The query string */ - int nInput, /* Number of bytes in zInput[] */ - DocList **pResult, /* Write the result doclist here */ - Query *pQuery /* Put parsed query string here */ -){ - int i, iNext, rc; - DocList *pLeft = NULL; - DocList *pRight, *pNew, *pOr; - int nNot = 0; - QueryTerm *aTerm; - - rc = parseQuery(v, zInput, nInput, iColumn, pQuery); - if( rc!=SQLITE_OK ) return rc; - - /* Merge AND terms. */ - aTerm = pQuery->pTerms; - for(i = 0; inTerms; i=iNext){ - if( aTerm[i].isNot ){ - /* Handle all NOT terms in a separate pass */ - nNot++; - iNext = i + aTerm[i].nPhrase+1; - continue; - } - iNext = i + aTerm[i].nPhrase + 1; - rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &pRight); - if( rc ){ - queryClear(pQuery); - return rc; - } - while( iNextnTerms && aTerm[iNext].isOr ){ - rc = docListOfTerm(v, aTerm[iNext].iColumn, &aTerm[iNext], &pOr); - iNext += aTerm[iNext].nPhrase + 1; - if( rc ){ - queryClear(pQuery); - return rc; - } - pNew = docListNew(DL_DOCIDS); - docListOrMerge(pRight, pOr, pNew); - docListDelete(pRight); - docListDelete(pOr); - pRight = pNew; - } - if( pLeft==0 ){ - pLeft = pRight; - }else{ - pNew = docListNew(DL_DOCIDS); - docListAndMerge(pLeft, pRight, pNew); - docListDelete(pRight); - docListDelete(pLeft); - pLeft = pNew; - } - } - - if( nNot && pLeft==0 ){ - /* We do not yet know how to handle a query of only NOT terms */ - return SQLITE_ERROR; - } - - /* Do the EXCEPT terms */ - for(i=0; inTerms; i += aTerm[i].nPhrase + 1){ - if( !aTerm[i].isNot ) continue; - rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &pRight); - if( rc ){ - queryClear(pQuery); - docListDelete(pLeft); - return rc; - } - pNew = docListNew(DL_DOCIDS); - docListExceptMerge(pLeft, pRight, pNew); - docListDelete(pRight); - docListDelete(pLeft); - pLeft = pNew; - } - - *pResult = pLeft; - return rc; -} - -/* -** This is the xFilter interface for the virtual table. See -** the virtual table xFilter method documentation for additional -** information. -** -** If idxNum==QUERY_GENERIC then do a full table scan against -** the %_content table. -** -** If idxNum==QUERY_ROWID then do a rowid lookup for a single entry -** in the %_content table. -** -** If idxNum>=QUERY_FULLTEXT then use the full text index. The -** column on the left-hand side of the MATCH operator is column -** number idxNum-QUERY_FULLTEXT, 0 indexed. argv[0] is the right-hand -** side of the MATCH operator. -*/ -/* TODO(shess) Upgrade the cursor initialization and destruction to -** account for fulltextFilter() being called multiple times on the -** same cursor. The current solution is very fragile. Apply fix to -** fts2 as appropriate. -*/ -static int fulltextFilter( - sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ - int idxNum, const char *idxStr, /* Which indexing scheme to use */ - int argc, sqlite3_value **argv /* Arguments for the indexing scheme */ -){ - fulltext_cursor *c = (fulltext_cursor *) pCursor; - fulltext_vtab *v = cursor_vtab(c); - int rc; - char *zSql; - - TRACE(("FTS1 Filter %p\n",pCursor)); - - zSql = sqlite3_mprintf("select rowid, * from %%_content %s", - idxNum==QUERY_GENERIC ? "" : "where rowid=?"); - sqlite3_finalize(c->pStmt); - rc = sql_prepare(v->db, v->zDb, v->zName, &c->pStmt, zSql); - sqlite3_free(zSql); - if( rc!=SQLITE_OK ) return rc; - - c->iCursorType = idxNum; - switch( idxNum ){ - case QUERY_GENERIC: - break; - - case QUERY_ROWID: - rc = sqlite3_bind_int64(c->pStmt, 1, sqlite3_value_int64(argv[0])); - if( rc!=SQLITE_OK ) return rc; - break; - - default: /* full-text search */ - { - const char *zQuery = (const char *)sqlite3_value_text(argv[0]); - DocList *pResult; - assert( idxNum<=QUERY_FULLTEXT+v->nColumn); - assert( argc==1 ); - queryClear(&c->q); - rc = fulltextQuery(v, idxNum-QUERY_FULLTEXT, zQuery, -1, &pResult, &c->q); - if( rc!=SQLITE_OK ) return rc; - if( c->result.pDoclist!=NULL ) docListDelete(c->result.pDoclist); - readerInit(&c->result, pResult); - break; - } - } - - return fulltextNext(pCursor); -} - -/* This is the xEof method of the virtual table. The SQLite core -** calls this routine to find out if it has reached the end of -** a query's results set. -*/ -static int fulltextEof(sqlite3_vtab_cursor *pCursor){ - fulltext_cursor *c = (fulltext_cursor *) pCursor; - return c->eof; -} - -/* This is the xColumn method of the virtual table. The SQLite -** core calls this method during a query when it needs the value -** of a column from the virtual table. This method needs to use -** one of the sqlite3_result_*() routines to store the requested -** value back in the pContext. -*/ -static int fulltextColumn(sqlite3_vtab_cursor *pCursor, - sqlite3_context *pContext, int idxCol){ - fulltext_cursor *c = (fulltext_cursor *) pCursor; - fulltext_vtab *v = cursor_vtab(c); - - if( idxColnColumn ){ - sqlite3_value *pVal = sqlite3_column_value(c->pStmt, idxCol+1); - sqlite3_result_value(pContext, pVal); - }else if( idxCol==v->nColumn ){ - /* The extra column whose name is the same as the table. - ** Return a blob which is a pointer to the cursor - */ - sqlite3_result_blob(pContext, &c, sizeof(c), SQLITE_TRANSIENT); - } - return SQLITE_OK; -} - -/* This is the xRowid method. The SQLite core calls this routine to -** retrive the rowid for the current row of the result set. The -** rowid should be written to *pRowid. -*/ -static int fulltextRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ - fulltext_cursor *c = (fulltext_cursor *) pCursor; - - *pRowid = sqlite3_column_int64(c->pStmt, 0); - return SQLITE_OK; -} - -/* Add all terms in [zText] to the given hash table. If [iColumn] > 0, - * we also store positions and offsets in the hash table using the given - * column number. */ -static int buildTerms(fulltext_vtab *v, fts1Hash *terms, sqlite_int64 iDocid, - const char *zText, int iColumn){ - sqlite3_tokenizer *pTokenizer = v->pTokenizer; - sqlite3_tokenizer_cursor *pCursor; - const char *pToken; - int nTokenBytes; - int iStartOffset, iEndOffset, iPosition; - int rc; - - rc = pTokenizer->pModule->xOpen(pTokenizer, zText, -1, &pCursor); - if( rc!=SQLITE_OK ) return rc; - - pCursor->pTokenizer = pTokenizer; - while( SQLITE_OK==pTokenizer->pModule->xNext(pCursor, - &pToken, &nTokenBytes, - &iStartOffset, &iEndOffset, - &iPosition) ){ - DocList *p; - - /* Positions can't be negative; we use -1 as a terminator internally. */ - if( iPosition<0 ){ - pTokenizer->pModule->xClose(pCursor); - return SQLITE_ERROR; - } - - p = fts1HashFind(terms, pToken, nTokenBytes); - if( p==NULL ){ - p = docListNew(DL_DEFAULT); - docListAddDocid(p, iDocid); - fts1HashInsert(terms, pToken, nTokenBytes, p); - } - if( iColumn>=0 ){ - docListAddPosOffset(p, iColumn, iPosition, iStartOffset, iEndOffset); - } - } - - /* TODO(shess) Check return? Should this be able to cause errors at - ** this point? Actually, same question about sqlite3_finalize(), - ** though one could argue that failure there means that the data is - ** not durable. *ponder* - */ - pTokenizer->pModule->xClose(pCursor); - return rc; -} - -/* Update the %_terms table to map the term [pTerm] to the given rowid. */ -static int index_insert_term(fulltext_vtab *v, const char *pTerm, int nTerm, - DocList *d){ - sqlite_int64 iIndexRow; - DocList doclist; - int iSegment = 0, rc; - - rc = term_select(v, pTerm, nTerm, iSegment, &iIndexRow, &doclist); - if( rc==SQLITE_DONE ){ - docListInit(&doclist, DL_DEFAULT, 0, 0); - docListUpdate(&doclist, d); - /* TODO(shess) Consider length(doclist)>CHUNK_MAX? */ - rc = term_insert(v, NULL, pTerm, nTerm, iSegment, &doclist); - goto err; - } - if( rc!=SQLITE_ROW ) return SQLITE_ERROR; - - docListUpdate(&doclist, d); - if( doclist.nData<=CHUNK_MAX ){ - rc = term_update(v, iIndexRow, &doclist); - goto err; - } - - /* Doclist doesn't fit, delete what's there, and accumulate - ** forward. - */ - rc = term_delete(v, iIndexRow); - if( rc!=SQLITE_OK ) goto err; - - /* Try to insert the doclist into a higher segment bucket. On - ** failure, accumulate existing doclist with the doclist from that - ** bucket, and put results in the next bucket. - */ - iSegment++; - while( (rc=term_insert(v, &iIndexRow, pTerm, nTerm, iSegment, - &doclist))!=SQLITE_OK ){ - sqlite_int64 iSegmentRow; - DocList old; - int rc2; - - /* Retain old error in case the term_insert() error was really an - ** error rather than a bounced insert. - */ - rc2 = term_select(v, pTerm, nTerm, iSegment, &iSegmentRow, &old); - if( rc2!=SQLITE_ROW ) goto err; - - rc = term_delete(v, iSegmentRow); - if( rc!=SQLITE_OK ) goto err; - - /* Reusing lowest-number deleted row keeps the index smaller. */ - if( iSegmentRownColumn ; ++i){ - char *zText = (char*)sqlite3_value_text(pValues[i]); - int rc = buildTerms(v, terms, iRowid, zText, i); - if( rc!=SQLITE_OK ) return rc; - } - return SQLITE_OK; -} - -/* Add empty doclists for all terms in the given row's content to the hash - * table [pTerms]. */ -static int deleteTerms(fulltext_vtab *v, fts1Hash *pTerms, sqlite_int64 iRowid){ - const char **pValues; - int i; - - int rc = content_select(v, iRowid, &pValues); - if( rc!=SQLITE_OK ) return rc; - - for(i = 0 ; i < v->nColumn; ++i) { - rc = buildTerms(v, pTerms, iRowid, pValues[i], -1); - if( rc!=SQLITE_OK ) break; - } - - freeStringArray(v->nColumn, pValues); - return SQLITE_OK; -} - -/* Insert a row into the %_content table; set *piRowid to be the ID of the - * new row. Fill [pTerms] with new doclists for the %_term table. */ -static int index_insert(fulltext_vtab *v, sqlite3_value *pRequestRowid, - sqlite3_value **pValues, - sqlite_int64 *piRowid, fts1Hash *pTerms){ - int rc; - - rc = content_insert(v, pRequestRowid, pValues); /* execute an SQL INSERT */ - if( rc!=SQLITE_OK ) return rc; - *piRowid = sqlite3_last_insert_rowid(v->db); - return insertTerms(v, pTerms, *piRowid, pValues); -} - -/* Delete a row from the %_content table; fill [pTerms] with empty doclists - * to be written to the %_term table. */ -static int index_delete(fulltext_vtab *v, sqlite_int64 iRow, fts1Hash *pTerms){ - int rc = deleteTerms(v, pTerms, iRow); - if( rc!=SQLITE_OK ) return rc; - return content_delete(v, iRow); /* execute an SQL DELETE */ -} - -/* Update a row in the %_content table; fill [pTerms] with new doclists for the - * %_term table. */ -static int index_update(fulltext_vtab *v, sqlite_int64 iRow, - sqlite3_value **pValues, fts1Hash *pTerms){ - /* Generate an empty doclist for each term that previously appeared in this - * row. */ - int rc = deleteTerms(v, pTerms, iRow); - if( rc!=SQLITE_OK ) return rc; - - rc = content_update(v, pValues, iRow); /* execute an SQL UPDATE */ - if( rc!=SQLITE_OK ) return rc; - - /* Now add positions for terms which appear in the updated row. */ - return insertTerms(v, pTerms, iRow, pValues); -} - -/* This function implements the xUpdate callback; it is the top-level entry - * point for inserting, deleting or updating a row in a full-text table. */ -static int fulltextUpdate(sqlite3_vtab *pVtab, int nArg, sqlite3_value **ppArg, - sqlite_int64 *pRowid){ - fulltext_vtab *v = (fulltext_vtab *) pVtab; - fts1Hash terms; /* maps term string -> PosList */ - int rc; - fts1HashElem *e; - - TRACE(("FTS1 Update %p\n", pVtab)); - - fts1HashInit(&terms, FTS1_HASH_STRING, 1); - - if( nArg<2 ){ - rc = index_delete(v, sqlite3_value_int64(ppArg[0]), &terms); - } else if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){ - /* An update: - * ppArg[0] = old rowid - * ppArg[1] = new rowid - * ppArg[2..2+v->nColumn-1] = values - * ppArg[2+v->nColumn] = value for magic column (we ignore this) - */ - sqlite_int64 rowid = sqlite3_value_int64(ppArg[0]); - if( sqlite3_value_type(ppArg[1]) != SQLITE_INTEGER || - sqlite3_value_int64(ppArg[1]) != rowid ){ - rc = SQLITE_ERROR; /* we don't allow changing the rowid */ - } else { - assert( nArg==2+v->nColumn+1); - rc = index_update(v, rowid, &ppArg[2], &terms); - } - } else { - /* An insert: - * ppArg[1] = requested rowid - * ppArg[2..2+v->nColumn-1] = values - * ppArg[2+v->nColumn] = value for magic column (we ignore this) - */ - assert( nArg==2+v->nColumn+1); - rc = index_insert(v, ppArg[1], &ppArg[2], pRowid, &terms); - } - - if( rc==SQLITE_OK ){ - /* Write updated doclists to disk. */ - for(e=fts1HashFirst(&terms); e; e=fts1HashNext(e)){ - DocList *p = fts1HashData(e); - rc = index_insert_term(v, fts1HashKey(e), fts1HashKeysize(e), p); - if( rc!=SQLITE_OK ) break; - } - } - - /* clean up */ - for(e=fts1HashFirst(&terms); e; e=fts1HashNext(e)){ - DocList *p = fts1HashData(e); - docListDelete(p); - } - fts1HashClear(&terms); - - return rc; -} - -/* -** Implementation of the snippet() function for FTS1 -*/ -static void snippetFunc( - sqlite3_context *pContext, - int argc, - sqlite3_value **argv -){ - fulltext_cursor *pCursor; - if( argc<1 ) return; - if( sqlite3_value_type(argv[0])!=SQLITE_BLOB || - sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){ - sqlite3_result_error(pContext, "illegal first argument to html_snippet",-1); - }else{ - const char *zStart = ""; - const char *zEnd = ""; - const char *zEllipsis = "..."; - memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor)); - if( argc>=2 ){ - zStart = (const char*)sqlite3_value_text(argv[1]); - if( argc>=3 ){ - zEnd = (const char*)sqlite3_value_text(argv[2]); - if( argc>=4 ){ - zEllipsis = (const char*)sqlite3_value_text(argv[3]); - } - } - } - snippetAllOffsets(pCursor); - snippetText(pCursor, zStart, zEnd, zEllipsis); - sqlite3_result_text(pContext, pCursor->snippet.zSnippet, - pCursor->snippet.nSnippet, SQLITE_STATIC); - } -} - -/* -** Implementation of the offsets() function for FTS1 -*/ -static void snippetOffsetsFunc( - sqlite3_context *pContext, - int argc, - sqlite3_value **argv -){ - fulltext_cursor *pCursor; - if( argc<1 ) return; - if( sqlite3_value_type(argv[0])!=SQLITE_BLOB || - sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){ - sqlite3_result_error(pContext, "illegal first argument to offsets",-1); - }else{ - memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor)); - snippetAllOffsets(pCursor); - snippetOffsetText(&pCursor->snippet); - sqlite3_result_text(pContext, - pCursor->snippet.zOffset, pCursor->snippet.nOffset, - SQLITE_STATIC); - } -} - -/* -** This routine implements the xFindFunction method for the FTS1 -** virtual table. -*/ -static int fulltextFindFunction( - sqlite3_vtab *pVtab, - int nArg, - const char *zName, - void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), - void **ppArg -){ - if( strcmp(zName,"snippet")==0 ){ - *pxFunc = snippetFunc; - return 1; - }else if( strcmp(zName,"offsets")==0 ){ - *pxFunc = snippetOffsetsFunc; - return 1; - } - return 0; -} - -/* -** Rename an fts1 table. -*/ -static int fulltextRename( - sqlite3_vtab *pVtab, - const char *zName -){ - fulltext_vtab *p = (fulltext_vtab *)pVtab; - int rc = SQLITE_NOMEM; - char *zSql = sqlite3_mprintf( - "ALTER TABLE %Q.'%q_content' RENAME TO '%q_content';" - "ALTER TABLE %Q.'%q_term' RENAME TO '%q_term';" - , p->zDb, p->zName, zName - , p->zDb, p->zName, zName - ); - if( zSql ){ - rc = sqlite3_exec(p->db, zSql, 0, 0, 0); - sqlite3_free(zSql); - } - return rc; -} - -static const sqlite3_module fulltextModule = { - /* iVersion */ 0, - /* xCreate */ fulltextCreate, - /* xConnect */ fulltextConnect, - /* xBestIndex */ fulltextBestIndex, - /* xDisconnect */ fulltextDisconnect, - /* xDestroy */ fulltextDestroy, - /* xOpen */ fulltextOpen, - /* xClose */ fulltextClose, - /* xFilter */ fulltextFilter, - /* xNext */ fulltextNext, - /* xEof */ fulltextEof, - /* xColumn */ fulltextColumn, - /* xRowid */ fulltextRowid, - /* xUpdate */ fulltextUpdate, - /* xBegin */ 0, - /* xSync */ 0, - /* xCommit */ 0, - /* xRollback */ 0, - /* xFindFunction */ fulltextFindFunction, - /* xRename */ fulltextRename, -}; - -int sqlite3Fts1Init(sqlite3 *db){ - sqlite3_overload_function(db, "snippet", -1); - sqlite3_overload_function(db, "offsets", -1); - return sqlite3_create_module(db, "fts1", &fulltextModule, 0); -} - -#if !SQLITE_CORE -#ifdef _WIN32 -__declspec(dllexport) -#endif -int sqlite3_fts1_init(sqlite3 *db, char **pzErrMsg, - const sqlite3_api_routines *pApi){ - SQLITE_EXTENSION_INIT2(pApi) - return sqlite3Fts1Init(db); -} -#endif - -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) */ diff --git a/ext/fts1/fts1.h b/ext/fts1/fts1.h deleted file mode 100644 index d55e689733..0000000000 --- a/ext/fts1/fts1.h +++ /dev/null @@ -1,11 +0,0 @@ -#include "sqlite3.h" - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -int sqlite3Fts1Init(sqlite3 *db); - -#ifdef __cplusplus -} /* extern "C" */ -#endif /* __cplusplus */ diff --git a/ext/fts1/fts1_hash.c b/ext/fts1/fts1_hash.c deleted file mode 100644 index 463a52b645..0000000000 --- a/ext/fts1/fts1_hash.c +++ /dev/null @@ -1,369 +0,0 @@ -/* -** 2001 September 22 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** This is the implementation of generic hash-tables used in SQLite. -** We've modified it slightly to serve as a standalone hash table -** implementation for the full-text indexing module. -*/ -#include -#include -#include - -/* -** The code in this file is only compiled if: -** -** * The FTS1 module is being built as an extension -** (in which case SQLITE_CORE is not defined), or -** -** * The FTS1 module is being built into the core of -** SQLite (in which case SQLITE_ENABLE_FTS1 is defined). -*/ -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) - - -#include "fts1_hash.h" - -static void *malloc_and_zero(int n){ - void *p = malloc(n); - if( p ){ - memset(p, 0, n); - } - return p; -} - -/* Turn bulk memory into a hash table object by initializing the -** fields of the Hash structure. -** -** "pNew" is a pointer to the hash table that is to be initialized. -** keyClass is one of the constants -** FTS1_HASH_BINARY or FTS1_HASH_STRING. The value of keyClass -** determines what kind of key the hash table will use. "copyKey" is -** true if the hash table should make its own private copy of keys and -** false if it should just use the supplied pointer. -*/ -void sqlite3Fts1HashInit(fts1Hash *pNew, int keyClass, int copyKey){ - assert( pNew!=0 ); - assert( keyClass>=FTS1_HASH_STRING && keyClass<=FTS1_HASH_BINARY ); - pNew->keyClass = keyClass; - pNew->copyKey = copyKey; - pNew->first = 0; - pNew->count = 0; - pNew->htsize = 0; - pNew->ht = 0; - pNew->xMalloc = malloc_and_zero; - pNew->xFree = free; -} - -/* Remove all entries from a hash table. Reclaim all memory. -** Call this routine to delete a hash table or to reset a hash table -** to the empty state. -*/ -void sqlite3Fts1HashClear(fts1Hash *pH){ - fts1HashElem *elem; /* For looping over all elements of the table */ - - assert( pH!=0 ); - elem = pH->first; - pH->first = 0; - if( pH->ht ) pH->xFree(pH->ht); - pH->ht = 0; - pH->htsize = 0; - while( elem ){ - fts1HashElem *next_elem = elem->next; - if( pH->copyKey && elem->pKey ){ - pH->xFree(elem->pKey); - } - pH->xFree(elem); - elem = next_elem; - } - pH->count = 0; -} - -/* -** Hash and comparison functions when the mode is FTS1_HASH_STRING -*/ -static int strHash(const void *pKey, int nKey){ - const char *z = (const char *)pKey; - int h = 0; - if( nKey<=0 ) nKey = (int) strlen(z); - while( nKey > 0 ){ - h = (h<<3) ^ h ^ *z++; - nKey--; - } - return h & 0x7fffffff; -} -static int strCompare(const void *pKey1, int n1, const void *pKey2, int n2){ - if( n1!=n2 ) return 1; - return strncmp((const char*)pKey1,(const char*)pKey2,n1); -} - -/* -** Hash and comparison functions when the mode is FTS1_HASH_BINARY -*/ -static int binHash(const void *pKey, int nKey){ - int h = 0; - const char *z = (const char *)pKey; - while( nKey-- > 0 ){ - h = (h<<3) ^ h ^ *(z++); - } - return h & 0x7fffffff; -} -static int binCompare(const void *pKey1, int n1, const void *pKey2, int n2){ - if( n1!=n2 ) return 1; - return memcmp(pKey1,pKey2,n1); -} - -/* -** Return a pointer to the appropriate hash function given the key class. -** -** The C syntax in this function definition may be unfamilar to some -** programmers, so we provide the following additional explanation: -** -** The name of the function is "hashFunction". The function takes a -** single parameter "keyClass". The return value of hashFunction() -** is a pointer to another function. Specifically, the return value -** of hashFunction() is a pointer to a function that takes two parameters -** with types "const void*" and "int" and returns an "int". -*/ -static int (*hashFunction(int keyClass))(const void*,int){ - if( keyClass==FTS1_HASH_STRING ){ - return &strHash; - }else{ - assert( keyClass==FTS1_HASH_BINARY ); - return &binHash; - } -} - -/* -** Return a pointer to the appropriate hash function given the key class. -** -** For help in interpreted the obscure C code in the function definition, -** see the header comment on the previous function. -*/ -static int (*compareFunction(int keyClass))(const void*,int,const void*,int){ - if( keyClass==FTS1_HASH_STRING ){ - return &strCompare; - }else{ - assert( keyClass==FTS1_HASH_BINARY ); - return &binCompare; - } -} - -/* Link an element into the hash table -*/ -static void insertElement( - fts1Hash *pH, /* The complete hash table */ - struct _fts1ht *pEntry, /* The entry into which pNew is inserted */ - fts1HashElem *pNew /* The element to be inserted */ -){ - fts1HashElem *pHead; /* First element already in pEntry */ - pHead = pEntry->chain; - if( pHead ){ - pNew->next = pHead; - pNew->prev = pHead->prev; - if( pHead->prev ){ pHead->prev->next = pNew; } - else { pH->first = pNew; } - pHead->prev = pNew; - }else{ - pNew->next = pH->first; - if( pH->first ){ pH->first->prev = pNew; } - pNew->prev = 0; - pH->first = pNew; - } - pEntry->count++; - pEntry->chain = pNew; -} - - -/* Resize the hash table so that it cantains "new_size" buckets. -** "new_size" must be a power of 2. The hash table might fail -** to resize if sqliteMalloc() fails. -*/ -static void rehash(fts1Hash *pH, int new_size){ - struct _fts1ht *new_ht; /* The new hash table */ - fts1HashElem *elem, *next_elem; /* For looping over existing elements */ - int (*xHash)(const void*,int); /* The hash function */ - - assert( (new_size & (new_size-1))==0 ); - new_ht = (struct _fts1ht *)pH->xMalloc( new_size*sizeof(struct _fts1ht) ); - if( new_ht==0 ) return; - if( pH->ht ) pH->xFree(pH->ht); - pH->ht = new_ht; - pH->htsize = new_size; - xHash = hashFunction(pH->keyClass); - for(elem=pH->first, pH->first=0; elem; elem = next_elem){ - int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1); - next_elem = elem->next; - insertElement(pH, &new_ht[h], elem); - } -} - -/* This function (for internal use only) locates an element in an -** hash table that matches the given key. The hash for this key has -** already been computed and is passed as the 4th parameter. -*/ -static fts1HashElem *findElementGivenHash( - const fts1Hash *pH, /* The pH to be searched */ - const void *pKey, /* The key we are searching for */ - int nKey, - int h /* The hash for this key. */ -){ - fts1HashElem *elem; /* Used to loop thru the element list */ - int count; /* Number of elements left to test */ - int (*xCompare)(const void*,int,const void*,int); /* comparison function */ - - if( pH->ht ){ - struct _fts1ht *pEntry = &pH->ht[h]; - elem = pEntry->chain; - count = pEntry->count; - xCompare = compareFunction(pH->keyClass); - while( count-- && elem ){ - if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){ - return elem; - } - elem = elem->next; - } - } - return 0; -} - -/* Remove a single entry from the hash table given a pointer to that -** element and a hash on the element's key. -*/ -static void removeElementGivenHash( - fts1Hash *pH, /* The pH containing "elem" */ - fts1HashElem* elem, /* The element to be removed from the pH */ - int h /* Hash value for the element */ -){ - struct _fts1ht *pEntry; - if( elem->prev ){ - elem->prev->next = elem->next; - }else{ - pH->first = elem->next; - } - if( elem->next ){ - elem->next->prev = elem->prev; - } - pEntry = &pH->ht[h]; - if( pEntry->chain==elem ){ - pEntry->chain = elem->next; - } - pEntry->count--; - if( pEntry->count<=0 ){ - pEntry->chain = 0; - } - if( pH->copyKey && elem->pKey ){ - pH->xFree(elem->pKey); - } - pH->xFree( elem ); - pH->count--; - if( pH->count<=0 ){ - assert( pH->first==0 ); - assert( pH->count==0 ); - fts1HashClear(pH); - } -} - -/* Attempt to locate an element of the hash table pH with a key -** that matches pKey,nKey. Return the data for this element if it is -** found, or NULL if there is no match. -*/ -void *sqlite3Fts1HashFind(const fts1Hash *pH, const void *pKey, int nKey){ - int h; /* A hash on key */ - fts1HashElem *elem; /* The element that matches key */ - int (*xHash)(const void*,int); /* The hash function */ - - if( pH==0 || pH->ht==0 ) return 0; - xHash = hashFunction(pH->keyClass); - assert( xHash!=0 ); - h = (*xHash)(pKey,nKey); - assert( (pH->htsize & (pH->htsize-1))==0 ); - elem = findElementGivenHash(pH,pKey,nKey, h & (pH->htsize-1)); - return elem ? elem->data : 0; -} - -/* Insert an element into the hash table pH. The key is pKey,nKey -** and the data is "data". -** -** If no element exists with a matching key, then a new -** element is created. A copy of the key is made if the copyKey -** flag is set. NULL is returned. -** -** If another element already exists with the same key, then the -** new data replaces the old data and the old data is returned. -** The key is not copied in this instance. If a malloc fails, then -** the new data is returned and the hash table is unchanged. -** -** If the "data" parameter to this function is NULL, then the -** element corresponding to "key" is removed from the hash table. -*/ -void *sqlite3Fts1HashInsert( - fts1Hash *pH, /* The hash table to insert into */ - const void *pKey, /* The key */ - int nKey, /* Number of bytes in the key */ - void *data /* The data */ -){ - int hraw; /* Raw hash value of the key */ - int h; /* the hash of the key modulo hash table size */ - fts1HashElem *elem; /* Used to loop thru the element list */ - fts1HashElem *new_elem; /* New element added to the pH */ - int (*xHash)(const void*,int); /* The hash function */ - - assert( pH!=0 ); - xHash = hashFunction(pH->keyClass); - assert( xHash!=0 ); - hraw = (*xHash)(pKey, nKey); - assert( (pH->htsize & (pH->htsize-1))==0 ); - h = hraw & (pH->htsize-1); - elem = findElementGivenHash(pH,pKey,nKey,h); - if( elem ){ - void *old_data = elem->data; - if( data==0 ){ - removeElementGivenHash(pH,elem,h); - }else{ - elem->data = data; - } - return old_data; - } - if( data==0 ) return 0; - new_elem = (fts1HashElem*)pH->xMalloc( sizeof(fts1HashElem) ); - if( new_elem==0 ) return data; - if( pH->copyKey && pKey!=0 ){ - new_elem->pKey = pH->xMalloc( nKey ); - if( new_elem->pKey==0 ){ - pH->xFree(new_elem); - return data; - } - memcpy((void*)new_elem->pKey, pKey, nKey); - }else{ - new_elem->pKey = (void*)pKey; - } - new_elem->nKey = nKey; - pH->count++; - if( pH->htsize==0 ){ - rehash(pH,8); - if( pH->htsize==0 ){ - pH->count = 0; - pH->xFree(new_elem); - return data; - } - } - if( pH->count > pH->htsize ){ - rehash(pH,pH->htsize*2); - } - assert( pH->htsize>0 ); - assert( (pH->htsize & (pH->htsize-1))==0 ); - h = hraw & (pH->htsize-1); - insertElement(pH, &pH->ht[h], new_elem); - new_elem->data = data; - return 0; -} - -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) */ diff --git a/ext/fts1/fts1_hash.h b/ext/fts1/fts1_hash.h deleted file mode 100644 index 9001152931..0000000000 --- a/ext/fts1/fts1_hash.h +++ /dev/null @@ -1,112 +0,0 @@ -/* -** 2001 September 22 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** This is the header file for the generic hash-table implementation -** used in SQLite. We've modified it slightly to serve as a standalone -** hash table implementation for the full-text indexing module. -** -*/ -#ifndef _FTS1_HASH_H_ -#define _FTS1_HASH_H_ - -/* Forward declarations of structures. */ -typedef struct fts1Hash fts1Hash; -typedef struct fts1HashElem fts1HashElem; - -/* A complete hash table is an instance of the following structure. -** The internals of this structure are intended to be opaque -- client -** code should not attempt to access or modify the fields of this structure -** directly. Change this structure only by using the routines below. -** However, many of the "procedures" and "functions" for modifying and -** accessing this structure are really macros, so we can't really make -** this structure opaque. -*/ -struct fts1Hash { - char keyClass; /* HASH_INT, _POINTER, _STRING, _BINARY */ - char copyKey; /* True if copy of key made on insert */ - int count; /* Number of entries in this table */ - fts1HashElem *first; /* The first element of the array */ - void *(*xMalloc)(int); /* malloc() function to use */ - void (*xFree)(void *); /* free() function to use */ - int htsize; /* Number of buckets in the hash table */ - struct _fts1ht { /* the hash table */ - int count; /* Number of entries with this hash */ - fts1HashElem *chain; /* Pointer to first entry with this hash */ - } *ht; -}; - -/* Each element in the hash table is an instance of the following -** structure. All elements are stored on a single doubly-linked list. -** -** Again, this structure is intended to be opaque, but it can't really -** be opaque because it is used by macros. -*/ -struct fts1HashElem { - fts1HashElem *next, *prev; /* Next and previous elements in the table */ - void *data; /* Data associated with this element */ - void *pKey; int nKey; /* Key associated with this element */ -}; - -/* -** There are 2 different modes of operation for a hash table: -** -** FTS1_HASH_STRING pKey points to a string that is nKey bytes long -** (including the null-terminator, if any). Case -** is respected in comparisons. -** -** FTS1_HASH_BINARY pKey points to binary data nKey bytes long. -** memcmp() is used to compare keys. -** -** A copy of the key is made if the copyKey parameter to fts1HashInit is 1. -*/ -#define FTS1_HASH_STRING 1 -#define FTS1_HASH_BINARY 2 - -/* -** Access routines. To delete, insert a NULL pointer. -*/ -void sqlite3Fts1HashInit(fts1Hash*, int keytype, int copyKey); -void *sqlite3Fts1HashInsert(fts1Hash*, const void *pKey, int nKey, void *pData); -void *sqlite3Fts1HashFind(const fts1Hash*, const void *pKey, int nKey); -void sqlite3Fts1HashClear(fts1Hash*); - -/* -** Shorthand for the functions above -*/ -#define fts1HashInit sqlite3Fts1HashInit -#define fts1HashInsert sqlite3Fts1HashInsert -#define fts1HashFind sqlite3Fts1HashFind -#define fts1HashClear sqlite3Fts1HashClear - -/* -** Macros for looping over all elements of a hash table. The idiom is -** like this: -** -** fts1Hash h; -** fts1HashElem *p; -** ... -** for(p=fts1HashFirst(&h); p; p=fts1HashNext(p)){ -** SomeStructure *pData = fts1HashData(p); -** // do something with pData -** } -*/ -#define fts1HashFirst(H) ((H)->first) -#define fts1HashNext(E) ((E)->next) -#define fts1HashData(E) ((E)->data) -#define fts1HashKey(E) ((E)->pKey) -#define fts1HashKeysize(E) ((E)->nKey) - -/* -** Number of entries in a hash table -*/ -#define fts1HashCount(H) ((H)->count) - -#endif /* _FTS1_HASH_H_ */ diff --git a/ext/fts1/fts1_porter.c b/ext/fts1/fts1_porter.c deleted file mode 100644 index 1d26236681..0000000000 --- a/ext/fts1/fts1_porter.c +++ /dev/null @@ -1,643 +0,0 @@ -/* -** 2006 September 30 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** Implementation of the full-text-search tokenizer that implements -** a Porter stemmer. -*/ - -/* -** The code in this file is only compiled if: -** -** * The FTS1 module is being built as an extension -** (in which case SQLITE_CORE is not defined), or -** -** * The FTS1 module is being built into the core of -** SQLite (in which case SQLITE_ENABLE_FTS1 is defined). -*/ -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) - - -#include -#include -#include -#include -#include - -#include "fts1_tokenizer.h" - -/* -** Class derived from sqlite3_tokenizer -*/ -typedef struct porter_tokenizer { - sqlite3_tokenizer base; /* Base class */ -} porter_tokenizer; - -/* -** Class derived from sqlit3_tokenizer_cursor -*/ -typedef struct porter_tokenizer_cursor { - sqlite3_tokenizer_cursor base; - const char *zInput; /* input we are tokenizing */ - int nInput; /* size of the input */ - int iOffset; /* current position in zInput */ - int iToken; /* index of next token to be returned */ - char *zToken; /* storage for current token */ - int nAllocated; /* space allocated to zToken buffer */ -} porter_tokenizer_cursor; - - -/* Forward declaration */ -static const sqlite3_tokenizer_module porterTokenizerModule; - - -/* -** Create a new tokenizer instance. -*/ -static int porterCreate( - int argc, const char * const *argv, - sqlite3_tokenizer **ppTokenizer -){ - porter_tokenizer *t; - t = (porter_tokenizer *) calloc(sizeof(*t), 1); - if( t==NULL ) return SQLITE_NOMEM; - - *ppTokenizer = &t->base; - return SQLITE_OK; -} - -/* -** Destroy a tokenizer -*/ -static int porterDestroy(sqlite3_tokenizer *pTokenizer){ - free(pTokenizer); - return SQLITE_OK; -} - -/* -** Prepare to begin tokenizing a particular string. The input -** string to be tokenized is zInput[0..nInput-1]. A cursor -** used to incrementally tokenize this string is returned in -** *ppCursor. -*/ -static int porterOpen( - sqlite3_tokenizer *pTokenizer, /* The tokenizer */ - const char *zInput, int nInput, /* String to be tokenized */ - sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */ -){ - porter_tokenizer_cursor *c; - - c = (porter_tokenizer_cursor *) malloc(sizeof(*c)); - if( c==NULL ) return SQLITE_NOMEM; - - c->zInput = zInput; - if( zInput==0 ){ - c->nInput = 0; - }else if( nInput<0 ){ - c->nInput = (int)strlen(zInput); - }else{ - c->nInput = nInput; - } - c->iOffset = 0; /* start tokenizing at the beginning */ - c->iToken = 0; - c->zToken = NULL; /* no space allocated, yet. */ - c->nAllocated = 0; - - *ppCursor = &c->base; - return SQLITE_OK; -} - -/* -** Close a tokenization cursor previously opened by a call to -** porterOpen() above. -*/ -static int porterClose(sqlite3_tokenizer_cursor *pCursor){ - porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor; - free(c->zToken); - free(c); - return SQLITE_OK; -} -/* -** Vowel or consonant -*/ -static const char cType[] = { - 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, - 1, 1, 1, 2, 1 -}; - -/* -** isConsonant() and isVowel() determine if their first character in -** the string they point to is a consonant or a vowel, according -** to Porter ruls. -** -** A consonate is any letter other than 'a', 'e', 'i', 'o', or 'u'. -** 'Y' is a consonant unless it follows another consonant, -** in which case it is a vowel. -** -** In these routine, the letters are in reverse order. So the 'y' rule -** is that 'y' is a consonant unless it is followed by another -** consonent. -*/ -static int isVowel(const char*); -static int isConsonant(const char *z){ - int j; - char x = *z; - if( x==0 ) return 0; - assert( x>='a' && x<='z' ); - j = cType[x-'a']; - if( j<2 ) return j; - return z[1]==0 || isVowel(z + 1); -} -static int isVowel(const char *z){ - int j; - char x = *z; - if( x==0 ) return 0; - assert( x>='a' && x<='z' ); - j = cType[x-'a']; - if( j<2 ) return 1-j; - return isConsonant(z + 1); -} - -/* -** Let any sequence of one or more vowels be represented by V and let -** C be sequence of one or more consonants. Then every word can be -** represented as: -** -** [C] (VC){m} [V] -** -** In prose: A word is an optional consonant followed by zero or -** vowel-consonant pairs followed by an optional vowel. "m" is the -** number of vowel consonant pairs. This routine computes the value -** of m for the first i bytes of a word. -** -** Return true if the m-value for z is 1 or more. In other words, -** return true if z contains at least one vowel that is followed -** by a consonant. -** -** In this routine z[] is in reverse order. So we are really looking -** for an instance of of a consonant followed by a vowel. -*/ -static int m_gt_0(const char *z){ - while( isVowel(z) ){ z++; } - if( *z==0 ) return 0; - while( isConsonant(z) ){ z++; } - return *z!=0; -} - -/* Like mgt0 above except we are looking for a value of m which is -** exactly 1 -*/ -static int m_eq_1(const char *z){ - while( isVowel(z) ){ z++; } - if( *z==0 ) return 0; - while( isConsonant(z) ){ z++; } - if( *z==0 ) return 0; - while( isVowel(z) ){ z++; } - if( *z==0 ) return 1; - while( isConsonant(z) ){ z++; } - return *z==0; -} - -/* Like mgt0 above except we are looking for a value of m>1 instead -** or m>0 -*/ -static int m_gt_1(const char *z){ - while( isVowel(z) ){ z++; } - if( *z==0 ) return 0; - while( isConsonant(z) ){ z++; } - if( *z==0 ) return 0; - while( isVowel(z) ){ z++; } - if( *z==0 ) return 0; - while( isConsonant(z) ){ z++; } - return *z!=0; -} - -/* -** Return TRUE if there is a vowel anywhere within z[0..n-1] -*/ -static int hasVowel(const char *z){ - while( isConsonant(z) ){ z++; } - return *z!=0; -} - -/* -** Return TRUE if the word ends in a double consonant. -** -** The text is reversed here. So we are really looking at -** the first two characters of z[]. -*/ -static int doubleConsonant(const char *z){ - return isConsonant(z) && z[0]==z[1] && isConsonant(z+1); -} - -/* -** Return TRUE if the word ends with three letters which -** are consonant-vowel-consonent and where the final consonant -** is not 'w', 'x', or 'y'. -** -** The word is reversed here. So we are really checking the -** first three letters and the first one cannot be in [wxy]. -*/ -static int star_oh(const char *z){ - return - z[0]!=0 && isConsonant(z) && - z[0]!='w' && z[0]!='x' && z[0]!='y' && - z[1]!=0 && isVowel(z+1) && - z[2]!=0 && isConsonant(z+2); -} - -/* -** If the word ends with zFrom and xCond() is true for the stem -** of the word that preceeds the zFrom ending, then change the -** ending to zTo. -** -** The input word *pz and zFrom are both in reverse order. zTo -** is in normal order. -** -** Return TRUE if zFrom matches. Return FALSE if zFrom does not -** match. Not that TRUE is returned even if xCond() fails and -** no substitution occurs. -*/ -static int stem( - char **pz, /* The word being stemmed (Reversed) */ - const char *zFrom, /* If the ending matches this... (Reversed) */ - const char *zTo, /* ... change the ending to this (not reversed) */ - int (*xCond)(const char*) /* Condition that must be true */ -){ - char *z = *pz; - while( *zFrom && *zFrom==*z ){ z++; zFrom++; } - if( *zFrom!=0 ) return 0; - if( xCond && !xCond(z) ) return 1; - while( *zTo ){ - *(--z) = *(zTo++); - } - *pz = z; - return 1; -} - -/* -** This is the fallback stemmer used when the porter stemmer is -** inappropriate. The input word is copied into the output with -** US-ASCII case folding. If the input word is too long (more -** than 20 bytes if it contains no digits or more than 6 bytes if -** it contains digits) then word is truncated to 20 or 6 bytes -** by taking 10 or 3 bytes from the beginning and end. -*/ -static void copy_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){ - int i, mx, j; - int hasDigit = 0; - for(i=0; i='A' && c<='Z' ){ - zOut[i] = c - 'A' + 'a'; - }else{ - if( c>='0' && c<='9' ) hasDigit = 1; - zOut[i] = c; - } - } - mx = hasDigit ? 3 : 10; - if( nIn>mx*2 ){ - for(j=mx, i=nIn-mx; i=sizeof(zReverse)-7 ){ - /* The word is too big or too small for the porter stemmer. - ** Fallback to the copy stemmer */ - copy_stemmer(zIn, nIn, zOut, pnOut); - return; - } - for(i=0, j=sizeof(zReverse)-6; i='A' && c<='Z' ){ - zReverse[j] = c + 'a' - 'A'; - }else if( c>='a' && c<='z' ){ - zReverse[j] = c; - }else{ - /* The use of a character not in [a-zA-Z] means that we fallback - ** to the copy stemmer */ - copy_stemmer(zIn, nIn, zOut, pnOut); - return; - } - } - memset(&zReverse[sizeof(zReverse)-5], 0, 5); - z = &zReverse[j+1]; - - - /* Step 1a */ - if( z[0]=='s' ){ - if( - !stem(&z, "sess", "ss", 0) && - !stem(&z, "sei", "i", 0) && - !stem(&z, "ss", "ss", 0) - ){ - z++; - } - } - - /* Step 1b */ - z2 = z; - if( stem(&z, "dee", "ee", m_gt_0) ){ - /* Do nothing. The work was all in the test */ - }else if( - (stem(&z, "gni", "", hasVowel) || stem(&z, "de", "", hasVowel)) - && z!=z2 - ){ - if( stem(&z, "ta", "ate", 0) || - stem(&z, "lb", "ble", 0) || - stem(&z, "zi", "ize", 0) ){ - /* Do nothing. The work was all in the test */ - }else if( doubleConsonant(z) && (*z!='l' && *z!='s' && *z!='z') ){ - z++; - }else if( m_eq_1(z) && star_oh(z) ){ - *(--z) = 'e'; - } - } - - /* Step 1c */ - if( z[0]=='y' && hasVowel(z+1) ){ - z[0] = 'i'; - } - - /* Step 2 */ - switch( z[1] ){ - case 'a': - stem(&z, "lanoita", "ate", m_gt_0) || - stem(&z, "lanoit", "tion", m_gt_0); - break; - case 'c': - stem(&z, "icne", "ence", m_gt_0) || - stem(&z, "icna", "ance", m_gt_0); - break; - case 'e': - stem(&z, "rezi", "ize", m_gt_0); - break; - case 'g': - stem(&z, "igol", "log", m_gt_0); - break; - case 'l': - stem(&z, "ilb", "ble", m_gt_0) || - stem(&z, "illa", "al", m_gt_0) || - stem(&z, "iltne", "ent", m_gt_0) || - stem(&z, "ile", "e", m_gt_0) || - stem(&z, "ilsuo", "ous", m_gt_0); - break; - case 'o': - stem(&z, "noitazi", "ize", m_gt_0) || - stem(&z, "noita", "ate", m_gt_0) || - stem(&z, "rota", "ate", m_gt_0); - break; - case 's': - stem(&z, "msila", "al", m_gt_0) || - stem(&z, "ssenevi", "ive", m_gt_0) || - stem(&z, "ssenluf", "ful", m_gt_0) || - stem(&z, "ssensuo", "ous", m_gt_0); - break; - case 't': - stem(&z, "itila", "al", m_gt_0) || - stem(&z, "itivi", "ive", m_gt_0) || - stem(&z, "itilib", "ble", m_gt_0); - break; - } - - /* Step 3 */ - switch( z[0] ){ - case 'e': - stem(&z, "etaci", "ic", m_gt_0) || - stem(&z, "evita", "", m_gt_0) || - stem(&z, "ezila", "al", m_gt_0); - break; - case 'i': - stem(&z, "itici", "ic", m_gt_0); - break; - case 'l': - stem(&z, "laci", "ic", m_gt_0) || - stem(&z, "luf", "", m_gt_0); - break; - case 's': - stem(&z, "ssen", "", m_gt_0); - break; - } - - /* Step 4 */ - switch( z[1] ){ - case 'a': - if( z[0]=='l' && m_gt_1(z+2) ){ - z += 2; - } - break; - case 'c': - if( z[0]=='e' && z[2]=='n' && (z[3]=='a' || z[3]=='e') && m_gt_1(z+4) ){ - z += 4; - } - break; - case 'e': - if( z[0]=='r' && m_gt_1(z+2) ){ - z += 2; - } - break; - case 'i': - if( z[0]=='c' && m_gt_1(z+2) ){ - z += 2; - } - break; - case 'l': - if( z[0]=='e' && z[2]=='b' && (z[3]=='a' || z[3]=='i') && m_gt_1(z+4) ){ - z += 4; - } - break; - case 'n': - if( z[0]=='t' ){ - if( z[2]=='a' ){ - if( m_gt_1(z+3) ){ - z += 3; - } - }else if( z[2]=='e' ){ - stem(&z, "tneme", "", m_gt_1) || - stem(&z, "tnem", "", m_gt_1) || - stem(&z, "tne", "", m_gt_1); - } - } - break; - case 'o': - if( z[0]=='u' ){ - if( m_gt_1(z+2) ){ - z += 2; - } - }else if( z[3]=='s' || z[3]=='t' ){ - stem(&z, "noi", "", m_gt_1); - } - break; - case 's': - if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){ - z += 3; - } - break; - case 't': - stem(&z, "eta", "", m_gt_1) || - stem(&z, "iti", "", m_gt_1); - break; - case 'u': - if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){ - z += 3; - } - break; - case 'v': - case 'z': - if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){ - z += 3; - } - break; - } - - /* Step 5a */ - if( z[0]=='e' ){ - if( m_gt_1(z+1) ){ - z++; - }else if( m_eq_1(z+1) && !star_oh(z+1) ){ - z++; - } - } - - /* Step 5b */ - if( m_gt_1(z) && z[0]=='l' && z[1]=='l' ){ - z++; - } - - /* z[] is now the stemmed word in reverse order. Flip it back - ** around into forward order and return. - */ - *pnOut = i = strlen(z); - zOut[i] = 0; - while( *z ){ - zOut[--i] = *(z++); - } -} - -/* -** Characters that can be part of a token. We assume any character -** whose value is greater than 0x80 (any UTF character) can be -** part of a token. In other words, delimiters all must have -** values of 0x7f or lower. -*/ -static const char isIdChar[] = { -/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */ -}; -#define idChar(C) (((ch=C)&0x80)!=0 || (ch>0x2f && isIdChar[ch-0x30])) -#define isDelim(C) (((ch=C)&0x80)==0 && (ch<0x30 || !isIdChar[ch-0x30])) - -/* -** Extract the next token from a tokenization cursor. The cursor must -** have been opened by a prior call to porterOpen(). -*/ -static int porterNext( - sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by porterOpen */ - const char **pzToken, /* OUT: *pzToken is the token text */ - int *pnBytes, /* OUT: Number of bytes in token */ - int *piStartOffset, /* OUT: Starting offset of token */ - int *piEndOffset, /* OUT: Ending offset of token */ - int *piPosition /* OUT: Position integer of token */ -){ - porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor; - const char *z = c->zInput; - - while( c->iOffsetnInput ){ - int iStartOffset, ch; - - /* Scan past delimiter characters */ - while( c->iOffsetnInput && isDelim(z[c->iOffset]) ){ - c->iOffset++; - } - - /* Count non-delimiter characters. */ - iStartOffset = c->iOffset; - while( c->iOffsetnInput && !isDelim(z[c->iOffset]) ){ - c->iOffset++; - } - - if( c->iOffset>iStartOffset ){ - int n = c->iOffset-iStartOffset; - if( n>c->nAllocated ){ - c->nAllocated = n+20; - c->zToken = realloc(c->zToken, c->nAllocated); - if( c->zToken==NULL ) return SQLITE_NOMEM; - } - porter_stemmer(&z[iStartOffset], n, c->zToken, pnBytes); - *pzToken = c->zToken; - *piStartOffset = iStartOffset; - *piEndOffset = c->iOffset; - *piPosition = c->iToken++; - return SQLITE_OK; - } - } - return SQLITE_DONE; -} - -/* -** The set of routines that implement the porter-stemmer tokenizer -*/ -static const sqlite3_tokenizer_module porterTokenizerModule = { - 0, - porterCreate, - porterDestroy, - porterOpen, - porterClose, - porterNext, -}; - -/* -** Allocate a new porter tokenizer. Return a pointer to the new -** tokenizer in *ppModule -*/ -void sqlite3Fts1PorterTokenizerModule( - sqlite3_tokenizer_module const**ppModule -){ - *ppModule = &porterTokenizerModule; -} - -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) */ diff --git a/ext/fts1/fts1_tokenizer.h b/ext/fts1/fts1_tokenizer.h deleted file mode 100644 index a48cb74519..0000000000 --- a/ext/fts1/fts1_tokenizer.h +++ /dev/null @@ -1,90 +0,0 @@ -/* -** 2006 July 10 -** -** The author disclaims copyright to this source code. -** -************************************************************************* -** Defines the interface to tokenizers used by fulltext-search. There -** are three basic components: -** -** sqlite3_tokenizer_module is a singleton defining the tokenizer -** interface functions. This is essentially the class structure for -** tokenizers. -** -** sqlite3_tokenizer is used to define a particular tokenizer, perhaps -** including customization information defined at creation time. -** -** sqlite3_tokenizer_cursor is generated by a tokenizer to generate -** tokens from a particular input. -*/ -#ifndef _FTS1_TOKENIZER_H_ -#define _FTS1_TOKENIZER_H_ - -/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time. -** If tokenizers are to be allowed to call sqlite3_*() functions, then -** we will need a way to register the API consistently. -*/ -#include "sqlite3.h" - -/* -** Structures used by the tokenizer interface. -*/ -typedef struct sqlite3_tokenizer sqlite3_tokenizer; -typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor; -typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module; - -struct sqlite3_tokenizer_module { - int iVersion; /* currently 0 */ - - /* - ** Create and destroy a tokenizer. argc/argv are passed down from - ** the fulltext virtual table creation to allow customization. - */ - int (*xCreate)(int argc, const char *const*argv, - sqlite3_tokenizer **ppTokenizer); - int (*xDestroy)(sqlite3_tokenizer *pTokenizer); - - /* - ** Tokenize a particular input. Call xOpen() to prepare to - ** tokenize, xNext() repeatedly until it returns SQLITE_DONE, then - ** xClose() to free any internal state. The pInput passed to - ** xOpen() must exist until the cursor is closed. The ppToken - ** result from xNext() is only valid until the next call to xNext() - ** or until xClose() is called. - */ - /* TODO(shess) current implementation requires pInput to be - ** nul-terminated. This should either be fixed, or pInput/nBytes - ** should be converted to zInput. - */ - int (*xOpen)(sqlite3_tokenizer *pTokenizer, - const char *pInput, int nBytes, - sqlite3_tokenizer_cursor **ppCursor); - int (*xClose)(sqlite3_tokenizer_cursor *pCursor); - int (*xNext)(sqlite3_tokenizer_cursor *pCursor, - const char **ppToken, int *pnBytes, - int *piStartOffset, int *piEndOffset, int *piPosition); -}; - -struct sqlite3_tokenizer { - const sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */ - /* Tokenizer implementations will typically add additional fields */ -}; - -struct sqlite3_tokenizer_cursor { - sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */ - /* Tokenizer implementations will typically add additional fields */ -}; - -/* -** Get the module for a tokenizer which generates tokens based on a -** set of non-token characters. The default is to break tokens at any -** non-alnum character, though the set of delimiters can also be -** specified by the first argv argument to xCreate(). -*/ -/* TODO(shess) This doesn't belong here. Need some sort of -** registration process. -*/ -void sqlite3Fts1SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule); -void sqlite3Fts1PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule); - -#endif /* _FTS1_TOKENIZER_H_ */ diff --git a/ext/fts1/fts1_tokenizer1.c b/ext/fts1/fts1_tokenizer1.c deleted file mode 100644 index f58fba8f8e..0000000000 --- a/ext/fts1/fts1_tokenizer1.c +++ /dev/null @@ -1,221 +0,0 @@ -/* -** The author disclaims copyright to this source code. -** -************************************************************************* -** Implementation of the "simple" full-text-search tokenizer. -*/ - -/* -** The code in this file is only compiled if: -** -** * The FTS1 module is being built as an extension -** (in which case SQLITE_CORE is not defined), or -** -** * The FTS1 module is being built into the core of -** SQLite (in which case SQLITE_ENABLE_FTS1 is defined). -*/ -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) - - -#include -#include -#include -#include -#include - -#include "fts1_tokenizer.h" - -typedef struct simple_tokenizer { - sqlite3_tokenizer base; - char delim[128]; /* flag ASCII delimiters */ -} simple_tokenizer; - -typedef struct simple_tokenizer_cursor { - sqlite3_tokenizer_cursor base; - const char *pInput; /* input we are tokenizing */ - int nBytes; /* size of the input */ - int iOffset; /* current position in pInput */ - int iToken; /* index of next token to be returned */ - char *pToken; /* storage for current token */ - int nTokenAllocated; /* space allocated to zToken buffer */ -} simple_tokenizer_cursor; - - -/* Forward declaration */ -static const sqlite3_tokenizer_module simpleTokenizerModule; - -static int isDelim(simple_tokenizer *t, unsigned char c){ - return c<0x80 && t->delim[c]; -} - -/* -** Create a new tokenizer instance. -*/ -static int simpleCreate( - int argc, const char * const *argv, - sqlite3_tokenizer **ppTokenizer -){ - simple_tokenizer *t; - - t = (simple_tokenizer *) calloc(sizeof(*t), 1); - if( t==NULL ) return SQLITE_NOMEM; - - /* TODO(shess) Delimiters need to remain the same from run to run, - ** else we need to reindex. One solution would be a meta-table to - ** track such information in the database, then we'd only want this - ** information on the initial create. - */ - if( argc>1 ){ - int i, n = strlen(argv[1]); - for(i=0; i=0x80 ){ - free(t); - return SQLITE_ERROR; - } - t->delim[ch] = 1; - } - } else { - /* Mark non-alphanumeric ASCII characters as delimiters */ - int i; - for(i=1; i<0x80; i++){ - t->delim[i] = !isalnum(i); - } - } - - *ppTokenizer = &t->base; - return SQLITE_OK; -} - -/* -** Destroy a tokenizer -*/ -static int simpleDestroy(sqlite3_tokenizer *pTokenizer){ - free(pTokenizer); - return SQLITE_OK; -} - -/* -** Prepare to begin tokenizing a particular string. The input -** string to be tokenized is pInput[0..nBytes-1]. A cursor -** used to incrementally tokenize this string is returned in -** *ppCursor. -*/ -static int simpleOpen( - sqlite3_tokenizer *pTokenizer, /* The tokenizer */ - const char *pInput, int nBytes, /* String to be tokenized */ - sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */ -){ - simple_tokenizer_cursor *c; - - c = (simple_tokenizer_cursor *) malloc(sizeof(*c)); - if( c==NULL ) return SQLITE_NOMEM; - - c->pInput = pInput; - if( pInput==0 ){ - c->nBytes = 0; - }else if( nBytes<0 ){ - c->nBytes = (int)strlen(pInput); - }else{ - c->nBytes = nBytes; - } - c->iOffset = 0; /* start tokenizing at the beginning */ - c->iToken = 0; - c->pToken = NULL; /* no space allocated, yet. */ - c->nTokenAllocated = 0; - - *ppCursor = &c->base; - return SQLITE_OK; -} - -/* -** Close a tokenization cursor previously opened by a call to -** simpleOpen() above. -*/ -static int simpleClose(sqlite3_tokenizer_cursor *pCursor){ - simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor; - free(c->pToken); - free(c); - return SQLITE_OK; -} - -/* -** Extract the next token from a tokenization cursor. The cursor must -** have been opened by a prior call to simpleOpen(). -*/ -static int simpleNext( - sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */ - const char **ppToken, /* OUT: *ppToken is the token text */ - int *pnBytes, /* OUT: Number of bytes in token */ - int *piStartOffset, /* OUT: Starting offset of token */ - int *piEndOffset, /* OUT: Ending offset of token */ - int *piPosition /* OUT: Position integer of token */ -){ - simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor; - simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer; - unsigned char *p = (unsigned char *)c->pInput; - - while( c->iOffsetnBytes ){ - int iStartOffset; - - /* Scan past delimiter characters */ - while( c->iOffsetnBytes && isDelim(t, p[c->iOffset]) ){ - c->iOffset++; - } - - /* Count non-delimiter characters. */ - iStartOffset = c->iOffset; - while( c->iOffsetnBytes && !isDelim(t, p[c->iOffset]) ){ - c->iOffset++; - } - - if( c->iOffset>iStartOffset ){ - int i, n = c->iOffset-iStartOffset; - if( n>c->nTokenAllocated ){ - c->nTokenAllocated = n+20; - c->pToken = realloc(c->pToken, c->nTokenAllocated); - if( c->pToken==NULL ) return SQLITE_NOMEM; - } - for(i=0; ipToken[i] = ch<0x80 ? tolower(ch) : ch; - } - *ppToken = c->pToken; - *pnBytes = n; - *piStartOffset = iStartOffset; - *piEndOffset = c->iOffset; - *piPosition = c->iToken++; - - return SQLITE_OK; - } - } - return SQLITE_DONE; -} - -/* -** The set of routines that implement the simple tokenizer -*/ -static const sqlite3_tokenizer_module simpleTokenizerModule = { - 0, - simpleCreate, - simpleDestroy, - simpleOpen, - simpleClose, - simpleNext, -}; - -/* -** Allocate a new simple tokenizer. Return a pointer to the new -** tokenizer in *ppModule -*/ -void sqlite3Fts1SimpleTokenizerModule( - sqlite3_tokenizer_module const**ppModule -){ - *ppModule = &simpleTokenizerModule; -} - -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) */ diff --git a/ext/fts1/fulltext.c b/ext/fts1/fulltext.c deleted file mode 100644 index 313ff303e1..0000000000 --- a/ext/fts1/fulltext.c +++ /dev/null @@ -1,1511 +0,0 @@ -/* The author disclaims copyright to this source code. - * - * This is an SQLite module implementing full-text search. - */ - -#include -#if !defined(__APPLE__) -#include -#else -#include -#endif -#include -#include -#include - -#include "fulltext.h" -#include "ft_hash.h" -#include "tokenizer.h" -#include "sqlite3.h" -#include "sqlite3ext.h" -SQLITE_EXTENSION_INIT1 - -/* utility functions */ - -/* We encode variable-length integers in little-endian order using seven bits - * per byte as follows: -** -** KEY: -** A = 0xxxxxxx 7 bits of data and one flag bit -** B = 1xxxxxxx 7 bits of data and one flag bit -** -** 7 bits - A -** 14 bits - BA -** 21 bits - BBA -** and so on. -*/ - -/* We may need up to VARINT_MAX bytes to store an encoded 64-bit integer. */ -#define VARINT_MAX 10 - -/* Write a 64-bit variable-length integer to memory starting at p[0]. - * The length of data written will be between 1 and VARINT_MAX bytes. - * The number of bytes written is returned. */ -static int putVarint(char *p, sqlite_int64 v){ - unsigned char *q = (unsigned char *) p; - sqlite_uint64 vu = v; - do{ - *q++ = (unsigned char) ((vu & 0x7f) | 0x80); - vu >>= 7; - }while( vu!=0 ); - q[-1] &= 0x7f; /* turn off high bit in final byte */ - assert( q - (unsigned char *)p <= VARINT_MAX ); - return (int) (q - (unsigned char *)p); -} - -/* Read a 64-bit variable-length integer from memory starting at p[0]. - * Return the number of bytes read, or 0 on error. - * The value is stored in *v. */ -static int getVarint(const char *p, sqlite_int64 *v){ - const unsigned char *q = (const unsigned char *) p; - sqlite_uint64 x = 0, y = 1; - while( (*q & 0x80) == 0x80 ){ - x += y * (*q++ & 0x7f); - y <<= 7; - if( q - (unsigned char *)p >= VARINT_MAX ){ /* bad data */ - assert( 0 ); - return 0; - } - } - x += y * (*q++); - *v = (sqlite_int64) x; - return (int) (q - (unsigned char *)p); -} - -static int getVarint32(const char *p, int *pi){ - sqlite_int64 i; - int ret = getVarint(p, &i); - *pi = (int) i; - assert( *pi==i ); - return ret; -} - -/*** Document lists *** - * - * A document list holds a sorted list of varint-encoded document IDs. - * - * A doclist with type DL_POSITIONS_OFFSETS is stored like this: - * - * array { - * varint docid; - * array { - * varint position; (delta from previous position plus 1, or 0 for end) - * varint startOffset; (delta from previous startOffset) - * varint endOffset; (delta from startOffset) - * } - * } - * - * Here, array { X } means zero or more occurrences of X, adjacent in memory. - * - * A doclist with type DL_POSITIONS is like the above, but holds only docids - * and positions without offset information. - * - * A doclist with type DL_DOCIDS is like the above, but holds only docids - * without positions or offset information. - * - * On disk, every document list has positions and offsets, so we don't bother - * to serialize a doclist's type. - * - * We don't yet delta-encode document IDs; doing so will probably be a - * modest win. - * - * NOTE(shess) I've thought of a slightly (1%) better offset encoding. - * After the first offset, estimate the next offset by using the - * current token position and the previous token position and offset, - * offset to handle some variance. So the estimate would be - * (iPosition*w->iStartOffset/w->iPosition-64), which is delta-encoded - * as normal. Offsets more than 64 chars from the estimate are - * encoded as the delta to the previous start offset + 128. An - * additional tiny increment can be gained by using the end offset of - * the previous token to make the estimate a tiny bit more precise. -*/ - -typedef enum DocListType { - DL_DOCIDS, /* docids only */ - DL_POSITIONS, /* docids + positions */ - DL_POSITIONS_OFFSETS /* docids + positions + offsets */ -} DocListType; - -typedef struct DocList { - char *pData; - int nData; - DocListType iType; - int iLastPos; /* the last position written */ - int iLastOffset; /* the last start offset written */ -} DocList; - -/* Initialize a new DocList to hold the given data. */ -static void docListInit(DocList *d, DocListType iType, - const char *pData, int nData){ - d->nData = nData; - if( nData>0 ){ - d->pData = malloc(nData); - memcpy(d->pData, pData, nData); - } else { - d->pData = NULL; - } - d->iType = iType; - d->iLastPos = 0; - d->iLastOffset = 0; -} - -/* Create a new dynamically-allocated DocList. */ -static DocList *docListNew(DocListType iType){ - DocList *d = (DocList *) malloc(sizeof(DocList)); - docListInit(d, iType, 0, 0); - return d; -} - -static void docListDestroy(DocList *d){ - free(d->pData); -#ifndef NDEBUG - memset(d, 0x55, sizeof(*d)); -#endif -} - -static void docListDelete(DocList *d){ - docListDestroy(d); - free(d); -} - -static char *docListEnd(DocList *d){ - return d->pData + d->nData; -} - -/* Append a varint to a DocList's data. */ -static void appendVarint(DocList *d, sqlite_int64 i){ - char c[VARINT_MAX]; - int n = putVarint(c, i); - d->pData = realloc(d->pData, d->nData + n); - memcpy(d->pData + d->nData, c, n); - d->nData += n; -} - -static void docListAddDocid(DocList *d, sqlite_int64 iDocid){ - appendVarint(d, iDocid); - d->iLastPos = 0; -} - -/* Add a position to the last position list in a doclist. */ -static void docListAddPos(DocList *d, int iPos){ - assert( d->iType>=DL_POSITIONS ); - appendVarint(d, iPos-d->iLastPos+1); - d->iLastPos = iPos; -} - -static void docListAddPosOffset(DocList *d, int iPos, - int iStartOffset, int iEndOffset){ - assert( d->iType==DL_POSITIONS_OFFSETS ); - docListAddPos(d, iPos); - appendVarint(d, iStartOffset-d->iLastOffset); - d->iLastOffset = iStartOffset; - appendVarint(d, iEndOffset-iStartOffset); -} - -/* Terminate the last position list in the given doclist. */ -static void docListAddEndPos(DocList *d){ - appendVarint(d, 0); -} - -typedef struct DocListReader { - DocList *pDoclist; - char *p; - int iLastPos; /* the last position read */ -} DocListReader; - -static void readerInit(DocListReader *r, DocList *pDoclist){ - r->pDoclist = pDoclist; - if( pDoclist!=NULL ){ - r->p = pDoclist->pData; - } - r->iLastPos = 0; -} - -static int readerAtEnd(DocListReader *pReader){ - return pReader->p >= docListEnd(pReader->pDoclist); -} - -/* Peek at the next docid without advancing the read pointer. */ -static sqlite_int64 peekDocid(DocListReader *pReader){ - sqlite_int64 ret; - assert( !readerAtEnd(pReader) ); - getVarint(pReader->p, &ret); - return ret; -} - -/* Read the next docid. */ -static sqlite_int64 readDocid(DocListReader *pReader){ - sqlite_int64 ret; - assert( !readerAtEnd(pReader) ); - pReader->p += getVarint(pReader->p, &ret); - pReader->iLastPos = 0; - return ret; -} - -/* Read the next position from a position list. - * Returns the position, or -1 at the end of the list. */ -static int readPosition(DocListReader *pReader){ - int i; - int iType = pReader->pDoclist->iType; - assert( iType>=DL_POSITIONS ); - assert( !readerAtEnd(pReader) ); - - pReader->p += getVarint32(pReader->p, &i); - if( i==0 ){ - pReader->iLastPos = -1; - return -1; - } - pReader->iLastPos += ((int) i)-1; - if( iType>=DL_POSITIONS_OFFSETS ){ - /* Skip over offsets, ignoring them for now. */ - int iStart, iEnd; - pReader->p += getVarint32(pReader->p, &iStart); - pReader->p += getVarint32(pReader->p, &iEnd); - } - return pReader->iLastPos; -} - -/* Skip past the end of a position list. */ -static void skipPositionList(DocListReader *pReader){ - while( readPosition(pReader)!=-1 ) - ; -} - -/* Skip over a docid, including its position list if the doclist has - * positions. */ -static void skipDocument(DocListReader *pReader){ - readDocid(pReader); - if( pReader->pDoclist->iType >= DL_POSITIONS ){ - skipPositionList(pReader); - } -} - -static sqlite_int64 firstDocid(DocList *d){ - DocListReader r; - readerInit(&r, d); - return readDocid(&r); -} - -/* Doclist multi-tool. Pass pUpdate==NULL to delete the indicated docid; - * otherwise pUpdate, which must contain only the single docid [iDocid], is - * inserted (if not present) or updated (if already present). */ -static int docListUpdate(DocList *d, sqlite_int64 iDocid, DocList *pUpdate){ - int modified = 0; - DocListReader reader; - char *p; - - if( pUpdate!=NULL ){ - assert( d->iType==pUpdate->iType); - assert( iDocid==firstDocid(pUpdate) ); - } - - readerInit(&reader, d); - while( !readerAtEnd(&reader) && peekDocid(&reader)nData -= (reader.p - p); - modified = 1; - } - - /* Insert if indicated. */ - if( pUpdate!=NULL ){ - int iDoclist = p-d->pData; - docListAddEndPos(pUpdate); - - d->pData = realloc(d->pData, d->nData+pUpdate->nData); - p = d->pData + iDoclist; - - memmove(p+pUpdate->nData, p, docListEnd(d) - p); - memcpy(p, pUpdate->pData, pUpdate->nData); - d->nData += pUpdate->nData; - modified = 1; - } - - return modified; -} - -/* Split the second half of doclist d into a separate doclist d2. Returns 1 - * if successful, or 0 if d contains a single document and hence can't be - * split. */ -static int docListSplit(DocList *d, DocList *d2){ - const char *pSplitPoint = d->pData + d->nData / 2; - DocListReader reader; - - readerInit(&reader, d); - while( reader.piType, reader.p, docListEnd(d) - reader.p); - d->nData = reader.p - d->pData; - d->pData = realloc(d->pData, d->nData); - return 1; -} - -/* A DocListMerge computes the AND of an in-memory DocList [in] and a chunked - * on-disk doclist, resulting in another in-memory DocList [out]. [in] - * and [out] may or may not store position information according to the - * caller's wishes. The on-disk doclist always comes with positions. - * - * The caller must read each chunk of the on-disk doclist in succession and - * pass it to mergeBlock(). - * - * If [in] has positions, then the merge output contains only documents with - * matching positions in the two input doclists. If [in] does not have - * positions, then the merge output contains all documents common to the two - * input doclists. - * - * If [in] is NULL, then the on-disk doclist is copied to [out] directly. - * - * A merge is performed using an integer [iOffset] provided by the caller. - * [iOffset] is subtracted from each position in the on-disk doclist for the - * purpose of position comparison; this is helpful in implementing phrase - * searches. - * - * A DocListMerge is not yet able to propagate offsets through query - * processing; we should add that capability soon. -*/ -typedef struct DocListMerge { - DocListReader in; - DocList *pOut; - int iOffset; -} DocListMerge; - -static void mergeInit(DocListMerge *m, - DocList *pIn, int iOffset, DocList *pOut){ - readerInit(&m->in, pIn); - m->pOut = pOut; - m->iOffset = iOffset; - - /* can't handle offsets yet */ - assert( pIn==NULL || pIn->iType <= DL_POSITIONS ); - assert( pOut->iType <= DL_POSITIONS ); -} - -/* A helper function for mergeBlock(), below. Merge the position lists - * pointed to by m->in and pBlockReader. - * If the merge matches, write [iDocid] to m->pOut; if m->pOut - * has positions then write all matching positions as well. */ -static void mergePosList(DocListMerge *m, sqlite_int64 iDocid, - DocListReader *pBlockReader){ - int block_pos = readPosition(pBlockReader); - int in_pos = readPosition(&m->in); - int match = 0; - while( block_pos!=-1 || in_pos!=-1 ){ - if( block_pos-m->iOffset==in_pos ){ - if( !match ){ - docListAddDocid(m->pOut, iDocid); - match = 1; - } - if( m->pOut->iType >= DL_POSITIONS ){ - docListAddPos(m->pOut, in_pos); - } - block_pos = readPosition(pBlockReader); - in_pos = readPosition(&m->in); - } else if( in_pos==-1 || (block_pos!=-1 && block_pos-m->iOffsetin); - } - } - if( m->pOut->iType >= DL_POSITIONS && match ){ - docListAddEndPos(m->pOut); - } -} - -/* Merge one block of an on-disk doclist into a DocListMerge. */ -static void mergeBlock(DocListMerge *m, DocList *pBlock){ - DocListReader blockReader; - assert( pBlock->iType >= DL_POSITIONS ); - readerInit(&blockReader, pBlock); - while( !readerAtEnd(&blockReader) ){ - sqlite_int64 iDocid = readDocid(&blockReader); - if( m->in.pDoclist!=NULL ){ - while( 1 ){ - if( readerAtEnd(&m->in) ) return; /* nothing more to merge */ - if( peekDocid(&m->in)>=iDocid ) break; - skipDocument(&m->in); - } - if( peekDocid(&m->in)>iDocid ){ /* [pIn] has no match with iDocid */ - skipPositionList(&blockReader); /* skip this docid in the block */ - continue; - } - readDocid(&m->in); - } - /* We have a document match. */ - if( m->in.pDoclist==NULL || m->in.pDoclist->iType < DL_POSITIONS ){ - /* We don't need to do a poslist merge. */ - docListAddDocid(m->pOut, iDocid); - if( m->pOut->iType >= DL_POSITIONS ){ - /* Copy all positions to the output doclist. */ - while( 1 ){ - int pos = readPosition(&blockReader); - if( pos==-1 ) break; - docListAddPos(m->pOut, pos); - } - docListAddEndPos(m->pOut); - } else skipPositionList(&blockReader); - continue; - } - mergePosList(m, iDocid, &blockReader); - } -} - -static char *string_dup_n(const char *s, int n){ - char *str = malloc(n + 1); - memcpy(str, s, n); - str[n] = '\0'; - return str; -} - -/* Duplicate a string; the caller must free() the returned string. - * (We don't use strdup() since it's not part of the standard C library and - * may not be available everywhere.) */ -static char *string_dup(const char *s){ - return string_dup_n(s, strlen(s)); -} - -/* Format a string, replacing each occurrence of the % character with - * zName. This may be more convenient than sqlite_mprintf() - * when one string is used repeatedly in a format string. - * The caller must free() the returned string. */ -static char *string_format(const char *zFormat, const char *zName){ - const char *p; - size_t len = 0; - size_t nName = strlen(zName); - char *result; - char *r; - - /* first compute length needed */ - for(p = zFormat ; *p ; ++p){ - len += (*p=='%' ? nName : 1); - } - len += 1; /* for null terminator */ - - r = result = malloc(len); - for(p = zFormat; *p; ++p){ - if( *p=='%' ){ - memcpy(r, zName, nName); - r += nName; - } else { - *r++ = *p; - } - } - *r++ = '\0'; - assert( r == result + len ); - return result; -} - -static int sql_exec(sqlite3 *db, const char *zName, const char *zFormat){ - char *zCommand = string_format(zFormat, zName); - int rc = sqlite3_exec(db, zCommand, NULL, 0, NULL); - free(zCommand); - return rc; -} - -static int sql_prepare(sqlite3 *db, const char *zName, sqlite3_stmt **ppStmt, - const char *zFormat){ - char *zCommand = string_format(zFormat, zName); - int rc = sqlite3_prepare(db, zCommand, -1, ppStmt, NULL); - free(zCommand); - return rc; -} - -/* end utility functions */ - -#define QUERY_GENERIC 0 -#define QUERY_FULLTEXT 1 - -#define CHUNK_MAX 1024 - -typedef enum fulltext_statement { - CONTENT_INSERT_STMT, - CONTENT_SELECT_STMT, - CONTENT_DELETE_STMT, - - TERM_SELECT_STMT, - TERM_CHUNK_SELECT_STMT, - TERM_INSERT_STMT, - TERM_UPDATE_STMT, - TERM_DELETE_STMT, - - MAX_STMT /* Always at end! */ -} fulltext_statement; - -/* These must exactly match the enum above. */ -/* TODO(adam): Is there some risk that a statement (in particular, -** pTermSelectStmt) will be used in two cursors at once, e.g. if a -** query joins a virtual table to itself? If so perhaps we should -** move some of these to the cursor object. -*/ -static const char *fulltext_zStatement[MAX_STMT] = { - /* CONTENT_INSERT */ "insert into %_content (rowid, content) values (?, ?)", - /* CONTENT_SELECT */ "select content from %_content where rowid = ?", - /* CONTENT_DELETE */ "delete from %_content where rowid = ?", - - /* TERM_SELECT */ - "select rowid, doclist from %_term where term = ? and first = ?", - /* TERM_CHUNK_SELECT */ - "select max(first) from %_term where term = ? and first <= ?", - /* TERM_INSERT */ - "insert into %_term (term, first, doclist) values (?, ?, ?)", - /* TERM_UPDATE */ "update %_term set doclist = ? where rowid = ?", - /* TERM_DELETE */ "delete from %_term where rowid = ?", -}; - -typedef struct fulltext_vtab { - sqlite3_vtab base; - sqlite3 *db; - const char *zName; /* virtual table name */ - sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */ - - /* Precompiled statements which we keep as long as the table is - ** open. - */ - sqlite3_stmt *pFulltextStatements[MAX_STMT]; -} fulltext_vtab; - -typedef struct fulltext_cursor { - sqlite3_vtab_cursor base; - int iCursorType; /* QUERY_GENERIC or QUERY_FULLTEXT */ - - sqlite3_stmt *pStmt; - - int eof; - - /* The following is used only when iCursorType == QUERY_FULLTEXT. */ - DocListReader result; -} fulltext_cursor; - -static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){ - return (fulltext_vtab *) c->base.pVtab; -} - -static sqlite3_module fulltextModule; /* forward declaration */ - -/* Puts a freshly-prepared statement determined by iStmt in *ppStmt. -** If the indicated statement has never been prepared, it is prepared -** and cached, otherwise the cached version is reset. -*/ -static int sql_get_statement(fulltext_vtab *v, fulltext_statement iStmt, - sqlite3_stmt **ppStmt){ - assert( iStmtpFulltextStatements[iStmt]==NULL ){ - int rc = sql_prepare(v->db, v->zName, &v->pFulltextStatements[iStmt], - fulltext_zStatement[iStmt]); - if( rc!=SQLITE_OK ) return rc; - } else { - int rc = sqlite3_reset(v->pFulltextStatements[iStmt]); - if( rc!=SQLITE_OK ) return rc; - } - - *ppStmt = v->pFulltextStatements[iStmt]; - return SQLITE_OK; -} - -/* Step the indicated statement, handling errors SQLITE_BUSY (by -** retrying) and SQLITE_SCHEMA (by re-preparing and transferring -** bindings to the new statement). -** TODO(adam): We should extend this function so that it can work with -** statements declared locally, not only globally cached statements. -*/ -static int sql_step_statement(fulltext_vtab *v, fulltext_statement iStmt, - sqlite3_stmt **ppStmt){ - int rc; - sqlite3_stmt *s = *ppStmt; - assert( iStmtpFulltextStatements[iStmt] ); - - while( (rc=sqlite3_step(s))!=SQLITE_DONE && rc!=SQLITE_ROW ){ - sqlite3_stmt *pNewStmt; - - if( rc==SQLITE_BUSY ) continue; - if( rc!=SQLITE_ERROR ) return rc; - - rc = sqlite3_reset(s); - if( rc!=SQLITE_SCHEMA ) return SQLITE_ERROR; - - v->pFulltextStatements[iStmt] = NULL; /* Still in s */ - rc = sql_get_statement(v, iStmt, &pNewStmt); - if( rc!=SQLITE_OK ) goto err; - *ppStmt = pNewStmt; - - rc = sqlite3_transfer_bindings(s, pNewStmt); - if( rc!=SQLITE_OK ) goto err; - - rc = sqlite3_finalize(s); - if( rc!=SQLITE_OK ) return rc; - s = pNewStmt; - } - return rc; - - err: - sqlite3_finalize(s); - return rc; -} - -/* Like sql_step_statement(), but convert SQLITE_DONE to SQLITE_OK. -** Useful for statements like UPDATE, where we expect no results. -*/ -static int sql_single_step_statement(fulltext_vtab *v, - fulltext_statement iStmt, - sqlite3_stmt **ppStmt){ - int rc = sql_step_statement(v, iStmt, ppStmt); - return (rc==SQLITE_DONE) ? SQLITE_OK : rc; -} - -/* insert into %_content (rowid, content) values ([rowid], [zContent]) */ -static int content_insert(fulltext_vtab *v, sqlite3_value *rowid, - const char *zContent, int nContent){ - sqlite3_stmt *s; - int rc = sql_get_statement(v, CONTENT_INSERT_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_value(s, 1, rowid); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_text(s, 2, zContent, nContent, SQLITE_STATIC); - if( rc!=SQLITE_OK ) return rc; - - return sql_single_step_statement(v, CONTENT_INSERT_STMT, &s); -} - -/* select content from %_content where rowid = [iRow] - * The caller must delete the returned string. */ -static int content_select(fulltext_vtab *v, sqlite_int64 iRow, - char **pzContent){ - sqlite3_stmt *s; - int rc = sql_get_statement(v, CONTENT_SELECT_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 1, iRow); - if( rc!=SQLITE_OK ) return rc; - - rc = sql_step_statement(v, CONTENT_SELECT_STMT, &s); - if( rc!=SQLITE_ROW ) return rc; - - *pzContent = string_dup((const char *)sqlite3_column_text(s, 0)); - - /* We expect only one row. We must execute another sqlite3_step() - * to complete the iteration; otherwise the table will remain locked. */ - rc = sqlite3_step(s); - if( rc==SQLITE_DONE ) return SQLITE_OK; - - free(*pzContent); - return rc; -} - -/* delete from %_content where rowid = [iRow ] */ -static int content_delete(fulltext_vtab *v, sqlite_int64 iRow){ - sqlite3_stmt *s; - int rc = sql_get_statement(v, CONTENT_DELETE_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 1, iRow); - if( rc!=SQLITE_OK ) return rc; - - return sql_single_step_statement(v, CONTENT_DELETE_STMT, &s); -} - -/* select rowid, doclist from %_term where term = [zTerm] and first = [iFirst] - * If found, returns SQLITE_OK; the caller must free the returned doclist. - * If no rows found, returns SQLITE_ERROR. */ -static int term_select(fulltext_vtab *v, const char *zTerm, int nTerm, - sqlite_int64 iFirst, - sqlite_int64 *rowid, - DocList *out){ - sqlite3_stmt *s; - int rc = sql_get_statement(v, TERM_SELECT_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_text(s, 1, zTerm, nTerm, SQLITE_TRANSIENT); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 2, iFirst); - if( rc!=SQLITE_OK ) return rc; - - rc = sql_step_statement(v, TERM_SELECT_STMT, &s); - if( rc!=SQLITE_ROW ) return rc==SQLITE_DONE ? SQLITE_ERROR : rc; - - *rowid = sqlite3_column_int64(s, 0); - docListInit(out, DL_POSITIONS_OFFSETS, - sqlite3_column_blob(s, 1), sqlite3_column_bytes(s, 1)); - - /* We expect only one row. We must execute another sqlite3_step() - * to complete the iteration; otherwise the table will remain locked. */ - rc = sqlite3_step(s); - return rc==SQLITE_DONE ? SQLITE_OK : rc; -} - -/* select max(first) from %_term where term = [zTerm] and first <= [iFirst] - * If found, returns SQLITE_ROW and result in *piResult; if the query returns - * NULL (meaning no row found) returns SQLITE_DONE. - */ -static int term_chunk_select(fulltext_vtab *v, const char *zTerm, int nTerm, - sqlite_int64 iFirst, sqlite_int64 *piResult){ - sqlite3_stmt *s; - int rc = sql_get_statement(v, TERM_CHUNK_SELECT_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_text(s, 1, zTerm, nTerm, SQLITE_STATIC); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 2, iFirst); - if( rc!=SQLITE_OK ) return rc; - - rc = sql_step_statement(v, TERM_CHUNK_SELECT_STMT, &s); - if( rc!=SQLITE_ROW ) return rc==SQLITE_DONE ? SQLITE_ERROR : rc; - - switch( sqlite3_column_type(s, 0) ){ - case SQLITE_NULL: - rc = SQLITE_DONE; - break; - case SQLITE_INTEGER: - *piResult = sqlite3_column_int64(s, 0); - break; - default: - return SQLITE_ERROR; - } - /* We expect only one row. We must execute another sqlite3_step() - * to complete the iteration; otherwise the table will remain locked. */ - if( sqlite3_step(s) != SQLITE_DONE ) return SQLITE_ERROR; - return rc; -} - -/* insert into %_term (term, first, doclist) - values ([zTerm], [iFirst], [doclist]) */ -static int term_insert(fulltext_vtab *v, const char *zTerm, int nTerm, - sqlite_int64 iFirst, DocList *doclist){ - sqlite3_stmt *s; - int rc = sql_get_statement(v, TERM_INSERT_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_text(s, 1, zTerm, nTerm, SQLITE_STATIC); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 2, iFirst); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_blob(s, 3, doclist->pData, doclist->nData, SQLITE_STATIC); - if( rc!=SQLITE_OK ) return rc; - - return sql_single_step_statement(v, TERM_INSERT_STMT, &s); -} - -/* update %_term set doclist = [doclist] where rowid = [rowid] */ -static int term_update(fulltext_vtab *v, sqlite_int64 rowid, - DocList *doclist){ - sqlite3_stmt *s; - int rc = sql_get_statement(v, TERM_UPDATE_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_blob(s, 1, doclist->pData, doclist->nData, - SQLITE_STATIC); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 2, rowid); - if( rc!=SQLITE_OK ) return rc; - - return sql_single_step_statement(v, TERM_UPDATE_STMT, &s); -} - -static int term_delete(fulltext_vtab *v, sqlite_int64 rowid){ - sqlite3_stmt *s; - int rc = sql_get_statement(v, TERM_DELETE_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 1, rowid); - if( rc!=SQLITE_OK ) return rc; - - return sql_single_step_statement(v, TERM_DELETE_STMT, &s); -} - -static void fulltext_vtab_destroy(fulltext_vtab *v){ - int iStmt; - - for( iStmt=0; iStmtpFulltextStatements[iStmt]!=NULL ){ - sqlite3_finalize(v->pFulltextStatements[iStmt]); - v->pFulltextStatements[iStmt] = NULL; - } - } - - if( v->pTokenizer!=NULL ){ - v->pTokenizer->pModule->xDestroy(v->pTokenizer); - v->pTokenizer = NULL; - } - - free((void *) v->zName); - free(v); -} - -/* Current interface: -** argv[0] - module name -** argv[1] - database name -** argv[2] - table name -** argv[3] - tokenizer name (optional, a sensible default is provided) -** argv[4..] - passed to tokenizer (optional based on tokenizer) -**/ -static int fulltextConnect( - sqlite3 *db, - void *pAux, - int argc, - const char * const *argv, - sqlite3_vtab **ppVTab, - char **pzErr -){ - int rc; - fulltext_vtab *v; - sqlite3_tokenizer_module *m = NULL; - - assert( argc>=3 ); - v = (fulltext_vtab *) malloc(sizeof(fulltext_vtab)); - /* sqlite will initialize v->base */ - v->db = db; - v->zName = string_dup(argv[2]); - v->pTokenizer = NULL; - - if( argc==3 ){ - get_simple_tokenizer_module(&m); - } else { - /* TODO(shess) For now, add new tokenizers as else if clauses. */ - if( !strcmp(argv[3], "simple") ){ - get_simple_tokenizer_module(&m); - } else { - assert( "unrecognized tokenizer"==NULL ); - } - } - - /* TODO(shess) Since tokenization impacts the index, the parameters - ** to the tokenizer need to be identical when a persistent virtual - ** table is re-created. One solution would be a meta-table to track - ** such information in the database. Then we could verify that the - ** information is identical on subsequent creates. - */ - /* TODO(shess) Why isn't argv already (const char **)? */ - rc = m->xCreate(argc-3, (const char **) (argv+3), &v->pTokenizer); - if( rc!=SQLITE_OK ) return rc; - v->pTokenizer->pModule = m; - - /* TODO: verify the existence of backing tables foo_content, foo_term */ - - rc = sqlite3_declare_vtab(db, "create table x(content text)"); - if( rc!=SQLITE_OK ) return rc; - - memset(v->pFulltextStatements, 0, sizeof(v->pFulltextStatements)); - - *ppVTab = &v->base; - return SQLITE_OK; -} - -static int fulltextCreate( - sqlite3 *db, - void *pAux, - int argc, - const char * const *argv, - sqlite3_vtab **ppVTab, - char **pzErr -){ - int rc; - assert( argc>=3 ); - - /* The %_content table holds the text of each full-text item, with - ** the rowid used as the docid. - ** - ** The %_term table maps each term to a document list blob - ** containing elements sorted by ascending docid, each element - ** encoded as: - ** - ** docid varint-encoded - ** token count varint-encoded - ** "count" token elements (poslist): - ** position varint-encoded as delta from previous position - ** start offset varint-encoded as delta from previous start offset - ** end offset varint-encoded as delta from start offset - ** - ** Additionally, doclist blobs can be chunked into multiple rows, - ** using "first" to order the blobs. "first" is simply the first - ** docid in the blob. - */ - /* - ** NOTE(shess) That last sentence is incorrect in the face of - ** deletion, which can leave a doclist that doesn't contain the - ** first from that row. I _believe_ this does not matter to the - ** operation of the system, but it might be reasonable to update - ** appropriately in case this assumption becomes more important. - */ - rc = sql_exec(db, argv[2], - "create table %_content(content text);" - "create table %_term(term text, first integer, doclist blob);" - "create index %_index on %_term(term, first)"); - if( rc!=SQLITE_OK ) return rc; - - return fulltextConnect(db, pAux, argc, argv, ppVTab, pzErr); -} - -/* Decide how to handle an SQL query. - * At the moment, MATCH queries can include implicit boolean ANDs; we - * haven't implemented phrase searches or OR yet. */ -static int fulltextBestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ - int i; - - for(i=0; inConstraint; ++i){ - const struct sqlite3_index_constraint *pConstraint; - pConstraint = &pInfo->aConstraint[i]; - if( pConstraint->iColumn==0 && - pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH && - pConstraint->usable ){ /* a full-text search */ - pInfo->aConstraintUsage[i].argvIndex = 1; - pInfo->aConstraintUsage[i].omit = 1; - pInfo->idxNum = QUERY_FULLTEXT; - pInfo->estimatedCost = 1.0; /* an arbitrary value for now */ - return SQLITE_OK; - } - } - pInfo->idxNum = QUERY_GENERIC; - return SQLITE_OK; -} - -static int fulltextDisconnect(sqlite3_vtab *pVTab){ - fulltext_vtab_destroy((fulltext_vtab *)pVTab); - return SQLITE_OK; -} - -static int fulltextDestroy(sqlite3_vtab *pVTab){ - fulltext_vtab *v = (fulltext_vtab *)pVTab; - - int rc = sql_exec(v->db, v->zName, - "drop table %_content; drop table %_term"); - if( rc!=SQLITE_OK ) return rc; - - fulltext_vtab_destroy((fulltext_vtab *)pVTab); - return SQLITE_OK; -} - -static int fulltextOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ - fulltext_cursor *c; - - c = (fulltext_cursor *) calloc(sizeof(fulltext_cursor), 1); - /* sqlite will initialize c->base */ - *ppCursor = &c->base; - - return SQLITE_OK; -} - -static int fulltextClose(sqlite3_vtab_cursor *pCursor){ - fulltext_cursor *c = (fulltext_cursor *) pCursor; - sqlite3_finalize(c->pStmt); - if( c->result.pDoclist!=NULL ){ - docListDelete(c->result.pDoclist); - } - free(c); - return SQLITE_OK; -} - -static int fulltextNext(sqlite3_vtab_cursor *pCursor){ - fulltext_cursor *c = (fulltext_cursor *) pCursor; - sqlite_int64 iDocid; - int rc; - - switch( c->iCursorType ){ - case QUERY_GENERIC: - /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */ - rc = sqlite3_step(c->pStmt); - switch( rc ){ - case SQLITE_ROW: - c->eof = 0; - return SQLITE_OK; - case SQLITE_DONE: - c->eof = 1; - return SQLITE_OK; - default: - c->eof = 1; - return rc; - } - case QUERY_FULLTEXT: - rc = sqlite3_reset(c->pStmt); - if( rc!=SQLITE_OK ) return rc; - - if( readerAtEnd(&c->result)){ - c->eof = 1; - return SQLITE_OK; - } - iDocid = readDocid(&c->result); - rc = sqlite3_bind_int64(c->pStmt, 1, iDocid); - if( rc!=SQLITE_OK ) return rc; - /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */ - rc = sqlite3_step(c->pStmt); - if( rc==SQLITE_ROW ){ /* the case we expect */ - c->eof = 0; - return SQLITE_OK; - } - /* an error occurred; abort */ - return rc==SQLITE_DONE ? SQLITE_ERROR : rc; - default: - assert( 0 ); - return SQLITE_ERROR; /* not reached */ - } -} - -static int term_select_doclist(fulltext_vtab *v, const char *pTerm, int nTerm, - sqlite3_stmt **ppStmt){ - int rc; - if( *ppStmt ){ - rc = sqlite3_reset(*ppStmt); - } else { - rc = sql_prepare(v->db, v->zName, ppStmt, - "select doclist from %_term where term = ? order by first"); - } - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_text(*ppStmt, 1, pTerm, nTerm, SQLITE_TRANSIENT); - if( rc!=SQLITE_OK ) return rc; - - return sqlite3_step(*ppStmt); /* TODO(adamd): handle schema error */ -} - -/* Read the posting list for [zTerm]; AND it with the doclist [in] to - * produce the doclist [out], using the given offset [iOffset] for phrase - * matching. - * (*pSelect) is used to hold an SQLite statement used inside this function; - * the caller should initialize *pSelect to NULL before the first call. - */ -static int query_merge(fulltext_vtab *v, sqlite3_stmt **pSelect, - const char *zTerm, - DocList *pIn, int iOffset, DocList *out){ - int rc; - DocListMerge merge; - - if( pIn!=NULL && !pIn->nData ){ - /* If [pIn] is already empty, there's no point in reading the - * posting list to AND it in; return immediately. */ - return SQLITE_OK; - } - - rc = term_select_doclist(v, zTerm, -1, pSelect); - if( rc!=SQLITE_ROW && rc!=SQLITE_DONE ) return rc; - - mergeInit(&merge, pIn, iOffset, out); - while( rc==SQLITE_ROW ){ - DocList block; - docListInit(&block, DL_POSITIONS_OFFSETS, - sqlite3_column_blob(*pSelect, 0), - sqlite3_column_bytes(*pSelect, 0)); - mergeBlock(&merge, &block); - docListDestroy(&block); - - rc = sqlite3_step(*pSelect); - if( rc!=SQLITE_ROW && rc!=SQLITE_DONE ){ - return rc; - } - } - - return SQLITE_OK; -} - -typedef struct QueryTerm { - int is_phrase; /* true if this term begins a new phrase */ - const char *zTerm; -} QueryTerm; - -/* A parsed query. - * - * As an example, parsing the query ["four score" years "new nation"] will - * yield a Query with 5 terms: - * "four", is_phrase = 1 - * "score", is_phrase = 0 - * "years", is_phrase = 1 - * "new", is_phrase = 1 - * "nation", is_phrase = 0 - */ -typedef struct Query { - int nTerms; - QueryTerm *pTerm; -} Query; - -static void query_add(Query *q, int is_phrase, const char *zTerm){ - QueryTerm *t; - ++q->nTerms; - q->pTerm = realloc(q->pTerm, q->nTerms * sizeof(q->pTerm[0])); - t = &q->pTerm[q->nTerms - 1]; - t->is_phrase = is_phrase; - t->zTerm = zTerm; -} - -static void query_free(Query *q){ - int i; - for(i = 0; i < q->nTerms; ++i){ - free((void *) q->pTerm[i].zTerm); - } - free(q->pTerm); -} - -static int tokenize_segment(sqlite3_tokenizer *pTokenizer, - const char *zQuery, int in_phrase, - Query *pQuery){ - sqlite3_tokenizer_module *pModule = pTokenizer->pModule; - sqlite3_tokenizer_cursor *pCursor; - int is_first = 1; - - int rc = pModule->xOpen(pTokenizer, zQuery, -1, &pCursor); - if( rc!=SQLITE_OK ) return rc; - pCursor->pTokenizer = pTokenizer; - - while( 1 ){ - const char *zToken; - int nToken, iStartOffset, iEndOffset, dummy_pos; - - rc = pModule->xNext(pCursor, - &zToken, &nToken, - &iStartOffset, &iEndOffset, - &dummy_pos); - if( rc!=SQLITE_OK ) break; - query_add(pQuery, !in_phrase || is_first, string_dup_n(zToken, nToken)); - is_first = 0; - } - - return pModule->xClose(pCursor); -} - -/* Parse a query string, yielding a Query object. */ -static int parse_query(fulltext_vtab *v, const char *zQuery, Query *pQuery){ - char *zQuery1 = string_dup(zQuery); - int in_phrase = 0; - char *s = zQuery1; - pQuery->nTerms = 0; - pQuery->pTerm = NULL; - - while( *s ){ - char *t = s; - while( *t ){ - if( *t=='"' ){ - *t++ = '\0'; - break; - } - ++t; - } - if( *s ){ - tokenize_segment(v->pTokenizer, s, in_phrase, pQuery); - } - s = t; - in_phrase = !in_phrase; - } - - free(zQuery1); - return SQLITE_OK; -} - -/* Perform a full-text query; return a list of documents in [pResult]. */ -static int fulltext_query(fulltext_vtab *v, const char *zQuery, - DocList **pResult){ - Query q; - int phrase_start = -1; - int i; - sqlite3_stmt *pSelect = NULL; - DocList *d = NULL; - - int rc = parse_query(v, zQuery, &q); - if( rc!=SQLITE_OK ) return rc; - - /* Merge terms. */ - for(i = 0 ; i < q.nTerms ; ++i){ - /* In each merge step, we need to generate positions whenever we're - * processing a phrase which hasn't ended yet. */ - int need_positions = iiCursorType = idxNum; - switch( idxNum ){ - case QUERY_GENERIC: - zStatement = "select rowid, content from %_content"; - break; - - case QUERY_FULLTEXT: /* full-text search */ - { - const char *zQuery = (const char *)sqlite3_value_text(argv[0]); - DocList *pResult; - assert( argc==1 ); - rc = fulltext_query(v, zQuery, &pResult); - if( rc!=SQLITE_OK ) return rc; - readerInit(&c->result, pResult); - zStatement = "select rowid, content from %_content where rowid = ?"; - break; - } - - default: - assert( 0 ); - } - - rc = sql_prepare(v->db, v->zName, &c->pStmt, zStatement); - if( rc!=SQLITE_OK ) return rc; - - return fulltextNext(pCursor); -} - -static int fulltextEof(sqlite3_vtab_cursor *pCursor){ - fulltext_cursor *c = (fulltext_cursor *) pCursor; - return c->eof; -} - -static int fulltextColumn(sqlite3_vtab_cursor *pCursor, - sqlite3_context *pContext, int idxCol){ - fulltext_cursor *c = (fulltext_cursor *) pCursor; - const char *s; - - assert( idxCol==0 ); - s = (const char *) sqlite3_column_text(c->pStmt, 1); - sqlite3_result_text(pContext, s, -1, SQLITE_TRANSIENT); - - return SQLITE_OK; -} - -static int fulltextRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ - fulltext_cursor *c = (fulltext_cursor *) pCursor; - - *pRowid = sqlite3_column_int64(c->pStmt, 0); - return SQLITE_OK; -} - -/* Build a hash table containing all terms in zText. */ -static int build_terms(Hash *terms, sqlite3_tokenizer *pTokenizer, - const char *zText, sqlite_int64 iDocid){ - sqlite3_tokenizer_cursor *pCursor; - const char *pToken; - int nTokenBytes; - int iStartOffset, iEndOffset, iPosition; - - int rc = pTokenizer->pModule->xOpen(pTokenizer, zText, -1, &pCursor); - if( rc!=SQLITE_OK ) return rc; - - pCursor->pTokenizer = pTokenizer; - HashInit(terms, HASH_STRING, 1); - while( SQLITE_OK==pTokenizer->pModule->xNext(pCursor, - &pToken, &nTokenBytes, - &iStartOffset, &iEndOffset, - &iPosition) ){ - DocList *p; - - /* Positions can't be negative; we use -1 as a terminator internally. */ - if( iPosition<0 ) { - rc = SQLITE_ERROR; - goto err; - } - - p = HashFind(terms, pToken, nTokenBytes); - if( p==NULL ){ - p = docListNew(DL_POSITIONS_OFFSETS); - docListAddDocid(p, iDocid); - HashInsert(terms, pToken, nTokenBytes, p); - } - docListAddPosOffset(p, iPosition, iStartOffset, iEndOffset); - } - -err: - /* TODO(shess) Check return? Should this be able to cause errors at - ** this point? Actually, same question about sqlite3_finalize(), - ** though one could argue that failure there means that the data is - ** not durable. *ponder* - */ - pTokenizer->pModule->xClose(pCursor); - return rc; -} -/* Update the %_terms table to map the term [zTerm] to the given rowid. */ -static int index_insert_term(fulltext_vtab *v, const char *zTerm, int nTerm, - sqlite_int64 iDocid, DocList *p){ - sqlite_int64 iFirst; - sqlite_int64 iIndexRow; - DocList doclist; - - int rc = term_chunk_select(v, zTerm, nTerm, iDocid, &iFirst); - if( rc==SQLITE_DONE ){ - docListInit(&doclist, DL_POSITIONS_OFFSETS, 0, 0); - if( docListUpdate(&doclist, iDocid, p) ){ - rc = term_insert(v, zTerm, nTerm, iDocid, &doclist); - docListDestroy(&doclist); - return rc; - } - return SQLITE_OK; - } - if( rc!=SQLITE_ROW ) return SQLITE_ERROR; - - /* This word is in the index; add this document ID to its blob. */ - - rc = term_select(v, zTerm, nTerm, iFirst, &iIndexRow, &doclist); - if( rc!=SQLITE_OK ) return rc; - - if( docListUpdate(&doclist, iDocid, p) ){ - /* If the blob is too big, split it in half. */ - if( doclist.nData>CHUNK_MAX ){ - DocList half; - if( docListSplit(&doclist, &half) ){ - rc = term_insert(v, zTerm, nTerm, firstDocid(&half), &half); - docListDestroy(&half); - if( rc!=SQLITE_OK ) goto err; - } - } - rc = term_update(v, iIndexRow, &doclist); - } - -err: - docListDestroy(&doclist); - return rc; -} - -/* Insert a row into the full-text index; set *piRowid to be the ID of the - * new row. */ -static int index_insert(fulltext_vtab *v, - sqlite3_value *pRequestRowid, const char *zText, - sqlite_int64 *piRowid){ - Hash terms; /* maps term string -> PosList */ - HashElem *e; - - int rc = content_insert(v, pRequestRowid, zText, -1); - if( rc!=SQLITE_OK ) return rc; - *piRowid = sqlite3_last_insert_rowid(v->db); - - if( !zText ) return SQLITE_OK; /* nothing to index */ - - rc = build_terms(&terms, v->pTokenizer, zText, *piRowid); - if( rc!=SQLITE_OK ) return rc; - - for(e=HashFirst(&terms); e; e=HashNext(e)){ - DocList *p = HashData(e); - rc = index_insert_term(v, HashKey(e), HashKeysize(e), *piRowid, p); - if( rc!=SQLITE_OK ) break; - } - - for(e=HashFirst(&terms); e; e=HashNext(e)){ - DocList *p = HashData(e); - docListDelete(p); - } - HashClear(&terms); - return rc; -} - -static int index_delete_term(fulltext_vtab *v, const char *zTerm, int nTerm, - sqlite_int64 iDocid){ - sqlite_int64 iFirst; - sqlite_int64 iIndexRow; - DocList doclist; - - int rc = term_chunk_select(v, zTerm, nTerm, iDocid, &iFirst); - if( rc!=SQLITE_ROW ) return SQLITE_ERROR; - - rc = term_select(v, zTerm, nTerm, iFirst, &iIndexRow, &doclist); - if( rc!=SQLITE_OK ) return rc; - - if( docListUpdate(&doclist, iDocid, NULL) ){ - if( doclist.nData>0 ){ - rc = term_update(v, iIndexRow, &doclist); - } else { /* empty posting list */ - rc = term_delete(v, iIndexRow); - } - } - docListDestroy(&doclist); - return rc; -} - -/* Delete a row from the full-text index. */ -static int index_delete(fulltext_vtab *v, sqlite_int64 iRow){ - char *zText; - Hash terms; - HashElem *e; - - int rc = content_select(v, iRow, &zText); - if( rc!=SQLITE_OK ) return rc; - - rc = build_terms(&terms, v->pTokenizer, zText, iRow); - free(zText); - if( rc!=SQLITE_OK ) return rc; - - for(e=HashFirst(&terms); e; e=HashNext(e)){ - rc = index_delete_term(v, HashKey(e), HashKeysize(e), iRow); - if( rc!=SQLITE_OK ) break; - } - for(e=HashFirst(&terms); e; e=HashNext(e)){ - DocList *p = HashData(e); - docListDelete(p); - } - HashClear(&terms); - - return content_delete(v, iRow); -} - -static int fulltextUpdate(sqlite3_vtab *pVtab, int nArg, sqlite3_value **ppArg, - sqlite_int64 *pRowid){ - fulltext_vtab *v = (fulltext_vtab *) pVtab; - - if( nArg<2 ){ - return index_delete(v, sqlite3_value_int64(ppArg[0])); - } - - if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){ - return SQLITE_ERROR; /* an update; not yet supported */ - } - - assert( nArg==3 ); /* ppArg[1] = rowid, ppArg[2] = content */ - return index_insert(v, ppArg[1], - (const char *)sqlite3_value_text(ppArg[2]), pRowid); -} - -static sqlite3_module fulltextModule = { - 0, - fulltextCreate, - fulltextConnect, - fulltextBestIndex, - fulltextDisconnect, - fulltextDestroy, - fulltextOpen, - fulltextClose, - fulltextFilter, - fulltextNext, - fulltextEof, - fulltextColumn, - fulltextRowid, - fulltextUpdate -}; - -int fulltext_init(sqlite3 *db){ - return sqlite3_create_module(db, "fulltext", &fulltextModule, 0); -} - -#if !SQLITE_CORE -#ifdef _WIN32 -__declspec(dllexport) -#endif -int sqlite3_fulltext_init(sqlite3 *db, char **pzErrMsg, - const sqlite3_api_routines *pApi){ - SQLITE_EXTENSION_INIT2(pApi) - return fulltext_init(db); -} -#endif diff --git a/ext/fts1/fulltext.h b/ext/fts1/fulltext.h deleted file mode 100644 index 477dcab2ad..0000000000 --- a/ext/fts1/fulltext.h +++ /dev/null @@ -1,11 +0,0 @@ -#include "sqlite3.h" - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -int fulltext_init(sqlite3 *db); - -#ifdef __cplusplus -} /* extern "C" */ -#endif /* __cplusplus */ diff --git a/ext/fts1/simple_tokenizer.c b/ext/fts1/simple_tokenizer.c deleted file mode 100644 index 0ddc7055af..0000000000 --- a/ext/fts1/simple_tokenizer.c +++ /dev/null @@ -1,174 +0,0 @@ -/* -** The author disclaims copyright to this source code. -** -************************************************************************* -** Implementation of the "simple" full-text-search tokenizer. -*/ - -#include -#if !defined(__APPLE__) -#include -#else -#include -#endif -#include -#include -#include - -#include "tokenizer.h" - -/* Duplicate a string; the caller must free() the returned string. - * (We don't use strdup() since it's not part of the standard C library and - * may not be available everywhere.) */ -/* TODO(shess) Copied from fulltext.c, consider util.c for such -** things. */ -static char *string_dup(const char *s){ - char *str = malloc(strlen(s) + 1); - strcpy(str, s); - return str; -} - -typedef struct simple_tokenizer { - sqlite3_tokenizer base; - const char *zDelim; /* token delimiters */ -} simple_tokenizer; - -typedef struct simple_tokenizer_cursor { - sqlite3_tokenizer_cursor base; - const char *pInput; /* input we are tokenizing */ - int nBytes; /* size of the input */ - const char *pCurrent; /* current position in pInput */ - int iToken; /* index of next token to be returned */ - char *zToken; /* storage for current token */ - int nTokenBytes; /* actual size of current token */ - int nTokenAllocated; /* space allocated to zToken buffer */ -} simple_tokenizer_cursor; - -static sqlite3_tokenizer_module simpleTokenizerModule;/* forward declaration */ - -static int simpleCreate( - int argc, const char **argv, - sqlite3_tokenizer **ppTokenizer -){ - simple_tokenizer *t; - - t = (simple_tokenizer *) malloc(sizeof(simple_tokenizer)); - /* TODO(shess) Delimiters need to remain the same from run to run, - ** else we need to reindex. One solution would be a meta-table to - ** track such information in the database, then we'd only want this - ** information on the initial create. - */ - if( argc>1 ){ - t->zDelim = string_dup(argv[1]); - } else { - /* Build a string excluding alphanumeric ASCII characters */ - char zDelim[0x80]; /* nul-terminated, so nul not a member */ - int i, j; - for(i=1, j=0; i<0x80; i++){ - if( !isalnum(i) ){ - zDelim[j++] = i; - } - } - zDelim[j++] = '\0'; - assert( j<=sizeof(zDelim) ); - t->zDelim = string_dup(zDelim); - } - - *ppTokenizer = &t->base; - return SQLITE_OK; -} - -static int simpleDestroy(sqlite3_tokenizer *pTokenizer){ - simple_tokenizer *t = (simple_tokenizer *) pTokenizer; - - free((void *) t->zDelim); - free(t); - - return SQLITE_OK; -} - -static int simpleOpen( - sqlite3_tokenizer *pTokenizer, - const char *pInput, int nBytes, - sqlite3_tokenizer_cursor **ppCursor -){ - simple_tokenizer_cursor *c; - - c = (simple_tokenizer_cursor *) malloc(sizeof(simple_tokenizer_cursor)); - c->pInput = pInput; - c->nBytes = nBytes<0 ? (int) strlen(pInput) : nBytes; - c->pCurrent = c->pInput; /* start tokenizing at the beginning */ - c->iToken = 0; - c->zToken = NULL; /* no space allocated, yet. */ - c->nTokenBytes = 0; - c->nTokenAllocated = 0; - - *ppCursor = &c->base; - return SQLITE_OK; -} - -static int simpleClose(sqlite3_tokenizer_cursor *pCursor){ - simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor; - - if( NULL!=c->zToken ){ - free(c->zToken); - } - free(c); - - return SQLITE_OK; -} - -static int simpleNext( - sqlite3_tokenizer_cursor *pCursor, - const char **ppToken, int *pnBytes, - int *piStartOffset, int *piEndOffset, int *piPosition -){ - simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor; - simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer; - int ii; - - while( c->pCurrent-c->pInputnBytes ){ - int n = (int) strcspn(c->pCurrent, t->zDelim); - if( n>0 ){ - if( n+1>c->nTokenAllocated ){ - c->zToken = realloc(c->zToken, n+1); - } - for(ii=0; iipCurrent[ii]; - c->zToken[ii] = (unsigned char)ch<0x80 ? tolower((unsigned char)ch):ch; - } - c->zToken[n] = '\0'; - *ppToken = c->zToken; - *pnBytes = n; - *piStartOffset = (int) (c->pCurrent-c->pInput); - *piEndOffset = *piStartOffset+n; - *piPosition = c->iToken++; - c->pCurrent += n + 1; - - return SQLITE_OK; - } - c->pCurrent += n + 1; - /* TODO(shess) could strspn() to skip delimiters en masse. Needs - ** to happen in two places, though, which is annoying. - */ - } - return SQLITE_DONE; -} - -static sqlite3_tokenizer_module simpleTokenizerModule = { - 0, - simpleCreate, - simpleDestroy, - simpleOpen, - simpleClose, - simpleNext, -}; - -void get_simple_tokenizer_module( - sqlite3_tokenizer_module **ppModule -){ - *ppModule = &simpleTokenizerModule; -} diff --git a/ext/fts1/tokenizer.h b/ext/fts1/tokenizer.h deleted file mode 100644 index 1d7bd1f670..0000000000 --- a/ext/fts1/tokenizer.h +++ /dev/null @@ -1,89 +0,0 @@ -/* -** 2006 July 10 -** -** The author disclaims copyright to this source code. -** -************************************************************************* -** Defines the interface to tokenizers used by fulltext-search. There -** are three basic components: -** -** sqlite3_tokenizer_module is a singleton defining the tokenizer -** interface functions. This is essentially the class structure for -** tokenizers. -** -** sqlite3_tokenizer is used to define a particular tokenizer, perhaps -** including customization information defined at creation time. -** -** sqlite3_tokenizer_cursor is generated by a tokenizer to generate -** tokens from a particular input. -*/ -#ifndef _TOKENIZER_H_ -#define _TOKENIZER_H_ - -/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time. -** If tokenizers are to be allowed to call sqlite3_*() functions, then -** we will need a way to register the API consistently. -*/ -#include "sqlite3.h" - -/* -** Structures used by the tokenizer interface. -*/ -typedef struct sqlite3_tokenizer sqlite3_tokenizer; -typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor; -typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module; - -struct sqlite3_tokenizer_module { - int iVersion; /* currently 0 */ - - /* - ** Create and destroy a tokenizer. argc/argv are passed down from - ** the fulltext virtual table creation to allow customization. - */ - int (*xCreate)(int argc, const char **argv, - sqlite3_tokenizer **ppTokenizer); - int (*xDestroy)(sqlite3_tokenizer *pTokenizer); - - /* - ** Tokenize a particular input. Call xOpen() to prepare to - ** tokenize, xNext() repeatedly until it returns SQLITE_DONE, then - ** xClose() to free any internal state. The pInput passed to - ** xOpen() must exist until the cursor is closed. The ppToken - ** result from xNext() is only valid until the next call to xNext() - ** or until xClose() is called. - */ - /* TODO(shess) current implementation requires pInput to be - ** nul-terminated. This should either be fixed, or pInput/nBytes - ** should be converted to zInput. - */ - int (*xOpen)(sqlite3_tokenizer *pTokenizer, - const char *pInput, int nBytes, - sqlite3_tokenizer_cursor **ppCursor); - int (*xClose)(sqlite3_tokenizer_cursor *pCursor); - int (*xNext)(sqlite3_tokenizer_cursor *pCursor, - const char **ppToken, int *pnBytes, - int *piStartOffset, int *piEndOffset, int *piPosition); -}; - -struct sqlite3_tokenizer { - sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */ - /* Tokenizer implementations will typically add additional fields */ -}; - -struct sqlite3_tokenizer_cursor { - sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */ - /* Tokenizer implementations will typically add additional fields */ -}; - -/* -** Get the module for a tokenizer which generates tokens based on a -** set of non-token characters. The default is to break tokens at any -** non-alnum character, though the set of delimiters can also be -** specified by the first argv argument to xCreate(). -*/ -/* TODO(shess) This doesn't belong here. Need some sort of -** registration process. -*/ -void get_simple_tokenizer_module(sqlite3_tokenizer_module **ppModule); - -#endif /* _TOKENIZER_H_ */ diff --git a/ext/fts2/README.tokenizers b/ext/fts2/README.tokenizers deleted file mode 100644 index 98d2021ba1..0000000000 --- a/ext/fts2/README.tokenizers +++ /dev/null @@ -1,133 +0,0 @@ - -1. FTS2 Tokenizers - - When creating a new full-text table, FTS2 allows the user to select - the text tokenizer implementation to be used when indexing text - by specifying a "tokenizer" clause as part of the CREATE VIRTUAL TABLE - statement: - - CREATE VIRTUAL TABLE USING fts2( - [, tokenizer []] - ); - - The built-in tokenizers (valid values to pass as ) are - "simple" and "porter". - - should consist of zero or more white-space separated - arguments to pass to the selected tokenizer implementation. The - interpretation of the arguments, if any, depends on the individual - tokenizer. - -2. Custom Tokenizers - - FTS2 allows users to provide custom tokenizer implementations. The - interface used to create a new tokenizer is defined and described in - the fts2_tokenizer.h source file. - - Registering a new FTS2 tokenizer is similar to registering a new - virtual table module with SQLite. The user passes a pointer to a - structure containing pointers to various callback functions that - make up the implementation of the new tokenizer type. For tokenizers, - the structure (defined in fts2_tokenizer.h) is called - "sqlite3_tokenizer_module". - - FTS2 does not expose a C-function that users call to register new - tokenizer types with a database handle. Instead, the pointer must - be encoded as an SQL blob value and passed to FTS2 through the SQL - engine by evaluating a special scalar function, "fts2_tokenizer()". - The fts2_tokenizer() function may be called with one or two arguments, - as follows: - - SELECT fts2_tokenizer(); - SELECT fts2_tokenizer(, ); - - Where is a string identifying the tokenizer and - is a pointer to an sqlite3_tokenizer_module - structure encoded as an SQL blob. If the second argument is present, - it is registered as tokenizer and a copy of it - returned. If only one argument is passed, a pointer to the tokenizer - implementation currently registered as is returned, - encoded as a blob. Or, if no such tokenizer exists, an SQL exception - (error) is raised. - - SECURITY: If the fts2 extension is used in an environment where potentially - malicious users may execute arbitrary SQL (i.e. gears), they should be - prevented from invoking the fts2_tokenizer() function, possibly using the - authorisation callback. - - See "Sample code" below for an example of calling the fts2_tokenizer() - function from C code. - -3. ICU Library Tokenizers - - If this extension is compiled with the SQLITE_ENABLE_ICU pre-processor - symbol defined, then there exists a built-in tokenizer named "icu" - implemented using the ICU library. The first argument passed to the - xCreate() method (see fts2_tokenizer.h) of this tokenizer may be - an ICU locale identifier. For example "tr_TR" for Turkish as used - in Turkey, or "en_AU" for English as used in Australia. For example: - - "CREATE VIRTUAL TABLE thai_text USING fts2(text, tokenizer icu th_TH)" - - The ICU tokenizer implementation is very simple. It splits the input - text according to the ICU rules for finding word boundaries and discards - any tokens that consist entirely of white-space. This may be suitable - for some applications in some locales, but not all. If more complex - processing is required, for example to implement stemming or - discard punctuation, this can be done by creating a tokenizer - implementation that uses the ICU tokenizer as part of its implementation. - - When using the ICU tokenizer this way, it is safe to overwrite the - contents of the strings returned by the xNext() method (see - fts2_tokenizer.h). - -4. Sample code. - - The following two code samples illustrate the way C code should invoke - the fts2_tokenizer() scalar function: - - int registerTokenizer( - sqlite3 *db, - char *zName, - const sqlite3_tokenizer_module *p - ){ - int rc; - sqlite3_stmt *pStmt; - const char zSql[] = "SELECT fts2_tokenizer(?, ?)"; - - rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); - if( rc!=SQLITE_OK ){ - return rc; - } - - sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); - sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC); - sqlite3_step(pStmt); - - return sqlite3_finalize(pStmt); - } - - int queryTokenizer( - sqlite3 *db, - char *zName, - const sqlite3_tokenizer_module **pp - ){ - int rc; - sqlite3_stmt *pStmt; - const char zSql[] = "SELECT fts2_tokenizer(?)"; - - *pp = 0; - rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); - if( rc!=SQLITE_OK ){ - return rc; - } - - sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); - if( SQLITE_ROW==sqlite3_step(pStmt) ){ - if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){ - memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp)); - } - } - - return sqlite3_finalize(pStmt); - } diff --git a/ext/fts2/README.txt b/ext/fts2/README.txt deleted file mode 100644 index 517a2a0434..0000000000 --- a/ext/fts2/README.txt +++ /dev/null @@ -1,4 +0,0 @@ -This folder contains source code to the second full-text search -extension for SQLite. While the API is the same, this version uses a -substantially different storage schema from fts1, so tables will need -to be rebuilt. diff --git a/ext/fts2/fts2.c b/ext/fts2/fts2.c deleted file mode 100644 index 0405fb7b1e..0000000000 --- a/ext/fts2/fts2.c +++ /dev/null @@ -1,6860 +0,0 @@ -/* fts2 has a design flaw which can lead to database corruption (see -** below). It is recommended not to use it any longer, instead use -** fts3 (or higher). If you believe that your use of fts2 is safe, -** add -DSQLITE_ENABLE_BROKEN_FTS2=1 to your CFLAGS. -*/ -#if (!defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)) \ - && !defined(SQLITE_ENABLE_BROKEN_FTS2) -#error fts2 has a design flaw and has been deprecated. -#endif -/* The flaw is that fts2 uses the content table's unaliased rowid as -** the unique docid. fts2 embeds the rowid in the index it builds, -** and expects the rowid to not change. The SQLite VACUUM operation -** will renumber such rowids, thereby breaking fts2. If you are using -** fts2 in a system which has disabled VACUUM, then you can continue -** to use it safely. Note that PRAGMA auto_vacuum does NOT disable -** VACUUM, though systems using auto_vacuum are unlikely to invoke -** VACUUM. -** -** Unlike fts1, which is safe across VACUUM if you never delete -** documents, fts2 has a second exposure to this flaw, in the segments -** table. So fts2 should be considered unsafe across VACUUM in all -** cases. -*/ - -/* -** 2006 Oct 10 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -****************************************************************************** -** -** This is an SQLite module implementing full-text search. -*/ - -/* -** The code in this file is only compiled if: -** -** * The FTS2 module is being built as an extension -** (in which case SQLITE_CORE is not defined), or -** -** * The FTS2 module is being built into the core of -** SQLite (in which case SQLITE_ENABLE_FTS2 is defined). -*/ - -/* TODO(shess) Consider exporting this comment to an HTML file or the -** wiki. -*/ -/* The full-text index is stored in a series of b+tree (-like) -** structures called segments which map terms to doclists. The -** structures are like b+trees in layout, but are constructed from the -** bottom up in optimal fashion and are not updatable. Since trees -** are built from the bottom up, things will be described from the -** bottom up. -** -** -**** Varints **** -** The basic unit of encoding is a variable-length integer called a -** varint. We encode variable-length integers in little-endian order -** using seven bits * per byte as follows: -** -** KEY: -** A = 0xxxxxxx 7 bits of data and one flag bit -** B = 1xxxxxxx 7 bits of data and one flag bit -** -** 7 bits - A -** 14 bits - BA -** 21 bits - BBA -** and so on. -** -** This is identical to how sqlite encodes varints (see util.c). -** -** -**** Document lists **** -** A doclist (document list) holds a docid-sorted list of hits for a -** given term. Doclists hold docids, and can optionally associate -** token positions and offsets with docids. -** -** A DL_POSITIONS_OFFSETS doclist is stored like this: -** -** array { -** varint docid; -** array { (position list for column 0) -** varint position; (delta from previous position plus POS_BASE) -** varint startOffset; (delta from previous startOffset) -** varint endOffset; (delta from startOffset) -** } -** array { -** varint POS_COLUMN; (marks start of position list for new column) -** varint column; (index of new column) -** array { -** varint position; (delta from previous position plus POS_BASE) -** varint startOffset;(delta from previous startOffset) -** varint endOffset; (delta from startOffset) -** } -** } -** varint POS_END; (marks end of positions for this document. -** } -** -** Here, array { X } means zero or more occurrences of X, adjacent in -** memory. A "position" is an index of a token in the token stream -** generated by the tokenizer, while an "offset" is a byte offset, -** both based at 0. Note that POS_END and POS_COLUMN occur in the -** same logical place as the position element, and act as sentinals -** ending a position list array. -** -** A DL_POSITIONS doclist omits the startOffset and endOffset -** information. A DL_DOCIDS doclist omits both the position and -** offset information, becoming an array of varint-encoded docids. -** -** On-disk data is stored as type DL_DEFAULT, so we don't serialize -** the type. Due to how deletion is implemented in the segmentation -** system, on-disk doclists MUST store at least positions. -** -** -**** Segment leaf nodes **** -** Segment leaf nodes store terms and doclists, ordered by term. Leaf -** nodes are written using LeafWriter, and read using LeafReader (to -** iterate through a single leaf node's data) and LeavesReader (to -** iterate through a segment's entire leaf layer). Leaf nodes have -** the format: -** -** varint iHeight; (height from leaf level, always 0) -** varint nTerm; (length of first term) -** char pTerm[nTerm]; (content of first term) -** varint nDoclist; (length of term's associated doclist) -** char pDoclist[nDoclist]; (content of doclist) -** array { -** (further terms are delta-encoded) -** varint nPrefix; (length of prefix shared with previous term) -** varint nSuffix; (length of unshared suffix) -** char pTermSuffix[nSuffix];(unshared suffix of next term) -** varint nDoclist; (length of term's associated doclist) -** char pDoclist[nDoclist]; (content of doclist) -** } -** -** Here, array { X } means zero or more occurrences of X, adjacent in -** memory. -** -** Leaf nodes are broken into blocks which are stored contiguously in -** the %_segments table in sorted order. This means that when the end -** of a node is reached, the next term is in the node with the next -** greater node id. -** -** New data is spilled to a new leaf node when the current node -** exceeds LEAF_MAX bytes (default 2048). New data which itself is -** larger than STANDALONE_MIN (default 1024) is placed in a standalone -** node (a leaf node with a single term and doclist). The goal of -** these settings is to pack together groups of small doclists while -** making it efficient to directly access large doclists. The -** assumption is that large doclists represent terms which are more -** likely to be query targets. -** -** TODO(shess) It may be useful for blocking decisions to be more -** dynamic. For instance, it may make more sense to have a 2.5k leaf -** node rather than splitting into 2k and .5k nodes. My intuition is -** that this might extend through 2x or 4x the pagesize. -** -** -**** Segment interior nodes **** -** Segment interior nodes store blockids for subtree nodes and terms -** to describe what data is stored by the each subtree. Interior -** nodes are written using InteriorWriter, and read using -** InteriorReader. InteriorWriters are created as needed when -** SegmentWriter creates new leaf nodes, or when an interior node -** itself grows too big and must be split. The format of interior -** nodes: -** -** varint iHeight; (height from leaf level, always >0) -** varint iBlockid; (block id of node's leftmost subtree) -** optional { -** varint nTerm; (length of first term) -** char pTerm[nTerm]; (content of first term) -** array { -** (further terms are delta-encoded) -** varint nPrefix; (length of shared prefix with previous term) -** varint nSuffix; (length of unshared suffix) -** char pTermSuffix[nSuffix]; (unshared suffix of next term) -** } -** } -** -** Here, optional { X } means an optional element, while array { X } -** means zero or more occurrences of X, adjacent in memory. -** -** An interior node encodes n terms separating n+1 subtrees. The -** subtree blocks are contiguous, so only the first subtree's blockid -** is encoded. The subtree at iBlockid will contain all terms less -** than the first term encoded (or all terms if no term is encoded). -** Otherwise, for terms greater than or equal to pTerm[i] but less -** than pTerm[i+1], the subtree for that term will be rooted at -** iBlockid+i. Interior nodes only store enough term data to -** distinguish adjacent children (if the rightmost term of the left -** child is "something", and the leftmost term of the right child is -** "wicked", only "w" is stored). -** -** New data is spilled to a new interior node at the same height when -** the current node exceeds INTERIOR_MAX bytes (default 2048). -** INTERIOR_MIN_TERMS (default 7) keeps large terms from monopolizing -** interior nodes and making the tree too skinny. The interior nodes -** at a given height are naturally tracked by interior nodes at -** height+1, and so on. -** -** -**** Segment directory **** -** The segment directory in table %_segdir stores meta-information for -** merging and deleting segments, and also the root node of the -** segment's tree. -** -** The root node is the top node of the segment's tree after encoding -** the entire segment, restricted to ROOT_MAX bytes (default 1024). -** This could be either a leaf node or an interior node. If the top -** node requires more than ROOT_MAX bytes, it is flushed to %_segments -** and a new root interior node is generated (which should always fit -** within ROOT_MAX because it only needs space for 2 varints, the -** height and the blockid of the previous root). -** -** The meta-information in the segment directory is: -** level - segment level (see below) -** idx - index within level -** - (level,idx uniquely identify a segment) -** start_block - first leaf node -** leaves_end_block - last leaf node -** end_block - last block (including interior nodes) -** root - contents of root node -** -** If the root node is a leaf node, then start_block, -** leaves_end_block, and end_block are all 0. -** -** -**** Segment merging **** -** To amortize update costs, segments are groups into levels and -** merged in matches. Each increase in level represents exponentially -** more documents. -** -** New documents (actually, document updates) are tokenized and -** written individually (using LeafWriter) to a level 0 segment, with -** incrementing idx. When idx reaches MERGE_COUNT (default 16), all -** level 0 segments are merged into a single level 1 segment. Level 1 -** is populated like level 0, and eventually MERGE_COUNT level 1 -** segments are merged to a single level 2 segment (representing -** MERGE_COUNT^2 updates), and so on. -** -** A segment merge traverses all segments at a given level in -** parallel, performing a straightforward sorted merge. Since segment -** leaf nodes are written in to the %_segments table in order, this -** merge traverses the underlying sqlite disk structures efficiently. -** After the merge, all segment blocks from the merged level are -** deleted. -** -** MERGE_COUNT controls how often we merge segments. 16 seems to be -** somewhat of a sweet spot for insertion performance. 32 and 64 show -** very similar performance numbers to 16 on insertion, though they're -** a tiny bit slower (perhaps due to more overhead in merge-time -** sorting). 8 is about 20% slower than 16, 4 about 50% slower than -** 16, 2 about 66% slower than 16. -** -** At query time, high MERGE_COUNT increases the number of segments -** which need to be scanned and merged. For instance, with 100k docs -** inserted: -** -** MERGE_COUNT segments -** 16 25 -** 8 12 -** 4 10 -** 2 6 -** -** This appears to have only a moderate impact on queries for very -** frequent terms (which are somewhat dominated by segment merge -** costs), and infrequent and non-existent terms still seem to be fast -** even with many segments. -** -** TODO(shess) That said, it would be nice to have a better query-side -** argument for MERGE_COUNT of 16. Also, it is possible/likely that -** optimizations to things like doclist merging will swing the sweet -** spot around. -** -** -** -**** Handling of deletions and updates **** -** Since we're using a segmented structure, with no docid-oriented -** index into the term index, we clearly cannot simply update the term -** index when a document is deleted or updated. For deletions, we -** write an empty doclist (varint(docid) varint(POS_END)), for updates -** we simply write the new doclist. Segment merges overwrite older -** data for a particular docid with newer data, so deletes or updates -** will eventually overtake the earlier data and knock it out. The -** query logic likewise merges doclists so that newer data knocks out -** older data. -** -** TODO(shess) Provide a VACUUM type operation to clear out all -** deletions and duplications. This would basically be a forced merge -** into a single segment. -*/ - -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) - -#if defined(SQLITE_ENABLE_FTS2) && !defined(SQLITE_CORE) -# define SQLITE_CORE 1 -#endif - -#include -#include -#include -#include -#include "fts2.h" -#include "fts2_hash.h" -#include "fts2_tokenizer.h" -#include "sqlite3.h" -#include "sqlite3ext.h" -SQLITE_EXTENSION_INIT1 - - -/* TODO(shess) MAN, this thing needs some refactoring. At minimum, it -** would be nice to order the file better, perhaps something along the -** lines of: -** -** - utility functions -** - table setup functions -** - table update functions -** - table query functions -** -** Put the query functions last because they're likely to reference -** typedefs or functions from the table update section. -*/ - -#if 0 -# define TRACE(A) printf A; fflush(stdout) -#else -# define TRACE(A) -#endif - -/* It is not safe to call isspace(), tolower(), or isalnum() on -** hi-bit-set characters. This is the same solution used in the -** tokenizer. -*/ -/* TODO(shess) The snippet-generation code should be using the -** tokenizer-generated tokens rather than doing its own local -** tokenization. -*/ -/* TODO(shess) Is __isascii() a portable version of (c&0x80)==0? */ -static int safe_isspace(char c){ - return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f'; -} -static int safe_tolower(char c){ - return (c>='A' && c<='Z') ? (c - 'A' + 'a') : c; -} -static int safe_isalnum(char c){ - return (c>='0' && c<='9') || (c>='A' && c<='Z') || (c>='a' && c<='z'); -} - -typedef enum DocListType { - DL_DOCIDS, /* docids only */ - DL_POSITIONS, /* docids + positions */ - DL_POSITIONS_OFFSETS /* docids + positions + offsets */ -} DocListType; - -/* -** By default, only positions and not offsets are stored in the doclists. -** To change this so that offsets are stored too, compile with -** -** -DDL_DEFAULT=DL_POSITIONS_OFFSETS -** -** If DL_DEFAULT is set to DL_DOCIDS, your table can only be inserted -** into (no deletes or updates). -*/ -#ifndef DL_DEFAULT -# define DL_DEFAULT DL_POSITIONS -#endif - -enum { - POS_END = 0, /* end of this position list */ - POS_COLUMN, /* followed by new column number */ - POS_BASE -}; - -/* MERGE_COUNT controls how often we merge segments (see comment at -** top of file). -*/ -#define MERGE_COUNT 16 - -/* utility functions */ - -/* CLEAR() and SCRAMBLE() abstract memset() on a pointer to a single -** record to prevent errors of the form: -** -** my_function(SomeType *b){ -** memset(b, '\0', sizeof(b)); // sizeof(b)!=sizeof(*b) -** } -*/ -/* TODO(shess) Obvious candidates for a header file. */ -#define CLEAR(b) memset(b, '\0', sizeof(*(b))) - -#ifndef NDEBUG -# define SCRAMBLE(b) memset(b, 0x55, sizeof(*(b))) -#else -# define SCRAMBLE(b) -#endif - -/* We may need up to VARINT_MAX bytes to store an encoded 64-bit integer. */ -#define VARINT_MAX 10 - -/* Write a 64-bit variable-length integer to memory starting at p[0]. - * The length of data written will be between 1 and VARINT_MAX bytes. - * The number of bytes written is returned. */ -static int putVarint(char *p, sqlite_int64 v){ - unsigned char *q = (unsigned char *) p; - sqlite_uint64 vu = v; - do{ - *q++ = (unsigned char) ((vu & 0x7f) | 0x80); - vu >>= 7; - }while( vu!=0 ); - q[-1] &= 0x7f; /* turn off high bit in final byte */ - assert( q - (unsigned char *)p <= VARINT_MAX ); - return (int) (q - (unsigned char *)p); -} - -/* Read a 64-bit variable-length integer from memory starting at p[0]. - * Return the number of bytes read, or 0 on error. - * The value is stored in *v. */ -static int getVarint(const char *p, sqlite_int64 *v){ - const unsigned char *q = (const unsigned char *) p; - sqlite_uint64 x = 0, y = 1; - while( (*q & 0x80) == 0x80 ){ - x += y * (*q++ & 0x7f); - y <<= 7; - if( q - (unsigned char *)p >= VARINT_MAX ){ /* bad data */ - assert( 0 ); - return 0; - } - } - x += y * (*q++); - *v = (sqlite_int64) x; - return (int) (q - (unsigned char *)p); -} - -static int getVarint32(const char *p, int *pi){ - sqlite_int64 i; - int ret = getVarint(p, &i); - *pi = (int) i; - assert( *pi==i ); - return ret; -} - -/*******************************************************************/ -/* DataBuffer is used to collect data into a buffer in piecemeal -** fashion. It implements the usual distinction between amount of -** data currently stored (nData) and buffer capacity (nCapacity). -** -** dataBufferInit - create a buffer with given initial capacity. -** dataBufferReset - forget buffer's data, retaining capacity. -** dataBufferDestroy - free buffer's data. -** dataBufferSwap - swap contents of two buffers. -** dataBufferExpand - expand capacity without adding data. -** dataBufferAppend - append data. -** dataBufferAppend2 - append two pieces of data at once. -** dataBufferReplace - replace buffer's data. -*/ -typedef struct DataBuffer { - char *pData; /* Pointer to malloc'ed buffer. */ - int nCapacity; /* Size of pData buffer. */ - int nData; /* End of data loaded into pData. */ -} DataBuffer; - -static void dataBufferInit(DataBuffer *pBuffer, int nCapacity){ - assert( nCapacity>=0 ); - pBuffer->nData = 0; - pBuffer->nCapacity = nCapacity; - pBuffer->pData = nCapacity==0 ? NULL : sqlite3_malloc(nCapacity); -} -static void dataBufferReset(DataBuffer *pBuffer){ - pBuffer->nData = 0; -} -static void dataBufferDestroy(DataBuffer *pBuffer){ - if( pBuffer->pData!=NULL ) sqlite3_free(pBuffer->pData); - SCRAMBLE(pBuffer); -} -static void dataBufferSwap(DataBuffer *pBuffer1, DataBuffer *pBuffer2){ - DataBuffer tmp = *pBuffer1; - *pBuffer1 = *pBuffer2; - *pBuffer2 = tmp; -} -static void dataBufferExpand(DataBuffer *pBuffer, int nAddCapacity){ - assert( nAddCapacity>0 ); - /* TODO(shess) Consider expanding more aggressively. Note that the - ** underlying malloc implementation may take care of such things for - ** us already. - */ - if( pBuffer->nData+nAddCapacity>pBuffer->nCapacity ){ - pBuffer->nCapacity = pBuffer->nData+nAddCapacity; - pBuffer->pData = sqlite3_realloc(pBuffer->pData, pBuffer->nCapacity); - } -} -static void dataBufferAppend(DataBuffer *pBuffer, - const char *pSource, int nSource){ - assert( nSource>0 && pSource!=NULL ); - dataBufferExpand(pBuffer, nSource); - memcpy(pBuffer->pData+pBuffer->nData, pSource, nSource); - pBuffer->nData += nSource; -} -static void dataBufferAppend2(DataBuffer *pBuffer, - const char *pSource1, int nSource1, - const char *pSource2, int nSource2){ - assert( nSource1>0 && pSource1!=NULL ); - assert( nSource2>0 && pSource2!=NULL ); - dataBufferExpand(pBuffer, nSource1+nSource2); - memcpy(pBuffer->pData+pBuffer->nData, pSource1, nSource1); - memcpy(pBuffer->pData+pBuffer->nData+nSource1, pSource2, nSource2); - pBuffer->nData += nSource1+nSource2; -} -static void dataBufferReplace(DataBuffer *pBuffer, - const char *pSource, int nSource){ - dataBufferReset(pBuffer); - dataBufferAppend(pBuffer, pSource, nSource); -} - -/* StringBuffer is a null-terminated version of DataBuffer. */ -typedef struct StringBuffer { - DataBuffer b; /* Includes null terminator. */ -} StringBuffer; - -static void initStringBuffer(StringBuffer *sb){ - dataBufferInit(&sb->b, 100); - dataBufferReplace(&sb->b, "", 1); -} -static int stringBufferLength(StringBuffer *sb){ - return sb->b.nData-1; -} -static char *stringBufferData(StringBuffer *sb){ - return sb->b.pData; -} -static void stringBufferDestroy(StringBuffer *sb){ - dataBufferDestroy(&sb->b); -} - -static void nappend(StringBuffer *sb, const char *zFrom, int nFrom){ - assert( sb->b.nData>0 ); - if( nFrom>0 ){ - sb->b.nData--; - dataBufferAppend2(&sb->b, zFrom, nFrom, "", 1); - } -} -static void append(StringBuffer *sb, const char *zFrom){ - nappend(sb, zFrom, strlen(zFrom)); -} - -/* Append a list of strings separated by commas. */ -static void appendList(StringBuffer *sb, int nString, char **azString){ - int i; - for(i=0; i0 ) append(sb, ", "); - append(sb, azString[i]); - } -} - -static int endsInWhiteSpace(StringBuffer *p){ - return stringBufferLength(p)>0 && - safe_isspace(stringBufferData(p)[stringBufferLength(p)-1]); -} - -/* If the StringBuffer ends in something other than white space, add a -** single space character to the end. -*/ -static void appendWhiteSpace(StringBuffer *p){ - if( stringBufferLength(p)==0 ) return; - if( !endsInWhiteSpace(p) ) append(p, " "); -} - -/* Remove white space from the end of the StringBuffer */ -static void trimWhiteSpace(StringBuffer *p){ - while( endsInWhiteSpace(p) ){ - p->b.pData[--p->b.nData-1] = '\0'; - } -} - -/*******************************************************************/ -/* DLReader is used to read document elements from a doclist. The -** current docid is cached, so dlrDocid() is fast. DLReader does not -** own the doclist buffer. -** -** dlrAtEnd - true if there's no more data to read. -** dlrDocid - docid of current document. -** dlrDocData - doclist data for current document (including docid). -** dlrDocDataBytes - length of same. -** dlrAllDataBytes - length of all remaining data. -** dlrPosData - position data for current document. -** dlrPosDataLen - length of pos data for current document (incl POS_END). -** dlrStep - step to current document. -** dlrInit - initial for doclist of given type against given data. -** dlrDestroy - clean up. -** -** Expected usage is something like: -** -** DLReader reader; -** dlrInit(&reader, pData, nData); -** while( !dlrAtEnd(&reader) ){ -** // calls to dlrDocid() and kin. -** dlrStep(&reader); -** } -** dlrDestroy(&reader); -*/ -typedef struct DLReader { - DocListType iType; - const char *pData; - int nData; - - sqlite_int64 iDocid; - int nElement; -} DLReader; - -static int dlrAtEnd(DLReader *pReader){ - assert( pReader->nData>=0 ); - return pReader->nData==0; -} -static sqlite_int64 dlrDocid(DLReader *pReader){ - assert( !dlrAtEnd(pReader) ); - return pReader->iDocid; -} -static const char *dlrDocData(DLReader *pReader){ - assert( !dlrAtEnd(pReader) ); - return pReader->pData; -} -static int dlrDocDataBytes(DLReader *pReader){ - assert( !dlrAtEnd(pReader) ); - return pReader->nElement; -} -static int dlrAllDataBytes(DLReader *pReader){ - assert( !dlrAtEnd(pReader) ); - return pReader->nData; -} -/* TODO(shess) Consider adding a field to track iDocid varint length -** to make these two functions faster. This might matter (a tiny bit) -** for queries. -*/ -static const char *dlrPosData(DLReader *pReader){ - sqlite_int64 iDummy; - int n = getVarint(pReader->pData, &iDummy); - assert( !dlrAtEnd(pReader) ); - return pReader->pData+n; -} -static int dlrPosDataLen(DLReader *pReader){ - sqlite_int64 iDummy; - int n = getVarint(pReader->pData, &iDummy); - assert( !dlrAtEnd(pReader) ); - return pReader->nElement-n; -} -static void dlrStep(DLReader *pReader){ - assert( !dlrAtEnd(pReader) ); - - /* Skip past current doclist element. */ - assert( pReader->nElement<=pReader->nData ); - pReader->pData += pReader->nElement; - pReader->nData -= pReader->nElement; - - /* If there is more data, read the next doclist element. */ - if( pReader->nData!=0 ){ - sqlite_int64 iDocidDelta; - int iDummy, n = getVarint(pReader->pData, &iDocidDelta); - pReader->iDocid += iDocidDelta; - if( pReader->iType>=DL_POSITIONS ){ - assert( nnData ); - while( 1 ){ - n += getVarint32(pReader->pData+n, &iDummy); - assert( n<=pReader->nData ); - if( iDummy==POS_END ) break; - if( iDummy==POS_COLUMN ){ - n += getVarint32(pReader->pData+n, &iDummy); - assert( nnData ); - }else if( pReader->iType==DL_POSITIONS_OFFSETS ){ - n += getVarint32(pReader->pData+n, &iDummy); - n += getVarint32(pReader->pData+n, &iDummy); - assert( nnData ); - } - } - } - pReader->nElement = n; - assert( pReader->nElement<=pReader->nData ); - } -} -static void dlrInit(DLReader *pReader, DocListType iType, - const char *pData, int nData){ - assert( pData!=NULL && nData!=0 ); - pReader->iType = iType; - pReader->pData = pData; - pReader->nData = nData; - pReader->nElement = 0; - pReader->iDocid = 0; - - /* Load the first element's data. There must be a first element. */ - dlrStep(pReader); -} -static void dlrDestroy(DLReader *pReader){ - SCRAMBLE(pReader); -} - -#ifndef NDEBUG -/* Verify that the doclist can be validly decoded. Also returns the -** last docid found because it is convenient in other assertions for -** DLWriter. -*/ -static void docListValidate(DocListType iType, const char *pData, int nData, - sqlite_int64 *pLastDocid){ - sqlite_int64 iPrevDocid = 0; - assert( nData>0 ); - assert( pData!=0 ); - assert( pData+nData>pData ); - while( nData!=0 ){ - sqlite_int64 iDocidDelta; - int n = getVarint(pData, &iDocidDelta); - iPrevDocid += iDocidDelta; - if( iType>DL_DOCIDS ){ - int iDummy; - while( 1 ){ - n += getVarint32(pData+n, &iDummy); - if( iDummy==POS_END ) break; - if( iDummy==POS_COLUMN ){ - n += getVarint32(pData+n, &iDummy); - }else if( iType>DL_POSITIONS ){ - n += getVarint32(pData+n, &iDummy); - n += getVarint32(pData+n, &iDummy); - } - assert( n<=nData ); - } - } - assert( n<=nData ); - pData += n; - nData -= n; - } - if( pLastDocid ) *pLastDocid = iPrevDocid; -} -#define ASSERT_VALID_DOCLIST(i, p, n, o) docListValidate(i, p, n, o) -#else -#define ASSERT_VALID_DOCLIST(i, p, n, o) assert( 1 ) -#endif - -/*******************************************************************/ -/* DLWriter is used to write doclist data to a DataBuffer. DLWriter -** always appends to the buffer and does not own it. -** -** dlwInit - initialize to write a given type doclistto a buffer. -** dlwDestroy - clear the writer's memory. Does not free buffer. -** dlwAppend - append raw doclist data to buffer. -** dlwCopy - copy next doclist from reader to writer. -** dlwAdd - construct doclist element and append to buffer. -** Only apply dlwAdd() to DL_DOCIDS doclists (else use PLWriter). -*/ -typedef struct DLWriter { - DocListType iType; - DataBuffer *b; - sqlite_int64 iPrevDocid; -#ifndef NDEBUG - int has_iPrevDocid; -#endif -} DLWriter; - -static void dlwInit(DLWriter *pWriter, DocListType iType, DataBuffer *b){ - pWriter->b = b; - pWriter->iType = iType; - pWriter->iPrevDocid = 0; -#ifndef NDEBUG - pWriter->has_iPrevDocid = 0; -#endif -} -static void dlwDestroy(DLWriter *pWriter){ - SCRAMBLE(pWriter); -} -/* iFirstDocid is the first docid in the doclist in pData. It is -** needed because pData may point within a larger doclist, in which -** case the first item would be delta-encoded. -** -** iLastDocid is the final docid in the doclist in pData. It is -** needed to create the new iPrevDocid for future delta-encoding. The -** code could decode the passed doclist to recreate iLastDocid, but -** the only current user (docListMerge) already has decoded this -** information. -*/ -/* TODO(shess) This has become just a helper for docListMerge. -** Consider a refactor to make this cleaner. -*/ -static void dlwAppend(DLWriter *pWriter, - const char *pData, int nData, - sqlite_int64 iFirstDocid, sqlite_int64 iLastDocid){ - sqlite_int64 iDocid = 0; - char c[VARINT_MAX]; - int nFirstOld, nFirstNew; /* Old and new varint len of first docid. */ -#ifndef NDEBUG - sqlite_int64 iLastDocidDelta; -#endif - - /* Recode the initial docid as delta from iPrevDocid. */ - nFirstOld = getVarint(pData, &iDocid); - assert( nFirstOldiType==DL_DOCIDS) ); - nFirstNew = putVarint(c, iFirstDocid-pWriter->iPrevDocid); - - /* Verify that the incoming doclist is valid AND that it ends with - ** the expected docid. This is essential because we'll trust this - ** docid in future delta-encoding. - */ - ASSERT_VALID_DOCLIST(pWriter->iType, pData, nData, &iLastDocidDelta); - assert( iLastDocid==iFirstDocid-iDocid+iLastDocidDelta ); - - /* Append recoded initial docid and everything else. Rest of docids - ** should have been delta-encoded from previous initial docid. - */ - if( nFirstOldb, c, nFirstNew, - pData+nFirstOld, nData-nFirstOld); - }else{ - dataBufferAppend(pWriter->b, c, nFirstNew); - } - pWriter->iPrevDocid = iLastDocid; -} -static void dlwCopy(DLWriter *pWriter, DLReader *pReader){ - dlwAppend(pWriter, dlrDocData(pReader), dlrDocDataBytes(pReader), - dlrDocid(pReader), dlrDocid(pReader)); -} -static void dlwAdd(DLWriter *pWriter, sqlite_int64 iDocid){ - char c[VARINT_MAX]; - int n = putVarint(c, iDocid-pWriter->iPrevDocid); - - /* Docids must ascend. */ - assert( !pWriter->has_iPrevDocid || iDocid>pWriter->iPrevDocid ); - assert( pWriter->iType==DL_DOCIDS ); - - dataBufferAppend(pWriter->b, c, n); - pWriter->iPrevDocid = iDocid; -#ifndef NDEBUG - pWriter->has_iPrevDocid = 1; -#endif -} - -/*******************************************************************/ -/* PLReader is used to read data from a document's position list. As -** the caller steps through the list, data is cached so that varints -** only need to be decoded once. -** -** plrInit, plrDestroy - create/destroy a reader. -** plrColumn, plrPosition, plrStartOffset, plrEndOffset - accessors -** plrAtEnd - at end of stream, only call plrDestroy once true. -** plrStep - step to the next element. -*/ -typedef struct PLReader { - /* These refer to the next position's data. nData will reach 0 when - ** reading the last position, so plrStep() signals EOF by setting - ** pData to NULL. - */ - const char *pData; - int nData; - - DocListType iType; - int iColumn; /* the last column read */ - int iPosition; /* the last position read */ - int iStartOffset; /* the last start offset read */ - int iEndOffset; /* the last end offset read */ -} PLReader; - -static int plrAtEnd(PLReader *pReader){ - return pReader->pData==NULL; -} -static int plrColumn(PLReader *pReader){ - assert( !plrAtEnd(pReader) ); - return pReader->iColumn; -} -static int plrPosition(PLReader *pReader){ - assert( !plrAtEnd(pReader) ); - return pReader->iPosition; -} -static int plrStartOffset(PLReader *pReader){ - assert( !plrAtEnd(pReader) ); - return pReader->iStartOffset; -} -static int plrEndOffset(PLReader *pReader){ - assert( !plrAtEnd(pReader) ); - return pReader->iEndOffset; -} -static void plrStep(PLReader *pReader){ - int i, n; - - assert( !plrAtEnd(pReader) ); - - if( pReader->nData==0 ){ - pReader->pData = NULL; - return; - } - - n = getVarint32(pReader->pData, &i); - if( i==POS_COLUMN ){ - n += getVarint32(pReader->pData+n, &pReader->iColumn); - pReader->iPosition = 0; - pReader->iStartOffset = 0; - n += getVarint32(pReader->pData+n, &i); - } - /* Should never see adjacent column changes. */ - assert( i!=POS_COLUMN ); - - if( i==POS_END ){ - pReader->nData = 0; - pReader->pData = NULL; - return; - } - - pReader->iPosition += i-POS_BASE; - if( pReader->iType==DL_POSITIONS_OFFSETS ){ - n += getVarint32(pReader->pData+n, &i); - pReader->iStartOffset += i; - n += getVarint32(pReader->pData+n, &i); - pReader->iEndOffset = pReader->iStartOffset+i; - } - assert( n<=pReader->nData ); - pReader->pData += n; - pReader->nData -= n; -} - -static void plrInit(PLReader *pReader, DLReader *pDLReader){ - pReader->pData = dlrPosData(pDLReader); - pReader->nData = dlrPosDataLen(pDLReader); - pReader->iType = pDLReader->iType; - pReader->iColumn = 0; - pReader->iPosition = 0; - pReader->iStartOffset = 0; - pReader->iEndOffset = 0; - plrStep(pReader); -} -static void plrDestroy(PLReader *pReader){ - SCRAMBLE(pReader); -} - -/*******************************************************************/ -/* PLWriter is used in constructing a document's position list. As a -** convenience, if iType is DL_DOCIDS, PLWriter becomes a no-op. -** PLWriter writes to the associated DLWriter's buffer. -** -** plwInit - init for writing a document's poslist. -** plwDestroy - clear a writer. -** plwAdd - append position and offset information. -** plwCopy - copy next position's data from reader to writer. -** plwTerminate - add any necessary doclist terminator. -** -** Calling plwAdd() after plwTerminate() may result in a corrupt -** doclist. -*/ -/* TODO(shess) Until we've written the second item, we can cache the -** first item's information. Then we'd have three states: -** -** - initialized with docid, no positions. -** - docid and one position. -** - docid and multiple positions. -** -** Only the last state needs to actually write to dlw->b, which would -** be an improvement in the DLCollector case. -*/ -typedef struct PLWriter { - DLWriter *dlw; - - int iColumn; /* the last column written */ - int iPos; /* the last position written */ - int iOffset; /* the last start offset written */ -} PLWriter; - -/* TODO(shess) In the case where the parent is reading these values -** from a PLReader, we could optimize to a copy if that PLReader has -** the same type as pWriter. -*/ -static void plwAdd(PLWriter *pWriter, int iColumn, int iPos, - int iStartOffset, int iEndOffset){ - /* Worst-case space for POS_COLUMN, iColumn, iPosDelta, - ** iStartOffsetDelta, and iEndOffsetDelta. - */ - char c[5*VARINT_MAX]; - int n = 0; - - /* Ban plwAdd() after plwTerminate(). */ - assert( pWriter->iPos!=-1 ); - - if( pWriter->dlw->iType==DL_DOCIDS ) return; - - if( iColumn!=pWriter->iColumn ){ - n += putVarint(c+n, POS_COLUMN); - n += putVarint(c+n, iColumn); - pWriter->iColumn = iColumn; - pWriter->iPos = 0; - pWriter->iOffset = 0; - } - assert( iPos>=pWriter->iPos ); - n += putVarint(c+n, POS_BASE+(iPos-pWriter->iPos)); - pWriter->iPos = iPos; - if( pWriter->dlw->iType==DL_POSITIONS_OFFSETS ){ - assert( iStartOffset>=pWriter->iOffset ); - n += putVarint(c+n, iStartOffset-pWriter->iOffset); - pWriter->iOffset = iStartOffset; - assert( iEndOffset>=iStartOffset ); - n += putVarint(c+n, iEndOffset-iStartOffset); - } - dataBufferAppend(pWriter->dlw->b, c, n); -} -static void plwCopy(PLWriter *pWriter, PLReader *pReader){ - plwAdd(pWriter, plrColumn(pReader), plrPosition(pReader), - plrStartOffset(pReader), plrEndOffset(pReader)); -} -static void plwInit(PLWriter *pWriter, DLWriter *dlw, sqlite_int64 iDocid){ - char c[VARINT_MAX]; - int n; - - pWriter->dlw = dlw; - - /* Docids must ascend. */ - assert( !pWriter->dlw->has_iPrevDocid || iDocid>pWriter->dlw->iPrevDocid ); - n = putVarint(c, iDocid-pWriter->dlw->iPrevDocid); - dataBufferAppend(pWriter->dlw->b, c, n); - pWriter->dlw->iPrevDocid = iDocid; -#ifndef NDEBUG - pWriter->dlw->has_iPrevDocid = 1; -#endif - - pWriter->iColumn = 0; - pWriter->iPos = 0; - pWriter->iOffset = 0; -} -/* TODO(shess) Should plwDestroy() also terminate the doclist? But -** then plwDestroy() would no longer be just a destructor, it would -** also be doing work, which isn't consistent with the overall idiom. -** Another option would be for plwAdd() to always append any necessary -** terminator, so that the output is always correct. But that would -** add incremental work to the common case with the only benefit being -** API elegance. Punt for now. -*/ -static void plwTerminate(PLWriter *pWriter){ - if( pWriter->dlw->iType>DL_DOCIDS ){ - char c[VARINT_MAX]; - int n = putVarint(c, POS_END); - dataBufferAppend(pWriter->dlw->b, c, n); - } -#ifndef NDEBUG - /* Mark as terminated for assert in plwAdd(). */ - pWriter->iPos = -1; -#endif -} -static void plwDestroy(PLWriter *pWriter){ - SCRAMBLE(pWriter); -} - -/*******************************************************************/ -/* DLCollector wraps PLWriter and DLWriter to provide a -** dynamically-allocated doclist area to use during tokenization. -** -** dlcNew - malloc up and initialize a collector. -** dlcDelete - destroy a collector and all contained items. -** dlcAddPos - append position and offset information. -** dlcAddDoclist - add the collected doclist to the given buffer. -** dlcNext - terminate the current document and open another. -*/ -typedef struct DLCollector { - DataBuffer b; - DLWriter dlw; - PLWriter plw; -} DLCollector; - -/* TODO(shess) This could also be done by calling plwTerminate() and -** dataBufferAppend(). I tried that, expecting nominal performance -** differences, but it seemed to pretty reliably be worth 1% to code -** it this way. I suspect it is the incremental malloc overhead (some -** percentage of the plwTerminate() calls will cause a realloc), so -** this might be worth revisiting if the DataBuffer implementation -** changes. -*/ -static void dlcAddDoclist(DLCollector *pCollector, DataBuffer *b){ - if( pCollector->dlw.iType>DL_DOCIDS ){ - char c[VARINT_MAX]; - int n = putVarint(c, POS_END); - dataBufferAppend2(b, pCollector->b.pData, pCollector->b.nData, c, n); - }else{ - dataBufferAppend(b, pCollector->b.pData, pCollector->b.nData); - } -} -static void dlcNext(DLCollector *pCollector, sqlite_int64 iDocid){ - plwTerminate(&pCollector->plw); - plwDestroy(&pCollector->plw); - plwInit(&pCollector->plw, &pCollector->dlw, iDocid); -} -static void dlcAddPos(DLCollector *pCollector, int iColumn, int iPos, - int iStartOffset, int iEndOffset){ - plwAdd(&pCollector->plw, iColumn, iPos, iStartOffset, iEndOffset); -} - -static DLCollector *dlcNew(sqlite_int64 iDocid, DocListType iType){ - DLCollector *pCollector = sqlite3_malloc(sizeof(DLCollector)); - dataBufferInit(&pCollector->b, 0); - dlwInit(&pCollector->dlw, iType, &pCollector->b); - plwInit(&pCollector->plw, &pCollector->dlw, iDocid); - return pCollector; -} -static void dlcDelete(DLCollector *pCollector){ - plwDestroy(&pCollector->plw); - dlwDestroy(&pCollector->dlw); - dataBufferDestroy(&pCollector->b); - SCRAMBLE(pCollector); - sqlite3_free(pCollector); -} - - -/* Copy the doclist data of iType in pData/nData into *out, trimming -** unnecessary data as we go. Only columns matching iColumn are -** copied, all columns copied if iColumn is -1. Elements with no -** matching columns are dropped. The output is an iOutType doclist. -*/ -/* NOTE(shess) This code is only valid after all doclists are merged. -** If this is run before merges, then doclist items which represent -** deletion will be trimmed, and will thus not effect a deletion -** during the merge. -*/ -static void docListTrim(DocListType iType, const char *pData, int nData, - int iColumn, DocListType iOutType, DataBuffer *out){ - DLReader dlReader; - DLWriter dlWriter; - - assert( iOutType<=iType ); - - dlrInit(&dlReader, iType, pData, nData); - dlwInit(&dlWriter, iOutType, out); - - while( !dlrAtEnd(&dlReader) ){ - PLReader plReader; - PLWriter plWriter; - int match = 0; - - plrInit(&plReader, &dlReader); - - while( !plrAtEnd(&plReader) ){ - if( iColumn==-1 || plrColumn(&plReader)==iColumn ){ - if( !match ){ - plwInit(&plWriter, &dlWriter, dlrDocid(&dlReader)); - match = 1; - } - plwAdd(&plWriter, plrColumn(&plReader), plrPosition(&plReader), - plrStartOffset(&plReader), plrEndOffset(&plReader)); - } - plrStep(&plReader); - } - if( match ){ - plwTerminate(&plWriter); - plwDestroy(&plWriter); - } - - plrDestroy(&plReader); - dlrStep(&dlReader); - } - dlwDestroy(&dlWriter); - dlrDestroy(&dlReader); -} - -/* Used by docListMerge() to keep doclists in the ascending order by -** docid, then ascending order by age (so the newest comes first). -*/ -typedef struct OrderedDLReader { - DLReader *pReader; - - /* TODO(shess) If we assume that docListMerge pReaders is ordered by - ** age (which we do), then we could use pReader comparisons to break - ** ties. - */ - int idx; -} OrderedDLReader; - -/* Order eof to end, then by docid asc, idx desc. */ -static int orderedDLReaderCmp(OrderedDLReader *r1, OrderedDLReader *r2){ - if( dlrAtEnd(r1->pReader) ){ - if( dlrAtEnd(r2->pReader) ) return 0; /* Both atEnd(). */ - return 1; /* Only r1 atEnd(). */ - } - if( dlrAtEnd(r2->pReader) ) return -1; /* Only r2 atEnd(). */ - - if( dlrDocid(r1->pReader)pReader) ) return -1; - if( dlrDocid(r1->pReader)>dlrDocid(r2->pReader) ) return 1; - - /* Descending on idx. */ - return r2->idx-r1->idx; -} - -/* Bubble p[0] to appropriate place in p[1..n-1]. Assumes that -** p[1..n-1] is already sorted. -*/ -/* TODO(shess) Is this frequent enough to warrant a binary search? -** Before implementing that, instrument the code to check. In most -** current usage, I expect that p[0] will be less than p[1] a very -** high proportion of the time. -*/ -static void orderedDLReaderReorder(OrderedDLReader *p, int n){ - while( n>1 && orderedDLReaderCmp(p, p+1)>0 ){ - OrderedDLReader tmp = p[0]; - p[0] = p[1]; - p[1] = tmp; - n--; - p++; - } -} - -/* Given an array of doclist readers, merge their doclist elements -** into out in sorted order (by docid), dropping elements from older -** readers when there is a duplicate docid. pReaders is assumed to be -** ordered by age, oldest first. -*/ -/* TODO(shess) nReaders must be <= MERGE_COUNT. This should probably -** be fixed. -*/ -static void docListMerge(DataBuffer *out, - DLReader *pReaders, int nReaders){ - OrderedDLReader readers[MERGE_COUNT]; - DLWriter writer; - int i, n; - const char *pStart = 0; - int nStart = 0; - sqlite_int64 iFirstDocid = 0, iLastDocid = 0; - - assert( nReaders>0 ); - if( nReaders==1 ){ - dataBufferAppend(out, dlrDocData(pReaders), dlrAllDataBytes(pReaders)); - return; - } - - assert( nReaders<=MERGE_COUNT ); - n = 0; - for(i=0; i0 ){ - orderedDLReaderReorder(readers+i, nReaders-i); - } - - dlwInit(&writer, pReaders[0].iType, out); - while( !dlrAtEnd(readers[0].pReader) ){ - sqlite_int64 iDocid = dlrDocid(readers[0].pReader); - - /* If this is a continuation of the current buffer to copy, extend - ** that buffer. memcpy() seems to be more efficient if it has a - ** lots of data to copy. - */ - if( dlrDocData(readers[0].pReader)==pStart+nStart ){ - nStart += dlrDocDataBytes(readers[0].pReader); - }else{ - if( pStart!=0 ){ - dlwAppend(&writer, pStart, nStart, iFirstDocid, iLastDocid); - } - pStart = dlrDocData(readers[0].pReader); - nStart = dlrDocDataBytes(readers[0].pReader); - iFirstDocid = iDocid; - } - iLastDocid = iDocid; - dlrStep(readers[0].pReader); - - /* Drop all of the older elements with the same docid. */ - for(i=1; i0 ){ - orderedDLReaderReorder(readers+i, nReaders-i); - } - } - - /* Copy over any remaining elements. */ - if( nStart>0 ) dlwAppend(&writer, pStart, nStart, iFirstDocid, iLastDocid); - dlwDestroy(&writer); -} - -/* Helper function for posListUnion(). Compares the current position -** between left and right, returning as standard C idiom of <0 if -** left0 if left>right, and 0 if left==right. "End" always -** compares greater. -*/ -static int posListCmp(PLReader *pLeft, PLReader *pRight){ - assert( pLeft->iType==pRight->iType ); - if( pLeft->iType==DL_DOCIDS ) return 0; - - if( plrAtEnd(pLeft) ) return plrAtEnd(pRight) ? 0 : 1; - if( plrAtEnd(pRight) ) return -1; - - if( plrColumn(pLeft)plrColumn(pRight) ) return 1; - - if( plrPosition(pLeft)plrPosition(pRight) ) return 1; - if( pLeft->iType==DL_POSITIONS ) return 0; - - if( plrStartOffset(pLeft)plrStartOffset(pRight) ) return 1; - - if( plrEndOffset(pLeft)plrEndOffset(pRight) ) return 1; - - return 0; -} - -/* Write the union of position lists in pLeft and pRight to pOut. -** "Union" in this case meaning "All unique position tuples". Should -** work with any doclist type, though both inputs and the output -** should be the same type. -*/ -static void posListUnion(DLReader *pLeft, DLReader *pRight, DLWriter *pOut){ - PLReader left, right; - PLWriter writer; - - assert( dlrDocid(pLeft)==dlrDocid(pRight) ); - assert( pLeft->iType==pRight->iType ); - assert( pLeft->iType==pOut->iType ); - - plrInit(&left, pLeft); - plrInit(&right, pRight); - plwInit(&writer, pOut, dlrDocid(pLeft)); - - while( !plrAtEnd(&left) || !plrAtEnd(&right) ){ - int c = posListCmp(&left, &right); - if( c<0 ){ - plwCopy(&writer, &left); - plrStep(&left); - }else if( c>0 ){ - plwCopy(&writer, &right); - plrStep(&right); - }else{ - plwCopy(&writer, &left); - plrStep(&left); - plrStep(&right); - } - } - - plwTerminate(&writer); - plwDestroy(&writer); - plrDestroy(&left); - plrDestroy(&right); -} - -/* Write the union of doclists in pLeft and pRight to pOut. For -** docids in common between the inputs, the union of the position -** lists is written. Inputs and outputs are always type DL_DEFAULT. -*/ -static void docListUnion( - const char *pLeft, int nLeft, - const char *pRight, int nRight, - DataBuffer *pOut /* Write the combined doclist here */ -){ - DLReader left, right; - DLWriter writer; - - if( nLeft==0 ){ - if( nRight!=0) dataBufferAppend(pOut, pRight, nRight); - return; - } - if( nRight==0 ){ - dataBufferAppend(pOut, pLeft, nLeft); - return; - } - - dlrInit(&left, DL_DEFAULT, pLeft, nLeft); - dlrInit(&right, DL_DEFAULT, pRight, nRight); - dlwInit(&writer, DL_DEFAULT, pOut); - - while( !dlrAtEnd(&left) || !dlrAtEnd(&right) ){ - if( dlrAtEnd(&right) ){ - dlwCopy(&writer, &left); - dlrStep(&left); - }else if( dlrAtEnd(&left) ){ - dlwCopy(&writer, &right); - dlrStep(&right); - }else if( dlrDocid(&left)dlrDocid(&right) ){ - dlwCopy(&writer, &right); - dlrStep(&right); - }else{ - posListUnion(&left, &right, &writer); - dlrStep(&left); - dlrStep(&right); - } - } - - dlrDestroy(&left); - dlrDestroy(&right); - dlwDestroy(&writer); -} - -/* pLeft and pRight are DLReaders positioned to the same docid. -** -** If there are no instances in pLeft or pRight where the position -** of pLeft is one less than the position of pRight, then this -** routine adds nothing to pOut. -** -** If there are one or more instances where positions from pLeft -** are exactly one less than positions from pRight, then add a new -** document record to pOut. If pOut wants to hold positions, then -** include the positions from pRight that are one more than a -** position in pLeft. In other words: pRight.iPos==pLeft.iPos+1. -*/ -static void posListPhraseMerge(DLReader *pLeft, DLReader *pRight, - DLWriter *pOut){ - PLReader left, right; - PLWriter writer; - int match = 0; - - assert( dlrDocid(pLeft)==dlrDocid(pRight) ); - assert( pOut->iType!=DL_POSITIONS_OFFSETS ); - - plrInit(&left, pLeft); - plrInit(&right, pRight); - - while( !plrAtEnd(&left) && !plrAtEnd(&right) ){ - if( plrColumn(&left)plrColumn(&right) ){ - plrStep(&right); - }else if( plrPosition(&left)+1plrPosition(&right) ){ - plrStep(&right); - }else{ - if( !match ){ - plwInit(&writer, pOut, dlrDocid(pLeft)); - match = 1; - } - plwAdd(&writer, plrColumn(&right), plrPosition(&right), 0, 0); - plrStep(&left); - plrStep(&right); - } - } - - if( match ){ - plwTerminate(&writer); - plwDestroy(&writer); - } - - plrDestroy(&left); - plrDestroy(&right); -} - -/* We have two doclists with positions: pLeft and pRight. -** Write the phrase intersection of these two doclists into pOut. -** -** A phrase intersection means that two documents only match -** if pLeft.iPos+1==pRight.iPos. -** -** iType controls the type of data written to pOut. If iType is -** DL_POSITIONS, the positions are those from pRight. -*/ -static void docListPhraseMerge( - const char *pLeft, int nLeft, - const char *pRight, int nRight, - DocListType iType, - DataBuffer *pOut /* Write the combined doclist here */ -){ - DLReader left, right; - DLWriter writer; - - if( nLeft==0 || nRight==0 ) return; - - assert( iType!=DL_POSITIONS_OFFSETS ); - - dlrInit(&left, DL_POSITIONS, pLeft, nLeft); - dlrInit(&right, DL_POSITIONS, pRight, nRight); - dlwInit(&writer, iType, pOut); - - while( !dlrAtEnd(&left) && !dlrAtEnd(&right) ){ - if( dlrDocid(&left) one AND (two OR three) - * [one OR two three] ==> (one OR two) AND three - * - * A "-" before a term matches all entries that lack that term. - * The "-" must occur immediately before the term with in intervening - * space. This is how the search engines do it. - * - * A NOT term cannot be the right-hand operand of an OR. If this - * occurs in the query string, the NOT is ignored: - * - * [one OR -two] ==> one OR two - * - */ -typedef struct Query { - fulltext_vtab *pFts; /* The full text index */ - int nTerms; /* Number of terms in the query */ - QueryTerm *pTerms; /* Array of terms. Space obtained from malloc() */ - int nextIsOr; /* Set the isOr flag on the next inserted term */ - int nextColumn; /* Next word parsed must be in this column */ - int dfltColumn; /* The default column */ -} Query; - - -/* -** An instance of the following structure keeps track of generated -** matching-word offset information and snippets. -*/ -typedef struct Snippet { - int nMatch; /* Total number of matches */ - int nAlloc; /* Space allocated for aMatch[] */ - struct snippetMatch { /* One entry for each matching term */ - char snStatus; /* Status flag for use while constructing snippets */ - short int iCol; /* The column that contains the match */ - short int iTerm; /* The index in Query.pTerms[] of the matching term */ - short int nByte; /* Number of bytes in the term */ - int iStart; /* The offset to the first character of the term */ - } *aMatch; /* Points to space obtained from malloc */ - char *zOffset; /* Text rendering of aMatch[] */ - int nOffset; /* strlen(zOffset) */ - char *zSnippet; /* Snippet text */ - int nSnippet; /* strlen(zSnippet) */ -} Snippet; - - -typedef enum QueryType { - QUERY_GENERIC, /* table scan */ - QUERY_ROWID, /* lookup by rowid */ - QUERY_FULLTEXT /* QUERY_FULLTEXT + [i] is a full-text search for column i*/ -} QueryType; - -typedef enum fulltext_statement { - CONTENT_INSERT_STMT, - CONTENT_SELECT_STMT, - CONTENT_UPDATE_STMT, - CONTENT_DELETE_STMT, - CONTENT_EXISTS_STMT, - - BLOCK_INSERT_STMT, - BLOCK_SELECT_STMT, - BLOCK_DELETE_STMT, - BLOCK_DELETE_ALL_STMT, - - SEGDIR_MAX_INDEX_STMT, - SEGDIR_SET_STMT, - SEGDIR_SELECT_LEVEL_STMT, - SEGDIR_SPAN_STMT, - SEGDIR_DELETE_STMT, - SEGDIR_SELECT_SEGMENT_STMT, - SEGDIR_SELECT_ALL_STMT, - SEGDIR_DELETE_ALL_STMT, - SEGDIR_COUNT_STMT, - - MAX_STMT /* Always at end! */ -} fulltext_statement; - -/* These must exactly match the enum above. */ -/* TODO(shess): Is there some risk that a statement will be used in two -** cursors at once, e.g. if a query joins a virtual table to itself? -** If so perhaps we should move some of these to the cursor object. -*/ -static const char *const fulltext_zStatement[MAX_STMT] = { - /* CONTENT_INSERT */ NULL, /* generated in contentInsertStatement() */ - /* CONTENT_SELECT */ "select * from %_content where rowid = ?", - /* CONTENT_UPDATE */ NULL, /* generated in contentUpdateStatement() */ - /* CONTENT_DELETE */ "delete from %_content where rowid = ?", - /* CONTENT_EXISTS */ "select rowid from %_content limit 1", - - /* BLOCK_INSERT */ "insert into %_segments values (?)", - /* BLOCK_SELECT */ "select block from %_segments where rowid = ?", - /* BLOCK_DELETE */ "delete from %_segments where rowid between ? and ?", - /* BLOCK_DELETE_ALL */ "delete from %_segments", - - /* SEGDIR_MAX_INDEX */ "select max(idx) from %_segdir where level = ?", - /* SEGDIR_SET */ "insert into %_segdir values (?, ?, ?, ?, ?, ?)", - /* SEGDIR_SELECT_LEVEL */ - "select start_block, leaves_end_block, root from %_segdir " - " where level = ? order by idx", - /* SEGDIR_SPAN */ - "select min(start_block), max(end_block) from %_segdir " - " where level = ? and start_block <> 0", - /* SEGDIR_DELETE */ "delete from %_segdir where level = ?", - - /* NOTE(shess): The first three results of the following two - ** statements must match. - */ - /* SEGDIR_SELECT_SEGMENT */ - "select start_block, leaves_end_block, root from %_segdir " - " where level = ? and idx = ?", - /* SEGDIR_SELECT_ALL */ - "select start_block, leaves_end_block, root from %_segdir " - " order by level desc, idx asc", - /* SEGDIR_DELETE_ALL */ "delete from %_segdir", - /* SEGDIR_COUNT */ "select count(*), ifnull(max(level),0) from %_segdir", -}; - -/* -** A connection to a fulltext index is an instance of the following -** structure. The xCreate and xConnect methods create an instance -** of this structure and xDestroy and xDisconnect free that instance. -** All other methods receive a pointer to the structure as one of their -** arguments. -*/ -struct fulltext_vtab { - sqlite3_vtab base; /* Base class used by SQLite core */ - sqlite3 *db; /* The database connection */ - const char *zDb; /* logical database name */ - const char *zName; /* virtual table name */ - int nColumn; /* number of columns in virtual table */ - char **azColumn; /* column names. malloced */ - char **azContentColumn; /* column names in content table; malloced */ - sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */ - - /* Precompiled statements which we keep as long as the table is - ** open. - */ - sqlite3_stmt *pFulltextStatements[MAX_STMT]; - - /* Precompiled statements used for segment merges. We run a - ** separate select across the leaf level of each tree being merged. - */ - sqlite3_stmt *pLeafSelectStmts[MERGE_COUNT]; - /* The statement used to prepare pLeafSelectStmts. */ -#define LEAF_SELECT \ - "select block from %_segments where rowid between ? and ? order by rowid" - - /* These buffer pending index updates during transactions. - ** nPendingData estimates the memory size of the pending data. It - ** doesn't include the hash-bucket overhead, nor any malloc - ** overhead. When nPendingData exceeds kPendingThreshold, the - ** buffer is flushed even before the transaction closes. - ** pendingTerms stores the data, and is only valid when nPendingData - ** is >=0 (nPendingData<0 means pendingTerms has not been - ** initialized). iPrevDocid is the last docid written, used to make - ** certain we're inserting in sorted order. - */ - int nPendingData; -#define kPendingThreshold (1*1024*1024) - sqlite_int64 iPrevDocid; - fts2Hash pendingTerms; -}; - -/* -** When the core wants to do a query, it create a cursor using a -** call to xOpen. This structure is an instance of a cursor. It -** is destroyed by xClose. -*/ -typedef struct fulltext_cursor { - sqlite3_vtab_cursor base; /* Base class used by SQLite core */ - QueryType iCursorType; /* Copy of sqlite3_index_info.idxNum */ - sqlite3_stmt *pStmt; /* Prepared statement in use by the cursor */ - int eof; /* True if at End Of Results */ - Query q; /* Parsed query string */ - Snippet snippet; /* Cached snippet for the current row */ - int iColumn; /* Column being searched */ - DataBuffer result; /* Doclist results from fulltextQuery */ - DLReader reader; /* Result reader if result not empty */ -} fulltext_cursor; - -static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){ - return (fulltext_vtab *) c->base.pVtab; -} - -static const sqlite3_module fts2Module; /* forward declaration */ - -/* Return a dynamically generated statement of the form - * insert into %_content (rowid, ...) values (?, ...) - */ -static const char *contentInsertStatement(fulltext_vtab *v){ - StringBuffer sb; - int i; - - initStringBuffer(&sb); - append(&sb, "insert into %_content (rowid, "); - appendList(&sb, v->nColumn, v->azContentColumn); - append(&sb, ") values (?"); - for(i=0; inColumn; ++i) - append(&sb, ", ?"); - append(&sb, ")"); - return stringBufferData(&sb); -} - -/* Return a dynamically generated statement of the form - * update %_content set [col_0] = ?, [col_1] = ?, ... - * where rowid = ? - */ -static const char *contentUpdateStatement(fulltext_vtab *v){ - StringBuffer sb; - int i; - - initStringBuffer(&sb); - append(&sb, "update %_content set "); - for(i=0; inColumn; ++i) { - if( i>0 ){ - append(&sb, ", "); - } - append(&sb, v->azContentColumn[i]); - append(&sb, " = ?"); - } - append(&sb, " where rowid = ?"); - return stringBufferData(&sb); -} - -/* Puts a freshly-prepared statement determined by iStmt in *ppStmt. -** If the indicated statement has never been prepared, it is prepared -** and cached, otherwise the cached version is reset. -*/ -static int sql_get_statement(fulltext_vtab *v, fulltext_statement iStmt, - sqlite3_stmt **ppStmt){ - assert( iStmtpFulltextStatements[iStmt]==NULL ){ - const char *zStmt; - int rc; - switch( iStmt ){ - case CONTENT_INSERT_STMT: - zStmt = contentInsertStatement(v); break; - case CONTENT_UPDATE_STMT: - zStmt = contentUpdateStatement(v); break; - default: - zStmt = fulltext_zStatement[iStmt]; - } - rc = sql_prepare(v->db, v->zDb, v->zName, &v->pFulltextStatements[iStmt], - zStmt); - if( zStmt != fulltext_zStatement[iStmt]) sqlite3_free((void *) zStmt); - if( rc!=SQLITE_OK ) return rc; - } else { - int rc = sqlite3_reset(v->pFulltextStatements[iStmt]); - if( rc!=SQLITE_OK ) return rc; - } - - *ppStmt = v->pFulltextStatements[iStmt]; - return SQLITE_OK; -} - -/* Like sqlite3_step(), but convert SQLITE_DONE to SQLITE_OK and -** SQLITE_ROW to SQLITE_ERROR. Useful for statements like UPDATE, -** where we expect no results. -*/ -static int sql_single_step(sqlite3_stmt *s){ - int rc = sqlite3_step(s); - return (rc==SQLITE_DONE) ? SQLITE_OK : rc; -} - -/* Like sql_get_statement(), but for special replicated LEAF_SELECT -** statements. idx -1 is a special case for an uncached version of -** the statement (used in the optimize implementation). -*/ -/* TODO(shess) Write version for generic statements and then share -** that between the cached-statement functions. -*/ -static int sql_get_leaf_statement(fulltext_vtab *v, int idx, - sqlite3_stmt **ppStmt){ - assert( idx>=-1 && idxdb, v->zDb, v->zName, ppStmt, LEAF_SELECT); - }else if( v->pLeafSelectStmts[idx]==NULL ){ - int rc = sql_prepare(v->db, v->zDb, v->zName, &v->pLeafSelectStmts[idx], - LEAF_SELECT); - if( rc!=SQLITE_OK ) return rc; - }else{ - int rc = sqlite3_reset(v->pLeafSelectStmts[idx]); - if( rc!=SQLITE_OK ) return rc; - } - - *ppStmt = v->pLeafSelectStmts[idx]; - return SQLITE_OK; -} - -/* insert into %_content (rowid, ...) values ([rowid], [pValues]) */ -static int content_insert(fulltext_vtab *v, sqlite3_value *rowid, - sqlite3_value **pValues){ - sqlite3_stmt *s; - int i; - int rc = sql_get_statement(v, CONTENT_INSERT_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_value(s, 1, rowid); - if( rc!=SQLITE_OK ) return rc; - - for(i=0; inColumn; ++i){ - rc = sqlite3_bind_value(s, 2+i, pValues[i]); - if( rc!=SQLITE_OK ) return rc; - } - - return sql_single_step(s); -} - -/* update %_content set col0 = pValues[0], col1 = pValues[1], ... - * where rowid = [iRowid] */ -static int content_update(fulltext_vtab *v, sqlite3_value **pValues, - sqlite_int64 iRowid){ - sqlite3_stmt *s; - int i; - int rc = sql_get_statement(v, CONTENT_UPDATE_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - for(i=0; inColumn; ++i){ - rc = sqlite3_bind_value(s, 1+i, pValues[i]); - if( rc!=SQLITE_OK ) return rc; - } - - rc = sqlite3_bind_int64(s, 1+v->nColumn, iRowid); - if( rc!=SQLITE_OK ) return rc; - - return sql_single_step(s); -} - -static void freeStringArray(int nString, const char **pString){ - int i; - - for (i=0 ; i < nString ; ++i) { - if( pString[i]!=NULL ) sqlite3_free((void *) pString[i]); - } - sqlite3_free((void *) pString); -} - -/* select * from %_content where rowid = [iRow] - * The caller must delete the returned array and all strings in it. - * null fields will be NULL in the returned array. - * - * TODO: Perhaps we should return pointer/length strings here for consistency - * with other code which uses pointer/length. */ -static int content_select(fulltext_vtab *v, sqlite_int64 iRow, - const char ***pValues){ - sqlite3_stmt *s; - const char **values; - int i; - int rc; - - *pValues = NULL; - - rc = sql_get_statement(v, CONTENT_SELECT_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 1, iRow); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_step(s); - if( rc!=SQLITE_ROW ) return rc; - - values = (const char **) sqlite3_malloc(v->nColumn * sizeof(const char *)); - for(i=0; inColumn; ++i){ - if( sqlite3_column_type(s, i)==SQLITE_NULL ){ - values[i] = NULL; - }else{ - values[i] = string_dup((char*)sqlite3_column_text(s, i)); - } - } - - /* We expect only one row. We must execute another sqlite3_step() - * to complete the iteration; otherwise the table will remain locked. */ - rc = sqlite3_step(s); - if( rc==SQLITE_DONE ){ - *pValues = values; - return SQLITE_OK; - } - - freeStringArray(v->nColumn, values); - return rc; -} - -/* delete from %_content where rowid = [iRow ] */ -static int content_delete(fulltext_vtab *v, sqlite_int64 iRow){ - sqlite3_stmt *s; - int rc = sql_get_statement(v, CONTENT_DELETE_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 1, iRow); - if( rc!=SQLITE_OK ) return rc; - - return sql_single_step(s); -} - -/* Returns SQLITE_ROW if any rows exist in %_content, SQLITE_DONE if -** no rows exist, and any error in case of failure. -*/ -static int content_exists(fulltext_vtab *v){ - sqlite3_stmt *s; - int rc = sql_get_statement(v, CONTENT_EXISTS_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_step(s); - if( rc!=SQLITE_ROW ) return rc; - - /* We expect only one row. We must execute another sqlite3_step() - * to complete the iteration; otherwise the table will remain locked. */ - rc = sqlite3_step(s); - if( rc==SQLITE_DONE ) return SQLITE_ROW; - if( rc==SQLITE_ROW ) return SQLITE_ERROR; - return rc; -} - -/* insert into %_segments values ([pData]) -** returns assigned rowid in *piBlockid -*/ -static int block_insert(fulltext_vtab *v, const char *pData, int nData, - sqlite_int64 *piBlockid){ - sqlite3_stmt *s; - int rc = sql_get_statement(v, BLOCK_INSERT_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_blob(s, 1, pData, nData, SQLITE_STATIC); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_step(s); - if( rc==SQLITE_ROW ) return SQLITE_ERROR; - if( rc!=SQLITE_DONE ) return rc; - - *piBlockid = sqlite3_last_insert_rowid(v->db); - return SQLITE_OK; -} - -/* delete from %_segments -** where rowid between [iStartBlockid] and [iEndBlockid] -** -** Deletes the range of blocks, inclusive, used to delete the blocks -** which form a segment. -*/ -static int block_delete(fulltext_vtab *v, - sqlite_int64 iStartBlockid, sqlite_int64 iEndBlockid){ - sqlite3_stmt *s; - int rc = sql_get_statement(v, BLOCK_DELETE_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 1, iStartBlockid); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 2, iEndBlockid); - if( rc!=SQLITE_OK ) return rc; - - return sql_single_step(s); -} - -/* Returns SQLITE_ROW with *pidx set to the maximum segment idx found -** at iLevel. Returns SQLITE_DONE if there are no segments at -** iLevel. Otherwise returns an error. -*/ -static int segdir_max_index(fulltext_vtab *v, int iLevel, int *pidx){ - sqlite3_stmt *s; - int rc = sql_get_statement(v, SEGDIR_MAX_INDEX_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int(s, 1, iLevel); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_step(s); - /* Should always get at least one row due to how max() works. */ - if( rc==SQLITE_DONE ) return SQLITE_DONE; - if( rc!=SQLITE_ROW ) return rc; - - /* NULL means that there were no inputs to max(). */ - if( SQLITE_NULL==sqlite3_column_type(s, 0) ){ - rc = sqlite3_step(s); - if( rc==SQLITE_ROW ) return SQLITE_ERROR; - return rc; - } - - *pidx = sqlite3_column_int(s, 0); - - /* We expect only one row. We must execute another sqlite3_step() - * to complete the iteration; otherwise the table will remain locked. */ - rc = sqlite3_step(s); - if( rc==SQLITE_ROW ) return SQLITE_ERROR; - if( rc!=SQLITE_DONE ) return rc; - return SQLITE_ROW; -} - -/* insert into %_segdir values ( -** [iLevel], [idx], -** [iStartBlockid], [iLeavesEndBlockid], [iEndBlockid], -** [pRootData] -** ) -*/ -static int segdir_set(fulltext_vtab *v, int iLevel, int idx, - sqlite_int64 iStartBlockid, - sqlite_int64 iLeavesEndBlockid, - sqlite_int64 iEndBlockid, - const char *pRootData, int nRootData){ - sqlite3_stmt *s; - int rc = sql_get_statement(v, SEGDIR_SET_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int(s, 1, iLevel); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int(s, 2, idx); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 3, iStartBlockid); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 4, iLeavesEndBlockid); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 5, iEndBlockid); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_blob(s, 6, pRootData, nRootData, SQLITE_STATIC); - if( rc!=SQLITE_OK ) return rc; - - return sql_single_step(s); -} - -/* Queries %_segdir for the block span of the segments in level -** iLevel. Returns SQLITE_DONE if there are no blocks for iLevel, -** SQLITE_ROW if there are blocks, else an error. -*/ -static int segdir_span(fulltext_vtab *v, int iLevel, - sqlite_int64 *piStartBlockid, - sqlite_int64 *piEndBlockid){ - sqlite3_stmt *s; - int rc = sql_get_statement(v, SEGDIR_SPAN_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int(s, 1, iLevel); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_step(s); - if( rc==SQLITE_DONE ) return SQLITE_DONE; /* Should never happen */ - if( rc!=SQLITE_ROW ) return rc; - - /* This happens if all segments at this level are entirely inline. */ - if( SQLITE_NULL==sqlite3_column_type(s, 0) ){ - /* We expect only one row. We must execute another sqlite3_step() - * to complete the iteration; otherwise the table will remain locked. */ - int rc2 = sqlite3_step(s); - if( rc2==SQLITE_ROW ) return SQLITE_ERROR; - return rc2; - } - - *piStartBlockid = sqlite3_column_int64(s, 0); - *piEndBlockid = sqlite3_column_int64(s, 1); - - /* We expect only one row. We must execute another sqlite3_step() - * to complete the iteration; otherwise the table will remain locked. */ - rc = sqlite3_step(s); - if( rc==SQLITE_ROW ) return SQLITE_ERROR; - if( rc!=SQLITE_DONE ) return rc; - return SQLITE_ROW; -} - -/* Delete the segment blocks and segment directory records for all -** segments at iLevel. -*/ -static int segdir_delete(fulltext_vtab *v, int iLevel){ - sqlite3_stmt *s; - sqlite_int64 iStartBlockid, iEndBlockid; - int rc = segdir_span(v, iLevel, &iStartBlockid, &iEndBlockid); - if( rc!=SQLITE_ROW && rc!=SQLITE_DONE ) return rc; - - if( rc==SQLITE_ROW ){ - rc = block_delete(v, iStartBlockid, iEndBlockid); - if( rc!=SQLITE_OK ) return rc; - } - - /* Delete the segment directory itself. */ - rc = sql_get_statement(v, SEGDIR_DELETE_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 1, iLevel); - if( rc!=SQLITE_OK ) return rc; - - return sql_single_step(s); -} - -/* Delete entire fts index, SQLITE_OK on success, relevant error on -** failure. -*/ -static int segdir_delete_all(fulltext_vtab *v){ - sqlite3_stmt *s; - int rc = sql_get_statement(v, SEGDIR_DELETE_ALL_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sql_single_step(s); - if( rc!=SQLITE_OK ) return rc; - - rc = sql_get_statement(v, BLOCK_DELETE_ALL_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - return sql_single_step(s); -} - -/* Returns SQLITE_OK with *pnSegments set to the number of entries in -** %_segdir and *piMaxLevel set to the highest level which has a -** segment. Otherwise returns the SQLite error which caused failure. -*/ -static int segdir_count(fulltext_vtab *v, int *pnSegments, int *piMaxLevel){ - sqlite3_stmt *s; - int rc = sql_get_statement(v, SEGDIR_COUNT_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_step(s); - /* TODO(shess): This case should not be possible? Should stronger - ** measures be taken if it happens? - */ - if( rc==SQLITE_DONE ){ - *pnSegments = 0; - *piMaxLevel = 0; - return SQLITE_OK; - } - if( rc!=SQLITE_ROW ) return rc; - - *pnSegments = sqlite3_column_int(s, 0); - *piMaxLevel = sqlite3_column_int(s, 1); - - /* We expect only one row. We must execute another sqlite3_step() - * to complete the iteration; otherwise the table will remain locked. */ - rc = sqlite3_step(s); - if( rc==SQLITE_DONE ) return SQLITE_OK; - if( rc==SQLITE_ROW ) return SQLITE_ERROR; - return rc; -} - -/* TODO(shess) clearPendingTerms() is far down the file because -** writeZeroSegment() is far down the file because LeafWriter is far -** down the file. Consider refactoring the code to move the non-vtab -** code above the vtab code so that we don't need this forward -** reference. -*/ -static int clearPendingTerms(fulltext_vtab *v); - -/* -** Free the memory used to contain a fulltext_vtab structure. -*/ -static void fulltext_vtab_destroy(fulltext_vtab *v){ - int iStmt, i; - - TRACE(("FTS2 Destroy %p\n", v)); - for( iStmt=0; iStmtpFulltextStatements[iStmt]!=NULL ){ - sqlite3_finalize(v->pFulltextStatements[iStmt]); - v->pFulltextStatements[iStmt] = NULL; - } - } - - for( i=0; ipLeafSelectStmts[i]!=NULL ){ - sqlite3_finalize(v->pLeafSelectStmts[i]); - v->pLeafSelectStmts[i] = NULL; - } - } - - if( v->pTokenizer!=NULL ){ - v->pTokenizer->pModule->xDestroy(v->pTokenizer); - v->pTokenizer = NULL; - } - - clearPendingTerms(v); - - sqlite3_free(v->azColumn); - for(i = 0; i < v->nColumn; ++i) { - sqlite3_free(v->azContentColumn[i]); - } - sqlite3_free(v->azContentColumn); - sqlite3_free(v); -} - -/* -** Token types for parsing the arguments to xConnect or xCreate. -*/ -#define TOKEN_EOF 0 /* End of file */ -#define TOKEN_SPACE 1 /* Any kind of whitespace */ -#define TOKEN_ID 2 /* An identifier */ -#define TOKEN_STRING 3 /* A string literal */ -#define TOKEN_PUNCT 4 /* A single punctuation character */ - -/* -** If X is a character that can be used in an identifier then -** IdChar(X) will be true. Otherwise it is false. -** -** For ASCII, any character with the high-order bit set is -** allowed in an identifier. For 7-bit characters, -** sqlite3IsIdChar[X] must be 1. -** -** Ticket #1066. the SQL standard does not allow '$' in the -** middle of identfiers. But many SQL implementations do. -** SQLite will allow '$' in identifiers for compatibility. -** But the feature is undocumented. -*/ -static const char isIdChar[] = { -/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ - 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */ -}; -#define IdChar(C) (((c=C)&0x80)!=0 || (c>0x1f && isIdChar[c-0x20])) - - -/* -** Return the length of the token that begins at z[0]. -** Store the token type in *tokenType before returning. -*/ -static int getToken(const char *z, int *tokenType){ - int i, c; - switch( *z ){ - case 0: { - *tokenType = TOKEN_EOF; - return 0; - } - case ' ': case '\t': case '\n': case '\f': case '\r': { - for(i=1; safe_isspace(z[i]); i++){} - *tokenType = TOKEN_SPACE; - return i; - } - case '`': - case '\'': - case '"': { - int delim = z[0]; - for(i=1; (c=z[i])!=0; i++){ - if( c==delim ){ - if( z[i+1]==delim ){ - i++; - }else{ - break; - } - } - } - *tokenType = TOKEN_STRING; - return i + (c!=0); - } - case '[': { - for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){} - *tokenType = TOKEN_ID; - return i; - } - default: { - if( !IdChar(*z) ){ - break; - } - for(i=1; IdChar(z[i]); i++){} - *tokenType = TOKEN_ID; - return i; - } - } - *tokenType = TOKEN_PUNCT; - return 1; -} - -/* -** A token extracted from a string is an instance of the following -** structure. -*/ -typedef struct Token { - const char *z; /* Pointer to token text. Not '\000' terminated */ - short int n; /* Length of the token text in bytes. */ -} Token; - -/* -** Given a input string (which is really one of the argv[] parameters -** passed into xConnect or xCreate) split the string up into tokens. -** Return an array of pointers to '\000' terminated strings, one string -** for each non-whitespace token. -** -** The returned array is terminated by a single NULL pointer. -** -** Space to hold the returned array is obtained from a single -** malloc and should be freed by passing the return value to free(). -** The individual strings within the token list are all a part of -** the single memory allocation and will all be freed at once. -*/ -static char **tokenizeString(const char *z, int *pnToken){ - int nToken = 0; - Token *aToken = sqlite3_malloc( strlen(z) * sizeof(aToken[0]) ); - int n = 1; - int e, i; - int totalSize = 0; - char **azToken; - char *zCopy; - while( n>0 ){ - n = getToken(z, &e); - if( e!=TOKEN_SPACE ){ - aToken[nToken].z = z; - aToken[nToken].n = n; - nToken++; - totalSize += n+1; - } - z += n; - } - azToken = (char**)sqlite3_malloc( nToken*sizeof(char*) + totalSize ); - zCopy = (char*)&azToken[nToken]; - nToken--; - for(i=0; i=0 ){ - azIn[j] = azIn[i]; - } - j++; - } - } - azIn[j] = 0; - } -} - - -/* -** Find the first alphanumeric token in the string zIn. Null-terminate -** this token. Remove any quotation marks. And return a pointer to -** the result. -*/ -static char *firstToken(char *zIn, char **pzTail){ - int n, ttype; - while(1){ - n = getToken(zIn, &ttype); - if( ttype==TOKEN_SPACE ){ - zIn += n; - }else if( ttype==TOKEN_EOF ){ - *pzTail = zIn; - return 0; - }else{ - zIn[n] = 0; - *pzTail = &zIn[1]; - dequoteString(zIn); - return zIn; - } - } - /*NOTREACHED*/ -} - -/* Return true if... -** -** * s begins with the string t, ignoring case -** * s is longer than t -** * The first character of s beyond t is not a alphanumeric -** -** Ignore leading space in *s. -** -** To put it another way, return true if the first token of -** s[] is t[]. -*/ -static int startsWith(const char *s, const char *t){ - while( safe_isspace(*s) ){ s++; } - while( *t ){ - if( safe_tolower(*s++)!=safe_tolower(*t++) ) return 0; - } - return *s!='_' && !safe_isalnum(*s); -} - -/* -** An instance of this structure defines the "spec" of a -** full text index. This structure is populated by parseSpec -** and use by fulltextConnect and fulltextCreate. -*/ -typedef struct TableSpec { - const char *zDb; /* Logical database name */ - const char *zName; /* Name of the full-text index */ - int nColumn; /* Number of columns to be indexed */ - char **azColumn; /* Original names of columns to be indexed */ - char **azContentColumn; /* Column names for %_content */ - char **azTokenizer; /* Name of tokenizer and its arguments */ -} TableSpec; - -/* -** Reclaim all of the memory used by a TableSpec -*/ -static void clearTableSpec(TableSpec *p) { - sqlite3_free(p->azColumn); - sqlite3_free(p->azContentColumn); - sqlite3_free(p->azTokenizer); -} - -/* Parse a CREATE VIRTUAL TABLE statement, which looks like this: - * - * CREATE VIRTUAL TABLE email - * USING fts2(subject, body, tokenize mytokenizer(myarg)) - * - * We return parsed information in a TableSpec structure. - * - */ -static int parseSpec(TableSpec *pSpec, int argc, const char *const*argv, - char**pzErr){ - int i, n; - char *z, *zDummy; - char **azArg; - const char *zTokenizer = 0; /* argv[] entry describing the tokenizer */ - - assert( argc>=3 ); - /* Current interface: - ** argv[0] - module name - ** argv[1] - database name - ** argv[2] - table name - ** argv[3..] - columns, optionally followed by tokenizer specification - ** and snippet delimiters specification. - */ - - /* Make a copy of the complete argv[][] array in a single allocation. - ** The argv[][] array is read-only and transient. We can write to the - ** copy in order to modify things and the copy is persistent. - */ - CLEAR(pSpec); - for(i=n=0; izDb = azArg[1]; - pSpec->zName = azArg[2]; - pSpec->nColumn = 0; - pSpec->azColumn = azArg; - zTokenizer = "tokenize simple"; - for(i=3; inColumn] = firstToken(azArg[i], &zDummy); - pSpec->nColumn++; - } - } - if( pSpec->nColumn==0 ){ - azArg[0] = "content"; - pSpec->nColumn = 1; - } - - /* - ** Construct the list of content column names. - ** - ** Each content column name will be of the form cNNAAAA - ** where NN is the column number and AAAA is the sanitized - ** column name. "sanitized" means that special characters are - ** converted to "_". The cNN prefix guarantees that all column - ** names are unique. - ** - ** The AAAA suffix is not strictly necessary. It is included - ** for the convenience of people who might examine the generated - ** %_content table and wonder what the columns are used for. - */ - pSpec->azContentColumn = sqlite3_malloc( pSpec->nColumn * sizeof(char *) ); - if( pSpec->azContentColumn==0 ){ - clearTableSpec(pSpec); - return SQLITE_NOMEM; - } - for(i=0; inColumn; i++){ - char *p; - pSpec->azContentColumn[i] = sqlite3_mprintf("c%d%s", i, azArg[i]); - for (p = pSpec->azContentColumn[i]; *p ; ++p) { - if( !safe_isalnum(*p) ) *p = '_'; - } - } - - /* - ** Parse the tokenizer specification string. - */ - pSpec->azTokenizer = tokenizeString(zTokenizer, &n); - tokenListToIdList(pSpec->azTokenizer); - - return SQLITE_OK; -} - -/* -** Generate a CREATE TABLE statement that describes the schema of -** the virtual table. Return a pointer to this schema string. -** -** Space is obtained from sqlite3_mprintf() and should be freed -** using sqlite3_free(). -*/ -static char *fulltextSchema( - int nColumn, /* Number of columns */ - const char *const* azColumn, /* List of columns */ - const char *zTableName /* Name of the table */ -){ - int i; - char *zSchema, *zNext; - const char *zSep = "("; - zSchema = sqlite3_mprintf("CREATE TABLE x"); - for(i=0; ibase */ - v->db = db; - v->zDb = spec->zDb; /* Freed when azColumn is freed */ - v->zName = spec->zName; /* Freed when azColumn is freed */ - v->nColumn = spec->nColumn; - v->azContentColumn = spec->azContentColumn; - spec->azContentColumn = 0; - v->azColumn = spec->azColumn; - spec->azColumn = 0; - - if( spec->azTokenizer==0 ){ - return SQLITE_NOMEM; - } - - zTok = spec->azTokenizer[0]; - if( !zTok ){ - zTok = "simple"; - } - nTok = strlen(zTok)+1; - - m = (sqlite3_tokenizer_module *)sqlite3Fts2HashFind(pHash, zTok, nTok); - if( !m ){ - *pzErr = sqlite3_mprintf("unknown tokenizer: %s", spec->azTokenizer[0]); - rc = SQLITE_ERROR; - goto err; - } - - for(n=0; spec->azTokenizer[n]; n++){} - if( n ){ - rc = m->xCreate(n-1, (const char*const*)&spec->azTokenizer[1], - &v->pTokenizer); - }else{ - rc = m->xCreate(0, 0, &v->pTokenizer); - } - if( rc!=SQLITE_OK ) goto err; - v->pTokenizer->pModule = m; - - /* TODO: verify the existence of backing tables foo_content, foo_term */ - - schema = fulltextSchema(v->nColumn, (const char*const*)v->azColumn, - spec->zName); - rc = sqlite3_declare_vtab(db, schema); - sqlite3_free(schema); - if( rc!=SQLITE_OK ) goto err; - - memset(v->pFulltextStatements, 0, sizeof(v->pFulltextStatements)); - - /* Indicate that the buffer is not live. */ - v->nPendingData = -1; - - *ppVTab = &v->base; - TRACE(("FTS2 Connect %p\n", v)); - - return rc; - -err: - fulltext_vtab_destroy(v); - return rc; -} - -static int fulltextConnect( - sqlite3 *db, - void *pAux, - int argc, const char *const*argv, - sqlite3_vtab **ppVTab, - char **pzErr -){ - TableSpec spec; - int rc = parseSpec(&spec, argc, argv, pzErr); - if( rc!=SQLITE_OK ) return rc; - - rc = constructVtab(db, (fts2Hash *)pAux, &spec, ppVTab, pzErr); - clearTableSpec(&spec); - return rc; -} - -/* The %_content table holds the text of each document, with -** the rowid used as the docid. -*/ -/* TODO(shess) This comment needs elaboration to match the updated -** code. Work it into the top-of-file comment at that time. -*/ -static int fulltextCreate(sqlite3 *db, void *pAux, - int argc, const char * const *argv, - sqlite3_vtab **ppVTab, char **pzErr){ - int rc; - TableSpec spec; - StringBuffer schema; - TRACE(("FTS2 Create\n")); - - rc = parseSpec(&spec, argc, argv, pzErr); - if( rc!=SQLITE_OK ) return rc; - - initStringBuffer(&schema); - append(&schema, "CREATE TABLE %_content("); - appendList(&schema, spec.nColumn, spec.azContentColumn); - append(&schema, ")"); - rc = sql_exec(db, spec.zDb, spec.zName, stringBufferData(&schema)); - stringBufferDestroy(&schema); - if( rc!=SQLITE_OK ) goto out; - - rc = sql_exec(db, spec.zDb, spec.zName, - "create table %_segments(block blob);"); - if( rc!=SQLITE_OK ) goto out; - - rc = sql_exec(db, spec.zDb, spec.zName, - "create table %_segdir(" - " level integer," - " idx integer," - " start_block integer," - " leaves_end_block integer," - " end_block integer," - " root blob," - " primary key(level, idx)" - ");"); - if( rc!=SQLITE_OK ) goto out; - - rc = constructVtab(db, (fts2Hash *)pAux, &spec, ppVTab, pzErr); - -out: - clearTableSpec(&spec); - return rc; -} - -/* Decide how to handle an SQL query. */ -static int fulltextBestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ - int i; - TRACE(("FTS2 BestIndex\n")); - - for(i=0; inConstraint; ++i){ - const struct sqlite3_index_constraint *pConstraint; - pConstraint = &pInfo->aConstraint[i]; - if( pConstraint->usable ) { - if( pConstraint->iColumn==-1 && - pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ ){ - pInfo->idxNum = QUERY_ROWID; /* lookup by rowid */ - TRACE(("FTS2 QUERY_ROWID\n")); - } else if( pConstraint->iColumn>=0 && - pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH ){ - /* full-text search */ - pInfo->idxNum = QUERY_FULLTEXT + pConstraint->iColumn; - TRACE(("FTS2 QUERY_FULLTEXT %d\n", pConstraint->iColumn)); - } else continue; - - pInfo->aConstraintUsage[i].argvIndex = 1; - pInfo->aConstraintUsage[i].omit = 1; - - /* An arbitrary value for now. - * TODO: Perhaps rowid matches should be considered cheaper than - * full-text searches. */ - pInfo->estimatedCost = 1.0; - - return SQLITE_OK; - } - } - pInfo->idxNum = QUERY_GENERIC; - return SQLITE_OK; -} - -static int fulltextDisconnect(sqlite3_vtab *pVTab){ - TRACE(("FTS2 Disconnect %p\n", pVTab)); - fulltext_vtab_destroy((fulltext_vtab *)pVTab); - return SQLITE_OK; -} - -static int fulltextDestroy(sqlite3_vtab *pVTab){ - fulltext_vtab *v = (fulltext_vtab *)pVTab; - int rc; - - TRACE(("FTS2 Destroy %p\n", pVTab)); - rc = sql_exec(v->db, v->zDb, v->zName, - "drop table if exists %_content;" - "drop table if exists %_segments;" - "drop table if exists %_segdir;" - ); - if( rc!=SQLITE_OK ) return rc; - - fulltext_vtab_destroy((fulltext_vtab *)pVTab); - return SQLITE_OK; -} - -static int fulltextOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ - fulltext_cursor *c; - - c = (fulltext_cursor *) sqlite3_malloc(sizeof(fulltext_cursor)); - if( c ){ - memset(c, 0, sizeof(fulltext_cursor)); - /* sqlite will initialize c->base */ - *ppCursor = &c->base; - TRACE(("FTS2 Open %p: %p\n", pVTab, c)); - return SQLITE_OK; - }else{ - return SQLITE_NOMEM; - } -} - - -/* Free all of the dynamically allocated memory held by *q -*/ -static void queryClear(Query *q){ - int i; - for(i = 0; i < q->nTerms; ++i){ - sqlite3_free(q->pTerms[i].pTerm); - } - sqlite3_free(q->pTerms); - CLEAR(q); -} - -/* Free all of the dynamically allocated memory held by the -** Snippet -*/ -static void snippetClear(Snippet *p){ - sqlite3_free(p->aMatch); - sqlite3_free(p->zOffset); - sqlite3_free(p->zSnippet); - CLEAR(p); -} -/* -** Append a single entry to the p->aMatch[] log. -*/ -static void snippetAppendMatch( - Snippet *p, /* Append the entry to this snippet */ - int iCol, int iTerm, /* The column and query term */ - int iStart, int nByte /* Offset and size of the match */ -){ - int i; - struct snippetMatch *pMatch; - if( p->nMatch+1>=p->nAlloc ){ - p->nAlloc = p->nAlloc*2 + 10; - p->aMatch = sqlite3_realloc(p->aMatch, p->nAlloc*sizeof(p->aMatch[0]) ); - if( p->aMatch==0 ){ - p->nMatch = 0; - p->nAlloc = 0; - return; - } - } - i = p->nMatch++; - pMatch = &p->aMatch[i]; - pMatch->iCol = iCol; - pMatch->iTerm = iTerm; - pMatch->iStart = iStart; - pMatch->nByte = nByte; -} - -/* -** Sizing information for the circular buffer used in snippetOffsetsOfColumn() -*/ -#define FTS2_ROTOR_SZ (32) -#define FTS2_ROTOR_MASK (FTS2_ROTOR_SZ-1) - -/* -** Add entries to pSnippet->aMatch[] for every match that occurs against -** document zDoc[0..nDoc-1] which is stored in column iColumn. -*/ -static void snippetOffsetsOfColumn( - Query *pQuery, - Snippet *pSnippet, - int iColumn, - const char *zDoc, - int nDoc -){ - const sqlite3_tokenizer_module *pTModule; /* The tokenizer module */ - sqlite3_tokenizer *pTokenizer; /* The specific tokenizer */ - sqlite3_tokenizer_cursor *pTCursor; /* Tokenizer cursor */ - fulltext_vtab *pVtab; /* The full text index */ - int nColumn; /* Number of columns in the index */ - const QueryTerm *aTerm; /* Query string terms */ - int nTerm; /* Number of query string terms */ - int i, j; /* Loop counters */ - int rc; /* Return code */ - unsigned int match, prevMatch; /* Phrase search bitmasks */ - const char *zToken; /* Next token from the tokenizer */ - int nToken; /* Size of zToken */ - int iBegin, iEnd, iPos; /* Offsets of beginning and end */ - - /* The following variables keep a circular buffer of the last - ** few tokens */ - unsigned int iRotor = 0; /* Index of current token */ - int iRotorBegin[FTS2_ROTOR_SZ]; /* Beginning offset of token */ - int iRotorLen[FTS2_ROTOR_SZ]; /* Length of token */ - - pVtab = pQuery->pFts; - nColumn = pVtab->nColumn; - pTokenizer = pVtab->pTokenizer; - pTModule = pTokenizer->pModule; - rc = pTModule->xOpen(pTokenizer, zDoc, nDoc, &pTCursor); - if( rc ) return; - pTCursor->pTokenizer = pTokenizer; - aTerm = pQuery->pTerms; - nTerm = pQuery->nTerms; - if( nTerm>=FTS2_ROTOR_SZ ){ - nTerm = FTS2_ROTOR_SZ - 1; - } - prevMatch = 0; - while(1){ - rc = pTModule->xNext(pTCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos); - if( rc ) break; - iRotorBegin[iRotor&FTS2_ROTOR_MASK] = iBegin; - iRotorLen[iRotor&FTS2_ROTOR_MASK] = iEnd-iBegin; - match = 0; - for(i=0; i=0 && iColnToken ) continue; - if( !aTerm[i].isPrefix && aTerm[i].nTerm1 && (prevMatch & (1<=0; j--){ - int k = (iRotor-j) & FTS2_ROTOR_MASK; - snippetAppendMatch(pSnippet, iColumn, i-j, - iRotorBegin[k], iRotorLen[k]); - } - } - } - prevMatch = match<<1; - iRotor++; - } - pTModule->xClose(pTCursor); -} - - -/* -** Compute all offsets for the current row of the query. -** If the offsets have already been computed, this routine is a no-op. -*/ -static void snippetAllOffsets(fulltext_cursor *p){ - int nColumn; - int iColumn, i; - int iFirst, iLast; - fulltext_vtab *pFts; - - if( p->snippet.nMatch ) return; - if( p->q.nTerms==0 ) return; - pFts = p->q.pFts; - nColumn = pFts->nColumn; - iColumn = (p->iCursorType - QUERY_FULLTEXT); - if( iColumn<0 || iColumn>=nColumn ){ - iFirst = 0; - iLast = nColumn-1; - }else{ - iFirst = iColumn; - iLast = iColumn; - } - for(i=iFirst; i<=iLast; i++){ - const char *zDoc; - int nDoc; - zDoc = (const char*)sqlite3_column_text(p->pStmt, i+1); - nDoc = sqlite3_column_bytes(p->pStmt, i+1); - snippetOffsetsOfColumn(&p->q, &p->snippet, i, zDoc, nDoc); - } -} - -/* -** Convert the information in the aMatch[] array of the snippet -** into the string zOffset[0..nOffset-1]. -*/ -static void snippetOffsetText(Snippet *p){ - int i; - int cnt = 0; - StringBuffer sb; - char zBuf[200]; - if( p->zOffset ) return; - initStringBuffer(&sb); - for(i=0; inMatch; i++){ - struct snippetMatch *pMatch = &p->aMatch[i]; - zBuf[0] = ' '; - sqlite3_snprintf(sizeof(zBuf)-1, &zBuf[cnt>0], "%d %d %d %d", - pMatch->iCol, pMatch->iTerm, pMatch->iStart, pMatch->nByte); - append(&sb, zBuf); - cnt++; - } - p->zOffset = stringBufferData(&sb); - p->nOffset = stringBufferLength(&sb); -} - -/* -** zDoc[0..nDoc-1] is phrase of text. aMatch[0..nMatch-1] are a set -** of matching words some of which might be in zDoc. zDoc is column -** number iCol. -** -** iBreak is suggested spot in zDoc where we could begin or end an -** excerpt. Return a value similar to iBreak but possibly adjusted -** to be a little left or right so that the break point is better. -*/ -static int wordBoundary( - int iBreak, /* The suggested break point */ - const char *zDoc, /* Document text */ - int nDoc, /* Number of bytes in zDoc[] */ - struct snippetMatch *aMatch, /* Matching words */ - int nMatch, /* Number of entries in aMatch[] */ - int iCol /* The column number for zDoc[] */ -){ - int i; - if( iBreak<=10 ){ - return 0; - } - if( iBreak>=nDoc-10 ){ - return nDoc; - } - for(i=0; i0 && aMatch[i-1].iStart+aMatch[i-1].nByte>=iBreak ){ - return aMatch[i-1].iStart; - } - } - for(i=1; i<=10; i++){ - if( safe_isspace(zDoc[iBreak-i]) ){ - return iBreak - i + 1; - } - if( safe_isspace(zDoc[iBreak+i]) ){ - return iBreak + i + 1; - } - } - return iBreak; -} - - - -/* -** Allowed values for Snippet.aMatch[].snStatus -*/ -#define SNIPPET_IGNORE 0 /* It is ok to omit this match from the snippet */ -#define SNIPPET_DESIRED 1 /* We want to include this match in the snippet */ - -/* -** Generate the text of a snippet. -*/ -static void snippetText( - fulltext_cursor *pCursor, /* The cursor we need the snippet for */ - const char *zStartMark, /* Markup to appear before each match */ - const char *zEndMark, /* Markup to appear after each match */ - const char *zEllipsis /* Ellipsis mark */ -){ - int i, j; - struct snippetMatch *aMatch; - int nMatch; - int nDesired; - StringBuffer sb; - int tailCol; - int tailOffset; - int iCol; - int nDoc; - const char *zDoc; - int iStart, iEnd; - int tailEllipsis = 0; - int iMatch; - - - sqlite3_free(pCursor->snippet.zSnippet); - pCursor->snippet.zSnippet = 0; - aMatch = pCursor->snippet.aMatch; - nMatch = pCursor->snippet.nMatch; - initStringBuffer(&sb); - - for(i=0; iq.nTerms; i++){ - for(j=0; j0; i++){ - if( aMatch[i].snStatus!=SNIPPET_DESIRED ) continue; - nDesired--; - iCol = aMatch[i].iCol; - zDoc = (const char*)sqlite3_column_text(pCursor->pStmt, iCol+1); - nDoc = sqlite3_column_bytes(pCursor->pStmt, iCol+1); - iStart = aMatch[i].iStart - 40; - iStart = wordBoundary(iStart, zDoc, nDoc, aMatch, nMatch, iCol); - if( iStart<=10 ){ - iStart = 0; - } - if( iCol==tailCol && iStart<=tailOffset+20 ){ - iStart = tailOffset; - } - if( (iCol!=tailCol && tailCol>=0) || iStart!=tailOffset ){ - trimWhiteSpace(&sb); - appendWhiteSpace(&sb); - append(&sb, zEllipsis); - appendWhiteSpace(&sb); - } - iEnd = aMatch[i].iStart + aMatch[i].nByte + 40; - iEnd = wordBoundary(iEnd, zDoc, nDoc, aMatch, nMatch, iCol); - if( iEnd>=nDoc-10 ){ - iEnd = nDoc; - tailEllipsis = 0; - }else{ - tailEllipsis = 1; - } - while( iMatchsnippet.zSnippet = stringBufferData(&sb); - pCursor->snippet.nSnippet = stringBufferLength(&sb); -} - - -/* -** Close the cursor. For additional information see the documentation -** on the xClose method of the virtual table interface. -*/ -static int fulltextClose(sqlite3_vtab_cursor *pCursor){ - fulltext_cursor *c = (fulltext_cursor *) pCursor; - TRACE(("FTS2 Close %p\n", c)); - sqlite3_finalize(c->pStmt); - queryClear(&c->q); - snippetClear(&c->snippet); - if( c->result.nData!=0 ) dlrDestroy(&c->reader); - dataBufferDestroy(&c->result); - sqlite3_free(c); - return SQLITE_OK; -} - -static int fulltextNext(sqlite3_vtab_cursor *pCursor){ - fulltext_cursor *c = (fulltext_cursor *) pCursor; - int rc; - - TRACE(("FTS2 Next %p\n", pCursor)); - snippetClear(&c->snippet); - if( c->iCursorType < QUERY_FULLTEXT ){ - /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */ - rc = sqlite3_step(c->pStmt); - switch( rc ){ - case SQLITE_ROW: - c->eof = 0; - return SQLITE_OK; - case SQLITE_DONE: - c->eof = 1; - return SQLITE_OK; - default: - c->eof = 1; - return rc; - } - } else { /* full-text query */ - rc = sqlite3_reset(c->pStmt); - if( rc!=SQLITE_OK ) return rc; - - if( c->result.nData==0 || dlrAtEnd(&c->reader) ){ - c->eof = 1; - return SQLITE_OK; - } - rc = sqlite3_bind_int64(c->pStmt, 1, dlrDocid(&c->reader)); - dlrStep(&c->reader); - if( rc!=SQLITE_OK ) return rc; - /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */ - rc = sqlite3_step(c->pStmt); - if( rc==SQLITE_ROW ){ /* the case we expect */ - c->eof = 0; - return SQLITE_OK; - } - /* an error occurred; abort */ - return rc==SQLITE_DONE ? SQLITE_ERROR : rc; - } -} - - -/* TODO(shess) If we pushed LeafReader to the top of the file, or to -** another file, term_select() could be pushed above -** docListOfTerm(). -*/ -static int termSelect(fulltext_vtab *v, int iColumn, - const char *pTerm, int nTerm, int isPrefix, - DocListType iType, DataBuffer *out); - -/* Return a DocList corresponding to the query term *pTerm. If *pTerm -** is the first term of a phrase query, go ahead and evaluate the phrase -** query and return the doclist for the entire phrase query. -** -** The resulting DL_DOCIDS doclist is stored in pResult, which is -** overwritten. -*/ -static int docListOfTerm( - fulltext_vtab *v, /* The full text index */ - int iColumn, /* column to restrict to. No restriction if >=nColumn */ - QueryTerm *pQTerm, /* Term we are looking for, or 1st term of a phrase */ - DataBuffer *pResult /* Write the result here */ -){ - DataBuffer left, right, new; - int i, rc; - - /* No phrase search if no position info. */ - assert( pQTerm->nPhrase==0 || DL_DEFAULT!=DL_DOCIDS ); - - /* This code should never be called with buffered updates. */ - assert( v->nPendingData<0 ); - - dataBufferInit(&left, 0); - rc = termSelect(v, iColumn, pQTerm->pTerm, pQTerm->nTerm, pQTerm->isPrefix, - 0nPhrase ? DL_POSITIONS : DL_DOCIDS, &left); - if( rc ) return rc; - for(i=1; i<=pQTerm->nPhrase && left.nData>0; i++){ - dataBufferInit(&right, 0); - rc = termSelect(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm, - pQTerm[i].isPrefix, DL_POSITIONS, &right); - if( rc ){ - dataBufferDestroy(&left); - return rc; - } - dataBufferInit(&new, 0); - docListPhraseMerge(left.pData, left.nData, right.pData, right.nData, - inPhrase ? DL_POSITIONS : DL_DOCIDS, &new); - dataBufferDestroy(&left); - dataBufferDestroy(&right); - left = new; - } - *pResult = left; - return SQLITE_OK; -} - -/* Add a new term pTerm[0..nTerm-1] to the query *q. -*/ -static void queryAdd(Query *q, const char *pTerm, int nTerm){ - QueryTerm *t; - ++q->nTerms; - q->pTerms = sqlite3_realloc(q->pTerms, q->nTerms * sizeof(q->pTerms[0])); - if( q->pTerms==0 ){ - q->nTerms = 0; - return; - } - t = &q->pTerms[q->nTerms - 1]; - CLEAR(t); - t->pTerm = sqlite3_malloc(nTerm+1); - memcpy(t->pTerm, pTerm, nTerm); - t->pTerm[nTerm] = 0; - t->nTerm = nTerm; - t->isOr = q->nextIsOr; - t->isPrefix = 0; - q->nextIsOr = 0; - t->iColumn = q->nextColumn; - q->nextColumn = q->dfltColumn; -} - -/* -** Check to see if the string zToken[0...nToken-1] matches any -** column name in the virtual table. If it does, -** return the zero-indexed column number. If not, return -1. -*/ -static int checkColumnSpecifier( - fulltext_vtab *pVtab, /* The virtual table */ - const char *zToken, /* Text of the token */ - int nToken /* Number of characters in the token */ -){ - int i; - for(i=0; inColumn; i++){ - if( memcmp(pVtab->azColumn[i], zToken, nToken)==0 - && pVtab->azColumn[i][nToken]==0 ){ - return i; - } - } - return -1; -} - -/* -** Parse the text at pSegment[0..nSegment-1]. Add additional terms -** to the query being assemblied in pQuery. -** -** inPhrase is true if pSegment[0..nSegement-1] is contained within -** double-quotes. If inPhrase is true, then the first term -** is marked with the number of terms in the phrase less one and -** OR and "-" syntax is ignored. If inPhrase is false, then every -** term found is marked with nPhrase=0 and OR and "-" syntax is significant. -*/ -static int tokenizeSegment( - sqlite3_tokenizer *pTokenizer, /* The tokenizer to use */ - const char *pSegment, int nSegment, /* Query expression being parsed */ - int inPhrase, /* True if within "..." */ - Query *pQuery /* Append results here */ -){ - const sqlite3_tokenizer_module *pModule = pTokenizer->pModule; - sqlite3_tokenizer_cursor *pCursor; - int firstIndex = pQuery->nTerms; - int iCol; - int nTerm = 1; - - int rc = pModule->xOpen(pTokenizer, pSegment, nSegment, &pCursor); - if( rc!=SQLITE_OK ) return rc; - pCursor->pTokenizer = pTokenizer; - - while( 1 ){ - const char *pToken; - int nToken, iBegin, iEnd, iPos; - - rc = pModule->xNext(pCursor, - &pToken, &nToken, - &iBegin, &iEnd, &iPos); - if( rc!=SQLITE_OK ) break; - if( !inPhrase && - pSegment[iEnd]==':' && - (iCol = checkColumnSpecifier(pQuery->pFts, pToken, nToken))>=0 ){ - pQuery->nextColumn = iCol; - continue; - } - if( !inPhrase && pQuery->nTerms>0 && nToken==2 - && pSegment[iBegin]=='O' && pSegment[iBegin+1]=='R' ){ - pQuery->nextIsOr = 1; - continue; - } - queryAdd(pQuery, pToken, nToken); - if( !inPhrase && iBegin>0 && pSegment[iBegin-1]=='-' ){ - pQuery->pTerms[pQuery->nTerms-1].isNot = 1; - } - if( iEndpTerms[pQuery->nTerms-1].isPrefix = 1; - } - pQuery->pTerms[pQuery->nTerms-1].iPhrase = nTerm; - if( inPhrase ){ - nTerm++; - } - } - - if( inPhrase && pQuery->nTerms>firstIndex ){ - pQuery->pTerms[firstIndex].nPhrase = pQuery->nTerms - firstIndex - 1; - } - - return pModule->xClose(pCursor); -} - -/* Parse a query string, yielding a Query object pQuery. -** -** The calling function will need to queryClear() to clean up -** the dynamically allocated memory held by pQuery. -*/ -static int parseQuery( - fulltext_vtab *v, /* The fulltext index */ - const char *zInput, /* Input text of the query string */ - int nInput, /* Size of the input text */ - int dfltColumn, /* Default column of the index to match against */ - Query *pQuery /* Write the parse results here. */ -){ - int iInput, inPhrase = 0; - - if( zInput==0 ) nInput = 0; - if( nInput<0 ) nInput = strlen(zInput); - pQuery->nTerms = 0; - pQuery->pTerms = NULL; - pQuery->nextIsOr = 0; - pQuery->nextColumn = dfltColumn; - pQuery->dfltColumn = dfltColumn; - pQuery->pFts = v; - - for(iInput=0; iInputiInput ){ - tokenizeSegment(v->pTokenizer, zInput+iInput, i-iInput, inPhrase, - pQuery); - } - iInput = i; - if( i=nColumn -** they are allowed to match against any column. -*/ -static int fulltextQuery( - fulltext_vtab *v, /* The full text index */ - int iColumn, /* Match against this column by default */ - const char *zInput, /* The query string */ - int nInput, /* Number of bytes in zInput[] */ - DataBuffer *pResult, /* Write the result doclist here */ - Query *pQuery /* Put parsed query string here */ -){ - int i, iNext, rc; - DataBuffer left, right, or, new; - int nNot = 0; - QueryTerm *aTerm; - - /* TODO(shess) Instead of flushing pendingTerms, we could query for - ** the relevant term and merge the doclist into what we receive from - ** the database. Wait and see if this is a common issue, first. - ** - ** A good reason not to flush is to not generate update-related - ** error codes from here. - */ - - /* Flush any buffered updates before executing the query. */ - rc = flushPendingTerms(v); - if( rc!=SQLITE_OK ) return rc; - - /* TODO(shess) I think that the queryClear() calls below are not - ** necessary, because fulltextClose() already clears the query. - */ - rc = parseQuery(v, zInput, nInput, iColumn, pQuery); - if( rc!=SQLITE_OK ) return rc; - - /* Empty or NULL queries return no results. */ - if( pQuery->nTerms==0 ){ - dataBufferInit(pResult, 0); - return SQLITE_OK; - } - - /* Merge AND terms. */ - /* TODO(shess) I think we can early-exit if( i>nNot && left.nData==0 ). */ - aTerm = pQuery->pTerms; - for(i = 0; inTerms; i=iNext){ - if( aTerm[i].isNot ){ - /* Handle all NOT terms in a separate pass */ - nNot++; - iNext = i + aTerm[i].nPhrase+1; - continue; - } - iNext = i + aTerm[i].nPhrase + 1; - rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &right); - if( rc ){ - if( i!=nNot ) dataBufferDestroy(&left); - queryClear(pQuery); - return rc; - } - while( iNextnTerms && aTerm[iNext].isOr ){ - rc = docListOfTerm(v, aTerm[iNext].iColumn, &aTerm[iNext], &or); - iNext += aTerm[iNext].nPhrase + 1; - if( rc ){ - if( i!=nNot ) dataBufferDestroy(&left); - dataBufferDestroy(&right); - queryClear(pQuery); - return rc; - } - dataBufferInit(&new, 0); - docListOrMerge(right.pData, right.nData, or.pData, or.nData, &new); - dataBufferDestroy(&right); - dataBufferDestroy(&or); - right = new; - } - if( i==nNot ){ /* first term processed. */ - left = right; - }else{ - dataBufferInit(&new, 0); - docListAndMerge(left.pData, left.nData, right.pData, right.nData, &new); - dataBufferDestroy(&right); - dataBufferDestroy(&left); - left = new; - } - } - - if( nNot==pQuery->nTerms ){ - /* We do not yet know how to handle a query of only NOT terms */ - return SQLITE_ERROR; - } - - /* Do the EXCEPT terms */ - for(i=0; inTerms; i += aTerm[i].nPhrase + 1){ - if( !aTerm[i].isNot ) continue; - rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &right); - if( rc ){ - queryClear(pQuery); - dataBufferDestroy(&left); - return rc; - } - dataBufferInit(&new, 0); - docListExceptMerge(left.pData, left.nData, right.pData, right.nData, &new); - dataBufferDestroy(&right); - dataBufferDestroy(&left); - left = new; - } - - *pResult = left; - return rc; -} - -/* -** This is the xFilter interface for the virtual table. See -** the virtual table xFilter method documentation for additional -** information. -** -** If idxNum==QUERY_GENERIC then do a full table scan against -** the %_content table. -** -** If idxNum==QUERY_ROWID then do a rowid lookup for a single entry -** in the %_content table. -** -** If idxNum>=QUERY_FULLTEXT then use the full text index. The -** column on the left-hand side of the MATCH operator is column -** number idxNum-QUERY_FULLTEXT, 0 indexed. argv[0] is the right-hand -** side of the MATCH operator. -*/ -/* TODO(shess) Upgrade the cursor initialization and destruction to -** account for fulltextFilter() being called multiple times on the -** same cursor. The current solution is very fragile. Apply fix to -** fts2 as appropriate. -*/ -static int fulltextFilter( - sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ - int idxNum, const char *idxStr, /* Which indexing scheme to use */ - int argc, sqlite3_value **argv /* Arguments for the indexing scheme */ -){ - fulltext_cursor *c = (fulltext_cursor *) pCursor; - fulltext_vtab *v = cursor_vtab(c); - int rc; - - TRACE(("FTS2 Filter %p\n",pCursor)); - - /* If the cursor has a statement that was not prepared according to - ** idxNum, clear it. I believe all calls to fulltextFilter with a - ** given cursor will have the same idxNum , but in this case it's - ** easy to be safe. - */ - if( c->pStmt && c->iCursorType!=idxNum ){ - sqlite3_finalize(c->pStmt); - c->pStmt = NULL; - } - - /* Get a fresh statement appropriate to idxNum. */ - /* TODO(shess): Add a prepared-statement cache in the vt structure. - ** The cache must handle multiple open cursors. Easier to cache the - ** statement variants at the vt to reduce malloc/realloc/free here. - ** Or we could have a StringBuffer variant which allowed stack - ** construction for small values. - */ - if( !c->pStmt ){ - char *zSql = sqlite3_mprintf("select rowid, * from %%_content %s", - idxNum==QUERY_GENERIC ? "" : "where rowid=?"); - rc = sql_prepare(v->db, v->zDb, v->zName, &c->pStmt, zSql); - sqlite3_free(zSql); - if( rc!=SQLITE_OK ) return rc; - c->iCursorType = idxNum; - }else{ - sqlite3_reset(c->pStmt); - assert( c->iCursorType==idxNum ); - } - - switch( idxNum ){ - case QUERY_GENERIC: - break; - - case QUERY_ROWID: - rc = sqlite3_bind_int64(c->pStmt, 1, sqlite3_value_int64(argv[0])); - if( rc!=SQLITE_OK ) return rc; - break; - - default: /* full-text search */ - { - const char *zQuery = (const char *)sqlite3_value_text(argv[0]); - assert( idxNum<=QUERY_FULLTEXT+v->nColumn); - assert( argc==1 ); - queryClear(&c->q); - if( c->result.nData!=0 ){ - /* This case happens if the same cursor is used repeatedly. */ - dlrDestroy(&c->reader); - dataBufferReset(&c->result); - }else{ - dataBufferInit(&c->result, 0); - } - rc = fulltextQuery(v, idxNum-QUERY_FULLTEXT, zQuery, -1, &c->result, &c->q); - if( rc!=SQLITE_OK ) return rc; - if( c->result.nData!=0 ){ - dlrInit(&c->reader, DL_DOCIDS, c->result.pData, c->result.nData); - } - break; - } - } - - return fulltextNext(pCursor); -} - -/* This is the xEof method of the virtual table. The SQLite core -** calls this routine to find out if it has reached the end of -** a query's results set. -*/ -static int fulltextEof(sqlite3_vtab_cursor *pCursor){ - fulltext_cursor *c = (fulltext_cursor *) pCursor; - return c->eof; -} - -/* This is the xColumn method of the virtual table. The SQLite -** core calls this method during a query when it needs the value -** of a column from the virtual table. This method needs to use -** one of the sqlite3_result_*() routines to store the requested -** value back in the pContext. -*/ -static int fulltextColumn(sqlite3_vtab_cursor *pCursor, - sqlite3_context *pContext, int idxCol){ - fulltext_cursor *c = (fulltext_cursor *) pCursor; - fulltext_vtab *v = cursor_vtab(c); - - if( idxColnColumn ){ - sqlite3_value *pVal = sqlite3_column_value(c->pStmt, idxCol+1); - sqlite3_result_value(pContext, pVal); - }else if( idxCol==v->nColumn ){ - /* The extra column whose name is the same as the table. - ** Return a blob which is a pointer to the cursor - */ - sqlite3_result_blob(pContext, &c, sizeof(c), SQLITE_TRANSIENT); - } - return SQLITE_OK; -} - -/* This is the xRowid method. The SQLite core calls this routine to -** retrive the rowid for the current row of the result set. The -** rowid should be written to *pRowid. -*/ -static int fulltextRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ - fulltext_cursor *c = (fulltext_cursor *) pCursor; - - *pRowid = sqlite3_column_int64(c->pStmt, 0); - return SQLITE_OK; -} - -/* Add all terms in [zText] to pendingTerms table. If [iColumn] > 0, -** we also store positions and offsets in the hash table using that -** column number. -*/ -static int buildTerms(fulltext_vtab *v, sqlite_int64 iDocid, - const char *zText, int iColumn){ - sqlite3_tokenizer *pTokenizer = v->pTokenizer; - sqlite3_tokenizer_cursor *pCursor; - const char *pToken; - int nTokenBytes; - int iStartOffset, iEndOffset, iPosition; - int rc; - - rc = pTokenizer->pModule->xOpen(pTokenizer, zText, -1, &pCursor); - if( rc!=SQLITE_OK ) return rc; - - pCursor->pTokenizer = pTokenizer; - while( SQLITE_OK==(rc=pTokenizer->pModule->xNext(pCursor, - &pToken, &nTokenBytes, - &iStartOffset, &iEndOffset, - &iPosition)) ){ - DLCollector *p; - int nData; /* Size of doclist before our update. */ - - /* Positions can't be negative; we use -1 as a terminator - * internally. Token can't be NULL or empty. */ - if( iPosition<0 || pToken == NULL || nTokenBytes == 0 ){ - rc = SQLITE_ERROR; - break; - } - - p = fts2HashFind(&v->pendingTerms, pToken, nTokenBytes); - if( p==NULL ){ - nData = 0; - p = dlcNew(iDocid, DL_DEFAULT); - fts2HashInsert(&v->pendingTerms, pToken, nTokenBytes, p); - - /* Overhead for our hash table entry, the key, and the value. */ - v->nPendingData += sizeof(struct fts2HashElem)+sizeof(*p)+nTokenBytes; - }else{ - nData = p->b.nData; - if( p->dlw.iPrevDocid!=iDocid ) dlcNext(p, iDocid); - } - if( iColumn>=0 ){ - dlcAddPos(p, iColumn, iPosition, iStartOffset, iEndOffset); - } - - /* Accumulate data added by dlcNew or dlcNext, and dlcAddPos. */ - v->nPendingData += p->b.nData-nData; - } - - /* TODO(shess) Check return? Should this be able to cause errors at - ** this point? Actually, same question about sqlite3_finalize(), - ** though one could argue that failure there means that the data is - ** not durable. *ponder* - */ - pTokenizer->pModule->xClose(pCursor); - if( SQLITE_DONE == rc ) return SQLITE_OK; - return rc; -} - -/* Add doclists for all terms in [pValues] to pendingTerms table. */ -static int insertTerms(fulltext_vtab *v, sqlite_int64 iRowid, - sqlite3_value **pValues){ - int i; - for(i = 0; i < v->nColumn ; ++i){ - char *zText = (char*)sqlite3_value_text(pValues[i]); - int rc = buildTerms(v, iRowid, zText, i); - if( rc!=SQLITE_OK ) return rc; - } - return SQLITE_OK; -} - -/* Add empty doclists for all terms in the given row's content to -** pendingTerms. -*/ -static int deleteTerms(fulltext_vtab *v, sqlite_int64 iRowid){ - const char **pValues; - int i, rc; - - /* TODO(shess) Should we allow such tables at all? */ - if( DL_DEFAULT==DL_DOCIDS ) return SQLITE_ERROR; - - rc = content_select(v, iRowid, &pValues); - if( rc!=SQLITE_OK ) return rc; - - for(i = 0 ; i < v->nColumn; ++i) { - rc = buildTerms(v, iRowid, pValues[i], -1); - if( rc!=SQLITE_OK ) break; - } - - freeStringArray(v->nColumn, pValues); - return SQLITE_OK; -} - -/* TODO(shess) Refactor the code to remove this forward decl. */ -static int initPendingTerms(fulltext_vtab *v, sqlite_int64 iDocid); - -/* Insert a row into the %_content table; set *piRowid to be the ID of the -** new row. Add doclists for terms to pendingTerms. -*/ -static int index_insert(fulltext_vtab *v, sqlite3_value *pRequestRowid, - sqlite3_value **pValues, sqlite_int64 *piRowid){ - int rc; - - rc = content_insert(v, pRequestRowid, pValues); /* execute an SQL INSERT */ - if( rc!=SQLITE_OK ) return rc; - - *piRowid = sqlite3_last_insert_rowid(v->db); - rc = initPendingTerms(v, *piRowid); - if( rc!=SQLITE_OK ) return rc; - - return insertTerms(v, *piRowid, pValues); -} - -/* Delete a row from the %_content table; add empty doclists for terms -** to pendingTerms. -*/ -static int index_delete(fulltext_vtab *v, sqlite_int64 iRow){ - int rc = initPendingTerms(v, iRow); - if( rc!=SQLITE_OK ) return rc; - - rc = deleteTerms(v, iRow); - if( rc!=SQLITE_OK ) return rc; - - return content_delete(v, iRow); /* execute an SQL DELETE */ -} - -/* Update a row in the %_content table; add delete doclists to -** pendingTerms for old terms not in the new data, add insert doclists -** to pendingTerms for terms in the new data. -*/ -static int index_update(fulltext_vtab *v, sqlite_int64 iRow, - sqlite3_value **pValues){ - int rc = initPendingTerms(v, iRow); - if( rc!=SQLITE_OK ) return rc; - - /* Generate an empty doclist for each term that previously appeared in this - * row. */ - rc = deleteTerms(v, iRow); - if( rc!=SQLITE_OK ) return rc; - - rc = content_update(v, pValues, iRow); /* execute an SQL UPDATE */ - if( rc!=SQLITE_OK ) return rc; - - /* Now add positions for terms which appear in the updated row. */ - return insertTerms(v, iRow, pValues); -} - -/*******************************************************************/ -/* InteriorWriter is used to collect terms and block references into -** interior nodes in %_segments. See commentary at top of file for -** format. -*/ - -/* How large interior nodes can grow. */ -#define INTERIOR_MAX 2048 - -/* Minimum number of terms per interior node (except the root). This -** prevents large terms from making the tree too skinny - must be >0 -** so that the tree always makes progress. Note that the min tree -** fanout will be INTERIOR_MIN_TERMS+1. -*/ -#define INTERIOR_MIN_TERMS 7 -#if INTERIOR_MIN_TERMS<1 -# error INTERIOR_MIN_TERMS must be greater than 0. -#endif - -/* ROOT_MAX controls how much data is stored inline in the segment -** directory. -*/ -/* TODO(shess) Push ROOT_MAX down to whoever is writing things. It's -** only here so that interiorWriterRootInfo() and leafWriterRootInfo() -** can both see it, but if the caller passed it in, we wouldn't even -** need a define. -*/ -#define ROOT_MAX 1024 -#if ROOT_MAXterm, 0); - dataBufferReplace(&block->term, pTerm, nTerm); - - n = putVarint(c, iHeight); - n += putVarint(c+n, iChildBlock); - dataBufferInit(&block->data, INTERIOR_MAX); - dataBufferReplace(&block->data, c, n); - } - return block; -} - -#ifndef NDEBUG -/* Verify that the data is readable as an interior node. */ -static void interiorBlockValidate(InteriorBlock *pBlock){ - const char *pData = pBlock->data.pData; - int nData = pBlock->data.nData; - int n, iDummy; - sqlite_int64 iBlockid; - - assert( nData>0 ); - assert( pData!=0 ); - assert( pData+nData>pData ); - - /* Must lead with height of node as a varint(n), n>0 */ - n = getVarint32(pData, &iDummy); - assert( n>0 ); - assert( iDummy>0 ); - assert( n0 ); - assert( n<=nData ); - pData += n; - nData -= n; - - /* Zero or more terms of positive length */ - if( nData!=0 ){ - /* First term is not delta-encoded. */ - n = getVarint32(pData, &iDummy); - assert( n>0 ); - assert( iDummy>0 ); - assert( n+iDummy>0); - assert( n+iDummy<=nData ); - pData += n+iDummy; - nData -= n+iDummy; - - /* Following terms delta-encoded. */ - while( nData!=0 ){ - /* Length of shared prefix. */ - n = getVarint32(pData, &iDummy); - assert( n>0 ); - assert( iDummy>=0 ); - assert( n0 ); - assert( iDummy>0 ); - assert( n+iDummy>0); - assert( n+iDummy<=nData ); - pData += n+iDummy; - nData -= n+iDummy; - } - } -} -#define ASSERT_VALID_INTERIOR_BLOCK(x) interiorBlockValidate(x) -#else -#define ASSERT_VALID_INTERIOR_BLOCK(x) assert( 1 ) -#endif - -typedef struct InteriorWriter { - int iHeight; /* from 0 at leaves. */ - InteriorBlock *first, *last; - struct InteriorWriter *parentWriter; - - DataBuffer term; /* Last term written to block "last". */ - sqlite_int64 iOpeningChildBlock; /* First child block in block "last". */ -#ifndef NDEBUG - sqlite_int64 iLastChildBlock; /* for consistency checks. */ -#endif -} InteriorWriter; - -/* Initialize an interior node where pTerm[nTerm] marks the leftmost -** term in the tree. iChildBlock is the leftmost child block at the -** next level down the tree. -*/ -static void interiorWriterInit(int iHeight, const char *pTerm, int nTerm, - sqlite_int64 iChildBlock, - InteriorWriter *pWriter){ - InteriorBlock *block; - assert( iHeight>0 ); - CLEAR(pWriter); - - pWriter->iHeight = iHeight; - pWriter->iOpeningChildBlock = iChildBlock; -#ifndef NDEBUG - pWriter->iLastChildBlock = iChildBlock; -#endif - block = interiorBlockNew(iHeight, iChildBlock, pTerm, nTerm); - pWriter->last = pWriter->first = block; - ASSERT_VALID_INTERIOR_BLOCK(pWriter->last); - dataBufferInit(&pWriter->term, 0); -} - -/* Append the child node rooted at iChildBlock to the interior node, -** with pTerm[nTerm] as the leftmost term in iChildBlock's subtree. -*/ -static void interiorWriterAppend(InteriorWriter *pWriter, - const char *pTerm, int nTerm, - sqlite_int64 iChildBlock){ - char c[VARINT_MAX+VARINT_MAX]; - int n, nPrefix = 0; - - ASSERT_VALID_INTERIOR_BLOCK(pWriter->last); - - /* The first term written into an interior node is actually - ** associated with the second child added (the first child was added - ** in interiorWriterInit, or in the if clause at the bottom of this - ** function). That term gets encoded straight up, with nPrefix left - ** at 0. - */ - if( pWriter->term.nData==0 ){ - n = putVarint(c, nTerm); - }else{ - while( nPrefixterm.nData && - pTerm[nPrefix]==pWriter->term.pData[nPrefix] ){ - nPrefix++; - } - - n = putVarint(c, nPrefix); - n += putVarint(c+n, nTerm-nPrefix); - } - -#ifndef NDEBUG - pWriter->iLastChildBlock++; -#endif - assert( pWriter->iLastChildBlock==iChildBlock ); - - /* Overflow to a new block if the new term makes the current block - ** too big, and the current block already has enough terms. - */ - if( pWriter->last->data.nData+n+nTerm-nPrefix>INTERIOR_MAX && - iChildBlock-pWriter->iOpeningChildBlock>INTERIOR_MIN_TERMS ){ - pWriter->last->next = interiorBlockNew(pWriter->iHeight, iChildBlock, - pTerm, nTerm); - pWriter->last = pWriter->last->next; - pWriter->iOpeningChildBlock = iChildBlock; - dataBufferReset(&pWriter->term); - }else{ - dataBufferAppend2(&pWriter->last->data, c, n, - pTerm+nPrefix, nTerm-nPrefix); - dataBufferReplace(&pWriter->term, pTerm, nTerm); - } - ASSERT_VALID_INTERIOR_BLOCK(pWriter->last); -} - -/* Free the space used by pWriter, including the linked-list of -** InteriorBlocks, and parentWriter, if present. -*/ -static int interiorWriterDestroy(InteriorWriter *pWriter){ - InteriorBlock *block = pWriter->first; - - while( block!=NULL ){ - InteriorBlock *b = block; - block = block->next; - dataBufferDestroy(&b->term); - dataBufferDestroy(&b->data); - sqlite3_free(b); - } - if( pWriter->parentWriter!=NULL ){ - interiorWriterDestroy(pWriter->parentWriter); - sqlite3_free(pWriter->parentWriter); - } - dataBufferDestroy(&pWriter->term); - SCRAMBLE(pWriter); - return SQLITE_OK; -} - -/* If pWriter can fit entirely in ROOT_MAX, return it as the root info -** directly, leaving *piEndBlockid unchanged. Otherwise, flush -** pWriter to %_segments, building a new layer of interior nodes, and -** recursively ask for their root into. -*/ -static int interiorWriterRootInfo(fulltext_vtab *v, InteriorWriter *pWriter, - char **ppRootInfo, int *pnRootInfo, - sqlite_int64 *piEndBlockid){ - InteriorBlock *block = pWriter->first; - sqlite_int64 iBlockid = 0; - int rc; - - /* If we can fit the segment inline */ - if( block==pWriter->last && block->data.nDatadata.pData; - *pnRootInfo = block->data.nData; - return SQLITE_OK; - } - - /* Flush the first block to %_segments, and create a new level of - ** interior node. - */ - ASSERT_VALID_INTERIOR_BLOCK(block); - rc = block_insert(v, block->data.pData, block->data.nData, &iBlockid); - if( rc!=SQLITE_OK ) return rc; - *piEndBlockid = iBlockid; - - pWriter->parentWriter = sqlite3_malloc(sizeof(*pWriter->parentWriter)); - interiorWriterInit(pWriter->iHeight+1, - block->term.pData, block->term.nData, - iBlockid, pWriter->parentWriter); - - /* Flush additional blocks and append to the higher interior - ** node. - */ - for(block=block->next; block!=NULL; block=block->next){ - ASSERT_VALID_INTERIOR_BLOCK(block); - rc = block_insert(v, block->data.pData, block->data.nData, &iBlockid); - if( rc!=SQLITE_OK ) return rc; - *piEndBlockid = iBlockid; - - interiorWriterAppend(pWriter->parentWriter, - block->term.pData, block->term.nData, iBlockid); - } - - /* Parent node gets the chance to be the root. */ - return interiorWriterRootInfo(v, pWriter->parentWriter, - ppRootInfo, pnRootInfo, piEndBlockid); -} - -/****************************************************************/ -/* InteriorReader is used to read off the data from an interior node -** (see comment at top of file for the format). -*/ -typedef struct InteriorReader { - const char *pData; - int nData; - - DataBuffer term; /* previous term, for decoding term delta. */ - - sqlite_int64 iBlockid; -} InteriorReader; - -static void interiorReaderDestroy(InteriorReader *pReader){ - dataBufferDestroy(&pReader->term); - SCRAMBLE(pReader); -} - -/* TODO(shess) The assertions are great, but what if we're in NDEBUG -** and the blob is empty or otherwise contains suspect data? -*/ -static void interiorReaderInit(const char *pData, int nData, - InteriorReader *pReader){ - int n, nTerm; - - /* Require at least the leading flag byte */ - assert( nData>0 ); - assert( pData[0]!='\0' ); - - CLEAR(pReader); - - /* Decode the base blockid, and set the cursor to the first term. */ - n = getVarint(pData+1, &pReader->iBlockid); - assert( 1+n<=nData ); - pReader->pData = pData+1+n; - pReader->nData = nData-(1+n); - - /* A single-child interior node (such as when a leaf node was too - ** large for the segment directory) won't have any terms. - ** Otherwise, decode the first term. - */ - if( pReader->nData==0 ){ - dataBufferInit(&pReader->term, 0); - }else{ - n = getVarint32(pReader->pData, &nTerm); - dataBufferInit(&pReader->term, nTerm); - dataBufferReplace(&pReader->term, pReader->pData+n, nTerm); - assert( n+nTerm<=pReader->nData ); - pReader->pData += n+nTerm; - pReader->nData -= n+nTerm; - } -} - -static int interiorReaderAtEnd(InteriorReader *pReader){ - return pReader->term.nData==0; -} - -static sqlite_int64 interiorReaderCurrentBlockid(InteriorReader *pReader){ - return pReader->iBlockid; -} - -static int interiorReaderTermBytes(InteriorReader *pReader){ - assert( !interiorReaderAtEnd(pReader) ); - return pReader->term.nData; -} -static const char *interiorReaderTerm(InteriorReader *pReader){ - assert( !interiorReaderAtEnd(pReader) ); - return pReader->term.pData; -} - -/* Step forward to the next term in the node. */ -static void interiorReaderStep(InteriorReader *pReader){ - assert( !interiorReaderAtEnd(pReader) ); - - /* If the last term has been read, signal eof, else construct the - ** next term. - */ - if( pReader->nData==0 ){ - dataBufferReset(&pReader->term); - }else{ - int n, nPrefix, nSuffix; - - n = getVarint32(pReader->pData, &nPrefix); - n += getVarint32(pReader->pData+n, &nSuffix); - - /* Truncate the current term and append suffix data. */ - pReader->term.nData = nPrefix; - dataBufferAppend(&pReader->term, pReader->pData+n, nSuffix); - - assert( n+nSuffix<=pReader->nData ); - pReader->pData += n+nSuffix; - pReader->nData -= n+nSuffix; - } - pReader->iBlockid++; -} - -/* Compare the current term to pTerm[nTerm], returning strcmp-style -** results. If isPrefix, equality means equal through nTerm bytes. -*/ -static int interiorReaderTermCmp(InteriorReader *pReader, - const char *pTerm, int nTerm, int isPrefix){ - const char *pReaderTerm = interiorReaderTerm(pReader); - int nReaderTerm = interiorReaderTermBytes(pReader); - int c, n = nReaderTerm0 ) return -1; - if( nTerm>0 ) return 1; - return 0; - } - - c = memcmp(pReaderTerm, pTerm, n); - if( c!=0 ) return c; - if( isPrefix && n==nTerm ) return 0; - return nReaderTerm - nTerm; -} - -/****************************************************************/ -/* LeafWriter is used to collect terms and associated doclist data -** into leaf blocks in %_segments (see top of file for format info). -** Expected usage is: -** -** LeafWriter writer; -** leafWriterInit(0, 0, &writer); -** while( sorted_terms_left_to_process ){ -** // data is doclist data for that term. -** rc = leafWriterStep(v, &writer, pTerm, nTerm, pData, nData); -** if( rc!=SQLITE_OK ) goto err; -** } -** rc = leafWriterFinalize(v, &writer); -**err: -** leafWriterDestroy(&writer); -** return rc; -** -** leafWriterStep() may write a collected leaf out to %_segments. -** leafWriterFinalize() finishes writing any buffered data and stores -** a root node in %_segdir. leafWriterDestroy() frees all buffers and -** InteriorWriters allocated as part of writing this segment. -** -** TODO(shess) Document leafWriterStepMerge(). -*/ - -/* Put terms with data this big in their own block. */ -#define STANDALONE_MIN 1024 - -/* Keep leaf blocks below this size. */ -#define LEAF_MAX 2048 - -typedef struct LeafWriter { - int iLevel; - int idx; - sqlite_int64 iStartBlockid; /* needed to create the root info */ - sqlite_int64 iEndBlockid; /* when we're done writing. */ - - DataBuffer term; /* previous encoded term */ - DataBuffer data; /* encoding buffer */ - - /* bytes of first term in the current node which distinguishes that - ** term from the last term of the previous node. - */ - int nTermDistinct; - - InteriorWriter parentWriter; /* if we overflow */ - int has_parent; -} LeafWriter; - -static void leafWriterInit(int iLevel, int idx, LeafWriter *pWriter){ - CLEAR(pWriter); - pWriter->iLevel = iLevel; - pWriter->idx = idx; - - dataBufferInit(&pWriter->term, 32); - - /* Start out with a reasonably sized block, though it can grow. */ - dataBufferInit(&pWriter->data, LEAF_MAX); -} - -#ifndef NDEBUG -/* Verify that the data is readable as a leaf node. */ -static void leafNodeValidate(const char *pData, int nData){ - int n, iDummy; - - if( nData==0 ) return; - assert( nData>0 ); - assert( pData!=0 ); - assert( pData+nData>pData ); - - /* Must lead with a varint(0) */ - n = getVarint32(pData, &iDummy); - assert( iDummy==0 ); - assert( n>0 ); - assert( n0 ); - assert( iDummy>0 ); - assert( n+iDummy>0 ); - assert( n+iDummy0 ); - assert( iDummy>0 ); - assert( n+iDummy>0 ); - assert( n+iDummy<=nData ); - ASSERT_VALID_DOCLIST(DL_DEFAULT, pData+n, iDummy, NULL); - pData += n+iDummy; - nData -= n+iDummy; - - /* Verify that trailing terms and doclists also are readable. */ - while( nData!=0 ){ - n = getVarint32(pData, &iDummy); - assert( n>0 ); - assert( iDummy>=0 ); - assert( n0 ); - assert( iDummy>0 ); - assert( n+iDummy>0 ); - assert( n+iDummy0 ); - assert( iDummy>0 ); - assert( n+iDummy>0 ); - assert( n+iDummy<=nData ); - ASSERT_VALID_DOCLIST(DL_DEFAULT, pData+n, iDummy, NULL); - pData += n+iDummy; - nData -= n+iDummy; - } -} -#define ASSERT_VALID_LEAF_NODE(p, n) leafNodeValidate(p, n) -#else -#define ASSERT_VALID_LEAF_NODE(p, n) assert( 1 ) -#endif - -/* Flush the current leaf node to %_segments, and adding the resulting -** blockid and the starting term to the interior node which will -** contain it. -*/ -static int leafWriterInternalFlush(fulltext_vtab *v, LeafWriter *pWriter, - int iData, int nData){ - sqlite_int64 iBlockid = 0; - const char *pStartingTerm; - int nStartingTerm, rc, n; - - /* Must have the leading varint(0) flag, plus at least some - ** valid-looking data. - */ - assert( nData>2 ); - assert( iData>=0 ); - assert( iData+nData<=pWriter->data.nData ); - ASSERT_VALID_LEAF_NODE(pWriter->data.pData+iData, nData); - - rc = block_insert(v, pWriter->data.pData+iData, nData, &iBlockid); - if( rc!=SQLITE_OK ) return rc; - assert( iBlockid!=0 ); - - /* Reconstruct the first term in the leaf for purposes of building - ** the interior node. - */ - n = getVarint32(pWriter->data.pData+iData+1, &nStartingTerm); - pStartingTerm = pWriter->data.pData+iData+1+n; - assert( pWriter->data.nData>iData+1+n+nStartingTerm ); - assert( pWriter->nTermDistinct>0 ); - assert( pWriter->nTermDistinct<=nStartingTerm ); - nStartingTerm = pWriter->nTermDistinct; - - if( pWriter->has_parent ){ - interiorWriterAppend(&pWriter->parentWriter, - pStartingTerm, nStartingTerm, iBlockid); - }else{ - interiorWriterInit(1, pStartingTerm, nStartingTerm, iBlockid, - &pWriter->parentWriter); - pWriter->has_parent = 1; - } - - /* Track the span of this segment's leaf nodes. */ - if( pWriter->iEndBlockid==0 ){ - pWriter->iEndBlockid = pWriter->iStartBlockid = iBlockid; - }else{ - pWriter->iEndBlockid++; - assert( iBlockid==pWriter->iEndBlockid ); - } - - return SQLITE_OK; -} -static int leafWriterFlush(fulltext_vtab *v, LeafWriter *pWriter){ - int rc = leafWriterInternalFlush(v, pWriter, 0, pWriter->data.nData); - if( rc!=SQLITE_OK ) return rc; - - /* Re-initialize the output buffer. */ - dataBufferReset(&pWriter->data); - - return SQLITE_OK; -} - -/* Fetch the root info for the segment. If the entire leaf fits -** within ROOT_MAX, then it will be returned directly, otherwise it -** will be flushed and the root info will be returned from the -** interior node. *piEndBlockid is set to the blockid of the last -** interior or leaf node written to disk (0 if none are written at -** all). -*/ -static int leafWriterRootInfo(fulltext_vtab *v, LeafWriter *pWriter, - char **ppRootInfo, int *pnRootInfo, - sqlite_int64 *piEndBlockid){ - /* we can fit the segment entirely inline */ - if( !pWriter->has_parent && pWriter->data.nDatadata.pData; - *pnRootInfo = pWriter->data.nData; - *piEndBlockid = 0; - return SQLITE_OK; - } - - /* Flush remaining leaf data. */ - if( pWriter->data.nData>0 ){ - int rc = leafWriterFlush(v, pWriter); - if( rc!=SQLITE_OK ) return rc; - } - - /* We must have flushed a leaf at some point. */ - assert( pWriter->has_parent ); - - /* Tenatively set the end leaf blockid as the end blockid. If the - ** interior node can be returned inline, this will be the final - ** blockid, otherwise it will be overwritten by - ** interiorWriterRootInfo(). - */ - *piEndBlockid = pWriter->iEndBlockid; - - return interiorWriterRootInfo(v, &pWriter->parentWriter, - ppRootInfo, pnRootInfo, piEndBlockid); -} - -/* Collect the rootInfo data and store it into the segment directory. -** This has the effect of flushing the segment's leaf data to -** %_segments, and also flushing any interior nodes to %_segments. -*/ -static int leafWriterFinalize(fulltext_vtab *v, LeafWriter *pWriter){ - sqlite_int64 iEndBlockid; - char *pRootInfo; - int rc, nRootInfo; - - rc = leafWriterRootInfo(v, pWriter, &pRootInfo, &nRootInfo, &iEndBlockid); - if( rc!=SQLITE_OK ) return rc; - - /* Don't bother storing an entirely empty segment. */ - if( iEndBlockid==0 && nRootInfo==0 ) return SQLITE_OK; - - return segdir_set(v, pWriter->iLevel, pWriter->idx, - pWriter->iStartBlockid, pWriter->iEndBlockid, - iEndBlockid, pRootInfo, nRootInfo); -} - -static void leafWriterDestroy(LeafWriter *pWriter){ - if( pWriter->has_parent ) interiorWriterDestroy(&pWriter->parentWriter); - dataBufferDestroy(&pWriter->term); - dataBufferDestroy(&pWriter->data); -} - -/* Encode a term into the leafWriter, delta-encoding as appropriate. -** Returns the length of the new term which distinguishes it from the -** previous term, which can be used to set nTermDistinct when a node -** boundary is crossed. -*/ -static int leafWriterEncodeTerm(LeafWriter *pWriter, - const char *pTerm, int nTerm){ - char c[VARINT_MAX+VARINT_MAX]; - int n, nPrefix = 0; - - assert( nTerm>0 ); - while( nPrefixterm.nData && - pTerm[nPrefix]==pWriter->term.pData[nPrefix] ){ - nPrefix++; - /* Failing this implies that the terms weren't in order. */ - assert( nPrefixdata.nData==0 ){ - /* Encode the node header and leading term as: - ** varint(0) - ** varint(nTerm) - ** char pTerm[nTerm] - */ - n = putVarint(c, '\0'); - n += putVarint(c+n, nTerm); - dataBufferAppend2(&pWriter->data, c, n, pTerm, nTerm); - }else{ - /* Delta-encode the term as: - ** varint(nPrefix) - ** varint(nSuffix) - ** char pTermSuffix[nSuffix] - */ - n = putVarint(c, nPrefix); - n += putVarint(c+n, nTerm-nPrefix); - dataBufferAppend2(&pWriter->data, c, n, pTerm+nPrefix, nTerm-nPrefix); - } - dataBufferReplace(&pWriter->term, pTerm, nTerm); - - return nPrefix+1; -} - -/* Used to avoid a memmove when a large amount of doclist data is in -** the buffer. This constructs a node and term header before -** iDoclistData and flushes the resulting complete node using -** leafWriterInternalFlush(). -*/ -static int leafWriterInlineFlush(fulltext_vtab *v, LeafWriter *pWriter, - const char *pTerm, int nTerm, - int iDoclistData){ - char c[VARINT_MAX+VARINT_MAX]; - int iData, n = putVarint(c, 0); - n += putVarint(c+n, nTerm); - - /* There should always be room for the header. Even if pTerm shared - ** a substantial prefix with the previous term, the entire prefix - ** could be constructed from earlier data in the doclist, so there - ** should be room. - */ - assert( iDoclistData>=n+nTerm ); - - iData = iDoclistData-(n+nTerm); - memcpy(pWriter->data.pData+iData, c, n); - memcpy(pWriter->data.pData+iData+n, pTerm, nTerm); - - return leafWriterInternalFlush(v, pWriter, iData, pWriter->data.nData-iData); -} - -/* Push pTerm[nTerm] along with the doclist data to the leaf layer of -** %_segments. -*/ -static int leafWriterStepMerge(fulltext_vtab *v, LeafWriter *pWriter, - const char *pTerm, int nTerm, - DLReader *pReaders, int nReaders){ - char c[VARINT_MAX+VARINT_MAX]; - int iTermData = pWriter->data.nData, iDoclistData; - int i, nData, n, nActualData, nActual, rc, nTermDistinct; - - ASSERT_VALID_LEAF_NODE(pWriter->data.pData, pWriter->data.nData); - nTermDistinct = leafWriterEncodeTerm(pWriter, pTerm, nTerm); - - /* Remember nTermDistinct if opening a new node. */ - if( iTermData==0 ) pWriter->nTermDistinct = nTermDistinct; - - iDoclistData = pWriter->data.nData; - - /* Estimate the length of the merged doclist so we can leave space - ** to encode it. - */ - for(i=0, nData=0; idata, c, n); - - docListMerge(&pWriter->data, pReaders, nReaders); - ASSERT_VALID_DOCLIST(DL_DEFAULT, - pWriter->data.pData+iDoclistData+n, - pWriter->data.nData-iDoclistData-n, NULL); - - /* The actual amount of doclist data at this point could be smaller - ** than the length we encoded. Additionally, the space required to - ** encode this length could be smaller. For small doclists, this is - ** not a big deal, we can just use memmove() to adjust things. - */ - nActualData = pWriter->data.nData-(iDoclistData+n); - nActual = putVarint(c, nActualData); - assert( nActualData<=nData ); - assert( nActual<=n ); - - /* If the new doclist is big enough for force a standalone leaf - ** node, we can immediately flush it inline without doing the - ** memmove(). - */ - /* TODO(shess) This test matches leafWriterStep(), which does this - ** test before it knows the cost to varint-encode the term and - ** doclist lengths. At some point, change to - ** pWriter->data.nData-iTermData>STANDALONE_MIN. - */ - if( nTerm+nActualData>STANDALONE_MIN ){ - /* Push leaf node from before this term. */ - if( iTermData>0 ){ - rc = leafWriterInternalFlush(v, pWriter, 0, iTermData); - if( rc!=SQLITE_OK ) return rc; - - pWriter->nTermDistinct = nTermDistinct; - } - - /* Fix the encoded doclist length. */ - iDoclistData += n - nActual; - memcpy(pWriter->data.pData+iDoclistData, c, nActual); - - /* Push the standalone leaf node. */ - rc = leafWriterInlineFlush(v, pWriter, pTerm, nTerm, iDoclistData); - if( rc!=SQLITE_OK ) return rc; - - /* Leave the node empty. */ - dataBufferReset(&pWriter->data); - - return rc; - } - - /* At this point, we know that the doclist was small, so do the - ** memmove if indicated. - */ - if( nActualdata.pData+iDoclistData+nActual, - pWriter->data.pData+iDoclistData+n, - pWriter->data.nData-(iDoclistData+n)); - pWriter->data.nData -= n-nActual; - } - - /* Replace written length with actual length. */ - memcpy(pWriter->data.pData+iDoclistData, c, nActual); - - /* If the node is too large, break things up. */ - /* TODO(shess) This test matches leafWriterStep(), which does this - ** test before it knows the cost to varint-encode the term and - ** doclist lengths. At some point, change to - ** pWriter->data.nData>LEAF_MAX. - */ - if( iTermData+nTerm+nActualData>LEAF_MAX ){ - /* Flush out the leading data as a node */ - rc = leafWriterInternalFlush(v, pWriter, 0, iTermData); - if( rc!=SQLITE_OK ) return rc; - - pWriter->nTermDistinct = nTermDistinct; - - /* Rebuild header using the current term */ - n = putVarint(pWriter->data.pData, 0); - n += putVarint(pWriter->data.pData+n, nTerm); - memcpy(pWriter->data.pData+n, pTerm, nTerm); - n += nTerm; - - /* There should always be room, because the previous encoding - ** included all data necessary to construct the term. - */ - assert( ndata.nData-iDoclistDatadata.pData+n, - pWriter->data.pData+iDoclistData, - pWriter->data.nData-iDoclistData); - pWriter->data.nData -= iDoclistData-n; - } - ASSERT_VALID_LEAF_NODE(pWriter->data.pData, pWriter->data.nData); - - return SQLITE_OK; -} - -/* Push pTerm[nTerm] along with the doclist data to the leaf layer of -** %_segments. -*/ -/* TODO(shess) Revise writeZeroSegment() so that doclists are -** constructed directly in pWriter->data. -*/ -static int leafWriterStep(fulltext_vtab *v, LeafWriter *pWriter, - const char *pTerm, int nTerm, - const char *pData, int nData){ - int rc; - DLReader reader; - - dlrInit(&reader, DL_DEFAULT, pData, nData); - rc = leafWriterStepMerge(v, pWriter, pTerm, nTerm, &reader, 1); - dlrDestroy(&reader); - - return rc; -} - - -/****************************************************************/ -/* LeafReader is used to iterate over an individual leaf node. */ -typedef struct LeafReader { - DataBuffer term; /* copy of current term. */ - - const char *pData; /* data for current term. */ - int nData; -} LeafReader; - -static void leafReaderDestroy(LeafReader *pReader){ - dataBufferDestroy(&pReader->term); - SCRAMBLE(pReader); -} - -static int leafReaderAtEnd(LeafReader *pReader){ - return pReader->nData<=0; -} - -/* Access the current term. */ -static int leafReaderTermBytes(LeafReader *pReader){ - return pReader->term.nData; -} -static const char *leafReaderTerm(LeafReader *pReader){ - assert( pReader->term.nData>0 ); - return pReader->term.pData; -} - -/* Access the doclist data for the current term. */ -static int leafReaderDataBytes(LeafReader *pReader){ - int nData; - assert( pReader->term.nData>0 ); - getVarint32(pReader->pData, &nData); - return nData; -} -static const char *leafReaderData(LeafReader *pReader){ - int n, nData; - assert( pReader->term.nData>0 ); - n = getVarint32(pReader->pData, &nData); - return pReader->pData+n; -} - -static void leafReaderInit(const char *pData, int nData, - LeafReader *pReader){ - int nTerm, n; - - assert( nData>0 ); - assert( pData[0]=='\0' ); - - CLEAR(pReader); - - /* Read the first term, skipping the header byte. */ - n = getVarint32(pData+1, &nTerm); - dataBufferInit(&pReader->term, nTerm); - dataBufferReplace(&pReader->term, pData+1+n, nTerm); - - /* Position after the first term. */ - assert( 1+n+nTermpData = pData+1+n+nTerm; - pReader->nData = nData-1-n-nTerm; -} - -/* Step the reader forward to the next term. */ -static void leafReaderStep(LeafReader *pReader){ - int n, nData, nPrefix, nSuffix; - assert( !leafReaderAtEnd(pReader) ); - - /* Skip previous entry's data block. */ - n = getVarint32(pReader->pData, &nData); - assert( n+nData<=pReader->nData ); - pReader->pData += n+nData; - pReader->nData -= n+nData; - - if( !leafReaderAtEnd(pReader) ){ - /* Construct the new term using a prefix from the old term plus a - ** suffix from the leaf data. - */ - n = getVarint32(pReader->pData, &nPrefix); - n += getVarint32(pReader->pData+n, &nSuffix); - assert( n+nSuffixnData ); - pReader->term.nData = nPrefix; - dataBufferAppend(&pReader->term, pReader->pData+n, nSuffix); - - pReader->pData += n+nSuffix; - pReader->nData -= n+nSuffix; - } -} - -/* strcmp-style comparison of pReader's current term against pTerm. -** If isPrefix, equality means equal through nTerm bytes. -*/ -static int leafReaderTermCmp(LeafReader *pReader, - const char *pTerm, int nTerm, int isPrefix){ - int c, n = pReader->term.nDataterm.nData : nTerm; - if( n==0 ){ - if( pReader->term.nData>0 ) return -1; - if(nTerm>0 ) return 1; - return 0; - } - - c = memcmp(pReader->term.pData, pTerm, n); - if( c!=0 ) return c; - if( isPrefix && n==nTerm ) return 0; - return pReader->term.nData - nTerm; -} - - -/****************************************************************/ -/* LeavesReader wraps LeafReader to allow iterating over the entire -** leaf layer of the tree. -*/ -typedef struct LeavesReader { - int idx; /* Index within the segment. */ - - sqlite3_stmt *pStmt; /* Statement we're streaming leaves from. */ - int eof; /* we've seen SQLITE_DONE from pStmt. */ - - LeafReader leafReader; /* reader for the current leaf. */ - DataBuffer rootData; /* root data for inline. */ -} LeavesReader; - -/* Access the current term. */ -static int leavesReaderTermBytes(LeavesReader *pReader){ - assert( !pReader->eof ); - return leafReaderTermBytes(&pReader->leafReader); -} -static const char *leavesReaderTerm(LeavesReader *pReader){ - assert( !pReader->eof ); - return leafReaderTerm(&pReader->leafReader); -} - -/* Access the doclist data for the current term. */ -static int leavesReaderDataBytes(LeavesReader *pReader){ - assert( !pReader->eof ); - return leafReaderDataBytes(&pReader->leafReader); -} -static const char *leavesReaderData(LeavesReader *pReader){ - assert( !pReader->eof ); - return leafReaderData(&pReader->leafReader); -} - -static int leavesReaderAtEnd(LeavesReader *pReader){ - return pReader->eof; -} - -/* loadSegmentLeaves() may not read all the way to SQLITE_DONE, thus -** leaving the statement handle open, which locks the table. -*/ -/* TODO(shess) This "solution" is not satisfactory. Really, there -** should be check-in function for all statement handles which -** arranges to call sqlite3_reset(). This most likely will require -** modification to control flow all over the place, though, so for now -** just punt. -** -** Note the current system assumes that segment merges will run to -** completion, which is why this particular probably hasn't arisen in -** this case. Probably a brittle assumption. -*/ -static int leavesReaderReset(LeavesReader *pReader){ - return sqlite3_reset(pReader->pStmt); -} - -static void leavesReaderDestroy(LeavesReader *pReader){ - /* If idx is -1, that means we're using a non-cached statement - ** handle in the optimize() case, so we need to release it. - */ - if( pReader->pStmt!=NULL && pReader->idx==-1 ){ - sqlite3_finalize(pReader->pStmt); - } - leafReaderDestroy(&pReader->leafReader); - dataBufferDestroy(&pReader->rootData); - SCRAMBLE(pReader); -} - -/* Initialize pReader with the given root data (if iStartBlockid==0 -** the leaf data was entirely contained in the root), or from the -** stream of blocks between iStartBlockid and iEndBlockid, inclusive. -*/ -static int leavesReaderInit(fulltext_vtab *v, - int idx, - sqlite_int64 iStartBlockid, - sqlite_int64 iEndBlockid, - const char *pRootData, int nRootData, - LeavesReader *pReader){ - CLEAR(pReader); - pReader->idx = idx; - - dataBufferInit(&pReader->rootData, 0); - if( iStartBlockid==0 ){ - /* Entire leaf level fit in root data. */ - dataBufferReplace(&pReader->rootData, pRootData, nRootData); - leafReaderInit(pReader->rootData.pData, pReader->rootData.nData, - &pReader->leafReader); - }else{ - sqlite3_stmt *s; - int rc = sql_get_leaf_statement(v, idx, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 1, iStartBlockid); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 2, iEndBlockid); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_step(s); - if( rc==SQLITE_DONE ){ - pReader->eof = 1; - return SQLITE_OK; - } - if( rc!=SQLITE_ROW ) return rc; - - pReader->pStmt = s; - leafReaderInit(sqlite3_column_blob(pReader->pStmt, 0), - sqlite3_column_bytes(pReader->pStmt, 0), - &pReader->leafReader); - } - return SQLITE_OK; -} - -/* Step the current leaf forward to the next term. If we reach the -** end of the current leaf, step forward to the next leaf block. -*/ -static int leavesReaderStep(fulltext_vtab *v, LeavesReader *pReader){ - assert( !leavesReaderAtEnd(pReader) ); - leafReaderStep(&pReader->leafReader); - - if( leafReaderAtEnd(&pReader->leafReader) ){ - int rc; - if( pReader->rootData.pData ){ - pReader->eof = 1; - return SQLITE_OK; - } - rc = sqlite3_step(pReader->pStmt); - if( rc!=SQLITE_ROW ){ - pReader->eof = 1; - return rc==SQLITE_DONE ? SQLITE_OK : rc; - } - leafReaderDestroy(&pReader->leafReader); - leafReaderInit(sqlite3_column_blob(pReader->pStmt, 0), - sqlite3_column_bytes(pReader->pStmt, 0), - &pReader->leafReader); - } - return SQLITE_OK; -} - -/* Order LeavesReaders by their term, ignoring idx. Readers at eof -** always sort to the end. -*/ -static int leavesReaderTermCmp(LeavesReader *lr1, LeavesReader *lr2){ - if( leavesReaderAtEnd(lr1) ){ - if( leavesReaderAtEnd(lr2) ) return 0; - return 1; - } - if( leavesReaderAtEnd(lr2) ) return -1; - - return leafReaderTermCmp(&lr1->leafReader, - leavesReaderTerm(lr2), leavesReaderTermBytes(lr2), - 0); -} - -/* Similar to leavesReaderTermCmp(), with additional ordering by idx -** so that older segments sort before newer segments. -*/ -static int leavesReaderCmp(LeavesReader *lr1, LeavesReader *lr2){ - int c = leavesReaderTermCmp(lr1, lr2); - if( c!=0 ) return c; - return lr1->idx-lr2->idx; -} - -/* Assume that pLr[1]..pLr[nLr] are sorted. Bubble pLr[0] into its -** sorted position. -*/ -static void leavesReaderReorder(LeavesReader *pLr, int nLr){ - while( nLr>1 && leavesReaderCmp(pLr, pLr+1)>0 ){ - LeavesReader tmp = pLr[0]; - pLr[0] = pLr[1]; - pLr[1] = tmp; - nLr--; - pLr++; - } -} - -/* Initializes pReaders with the segments from level iLevel, returning -** the number of segments in *piReaders. Leaves pReaders in sorted -** order. -*/ -static int leavesReadersInit(fulltext_vtab *v, int iLevel, - LeavesReader *pReaders, int *piReaders){ - sqlite3_stmt *s; - int i, rc = sql_get_statement(v, SEGDIR_SELECT_LEVEL_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int(s, 1, iLevel); - if( rc!=SQLITE_OK ) return rc; - - i = 0; - while( (rc = sqlite3_step(s))==SQLITE_ROW ){ - sqlite_int64 iStart = sqlite3_column_int64(s, 0); - sqlite_int64 iEnd = sqlite3_column_int64(s, 1); - const char *pRootData = sqlite3_column_blob(s, 2); - int nRootData = sqlite3_column_bytes(s, 2); - - assert( i0 ){ - leavesReaderDestroy(&pReaders[i]); - } - return rc; - } - - *piReaders = i; - - /* Leave our results sorted by term, then age. */ - while( i-- ){ - leavesReaderReorder(pReaders+i, *piReaders-i); - } - return SQLITE_OK; -} - -/* Merge doclists from pReaders[nReaders] into a single doclist, which -** is written to pWriter. Assumes pReaders is ordered oldest to -** newest. -*/ -/* TODO(shess) Consider putting this inline in segmentMerge(). */ -static int leavesReadersMerge(fulltext_vtab *v, - LeavesReader *pReaders, int nReaders, - LeafWriter *pWriter){ - DLReader dlReaders[MERGE_COUNT]; - const char *pTerm = leavesReaderTerm(pReaders); - int i, nTerm = leavesReaderTermBytes(pReaders); - - assert( nReaders<=MERGE_COUNT ); - - for(i=0; i0 ){ - rc = leavesReaderStep(v, lrs+i); - if( rc!=SQLITE_OK ) goto err; - - /* Reorder by term, then by age. */ - leavesReaderReorder(lrs+i, MERGE_COUNT-i); - } - } - - for(i=0; i0 ); - - for(rc=SQLITE_OK; rc==SQLITE_OK && !leavesReaderAtEnd(pReader); - rc=leavesReaderStep(v, pReader)){ - /* TODO(shess) Really want leavesReaderTermCmp(), but that name is - ** already taken to compare the terms of two LeavesReaders. Think - ** on a better name. [Meanwhile, break encapsulation rather than - ** use a confusing name.] - */ - int c = leafReaderTermCmp(&pReader->leafReader, pTerm, nTerm, isPrefix); - if( c>0 ) break; /* Past any possible matches. */ - if( c==0 ){ - const char *pData = leavesReaderData(pReader); - int iBuffer, nData = leavesReaderDataBytes(pReader); - - /* Find the first empty buffer. */ - for(iBuffer=0; iBuffer0 ){ - assert(pBuffers!=NULL); - memcpy(p, pBuffers, nBuffers*sizeof(*pBuffers)); - sqlite3_free(pBuffers); - } - pBuffers = p; - } - dataBufferInit(&(pBuffers[nBuffers]), 0); - nBuffers++; - } - - /* At this point, must have an empty at iBuffer. */ - assert(iBufferpData, p->nData); - - /* dataBufferReset() could allow a large doclist to blow up - ** our memory requirements. - */ - if( p->nCapacity<1024 ){ - dataBufferReset(p); - }else{ - dataBufferDestroy(p); - dataBufferInit(p, 0); - } - } - } - } - } - - /* Union all the doclists together into *out. */ - /* TODO(shess) What if *out is big? Sigh. */ - if( rc==SQLITE_OK && nBuffers>0 ){ - int iBuffer; - for(iBuffer=0; iBuffer0 ){ - if( out->nData==0 ){ - dataBufferSwap(out, &(pBuffers[iBuffer])); - }else{ - docListAccumulateUnion(out, pBuffers[iBuffer].pData, - pBuffers[iBuffer].nData); - } - } - } - } - - while( nBuffers-- ){ - dataBufferDestroy(&(pBuffers[nBuffers])); - } - if( pBuffers!=NULL ) sqlite3_free(pBuffers); - - return rc; -} - -/* Call loadSegmentLeavesInt() with pData/nData as input. */ -static int loadSegmentLeaf(fulltext_vtab *v, const char *pData, int nData, - const char *pTerm, int nTerm, int isPrefix, - DataBuffer *out){ - LeavesReader reader; - int rc; - - assert( nData>1 ); - assert( *pData=='\0' ); - rc = leavesReaderInit(v, 0, 0, 0, pData, nData, &reader); - if( rc!=SQLITE_OK ) return rc; - - rc = loadSegmentLeavesInt(v, &reader, pTerm, nTerm, isPrefix, out); - leavesReaderReset(&reader); - leavesReaderDestroy(&reader); - return rc; -} - -/* Call loadSegmentLeavesInt() with the leaf nodes from iStartLeaf to -** iEndLeaf (inclusive) as input, and merge the resulting doclist into -** out. -*/ -static int loadSegmentLeaves(fulltext_vtab *v, - sqlite_int64 iStartLeaf, sqlite_int64 iEndLeaf, - const char *pTerm, int nTerm, int isPrefix, - DataBuffer *out){ - int rc; - LeavesReader reader; - - assert( iStartLeaf<=iEndLeaf ); - rc = leavesReaderInit(v, 0, iStartLeaf, iEndLeaf, NULL, 0, &reader); - if( rc!=SQLITE_OK ) return rc; - - rc = loadSegmentLeavesInt(v, &reader, pTerm, nTerm, isPrefix, out); - leavesReaderReset(&reader); - leavesReaderDestroy(&reader); - return rc; -} - -/* Taking pData/nData as an interior node, find the sequence of child -** nodes which could include pTerm/nTerm/isPrefix. Note that the -** interior node terms logically come between the blocks, so there is -** one more blockid than there are terms (that block contains terms >= -** the last interior-node term). -*/ -/* TODO(shess) The calling code may already know that the end child is -** not worth calculating, because the end may be in a later sibling -** node. Consider whether breaking symmetry is worthwhile. I suspect -** it is not worthwhile. -*/ -static void getChildrenContaining(const char *pData, int nData, - const char *pTerm, int nTerm, int isPrefix, - sqlite_int64 *piStartChild, - sqlite_int64 *piEndChild){ - InteriorReader reader; - - assert( nData>1 ); - assert( *pData!='\0' ); - interiorReaderInit(pData, nData, &reader); - - /* Scan for the first child which could contain pTerm/nTerm. */ - while( !interiorReaderAtEnd(&reader) ){ - if( interiorReaderTermCmp(&reader, pTerm, nTerm, 0)>0 ) break; - interiorReaderStep(&reader); - } - *piStartChild = interiorReaderCurrentBlockid(&reader); - - /* Keep scanning to find a term greater than our term, using prefix - ** comparison if indicated. If isPrefix is false, this will be the - ** same blockid as the starting block. - */ - while( !interiorReaderAtEnd(&reader) ){ - if( interiorReaderTermCmp(&reader, pTerm, nTerm, isPrefix)>0 ) break; - interiorReaderStep(&reader); - } - *piEndChild = interiorReaderCurrentBlockid(&reader); - - interiorReaderDestroy(&reader); - - /* Children must ascend, and if !prefix, both must be the same. */ - assert( *piEndChild>=*piStartChild ); - assert( isPrefix || *piStartChild==*piEndChild ); -} - -/* Read block at iBlockid and pass it with other params to -** getChildrenContaining(). -*/ -static int loadAndGetChildrenContaining( - fulltext_vtab *v, - sqlite_int64 iBlockid, - const char *pTerm, int nTerm, int isPrefix, - sqlite_int64 *piStartChild, sqlite_int64 *piEndChild -){ - sqlite3_stmt *s = NULL; - int rc; - - assert( iBlockid!=0 ); - assert( pTerm!=NULL ); - assert( nTerm!=0 ); /* TODO(shess) Why not allow this? */ - assert( piStartChild!=NULL ); - assert( piEndChild!=NULL ); - - rc = sql_get_statement(v, BLOCK_SELECT_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_bind_int64(s, 1, iBlockid); - if( rc!=SQLITE_OK ) return rc; - - rc = sqlite3_step(s); - if( rc==SQLITE_DONE ) return SQLITE_ERROR; - if( rc!=SQLITE_ROW ) return rc; - - getChildrenContaining(sqlite3_column_blob(s, 0), sqlite3_column_bytes(s, 0), - pTerm, nTerm, isPrefix, piStartChild, piEndChild); - - /* We expect only one row. We must execute another sqlite3_step() - * to complete the iteration; otherwise the table will remain - * locked. */ - rc = sqlite3_step(s); - if( rc==SQLITE_ROW ) return SQLITE_ERROR; - if( rc!=SQLITE_DONE ) return rc; - - return SQLITE_OK; -} - -/* Traverse the tree represented by pData[nData] looking for -** pTerm[nTerm], placing its doclist into *out. This is internal to -** loadSegment() to make error-handling cleaner. -*/ -static int loadSegmentInt(fulltext_vtab *v, const char *pData, int nData, - sqlite_int64 iLeavesEnd, - const char *pTerm, int nTerm, int isPrefix, - DataBuffer *out){ - /* Special case where root is a leaf. */ - if( *pData=='\0' ){ - return loadSegmentLeaf(v, pData, nData, pTerm, nTerm, isPrefix, out); - }else{ - int rc; - sqlite_int64 iStartChild, iEndChild; - - /* Process pData as an interior node, then loop down the tree - ** until we find the set of leaf nodes to scan for the term. - */ - getChildrenContaining(pData, nData, pTerm, nTerm, isPrefix, - &iStartChild, &iEndChild); - while( iStartChild>iLeavesEnd ){ - sqlite_int64 iNextStart, iNextEnd; - rc = loadAndGetChildrenContaining(v, iStartChild, pTerm, nTerm, isPrefix, - &iNextStart, &iNextEnd); - if( rc!=SQLITE_OK ) return rc; - - /* If we've branched, follow the end branch, too. */ - if( iStartChild!=iEndChild ){ - sqlite_int64 iDummy; - rc = loadAndGetChildrenContaining(v, iEndChild, pTerm, nTerm, isPrefix, - &iDummy, &iNextEnd); - if( rc!=SQLITE_OK ) return rc; - } - - assert( iNextStart<=iNextEnd ); - iStartChild = iNextStart; - iEndChild = iNextEnd; - } - assert( iStartChild<=iLeavesEnd ); - assert( iEndChild<=iLeavesEnd ); - - /* Scan through the leaf segments for doclists. */ - return loadSegmentLeaves(v, iStartChild, iEndChild, - pTerm, nTerm, isPrefix, out); - } -} - -/* Call loadSegmentInt() to collect the doclist for pTerm/nTerm, then -** merge its doclist over *out (any duplicate doclists read from the -** segment rooted at pData will overwrite those in *out). -*/ -/* TODO(shess) Consider changing this to determine the depth of the -** leaves using either the first characters of interior nodes (when -** ==1, we're one level above the leaves), or the first character of -** the root (which will describe the height of the tree directly). -** Either feels somewhat tricky to me. -*/ -/* TODO(shess) The current merge is likely to be slow for large -** doclists (though it should process from newest/smallest to -** oldest/largest, so it may not be that bad). It might be useful to -** modify things to allow for N-way merging. This could either be -** within a segment, with pairwise merges across segments, or across -** all segments at once. -*/ -static int loadSegment(fulltext_vtab *v, const char *pData, int nData, - sqlite_int64 iLeavesEnd, - const char *pTerm, int nTerm, int isPrefix, - DataBuffer *out){ - DataBuffer result; - int rc; - - assert( nData>1 ); - - /* This code should never be called with buffered updates. */ - assert( v->nPendingData<0 ); - - dataBufferInit(&result, 0); - rc = loadSegmentInt(v, pData, nData, iLeavesEnd, - pTerm, nTerm, isPrefix, &result); - if( rc==SQLITE_OK && result.nData>0 ){ - if( out->nData==0 ){ - DataBuffer tmp = *out; - *out = result; - result = tmp; - }else{ - DataBuffer merged; - DLReader readers[2]; - - dlrInit(&readers[0], DL_DEFAULT, out->pData, out->nData); - dlrInit(&readers[1], DL_DEFAULT, result.pData, result.nData); - dataBufferInit(&merged, out->nData+result.nData); - docListMerge(&merged, readers, 2); - dataBufferDestroy(out); - *out = merged; - dlrDestroy(&readers[0]); - dlrDestroy(&readers[1]); - } - } - dataBufferDestroy(&result); - return rc; -} - -/* Scan the database and merge together the posting lists for the term -** into *out. -*/ -static int termSelect(fulltext_vtab *v, int iColumn, - const char *pTerm, int nTerm, int isPrefix, - DocListType iType, DataBuffer *out){ - DataBuffer doclist; - sqlite3_stmt *s; - int rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s); - if( rc!=SQLITE_OK ) return rc; - - /* This code should never be called with buffered updates. */ - assert( v->nPendingData<0 ); - - dataBufferInit(&doclist, 0); - - /* Traverse the segments from oldest to newest so that newer doclist - ** elements for given docids overwrite older elements. - */ - while( (rc = sqlite3_step(s))==SQLITE_ROW ){ - const char *pData = sqlite3_column_blob(s, 2); - const int nData = sqlite3_column_bytes(s, 2); - const sqlite_int64 iLeavesEnd = sqlite3_column_int64(s, 1); - rc = loadSegment(v, pData, nData, iLeavesEnd, pTerm, nTerm, isPrefix, - &doclist); - if( rc!=SQLITE_OK ) goto err; - } - if( rc==SQLITE_DONE ){ - if( doclist.nData!=0 ){ - /* TODO(shess) The old term_select_all() code applied the column - ** restrict as we merged segments, leading to smaller buffers. - ** This is probably worthwhile to bring back, once the new storage - ** system is checked in. - */ - if( iColumn==v->nColumn) iColumn = -1; - docListTrim(DL_DEFAULT, doclist.pData, doclist.nData, - iColumn, iType, out); - } - rc = SQLITE_OK; - } - - err: - dataBufferDestroy(&doclist); - return rc; -} - -/****************************************************************/ -/* Used to hold hashtable data for sorting. */ -typedef struct TermData { - const char *pTerm; - int nTerm; - DLCollector *pCollector; -} TermData; - -/* Orders TermData elements in strcmp fashion ( <0 for less-than, 0 -** for equal, >0 for greater-than). -*/ -static int termDataCmp(const void *av, const void *bv){ - const TermData *a = (const TermData *)av; - const TermData *b = (const TermData *)bv; - int n = a->nTermnTerm ? a->nTerm : b->nTerm; - int c = memcmp(a->pTerm, b->pTerm, n); - if( c!=0 ) return c; - return a->nTerm-b->nTerm; -} - -/* Order pTerms data by term, then write a new level 0 segment using -** LeafWriter. -*/ -static int writeZeroSegment(fulltext_vtab *v, fts2Hash *pTerms){ - fts2HashElem *e; - int idx, rc, i, n; - TermData *pData; - LeafWriter writer; - DataBuffer dl; - - /* Determine the next index at level 0, merging as necessary. */ - rc = segdirNextIndex(v, 0, &idx); - if( rc!=SQLITE_OK ) return rc; - - n = fts2HashCount(pTerms); - pData = sqlite3_malloc(n*sizeof(TermData)); - - for(i = 0, e = fts2HashFirst(pTerms); e; i++, e = fts2HashNext(e)){ - assert( i1 ) qsort(pData, n, sizeof(*pData), termDataCmp); - - /* TODO(shess) Refactor so that we can write directly to the segment - ** DataBuffer, as happens for segment merges. - */ - leafWriterInit(0, idx, &writer); - dataBufferInit(&dl, 0); - for(i=0; inPendingData>=0 ){ - fts2HashElem *e; - for(e=fts2HashFirst(&v->pendingTerms); e; e=fts2HashNext(e)){ - dlcDelete(fts2HashData(e)); - } - fts2HashClear(&v->pendingTerms); - v->nPendingData = -1; - } - return SQLITE_OK; -} - -/* If pendingTerms has data, flush it to a level-zero segment, and -** free it. -*/ -static int flushPendingTerms(fulltext_vtab *v){ - if( v->nPendingData>=0 ){ - int rc = writeZeroSegment(v, &v->pendingTerms); - if( rc==SQLITE_OK ) clearPendingTerms(v); - return rc; - } - return SQLITE_OK; -} - -/* If pendingTerms is "too big", or docid is out of order, flush it. -** Regardless, be certain that pendingTerms is initialized for use. -*/ -static int initPendingTerms(fulltext_vtab *v, sqlite_int64 iDocid){ - /* TODO(shess) Explore whether partially flushing the buffer on - ** forced-flush would provide better performance. I suspect that if - ** we ordered the doclists by size and flushed the largest until the - ** buffer was half empty, that would let the less frequent terms - ** generate longer doclists. - */ - if( iDocid<=v->iPrevDocid || v->nPendingData>kPendingThreshold ){ - int rc = flushPendingTerms(v); - if( rc!=SQLITE_OK ) return rc; - } - if( v->nPendingData<0 ){ - fts2HashInit(&v->pendingTerms, FTS2_HASH_STRING, 1); - v->nPendingData = 0; - } - v->iPrevDocid = iDocid; - return SQLITE_OK; -} - -/* This function implements the xUpdate callback; it is the top-level entry - * point for inserting, deleting or updating a row in a full-text table. */ -static int fulltextUpdate(sqlite3_vtab *pVtab, int nArg, sqlite3_value **ppArg, - sqlite_int64 *pRowid){ - fulltext_vtab *v = (fulltext_vtab *) pVtab; - int rc; - - TRACE(("FTS2 Update %p\n", pVtab)); - - if( nArg<2 ){ - rc = index_delete(v, sqlite3_value_int64(ppArg[0])); - if( rc==SQLITE_OK ){ - /* If we just deleted the last row in the table, clear out the - ** index data. - */ - rc = content_exists(v); - if( rc==SQLITE_ROW ){ - rc = SQLITE_OK; - }else if( rc==SQLITE_DONE ){ - /* Clear the pending terms so we don't flush a useless level-0 - ** segment when the transaction closes. - */ - rc = clearPendingTerms(v); - if( rc==SQLITE_OK ){ - rc = segdir_delete_all(v); - } - } - } - } else if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){ - /* An update: - * ppArg[0] = old rowid - * ppArg[1] = new rowid - * ppArg[2..2+v->nColumn-1] = values - * ppArg[2+v->nColumn] = value for magic column (we ignore this) - */ - sqlite_int64 rowid = sqlite3_value_int64(ppArg[0]); - if( sqlite3_value_type(ppArg[1]) != SQLITE_INTEGER || - sqlite3_value_int64(ppArg[1]) != rowid ){ - rc = SQLITE_ERROR; /* we don't allow changing the rowid */ - } else { - assert( nArg==2+v->nColumn+1); - rc = index_update(v, rowid, &ppArg[2]); - } - } else { - /* An insert: - * ppArg[1] = requested rowid - * ppArg[2..2+v->nColumn-1] = values - * ppArg[2+v->nColumn] = value for magic column (we ignore this) - */ - assert( nArg==2+v->nColumn+1); - rc = index_insert(v, ppArg[1], &ppArg[2], pRowid); - } - - return rc; -} - -static int fulltextSync(sqlite3_vtab *pVtab){ - TRACE(("FTS2 xSync()\n")); - return flushPendingTerms((fulltext_vtab *)pVtab); -} - -static int fulltextBegin(sqlite3_vtab *pVtab){ - fulltext_vtab *v = (fulltext_vtab *) pVtab; - TRACE(("FTS2 xBegin()\n")); - - /* Any buffered updates should have been cleared by the previous - ** transaction. - */ - assert( v->nPendingData<0 ); - return clearPendingTerms(v); -} - -static int fulltextCommit(sqlite3_vtab *pVtab){ - fulltext_vtab *v = (fulltext_vtab *) pVtab; - TRACE(("FTS2 xCommit()\n")); - - /* Buffered updates should have been cleared by fulltextSync(). */ - assert( v->nPendingData<0 ); - return clearPendingTerms(v); -} - -static int fulltextRollback(sqlite3_vtab *pVtab){ - TRACE(("FTS2 xRollback()\n")); - return clearPendingTerms((fulltext_vtab *)pVtab); -} - -/* -** Implementation of the snippet() function for FTS2 -*/ -static void snippetFunc( - sqlite3_context *pContext, - int argc, - sqlite3_value **argv -){ - fulltext_cursor *pCursor; - if( argc<1 ) return; - if( sqlite3_value_type(argv[0])!=SQLITE_BLOB || - sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){ - sqlite3_result_error(pContext, "illegal first argument to html_snippet",-1); - }else{ - const char *zStart = ""; - const char *zEnd = ""; - const char *zEllipsis = "..."; - memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor)); - if( argc>=2 ){ - zStart = (const char*)sqlite3_value_text(argv[1]); - if( argc>=3 ){ - zEnd = (const char*)sqlite3_value_text(argv[2]); - if( argc>=4 ){ - zEllipsis = (const char*)sqlite3_value_text(argv[3]); - } - } - } - snippetAllOffsets(pCursor); - snippetText(pCursor, zStart, zEnd, zEllipsis); - sqlite3_result_text(pContext, pCursor->snippet.zSnippet, - pCursor->snippet.nSnippet, SQLITE_STATIC); - } -} - -/* -** Implementation of the offsets() function for FTS2 -*/ -static void snippetOffsetsFunc( - sqlite3_context *pContext, - int argc, - sqlite3_value **argv -){ - fulltext_cursor *pCursor; - if( argc<1 ) return; - if( sqlite3_value_type(argv[0])!=SQLITE_BLOB || - sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){ - sqlite3_result_error(pContext, "illegal first argument to offsets",-1); - }else{ - memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor)); - snippetAllOffsets(pCursor); - snippetOffsetText(&pCursor->snippet); - sqlite3_result_text(pContext, - pCursor->snippet.zOffset, pCursor->snippet.nOffset, - SQLITE_STATIC); - } -} - -/* OptLeavesReader is nearly identical to LeavesReader, except that -** where LeavesReader is geared towards the merging of complete -** segment levels (with exactly MERGE_COUNT segments), OptLeavesReader -** is geared towards implementation of the optimize() function, and -** can merge all segments simultaneously. This version may be -** somewhat less efficient than LeavesReader because it merges into an -** accumulator rather than doing an N-way merge, but since segment -** size grows exponentially (so segment count logrithmically) this is -** probably not an immediate problem. -*/ -/* TODO(shess): Prove that assertion, or extend the merge code to -** merge tree fashion (like the prefix-searching code does). -*/ -/* TODO(shess): OptLeavesReader and LeavesReader could probably be -** merged with little or no loss of performance for LeavesReader. The -** merged code would need to handle >MERGE_COUNT segments, and would -** also need to be able to optionally optimize away deletes. -*/ -typedef struct OptLeavesReader { - /* Segment number, to order readers by age. */ - int segment; - LeavesReader reader; -} OptLeavesReader; - -static int optLeavesReaderAtEnd(OptLeavesReader *pReader){ - return leavesReaderAtEnd(&pReader->reader); -} -static int optLeavesReaderTermBytes(OptLeavesReader *pReader){ - return leavesReaderTermBytes(&pReader->reader); -} -static const char *optLeavesReaderData(OptLeavesReader *pReader){ - return leavesReaderData(&pReader->reader); -} -static int optLeavesReaderDataBytes(OptLeavesReader *pReader){ - return leavesReaderDataBytes(&pReader->reader); -} -static const char *optLeavesReaderTerm(OptLeavesReader *pReader){ - return leavesReaderTerm(&pReader->reader); -} -static int optLeavesReaderStep(fulltext_vtab *v, OptLeavesReader *pReader){ - return leavesReaderStep(v, &pReader->reader); -} -static int optLeavesReaderTermCmp(OptLeavesReader *lr1, OptLeavesReader *lr2){ - return leavesReaderTermCmp(&lr1->reader, &lr2->reader); -} -/* Order by term ascending, segment ascending (oldest to newest), with -** exhausted readers to the end. -*/ -static int optLeavesReaderCmp(OptLeavesReader *lr1, OptLeavesReader *lr2){ - int c = optLeavesReaderTermCmp(lr1, lr2); - if( c!=0 ) return c; - return lr1->segment-lr2->segment; -} -/* Bubble pLr[0] to appropriate place in pLr[1..nLr-1]. Assumes that -** pLr[1..nLr-1] is already sorted. -*/ -static void optLeavesReaderReorder(OptLeavesReader *pLr, int nLr){ - while( nLr>1 && optLeavesReaderCmp(pLr, pLr+1)>0 ){ - OptLeavesReader tmp = pLr[0]; - pLr[0] = pLr[1]; - pLr[1] = tmp; - nLr--; - pLr++; - } -} - -/* optimize() helper function. Put the readers in order and iterate -** through them, merging doclists for matching terms into pWriter. -** Returns SQLITE_OK on success, or the SQLite error code which -** prevented success. -*/ -static int optimizeInternal(fulltext_vtab *v, - OptLeavesReader *readers, int nReaders, - LeafWriter *pWriter){ - int i, rc = SQLITE_OK; - DataBuffer doclist, merged, tmp; - - /* Order the readers. */ - i = nReaders; - while( i-- > 0 ){ - optLeavesReaderReorder(&readers[i], nReaders-i); - } - - dataBufferInit(&doclist, LEAF_MAX); - dataBufferInit(&merged, LEAF_MAX); - - /* Exhausted readers bubble to the end, so when the first reader is - ** at eof, all are at eof. - */ - while( !optLeavesReaderAtEnd(&readers[0]) ){ - - /* Figure out how many readers share the next term. */ - for(i=1; i 0 ){ - dlrDestroy(&dlReaders[nReaders]); - } - - /* Accumulated doclist to reader 0 for next pass. */ - dlrInit(&dlReaders[0], DL_DEFAULT, doclist.pData, doclist.nData); - } - - /* Destroy reader that was left in the pipeline. */ - dlrDestroy(&dlReaders[0]); - - /* Trim deletions from the doclist. */ - dataBufferReset(&merged); - docListTrim(DL_DEFAULT, doclist.pData, doclist.nData, - -1, DL_DEFAULT, &merged); - } - - /* Only pass doclists with hits (skip if all hits deleted). */ - if( merged.nData>0 ){ - rc = leafWriterStep(v, pWriter, - optLeavesReaderTerm(&readers[0]), - optLeavesReaderTermBytes(&readers[0]), - merged.pData, merged.nData); - if( rc!=SQLITE_OK ) goto err; - } - - /* Step merged readers to next term and reorder. */ - while( i-- > 0 ){ - rc = optLeavesReaderStep(v, &readers[i]); - if( rc!=SQLITE_OK ) goto err; - - optLeavesReaderReorder(&readers[i], nReaders-i); - } - } - - err: - dataBufferDestroy(&doclist); - dataBufferDestroy(&merged); - return rc; -} - -/* Implement optimize() function for FTS3. optimize(t) merges all -** segments in the fts index into a single segment. 't' is the magic -** table-named column. -*/ -static void optimizeFunc(sqlite3_context *pContext, - int argc, sqlite3_value **argv){ - fulltext_cursor *pCursor; - if( argc>1 ){ - sqlite3_result_error(pContext, "excess arguments to optimize()",-1); - }else if( sqlite3_value_type(argv[0])!=SQLITE_BLOB || - sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){ - sqlite3_result_error(pContext, "illegal first argument to optimize",-1); - }else{ - fulltext_vtab *v; - int i, rc, iMaxLevel; - OptLeavesReader *readers; - int nReaders; - LeafWriter writer; - sqlite3_stmt *s; - - memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor)); - v = cursor_vtab(pCursor); - - /* Flush any buffered updates before optimizing. */ - rc = flushPendingTerms(v); - if( rc!=SQLITE_OK ) goto err; - - rc = segdir_count(v, &nReaders, &iMaxLevel); - if( rc!=SQLITE_OK ) goto err; - if( nReaders==0 || nReaders==1 ){ - sqlite3_result_text(pContext, "Index already optimal", -1, - SQLITE_STATIC); - return; - } - - rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s); - if( rc!=SQLITE_OK ) goto err; - - readers = sqlite3_malloc(nReaders*sizeof(readers[0])); - if( readers==NULL ) goto err; - - /* Note that there will already be a segment at this position - ** until we call segdir_delete() on iMaxLevel. - */ - leafWriterInit(iMaxLevel, 0, &writer); - - i = 0; - while( (rc = sqlite3_step(s))==SQLITE_ROW ){ - sqlite_int64 iStart = sqlite3_column_int64(s, 0); - sqlite_int64 iEnd = sqlite3_column_int64(s, 1); - const char *pRootData = sqlite3_column_blob(s, 2); - int nRootData = sqlite3_column_bytes(s, 2); - - assert( i 0 ){ - leavesReaderDestroy(&readers[i].reader); - } - sqlite3_free(readers); - - /* If we've successfully gotten to here, delete the old segments - ** and flush the interior structure of the new segment. - */ - if( rc==SQLITE_OK ){ - for( i=0; i<=iMaxLevel; i++ ){ - rc = segdir_delete(v, i); - if( rc!=SQLITE_OK ) break; - } - - if( rc==SQLITE_OK ) rc = leafWriterFinalize(v, &writer); - } - - leafWriterDestroy(&writer); - - if( rc!=SQLITE_OK ) goto err; - - sqlite3_result_text(pContext, "Index optimized", -1, SQLITE_STATIC); - return; - - /* TODO(shess): Error-handling needs to be improved along the - ** lines of the dump_ functions. - */ - err: - { - char buf[512]; - sqlite3_snprintf(sizeof(buf), buf, "Error in optimize: %s", - sqlite3_errmsg(sqlite3_context_db_handle(pContext))); - sqlite3_result_error(pContext, buf, -1); - } - } -} - -#ifdef SQLITE_TEST -/* Generate an error of the form ": ". If msg is NULL, -** pull the error from the context's db handle. -*/ -static void generateError(sqlite3_context *pContext, - const char *prefix, const char *msg){ - char buf[512]; - if( msg==NULL ) msg = sqlite3_errmsg(sqlite3_context_db_handle(pContext)); - sqlite3_snprintf(sizeof(buf), buf, "%s: %s", prefix, msg); - sqlite3_result_error(pContext, buf, -1); -} - -/* Helper function to collect the set of terms in the segment into -** pTerms. The segment is defined by the leaf nodes between -** iStartBlockid and iEndBlockid, inclusive, or by the contents of -** pRootData if iStartBlockid is 0 (in which case the entire segment -** fit in a leaf). -*/ -static int collectSegmentTerms(fulltext_vtab *v, sqlite3_stmt *s, - fts2Hash *pTerms){ - const sqlite_int64 iStartBlockid = sqlite3_column_int64(s, 0); - const sqlite_int64 iEndBlockid = sqlite3_column_int64(s, 1); - const char *pRootData = sqlite3_column_blob(s, 2); - const int nRootData = sqlite3_column_bytes(s, 2); - LeavesReader reader; - int rc = leavesReaderInit(v, 0, iStartBlockid, iEndBlockid, - pRootData, nRootData, &reader); - if( rc!=SQLITE_OK ) return rc; - - while( rc==SQLITE_OK && !leavesReaderAtEnd(&reader) ){ - const char *pTerm = leavesReaderTerm(&reader); - const int nTerm = leavesReaderTermBytes(&reader); - void *oldValue = sqlite3Fts2HashFind(pTerms, pTerm, nTerm); - void *newValue = (void *)((char *)oldValue+1); - - /* From the comment before sqlite3Fts2HashInsert in fts2_hash.c, - ** the data value passed is returned in case of malloc failure. - */ - if( newValue==sqlite3Fts2HashInsert(pTerms, pTerm, nTerm, newValue) ){ - rc = SQLITE_NOMEM; - }else{ - rc = leavesReaderStep(v, &reader); - } - } - - leavesReaderDestroy(&reader); - return rc; -} - -/* Helper function to build the result string for dump_terms(). */ -static int generateTermsResult(sqlite3_context *pContext, fts2Hash *pTerms){ - int iTerm, nTerms, nResultBytes, iByte; - char *result; - TermData *pData; - fts2HashElem *e; - - /* Iterate pTerms to generate an array of terms in pData for - ** sorting. - */ - nTerms = fts2HashCount(pTerms); - assert( nTerms>0 ); - pData = sqlite3_malloc(nTerms*sizeof(TermData)); - if( pData==NULL ) return SQLITE_NOMEM; - - nResultBytes = 0; - for(iTerm = 0, e = fts2HashFirst(pTerms); e; iTerm++, e = fts2HashNext(e)){ - nResultBytes += fts2HashKeysize(e)+1; /* Term plus trailing space */ - assert( iTerm0 ); /* nTerms>0, nResultsBytes must be, too. */ - result = sqlite3_malloc(nResultBytes); - if( result==NULL ){ - sqlite3_free(pData); - return SQLITE_NOMEM; - } - - if( nTerms>1 ) qsort(pData, nTerms, sizeof(*pData), termDataCmp); - - /* Read the terms in order to build the result. */ - iByte = 0; - for(iTerm=0; iTerm0 ){ - rc = generateTermsResult(pContext, &terms); - if( rc==SQLITE_NOMEM ){ - generateError(pContext, "dump_terms", "out of memory"); - }else{ - assert( rc==SQLITE_OK ); - } - }else if( argc==3 ){ - /* The specific segment asked for could not be found. */ - generateError(pContext, "dump_terms", "segment not found"); - }else{ - /* No segments found. */ - /* TODO(shess): It should be impossible to reach this. This - ** case can only happen for an empty table, in which case - ** SQLite has no rows to call this function on. - */ - sqlite3_result_null(pContext); - } - } - sqlite3Fts2HashClear(&terms); - } -} - -/* Expand the DL_DEFAULT doclist in pData into a text result in -** pContext. -*/ -static void createDoclistResult(sqlite3_context *pContext, - const char *pData, int nData){ - DataBuffer dump; - DLReader dlReader; - - assert( pData!=NULL && nData>0 ); - - dataBufferInit(&dump, 0); - dlrInit(&dlReader, DL_DEFAULT, pData, nData); - for( ; !dlrAtEnd(&dlReader); dlrStep(&dlReader) ){ - char buf[256]; - PLReader plReader; - - plrInit(&plReader, &dlReader); - if( DL_DEFAULT==DL_DOCIDS || plrAtEnd(&plReader) ){ - sqlite3_snprintf(sizeof(buf), buf, "[%lld] ", dlrDocid(&dlReader)); - dataBufferAppend(&dump, buf, strlen(buf)); - }else{ - int iColumn = plrColumn(&plReader); - - sqlite3_snprintf(sizeof(buf), buf, "[%lld %d[", - dlrDocid(&dlReader), iColumn); - dataBufferAppend(&dump, buf, strlen(buf)); - - for( ; !plrAtEnd(&plReader); plrStep(&plReader) ){ - if( plrColumn(&plReader)!=iColumn ){ - iColumn = plrColumn(&plReader); - sqlite3_snprintf(sizeof(buf), buf, "] %d[", iColumn); - assert( dump.nData>0 ); - dump.nData--; /* Overwrite trailing space. */ - assert( dump.pData[dump.nData]==' '); - dataBufferAppend(&dump, buf, strlen(buf)); - } - if( DL_DEFAULT==DL_POSITIONS_OFFSETS ){ - sqlite3_snprintf(sizeof(buf), buf, "%d,%d,%d ", - plrPosition(&plReader), - plrStartOffset(&plReader), plrEndOffset(&plReader)); - }else if( DL_DEFAULT==DL_POSITIONS ){ - sqlite3_snprintf(sizeof(buf), buf, "%d ", plrPosition(&plReader)); - }else{ - assert( NULL=="Unhandled DL_DEFAULT value"); - } - dataBufferAppend(&dump, buf, strlen(buf)); - } - plrDestroy(&plReader); - - assert( dump.nData>0 ); - dump.nData--; /* Overwrite trailing space. */ - assert( dump.pData[dump.nData]==' '); - dataBufferAppend(&dump, "]] ", 3); - } - } - dlrDestroy(&dlReader); - - assert( dump.nData>0 ); - dump.nData--; /* Overwrite trailing space. */ - assert( dump.pData[dump.nData]==' '); - dump.pData[dump.nData] = '\0'; - assert( dump.nData>0 ); - - /* Passes ownership of dump's buffer to pContext. */ - sqlite3_result_text(pContext, dump.pData, dump.nData, sqlite3_free); - dump.pData = NULL; - dump.nData = dump.nCapacity = 0; -} - -/* Implements dump_doclist() for use in inspecting the fts2 index from -** tests. TEXT result containing a string representation of the -** doclist for the indicated term. dump_doclist(t, term, level, idx) -** dumps the doclist for term from the segment specified by level, idx -** (in %_segdir), while dump_doclist(t, term) dumps the logical -** doclist for the term across all segments. The per-segment doclist -** can contain deletions, while the full-index doclist will not -** (deletions are omitted). -** -** Result formats differ with the setting of DL_DEFAULTS. Examples: -** -** DL_DOCIDS: [1] [3] [7] -** DL_POSITIONS: [1 0[0 4] 1[17]] [3 1[5]] -** DL_POSITIONS_OFFSETS: [1 0[0,0,3 4,23,26] 1[17,102,105]] [3 1[5,20,23]] -** -** In each case the number after the outer '[' is the docid. In the -** latter two cases, the number before the inner '[' is the column -** associated with the values within. For DL_POSITIONS the numbers -** within are the positions, for DL_POSITIONS_OFFSETS they are the -** position, the start offset, and the end offset. -*/ -static void dumpDoclistFunc( - sqlite3_context *pContext, - int argc, sqlite3_value **argv -){ - fulltext_cursor *pCursor; - if( argc!=2 && argc!=4 ){ - generateError(pContext, "dump_doclist", "incorrect arguments"); - }else if( sqlite3_value_type(argv[0])!=SQLITE_BLOB || - sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){ - generateError(pContext, "dump_doclist", "illegal first argument"); - }else if( sqlite3_value_text(argv[1])==NULL || - sqlite3_value_text(argv[1])[0]=='\0' ){ - generateError(pContext, "dump_doclist", "empty second argument"); - }else{ - const char *pTerm = (const char *)sqlite3_value_text(argv[1]); - const int nTerm = strlen(pTerm); - fulltext_vtab *v; - int rc; - DataBuffer doclist; - - memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor)); - v = cursor_vtab(pCursor); - - dataBufferInit(&doclist, 0); - - /* termSelect() yields the same logical doclist that queries are - ** run against. - */ - if( argc==2 ){ - rc = termSelect(v, v->nColumn, pTerm, nTerm, 0, DL_DEFAULT, &doclist); - }else{ - sqlite3_stmt *s = NULL; - - /* Get our specific segment's information. */ - rc = sql_get_statement(v, SEGDIR_SELECT_SEGMENT_STMT, &s); - if( rc==SQLITE_OK ){ - rc = sqlite3_bind_int(s, 1, sqlite3_value_int(argv[2])); - if( rc==SQLITE_OK ){ - rc = sqlite3_bind_int(s, 2, sqlite3_value_int(argv[3])); - } - } - - if( rc==SQLITE_OK ){ - rc = sqlite3_step(s); - - if( rc==SQLITE_DONE ){ - dataBufferDestroy(&doclist); - generateError(pContext, "dump_doclist", "segment not found"); - return; - } - - /* Found a segment, load it into doclist. */ - if( rc==SQLITE_ROW ){ - const sqlite_int64 iLeavesEnd = sqlite3_column_int64(s, 1); - const char *pData = sqlite3_column_blob(s, 2); - const int nData = sqlite3_column_bytes(s, 2); - - /* loadSegment() is used by termSelect() to load each - ** segment's data. - */ - rc = loadSegment(v, pData, nData, iLeavesEnd, pTerm, nTerm, 0, - &doclist); - if( rc==SQLITE_OK ){ - rc = sqlite3_step(s); - - /* Should not have more than one matching segment. */ - if( rc!=SQLITE_DONE ){ - sqlite3_reset(s); - dataBufferDestroy(&doclist); - generateError(pContext, "dump_doclist", "invalid segdir"); - return; - } - rc = SQLITE_OK; - } - } - } - - sqlite3_reset(s); - } - - if( rc==SQLITE_OK ){ - if( doclist.nData>0 ){ - createDoclistResult(pContext, doclist.pData, doclist.nData); - }else{ - /* TODO(shess): This can happen if the term is not present, or - ** if all instances of the term have been deleted and this is - ** an all-index dump. It may be interesting to distinguish - ** these cases. - */ - sqlite3_result_text(pContext, "", 0, SQLITE_STATIC); - } - }else if( rc==SQLITE_NOMEM ){ - /* Handle out-of-memory cases specially because if they are - ** generated in fts2 code they may not be reflected in the db - ** handle. - */ - /* TODO(shess): Handle this more comprehensively. - ** sqlite3ErrStr() has what I need, but is internal. - */ - generateError(pContext, "dump_doclist", "out of memory"); - }else{ - generateError(pContext, "dump_doclist", NULL); - } - - dataBufferDestroy(&doclist); - } -} -#endif - -/* -** This routine implements the xFindFunction method for the FTS2 -** virtual table. -*/ -static int fulltextFindFunction( - sqlite3_vtab *pVtab, - int nArg, - const char *zName, - void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), - void **ppArg -){ - if( strcmp(zName,"snippet")==0 ){ - *pxFunc = snippetFunc; - return 1; - }else if( strcmp(zName,"offsets")==0 ){ - *pxFunc = snippetOffsetsFunc; - return 1; - }else if( strcmp(zName,"optimize")==0 ){ - *pxFunc = optimizeFunc; - return 1; -#ifdef SQLITE_TEST - /* NOTE(shess): These functions are present only for testing - ** purposes. No particular effort is made to optimize their - ** execution or how they build their results. - */ - }else if( strcmp(zName,"dump_terms")==0 ){ - /* fprintf(stderr, "Found dump_terms\n"); */ - *pxFunc = dumpTermsFunc; - return 1; - }else if( strcmp(zName,"dump_doclist")==0 ){ - /* fprintf(stderr, "Found dump_doclist\n"); */ - *pxFunc = dumpDoclistFunc; - return 1; -#endif - } - return 0; -} - -/* -** Rename an fts2 table. -*/ -static int fulltextRename( - sqlite3_vtab *pVtab, - const char *zName -){ - fulltext_vtab *p = (fulltext_vtab *)pVtab; - int rc = SQLITE_NOMEM; - char *zSql = sqlite3_mprintf( - "ALTER TABLE %Q.'%q_content' RENAME TO '%q_content';" - "ALTER TABLE %Q.'%q_segments' RENAME TO '%q_segments';" - "ALTER TABLE %Q.'%q_segdir' RENAME TO '%q_segdir';" - , p->zDb, p->zName, zName - , p->zDb, p->zName, zName - , p->zDb, p->zName, zName - ); - if( zSql ){ - rc = sqlite3_exec(p->db, zSql, 0, 0, 0); - sqlite3_free(zSql); - } - return rc; -} - -static const sqlite3_module fts2Module = { - /* iVersion */ 0, - /* xCreate */ fulltextCreate, - /* xConnect */ fulltextConnect, - /* xBestIndex */ fulltextBestIndex, - /* xDisconnect */ fulltextDisconnect, - /* xDestroy */ fulltextDestroy, - /* xOpen */ fulltextOpen, - /* xClose */ fulltextClose, - /* xFilter */ fulltextFilter, - /* xNext */ fulltextNext, - /* xEof */ fulltextEof, - /* xColumn */ fulltextColumn, - /* xRowid */ fulltextRowid, - /* xUpdate */ fulltextUpdate, - /* xBegin */ fulltextBegin, - /* xSync */ fulltextSync, - /* xCommit */ fulltextCommit, - /* xRollback */ fulltextRollback, - /* xFindFunction */ fulltextFindFunction, - /* xRename */ fulltextRename, -}; - -static void hashDestroy(void *p){ - fts2Hash *pHash = (fts2Hash *)p; - sqlite3Fts2HashClear(pHash); - sqlite3_free(pHash); -} - -/* -** The fts2 built-in tokenizers - "simple" and "porter" - are implemented -** in files fts2_tokenizer1.c and fts2_porter.c respectively. The following -** two forward declarations are for functions declared in these files -** used to retrieve the respective implementations. -** -** Calling sqlite3Fts2SimpleTokenizerModule() sets the value pointed -** to by the argument to point a the "simple" tokenizer implementation. -** Function ...PorterTokenizerModule() sets *pModule to point to the -** porter tokenizer/stemmer implementation. -*/ -void sqlite3Fts2SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule); -void sqlite3Fts2PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule); -void sqlite3Fts2IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule); - -int sqlite3Fts2InitHashTable(sqlite3 *, fts2Hash *, const char *); - -/* -** Initialize the fts2 extension. If this extension is built as part -** of the sqlite library, then this function is called directly by -** SQLite. If fts2 is built as a dynamically loadable extension, this -** function is called by the sqlite3_extension_init() entry point. -*/ -int sqlite3Fts2Init(sqlite3 *db){ - int rc = SQLITE_OK; - fts2Hash *pHash = 0; - const sqlite3_tokenizer_module *pSimple = 0; - const sqlite3_tokenizer_module *pPorter = 0; - const sqlite3_tokenizer_module *pIcu = 0; - - sqlite3Fts2SimpleTokenizerModule(&pSimple); - sqlite3Fts2PorterTokenizerModule(&pPorter); -#ifdef SQLITE_ENABLE_ICU - sqlite3Fts2IcuTokenizerModule(&pIcu); -#endif - - /* Allocate and initialize the hash-table used to store tokenizers. */ - pHash = sqlite3_malloc(sizeof(fts2Hash)); - if( !pHash ){ - rc = SQLITE_NOMEM; - }else{ - sqlite3Fts2HashInit(pHash, FTS2_HASH_STRING, 1); - } - - /* Load the built-in tokenizers into the hash table */ - if( rc==SQLITE_OK ){ - if( sqlite3Fts2HashInsert(pHash, "simple", 7, (void *)pSimple) - || sqlite3Fts2HashInsert(pHash, "porter", 7, (void *)pPorter) - || (pIcu && sqlite3Fts2HashInsert(pHash, "icu", 4, (void *)pIcu)) - ){ - rc = SQLITE_NOMEM; - } - } - - /* Create the virtual table wrapper around the hash-table and overload - ** the two scalar functions. If this is successful, register the - ** module with sqlite. - */ - if( SQLITE_OK==rc - && SQLITE_OK==(rc = sqlite3Fts2InitHashTable(db, pHash, "fts2_tokenizer")) - && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1)) - && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", -1)) - && SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", -1)) -#ifdef SQLITE_TEST - && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_terms", -1)) - && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_doclist", -1)) -#endif - ){ - return sqlite3_create_module_v2( - db, "fts2", &fts2Module, (void *)pHash, hashDestroy - ); - } - - /* An error has occurred. Delete the hash table and return the error code. */ - assert( rc!=SQLITE_OK ); - if( pHash ){ - sqlite3Fts2HashClear(pHash); - sqlite3_free(pHash); - } - return rc; -} - -#if !SQLITE_CORE -#ifdef _WIN32 -__declspec(dllexport) -#endif -int sqlite3_fts2_init( - sqlite3 *db, - char **pzErrMsg, - const sqlite3_api_routines *pApi -){ - SQLITE_EXTENSION_INIT2(pApi) - return sqlite3Fts2Init(db); -} -#endif - -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */ diff --git a/ext/fts2/fts2.h b/ext/fts2/fts2.h deleted file mode 100644 index 4da4c3877b..0000000000 --- a/ext/fts2/fts2.h +++ /dev/null @@ -1,26 +0,0 @@ -/* -** 2006 Oct 10 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -****************************************************************************** -** -** This header file is used by programs that want to link against the -** FTS2 library. All it does is declare the sqlite3Fts2Init() interface. -*/ -#include "sqlite3.h" - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -int sqlite3Fts2Init(sqlite3 *db); - -#ifdef __cplusplus -} /* extern "C" */ -#endif /* __cplusplus */ diff --git a/ext/fts2/fts2_hash.c b/ext/fts2/fts2_hash.c deleted file mode 100644 index 3596dcf0b8..0000000000 --- a/ext/fts2/fts2_hash.c +++ /dev/null @@ -1,376 +0,0 @@ -/* -** 2001 September 22 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** This is the implementation of generic hash-tables used in SQLite. -** We've modified it slightly to serve as a standalone hash table -** implementation for the full-text indexing module. -*/ - -/* -** The code in this file is only compiled if: -** -** * The FTS2 module is being built as an extension -** (in which case SQLITE_CORE is not defined), or -** -** * The FTS2 module is being built into the core of -** SQLite (in which case SQLITE_ENABLE_FTS2 is defined). -*/ -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) - -#include -#include -#include - -#include "sqlite3.h" -#include "sqlite3ext.h" -SQLITE_EXTENSION_INIT3 -#include "fts2_hash.h" - -/* -** Malloc and Free functions -*/ -static void *fts2HashMalloc(int n){ - void *p = sqlite3_malloc(n); - if( p ){ - memset(p, 0, n); - } - return p; -} -static void fts2HashFree(void *p){ - sqlite3_free(p); -} - -/* Turn bulk memory into a hash table object by initializing the -** fields of the Hash structure. -** -** "pNew" is a pointer to the hash table that is to be initialized. -** keyClass is one of the constants -** FTS2_HASH_BINARY or FTS2_HASH_STRING. The value of keyClass -** determines what kind of key the hash table will use. "copyKey" is -** true if the hash table should make its own private copy of keys and -** false if it should just use the supplied pointer. -*/ -void sqlite3Fts2HashInit(fts2Hash *pNew, int keyClass, int copyKey){ - assert( pNew!=0 ); - assert( keyClass>=FTS2_HASH_STRING && keyClass<=FTS2_HASH_BINARY ); - pNew->keyClass = keyClass; - pNew->copyKey = copyKey; - pNew->first = 0; - pNew->count = 0; - pNew->htsize = 0; - pNew->ht = 0; -} - -/* Remove all entries from a hash table. Reclaim all memory. -** Call this routine to delete a hash table or to reset a hash table -** to the empty state. -*/ -void sqlite3Fts2HashClear(fts2Hash *pH){ - fts2HashElem *elem; /* For looping over all elements of the table */ - - assert( pH!=0 ); - elem = pH->first; - pH->first = 0; - fts2HashFree(pH->ht); - pH->ht = 0; - pH->htsize = 0; - while( elem ){ - fts2HashElem *next_elem = elem->next; - if( pH->copyKey && elem->pKey ){ - fts2HashFree(elem->pKey); - } - fts2HashFree(elem); - elem = next_elem; - } - pH->count = 0; -} - -/* -** Hash and comparison functions when the mode is FTS2_HASH_STRING -*/ -static int strHash(const void *pKey, int nKey){ - const char *z = (const char *)pKey; - int h = 0; - if( nKey<=0 ) nKey = (int) strlen(z); - while( nKey > 0 ){ - h = (h<<3) ^ h ^ *z++; - nKey--; - } - return h & 0x7fffffff; -} -static int strCompare(const void *pKey1, int n1, const void *pKey2, int n2){ - if( n1!=n2 ) return 1; - return strncmp((const char*)pKey1,(const char*)pKey2,n1); -} - -/* -** Hash and comparison functions when the mode is FTS2_HASH_BINARY -*/ -static int binHash(const void *pKey, int nKey){ - int h = 0; - const char *z = (const char *)pKey; - while( nKey-- > 0 ){ - h = (h<<3) ^ h ^ *(z++); - } - return h & 0x7fffffff; -} -static int binCompare(const void *pKey1, int n1, const void *pKey2, int n2){ - if( n1!=n2 ) return 1; - return memcmp(pKey1,pKey2,n1); -} - -/* -** Return a pointer to the appropriate hash function given the key class. -** -** The C syntax in this function definition may be unfamilar to some -** programmers, so we provide the following additional explanation: -** -** The name of the function is "hashFunction". The function takes a -** single parameter "keyClass". The return value of hashFunction() -** is a pointer to another function. Specifically, the return value -** of hashFunction() is a pointer to a function that takes two parameters -** with types "const void*" and "int" and returns an "int". -*/ -static int (*hashFunction(int keyClass))(const void*,int){ - if( keyClass==FTS2_HASH_STRING ){ - return &strHash; - }else{ - assert( keyClass==FTS2_HASH_BINARY ); - return &binHash; - } -} - -/* -** Return a pointer to the appropriate hash function given the key class. -** -** For help in interpreted the obscure C code in the function definition, -** see the header comment on the previous function. -*/ -static int (*compareFunction(int keyClass))(const void*,int,const void*,int){ - if( keyClass==FTS2_HASH_STRING ){ - return &strCompare; - }else{ - assert( keyClass==FTS2_HASH_BINARY ); - return &binCompare; - } -} - -/* Link an element into the hash table -*/ -static void insertElement( - fts2Hash *pH, /* The complete hash table */ - struct _fts2ht *pEntry, /* The entry into which pNew is inserted */ - fts2HashElem *pNew /* The element to be inserted */ -){ - fts2HashElem *pHead; /* First element already in pEntry */ - pHead = pEntry->chain; - if( pHead ){ - pNew->next = pHead; - pNew->prev = pHead->prev; - if( pHead->prev ){ pHead->prev->next = pNew; } - else { pH->first = pNew; } - pHead->prev = pNew; - }else{ - pNew->next = pH->first; - if( pH->first ){ pH->first->prev = pNew; } - pNew->prev = 0; - pH->first = pNew; - } - pEntry->count++; - pEntry->chain = pNew; -} - - -/* Resize the hash table so that it cantains "new_size" buckets. -** "new_size" must be a power of 2. The hash table might fail -** to resize if sqliteMalloc() fails. -*/ -static void rehash(fts2Hash *pH, int new_size){ - struct _fts2ht *new_ht; /* The new hash table */ - fts2HashElem *elem, *next_elem; /* For looping over existing elements */ - int (*xHash)(const void*,int); /* The hash function */ - - assert( (new_size & (new_size-1))==0 ); - new_ht = (struct _fts2ht *)fts2HashMalloc( new_size*sizeof(struct _fts2ht) ); - if( new_ht==0 ) return; - fts2HashFree(pH->ht); - pH->ht = new_ht; - pH->htsize = new_size; - xHash = hashFunction(pH->keyClass); - for(elem=pH->first, pH->first=0; elem; elem = next_elem){ - int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1); - next_elem = elem->next; - insertElement(pH, &new_ht[h], elem); - } -} - -/* This function (for internal use only) locates an element in an -** hash table that matches the given key. The hash for this key has -** already been computed and is passed as the 4th parameter. -*/ -static fts2HashElem *findElementGivenHash( - const fts2Hash *pH, /* The pH to be searched */ - const void *pKey, /* The key we are searching for */ - int nKey, - int h /* The hash for this key. */ -){ - fts2HashElem *elem; /* Used to loop thru the element list */ - int count; /* Number of elements left to test */ - int (*xCompare)(const void*,int,const void*,int); /* comparison function */ - - if( pH->ht ){ - struct _fts2ht *pEntry = &pH->ht[h]; - elem = pEntry->chain; - count = pEntry->count; - xCompare = compareFunction(pH->keyClass); - while( count-- && elem ){ - if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){ - return elem; - } - elem = elem->next; - } - } - return 0; -} - -/* Remove a single entry from the hash table given a pointer to that -** element and a hash on the element's key. -*/ -static void removeElementGivenHash( - fts2Hash *pH, /* The pH containing "elem" */ - fts2HashElem* elem, /* The element to be removed from the pH */ - int h /* Hash value for the element */ -){ - struct _fts2ht *pEntry; - if( elem->prev ){ - elem->prev->next = elem->next; - }else{ - pH->first = elem->next; - } - if( elem->next ){ - elem->next->prev = elem->prev; - } - pEntry = &pH->ht[h]; - if( pEntry->chain==elem ){ - pEntry->chain = elem->next; - } - pEntry->count--; - if( pEntry->count<=0 ){ - pEntry->chain = 0; - } - if( pH->copyKey && elem->pKey ){ - fts2HashFree(elem->pKey); - } - fts2HashFree( elem ); - pH->count--; - if( pH->count<=0 ){ - assert( pH->first==0 ); - assert( pH->count==0 ); - fts2HashClear(pH); - } -} - -/* Attempt to locate an element of the hash table pH with a key -** that matches pKey,nKey. Return the data for this element if it is -** found, or NULL if there is no match. -*/ -void *sqlite3Fts2HashFind(const fts2Hash *pH, const void *pKey, int nKey){ - int h; /* A hash on key */ - fts2HashElem *elem; /* The element that matches key */ - int (*xHash)(const void*,int); /* The hash function */ - - if( pH==0 || pH->ht==0 ) return 0; - xHash = hashFunction(pH->keyClass); - assert( xHash!=0 ); - h = (*xHash)(pKey,nKey); - assert( (pH->htsize & (pH->htsize-1))==0 ); - elem = findElementGivenHash(pH,pKey,nKey, h & (pH->htsize-1)); - return elem ? elem->data : 0; -} - -/* Insert an element into the hash table pH. The key is pKey,nKey -** and the data is "data". -** -** If no element exists with a matching key, then a new -** element is created. A copy of the key is made if the copyKey -** flag is set. NULL is returned. -** -** If another element already exists with the same key, then the -** new data replaces the old data and the old data is returned. -** The key is not copied in this instance. If a malloc fails, then -** the new data is returned and the hash table is unchanged. -** -** If the "data" parameter to this function is NULL, then the -** element corresponding to "key" is removed from the hash table. -*/ -void *sqlite3Fts2HashInsert( - fts2Hash *pH, /* The hash table to insert into */ - const void *pKey, /* The key */ - int nKey, /* Number of bytes in the key */ - void *data /* The data */ -){ - int hraw; /* Raw hash value of the key */ - int h; /* the hash of the key modulo hash table size */ - fts2HashElem *elem; /* Used to loop thru the element list */ - fts2HashElem *new_elem; /* New element added to the pH */ - int (*xHash)(const void*,int); /* The hash function */ - - assert( pH!=0 ); - xHash = hashFunction(pH->keyClass); - assert( xHash!=0 ); - hraw = (*xHash)(pKey, nKey); - assert( (pH->htsize & (pH->htsize-1))==0 ); - h = hraw & (pH->htsize-1); - elem = findElementGivenHash(pH,pKey,nKey,h); - if( elem ){ - void *old_data = elem->data; - if( data==0 ){ - removeElementGivenHash(pH,elem,h); - }else{ - elem->data = data; - } - return old_data; - } - if( data==0 ) return 0; - new_elem = (fts2HashElem*)fts2HashMalloc( sizeof(fts2HashElem) ); - if( new_elem==0 ) return data; - if( pH->copyKey && pKey!=0 ){ - new_elem->pKey = fts2HashMalloc( nKey ); - if( new_elem->pKey==0 ){ - fts2HashFree(new_elem); - return data; - } - memcpy((void*)new_elem->pKey, pKey, nKey); - }else{ - new_elem->pKey = (void*)pKey; - } - new_elem->nKey = nKey; - pH->count++; - if( pH->htsize==0 ){ - rehash(pH,8); - if( pH->htsize==0 ){ - pH->count = 0; - fts2HashFree(new_elem); - return data; - } - } - if( pH->count > pH->htsize ){ - rehash(pH,pH->htsize*2); - } - assert( pH->htsize>0 ); - assert( (pH->htsize & (pH->htsize-1))==0 ); - h = hraw & (pH->htsize-1); - insertElement(pH, &pH->ht[h], new_elem); - new_elem->data = data; - return 0; -} - -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */ diff --git a/ext/fts2/fts2_hash.h b/ext/fts2/fts2_hash.h deleted file mode 100644 index 02936f18bb..0000000000 --- a/ext/fts2/fts2_hash.h +++ /dev/null @@ -1,110 +0,0 @@ -/* -** 2001 September 22 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** This is the header file for the generic hash-table implementation -** used in SQLite. We've modified it slightly to serve as a standalone -** hash table implementation for the full-text indexing module. -** -*/ -#ifndef _FTS2_HASH_H_ -#define _FTS2_HASH_H_ - -/* Forward declarations of structures. */ -typedef struct fts2Hash fts2Hash; -typedef struct fts2HashElem fts2HashElem; - -/* A complete hash table is an instance of the following structure. -** The internals of this structure are intended to be opaque -- client -** code should not attempt to access or modify the fields of this structure -** directly. Change this structure only by using the routines below. -** However, many of the "procedures" and "functions" for modifying and -** accessing this structure are really macros, so we can't really make -** this structure opaque. -*/ -struct fts2Hash { - char keyClass; /* HASH_INT, _POINTER, _STRING, _BINARY */ - char copyKey; /* True if copy of key made on insert */ - int count; /* Number of entries in this table */ - fts2HashElem *first; /* The first element of the array */ - int htsize; /* Number of buckets in the hash table */ - struct _fts2ht { /* the hash table */ - int count; /* Number of entries with this hash */ - fts2HashElem *chain; /* Pointer to first entry with this hash */ - } *ht; -}; - -/* Each element in the hash table is an instance of the following -** structure. All elements are stored on a single doubly-linked list. -** -** Again, this structure is intended to be opaque, but it can't really -** be opaque because it is used by macros. -*/ -struct fts2HashElem { - fts2HashElem *next, *prev; /* Next and previous elements in the table */ - void *data; /* Data associated with this element */ - void *pKey; int nKey; /* Key associated with this element */ -}; - -/* -** There are 2 different modes of operation for a hash table: -** -** FTS2_HASH_STRING pKey points to a string that is nKey bytes long -** (including the null-terminator, if any). Case -** is respected in comparisons. -** -** FTS2_HASH_BINARY pKey points to binary data nKey bytes long. -** memcmp() is used to compare keys. -** -** A copy of the key is made if the copyKey parameter to fts2HashInit is 1. -*/ -#define FTS2_HASH_STRING 1 -#define FTS2_HASH_BINARY 2 - -/* -** Access routines. To delete, insert a NULL pointer. -*/ -void sqlite3Fts2HashInit(fts2Hash*, int keytype, int copyKey); -void *sqlite3Fts2HashInsert(fts2Hash*, const void *pKey, int nKey, void *pData); -void *sqlite3Fts2HashFind(const fts2Hash*, const void *pKey, int nKey); -void sqlite3Fts2HashClear(fts2Hash*); - -/* -** Shorthand for the functions above -*/ -#define fts2HashInit sqlite3Fts2HashInit -#define fts2HashInsert sqlite3Fts2HashInsert -#define fts2HashFind sqlite3Fts2HashFind -#define fts2HashClear sqlite3Fts2HashClear - -/* -** Macros for looping over all elements of a hash table. The idiom is -** like this: -** -** fts2Hash h; -** fts2HashElem *p; -** ... -** for(p=fts2HashFirst(&h); p; p=fts2HashNext(p)){ -** SomeStructure *pData = fts2HashData(p); -** // do something with pData -** } -*/ -#define fts2HashFirst(H) ((H)->first) -#define fts2HashNext(E) ((E)->next) -#define fts2HashData(E) ((E)->data) -#define fts2HashKey(E) ((E)->pKey) -#define fts2HashKeysize(E) ((E)->nKey) - -/* -** Number of entries in a hash table -*/ -#define fts2HashCount(H) ((H)->count) - -#endif /* _FTS2_HASH_H_ */ diff --git a/ext/fts2/fts2_icu.c b/ext/fts2/fts2_icu.c deleted file mode 100644 index 2670301f51..0000000000 --- a/ext/fts2/fts2_icu.c +++ /dev/null @@ -1,260 +0,0 @@ -/* -** 2007 June 22 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** This file implements a tokenizer for fts2 based on the ICU library. -** -** $Id: fts2_icu.c,v 1.3 2008/12/18 05:30:26 danielk1977 Exp $ -*/ - -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) -#ifdef SQLITE_ENABLE_ICU - -#include -#include -#include "fts2_tokenizer.h" - -#include -#include -#include -#include - -typedef struct IcuTokenizer IcuTokenizer; -typedef struct IcuCursor IcuCursor; - -struct IcuTokenizer { - sqlite3_tokenizer base; - char *zLocale; -}; - -struct IcuCursor { - sqlite3_tokenizer_cursor base; - - UBreakIterator *pIter; /* ICU break-iterator object */ - int nChar; /* Number of UChar elements in pInput */ - UChar *aChar; /* Copy of input using utf-16 encoding */ - int *aOffset; /* Offsets of each character in utf-8 input */ - - int nBuffer; - char *zBuffer; - - int iToken; -}; - -/* -** Create a new tokenizer instance. -*/ -static int icuCreate( - int argc, /* Number of entries in argv[] */ - const char * const *argv, /* Tokenizer creation arguments */ - sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */ -){ - IcuTokenizer *p; - int n = 0; - - if( argc>0 ){ - n = strlen(argv[0])+1; - } - p = (IcuTokenizer *)sqlite3_malloc(sizeof(IcuTokenizer)+n); - if( !p ){ - return SQLITE_NOMEM; - } - memset(p, 0, sizeof(IcuTokenizer)); - - if( n ){ - p->zLocale = (char *)&p[1]; - memcpy(p->zLocale, argv[0], n); - } - - *ppTokenizer = (sqlite3_tokenizer *)p; - - return SQLITE_OK; -} - -/* -** Destroy a tokenizer -*/ -static int icuDestroy(sqlite3_tokenizer *pTokenizer){ - IcuTokenizer *p = (IcuTokenizer *)pTokenizer; - sqlite3_free(p); - return SQLITE_OK; -} - -/* -** Prepare to begin tokenizing a particular string. The input -** string to be tokenized is pInput[0..nBytes-1]. A cursor -** used to incrementally tokenize this string is returned in -** *ppCursor. -*/ -static int icuOpen( - sqlite3_tokenizer *pTokenizer, /* The tokenizer */ - const char *zInput, /* Input string */ - int nInput, /* Length of zInput in bytes */ - sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */ -){ - IcuTokenizer *p = (IcuTokenizer *)pTokenizer; - IcuCursor *pCsr; - - const int32_t opt = U_FOLD_CASE_DEFAULT; - UErrorCode status = U_ZERO_ERROR; - int nChar; - - UChar32 c; - int iInput = 0; - int iOut = 0; - - *ppCursor = 0; - - if( nInput<0 ){ - nInput = strlen(zInput); - } - nChar = nInput+1; - pCsr = (IcuCursor *)sqlite3_malloc( - sizeof(IcuCursor) + /* IcuCursor */ - ((nChar+3)&~3) * sizeof(UChar) + /* IcuCursor.aChar[] */ - (nChar+1) * sizeof(int) /* IcuCursor.aOffset[] */ - ); - if( !pCsr ){ - return SQLITE_NOMEM; - } - memset(pCsr, 0, sizeof(IcuCursor)); - pCsr->aChar = (UChar *)&pCsr[1]; - pCsr->aOffset = (int *)&pCsr->aChar[(nChar+3)&~3]; - - pCsr->aOffset[iOut] = iInput; - U8_NEXT(zInput, iInput, nInput, c); - while( c>0 ){ - int isError = 0; - c = u_foldCase(c, opt); - U16_APPEND(pCsr->aChar, iOut, nChar, c, isError); - if( isError ){ - sqlite3_free(pCsr); - return SQLITE_ERROR; - } - pCsr->aOffset[iOut] = iInput; - - if( iInputpIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status); - if( !U_SUCCESS(status) ){ - sqlite3_free(pCsr); - return SQLITE_ERROR; - } - pCsr->nChar = iOut; - - ubrk_first(pCsr->pIter); - *ppCursor = (sqlite3_tokenizer_cursor *)pCsr; - return SQLITE_OK; -} - -/* -** Close a tokenization cursor previously opened by a call to icuOpen(). -*/ -static int icuClose(sqlite3_tokenizer_cursor *pCursor){ - IcuCursor *pCsr = (IcuCursor *)pCursor; - ubrk_close(pCsr->pIter); - sqlite3_free(pCsr->zBuffer); - sqlite3_free(pCsr); - return SQLITE_OK; -} - -/* -** Extract the next token from a tokenization cursor. -*/ -static int icuNext( - sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */ - const char **ppToken, /* OUT: *ppToken is the token text */ - int *pnBytes, /* OUT: Number of bytes in token */ - int *piStartOffset, /* OUT: Starting offset of token */ - int *piEndOffset, /* OUT: Ending offset of token */ - int *piPosition /* OUT: Position integer of token */ -){ - IcuCursor *pCsr = (IcuCursor *)pCursor; - - int iStart = 0; - int iEnd = 0; - int nByte = 0; - - while( iStart==iEnd ){ - UChar32 c; - - iStart = ubrk_current(pCsr->pIter); - iEnd = ubrk_next(pCsr->pIter); - if( iEnd==UBRK_DONE ){ - return SQLITE_DONE; - } - - while( iStartaChar, iWhite, pCsr->nChar, c); - if( u_isspace(c) ){ - iStart = iWhite; - }else{ - break; - } - } - assert(iStart<=iEnd); - } - - do { - UErrorCode status = U_ZERO_ERROR; - if( nByte ){ - char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte); - if( !zNew ){ - return SQLITE_NOMEM; - } - pCsr->zBuffer = zNew; - pCsr->nBuffer = nByte; - } - - u_strToUTF8( - pCsr->zBuffer, pCsr->nBuffer, &nByte, /* Output vars */ - &pCsr->aChar[iStart], iEnd-iStart, /* Input vars */ - &status /* Output success/failure */ - ); - } while( nByte>pCsr->nBuffer ); - - *ppToken = pCsr->zBuffer; - *pnBytes = nByte; - *piStartOffset = pCsr->aOffset[iStart]; - *piEndOffset = pCsr->aOffset[iEnd]; - *piPosition = pCsr->iToken++; - - return SQLITE_OK; -} - -/* -** The set of routines that implement the simple tokenizer -*/ -static const sqlite3_tokenizer_module icuTokenizerModule = { - 0, /* iVersion */ - icuCreate, /* xCreate */ - icuDestroy, /* xCreate */ - icuOpen, /* xOpen */ - icuClose, /* xClose */ - icuNext, /* xNext */ -}; - -/* -** Set *ppModule to point at the implementation of the ICU tokenizer. -*/ -void sqlite3Fts2IcuTokenizerModule( - sqlite3_tokenizer_module const**ppModule -){ - *ppModule = &icuTokenizerModule; -} - -#endif /* defined(SQLITE_ENABLE_ICU) */ -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */ diff --git a/ext/fts2/fts2_porter.c b/ext/fts2/fts2_porter.c deleted file mode 100644 index 881baf7100..0000000000 --- a/ext/fts2/fts2_porter.c +++ /dev/null @@ -1,644 +0,0 @@ -/* -** 2006 September 30 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** Implementation of the full-text-search tokenizer that implements -** a Porter stemmer. -*/ - -/* -** The code in this file is only compiled if: -** -** * The FTS2 module is being built as an extension -** (in which case SQLITE_CORE is not defined), or -** -** * The FTS2 module is being built into the core of -** SQLite (in which case SQLITE_ENABLE_FTS2 is defined). -*/ -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) - - -#include -#include -#include -#include - -#include "sqlite3.h" -#include "sqlite3ext.h" -SQLITE_EXTENSION_INIT3 -#include "fts2_tokenizer.h" - -/* -** Class derived from sqlite3_tokenizer -*/ -typedef struct porter_tokenizer { - sqlite3_tokenizer base; /* Base class */ -} porter_tokenizer; - -/* -** Class derived from sqlit3_tokenizer_cursor -*/ -typedef struct porter_tokenizer_cursor { - sqlite3_tokenizer_cursor base; - const char *zInput; /* input we are tokenizing */ - int nInput; /* size of the input */ - int iOffset; /* current position in zInput */ - int iToken; /* index of next token to be returned */ - char *zToken; /* storage for current token */ - int nAllocated; /* space allocated to zToken buffer */ -} porter_tokenizer_cursor; - - -/* Forward declaration */ -static const sqlite3_tokenizer_module porterTokenizerModule; - - -/* -** Create a new tokenizer instance. -*/ -static int porterCreate( - int argc, const char * const *argv, - sqlite3_tokenizer **ppTokenizer -){ - porter_tokenizer *t; - t = (porter_tokenizer *) sqlite3_malloc(sizeof(*t)); - if( t==NULL ) return SQLITE_NOMEM; - memset(t, 0, sizeof(*t)); - *ppTokenizer = &t->base; - return SQLITE_OK; -} - -/* -** Destroy a tokenizer -*/ -static int porterDestroy(sqlite3_tokenizer *pTokenizer){ - sqlite3_free(pTokenizer); - return SQLITE_OK; -} - -/* -** Prepare to begin tokenizing a particular string. The input -** string to be tokenized is zInput[0..nInput-1]. A cursor -** used to incrementally tokenize this string is returned in -** *ppCursor. -*/ -static int porterOpen( - sqlite3_tokenizer *pTokenizer, /* The tokenizer */ - const char *zInput, int nInput, /* String to be tokenized */ - sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */ -){ - porter_tokenizer_cursor *c; - - c = (porter_tokenizer_cursor *) sqlite3_malloc(sizeof(*c)); - if( c==NULL ) return SQLITE_NOMEM; - - c->zInput = zInput; - if( zInput==0 ){ - c->nInput = 0; - }else if( nInput<0 ){ - c->nInput = (int)strlen(zInput); - }else{ - c->nInput = nInput; - } - c->iOffset = 0; /* start tokenizing at the beginning */ - c->iToken = 0; - c->zToken = NULL; /* no space allocated, yet. */ - c->nAllocated = 0; - - *ppCursor = &c->base; - return SQLITE_OK; -} - -/* -** Close a tokenization cursor previously opened by a call to -** porterOpen() above. -*/ -static int porterClose(sqlite3_tokenizer_cursor *pCursor){ - porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor; - sqlite3_free(c->zToken); - sqlite3_free(c); - return SQLITE_OK; -} -/* -** Vowel or consonant -*/ -static const char cType[] = { - 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, - 1, 1, 1, 2, 1 -}; - -/* -** isConsonant() and isVowel() determine if their first character in -** the string they point to is a consonant or a vowel, according -** to Porter ruls. -** -** A consonate is any letter other than 'a', 'e', 'i', 'o', or 'u'. -** 'Y' is a consonant unless it follows another consonant, -** in which case it is a vowel. -** -** In these routine, the letters are in reverse order. So the 'y' rule -** is that 'y' is a consonant unless it is followed by another -** consonent. -*/ -static int isVowel(const char*); -static int isConsonant(const char *z){ - int j; - char x = *z; - if( x==0 ) return 0; - assert( x>='a' && x<='z' ); - j = cType[x-'a']; - if( j<2 ) return j; - return z[1]==0 || isVowel(z + 1); -} -static int isVowel(const char *z){ - int j; - char x = *z; - if( x==0 ) return 0; - assert( x>='a' && x<='z' ); - j = cType[x-'a']; - if( j<2 ) return 1-j; - return isConsonant(z + 1); -} - -/* -** Let any sequence of one or more vowels be represented by V and let -** C be sequence of one or more consonants. Then every word can be -** represented as: -** -** [C] (VC){m} [V] -** -** In prose: A word is an optional consonant followed by zero or -** vowel-consonant pairs followed by an optional vowel. "m" is the -** number of vowel consonant pairs. This routine computes the value -** of m for the first i bytes of a word. -** -** Return true if the m-value for z is 1 or more. In other words, -** return true if z contains at least one vowel that is followed -** by a consonant. -** -** In this routine z[] is in reverse order. So we are really looking -** for an instance of of a consonant followed by a vowel. -*/ -static int m_gt_0(const char *z){ - while( isVowel(z) ){ z++; } - if( *z==0 ) return 0; - while( isConsonant(z) ){ z++; } - return *z!=0; -} - -/* Like mgt0 above except we are looking for a value of m which is -** exactly 1 -*/ -static int m_eq_1(const char *z){ - while( isVowel(z) ){ z++; } - if( *z==0 ) return 0; - while( isConsonant(z) ){ z++; } - if( *z==0 ) return 0; - while( isVowel(z) ){ z++; } - if( *z==0 ) return 1; - while( isConsonant(z) ){ z++; } - return *z==0; -} - -/* Like mgt0 above except we are looking for a value of m>1 instead -** or m>0 -*/ -static int m_gt_1(const char *z){ - while( isVowel(z) ){ z++; } - if( *z==0 ) return 0; - while( isConsonant(z) ){ z++; } - if( *z==0 ) return 0; - while( isVowel(z) ){ z++; } - if( *z==0 ) return 0; - while( isConsonant(z) ){ z++; } - return *z!=0; -} - -/* -** Return TRUE if there is a vowel anywhere within z[0..n-1] -*/ -static int hasVowel(const char *z){ - while( isConsonant(z) ){ z++; } - return *z!=0; -} - -/* -** Return TRUE if the word ends in a double consonant. -** -** The text is reversed here. So we are really looking at -** the first two characters of z[]. -*/ -static int doubleConsonant(const char *z){ - return isConsonant(z) && z[0]==z[1] && isConsonant(z+1); -} - -/* -** Return TRUE if the word ends with three letters which -** are consonant-vowel-consonent and where the final consonant -** is not 'w', 'x', or 'y'. -** -** The word is reversed here. So we are really checking the -** first three letters and the first one cannot be in [wxy]. -*/ -static int star_oh(const char *z){ - return - z[0]!=0 && isConsonant(z) && - z[0]!='w' && z[0]!='x' && z[0]!='y' && - z[1]!=0 && isVowel(z+1) && - z[2]!=0 && isConsonant(z+2); -} - -/* -** If the word ends with zFrom and xCond() is true for the stem -** of the word that preceeds the zFrom ending, then change the -** ending to zTo. -** -** The input word *pz and zFrom are both in reverse order. zTo -** is in normal order. -** -** Return TRUE if zFrom matches. Return FALSE if zFrom does not -** match. Not that TRUE is returned even if xCond() fails and -** no substitution occurs. -*/ -static int stem( - char **pz, /* The word being stemmed (Reversed) */ - const char *zFrom, /* If the ending matches this... (Reversed) */ - const char *zTo, /* ... change the ending to this (not reversed) */ - int (*xCond)(const char*) /* Condition that must be true */ -){ - char *z = *pz; - while( *zFrom && *zFrom==*z ){ z++; zFrom++; } - if( *zFrom!=0 ) return 0; - if( xCond && !xCond(z) ) return 1; - while( *zTo ){ - *(--z) = *(zTo++); - } - *pz = z; - return 1; -} - -/* -** This is the fallback stemmer used when the porter stemmer is -** inappropriate. The input word is copied into the output with -** US-ASCII case folding. If the input word is too long (more -** than 20 bytes if it contains no digits or more than 6 bytes if -** it contains digits) then word is truncated to 20 or 6 bytes -** by taking 10 or 3 bytes from the beginning and end. -*/ -static void copy_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){ - int i, mx, j; - int hasDigit = 0; - for(i=0; i='A' && c<='Z' ){ - zOut[i] = c - 'A' + 'a'; - }else{ - if( c>='0' && c<='9' ) hasDigit = 1; - zOut[i] = c; - } - } - mx = hasDigit ? 3 : 10; - if( nIn>mx*2 ){ - for(j=mx, i=nIn-mx; i=sizeof(zReverse)-7 ){ - /* The word is too big or too small for the porter stemmer. - ** Fallback to the copy stemmer */ - copy_stemmer(zIn, nIn, zOut, pnOut); - return; - } - for(i=0, j=sizeof(zReverse)-6; i='A' && c<='Z' ){ - zReverse[j] = c + 'a' - 'A'; - }else if( c>='a' && c<='z' ){ - zReverse[j] = c; - }else{ - /* The use of a character not in [a-zA-Z] means that we fallback - ** to the copy stemmer */ - copy_stemmer(zIn, nIn, zOut, pnOut); - return; - } - } - memset(&zReverse[sizeof(zReverse)-5], 0, 5); - z = &zReverse[j+1]; - - - /* Step 1a */ - if( z[0]=='s' ){ - if( - !stem(&z, "sess", "ss", 0) && - !stem(&z, "sei", "i", 0) && - !stem(&z, "ss", "ss", 0) - ){ - z++; - } - } - - /* Step 1b */ - z2 = z; - if( stem(&z, "dee", "ee", m_gt_0) ){ - /* Do nothing. The work was all in the test */ - }else if( - (stem(&z, "gni", "", hasVowel) || stem(&z, "de", "", hasVowel)) - && z!=z2 - ){ - if( stem(&z, "ta", "ate", 0) || - stem(&z, "lb", "ble", 0) || - stem(&z, "zi", "ize", 0) ){ - /* Do nothing. The work was all in the test */ - }else if( doubleConsonant(z) && (*z!='l' && *z!='s' && *z!='z') ){ - z++; - }else if( m_eq_1(z) && star_oh(z) ){ - *(--z) = 'e'; - } - } - - /* Step 1c */ - if( z[0]=='y' && hasVowel(z+1) ){ - z[0] = 'i'; - } - - /* Step 2 */ - switch( z[1] ){ - case 'a': - stem(&z, "lanoita", "ate", m_gt_0) || - stem(&z, "lanoit", "tion", m_gt_0); - break; - case 'c': - stem(&z, "icne", "ence", m_gt_0) || - stem(&z, "icna", "ance", m_gt_0); - break; - case 'e': - stem(&z, "rezi", "ize", m_gt_0); - break; - case 'g': - stem(&z, "igol", "log", m_gt_0); - break; - case 'l': - stem(&z, "ilb", "ble", m_gt_0) || - stem(&z, "illa", "al", m_gt_0) || - stem(&z, "iltne", "ent", m_gt_0) || - stem(&z, "ile", "e", m_gt_0) || - stem(&z, "ilsuo", "ous", m_gt_0); - break; - case 'o': - stem(&z, "noitazi", "ize", m_gt_0) || - stem(&z, "noita", "ate", m_gt_0) || - stem(&z, "rota", "ate", m_gt_0); - break; - case 's': - stem(&z, "msila", "al", m_gt_0) || - stem(&z, "ssenevi", "ive", m_gt_0) || - stem(&z, "ssenluf", "ful", m_gt_0) || - stem(&z, "ssensuo", "ous", m_gt_0); - break; - case 't': - stem(&z, "itila", "al", m_gt_0) || - stem(&z, "itivi", "ive", m_gt_0) || - stem(&z, "itilib", "ble", m_gt_0); - break; - } - - /* Step 3 */ - switch( z[0] ){ - case 'e': - stem(&z, "etaci", "ic", m_gt_0) || - stem(&z, "evita", "", m_gt_0) || - stem(&z, "ezila", "al", m_gt_0); - break; - case 'i': - stem(&z, "itici", "ic", m_gt_0); - break; - case 'l': - stem(&z, "laci", "ic", m_gt_0) || - stem(&z, "luf", "", m_gt_0); - break; - case 's': - stem(&z, "ssen", "", m_gt_0); - break; - } - - /* Step 4 */ - switch( z[1] ){ - case 'a': - if( z[0]=='l' && m_gt_1(z+2) ){ - z += 2; - } - break; - case 'c': - if( z[0]=='e' && z[2]=='n' && (z[3]=='a' || z[3]=='e') && m_gt_1(z+4) ){ - z += 4; - } - break; - case 'e': - if( z[0]=='r' && m_gt_1(z+2) ){ - z += 2; - } - break; - case 'i': - if( z[0]=='c' && m_gt_1(z+2) ){ - z += 2; - } - break; - case 'l': - if( z[0]=='e' && z[2]=='b' && (z[3]=='a' || z[3]=='i') && m_gt_1(z+4) ){ - z += 4; - } - break; - case 'n': - if( z[0]=='t' ){ - if( z[2]=='a' ){ - if( m_gt_1(z+3) ){ - z += 3; - } - }else if( z[2]=='e' ){ - stem(&z, "tneme", "", m_gt_1) || - stem(&z, "tnem", "", m_gt_1) || - stem(&z, "tne", "", m_gt_1); - } - } - break; - case 'o': - if( z[0]=='u' ){ - if( m_gt_1(z+2) ){ - z += 2; - } - }else if( z[3]=='s' || z[3]=='t' ){ - stem(&z, "noi", "", m_gt_1); - } - break; - case 's': - if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){ - z += 3; - } - break; - case 't': - stem(&z, "eta", "", m_gt_1) || - stem(&z, "iti", "", m_gt_1); - break; - case 'u': - if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){ - z += 3; - } - break; - case 'v': - case 'z': - if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){ - z += 3; - } - break; - } - - /* Step 5a */ - if( z[0]=='e' ){ - if( m_gt_1(z+1) ){ - z++; - }else if( m_eq_1(z+1) && !star_oh(z+1) ){ - z++; - } - } - - /* Step 5b */ - if( m_gt_1(z) && z[0]=='l' && z[1]=='l' ){ - z++; - } - - /* z[] is now the stemmed word in reverse order. Flip it back - ** around into forward order and return. - */ - *pnOut = i = strlen(z); - zOut[i] = 0; - while( *z ){ - zOut[--i] = *(z++); - } -} - -/* -** Characters that can be part of a token. We assume any character -** whose value is greater than 0x80 (any UTF character) can be -** part of a token. In other words, delimiters all must have -** values of 0x7f or lower. -*/ -static const char porterIdChar[] = { -/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */ -}; -#define isDelim(C) (((ch=C)&0x80)==0 && (ch<0x30 || !porterIdChar[ch-0x30])) - -/* -** Extract the next token from a tokenization cursor. The cursor must -** have been opened by a prior call to porterOpen(). -*/ -static int porterNext( - sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by porterOpen */ - const char **pzToken, /* OUT: *pzToken is the token text */ - int *pnBytes, /* OUT: Number of bytes in token */ - int *piStartOffset, /* OUT: Starting offset of token */ - int *piEndOffset, /* OUT: Ending offset of token */ - int *piPosition /* OUT: Position integer of token */ -){ - porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor; - const char *z = c->zInput; - - while( c->iOffsetnInput ){ - int iStartOffset, ch; - - /* Scan past delimiter characters */ - while( c->iOffsetnInput && isDelim(z[c->iOffset]) ){ - c->iOffset++; - } - - /* Count non-delimiter characters. */ - iStartOffset = c->iOffset; - while( c->iOffsetnInput && !isDelim(z[c->iOffset]) ){ - c->iOffset++; - } - - if( c->iOffset>iStartOffset ){ - int n = c->iOffset-iStartOffset; - if( n>c->nAllocated ){ - c->nAllocated = n+20; - c->zToken = sqlite3_realloc(c->zToken, c->nAllocated); - if( c->zToken==NULL ) return SQLITE_NOMEM; - } - porter_stemmer(&z[iStartOffset], n, c->zToken, pnBytes); - *pzToken = c->zToken; - *piStartOffset = iStartOffset; - *piEndOffset = c->iOffset; - *piPosition = c->iToken++; - return SQLITE_OK; - } - } - return SQLITE_DONE; -} - -/* -** The set of routines that implement the porter-stemmer tokenizer -*/ -static const sqlite3_tokenizer_module porterTokenizerModule = { - 0, - porterCreate, - porterDestroy, - porterOpen, - porterClose, - porterNext, -}; - -/* -** Allocate a new porter tokenizer. Return a pointer to the new -** tokenizer in *ppModule -*/ -void sqlite3Fts2PorterTokenizerModule( - sqlite3_tokenizer_module const**ppModule -){ - *ppModule = &porterTokenizerModule; -} - -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */ diff --git a/ext/fts2/fts2_tokenizer.c b/ext/fts2/fts2_tokenizer.c deleted file mode 100644 index dda33a72d2..0000000000 --- a/ext/fts2/fts2_tokenizer.c +++ /dev/null @@ -1,375 +0,0 @@ -/* -** 2007 June 22 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -****************************************************************************** -** -** This is part of an SQLite module implementing full-text search. -** This particular file implements the generic tokenizer interface. -*/ - -/* -** The code in this file is only compiled if: -** -** * The FTS2 module is being built as an extension -** (in which case SQLITE_CORE is not defined), or -** -** * The FTS2 module is being built into the core of -** SQLite (in which case SQLITE_ENABLE_FTS2 is defined). -*/ -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) - - -#include "sqlite3.h" -#include "sqlite3ext.h" -SQLITE_EXTENSION_INIT3 - -#include "fts2_hash.h" -#include "fts2_tokenizer.h" -#include - -/* -** Implementation of the SQL scalar function for accessing the underlying -** hash table. This function may be called as follows: -** -** SELECT (); -** SELECT (, ); -** -** where is the name passed as the second argument -** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer'). -** -** If the argument is specified, it must be a blob value -** containing a pointer to be stored as the hash data corresponding -** to the string . If is not specified, then -** the string must already exist in the has table. Otherwise, -** an error is returned. -** -** Whether or not the argument is specified, the value returned -** is a blob containing the pointer stored as the hash data corresponding -** to string (after the hash-table is updated, if applicable). -*/ -static void scalarFunc( - sqlite3_context *context, - int argc, - sqlite3_value **argv -){ - fts2Hash *pHash; - void *pPtr = 0; - const unsigned char *zName; - int nName; - - assert( argc==1 || argc==2 ); - - pHash = (fts2Hash *)sqlite3_user_data(context); - - zName = sqlite3_value_text(argv[0]); - nName = sqlite3_value_bytes(argv[0])+1; - - if( argc==2 ){ - void *pOld; - int n = sqlite3_value_bytes(argv[1]); - if( n!=sizeof(pPtr) ){ - sqlite3_result_error(context, "argument type mismatch", -1); - return; - } - pPtr = *(void **)sqlite3_value_blob(argv[1]); - pOld = sqlite3Fts2HashInsert(pHash, (void *)zName, nName, pPtr); - if( pOld==pPtr ){ - sqlite3_result_error(context, "out of memory", -1); - return; - } - }else{ - pPtr = sqlite3Fts2HashFind(pHash, zName, nName); - if( !pPtr ){ - char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName); - sqlite3_result_error(context, zErr, -1); - sqlite3_free(zErr); - return; - } - } - - sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT); -} - -#ifdef SQLITE_TEST - -#if defined(INCLUDE_SQLITE_TCL_H) -# include "sqlite_tcl.h" -#else -# include "tcl.h" -#endif -#include - -/* -** Implementation of a special SQL scalar function for testing tokenizers -** designed to be used in concert with the Tcl testing framework. This -** function must be called with two arguments: -** -** SELECT (, ); -** SELECT (, ); -** -** where is the name passed as the second argument -** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer') -** concatenated with the string '_test' (e.g. 'fts2_tokenizer_test'). -** -** The return value is a string that may be interpreted as a Tcl -** list. For each token in the , three elements are -** added to the returned list. The first is the token position, the -** second is the token text (folded, stemmed, etc.) and the third is the -** substring of associated with the token. For example, -** using the built-in "simple" tokenizer: -** -** SELECT fts_tokenizer_test('simple', 'I don't see how'); -** -** will return the string: -** -** "{0 i I 1 dont don't 2 see see 3 how how}" -** -*/ -static void testFunc( - sqlite3_context *context, - int argc, - sqlite3_value **argv -){ - fts2Hash *pHash; - sqlite3_tokenizer_module *p; - sqlite3_tokenizer *pTokenizer = 0; - sqlite3_tokenizer_cursor *pCsr = 0; - - const char *zErr = 0; - - const char *zName; - int nName; - const char *zInput; - int nInput; - - const char *zArg = 0; - - const char *zToken; - int nToken; - int iStart; - int iEnd; - int iPos; - - Tcl_Obj *pRet; - - assert( argc==2 || argc==3 ); - - nName = sqlite3_value_bytes(argv[0]); - zName = (const char *)sqlite3_value_text(argv[0]); - nInput = sqlite3_value_bytes(argv[argc-1]); - zInput = (const char *)sqlite3_value_text(argv[argc-1]); - - if( argc==3 ){ - zArg = (const char *)sqlite3_value_text(argv[1]); - } - - pHash = (fts2Hash *)sqlite3_user_data(context); - p = (sqlite3_tokenizer_module *)sqlite3Fts2HashFind(pHash, zName, nName+1); - - if( !p ){ - char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName); - sqlite3_result_error(context, zErr, -1); - sqlite3_free(zErr); - return; - } - - pRet = Tcl_NewObj(); - Tcl_IncrRefCount(pRet); - - if( SQLITE_OK!=p->xCreate(zArg ? 1 : 0, &zArg, &pTokenizer) ){ - zErr = "error in xCreate()"; - goto finish; - } - pTokenizer->pModule = p; - if( SQLITE_OK!=p->xOpen(pTokenizer, zInput, nInput, &pCsr) ){ - zErr = "error in xOpen()"; - goto finish; - } - pCsr->pTokenizer = pTokenizer; - - while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){ - Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos)); - Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken)); - zToken = &zInput[iStart]; - nToken = iEnd-iStart; - Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken)); - } - - if( SQLITE_OK!=p->xClose(pCsr) ){ - zErr = "error in xClose()"; - goto finish; - } - if( SQLITE_OK!=p->xDestroy(pTokenizer) ){ - zErr = "error in xDestroy()"; - goto finish; - } - -finish: - if( zErr ){ - sqlite3_result_error(context, zErr, -1); - }else{ - sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT); - } - Tcl_DecrRefCount(pRet); -} - -static -int registerTokenizer( - sqlite3 *db, - char *zName, - const sqlite3_tokenizer_module *p -){ - int rc; - sqlite3_stmt *pStmt; - const char zSql[] = "SELECT fts2_tokenizer(?, ?)"; - - rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); - if( rc!=SQLITE_OK ){ - return rc; - } - - sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); - sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC); - sqlite3_step(pStmt); - - return sqlite3_finalize(pStmt); -} - -static -int queryFts2Tokenizer( - sqlite3 *db, - char *zName, - const sqlite3_tokenizer_module **pp -){ - int rc; - sqlite3_stmt *pStmt; - const char zSql[] = "SELECT fts2_tokenizer(?)"; - - *pp = 0; - rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); - if( rc!=SQLITE_OK ){ - return rc; - } - - sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); - if( SQLITE_ROW==sqlite3_step(pStmt) ){ - if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){ - memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp)); - } - } - - return sqlite3_finalize(pStmt); -} - -void sqlite3Fts2SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule); - -/* -** Implementation of the scalar function fts2_tokenizer_internal_test(). -** This function is used for testing only, it is not included in the -** build unless SQLITE_TEST is defined. -** -** The purpose of this is to test that the fts2_tokenizer() function -** can be used as designed by the C-code in the queryFts2Tokenizer and -** registerTokenizer() functions above. These two functions are repeated -** in the README.tokenizer file as an example, so it is important to -** test them. -** -** To run the tests, evaluate the fts2_tokenizer_internal_test() scalar -** function with no arguments. An assert() will fail if a problem is -** detected. i.e.: -** -** SELECT fts2_tokenizer_internal_test(); -** -*/ -static void intTestFunc( - sqlite3_context *context, - int argc, - sqlite3_value **argv -){ - int rc; - const sqlite3_tokenizer_module *p1; - const sqlite3_tokenizer_module *p2; - sqlite3 *db = (sqlite3 *)sqlite3_user_data(context); - - /* Test the query function */ - sqlite3Fts2SimpleTokenizerModule(&p1); - rc = queryFts2Tokenizer(db, "simple", &p2); - assert( rc==SQLITE_OK ); - assert( p1==p2 ); - rc = queryFts2Tokenizer(db, "nosuchtokenizer", &p2); - assert( rc==SQLITE_ERROR ); - assert( p2==0 ); - assert( 0==strcmp(sqlite3_errmsg(db), "unknown tokenizer: nosuchtokenizer") ); - - /* Test the storage function */ - rc = registerTokenizer(db, "nosuchtokenizer", p1); - assert( rc==SQLITE_OK ); - rc = queryFts2Tokenizer(db, "nosuchtokenizer", &p2); - assert( rc==SQLITE_OK ); - assert( p2==p1 ); - - sqlite3_result_text(context, "ok", -1, SQLITE_STATIC); -} - -#endif - -/* -** Set up SQL objects in database db used to access the contents of -** the hash table pointed to by argument pHash. The hash table must -** been initialized to use string keys, and to take a private copy -** of the key when a value is inserted. i.e. by a call similar to: -** -** sqlite3Fts2HashInit(pHash, FTS2_HASH_STRING, 1); -** -** This function adds a scalar function (see header comment above -** scalarFunc() in this file for details) and, if ENABLE_TABLE is -** defined at compilation time, a temporary virtual table (see header -** comment above struct HashTableVtab) to the database schema. Both -** provide read/write access to the contents of *pHash. -** -** The third argument to this function, zName, is used as the name -** of both the scalar and, if created, the virtual table. -*/ -int sqlite3Fts2InitHashTable( - sqlite3 *db, - fts2Hash *pHash, - const char *zName -){ - int rc = SQLITE_OK; - void *p = (void *)pHash; - const int any = SQLITE_ANY; - char *zTest = 0; - char *zTest2 = 0; - -#ifdef SQLITE_TEST - void *pdb = (void *)db; - zTest = sqlite3_mprintf("%s_test", zName); - zTest2 = sqlite3_mprintf("%s_internal_test", zName); - if( !zTest || !zTest2 ){ - rc = SQLITE_NOMEM; - } -#endif - - if( rc!=SQLITE_OK - || (rc = sqlite3_create_function(db, zName, 1, any, p, scalarFunc, 0, 0)) - || (rc = sqlite3_create_function(db, zName, 2, any, p, scalarFunc, 0, 0)) -#ifdef SQLITE_TEST - || (rc = sqlite3_create_function(db, zTest, 2, any, p, testFunc, 0, 0)) - || (rc = sqlite3_create_function(db, zTest, 3, any, p, testFunc, 0, 0)) - || (rc = sqlite3_create_function(db, zTest2, 0, any, pdb, intTestFunc, 0, 0)) -#endif - ); - - sqlite3_free(zTest); - sqlite3_free(zTest2); - return rc; -} - -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */ diff --git a/ext/fts2/fts2_tokenizer.h b/ext/fts2/fts2_tokenizer.h deleted file mode 100644 index 8db2048d6b..0000000000 --- a/ext/fts2/fts2_tokenizer.h +++ /dev/null @@ -1,145 +0,0 @@ -/* -** 2006 July 10 -** -** The author disclaims copyright to this source code. -** -************************************************************************* -** Defines the interface to tokenizers used by fulltext-search. There -** are three basic components: -** -** sqlite3_tokenizer_module is a singleton defining the tokenizer -** interface functions. This is essentially the class structure for -** tokenizers. -** -** sqlite3_tokenizer is used to define a particular tokenizer, perhaps -** including customization information defined at creation time. -** -** sqlite3_tokenizer_cursor is generated by a tokenizer to generate -** tokens from a particular input. -*/ -#ifndef _FTS2_TOKENIZER_H_ -#define _FTS2_TOKENIZER_H_ - -/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time. -** If tokenizers are to be allowed to call sqlite3_*() functions, then -** we will need a way to register the API consistently. -*/ -#include "sqlite3.h" - -/* -** Structures used by the tokenizer interface. When a new tokenizer -** implementation is registered, the caller provides a pointer to -** an sqlite3_tokenizer_module containing pointers to the callback -** functions that make up an implementation. -** -** When an fts2 table is created, it passes any arguments passed to -** the tokenizer clause of the CREATE VIRTUAL TABLE statement to the -** sqlite3_tokenizer_module.xCreate() function of the requested tokenizer -** implementation. The xCreate() function in turn returns an -** sqlite3_tokenizer structure representing the specific tokenizer to -** be used for the fts2 table (customized by the tokenizer clause arguments). -** -** To tokenize an input buffer, the sqlite3_tokenizer_module.xOpen() -** method is called. It returns an sqlite3_tokenizer_cursor object -** that may be used to tokenize a specific input buffer based on -** the tokenization rules supplied by a specific sqlite3_tokenizer -** object. -*/ -typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module; -typedef struct sqlite3_tokenizer sqlite3_tokenizer; -typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor; - -struct sqlite3_tokenizer_module { - - /* - ** Structure version. Should always be set to 0. - */ - int iVersion; - - /* - ** Create a new tokenizer. The values in the argv[] array are the - ** arguments passed to the "tokenizer" clause of the CREATE VIRTUAL - ** TABLE statement that created the fts2 table. For example, if - ** the following SQL is executed: - ** - ** CREATE .. USING fts2( ... , tokenizer arg1 arg2) - ** - ** then argc is set to 2, and the argv[] array contains pointers - ** to the strings "arg1" and "arg2". - ** - ** This method should return either SQLITE_OK (0), or an SQLite error - ** code. If SQLITE_OK is returned, then *ppTokenizer should be set - ** to point at the newly created tokenizer structure. The generic - ** sqlite3_tokenizer.pModule variable should not be initialized by - ** this callback. The caller will do so. - */ - int (*xCreate)( - int argc, /* Size of argv array */ - const char *const*argv, /* Tokenizer argument strings */ - sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */ - ); - - /* - ** Destroy an existing tokenizer. The fts2 module calls this method - ** exactly once for each successful call to xCreate(). - */ - int (*xDestroy)(sqlite3_tokenizer *pTokenizer); - - /* - ** Create a tokenizer cursor to tokenize an input buffer. The caller - ** is responsible for ensuring that the input buffer remains valid - ** until the cursor is closed (using the xClose() method). - */ - int (*xOpen)( - sqlite3_tokenizer *pTokenizer, /* Tokenizer object */ - const char *pInput, int nBytes, /* Input buffer */ - sqlite3_tokenizer_cursor **ppCursor /* OUT: Created tokenizer cursor */ - ); - - /* - ** Destroy an existing tokenizer cursor. The fts2 module calls this - ** method exactly once for each successful call to xOpen(). - */ - int (*xClose)(sqlite3_tokenizer_cursor *pCursor); - - /* - ** Retrieve the next token from the tokenizer cursor pCursor. This - ** method should either return SQLITE_OK and set the values of the - ** "OUT" variables identified below, or SQLITE_DONE to indicate that - ** the end of the buffer has been reached, or an SQLite error code. - ** - ** *ppToken should be set to point at a buffer containing the - ** normalized version of the token (i.e. after any case-folding and/or - ** stemming has been performed). *pnBytes should be set to the length - ** of this buffer in bytes. The input text that generated the token is - ** identified by the byte offsets returned in *piStartOffset and - ** *piEndOffset. - ** - ** The buffer *ppToken is set to point at is managed by the tokenizer - ** implementation. It is only required to be valid until the next call - ** to xNext() or xClose(). - */ - /* TODO(shess) current implementation requires pInput to be - ** nul-terminated. This should either be fixed, or pInput/nBytes - ** should be converted to zInput. - */ - int (*xNext)( - sqlite3_tokenizer_cursor *pCursor, /* Tokenizer cursor */ - const char **ppToken, int *pnBytes, /* OUT: Normalized text for token */ - int *piStartOffset, /* OUT: Byte offset of token in input buffer */ - int *piEndOffset, /* OUT: Byte offset of end of token in input buffer */ - int *piPosition /* OUT: Number of tokens returned before this one */ - ); -}; - -struct sqlite3_tokenizer { - const sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */ - /* Tokenizer implementations will typically add additional fields */ -}; - -struct sqlite3_tokenizer_cursor { - sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */ - /* Tokenizer implementations will typically add additional fields */ -}; - -#endif /* _FTS2_TOKENIZER_H_ */ diff --git a/ext/fts2/fts2_tokenizer1.c b/ext/fts2/fts2_tokenizer1.c deleted file mode 100644 index fe4f9eb4b5..0000000000 --- a/ext/fts2/fts2_tokenizer1.c +++ /dev/null @@ -1,233 +0,0 @@ -/* -** 2006 Oct 10 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -****************************************************************************** -** -** Implementation of the "simple" full-text-search tokenizer. -*/ - -/* -** The code in this file is only compiled if: -** -** * The FTS2 module is being built as an extension -** (in which case SQLITE_CORE is not defined), or -** -** * The FTS2 module is being built into the core of -** SQLite (in which case SQLITE_ENABLE_FTS2 is defined). -*/ -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) - - -#include -#include -#include -#include - -#include "sqlite3.h" -#include "sqlite3ext.h" -SQLITE_EXTENSION_INIT3 -#include "fts2_tokenizer.h" - -typedef struct simple_tokenizer { - sqlite3_tokenizer base; - char delim[128]; /* flag ASCII delimiters */ -} simple_tokenizer; - -typedef struct simple_tokenizer_cursor { - sqlite3_tokenizer_cursor base; - const char *pInput; /* input we are tokenizing */ - int nBytes; /* size of the input */ - int iOffset; /* current position in pInput */ - int iToken; /* index of next token to be returned */ - char *pToken; /* storage for current token */ - int nTokenAllocated; /* space allocated to zToken buffer */ -} simple_tokenizer_cursor; - - -/* Forward declaration */ -static const sqlite3_tokenizer_module simpleTokenizerModule; - -static int simpleDelim(simple_tokenizer *t, unsigned char c){ - return c<0x80 && t->delim[c]; -} - -/* -** Create a new tokenizer instance. -*/ -static int simpleCreate( - int argc, const char * const *argv, - sqlite3_tokenizer **ppTokenizer -){ - simple_tokenizer *t; - - t = (simple_tokenizer *) sqlite3_malloc(sizeof(*t)); - if( t==NULL ) return SQLITE_NOMEM; - memset(t, 0, sizeof(*t)); - - /* TODO(shess) Delimiters need to remain the same from run to run, - ** else we need to reindex. One solution would be a meta-table to - ** track such information in the database, then we'd only want this - ** information on the initial create. - */ - if( argc>1 ){ - int i, n = strlen(argv[1]); - for(i=0; i=0x80 ){ - sqlite3_free(t); - return SQLITE_ERROR; - } - t->delim[ch] = 1; - } - } else { - /* Mark non-alphanumeric ASCII characters as delimiters */ - int i; - for(i=1; i<0x80; i++){ - t->delim[i] = !((i>='0' && i<='9') || (i>='A' && i<='Z') || - (i>='a' && i<='z')); - } - } - - *ppTokenizer = &t->base; - return SQLITE_OK; -} - -/* -** Destroy a tokenizer -*/ -static int simpleDestroy(sqlite3_tokenizer *pTokenizer){ - sqlite3_free(pTokenizer); - return SQLITE_OK; -} - -/* -** Prepare to begin tokenizing a particular string. The input -** string to be tokenized is pInput[0..nBytes-1]. A cursor -** used to incrementally tokenize this string is returned in -** *ppCursor. -*/ -static int simpleOpen( - sqlite3_tokenizer *pTokenizer, /* The tokenizer */ - const char *pInput, int nBytes, /* String to be tokenized */ - sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */ -){ - simple_tokenizer_cursor *c; - - c = (simple_tokenizer_cursor *) sqlite3_malloc(sizeof(*c)); - if( c==NULL ) return SQLITE_NOMEM; - - c->pInput = pInput; - if( pInput==0 ){ - c->nBytes = 0; - }else if( nBytes<0 ){ - c->nBytes = (int)strlen(pInput); - }else{ - c->nBytes = nBytes; - } - c->iOffset = 0; /* start tokenizing at the beginning */ - c->iToken = 0; - c->pToken = NULL; /* no space allocated, yet. */ - c->nTokenAllocated = 0; - - *ppCursor = &c->base; - return SQLITE_OK; -} - -/* -** Close a tokenization cursor previously opened by a call to -** simpleOpen() above. -*/ -static int simpleClose(sqlite3_tokenizer_cursor *pCursor){ - simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor; - sqlite3_free(c->pToken); - sqlite3_free(c); - return SQLITE_OK; -} - -/* -** Extract the next token from a tokenization cursor. The cursor must -** have been opened by a prior call to simpleOpen(). -*/ -static int simpleNext( - sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */ - const char **ppToken, /* OUT: *ppToken is the token text */ - int *pnBytes, /* OUT: Number of bytes in token */ - int *piStartOffset, /* OUT: Starting offset of token */ - int *piEndOffset, /* OUT: Ending offset of token */ - int *piPosition /* OUT: Position integer of token */ -){ - simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor; - simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer; - unsigned char *p = (unsigned char *)c->pInput; - - while( c->iOffsetnBytes ){ - int iStartOffset; - - /* Scan past delimiter characters */ - while( c->iOffsetnBytes && simpleDelim(t, p[c->iOffset]) ){ - c->iOffset++; - } - - /* Count non-delimiter characters. */ - iStartOffset = c->iOffset; - while( c->iOffsetnBytes && !simpleDelim(t, p[c->iOffset]) ){ - c->iOffset++; - } - - if( c->iOffset>iStartOffset ){ - int i, n = c->iOffset-iStartOffset; - if( n>c->nTokenAllocated ){ - c->nTokenAllocated = n+20; - c->pToken = sqlite3_realloc(c->pToken, c->nTokenAllocated); - if( c->pToken==NULL ) return SQLITE_NOMEM; - } - for(i=0; ipToken[i] = (ch>='A' && ch<='Z') ? (ch - 'A' + 'a') : ch; - } - *ppToken = c->pToken; - *pnBytes = n; - *piStartOffset = iStartOffset; - *piEndOffset = c->iOffset; - *piPosition = c->iToken++; - - return SQLITE_OK; - } - } - return SQLITE_DONE; -} - -/* -** The set of routines that implement the simple tokenizer -*/ -static const sqlite3_tokenizer_module simpleTokenizerModule = { - 0, - simpleCreate, - simpleDestroy, - simpleOpen, - simpleClose, - simpleNext, -}; - -/* -** Allocate a new simple tokenizer. Return a pointer to the new -** tokenizer in *ppModule -*/ -void sqlite3Fts2SimpleTokenizerModule( - sqlite3_tokenizer_module const**ppModule -){ - *ppModule = &simpleTokenizerModule; -} - -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */ diff --git a/ext/fts2/mkfts2amal.tcl b/ext/fts2/mkfts2amal.tcl deleted file mode 100644 index 5c8d1e93d7..0000000000 --- a/ext/fts2/mkfts2amal.tcl +++ /dev/null @@ -1,116 +0,0 @@ -#!/usr/bin/tclsh -# -# This script builds a single C code file holding all of FTS2 code. -# The name of the output file is fts2amal.c. To build this file, -# first do: -# -# make target_source -# -# The make target above moves all of the source code files into -# a subdirectory named "tsrc". (This script expects to find the files -# there and will not work if they are not found.) -# -# After the "tsrc" directory has been created and populated, run -# this script: -# -# tclsh mkfts2amal.tcl -# -# The amalgamated FTS2 code will be written into fts2amal.c -# - -# Open the output file and write a header comment at the beginning -# of the file. -# -set out [open fts2amal.c w] -set today [clock format [clock seconds] -format "%Y-%m-%d %H:%M:%S UTC" -gmt 1] -puts $out [subst \ -{/****************************************************************************** -** This file is an amalgamation of separate C source files from the SQLite -** Full Text Search extension 2 (fts2). By combining all the individual C -** code files into this single large file, the entire code can be compiled -** as a one translation unit. This allows many compilers to do optimizations -** that would not be possible if the files were compiled separately. It also -** makes the code easier to import into other projects. -** -** This amalgamation was generated on $today. -*/}] - -# These are the header files used by FTS2. The first time any of these -# files are seen in a #include statement in the C code, include the complete -# text of the file in-line. The file only needs to be included once. -# -foreach hdr { - fts2.h - fts2_hash.h - fts2_tokenizer.h - sqlite3.h - sqlite3ext.h -} { - set available_hdr($hdr) 1 -} - -# 78 stars used for comment formatting. -set s78 \ -{*****************************************************************************} - -# Insert a comment into the code -# -proc section_comment {text} { - global out s78 - set n [string length $text] - set nstar [expr {60 - $n}] - set stars [string range $s78 0 $nstar] - puts $out "/************** $text $stars/" -} - -# Read the source file named $filename and write it into the -# sqlite3.c output file. If any #include statements are seen, -# process them approprately. -# -proc copy_file {filename} { - global seen_hdr available_hdr out - set tail [file tail $filename] - section_comment "Begin file $tail" - set in [open $filename r] - while {![eof $in]} { - set line [gets $in] - if {[regexp {^#\s*include\s+["<]([^">]+)[">]} $line all hdr]} { - if {[info exists available_hdr($hdr)]} { - if {$available_hdr($hdr)} { - section_comment "Include $hdr in the middle of $tail" - copy_file tsrc/$hdr - section_comment "Continuing where we left off in $tail" - } - } elseif {![info exists seen_hdr($hdr)]} { - set seen_hdr($hdr) 1 - puts $out $line - } - } elseif {[regexp {^#ifdef __cplusplus} $line]} { - puts $out "#if 0" - } elseif {[regexp {^#line} $line]} { - # Skip #line directives. - } else { - puts $out $line - } - } - close $in - section_comment "End of $tail" -} - - -# Process the source files. Process files containing commonly -# used subroutines first in order to help the compiler find -# inlining opportunities. -# -foreach file { - fts2.c - fts2_hash.c - fts2_porter.c - fts2_tokenizer.c - fts2_tokenizer1.c - fts2_icu.c -} { - copy_file tsrc/$file -} - -close $out diff --git a/ext/fts3/fts3.c b/ext/fts3/fts3.c index c43eac4914..43a9daf60d 100644 --- a/ext/fts3/fts3.c +++ b/ext/fts3/fts3.c @@ -5287,9 +5287,8 @@ static void fts3EvalNextRow( Fts3Expr *pExpr, /* Expr. to advance to next matching row */ int *pRc /* IN/OUT: Error code */ ){ - if( *pRc==SQLITE_OK ){ + if( *pRc==SQLITE_OK && pExpr->bEof==0 ){ int bDescDoclist = pCsr->bDesc; /* Used by DOCID_CMP() macro */ - assert( pExpr->bEof==0 ); pExpr->bStart = 1; switch( pExpr->eType ){ @@ -5765,6 +5764,22 @@ static void fts3EvalUpdateCounts(Fts3Expr *pExpr, int nCol){ } } +/* +** This is an sqlite3Fts3ExprIterate() callback. If the Fts3Expr.aMI[] array +** has not yet been allocated, allocate and zero it. Otherwise, just zero +** it. +*/ +static int fts3AllocateMSI(Fts3Expr *pExpr, int iPhrase, void *pCtx){ + Fts3Table *pTab = (Fts3Table*)pCtx; + UNUSED_PARAMETER(iPhrase); + if( pExpr->aMI==0 ){ + pExpr->aMI = (u32 *)sqlite3_malloc64(pTab->nColumn * 3 * sizeof(u32)); + if( pExpr->aMI==0 ) return SQLITE_NOMEM; + } + memset(pExpr->aMI, 0, pTab->nColumn * 3 * sizeof(u32)); + return SQLITE_OK; +} + /* ** Expression pExpr must be of type FTSQUERY_PHRASE. ** @@ -5786,7 +5801,6 @@ static int fts3EvalGatherStats( if( pExpr->aMI==0 ){ Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; Fts3Expr *pRoot; /* Root of NEAR expression */ - Fts3Expr *p; /* Iterator used for several purposes */ sqlite3_int64 iPrevId = pCsr->iPrevId; sqlite3_int64 iDocid; @@ -5794,7 +5808,9 @@ static int fts3EvalGatherStats( /* Find the root of the NEAR expression */ pRoot = pExpr; - while( pRoot->pParent && pRoot->pParent->eType==FTSQUERY_NEAR ){ + while( pRoot->pParent + && (pRoot->pParent->eType==FTSQUERY_NEAR || pRoot->bDeferred) + ){ pRoot = pRoot->pParent; } iDocid = pRoot->iDocid; @@ -5802,14 +5818,8 @@ static int fts3EvalGatherStats( assert( pRoot->bStart ); /* Allocate space for the aMSI[] array of each FTSQUERY_PHRASE node */ - for(p=pRoot; p; p=p->pLeft){ - Fts3Expr *pE = (p->eType==FTSQUERY_PHRASE?p:p->pRight); - assert( pE->aMI==0 ); - pE->aMI = (u32 *)sqlite3_malloc64(pTab->nColumn * 3 * sizeof(u32)); - if( !pE->aMI ) return SQLITE_NOMEM; - memset(pE->aMI, 0, pTab->nColumn * 3 * sizeof(u32)); - } - + rc = sqlite3Fts3ExprIterate(pRoot, fts3AllocateMSI, (void*)pTab); + if( rc!=SQLITE_OK ) return rc; fts3EvalRestart(pCsr, pRoot, &rc); while( pCsr->isEof==0 && rc==SQLITE_OK ){ @@ -5965,6 +5975,7 @@ int sqlite3Fts3EvalPhrasePoslist( u8 bTreeEof = 0; Fts3Expr *p; /* Used to iterate from pExpr to root */ Fts3Expr *pNear; /* Most senior NEAR ancestor (or pExpr) */ + Fts3Expr *pRun; /* Closest non-deferred ancestor of pNear */ int bMatch; /* Check if this phrase descends from an OR expression node. If not, @@ -5979,25 +5990,30 @@ int sqlite3Fts3EvalPhrasePoslist( if( p->bEof ) bTreeEof = 1; } if( bOr==0 ) return SQLITE_OK; + pRun = pNear; + while( pRun->bDeferred ){ + assert( pRun->pParent ); + pRun = pRun->pParent; + } /* This is the descendent of an OR node. In this case we cannot use ** an incremental phrase. Load the entire doclist for the phrase ** into memory in this case. */ if( pPhrase->bIncr ){ - int bEofSave = pNear->bEof; - fts3EvalRestart(pCsr, pNear, &rc); - while( rc==SQLITE_OK && !pNear->bEof ){ - fts3EvalNextRow(pCsr, pNear, &rc); - if( bEofSave==0 && pNear->iDocid==iDocid ) break; + int bEofSave = pRun->bEof; + fts3EvalRestart(pCsr, pRun, &rc); + while( rc==SQLITE_OK && !pRun->bEof ){ + fts3EvalNextRow(pCsr, pRun, &rc); + if( bEofSave==0 && pRun->iDocid==iDocid ) break; } assert( rc!=SQLITE_OK || pPhrase->bIncr==0 ); - if( rc==SQLITE_OK && pNear->bEof!=bEofSave ){ + if( rc==SQLITE_OK && pRun->bEof!=bEofSave ){ rc = FTS_CORRUPT_VTAB; } } if( bTreeEof ){ - while( rc==SQLITE_OK && !pNear->bEof ){ - fts3EvalNextRow(pCsr, pNear, &rc); + while( rc==SQLITE_OK && !pRun->bEof ){ + fts3EvalNextRow(pCsr, pRun, &rc); } } if( rc!=SQLITE_OK ) return rc; diff --git a/ext/fts3/fts3Int.h b/ext/fts3/fts3Int.h index e821d6be31..3a8a884f92 100644 --- a/ext/fts3/fts3Int.h +++ b/ext/fts3/fts3Int.h @@ -650,5 +650,7 @@ int sqlite3FtsUnicodeIsalnum(int); int sqlite3FtsUnicodeIsdiacritic(int); #endif +int sqlite3Fts3ExprIterate(Fts3Expr*, int (*x)(Fts3Expr*,int,void*), void*); + #endif /* !SQLITE_CORE || SQLITE_ENABLE_FTS3 */ #endif /* _FTSINT_H */ diff --git a/ext/fts3/fts3_snippet.c b/ext/fts3/fts3_snippet.c index c5192c4eb0..227c5f00f6 100644 --- a/ext/fts3/fts3_snippet.c +++ b/ext/fts3/fts3_snippet.c @@ -41,7 +41,7 @@ typedef sqlite3_int64 i64; /* -** Used as an fts3ExprIterate() context when loading phrase doclists to +** Used as an sqlite3Fts3ExprIterate() context when loading phrase doclists to ** Fts3Expr.aDoclist[]/nDoclist. */ typedef struct LoadDoclistCtx LoadDoclistCtx; @@ -85,7 +85,7 @@ struct SnippetFragment { }; /* -** This type is used as an fts3ExprIterate() context object while +** This type is used as an sqlite3Fts3ExprIterate() context object while ** accumulating the data returned by the matchinfo() function. */ typedef struct MatchInfo MatchInfo; @@ -244,7 +244,7 @@ static void fts3GetDeltaPosition(char **pp, i64 *piPos){ } /* -** Helper function for fts3ExprIterate() (see below). +** Helper function for sqlite3Fts3ExprIterate() (see below). */ static int fts3ExprIterate2( Fts3Expr *pExpr, /* Expression to iterate phrases of */ @@ -278,7 +278,7 @@ static int fts3ExprIterate2( ** Otherwise, SQLITE_OK is returned after a callback has been made for ** all eligible phrase nodes. */ -static int fts3ExprIterate( +int sqlite3Fts3ExprIterate( Fts3Expr *pExpr, /* Expression to iterate phrases of */ int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ void *pCtx /* Second argument to pass to callback */ @@ -287,10 +287,9 @@ static int fts3ExprIterate( return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx); } - /* -** This is an fts3ExprIterate() callback used while loading the doclists -** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also +** This is an sqlite3Fts3ExprIterate() callback used while loading the +** doclists for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also ** fts3ExprLoadDoclists(). */ static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ @@ -322,9 +321,9 @@ static int fts3ExprLoadDoclists( int *pnToken /* OUT: Number of tokens in query */ ){ int rc; /* Return Code */ - LoadDoclistCtx sCtx = {0,0,0}; /* Context for fts3ExprIterate() */ + LoadDoclistCtx sCtx = {0,0,0}; /* Context for sqlite3Fts3ExprIterate() */ sCtx.pCsr = pCsr; - rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb, (void *)&sCtx); + rc = sqlite3Fts3ExprIterate(pCsr->pExpr,fts3ExprLoadDoclistsCb,(void*)&sCtx); if( pnPhrase ) *pnPhrase = sCtx.nPhrase; if( pnToken ) *pnToken = sCtx.nToken; return rc; @@ -337,7 +336,7 @@ static int fts3ExprPhraseCountCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ } static int fts3ExprPhraseCount(Fts3Expr *pExpr){ int nPhrase = 0; - (void)fts3ExprIterate(pExpr, fts3ExprPhraseCountCb, (void *)&nPhrase); + (void)sqlite3Fts3ExprIterate(pExpr, fts3ExprPhraseCountCb, (void *)&nPhrase); return nPhrase; } @@ -465,8 +464,9 @@ static void fts3SnippetDetails( } /* -** This function is an fts3ExprIterate() callback used by fts3BestSnippet(). -** Each invocation populates an element of the SnippetIter.aPhrase[] array. +** This function is an sqlite3Fts3ExprIterate() callback used by +** fts3BestSnippet(). Each invocation populates an element of the +** SnippetIter.aPhrase[] array. */ static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){ SnippetIter *p = (SnippetIter *)ctx; @@ -556,7 +556,9 @@ static int fts3BestSnippet( sIter.nSnippet = nSnippet; sIter.nPhrase = nList; sIter.iCurrent = -1; - rc = fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void*)&sIter); + rc = sqlite3Fts3ExprIterate( + pCsr->pExpr, fts3SnippetFindPositions, (void*)&sIter + ); if( rc==SQLITE_OK ){ /* Set the *pmSeen output variable. */ @@ -917,10 +919,10 @@ static int fts3ExprLHitGather( } /* -** fts3ExprIterate() callback used to collect the "global" matchinfo stats -** for a single query. +** sqlite3Fts3ExprIterate() callback used to collect the "global" matchinfo +** stats for a single query. ** -** fts3ExprIterate() callback to load the 'global' elements of a +** sqlite3Fts3ExprIterate() callback to load the 'global' elements of a ** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements ** of the matchinfo array that are constant for all rows returned by the ** current query. @@ -955,7 +957,7 @@ static int fts3ExprGlobalHitsCb( } /* -** fts3ExprIterate() callback used to collect the "local" part of the +** sqlite3Fts3ExprIterate() callback used to collect the "local" part of the ** FTS3_MATCHINFO_HITS array. The local stats are those elements of the ** array that are different for each row returned by the query. */ @@ -1151,7 +1153,7 @@ static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){ **/ aIter = sqlite3Fts3MallocZero(sizeof(LcsIterator) * pCsr->nPhrase); if( !aIter ) return SQLITE_NOMEM; - (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter); + (void)sqlite3Fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter); for(i=0; inPhrase; i++){ LcsIterator *pIter = &aIter[i]; @@ -1328,11 +1330,11 @@ static int fts3MatchinfoValues( rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &pInfo->nDoc,0,0); if( rc!=SQLITE_OK ) break; } - rc = fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo); + rc = sqlite3Fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo); sqlite3Fts3EvalTestDeferred(pCsr, &rc); if( rc!=SQLITE_OK ) break; } - (void)fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo); + (void)sqlite3Fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo); break; } } @@ -1555,7 +1557,7 @@ struct TermOffsetCtx { }; /* -** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets(). +** This function is an sqlite3Fts3ExprIterate() callback used by sqlite3Fts3Offsets(). */ static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){ TermOffsetCtx *p = (TermOffsetCtx *)ctx; @@ -1637,7 +1639,9 @@ void sqlite3Fts3Offsets( */ sCtx.iCol = iCol; sCtx.iTerm = 0; - rc = fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void*)&sCtx); + rc = sqlite3Fts3ExprIterate( + pCsr->pExpr, fts3ExprTermOffsetInit, (void*)&sCtx + ); if( rc!=SQLITE_OK ) goto offsets_out; /* Retreive the text stored in column iCol. If an SQL NULL is stored diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 7eca9b1321..0c883f020a 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -5073,7 +5073,7 @@ static void fts5MergePrefixLists( /* Initialize a doclist-iterator for each input buffer. Arrange them in ** a linked-list starting at pHead in ascending order of rowid. Avoid ** linking any iterators already at EOF into the linked list at all. */ - assert( nBuf+1<=sizeof(aMerger)/sizeof(aMerger[0]) ); + assert( nBuf+1<=(int)(sizeof(aMerger)/sizeof(aMerger[0])) ); memset(aMerger, 0, sizeof(PrefixMerger)*(nBuf+1)); pHead = &aMerger[nBuf]; fts5DoclistIterInit(p1, &pHead->iter); diff --git a/ext/fts5/fts5_main.c b/ext/fts5/fts5_main.c index 01888f42ce..5d38c97f21 100644 --- a/ext/fts5/fts5_main.c +++ b/ext/fts5/fts5_main.c @@ -1625,7 +1625,7 @@ static int fts5UpdateMethod( int rc = SQLITE_OK; /* Return code */ /* A transaction must be open when this is called. */ - assert( pTab->ts.eState==1 ); + assert( pTab->ts.eState==1 || pTab->ts.eState==2 ); assert( pVtab->zErrMsg==0 ); assert( nArg==1 || nArg==(2+pConfig->nCol+2) ); @@ -2867,7 +2867,9 @@ static int fts5Init(sqlite3 *db){ } if( rc==SQLITE_OK ){ rc = sqlite3_create_function( - db, "fts5_source_id", 0, SQLITE_UTF8, p, fts5SourceIdFunc, 0, 0 + db, "fts5_source_id", 0, + SQLITE_UTF8|SQLITE_DETERMINISTIC|SQLITE_INNOCUOUS, + p, fts5SourceIdFunc, 0, 0 ); } } diff --git a/ext/fts5/test/fts5misc.test b/ext/fts5/test/fts5misc.test index 4b9ca2dbb2..416b4c8085 100644 --- a/ext/fts5/test/fts5misc.test +++ b/ext/fts5/test/fts5misc.test @@ -402,5 +402,46 @@ do_test 15.4 { list [catch { db2 eval COMMIT } msg] $msg } {0 {}} +#------------------------------------------------------------------------- +reset_db +forcedelete test.db2 +sqlite3 db2 test.db +do_execsql_test 16.0 { + + ATTACH 'test.db2' AS aux; + CREATE TABLE aux.t2(x,y); + INSERT INTO t2 VALUES(1, 2); + CREATE VIRTUAL TABLE x1 USING fts5(a); + BEGIN; + INSERT INTO x1 VALUES('abc'); + INSERT INTO t2 VALUES(3, 4); +} + +do_execsql_test -db db2 16.1 { + ATTACH 'test.db2' AS aux; + BEGIN; + SELECT * FROM t2 +} {1 2} + +do_catchsql_test 16.2 { + COMMIT; +} {1 {database is locked}} + +do_execsql_test 16.3 { + INSERT INTO x1 VALUES('def'); +} + +do_execsql_test -db db2 16.4 { + END +} + +do_execsql_test 16.5 { + COMMIT +} + +do_execsql_test -db db2 16.6 { + SELECT * FROM x1 +} {abc def} + finish_test diff --git a/ext/lsm1/lsmInt.h b/ext/lsm1/lsmInt.h index 0f822e4793..5060366d0d 100644 --- a/ext/lsm1/lsmInt.h +++ b/ext/lsm1/lsmInt.h @@ -44,6 +44,8 @@ # endif #endif +/* #define LSM_DEBUG_EXPENSIVE 1 */ + /* ** Default values for various data structure parameters. These may be ** overridden by calls to lsm_config(). @@ -405,7 +407,7 @@ struct Segment { LsmPgno iFirst; /* First page of this run */ LsmPgno iLastPg; /* Last page of this run */ LsmPgno iRoot; /* Root page number (if any) */ - int nSize; /* Size of this run in pages */ + LsmPgno nSize; /* Size of this run in pages */ Redirect *pRedirect; /* Block redirects (or NULL) */ }; @@ -853,6 +855,8 @@ int lsmVarintGet32(u8 *, int *); int lsmVarintPut64(u8 *aData, i64 iVal); int lsmVarintGet64(const u8 *aData, i64 *piVal); +int lsmVarintLen64(i64); + int lsmVarintLen32(int); int lsmVarintSize(u8 c); diff --git a/ext/lsm1/lsm_ckpt.c b/ext/lsm1/lsm_ckpt.c index ba92a823cf..1c4f788ad6 100644 --- a/ext/lsm1/lsm_ckpt.c +++ b/ext/lsm1/lsm_ckpt.c @@ -511,7 +511,7 @@ static void ckptNewSegment( pSegment->iFirst = ckptGobble64(aIn, piIn); pSegment->iLastPg = ckptGobble64(aIn, piIn); pSegment->iRoot = ckptGobble64(aIn, piIn); - pSegment->nSize = (int)ckptGobble64(aIn, piIn); + pSegment->nSize = ckptGobble64(aIn, piIn); assert( pSegment->iFirst ); } diff --git a/ext/lsm1/lsm_file.c b/ext/lsm1/lsm_file.c index 1dcdd05d99..fd78835bbb 100644 --- a/ext/lsm1/lsm_file.c +++ b/ext/lsm1/lsm_file.c @@ -215,8 +215,8 @@ struct FileSystem { char *zLog; /* Database file name */ int nMetasize; /* Size of meta pages in bytes */ int nMetaRwSize; /* Read/written size of meta pages in bytes */ - int nPagesize; /* Database page-size in bytes */ - int nBlocksize; /* Database block-size in bytes */ + i64 nPagesize; /* Database page-size in bytes */ + i64 nBlocksize; /* Database block-size in bytes */ /* r/w file descriptors for both files. */ LsmFile *pLsmFile; /* Used after lsm_close() to link into list */ @@ -889,7 +889,7 @@ static LsmPgno fsFirstPageOnBlock(FileSystem *pFS, int iBlock){ iPg = pFS->nBlocksize * (LsmPgno)(iBlock-1) + 4; } }else{ - const int nPagePerBlock = (pFS->nBlocksize / pFS->nPagesize); + const i64 nPagePerBlock = (pFS->nBlocksize / pFS->nPagesize); if( iBlock==1 ){ iPg = 1 + ((pFS->nMetasize*2 + pFS->nPagesize - 1) / pFS->nPagesize); }else{ @@ -1131,7 +1131,6 @@ static void fsGrowMapping( i64 iSz, /* Minimum size to extend mapping to */ int *pRc /* IN/OUT: Error code */ ){ - assert( pFS->pCompress==0 ); assert( PAGE_HASPREV==4 ); if( *pRc==LSM_OK && iSz>pFS->nMap ){ @@ -1872,7 +1871,7 @@ void lsmFsGobble( assert( nPgno>0 && 0==fsPageRedirects(pFS, pRun, aPgno[0]) ); iBlk = fsPageToBlock(pFS, pRun->iFirst); - pRun->nSize += (int)(pRun->iFirst - fsFirstPageOnBlock(pFS, iBlk)); + pRun->nSize += (pRun->iFirst - fsFirstPageOnBlock(pFS, iBlk)); while( rc==LSM_OK ){ int iNext = 0; @@ -1883,13 +1882,13 @@ void lsmFsGobble( } rc = fsBlockNext(pFS, pRun, iBlk, &iNext); if( rc==LSM_OK ) rc = fsFreeBlock(pFS, pSnapshot, pRun, iBlk); - pRun->nSize -= (int)( + pRun->nSize -= ( 1 + fsLastPageOnBlock(pFS, iBlk) - fsFirstPageOnBlock(pFS, iBlk) ); iBlk = iNext; } - pRun->nSize -= (int)(pRun->iFirst - fsFirstPageOnBlock(pFS, iBlk)); + pRun->nSize -= (pRun->iFirst - fsFirstPageOnBlock(pFS, iBlk)); assert( pRun->nSize>0 ); } diff --git a/ext/lsm1/lsm_main.c b/ext/lsm1/lsm_main.c index a9c48e004e..f2b353105a 100644 --- a/ext/lsm1/lsm_main.c +++ b/ext/lsm1/lsm_main.c @@ -432,7 +432,7 @@ int lsm_config(lsm_db *pDb, int eParam, ...){ } void lsmAppendSegmentList(LsmString *pStr, char *zPre, Segment *pSeg){ - lsmStringAppendf(pStr, "%s{%d %d %d %d}", zPre, + lsmStringAppendf(pStr, "%s{%lld %lld %lld %lld}", zPre, pSeg->iFirst, pSeg->iLastPg, pSeg->iRoot, pSeg->nSize ); } diff --git a/ext/lsm1/lsm_shared.c b/ext/lsm1/lsm_shared.c index 2fdacf1eca..09f9338488 100644 --- a/ext/lsm1/lsm_shared.c +++ b/ext/lsm1/lsm_shared.c @@ -1306,6 +1306,24 @@ int lsmBeginRoTrans(lsm_db *db){ } } + /* In 'lsm_open()' we don't update the page and block sizes in the + ** Filesystem for 'readonly' connection. Because member 'db->pShmhdr' is a + ** nullpointer, this prevents loading a checkpoint. Now that the system is + ** live this member should be set. So we can update both values in + ** the Filesystem. + ** + ** Configure the file-system connection with the page-size and block-size + ** of this database. Even if the database file is zero bytes in size + ** on disk, these values have been set in shared-memory by now, and so + ** are guaranteed not to change during the lifetime of this connection. */ + if( LSM_OK==rc + && 0==lsmCheckpointClientCacheOk(db) + && LSM_OK==(rc=lsmCheckpointLoad(db, 0)) + ){ + lsmFsSetPageSize(db->pFS, lsmCheckpointPgsz(db->aSnapshot)); + lsmFsSetBlockSize(db->pFS, lsmCheckpointBlksz(db->aSnapshot)); + } + if( rc==LSM_OK ){ rc = lsmBeginReadTrans(db); } diff --git a/ext/lsm1/lsm_sorted.c b/ext/lsm1/lsm_sorted.c index 4a24e4b829..6e5243e850 100644 --- a/ext/lsm1/lsm_sorted.c +++ b/ext/lsm1/lsm_sorted.c @@ -525,7 +525,7 @@ static u8 *pageGetKey( LsmBlob *pBlob /* If required, use this for dynamic memory */ ){ u8 *pKey; - int nDummy; + i64 nDummy; int eType; u8 *aData; int nData; @@ -537,10 +537,10 @@ static u8 *pageGetKey( pKey = pageGetCell(aData, nData, iCell); eType = *pKey++; - pKey += lsmVarintGet32(pKey, &nDummy); + pKey += lsmVarintGet64(pKey, &nDummy); pKey += lsmVarintGet32(pKey, pnKey); if( rtIsWrite(eType) ){ - pKey += lsmVarintGet32(pKey, &nDummy); + pKey += lsmVarintGet64(pKey, &nDummy); } *piTopic = rtTopic(eType); @@ -657,14 +657,14 @@ static int btreeCursorLoadKey(BtreeCursor *pCsr){ return rc; } -static int btreeCursorPtr(u8 *aData, int nData, int iCell){ +static LsmPgno btreeCursorPtr(u8 *aData, int nData, int iCell){ int nCell; nCell = pageGetNRec(aData, nData); if( iCell>=nCell ){ - return (int)pageGetPtr(aData, nData); + return pageGetPtr(aData, nData); } - return (int)pageGetRecordPtr(aData, nData, iCell); + return pageGetRecordPtr(aData, nData, iCell); } static int btreeCursorNext(BtreeCursor *pCsr){ @@ -751,7 +751,7 @@ static int btreeCursorFirst(BtreeCursor *pCsr){ Page *pPg = 0; FileSystem *pFS = pCsr->pFS; - int iPg = (int)pCsr->pSeg->iRoot; + LsmPgno iPg = pCsr->pSeg->iRoot; do { rc = lsmFsDbPageGet(pFS, pCsr->pSeg, iPg, &pPg); @@ -779,7 +779,7 @@ static int btreeCursorFirst(BtreeCursor *pCsr){ assert( pCsr->aPg[pCsr->nDepth].iCell==0 ); pCsr->aPg[pCsr->nDepth].pPage = pPg; pCsr->nDepth++; - iPg = (int)pageGetRecordPtr(aData, nData, 0); + iPg = pageGetRecordPtr(aData, nData, 0); } } }while( rc==LSM_OK ); @@ -871,7 +871,7 @@ static int btreeCursorRestore( int nSeek; int iTopicSeek; int iPg = 0; - int iLoad = (int)pSeg->iRoot; + LsmPgno iLoad = pSeg->iRoot; Page *pPg = pCsr->aPg[nDepth-1].pPage; if( pageObjGetNRec(pPg)==0 ){ @@ -903,7 +903,7 @@ static int btreeCursorRestore( aData = fsPageData(pPg2, &nData); assert( (pageGetFlags(aData, nData) & SEGMENT_BTREE_FLAG) ); - iLoad = (int)pageGetPtr(aData, nData); + iLoad = pageGetPtr(aData, nData); iCell2 = pageGetNRec(aData, nData); iMax = iCell2-1; iMin = 0; @@ -926,7 +926,7 @@ static int btreeCursorRestore( assert( res!=0 ); if( res<0 ){ - iLoad = (int)iPtr; + iLoad = iPtr; iCell2 = iTry; iMax = iTry-1; }else{ @@ -1013,7 +1013,7 @@ static void segmentPtrSetPage(SegmentPtr *pPtr, Page *pNext){ static int segmentPtrLoadPage( FileSystem *pFS, SegmentPtr *pPtr, /* Load page into this SegmentPtr object */ - int iNew /* Page number of new page */ + LsmPgno iNew /* Page number of new page */ ){ Page *pPg = 0; /* The new page */ int rc; /* Return Code */ @@ -1633,7 +1633,7 @@ static int segmentPtrSeek( int iTopic, /* Key topic to seek to */ void *pKey, int nKey, /* Key to seek to */ int eSeek, /* Search bias - see above */ - int *piPtr, /* OUT: FC pointer */ + LsmPgno *piPtr, /* OUT: FC pointer */ int *pbStop ){ int (*xCmp)(void *, int, void *, int) = pCsr->pDb->xCmp; @@ -1759,7 +1759,7 @@ static int segmentPtrSeek( } assert( rc!=LSM_OK || assertSeekResult(pCsr,pPtr,iTopic,pKey,nKey,eSeek) ); - *piPtr = (int)iPtrOut; + *piPtr = iPtrOut; return rc; } @@ -1773,11 +1773,11 @@ static int seekInBtree( ){ int i = 0; int rc; - int iPg; + LsmPgno iPg; Page *pPg = 0; LsmBlob blob = {0, 0, 0}; - iPg = (int)pSeg->iRoot; + iPg = pSeg->iRoot; do { LsmPgno *piFirst = 0; if( aPg ){ @@ -1799,7 +1799,7 @@ static int seekInBtree( flags = pageGetFlags(aData, nData); if( (flags & SEGMENT_BTREE_FLAG)==0 ) break; - iPg = (int)pageGetPtr(aData, nData); + iPg = pageGetPtr(aData, nData); nRec = pageGetNRec(aData, nData); iMin = 0; @@ -1825,7 +1825,7 @@ static int seekInBtree( pCsr->pDb->xCmp, iTopic, pKey, nKey, iTopicT, pKeyT, nKeyT ); if( res<0 ){ - iPg = (int)iPtr; + iPg = iPtr; iMax = iTry-1; }else{ iMin = iTry+1; @@ -1851,12 +1851,12 @@ static int seekInSegment( SegmentPtr *pPtr, int iTopic, void *pKey, int nKey, - int iPg, /* Page to search */ + LsmPgno iPg, /* Page to search */ int eSeek, /* Search bias - see above */ - int *piPtr, /* OUT: FC pointer */ + LsmPgno *piPtr, /* OUT: FC pointer */ int *pbStop /* OUT: Stop search flag */ ){ - int iPtr = iPg; + LsmPgno iPtr = iPg; int rc = LSM_OK; if( pPtr->pSeg->iRoot ){ @@ -1866,7 +1866,7 @@ static int seekInSegment( if( rc==LSM_OK ) segmentPtrSetPage(pPtr, pPg); }else{ if( iPtr==0 ){ - iPtr = (int)pPtr->pSeg->iFirst; + iPtr = pPtr->pSeg->iFirst; } if( rc==LSM_OK ){ rc = segmentPtrLoadPage(pCsr->pDb->pFS, pPtr, iPtr); @@ -1904,7 +1904,7 @@ static int seekInLevel( ){ Level *pLvl = aPtr[0].pLevel; /* Level to seek within */ int rc = LSM_OK; /* Return code */ - int iOut = 0; /* Pointer to return to caller */ + LsmPgno iOut = 0; /* Pointer to return to caller */ int res = -1; /* Result of xCmp(pKey, split) */ int nRhs = pLvl->nRight; /* Number of right-hand-side segments */ int bStop = 0; @@ -1923,8 +1923,8 @@ static int seekInLevel( ** left-hand-side of the level in this case. */ if( res<0 ){ int i; - int iPtr = 0; - if( nRhs==0 ) iPtr = (int)*piPgno; + LsmPgno iPtr = 0; + if( nRhs==0 ) iPtr = *piPgno; rc = seekInSegment( pCsr, &aPtr[0], iTopic, pKey, nKey, iPtr, eSeek, &iOut, &bStop @@ -1939,7 +1939,7 @@ static int seekInLevel( if( res>=0 ){ int bHit = 0; /* True if at least one rhs is not EOF */ - int iPtr = (int)*piPgno; + LsmPgno iPtr = *piPgno; int i; segmentPtrReset(&aPtr[0], LSM_SEGMENTPTR_FREE_THRESHOLD); for(i=1; rc==LSM_OK && i<=nRhs && bStop==0; i++){ @@ -3361,6 +3361,8 @@ static int mergeWorkerPageOffset(u8 *aData, int nData){ int iOff; int nKey; int eType; + i64 nDummy; + nRec = lsmGetU16(&aData[SEGMENT_NRECORD_OFFSET(nData)]); iOff = lsmGetU16(&aData[SEGMENT_CELLPTR_OFFSET(nData, nRec-1)]); @@ -3370,7 +3372,7 @@ static int mergeWorkerPageOffset(u8 *aData, int nData){ || eType==(LSM_SEPARATOR) ); - iOff += lsmVarintGet32(&aData[iOff], &nKey); + iOff += lsmVarintGet64(&aData[iOff], &nDummy); iOff += lsmVarintGet32(&aData[iOff], &nKey); return iOff + (eType ? nKey : 0); @@ -3480,7 +3482,7 @@ static int mergeWorkerLoadHierarchy(MergeWorker *pMW){ lsm_env *pEnv = pMW->pDb->pEnv; Page **apHier = 0; int nHier = 0; - int iPg = (int)pSeg->iRoot; + LsmPgno iPg = pSeg->iRoot; do { Page *pPg = 0; @@ -3506,7 +3508,7 @@ static int mergeWorkerLoadHierarchy(MergeWorker *pMW){ nHier++; apHier[0] = pPg; - iPg = (int)pageGetPtr(aData, nData); + iPg = pageGetPtr(aData, nData); }else{ lsmFsPageRelease(pPg); break; @@ -3625,10 +3627,11 @@ static int mergeWorkerBtreeWrite( assert( lsmFsPageWritable(pOld) ); aData = fsPageData(pOld, &nData); if( eType==0 ){ - nByte = 2 + 1 + lsmVarintLen32((int)iPtr) + lsmVarintLen32((int)iKeyPg); + nByte = 2 + 1 + lsmVarintLen64(iPtr) + lsmVarintLen64(iKeyPg); }else{ - nByte = 2 + 1 + lsmVarintLen32((int)iPtr) + lsmVarintLen32(nKey) + nKey; + nByte = 2 + 1 + lsmVarintLen64(iPtr) + lsmVarintLen32(nKey) + nKey; } + nRec = pageGetNRec(aData, nData); nFree = SEGMENT_EOF(nData, nRec) - mergeWorkerPageOffset(aData, nData); if( nByte<=nFree ) break; @@ -3672,11 +3675,11 @@ static int mergeWorkerBtreeWrite( lsmPutU16(&aData[SEGMENT_NRECORD_OFFSET(nData)], (u16)(nRec+1)); if( eType==0 ){ aData[iOff++] = 0x00; - iOff += lsmVarintPut32(&aData[iOff], (int)iPtr); - iOff += lsmVarintPut32(&aData[iOff], (int)iKeyPg); + iOff += lsmVarintPut64(&aData[iOff], iPtr); + iOff += lsmVarintPut64(&aData[iOff], iKeyPg); }else{ aData[iOff++] = eType; - iOff += lsmVarintPut32(&aData[iOff], (int)iPtr); + iOff += lsmVarintPut64(&aData[iOff], iPtr); iOff += lsmVarintPut32(&aData[iOff], nKey); memcpy(&aData[iOff], pKey, nKey); } @@ -3872,7 +3875,7 @@ static int mergeWorkerNextPage( static int mergeWorkerData( MergeWorker *pMW, /* Merge worker object */ int bSep, /* True to write to separators run */ - int iFPtr, /* Footer ptr for new pages */ + LsmPgno iFPtr, /* Footer ptr for new pages */ u8 *aWrite, /* Write data from this buffer */ int nWrite /* Size of aWrite[] in bytes */ ){ @@ -3916,14 +3919,14 @@ static int mergeWorkerData( static int mergeWorkerFirstPage(MergeWorker *pMW){ int rc = LSM_OK; /* Return code */ Page *pPg = 0; /* First page of run pSeg */ - int iFPtr = 0; /* Pointer value read from footer of pPg */ + LsmPgno iFPtr = 0; /* Pointer value read from footer of pPg */ MultiCursor *pCsr = pMW->pCsr; assert( pMW->pPage==0 ); if( pCsr->pBtCsr ){ rc = LSM_OK; - iFPtr = (int)pMW->pLevel->pNext->lhs.iFirst; + iFPtr = pMW->pLevel->pNext->lhs.iFirst; }else if( pCsr->nPtr>0 ){ Segment *pSeg; pSeg = pCsr->aPtr[pCsr->nPtr-1].pSeg; @@ -3932,7 +3935,7 @@ static int mergeWorkerFirstPage(MergeWorker *pMW){ u8 *aData; /* Buffer for page pPg */ int nData; /* Size of aData[] in bytes */ aData = fsPageData(pPg, &nData); - iFPtr = (int)pageGetPtr(aData, nData); + iFPtr = pageGetPtr(aData, nData); lsmFsPageRelease(pPg); } } @@ -3951,7 +3954,7 @@ static int mergeWorkerWrite( int eType, /* One of SORTED_SEPARATOR, WRITE or DELETE */ void *pKey, int nKey, /* Key value */ void *pVal, int nVal, /* Value value */ - int iPtr /* Absolute value of page pointer, or 0 */ + LsmPgno iPtr /* Absolute value of page pointer, or 0 */ ){ int rc = LSM_OK; /* Return code */ Merge *pMerge; /* Persistent part of level merge state */ @@ -3960,8 +3963,8 @@ static int mergeWorkerWrite( u8 *aData; /* Data buffer for page pWriter->pPage */ int nData = 0; /* Size of buffer aData[] in bytes */ int nRec = 0; /* Number of records on page pPg */ - int iFPtr = 0; /* Value of pointer in footer of pPg */ - int iRPtr = 0; /* Value of pointer written into record */ + LsmPgno iFPtr = 0; /* Value of pointer in footer of pPg */ + LsmPgno iRPtr = 0; /* Value of pointer written into record */ int iOff = 0; /* Current write offset within page pPg */ Segment *pSeg; /* Segment being written */ int flags = 0; /* If != 0, flags value for page footer */ @@ -3978,8 +3981,8 @@ static int mergeWorkerWrite( if( pPg ){ aData = fsPageData(pPg, &nData); nRec = pageGetNRec(aData, nData); - iFPtr = (int)pageGetPtr(aData, nData); - iRPtr = iPtr - iFPtr; + iFPtr = pageGetPtr(aData, nData); + iRPtr = iPtr ? (iPtr - iFPtr) : 0; } /* Figure out how much space is required by the new record. The space @@ -3997,7 +4000,7 @@ static int mergeWorkerWrite( ** 4) Value size - 1 varint (only if LSM_INSERT flag is set) */ if( rc==LSM_OK ){ - nHdr = 1 + lsmVarintLen32(iRPtr) + lsmVarintLen32(nKey); + nHdr = 1 + lsmVarintLen64(iRPtr) + lsmVarintLen32(nKey); if( rtIsWrite(eType) ) nHdr += lsmVarintLen32(nVal); /* If the entire header will not fit on page pPg, or if page pPg is @@ -4009,8 +4012,8 @@ static int mergeWorkerWrite( assert( aData ); memset(&aData[iOff], 0, SEGMENT_EOF(nData, nRec)-iOff); } - iFPtr = (int)*pMW->pCsr->pPrevMergePtr; - iRPtr = iPtr - iFPtr; + iFPtr = *pMW->pCsr->pPrevMergePtr; + iRPtr = iPtr ? (iPtr - iFPtr) : 0; iOff = 0; nRec = 0; rc = mergeWorkerNextPage(pMW, iFPtr); @@ -4049,7 +4052,7 @@ static int mergeWorkerWrite( /* Write the entry header into the current page. */ aData[iOff++] = (u8)eType; /* 1 */ - iOff += lsmVarintPut32(&aData[iOff], iRPtr); /* 2 */ + iOff += lsmVarintPut64(&aData[iOff], iRPtr); /* 2 */ iOff += lsmVarintPut32(&aData[iOff], nKey); /* 3 */ if( rtIsWrite(eType) ) iOff += lsmVarintPut32(&aData[iOff], nVal); /* 4 */ pMerge->iOutputOff = iOff; @@ -4283,7 +4286,7 @@ static int mergeWorkerStep(MergeWorker *pMW){ pVal = pCsr->val.pData; } if( rc==LSM_OK ){ - rc = mergeWorkerWrite(pMW, eType, pKey, nKey, pVal, nVal, (int)iPtr); + rc = mergeWorkerWrite(pMW, eType, pKey, nKey, pVal, nVal, iPtr); } } } @@ -4604,7 +4607,7 @@ static int mergeWorkerInit( SegmentPtr *pPtr; assert( pCsr->aPtr[i].pPg==0 ); pPtr = &pCsr->aPtr[i]; - rc = segmentPtrLoadPage(pDb->pFS, pPtr, (int)pInput->iPg); + rc = segmentPtrLoadPage(pDb->pFS, pPtr, pInput->iPg); if( rc==LSM_OK && pPtr->nCell>0 ){ rc = segmentPtrLoadCell(pPtr, pInput->iCell); } @@ -5469,7 +5472,7 @@ int lsmFlushTreeToDisk(lsm_db *pDb){ ** be freed by the caller using lsmFree(). */ static char *segToString(lsm_env *pEnv, Segment *pSeg, int nMin){ - int nSize = pSeg->nSize; + LsmPgno nSize = pSeg->nSize; LsmPgno iRoot = pSeg->iRoot; LsmPgno iFirst = pSeg->iFirst; LsmPgno iLast = pSeg->iLastPg; @@ -5481,9 +5484,9 @@ static char *segToString(lsm_env *pEnv, Segment *pSeg, int nMin){ z1 = lsmMallocPrintf(pEnv, "%d.%d", iFirst, iLast); if( iRoot ){ - z2 = lsmMallocPrintf(pEnv, "root=%d", iRoot); + z2 = lsmMallocPrintf(pEnv, "root=%lld", iRoot); }else{ - z2 = lsmMallocPrintf(pEnv, "size=%d", nSize); + z2 = lsmMallocPrintf(pEnv, "size=%lld", nSize); } nPad = nMin - 2 - strlen(z1) - 1 - strlen(z2); @@ -5536,7 +5539,7 @@ void sortedDumpPage(lsm_db *pDb, Segment *pRun, Page *pPg, int bVals){ int i; int nRec; - int iPtr; + LsmPgno iPtr; int flags; u8 *aData; int nData; @@ -5544,11 +5547,11 @@ void sortedDumpPage(lsm_db *pDb, Segment *pRun, Page *pPg, int bVals){ aData = fsPageData(pPg, &nData); nRec = pageGetNRec(aData, nData); - iPtr = (int)pageGetPtr(aData, nData); + iPtr = pageGetPtr(aData, nData); flags = pageGetFlags(aData, nData); lsmStringInit(&s, pDb->pEnv); - lsmStringAppendf(&s,"nCell=%d iPtr=%d flags=%d {", nRec, iPtr, flags); + lsmStringAppendf(&s,"nCell=%d iPtr=%lld flags=%d {", nRec, iPtr, flags); if( flags&SEGMENT_BTREE_FLAG ) iPtr = 0; for(i=0; ipEnv); lsmStringAppendf(&str, "Page : %lld (%d bytes)\n", iPg, nData); lsmStringAppendf(&str, "nRec : %d\n", nRec); - lsmStringAppendf(&str, "iPtr : %d\n", iPtr); + lsmStringAppendf(&str, "iPtr : %lld\n", iPtr); lsmStringAppendf(&str, "flags: %04x\n", flags2); lsmStringAppendf(&str, "\n"); diff --git a/ext/lsm1/lsm_varint.c b/ext/lsm1/lsm_varint.c index c550a6405d..f690e3b063 100644 --- a/ext/lsm1/lsm_varint.c +++ b/ext/lsm1/lsm_varint.c @@ -185,6 +185,11 @@ int lsmVarintLen32(int n){ return lsmVarintPut32(aData, n); } +int lsmVarintLen64(i64 n){ + u8 aData[9]; + return lsmVarintPut64(aData, n); +} + /* ** The argument is the first byte of a varint. This function returns the ** total number of bytes in the entire varint (including the first byte). diff --git a/ext/misc/base64.c b/ext/misc/base64.c index 81767379b1..69eff61811 100755 --- a/ext/misc/base64.c +++ b/ext/misc/base64.c @@ -53,8 +53,15 @@ #include -#ifndef SQLITE_SHELL_EXTFUNCS /* Guard for #include as built-in extension. */ #include "sqlite3ext.h" + +#ifndef deliberate_fall_through +/* Quiet some compilers about some of our intentional code. */ +# if GCC_VERSION>=7000000 +# define deliberate_fall_through __attribute__((fallthrough)); +# else +# define deliberate_fall_through +# endif #endif SQLITE_EXTENSION_INIT1; @@ -64,12 +71,12 @@ SQLITE_EXTENSION_INIT1; #define ND 0x82 /* Not above or digit-value */ #define PAD_CHAR '=' -#ifndef UBYTE_TYPEDEF -typedef unsigned char ubyte; -# define UBYTE_TYPEDEF +#ifndef U8_TYPEDEF +typedef unsigned char u8; +#define U8_TYPEDEF #endif -static const ubyte b64DigitValues[128] = { +static const u8 b64DigitValues[128] = { /* HT LF VT FF CR */ ND,ND,ND,ND, ND,ND,ND,ND, ND,WS,WS,WS, WS,WS,ND,ND, /* US */ @@ -92,18 +99,18 @@ static const char b64Numerals[64+1] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; #define BX_DV_PROTO(c) \ - ((((ubyte)(c))<0x80)? (ubyte)(b64DigitValues[(ubyte)(c)]) : 0x80) -#define IS_BX_DIGIT(bdp) (((ubyte)(bdp))<0x80) + ((((u8)(c))<0x80)? (u8)(b64DigitValues[(u8)(c)]) : 0x80) +#define IS_BX_DIGIT(bdp) (((u8)(bdp))<0x80) #define IS_BX_WS(bdp) ((bdp)==WS) #define IS_BX_PAD(bdp) ((bdp)==PC) -#define BX_NUMERAL(dv) (b64Numerals[(ubyte)(dv)]) +#define BX_NUMERAL(dv) (b64Numerals[(u8)(dv)]) /* Width of base64 lines. Should be an integer multiple of 4. */ #define B64_DARK_MAX 72 /* Encode a byte buffer into base64 text with linefeeds appended to limit ** encoded group lengths to B64_DARK_MAX or to terminate the last group. */ -static char* toBase64( ubyte *pIn, int nbIn, char *pOut ){ +static char* toBase64( u8 *pIn, int nbIn, char *pOut ){ int nCol = 0; while( nbIn >= 3 ){ /* Do the bit-shuffle, exploiting unsigned input to avoid masking. */ @@ -128,7 +135,7 @@ static char* toBase64( ubyte *pIn, int nbIn, char *pOut ){ if( nbe=0; --nbe ){ - char ce = (nbe>= 6; pOut[nbe] = ce; } @@ -147,7 +154,7 @@ static char * skipNonB64( char *s ){ } /* Decode base64 text into a byte buffer. */ -static ubyte* fromBase64( char *pIn, int ncIn, ubyte *pOut ){ +static u8* fromBase64( char *pIn, int ncIn, u8 *pOut ){ if( ncIn>0 && pIn[ncIn-1]=='\n' ) --ncIn; while( ncIn>0 && *pIn!=PAD_CHAR ){ static signed char nboi[] = { 0, 0, 1, 2, 3 }; @@ -162,20 +169,20 @@ static ubyte* fromBase64( char *pIn, int ncIn, ubyte *pOut ){ if( nbo==0 ) break; for( nac=0; nac<4; ++nac ){ char c = (nac='#')+((c)>'&')+((c)>='*')+((c)>'z')) /* Provide digitValue to b85Numeral offset as a function of above class. */ -static ubyte b85_cOffset[] = { 0, '#', 0, '*'-4, 0 }; +static u8 b85_cOffset[] = { 0, '#', 0, '*'-4, 0 }; #define B85_DNOS( c ) b85_cOffset[B85_CLASS(c)] /* Say whether c is a base85 numeral. */ #define IS_B85( c ) (B85_CLASS(c) & 1) #if 0 /* Not used, */ -static ubyte base85DigitValue( char c ){ - ubyte dv = (ubyte)(c - '#'); +static u8 base85DigitValue( char c ){ + u8 dv = (u8)(c - '#'); if( dv>87 ) return 0xff; return (dv > 3)? dv-3 : dv; } @@ -151,7 +149,7 @@ static char * skipNonB85( char *s ){ /* Convert small integer, known to be in 0..84 inclusive, to base85 numeral. * Do not use the macro form with argument expression having a side-effect.*/ #if 0 -static char base85Numeral( ubyte b ){ +static char base85Numeral( u8 b ){ return (b < 4)? (char)(b + '#') : (char)(b - 4 + '*'); } #else @@ -169,11 +167,12 @@ static char *putcs(char *pc, char *s){ ** to be appended to encoded groups to limit their length to B85_DARK_MAX ** or to terminate the last group (to aid concatenation.) */ -static char* toBase85( ubyte *pIn, int nbIn, char *pOut, char *pSep ){ +static char* toBase85( u8 *pIn, int nbIn, char *pOut, char *pSep ){ int nCol = 0; while( nbIn >= 4 ){ int nco = 5; - unsigned long qbv = (pIn[0]<<24)|(pIn[1]<<16)|(pIn[2]<<8)|pIn[3]; + unsigned long qbv = (((unsigned long)pIn[0])<<24) | + (pIn[1]<<16) | (pIn[2]<<8) | pIn[3]; while( nco > 0 ){ unsigned nqv = (unsigned)(qbv/85UL); unsigned char dv = qbv - 85UL*nqv; @@ -197,7 +196,7 @@ static char* toBase85( ubyte *pIn, int nbIn, char *pOut, char *pSep ){ } nCol += nco; while( nco > 0 ){ - ubyte dv = (ubyte)(qv % 85); + u8 dv = (u8)(qv % 85); qv /= 85; pOut[--nco] = base85Numeral(dv); } @@ -209,7 +208,7 @@ static char* toBase85( ubyte *pIn, int nbIn, char *pOut, char *pSep ){ } /* Decode base85 text into a byte buffer. */ -static ubyte* fromBase85( char *pIn, int ncIn, ubyte *pOut ){ +static u8* fromBase85( char *pIn, int ncIn, u8 *pOut ){ if( ncIn>0 && pIn[ncIn-1]=='\n' ) --ncIn; while( ncIn>0 ){ static signed char nboi[] = { 0, 0, 1, 2, 3, 4 }; @@ -223,7 +222,7 @@ static ubyte* fromBase85( char *pIn, int ncIn, ubyte *pOut ){ if( nbo==0 ) break; while( nti>0 ){ char c = *pIn++; - ubyte cdo = B85_DNOS(c); + u8 cdo = B85_DNOS(c); --ncIn; if( cdo==0 ) break; qv = 85 * qv + (c - cdo); @@ -287,7 +286,7 @@ static void base85(sqlite3_context *context, int na, sqlite3_value *av[]){ int nvMax = sqlite3_limit(sqlite3_context_db_handle(context), SQLITE_LIMIT_LENGTH, -1); char *cBuf; - ubyte *bBuf; + u8 *bBuf; assert(na==1); switch( sqlite3_value_type(av[0]) ){ case SQLITE_BLOB: @@ -300,7 +299,7 @@ static void base85(sqlite3_context *context, int na, sqlite3_value *av[]){ } cBuf = sqlite3_malloc(nc); if( !cBuf ) goto memFail; - bBuf = (ubyte*)sqlite3_value_blob(av[0]); + bBuf = (u8*)sqlite3_value_blob(av[0]); nc = (int)(toBase85(bBuf, nb, cBuf, "\n") - cBuf); sqlite3_result_text(context, cBuf, nc, sqlite3_free); break; @@ -370,7 +369,7 @@ static int sqlite3_base85_init int main(int na, char *av[]){ int cin; int rc = 0; - ubyte bBuf[4*(B85_DARK_MAX/5)]; + u8 bBuf[4*(B85_DARK_MAX/5)]; char cBuf[5*(sizeof(bBuf)/4)+2]; size_t nio; # ifndef OMIT_BASE85_CHECKER diff --git a/ext/misc/basexx.c b/ext/misc/basexx.c index 2ca2906fae..c1808aa70f 100644 --- a/ext/misc/basexx.c +++ b/ext/misc/basexx.c @@ -49,6 +49,9 @@ static void init_api_ptr(const sqlite3_api_routines *pApi){ #undef SQLITE_EXTENSION_INIT2 #define SQLITE_EXTENSION_INIT2(v) (void)v +typedef unsigned char u8; +#define U8_TYPEDEF + /* These next 2 undef's are only needed because the entry point names * collide when formulated per the rules stated for loadable extension * entry point names that will be deduced from the file basenames. diff --git a/ext/misc/carray.c b/ext/misc/carray.c index b6eb0453a0..709c894f27 100644 --- a/ext/misc/carray.c +++ b/ext/misc/carray.c @@ -28,7 +28,7 @@ ** ** There is an optional third parameter to determine the datatype of ** the C-language array. Allowed values of the third parameter are -** 'int32', 'int64', 'double', 'char*'. Example: +** 'int32', 'int64', 'double', 'char*', 'struct iovec'. Example: ** ** SELECT * FROM carray($ptr,10,'char*'); ** @@ -56,6 +56,14 @@ SQLITE_EXTENSION_INIT1 #include #include +#ifdef _WIN32 + struct iovec { + void *iov_base; + size_t iov_len; + }; +#else +# include +#endif /* Allowed values for the mFlags parameter to sqlite3_carray_bind(). ** Must exactly match the definitions in carray.h. @@ -65,6 +73,7 @@ SQLITE_EXTENSION_INIT1 # define CARRAY_INT64 1 /* Data is 64-bit signed integers */ # define CARRAY_DOUBLE 2 /* Data is doubles */ # define CARRAY_TEXT 3 /* Data is char* */ +# define CARRAY_BLOB 4 /* Data is struct iovec* */ #endif #ifndef SQLITE_API @@ -80,7 +89,8 @@ SQLITE_EXTENSION_INIT1 /* ** Names of allowed datatypes */ -static const char *azType[] = { "int32", "int64", "double", "char*" }; +static const char *azType[] = { "int32", "int64", "double", "char*", + "struct iovec" }; /* ** Structure used to hold the sqlite3_carray_bind() information @@ -224,6 +234,12 @@ static int carrayColumn( sqlite3_result_text(ctx, p[pCur->iRowid-1], -1, SQLITE_TRANSIENT); return SQLITE_OK; } + case CARRAY_BLOB: { + const struct iovec *p = (struct iovec*)pCur->pPtr; + sqlite3_result_blob(ctx, p[pCur->iRowid-1].iov_base, + (int)p[pCur->iRowid-1].iov_len, SQLITE_TRANSIENT); + return SQLITE_OK; + } } } } @@ -268,7 +284,7 @@ static int carrayFilter( if( pBind==0 ) break; pCur->pPtr = pBind->aData; pCur->iCnt = pBind->nData; - pCur->eType = pBind->mFlags & 0x03; + pCur->eType = pBind->mFlags & 0x07; break; } case 2: @@ -431,24 +447,29 @@ SQLITE_API int sqlite3_carray_bind( pNew->mFlags = mFlags; if( xDestroy==SQLITE_TRANSIENT ){ sqlite3_int64 sz = nData; - switch( mFlags & 0x03 ){ - case CARRAY_INT32: sz *= 4; break; - case CARRAY_INT64: sz *= 8; break; - case CARRAY_DOUBLE: sz *= 8; break; - case CARRAY_TEXT: sz *= sizeof(char*); break; + switch( mFlags & 0x07 ){ + case CARRAY_INT32: sz *= 4; break; + case CARRAY_INT64: sz *= 8; break; + case CARRAY_DOUBLE: sz *= 8; break; + case CARRAY_TEXT: sz *= sizeof(char*); break; + case CARRAY_BLOB: sz *= sizeof(struct iovec); break; } - if( (mFlags & 0x03)==CARRAY_TEXT ){ + if( (mFlags & 0x07)==CARRAY_TEXT ){ for(i=0; iaData = sqlite3_malloc64( sz ); if( pNew->aData==0 ){ sqlite3_free(pNew); return SQLITE_NOMEM; } - if( (mFlags & 0x03)==CARRAY_TEXT ){ + if( (mFlags & 0x07)==CARRAY_TEXT ){ char **az = (char**)pNew->aData; char *z = (char*)&az[nData]; for(i=0; iaData; + unsigned char *z = (unsigned char*)&p[nData]; + for(i=0; iaData, aData, sz); } diff --git a/ext/misc/carray.h b/ext/misc/carray.h index 63df0066b5..ae0a9e8ab0 100644 --- a/ext/misc/carray.h +++ b/ext/misc/carray.h @@ -41,10 +41,10 @@ SQLITE_API int sqlite3_carray_bind( #define CARRAY_INT64 1 /* Data is 64-bit signed integers */ #define CARRAY_DOUBLE 2 /* Data is doubles */ #define CARRAY_TEXT 3 /* Data is char* */ +#define CARRAY_BLOB 4 /* Data is struct iovec */ #ifdef __cplusplus } /* end of the 'extern "C"' block */ #endif #endif /* ifndef _CARRAY_H */ - diff --git a/ext/misc/decimal.c b/ext/misc/decimal.c index 37c6c2f52c..4495cae469 100644 --- a/ext/misc/decimal.c +++ b/ext/misc/decimal.c @@ -616,7 +616,7 @@ int sqlite3_decimal_init( SQLITE_EXTENSION_INIT2(pApi); - for(i=0; ipRow; for(p=sqlite3_next_stmt(pCur->db, 0); p; p=sqlite3_next_stmt(pCur->db, p)){ @@ -271,6 +279,7 @@ static int stmtBestIndex( sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo ){ + (void)tab; pIdxInfo->estimatedCost = (double)500; pIdxInfo->estimatedRows = 500; return SQLITE_OK; diff --git a/ext/misc/zipfile.c b/ext/misc/zipfile.c index f818fbc11c..480fbe3990 100644 --- a/ext/misc/zipfile.c +++ b/ext/misc/zipfile.c @@ -352,6 +352,7 @@ static int zipfileConnect( const char *zFile = 0; ZipfileTab *pNew = 0; int rc; + (void)pAux; /* If the table name is not "zipfile", require that the argument be ** specified. This stops zipfile tables from being created as: @@ -808,6 +809,7 @@ static int zipfileGetEntry( u8 *aRead; char **pzErr = &pTab->base.zErrMsg; int rc = SQLITE_OK; + (void)nBlob; if( aBlob==0 ){ aRead = pTab->aBuffer; @@ -1254,6 +1256,9 @@ static int zipfileFilter( int rc = SQLITE_OK; /* Return Code */ int bInMemory = 0; /* True for an in-memory zipfile */ + (void)idxStr; + (void)argc; + zipfileResetCursor(pCsr); if( pTab->zFile ){ @@ -1280,7 +1285,7 @@ static int zipfileFilter( } if( 0==pTab->pWriteFd && 0==bInMemory ){ - pCsr->pFile = fopen(zFile, "rb"); + pCsr->pFile = zFile ? fopen(zFile, "rb") : 0; if( pCsr->pFile==0 ){ zipfileCursorErr(pCsr, "cannot open file: %s", zFile); rc = SQLITE_ERROR; @@ -1314,6 +1319,7 @@ static int zipfileBestIndex( int i; int idx = -1; int unusable = 0; + (void)tab; for(i=0; inConstraint; i++){ const struct sqlite3_index_constraint *pCons = &pIdxInfo->aConstraint[i]; @@ -1564,6 +1570,8 @@ static int zipfileUpdate( int bIsDir = 0; u32 iCrc32 = 0; + (void)pRowid; + if( pTab->pWriteFd==0 ){ rc = zipfileBegin(pVtab); if( rc!=SQLITE_OK ) return rc; @@ -1898,6 +1906,7 @@ static int zipfileFindFunction( void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), /* OUT: Result */ void **ppArg /* OUT: User data for *pxFunc */ ){ + (void)nArg; if( sqlite3_stricmp("zipfile_cds", zName)==0 ){ *pxFunc = zipfileFunctionCds; *ppArg = (void*)pVtab; diff --git a/ext/rbu/rbu1.test b/ext/rbu/rbu1.test index 9237dbcea7..2c3860e808 100644 --- a/ext/rbu/rbu1.test +++ b/ext/rbu/rbu1.test @@ -11,6 +11,7 @@ # source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } set ::testprefix rbu1 db close diff --git a/ext/rbu/rbu10.test b/ext/rbu/rbu10.test index aa4db8a29f..7f75dbfa76 100644 --- a/ext/rbu/rbu10.test +++ b/ext/rbu/rbu10.test @@ -10,13 +10,10 @@ #*********************************************************************** # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } set ::testprefix rbu10 - #-------------------------------------------------------------------- # Test that UPDATE commands work even if the input columns are in a # different order to the output columns. diff --git a/ext/rbu/rbu11.test b/ext/rbu/rbu11.test index 5a4219b001..4b711721ef 100644 --- a/ext/rbu/rbu11.test +++ b/ext/rbu/rbu11.test @@ -10,10 +10,8 @@ #*********************************************************************** # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } set ::testprefix rbu11 diff --git a/ext/rbu/rbu12.test b/ext/rbu/rbu12.test index 7816bfe89f..6e27ba692e 100644 --- a/ext/rbu/rbu12.test +++ b/ext/rbu/rbu12.test @@ -10,10 +10,8 @@ #*********************************************************************** # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } source $testdir/lock_common.tcl set ::testprefix rbu12 diff --git a/ext/rbu/rbu13.test b/ext/rbu/rbu13.test index 624c587cc3..45b84f2b1d 100644 --- a/ext/rbu/rbu13.test +++ b/ext/rbu/rbu13.test @@ -13,10 +13,8 @@ # for UPDATE statements. This tests RBU's internal UPDATE statement cache. # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } source $testdir/lock_common.tcl set ::testprefix rbu13 diff --git a/ext/rbu/rbu14.test b/ext/rbu/rbu14.test index 07f6784094..18114ec7a9 100644 --- a/ext/rbu/rbu14.test +++ b/ext/rbu/rbu14.test @@ -13,10 +13,8 @@ # table. # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } source $testdir/lock_common.tcl set ::testprefix rbu14 diff --git a/ext/rbu/rbu3.test b/ext/rbu/rbu3.test index da87561fbb..fe2afdc059 100644 --- a/ext/rbu/rbu3.test +++ b/ext/rbu/rbu3.test @@ -10,10 +10,8 @@ #*********************************************************************** # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } set ::testprefix rbu3 diff --git a/ext/rbu/rbu5.test b/ext/rbu/rbu5.test index 3696dfb874..02ecf6be16 100644 --- a/ext/rbu/rbu5.test +++ b/ext/rbu/rbu5.test @@ -13,6 +13,7 @@ # source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } set ::testprefix rbu5 diff --git a/ext/rbu/rbu6.test b/ext/rbu/rbu6.test index bb5b17f425..cddd938703 100644 --- a/ext/rbu/rbu6.test +++ b/ext/rbu/rbu6.test @@ -13,10 +13,8 @@ # outcome of some other client writing to the database while an RBU update # is being applied. -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } set ::testprefix rbu6 proc setup_test {} { diff --git a/ext/rbu/rbu7.test b/ext/rbu/rbu7.test index 1e3bc174dd..e8ec0343db 100644 --- a/ext/rbu/rbu7.test +++ b/ext/rbu/rbu7.test @@ -13,10 +13,8 @@ # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } set ::testprefix rbu7 # Test index: diff --git a/ext/rbu/rbu8.test b/ext/rbu/rbu8.test index c0ccd8c5eb..86e931482e 100644 --- a/ext/rbu/rbu8.test +++ b/ext/rbu/rbu8.test @@ -12,10 +12,8 @@ # Test the rbu_delta() feature. # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } set ::testprefix rbu8 do_execsql_test 1.0 { diff --git a/ext/rbu/rbu9.test b/ext/rbu/rbu9.test index 0efdc1dde5..fe808b3ac6 100644 --- a/ext/rbu/rbu9.test +++ b/ext/rbu/rbu9.test @@ -12,10 +12,8 @@ # Test RBU with virtual tables. And tables with no PRIMARY KEY declarations. # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } set ::testprefix rbu9 ifcapable !fts3 { diff --git a/ext/rbu/rbuA.test b/ext/rbu/rbuA.test index 642caca196..cdc0c8040b 100644 --- a/ext/rbu/rbuA.test +++ b/ext/rbu/rbuA.test @@ -14,10 +14,8 @@ # a wal mode database via RBU. # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } set ::testprefix rbuA set db_sql { diff --git a/ext/rbu/rbuB.test b/ext/rbu/rbuB.test index 6eb917e57d..fca755a4b6 100644 --- a/ext/rbu/rbuB.test +++ b/ext/rbu/rbuB.test @@ -12,6 +12,7 @@ # source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } set ::testprefix rbuB db close diff --git a/ext/rbu/rbuC.test b/ext/rbu/rbuC.test index ff3d4d5075..430b14418e 100644 --- a/ext/rbu/rbuC.test +++ b/ext/rbu/rbuC.test @@ -13,6 +13,7 @@ # source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } set ::testprefix rbuC #------------------------------------------------------------------------- diff --git a/ext/rbu/rbubusy.test b/ext/rbu/rbubusy.test index 8cc47db8f9..088711405a 100644 --- a/ext/rbu/rbubusy.test +++ b/ext/rbu/rbubusy.test @@ -12,6 +12,7 @@ # source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } set ::testprefix rbubusy db close diff --git a/ext/rbu/rbucollate.test b/ext/rbu/rbucollate.test index ccc0976247..52f6ba373f 100644 --- a/ext/rbu/rbucollate.test +++ b/ext/rbu/rbucollate.test @@ -11,6 +11,7 @@ # source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } set ::testprefix rbucollate ifcapable !icu_collations { diff --git a/ext/rbu/rbucrash.test b/ext/rbu/rbucrash.test index 34ac2c86fe..c2132bc3d3 100644 --- a/ext/rbu/rbucrash.test +++ b/ext/rbu/rbucrash.test @@ -10,10 +10,8 @@ #*********************************************************************** # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } set ::testprefix rbucrash db close diff --git a/ext/rbu/rbucrash2.test b/ext/rbu/rbucrash2.test index 9ff6eba4f2..683de88492 100644 --- a/ext/rbu/rbucrash2.test +++ b/ext/rbu/rbucrash2.test @@ -10,10 +10,8 @@ #*********************************************************************** # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } set ::testprefix rbucrash2 db close diff --git a/ext/rbu/rbudiff.test b/ext/rbu/rbudiff.test index 5c2bf9bee7..b165cf2be7 100644 --- a/ext/rbu/rbudiff.test +++ b/ext/rbu/rbudiff.test @@ -12,10 +12,9 @@ # Tests for the [sqldiff --rbu] command. # # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. test] -} -source $testdir/tester.tcl + +source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } set testprefix rbudiff set PROG [test_find_sqldiff] diff --git a/ext/rbu/rbudor.test b/ext/rbu/rbudor.test index df4d934de7..684b43a946 100644 --- a/ext/rbu/rbudor.test +++ b/ext/rbu/rbudor.test @@ -13,10 +13,8 @@ # enabled by SQLITE_DIRECT_OVERFLOW_READ - Direct Overflow Read. # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } set ::testprefix rbudor set bigA [string repeat a 5000] diff --git a/ext/rbu/rbuexlock.test b/ext/rbu/rbuexlock.test index eddcdc115f..570489cbc7 100644 --- a/ext/rbu/rbuexlock.test +++ b/ext/rbu/rbuexlock.test @@ -11,6 +11,7 @@ # source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } set ::testprefix rbuexlock db close diff --git a/ext/rbu/rbuexpr.test b/ext/rbu/rbuexpr.test index a392c4ed8c..724a0f131f 100644 --- a/ext/rbu/rbuexpr.test +++ b/ext/rbu/rbuexpr.test @@ -11,6 +11,7 @@ # source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } set ::testprefix rbuexpr db close diff --git a/ext/rbu/rbufault.test b/ext/rbu/rbufault.test index 247a991899..fb3efb972e 100644 --- a/ext/rbu/rbufault.test +++ b/ext/rbu/rbufault.test @@ -10,10 +10,8 @@ #*********************************************************************** # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } source $testdir/malloc_common.tcl set ::testprefix rbufault diff --git a/ext/rbu/rbufault2.test b/ext/rbu/rbufault2.test index 36f2b6b6f2..4bf53a17fe 100644 --- a/ext/rbu/rbufault2.test +++ b/ext/rbu/rbufault2.test @@ -10,10 +10,8 @@ #*********************************************************************** # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } source $testdir/malloc_common.tcl set ::testprefix rbufault2 diff --git a/ext/rbu/rbufault3.test b/ext/rbu/rbufault3.test index 4f690284a2..b7ba54f6a6 100644 --- a/ext/rbu/rbufault3.test +++ b/ext/rbu/rbufault3.test @@ -13,6 +13,7 @@ # source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } source $testdir/malloc_common.tcl set ::testprefix rbufault3 diff --git a/ext/rbu/rbufault4.test b/ext/rbu/rbufault4.test index 551b8b2b6f..f4207c162e 100644 --- a/ext/rbu/rbufault4.test +++ b/ext/rbu/rbufault4.test @@ -10,10 +10,8 @@ #*********************************************************************** # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } source $testdir/malloc_common.tcl set ::testprefix rbufault4 diff --git a/ext/rbu/rbufts.test b/ext/rbu/rbufts.test index 95d4e75c52..8424c95708 100644 --- a/ext/rbu/rbufts.test +++ b/ext/rbu/rbufts.test @@ -13,10 +13,8 @@ # contains tests to ensure that RBU works with FTS tables. # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } set ::testprefix rbufts ifcapable !fts3 { diff --git a/ext/rbu/rbumisc.test b/ext/rbu/rbumisc.test index c2a3906c09..6b89150bce 100644 --- a/ext/rbu/rbumisc.test +++ b/ext/rbu/rbumisc.test @@ -11,6 +11,7 @@ # source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } set ::testprefix rbumisc db close diff --git a/ext/rbu/rbumulti.test b/ext/rbu/rbumulti.test index 5579a5f5bf..95c4fee703 100644 --- a/ext/rbu/rbumulti.test +++ b/ext/rbu/rbumulti.test @@ -9,11 +9,14 @@ # #*********************************************************************** # +# TESTRUNNER: slow +# # This file contains tests of multiple RBU operations running # concurrently within the same process. # source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } set ::testprefix rbumulti db close diff --git a/ext/rbu/rbupartial.test b/ext/rbu/rbupartial.test index d6cd7330fe..c8c476a1f9 100644 --- a/ext/rbu/rbupartial.test +++ b/ext/rbu/rbupartial.test @@ -11,6 +11,7 @@ # source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } set ::testprefix rbupartial db close diff --git a/ext/rbu/rbupass.test b/ext/rbu/rbupass.test new file mode 100644 index 0000000000..8bebe074bd --- /dev/null +++ b/ext/rbu/rbupass.test @@ -0,0 +1,81 @@ +# 2023 January 13 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# + +source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } +set ::testprefix rbupass + +if {[info commands register_demovfs]==""} { + finish_test + return +} + +db close +sqlite3_shutdown +sqlite3_config_uri 1 + +register_demovfs +sqlite3rbu_create_vfs myvfs demo + +sqlite3 db file:test.db?vfs=myvfs +do_execsql_test 1.0 { + CREATE TABLE t1(a INTEGER PRIMARY KEY, b); + INSERT INTO t1 VALUES(1, 2); + SELECT * FROM t1; +} {1 2} + +do_execsql_test 1.1 { + PRAGMA journal_mode = wal; +} {delete} + +do_execsql_test 1.2 { + SELECT * FROM t1; +} {1 2} + +do_test 1.3 { + forcedelete rbu.db + sqlite3 rbu rbu.db + rbu eval { + CREATE TABLE data_t1(a, b, rbu_control); + INSERT INTO data_t1 VALUES(2, 4, 0); + } + rbu close +} {} + +do_test 1.4 { + sqlite3rbu rbu test.db rbu.db +} {rbu} +do_test 1.5 { + rbu step +} {SQLITE_CANTOPEN} +do_test 1.6 { + list [catch { rbu close } msg] $msg +} {1 {SQLITE_CANTOPEN - unable to open database file}} + +do_test 1.7 { + sqlite3rbu_vacuum rbu test.db +} {rbu} +do_test 1.8 { + rbu step + catch { rbu close } +} {1} + +do_execsql_test 1.9 { + SELECT * FROM t1; +} {1 2} + +db close +sqlite3rbu_destroy_vfs myvfs +unregister_demovfs +sqlite3_shutdown +finish_test + diff --git a/ext/rbu/rbuprogress.test b/ext/rbu/rbuprogress.test index bf37b4e505..513ccdbbe4 100644 --- a/ext/rbu/rbuprogress.test +++ b/ext/rbu/rbuprogress.test @@ -11,6 +11,7 @@ # source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } set ::testprefix rbuprogress diff --git a/ext/rbu/rburename.test b/ext/rbu/rburename.test index 2275396bca..fb9d83ea19 100644 --- a/ext/rbu/rburename.test +++ b/ext/rbu/rburename.test @@ -12,6 +12,7 @@ # source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } set ::testprefix rburename diff --git a/ext/rbu/rburesume.test b/ext/rbu/rburesume.test index a2cf9bc4ab..fdbd214213 100644 --- a/ext/rbu/rburesume.test +++ b/ext/rbu/rburesume.test @@ -9,11 +9,14 @@ # #*********************************************************************** # +# TESTRUNNER: slow +# # This file contains tests for resumption of RBU operations in the # case where the previous RBU process crashed. # source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } set ::testprefix rburesume forcedelete test.db-shm test.db-oal diff --git a/ext/rbu/rbusave.test b/ext/rbu/rbusave.test index 210d7b545a..79b49de513 100644 --- a/ext/rbu/rbusave.test +++ b/ext/rbu/rbusave.test @@ -10,10 +10,8 @@ #*********************************************************************** # -if {![info exists testdir]} { - set testdir [file join [file dirname [info script]] .. .. test] -} -source $testdir/tester.tcl +source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } set ::testprefix rbusave do_execsql_test 1.0 { diff --git a/ext/rbu/rbusplit.test b/ext/rbu/rbusplit.test index 34fef38677..b28f0c0da9 100644 --- a/ext/rbu/rbusplit.test +++ b/ext/rbu/rbusplit.test @@ -12,6 +12,7 @@ # source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } set ::testprefix rbusplit db close diff --git a/ext/rbu/rbutemplimit.test b/ext/rbu/rbutemplimit.test index 958b2bfcbc..3ca9c64d81 100644 --- a/ext/rbu/rbutemplimit.test +++ b/ext/rbu/rbutemplimit.test @@ -9,8 +9,10 @@ # #*********************************************************************** # +# TESTRUNNER: slow source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } set ::testprefix rbutemplimit db close diff --git a/ext/rbu/rbuvacuum.test b/ext/rbu/rbuvacuum.test index 1785f529b1..4ed71c2665 100644 --- a/ext/rbu/rbuvacuum.test +++ b/ext/rbu/rbuvacuum.test @@ -15,6 +15,7 @@ # source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } set ::testprefix rbuvacuum foreach step {0 1} { diff --git a/ext/rbu/rbuvacuum2.test b/ext/rbu/rbuvacuum2.test index d4f7c52328..eed8ea1ae4 100644 --- a/ext/rbu/rbuvacuum2.test +++ b/ext/rbu/rbuvacuum2.test @@ -15,6 +15,7 @@ # source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } foreach {step} {0 1} { foreach {ttt state} { diff --git a/ext/rbu/rbuvacuum3.test b/ext/rbu/rbuvacuum3.test index 7e1e337f3c..98fb25fc04 100644 --- a/ext/rbu/rbuvacuum3.test +++ b/ext/rbu/rbuvacuum3.test @@ -15,6 +15,7 @@ # source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } set testprefix rbuvacuum3 do_execsql_test 1.0 { diff --git a/ext/rbu/rbuvacuum4.test b/ext/rbu/rbuvacuum4.test index 5cf33d6765..269bd2152f 100644 --- a/ext/rbu/rbuvacuum4.test +++ b/ext/rbu/rbuvacuum4.test @@ -15,6 +15,7 @@ # source [file join [file dirname [info script]] rbu_common.tcl] +ifcapable !rbu { finish_test ; return } set testprefix rbuvacuum4 set step 1 diff --git a/ext/rbu/sqlite3rbu.c b/ext/rbu/sqlite3rbu.c index 04d727a571..2db66f67ac 100644 --- a/ext/rbu/sqlite3rbu.c +++ b/ext/rbu/sqlite3rbu.c @@ -3830,7 +3830,8 @@ static void rbuSetupOal(sqlite3rbu *p, RbuState *pState){ static void rbuDeleteOalFile(sqlite3rbu *p){ char *zOal = rbuMPrintf(p, "%s-oal", p->zTarget); if( zOal ){ - sqlite3_vfs *pVfs = sqlite3_vfs_find(0); + sqlite3_vfs *pVfs = 0; + sqlite3_file_control(p->dbMain, "main", SQLITE_FCNTL_VFS_POINTER, &pVfs); assert( pVfs && p->rc==SQLITE_OK && p->zErrmsg==0 ); pVfs->xDelete(pVfs, zOal, 0); sqlite3_free(zOal); @@ -4587,9 +4588,12 @@ static int rbuVfsClose(sqlite3_file *pFile){ sqlite3_free(p->zDel); if( p->openFlags & SQLITE_OPEN_MAIN_DB ){ + const sqlite3_io_methods *pMeth = p->pReal->pMethods; rbuMainlistRemove(p); rbuUnlockShm(p); - p->pReal->pMethods->xShmUnmap(p->pReal, 0); + if( pMeth->iVersion>1 && pMeth->xShmUnmap ){ + pMeth->xShmUnmap(p->pReal, 0); + } } else if( (p->openFlags & SQLITE_OPEN_DELETEONCLOSE) && p->pRbu ){ rbuUpdateTempSize(p, 0); @@ -5048,6 +5052,25 @@ static int rbuVfsOpen( rbuVfsShmUnmap, /* xShmUnmap */ 0, 0 /* xFetch, xUnfetch */ }; + static sqlite3_io_methods rbuvfs_io_methods1 = { + 1, /* iVersion */ + rbuVfsClose, /* xClose */ + rbuVfsRead, /* xRead */ + rbuVfsWrite, /* xWrite */ + rbuVfsTruncate, /* xTruncate */ + rbuVfsSync, /* xSync */ + rbuVfsFileSize, /* xFileSize */ + rbuVfsLock, /* xLock */ + rbuVfsUnlock, /* xUnlock */ + rbuVfsCheckReservedLock, /* xCheckReservedLock */ + rbuVfsFileControl, /* xFileControl */ + rbuVfsSectorSize, /* xSectorSize */ + rbuVfsDeviceCharacteristics, /* xDeviceCharacteristics */ + 0, 0, 0, 0, 0, 0 + }; + + + rbu_vfs *pRbuVfs = (rbu_vfs*)pVfs; sqlite3_vfs *pRealVfs = pRbuVfs->pRealVfs; rbu_file *pFd = (rbu_file *)pFile; @@ -5102,10 +5125,15 @@ static int rbuVfsOpen( rc = pRealVfs->xOpen(pRealVfs, zOpen, pFd->pReal, oflags, pOutFlags); } if( pFd->pReal->pMethods ){ + const sqlite3_io_methods *pMeth = pFd->pReal->pMethods; /* The xOpen() operation has succeeded. Set the sqlite3_file.pMethods ** pointer and, if the file is a main database file, link it into the ** mutex protected linked list of all such files. */ - pFile->pMethods = &rbuvfs_io_methods; + if( pMeth->iVersion<2 || pMeth->xShmLock==0 ){ + pFile->pMethods = &rbuvfs_io_methods1; + }else{ + pFile->pMethods = &rbuvfs_io_methods; + } if( flags & SQLITE_OPEN_MAIN_DB ){ rbuMainlistAdd(pFd); } diff --git a/ext/rbu/sqlite3rbu.h b/ext/rbu/sqlite3rbu.h index c819cd3f2a..d156b3178a 100644 --- a/ext/rbu/sqlite3rbu.h +++ b/ext/rbu/sqlite3rbu.h @@ -10,42 +10,42 @@ ** ************************************************************************* ** -** This file contains the public interface for the RBU extension. +** This file contains the public interface for the RBU extension. */ /* ** SUMMARY ** -** Writing a transaction containing a large number of operations on +** Writing a transaction containing a large number of operations on ** b-tree indexes that are collectively larger than the available cache -** memory can be very inefficient. +** memory can be very inefficient. ** ** The problem is that in order to update a b-tree, the leaf page (at least) ** containing the entry being inserted or deleted must be modified. If the -** working set of leaves is larger than the available cache memory, then a -** single leaf that is modified more than once as part of the transaction +** working set of leaves is larger than the available cache memory, then a +** single leaf that is modified more than once as part of the transaction ** may be loaded from or written to the persistent media multiple times. ** Additionally, because the index updates are likely to be applied in -** random order, access to pages within the database is also likely to be in +** random order, access to pages within the database is also likely to be in ** random order, which is itself quite inefficient. ** ** One way to improve the situation is to sort the operations on each index ** by index key before applying them to the b-tree. This leads to an IO ** pattern that resembles a single linear scan through the index b-tree, -** and all but guarantees each modified leaf page is loaded and stored +** and all but guarantees each modified leaf page is loaded and stored ** exactly once. SQLite uses this trick to improve the performance of ** CREATE INDEX commands. This extension allows it to be used to improve ** the performance of large transactions on existing databases. ** -** Additionally, this extension allows the work involved in writing the -** large transaction to be broken down into sub-transactions performed -** sequentially by separate processes. This is useful if the system cannot -** guarantee that a single update process will run for long enough to apply -** the entire update, for example because the update is being applied on a -** mobile device that is frequently rebooted. Even after the writer process +** Additionally, this extension allows the work involved in writing the +** large transaction to be broken down into sub-transactions performed +** sequentially by separate processes. This is useful if the system cannot +** guarantee that a single update process will run for long enough to apply +** the entire update, for example because the update is being applied on a +** mobile device that is frequently rebooted. Even after the writer process ** has committed one or more sub-transactions, other database clients continue -** to read from the original database snapshot. In other words, partially -** applied transactions are not visible to other clients. +** to read from the original database snapshot. In other words, partially +** applied transactions are not visible to other clients. ** ** "RBU" stands for "Resumable Bulk Update". As in a large database update ** transmitted via a wireless network to a mobile device. A transaction @@ -61,9 +61,9 @@ ** ** * INSERT statements may not use any default values. ** -** * UPDATE and DELETE statements must identify their target rows by +** * UPDATE and DELETE statements must identify their target rows by ** non-NULL PRIMARY KEY values. Rows with NULL values stored in PRIMARY -** KEY fields may not be updated or deleted. If the table being written +** KEY fields may not be updated or deleted. If the table being written ** has no PRIMARY KEY, affected rows must be identified by rowid. ** ** * UPDATE statements may not modify PRIMARY KEY columns. @@ -80,10 +80,10 @@ ** PREPARATION ** ** An "RBU update" is stored as a separate SQLite database. A database -** containing an RBU update is an "RBU database". For each table in the +** containing an RBU update is an "RBU database". For each table in the ** target database to be updated, the RBU database should contain a table ** named "data_" containing the same set of columns as the -** target table, and one more - "rbu_control". The data_% table should +** target table, and one more - "rbu_control". The data_% table should ** have no PRIMARY KEY or UNIQUE constraints, but each column should have ** the same type as the corresponding column in the target database. ** The "rbu_control" column should have no type at all. For example, if @@ -98,22 +98,22 @@ ** The order of the columns in the data_% table does not matter. ** ** Instead of a regular table, the RBU database may also contain virtual -** tables or view named using the data_ naming scheme. +** tables or views named using the data_ naming scheme. ** -** Instead of the plain data_ naming scheme, RBU database tables +** Instead of the plain data_ naming scheme, RBU database tables ** may also be named data_, where is any sequence ** of zero or more numeric characters (0-9). This can be significant because -** tables within the RBU database are always processed in order sorted by +** tables within the RBU database are always processed in order sorted by ** name. By judicious selection of the portion of the names ** of the RBU tables the user can therefore control the order in which they ** are processed. This can be useful, for example, to ensure that "external ** content" FTS4 tables are updated before their underlying content tables. ** ** If the target database table is a virtual table or a table that has no -** PRIMARY KEY declaration, the data_% table must also contain a column -** named "rbu_rowid". This column is mapped to the tables implicit primary -** key column - "rowid". Virtual tables for which the "rowid" column does -** not function like a primary key value cannot be updated using RBU. For +** PRIMARY KEY declaration, the data_% table must also contain a column +** named "rbu_rowid". This column is mapped to the table's implicit primary +** key column - "rowid". Virtual tables for which the "rowid" column does +** not function like a primary key value cannot be updated using RBU. For ** example, if the target db contains either of the following: ** ** CREATE VIRTUAL TABLE x1 USING fts3(a, b); @@ -136,35 +136,35 @@ ** CREATE TABLE data_ft1(a, b, langid, rbu_rowid, rbu_control); ** CREATE TABLE data_ft1(a, b, rbu_rowid, rbu_control); ** -** For each row to INSERT into the target database as part of the RBU +** For each row to INSERT into the target database as part of the RBU ** update, the corresponding data_% table should contain a single record ** with the "rbu_control" column set to contain integer value 0. The -** other columns should be set to the values that make up the new record -** to insert. +** other columns should be set to the values that make up the new record +** to insert. ** -** If the target database table has an INTEGER PRIMARY KEY, it is not -** possible to insert a NULL value into the IPK column. Attempting to +** If the target database table has an INTEGER PRIMARY KEY, it is not +** possible to insert a NULL value into the IPK column. Attempting to ** do so results in an SQLITE_MISMATCH error. ** -** For each row to DELETE from the target database as part of the RBU +** For each row to DELETE from the target database as part of the RBU ** update, the corresponding data_% table should contain a single record ** with the "rbu_control" column set to contain integer value 1. The ** real primary key values of the row to delete should be stored in the ** corresponding columns of the data_% table. The values stored in the ** other columns are not used. ** -** For each row to UPDATE from the target database as part of the RBU +** For each row to UPDATE from the target database as part of the RBU ** update, the corresponding data_% table should contain a single record ** with the "rbu_control" column set to contain a value of type text. -** The real primary key values identifying the row to update should be +** The real primary key values identifying the row to update should be ** stored in the corresponding columns of the data_% table row, as should -** the new values of all columns being update. The text value in the +** the new values of all columns being update. The text value in the ** "rbu_control" column must contain the same number of characters as ** there are columns in the target database table, and must consist entirely -** of 'x' and '.' characters (or in some special cases 'd' - see below). For +** of 'x' and '.' characters (or in some special cases 'd' - see below). For ** each column that is being updated, the corresponding character is set to ** 'x'. For those that remain as they are, the corresponding character of the -** rbu_control value should be set to '.'. For example, given the tables +** rbu_control value should be set to '.'. For example, given the tables ** above, the update statement: ** ** UPDATE t1 SET c = 'usa' WHERE a = 4; @@ -178,30 +178,30 @@ ** target table with the value stored in the corresponding data_% column, the ** user-defined SQL function "rbu_delta()" is invoked and the result stored in ** the target table column. rbu_delta() is invoked with two arguments - the -** original value currently stored in the target table column and the +** original value currently stored in the target table column and the ** value specified in the data_xxx table. ** ** For example, this row: ** ** INSERT INTO data_t1(a, b, c, rbu_control) VALUES(4, NULL, 'usa', '..d'); ** -** is similar to an UPDATE statement such as: +** is similar to an UPDATE statement such as: ** ** UPDATE t1 SET c = rbu_delta(c, 'usa') WHERE a = 4; ** -** Finally, if an 'f' character appears in place of a 'd' or 's' in an +** Finally, if an 'f' character appears in place of a 'd' or 's' in an ** ota_control string, the contents of the data_xxx table column is assumed ** to be a "fossil delta" - a patch to be applied to a blob value in the ** format used by the fossil source-code management system. In this case -** the existing value within the target database table must be of type BLOB. +** the existing value within the target database table must be of type BLOB. ** It is replaced by the result of applying the specified fossil delta to ** itself. ** ** If the target database table is a virtual table or a table with no PRIMARY -** KEY, the rbu_control value should not include a character corresponding +** KEY, the rbu_control value should not include a character corresponding ** to the rbu_rowid value. For example, this: ** -** INSERT INTO data_ft1(a, b, rbu_rowid, rbu_control) +** INSERT INTO data_ft1(a, b, rbu_rowid, rbu_control) ** VALUES(NULL, 'usa', 12, '.x'); ** ** causes a result similar to: diff --git a/ext/recover/dbdata.c b/ext/recover/dbdata.c index 9563ab502a..da02b754b2 100644 --- a/ext/recover/dbdata.c +++ b/ext/recover/dbdata.c @@ -164,6 +164,9 @@ static int dbdataConnect( DbdataTable *pTab = 0; int rc = sqlite3_declare_vtab(db, pAux ? DBPTR_SCHEMA : DBDATA_SCHEMA); + (void)argc; + (void)argv; + (void)pzErr; if( rc==SQLITE_OK ){ pTab = (DbdataTable*)sqlite3_malloc64(sizeof(DbdataTable)); if( pTab==0 ){ @@ -768,6 +771,8 @@ static int dbdataFilter( DbdataTable *pTab = (DbdataTable*)pCursor->pVtab; int rc = SQLITE_OK; const char *zSchema = "main"; + (void)idxStr; + (void)argc; dbdataResetCursor(pCsr); assert( pCsr->iPgno==1 ); @@ -936,6 +941,7 @@ int sqlite3_dbdata_init( const sqlite3_api_routines *pApi ){ SQLITE_EXTENSION_INIT2(pApi); + (void)pzErrMsg; return sqlite3DbdataRegister(db); } diff --git a/ext/recover/sqlite3recover.c b/ext/recover/sqlite3recover.c index e62aba752f..8306e8ed8e 100644 --- a/ext/recover/sqlite3recover.c +++ b/ext/recover/sqlite3recover.c @@ -761,6 +761,7 @@ static void recoverEscapeCrnl( sqlite3_value **argv ){ const char *zText = (const char*)sqlite3_value_text(argv[0]); + (void)argc; if( zText && zText[0]=='\'' ){ int nText = sqlite3_value_bytes(argv[0]); int i; @@ -913,7 +914,7 @@ static void recoverTransferSettings(sqlite3_recover *p){ return; } - for(ii=0; iidbIn, "PRAGMA %Q.%s", p->zDb, zPrag); @@ -991,7 +992,9 @@ static int recoverOpenOutput(sqlite3_recover *p){ } /* Register the custom user-functions with the output handle. */ - for(ii=0; p->errCode==SQLITE_OK && iierrCode==SQLITE_OK && ii<(int)(sizeof(aFunc)/sizeof(aFunc[0])); + ii++){ p->errCode = sqlite3_create_function(db, aFunc[ii].zName, aFunc[ii].nArg, SQLITE_UTF8, (void*)p, aFunc[ii].xFunc, 0, 0 ); @@ -2388,7 +2391,7 @@ static int recoverVfsRead(sqlite3_file *pFd, void *aBuf, int nByte, i64 iOff){ if( pgsz==65536 ) pgsz = 1; recoverPutU16(&aHdr[16], pgsz); aHdr[20] = nReserve; - for(ii=0; ii=(4+6*sizeof(GeoCoord)) + && (nByte = sqlite3_value_bytes(pVal))>=(int)(4+6*sizeof(GeoCoord)) ){ const unsigned char *a = sqlite3_value_blob(pVal); int nVertex; @@ -362,6 +362,7 @@ static void geopolyBlobFunc( sqlite3_value **argv ){ GeoPoly *p = geopolyFuncParam(context, argv[0], 0); + (void)argc; if( p ){ sqlite3_result_blob(context, p->hdr, 4+8*p->nVertex, SQLITE_TRANSIENT); @@ -381,6 +382,7 @@ static void geopolyJsonFunc( sqlite3_value **argv ){ GeoPoly *p = geopolyFuncParam(context, argv[0], 0); + (void)argc; if( p ){ sqlite3 *db = sqlite3_context_db_handle(context); sqlite3_str *x = sqlite3_str_new(db); @@ -462,6 +464,7 @@ static void geopolyXformFunc( double F = sqlite3_value_double(argv[6]); GeoCoord x1, y1, x0, y0; int ii; + (void)argc; if( p ){ for(ii=0; iinVertex; ii++){ x0 = GeoX(p,ii); @@ -512,6 +515,7 @@ static void geopolyAreaFunc( sqlite3_value **argv ){ GeoPoly *p = geopolyFuncParam(context, argv[0], 0); + (void)argc; if( p ){ sqlite3_result_double(context, geopolyArea(p)); sqlite3_free(p); @@ -537,6 +541,7 @@ static void geopolyCcwFunc( sqlite3_value **argv ){ GeoPoly *p = geopolyFuncParam(context, argv[0], 0); + (void)argc; if( p ){ if( geopolyArea(p)<0.0 ){ int ii, jj; @@ -591,6 +596,7 @@ static void geopolyRegularFunc( int n = sqlite3_value_int(argv[3]); int i; GeoPoly *p; + (void)argc; if( n<3 || r<=0.0 ) return; if( n>1000 ) n = 1000; @@ -700,6 +706,7 @@ static void geopolyBBoxFunc( sqlite3_value **argv ){ GeoPoly *p = geopolyBBox(context, argv[0], 0, 0); + (void)argc; if( p ){ sqlite3_result_blob(context, p->hdr, 4+8*p->nVertex, SQLITE_TRANSIENT); @@ -727,6 +734,7 @@ static void geopolyBBoxStep( ){ RtreeCoord a[4]; int rc = SQLITE_OK; + (void)argc; (void)geopolyBBox(context, argv[0], a, &rc); if( rc==SQLITE_OK ){ GeoBBox *pBBox; @@ -815,6 +823,8 @@ static void geopolyContainsPointFunc( int v = 0; int cnt = 0; int ii; + (void)argc; + if( p1==0 ) return; for(ii=0; iinVertex-1; ii++){ v = pointBeneathLine(x0,y0,GeoX(p1,ii), GeoY(p1,ii), @@ -854,6 +864,7 @@ static void geopolyWithinFunc( ){ GeoPoly *p1 = geopolyFuncParam(context, argv[0], 0); GeoPoly *p2 = geopolyFuncParam(context, argv[1], 0); + (void)argc; if( p1 && p2 ){ int x = geopolyOverlap(p1, p2); if( x<0 ){ @@ -1184,6 +1195,7 @@ static void geopolyOverlapFunc( ){ GeoPoly *p1 = geopolyFuncParam(context, argv[0], 0); GeoPoly *p2 = geopolyFuncParam(context, argv[1], 0); + (void)argc; if( p1 && p2 ){ int x = geopolyOverlap(p1, p2); if( x<0 ){ @@ -1204,8 +1216,12 @@ static void geopolyDebugFunc( int argc, sqlite3_value **argv ){ + (void)context; + (void)argc; #ifdef GEOPOLY_ENABLE_DEBUG geo_debug = sqlite3_value_int(argv[0]); +#else + (void)argv; #endif } @@ -1233,6 +1249,7 @@ static int geopolyInit( sqlite3_str *pSql; char *zSql; int ii; + (void)pAux; sqlite3_vtab_config(db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1); @@ -1349,6 +1366,7 @@ static int geopolyFilter( RtreeNode *pRoot = 0; int rc = SQLITE_OK; int iCell = 0; + (void)idxStr; rtreeReference(pRtree); @@ -1475,6 +1493,7 @@ static int geopolyBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){ int iRowidTerm = -1; int iFuncTerm = -1; int idxNum = 0; + (void)tab; for(ii=0; iinConstraint; ii++){ struct sqlite3_index_constraint *p = &pIdxInfo->aConstraint[ii]; @@ -1721,6 +1740,8 @@ static int geopolyFindFunction( void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), void **ppArg ){ + (void)pVtab; + (void)nArg; if( sqlite3_stricmp(zName, "geopoly_overlap")==0 ){ *pxFunc = geopolyOverlapFunc; *ppArg = 0; @@ -1790,7 +1811,7 @@ static int sqlite3_geopoly_init(sqlite3 *db){ } aAgg[] = { { geopolyBBoxStep, geopolyBBoxFinal, "geopoly_group_bbox" }, }; - int i; + unsigned int i; for(i=0; i=1300 pCoord->u = _byteswap_ulong(*(u32*)p); #elif SQLITE_BYTEORDER==1234 && GCC_VERSION>=4003000 @@ -555,7 +555,7 @@ static void writeInt16(u8 *p, int i){ } static int writeCoord(u8 *p, RtreeCoord *pCoord){ u32 i; - assert( ((((char*)p) - (char*)0)&3)==0 ); /* p is always 4-byte aligned */ + assert( (((sqlite3_uint64)p)&3)==0 ); /* p is always 4-byte aligned */ assert( sizeof(RtreeCoord)==4 ); assert( sizeof(u32)==4 ); #if SQLITE_BYTEORDER==1234 && GCC_VERSION>=4003000 @@ -1283,7 +1283,7 @@ static void rtreeNonleafConstraint( assert(p->op==RTREE_LE || p->op==RTREE_LT || p->op==RTREE_GE || p->op==RTREE_GT || p->op==RTREE_EQ || p->op==RTREE_TRUE || p->op==RTREE_FALSE ); - assert( ((((char*)pCellData) - (char*)0)&3)==0 ); /* 4-byte aligned */ + assert( (((sqlite3_uint64)pCellData)&3)==0 ); /* 4-byte aligned */ switch( p->op ){ case RTREE_TRUE: return; /* Always satisfied */ case RTREE_FALSE: break; /* Never satisfied */ @@ -1336,7 +1336,7 @@ static void rtreeLeafConstraint( || p->op==RTREE_GT || p->op==RTREE_EQ || p->op==RTREE_TRUE || p->op==RTREE_FALSE ); pCellData += 8 + p->iCoord*4; - assert( ((((char*)pCellData) - (char*)0)&3)==0 ); /* 4-byte aligned */ + assert( (((sqlite3_uint64)pCellData)&3)==0 ); /* 4-byte aligned */ RTREE_DECODE_COORD(eInt, pCellData, xN); switch( p->op ){ case RTREE_TRUE: return; /* Always satisfied */ diff --git a/ext/rtree/rtreecheck.test b/ext/rtree/rtreecheck.test index 17f359aa8a..1754ff5c3d 100644 --- a/ext/rtree/rtreecheck.test +++ b/ext/rtree/rtreecheck.test @@ -157,4 +157,19 @@ do_execsql_test 5.2 { SELECT rtreecheck('r3')=='ok' } 0 +#------------------------------------------------------------------------- +# dbsqlfuzz 4a1399d39bf9feccbf6b290da51d3b30103a4bf6 +# +reset_db +do_execsql_test 6.0 { + PRAGMA encoding = 'utf16'; + CREATE VIRTUAL TABLE t1 USING rtree(id, x, y); +} +db close +sqlite3 db test.db +do_catchsql_test 6.1 { + SELECT ( 'elvis' IN(SELECT rtreecheck('t1')) ) FROM (SELECT 1) GROUP BY 1; +} {1 {database table is locked}} + finish_test + diff --git a/ext/session/sqlite3session.c b/ext/session/sqlite3session.c index e8d95553d0..4e0ad827eb 100644 --- a/ext/session/sqlite3session.c +++ b/ext/session/sqlite3session.c @@ -1506,6 +1506,8 @@ static void xPreUpdate( int nDb = sqlite3Strlen30(zDb); assert( sqlite3_mutex_held(db->mutex) ); + (void)iKey1; + (void)iKey2; for(pSession=(sqlite3_session *)pCtx; pSession; pSession=pSession->pNext){ SessionTable *pTab; @@ -1582,6 +1584,7 @@ static int sessionDiffCount(void *pCtx){ return p->nOldOff ? p->nOldOff : sqlite3_column_count(p->pStmt); } static int sessionDiffDepth(void *pCtx){ + (void)pCtx; return 0; } @@ -1655,7 +1658,6 @@ static char *sessionExprCompareOther( } static char *sessionSelectFindNew( - int nCol, const char *zDb1, /* Pick rows in this db only */ const char *zDb2, /* But not in this one */ const char *zTbl, /* Table name */ @@ -1679,7 +1681,7 @@ static int sessionDiffFindNew( char *zExpr ){ int rc = SQLITE_OK; - char *zStmt = sessionSelectFindNew(pTab->nCol, zDb1, zDb2, pTab->zName,zExpr); + char *zStmt = sessionSelectFindNew(zDb1, zDb2, pTab->zName,zExpr); if( zStmt==0 ){ rc = SQLITE_NOMEM; @@ -4210,7 +4212,6 @@ static int sessionBindRow( ** UPDATE, bind values from the old.* record. */ static int sessionSeekToRow( - sqlite3 *db, /* Database handle */ sqlite3_changeset_iter *pIter, /* Changeset iterator */ u8 *abPK, /* Primary key flags array */ sqlite3_stmt *pSelect /* SELECT statement from sessionSelectRow() */ @@ -4340,7 +4341,7 @@ static int sessionConflictHandler( /* Bind the new.* PRIMARY KEY values to the SELECT statement. */ if( pbReplace ){ - rc = sessionSeekToRow(p->db, pIter, p->abPK, p->pSelect); + rc = sessionSeekToRow(pIter, p->abPK, p->pSelect); }else{ rc = SQLITE_OK; } @@ -4514,7 +4515,7 @@ static int sessionApplyOneOp( /* Check if there is a conflicting row. For sqlite_stat1, this needs ** to be done using a SELECT, as there is no PRIMARY KEY in the ** database schema to throw an exception if a duplicate is inserted. */ - rc = sessionSeekToRow(p->db, pIter, p->abPK, p->pSelect); + rc = sessionSeekToRow(pIter, p->abPK, p->pSelect); if( rc==SQLITE_ROW ){ rc = SQLITE_CONSTRAINT; sqlite3_reset(p->pSelect); diff --git a/ext/wasm/GNUmakefile b/ext/wasm/GNUmakefile index db18b283e6..7ffd866f24 100644 --- a/ext/wasm/GNUmakefile +++ b/ext/wasm/GNUmakefile @@ -37,6 +37,15 @@ # - wasm-strip for release builds: https://github.com/WebAssembly/wabt # - InfoZip for 'dist' zip file ######################################################################## +# +# Significant TODOs for this build include, but are not necessarily +# limited to: +# +# 1) Consolidate the code generation for sqlite3*.*js into a script +# which generates the makefile code, rather than using $(call) and +# $(eval), or at least centralize the setup of the numerous vars +# related to each build variant (vanilla, esm, bundler-friendly). +# SHELL := $(shell which bash 2>/dev/null) MAKEFILE := $(lastword $(MAKEFILE_LIST)) CLEAN_FILES := @@ -50,6 +59,20 @@ emcc.bin ?= $(word 1,$(wildcard $(EMSDK_HOME)/upstream/emscripten/emcc) $(shell ifeq (,$(emcc.bin)) $(error Cannot find emcc.) endif +emcc.version := $(shell "$(emcc.bin)" --version | sed -n 1p \ + | sed -e 's/^.* \([3-9][^ ]*\) .*$$/\1/;') +ifeq (,$(emcc.version)) + $(warning Cannot determine emcc version. This might unduly impact build flags.) +else + $(info using emcc version [$(emcc.version)]) +endif +emcc.version := $(shell "$(emcc.bin)" --version | sed -n 1p \ + | sed -e 's/^.* \([3-9][^ ]*\) .*$$/\1/;') +ifeq (,$(emcc.version)) + $(warning Cannot determine emcc version. This might unduly impact build flags.) +else + $(info using emcc version [$(emcc.version)]) +endif wasm-strip ?= $(shell which wasm-strip 2>/dev/null) ifeq (,$(filter clean,$(MAKECMDGOALS))) @@ -213,7 +236,10 @@ DISTCLEAN_FILES += $(bin.stripccomments) # https://fossil.wanderinghorse.net/r/c-pp bin.c-pp := ./c-pp $(bin.c-pp): c-pp.c $(sqlite3.c) $(MAKEFILE) - $(CC) -O0 -o $@ c-pp.c $(sqlite3.c) '-DCMPP_DEFAULT_DELIM="//#"' -I$(dir.top) + $(CC) -O0 -o $@ c-pp.c $(sqlite3.c) '-DCMPP_DEFAULT_DELIM="//#"' -I$(dir.top) \ + -DSQLITE_OMIT_LOAD_EXTENSION -DSQLITE_OMIT_DEPRECATED -DSQLITE_OMIT_UTF16 \ + -DSQLITE_OMIT_SHARED_CACHE -DSQLITE_OMIT_WAL -DSQLITE_THREADSAFE=0 \ + -DSQLITE_TEMP_STORE=3 define C-PP.FILTER # Create $2 from $1 using $(bin.c-pp) # $1 = Input file: c-pp -f $(1).js @@ -223,12 +249,9 @@ $(2): $(1) $$(MAKEFILE) $$(bin.c-pp) $$(bin.c-pp) -f $(1) -o $$@ $(3) CLEAN_FILES += $(2) endef -c-pp.D.vanilla ?= -c-pp.D.esm ?= -Dtarget=es6-module # /end C-PP.FILTER ######################################################################## - # cflags.common = C compiler flags for all builds cflags.common := -I. -I.. -I$(dir.top) # emcc.WASM_BIGINT = 1 for BigInt (C int64) support, else 0. The API @@ -304,8 +327,6 @@ sqlite3-api.jses += $(dir.api)/sqlite3-api-cleanup.js # "External" API files which are part of our distribution # but not part of the sqlite3-api.js amalgamation. SOAP.js := $(dir.api)/sqlite3-opfs-async-proxy.js -sqlite3-worker1.js := $(dir.api)/sqlite3-worker1.js -sqlite3-worker1-promiser.js := $(dir.api)/sqlite3-worker1-promiser.js # COPY_XAPI = a $(call)able function to copy $1 to $(dir.dout), where # $1 must be one of the "external" JS API files. define COPY_XAPI @@ -313,11 +334,32 @@ sqlite3-api.ext.jses += $$(dir.dout)/$$(notdir $(1)) $$(dir.dout)/$$(notdir $(1)): $(1) $$(MAKEFILE) cp $$< $$@ endef -$(foreach X,$(SOAP.js) $(sqlite3-worker1.js) $(sqlite3-worker1-promiser.js),\ +$(foreach X,$(SOAP.js),\ $(eval $(call COPY_XAPI,$(X)))) all quick: $(sqlite3-api.ext.jses) q: quick +######################################################################## +# $(sqlite3-api*.*js) contain the core library code but not the +# Emscripten-related glue which deals with loading sqlite3.wasm. In +# theory they can be used by arbitrary build environments and WASM +# loaders, but in practice that breaks down because the WASM loader +# has to be able to provide all of the necessary "imports" to +# sqlite3.wasm, and that list of imports is unknown until sqlite3.wasm +# is compiled, at which point Emscripten sets up the imports +# appropriately. Abstractly speaking, it's impossible for other build +# environments to know exactly which imports are needed and provide +# them. Tools like wasm-objdump can be used to find the list of +# imports but it's questionable whether a non-Emscripten tool could +# realistically use that info to provide proper implementations. +# Sidebar: some of the imports are used soley by the Emscripten glue, +# which the sqlite3 JS code does not rely on. +# +# We build $(sqlite3-api*.*) "because we can" and because it might be +# a useful point of experimentation for some clients, but the +# above-described caveat may well make them unusable for real-life +# clients. +# # sqlite3-api.js.in = the generated sqlite3-api.js before it gets # preprocessed. It contains all of $(sqlite3-api.jses) but none of the # Emscripten-specific headers and footers. @@ -330,110 +372,6 @@ $(sqlite3-api.js.in): $(sqlite3-api.jses) $(MAKEFILE) echo "/* END FILE: $$i */"; \ done > $@ -$(sqlite3-api-build-version.js): $(bin.version-info) $(MAKEFILE) - @echo "Making $@..." - @{ \ - echo 'self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){'; \ - echo -n ' sqlite3.version = '; \ - $(bin.version-info) --json; \ - echo ';'; \ - echo '});'; \ - } > $@ -$(sqlite3-license-version.js): $(sqlite3.h) $(sqlite3-license-version-header.js) \ - $(MAKEFILE) - @echo "Making $@..."; { \ - cat $(sqlite3-license-version-header.js); \ - echo '/*'; \ - echo '** This code was built from sqlite3 version...'; \ - echo "** "; \ - awk -e '/define SQLITE_VERSION/{$$1=""; print "**" $$0}' \ - -e '/define SQLITE_SOURCE_ID/{$$1=""; print "**" $$0}' $(sqlite3.h); \ - echo '*/'; \ - } > $@ - -######################################################################## -# --post-js and --pre-js are emcc flags we use to append/prepend JS to -# the generated emscripten module file. The following rules generate -# various versions of those files for the vanilla and ESM builds. -pre-js.js.in := $(dir.api)/pre-js.c-pp.js -pre-js.js.esm := $(dir.tmp)/pre-js.esm.js -pre-js.js.vanilla := $(dir.tmp)/pre-js.vanilla.js -$(eval $(call C-PP.FILTER,$(pre-js.js.in),$(pre-js.js.vanilla),$(c-pp.D.vanilla))) -$(eval $(call C-PP.FILTER,$(pre-js.js.in),$(pre-js.js.esm),$(c-pp.D.esm))) -post-js.js.in := $(dir.tmp)/post-js.c-pp.js -post-js.js.vanilla := $(dir.tmp)/post-js.vanilla.js -post-js.js.esm := $(dir.tmp)/post-js.esm.js -post-jses.js := \ - $(dir.api)/post-js-header.js \ - $(sqlite3-api.js.in) \ - $(dir.api)/post-js-footer.js -$(post-js.js.in): $(post-jses.js) $(MAKEFILE) - @echo "Making $@..." - @for i in $(post-jses.js); do \ - echo "/* BEGIN FILE: $$i */"; \ - cat $$i; \ - echo "/* END FILE: $$i */"; \ - done > $@ -$(eval $(call C-PP.FILTER,$(post-js.js.in),$(post-js.js.vanilla),$(c-pp.D.vanilla))) -$(eval $(call C-PP.FILTER,$(post-js.js.in),$(post-js.js.esm),$(c-pp.D.esm))) - -# extern-post-js* and extern-pre-js* are files for use with -# Emscripten's --extern-pre-js and --extern-post-js flags. These -# rules make different copies for the vanilla and ESM builds. -extern-post-js.js.in := $(dir.api)/extern-post-js.c-pp.js -extern-post-js.js.vanilla := $(dir.tmp)/extern-post-js.vanilla.js -extern-post-js.js.esm := $(dir.tmp)/extern-post-js.esm.js -$(eval $(call C-PP.FILTER,$(extern-post-js.js.in),$(extern-post-js.js.vanilla),$(c-pp.D.vanilla))) -$(eval $(call C-PP.FILTER,$(extern-post-js.js.in),$(extern-post-js.js.esm),$(c-pp.D.esm))) -extern-pre-js.js := $(dir.api)/extern-pre-js.js - -# Emscripten flags for --[extern-][pre|post]-js=... for the -# various builds. -pre-post-common.flags := \ - --extern-pre-js=$(sqlite3-license-version.js) -pre-post-common.flags.vanilla := \ - $(pre-post-common.flags) \ - --post-js=$(post-js.js.vanilla) \ - --extern-post-js=$(extern-post-js.js.vanilla) -pre-post-common.flags.esm := \ - $(pre-post-common.flags) \ - --post-js=$(post-js.js.esm) \ - --extern-post-js=$(extern-post-js.js.esm) - -# pre-post-jses.deps.* = a list of dependencies for the -# --[extern-][pre/post]-js files. -pre-post-jses.deps.common := $(extern-pre-js.js) $(sqlite3-license-version.js) -pre-post-jses.deps.vanilla := $(pre-post-jses.deps.common) \ - $(post-js.js.vanilla) $(extern-post-js.js.vanilla) -pre-post-jses.deps.esm := $(pre-post-jses.deps.common) \ - $(post-js.js.esm) $(extern-post-js.js.esm) - -######################################################################## -# call-make-pre-js is a $(call)able which creates rules for -# pre-js-$(1).js. $1 = the base name of the JS file on whose behalf -# this pre-js is for. $2 is the build mode: one of (vanilla, esm). -# This sets up --[extern-][pre/post]-js flags in -# $(pre-post-$(1).flags.$(2)) and dependencies in -# $(pre-post-$(1).deps.$(2)). -define call-make-pre-js -pre-post-$(1).flags.$(2) ?= -$$(dir.tmp)/pre-js-$(1)-$(2).js: $$(pre-js.js.$(2)) $$(MAKEFILE) - cp $$(pre-js.js.$(2)) $$@ - @if [ sqlite3-wasmfs = $(1) ]; then \ - echo "delete Module[xNameOfInstantiateWasm] /*for WASMFS build*/;"; \ - elif [ sqlite3 != $(1) ]; then \ - echo "Module[xNameOfInstantiateWasm].uri = '$(1).wasm';"; \ - fi >> $$@ -pre-post-$(1).deps.$(2) := \ - $$(pre-post-jses.deps.$(2)) \ - $$(dir.tmp)/pre-js-$(1)-$(2).js -pre-post-$(1).flags.$(2) += \ - $$(pre-post-common.flags.$(2)) \ - --pre-js=$$(dir.tmp)/pre-js-$(1)-$(2).js -endef -# /post-js and pre-js -######################################################################## - ######################################################################## # emcc flags for .c/.o/.wasm/.js. emcc.flags := @@ -448,7 +386,6 @@ emcc.cflags := emcc.cflags += -std=c99 -fPIC # -------------^^^^^^^^ we need c99 for $(sqlite3-wasm.c). emcc.cflags += -I. -I$(dir.top) - ######################################################################## # emcc flags specific to building .js/.wasm files... emcc.jsflags := -fPIC @@ -456,7 +393,6 @@ emcc.jsflags += --minify 0 emcc.jsflags += --no-entry emcc.jsflags += -sWASM_BIGINT=$(emcc.WASM_BIGINT) emcc.jsflags += -sMODULARIZE -emcc.jsflags += -sSTRICT_JS emcc.jsflags += -sDYNAMIC_EXECUTION=0 emcc.jsflags += -sNO_POLYFILL emcc.jsflags += -sEXPORTED_FUNCTIONS=@$(EXPORTED_FUNCTIONS.api) @@ -466,6 +402,13 @@ emcc.exportedRuntimeMethods := \ emcc.jsflags += $(emcc.exportedRuntimeMethods) emcc.jsflags += -sUSE_CLOSURE_COMPILER=0 emcc.jsflags += -sIMPORTED_MEMORY +ifeq (3.1.31,$(emcc.version)) + emcc.jsflags += -sSTRICT_JS=0 + $(warning Disabling -sSTRICT_JS for emcc $(emcc.version): \ + https://github.com/emscripten-core/emscripten/issues/18610) +else + emcc.jsflags += -sSTRICT_JS=1 +endif emcc.environment := -sENVIRONMENT=web,worker ######################################################################## # -sINITIAL_MEMORY: How much memory we need to start with is governed @@ -552,7 +495,6 @@ emcc.jsflags += -sLLD_REPORT_UNDEFINED # such constructs are found all over the place in the source code. ######################################################################## - ######################################################################## # -sSINGLE_FILE: # https://github.com/emscripten-core/emscripten/blob/main/src/settings.js @@ -564,27 +506,89 @@ emcc.jsflags += -sLLD_REPORT_UNDEFINED # -g3 debugging info, _huge_. ######################################################################## -sqlite3.js := $(dir.dout)/sqlite3.js -sqlite3.mjs := $(dir.dout)/sqlite3.mjs +$(sqlite3-api-build-version.js): $(bin.version-info) $(MAKEFILE) + @echo "Making $@..." + @{ \ + echo 'self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){'; \ + echo -n ' sqlite3.version = '; \ + $(bin.version-info) --json; \ + echo ';'; \ + echo '});'; \ + } > $@ +$(sqlite3-license-version.js): $(sqlite3.h) $(sqlite3-license-version-header.js) \ + $(MAKEFILE) + @echo "Making $@..."; { \ + cat $(sqlite3-license-version-header.js); \ + echo '/*'; \ + echo '** This code was built from sqlite3 version...'; \ + echo "**"; \ + awk -e '/define SQLITE_VERSION/{$$1=""; print "**" $$0}' \ + -e '/define SQLITE_SOURCE_ID/{$$1=""; print "**" $$0}' $(sqlite3.h); \ + echo "**"; \ + echo "** Using the Emscripten SDK version $(emcc.version)."; \ + echo '*/'; \ + } > $@ + +######################################################################## +# --post-js and --pre-js are emcc flags we use to append/prepend JS to +# the generated emscripten module file. These rules set up the core +# pre/post files for use by the various builds. +pre-js.js.in := $(dir.api)/pre-js.c-pp.js +post-js.js.in := $(dir.tmp)/post-js.c-pp.js +post-jses.js := \ + $(dir.api)/post-js-header.js \ + $(sqlite3-api.js.in) \ + $(dir.api)/post-js-footer.js +$(post-js.js.in): $(post-jses.js) $(MAKEFILE) + @echo "Making $@..." + @for i in $(post-jses.js); do \ + echo "/* BEGIN FILE: $$i */"; \ + cat $$i; \ + echo "/* END FILE: $$i */"; \ + done > $@ + + +######################################################################## +# call-make-pre-post is a $(call)able which creates rules for +# pre-js-$(1).js. $1 = the base name of the JS file on whose behalf +# this pre-js is for (one of: sqlite3, sqlite3-wasmfs). $2 is the build +# mode: one of (vanilla, esm, bundler-friendly). This sets up +# --[extern-][pre/post]-js flags in $(pre-post-$(1).flags.$(2)) and +# dependencies in $(pre-post-$(1).deps.$(2)). +define call-make-pre-post +pre-post-$(1).flags.$(2) ?= +$$(dir.tmp)/pre-js-$(1)-$(2).js: $$(pre-js.js.$(2)) $$(MAKEFILE) + cp $$(pre-js.js.$(2)) $$@ + @if [ sqlite3-wasmfs = $(1) ]; then \ + echo "delete Module[xNameOfInstantiateWasm] /*for WASMFS build*/;"; \ + elif [ sqlite3 != $(1) ]; then \ + echo "Module[xNameOfInstantiateWasm].uri = '$(1).wasm';"; \ + fi >> $$@ +pre-post-$(1).deps.$(2) := \ + $$(pre-post-jses.deps.$(2)) \ + $$(dir.tmp)/pre-js-$(1)-$(2).js +pre-post-$(1).flags.$(2) += \ + $$(pre-post-common.flags.$(2)) \ + --pre-js=$$(dir.tmp)/pre-js-$(1)-$(2).js +endef +# /post-js and pre-js +######################################################################## + # Undocumented Emscripten feature: if the target file extension is # "mjs", it defaults to ES6 module builds: # https://github.com/emscripten-core/emscripten/issues/14383 sqlite3.wasm := $(dir.dout)/sqlite3.wasm sqlite3-wasm.c := $(dir.api)/sqlite3-wasm.c # sqlite3-wasm.o vs sqlite3-wasm.c: building against the latter -# (predictably) results in a slightly faster binary, but we're close +# (predictably) results in a slightly faster binary. We're close # enough to the target speed requirements that the 500ms makes a -# difference. Thus we build all binaries against sqlite3-wasm.c -# instead of building a shared copy of sqlite3-wasm.o. -$(eval $(call call-make-pre-js,sqlite3,vanilla)) -$(eval $(call call-make-pre-js,sqlite3,esm)) -$(sqlite3.js) $(sqlite3.mjs): $(MAKEFILE) $(sqlite3-wasm.c) \ - $(EXPORTED_FUNCTIONS.api) -$(sqlite3.js): $(pre-post-sqlite3.deps.vanilla) -$(sqlite3.mjs): $(pre-post-sqlite3.deps.esm) +# difference, so we build all binaries against sqlite3-wasm.c instead +# of building a shared copy of sqlite3-wasm.o to link against. ######################################################################## -# SQLITE3.xJS.RECIPE = the $(call)able recipe body for $(sqlite3.js) -# and $(sqlite3.mjs). $1 = one of (vanilla, esm). +# SQLITE3.xJS.EXPORT-DEFAULT is part of SQLITE3-WASMFS.xJS.RECIPE and +# SETUP_LIB_BUILD_MODE, factored into a separate piece to avoid code +# duplication. $1 is 1 if the build mode needs this workaround (esm, +# bundler-friendly) and 0 if not (vanilla). # # Reminder for ESM builds: even if we use -sEXPORT_ES6=0, emcc _still_ # adds: @@ -593,17 +597,13 @@ $(sqlite3.mjs): $(pre-post-sqlite3.deps.esm) # # when building *.mjs, which is bad because we need to export an # overwritten version of that function and cannot "export default" -# twice. Because of this, we have to sed $(sqlite3.mjs) to remove the -# _first_ instance (only) of /^export default/. +# twice. Because of this, we have to sed *.mjs to remove the _first_ +# instance (only) of /^export default/. # # Upstream RFE: # https://github.com/emscripten-core/emscripten/issues/18237 -######################################################################## -# SQLITE3.xJS.EXPORT-DEFAULT is part of SQLITE3[-WASMFS].xJS.RECIPE, -# factored into a separate piece to avoid code duplication. $1 is -# the build mode: one of (vanilla, esm). define SQLITE3.xJS.ESM-EXPORT-DEFAULT -if [ esm = $(1) ]; then \ +if [ x1 = x$(1) ]; then \ echo "Fragile workaround for an Emscripten annoyance. See SQLITE3.xJS.RECIPE."; \ sed -i -e '0,/^export default/{/^export default/d;}' $@ || exit $$?; \ if ! grep -q '^export default' $@; then \ @@ -612,38 +612,129 @@ if [ esm = $(1) ]; then \ fi; \ fi endef -define SQLITE3.xJS.RECIPE - @echo "Building $@ ..." - $(emcc.bin) -o $@ $(emcc_opt_full) $(emcc.flags) \ - $(emcc.jsflags) \ - $(pre-post-sqlite3.flags.$(1)) $(emcc.flags.sqlite3.$(1)) \ - $(cflags.common) $(SQLITE_OPT) $(sqlite3-wasm.c) - @$(call SQLITE3.xJS.ESM-EXPORT-DEFAULT,$(1)) - chmod -x $(sqlite3.wasm) - $(maybe-wasm-strip) $(sqlite3.wasm) - @ls -la $@ $(sqlite3.wasm) -endef -emcc.flags.sqlite3.vanilla := -emcc.flags.sqlite3.esm := -sEXPORT_ES6 -sUSE_ES6_IMPORT_META -$(sqlite3.js): - $(call SQLITE3.xJS.RECIPE,vanilla) -$(sqlite3.mjs): - $(call SQLITE3.xJS.RECIPE,esm) + +# extern-post-js* and extern-pre-js* are files for use with +# Emscripten's --extern-pre-js and --extern-post-js flags. +extern-pre-js.js := $(dir.api)/extern-pre-js.js +extern-post-js.js.in := $(dir.api)/extern-post-js.c-pp.js +# Emscripten flags for --[extern-][pre|post]-js=... for the +# various builds. +pre-post-common.flags := \ + --extern-pre-js=$(sqlite3-license-version.js) +# pre-post-jses.deps.* = a list of dependencies for the +# --[extern-][pre/post]-js files. +pre-post-jses.deps.common := $(extern-pre-js.js) $(sqlite3-license-version.js) ######################################################################## -# We have to ensure that we do not build both $(sqlite3.js) and -# $(sqlite3.mjs) in parallel because both result in the creation of -# $(sqlite3.wasm). We have no(?) way to build just the .mjs file -# without also building the .wasm file. i.e. we're building -# $(sqlite3.wasm) twice, but that's apparently unavoidable (and -# harmless, just a waste of build time). +# SETUP_LIB_BUILD_MODE is a $(call)'able which sets up numerous pieces +# for one of the build modes (vanilla, esm, bundler-friendly). +# +# $1 = build mode name +# $2 = 1 for ESM build mode, else 0 +# $3 = resulting sqlite-api JS/MJS file +# $4 = resulting JS/MJS file +# $5 = -D... flags for $(bin.c-pp) +# $6 = emcc -sXYZ flags +define SETUP_LIB_BUILD_MODE +$(info Setting up build [$(1)]: $(4)) +c-pp.D.$(1) := $(5) +pre-js.js.$(1) := $$(dir.api)/pre-js.$(1).js +$$(eval $$(call C-PP.FILTER,$$(pre-js.js.in),$$(pre-js.js.$(1)),$$(c-pp.D.$(1)))) +post-js.js.$(1) := $$(dir.tmp)/post-js.$(1).js +$$(eval $$(call C-PP.FILTER,$$(post-js.js.in),$$(post-js.js.$(1)),$$(c-pp.D.$(1)))) +extern-post-js.js.$(1) := $$(dir.tmp)/extern-post-js.$(1).js +$$(eval $$(call C-PP.FILTER,$$(extern-post-js.js.in),$$(extern-post-js.js.$(1)),$$(c-pp.D.$(1)))) +pre-post-common.flags.$(1) := \ + $$(pre-post-common.flags) \ + --post-js=$$(post-js.js.$(1)) \ + --extern-post-js=$$(extern-post-js.js.$(1)) +pre-post-jses.deps.$(1) := $$(pre-post-jses.deps.common) \ + $$(post-js.js.$(1)) $$(extern-post-js.js.$(1)) +$$(eval $$(call call-make-pre-post,sqlite3,$(1))) +emcc.flags.sqlite3.$(1) := $(6) +$$(eval $$(call C-PP.FILTER, $$(sqlite3-api.js.in), $(3), $(5))) +$(4): $(3) +$(4): $(3) $$(MAKEFILE) $$(sqlite3-wasm.c) $$(EXPORTED_FUNCTIONS.api) $$(pre-post-sqlite3.deps.$(1)) + @echo "Building $$@ ..." + $$(emcc.bin) -o $$@ $$(emcc_opt_full) $$(emcc.flags) \ + $$(emcc.jsflags) \ + $$(pre-post-sqlite3.flags.$(1)) $$(emcc.flags.sqlite3.$(1)) \ + $$(cflags.common) $$(SQLITE_OPT) $$(sqlite3-wasm.c) + @$$(call SQLITE3.xJS.ESM-EXPORT-DEFAULT,$(2)) + @if [ bundler-friendly = $(1) ]; then \ + echo "Patching $(3) for sqlite3.wasm..."; \ + rm -f $$(dir.dout)/sqlite3-bundler-friendly.wasm; \ + sed -i -e 's/sqlite3-bundler-friendly.wasm/sqlite3.wasm/g' $$@ || exit $$$$?; \ + fi + chmod -x $$(sqlite3.wasm) + $$(maybe-wasm-strip) $$(sqlite3.wasm) + @ls -la $@ $$(sqlite3.wasm) +all: $(4) +quick: $(4) +CLEAN_FILES += $(3) $(4) +endef +# ^^^ /SETUP_LIB_BUILD_MODE +######################################################################## +sqlite3-api.js := $(dir.dout)/sqlite3-api.js +sqlite3.js := $(dir.dout)/sqlite3.js +sqlite3-api.mjs := $(dir.dout)/sqlite3-api.mjs +sqlite3.mjs := $(dir.dout)/sqlite3.mjs +sqlite3-api-bundler-friendly.mjs := $(dir.dout)/sqlite3-api-bundler-friendly.mjs +sqlite3-bundler-friendly.mjs := $(dir.dout)/sqlite3-bundler-friendly.mjs +# Maintenance reminder: careful not to introduce spaces around args $1, $2 +#$(info $(call SETUP_LIB_BUILD_MODE,vanilla,0, $(sqlite3-api.js), $(sqlite3.js))) +$(eval $(call SETUP_LIB_BUILD_MODE,vanilla,0, $(sqlite3-api.js), $(sqlite3.js))) +$(eval $(call SETUP_LIB_BUILD_MODE,esm,1, $(sqlite3-api.mjs), $(sqlite3.mjs), \ + -Dtarget=es6-module, -sEXPORT_ES6 -sUSE_ES6_IMPORT_META)) +$(eval $(call SETUP_LIB_BUILD_MODE,bundler-friendly,1,\ + $(sqlite3-api-bundler-friendly.mjs),$(sqlite3-bundler-friendly.mjs),\ + $(c-pp.D.esm) -Dtarget=es6-bundler-friendly, $(emcc.flags.sqlite3.esm))) +# The various -D... values used by *.c-pp.js include: +# +# -Dtarget=es6-module: for all ESM module builds +# +# -Dtarget=es6-module -Dtarget=es6-bundler-friendly: intended for +# "bundler-friendly" ESM module build. These have some restrictions +# on how URL() objects are constructed in some contexts: URLs which +# refer to files which are part of this project must be references +# as string literals so that bundlers' static-analysis tools can +# find those files and include them in their bundles. +# +######################################################################## +######################################################################## +# We have to ensure that we do not build both $(sqlite3*.*js) in +# parallel because both result in the creation of $(sqlite3.wasm). We +# have no way to build just the .mjs file without also building the +# .wasm file because the generated .mjs file has to include info about +# the imports needed by the wasm file, so they have to be built +# together. i.e. we're building $(sqlite3.wasm) multiple times, but +# that's unavoidable (and harmless, just a waste of build time). $(sqlite3.wasm): $(sqlite3.js) $(sqlite3.mjs): $(sqlite3.js) -CLEAN_FILES += $(sqlite3.js) $(sqlite3.mjs) $(sqlite3.wasm) -all: $(sqlite3.js) $(sqlite3.mjs) -quick: $(sqlite3.js) -quick: $(sqlite3.mjs) # for the sake of the snapshot build -# End main $(sqlite3.js) build +$(sqlite3-bundler-friendly.mjs): $(sqlite3.mjs) +CLEAN_FILES += $(sqlite3.wasm) + ######################################################################## +# We need separate copies of certain supplementary JS files for the +# bundler-friendly build. Concretely, any supplemental JS files which +# themselves use importScripts() or Workers or URL() constructors +# which refer to other in-tree (m)JS files quire a bundler-friendly +# copy. +sqlite3-worker1.js.in := $(dir.api)/sqlite3-worker1.c-pp.js +sqlite3-worker1-promiser.js.in := $(dir.api)/sqlite3-worker1-promiser.c-pp.js +sqlite3-worker1.js := $(dir.dout)/sqlite3-worker1.js +sqlite3-worker1-promiser.js := $(dir.dout)/sqlite3-worker1-promiser.js +sqlite3-worker1-bundler-friendly.js := $(dir.dout)/sqlite3-worker1-bundler-friendly.js +sqlite3-worker1-promiser-bundler-friendly.js := $(dir.dout)/sqlite3-worker1-promiser-bundler-friendly.js +$(eval $(call C-PP.FILTER,$(sqlite3-worker1.js.in),$(sqlite3-worker1.js))) +$(eval $(call C-PP.FILTER,$(sqlite3-worker1.js.in),$(sqlite3-worker1-bundler-friendly.js),\ + $(c-pp.D.bundler-friendly))) +$(eval $(call C-PP.FILTER,$(sqlite3-worker1-promiser.js.in),$(sqlite3-worker1-promiser.js))) +$(eval $(call C-PP.FILTER,$(sqlite3-worker1-promiser.js.in),\ + $(sqlite3-worker1-promiser-bundler-friendly.js),\ + $(c-pp.D.bundler-friendly))) +$(sqlite3-bundler-friendly.mjs): $(sqlite3-worker1-bundler-friendly.js) \ + $(sqlite3-worker1-promiser-bundler-friendly.js) +$(sqlite3.js) $(sqlite3.mjs): $(sqlite3-worker1.js) $(sqlite3-worker1-promiser.js) ######################################################################## # batch-runner.js is part of one of the test apps which reads in SQL @@ -680,7 +771,6 @@ emcc.speedtest1 += -sALLOW_MEMORY_GROWTH emcc.speedtest1 += -sINITIAL_MEMORY=$(emcc.INITIAL_MEMORY.$(emcc.INITIAL_MEMORY)) emcc.speedtest1.common += -sINVOKE_RUN=0 emcc.speedtest1.common += --no-entry -#emcc.speedtest1.common += -flto emcc.speedtest1.common += -sABORTING_MALLOC emcc.speedtest1.common += -sSTRICT_JS emcc.speedtest1.common += -sMODULARIZE @@ -720,7 +810,7 @@ speedtest1.js := $(dir.dout)/speedtest1.js speedtest1.wasm := $(dir.dout)/speedtest1.wasm cflags.speedtest1 := $(cflags.common) -DSQLITE_SPEEDTEST1_WASM speedtest1.cses := $(speedtest1.c) $(sqlite3-wasm.c) -$(eval $(call call-make-pre-js,speedtest1,vanilla)) +$(eval $(call call-make-pre-post,speedtest1,vanilla)) $(speedtest1.js): $(MAKEFILE) $(speedtest1.cses) \ $(pre-post-speedtest1.deps.vanilla) \ $(EXPORTED_FUNCTIONS.speedtest1) @@ -751,7 +841,7 @@ CLEAN_FILES += $(speedtest1.js) $(speedtest1.wasm) # 4) Load sqlite3 as an ESM worker. (Not all browsers support this.) # # To that end, we require two separate builds of tester1.js: -# +# # tester1.js: cases 1 and 2 # tester1.mjs: cases 3 and 4 # @@ -761,6 +851,8 @@ $(eval $(call C-PP.FILTER,tester1.c-pp.js,tester1.mjs,$(c-pp.D.esm))) $(eval $(call C-PP.FILTER,tester1.c-pp.html,tester1.html)) $(eval $(call C-PP.FILTER,tester1.c-pp.html,tester1-esm.html,$(c-pp.D.esm))) tester1: tester1.js tester1.mjs tester1.html tester1-esm.html +# Note that we do not include $(sqlite3-bundler-friendly.mjs) in this +# because bundlers are client-specific. all quick: tester1 ######################################################################## diff --git a/ext/wasm/README-dist.txt b/ext/wasm/README-dist.txt index ca6bef93e8..6656a2072a 100644 --- a/ext/wasm/README-dist.txt +++ b/ext/wasm/README-dist.txt @@ -4,20 +4,43 @@ Main project page: https://sqlite.org Documentation: https://sqlite.org/wasm -This archive contains the sqlite3.js and sqlite3.wasm file which make -up the sqlite3 WASM/JS build. +This archive contains the following deliverables for the WASM/JS +build: -The jswasm directory contains the core sqlite3 deliverables and the -top-level directory contains demonstration and test apps. Browsers -will not serve WASM files from file:// URLs, so the demo/test apps -require a web server and that server must include the following -headers in its response when serving the files: +- jswasm/sqlite3.js is the canonical "vanilla JS" version. + +- jswasm/sqlite3.mjs is the same but in ES6 module form + +- jswasm/*-bundler-friendly.js and .mjs are variants which are + intended to be compatible with "bundler" tools commonly seen in + node.js-based projects. Projects using such tools should use those + variants, where available, instead of files without the + "-bundler-friendly" suffix. Some files do not have separate + variants. + +- jswasm/sqlite3.wasm is the binary WASM file imported by all of the + above-listed JS files. + +- The jswasm directory additionally contains a number of supplemental + JS files which cannot be bundled directly with the main JS files + but are necessary for certain usages. + +- The top-level directory contains various demonstration and test + applications for sqlite3.js and sqlite3.mjs. + sqlite3-bundler-friendly.mjs requires client-side build tools to make + use of and is not demonstrated here. + +Browsers will not serve WASM files from file:// URLs, so the test and +demonstration apps require a web server and that server must include +the following headers in its response when serving the files: Cross-Origin-Opener-Policy: same-origin Cross-Origin-Embedder-Policy: require-corp +The core library will function without those headers but certain +features, most notably OPFS storage, will not be available. + One simple way to get the demo apps up and running on Unix-style systems is to install althttpd (https://sqlite.org/althttpd) and run: althttpd --enable-sab --page index.html - diff --git a/ext/wasm/api/extern-post-js.c-pp.js b/ext/wasm/api/extern-post-js.c-pp.js index 2258697944..a577a63e1e 100644 --- a/ext/wasm/api/extern-post-js.c-pp.js +++ b/ext/wasm/api/extern-post-js.c-pp.js @@ -9,7 +9,7 @@ Emscripten-generated module init scope, in the current global scope. */ //#if target=es6-module -const toExportForES6 = +const toExportForESM = //#endif (function(){ /** @@ -45,10 +45,10 @@ const toExportForES6 = moduleScript: self?.document?.currentScript, isWorker: ('undefined' !== typeof WorkerGlobalScope), location: self.location, - urlParams: new URL(self.location.href).searchParams + urlParams: new URL(self.location.href).searchParams }); initModuleState.debugModule = - (new URL(self.location.href).searchParams).has('sqlite3.debugModule') + initModuleState.urlParams.has('sqlite3.debugModule') ? (...args)=>console.warn('sqlite3.debugModule:',...args) : ()=>{}; @@ -105,6 +105,10 @@ const toExportForES6 = document?.currentScript?.src); } } +//#ifnot target=es6-module +// Emscripten does not inject these module-loader bits in ES6 module +// builds and including them here breaks JS bundlers, so elide them +// from ESM builds. /* Replace the various module exports performed by the Emscripten glue... */ if (typeof exports === 'object' && typeof module === 'object'){ @@ -114,8 +118,9 @@ const toExportForES6 = } /* AMD modules get injected in a way we cannot override, so we can't handle those here. */ +//#endif // !target=es6-module return self.sqlite3InitModule /* required for ESM */; })(); //#if target=es6-module -export default toExportForES6; +export default toExportForESM; //#endif diff --git a/ext/wasm/api/pre-js.c-pp.js b/ext/wasm/api/pre-js.c-pp.js index 2e2fe66bc9..a25c7ce774 100644 --- a/ext/wasm/api/pre-js.c-pp.js +++ b/ext/wasm/api/pre-js.c-pp.js @@ -6,10 +6,14 @@ */ // See notes in extern-post-js.js -const sqlite3InitModuleState = self.sqlite3InitModuleState || Object.create(null); +const sqlite3InitModuleState = self.sqlite3InitModuleState + || Object.assign(Object.create(null),{ + debugModule: ()=>{} + }); delete self.sqlite3InitModuleState; sqlite3InitModuleState.debugModule('self.location =',self.location); +//#ifnot target=es6-bundler-friendly /** This custom locateFile() tries to figure out where to load `path` from. The intent is to provide a way for foo/bar/X.js loaded from a @@ -51,8 +55,9 @@ Module['locateFile'] = function(path, prefix) { "result =", theFile ); return theFile; -//#endif /* SQLITE_JS_EMS */ +//#endif target=es6-module }.bind(sqlite3InitModuleState); +//#endif ifnot target=es6-bundler-friendly /** Bug warning: a custom Module.instantiateWasm() does not work @@ -62,7 +67,7 @@ Module['locateFile'] = function(path, prefix) { In such builds we must disable this. */ -const xNameOfInstantiateWasm = true +const xNameOfInstantiateWasm = false ? 'instantiateWasm' : 'emscripten-bug-17951'; Module[xNameOfInstantiateWasm] = function callee(imports,onSuccess){ diff --git a/ext/wasm/api/sqlite3-api-cleanup.js b/ext/wasm/api/sqlite3-api-cleanup.js index 30cd64b053..7c23f8f894 100644 --- a/ext/wasm/api/sqlite3-api-cleanup.js +++ b/ext/wasm/api/sqlite3-api-cleanup.js @@ -51,11 +51,6 @@ if('undefined' !== typeof Module){ // presumably an Emscripten build delete self.sqlite3ApiConfig; } - if(self.location && +self.location.port > 1024){ - console.warn("Installing sqlite3 bits as global S for local dev/test purposes."); - self.S = sqlite3; - } - Module.sqlite3 = sqlite3 /* Needed for customized sqlite3InitModule() to be able to pass the sqlite3 object off to the client. */; }else{ diff --git a/ext/wasm/api/sqlite3-api-glue.js b/ext/wasm/api/sqlite3-api-glue.js index 59b40786ec..7db23bacc9 100644 --- a/ext/wasm/api/sqlite3-api-glue.js +++ b/ext/wasm/api/sqlite3-api-glue.js @@ -24,6 +24,50 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ self.WhWasmUtilInstaller(wasm); delete self.WhWasmUtilInstaller; + if(0){ + /** + Please keep this block around as a maintenance reminder + that we cannot rely on this type of check. + + This block fails on Safari, per a report at + https://sqlite.org/forum/forumpost/e5b20e1feb. + + It turns out that what Safari serves from the indirect function + table (e.g. wasm.functionEntry(X)) is anonymous functions which + wrap the WASM functions, rather than returning the WASM + functions themselves. That means comparison of such functions + is useless for determining whether or not we have a specific + function from wasm.exports. i.e. if function X is indirection + function table entry N then wasm.exports.X is not equal to + wasm.functionEntry(N) in Safari, despite being so in the other + browsers. + */ + /** + Find a mapping for SQLITE_WASM_DEALLOC, which the API + guarantees is a WASM pointer to the same underlying function as + wasm.dealloc() (noting that wasm.dealloc() is permitted to be a + JS wrapper around the WASM function). There is unfortunately no + O(1) algorithm for finding this pointer: we have to walk the + WASM indirect function table to find it. However, experience + indicates that that particular function is always very close to + the front of the table (it's been entry #3 in all relevant + tests). + */ + const dealloc = wasm.exports[sqlite3.config.deallocExportName]; + const nFunc = wasm.functionTable().length; + let i; + for(i = 0; i < nFunc; ++i){ + const e = wasm.functionEntry(i); + if(dealloc === e){ + capi.SQLITE_WASM_DEALLOC = i; + break; + } + } + if(dealloc !== wasm.functionEntry(capi.SQLITE_WASM_DEALLOC)){ + toss("Internal error: cannot find function pointer for SQLITE_WASM_DEALLOC."); + } + } + /** Signatures for the WASM-exported C-side functions. Each entry is an array with 2+ elements: @@ -42,10 +86,9 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ wasm.bindingSignatures = [ // Please keep these sorted by function name! ["sqlite3_aggregate_context","void*", "sqlite3_context*", "int"], - ["sqlite3_bind_blob","int", "sqlite3_stmt*", "int", "*", "int", "*" - /* TODO: we should arguably write a custom wrapper which knows - how to handle Blob, TypedArrays, and JS strings. */ - ], + /* sqlite3_auto_extension() has a hand-written binding. */ + /* sqlite3_bind_blob() and sqlite3_bind_text() have hand-written + bindings to permit more flexible inputs. */ ["sqlite3_bind_double","int", "sqlite3_stmt*", "int", "f64"], ["sqlite3_bind_int","int", "sqlite3_stmt*", "int", "int"], ["sqlite3_bind_null",undefined, "sqlite3_stmt*", "int"], @@ -53,18 +96,18 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ ["sqlite3_bind_parameter_index","int", "sqlite3_stmt*", "string"], ["sqlite3_bind_pointer", "int", "sqlite3_stmt*", "int", "*", "string:static", "*"], - ["sqlite3_bind_text","int", "sqlite3_stmt*", "int", "string", "int", "int" - /* We should arguably create a hand-written binding of - bind_text() which does more flexible text conversion, along - the lines of sqlite3_prepare_v3(). The slightly problematic - part is the final argument (text destructor). */ - ], - //["sqlite3_busy_handler","int", "sqlite3*", "*", "*"], - // ^^^^ TODO: custom binding which auto-converts JS function arg - // to a WASM function, noting that calling it multiple times - // would introduce a leak. + ["sqlite3_busy_handler","int", [ + "sqlite3*", + new wasm.xWrap.FuncPtrAdapter({ + signature: 'i(pi)', + contextKey: (argv,argIndex)=>argv[0/* sqlite3* */] + }), + "*" + ]], ["sqlite3_busy_timeout","int", "sqlite3*", "int"], - ["sqlite3_close_v2", "int", "sqlite3*"], + /* sqlite3_cancel_auto_extension() has a hand-written binding. */ + /* sqlite3_close_v2() is implemented by hand to perform some + extra work. */ ["sqlite3_changes", "int", "sqlite3*"], ["sqlite3_clear_bindings","int", "sqlite3_stmt*"], ["sqlite3_collation_needed", "int", "sqlite3*", "*", "*"/*=>v(ppis)*/], @@ -77,9 +120,20 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ ["sqlite3_column_text","string", "sqlite3_stmt*", "int"], ["sqlite3_column_type","int", "sqlite3_stmt*", "int"], ["sqlite3_column_value","sqlite3_value*", "sqlite3_stmt*", "int"], + ["sqlite3_commit_hook", "void*", [ + "sqlite3*", + new wasm.xWrap.FuncPtrAdapter({ + name: 'sqlite3_commit_hook', + signature: 'i(p)', + contextKey: (argv)=>argv[0/* sqlite3* */] + }), + '*' + ]], ["sqlite3_compileoption_get", "string", "int"], ["sqlite3_compileoption_used", "int", "string"], ["sqlite3_complete", "int", "string:flexible"], + ["sqlite3_context_db_handle", "sqlite3*", "sqlite3_context*"], + /* sqlite3_create_function(), sqlite3_create_function_v2(), and sqlite3_create_window_function() use hand-written bindings to simplify handling of their function-type arguments. */ @@ -102,9 +156,32 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ ["sqlite3_errmsg", "string", "sqlite3*"], ["sqlite3_error_offset", "int", "sqlite3*"], ["sqlite3_errstr", "string", "int"], - /*["sqlite3_exec", "int", "sqlite3*", "string", "*", "*", "**" - Handled seperately to perform translation of the callback - into a WASM-usable one. ],*/ + ["sqlite3_exec", "int", [ + "sqlite3*", "string:flexible", + new wasm.xWrap.FuncPtrAdapter({ + signature: 'i(pipp)', + bindScope: 'transient', + callProxy: (callback)=>{ + let aNames; + return (pVoid, nCols, pColVals, pColNames)=>{ + try { + const aVals = wasm.cArgvToJs(nCols, pColVals); + if(!aNames) aNames = wasm.cArgvToJs(nCols, pColNames); + return callback(aVals, aNames) | 0; + }catch(e){ + /* If we set the db error state here, the higher-level + exec() call replaces it with its own, so we have no way + of reporting the exception message except the console. We + must not propagate exceptions through the C API. Though + we make an effort to report OOM here, sqlite3_exec() + translates that into SQLITE_ABORT as well. */ + return e.resultCode || capi.SQLITE_ERROR; + } + } + } + }), + "*", "**" + ]], ["sqlite3_expanded_sql", "string", "sqlite3_stmt*"], ["sqlite3_extended_errcode", "int", "sqlite3*"], ["sqlite3_extended_result_codes", "int", "sqlite3*", "int"], @@ -131,18 +208,17 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ for those, depending on how their SQL argument is provided. */ /* sqlite3_randomness() uses a hand-written wrapper to extend the range of supported argument types. */ - [ - "sqlite3_progress_handler", undefined, [ - "sqlite3*", "int", new wasm.xWrap.FuncPtrAdapter({ - name: 'xProgressHandler', - signature: 'i(p)', - bindScope: 'context', - contextKey: (argIndex,argv)=>'sqlite3@'+argv[0] - }), "*" - ] - ], + ["sqlite3_progress_handler", undefined, [ + "sqlite3*", "int", new wasm.xWrap.FuncPtrAdapter({ + name: 'xProgressHandler', + signature: 'i(p)', + bindScope: 'context', + contextKey: (argv,argIndex)=>argv[0/* sqlite3* */] + }), "*" + ]], ["sqlite3_realloc", "*","*","int"], ["sqlite3_reset", "int", "sqlite3_stmt*"], + /* sqlite3_reset_auto_extension() has a hand-written binding. */ ["sqlite3_result_blob", undefined, "sqlite3_context*", "*", "int", "*"], ["sqlite3_result_double", undefined, "sqlite3_context*", "f64"], ["sqlite3_result_error", undefined, "sqlite3_context*", "string", "int"], @@ -156,7 +232,43 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ ["sqlite3_result_subtype", undefined, "sqlite3_value*", "int"], ["sqlite3_result_text", undefined, "sqlite3_context*", "string", "int", "*"], ["sqlite3_result_zeroblob", undefined, "sqlite3_context*", "int"], - ["sqlite3_set_auxdata", undefined, "sqlite3_context*", "int", "*", "*"/* => v(*) */], + ["sqlite3_rollback_hook", "void*", [ + "sqlite3*", + new wasm.xWrap.FuncPtrAdapter({ + name: 'sqlite3_rollback_hook', + signature: 'v(p)', + contextKey: (argv)=>argv[0/* sqlite3* */] + }), + '*' + ]], + ["sqlite3_set_authorizer", "int", [ + "sqlite3*", + new wasm.xWrap.FuncPtrAdapter({ + name: "sqlite3_set_authorizer::xAuth", + signature: "i(pi"+"ssss)", + contextKey: (argv, argIndex)=>argv[0/*(sqlite3*)*/], + callProxy: (callback)=>{ + return (pV, iCode, s0, s1, s2, s3)=>{ + try{ + s0 = s0 && wasm.cstrToJs(s0); s1 = s1 && wasm.cstrToJs(s1); + s2 = s2 && wasm.cstrToJs(s2); s3 = s3 && wasm.cstrToJs(s3); + return callback(pV, iCode, s0, s1, s2, s3) || 0; + }catch(e){ + return e.resultCode || capi.SQLITE_ERROR; + } + } + } + }), + "*"/*pUserData*/ + ]], + ["sqlite3_set_auxdata", undefined, [ + "sqlite3_context*", "int", "*", + new wasm.xWrap.FuncPtrAdapter({ + name: 'xDestroyAuxData', + signature: 'v(*)', + contextKey: (argv, argIndex)=>argv[0/* sqlite3_context* */] + }) + ]], ["sqlite3_shutdown", undefined], ["sqlite3_sourceid", "string"], ["sqlite3_sql", "string", "sqlite3_stmt*"], @@ -173,12 +285,15 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ "sqlite3*", "string", "string", "string", "**", "**", "*", "*", "*"], ["sqlite3_total_changes", "int", "sqlite3*"], - ["sqlite3_trace_v2", "int", "sqlite3*", "int", - new wasm.xWrap.FuncPtrAdapter({ - name: 'sqlite3_trace_v2::callback', - signature: 'i(ippp)', - contextKey: (argIndex, argv)=>'sqlite3@'+argv[0] - }), "*"], + ["sqlite3_trace_v2", "int", [ + "sqlite3*", "int", + new wasm.xWrap.FuncPtrAdapter({ + name: 'sqlite3_trace_v2::callback', + signature: 'i(ippp)', + contextKey: (argv,argIndex)=>argv[0/* sqlite3* */] + }), + "*" + ]], ["sqlite3_txn_state", "int", ["sqlite3*","string"]], /* Note that sqlite3_uri_...() have very specific requirements for their first C-string arguments, so we cannot perform any value @@ -211,7 +326,7 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ optional features into account. */ wasm.bindingSignatures.push(["sqlite3_normalized_sql", "string", "sqlite3_stmt*"]); } - + /** Functions which require BigInt (int64) support are separated from the others because we need to conditionally bind them or apply @@ -240,15 +355,47 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ ["sqlite3_malloc64", "*","i64"], ["sqlite3_msize", "i64", "*"], ["sqlite3_overload_function", "int", ["sqlite3*","string","int"]], + ["sqlite3_preupdate_blobwrite", "int", "sqlite3*"], + ["sqlite3_preupdate_count", "int", "sqlite3*"], + ["sqlite3_preupdate_depth", "int", "sqlite3*"], + ["sqlite3_preupdate_hook", "*", [ + "sqlite3*", + new wasm.xWrap.FuncPtrAdapter({ + name: 'sqlite3_preupdate_hook', + signature: "v(ppippjj)", + contextKey: (argv)=>argv[0/* sqlite3* */], + callProxy: (callback)=>{ + return (p,db,op,zDb,zTbl,iKey1,iKey2)=>{ + callback(p, db, op, wasm.cstrToJs(zDb), wasm.cstrToJs(zTbl), + iKey1, iKey2); + }; + } + }), + "*" + ]], + ["sqlite3_preupdate_new", "int", ["sqlite3*", "int", "**"]], + ["sqlite3_preupdate_old", "int", ["sqlite3*", "int", "**"]], ["sqlite3_realloc64", "*","*", "i64"], ["sqlite3_result_int64", undefined, "*", "i64"], ["sqlite3_result_zeroblob64", "int", "*", "i64"], ["sqlite3_serialize","*", "sqlite3*", "string", "*", "int"], - /* sqlite3_set_authorizer() requires a hand-written binding for - string conversions, so is defined elsewhere. */ ["sqlite3_set_last_insert_rowid", undefined, ["sqlite3*", "i64"]], ["sqlite3_status64", "int", "int", "*", "*", "int"], ["sqlite3_total_changes64", "i64", ["sqlite3*"]], + ["sqlite3_update_hook", "*", [ + "sqlite3*", + new wasm.xWrap.FuncPtrAdapter({ + name: 'sqlite3_update_hook', + signature: "v(iippj)", + contextKey: (argv)=>argv[0/* sqlite3* */], + callProxy: (callback)=>{ + return (p,op,z0,z1,rowid)=>{ + callback(p, op, wasm.cstrToJs(z0), wasm.cstrToJs(z1), rowid); + }; + } + }), + "*" + ]], ["sqlite3_uri_int64", "i64", ["sqlite3_filename", "string", "i64"]], ["sqlite3_value_int64","i64", "sqlite3_value*"], ["sqlite3_vtab_collation","string","sqlite3_index_info*","int"], @@ -263,6 +410,184 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ ["sqlite3_vtab_rhs_value","int", "sqlite3_index_info*", "int", "**"] ]; + // Add session/changeset APIs... + if(wasm.bigIntEnabled && !!wasm.exports.sqlite3changegroup_add){ + /* ACHTUNG: 2022-12-23: the session/changeset API bindings are + COMPLETELY UNTESTED. */ + /** + FuncPtrAdapter options for session-related callbacks with the + native signature "i(ps)". This proxy converts the 2nd argument + from a C string to a JS string before passing the arguments on + to the client-provided JS callback. + */ + const __ipsProxy = { + signature: 'i(ps)', + callProxy:(callback)=>{ + return (p,s)=>{ + try{return callback(p, wasm.cstrToJs(s)) | 0} + catch(e){return e.resultCode || capi.SQLITE_ERROR} + } + } + }; + + wasm.bindingSignatures.int64.push(...[ + ['sqlite3changegroup_add', 'int', ['sqlite3_changegroup*', 'int', 'void*']], + ['sqlite3changegroup_add_strm', 'int', [ + 'sqlite3_changegroup*', + new wasm.xWrap.FuncPtrAdapter({ + name: 'xInput', signature: 'i(ppp)', bindScope: 'transient' + }), + 'void*' + ]], + ['sqlite3changegroup_delete', undefined, ['sqlite3_changegroup*']], + ['sqlite3changegroup_new', 'int', ['**']], + ['sqlite3changegroup_output', 'int', ['sqlite3_changegroup*', 'int*', '**']], + ['sqlite3changegroup_output_strm', 'int', [ + 'sqlite3_changegroup*', + new wasm.xWrap.FuncPtrAdapter({ + name: 'xOutput', signature: 'i(ppi)', bindScope: 'transient' + }), + 'void*' + ]], + ['sqlite3changeset_apply', 'int', [ + 'sqlite3*', 'int', 'void*', + new wasm.xWrap.FuncPtrAdapter({ + name: 'xFilter', bindScope: 'transient', ...__ipsProxy + }), + new wasm.xWrap.FuncPtrAdapter({ + name: 'xConflict', signature: 'i(pip)', bindScope: 'transient' + }), + 'void*' + ]], + ['sqlite3changeset_apply_strm', 'int', [ + 'sqlite3*', + new wasm.xWrap.FuncPtrAdapter({ + name: 'xInput', signature: 'i(ppp)', bindScope: 'transient' + }), + 'void*', + new wasm.xWrap.FuncPtrAdapter({ + name: 'xFilter', bindScope: 'transient', ...__ipsProxy + }), + new wasm.xWrap.FuncPtrAdapter({ + name: 'xConflict', signature: 'i(pip)', bindScope: 'transient' + }), + 'void*' + ]], + ['sqlite3changeset_apply_v2', 'int', [ + 'sqlite3*', 'int', 'void*', + new wasm.xWrap.FuncPtrAdapter({ + name: 'xFilter', bindScope: 'transient', ...__ipsProxy + }), + new wasm.xWrap.FuncPtrAdapter({ + name: 'xConflict', signature: 'i(pip)', bindScope: 'transient' + }), + 'void*', '**', 'int*', 'int' + + ]], + ['sqlite3changeset_apply_v2_strm', 'int', [ + 'sqlite3*', + new wasm.xWrap.FuncPtrAdapter({ + name: 'xInput', signature: 'i(ppp)', bindScope: 'transient' + }), + 'void*', + new wasm.xWrap.FuncPtrAdapter({ + name: 'xFilter', bindScope: 'transient', ...__ipsProxy + }), + new wasm.xWrap.FuncPtrAdapter({ + name: 'xConflict', signature: 'i(pip)', bindScope: 'transient' + }), + 'void*', '**', 'int*', 'int' + ]], + ['sqlite3changeset_concat', 'int', ['int','void*', 'int', 'void*', 'int*', '**']], + ['sqlite3changeset_concat_strm', 'int', [ + new wasm.xWrap.FuncPtrAdapter({ + name: 'xInputA', signature: 'i(ppp)', bindScope: 'transient' + }), + 'void*', + new wasm.xWrap.FuncPtrAdapter({ + name: 'xInputB', signature: 'i(ppp)', bindScope: 'transient' + }), + 'void*', + new wasm.xWrap.FuncPtrAdapter({ + name: 'xOutput', signature: 'i(ppi)', bindScope: 'transient' + }), + 'void*' + ]], + ['sqlite3changeset_conflict', 'int', ['sqlite3_changeset_iter*', 'int', '**']], + ['sqlite3changeset_finalize', 'int', ['sqlite3_changeset_iter*']], + ['sqlite3changeset_fk_conflicts', 'int', ['sqlite3_changeset_iter*', 'int*']], + ['sqlite3changeset_invert', 'int', ['int', 'void*', 'int*', '**']], + ['sqlite3changeset_invert_strm', 'int', [ + new wasm.xWrap.FuncPtrAdapter({ + name: 'xInput', signature: 'i(ppp)', bindScope: 'transient' + }), + 'void*', + new wasm.xWrap.FuncPtrAdapter({ + name: 'xOutput', signature: 'i(ppi)', bindScope: 'transient' + }), + 'void*' + ]], + ['sqlite3changeset_new', 'int', ['sqlite3_changeset_iter*', 'int', '**']], + ['sqlite3changeset_next', 'int', ['sqlite3_changeset_iter*']], + ['sqlite3changeset_old', 'int', ['sqlite3_changeset_iter*', 'int', '**']], + ['sqlite3changeset_op', 'int', [ + 'sqlite3_changeset_iter*', '**', 'int*', 'int*','int*' + ]], + ['sqlite3changeset_pk', 'int', ['sqlite3_changeset_iter*', '**', 'int*']], + ['sqlite3changeset_start', 'int', ['**', 'int', '*']], + ['sqlite3changeset_start_strm', 'int', [ + '**', + new wasm.xWrap.FuncPtrAdapter({ + name: 'xInput', signature: 'i(ppp)', bindScope: 'transient' + }), + 'void*' + ]], + ['sqlite3changeset_start_v2', 'int', ['**', 'int', '*', 'int']], + ['sqlite3changeset_start_v2_strm', 'int', [ + '**', + new wasm.xWrap.FuncPtrAdapter({ + name: 'xInput', signature: 'i(ppp)', bindScope: 'transient' + }), + 'void*', 'int' + ]], + ['sqlite3session_attach', 'int', ['sqlite3_session*', 'string']], + ['sqlite3session_changeset', 'int', ['sqlite3_session*', 'int*', '**']], + ['sqlite3session_changeset_size', 'i64', ['sqlite3_session*']], + ['sqlite3session_changeset_strm', 'int', [ + 'sqlite3_session*', + new wasm.xWrap.FuncPtrAdapter({ + name: 'xOutput', signature: 'i(ppp)', bindScope: 'transient' + }), + 'void*' + ]], + ['sqlite3session_config', 'int', ['int', 'void*']], + ['sqlite3session_create', 'int', ['sqlite3*', 'string', '**']], + //sqlite3session_delete() is bound manually + ['sqlite3session_diff', 'int', ['sqlite3_session*', 'string', 'string', '**']], + ['sqlite3session_enable', 'int', ['sqlite3_session*', 'int']], + ['sqlite3session_indirect', 'int', ['sqlite3_session*', 'int']], + ['sqlite3session_isempty', 'int', ['sqlite3_session*']], + ['sqlite3session_memory_used', 'i64', ['sqlite3_session*']], + ['sqlite3session_object_config', 'int', ['sqlite3_session*', 'int', 'void*']], + ['sqlite3session_patchset', 'int', ['sqlite3_session*', '*', '**']], + ['sqlite3session_patchset_strm', 'int', [ + 'sqlite3_session*', + new wasm.xWrap.FuncPtrAdapter({ + name: 'xOutput', signature: 'i(ppp)', bindScope: 'transient' + }), + 'void*' + ]], + ['sqlite3session_table_filter', undefined, [ + 'sqlite3_session*', + new wasm.xWrap.FuncPtrAdapter({ + name: 'xFilter', ...__ipsProxy, + contextKey: (argv,argIndex)=>argv[0/* (sqlite3_session*) */] + }), + '*' + ]] + ]); + }/*session/changeset APIs*/ + /** Functions which are intended solely for API-internal use by the WASM components, not client code. These get installed into @@ -290,11 +615,13 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ }); delete self.Jaccwabyt; - {/* Convert Arrays and certain TypedArrays to strings for - 'string:flexible'-type arguments */ - const xString = wasm.xWrap.argAdapter('string'); + {// wasm.xWrap() bindings... + + /* Convert Arrays and certain TypedArrays to strings for + 'string:flexible'-type arguments */ + const __xString = wasm.xWrap.argAdapter('string'); wasm.xWrap.argAdapter( - 'string:flexible', (v)=>xString(util.flexibleString(v)) + 'string:flexible', (v)=>__xString(util.flexibleString(v)) ); /** @@ -324,36 +651,38 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ if(wasm.isPtr(v)) return v; v = ''+v; let rc = this[v]; - return rc || (rc = this[v] = wasm.allocCString(v)); + return rc || (this[v] = wasm.allocCString(v)); }.bind(Object.create(null)) ); - }/* special-case string-type argument conversions */ - - if(1){// WhWasmUtil.xWrap() bindings... + /** Add some descriptive xWrap() aliases for '*' intended to (A) - initially improve readability/correctness of capi.signatures - and (B) provide automatic conversion from higher-level - representations, e.g. capi.sqlite3_vfs to `sqlite3_vfs*` via - capi.sqlite3_vfs.pointer. + initially improve readability/correctness of + wasm.bindingSignatures and (B) provide automatic conversion + from higher-level representations, e.g. capi.sqlite3_vfs to + `sqlite3_vfs*` via capi.sqlite3_vfs.pointer. */ - const aPtr = wasm.xWrap.argAdapter('*'); - const nilType = function(){}; - wasm.xWrap.argAdapter('sqlite3_filename', aPtr) - ('sqlite3_context*', aPtr) - ('sqlite3_value*', aPtr) - ('void*', aPtr) + const __xArgPtr = wasm.xWrap.argAdapter('*'); + const nilType = function(){}/*a class no value can ever be an instance of*/; + wasm.xWrap.argAdapter('sqlite3_filename', __xArgPtr) + ('sqlite3_context*', __xArgPtr) + ('sqlite3_value*', __xArgPtr) + ('void*', __xArgPtr) + ('sqlite3_changegroup*', __xArgPtr) + ('sqlite3_changeset_iter*', __xArgPtr) + //('sqlite3_rebaser*', __xArgPtr) + ('sqlite3_session*', __xArgPtr) ('sqlite3_stmt*', (v)=> - aPtr((v instanceof (sqlite3?.oo1?.Stmt || nilType)) + __xArgPtr((v instanceof (sqlite3?.oo1?.Stmt || nilType)) ? v.pointer : v)) ('sqlite3*', (v)=> - aPtr((v instanceof (sqlite3?.oo1?.DB || nilType)) + __xArgPtr((v instanceof (sqlite3?.oo1?.DB || nilType)) ? v.pointer : v)) ('sqlite3_index_info*', (v)=> - aPtr((v instanceof (capi.sqlite3_index_info || nilType)) + __xArgPtr((v instanceof (capi.sqlite3_index_info || nilType)) ? v.pointer : v)) ('sqlite3_module*', (v)=> - aPtr((v instanceof (capi.sqlite3_module || nilType)) + __xArgPtr((v instanceof (capi.sqlite3_module || nilType)) ? v.pointer : v)) /** `sqlite3_vfs*`: @@ -369,19 +698,22 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ VFS in many contexts. We specifically do not want that behavior here. */ return capi.sqlite3_vfs_find(v) - || sqlite3.SQLite3Error.toss("Unknown sqlite3_vfs name:",v); + || sqlite3.SQLite3Error.toss( + capi.SQLITE_NOTFOUND, + "Unknown sqlite3_vfs name:", v + ); } - return aPtr((v instanceof (capi.sqlite3_vfs || nilType)) - ? v.pointer : v); + return __xArgPtr((v instanceof (capi.sqlite3_vfs || nilType)) + ? v.pointer : v); }); - const rPtr = wasm.xWrap.resultAdapter('*'); - wasm.xWrap.resultAdapter('sqlite3*', rPtr) - ('sqlite3_context*', rPtr) - ('sqlite3_stmt*', rPtr) - ('sqlite3_value*', rPtr) - ('sqlite3_vfs*', rPtr) - ('void*', rPtr); + const __xRcPtr = wasm.xWrap.resultAdapter('*'); + wasm.xWrap.resultAdapter('sqlite3*', __xRcPtr) + ('sqlite3_context*', __xRcPtr) + ('sqlite3_stmt*', __xRcPtr) + ('sqlite3_value*', __xRcPtr) + ('sqlite3_vfs*', __xRcPtr) + ('void*', __xRcPtr); /** Populate api object with sqlite3_...() by binding the "raw" wasm @@ -395,10 +727,12 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ } /* For C API functions which cannot work properly unless - wasm.bigIntEnabled is true, install a bogus impl which - throws if called when bigIntEnabled is false. */ + wasm.bigIntEnabled is true, install a bogus impl which throws + if called when bigIntEnabled is false. The alternative would be + to elide these functions altogether, which seems likely to + cause more confusion. */ const fI64Disabled = function(fname){ - return ()=>toss(fname+"() disabled due to lack", + return ()=>toss(fname+"() is unavailable due to lack", "of BigInt support in this build."); }; for(const e of wasm.bindingSignatures.int64){ @@ -421,7 +755,7 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ - (sqlite3*, int code, string msg) - (sqlite3*, Error e [,string msg = ''+e]) - If passed a WasmAllocError, the message is ingored and the + If passed a WasmAllocError, the message is ignored and the result code is SQLITE_NOMEM. If passed any other Error type, the result code defaults to SQLITE_ERROR unless the Error object has a resultCode property, in which case that is used @@ -439,7 +773,7 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ message = message || ''+resultCode; resultCode = (resultCode.resultCode || capi.SQLITE_ERROR); } - return __db_err(pDb, resultCode, message); + return pDb ? __db_err(pDb, resultCode, message) : resultCode; }; }else{ util.sqlite3_wasm_db_error = function(pDb,errCode,msg){ @@ -447,7 +781,87 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ return errCode; }; } - }/*xWrap() bindings*/; + }/*xWrap() bindings*/ + + {/* Import C-level constants and structs... */ + const cJson = wasm.xCall('sqlite3_wasm_enum_json'); + if(!cJson){ + toss("Maintenance required: increase sqlite3_wasm_enum_json()'s", + "static buffer size!"); + } + //console.debug('wasm.ctype length =',wasm.cstrlen(cJson)); + wasm.ctype = JSON.parse(wasm.cstrToJs(cJson)); + // Groups of SQLITE_xyz macros... + const defineGroups = ['access', 'authorizer', + 'blobFinalizers', 'changeset', + 'config', 'dataTypes', + 'dbConfig', 'dbStatus', + 'encodings', 'fcntl', 'flock', 'ioCap', + 'limits', 'openFlags', + 'prepareFlags', 'resultCodes', + 'sqlite3Status', + 'stmtStatus', 'syncFlags', + 'trace', 'txnState', 'udfFlags', + 'version' ]; + if(wasm.bigIntEnabled){ + defineGroups.push('serialize', 'session', 'vtab'); + } + for(const t of defineGroups){ + for(const e of Object.entries(wasm.ctype[t])){ + // ^^^ [k,v] there triggers a buggy code transformation via + // one of the Emscripten-driven optimizers. + capi[e[0]] = e[1]; + } + } + if(!wasm.functionEntry(capi.SQLITE_WASM_DEALLOC)){ + toss("Internal error: cannot resolve exported function", + "entry SQLITE_WASM_DEALLOC (=="+capi.SQLITE_WASM_DEALLOC+")."); + } + const __rcMap = Object.create(null); + for(const t of ['resultCodes']){ + for(const e of Object.entries(wasm.ctype[t])){ + __rcMap[e[1]] = e[0]; + } + } + /** + For the given integer, returns the SQLITE_xxx result code as a + string, or undefined if no such mapping is found. + */ + capi.sqlite3_js_rc_str = (rc)=>__rcMap[rc]; + /* Bind all registered C-side structs... */ + const notThese = Object.assign(Object.create(null),{ + // For each struct to NOT register, map its name to true: + WasmTestStruct: true, + /* We unregister the kvvfs VFS from Worker threads below. */ + sqlite3_kvvfs_methods: !util.isUIThread(), + /* sqlite3_index_info and friends require int64: */ + sqlite3_index_info: !wasm.bigIntEnabled, + sqlite3_index_constraint: !wasm.bigIntEnabled, + sqlite3_index_orderby: !wasm.bigIntEnabled, + sqlite3_index_constraint_usage: !wasm.bigIntEnabled + }); + for(const s of wasm.ctype.structs){ + if(!notThese[s.name]){ + capi[s.name] = sqlite3.StructBinder(s); + } + } + if(capi.sqlite3_index_info){ + /* Move these inner structs into sqlite3_index_info. Binding + ** them to WASM requires that we create global-scope structs to + ** model them with, but those are no longer needed after we've + ** passed them to StructBinder. */ + for(const k of ['sqlite3_index_constraint', + 'sqlite3_index_orderby', + 'sqlite3_index_constraint_usage']){ + capi.sqlite3_index_info[k] = capi[k]; + delete capi[k]; + } + capi.sqlite3_vtab_config = wasm.xWrap( + 'sqlite3_wasm_vtab_config','int',[ + 'sqlite3*', 'int', 'int'] + ); + }/* end vtab-related setup */ + }/*end C constant and struct imports*/ /** Internal helper to assist in validating call argument counts in @@ -460,42 +874,221 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ (1===n?"":'s')+"."); }; - if(1){/* Bindings for sqlite3_create_collation() */ + /** Code duplication reducer for functions which take an encoding + argument and require SQLITE_UTF8. Sets the db error code to + SQLITE_FORMAT and returns that code. */ + const __errEncoding = (pDb)=>{ + return util.sqlite3_wasm_db_error( + pDb, capi.SQLITE_FORMAT, "SQLITE_UTF8 is the only supported encoding." + ); + }; - const __collationContextKey = (argIndex,argv)=>{ - return 'argv['+argIndex+']:sqlite3@'+argv[0]+ - ':'+wasm.cstrToJs(argv[1]).toLowerCase() + /** + __dbCleanupMap is infrastructure for recording registration of + UDFs and collations so that sqlite3_close_v2() can clean up any + automated JS-to-WASM function conversions installed by those. + */ + const __argPDb = (pDb)=>wasm.xWrap.argAdapter('sqlite3*')(pDb); + const __argStr = (str)=>wasm.isPtr(str) ? wasm.cstrToJs(str) : str; + const __dbCleanupMap = function( + pDb, mode/*0=remove, >0=create if needed, <0=do not create if missing*/ + ){ + pDb = __argPDb(pDb); + let m = this.dbMap.get(pDb); + if(!mode){ + this.dbMap.delete(pDb); + return m; + }else if(!m && mode>0){ + this.dbMap.set(pDb, (m = Object.create(null))); + } + return m; + }.bind(Object.assign(Object.create(null),{ + dbMap: new Map + })); + + __dbCleanupMap.addCollation = function(pDb, name){ + const m = __dbCleanupMap(pDb, 1); + if(!m.collation) m.collation = new Set; + m.collation.add(__argStr(name).toLowerCase()); + }; + + __dbCleanupMap._addUDF = function(pDb, name, arity, map){ + /* Map UDF name to a Set of arity values */ + name = __argStr(name).toLowerCase(); + let u = map.get(name); + if(!u) map.set(name, (u = new Set)); + u.add((arity<0) ? -1 : arity); + }; + + __dbCleanupMap.addFunction = function(pDb, name, arity){ + const m = __dbCleanupMap(pDb, 1); + if(!m.udf) m.udf = new Map; + this._addUDF(pDb, name, arity, m.udf); + }; + + __dbCleanupMap.addWindowFunc = function(pDb, name, arity){ + const m = __dbCleanupMap(pDb, 1); + if(!m.wudf) m.wudf = new Map; + this._addUDF(pDb, name, arity, m.wudf); + }; + + /** + Intended to be called _only_ from sqlite3_close_v2(), + passed its non-0 db argument. + + This function frees up certain automatically-installed WASM + function bindings which were installed on behalf of the given db, + as those may otherwise leak. + + Notable caveat: this is only ever run via + sqlite3.capi.sqlite3_close_v2(). If a client, for whatever + reason, uses sqlite3.wasm.exports.sqlite3_close_v2() (the + function directly exported from WASM), this cleanup will not + happen. + + This is not a silver bullet for avoiding automation-related + leaks but represents "an honest effort." + + The issue being addressed here is covered at: + + https://sqlite.org/wasm/doc/trunk/api-c-style.md#convert-func-ptr + */ + __dbCleanupMap.cleanup = function(pDb){ + pDb = __argPDb(pDb); + //wasm.xWrap.FuncPtrAdapter.debugFuncInstall = false; + /** + Installing NULL functions in the C API will remove those + bindings. The FuncPtrAdapter which sits between us and the C + API will also treat that as an opportunity to + wasm.uninstallFunction() any WASM function bindings it has + installed for pDb. + */ + const closeArgs = [pDb]; + for(const name of [ + 'sqlite3_busy_handler', + 'sqlite3_commit_hook', + 'sqlite3_preupdate_hook', + 'sqlite3_progress_handler', + 'sqlite3_rollback_hook', + 'sqlite3_set_authorizer', + 'sqlite3_trace_v2', + 'sqlite3_update_hook' + ]) { + const x = wasm.exports[name]; + closeArgs.length = x.length/*==argument count*/ + /* recall that undefined entries translate to 0 when passed to + WASM. */; + try{ capi[name](...closeArgs) } + catch(e){ + console.warn("close-time call of",name+"(",closeArgs,") threw:",e); + } + } + const m = __dbCleanupMap(pDb, 0); + if(!m) return; + if(m.collation){ + for(const name of m.collation){ + try{ + capi.sqlite3_create_collation_v2( + pDb, name, capi.SQLITE_UTF8, 0, 0, 0 + ); + }catch(e){ + /*ignored*/ + } + } + delete m.collation; + } + let i; + for(i = 0; i < 2; ++i){ /* Clean up UDFs... */ + const fmap = i ? m.wudf : m.udf; + if(!fmap) continue; + const func = i + ? capi.sqlite3_create_window_function + : capi.sqlite3_create_function_v2; + for(const e of fmap){ + const name = e[0], arities = e[1]; + const fargs = [pDb, name, 0/*arity*/, capi.SQLITE_UTF8, 0, 0, 0, 0, 0]; + if(i) fargs.push(0); + for(const arity of arities){ + try{ fargs[2] = arity; func.apply(null, fargs); } + catch(e){/*ignored*/} + } + arities.clear(); + } + fmap.clear(); + } + delete m.udf; + delete m.wudf; + }/*__dbCleanupMap.cleanup()*/; + + {/* Binding of sqlite3_close_v2() */ + const __sqlite3CloseV2 = wasm.xWrap("sqlite3_close_v2", "int", "sqlite3*"); + capi.sqlite3_close_v2 = function(pDb){ + if(1!==arguments.length) return __dbArgcMismatch(pDb, 'sqlite3_close_v2', 1); + if(pDb){ + try{__dbCleanupMap.cleanup(pDb)} catch(e){/*ignored*/} + } + return __sqlite3CloseV2(pDb); }; - const __ccv2 = wasm.xWrap( - 'sqlite3_create_collation_v2', 'int', - 'sqlite3*','string','int','*', - new wasm.xWrap.FuncPtrAdapter({ - /* int(*xCompare)(void*,int,const void*,int,const void*) */ - name: 'sqlite3_create_collation_v2::xCompare', - signature: 'i(pipip)', - bindScope: 'context', - contextKey: __collationContextKey - }), - new wasm.xWrap.FuncPtrAdapter({ - /* void(*xDestroy(void*) */ - name: 'sqlite3_create_collation_v2::xDestroy', - signature: 'v(p)', - bindScope: 'context', - contextKey: __collationContextKey - }) + }/*sqlite3_close_v2()*/ + + if(capi.sqlite3session_table_filter){ + const __sqlite3SessionDelete = wasm.xWrap( + 'sqlite3session_delete', undefined, ['sqlite3_session*'] + ); + capi.sqlite3session_delete = function(pSession){ + if(1!==arguments.length){ + return __dbArgcMismatch(pDb, 'sqlite3session_delete', 1); + /* Yes, we're returning a value from a void function. That seems + like the lesser evil compared to not maintaining arg-count + consistency as we do with other similar bindings. */ + } + else if(pSession){ + //wasm.xWrap.FuncPtrAdapter.debugFuncInstall = true; + capi.sqlite3session_table_filter(pSession, 0, 0); + } + __sqlite3SessionDelete(pSession); + }; + } + + {/* Bindings for sqlite3_create_collation[_v2]() */ + // contextKey() impl for wasm.xWrap.FuncPtrAdapter + const contextKey = (argv,argIndex)=>{ + return 'argv['+argIndex+']:'+argv[0/* sqlite3* */]+ + ':'+wasm.cstrToJs(argv[1/* collation name */]).toLowerCase() + }; + const __sqlite3CreateCollationV2 = wasm.xWrap( + 'sqlite3_create_collation_v2', 'int', [ + 'sqlite3*', 'string', 'int', '*', + new wasm.xWrap.FuncPtrAdapter({ + /* int(*xCompare)(void*,int,const void*,int,const void*) */ + name: 'xCompare', signature: 'i(pipip)', contextKey + }), + new wasm.xWrap.FuncPtrAdapter({ + /* void(*xDestroy(void*) */ + name: 'xDestroy', signature: 'v(p)', contextKey + }) + ] ); /** Works exactly like C's sqlite3_create_collation_v2() except that: - 1) It accepts JS functions for its function-pointer arguments, + 1) It returns capi.SQLITE_FORMAT if the 3rd argument contains + any encoding-related value other than capi.SQLITE_UTF8. No + other encodings are supported. As a special case, if the + bottom 4 bits of that argument are 0, SQLITE_UTF8 is + assumed. + + 2) It accepts JS functions for its function-pointer arguments, for which it will install WASM-bound proxies. The bindings are "permanent," in that they will stay in the WASM environment - until it shuts down unless the client somehow finds and removes - them. + until it shuts down unless the client calls this again with the + same collation name and a value of 0 or null for the + the function pointer(s). - 2) It returns capi.SQLITE_FORMAT if the 3rd argument is not - capi.SQLITE_UTF8. No other encodings are supported. + For consistency with the C API, it requires the same number of + arguments. It returns capi.SQLITE_MISUSE if passed any other + argument count. Returns 0 on success, non-0 on error, in which case the error state of pDb (of type `sqlite3*` or argument-convertible to it) @@ -503,18 +1096,20 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ */ capi.sqlite3_create_collation_v2 = function(pDb,zName,eTextRep,pArg,xCompare,xDestroy){ if(6!==arguments.length) return __dbArgcMismatch(pDb, 'sqlite3_create_collation_v2', 6); - else if(capi.SQLITE_UTF8!==eTextRep){ - return util.sqlite3_wasm_db_error( - pDb, capi.SQLITE_FORMAT, "SQLITE_UTF8 is the only supported encoding." - ); + else if( 0 === (eTextRep & 0xf) ){ + eTextRep |= capi.SQLITE_UTF8; + }else if( capi.SQLITE_UTF8 !== (eTextRep & 0xf) ){ + return __errEncoding(pDb); } - let rc, pfCompare, pfDestroy; - try{ - rc = __ccv2(pDb, zName, eTextRep, pArg, xCompare, xDestroy); + try{ + const rc = __sqlite3CreateCollationV2(pDb, zName, eTextRep, pArg, xCompare, xDestroy); + if(0===rc && xCompare instanceof Function){ + __dbCleanupMap.addCollation(pDb, zName); + } + return rc; }catch(e){ - rc = util.sqlite3_wasm_db_error(pDb, e); + return util.sqlite3_wasm_db_error(pDb, e); } - return rc; }; capi.sqlite3_create_collation = (pDb,zName,eTextRep,pArg,xCompare)=>{ @@ -525,127 +1120,92 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ }/*sqlite3_create_collation() and friends*/ - if(1){/* Special-case handling of sqlite3_exec() */ - const __exec = wasm.xWrap("sqlite3_exec", "int", - ["sqlite3*", "string:flexible", - new wasm.xWrap.FuncPtrAdapter({ - signature: 'i(pipp)', - bindScope: 'transient' - }), "*", "**"]); - /* Documented in the api object's initializer. */ - capi.sqlite3_exec = function f(pDb, sql, callback, pVoid, pErrMsg){ - if(f.length!==arguments.length){ - return __dbArgcMismatch(pDb,"sqlite3_exec",f.length); - }else if(!(callback instanceof Function)){ - return __exec(pDb, sql, callback, pVoid, pErrMsg); - } - /* Wrap the callback in a WASM-bound function and convert the callback's - `(char**)` arguments to arrays of strings... */ - const cbwrap = function(pVoid, nCols, pColVals, pColNames){ - let rc = capi.SQLITE_ERROR; - try { - let aVals = [], aNames = [], i = 0, offset = 0; - for( ; i < nCols; offset += (wasm.ptrSizeof * ++i) ){ - aVals.push( wasm.cstrToJs(wasm.peekPtr(pColVals + offset)) ); - aNames.push( wasm.cstrToJs(wasm.peekPtr(pColNames + offset)) ); - } - rc = callback(pVoid, nCols, aVals, aNames) | 0; - /* The first 2 args of the callback are useless for JS but - we want the JS mapping of the C API to be as close to the - C API as possible. */ - }catch(e){ - /* If we set the db error state here, the higher-level exec() call - replaces it with its own, so we have no way of reporting the - exception message except the console. We must not propagate - exceptions through the C API. */ - } - return rc; - }; - let rc; - try{ - rc = __exec(pDb, sql, cbwrap, pVoid, pErrMsg); - }catch(e){ - rc = util.sqlite3_wasm_db_error(pDb, capi.SQLITE_ERROR, - "Error running exec(): "+e); - } - return rc; + {/* Special-case handling of sqlite3_create_function_v2() + and sqlite3_create_window_function(). */ + /** FuncPtrAdapter for contextKey() for sqlite3_create_function() + and friends. */ + const contextKey = function(argv,argIndex){ + return ( + argv[0/* sqlite3* */] + +':'+(argv[2/*number of UDF args*/] < 0 ? -1 : argv[2]) + +':'+argIndex/*distinct for each xAbc callback type*/ + +':'+wasm.cstrToJs(argv[1]).toLowerCase() + ) }; - }/*sqlite3_exec() proxy*/; - if(1){/* Special-case handling of sqlite3_create_function_v2() - and sqlite3_create_window_function() */ - const sqlite3CreateFunction = wasm.xWrap( - "sqlite3_create_function_v2", "int", - ["sqlite3*", "string"/*funcName*/, "int"/*nArg*/, - "int"/*eTextRep*/, "*"/*pApp*/, - "*"/*xStep*/,"*"/*xFinal*/, "*"/*xValue*/, "*"/*xDestroy*/] - ); - const sqlite3CreateWindowFunction = wasm.xWrap( - "sqlite3_create_window_function", "int", - ["sqlite3*", "string"/*funcName*/, "int"/*nArg*/, - "int"/*eTextRep*/, "*"/*pApp*/, - "*"/*xStep*/,"*"/*xFinal*/, "*"/*xValue*/, - "*"/*xInverse*/, "*"/*xDestroy*/] + /** + JS proxies for the various sqlite3_create[_window]_function() + callbacks, structured in a form usable by wasm.xWrap.FuncPtrAdapter. + */ + const __cfProxy = Object.assign(Object.create(null), { + xInverseAndStep: { + signature:'v(pip)', contextKey, + callProxy: (callback)=>{ + return (pCtx, argc, pArgv)=>{ + try{ callback(pCtx, ...capi.sqlite3_values_to_js(argc, pArgv)) } + catch(e){ capi.sqlite3_result_error_js(pCtx, e) } + }; + } + }, + xFinalAndValue: { + signature:'v(p)', contextKey, + callProxy: (callback)=>{ + return (pCtx)=>{ + try{ capi.sqlite3_result_js(pCtx, callback(pCtx)) } + catch(e){ capi.sqlite3_result_error_js(pCtx, e) } + }; + } + }, + xFunc: { + signature:'v(pip)', contextKey, + callProxy: (callback)=>{ + return (pCtx, argc, pArgv)=>{ + try{ + capi.sqlite3_result_js( + pCtx, + callback(pCtx, ...capi.sqlite3_values_to_js(argc, pArgv)) + ); + }catch(e){ + //console.error('xFunc() caught:',e); + capi.sqlite3_result_error_js(pCtx, e); + } + }; + } + }, + xDestroy: { + signature:'v(p)', contextKey, + //Arguable: a well-behaved destructor doesn't require a proxy. + callProxy: (callback)=>{ + return (pVoid)=>{ + try{ callback(pVoid) } + catch(e){ console.error("UDF xDestroy method threw:",e) } + }; + } + } + })/*__cfProxy*/; + + const __sqlite3CreateFunction = wasm.xWrap( + "sqlite3_create_function_v2", "int", [ + "sqlite3*", "string"/*funcName*/, "int"/*nArg*/, + "int"/*eTextRep*/, "*"/*pApp*/, + new wasm.xWrap.FuncPtrAdapter({name: 'xFunc', ...__cfProxy.xFunc}), + new wasm.xWrap.FuncPtrAdapter({name: 'xStep', ...__cfProxy.xInverseAndStep}), + new wasm.xWrap.FuncPtrAdapter({name: 'xFinal', ...__cfProxy.xFinalAndValue}), + new wasm.xWrap.FuncPtrAdapter({name: 'xDestroy', ...__cfProxy.xDestroy}) + ] ); - const __xFunc = function(callback){ - return function(pCtx, argc, pArgv){ - try{ - capi.sqlite3_result_js( - pCtx, - callback(pCtx, ...capi.sqlite3_values_to_js(argc, pArgv)) - ); - }catch(e){ - //console.error('xFunc() caught:',e); - capi.sqlite3_result_error_js(pCtx, e); - } - }; - }; - - const __xInverseAndStep = function(callback){ - return function(pCtx, argc, pArgv){ - try{ callback(pCtx, ...capi.sqlite3_values_to_js(argc, pArgv)) } - catch(e){ capi.sqlite3_result_error_js(pCtx, e) } - }; - }; - - const __xFinalAndValue = function(callback){ - return function(pCtx){ - try{ capi.sqlite3_result_js(pCtx, callback(pCtx)) } - catch(e){ capi.sqlite3_result_error_js(pCtx, e) } - }; - }; - - const __xDestroy = function(callback){ - return function(pVoid){ - try{ callback(pVoid) } - catch(e){ console.error("UDF xDestroy method threw:",e) } - }; - }; - - const __xMap = Object.assign(Object.create(null), { - xFunc: {sig:'v(pip)', f:__xFunc}, - xStep: {sig:'v(pip)', f:__xInverseAndStep}, - xInverse: {sig:'v(pip)', f:__xInverseAndStep}, - xFinal: {sig:'v(p)', f:__xFinalAndValue}, - xValue: {sig:'v(p)', f:__xFinalAndValue}, - xDestroy: {sig:'v(p)', f:__xDestroy} - }); - - const __xWrapFuncs = function(theFuncs, tgtUninst){ - const rc = [] - let k; - for(k in theFuncs){ - let fArg = theFuncs[k]; - if('function'===typeof fArg){ - const w = __xMap[k]; - fArg = wasm.installFunction(w.sig, w.f(fArg)); - tgtUninst.push(fArg); - } - rc.push(fArg); - } - return rc; - }; + const __sqlite3CreateWindowFunction = wasm.xWrap( + "sqlite3_create_window_function", "int", [ + "sqlite3*", "string"/*funcName*/, "int"/*nArg*/, + "int"/*eTextRep*/, "*"/*pApp*/, + new wasm.xWrap.FuncPtrAdapter({name: 'xStep', ...__cfProxy.xInverseAndStep}), + new wasm.xWrap.FuncPtrAdapter({name: 'xFinal', ...__cfProxy.xFinalAndValue}), + new wasm.xWrap.FuncPtrAdapter({name: 'xValue', ...__cfProxy.xFinalAndValue}), + new wasm.xWrap.FuncPtrAdapter({name: 'xInverse', ...__cfProxy.xInverseAndStep}), + new wasm.xWrap.FuncPtrAdapter({name: 'xDestroy', ...__cfProxy.xDestroy}) + ] + ); /* Documented in the api object's initializer. */ capi.sqlite3_create_function_v2 = function f( @@ -655,28 +1215,30 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ xFinal, //void (*xFinal)(sqlite3_context*) xDestroy //void (*xDestroy)(void*) ){ - if(f.length!==arguments.length){ + if( f.length!==arguments.length ){ return __dbArgcMismatch(pDb,"sqlite3_create_function_v2",f.length); + }else if( 0 === (eTextRep & 0xf) ){ + eTextRep |= capi.SQLITE_UTF8; + }else if( capi.SQLITE_UTF8 !== (eTextRep & 0xf) ){ + return __errEncoding(pDb); } - /* Wrap the callbacks in a WASM-bound functions... */ - const uninstall = [/*funcs to uninstall on error*/]; - let rc; try{ - const funcArgs = __xWrapFuncs({xFunc, xStep, xFinal, xDestroy}, - uninstall); - rc = sqlite3CreateFunction(pDb, funcName, nArg, eTextRep, - pApp, ...funcArgs); + const rc = __sqlite3CreateFunction(pDb, funcName, nArg, eTextRep, + pApp, xFunc, xStep, xFinal, xDestroy); + if(0===rc && (xFunc instanceof Function + || xStep instanceof Function + || xFinal instanceof Function + || xDestroy instanceof Function)){ + __dbCleanupMap.addFunction(pDb, funcName, nArg); + } + return rc; }catch(e){ console.error("sqlite3_create_function_v2() setup threw:",e); - for(let v of uninstall){ - wasm.uninstallFunction(v); - } - rc = util.sqlite3_wasm_db_error(pDb, capi.SQLITE_ERROR, - "Creation of UDF threw: "+e.message); + return util.sqlite3_wasm_db_error(pDb, e, "Creation of UDF threw: "+e); } - return rc; }; + /* Documented in the api object's initializer. */ capi.sqlite3_create_function = function f( pDb, funcName, nArg, eTextRep, pApp, xFunc, xStep, xFinal @@ -692,30 +1254,33 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ pDb, funcName, nArg, eTextRep, pApp, xStep, //void (*xStep)(sqlite3_context*,int,sqlite3_value**) xFinal, //void (*xFinal)(sqlite3_context*) - xValue, //void (*xFinal)(sqlite3_context*) - xInverse,//void (*xStep)(sqlite3_context*,int,sqlite3_value**) + xValue, //void (*xValue)(sqlite3_context*) + xInverse,//void (*xInverse)(sqlite3_context*,int,sqlite3_value**) xDestroy //void (*xDestroy)(void*) ){ - if(f.length!==arguments.length){ + if( f.length!==arguments.length ){ return __dbArgcMismatch(pDb,"sqlite3_create_window_function",f.length); + }else if( 0 === (eTextRep & 0xf) ){ + eTextRep |= capi.SQLITE_UTF8; + }else if( capi.SQLITE_UTF8 !== (eTextRep & 0xf) ){ + return __errEncoding(pDb); } - /* Wrap the callbacks in a WASM-bound functions... */ - const uninstall = [/*funcs to uninstall on error*/]; - let rc; try{ - const funcArgs = __xWrapFuncs({xStep, xFinal, xValue, xInverse, xDestroy}, - uninstall); - rc = sqlite3CreateWindowFunction(pDb, funcName, nArg, eTextRep, - pApp, ...funcArgs); + const rc = __sqlite3CreateWindowFunction(pDb, funcName, nArg, eTextRep, + pApp, xStep, xFinal, xValue, + xInverse, xDestroy); + if(0===rc && (xStep instanceof Function + || xFinal instanceof Function + || xValue instanceof Function + || xInverse instanceof Function + || xDestroy instanceof Function)){ + __dbCleanupMap.addWindowFunc(pDb, funcName, nArg); + } + return rc; }catch(e){ console.error("sqlite3_create_window_function() setup threw:",e); - for(let v of uninstall){ - wasm.uninstallFunction(v); - } - rc = util.sqlite3_wasm_db_error(pDb, capi.SQLITE_ERROR, - "Creation of UDF threw: "+e.message); + return util.sqlite3_wasm_db_error(pDb, e, "Creation of UDF threw: "+e); } - return rc; }; /** A _deprecated_ alias for capi.sqlite3_result_js() which @@ -743,21 +1308,24 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ }/*sqlite3_create_function_v2() and sqlite3_create_window_function() proxies*/; - if(1){/* Special-case handling of sqlite3_prepare_v2() and - sqlite3_prepare_v3() */ + {/* Special-case handling of sqlite3_prepare_v2() and + sqlite3_prepare_v3() */ + /** Helper for string:flexible conversions which require a byte-length counterpart argument. Passed a value and its - ostensible length, this function returns [V,N], where V - is either v or a transformed copy of v and N is either n, - -1, or the byte length of v (if it's a byte array). + ostensible length, this function returns [V,N], where V is + either v or a transformed copy of v and N is either n, -1, or + the byte length of v (if it's a byte array or ArrayBuffer). */ const __flexiString = (v,n)=>{ if('string'===typeof v){ n = -1; }else if(util.isSQLableTypedArray(v)){ n = v.byteLength; - v = util.typedArrayToString(v); + v = util.typedArrayToString( + (v instanceof ArrayBuffer) ? new Uint8Array(v) : v + ); }else if(Array.isArray(v)){ v = v.join(""); n = -1; @@ -768,32 +1336,33 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ /** Scope-local holder of the two impls of sqlite3_prepare_v2/v3(). */ - const __prepare = Object.create(null); - /** - This binding expects a JS string as its 2nd argument and - null as its final argument. In order to compile multiple - statements from a single string, the "full" impl (see - below) must be used. - */ - __prepare.basic = wasm.xWrap('sqlite3_prepare_v3', - "int", ["sqlite3*", "string", - "int"/*ignored for this impl!*/, - "int", "**", - "**"/*MUST be 0 or null or undefined!*/]); - /** - Impl which requires that the 2nd argument be a pointer - to the SQL string, instead of being converted to a - string. This variant is necessary for cases where we - require a non-NULL value for the final argument - (exec()'ing multiple statements from one input - string). For simpler cases, where only the first - statement in the SQL string is required, the wrapper - named sqlite3_prepare_v2() is sufficient and easier to - use because it doesn't require dealing with pointers. - */ - __prepare.full = wasm.xWrap('sqlite3_prepare_v3', - "int", ["sqlite3*", "*", "int", "int", - "**", "**"]); + const __prepare = { + /** + This binding expects a JS string as its 2nd argument and + null as its final argument. In order to compile multiple + statements from a single string, the "full" impl (see + below) must be used. + */ + basic: wasm.xWrap('sqlite3_prepare_v3', + "int", ["sqlite3*", "string", + "int"/*ignored for this impl!*/, + "int", "**", + "**"/*MUST be 0 or null or undefined!*/]), + /** + Impl which requires that the 2nd argument be a pointer + to the SQL string, instead of being converted to a + string. This variant is necessary for cases where we + require a non-NULL value for the final argument + (exec()'ing multiple statements from one input + string). For simpler cases, where only the first + statement in the SQL string is required, the wrapper + named sqlite3_prepare_v2() is sufficient and easier to + use because it doesn't require dealing with pointers. + */ + full: wasm.xWrap('sqlite3_prepare_v3', + "int", ["sqlite3*", "*", "int", "int", + "**", "**"]) + }; /* Documented in the capi object's initializer. */ capi.sqlite3_prepare_v3 = function f(pDb, sql, sqlLen, prepFlags, ppStmt, pzTail){ @@ -818,38 +1387,86 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ ? capi.sqlite3_prepare_v3(pDb, sql, sqlLen, 0, ppStmt, pzTail) : __dbArgcMismatch(pDb,"sqlite3_prepare_v2",f.length); }; - }/*sqlite3_prepare_v2/v3()*/; - {/* sqlite3_set_authorizer() */ - const __ssa = wasm.xWrap("sqlite3_set_authorizer", 'int', [ - "sqlite3*", - new wasm.xWrap.FuncPtrAdapter({ - name: "sqlite3_set_authorizer::xAuth", - signature: "i(pi"+"ssss)", - contextKey: (argIndex, argv)=>argv[0/*(sqlite3*)*/] - }), - "*" + }/*sqlite3_prepare_v2/v3()*/ + + {/*sqlite3_bind_text/blob()*/ + const __bindText = wasm.xWrap("sqlite3_bind_text", "int", [ + "sqlite3_stmt*", "int", "string", "int", "*" ]); - capi.sqlite3_set_authorizer = function(pDb, xAuth, pUserData){ - if(3!==arguments.length) return __dbArgcMismatch(pDb, 'sqlite3_set_authorizer', 3); - if(xAuth instanceof Function){ - const xProxy = xAuth; - /* Create a proxy which will receive the C-strings from WASM - and convert them to JS strings for the client-supplied - function. */ - xAuth = function(pV, iCode, s0, s1, s2, s3){ - try{ - s0 = s0 && wasm.cstrToJs(s0); s1 = s1 && wasm.cstrToJs(s1); - s2 = s2 && wasm.cstrToJs(s2); s3 = s3 && wasm.cstrToJs(s3); - return xProxy(pV, iCode, s0, s1, s2, s3) || 0; - }catch(e){ - return util.sqlite3_wasm_db_error(pDb, e); - } - }; + const __bindBlob = wasm.xWrap("sqlite3_bind_blob", "int", [ + "sqlite3_stmt*", "int", "*", "int", "*" + ]); + + /** Documented in the capi object's initializer. */ + capi.sqlite3_bind_text = function f(pStmt, iCol, text, nText, xDestroy){ + if(f.length!==arguments.length){ + return __dbArgcMismatch(capi.sqlite3_db_handle(pStmt), + "sqlite3_bind_text", f.length); + }else if(wasm.isPtr(text) || null===text){ + return __bindText(pStmt, iCol, text, nText, xDestroy); + }else if(text instanceof ArrayBuffer){ + text = new Uint8Array(text); + }else if(Array.isArray(pMem)){ + text = pMem.join(''); } - return __ssa(pDb, xAuth, pUserData); - }; - }/* sqlite3_set_authorizer() */ + let p, n; + try{ + if(util.isSQLableTypedArray(text)){ + p = wasm.allocFromTypedArray(text); + n = text.byteLength; + }else if('string'===typeof text){ + [p, n] = wasm.allocCString(text); + }else{ + return util.sqlite3_wasm_db_error( + capi.sqlite3_db_handle(pStmt), capi.SQLITE_MISUSE, + "Invalid 3rd argument type for sqlite3_bind_text()." + ); + } + return __bindText(pStmt, iCol, p, n, capi.SQLITE_WASM_DEALLOC); + }catch(e){ + wasm.dealloc(p); + return util.sqlite3_wasm_db_error( + capi.sqlite3_db_handle(pStmt), e + ); + } + }/*sqlite3_bind_text()*/; + + /** Documented in the capi object's initializer. */ + capi.sqlite3_bind_blob = function f(pStmt, iCol, pMem, nMem, xDestroy){ + if(f.length!==arguments.length){ + return __dbArgcMismatch(capi.sqlite3_db_handle(pStmt), + "sqlite3_bind_blob", f.length); + }else if(wasm.isPtr(pMem) || null===pMem){ + return __bindBlob(pStmt, iCol, pMem, nMem, xDestroy); + }else if(pMem instanceof ArrayBuffer){ + pMem = new Uint8Array(pMem); + }else if(Array.isArray(pMem)){ + pMem = pMem.join(''); + } + let p, n; + try{ + if(util.isBindableTypedArray(pMem)){ + p = wasm.allocFromTypedArray(pMem); + n = nMem>=0 ? nMem : pMem.byteLength; + }else if('string'===typeof pMem){ + [p, n] = wasm.allocCString(pMem); + }else{ + return util.sqlite3_wasm_db_error( + capi.sqlite3_db_handle(pStmt), capi.SQLITE_MISUSE, + "Invalid 3rd argument type for sqlite3_bind_blob()." + ); + } + return __bindBlob(pStmt, iCol, p, n, capi.SQLITE_WASM_DEALLOC); + }catch(e){ + wasm.dealloc(p); + return util.sqlite3_wasm_db_error( + capi.sqlite3_db_handle(pStmt), e + ); + } + }/*sqlite3_bind_blob()*/; + + }/*sqlite3_bind_text/blob()*/ {/* sqlite3_config() */ /** @@ -897,79 +1514,41 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ }; }/* sqlite3_config() */ - {/* Import C-level constants and structs... */ - const cJson = wasm.xCall('sqlite3_wasm_enum_json'); - if(!cJson){ - toss("Maintenance required: increase sqlite3_wasm_enum_json()'s", - "static buffer size!"); - } - wasm.ctype = JSON.parse(wasm.cstrToJs(cJson)); - //console.debug('wasm.ctype length =',wasm.cstrlen(cJson)); - const defineGroups = ['access', 'authorizer', - 'blobFinalizers', 'config', 'dataTypes', - 'dbConfig', 'dbStatus', - 'encodings', 'fcntl', 'flock', 'ioCap', - 'limits', 'openFlags', - 'prepareFlags', 'resultCodes', - 'serialize', 'sqlite3Status', - 'stmtStatus', 'syncFlags', - 'trace', 'txnState', 'udfFlags', - 'version' ]; - if(wasm.bigIntEnabled){ - defineGroups.push('vtab'); - } - for(const t of defineGroups){ - for(const e of Object.entries(wasm.ctype[t])){ - // ^^^ [k,v] there triggers a buggy code transformation via - // one of the Emscripten-driven optimizers. - capi[e[0]] = e[1]; + {/*auto-extension bindings.*/ + const __autoExtFptr = new Set; + + capi.sqlite3_auto_extension = function(fPtr){ + if( fPtr instanceof Function ){ + fPtr = wasm.installFunction('i(ppp)', fPtr); + }else if( 1!==arguments.length || !wasm.isPtr(fPtr) ){ + return capi.SQLITE_MISUSE; } - } - const __rcMap = Object.create(null); - for(const t of ['resultCodes']){ - for(const e of Object.entries(wasm.ctype[t])){ - __rcMap[e[1]] = e[0]; + const rc = wasm.exports.sqlite3_auto_extension(fPtr); + if( fPtr!==arguments[0] ){ + if(0===rc) __autoExtFptr.add(fPtr); + else wasm.uninstallFunction(fPtr); } - } - /** - For the given integer, returns the SQLITE_xxx result code as a - string, or undefined if no such mapping is found. - */ - capi.sqlite3_js_rc_str = (rc)=>__rcMap[rc]; - /* Bind all registered C-side structs... */ - const notThese = Object.assign(Object.create(null),{ - // For each struct to NOT register, map its name to true: - WasmTestStruct: true, - /* We unregister the kvvfs VFS from Worker threads below. */ - sqlite3_kvvfs_methods: !util.isUIThread(), - /* sqlite3_index_info and friends require int64: */ - sqlite3_index_info: !wasm.bigIntEnabled, - sqlite3_index_constraint: !wasm.bigIntEnabled, - sqlite3_index_orderby: !wasm.bigIntEnabled, - sqlite3_index_constraint_usage: !wasm.bigIntEnabled - }); - for(const s of wasm.ctype.structs){ - if(!notThese[s.name]){ - capi[s.name] = sqlite3.StructBinder(s); - } - } - if(capi.sqlite3_index_info){ - /* Move these inner structs into sqlite3_index_info. Binding - ** them to WASM requires that we create global-scope structs to - ** model them with, but those are no longer needed after we've - ** passed them to StructBinder. */ - for(const k of ['sqlite3_index_constraint', - 'sqlite3_index_orderby', - 'sqlite3_index_constraint_usage']){ - capi.sqlite3_index_info[k] = capi[k]; - delete capi[k]; - } - capi.sqlite3_vtab_config = wasm.xWrap( - 'sqlite3_wasm_vtab_config','int',[ - 'sqlite3*', 'int', 'int'] - ); - }/* end vtab-related setup */ - }/*end C constant and struct imports*/ + return rc; + }; + + capi.sqlite3_cancel_auto_extension = function(fPtr){ + /* We do not do an automatic JS-to-WASM function conversion here + because it would be senseless: the converted pointer would + never possibly match an already-installed one. */; + if(!fPtr || 1!==arguments.length || !wasm.isPtr(fPtr)) return 0; + return wasm.exports.sqlite3_cancel_auto_extension(fPtr); + /* Note that it "cannot happen" that a client passes a pointer which + is in __autoExtFptr because __autoExtFptr only contains automatic + conversions created inside sqlite3_auto_extension() and + never exposed to the client. */ + }; + + capi.sqlite3_reset_auto_extension = function(){ + wasm.exports.sqlite3_reset_auto_extension(); + for(const fp of __autoExtFptr) wasm.uninstallFunction(fp); + __autoExtFptr.clear(); + }; + }/* auto-extension */ const pKvvfs = capi.sqlite3_vfs_find("kvvfs"); if( pKvvfs ){/* kvvfs-specific glue */ @@ -1068,4 +1647,5 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ } }/*pKvvfs*/ + wasm.xWrap.FuncPtrAdapter.warnOnUse = true; }); diff --git a/ext/wasm/api/sqlite3-api-oo1.js b/ext/wasm/api/sqlite3-api-oo1.js index 59ecf56b96..26aa80f723 100644 --- a/ext/wasm/api/sqlite3-api-oo1.js +++ b/ext/wasm/api/sqlite3-api-oo1.js @@ -72,7 +72,7 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ wasm.installFunction('i(ippp)', function(t,c,p,x){ if(capi.SQLITE_TRACE_STMT===t){ // x == SQL, p == sqlite3_stmt* - console.log("SQL TRACE #"+(++this.counter), + console.log("SQL TRACE #"+(++this.counter)+' via sqlite3@'+c+':', wasm.cstrToJs(x)); } }.bind({counter: 0})); @@ -161,7 +161,7 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ capi.sqlite3_extended_result_codes(pDb, 1); if(flagsStr.indexOf('t')>=0){ capi.sqlite3_trace_v2(pDb, capi.SQLITE_TRACE_STMT, - __dbTraceToConsole, 0); + __dbTraceToConsole, pDb); } }catch( e ){ if( pDb ) capi.sqlite3_close_v2(pDb); @@ -183,7 +183,7 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ checkSqlite3Rc( pDb, capi.sqlite3_exec(pDb, postInitSql, 0, 0, 0) ); - } + } }catch(e){ this.close(); throw e; @@ -337,7 +337,7 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ */ const Stmt = function(){ if(BindTypes!==arguments[2]){ - toss3("Do not call the Stmt constructor directly. Use DB.prepare()."); + toss3(capi.SQLITE_MISUSE, "Do not call the Stmt constructor directly. Use DB.prepare()."); } this.db = arguments[0]; __ptrMap.set(this, arguments[1]); @@ -422,6 +422,10 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ default: toss3("Invalid returnValue value:",opt.returnValue); } + if(!opt.callback && !opt.returnValue && undefined!==opt.rowMode){ + if(!opt.resultRows) opt.resultRows = []; + out.returnVal = ()=>opt.resultRows; + } if(opt.callback || opt.resultRows){ switch((undefined===opt.rowMode) ? 'array' : opt.rowMode) { @@ -439,21 +443,22 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ if(util.isInt32(opt.rowMode)){ out.cbArg = (stmt)=>stmt.get(opt.rowMode); break; - }else if('string'===typeof opt.rowMode && opt.rowMode.length>1){ + }else if('string'===typeof opt.rowMode + && opt.rowMode.length>1 + && '$'===opt.rowMode[0]){ /* "$X": fetch column named "X" (case-sensitive!). Prior to 2022-12-14 ":X" and "@X" were also permitted, but having so many options is unnecessary and likely to cause confusion. */ - if('$'===opt.rowMode[0]){ - out.cbArg = function(stmt){ - const rc = stmt.get(this.obj)[this.colName]; - return (undefined===rc) ? toss3("exec(): unknown result column:",this.colName) : rc; - }.bind({ - obj:Object.create(null), - colName: opt.rowMode.substr(1) - }); - break; - } + const $colName = opt.rowMode.substr(1); + out.cbArg = (stmt)=>{ + const rc = stmt.get(Object.create(null))[$colName]; + return (undefined===rc) + ? toss3(capi.SQLITE_NOTFOUND, + "exec(): unknown result column:",$colName) + : rc; + }; + break; } toss3("Invalid rowMode:",opt.rowMode); } @@ -462,23 +467,21 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ }; /** - Internal impl of the DB.selectArray() and + Internal impl of the DB.selectValue(), selectArray(), and selectObject() methods. */ - const __selectFirstRow = (db, sql, bind, getArg)=>{ - let stmt, rc; + const __selectFirstRow = (db, sql, bind, ...getArgs)=>{ + const stmt = db.prepare(sql); try { - stmt = db.prepare(sql).bind(bind); - if(stmt.step()) rc = stmt.get(getArg); + return stmt.bind(bind).step() ? stmt.get(...getArgs) : undefined; }finally{ - if(stmt) stmt.finalize(); + stmt.finalize(); } - return rc; }; /** - Internal impl of the DB.selectArrays() and - selectObjects() methods. + Internal impl of the DB.selectArrays() and selectObjects() + methods. */ const __selectAll = (db, sql, bind, rowMode)=>db.exec({ @@ -608,7 +611,7 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ try{ rc = wasm.cstrToJs(v.$zName) } finally { v.dispose() } } - return rc; + return rc; }, /** Compiles the given SQL and returns a prepared Stmt. This is @@ -697,21 +700,27 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ unchanged. Achtung: an SQL result may have multiple columns with identical names. - - `callback` = a function which gets called for each row of - the result set, but only if that statement has any result + - `callback` = a function which gets called for each row of the + result set, but only if that statement has any result _rows_. The callback's "this" is the options object, noting that this function synthesizes one if the caller does not pass one to exec(). The second argument passed to the callback is always the current Stmt object, as it's needed if the caller wants to fetch the column names or some such (noting that they could also be fetched via `this.columnNames`, if the client - provides the `columnNames` option). + provides the `columnNames` option). If the callback returns a + literal `false` (as opposed to any other falsy value, e.g. an + implicit `undefined` return), any ongoing statement-`step()` + iteration stops without an error. The return value of the + callback is otherwise ignored. ACHTUNG: The callback MUST NOT modify the Stmt object. Calling any of the Stmt.get() variants, Stmt.getColumnName(), or similar, is legal, but calling step() or finalize() is not. Member methods which are illegal in this context will - trigger an exception. + trigger an exception, but clients must also refrain from using + any lower-level (C-style) APIs which might modify the + statement. The first argument passed to the callback defaults to an array of values from the current result row but may be changed with ... @@ -765,8 +774,11 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ - `returnValue`: is a string specifying what this function should return: - A) The default value is `"this"`, meaning that the - DB object itself should be returned. + A) The default value is (usually) `"this"`, meaning that the + DB object itself should be returned. The exceptions is if + the caller passes neither of `callback` nor `returnValue` + but does pass an explicit `rowMode` then the default + `returnValue` is `"resultRows"`, described below. B) `"resultRows"` means to return the value of the `resultRows` option. If `resultRows` is not set, this @@ -799,7 +811,9 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ Array.isArray(opt.resultRows) ? opt.resultRows : undefined; let stmt; let bind = opt.bind; - let evalFirstResult = !!(arg.cbArg || opt.columnNames) /* true to evaluate the first result-returning query */; + let evalFirstResult = !!( + arg.cbArg || opt.columnNames || resultRows + ) /* true to step through the first result-returning statement */; const stack = wasm.scopedAllocPush(); const saveSql = Array.isArray(opt.saveSql) ? opt.saveSql : undefined; try{ @@ -810,9 +824,10 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ space for the SQL (pSql). When prepare_v2() returns, pzTail will point to somewhere in pSql. */ let sqlByteLen = isTA ? arg.sql.byteLength : wasm.jstrlen(arg.sql); - const ppStmt = wasm.scopedAlloc(/* output (sqlite3_stmt**) arg and pzTail */ - (2 * wasm.ptrSizeof) - + (sqlByteLen + 1/* SQL + NUL */)); + const ppStmt = wasm.scopedAlloc( + /* output (sqlite3_stmt**) arg and pzTail */ + (2 * wasm.ptrSizeof) + (sqlByteLen + 1/* SQL + NUL */) + ); const pzTail = ppStmt + wasm.ptrSizeof /* final arg to sqlite3_prepare_v2() */; let pSql = pzTail + wasm.ptrSizeof; const pSqlEnd = pSql + sqlByteLen; @@ -848,11 +863,15 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ if(Array.isArray(opt.columnNames)){ stmt.getColumnNames(opt.columnNames); } - while(!!arg.cbArg && stmt.step()){ - stmt._isLocked = true; - const row = arg.cbArg(stmt); - if(resultRows) resultRows.push(row); - if(callback) callback.call(opt, row, stmt); + if(arg.cbArg || resultRows){ + for(; stmt.step(); stmt._isLocked = false){ + stmt._isLocked = true; + const row = arg.cbArg(stmt); + if(resultRows) resultRows.push(row); + if(callback && false === callback.call(opt, row, stmt)){ + break; + } + } stmt._isLocked = false; } }else{ @@ -873,10 +892,11 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ } return arg.returnVal(); }/*exec()*/, + /** - Creates a new scalar UDF (User-Defined Function) which is - accessible via SQL code. This function may be called in any - of the following forms: + Creates a new UDF (User-Defined Function) which is accessible + via SQL code. This function may be called in any of the + following forms: - (name, function) - (name, function, optionsObject) @@ -892,10 +912,12 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ functions. Creating an aggregate or window function requires the options-object form (see below for details). - UDFs cannot currently be removed from a DB handle after they're - added. More correctly, they can be removed as documented for - sqlite3_create_function_v2(), but doing so will "leak" the - JS-created WASM binding of those functions. + UDFs can be removed as documented for + sqlite3_create_function_v2() and + sqlite3_create_window_function(), but doing so will "leak" the + JS-created WASM binding of those functions (meaning that their + entries in the WASM indirect function table still + exist). Eliminating that potential leak is a pending TODO. On success, returns this object. Throws on error. @@ -1066,15 +1088,31 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ Throws on error (e.g. malformed SQL). */ selectValue: function(sql,bind,asType){ - let stmt, rc; + return __selectFirstRow(this, sql, bind, 0, asType); + }, + + /** + Runs the given query and returns an array of the values from + the first result column of each row of the result set. The 2nd + argument is an optional value for use in a single-argument call + to Stmt.bind(). The 3rd argument may be any value suitable for + use as the 2nd argument to Stmt.get(). If a 3rd argument is + desired but no bind data are needed, pass `undefined` for the 2nd + argument. + + If there are no result rows, an empty array is returned. + */ + selectValues: function(sql,bind,asType){ + const stmt = this.prepare(sql), rc = []; try { - stmt = this.prepare(sql).bind(bind); - if(stmt.step()) rc = stmt.get(0,asType); + stmt.bind(bind); + while(stmt.step()) rc.push(stmt.get(0,asType)); }finally{ - if(stmt) stmt.finalize(); + stmt.finalize(); } return rc; }, + /** Prepares the given SQL, step()s it one time, and returns an array containing the values of the first result row. If it has @@ -1130,7 +1168,10 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ /** Returns the number of currently-opened Stmt handles for this db - handle, or 0 if this DB instance is closed. + handle, or 0 if this DB instance is closed. Note that only + handles prepared via this.prepare() are counted, and not + handles prepared using capi.sqlite3_prepare_v3() (or + equivalent). */ openStatementCount: function(){ return this.pointer ? Object.keys(__stmtMap.get(this)).length : 0; @@ -1146,9 +1187,25 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ Note that transactions may not be nested, so this will throw if it is called recursively. For nested transactions, use the savepoint() method or manually manage SAVEPOINTs using exec(). + + If called with 2 arguments, the first must be a keyword which + is legal immediately after a BEGIN statement, e.g. one of + "DEFERRED", "IMMEDIATE", or "EXCLUSIVE". Though the exact list + of supported keywords is not hard-coded here, in order to be + future-compatible, if the argument does not look like a single + keyword then an exception is triggered with a description of + the problem. */ - transaction: function(callback){ - affirmDbOpen(this).exec("BEGIN"); + transaction: function(/* [beginQualifier,] */callback){ + let opener = 'BEGIN'; + if(arguments.length>1){ + if(/[^a-zA-Z]/.test(arguments[0])){ + toss3(capi.SQLITE_MISUSE, "Invalid argument for BEGIN qualifier."); + } + opener += ' '+arguments[0]; + callback = arguments[1]; + } + affirmDbOpen(this).exec(opener); try { const rc = callback(this); this.exec("COMMIT"); @@ -1212,7 +1269,6 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ if(wasm.bigIntEnabled) return t; /* else fall through */ default: - //console.log("isSupportedBindType",t,v); return util.isBindableTypedArray(v) ? BindTypes.blob : undefined; } }; @@ -1267,7 +1323,7 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ success. */ const bindOne = function f(stmt,ndx,bindType,val){ - affirmUnlocked(stmt, 'bind()'); + affirmUnlocked(affirmStmtOpen(stmt), 'bind()'); if(!f._){ f._tooBigInt = (v)=>toss3( "BigInt value is too big to store without precision loss:", v @@ -1277,29 +1333,9 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ so we have no range checking. */ f._ = { string: function(stmt, ndx, val, asBlob){ - if(1){ - /* _Hypothetically_ more efficient than the impl in the 'else' block. */ - const stack = wasm.scopedAllocPush(); - try{ - const n = wasm.jstrlen(val); - const pStr = wasm.scopedAlloc(n); - wasm.jstrcpy(val, wasm.heap8u(), pStr, n, false); - const f = asBlob ? capi.sqlite3_bind_blob : capi.sqlite3_bind_text; - return f(stmt.pointer, ndx, pStr, n, capi.SQLITE_TRANSIENT); - }finally{ - wasm.scopedAllocPop(stack); - } - }else{ - const bytes = wasm.jstrToUintArray(val,false); - const pStr = wasm.alloc(bytes.length || 1); - wasm.heap8u().set(bytes.length ? bytes : [0], pStr); - try{ - const f = asBlob ? capi.sqlite3_bind_blob : capi.sqlite3_bind_text; - return f(stmt.pointer, ndx, pStr, bytes.length, capi.SQLITE_TRANSIENT); - }finally{ - wasm.dealloc(pStr); - } - } + const [pStr, n] = wasm.allocCString(val, true); + const f = asBlob ? capi.sqlite3_bind_blob : capi.sqlite3_bind_text; + return f(stmt.pointer, ndx, pStr, n, capi.SQLITE_WASM_DEALLOC); } }; }/* static init */ @@ -1344,29 +1380,17 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ case BindTypes.blob: { if('string'===typeof val){ rc = f._.string(stmt, ndx, val, true); + break; + }else if(val instanceof ArrayBuffer){ + val = new Uint8Array(val); }else if(!util.isBindableTypedArray(val)){ toss3("Binding a value as a blob requires", - "that it be a string, Uint8Array, or Int8Array."); - }else if(1){ - /* _Hypothetically_ more efficient than the impl in the 'else' block. */ - const stack = wasm.scopedAllocPush(); - try{ - const pBlob = wasm.scopedAlloc(val.byteLength || 1); - wasm.heap8().set(val.byteLength ? val : [0], pBlob) - rc = capi.sqlite3_bind_blob(stmt.pointer, ndx, pBlob, val.byteLength, - capi.SQLITE_TRANSIENT); - }finally{ - wasm.scopedAllocPop(stack); - } - }else{ - const pBlob = wasm.allocFromTypedArray(val); - try{ - rc = capi.sqlite3_bind_blob(stmt.pointer, ndx, pBlob, val.byteLength, - capi.SQLITE_TRANSIENT); - }finally{ - wasm.dealloc(pBlob); - } + "that it be a string, Uint8Array, Int8Array, or ArrayBuffer."); } + const pBlob = wasm.alloc(val.byteLength || 1); + wasm.heap8().set(val.byteLength ? val : [0], pBlob) + rc = capi.sqlite3_bind_blob(stmt.pointer, ndx, pBlob, val.byteLength, + capi.SQLITE_WASM_DEALLOC); break; } default: @@ -1374,6 +1398,7 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ toss3("Unsupported bind() argument type: "+(typeof val)); } if(rc) DB.checkRc(stmt.db.pointer, rc); + stmt._mayGet = false; return stmt; }; @@ -1461,8 +1486,8 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ - Strings are bound as strings (use bindAsBlob() to force blob binding). - - Uint8Array and Int8Array instances are bound as blobs. - (TODO: binding the other TypedArray types.) + - Uint8Array, Int8Array, and ArrayBuffer instances are bound as + blobs. If passed an array, each element of the array is bound at the parameter index equal to the array index plus 1 @@ -1517,8 +1542,10 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ } arg.forEach((v,i)=>bindOne(this, i+1, affirmSupportedBindType(v), v)); return this; + }else if(arg instanceof ArrayBuffer){ + arg = new Uint8Array(arg); } - else if('object'===typeof arg/*null was checked above*/ + if('object'===typeof arg/*null was checked above*/ && !util.isBindableTypedArray(arg)){ /* Treat each property of arg as a named bound parameter. */ if(1!==arguments.length){ @@ -1540,7 +1567,7 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ the value. The ndx may be a numbered or named bind index. The value must be of type string, null/undefined (both get treated as null), or a TypedArray of a type supported by the bind() - API. + API. This API cannot bind numbers as blobs. If passed a single argument, a bind index of 1 is assumed and the first argument is the value. @@ -1556,9 +1583,7 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ && BindTypes.null !== t){ toss3("Invalid value type for bindAsBlob()"); } - bindOne(this, ndx, BindTypes.blob, arg); - this._mayGet = false; - return this; + return bindOne(this, ndx, BindTypes.blob, arg); }, /** Steps the statement one time. If the result indicates that a @@ -1624,7 +1649,7 @@ self.sqlite3ApiBootstrap.initializers.push(function(sqlite3){ }, /** Fetches the value from the given 0-based column index of - the current data row, throwing if index is out of range. + the current data row, throwing if index is out of range. Requires that step() has just returned a truthy value, else an exception is thrown. diff --git a/ext/wasm/api/sqlite3-api-prologue.js b/ext/wasm/api/sqlite3-api-prologue.js index 94514e4770..137ab1ee78 100644 --- a/ext/wasm/api/sqlite3-api-prologue.js +++ b/ext/wasm/api/sqlite3-api-prologue.js @@ -12,12 +12,11 @@ This file is intended to be combined at build-time with other related code, most notably a header and footer which wraps this - whole file into an Emscripten Module.postRun() handler which has a - parameter named "Module" (the Emscripten Module object). The sqlite3 - JS API has no hard requirements on Emscripten, and does not expose + whole file into an Emscripten Module.postRun() handler. The sqlite3 + JS API has no hard requirements on Emscripten and does not expose any Emscripten APIs to clients. It is structured such that its build can be tweaked to include it in arbitrary WASM environments which - supply the necessary underlying features (e.g. a POSIX file I/O + can supply the necessary underlying features (e.g. a POSIX file I/O layer). Main project home page: https://sqlite.org @@ -42,8 +41,8 @@ Emscripten. (Note the default values for the config object!) The config object is only honored the first time this is called. Subsequent calls ignore the argument and return the same - (configured) object which gets initialized by the first call. - This function will throw if any of the required config options are + (configured) object which gets initialized by the first call. This + function will throw if any of the required config options are missing. The config object properties include: @@ -62,6 +61,8 @@ true if `self.BigInt64Array` is available, else false. Some APIs will throw exceptions if called without BigInt support, as BigInt is required for marshalling C-side int64 into and out of JS. + (Sidebar: it is technically possible to add int64 support via + marshalling of int32 pairs, but doing so is unduly invasive.) - `allocExportName`: the name of the function, in `exports`, of the `malloc(3)`-compatible routine for the WASM environment. Defaults @@ -69,7 +70,9 @@ sqlite3_malloc() may require care in certain client-side code regarding which allocator is uses. Notably, sqlite3_deserialize() and sqlite3_serialize() can only safely use memory from different - allocators under very specific conditions. + allocators under very specific conditions. The canonical builds + of this API guaranty that `sqlite3_malloc()` is the JS-side + allocator implementation. - `deallocExportName`: the name of the function, in `exports`, of the `free(3)`-compatible routine for the WASM @@ -84,9 +87,11 @@ in the WASMFS+OPFS combination. This option is currently ignored. [^1] = This property may optionally be a function, in which case this - function re-assigns it to the value returned from that function, + function re-assigns calls that function to fetch the value, enabling delayed evaluation. + The returned object is the top-level sqlite3 namespace object. + */ 'use strict'; self.sqlite3ApiBootstrap = function sqlite3ApiBootstrap( @@ -139,7 +144,7 @@ self.sqlite3ApiBootstrap = function sqlite3ApiBootstrap( main thread (aborts via a failed assert() if it's attempted), which eliminates any(?) benefit to supporting it. */ false; - /** + /** The main sqlite3 binding API gets installed into this object, mimicking the C API as closely as we can. The numerous members names with prefixes 'sqlite3_' and 'SQLITE_' behave, insofar as @@ -322,13 +327,16 @@ self.sqlite3ApiBootstrap = function sqlite3ApiBootstrap( /** Returns true if v appears to be one of our bind()-able TypedArray - types: Uint8Array or Int8Array. Support for TypedArrays with - element sizes >1 is a potential TODO just waiting on a use case - to justify them. + types: Uint8Array or Int8Array or ArrayBuffer. Support for + TypedArrays with element sizes >1 is a potential TODO just + waiting on a use case to justify them. Until then, their `buffer` + property can be used to pass them as an ArrayBuffer. If it's not + a bindable array type, a falsy value is returned. */ const isBindableTypedArray = (v)=>{ - return v && (v instanceof Uint8Array || v instanceof Int8Array); - //v && v.constructor && (1===v.constructor.BYTES_PER_ELEMENT); + return v && (v instanceof Uint8Array + || v instanceof Int8Array + || v instanceof ArrayBuffer); }; /** @@ -341,8 +349,9 @@ self.sqlite3ApiBootstrap = function sqlite3ApiBootstrap( isSQLableTypedArray() list. */ const isSQLableTypedArray = (v)=>{ - return v && (v instanceof Uint8Array || v instanceof Int8Array); - //v && v.constructor && (1===v.constructor.BYTES_PER_ELEMENT); + return v && (v instanceof Uint8Array + || v instanceof Int8Array + || v instanceof ArrayBuffer); }; /** Returns true if isBindableTypedArray(v) does, else throws with a message @@ -373,7 +382,11 @@ self.sqlite3ApiBootstrap = function sqlite3ApiBootstrap( returned. Else v is returned as-is. */ const flexibleString = function(v){ - if(isSQLableTypedArray(v)) return typedArrayToString(v); + if(isSQLableTypedArray(v)){ + return typedArrayToString( + (v instanceof ArrayBuffer) ? new Uint8Array(v) : v + ); + } else if(Array.isArray(v)) return v.join(""); else if(wasm.isPtr(v)) v = wasm.cstrToJs(v); return v; @@ -401,6 +414,7 @@ self.sqlite3ApiBootstrap = function sqlite3ApiBootstrap( }else{ super("Allocation failed."); } + this.resultCode = capi.SQLITE_NOMEM; this.name = 'WasmAllocError'; } }; @@ -417,6 +431,92 @@ self.sqlite3ApiBootstrap = function sqlite3ApiBootstrap( }; Object.assign(capi, { + /** + sqlite3_bind_blob() works exactly like its C counterpart unless + its 3rd argument is one of: + + - JS string: the 3rd argument is converted to a C string, the + 4th argument is ignored, and the C-string's length is used + in its place. + + - Array: converted to a string as defined for "flexible + strings" and then it's treated as a JS string. + + - Int8Array or Uint8Array: wasm.allocFromTypedArray() is used to + conver the memory to the WASM heap. If the 4th argument is + 0 or greater, it is used as-is, otherwise the array's byteLength + value is used. This is an exception to the C API's undefined + behavior for a negative 4th argument, but results are undefined + if the given 4th argument value is greater than the byteLength + of the input array. + + - If it's an ArrayBuffer, it gets wrapped in a Uint8Array and + treated as that type. + + In all of those cases, the final argument (destructor) is + ignored and capi.SQLITE_WASM_DEALLOC is assumed. + + A 3rd argument of `null` is treated as if it were a WASM pointer + of 0. + + If the 3rd argument is neither a WASM pointer nor one of the + above-described types, capi.SQLITE_MISUSE is returned. + + The first argument may be either an `sqlite3_stmt*` WASM + pointer or an sqlite3.oo1.Stmt instance. + + For consistency with the C API, it requires the same number of + arguments. It returns capi.SQLITE_MISUSE if passed any other + argument count. + */ + sqlite3_bind_blob: undefined/*installed later*/, + + /** + sqlite3_bind_text() works exactly like its C counterpart unless + its 3rd argument is one of: + + - JS string: the 3rd argument is converted to a C string, the + 4th argument is ignored, and the C-string's length is used + in its place. + + - Array: converted to a string as defined for "flexible + strings". The 4th argument is ignored and a value of -1 + is assumed. + + - Int8Array or Uint8Array: is assumed to contain UTF-8 text, is + converted to a string. The 4th argument is ignored, replaced + by the array's byteLength value. + + - If it's an ArrayBuffer, it gets wrapped in a Uint8Array and + treated as that type. + + In each of those cases, the final argument (text destructor) is + ignored and capi.SQLITE_WASM_DEALLOC is assumed. + + A 3rd argument of `null` is treated as if it were a WASM pointer + of 0. + + If the 3rd argument is neither a WASM pointer nor one of the + above-described types, capi.SQLITE_MISUSE is returned. + + The first argument may be either an `sqlite3_stmt*` WASM + pointer or an sqlite3.oo1.Stmt instance. + + For consistency with the C API, it requires the same number of + arguments. It returns capi.SQLITE_MISUSE if passed any other + argument count. + + If client code needs to bind partial strings, it needs to + either parcel the string up before passing it in here or it + must pass in a WASM pointer for the 3rd argument and a valid + 4th-argument value, taking care not to pass a value which + truncates a multi-byte UTF-8 character. When passing + WASM-format strings, it is important that the final argument be + valid or unexpected content can result can result, or even a + crash if the application reads past the WASM heap bounds. + */ + sqlite3_bind_text: undefined/*installed later*/, + /** sqlite3_create_function_v2() differs from its native counterpart only in the following ways: @@ -425,7 +525,9 @@ self.sqlite3ApiBootstrap = function sqlite3ApiBootstrap( any encoding other than sqlite3.SQLITE_UTF8. The JS API does not currently support any other encoding and likely never will. This function does not replace that argument on its own - because it may contain other flags. + because it may contain other flags. As a special case, if + the bottom 4 bits of that argument are 0, SQLITE_UTF8 is + assumed. 2) Any of the four final arguments may be either WASM pointers (assumed to be function pointers) or JS Functions. In the @@ -433,6 +535,10 @@ self.sqlite3ApiBootstrap = function sqlite3ApiBootstrap( sqlite3.capi.wasm.installFunction() and that wrapper is passed on to the native implementation. + For consistency with the C API, it requires the same number of + arguments. It returns capi.SQLITE_MISUSE if passed any other + argument count. + The semantics of JS functions are: xFunc: is passed `(pCtx, ...values)`. Its return value becomes @@ -519,18 +625,18 @@ self.sqlite3ApiBootstrap = function sqlite3ApiBootstrap( WASM build is compiled with emcc's `-sALLOW_TABLE_GROWTH` flag. */ - sqlite3_create_function_v2: function( + sqlite3_create_function_v2: ( pDb, funcName, nArg, eTextRep, pApp, xFunc, xStep, xFinal, xDestroy - ){/*installed later*/}, + )=>{/*installed later*/}, /** Equivalent to passing the same arguments to sqlite3_create_function_v2(), with 0 as the final argument. */ - sqlite3_create_function:function( + sqlite3_create_function: ( pDb, funcName, nArg, eTextRep, pApp, xFunc, xStep, xFinal - ){/*installed later*/}, + )=>{/*installed later*/}, /** The sqlite3_create_window_function() JS wrapper differs from its native implementation in the exact same way that @@ -538,10 +644,10 @@ self.sqlite3ApiBootstrap = function sqlite3ApiBootstrap( xInverse(), is treated identically to xStep() by the wrapping layer. */ - sqlite3_create_window_function: function( + sqlite3_create_window_function: ( pDb, funcName, nArg, eTextRep, pApp, xStep, xFinal, xValue, xInverse, xDestroy - ){/*installed later*/}, + )=>{/*installed later*/}, /** The sqlite3_prepare_v3() binding handles two different uses with differing JS/WASM semantics: @@ -561,8 +667,8 @@ self.sqlite3ApiBootstrap = function sqlite3ApiBootstrap( terminated with a 0 byte. In usage (1), the 2nd argument must be of type string, - Uint8Array, or Int8Array (either of which is assumed to - hold SQL). If it is, this function assumes case (1) and + Uint8Array, Int8Array, or ArrayBuffer (all of which are assumed + to hold SQL). If it is, this function assumes case (1) and calls the underyling C function with the equivalent of: (pDb, sqlAsString, -1, prepFlags, ppStmt, null) @@ -654,7 +760,7 @@ self.sqlite3ApiBootstrap = function sqlite3ApiBootstrap( affirmBindableTypedArray, flexibleString, bigIntFits32, bigIntFits64, bigIntFitsDouble, isBindableTypedArray, - isInt32, isSQLableTypedArray, isTypedArray, + isInt32, isSQLableTypedArray, isTypedArray, typedArrayToString, isUIThread: ()=>(self.window===self && !!self.document), // is this true for ESM?: 'undefined'===typeof WorkerGlobalScope @@ -663,7 +769,7 @@ self.sqlite3ApiBootstrap = function sqlite3ApiBootstrap( toss3, typedArrayPart }; - + Object.assign(wasm, { /** Emscripten APIs have a deep-seated assumption that all pointers @@ -852,7 +958,7 @@ self.sqlite3ApiBootstrap = function sqlite3ApiBootstrap( const m = f._rx.exec(opt); rv[0] = (m ? m[1] : opt); rv[1] = m ? (f._rxInt.test(m[2]) ? +m[2] : m[2]) : true; - }; + }; } const rc = {}, ov = [0,0]; let i = 0, k; @@ -907,7 +1013,7 @@ self.sqlite3ApiBootstrap = function sqlite3ApiBootstrap( space managed by Emscripten's stack-management, so does not collide with Emscripten-provided stack allocation APIs. The memory lives in the WASM heap and can be used with routines such - as wasm.poke() and any wasm.heap8u().slice(). + as wasm.poke() and wasm.heap8u().slice(). */ wasm.pstack = Object.assign(Object.create(null),{ /** @@ -920,15 +1026,23 @@ self.sqlite3ApiBootstrap = function sqlite3ApiBootstrap( Attempts to allocate the given number of bytes from the pstack. On success, it zeroes out a block of memory of the given size, adjusts the pstack pointer, and returns a pointer - to the memory. On error, returns throws a WasmAllocError. The + to the memory. On error, throws a WasmAllocError. The memory must eventually be released using restore(). + If n is a string, it must be a WASM "IR" value in the set + accepted by wasm.sizeofIR(), which is mapped to the size of + that data type. If passed a string not in that set, it throws a + WasmAllocError. + This method always adjusts the given value to be a multiple of 8 bytes because failing to do so can lead to incorrect results when reading and writing 64-bit values from/to the WASM heap. Similarly, the returned address is always 8-byte aligned. */ - alloc: (n)=>{ + alloc: function(n){ + if('string'===typeof n && !(n = wasm.sizeofIR(n))){ + WasmAllocError.toss("Invalid value for pstack.alloc(",arguments[0],")"); + } return wasm.exports.sqlite3_wasm_pstack_alloc(n) || WasmAllocError.toss("Could not allocate",n, "bytes from the pstack."); @@ -938,6 +1052,8 @@ self.sqlite3ApiBootstrap = function sqlite3ApiBootstrap( returns the addresses as an array of n element, each holding the address of one chunk. + sz may optionally be an IR string accepted by wasm.sizeofIR(). + Throws a WasmAllocError if allocation fails. Example: @@ -946,13 +1062,14 @@ self.sqlite3ApiBootstrap = function sqlite3ApiBootstrap( const [p1, p2, p3] = wasm.pstack.allocChunks(3,4); ``` */ - allocChunks: (n,sz)=>{ + allocChunks: function(n,sz){ + if('string'===typeof sz && !(sz = wasm.sizeofIR(sz))){ + WasmAllocError.toss("Invalid size value for allocChunks(",arguments[1],")"); + } const mem = wasm.pstack.alloc(n * sz); const rc = []; let i = 0, offset = 0; - for(; i < n; offset = (sz * ++i)){ - rc.push(mem + offset); - } + for(; i < n; ++i, offset += sz) rc.push(mem + offset); return rc; }, /** @@ -1322,7 +1439,7 @@ self.sqlite3ApiBootstrap = function sqlite3ApiBootstrap( wasm.dealloc(pData); } }; - + if( util.isUIThread() ){ /* Features specific to the main window thread... */ @@ -1553,7 +1670,7 @@ self.sqlite3ApiBootstrap = function sqlite3ApiBootstrap( capi.sqlite3_result_error(pCtx, ''+e, -1); } }; - + /** This function passes its 2nd argument to one of the sqlite3_result_xyz() routines, depending on the type of that @@ -1569,7 +1686,7 @@ self.sqlite3ApiBootstrap = function sqlite3ApiBootstrap( - `bigint`: similar to `number` but will trigger an error if the value is too big to store in an int64. - `string`: `sqlite3_result_text()` - - Uint8Array or Int8Array: `sqlite3_result_blob()` + - Uint8Array or Int8Array or ArrayBuffer: `sqlite3_result_blob()` - `undefined`: is a no-op provided to simplify certain use cases. Anything else triggers `sqlite3_result_error()` with a @@ -1620,9 +1737,11 @@ self.sqlite3ApiBootstrap = function sqlite3ApiBootstrap( f(pCtx, val); break; } - case 'string': - capi.sqlite3_result_text(pCtx, val, -1, capi.SQLITE_TRANSIENT); + case 'string': { + const [p, n] = wasm.allocCString(val,true); + capi.sqlite3_result_text(pCtx, p, n, capi.SQLITE_WASM_DEALLOC); break; + } case 'object': if(null===val/*yes, typeof null === 'object'*/) { capi.sqlite3_result_null(pCtx); @@ -1631,7 +1750,7 @@ self.sqlite3ApiBootstrap = function sqlite3ApiBootstrap( const pBlob = wasm.allocFromTypedArray(val); capi.sqlite3_result_blob( pCtx, pBlob, val.byteLength, - wasm.exports[sqlite3.config.deallocExportName] + capi.SQLITE_WASM_DEALLOC ); break; } @@ -1651,8 +1770,8 @@ self.sqlite3ApiBootstrap = function sqlite3ApiBootstrap( argument of sqlite3_value_to_js(). If the sqlite3_column_value() returns NULL (e.g. because the column index is out of range), this function returns `undefined`, regardless of the 3rd - argument. 3rd argument is falsy and conversion fails, `undefined` - will be returned. + argument. If the 3rd argument is falsy and conversion fails, + `undefined` will be returned. Note that sqlite3_column_value() returns an "unprotected" value object, but in a single-threaded environment (like this one) @@ -1663,6 +1782,56 @@ self.sqlite3ApiBootstrap = function sqlite3ApiBootstrap( return (0===v) ? undefined : capi.sqlite3_value_to_js(v, throwIfCannotConvert); }; + /** + Internal impl of sqlite3_preupdate_new/old_js() and + sqlite3changeset_new/old_js(). + */ + const __newOldValue = function(pObj, iCol, impl){ + impl = capi[impl]; + if(!this.ptr) this.ptr = wasm.allocPtr(); + else wasm.pokePtr(this.ptr, 0); + const rc = impl(pObj, iCol, this.ptr); + if(rc) return SQLite3Error.toss(rc,arguments[2]+"() failed with code "+rc); + const pv = wasm.peekPtr(this.ptr); + return pv ? capi.sqlite3_value_to_js( pv, true ) : undefined; + }.bind(Object.create(null)); + + /** + A wrapper around sqlite3_preupdate_new() which fetches the + sqlite3_value at the given index and returns the result of + passing it to sqlite3_value_to_js(). Throws on error. + */ + capi.sqlite3_preupdate_new_js = + (pDb, iCol)=>__newOldValue(pDb, iCol, 'sqlite3_preupdate_new'); + + /** + The sqlite3_preupdate_old() counterpart of + sqlite3_preupdate_new_js(), with an identical interface. + */ + capi.sqlite3_preupdate_old_js = + (pDb, iCol)=>__newOldValue(pDb, iCol, 'sqlite3_preupdate_old'); + + /** + A wrapper around sqlite3changeset_new() which fetches the + sqlite3_value at the given index and returns the result of + passing it to sqlite3_value_to_js(). Throws on error. + + If sqlite3changeset_new() succeeds but has no value to report, + this function returns the undefined value, noting that undefined + is a valid conversion from an `sqlite3_value`, so is unambiguous. + */ + capi.sqlite3changeset_new_js = + (pChangesetIter, iCol) => __newOldValue(pChangesetIter, iCol, + 'sqlite3changeset_new'); + + /** + The sqlite3changeset_old() counterpart of + sqlite3changeset_new_js(), with an identical interface. + */ + capi.sqlite3changeset_old_js = + (pChangesetIter, iCol)=>__newOldValue(pChangesetIter, iCol, + 'sqlite3changeset_old'); + /* The remainder of the API will be set up in later steps. */ const sqlite3 = { WasmAllocError: WasmAllocError, diff --git a/ext/wasm/api/sqlite3-api-worker1.js b/ext/wasm/api/sqlite3-api-worker1.js index 62e2bb9bdf..58b9b00918 100644 --- a/ext/wasm/api/sqlite3-api-worker1.js +++ b/ext/wasm/api/sqlite3-api-worker1.js @@ -1,4 +1,4 @@ -/* +/** 2022-07-22 The author disclaims copyright to this source code. In place of a @@ -10,12 +10,12 @@ *********************************************************************** - This file implements the initializer for the sqlite3 "Worker API - #1", a very basic DB access API intended to be scripted from a main - window thread via Worker-style messages. Because of limitations in - that type of communication, this API is minimalistic and only - capable of serving relatively basic DB requests (e.g. it cannot - process nested query loops concurrently). + This file implements the initializer for SQLite's "Worker API #1", a + very basic DB access API intended to be scripted from a main window + thread via Worker-style messages. Because of limitations in that + type of communication, this API is minimalistic and only capable of + serving relatively basic DB requests (e.g. it cannot process nested + query loops concurrently). This file requires that the core C-style sqlite3 API and OO API #1 have been loaded. @@ -158,15 +158,6 @@ bigIntEnabled: bool. True if BigInt support is enabled. - wasmfsOpfsDir: path prefix, if any, _intended_ for use with - WASMFS OPFS persistent storage. - - wasmfsOpfsEnabled: true if persistent storage is enabled in the - current environment. Only files stored under wasmfsOpfsDir - will persist using that mechanism, however. It is legal to use - the non-WASMFS OPFS VFS to open a database via a URI-style - db filename. - vfsList: result of sqlite3.capi.sqlite3_js_vfs_list() } } @@ -449,7 +440,6 @@ sqlite3.initWorker1API = function(){ toss("Throwing because of simulateError flag."); } const rc = Object.create(null); - const pDir = sqlite3.capi.sqlite3_wasmfs_opfs_dir(); let byteArray, pVfs; oargs.vfs = args.vfs; if(isSpecialDbFilename(args.filename)){ @@ -475,15 +465,14 @@ sqlite3.initWorker1API = function(){ e.name+' creating '+args.filename+": "+e.message, { cause: e } - ); + ); }finally{ if(pMem) sqlite3.wasm.dealloc(pMem); } } const db = wState.open(oargs); rc.filename = db.filename; - rc.persistent = (!!pDir && db.filename.startsWith(pDir+'/')) - || !!sqlite3.capi.sqlite3_js_db_uses_vfs(db.pointer, "opfs"); + rc.persistent = !!sqlite3.capi.sqlite3_js_db_uses_vfs(db.pointer, "opfs"); rc.dbId = getDbId(db); rc.vfs = db.dbVfsName(); return rc; @@ -558,11 +547,10 @@ sqlite3.initWorker1API = function(){ 'config-get': function(){ const rc = Object.create(null), src = sqlite3.config; [ - 'wasmfsOpfsDir', 'bigIntEnabled' + 'bigIntEnabled' ].forEach(function(k){ if(Object.getOwnPropertyDescriptor(src, k)) rc[k] = src[k]; }); - rc.wasmfsOpfsEnabled = !!sqlite3.capi.sqlite3_wasmfs_opfs_dir(); rc.version = sqlite3.version; rc.vfsList = sqlite3.capi.sqlite3_js_vfs_list(); rc.opfsEnabled = !!sqlite3.opfs; diff --git a/ext/wasm/api/sqlite3-vfs-opfs.c-pp.js b/ext/wasm/api/sqlite3-vfs-opfs.c-pp.js index 50be31a2bd..451f0019c2 100644 --- a/ext/wasm/api/sqlite3-vfs-opfs.c-pp.js +++ b/ext/wasm/api/sqlite3-vfs-opfs.c-pp.js @@ -190,7 +190,7 @@ const installOpfsVfs = function callee(options){ s = metrics.s11n.deserialize = Object.create(null); s.count = s.time = 0; } - }/*metrics*/; + }/*metrics*/; const opfsVfs = new sqlite3_vfs(); const opfsIoMethods = new sqlite3_io_methods(); const promiseReject = function(err){ @@ -198,7 +198,9 @@ const installOpfsVfs = function callee(options){ return promiseReject_(err); }; const W = -//#if target=es6-module +//#if target=es6-bundler-friendly + new Worker(new URL("sqlite3-opfs-async-proxy.js", import.meta.url)); +//#elif target=es6-module new Worker(new URL(options.proxyUri, import.meta.url)); //#else new Worker(options.proxyUri); @@ -743,7 +745,7 @@ const installOpfsVfs = function callee(options){ let rc; try { rc = opRun('xRead',pFile, n, Number(offset64)); - if(0===rc || capi.SQLITE_IOERR_SHORT_READ===rc){ + if(0===rc || capi.SQLITE_IOERR_SHORT_READ===rc){ /** Results get written to the SharedArrayBuffer f.sabView. Because the heap is _not_ a SharedArrayBuffer, we have @@ -1291,7 +1293,7 @@ const installOpfsVfs = function callee(options){ }).catch(promiseReject); }else{ promiseResolve(sqlite3); - } + } }catch(e){ error(e); promiseReject(e); diff --git a/ext/wasm/api/sqlite3-wasm.c b/ext/wasm/api/sqlite3-wasm.c index a42ea68d42..e7513509ca 100644 --- a/ext/wasm/api/sqlite3-wasm.c +++ b/ext/wasm/api/sqlite3-wasm.c @@ -99,12 +99,21 @@ #ifndef SQLITE_ENABLE_FTS4 # define SQLITE_ENABLE_FTS4 1 #endif +#ifndef SQLITE_ENABLE_MATH_FUNCTIONS +# define SQLITE_ENABLE_MATH_FUNCTIONS 1 +#endif #ifndef SQLITE_ENABLE_OFFSET_SQL_FUNC # define SQLITE_ENABLE_OFFSET_SQL_FUNC 1 #endif +#ifndef SQLITE_ENABLE_PREUPDATE_HOOK +# define SQLITE_ENABLE_PREUPDATE_HOOK 1 /*required by session extension*/ +#endif #ifndef SQLITE_ENABLE_RTREE # define SQLITE_ENABLE_RTREE 1 #endif +#ifndef SQLITE_ENABLE_SESSION +# define SQLITE_ENABLE_SESSION 1 +#endif #ifndef SQLITE_ENABLE_STMTVTAB # define SQLITE_ENABLE_STMTVTAB 1 #endif @@ -320,11 +329,13 @@ SQLITE_WASM_KEEP int sqlite3_wasm_pstack_quota(void){ */ SQLITE_WASM_KEEP int sqlite3_wasm_db_error(sqlite3*db, int err_code, const char *zMsg){ - if( 0!=zMsg ){ - const int nMsg = sqlite3Strlen30(zMsg); - sqlite3ErrorWithMsg(db, err_code, "%.*s", nMsg, zMsg); - }else{ - sqlite3ErrorWithMsg(db, err_code, NULL); + if( db!=0 ){ + if( 0!=zMsg ){ + const int nMsg = sqlite3Strlen30(zMsg); + sqlite3ErrorWithMsg(db, err_code, "%.*s", nMsg, zMsg); + }else{ + sqlite3ErrorWithMsg(db, err_code, NULL); + } } return err_code; } @@ -453,6 +464,24 @@ const char * sqlite3_wasm_enum_json(void){ /* SQLITE_STATIC/TRANSIENT need to be handled explicitly as ** integers to avoid casting-related warnings. */ out("\"SQLITE_STATIC\":0, \"SQLITE_TRANSIENT\":-1"); + outf(",\"SQLITE_WASM_DEALLOC\": %lld", + (sqlite3_int64)(sqlite3_free)); + } _DefGroup; + + DefGroup(changeset){ + DefInt(SQLITE_CHANGESETSTART_INVERT); + DefInt(SQLITE_CHANGESETAPPLY_NOSAVEPOINT); + DefInt(SQLITE_CHANGESETAPPLY_INVERT); + + DefInt(SQLITE_CHANGESET_DATA); + DefInt(SQLITE_CHANGESET_NOTFOUND); + DefInt(SQLITE_CHANGESET_CONFLICT); + DefInt(SQLITE_CHANGESET_CONSTRAINT); + DefInt(SQLITE_CHANGESET_FOREIGN_KEY); + + DefInt(SQLITE_CHANGESET_OMIT); + DefInt(SQLITE_CHANGESET_REPLACE); + DefInt(SQLITE_CHANGESET_ABORT); } _DefGroup; DefGroup(config){ @@ -791,6 +820,11 @@ const char * sqlite3_wasm_enum_json(void){ DefInt(SQLITE_DESERIALIZE_RESIZEABLE); } _DefGroup; + DefGroup(session){ + DefInt(SQLITE_SESSION_CONFIG_STRMSIZE); + DefInt(SQLITE_SESSION_OBJCONFIG_SIZE); + } _DefGroup; + DefGroup(sqlite3Status){ DefInt(SQLITE_STATUS_MEMORY_USED); DefInt(SQLITE_STATUS_PAGECACHE_USED); @@ -1559,6 +1593,36 @@ int sqlite3_wasm_config_j(int op, sqlite3_int64 arg){ return sqlite3_config(op, arg); } +#if 0 +// Pending removal after verification of a workaround discussed in the +// forum post linked to below. +/* +** This function is NOT part of the sqlite3 public API. It is strictly +** for use by the sqlite project's own JS/WASM bindings. +** +** Returns a pointer to sqlite3_free(). In compliant browsers the +** return value, when passed to sqlite3.wasm.exports.functionEntry(), +** must resolve to the same function as +** sqlite3.wasm.exports.sqlite3_free. i.e. from a dev console where +** sqlite3 is exported globally, the following must be true: +** +** ``` +** sqlite3.wasm.functionEntry( +** sqlite3.wasm.exports.sqlite3_wasm_ptr_to_sqlite3_free() +** ) === sqlite3.wasm.exports.sqlite3_free +** ``` +** +** Using a function to return this pointer, as opposed to exporting it +** via sqlite3_wasm_enum_json(), is an attempt to work around a +** Safari-specific quirk covered at +** https://sqlite.org/forum/info/e5b20e1feb37a19a. +**/ +SQLITE_WASM_KEEP +void * sqlite3_wasm_ptr_to_sqlite3_free(void){ + return (void*)sqlite3_free; +} +#endif + #if defined(__EMSCRIPTEN__) && defined(SQLITE_ENABLE_WASMFS) #include @@ -1618,6 +1682,11 @@ int sqlite3_wasm_test_intptr(int * p){ return *p = *p * 2; } +SQLITE_WASM_KEEP +void * sqlite3_wasm_test_voidptr(void * p){ + return p; +} + SQLITE_WASM_KEEP int64_t sqlite3_wasm_test_int64_max(void){ return (int64_t)0x7fffffffffffffff; diff --git a/ext/wasm/api/sqlite3-worker1-promiser.js b/ext/wasm/api/sqlite3-worker1-promiser.c-pp.js similarity index 99% rename from ext/wasm/api/sqlite3-worker1-promiser.js rename to ext/wasm/api/sqlite3-worker1-promiser.c-pp.js index 7360512d49..1689d34802 100644 --- a/ext/wasm/api/sqlite3-worker1-promiser.js +++ b/ext/wasm/api/sqlite3-worker1-promiser.c-pp.js @@ -238,6 +238,9 @@ self.sqlite3Worker1Promiser = function callee(config = callee.defaultConfig){ }/*sqlite3Worker1Promiser()*/; self.sqlite3Worker1Promiser.defaultConfig = { worker: function(){ +//#if target=es6-bundler-friendly + return new Worker("sqlite3-worker1.js"); +//#else let theJs = "sqlite3-worker1.js"; if(this.currentScript){ const src = this.currentScript.src.split('/'); @@ -252,6 +255,7 @@ self.sqlite3Worker1Promiser.defaultConfig = { } } return new Worker(theJs + self.location.search); +//#endif }.bind({ currentScript: self?.document?.currentScript }), diff --git a/ext/wasm/api/sqlite3-worker1.js b/ext/wasm/api/sqlite3-worker1.c-pp.js similarity index 94% rename from ext/wasm/api/sqlite3-worker1.js rename to ext/wasm/api/sqlite3-worker1.c-pp.js index 942437908f..9e9c3ac426 100644 --- a/ext/wasm/api/sqlite3-worker1.js +++ b/ext/wasm/api/sqlite3-worker1.c-pp.js @@ -33,6 +33,9 @@ */ "use strict"; (()=>{ +//#if target=es6-bundler-friendly + importScripts('sqlite3.js'); +//#else const urlParams = new URL(self.location.href).searchParams; let theJs = 'sqlite3.js'; if(urlParams.has('sqlite3.dir')){ @@ -40,10 +43,8 @@ } //console.warn("worker1 theJs =",theJs); importScripts(theJs); +//#endif sqlite3InitModule().then((sqlite3)=>{ - if(sqlite3.capi.sqlite3_wasmfs_opfs_dir){ - sqlite3.capi.sqlite3_wasmfs_opfs_dir(); - } sqlite3.initWorker1API(); }); })(); diff --git a/ext/wasm/c-pp.c b/ext/wasm/c-pp.c index 881c009acb..c439a0d091 100644 --- a/ext/wasm/c-pp.c +++ b/ext/wasm/c-pp.c @@ -51,10 +51,10 @@ ** ** Design note: this code makes use of sqlite3. Though not _strictly_ ** needed in order to implement it, this tool was specifically created -** for potential use with the sqlite3 project's own JavaScript code, -** so there's no reason not to make use of it to do some of the heavy -** lifting. It does not require any cutting-edge sqlite3 features and -** should be usable with any version which supports `WITHOUT ROWID`. +** for use with the sqlite3 project's own JavaScript code, so there's +** no reason not to make use of it to do some of the heavy lifting. It +** does not require any cutting-edge sqlite3 features and should be +** usable with any version which supports `WITHOUT ROWID`. ** ** Author(s): ** @@ -603,18 +603,9 @@ void g_stderr(char const *zFmt, ...){ va_end(va); } -#if 0 -void cmpp_t_outf(CmppTokenizer * t, char const *zFmt, ...){ - if(!CT_skip(t)){ - va_list va; - va_start(va, zFmt); - vfprintf(g.out.pFile, zFmt, va); - va_end(va); - } -} -#endif - void cmpp_t_out(CmppTokenizer * t, void const *z, unsigned int n){ + g_debug(3,("CT_skipLevel() ?= %d\n",CT_skipLevel(t))); + g_debug(3,("CT_skip() ?= %d\n",CT_skip(t))); if(!CT_skip(t)){ if(1!=fwrite(z, n, 1, g.out.pFile)){ int const err = errno; @@ -631,18 +622,28 @@ void CmppLevel_push(CmppTokenizer * const t){ g.zDelim, CmppLevel_Max); } pPrev = &CT_level(t); + g_debug(3,("push from tokenizer level=%u flags=%04x\n", t->level.ndx, pPrev->flags)); p = &t->level.stack[++t->level.ndx]; *p = CmppLevel_empty; p->token = t->token; p->flags = (CmppLevel_F_INHERIT_MASK & pPrev->flags); if(CLvl_skip(pPrev)) p->flags |= CmppLevel_F_ELIDE; + g_debug(3,("push to tokenizer level=%u flags=%04x\n", t->level.ndx, p->flags)); } void CmppLevel_pop(CmppTokenizer * const t){ if(!t->level.ndx){ fatal("Internal error: CmppLevel_pop() at the top of the stack"); } + g_debug(3,("pop from tokenizer level=%u, flags=%04x skipLevel?=%d\n", t->level.ndx, + t->level.stack[t->level.ndx].flags, CT_skipLevel(t))); + g_debug(3,("CT_skipLevel() ?= %d\n",CT_skipLevel(t))); + g_debug(3,("CT_skip() ?= %d\n",CT_skip(t))); t->level.stack[t->level.ndx--] = CmppLevel_empty; + g_debug(3,("pop to tokenizer level=%u, flags=%04x\n", t->level.ndx, + t->level.stack[t->level.ndx].flags)); + g_debug(3,("CT_skipLevel() ?= %d\n",CT_skipLevel(t))); + g_debug(3,("CT_skip() ?= %d\n",CT_skip(t))); } CmppLevel * CmppLevel_get(CmppTokenizer * const t){ @@ -776,7 +777,7 @@ int db_define_has(const char * zName){ assert(SQLITE_DONE==rc); rc = 0; } - g_debug(1,("define has [%s] = %d\n",zName, rc)); + g_debug(1,("defined [%s] ?= %d\n",zName, rc)); sqlite3_clear_bindings(g.stmt.defHas); sqlite3_reset(g.stmt.defHas); return rc; @@ -1220,10 +1221,11 @@ static void cmpp_kwd_if(CmppKeyword const * pKw, CmppTokenizer *t){ if(TT_IfNot==pKw->ttype || TT_ElifNot==pKw->ttype) buul = !buul; if(buul){ CT_pstate(t) = tmpState = TS_IfPassed; - CT_skipLevel(t) = 0; + CT_skipLevel(t) = 0; }else{ CT_pstate(t) = TS_If /* also for TT_IfNot, TT_Elif, TT_ElifNot */; CT_skipLevel(t) = 1; + g_debug(3,("setting CT_skipLevel = 1 @ level %d\n", t->level.ndx)); } if(TT_If==pKw->ttype || TT_IfNot==pKw->ttype){ unsigned const lvlIf = t->level.ndx; @@ -1234,10 +1236,13 @@ static void cmpp_kwd_if(CmppKeyword const * pKw, CmppTokenizer *t){ assert(TT_EndIf == t->token.ttype); break; } +#if 0 if(TS_IfPassed==tmpState){ tmpState = TS_Start; t->level.stack[lvlIf].flags |= CmppLevel_F_ELIDE; + g_debug(1,("Setting ELIDE for TS_IfPassed @ lv %d (lvlIf=%d)\n", t->level.ndx, lvlIf)); } +#endif } if(lvlIf <= t->level.ndx){ cmpp_kwd__err_prefix(pKw, t, NULL); diff --git a/ext/wasm/common/whwasmutil.js b/ext/wasm/common/whwasmutil.js index 87cc100f33..e50210206f 100644 --- a/ext/wasm/common/whwasmutil.js +++ b/ext/wasm/common/whwasmutil.js @@ -246,6 +246,25 @@ self.WhWasmUtilInstaller = function(target){ cache.utf8Decoder = new TextDecoder(); cache.utf8Encoder = new TextEncoder('utf-8'); + /** + For the given IR-like string in the set ('i8', 'i16', 'i32', + 'f32', 'float', 'i64', 'f64', 'double', '*'), or any string value + ending in '*', returns the sizeof for that value + (target.ptrSizeof in the latter case). For any other value, it + returns the undefined value. + */ + target.sizeofIR = (n)=>{ + switch(n){ + case 'i8': return 1; + case 'i16': return 2; + case 'i32': case 'f32': case 'float': return 4; + case 'i64': case 'f64': case 'double': return 8; + case '*': return ptrSizeof; + default: + return (''+n).endsWith('*') ? ptrSizeof : undefined; + } + }; + /** If (cache.heapSize !== cache.memory.buffer.byteLength), i.e. if the heap has grown since the last call, updates cache.HEAPxyz. @@ -359,7 +378,8 @@ self.WhWasmUtilInstaller = function(target){ /** Given a function pointer, returns the WASM function table entry - if found, else returns a falsy value. + if found, else returns a falsy value: undefined if fptr is out of + range or null if it's in range but the table entry is empty. */ target.functionEntry = function(fptr){ const ft = target.functionTable(); @@ -446,7 +466,7 @@ self.WhWasmUtilInstaller = function(target){ type(s) of the given function signature, or throws if the signature is invalid. */ /******** // only valid for use with the WebAssembly.Function ctor, which - // is not yet documented on MDN. + // is not yet documented on MDN. sigToWasm: function(sig){ const rc = {parameters:[], results: []}; if('v'!==sig[0]) rc.results.push(f.sigTypes(sig[0])); @@ -552,13 +572,12 @@ self.WhWasmUtilInstaller = function(target){ cache.scopedAlloc[cache.scopedAlloc.length-1].push(ptr); } }catch(e){ - if(ptr===oldLen) cache.freeFuncIndexes.push(oldLen); - throw e; + if(ptr===oldLen) cache.freeFuncIndexes.push(oldLen); + throw e; } - return ptr; + return ptr; }; - /** Expects a JS function and signature, exactly as for this.jsFuncToWasm(). It uses that function to create a @@ -612,8 +631,13 @@ self.WhWasmUtilInstaller = function(target){ installFunction() has been called and results are undefined if ptr was not returned by that function. The returned function may be passed back to installFunction() to reinstall it. + + To simplify certain use cases, if passed a falsy non-0 value + (noting that 0 is a valid function table index), this function + has no side effects and returns undefined. */ target.uninstallFunction = function(ptr){ + if(!ptr && 0!==ptr) return undefined; const fi = cache.freeFuncIndexes; const ft = target.functionTable(); fi.push(ptr); @@ -730,7 +754,7 @@ self.WhWasmUtilInstaller = function(target){ ? cache : heapWrappers(); for(const p of (Array.isArray(ptr) ? ptr : [ptr])){ switch (type) { - case 'i1': + case 'i1': case 'i8': c.HEAP8[p>>0] = value; continue; case 'i16': c.HEAP16[p>>1] = value; continue; case 'i32': c.HEAP32[p>>2] = value; continue; @@ -803,7 +827,6 @@ self.WhWasmUtilInstaller = function(target){ /** f64 variant of poke8(). */ target.poke64f = (ptr, value)=>target.poke(ptr, value, 'f64'); - /** Deprecated alias for getMemValue() */ target.getMemValue = target.peek; /** Deprecated alias for peekPtr() */ @@ -1351,7 +1374,7 @@ self.WhWasmUtilInstaller = function(target){ const __argcMismatch = (f,n)=>toss(f+"() requires",n,"argument(s)."); - + /** Looks up a WASM-exported function named fname from target.exports. If found, it is called, passed all remaining @@ -1390,18 +1413,26 @@ self.WhWasmUtilInstaller = function(target){ if(target.bigIntEnabled){ xArg.set('i64', (i)=>BigInt(i)); } - xArg.set('i32', (i)=>(i | 0)); - xArg.set('i16', (i)=>((i | 0) & 0xFFFF)); - xArg.set('i8', (i)=>((i | 0) & 0xFF)); - xArg.set('f32', (i)=>Number(i).valueOf()); - xArg.set('float', xArg.get('f32')); - xArg.set('f64', xArg.get('f32')); - xArg.set('double', xArg.get('f64')); - xArg.set('int', xArg.get('i32')); - xResult.set('*', xArg.get(ptrIR)); - xResult.set('pointer', xArg.get(ptrIR)); - xArg.set('**', xArg.get(ptrIR)); - xResult.set('number', (v)=>Number(v)); + const __xArgPtr = 'i32' === ptrIR + ? ((i)=>(i | 0)) : ((i)=>(BigInt(i) | BigInt(0))); + xArg.set('i32', __xArgPtr ) + .set('i16', (i)=>((i | 0) & 0xFFFF)) + .set('i8', (i)=>((i | 0) & 0xFF)) + .set('f32', (i)=>Number(i).valueOf()) + .set('float', xArg.get('f32')) + .set('f64', xArg.get('f32')) + .set('double', xArg.get('f64')) + .set('int', xArg.get('i32')) + .set('null', (i)=>i) + .set(null, xArg.get('null')) + .set('**', __xArgPtr) + .set('*', __xArgPtr); + xResult.set('*', __xArgPtr) + .set('pointer', __xArgPtr) + .set('number', (v)=>Number(v)) + .set('void', (v)=>undefined) + .set('null', (v)=>v) + .set(null, xResult.get('null')); { /* Copy certain xArg[...] handlers to xResult[...] and add pointer-style variants of them. */ @@ -1430,47 +1461,30 @@ self.WhWasmUtilInstaller = function(target){ TODO? Permit an Int8Array/Uint8Array and convert it to a string? Would that be too much magic concentrated in one place, ready to - backfire? + backfire? We handle that at the client level in sqlite3 with a + custom argument converter. */ - xArg.set('string', function(v){ + const __xArgString = function(v){ if('string'===typeof v) return target.scopedAllocCString(v); - return v ? xArg.get(ptrIR)(v) : null; - }); - xArg.set('utf8', xArg.get('string')); - xArg.set('pointer', xArg.get('string')); - xArg.set('*', xArg.get('string')); + return v ? __xArgPtr(v) : null; + }; + xArg.set('string', __xArgString) + .set('utf8', __xArgString) + .set('pointer', __xArgString); + //xArg.set('*', __xArgString); - xResult.set('string', (i)=>target.cstrToJs(i)); - xResult.set('utf8', xResult.get('string')); - xResult.set('string:dealloc', (i)=>{ - try { return i ? target.cstrToJs(i) : null } - finally{ target.dealloc(i) } - }); - xResult.set('utf8:dealloc', xResult.get('string:dealloc')); - xResult.set('json', (i)=>JSON.parse(target.cstrToJs(i))); - xResult.set('json:dealloc', (i)=>{ - try{ return i ? JSON.parse(target.cstrToJs(i)) : null } - finally{ target.dealloc(i) } - }); - xResult.set('void', (v)=>undefined); - xResult.set('null', (v)=>v); - - if(0){ - /*** - This idea can't currently work because we don't know the - signature for the func and don't have a way for the user to - convey it. To do this we likely need to be able to match - arg/result handlers by a regex, but that would incur an O(N) - cost as we check the regex one at a time. Another use case for - such a thing would be pseudotypes like "int:-1" to say that - the value will always be treated like -1 (which has a useful - case in the sqlite3 bindings). - */ - xArg.set('func-ptr', function(v){ - if(!(v instanceof Function)) return xArg.get(ptrIR); - const f = target.jsFuncToWasm(v, WHAT_SIGNATURE); + xResult.set('string', (i)=>target.cstrToJs(i)) + .set('utf8', xResult.get('string')) + .set('string:dealloc', (i)=>{ + try { return i ? target.cstrToJs(i) : null } + finally{ target.dealloc(i) } + }) + .set('utf8:dealloc', xResult.get('string:dealloc')) + .set('json', (i)=>JSON.parse(target.cstrToJs(i))) + .set('json:dealloc', (i)=>{ + try{ return i ? JSON.parse(target.cstrToJs(i)) : null } + finally{ target.dealloc(i) } }); - } /** Internal-use-only base class for FuncPtrAdapter and potentially @@ -1485,7 +1499,7 @@ self.WhWasmUtilInstaller = function(target){ */ const AbstractArgAdapter = class { constructor(opt){ - this.name = opt.name; + this.name = opt.name || 'unnamed adapter'; } /** Gets called via xWrap() to "convert" v to whatever type @@ -1501,7 +1515,7 @@ self.WhWasmUtilInstaller = function(target){ types are indeterminate, whereas the LHS values will be WASM-compatible values by the time this is called. */ - convertArg(v,argIndex,argv){ + convertArg(v,argv,argIndex){ toss("AbstractArgAdapter must be subclassed."); } }; @@ -1537,8 +1551,8 @@ self.WhWasmUtilInstaller = function(target){ value. This is only useful for use with "global" functions which do not rely on any state other than this function pointer. If the being-converted function pointer is intended - to be mapped to some sort of state object (e.g. an sqlite3*) - then "context" (see below) is the proper mode. + to be mapped to some sort of state object (e.g. an + `sqlite3*`) then "context" (see below) is the proper mode. - 'context': similar to singleton mode but for a given "context", where the context is a key provided by the user @@ -1546,24 +1560,42 @@ self.WhWasmUtilInstaller = function(target){ context. This mode is the default if bindScope is _not_ set but a property named contextKey (described below) is. + - 'permanent': the function is installed and left there + forever. There is no way to recover its pointer address + later on. + + - callProxy (function): if set, this must be a function which + will act as a proxy for any "converted" JS function. It is + passed the being-converted function value and must return + either that function or a function which acts on its + behalf. The returned function will be the one which gets + installed into the WASM function table. The proxy must perform + any required argument conversion (noting that it will be called + from C code, so will receive C-format arguments) before passing + them on to the being-converted function. Whether or not the + proxy itself must return a value depends on the context. If it + does, it must be a WASM-friendly value, as it will be returning + from a call made from native code. + - contextKey (function): is only used if bindScope is 'context' or if bindScope is not set and this function is, in which case - 'context' is assumed. This function gets passed - (argIndex,argv), where argIndex is the index of _this_ function - pointer in its _wrapping_ function's arguments and argv is the - _current_ still-being-xWrap()-processed args array. All - arguments to the left of argIndex will have been processed by - xWrap() by the time this is called. argv[argIndex] will be the - value the user passed in to the xWrap()'d function for the - argument this FuncPtrAdapter is mapped to. Arguments to the - right of argv[argIndex] will not yet have been converted before - this is called. The function must return a key which uniquely + 'context' is assumed. This function gets bound to this object, + so its "this" is this object. It gets passed (argv,argIndex), + where argIndex is the index of _this_ function pointer in its + _wrapping_ function's arguments and argv is the _current_ + still-being-xWrap()-processed args array. All arguments to the + left of argIndex will have been processed by xWrap() by the + time this is called. argv[argIndex] will be the value the user + passed in to the xWrap()'d function for the argument this + FuncPtrAdapter is mapped to. Arguments to the right of + argv[argIndex] will not yet have been converted before this is + called. The function must return a key which uniquely identifies this function mapping context for _this_ FuncPtrAdapter instance (other instances are not considered), taking into account that C functions often take some sort of state object as one or more of their arguments. As an example, if the xWrap()'d function takes `(int,T*,functionPtr,X*)` and - this FuncPtrAdapter is the argv[2]nd arg, contextKey(2,argv) + this FuncPtrAdapter is the argv[2]nd arg, contextKey(argv,2) might return 'T@'+argv[1], or even just argv[1]. Note, however, that the (X*) argument will not yet have been processed by the time this is called and should not be used as @@ -1575,42 +1607,77 @@ self.WhWasmUtilInstaller = function(target){ use their pointers in the key because most C-strings in this constellation are transient. - Yes, that ^^^ is a bit awkward, but it's what we have. + Yes, that ^^^ is quite awkward, but it's what we have. The constructor only saves the above state for later, and does not actually bind any functions. Its convertArg() method is called via xWrap() to perform any bindings. - Shortcomings: function pointers which include C-string arguments - may still need a level of hand-written wrappers around them, - depending on how they're used, in order to provide the client - with JS strings. + Shortcomings: + + - These "reverse" bindings, i.e. calling into a JS-defined + function from a WASM-defined function (the generated proxy + wrapper), lack all type conversion support. That means, for + example, that... + + - Function pointers which include C-string arguments may still + need a level of hand-written wrappers around them, depending on + how they're used, in order to provide the client with JS + strings. Alternately, clients will need to perform such conversions + on their own, e.g. using cstrtojs(). Or maybe we can find a way + to perform such conversions here, via addition of an xWrap()-style + function signature to the options argument. */ xArg.FuncPtrAdapter = class FuncPtrAdapter extends AbstractArgAdapter { constructor(opt) { super(opt); + if(xArg.FuncPtrAdapter.warnOnUse){ + console.warn('xArg.FuncPtrAdapter is an internal-only API', + 'and is not intended to be invoked from', + 'client-level code. Invoked with:',opt); + } this.signature = opt.signature; - if(!opt.bindScope && (opt.contextKey instanceof Function)){ - opt.bindScope = 'context'; - }else if(FuncPtrAdapter.bindScopes.indexOf(opt.bindScope)<0){ + if(opt.contextKey instanceof Function){ + this.contextKey = opt.contextKey; + if(!opt.bindScope) opt.bindScope = 'context'; + } + this.bindScope = opt.bindScope + || toss("FuncPtrAdapter options requires a bindScope (explicit or implied)."); + if(FuncPtrAdapter.bindScopes.indexOf(opt.bindScope)<0){ toss("Invalid options.bindScope ("+opt.bindMod+") for FuncPtrAdapter. "+ "Expecting one of: ("+FuncPtrAdapter.bindScopes.join(', ')+')'); } - this.bindScope = opt.bindScope; - if(opt.contextKey) this.contextKey = opt.contextKey /*else inherit one*/; this.isTransient = 'transient'===this.bindScope; this.isContext = 'context'===this.bindScope; - if( ('singleton'===this.bindScope) ) this.singleton = []; - else this.singleton = undefined; + this.isPermanent = 'permanent'===this.bindScope; + this.singleton = ('singleton'===this.bindScope) ? [] : undefined; //console.warn("FuncPtrAdapter()",opt,this); + this.callProxy = (opt.callProxy instanceof Function) + ? opt.callProxy : undefined; } + /** If true, the constructor emits a warning. The intent is that + this be set to true after bootstrapping of the higher-level + client library is complete, to warn downstream clients that + they shouldn't be relying on this implemenation detail which + does not have a stable interface. */ + static warnOnUse = false; + + /** If true, convertArg() will FuncPtrAdapter.debugOut() when it + (un)installs a function binding to/from WASM. Note that + deinstallation of bindScope=transient bindings happens + via scopedAllocPop() so will not be output. */ + static debugFuncInstall = false; + + /** Function used for debug output. */ + static debugOut = console.debug.bind(console); + static bindScopes = [ - 'transient', 'context', 'singleton' + 'transient', 'context', 'singleton', 'permanent' ]; /* Dummy impl. Overwritten per-instance as needed. */ - contextKey(argIndex,argv){ + contextKey(argv,argIndex){ return this; } @@ -1643,18 +1710,28 @@ self.WhWasmUtilInstaller = function(target){ See the parent class's convertArg() docs for details on what exactly the 2nd and 3rd arguments are. */ - convertArg(v,argIndex,argv){ - //console.warn("FuncPtrAdapter.convertArg()",this.signature,this.transient,v); + convertArg(v,argv,argIndex){ + //FuncPtrAdapter.debugOut("FuncPtrAdapter.convertArg()",this.signature,this.transient,v); let pair = this.singleton; if(!pair && this.isContext){ - pair = this.contextMap(this.contextKey(argIndex, argv)); + pair = this.contextMap(this.contextKey(argv,argIndex)); } if(pair && pair[0]===v) return pair[1]; if(v instanceof Function){ + /* Install a WASM binding and return its pointer. */ + if(this.callProxy) v = this.callProxy(v); const fp = __installFunction(v, this.signature, this.isTransient); + if(FuncPtrAdapter.debugFuncInstall){ + FuncPtrAdapter.debugOut("FuncPtrAdapter installed", this, + this.contextKey(argv,argIndex), '@'+fp, v); + } if(pair){ /* Replace existing stashed mapping */ if(pair[1]){ + if(FuncPtrAdapter.debugFuncInstall){ + FuncPtrAdapter.debugOut("FuncPtrAdapter uninstalling", this, + this.contextKey(argv,argIndex), '@'+pair[1], v); + } try{target.uninstallFunction(pair[1])} catch(e){/*ignored*/} } @@ -1665,10 +1742,13 @@ self.WhWasmUtilInstaller = function(target){ }else if(target.isPtr(v) || null===v || undefined===v){ if(pair && pair[1] && pair[1]!==v){ /* uninstall stashed mapping and replace stashed mapping with v. */ - //console.warn("FuncPtrAdapter is uninstalling function", this.contextKey(argIndex,argv),v); + if(FuncPtrAdapter.debugFuncInstall){ + FuncPtrAdapter.debugOut("FuncPtrAdapter uninstalling", this, + this.contextKey(argv,argIndex), '@'+pair[1], v); + } try{target.uninstallFunction(pair[1])} catch(e){/*ignored*/} - pair[0] = pair[1] = (v || 0); + pair[0] = pair[1] = (v | 0); } return v || 0; }else{ @@ -1688,35 +1768,41 @@ self.WhWasmUtilInstaller = function(target){ (t)=>xResult.get(t) || toss("Result adapter not found:",t); cache.xWrap.convertArg = (t,...args)=>__xArgAdapterCheck(t)(...args); + cache.xWrap.convertArgNoCheck = (t,...args)=>xArg.get(t)(...args); + cache.xWrap.convertResult = (t,v)=>(null===t ? v : (t ? __xResultAdapterCheck(t)(v) : undefined)); + cache.xWrap.convertResultNoCheck = + (t,v)=>(null===t ? v : (t ? xResult.get(t)(v) : undefined)); /** - Creates a wrapper for the WASM-exported function fname. Uses - xGet() to fetch the exported function (which throws on - error) and returns either that function or a wrapper for that + Creates a wrapper for another function which converts the arguments + of the wrapper to argument types accepted by the wrapped function, + then converts the wrapped function's result to another form + for the wrapper. + + The first argument must be one of: + + - A JavaScript function. + - The name of a WASM-exported function. In the latter case xGet() + is used to fetch the exported function, which throws if it's not + found. + - A pointer into the indirect function table. e.g. a pointer + returned from target.installFunction(). + + It returns either the passed-in function or a wrapper for that function which converts the JS-side argument types into WASM-side - types and converts the result type. If the function takes no - arguments and resultType is `null` then the function is returned - as-is, else a wrapper is created for it to adapt its arguments - and result value, as described below. + types and converts the result type. - (If you're familiar with Emscripten's ccall() and cwrap(), this - function is essentially cwrap() on steroids.) - - This function's arguments are: - - - fname: the exported function's name. xGet() is used to fetch - this, so will throw if no exported function is found with that - name. - - - resultType: the name of the result type. A literal `null` means - to return the original function's value as-is (mnemonic: there - is "null" conversion going on). Literal `undefined` or the - string `"void"` mean to ignore the function's result and return - `undefined`. Aside from those two special cases, it may be one - of the values described below or any mapping installed by the - client using xWrap.resultAdapter(). + The second argument, `resultType`, describes the conversion for + the wrapped functions result. A literal `null` or the string + `'null'` both mean to return the original function's value as-is + (mnemonic: there is "null" conversion going on). Literal + `undefined` or the string `"void"` both mean to ignore the + function's result and return `undefined`. Aside from those two + special cases, the `resultType` value may be one of the values + described below or any mapping installed by the client using + xWrap.resultAdapter(). If passed 3 arguments and the final one is an array, that array must contain a list of type names (see below) for adapting the @@ -1730,6 +1816,12 @@ self.WhWasmUtilInstaller = function(target){ xWrap('funcname', 'i32', ['string', 'f64']); ``` + This function enforces that the given list of arguments has the + same arity as the being-wrapped function (as defined by its + `length` property) and it will throw if that is not the case. + Similarly, the created wrapper will throw if passed a differing + argument count. + Type names are symbolic names which map the arguments to an adapter function to convert, if needed, the value before passing it on to WASM or to convert a return result from WASM. The list @@ -1742,10 +1834,13 @@ self.WhWasmUtilInstaller = function(target){ - `N*` (args): a type name in the form `N*`, where N is a numeric type name, is treated the same as WASM pointer. - - `*` and `pointer` (args): have multple semantics. They - behave exactly as described below for `string` args. + - `*` and `pointer` (args): are assumed to be WASM pointer values + and are returned coerced to an appropriately-sized pointer + value (i32 or i64). Non-numeric values will coerce to 0 and + out-of-range values will have undefined results (just as with + any pointer misuse). - - `*` and `pointer` (results): are aliases for the current + - `*` and `pointer` (results): aliases for the current WASM pointer numeric type. - `**` (args): is simply a descriptive alias for the WASM pointer @@ -1766,6 +1861,10 @@ self.WhWasmUtilInstaller = function(target){ Non-numeric conversions include: + - `null` literal or `"null"` string (args and results): perform + no translation and pass the arg on as-is. This is primarily + useful for results but may have a use or two for arguments. + - `string` or `utf8` (args): has two different semantics in order to accommodate various uses of certain C APIs (e.g. output-style strings)... @@ -1780,9 +1879,9 @@ self.WhWasmUtilInstaller = function(target){ client has already allocated and it's passed on as a WASM pointer. - - `string` or `utf8` (results): treats the result value as a - const C-string, encoded as UTF-8, copies it to a JS string, - and returns that JS string. + - `string` or `utf8` (results): treats the result value as a + const C-string, encoded as UTF-8, copies it to a JS string, + and returns that JS string. - `string:dealloc` or `utf8:dealloc) (results): treats the result value as a non-const UTF-8 C-string, ownership of which has just been @@ -1819,6 +1918,11 @@ self.WhWasmUtilInstaller = function(target){ type conversions are valid for both arguments _and_ result types as they often have different memory ownership requirements. + Design note: the ability to pass in a JS function as the first + argument is of relatively limited use, primarily for testing + argument and result converters. JS functions, by and large, will + not want to deal with C-type arguments. + TODOs: - Figure out how/whether we can (semi-)transparently handle @@ -1839,14 +1943,21 @@ self.WhWasmUtilInstaller = function(target){ abstracting it into this API (and taking on the associated costs) may well not make good sense. */ - target.xWrap = function(fname, resultType, ...argTypes){ + target.xWrap = function(fArg, resultType, ...argTypes){ if(3===arguments.length && Array.isArray(arguments[2])){ argTypes = arguments[2]; } - const xf = target.xGet(fname); - if(argTypes.length!==xf.length) __argcMismatch(fname, xf.length); + if(target.isPtr(fArg)){ + fArg = target.functionEntry(fArg) + || toss("Function pointer not found in WASM function table."); + } + const fIsFunc = (fArg instanceof Function); + const xf = fIsFunc ? fArg : target.xGet(fArg); + if(fIsFunc) fArg = xf.name || 'unnamed function'; + if(argTypes.length!==xf.length) __argcMismatch(fArg, xf.length); if((null===resultType) && 0===xf.length){ - /* Func taking no args with an as-is return. We don't need a wrapper. */ + /* Func taking no args with an as-is return. We don't need a wrapper. + We forego the argc check here, though. */ return xf; } /*Verify the arg type conversions are valid...*/; @@ -1859,30 +1970,33 @@ self.WhWasmUtilInstaller = function(target){ if(0===xf.length){ // No args to convert, so we can create a simpler wrapper... return (...args)=>(args.length - ? __argcMismatch(fname, xf.length) + ? __argcMismatch(fArg, xf.length) : cxw.convertResult(resultType, xf.call(null))); } return function(...args){ - if(args.length!==xf.length) __argcMismatch(fname, xf.length); + if(args.length!==xf.length) __argcMismatch(fArg, xf.length); const scope = target.scopedAllocPush(); try{ /* - Maintenance reminder re. arguments passed to convertArgs(): + Maintenance reminder re. arguments passed to convertArg(): The public interface of argument adapters is that they take ONE argument and return a (possibly) converted result for it. The passing-on of arguments after the first is an - internal impl. detail for the sake of AbstractArgAdapter, and - not to be relied on or documented for other cases. The fact - that this is how AbstractArgAdapter.convertArgs() gets its 2nd+ - arguments, and how FuncPtrAdapter.contextKey() gets its - args, is also an implementation detail and subject to - change. i.e. the public interface of 1 argument is stable. - The fact that any arguments may be passed in after that one, - and what those arguments are, is _not_ part of the public - interface and is _not_ stable. + internal implementation detail for the sake of + AbstractArgAdapter, and not to be relied on or documented + for other cases. The fact that this is how + AbstractArgAdapter.convertArgs() gets its 2nd+ arguments, + and how FuncPtrAdapter.contextKey() gets its args, is also + an implementation detail and subject to change. i.e. the + public interface of 1 argument is stable. The fact that any + arguments may be passed in after that one, and what those + arguments are, is _not_ part of the public interface and is + _not_ stable. */ - for(const i in args) args[i] = cxw.convertArg(argTypes[i], args[i], i, args); - return cxw.convertResult(resultType, xf.apply(null,args)); + for(const i in args) args[i] = cxw.convertArgNoCheck( + argTypes[i], args[i], args, i + ); + return cxw.convertResultNoCheck(resultType, xf.apply(null,args)); }finally{ target.scopedAllocPop(scope); } @@ -1974,14 +2088,13 @@ self.WhWasmUtilInstaller = function(target){ /** Functions like xCall() but performs argument and result type - conversions as for xWrap(). The first argument is the name of the - exported function to call. The 2nd its the name of its result - type, as documented for xWrap(). The 3rd is an array of argument - type name, as documented for xWrap() (use a falsy value or an - empty array for nullary functions). The 4th+ arguments are - arguments for the call, with the special case that if the 4th - argument is an array, it is used as the arguments for the - call. Returns the converted result of the call. + conversions as for xWrap(). The first, second, and third + arguments are as documented for xWrap(), except that the 3rd + argument may be either a falsy value or empty array to represent + nullary functions. The 4th+ arguments are arguments for the call, + with the special case that if the 4th argument is an array, it is + used as the arguments for the call. Returns the converted result + of the call. This is just a thin wrapper around xWrap(). If the given function is to be called more than once, it's more efficient to use @@ -1990,9 +2103,9 @@ self.WhWasmUtilInstaller = function(target){ arguably more efficient because it will hypothetically free the wrapper function quickly. */ - target.xCallWrapped = function(fname, resultType, argTypes, ...args){ + target.xCallWrapped = function(fArg, resultType, argTypes, ...args){ if(Array.isArray(arguments[3])) args = arguments[3]; - return target.xWrap(fname, resultType, argTypes||[]).apply(null, args||[]); + return target.xWrap(fArg, resultType, argTypes||[]).apply(null, args||[]); }; /** @@ -2005,9 +2118,12 @@ self.WhWasmUtilInstaller = function(target){ It throws if no adapter is found. ACHTUNG: the adapter may require that a scopedAllocPush() is - active and it may allocate memory within that scope. + active and it may allocate memory within that scope. It may also + require additional arguments, depending on the type of + conversion. */ target.xWrap.testConvertArg = cache.xWrap.convertArg; + /** This function is ONLY exposed in the public API to facilitate testing. It should not be used in application-level code, only diff --git a/ext/wasm/demo-worker1-promiser.js b/ext/wasm/demo-worker1-promiser.js index a65cc31b6e..ef955403c5 100644 --- a/ext/wasm/demo-worker1-promiser.js +++ b/ext/wasm/demo-worker1-promiser.js @@ -76,9 +76,7 @@ await wtest('config-get', (ev)=>{ const r = ev.result; log('sqlite3.config subset:', r); - T.assert('boolean' === typeof r.bigIntEnabled) - .assert('string'===typeof r.wasmfsOpfsDir) - .assert('boolean' === typeof r.wasmfsOpfsEnabled); + T.assert('boolean' === typeof r.bigIntEnabled); sqConfig = r; }); logHtml('', @@ -213,7 +211,7 @@ sql: "select 'foo' foo, a bar, 'baz' baz from t limit 2", callback: resultRowTest3, columnNames: [], - rowMode: ':bar' + rowMode: '$bar' }, function(ev){ log("exec() result row:",ev); T.assert(2===resultRowTest3.counter); diff --git a/ext/wasm/dist.make b/ext/wasm/dist.make index 8b1a2d4f1c..7073c24b78 100644 --- a/ext/wasm/dist.make +++ b/ext/wasm/dist.make @@ -56,6 +56,21 @@ dist.common.extras := \ $(dir.common)/SqliteTestUtil.js .PHONY: dist snapshot +# DIST_STRIP_COMMENTS $(call)able to be used in stripping C-style +# from the dist copies of certain files. +# +# $1 = source js file +# $2 = flags for $(bin.stripcomments) +define DIST_STRIP_COMMENTS +$(bin.stripccomments) $(2) < $(1) > $(dist-dir.jswasm)/$(notdir $(1)) || exit; +endef +# STRIP_K1.js = list of JS files which need to be passed through +# $(bin.stripcomments) with a single -k flag. +STRIP_K1.js := $(sqlite3-worker1.js) $(sqlite3-worker1-promiser.js) \ + $(sqlite3-worker1-bundler-friendly.js) $(sqlite3-worker1-promiser-bundler-friendly.js) +# STRIP_K2.js = list of JS files which need to be passed through +# $(bin.stripcomments) with two -k flags. +STRIP_K2.js := $(sqlite3.js) $(sqlite3.mjs) $(sqlite3-bundler-friendly.mjs) ######################################################################## # dist: create the end-user deliverable archive. # @@ -71,7 +86,7 @@ dist.common.extras := \ # target name equal to the archive name. dist: \ $(bin.stripccomments) $(bin.version-info) \ - $(dist.build) \ + $(dist.build) $(STRIP_K1.js) $(STRIP_K2.js) \ $(MAKEFILE) $(MAKEFILE.dist) @echo "Making end-user deliverables..." @rm -fr $(dist-dir.top) @@ -80,10 +95,8 @@ dist: \ @cp -p README-dist.txt $(dist-dir.top)/README.txt @cp -p index-dist.html $(dist-dir.top)/index.html @cp -p $(dist.jswasm.extras) $(dist-dir.jswasm) - @$(bin.stripccomments) -k -k < $(sqlite3.js) \ - > $(dist-dir.jswasm)/$(notdir $(sqlite3.js)) - @$(bin.stripccomments) -k -k < $(sqlite3.mjs) \ - > $(dist-dir.jswasm)/$(notdir $(sqlite3.mjs)) + @$(foreach JS,$(STRIP_K1.js),$(call DIST_STRIP_COMMENTS,$(JS),-k)) + @$(foreach JS,$(STRIP_K2.js),$(call DIST_STRIP_COMMENTS,$(JS),-k -k)) @cp -p $(dist.common.extras) $(dist-dir.common) @set -e; \ vnum=$$($(bin.version-info) --download-version); \ diff --git a/ext/wasm/index.html b/ext/wasm/index.html index 767cc5e74a..969d9553eb 100644 --- a/ext/wasm/index.html +++ b/ext/wasm/index.html @@ -131,7 +131,7 @@ diff --git a/ext/wasm/module-symbols.html b/ext/wasm/module-symbols.html index 6298aeb374..865f1b3338 100644 --- a/ext/wasm/module-symbols.html +++ b/ext/wasm/module-symbols.html @@ -133,10 +133,10 @@ The sqlite3.version object exposes the following...

- +

sqlite3_...() Function List

- +

The sqlite3.capi namespace exposes the following sqlite3_...() functions... @@ -194,6 +194,7 @@